File size: 4,769 Bytes
e904228
96c3529
c346bb7
 
70deb6a
 
 
 
 
 
fcbfa92
d8ecef0
70deb6a
96c3529
 
 
e904228
c346bb7
70deb6a
c346bb7
 
 
70deb6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96c3529
 
 
e904228
d8ecef0
70deb6a
 
 
 
 
 
 
315b363
70deb6a
 
 
315b363
70deb6a
315b363
70deb6a
 
 
d8ecef0
70deb6a
c346bb7
70deb6a
 
e904228
70deb6a
96c3529
70deb6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315b363
70deb6a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
import requests
from PIL import Image
import pytesseract
import os
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
import re
import json

# Hugging Face access token, read from the "HFBearer" environment variable and
# re-exported as HUGGINGFACEHUB_API_TOKEN.
api_key = os.environ.get("HFBearer")
if api_key:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
# When "HFBearer" is unset, api_key is None and assigning None into os.environ
# raises TypeError at import time. The export is skipped in that case so that a
# token already present in HUGGINGFACEHUB_API_TOKEN is left untouched.

# URL of the inference endpoint queried by the app
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"

# Function to extract text from image
def extract_text_from_image(image):
    """Run OCR on ``image`` and return the recognised text.

    Args:
        image: Image object handed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    return pytesseract.image_to_string(image)

# Function to extract JSON from text
def extract_json(text):
    """Extract the JSON payload wrapped in ``<JSON>...</JSON>`` tags.

    Every tagged block in ``text`` is tried in order and the first one that
    decodes is returned, so a malformed block does not hide a valid block
    that appears later in the text.

    Args:
        text: String to search for ``<JSON>``/``</JSON>`` delimited blocks.

    Returns:
        The decoded JSON value of the first block that parses. If at least
        one block is present but none decodes, the string
        "Erreur de décodage JSON". If no block is present, the string
        "Aucun JSON trouvé".
    """
    found_block = False
    # DOTALL lets the payload span several lines; the non-greedy group stops
    # at the nearest closing tag so consecutive blocks are matched separately.
    for match in re.finditer(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL):
        found_block = True
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            # Malformed block: keep looking for a later one that parses.
            continue

    if found_block:
        return "Erreur de décodage JSON"
    return "Aucun JSON trouvé"

# Function to get metadata title from image
def get_image_metadata(image):
    """Derive a title for ``image`` from its file name.

    The title is the part of ``image.name`` that precedes the first dot, so
    "ECG.png" gives "ECG" and "a.b.png" gives "a". A name without any dot is
    returned unchanged.

    Args:
        image: Object exposing the file name as a ``name`` string attribute.

    Returns:
        The file name truncated at its first dot.
    """
    stem, _, _ = image.name.partition('.')
    return stem

def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``.

    Despite the name, this counts words produced by ``str.split()``, not
    model tokens.
    """
    words = text.split()
    return len(words)

# Maps a document title (the uploaded file's name up to its first dot) to the
# comma-separated list of parameter names inserted into the prompt.
# Parameter names use underscores and contain no other punctuation, so they
# can be reused verbatim as JSON keys.
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}

# Streamlit app layout: page title and a short description
st.title("API Query App")
st.write("This app allows you to query the API and retrieve responses.")

# Prompt template (French) sent to the model. It has two placeholders:
#   {parameters} - the comma-separated parameter names to extract
#   {texte}      - the text the parameters are extracted from
# The model is asked to answer with a JSON object between <JSON> and </JSON>.
# The doubled braces in the example are escapes for literal braces, so they
# are not treated as template placeholders.
user_input = """
Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
Liste des paramètres : {parameters}

Voici un exemple de réponse valide :
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>

Voici le texte à partir duquel vous devez extraire les paramètres :
{texte}
"""

# Build the prompt object from the template string above
prompt = PromptTemplate.from_template(user_input)

# Client for the inference endpoint at API_URL
llm = HuggingFaceEndpoint(
    endpoint_url=API_URL,
)

# Chain that pipes the filled-in prompt into the model
llm_chain = prompt | llm

# File uploader for multiple images
uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)

# On submit: for every uploaded image, run OCR, query the model with the
# parameter list matching the image's title, and show the extracted JSON.
if st.button("Submit"):
    if uploaded_images:
        all_json_data = {}  # JSON data extracted for each image, keyed by title
        max_text_length = 500  # Adjust as needed to keep total tokens under 1024

        for uploaded_image in uploaded_images:
            # The title (file name up to its first dot) selects the parameter list
            title = get_image_metadata(uploaded_image)
            parameters = image_params.get(title)
            if parameters is None:
                # Unknown document type: report it and carry on with the other
                # uploads instead of aborting the whole batch on a KeyError.
                st.warning(
                    f"Skipping {uploaded_image.name}: no parameter list is defined for '{title}'. "
                    f"Expected one of: {', '.join(image_params)}."
                )
                continue

            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                image = Image.open(uploaded_image)
                extracted_text = extract_text_from_image(image)

            # Keep only the first max_text_length words of long OCR output
            if count_tokens(extracted_text) > max_text_length:
                extracted_text = " ".join(extracted_text.split()[:max_text_length])

            with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
                output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
            st.success(f"Response received for {uploaded_image.name}!")

            # Extract JSON from the API output and store it with title as key
            json_data = extract_json(output)
            all_json_data[title] = json_data
            st.write(title, json_data)

        # Display all extracted JSON data
        st.write("Extracted JSON Data for all images.")
        if all_json_data:
            st.write(all_json_data)
    else:
        st.warning("Please upload at least one image to extract text.")