import streamlit as st
import requests
from PIL import Image
import pytesseract
import os
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
import re
import json
# Hugging Face credentials: mirror the "HFBearer" environment variable into
# HUGGINGFACEHUB_API_TOKEN (presumably the variable the HuggingFaceEndpoint
# client reads -- confirm against the langchain_huggingface documentation).
api_key = os.environ.get("HFBearer")
if api_key is not None:
    # os.environ only accepts str values; assigning None raises TypeError at
    # import time. Skipping the assignment also leaves an already-exported
    # HUGGINGFACEHUB_API_TOKEN untouched.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
# URL of the inference endpoint queried by the app
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
# Function to extract text from image
def extract_text_from_image(image):
    """Return the text that pytesseract's OCR reads from *image*."""
    return pytesseract.image_to_string(image)
# Function to extract JSON from text
def extract_json(text):
    """Extract the first valid JSON value that starts with ``{`` in *text*.

    The text may wrap the JSON in arbitrary delimiters or surrounding prose
    (for example ``<json>{...}</json>``); everything outside the JSON object
    is ignored.

    Args:
        text: String to search, typically the raw model output.

    Returns:
        The decoded JSON object on success. If *text* contains at least one
        ``{`` but no position decodes as valid JSON, the string
        ``"Erreur de décodage JSON"``. If *text* contains no ``{`` at all,
        the string ``"Aucun JSON trouvé"``.
    """
    decoder = json.JSONDecoder()
    saw_brace = False
    # Try every opening brace in order: raw_decode parses one JSON value
    # starting at the given index and tolerates trailing text after it.
    for brace in re.finditer(r'\{', text):
        saw_brace = True
        try:
            json_data, _end = decoder.raw_decode(text, brace.start())
        except json.JSONDecodeError:
            continue
        return json_data
    if saw_brace:
        return "Erreur de décodage JSON"
    return "Aucun JSON trouvé"
# Function to get metadata title from image
def get_image_metadata(image):
    """Return the title of *image*: the part of ``image.name`` before its first dot.

    Extend this function if other metadata is needed.
    """
    title, _dot, _remainder = image.name.partition('.')
    return title
def count_tokens(text):
    """Return the number of whitespace-separated words in *text*."""
    words = text.split()
    return len(words)
# Comma-separated parameter names to request from the model, keyed by the
# document title (the uploaded file's name up to its first dot).
# Every name is a snake_case identifier with no stray punctuation, because
# the string is inserted verbatim into the prompt.
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
# Streamlit app layout
st.title("API Query App")
st.write("This app allows you to query the API and retrieve responses.")

# Prompt template sent to the model. {parameters} and {texte} are filled in
# per image; doubled braces are literal braces in the rendered prompt.
# NOTE(review): the original sentence read "écrit entre et ." with the two
# delimiters missing. <json> / </json> are assumed here -- confirm that these
# are the tags the deployed model is expected to emit.
user_input = """
Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <json> et </json>.
Liste des paramètres : {parameters}
Voici un exemple de réponse valide :
<json>{{"date_naissance": "", "prenom": "", "nom": ""}}</json>
Voici le texte à partir duquel vous devez extraire les paramètres :
{texte}
"""
prompt = PromptTemplate.from_template(user_input)

# Client for the hosted inference endpoint.
llm = HuggingFaceEndpoint(
    endpoint_url=API_URL,
)
# Pipe the rendered prompt into the model.
llm_chain = prompt | llm

# File uploader for multiple images
uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
# On submit: OCR each uploaded image, ask the model for the parameters that
# match the document type, and show the JSON extracted from each response.
if st.button("Submit"):
    if uploaded_images:
        all_json_data = {}  # Extracted JSON (or error message) keyed by document title
        # Word cap applied to the OCR text; loop-invariant, so defined once.
        max_text_length = 500  # Adjust as needed to keep total tokens under 1024
        for uploaded_image in uploaded_images:
            # Get metadata title from the image and look up its parameter list
            # first, so an unknown document type is reported instead of
            # raising KeyError (and no OCR work is wasted on it).
            title = get_image_metadata(uploaded_image)
            parameters = image_params.get(title)
            if parameters is None:
                st.warning(
                    f"Skipping {uploaded_image.name}: no parameter list is defined "
                    f"for document type '{title}'."
                )
                continue

            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                image = Image.open(uploaded_image)
                extracted_text = extract_text_from_image(image)

            # Truncate overly long OCR output to the first max_text_length words.
            if count_tokens(extracted_text) > max_text_length:
                extracted_text = " ".join(extracted_text.split()[:max_text_length])

            with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
                output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
            st.success(f"Response received for {uploaded_image.name}!")

            # Extract JSON from the API output and store it with title as key
            json_data = extract_json(output)
            all_json_data[title] = json_data
            st.write(title, json_data)

        # Display all extracted JSON data (previously collected but never shown)
        st.write("Extracted JSON Data for all images.", all_json_data)
    else:
        st.warning("Please upload at least one image to extract text.")