Spaces:
Sleeping
Sleeping
File size: 4,769 Bytes
e904228 96c3529 c346bb7 70deb6a fcbfa92 d8ecef0 70deb6a 96c3529 e904228 c346bb7 70deb6a c346bb7 70deb6a 96c3529 e904228 d8ecef0 70deb6a 315b363 70deb6a 315b363 70deb6a 315b363 70deb6a d8ecef0 70deb6a c346bb7 70deb6a e904228 70deb6a 96c3529 70deb6a 315b363 70deb6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import streamlit as st
import requests
from PIL import Image
import pytesseract
import os
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
import re
import json
# Hugging Face access token, read from the "HFBearer" environment variable.
api_key = os.environ.get("HFBearer")
# os.environ only accepts str values: assigning None would raise TypeError at
# import time, so the token is exported only when "HFBearer" is actually set.
if api_key is not None:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
# URL of the Hugging Face inference endpoint queried by the app
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
# Function to extract text from image
def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    Args:
        image: Image handed unchanged to ``pytesseract.image_to_string``.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(image)
# Function to extract JSON from text
def extract_json(text):
    """Decode the JSON document enclosed in <JSON>...</JSON> tags in *text*.

    Args:
        text: Raw text expected to contain a <JSON>...</JSON> section.

    Returns:
        The decoded JSON value when the first tagged section parses.
        Otherwise one of two messages is returned instead of raising:
        "Aucun JSON trouvé" when no tagged section exists (or *text* is not
        a string), and "Erreur de décodage JSON" when the section is not
        valid JSON.
    """
    # A non-string input cannot hold a tagged section; report it like a
    # missing section rather than letting re.search raise TypeError.
    if not isinstance(text, str):
        return "Aucun JSON trouvé"

    # Non-greedy capture so only the first <JSON>...</JSON> pair is used;
    # DOTALL lets the payload span several lines.
    match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
    if match is None:
        return "Aucun JSON trouvé"

    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return "Erreur de décodage JSON"
# Function to get metadata title from image
def get_image_metadata(image):
    """Return the title of *image*: its file name up to the first dot.

    Args:
        image: Object exposing a ``name`` attribute that holds the file name.

    Returns:
        The part of ``image.name`` that precedes the first ``.``. A name
        without any dot is returned whole, and a name with several dots is
        cut at the first one (``"a.b.png"`` gives ``"a"``).
    """
    # You can customize this function to extract other metadata as needed
    title, _, _ = image.name.partition('.')
    return title
def count_tokens(text):
    """Return the number of whitespace-separated words in *text*.

    This is a plain word count, not a model tokenizer count.
    """
    return len(text.split())
# Maps an image title (file name without extension) to the comma-separated
# list of parameter names the model is asked to extract for that document.
# The strings are inserted verbatim into the prompt, so every name is written
# as a plain snake_case identifier.
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    # Fixed: a stray ")" used to follow ECG_repos_valeur_par_minute.
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    # Fixed: "score calcique" now matches the "score_calcique" spelling above.
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
# Streamlit app layout: page title followed by a one-line description
st.title("API Query App")
st.write("This app allows you to query the API and retrieve responses.")
# Prompt template text (French). Despite its name, `user_input` is a fixed
# template and is not typed by the user. `{parameters}` and `{texte}` are the
# two template variables; the doubled braces around the example object are
# escapes so that literal braces survive template formatting.
user_input = """
Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
Liste des paramètres : {parameters}
Voici un exemple de réponse valide :
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>
Voici le texte à partir duquel vous devez extraire les paramètres :
{texte}
"""
# Build the prompt template object from the text above
prompt = PromptTemplate.from_template(user_input)
# Client for the inference endpoint located at API_URL
llm = HuggingFaceEndpoint(
endpoint_url=API_URL,
)
# Chain that feeds the formatted prompt to the endpoint
llm_chain = prompt | llm
# File uploader for multiple images
uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)

# On submit: OCR every uploaded image, ask the model to extract the parameters
# registered for that image's title, and show the JSON found in each answer.
if st.button("Submit"):
    if uploaded_images:
        all_json_data = {}  # Extracted JSON (or error message) keyed by image title
        max_text_length = 500  # Adjust as needed to keep total tokens under 1024

        for uploaded_image in uploaded_images:
            # Get metadata title from the image; it selects the parameter list.
            title = get_image_metadata(uploaded_image)
            parameters = image_params.get(title)
            if parameters is None:
                # An unknown title used to raise KeyError and abort the whole
                # run; skip only this file and keep processing the others.
                st.warning(
                    f"No parameter list is defined for '{title}'; "
                    f"skipping {uploaded_image.name}."
                )
                continue

            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                image = Image.open(uploaded_image)
                extracted_text = extract_text_from_image(image)

            # Keep only the first max_text_length words of the OCR text.
            if count_tokens(extracted_text) > max_text_length:
                extracted_text = " ".join(extracted_text.split()[:max_text_length])

            with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
                output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
            st.success(f"Response received for {uploaded_image.name}!")

            # Extract JSON from the API output and store it with title as key
            json_data = extract_json(output)
            all_json_data[title] = json_data
            st.write(title, json_data)

        # Display all extracted JSON data (previously only this label was
        # written and the collected dictionary was never shown).
        st.write("Extracted JSON Data for all images.")
        st.json(all_json_data)
    else:
        st.warning("Please upload at least one image to extract text.")