|
import os
import re

import cv2
import easyocr
import gradio as gr
from simple_salesforce import Salesforce
from transformers import LayoutLMForTokenClassification, LayoutLMTokenizer
|
|
|
|
|
# Salesforce credentials are read from the environment so that no secret is
# stored in source control. Set SALESFORCE_USERNAME, SALESFORCE_PASSWORD and
# SALESFORCE_SECURITY_TOKEN before starting the app; a missing variable
# yields an empty string here and the Salesforce login will then fail.
Salesforce_User_Name = os.environ.get("SALESFORCE_USERNAME", "")

Salesforce_Password = os.environ.get("SALESFORCE_PASSWORD", "")

# The instance URL is not a secret, so the previous value stays as the default.
SALESFORCE_INSTANCE_URL = os.environ.get(
    "SALESFORCE_INSTANCE_URL",
    'https://sathkruthatechsolutions63-dev-ed.develop.lightning.force.com',
)

# Despite the constant's name, this value is passed to the Salesforce client
# as ``security_token``, hence the environment variable's name.
SALESFORCE_ACCESS_TOKEN = os.environ.get("SALESFORCE_SECURITY_TOKEN", "")
|
|
|
|
|
# EasyOCR reader configured for English only; built once at import time.
reader = easyocr.Reader(['en'])

# Both LayoutLM objects are loaded from the same pretrained checkpoint.
# NOTE(review): neither `model` nor `tokenizer` is referenced anywhere else in
# the visible part of this file - confirm they are still needed before paying
# the cost of loading them at import time.
_LAYOUTLM_CHECKPOINT = "microsoft/layoutlm-large-uncased"

model = LayoutLMForTokenClassification.from_pretrained(_LAYOUTLM_CHECKPOINT)

tokenizer = LayoutLMTokenizer.from_pretrained(_LAYOUTLM_CHECKPOINT)
|
|
|
|
|
# Module-level Salesforce client, created once at import time and used by
# create_salesforce_record.
sf = Salesforce(
    username=Salesforce_User_Name,
    password=Salesforce_Password,
    # The constant is named "access token" but is supplied as the security token.
    security_token=SALESFORCE_ACCESS_TOKEN,
)
|
|
|
|
|
def extract_patient_info(image):
    """Run OCR on an uploaded image, send the parsed fields to Salesforce, and return the text.

    Args:
        image: Image array supplied by the Gradio image input, or ``None``
            when the button is pressed before anything has been uploaded.

    Returns:
        The text of every OCR detection joined with single spaces, or an
        empty string when no image was supplied.
    """
    # Gradio hands over None when nothing has been uploaded; cv2.cvtColor
    # would raise on it, so report the problem and stop early.
    if image is None:
        print("Error: No image provided!")
        return ""

    # Swap the first and third colour channels before handing the array to OCR.
    # NOTE(review): Gradio numpy images are presumably already RGB, in which
    # case this produces BGR - confirm which channel order the reader expects.
    ocr_input = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    result = reader.readtext(ocr_input)
    print("OCR Result:", result)

    # Index 1 of each detection is used as its recognised text.
    extracted_text = " ".join(detection[1] for detection in result)
    print("Extracted Text:", extracted_text)

    details = extract_details_from_text(extracted_text)
    print("Parsed Details:", details)

    create_salesforce_record(details)

    return extracted_text
|
|
|
|
|
def extract_details_from_text(extracted_text):
    """Pull the patient's name, age, gender and phone number out of OCR text.

    Each field is located by the label that precedes it ("Name", "Age",
    "Gender", "Phone number"). A field that cannot be found is left out of
    the result and an error line is printed for it.

    Args:
        extracted_text: The OCR output as a single string.

    Returns:
        A dict holding whichever of the keys ``'Name'``, ``'Age'``,
        ``'Gender'`` and ``'Phone Number'`` were found. All values are
        strings; gender is normalised to ``'Male'`` or ``'Female'``.
    """
    details = {}

    name_match = re.search(r"Name[:\s]*([A-Za-z\s]+)", extracted_text)
    name = ""
    if name_match:
        # The letters-and-spaces capture runs on into the next field label when
        # the OCR text is one space-joined line (e.g. "John Doe Age"), so cut
        # the capture at the first label this parser knows and trim the rest.
        name = re.split(
            r"\b(?:Age|Gender|Phone)\b",
            name_match.group(1),
            maxsplit=1,
            flags=re.IGNORECASE,
        )[0].strip()
    if name:
        details['Name'] = name
    else:
        # Also reached when the capture held nothing but whitespace.
        print("Error: Name not found!")

    age_match = re.search(r"Age[:\s]*([\d]+)", extracted_text)
    if age_match:
        details['Age'] = age_match.group(1)
    else:
        print("Error: Age not found!")

    gender_match = re.search(r"Gender[:\s]*(Male|Female)", extracted_text, re.IGNORECASE)
    if gender_match:
        # The match ignores case, so normalise "male"/"FEMALE" to the two
        # spellings named in the pattern.
        details['Gender'] = gender_match.group(1).capitalize()
    else:
        print("Error: Gender not found!")

    # Match the label in any letter case ("Phone Number" as well as
    # "Phone number") and tolerate OCR dropping or doubling the inner space.
    phone_match = re.search(r"Phone\s*number[:\s]*([\d]+)", extracted_text, re.IGNORECASE)
    if phone_match:
        details['Phone Number'] = phone_match.group(1)
    else:
        print("Error: Phone number not found!")

    return details
|
|
|
|
|
def create_salesforce_record(details):
    """Create a ``Patient_Registration__c`` record from parsed patient details.

    Args:
        details: Mapping expected to hold the keys ``'Name'``, ``'Age'``,
            ``'Gender'`` and ``'Phone Number'``. ``'Age'`` must be
            convertible with ``int``.

    Returns:
        None. Problems are reported on stdout instead of being raised:
        nothing is sent when a required key is absent or the age is not an
        integer, and any exception from the Salesforce call is caught and
        printed.
    """
    if details is None:
        details = {}

    # The text parser leaves out fields it could not find, so check for them
    # up front instead of letting the dict lookups below raise KeyError.
    required = ('Name', 'Age', 'Gender', 'Phone Number')
    missing = [field for field in required if field not in details]
    if missing:
        print(f"Error: Cannot create Salesforce record, missing fields: {', '.join(missing)}")
        return

    try:
        age = int(details['Age'])
    except (TypeError, ValueError):
        print(f"Error: Cannot create Salesforce record, invalid age: {details['Age']!r}")
        return

    data = {
        'Name__c': details['Name'],
        'Age__c': age,
        'Gender__c': details['Gender'],
        'Phone_Number__c': details['Phone Number'],
    }

    print("Data to be inserted into Salesforce:", data)

    # Best-effort insert: a failure is reported but must not abort the caller,
    # which still has OCR text to return.
    try:
        sf.Patient_Registration__c.create(data)
    except Exception as e:
        print(f"Error creating Salesforce record: {e}")
    else:
        print("Salesforce record created successfully!")
|
|
|
|
|
# Gradio UI: a heading, an image upload, a textbox for the OCR text and a
# button that triggers processing.
with gr.Blocks() as demo:
    gr.Markdown("### OCR Using LayoutLM Pretrained Model with EasyOCR and Salesforce Integration")

    image_input = gr.Image(type="numpy", label="Upload Image")
    output_text = gr.Textbox(label="Extracted Text")
    process_button = gr.Button("Process Image")

    # On click, pass the uploaded image to extract_patient_info and show the
    # string it returns in the textbox.
    process_button.click(
        fn=extract_patient_info,
        inputs=image_input,
        outputs=output_text,
    )
|
|
|
|
|
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|