import os
import subprocess
import sys

# Function to install a package if it is not already installed
def install(package):
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Ensure the necessary packages are installed
install("transformers")
install("torch")
install("pandas")
install("gradio")
install("openpyxl")  # needed by pandas to read .xlsx files

import pandas as pd
import gradio as gr
from transformers import AutoModel, AutoTokenizer
import torch

# Load the dataset containing PEC numbers and names
def load_dataset(file_path='PEC_Numbers_and_Names.xlsx'):
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    df = pd.read_excel(file_path)
    return df

# Look up the PEC number for a given name (with debug prints)
def get_pec_number(name, df):
    print("Column names in DataFrame:", df.columns.tolist())  # Print the column names
    print(f"Looking for Name: '{name}'")

    # Normalize the input and the dataset column before comparing
    df['Name'] = df['Name'].str.strip().str.lower()
    name = name.strip().lower()

    result = df[df['Name'] == name]
    if not result.empty:
        print(f"Found PEC Number: {result.iloc[0]['PEC No.']}")
        return result.iloc[0]['PEC No.']
    else:
        print("Name not found.")
        return "Name not found."

# Process the name with the Hugging Face model
def process_with_model(name):
    inputs = tokenizer(name, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean-pool the token embeddings into a single vector
    return outputs.last_hidden_state.mean(dim=1).squeeze().tolist()

# Combine both functions to create a prediction
def predict(name, file):
    try:
        # Load the dataset from the uploaded file if provided
        if file is not None:
            # Gradio may pass either a file-like object or a plain path, depending on version
            path = file.name if hasattr(file, "name") else file
            df = pd.read_excel(path)
        else:
            df = load_dataset()

        pec_number = get_pec_number(name, df)
        model_output = process_with_model(name)
        return f"PEC Number: {pec_number}\nModel Output: {model_output}"
    except FileNotFoundError as e:
        return str(e)

# Load the model and tokenizer from Hugging Face
tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)

# Build the Gradio interface with a file upload option
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter Name..."),
        gr.File(label="Upload PEC Numbers and Names file (optional)")
    ],
    outputs="text",
    title="Name to PEC Number Lookup with Model Integration",
    description="Enter a name to retrieve the corresponding PEC number and process it with a Hugging Face model. Optionally, upload the Excel file if it is not available locally."
)

# Run the Gradio interface
if __name__ == "__main__":
    iface.launch()
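
# Quick check without the UI (a sketch, assuming the spreadsheet exists and the
# model has loaded): call predict() directly with a hypothetical name and no
# uploaded file, then inspect the returned text.
#
# print(predict("Ali Khan", None))  # "Ali Khan" is a hypothetical example name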