atifsial123 committed on
Commit
c12ca9b
·
verified ·
1 Parent(s): a78f83f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -39
app.py CHANGED
@@ -12,25 +12,17 @@ install("torch")
12
  install("pandas")
13
  install("scikit-learn")
14
  install("gradio")
15
-
16
  import os
17
  import pandas as pd
18
  import gradio as gr
19
  from transformers import AutoModel, AutoTokenizer
 
 
20
 
21
- # Load the pre-trained model and tokenizer
22
- def load_model_and_tokenizer():
23
- try:
24
- model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
25
- tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
26
- return model, tokenizer
27
- except Exception as e:
28
- print(f"Error loading model or tokenizer: {e}")
29
- return None, None
30
-
31
- # Function to load the dataset
32
  def load_dataset():
33
  file_path = "Valid-part-2.xlsx"
 
34
  if not os.path.exists(file_path):
35
  raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")
36
 
@@ -42,44 +34,60 @@ def load_dataset():
42
  print(f"Error loading dataset: {e}")
43
  return None
44
 
45
- # Function to search by name and return the PEC number
46
- def search_by_name(name, df):
47
- if df is None:
48
- return "Error: Dataset not loaded."
 
 
 
 
 
 
49
 
50
- try:
51
- name_matches = df[df['name'].str.contains(name, case=False, na=False)]
52
- if not name_matches.empty:
53
- return f"Your PEC number: {name_matches['PEC number'].values[0]}"
54
- else:
55
- return "No matches found for your name."
56
- except Exception as e:
57
- return f"Error during search: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- # Gradio interface
60
  def build_interface():
61
  df = load_dataset() # Load your dataset
62
  if df is None:
63
  return None
 
 
 
64
 
65
  iface = gr.Interface(
66
- fn=lambda name: search_by_name(name, df),
67
- inputs=gr.Textbox(label="Please write your Name"),
68
- outputs=gr.Textbox(label="Your PEC number"),
69
- title="PEC Number Lookup",
70
- description="Enter your name to find your PEC number."
71
  )
72
  return iface
73
 
74
- # Main function to run the Gradio app
75
  if __name__ == "__main__":
76
- model, tokenizer = load_model_and_tokenizer()
77
- if model is None or tokenizer is None:
78
- print("Failed to load model or tokenizer. Exiting.")
79
  else:
80
- iface = build_interface()
81
- if iface is not None:
82
- iface.launch()
83
- else:
84
- print("Failed to build interface due to dataset issues.")
85
 
 
12
  install("pandas")
13
  install("scikit-learn")
14
  install("gradio")
 
15
  import os
16
  import pandas as pd
17
  import gradio as gr
18
  from transformers import AutoModel, AutoTokenizer
19
+ import torch
20
+ from sklearn.model_selection import train_test_split
21
 
22
+ # Load your dataset
 
 
 
 
 
 
 
 
 
 
23
  def load_dataset():
24
  file_path = "Valid-part-2.xlsx"
25
+ print(f"Current working directory: {os.getcwd()}")
26
  if not os.path.exists(file_path):
27
  raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")
28
 
 
34
  print(f"Error loading dataset: {e}")
35
  return None
36
 
37
+ # Preprocess the data
38
+ def preprocess_data(df):
39
+ # Add your preprocessing steps here
40
+ # For example: cleaning, tokenization, etc.
41
+ return df
42
+
43
+ # Train your model
44
+ def train_model(df):
45
+ # Split the dataset into training and testing sets
46
+ train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
47
 
48
+ # Load your pre-trained model and tokenizer from Hugging Face
49
+ tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
50
+ model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
51
+
52
+ # Add your training code here
53
+ # This may involve tokenizing the data and feeding it into the model
54
+ return model
55
+
56
+ # Define the Gradio interface function
57
+ def predict(input_text):
58
+ # Load the model and tokenizer
59
+ tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
60
+ model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
61
+
62
+ # Tokenize input and make predictions
63
+ inputs = tokenizer(input_text, return_tensors="pt")
64
+ with torch.no_grad():
65
+ outputs = model(**inputs)
66
+
67
+ # Process the outputs as needed (e.g., extracting relevant information)
68
+ return outputs.last_hidden_state
69
 
70
+ # Build the Gradio interface
71
  def build_interface():
72
  df = load_dataset() # Load your dataset
73
  if df is None:
74
  return None
75
+
76
+ df = preprocess_data(df) # Preprocess the dataset
77
+ model = train_model(df) # Train your model
78
 
79
  iface = gr.Interface(
80
+ fn=predict,
81
+ inputs=gr.inputs.Textbox(lines=2, placeholder="Enter text here..."),
82
+ outputs="text"
 
 
83
  )
84
  return iface
85
 
86
+ # Run the Gradio interface
87
  if __name__ == "__main__":
88
+ iface = build_interface()
89
+ if iface:
90
+ iface.launch()
91
  else:
92
+ print("Failed to build the Gradio interface. Please check the dataset and model.")
 
 
 
 
93