# Spaces: Running  (page-header text captured along with the source; not part of the program)
import gradio as gr | |
import pandas as pd | |
import tiktoken | |
import time | |
from sentence_transformers import SentenceTransformer | |
import os | |
import torch | |
from openai.embeddings_utils import get_embedding, cosine_similarity | |
import os | |
# Module-level state shared by every request: the pickled DataFrame that
# `search` ranks against and the sentence-embedding model used for queries.
# NOTE(review): read_pickle unpickles arbitrary objects; keep this file
# trusted and shipped with the app.
_DATA_FILE = "entire_data.pkl"
_MODEL_NAME = "all-mpnet-base-v2"

df = pd.read_pickle(_DATA_FILE)
embedder = SentenceTransformer(_MODEL_NAME)
def search(query, n=15, max_reviews=3):
    """Return the entries whose stored embeddings best match *query*.

    The query is embedded with the module-level ``embedder`` and compared,
    by cosine similarity, with every vector in ``df["embedding"]``. The
    ``n`` highest-scoring rows are kept and grouped by ``name``; one result
    is produced per name, ordered by that name's best-scoring row.

    Args:
        query: Text to search for.
        n: Number of top-scoring rows considered before grouping by name.
        max_reviews: Maximum number of ``text`` entries returned per name.

    Returns:
        A list of dicts with the keys ``"name"``, ``"description"``,
        ``"relevance score"``, ``"rating"`` and ``"relevant_reviews"``.
        The list is empty when no rows are selected.
    """
    # Column vector sized by whatever the model returns (no hard-coded
    # dimension), built once instead of once per row.
    query_column = embedder.encode(query).reshape(-1, 1)

    # Scores live in a local, position-indexed Series rather than being
    # written back into the shared module-level `df`, so one query cannot
    # overwrite another's scores and duplicate index labels cannot confuse
    # the lookups below.
    # Assumes each stored embedding is a single vector of the same length as
    # the query embedding -- TODO confirm against the pickled data.
    scores = pd.Series(
        [cosine_similarity(emb, query_column).item() for emb in df["embedding"]],
        dtype=float,
    )

    # sort_values places NaN scores last, so they never outrank real matches.
    top_positions = scores.sort_values(ascending=False).head(n).index.to_numpy()
    top = df.iloc[top_positions].assign(
        similarity=scores.to_numpy()[top_positions]
    )

    # sort=False keeps groups in order of first appearance, i.e. ranked by
    # each name's best row; rows inside a group stay in descending score
    # order, so position 0 is always that name's best match.
    # NOTE(review): as before, "rating" is the rating of the best-matching
    # row only -- confirm whether a mean over the group was intended.
    return [
        {
            "name": name,
            "description": group["description"].iloc[0],
            "relevance score": float(group["similarity"].iloc[0]),
            "rating": float(group["rating"].iloc[0]),
            "relevant_reviews": group["text"].head(max_reviews).tolist(),
        }
        for name, group in top.groupby("name", sort=False)
    ]
def greet(query):
    """Gradio callback: return the `search` results for *query* unchanged."""
    return search(query)
# Web UI: a single text input whose `greet` result is rendered as JSON.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="json",
)
demo.launch()