simonza committed
Commit
6afda1a
Parent: b0a36c0

Update app.py

Files changed (1)
  1. app.py +14 -160
app.py CHANGED
@@ -6,19 +6,7 @@ pinecone.init(
     environment="asia-southeast1-gcp-free" # find next to api key
 )
 
-# index_name = "hybrid-image-search"
-
-# if index_name not in pinecone.list_indexes():
-#     # create the index
-#     pinecone.create_index(
-#         index_name,
-#         dimension=512,
-#         metric="dotproduct",
-#         pod_type="s1"
-#     )
-index_name = pinecone.list_indexes()[0]
-print(index_name)
-
+index_name = "hybrid-image-search"
 index = pinecone.GRPCIndex(index_name)
 
 from datasets import load_dataset
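
Aside — a hedged sketch (not part of the commit) combining the hard-coded index name added above with the fallback the removed lines used, so the app can still start if "hybrid-image-search" does not exist; it uses only pinecone-client calls already present in this file:

index_name = "hybrid-image-search"
if index_name not in pinecone.list_indexes():
    # fall back to the first available index, as the removed code did
    index_name = pinecone.list_indexes()[0]
index = pinecone.GRPCIndex(index_name)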
@@ -31,14 +19,6 @@ fashion = load_dataset(
 
 images = fashion["image"]
 metadata = fashion.remove_columns("image")
-images[900]
-
-import pandas as pd
-
-metadata = metadata.to_pandas()
-filtered = metadata[(metadata['gender'] == 'Men') & (metadata['articleType'] == 'Jeans') & (metadata['baseColour'] == 'Blue')]
-print(len(filtered))
-metadata.head()
 
 import requests
 
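Aside — the deleted pandas filter can be written directly against the datasets object; a hedged sketch, assuming metadata remains a datasets.Dataset after remove_columns:

# same Men / Jeans / Blue filter, via datasets.Dataset.filter
filtered = metadata.filter(
    lambda row: row['gender'] == 'Men'
    and row['articleType'] == 'Jeans'
    and row['baseColour'] == 'Blue'
)
print(len(filtered))
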
@@ -61,14 +41,8 @@ def tokenize_func(text):
     return tokenizer.convert_ids_to_tokens(token_ids)
 
 bm25 = pinecone_text.BM25(tokenize_func)
-
-tokenize_func('Turtle Check Men Navy Blue Shirt')
-
 bm25.fit(metadata['productDisplayName'])
 
-display(metadata['productDisplayName'][0])
-bm25.transform_query(metadata['productDisplayName'][0])
-
 from sentence_transformers import SentenceTransformer
 import transformers.models.clip.image_processing_clip
 import torch
@@ -80,87 +54,15 @@ model = SentenceTransformer(
     'sentence-transformers/clip-ViT-B-32',
     device=device
 )
-model
-
-dense_vec = model.encode([metadata['productDisplayName'][0]])
-dense_vec.shape
-
-#len(fashion)
-
-"""## Encode the dataset to index
-
-
-"""
-
-# from tqdm.auto import tqdm
-
-# batch_size = 200
-
-# for i in tqdm(range(0, len(fashion), batch_size)):
-#     # find end of batch
-#     i_end = min(i+batch_size, len(fashion))
-#     # extract metadata batch
-#     meta_batch = metadata.iloc[i:i_end]
-#     meta_dict = meta_batch.to_dict(orient="records")
-#     # concatenate all metadata fields except for id and year to form a single string
-#     meta_batch = [" ".join(x) for x in meta_batch.loc[:, ~meta_batch.columns.isin(['id', 'year'])].values.tolist()]
-#     # extract image batch
-#     img_batch = images[i:i_end]
-#     # create sparse BM25 vectors
-#     sparse_embeds = [bm25.transform_doc(text) for text in meta_batch]
-#     # create dense vectors
-#     dense_embeds = model.encode(img_batch).tolist()
-#     # create unique IDs
-#     ids = [str(x) for x in range(i, i_end)]
-
-#     upserts = []
-#     # loop through the data and create dictionaries for uploading documents to pinecone index
-#     for _id, sparse, dense, meta in zip(ids, sparse_embeds, dense_embeds, meta_dict):
-#         upserts.append({
-#             'id': _id,
-#             'sparse_values': sparse,
-#             'values': dense,
-#             'metadata': meta
-#         })
-#     # upload the documents to the new hybrid index
-#     index.upsert(upserts)
-
-# show index description after uploading the documents
-index.describe_index_stats()
+
+
 
 from IPython.core.display import HTML
 from io import BytesIO
 from base64 import b64encode
 import pinecone_text
 
-# function to display product images
-def display_result(image_batch):
-    figures = []
-    for img in image_batch:
-        b = BytesIO()
-        img.save(b, format='png')
-        figures.append(f'''
-            <figure style="margin: 5px !important;">
-              <img src="data:image/png;base64,{b64encode(b.getvalue()).decode('utf-8')}" style="width: 90px; height: 120px" >
-            </figure>
-        ''')
-    return HTML(data=f'''
-        <div style="display: flex; flex-flow: row wrap; text-align: center;">
-        {''.join(figures)}
-        </div>
-    ''')
-
 def hybrid_scale(dense, sparse, alpha: float):
-    """Hybrid vector scaling using a convex combination
-
-    alpha * dense + (1 - alpha) * sparse
-
-    Args:
-        dense: Array of floats representing
-        sparse: a dict of `indices` and `values`
-        alpha: float between 0 and 1 where 0 == sparse only
-               and 1 == dense only
-    """
     if alpha < 0 or alpha > 1:
         raise ValueError("Alpha must be between 0 and 1")
     # scale sparse and dense vectors to create hybrid search vecs
@@ -171,8 +73,9 @@ def hybrid_scale(dense, sparse, alpha: float):
     hdense = [v * alpha for v in dense]
     return hdense, hsparse
 
-def text_to_image(query, alpha, k_results):
 
+
+def text_to_image(query, alpha, k_results):
     sparse = bm25.transform_query(query)
     dense = model.encode(query).tolist()
 
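Aside — the docstring deleted above defined the scaling as the convex combination alpha * dense + (1 - alpha) * sparse. A hedged sketch of the whole function, with the sparse branch (elided from this diff) reconstructed from that docstring's indices/values description:

def hybrid_scale(dense, sparse, alpha: float):
    if alpha < 0 or alpha > 1:
        raise ValueError("Alpha must be between 0 and 1")
    # sparse is a dict of `indices` and `values`; only the values are scaled
    hsparse = {
        'indices': sparse['indices'],
        'values': [v * (1 - alpha) for v in sparse['values']],
    }
    hdense = [v * alpha for v in dense]  # alpha == 1 -> dense only, alpha == 0 -> sparse only
    return hdense, hsparse
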
@@ -195,40 +98,27 @@ def text_to_image(query, alpha, k_results):
 
     return imgs, description
 
-def show_dir_content():
-    for dirname, _, filenames in os.walk('./'):
-        for filename in filenames:
-            print(os.path.join(dirname, filename))
 
-import shutil
-from PIL import Image
-import os
 
 counter = {"dir_num": 1}
 img_files = {'x':[]}
 
 def img_to_file_list(imgs):
-
-    os.chdir('/content')
-
     path = "searches"
-    sub_path = 'content/' + path + '/' + 'search' + '_' + str(counter["dir_num"])
+    sub_path = './' + path + '/' + 'search' + '_' + str(counter["dir_num"])
 
     # Check whether the specified path exists or not
    isExist = os.path.exists('content'+'/'+path)
+
     if not isExist:
         print("Directory does not exist")
         # Create a new directory because it does not exist
-        os.makedirs('content'+'/'+path, exist_ok = True)
+        os.makedirs('.'+'/'+path, exist_ok = True)
         print("The new directory is created!")
 
-    #else:
-    #    os.chdir('/content/'+path)
-
-    print("Subdir -> The Current working directory is: {0}".format(os.getcwd()))
-
     # Check whether the specified path exists or not
     isExist = os.path.exists(sub_path)
+
     if isExist:
         shutil.rmtree(sub_path)
 
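Aside — a hedged alternative sketch (not what the commit does): the same ./searches/search_<n> bookkeeping with pathlib instead of string concatenation:

from pathlib import Path
import shutil

def make_search_dir(dir_num: int, root: str = "searches") -> Path:
    sub_path = Path(".") / root / f"search_{dir_num}"
    if sub_path.exists():
        shutil.rmtree(sub_path)   # clear any previous results for this slot
    sub_path.mkdir(parents=True)  # also creates ./searches on the first call
    return sub_path
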
@@ -240,50 +130,15 @@ def img_to_file_list(imgs):
     for img in imgs:
         img.save(sub_path+"/img_" + str(i) + ".png","PNG")
         img_files['search'+str(counter["dir_num"])].append(sub_path + '/' + 'img_'+ str(i) + ".png")
-
         i+=1
 
     counter["dir_num"]+=1
 
     return img_files['search'+str(counter["dir_num"]-1)]
 
-#print(os.getcwd())
-# os.chdir('/content/searches')
-# print("The Current working directory is: {0}".format(os.getcwd()))
-# show_dir_content()
-
-# imgs2, descr = text_to_image('blue jeans for women', 0.5, 4)
-
-# print("The Current working directory is: {0}".format(os.getcwd()))
-# show_dir_content()
-
-# img_files = img_to_file_list(imgs2)
-
-# display(img_files)
-
-# print("The Current working directory is: {0}".format(os.getcwd()))
-# show_dir_content()
-
-# shutil.rmtree('/content/searches')
-
-# #shutil.rmtree('./content/searches')
-# #print("The Current working directory is: {0}".format(os.getcwd()))
-# #show_dir_content()
-# #counter, img_files = img_to_file_list(imgs1, counter, img_files)
-# #display(img_files)
-
-# #counter, img_files = img_to_file_list(imgs2)
-
 import gradio as gr
 from deep_translator import GoogleTranslator
 
-css = '''
-  .gallery img {
-    width: 45px;
-    height: 60px;
-    object-fit: contain;
-  }
-'''
 
 counter = {"dir_num": 1}
 img_files = {'x':[]}
@@ -299,9 +154,10 @@ def fake_text(text, alpha):
     img, descr = text_to_image(en_text, alpha, 3)
     return descr
 
+
 with gr.Blocks() as demo:
 
-    with gr.Row():#variant="compact"):
+    with gr.Row():
 
         text = gr.Textbox(
             value = "ג'ינס כחול לגברים",  # Hebrew: "blue jeans for men"
@@ -329,11 +185,9 @@ with gr.Blocks() as demo:
     def get_select_index(evt: gr.SelectData, text, alpha):
         print(evt.index)
         eng_text = fake_text(text, alpha)[evt.index]
-        heb_text = GoogleTranslator(source='en', target='iw').translate(eng_text)
-        return heb_text
-
-    #gallery.select( get_select_index, None, selected )
+        #heb_text = GoogleTranslator(source='en', target='iw').translate(eng_text)
+        return eng_text
+
     gallery.select( fn=get_select_index, inputs=[text,alpha], outputs=selected )
 
 demo.launch()
-#shutil.rmtree('/content/searches')
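
Aside — an end-to-end usage sketch reconstructed from the commented-out calls this commit deletes; the query string and parameter values are the ones that appeared there:

imgs, descr = text_to_image('blue jeans for women', 0.5, 4)  # query, alpha, k_results
img_files = img_to_file_list(imgs)  # saves PNGs under ./searches/search_<n>/
print(descr)
print(img_files)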
 