Spaces:
Runtime error
Runtime error
try query
Browse files
app.py
CHANGED
@@ -2,20 +2,35 @@ import gradio as gr
|
|
2 |
import chromadb
|
3 |
import pandas as pd
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
|
14 |
-
collection.add(ids=ids, documents=documents, metadatas=metadatas)
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
19 |
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import chromadb
|
3 |
import pandas as pd
|
4 |
|
5 |
+
# Build the Chroma collection that backs the search box: every row of the
# recipes dataset becomes one document (its "preparation" text), and the
# remaining columns are stored as that document's metadata.
client = chromadb.Client()
# get_or_create_collection instead of create_collection: creating a collection
# whose name already exists raises, which breaks the app when this module is
# executed again in the same process.
collection = client.get_or_create_collection("bolivian-recipes")
df = pd.read_parquet("hf://datasets/asoria/bolivian-recipes@~parquet/default/other/0000.parquet")
text_column = "preparation"
# Chroma needs string ids; the row position is used as the id.
ids = [str(i) for i in range(len(df))]
documents = df[text_column].to_list()
# One metadata dict per row, holding every column except the indexed text.
metadatas = df.drop(columns=text_column).to_dict("records")
collection.add(ids=ids, documents=documents, metadatas=metadatas)
|
13 |
|
|
|
14 |
|
15 |
+
with gr.Blocks() as demo:
    gr.Markdown(" ## PandasAI demo using datasets library")
    gr.Markdown(" pandasai library https://github.com/gventuri/pandas-ai")
    gr.Markdown(" datasets library https://huggingface.co/docs/datasets")

    # NOTE(review): dataset, config and split are passed to get_result but are
    # not used there; only the prompt drives the query.
    dataset = gr.Textbox(label="dataset", placeholder="mstz/iris", value="mstz/iris")
    config = gr.Textbox(label="config", placeholder="iris", value="iris")
    split = gr.Textbox(label="split", placeholder="train", value="train")
    prompt = gr.Textbox(label="prompt (str)", placeholder="How many records do I have?. Give me the median of sepal_width. Show me the first 5 rows.")
    cached_responses_table = gr.DataFrame()
    get_result_button = gr.Button("Submit")

    def get_result(dataset, config, split, prompt) -> dict:
        """Query the collection with ``prompt`` and show the hits as a table.

        Args:
            dataset: Value of the "dataset" textbox (currently unused).
            config: Value of the "config" textbox (currently unused).
            split: Value of the "split" textbox (currently unused).
            prompt: Free-text query sent to the collection.

        Returns:
            A mapping from the results table component to a Gradio update
            whose value is a DataFrame with one row per hit: ``id``,
            ``distance``, ``document``, then one column per metadata key.
        """
        result = collection.query(query_texts=[prompt], n_results=4)

        # The raw query result is a dict of nested lists (one inner list per
        # query text), which the DataFrame component cannot render as a table.
        # Exactly one query text is sent, so only the first inner list is used.
        hit_ids = result["ids"][0]

        def first_batch(field):
            # A field that was not returned is absent or None; pad it so the
            # columns stay the same length as the ids.
            batches = result.get(field)
            return batches[0] if batches else [None] * len(hit_ids)

        hits = pd.DataFrame(
            {
                "id": hit_ids,
                "distance": first_batch("distances"),
                "document": first_batch("documents"),
            }
        )
        metadata = pd.DataFrame([entry or {} for entry in first_batch("metadatas")])
        # rsuffix keeps a metadata key named like a fixed column from clashing.
        hits = hits.join(metadata, rsuffix="_metadata")

        return {cached_responses_table: gr.update(value=hits)}

    get_result_button.click(get_result, inputs=[dataset, config, split, prompt], outputs=cached_responses_table)
|
34 |
+
|
35 |
+
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True)
|