Spaces:
Sleeping
Sleeping
Commit
·
4eaf3da
0
Parent(s):
first logic
Browse files- .env_example +2 -0
- .gitignore +2 -0
- __pycache__/database.cpython-311.pyc +0 -0
- __pycache__/preprocess.cpython-311.pyc +0 -0
- __pycache__/utilities.cpython-311.pyc +0 -0
- chatbot.py +76 -0
- database.py +15 -0
- preprocess.py +48 -0
- readme.md +3 -0
- requirements.txt +5 -0
- utilities.py +32 -0
.env_example
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
REDIS_KEY = ''
|
2 |
+
OPENAI_API_KEY = ''
|
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
product_data.csv
|
2 |
+
.env
|
__pycache__/database.cpython-311.pyc
ADDED
Binary file (610 Bytes). View file
|
|
__pycache__/preprocess.cpython-311.pyc
ADDED
Binary file (1.35 kB). View file
|
|
__pycache__/utilities.cpython-311.pyc
ADDED
Binary file (2.19 kB). View file
|
|
chatbot.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.prompts import PromptTemplate
|
2 |
+
from langchain.llms import OpenAI
|
3 |
+
from langchain.embeddings import OpenAIEmbeddings
|
4 |
+
from langchain.chains import LLMChain
|
5 |
+
from langchain.memory import ConversationBufferMemory
|
6 |
+
from redis.commands.search.query import Query
|
7 |
+
import time
|
8 |
+
import os
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
import numpy as np
|
11 |
+
from database import redis_conn
|
12 |
+
|
13 |
+
# Interactive e-commerce chatbot script.
#
# Flow: (1) ask the user what they are looking for, (2) have the LLM turn that
# request into search keywords, (3) embed the keywords and run a KNN vector
# search against the Redis index, (4) let a second LLM chain present the hits,
# (5) keep answering follow-up questions with the conversation memory.

# Load OPENAI_API_KEY from the local .env file into the environment.
load_dotenv()

# --- Step 1: turn the free-text request into search keywords ---------------
llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key=os.getenv('OPENAI_API_KEY'))
prompt = PromptTemplate(
    input_variables=["product_description"],
    template="Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}",
)

chain = LLMChain(llm=llm, prompt=prompt)

userinput = input("Hey im a E-commerce Chatbot, how can i help you today? ")
print("User:", userinput)
# Run the chain only specifying the input variable.
keywords = chain.run(userinput)

# --- Step 2: vectorize the keywords ------------------------------------------
embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
query_vector = embedding_model.embed_query(keywords)
# The vector is sent to Redis as a raw FLOAT32 byte blob, matching how
# utilities.load_vectors stores the item vectors.
query_vector = np.array(query_vector).astype(np.float32).tobytes()

# --- Step 3: KNN search in Redis ---------------------------------------------
ITEM_KEYWORD_EMBEDDING_FIELD = 'item_vector'
topK = 5
q = (
    Query(f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]')
    .sort_by('vector_score')
    .paging(0, topK)
    # preprocess.py drops the 'item_id' column and stores 'primary_key'
    # instead, so request both and use whichever the hash actually has.
    .return_fields('vector_score', 'item_name', 'item_id', 'primary_key', 'item_keywords')
    .dialect(2)
)
params_dict = {"vec_param": query_vector}
# Execute the query
results = redis_conn.ft().search(q, query_params=params_dict)


def _doc_field(doc, *names):
    """Return the first of the attributes *names* present on *doc*, else ''."""
    for name in names:
        value = getattr(doc, name, None)
        if value is not None:
            return str(value)
    return ''


# One "<name> <keywords> <id>" entry per hit, separated by blank lines.
full_result_string = ''.join(
    _doc_field(product, 'item_name')
    + ' ' + _doc_field(product, 'item_keywords')
    + ' ' + _doc_field(product, 'item_id', 'primary_key')
    + "\n\n\n"
    for product in results.docs
)

# --- Step 4: present the search result ---------------------------------------
template = """You are a chatbot. Be kind, detailed and nice. Present the given queried search result in a nice way as answer to the user input. dont ask questions back! just take the given context

{chat_history}
Human: {user_msg}
Chatbot:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=template
)
memory = ConversationBufferMemory(memory_key="chat_history")
llm_chain = LLMChain(
    llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=os.getenv('OPENAI_API_KEY')),
    prompt=prompt,
    verbose=False,
    memory=memory,
)

answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
print("Bot:", answer)
time.sleep(0.5)

# --- Step 5: follow-up questions ---------------------------------------------
# The loop ends on EOF / Ctrl-C, on an empty line, or on "exit" / "quit",
# instead of running forever and crashing when stdin closes.
while True:
    try:
        follow_up = input("Anything else you want to ask about this topic?")
    except (EOFError, KeyboardInterrupt):
        print()
        break
    if follow_up.strip().lower() in {"", "exit", "quit"}:
        break
    print("User:", follow_up)
    answer = llm_chain.predict(
        user_msg=follow_up
    )
    print("Bot:", answer)
    time.sleep(0.5)
|
database.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import redis
|
2 |
+
import os
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
|
5 |
+
# Shared Redis connection used by chatbot.py and preprocess.py.
load_dotenv()
redis_key = os.getenv('REDIS_KEY')

# Host and port keep their previous values by default; REDIS_HOST / REDIS_PORT
# are optional overrides so another instance can be used without a code change.
redis_conn = redis.Redis(
    host=os.getenv('REDIS_HOST', 'redis-10923.c10.us-east-1-4.ec2.cloud.redislabs.com'),
    port=int(os.getenv('REDIS_PORT', '10923')),
    password=redis_key)

# Constructing the client does not contact the server, so issue a PING before
# claiming the connection works; a bad host or password fails here, at import
# time, rather than at the first real command.
redis_conn.ping()
print('connected to redis')
|
preprocess.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import OpenAIEmbeddings
|
2 |
+
import os
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from database import redis_conn
|
7 |
+
from utilities import create_flat_index, load_vectors
|
8 |
+
|
9 |
+
# Read the .env file, then pick up the OpenAI key from the environment
# (None when the variable is not set).
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
11 |
+
|
12 |
+
# Default maximum length (in characters) for text fields.
MAX_TEXT_LENGTH = 512


def auto_truncate(text: str, max_length: int = MAX_TEXT_LENGTH) -> str:
    """Return *text* cut down to at most *max_length* characters.

    Used as a pandas ``converters`` callback, which calls it with the raw
    cell string as the only argument, so ``max_length`` must stay optional.

    Args:
        text: The string to shorten.
        max_length: Maximum number of characters to keep. Defaults to
            ``MAX_TEXT_LENGTH``.

    Returns:
        ``text`` unchanged when it is short enough, otherwise its first
        ``max_length`` characters.

    Raises:
        ValueError: If ``max_length`` is negative (a negative slice bound
            would silently trim from the end instead of capping the length).
    """
    if max_length < 0:
        raise ValueError("max_length must be non-negative")
    return text[:max_length]
|
17 |
+
|
18 |
+
# Load the product catalogue, truncating the long text columns while parsing.
data = pd.read_csv(
    'product_data.csv',
    converters={
        'bullet_point': auto_truncate,
        'item_keywords': auto_truncate,
        'item_name': auto_truncate,
    },
)
# A product is identified by "<item_id>-<domain_name>"; the two source columns
# are dropped once the combined key exists.
data['primary_key'] = data['item_id'] + '-' + data['domain_name']
data.drop(columns=['item_id', 'domain_name'], inplace=True)
# Converters leave missing keywords as '' rather than NaN, so turn them into
# NaN first and then drop the rows that have nothing to embed.
data['item_keywords'] = data['item_keywords'].replace('', np.nan)
data.dropna(subset=['item_keywords'], inplace=True)
data.reset_index(drop=True, inplace=True)

# Upper bound on how many products get embedded and indexed.
NUMBER_PRODUCTS = 500
data_metadata = data.head(NUMBER_PRODUCTS).to_dict(orient='index')
if not data_metadata:
    raise SystemExit('product_data.csv contains no rows with item_keywords; nothing to index')

# generating embeddings (vectors) for the item keywords
embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)

# get the item keywords attribute for each product and encode them into vector
# embeddings; embed_documents sends the texts in batches instead of issuing
# one request per product.
item_keywords = [item['item_keywords'] for item in data_metadata.values()]
item_keywords_vectors = embedding_model.embed_documents(item_keywords)

# Derive the index dimension from the vectors actually produced. The previous
# hard-coded 768 belonged to the sentence-transformers model this script used
# before switching to OpenAI embeddings, and the index DIM must match the
# stored blobs for them to be indexed.
TEXT_EMBEDDING_DIMENSION = len(item_keywords_vectors[0])
# Fewer than NUMBER_PRODUCTS rows may have survived the filtering above.
number_of_products = len(data_metadata)

print ('Loading and Indexing + ' + str(number_of_products) + ' products')
# flush all data -- NOTE: this wipes every key on the Redis instance
redis_conn.flushall()
# create flat index & load vectors
create_flat_index(redis_conn, number_of_products, TEXT_EMBEDDING_DIMENSION, 'COSINE')
load_vectors(redis_conn, data_metadata, item_keywords_vectors)
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
readme.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
An ***e-commerce chatbot*** which goes through the products of the Amazon dataset and suggests the most suitable goods according to the user's needs.
|
2 |
+
By combining product embeddings and large language models through LangChain and Redis, this chatbot acts like a real salesperson: it understands the client's request, efficiently searches for relevant products based on the user's description, and presents its recommendations in an engaging and informative manner.
|
3 |
+
**link to download the Amazon product dataset** : https://drive.google.com/file/d/1tHWB6u3yQCuAgOYc-DxtZ8Mru3uV5_lj/view
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain == 0.0.242
|
2 |
+
openai == 0.27.8
|
3 |
+
redis == 5.0.1
|
4 |
+
pandas == 2.0.3
|
5 |
+
sentence-transformers == 2.2.2
|
utilities.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from redis import Redis
|
2 |
+
from redis.commands.search.field import VectorField
|
3 |
+
from redis.commands.search.field import TextField
|
4 |
+
from redis.commands.search.field import TagField
|
5 |
+
from redis.commands.search.result import Result
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
def load_vectors(client: "Redis", product_metadata, vector_dict):
    """Write every product and its embedding to Redis as a hash.

    Each product is stored under the key
    ``product:<index>:<primary_key>``. The hash holds the product's metadata
    fields plus an ``item_vector`` field containing the embedding as raw
    FLOAT32 bytes. All writes are queued on one non-transactional pipeline
    and sent with a single ``execute()``.

    Args:
        client: Redis client used to create the pipeline.
        product_metadata: Mapping of index -> metadata dict; every metadata
            dict must contain a ``'primary_key'`` entry.
        vector_dict: Embeddings addressable by the same indices as
            ``product_metadata`` (``vector_dict[index]``).

    The caller's metadata dicts are left untouched: the vector is added to a
    fresh mapping instead of being inserted into ``product_metadata``.
    """
    pipe = client.pipeline(transaction=False)
    for index, item_metadata in product_metadata.items():
        # hash key
        key = 'product:' + str(index) + ':' + item_metadata['primary_key']

        # hash values: metadata plus the embedding as a FLOAT32 byte blob
        item_keywords_vector = np.array(vector_dict[index], dtype=np.float32).tobytes()
        mapping = {**item_metadata, 'item_vector': item_keywords_vector}

        # HSET
        pipe.hset(key, mapping=mapping)

    pipe.execute()
|
23 |
+
|
24 |
+
def create_flat_index (redis_conn, number_of_vectors, vector_dimensions=512, distance_metric='L2'):
    """Create the product search index with a FLAT vector field.

    The index is created on whatever index ``redis_conn.ft()`` addresses by
    default and contains:

    * ``item_vector`` -- FLAT vector field of FLOAT32 values,
    * ``product_type`` and ``country`` -- tag fields,
    * ``item_name`` and ``item_keywords`` -- text fields.

    Args:
        redis_conn: Redis client exposing the search commands via ``ft()``.
        number_of_vectors: Used for both ``INITIAL_CAP`` and ``BLOCK_SIZE``
            of the vector field.
        vector_dimensions: Value of the vector field's ``DIM`` attribute.
        distance_metric: Value of the vector field's ``DISTANCE_METRIC``
            attribute.
    """
    vector_attributes = {
        "TYPE": "FLOAT32",
        "DIM": vector_dimensions,
        "DISTANCE_METRIC": distance_metric,
        "INITIAL_CAP": number_of_vectors,
        "BLOCK_SIZE": number_of_vectors,
    }
    schema = [
        VectorField('item_vector', "FLAT", vector_attributes),
        TagField("product_type"),
        TextField("item_name"),
        TextField("item_keywords"),
        TagField("country"),
    ]
    redis_conn.ft().create_index(schema)
|
32 |
+
|