Update app.py
Browse files
app.py
CHANGED
@@ -1,23 +1,36 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import pandas as pd
|
3 |
-
import os
|
4 |
from langchain_community.vectorstores import FAISS
|
5 |
from langchain_community.embeddings import OpenAIEmbeddings
|
6 |
from langchain.prompts import PromptTemplate
|
7 |
from langchain.chains import RetrievalQAWithSourcesChain
|
8 |
-
from langchain_openai import OpenAI
|
|
|
|
|
|
|
|
|
9 |
from langchain.docstore.document import Document
|
|
|
|
|
10 |
|
11 |
# Setting OpenAI API key
|
12 |
-
os.environ["OPENAI_API_KEY"] = "
|
13 |
-
|
14 |
|
15 |
-
#
|
16 |
course_data = [
|
17 |
-
{
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
]
|
22 |
|
23 |
# Convert the course data into a DataFrame
|
@@ -26,13 +39,14 @@ df = pd.DataFrame(course_data)
|
|
26 |
# Combine title, description, and curriculum into a single searchable text column
|
27 |
df["combined_text"] = df["title"] + " " + df["description"] + " " + df["curriculum"]
|
28 |
|
29 |
-
# Step
|
30 |
-
embedding_model = OpenAIEmbeddings(
|
31 |
|
32 |
# Generate embeddings for the combined text
|
33 |
course_embeddings = embedding_model.embed_documents(df["combined_text"].tolist())
|
34 |
|
35 |
-
|
|
|
36 |
documents = [
|
37 |
Document(
|
38 |
page_content=text,
|
@@ -41,19 +55,14 @@ documents = [
|
|
41 |
for i, text in enumerate(df["combined_text"].tolist())
|
42 |
]
|
43 |
|
44 |
-
# Create the FAISS vector store
|
45 |
vector_store = FAISS.from_documents(documents, embedding_model)
|
46 |
|
47 |
-
# Step
|
48 |
prompt_template = PromptTemplate(
|
49 |
input_variables=["context", "question"],
|
50 |
-
template="
|
51 |
-
Context: {context}
|
52 |
-
Question: {question}
|
53 |
-
Answer:"""
|
54 |
)
|
55 |
|
56 |
-
# Setup the retriever and QA chain with sources
|
57 |
retriever = vector_store.as_retriever()
|
58 |
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
|
59 |
llm=OpenAI(temperature=0),
|
@@ -62,28 +71,19 @@ qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
|
|
62 |
return_source_documents=True
|
63 |
)
|
64 |
|
65 |
-
# Step
|
66 |
def smart_search(query):
|
67 |
result = qa_chain({"question": query})
|
68 |
-
|
69 |
-
sources = "\n".join([doc.metadata["source"] for doc in result['source_documents']])
|
70 |
-
return f"Answer: {answer}\n\nSource(s): {sources}"
|
71 |
|
72 |
-
#
|
73 |
iface = gr.Interface(
|
74 |
fn=smart_search,
|
75 |
-
inputs=
|
76 |
-
|
77 |
-
|
78 |
-
outputs=[
|
79 |
-
gr.Textbox(label="Answer", placeholder="Here will be your search result..."),
|
80 |
-
gr.Textbox(label="Sources", placeholder="Related courses will be listed here...", interactive=False),
|
81 |
-
],
|
82 |
-
live=True,
|
83 |
-
title="Smart Course Search Tool",
|
84 |
-
description="This tool allows you to find relevant free courses on Analytics Vidhya platform. Ask any question related to the courses to get the most relevant results.",
|
85 |
-
theme="huggingface",
|
86 |
)
|
87 |
|
88 |
if __name__ == "__main__":
|
89 |
iface.launch()
|
|
|
|
|
|
|
|
|
|
"""Smart Course Search: a small RAG demo that indexes a course catalog in
FAISS and answers questions about it through a Gradio UI.

NOTE(review): reconstructed from a scraped diff view; a few lines (the
Document metadata dict, two RetrievalQAWithSourcesChain kwargs, and the
DataFrame construction line) were missing from the capture and have been
restored to their conventional form -- confirm against the repository.
"""

import os

import pandas as pd
import gradio as gr

from langchain.chains import RetrievalQAWithSourcesChain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
# SECURITY/CONSISTENCY FIX: the original imported OpenAIEmbeddings from BOTH
# langchain_community and langchain_openai (the second shadowed the first).
# Keep only the maintained langchain_openai implementation.
from langchain_openai import OpenAI, OpenAIEmbeddings

# SECURITY FIX: the original hard-coded a live `sk-proj-...` API key in
# source control. Never embed secrets in code -- that key must be revoked.
# Require the key from the environment (e.g. a deployment secret) and fail
# fast with an actionable message instead.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it (or configure it as a "
        "deployment secret) before starting the app."
    )

# Step 1: Load Course Data (as an example dataset)
course_data = [
    {
        "title": "Introduction to Data Science",
        "description": "Learn the basics of data science including Python, statistics, and visualization.",
        "curriculum": "Python basics, statistics, visualization, case studies",
    },
    {
        "title": "Machine Learning Basics",
        "description": "Understand the fundamentals of machine learning algorithms and their applications.",
        "curriculum": "Supervised learning, unsupervised learning, regression, classification",
    },
    {
        "title": "Deep Learning Essentials",
        "description": "Dive into deep learning concepts including neural networks and TensorFlow.",
        "curriculum": "Neural networks, TensorFlow basics, image classification",
    },
]

# Convert the course data into a DataFrame
df = pd.DataFrame(course_data)

# Combine title, description, and curriculum into a single searchable text column
df["combined_text"] = df["title"] + " " + df["description"] + " " + df["curriculum"]

# Step 2: Embedding model. The original also called
# `embedding_model.embed_documents(...)` here, but that result was never used
# and FAISS.from_documents() embeds the documents itself -- the extra call was
# a redundant second pass of paid API requests, so it has been removed.
embedding_model = OpenAIEmbeddings()

# Step 3: Store the Embeddings in a Vector Database (FAISS)
documents = [
    Document(
        page_content=text,
        # NOTE(review): this metadata line was missing from the capture.
        # "source" is the key the sources chain reports; the pre-change code
        # read doc.metadata["source"], so each course is keyed by its title.
        metadata={"source": df["title"][i]},
    )
    for i, text in enumerate(df["combined_text"].tolist())
]

vector_store = FAISS.from_documents(documents, embedding_model)

# Step 4: Build the Smart Search System
# NOTE(review): this prompt is defined but never passed to the chain below
# (same as the original). Wiring it in would require the chain's expected
# input variables ({summaries}, {question}) -- confirm before using it.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="Use the following context to answer the question.\nContext: {context}\nQuestion: {question}\nAnswer:",
)

retriever = vector_store.as_retriever()
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=OpenAI(temperature=0),
    # NOTE(review): these two kwargs were missing from the capture; restored
    # to the conventional values for this chain constructor.
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


# Step 5: Gradio Interface
def smart_search(query):
    """Answer *query* against the indexed courses.

    Returns only the chain's answer text; source documents are available in
    the result but are not surfaced by this UI.
    """
    result = qa_chain({"question": query})
    return result["answer"]


# Creating a Gradio interface
iface = gr.Interface(
    fn=smart_search,
    inputs=gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
    outputs=gr.Textbox(label="Answer"),
    # COST FIX: the original used live=True, which triggers a paid LLM call
    # on every keystroke. Submit-on-click is the intended interaction.
    live=False,
)

if __name__ == "__main__":
    iface.launch()