nikhildsst commited on
Commit
339a135
·
verified ·
1 Parent(s): 9b7d715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -37
app.py CHANGED
@@ -1,23 +1,36 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import os
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.embeddings import OpenAIEmbeddings
6
  from langchain.prompts import PromptTemplate
7
  from langchain.chains import RetrievalQAWithSourcesChain
8
- from langchain_openai import OpenAI
 
 
 
 
9
  from langchain.docstore.document import Document
 
 
10
 
11
  # Setting OpenAI API key
12
- os.environ["OPENAI_API_KEY"] = "openai_api_key"
13
-
14
 
15
- # Sample Data (Replace with actual API or scraping code for real-time data from Analytics Vidhya)
16
  course_data = [
17
- {"title": "Introduction to Data Science", "description": "Learn the basics of data science including Python, statistics, and visualization.", "curriculum": "Python basics, statistics, visualization, case studies"},
18
- {"title": "Machine Learning Basics", "description": "Understand the fundamentals of machine learning algorithms and their applications.", "curriculum": "Supervised learning, unsupervised learning, regression, classification"},
19
- {"title": "Deep Learning Essentials", "description": "Dive into deep learning concepts including neural networks and TensorFlow.", "curriculum": "Neural networks, TensorFlow basics, image classification"},
20
- # Add more courses here
 
 
 
 
 
 
 
 
 
 
 
21
  ]
22
 
23
  # Convert the course data into a DataFrame
@@ -26,13 +39,14 @@ df = pd.DataFrame(course_data)
26
  # Combine title, description, and curriculum into a single searchable text column
27
  df["combined_text"] = df["title"] + " " + df["description"] + " " + df["curriculum"]
28
 
29
- # Step 1: Generate Embeddings for the Data
30
- embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002") # Use advanced embeddings model
31
 
32
  # Generate embeddings for the combined text
33
  course_embeddings = embedding_model.embed_documents(df["combined_text"].tolist())
34
 
35
- # Step 2: Store the Embeddings in a Vector Database (FAISS)
 
36
  documents = [
37
  Document(
38
  page_content=text,
@@ -41,19 +55,14 @@ documents = [
41
  for i, text in enumerate(df["combined_text"].tolist())
42
  ]
43
 
44
- # Create the FAISS vector store
45
  vector_store = FAISS.from_documents(documents, embedding_model)
46
 
47
- # Step 3: Build the Smart Search System
48
  prompt_template = PromptTemplate(
49
  input_variables=["context", "question"],
50
- template="""Use the following context to answer the question. Please be precise and use the information from the course details.
51
- Context: {context}
52
- Question: {question}
53
- Answer:"""
54
  )
55
 
56
- # Setup the retriever and QA chain with sources
57
  retriever = vector_store.as_retriever()
58
  qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
59
  llm=OpenAI(temperature=0),
@@ -62,28 +71,19 @@ qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
62
  return_source_documents=True
63
  )
64
 
65
- # Step 4: Gradio Interface with Categories and Result Display
66
  def smart_search(query):
67
  result = qa_chain({"question": query})
68
- answer = result['answer']
69
- sources = "\n".join([doc.metadata["source"] for doc in result['source_documents']])
70
- return f"Answer: {answer}\n\nSource(s): {sources}"
71
 
72
- # Enhance the user interface
73
  iface = gr.Interface(
74
  fn=smart_search,
75
- inputs=[
76
- gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
77
- ],
78
- outputs=[
79
- gr.Textbox(label="Answer", placeholder="Here will be your search result..."),
80
- gr.Textbox(label="Sources", placeholder="Related courses will be listed here...", interactive=False),
81
- ],
82
- live=True,
83
- title="Smart Course Search Tool",
84
- description="This tool allows you to find relevant free courses on Analytics Vidhya platform. Ask any question related to the courses to get the most relevant results.",
85
- theme="huggingface",
86
  )
87
 
88
  if __name__ == "__main__":
89
  iface.launch()
 
 
 
 
 
# --- Imports ---
# Standard library
import os

# Third-party
import gradio as gr
import pandas as pd

# LangChain: vector store, prompt, retrieval chain, documents, OpenAI wrappers.
# NOTE(review): OpenAIEmbeddings was previously imported from BOTH
# langchain_community.embeddings and langchain_openai; the second import
# silently shadowed the first. Keep only the maintained langchain_openai one.
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.docstore.document import Document

# Setting OpenAI API key.
# SECURITY: a live secret key was previously hard-coded on this line and
# committed to version control — that key is exposed and must be revoked.
# Read the key from the environment instead and fail fast with a clear
# message if it is missing.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set. "
        "Export it before launching the app."
    )
# Step 1: Load Course Data (as an example dataset)
# Static sample catalogue; a production build would pull this from the
# Analytics Vidhya platform instead of hard-coding it.
course_data = [
    {
        "title": "Introduction to Data Science",
        "description": "Learn the basics of data science including Python, statistics, and visualization.",
        "curriculum": "Python basics, statistics, visualization, case studies",
    },
    {
        "title": "Machine Learning Basics",
        "description": "Understand the fundamentals of machine learning algorithms and their applications.",
        "curriculum": "Supervised learning, unsupervised learning, regression, classification",
    },
    {
        "title": "Deep Learning Essentials",
        "description": "Dive into deep learning concepts including neural networks and TensorFlow.",
        "curriculum": "Neural networks, TensorFlow basics, image classification",
    },
]
# Convert the course data into a DataFrame
df = pd.DataFrame(course_data)

# Combine title, description, and curriculum into a single searchable text
# column, so one embedding per course covers everything a user might query.
df["combined_text"] = df["title"] + " " + df["description"] + " " + df["curriculum"]
# Step 2: Generate Embeddings for the Data
embedding_model = OpenAIEmbeddings()

# Generate embeddings for the combined text.
# NOTE(review): FAISS.from_documents below re-embeds every document itself,
# so this call pays the embedding API cost twice; confirm course_embeddings
# is actually used elsewhere before relying on it.
course_embeddings = embedding_model.embed_documents(df["combined_text"].tolist())

# Step 3: Store the Embeddings in a Vector Database (FAISS)
documents = [
    Document(
        page_content=text,
        # NOTE(review): this metadata line was not visible in the diff; it is
        # reconstructed from the pre-image's use of doc.metadata["source"] —
        # confirm the exact original fields.
        metadata={"source": df["title"][i]},
    )
    for i, text in enumerate(df["combined_text"].tolist())
]

vector_store = FAISS.from_documents(documents, embedding_model)
# Step 4: Build the Smart Search System
# Prompt that places the retrieved course context ahead of the user question.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the following context to answer the question.\n"
        "Context: {context}\n"
        "Question: {question}\n"
        "Answer:"
    ),
)
# Wire the vector store into a retrieval QA chain that also reports sources.
retriever = vector_store.as_retriever()
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=OpenAI(temperature=0),  # temperature 0 for deterministic answers
    chain_type="stuff",         # NOTE(review): reconstructed — this kwarg was in a diff gap; confirm
    retriever=retriever,        # NOTE(review): reconstructed — confirm against original
    return_source_documents=True,
)
# Step 5: Gradio Interface
def smart_search(query):
    """Answer a course-related question via the retrieval QA chain.

    Args:
        query: Free-text question entered in the UI.

    Returns:
        The chain's answer string. Source course titles are appended when the
        chain returns them — return_source_documents=True is configured
        upstream, but the sources were silently dropped in this refactor.
    """
    # Guard against empty input so we don't spend an API call on nothing.
    if not query or not query.strip():
        return "Please enter a question about the courses."
    result = qa_chain({"question": query})
    answer = result["answer"]
    source_docs = result.get("source_documents") or []
    if source_docs:
        titles = "\n".join(doc.metadata.get("source", "?") for doc in source_docs)
        return f"{answer}\n\nSource(s):\n{titles}"
    return answer


# Creating a Gradio interface
iface = gr.Interface(
    fn=smart_search,
    inputs=gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
    outputs=gr.Textbox(label="Answer"),
    # live=True would fire a billed OpenAI request on every keystroke;
    # require an explicit submit instead.
    live=False,
)

if __name__ == "__main__":
    iface.launch()