nikhildsst commited on
Commit
339a135
·
verified ·
1 Parent(s): 9b7d715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -37
app.py CHANGED
@@ -1,23 +1,36 @@
1
- import gradio as gr
2
- import pandas as pd
3
- import os
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.embeddings import OpenAIEmbeddings
6
  from langchain.prompts import PromptTemplate
7
  from langchain.chains import RetrievalQAWithSourcesChain
8
- from langchain_openai import OpenAI
 
 
 
 
9
  from langchain.docstore.document import Document
 
 
10
 
11
  # Setting OpenAI API key
12
- os.environ["OPENAI_API_KEY"] = "openai_api_key"
13
-
14
 
15
- # Sample Data (Replace with actual API or scraping code for real-time data from Analytics Vidhya)
16
  course_data = [
17
- {"title": "Introduction to Data Science", "description": "Learn the basics of data science including Python, statistics, and visualization.", "curriculum": "Python basics, statistics, visualization, case studies"},
18
- {"title": "Machine Learning Basics", "description": "Understand the fundamentals of machine learning algorithms and their applications.", "curriculum": "Supervised learning, unsupervised learning, regression, classification"},
19
- {"title": "Deep Learning Essentials", "description": "Dive into deep learning concepts including neural networks and TensorFlow.", "curriculum": "Neural networks, TensorFlow basics, image classification"},
20
- # Add more courses here
 
 
 
 
 
 
 
 
 
 
 
21
  ]
22
 
23
  # Convert the course data into a DataFrame
@@ -26,13 +39,14 @@ df = pd.DataFrame(course_data)
26
  # Combine title, description, and curriculum into a single searchable text column
27
  df["combined_text"] = df["title"] + " " + df["description"] + " " + df["curriculum"]
28
 
29
- # Step 1: Generate Embeddings for the Data
30
- embedding_model = OpenAIEmbeddings(model="text-embedding-ada-002") # Use advanced embeddings model
31
 
32
  # Generate embeddings for the combined text
33
  course_embeddings = embedding_model.embed_documents(df["combined_text"].tolist())
34
 
35
- # Step 2: Store the Embeddings in a Vector Database (FAISS)
 
36
  documents = [
37
  Document(
38
  page_content=text,
@@ -41,19 +55,14 @@ documents = [
41
  for i, text in enumerate(df["combined_text"].tolist())
42
  ]
43
 
44
- # Create the FAISS vector store
45
  vector_store = FAISS.from_documents(documents, embedding_model)
46
 
47
- # Step 3: Build the Smart Search System
48
  prompt_template = PromptTemplate(
49
  input_variables=["context", "question"],
50
- template="""Use the following context to answer the question. Please be precise and use the information from the course details.
51
- Context: {context}
52
- Question: {question}
53
- Answer:"""
54
  )
55
 
56
- # Setup the retriever and QA chain with sources
57
  retriever = vector_store.as_retriever()
58
  qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
59
  llm=OpenAI(temperature=0),
@@ -62,28 +71,19 @@ qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
62
  return_source_documents=True
63
  )
64
 
65
- # Step 4: Gradio Interface with Categories and Result Display
66
  def smart_search(query):
67
  result = qa_chain({"question": query})
68
- answer = result['answer']
69
- sources = "\n".join([doc.metadata["source"] for doc in result['source_documents']])
70
- return f"Answer: {answer}\n\nSource(s): {sources}"
71
 
72
- # Enhance the user interface
73
  iface = gr.Interface(
74
  fn=smart_search,
75
- inputs=[
76
- gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
77
- ],
78
- outputs=[
79
- gr.Textbox(label="Answer", placeholder="Here will be your search result..."),
80
- gr.Textbox(label="Sources", placeholder="Related courses will be listed here...", interactive=False),
81
- ],
82
- live=True,
83
- title="Smart Course Search Tool",
84
- description="This tool allows you to find relevant free courses on Analytics Vidhya platform. Ask any question related to the courses to get the most relevant results.",
85
- theme="huggingface",
86
  )
87
 
88
  if __name__ == "__main__":
89
  iface.launch()
 
 
 
 
 
# --- Imports ---
# Standard library
import os

# Third-party
import gradio as gr
import pandas as pd

# LangChain: vector store, prompt, retrieval chain, documents, OpenAI wrappers.
# NOTE(review): OpenAIEmbeddings was previously imported from BOTH
# langchain_community.embeddings and langchain_openai; the second import
# silently shadowed the first. Keep only the maintained langchain_openai one.
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.docstore.document import Document

# Setting OpenAI API key.
# SECURITY: a live secret key was previously hard-coded on this line and
# committed to version control — that key is exposed and must be revoked.
# Read the key from the environment instead and fail fast with a clear
# message if it is missing.
if "OPENAI_API_KEY" not in os.environ:
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set. "
        "Export it before launching the app."
    )
# Step 1: Load Course Data (as an example dataset)
# Static sample catalogue; a production build would pull this from the
# Analytics Vidhya platform instead of hard-coding it.
course_data = [
    {
        "title": "Introduction to Data Science",
        "description": "Learn the basics of data science including Python, statistics, and visualization.",
        "curriculum": "Python basics, statistics, visualization, case studies",
    },
    {
        "title": "Machine Learning Basics",
        "description": "Understand the fundamentals of machine learning algorithms and their applications.",
        "curriculum": "Supervised learning, unsupervised learning, regression, classification",
    },
    {
        "title": "Deep Learning Essentials",
        "description": "Dive into deep learning concepts including neural networks and TensorFlow.",
        "curriculum": "Neural networks, TensorFlow basics, image classification",
    },
]
# Convert the course data into a DataFrame
df = pd.DataFrame(course_data)

# Combine title, description, and curriculum into a single searchable text
# column, so one embedding per course covers everything a user might query.
df["combined_text"] = df["title"] + " " + df["description"] + " " + df["curriculum"]
# Step 2: Generate Embeddings for the Data
embedding_model = OpenAIEmbeddings()

# Generate embeddings for the combined text.
# NOTE(review): FAISS.from_documents below re-embeds every document itself,
# so this call pays the embedding API cost twice; confirm course_embeddings
# is actually used elsewhere before relying on it.
course_embeddings = embedding_model.embed_documents(df["combined_text"].tolist())

# Step 3: Store the Embeddings in a Vector Database (FAISS)
documents = [
    Document(
        page_content=text,
        # NOTE(review): this metadata line was not visible in the diff; it is
        # reconstructed from the pre-image's use of doc.metadata["source"] —
        # confirm the exact original fields.
        metadata={"source": df["title"][i]},
    )
    for i, text in enumerate(df["combined_text"].tolist())
]

vector_store = FAISS.from_documents(documents, embedding_model)
# Step 4: Build the Smart Search System
# Prompt that places the retrieved course context ahead of the user question.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the following context to answer the question.\n"
        "Context: {context}\n"
        "Question: {question}\n"
        "Answer:"
    ),
)
# Wire the vector store into a retrieval QA chain that also reports sources.
retriever = vector_store.as_retriever()
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=OpenAI(temperature=0),  # temperature 0 for deterministic answers
    chain_type="stuff",         # NOTE(review): reconstructed — this kwarg was in a diff gap; confirm
    retriever=retriever,        # NOTE(review): reconstructed — confirm against original
    return_source_documents=True,
)
# Step 5: Gradio Interface
def smart_search(query):
    """Answer a course-related question via the retrieval QA chain.

    Args:
        query: Free-text question entered in the UI.

    Returns:
        The chain's answer string. Source course titles are appended when the
        chain returns them — return_source_documents=True is configured
        upstream, but the sources were silently dropped in this refactor.
    """
    # Guard against empty input so we don't spend an API call on nothing.
    if not query or not query.strip():
        return "Please enter a question about the courses."
    result = qa_chain({"question": query})
    answer = result["answer"]
    source_docs = result.get("source_documents") or []
    if source_docs:
        titles = "\n".join(doc.metadata.get("source", "?") for doc in source_docs)
        return f"{answer}\n\nSource(s):\n{titles}"
    return answer


# Creating a Gradio interface
iface = gr.Interface(
    fn=smart_search,
    inputs=gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
    outputs=gr.Textbox(label="Answer"),
    # live=True would fire a billed OpenAI request on every keystroke;
    # require an explicit submit instead.
    live=False,
)

if __name__ == "__main__":
    iface.launch()