# Dummy_Researcher/researcher.py
from config import *  # expects PROMPT_TEMPLATE, INPUT_VARIABLES, SEPARATORS, CHUNK_SIZE, CHUNK_OVERLAP, EMBEDDER, CHAIN_TYPE, SEARCH_KWARGS

import json
import os

import requests
from dotenv import find_dotenv, load_dotenv
from langchain.chains import RetrievalQA
from langchain.globals import set_debug
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq

# Load API keys from a .env file and turn on LangChain's verbose debug logging.
load_dotenv(find_dotenv())
set_debug(True)
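
# A minimal sketch of the constants this module pulls in from config.py.
# These values are hypothetical, for illustration only; the repo's actual
# config.py may define them differently:
#
#   PROMPT_TEMPLATE = "Use the context to answer.\n\nContext: {context}\n\nQuestion: {question}\nAnswer:"
#   INPUT_VARIABLES = ["context", "question"]
#   SEPARATORS = ["\n\n", "\n", " "]
#   CHUNK_SIZE = 1000
#   CHUNK_OVERLAP = 100
#   EMBEDDER = "sentence-transformers/all-MiniLM-L6-v2"
#   CHAIN_TYPE = "stuff"
#   SEARCH_KWARGS = {"k": 4}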

class Researcher:
    """Searches the web for a query, indexes the results, and answers with a RAG chain."""

    def __init__(self):
        self.serper_api_key = os.getenv("SERPER_API_KEY")
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        self.prompt_template = PromptTemplate(
            template=PROMPT_TEMPLATE,
            input_variables=INPUT_VARIABLES
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            separators=SEPARATORS,
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )
        # Groq-hosted Mixtral as the answering LLM.
        self.llm = ChatGroq(
            temperature=0.5,
            model_name="mixtral-8x7b-32768",
            groq_api_key=self.groq_api_key
        )
        # CPU-only HuggingFace embeddings used to build the FAISS index.
        self.hfembeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDER,
            model_kwargs={'device': 'cpu'}
        )
    def search_articles(self, query):
        """Hit the Serper.dev Google Search API and return the parsed JSON response."""
        url = "https://google.serper.dev/search"
        data = json.dumps({"q": query})
        headers = {
            'X-API-KEY': self.serper_api_key,
            'Content-Type': 'application/json'
        }
        response = requests.post(url, headers=headers, data=data)
        response.raise_for_status()  # surface HTTP errors instead of parsing an error body
        return response.json()
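
    # Abridged shape of a Serper response, inferred from the fields get_urls
    # reads below (real responses carry more keys):
    #   {
    #     "answerBox": {"link": "...", ...},
    #     "organic": [{"title": "...", "link": "...", "snippet": "..."}, ...]
    #   }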
    def research_answerer(self):
        """Build a RetrievalQA chain over self.db (created in research_given_query)."""
        research_qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type=CHAIN_TYPE,
            retriever=self.db.as_retriever(search_kwargs=SEARCH_KWARGS),
            return_source_documents=True,
            verbose=True,
            chain_type_kwargs={"prompt": self.prompt_template}
        )
        return research_qa_chain
    def get_urls(self, articles):
        """Collect the answer-box link (if any) plus the top three organic result links."""
        urls = []
        try:
            urls.append(articles["answerBox"]["link"])
        except KeyError:
            pass  # no answer box for this query
        for result in articles.get("organic", [])[:3]:
            urls.append(result["link"])
        return urls
    def get_content_from_urls(self, urls):
        """Download and parse the pages behind the URLs into LangChain documents."""
        loader = UnstructuredURLLoader(urls=urls)
        research_content = loader.load()
        return research_content
    def research_given_query(self, research_objective, research_content):
        """Chunk the scraped content, index it in FAISS, and answer the objective."""
        docs = self.text_splitter.split_documents(research_content)
        self.db = FAISS.from_documents(documents=docs, embedding=self.hfembeddings)
        bot = self.research_answerer()
        research_out = bot.invoke({"query": research_objective})
        return research_out["result"]
    def research(self, query):
        """End-to-end pipeline: search, scrape, index, and answer the query."""
        articles = self.search_articles(query)
        urls = self.get_urls(articles)
        research_content = self.get_content_from_urls(urls)
        answer = self.research_given_query(query, research_content)
        return answer
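

# A minimal usage sketch, not part of the original file: assumes valid
# SERPER_API_KEY and GROQ_API_KEY values are available via the environment
# or a .env file, and that the query string is purely illustrative.
if __name__ == "__main__":
    researcher = Researcher()
    print(researcher.research("What is retrieval-augmented generation?"))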