import os

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# NOTE(review): the OpenAI classes below are expected to pick up
# OPENAI_API_KEY from the environment on their own, and HuggingFaceEndpoint
# to pick up HUGGINGFACEHUB_API_TOKEN -- confirm against the installed
# langchain versions.  The previous bare ``os.environ.get(...)`` statements
# discarded their result and therefore had no effect.


def getPDF(file_path="./nike.pdf"):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file_path: Path of the PDF file to load.

    Returns:
        The documents produced by ``PyPDFLoader.load_and_split`` using a
        ``RecursiveCharacterTextSplitter`` with ``chunk_size=1000`` and
        ``chunk_overlap=200``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    loader = PyPDFLoader(file_path, extract_images=False)
    pages = loader.load_and_split(text_splitter)
    return pages


def create_retriever(documents):
    """Index ``documents`` in a Chroma vector store and return a retriever.

    Args:
        documents: Documents to embed (with ``OpenAIEmbeddings``) and store.

    Returns:
        A retriever configured for similarity search with ``k=1``.
    """
    vectorstore = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
    )
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 1},
    )
    return retriever


def create_prompt_llm_chain(system_prompt, modelParam="gpt-4o-mini"):
    """Build a "stuff documents" question-answering chain.

    Args:
        system_prompt: System instructions.  A ``{context}`` placeholder is
            appended so the chain has a slot for the retrieved documents.
        modelParam: ``"gpt-4o-mini"`` selects ``ChatOpenAI``; any other value
            selects the Hugging Face endpoint for
            ``meta-llama/Meta-Llama-3-8B-Instruct``.

    Returns:
        The chain created by ``create_stuff_documents_chain`` from the
        selected model and a system + human (``{input}``) chat prompt.
    """
    # Log the *requested* model name: the model object itself does not exist
    # yet at this point (printing it here used to raise UnboundLocalError).
    print('model: ', modelParam)

    if modelParam == "gpt-4o-mini":
        model = ChatOpenAI(model=modelParam)
    else:
        # Bind the endpoint to ``model`` so that both branches feed the same
        # variable into the chain below.
        model = HuggingFaceEndpoint(
            repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
            task="text-generation",
            max_new_tokens=100,
            do_sample=False,
        )

    system_prompt = system_prompt + "\n\n" + "{context}"
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(model, prompt)
    return question_answer_chain