Spaces:
Sleeping
Sleeping
File size: 2,204 Bytes
e63103b 3f199c2 68d3cc8 0870c96 e63103b 3f199c2 e63103b a37a365 3f199c2 e63103b 0870c96 b700f35 0870c96 e63103b b700f35 0870c96 3f199c2 0870c96 e63103b 3f199c2 0870c96 3f199c2 0870c96 e63103b e79797a e63103b 1a93363 8b9c100 869c9f9 8b9c100 3f199c2 8b9c100 fc85f25 8b9c100 e63103b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from langchain_community.document_loaders import PyPDFLoader
import os
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from setup.environment import default_model
from uuid import uuid4
# NOTE(review): the two lookups below discard their return value, so they do
# not set, validate, or cache anything. Presumably they were meant as a
# reminder that these variables must be present -- confirm and either assert
# on them or remove the calls.
os.environ.get("OPENAI_API_KEY")
os.environ.get("HUGGINGFACEHUB_API_TOKEN")
# Embedding model shared by the module; it is instantiated once, at import time.
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
# Module-level registry of document ids. getPDF() appends one uuid4 string per
# chunk it returns; nothing in this file ever clears the list.
allIds = []
def getPDF(file_paths):
    """Load PDFs, split them into chunks, and tag every chunk with a new id.

    Args:
        file_paths: An iterable of PDF paths. A single path given as a
            ``str`` or ``os.PathLike`` is also accepted and treated as a
            one-element list.

    Returns:
        A list holding the chunks of every file, in input order. Each chunk
        gets its ``id`` attribute set to a fresh ``uuid4`` string, and the
        same string is appended to the module-level ``allIds`` list.
    """
    # A bare string would otherwise be iterated character by character and
    # every character handed to the loader as if it were a path.
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    pages = []
    for file in file_paths:
        loader = PyPDFLoader(file, extract_images=False)
        # extend() grows the list in place instead of re-copying everything
        # accumulated so far for each additional file.
        pages.extend(loader.load_and_split(text_splitter))

    for page in pages:
        # Debug output kept exactly as before (stdout is part of the
        # observable behaviour of this function).
        print('\n')
        print('allIds: ', allIds)
        documentId = str(uuid4())
        allIds.append(documentId)
        page.id = documentId
    return pages
def create_retriever(documents, vectorstore):
    """Index *documents* in *vectorstore* and return a retriever over it.

    The first two documents are echoed to stdout before indexing. The
    retriever is built with the vector store's default settings (no
    ``search_type`` / ``search_kwargs`` overrides are passed).
    """
    print('\n\n')
    preview = documents[:2]
    print('documents: ', preview)

    vectorstore.add_documents(documents=documents)
    return vectorstore.as_retriever()
def create_prompt_llm_chain(system_prompt, modelParam):
    """Build a "stuff documents" chain for the requested model.

    Args:
        system_prompt: Text of the system message. A blank line and the
            ``{context}`` placeholder are appended to it.
        modelParam: Model identifier. When it equals ``default_model`` a
            ``ChatOpenAI`` model is used; any other value is passed as the
            ``repo_id`` of a ``HuggingFaceEndpoint``.

    Returns:
        The chain produced by ``create_stuff_documents_chain`` for the
        selected model and a system + human (``{input}``) chat prompt.
    """
    use_openai = modelParam == default_model
    if use_openai:
        llm = ChatOpenAI(model=modelParam)
    else:
        hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
        llm = HuggingFaceEndpoint(
            repo_id=modelParam,
            task="text-generation",
            do_sample=False,
            huggingfacehub_api_token=hf_token,
        )

    # The retrieved documents are injected through the {context} placeholder.
    system_template = system_prompt + "\n\n{context}"
    messages = [
        ("system", system_template),
        ("human", "{input}"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)
    return create_stuff_documents_chain(llm, chat_prompt)