from langchain_community.document_loaders import PyPDFLoader
import os
from langchain_openai import ChatOpenAI
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from setup.environment import default_model
from uuid import uuid4


os.environ.get("OPENAI_API_KEY")
os.environ.get("HUGGINGFACEHUB_API_TOKEN")
embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

allIds = []

def getPDF(file_paths):
  """Load each PDF, split it into overlapping chunks, and assign every chunk a unique id."""
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
  pages = []
  for file in file_paths:
    loader = PyPDFLoader(file, extract_images=False)
    pages += loader.load_and_split(text_splitter)

  # Give each chunk a UUID and remember every id that has been generated.
  for page in pages:
    documentId = str(uuid4())
    allIds.append(documentId)
    page.id = documentId
  return pages

def create_retriever(documents, vectorstore):
  """Index the given document chunks in the vector store and expose it as a retriever."""
  vectorstore.add_documents(documents=documents)

  retriever = vectorstore.as_retriever(
      # search_type="similarity",
      # search_kwargs={"k": 3},
  )
  return retriever

def create_prompt_llm_chain(system_prompt, modelParam):
  """Build a question-answering chain, using OpenAI for the default model and a Hugging Face endpoint otherwise."""
  if modelParam == default_model:
    model = ChatOpenAI(model=modelParam)
  else:
    model = HuggingFaceEndpoint(
        repo_id=modelParam,
        task="text-generation",
        # max_new_tokens=100,
        do_sample=False,
        huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    )

  # create_stuff_documents_chain fills the "{context}" placeholder with the retrieved documents.
  system_prompt = system_prompt + "\n\n" + "{context}"
  prompt = ChatPromptTemplate.from_messages(
      [
          ("system", system_prompt),
          ("human", "{input}"),
      ]
  )
  question_answer_chain = create_stuff_documents_chain(model, prompt)
  return question_answer_chain
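
# Usage sketch (not part of the original module): one assumed way to wire the helpers
# above together with a Chroma vector store. The file path, collection name, and
# prompts are hypothetical placeholders.
if __name__ == "__main__":
  vectorstore = Chroma(
      collection_name="pdf_rag",            # hypothetical collection name
      embedding_function=embeddings_model,
  )
  docs = getPDF(["example.pdf"])            # hypothetical input PDF
  retriever = create_retriever(docs, vectorstore)
  chain = create_prompt_llm_chain(
      "Answer the question using only the provided context.",
      default_model,
  )
  question = "What is this document about?"
  # The stuff-documents chain expects the retrieved documents under the "context" key.
  answer = chain.invoke({"input": question, "context": retriever.invoke(question)})
  print(answer)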