ashok2216 committed on
Commit
e4a6244
·
verified ·
1 Parent(s): 8d71f5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -90
app.py CHANGED
@@ -1,93 +1,3 @@
1
- # import chromadb
2
- # from chromadb.utils import embedding_functions
3
- # from chromadb.config import Settings
4
- # from sentence_transformers import SentenceTransformer
5
- # from transformers import pipeline
6
- # import streamlit as st
7
- # import fitz # PyMuPDF for PDF parsing
8
-
9
-
10
-
11
- # # Configure ChromaDB with persistent SQLite database
12
- # config = Settings(
13
- # persist_directory="./chromadb_data",
14
- # chroma_db_impl="sqlite",
15
- # )
16
-
17
- # # Initialize persistent client with SQLite
18
- # def setup_chromadb():
19
- # client = chromadb.PersistentClient(path="./chromadb_data")
20
- # collection = client.get_or_create_collection(
21
- # name="pdf_data",
22
- # embedding_function=chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
23
- # model_name="sentence-transformers/all-MiniLM-L6-v2"
24
- # ),
25
- # )
26
- # return client, collection
27
-
28
- # def extract_text_from_pdf(uploaded_file):
29
- # with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
30
- # text = ""
31
- # for page in doc:
32
- # text += page.get_text()
33
- # return text
34
-
35
- # def add_pdf_text_to_db(collection, pdf_text):
36
- # sentences = pdf_text.split("\n") # Split text into lines for granularity
37
- # for idx, sentence in enumerate(sentences):
38
- # if sentence.strip(): # Avoid empty lines
39
- # collection.add(
40
- # ids=[f"pdf_text_{idx}"],
41
- # documents=[sentence],
42
- # metadatas={"line_number": idx, "text": sentence}
43
- # )
44
-
45
- # def query_pdf_data(collection, query, retriever_model):
46
- # results = collection.query(
47
- # query_texts=[query],
48
- # n_results=3
49
- # )
50
- # context = " ".join([doc for doc in results["documents"][0]])
51
- # answer = retriever_model(f"Context: {context}\nQuestion: {query}")
52
- # return answer, results["metadatas"]
53
-
54
- # # Streamlit Interface
55
- # def main():
56
- # st.title("PDF Chatbot with Retrieval-Augmented Generation")
57
- # st.write("Upload a PDF, and ask questions about its content!")
58
-
59
- # # Initialize components
60
- # client, collection = setup_chromadb()
61
- # retriever_model = pipeline("text2text-generation", model="google/flan-t5-small") # Free LLM
62
-
63
- # # File upload
64
- # uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
65
- # if uploaded_file:
66
- # try:
67
- # pdf_text = extract_text_from_pdf(uploaded_file)
68
- # st.success("Text extracted successfully!")
69
- # st.text_area("Extracted Text:", pdf_text, height=300)
70
- # add_pdf_text_to_db(collection, pdf_text)
71
- # st.success("PDF text has been added to the database. You can now query it!")
72
- # except Exception as e:
73
- # st.error(f"Error extracting text: {e}")
74
-
75
- # query = st.text_input("Enter your query about the PDF:")
76
- # if query:
77
- # try:
78
- # answer, metadata = query_pdf_data(collection, query, retriever_model)
79
- # st.subheader("Answer:")
80
- # st.write(answer[0]['generated_text'])
81
- # st.subheader("Retrieved Context:")
82
- # for meta in metadata[0]:
83
- # st.write(meta)
84
- # except Exception as e:
85
- # st.error(f"An error occurred: {str(e)}")
86
-
87
-
88
- # if __name__ == "__main__":
89
- # main()
90
-
91
  import chromadb
92
  from chromadb.utils import embedding_functions
93
  from chromadb.config import Settings
@@ -173,6 +83,7 @@ def main():
173
  st.text_area("Extracted Text:", pdf_text, height=300)
174
  add_pdf_text_to_db(collection, pdf_text)
175
  st.success("PDF text has been added to the database. You can now query it!")
 
176
  except Exception as e:
177
  st.error(f"Error extracting text: {e}")
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import chromadb
2
  from chromadb.utils import embedding_functions
3
  from chromadb.config import Settings
 
83
  st.text_area("Extracted Text:", pdf_text, height=300)
84
  add_pdf_text_to_db(collection, pdf_text)
85
  st.success("PDF text has been added to the database. You can now query it!")
86
+
87
  except Exception as e:
88
  st.error(f"Error extracting text: {e}")
89