ashok2216 committed on
Commit
d051bce
·
verified ·
1 Parent(s): 1788a8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -22
app.py CHANGED
@@ -66,12 +66,19 @@ def setup_chromadb():
66
 
67
 
68
  # Step 2: Extract Text from PDF
69
- def extract_text_from_pdf(pdf_path):
70
- pdf_text = ""
71
- with fitz.open(pdf_path) as doc:
 
 
 
 
 
 
 
72
  for page in doc:
73
- pdf_text += page.get_text()
74
- return pdf_text
75
 
76
  # Step 3: Add Extracted Text to Vector Database
77
  def add_pdf_text_to_db(collection, pdf_text):
@@ -106,23 +113,30 @@ def main():
106
  # File upload
107
  uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
108
  if uploaded_file:
109
- st.write("Extracting text and populating the database...")
110
- pdf_text = extract_text_from_pdf(uploaded_file)
111
- add_pdf_text_to_db(collection, pdf_text)
112
- st.success("PDF text has been added to the database. You can now query it!")
113
-
114
- # Query Input
115
- query = st.text_input("Enter your query about the PDF:")
116
- if query:
117
- try:
118
- answer, metadata = query_pdf_data(collection, query, retriever_model)
119
- st.subheader("Answer:")
120
- st.write(answer[0]['generated_text'])
121
- st.subheader("Retrieved Context:")
122
- for meta in metadata[0]:
123
- st.write(meta)
124
- except Exception as e:
125
- st.error(f"An error occurred: {str(e)}")
 
 
 
 
 
 
 
126
 
127
  if __name__ == "__main__":
128
  main()
 
66
 
67
 
68
  # Step 2: Extract Text from PDF
69
+ # def extract_text_from_pdf(pdf_path):
70
+ # pdf_text = ""
71
+ # with fitz.open(pdf_path) as doc:
72
+ # for page in doc:
73
+ # pdf_text += page.get_text()
74
+ # return pdf_text
75
+
76
+ def extract_text_from_pdf(uploaded_file):
77
+ with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
78
+ text = ""
79
  for page in doc:
80
+ text += page.get_text()
81
+ return text
82
 
83
  # Step 3: Add Extracted Text to Vector Database
84
  def add_pdf_text_to_db(collection, pdf_text):
 
113
  # File upload
114
  uploaded_file = st.file_uploader("Upload your PDF file", type="pdf")
115
  if uploaded_file:
116
+ try:
117
+ pdf_text = extract_text_from_pdf(uploaded_file)
118
+ st.success("Text extracted successfully!")
119
+ st.text_area("Extracted Text:", pdf_text, height=300)
120
+ except Exception as e:
121
+ st.error(f"Error extracting text: {e}")
122
+ # if uploaded_file:
123
+ # st.write("Extracting text and populating the database...")
124
+ # pdf_text = extract_text_from_pdf(uploaded_file)
125
+ # add_pdf_text_to_db(collection, pdf_text)
126
+ # st.success("PDF text has been added to the database. You can now query it!")
127
+
128
+ # # Query Input
129
+ # query = st.text_input("Enter your query about the PDF:")
130
+ # if query:
131
+ # try:
132
+ # answer, metadata = query_pdf_data(collection, query, retriever_model)
133
+ # st.subheader("Answer:")
134
+ # st.write(answer[0]['generated_text'])
135
+ # st.subheader("Retrieved Context:")
136
+ # for meta in metadata[0]:
137
+ # st.write(meta)
138
+ # except Exception as e:
139
+ # st.error(f"An error occurred: {str(e)}")
140
 
141
  if __name__ == "__main__":
142
  main()