vhr1007
committed on
Commit
·
ebf9d7d
1
Parent(s):
a4a70d5
pptx format included
Browse files
- requirements.txt +1 -0
- utils/embedding_utils.py +1 -1
requirements.txt
CHANGED
@@ -84,6 +84,7 @@ python-dateutil==2.9.0.post0
|
|
84 |
python-docx==0.8.11
|
85 |
python-dotenv==1.0.1
|
86 |
python-multipart==0.0.9
|
|
|
87 |
pytz==2024.1
|
88 |
PyYAML==6.0.1
|
89 |
qdrant-client==1.10.1
|
|
|
84 |
python-docx==0.8.11
|
85 |
python-dotenv==1.0.1
|
86 |
python-multipart==0.0.9
|
87 |
+
python-pptx==0.6.21
|
88 |
pytz==2024.1
|
89 |
PyYAML==6.0.1
|
90 |
qdrant-client==1.10.1
|
utils/embedding_utils.py
CHANGED
@@ -59,7 +59,7 @@ def read_document(file_content: bytes, file_id: int, file_format: str) -> str:
|
|
59 |
text_content = extract_text_from_docx(file_path)
|
60 |
elif file_format.lower() == 'pdf':
|
61 |
text_content = extract_text_from_pdf(file_path)
|
62 |
-
elif file_format.lower() in ['txt', 'md', 'csv', 'xlsx']:
|
63 |
reader = SimpleDirectoryReader(input_files=[file_path])
|
64 |
documents = reader.load_data()
|
65 |
text_content = documents[0].text if documents else ''
|
|
|
59 |
text_content = extract_text_from_docx(file_path)
|
60 |
elif file_format.lower() == 'pdf':
|
61 |
text_content = extract_text_from_pdf(file_path)
|
62 |
+
elif file_format.lower() in ['txt', 'md', 'csv', 'xlsx','pptx']:
|
63 |
reader = SimpleDirectoryReader(input_files=[file_path])
|
64 |
documents = reader.load_data()
|
65 |
text_content = documents[0].text if documents else ''
|