vhr1007 committed on
Commit
ebf9d7d
·
1 Parent(s): a4a70d5

pptx format included

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. utils/embedding_utils.py +1 -1
requirements.txt CHANGED
@@ -84,6 +84,7 @@ python-dateutil==2.9.0.post0
84
  python-docx==0.8.11
85
  python-dotenv==1.0.1
86
  python-multipart==0.0.9
 
87
  pytz==2024.1
88
  PyYAML==6.0.1
89
  qdrant-client==1.10.1
 
84
  python-docx==0.8.11
85
  python-dotenv==1.0.1
86
  python-multipart==0.0.9
87
+ python-pptx==0.6.21
88
  pytz==2024.1
89
  PyYAML==6.0.1
90
  qdrant-client==1.10.1
utils/embedding_utils.py CHANGED
@@ -59,7 +59,7 @@ def read_document(file_content: bytes, file_id: int, file_format: str) -> str:
59
  text_content = extract_text_from_docx(file_path)
60
  elif file_format.lower() == 'pdf':
61
  text_content = extract_text_from_pdf(file_path)
62
- elif file_format.lower() in ['txt', 'md', 'csv', 'xlsx']:
63
  reader = SimpleDirectoryReader(input_files=[file_path])
64
  documents = reader.load_data()
65
  text_content = documents[0].text if documents else ''
 
59
  text_content = extract_text_from_docx(file_path)
60
  elif file_format.lower() == 'pdf':
61
  text_content = extract_text_from_pdf(file_path)
62
+ elif file_format.lower() in ['txt', 'md', 'csv', 'xlsx','pptx']:
63
  reader = SimpleDirectoryReader(input_files=[file_path])
64
  documents = reader.load_data()
65
  text_content = documents[0].text if documents else ''