Spaces:
Running
Running
File size: 2,476 Bytes
12b0dd7 b374298 cb23311 39fc36b 12b0dd7 e70ffc1 12b0dd7 b374298 12b0dd7 b374298 78209bc b374298 78209bc b374298 78209bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import tempfile, os
from typing import List
from langchain_core.documents import Document as LangchainDocument
from llama_index import Document
from llama_parse import LlamaParse, ResultType
# LlamaCloud API keys read from the environment, in the order they are tried
# by return_document_list_with_llama_parser. An unset variable yields None.
llama_parser_keys = [
    os.environ.get(env_name)
    for env_name in ("LLAMA_CLOUD_API_KEY_POPS", "LLAMA_CLOUD_API_KEY_PEIXE")
]
def handle_pdf_files_from_serializer(files):
    """Copy each uploaded file into its own temporary ``.pdf`` file on disk.

    Args:
        files: Iterable of uploaded-file objects exposing ``seek()`` and
            ``chunks()``; each chunk is written as-is to a binary temp file.

    Returns:
        List of temporary file paths, in the same order as ``files``. The
        files are created with ``delete=False``, so the caller is responsible
        for deleting them (e.g. with ``remove_pdf_temp_files``).

    Raises:
        Any exception raised while reading an upload or writing a temp file.
        Temp files created before the failure are removed before re-raising,
        because the caller never receives their paths.
    """
    listaPDFs = []
    try:
        for file in files:
            file.seek(0)  # Rewind in case the upload was already consumed.
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".pdf"
            ) as temp_file:
                # Record the path before writing so that a failed write is
                # cleaned up as well.
                listaPDFs.append(temp_file.name)
                for chunk in file.chunks():
                    temp_file.write(chunk)
    except BaseException:
        # Best-effort cleanup; the original error is what matters, so
        # secondary removal failures are ignored.
        for temp_path in listaPDFs:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        raise
    print("\n\nlistaPDFs: ", listaPDFs)
    return listaPDFs
def remove_pdf_temp_files(listaPDFs):
    """Delete the files at the given paths.

    Paths that no longer exist are skipped, so a single missing file does not
    prevent the remaining files from being removed and the call can safely be
    repeated.

    Args:
        listaPDFs: Iterable of filesystem paths to delete.

    Raises:
        OSError: For removal failures other than the file being absent
            (e.g. permission errors).
    """
    for temp_path in listaPDFs:
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            # Already gone: nothing left to clean up for this path.
            continue
async def return_document_list_with_llama_parser(file: str):
    """Parse ``file`` with LlamaParse and return one LangChain document per page.

    Every non-empty key in ``llama_parser_keys`` is tried in order. A key is
    skipped when the parse call raises or returns an empty result; the first
    key that yields a result determines the return value.

    Args:
        file: Value passed straight to ``LlamaParse.aget_json`` (presumably
            the path of the PDF to parse; confirm against callers).

    Returns:
        A list of ``LangchainDocument`` objects built from the ``"pages"``
        entry of the first parsed result: ``page_content`` comes from each
        page's ``"md"`` field and ``metadata`` holds its ``"page"`` value.

    Raises:
        ValueError: If no configured key produced a result. The last parser
            error, if any, is attached as ``__cause__``.
    """
    last_error = None
    for key in llama_parser_keys:
        if not key:
            # Environment variable not set; nothing to try for this slot.
            continue
        parser = LlamaParse(
            api_key=key,
            result_type=ResultType.JSON,
            language="pt",
            verbose=True,
        )
        try:
            parsed_document = await parser.aget_json(file)
        except Exception as error:
            # Deliberately not a bare ``except``: task cancellation and
            # KeyboardInterrupt must propagate instead of triggering a retry
            # with the next key.
            print(f"Error with llama parser key ending with {key[-4:]}")
            last_error = error
            continue  # Move on to the next key.
        if not parsed_document:
            continue
        documents: List[LangchainDocument] = [
            LangchainDocument(
                page_content=page.get("md"),  # type: ignore
                metadata={"page": page.get("page")},  # type: ignore
            )
            for page in parsed_document[0].get("pages")  # type: ignore
        ]
        return documents
    # Only reached when the loop finished without any key returning documents.
    raise ValueError("ALGO DEU ERRADO NO PARSER DO LLAMA PARSE:") from last_error
|