import os
import tempfile
from typing import List

from langchain_core.documents import Document as LangchainDocument
from llama_index import Document
from llama_parse import LlamaParse, ResultType

# API keys tried in order until one of them successfully parses the document.
# Entries may be None when the corresponding env var is unset.
llama_parser_keys = [
    os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
    os.getenv("LLAMA_CLOUD_API_KEY_PEIXE"),
]


def handle_pdf_files_from_serializer(files):
    """Persist each uploaded PDF to a temporary file and return the paths.

    Args:
        files: iterable of uploaded file objects exposing ``seek()`` and
            ``chunks()`` — presumably Django ``UploadedFile`` instances;
            TODO confirm against the caller.

    Returns:
        list[str]: filesystem paths of the temporary ``.pdf`` files.
        Callers must clean these up later via ``remove_pdf_temp_files``.
    """
    listaPDFs = []
    for file in files:
        file.seek(0)  # rewind in case the stream was already consumed
        # delete=False so the file survives the context exit; the caller
        # is responsible for removing it when done.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            # Stream the upload to disk chunk by chunk to avoid loading
            # the whole PDF into memory at once.
            for chunk in file.chunks():
                temp_file.write(chunk)
            listaPDFs.append(temp_file.name)
    print("\n\nlistaPDFs: ", listaPDFs)
    return listaPDFs


def remove_pdf_temp_files(listaPDFs):
    """Delete the temporary PDF files created by ``handle_pdf_files_from_serializer``.

    Args:
        listaPDFs: list of filesystem paths to remove.
    """
    for file in listaPDFs:
        os.remove(file)


async def return_document_list_with_llama_parser(file: str):
    """Parse *file* with LlamaParse, trying each configured API key in turn.

    The first key that yields a non-empty parse wins; its pages are converted
    to Langchain documents (page markdown as content, page number in metadata).

    Args:
        file: path to the PDF file to parse.

    Returns:
        List[LangchainDocument]: one document per parsed page.

    Raises:
        ValueError: if no key produced a usable parse result.
    """
    for key in llama_parser_keys:
        if not key:
            continue  # env var unset for this slot — try the next key
        parser = LlamaParse(
            api_key=key,
            result_type=ResultType.JSON,  # Options: 'text', 'markdown', 'json', 'structured'
            language="pt",
            verbose=True,
        )
        try:
            parsed_document = await parser.aget_json(file)
        except Exception:  # was a bare except; keep best-effort key rotation
            print(f"Error with llama parser key ending with {key[-4:]}")
            continue  # move on to the next key
        if len(parsed_document) == 0:
            continue

        documents: List[LangchainDocument] = []
        # "pages" may be absent/None in the JSON payload — guard so we don't
        # raise TypeError iterating None.
        for doc in parsed_document[0].get("pages") or []:
            langchain_document = LangchainDocument(
                page_content=doc.get("md"),
                metadata={
                    "page": doc.get("page"),  # keep page number for citations
                },
            )
            documents.append(langchain_document)
        return documents

    # Only reached if every key failed or returned an empty result.
    raise ValueError("ALGO DEU ERRADO NO PARSER DO LLAMA PARSE:")