luanpoppe committed on
Commit
39fc36b
·
1 Parent(s): 78209bc

feat: começando a adicionar testes com pytest

Browse files
_utils/handle_files.py CHANGED
@@ -4,6 +4,11 @@ from langchain_core.documents import Document as LangchainDocument
4
  from llama_index import Document
5
  from llama_parse import LlamaParse, ResultType
6
 
 
 
 
 
 
7
 
8
  def handle_pdf_files_from_serializer(files):
9
  listaPDFs = []
@@ -28,11 +33,6 @@ def remove_pdf_temp_files(listaPDFs):
28
 
29
 
30
  async def return_document_list_with_llama_parser(file: str):
31
- llama_parser_keys = [
32
- os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
33
- os.getenv("LLAMA_CLOUD_API_KEY_PEIXE"),
34
- ]
35
-
36
  for key in llama_parser_keys:
37
  documents: List[LangchainDocument] = []
38
  if key:
@@ -48,7 +48,6 @@ async def return_document_list_with_llama_parser(file: str):
48
  except:
49
  print(f"Error with llama parser key ending with {key[-4:]}")
50
  continue # Faz com que comece o próximo loop
51
- print("parsed_document: ", parsed_document)
52
  if len(parsed_document) == 0:
53
  continue
54
 
 
4
  from llama_index import Document
5
  from llama_parse import LlamaParse, ResultType
6
 
7
+ llama_parser_keys = [
8
+ os.getenv("LLAMA_CLOUD_API_KEY_POPS"),
9
+ os.getenv("LLAMA_CLOUD_API_KEY_PEIXE"),
10
+ ]
11
+
12
 
13
  def handle_pdf_files_from_serializer(files):
14
  listaPDFs = []
 
33
 
34
 
35
  async def return_document_list_with_llama_parser(file: str):
 
 
 
 
 
36
  for key in llama_parser_keys:
37
  documents: List[LangchainDocument] = []
38
  if key:
 
48
  except:
49
  print(f"Error with llama parser key ending with {key[-4:]}")
50
  continue # Faz com que comece o próximo loop
 
51
  if len(parsed_document) == 0:
52
  continue
53
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
tests/bubble_integrations/test_obter_arquivo.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from _utils.bubble_integrations.obter_arquivo import get_pdf_from_bubble
3
+ from langchain_core.documents import Document
4
+
5
+ pdf_file_url = "https://vella.app.br/version-test/fileupload/f1736298232170x993758712541722200/0002269-86.2009.8.05.0032%20processo%20teste.pdf"
6
+
7
+
8
+ class TestObterArquivo:
9
+ @pytest.mark.asyncio
10
+ async def test_get_pdf_from_bubble_No_llama_parse(self):
11
+ should_use_llama_parse = False
12
+ result = await get_pdf_from_bubble(pdf_file_url, should_use_llama_parse)
13
+
14
+ assert isinstance(result, list)
15
+ assert len(result) > 0
16
+ print("\n\nresult", result)
17
+ assert all(isinstance(item, Document) for item in result)
18
+
19
+ # Teste abaixo não funciona com arquivos grandes -> O Llama Parse dá erro de timeout
20
+ # @pytest.mark.asyncio
21
+ # async def test_get_pdf_from_bubble_With_llama_parse(self):
22
+ # should_use_llama_parse = True
23
+ # result = await get_pdf_from_bubble(pdf_file_url, should_use_llama_parse)
24
+
25
+ # assert isinstance(result, list)
26
+ # assert len(result) > 0
27
+ # print("\n\nresult", result)
28
+ # assert all(isinstance(item, Document) for item in result)
tests/conftest.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import django
4
+
5
+ # Configura o Django
6
+ # Adiciona o diretório raiz do projeto ao sys.path
7
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
8
+ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "setup.settings")
9
+ django.setup()
tests/fixtures/_pdf-uma-pagina.pdf ADDED
Binary file (26.6 kB). View file
 
tests/test_handle_files.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import os
3
+ from langchain_core.documents import Document
4
+
5
+ from _utils.handle_files import return_document_list_with_llama_parser
6
+
7
+ cwd = os.getcwd()
8
+ pdf_file_url = os.path.join(cwd, "tests", "fixtures", "_pdf-uma-pagina.pdf")
9
+
10
+
11
+ class TestHandleFiles:
12
+ @pytest.mark.asyncio
13
+ async def test_return_document_list_with_llama_parser_With_wrong_keys(
14
+ self, monkeypatch
15
+ ):
16
+
17
+ monkeypatch.setattr(
18
+ "_utils.handle_files.llama_parser_keys",
19
+ ["abc", os.getenv("LLAMA_CLOUD_API_KEY_PEIXE")],
20
+ )
21
+
22
+ result = await return_document_list_with_llama_parser(pdf_file_url)
23
+
24
+ assert isinstance(result, list)
25
+ assert len(result) > 0
26
+ assert all(isinstance(item, Document) for item in result)
27
+ assert all(len(item.page_content) > 0 for item in result)
28
+ assert all(int(item.metadata.get("page", 0)) > 0 for item in result)
tests/test_splitters.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import os
3
+ from _utils.splitters.Splitter_class import Splitter
4
+ from _utils.models.gerar_relatorio import (
5
+ DocumentChunk,
6
+ )
7
+
8
+ base_dir = os.path.dirname(os.path.abspath(__file__))
9
+ chunk_size = 1000
10
+ chunk_overlap = 200
11
+ cwd = os.getcwd()
12
+ pdf_file = os.path.join(cwd, "tests", "fixtures", "_pdf-uma-pagina.pdf")
13
+
14
+
15
+ class TestSplitters:
16
+ splitter = Splitter(chunk_size, chunk_overlap)
17
+
18
+ @pytest.mark.asyncio
19
+ async def test_load_and_split_document_No_llama_parse_No_Bubble(self, monkeypatch):
20
+ should_use_llama_parse = False
21
+ isBubble = False
22
+
23
+ result_chunks, result_strings = await self.splitter.load_and_split_document(
24
+ pdf_file, should_use_llama_parse, isBubble
25
+ )
26
+
27
+ assert isinstance(result_chunks, list)
28
+ assert isinstance(result_strings, list)
29
+ assert len(result_chunks) > 0
30
+ assert len(result_strings) > 0
31
+ assert all(isinstance(item, str) for item in result_strings)
32
+ assert all(isinstance(item, DocumentChunk) for item in result_chunks)
33
+ assert all(
34
+ (chunk_size - 100) < len(item.content) < (chunk_size + 100)
35
+ for item in result_chunks
36
+ )
37
+
38
+ @pytest.mark.asyncio
39
+ async def test_load_and_split_document_No_llama_parse_No_Bubble_with_bigger_chunk(
40
+ self, monkeypatch
41
+ ):
42
+ should_use_llama_parse = False
43
+ isBubble = False
44
+ chunk_size = 3500
45
+ splitter_temp = Splitter(chunk_size, chunk_overlap)
46
+
47
+ result_chunks, result_strings = await splitter_temp.load_and_split_document(
48
+ pdf_file, should_use_llama_parse, isBubble
49
+ )
50
+
51
+ assert isinstance(result_chunks, list)
52
+ assert isinstance(result_strings, list)
53
+ assert len(result_chunks) > 0
54
+ assert len(result_strings) > 0
55
+ assert all(isinstance(item, str) for item in result_strings)
56
+ assert all(isinstance(item, DocumentChunk) for item in result_chunks)
57
+ assert all(
58
+ (chunk_size - 200) < len(item.content) < (chunk_size + 200)
59
+ for item in result_chunks
60
+ )
61
+
62
+ @pytest.mark.asyncio
63
+ async def test_load_and_split_document_With_llama_parse_No_Bubble(
64
+ self, monkeypatch
65
+ ):
66
+ should_use_llama_parse = True
67
+ isBubble = False
68
+ result_chunks, result_strings = await self.splitter.load_and_split_document(
69
+ pdf_file, should_use_llama_parse, isBubble
70
+ )
71
+
72
+ assert isinstance(result_chunks, list)
73
+ assert isinstance(result_strings, list)
74
+ assert len(result_chunks) > 0
75
+ assert len(result_strings) > 0
76
+ assert all(isinstance(item, str) for item in result_strings)
77
+ assert all(isinstance(item, DocumentChunk) for item in result_chunks)
78
+ # Teste abaixo não passa ainda --> Será consertado no futuro
79
+ # assert all(
80
+ # (chunk_size - 100) < len(item.content) < (chunk_size + 100)
81
+ # for item in result_chunks
82
+ # )