Luis Chaves committed on
Commit
d86a1f5
·
1 Parent(s): 14cef21

split PDF into chunks after finding max length from warning, also updated Dockerfile to have CUDA installed and use GPU if available

Browse files
Files changed (5) hide show
  1. Dockerfile +7 -1
  2. learning.md +1 -0
  3. pyproject.toml +3 -2
  4. src/everycure/extractor.py +81 -29
  5. uv.lock +119 -0
Dockerfile CHANGED
@@ -1,5 +1,11 @@
1
  FROM python:3.12-slim
2
 
 
 
 
 
 
 
3
  WORKDIR /code
4
 
5
  COPY ./pyproject.toml /code/
@@ -9,4 +15,4 @@ COPY ./openapi.yaml /code/
9
 
10
  RUN pip install --no-cache-dir .[all]
11
 
12
- CMD ["uvicorn", "everycure.app:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  FROM python:3.12-slim
2
 
3
+ # Install CUDA dependencies
4
+ RUN apt-get update && apt-get install -y \
5
+ cuda-cudart-12-1 \
6
+ cuda-libraries-12-1 \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
  WORKDIR /code
10
 
11
  COPY ./pyproject.toml /code/
 
15
 
16
  RUN pip install --no-cache-dir .[all]
17
 
18
+ CMD ["uvicorn", "everycure.app:app", "--host", "0.0.0.0", "--port", "7860"]
learning.md CHANGED
@@ -61,6 +61,7 @@ https://huggingface.co/blaze999/Medical-NER
61
 
62
  <https://docs.astral.sh/uv/guides/integration/pytorch/#installing-pytorch>
63
 
 
64
 
65
  ## what's the max length that gliner accepts?
66
 
 
61
 
62
  <https://docs.astral.sh/uv/guides/integration/pytorch/#installing-pytorch>
63
 
64
+ nice: https://huggingface.co/urchade/gliner_base
65
 
66
  ## what's the max length that gliner accepts?
67
 
pyproject.toml CHANGED
@@ -10,9 +10,10 @@ dependencies = [
10
  "torch>=2.5.1",
11
  "transformers>=4.48.1",
12
  "fastapi>=0.109.0",
13
- "python-multipart>=0.0.6", # Required for handling file uploads
14
  "pydantic>=2.5.3",
15
- "uvicorn>=0.27.0"
 
16
  ]
17
 
18
  [build-system]
 
10
  "torch>=2.5.1",
11
  "transformers>=4.48.1",
12
  "fastapi>=0.109.0",
13
+ "python-multipart>=0.0.6", # Required for handling file uploads
14
  "pydantic>=2.5.3",
15
+ "uvicorn>=0.27.0",
16
+ "gliner>=0.2.16",
17
  ]
18
 
19
  [build-system]
src/everycure/extractor.py CHANGED
@@ -4,9 +4,11 @@ import pdfplumber
4
  from fastapi import UploadFile
5
  from gliner import GLiNER
6
  import logging
 
 
7
 
8
  # Set up logging
9
- logging.basicConfig(level=logging.WARNING)
10
  logger = logging.getLogger(__name__)
11
 
12
  class Entity(BaseModel):
@@ -30,8 +32,50 @@ MEDICAL_LABELS = [
30
  "procedure", "treatment", "device", "diagnostic_aid", "event"
31
  ]
32
 
 
 
 
 
33
  # Initialize model
34
  gliner_model = GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  def extract_entities_from_pdf(file: UploadFile) -> List[Entity]:
37
  """
@@ -48,39 +92,47 @@ def extract_entities_from_pdf(file: UploadFile) -> List[Entity]:
48
  try:
49
  # Create a temporary file to handle the upload
50
  with pdfplumber.open(file.file) as pdf:
51
- logger.debug(f"Successfully opened PDF with {len(pdf.pages)} pages")
52
  # Join all pages into single string
53
  pdf_text = " ".join(p.extract_text() for p in pdf.pages)
54
- logger.debug(f"Extracted text length: {len(pdf_text)} characters")
 
 
 
 
55
 
56
- # Extract entities using GLiNER
57
- logger.debug("Starting GLiNER entity extraction")
58
- entities = gliner_model.predict_entities(pdf_text, MEDICAL_LABELS, threshold=0.7)
59
- logger.debug(f"Found {len(entities)} entities")
60
 
61
- # Convert to our Entity model format
62
- result = []
63
- for ent in entities:
64
- if len(ent["text"]) <= 2: # Skip very short entities
65
- continue
66
-
67
- # Find the context (text surrounding the entity)
68
- start_idx = pdf_text.find(ent["text"])
69
- if start_idx != -1:
70
- # Get surrounding context (50 chars before and after)
71
- context_start = max(0, start_idx - 50)
72
- context_end = min(len(pdf_text), start_idx + len(ent["text"]) + 50)
73
- context = pdf_text[context_start:context_end]
74
-
75
- result.append(Entity(
76
- entity=ent["text"],
77
- context=context,
78
- start=start_idx - context_start, # Adjust start position relative to context
79
- end=start_idx - context_start + len(ent["text"])
80
- ))
 
 
 
 
 
81
 
82
- logger.debug(f"Returning {len(result)} processed entities")
83
- return result
84
 
85
  except Exception as e:
86
  logger.error(f"Error during extraction: {str(e)}", exc_info=True)
 
4
  from fastapi import UploadFile
5
  from gliner import GLiNER
6
  import logging
7
+ import torch
8
+ import re
9
 
10
  # Set up logging
11
+ logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
 
14
  class Entity(BaseModel):
 
32
  "procedure", "treatment", "device", "diagnostic_aid", "event"
33
  ]
34
 
35
+ # Check for GPU availability
36
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
+ logger.info(f"Using device: {device}")
38
+
39
  # Initialize model
40
  gliner_model = GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5")
41
+ gliner_model.to(device) # Move model to GPU if available
42
+
43
+ def chunk_text(text: str, max_tokens: int = 700) -> List[str]:
44
+ """
45
+ Split text into chunks that respect sentence boundaries and token limit.
46
+ We use 700 tokens to leave some margin for the model's special tokens.
47
+
48
+ Args:
49
+ text (str): Input text to chunk
50
+ max_tokens (int): Maximum number of tokens per chunk
51
+
52
+ Returns:
53
+ List[str]: List of text chunks
54
+ """
55
+ # Split into sentences (simple approach)
56
+ sentences = re.split(r'(?<=[.!?])\s+', text)
57
+ chunks = []
58
+ current_chunk = []
59
+ current_length = 0
60
+
61
+ for sentence in sentences:
62
+ # Rough estimation of tokens (words + punctuation)
63
+ sentence_tokens = len(re.findall(r'\w+|[^\w\s]', sentence))
64
+
65
+ if current_length + sentence_tokens > max_tokens:
66
+ if current_chunk: # Save current chunk if it exists
67
+ chunks.append(' '.join(current_chunk))
68
+ current_chunk = []
69
+ current_length = 0
70
+
71
+ current_chunk.append(sentence)
72
+ current_length += sentence_tokens
73
+
74
+ # Don't forget the last chunk
75
+ if current_chunk:
76
+ chunks.append(' '.join(current_chunk))
77
+
78
+ return chunks
79
 
80
  def extract_entities_from_pdf(file: UploadFile) -> List[Entity]:
81
  """
 
92
  try:
93
  # Create a temporary file to handle the upload
94
  with pdfplumber.open(file.file) as pdf:
95
+ logger.info(f"Successfully opened PDF with {len(pdf.pages)} pages")
96
  # Join all pages into single string
97
  pdf_text = " ".join(p.extract_text() for p in pdf.pages)
98
+ logger.info(f"Extracted text length: {len(pdf_text)} characters")
99
+
100
+ # Split text into chunks
101
+ text_chunks = chunk_text(pdf_text)
102
+ logger.info(f"Split text into {len(text_chunks)} chunks")
103
 
104
+ # Extract entities from each chunk
105
+ all_entities = []
106
+ base_offset = 0 # Keep track of the absolute position in the original text
 
107
 
108
+ for chunk in text_chunks:
109
+ # Extract entities using GLiNER
110
+ chunk_entities = gliner_model.predict_entities(chunk, MEDICAL_LABELS, threshold=0.7)
111
+
112
+ # Process entities from this chunk
113
+ for ent in chunk_entities:
114
+ if len(ent["text"]) <= 2: # Skip very short entities
115
+ continue
116
+
117
+ # Find the context (text surrounding the entity)
118
+ start_idx = chunk.find(ent["text"])
119
+ if start_idx != -1:
120
+ # Get surrounding context (50 chars before and after)
121
+ context_start = max(0, start_idx - 50)
122
+ context_end = min(len(chunk), start_idx + len(ent["text"]) + 50)
123
+ context = chunk[context_start:context_end]
124
+
125
+ all_entities.append(Entity(
126
+ entity=ent["text"],
127
+ context=context,
128
+ start=base_offset + start_idx, # Use absolute position in original text
129
+ end=base_offset + start_idx + len(ent["text"])
130
+ ))
131
+
132
+ base_offset += len(chunk) + 1 # +1 for the space between chunks
133
 
134
+ logger.info(f"Returning {len(all_entities)} processed entities")
135
+ return all_entities
136
 
137
  except Exception as e:
138
  logger.error(f"Error during extraction: {str(e)}", exc_info=True)
uv.lock CHANGED
@@ -122,6 +122,18 @@ wheels = [
122
  { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
123
  ]
124
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  [[package]]
126
  name = "cryptography"
127
  version = "44.0.0"
@@ -168,23 +180,27 @@ version = "0.1.0"
168
  source = { editable = "." }
169
  dependencies = [
170
  { name = "fastapi" },
 
171
  { name = "marimo" },
172
  { name = "pdfplumber" },
173
  { name = "pydantic" },
174
  { name = "python-multipart" },
175
  { name = "torch" },
176
  { name = "transformers" },
 
177
  ]
178
 
179
  [package.metadata]
180
  requires-dist = [
181
  { name = "fastapi", specifier = ">=0.109.0" },
 
182
  { name = "marimo", specifier = ">=0.10.16" },
183
  { name = "pdfplumber", specifier = ">=0.11.5" },
184
  { name = "pydantic", specifier = ">=2.5.3" },
185
  { name = "python-multipart", specifier = ">=0.0.6" },
186
  { name = "torch", specifier = ">=2.5.1" },
187
  { name = "transformers", specifier = ">=4.48.1" },
 
188
  ]
189
 
190
  [[package]]
@@ -210,6 +226,15 @@ wheels = [
210
  { url = "https://files.pythonhosted.org/packages/89/ec/00d68c4ddfedfe64159999e5f8a98fb8442729a63e2077eb9dcd89623d27/filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338", size = 16164 },
211
  ]
212
 
 
 
 
 
 
 
 
 
 
213
  [[package]]
214
  name = "fsspec"
215
  version = "2024.12.0"
@@ -219,6 +244,23 @@ wheels = [
219
  { url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862 },
220
  ]
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  [[package]]
223
  name = "h11"
224
  version = "0.14.0"
@@ -246,6 +288,18 @@ wheels = [
246
  { url = "https://files.pythonhosted.org/packages/6c/3f/50f6b25fafdcfb1c089187a328c95081abf882309afd86f4053951507cd1/huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec", size = 450658 },
247
  ]
248
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  [[package]]
250
  name = "idna"
251
  version = "3.10"
@@ -538,6 +592,32 @@ wheels = [
538
  { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
539
  ]
540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  [[package]]
542
  name = "packaging"
543
  version = "24.2"
@@ -621,6 +701,20 @@ wheels = [
621
  { url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 },
622
  ]
623
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
624
  [[package]]
625
  name = "psutil"
626
  version = "6.1.1"
@@ -769,6 +863,15 @@ wheels = [
769
  { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810 },
770
  ]
771
 
 
 
 
 
 
 
 
 
 
772
  [[package]]
773
  name = "python-multipart"
774
  version = "0.0.20"
@@ -904,6 +1007,22 @@ wheels = [
904
  { url = "https://files.pythonhosted.org/packages/86/ca/aa489392ec6fb59223ffce825461e1f811a3affd417121a2088be7a5758b/safetensors-0.5.2-cp38-abi3-win_amd64.whl", hash = "sha256:78abdddd03a406646107f973c7843276e7b64e5e32623529dc17f3d94a20f589", size = 303756 },
905
  ]
906
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907
  [[package]]
908
  name = "setuptools"
909
  version = "75.8.0"
 
122
  { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
123
  ]
124
 
125
+ [[package]]
126
+ name = "coloredlogs"
127
+ version = "15.0.1"
128
+ source = { registry = "https://pypi.org/simple" }
129
+ dependencies = [
130
+ { name = "humanfriendly" },
131
+ ]
132
+ sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520 }
133
+ wheels = [
134
+ { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018 },
135
+ ]
136
+
137
  [[package]]
138
  name = "cryptography"
139
  version = "44.0.0"
 
180
  source = { editable = "." }
181
  dependencies = [
182
  { name = "fastapi" },
183
+ { name = "gliner" },
184
  { name = "marimo" },
185
  { name = "pdfplumber" },
186
  { name = "pydantic" },
187
  { name = "python-multipart" },
188
  { name = "torch" },
189
  { name = "transformers" },
190
+ { name = "uvicorn" },
191
  ]
192
 
193
  [package.metadata]
194
  requires-dist = [
195
  { name = "fastapi", specifier = ">=0.109.0" },
196
+ { name = "gliner", specifier = ">=0.2.16" },
197
  { name = "marimo", specifier = ">=0.10.16" },
198
  { name = "pdfplumber", specifier = ">=0.11.5" },
199
  { name = "pydantic", specifier = ">=2.5.3" },
200
  { name = "python-multipart", specifier = ">=0.0.6" },
201
  { name = "torch", specifier = ">=2.5.1" },
202
  { name = "transformers", specifier = ">=4.48.1" },
203
+ { name = "uvicorn", specifier = ">=0.27.0" },
204
  ]
205
 
206
  [[package]]
 
226
  { url = "https://files.pythonhosted.org/packages/89/ec/00d68c4ddfedfe64159999e5f8a98fb8442729a63e2077eb9dcd89623d27/filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338", size = 16164 },
227
  ]
228
 
229
+ [[package]]
230
+ name = "flatbuffers"
231
+ version = "25.1.24"
232
+ source = { registry = "https://pypi.org/simple" }
233
+ sdist = { url = "https://files.pythonhosted.org/packages/64/20/c380c311843318b577650286b2c7eaaac3a011fb982df0050bdbd7e453c5/flatbuffers-25.1.24.tar.gz", hash = "sha256:e0f7b7d806c0abdf166275492663130af40c11f89445045fbef0aa3c9a8643ad", size = 22155 }
234
+ wheels = [
235
+ { url = "https://files.pythonhosted.org/packages/0e/e2/b066e6e02d67bf5261a6d7539648c6da3365cc9eff3eb6d82009595d84d9/flatbuffers-25.1.24-py2.py3-none-any.whl", hash = "sha256:1abfebaf4083117225d0723087ea909896a34e3fec933beedb490d595ba24145", size = 30955 },
236
+ ]
237
+
238
  [[package]]
239
  name = "fsspec"
240
  version = "2024.12.0"
 
244
  { url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862 },
245
  ]
246
 
247
+ [[package]]
248
+ name = "gliner"
249
+ version = "0.2.16"
250
+ source = { registry = "https://pypi.org/simple" }
251
+ dependencies = [
252
+ { name = "huggingface-hub" },
253
+ { name = "onnxruntime" },
254
+ { name = "sentencepiece" },
255
+ { name = "torch" },
256
+ { name = "tqdm" },
257
+ { name = "transformers" },
258
+ ]
259
+ sdist = { url = "https://files.pythonhosted.org/packages/44/a6/3ae136b0996689e497b6ca07490d6ecfda62e2e215fd16e39b4558a42194/gliner-0.2.16.tar.gz", hash = "sha256:030139c3cceb78269d40f1bb8a040c9563705b33d9ff88e9ae9f699edb841738", size = 52631 }
260
+ wheels = [
261
+ { url = "https://files.pythonhosted.org/packages/2f/2b/92766d80acde0523c4be58f7fb288dd572697d664ddceecaf644bef3e48d/gliner-0.2.16-py3-none-any.whl", hash = "sha256:101734441b96e757e58cb30250957c81489f87abfdf3e583bafeef10aa36ef84", size = 62324 },
262
+ ]
263
+
264
  [[package]]
265
  name = "h11"
266
  version = "0.14.0"
 
288
  { url = "https://files.pythonhosted.org/packages/6c/3f/50f6b25fafdcfb1c089187a328c95081abf882309afd86f4053951507cd1/huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec", size = 450658 },
289
  ]
290
 
291
+ [[package]]
292
+ name = "humanfriendly"
293
+ version = "10.0"
294
+ source = { registry = "https://pypi.org/simple" }
295
+ dependencies = [
296
+ { name = "pyreadline3", marker = "sys_platform == 'win32'" },
297
+ ]
298
+ sdist = { url = "https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702 }
299
+ wheels = [
300
+ { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 },
301
+ ]
302
+
303
  [[package]]
304
  name = "idna"
305
  version = "3.10"
 
592
  { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
593
  ]
594
 
595
+ [[package]]
596
+ name = "onnxruntime"
597
+ version = "1.20.1"
598
+ source = { registry = "https://pypi.org/simple" }
599
+ dependencies = [
600
+ { name = "coloredlogs" },
601
+ { name = "flatbuffers" },
602
+ { name = "numpy" },
603
+ { name = "packaging" },
604
+ { name = "protobuf" },
605
+ { name = "sympy" },
606
+ ]
607
+ wheels = [
608
+ { url = "https://files.pythonhosted.org/packages/e5/39/9335e0874f68f7d27103cbffc0e235e32e26759202df6085716375c078bb/onnxruntime-1.20.1-cp312-cp312-macosx_13_0_universal2.whl", hash = "sha256:22b0655e2bf4f2161d52706e31f517a0e54939dc393e92577df51808a7edc8c9", size = 31007580 },
609
+ { url = "https://files.pythonhosted.org/packages/c5/9d/a42a84e10f1744dd27c6f2f9280cc3fb98f869dd19b7cd042e391ee2ab61/onnxruntime-1.20.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f56e898815963d6dc4ee1c35fc6c36506466eff6d16f3cb9848cea4e8c8172", size = 11952833 },
610
+ { url = "https://files.pythonhosted.org/packages/47/42/2f71f5680834688a9c81becbe5c5bb996fd33eaed5c66ae0606c3b1d6a02/onnxruntime-1.20.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bb71a814f66517a65628c9e4a2bb530a6edd2cd5d87ffa0af0f6f773a027d99e", size = 13333903 },
611
+ { url = "https://files.pythonhosted.org/packages/c8/f1/aabfdf91d013320aa2fc46cf43c88ca0182860ff15df872b4552254a9680/onnxruntime-1.20.1-cp312-cp312-win32.whl", hash = "sha256:bd386cc9ee5f686ee8a75ba74037750aca55183085bf1941da8efcfe12d5b120", size = 9814562 },
612
+ { url = "https://files.pythonhosted.org/packages/dd/80/76979e0b744307d488c79e41051117634b956612cc731f1028eb17ee7294/onnxruntime-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:19c2d843eb074f385e8bbb753a40df780511061a63f9def1b216bf53860223fb", size = 11331482 },
613
+ { url = "https://files.pythonhosted.org/packages/f7/71/c5d980ac4189589267a06f758bd6c5667d07e55656bed6c6c0580733ad07/onnxruntime-1.20.1-cp313-cp313-macosx_13_0_universal2.whl", hash = "sha256:cc01437a32d0042b606f462245c8bbae269e5442797f6213e36ce61d5abdd8cc", size = 31007574 },
614
+ { url = "https://files.pythonhosted.org/packages/81/0d/13bbd9489be2a6944f4a940084bfe388f1100472f38c07080a46fbd4ab96/onnxruntime-1.20.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb44b08e017a648924dbe91b82d89b0c105b1adcfe31e90d1dc06b8677ad37be", size = 11951459 },
615
+ { url = "https://files.pythonhosted.org/packages/c0/ea/4454ae122874fd52bbb8a961262de81c5f932edeb1b72217f594c700d6ef/onnxruntime-1.20.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bda6aebdf7917c1d811f21d41633df00c58aff2bef2f598f69289c1f1dabc4b3", size = 13331620 },
616
+ { url = "https://files.pythonhosted.org/packages/d8/e0/50db43188ca1c945decaa8fc2a024c33446d31afed40149897d4f9de505f/onnxruntime-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:d30367df7e70f1d9fc5a6a68106f5961686d39b54d3221f760085524e8d38e16", size = 11331758 },
617
+ { url = "https://files.pythonhosted.org/packages/d8/55/3821c5fd60b52a6c82a00bba18531793c93c4addfe64fbf061e235c5617a/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9158465745423b2b5d97ed25aa7740c7d38d2993ee2e5c3bfacb0c4145c49d8", size = 11950342 },
618
+ { url = "https://files.pythonhosted.org/packages/14/56/fd990ca222cef4f9f4a9400567b9a15b220dee2eafffb16b2adbc55c8281/onnxruntime-1.20.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0df6f2df83d61f46e842dbcde610ede27218947c33e994545a22333491e72a3b", size = 13337040 },
619
+ ]
620
+
621
  [[package]]
622
  name = "packaging"
623
  version = "24.2"
 
701
  { url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 },
702
  ]
703
 
704
+ [[package]]
705
+ name = "protobuf"
706
+ version = "5.29.3"
707
+ source = { registry = "https://pypi.org/simple" }
708
+ sdist = { url = "https://files.pythonhosted.org/packages/f7/d1/e0a911544ca9993e0f17ce6d3cc0932752356c1b0a834397f28e63479344/protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620", size = 424945 }
709
+ wheels = [
710
+ { url = "https://files.pythonhosted.org/packages/dc/7a/1e38f3cafa022f477ca0f57a1f49962f21ad25850c3ca0acd3b9d0091518/protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888", size = 422708 },
711
+ { url = "https://files.pythonhosted.org/packages/61/fa/aae8e10512b83de633f2646506a6d835b151edf4b30d18d73afd01447253/protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a", size = 434508 },
712
+ { url = "https://files.pythonhosted.org/packages/dd/04/3eaedc2ba17a088961d0e3bd396eac764450f431621b58a04ce898acd126/protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e", size = 417825 },
713
+ { url = "https://files.pythonhosted.org/packages/4f/06/7c467744d23c3979ce250397e26d8ad8eeb2bea7b18ca12ad58313c1b8d5/protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84", size = 319573 },
714
+ { url = "https://files.pythonhosted.org/packages/a8/45/2ebbde52ad2be18d3675b6bee50e68cd73c9e0654de77d595540b5129df8/protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f", size = 319672 },
715
+ { url = "https://files.pythonhosted.org/packages/fd/b2/ab07b09e0f6d143dfb839693aa05765257bceaa13d03bf1a696b78323e7a/protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f", size = 172550 },
716
+ ]
717
+
718
  [[package]]
719
  name = "psutil"
720
  version = "6.1.1"
 
863
  { url = "https://files.pythonhosted.org/packages/e1/6b/2706497c86e8d69fb76afe5ea857fe1794621aa0f3b1d863feb953fe0f22/pypdfium2-4.30.1-py3-none-win_arm64.whl", hash = "sha256:c2b6d63f6d425d9416c08d2511822b54b8e3ac38e639fc41164b1d75584b3a8c", size = 2814810 },
864
  ]
865
 
866
+ [[package]]
867
+ name = "pyreadline3"
868
+ version = "3.5.4"
869
+ source = { registry = "https://pypi.org/simple" }
870
+ sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839 }
871
+ wheels = [
872
+ { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178 },
873
+ ]
874
+
875
  [[package]]
876
  name = "python-multipart"
877
  version = "0.0.20"
 
1007
  { url = "https://files.pythonhosted.org/packages/86/ca/aa489392ec6fb59223ffce825461e1f811a3affd417121a2088be7a5758b/safetensors-0.5.2-cp38-abi3-win_amd64.whl", hash = "sha256:78abdddd03a406646107f973c7843276e7b64e5e32623529dc17f3d94a20f589", size = 303756 },
1008
  ]
1009
 
1010
+ [[package]]
1011
+ name = "sentencepiece"
1012
+ version = "0.2.0"
1013
+ source = { registry = "https://pypi.org/simple" }
1014
+ sdist = { url = "https://files.pythonhosted.org/packages/c9/d2/b9c7ca067c26d8ff085d252c89b5f69609ca93fb85a00ede95f4857865d4/sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843", size = 2632106 }
1015
+ wheels = [
1016
+ { url = "https://files.pythonhosted.org/packages/27/5a/141b227ed54293360a9ffbb7bf8252b4e5efc0400cdeac5809340e5d2b21/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2", size = 2409370 },
1017
+ { url = "https://files.pythonhosted.org/packages/2e/08/a4c135ad6fc2ce26798d14ab72790d66e813efc9589fd30a5316a88ca8d5/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c", size = 1239288 },
1018
+ { url = "https://files.pythonhosted.org/packages/49/0a/2fe387f825ac5aad5a0bfe221904882106cac58e1b693ba7818785a882b6/sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f", size = 1181597 },
1019
+ { url = "https://files.pythonhosted.org/packages/cc/38/e4698ee2293fe4835dc033c49796a39b3eebd8752098f6bd0aa53a14af1f/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08", size = 1259220 },
1020
+ { url = "https://files.pythonhosted.org/packages/12/24/fd7ef967c9dad2f6e6e5386d0cadaf65cda8b7be6e3861a9ab3121035139/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7", size = 1355962 },
1021
+ { url = "https://files.pythonhosted.org/packages/4f/d2/18246f43ca730bb81918f87b7e886531eda32d835811ad9f4657c54eee35/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109", size = 1301706 },
1022
+ { url = "https://files.pythonhosted.org/packages/8a/47/ca237b562f420044ab56ddb4c278672f7e8c866e183730a20e413b38a989/sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251", size = 936941 },
1023
+ { url = "https://files.pythonhosted.org/packages/c6/97/d159c32642306ee2b70732077632895438867b3b6df282354bd550cf2a67/sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f", size = 991994 },
1024
+ ]
1025
+
1026
  [[package]]
1027
  name = "setuptools"
1028
  version = "75.8.0"