Spaces:
Sleeping
Sleeping
UPDATE: ThreadPoolExecutor
Browse files- functions.py +4 -4
functions.py
CHANGED
@@ -292,13 +292,13 @@ def getLinks(url: str, timeout = 30):
|
|
292 |
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
293 |
|
294 |
|
295 |
-
def getText(image):
    """Run OCR on a single image and return the recognised text.

    The image is resized to 500x500, converted to a NumPy array and passed
    to ``reader.readtext`` with ``paragraph=True``. The element at index 1
    of every result entry is collected and the pieces are joined with a
    single newline.
    """
    global reader
    shrunk = image.resize((500, 500))
    pixels = np.array(shrunk)
    pieces = []
    for entry in reader.readtext(pixels, paragraph=True):
        pieces.append(entry[1])
    return "\n".join(pieces)
|
298 |
|
299 |
def getTextFromImagePDF(pdfBytes):
|
|
|
|
|
|
|
300 |
allImages = convert_from_bytes(pdfBytes)
|
301 |
-
with ThreadPoolExecutor(max_workers =
|
302 |
texts = list(p.map(getText, allImages))
|
303 |
return "\n\n\n".join(texts)
|
304 |
|
|
|
292 |
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
293 |
|
294 |
|
|
|
|
|
|
|
295 |
|
296 |
def getTextFromImagePDF(pdfBytes, maxWorkers: int = 32) -> str:
    """Extract text from a PDF by converting it to images and running OCR.

    Args:
        pdfBytes: PDF content, handed unchanged to ``convert_from_bytes``.
        maxWorkers: Upper bound on the number of OCR worker threads.
            Defaults to 32, the value that used to be hard-coded.

    Returns:
        The OCR text of every image produced from the PDF (presumably one
        image per page -- confirm against ``convert_from_bytes``). Text
        pieces within one image are joined by a single newline, images are
        separated by three newlines. An empty string if no images are
        produced.
    """

    def getText(image) -> str:
        # ``reader`` is a module-level object that is only read here, so no
        # ``global`` declaration is required.
        # NOTE(review): looks like an EasyOCR Reader -- confirm.
        results = reader.readtext(np.array(image), paragraph=True)
        # Each result entry carries its text at index 1.
        return "\n".join(entry[1] for entry in results)

    allImages = convert_from_bytes(pdfBytes)
    # Executor.map yields results in input order, so the image order is kept.
    with ThreadPoolExecutor(max_workers=maxWorkers) as pool:
        texts = list(pool.map(getText, allImages))
    return "\n\n\n".join(texts)
|
304 |
|