Spaces:
Sleeping
Sleeping
UPDATE: New Endpoints
Browse files
- EasyOCRModels/craft_mlt_25k.pth +3 -0
- EasyOCRModels/english_g2.pth +3 -0
- functions.py +2 -1
EasyOCRModels/craft_mlt_25k.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17
|
3 |
+
size 83152330
|
EasyOCRModels/english_g2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2272681d9d67a04e2dff396b6e95077bc19001f8f6d3593c307b9852e1c29e8
|
3 |
+
size 15143997
|
functions.py
CHANGED
@@ -40,6 +40,7 @@ vectorEmbeddings = HuggingFaceEmbeddings(
|
|
40 |
model_kwargs = model_kwargs,
|
41 |
encode_kwargs = encode_kwargs
|
42 |
)
|
|
|
43 |
sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25")
|
44 |
prompt = """
|
45 |
INSTRUCTIONS:
|
@@ -289,7 +290,7 @@ def getLinks(url: str, timeout = 30):
|
|
289 |
|
290 |
|
291 |
def getTextFromImagePDF(pdfBytes):
|
292 |
-
reader
|
293 |
allImages = convert_from_bytes(pdfBytes)
|
294 |
allImages = [np.array(image) for image in allImages]
|
295 |
text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
|
|
|
40 |
model_kwargs = model_kwargs,
|
41 |
encode_kwargs = encode_kwargs
|
42 |
)
|
43 |
+
reader = easyocr.Reader(['en'], gpu = True, model_storage_directory = "/app/EasyOCRModels")
|
44 |
sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25")
|
45 |
prompt = """
|
46 |
INSTRUCTIONS:
|
|
|
290 |
|
291 |
|
292 |
def getTextFromImagePDF(pdfBytes):
|
293 |
+
global reader
|
294 |
allImages = convert_from_bytes(pdfBytes)
|
295 |
allImages = [np.array(image) for image in allImages]
|
296 |
text = "\n\n\n".join(["\n".join([text[1] for text in reader.readtext(image, paragraph=True)]) for image in allImages])
|