Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,34 @@
|
|
1 |
import gradio as gr
|
2 |
from docling.document_converter import DocumentConverter
|
|
|
|
|
|
|
|
|
|
|
3 |
import spaces
|
4 |
from marker.converters.pdf import PdfConverter
|
5 |
from marker.models import create_model_dict
|
6 |
from marker.output import text_from_rendered
|
7 |
|
8 |
# Docling
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
# Marker
|
12 |
marker_converter = PdfConverter(
|
|
|
1 |
import gradio as gr
|
2 |
from docling.document_converter import DocumentConverter
|
3 |
+
from docling.datamodel.pipeline_options import (
|
4 |
+
AcceleratorDevice,
|
5 |
+
PdfPipelineOptions,
|
6 |
+
AcceleratorOptions
|
7 |
+
)
|
8 |
import spaces
|
9 |
from marker.converters.pdf import PdfConverter
|
10 |
from marker.models import create_model_dict
|
11 |
from marker.output import text_from_rendered
|
12 |
|
13 |
# Docling
|
14 |
+
accelerator_options = AcceleratorOptions(
|
15 |
+
num_threads=8, device=AcceleratorDevice.CPU
|
16 |
+
)
|
17 |
+
|
18 |
+
|
19 |
+
pipeline_options = PdfPipelineOptions()
|
20 |
+
pipeline_options.accelerator_options = accelerator_options
|
21 |
+
pipeline_options.do_ocr = True
|
22 |
+
pipeline_options.do_table_structure = True
|
23 |
+
pipeline_options.table_structure_options.do_cell_matching = True
|
24 |
+
|
25 |
+
docling_converter = DocumentConverter(
|
26 |
+
format_options={
|
27 |
+
InputFormat.PDF: PdfFormatOption(
|
28 |
+
pipeline_options=pipeline_options,
|
29 |
+
)
|
30 |
+
}
|
31 |
+
)
|
32 |
|
33 |
# Marker
|
34 |
marker_converter = PdfConverter(
|