asynchronousai committed on
Commit
0884aec
·
verified ·
1 Parent(s): b562a6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -1
app.py CHANGED
@@ -1,12 +1,34 @@
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
 
 
 
 
 
3
  import spaces
4
  from marker.converters.pdf import PdfConverter
5
  from marker.models import create_model_dict
6
  from marker.output import text_from_rendered
7
 
8
  # Docling
9
- docling_converter = DocumentConverter()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Marker
12
  marker_converter = PdfConverter(
 
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter, PdfFormatOption
+ from docling.datamodel.base_models import InputFormat
3
+ from docling.datamodel.pipeline_options import (
4
+ AcceleratorDevice,
5
+ PdfPipelineOptions,
6
+ AcceleratorOptions
7
+ )
8
  import spaces
9
  from marker.converters.pdf import PdfConverter
10
  from marker.models import create_model_dict
11
  from marker.output import text_from_rendered
12
 
13
  # Docling
14
+ accelerator_options = AcceleratorOptions(
15
+ num_threads=8, device=AcceleratorDevice.CPU
16
+ )
17
+
18
+
19
+ pipeline_options = PdfPipelineOptions()
20
+ pipeline_options.accelerator_options = accelerator_options
21
+ pipeline_options.do_ocr = True
22
+ pipeline_options.do_table_structure = True
23
+ pipeline_options.table_structure_options.do_cell_matching = True
24
+
25
+ docling_converter = DocumentConverter(
26
+ format_options={
27
+ InputFormat.PDF: PdfFormatOption(
28
+ pipeline_options=pipeline_options,
29
+ )
30
+ }
31
+ )
32
 
33
  # Marker
34
  marker_converter = PdfConverter(