"""Gradio app that converts an uploaded document to Markdown text.

Two conversion backends are offered: Docling and Marker. Both converters are
constructed once at import time and reused for every request.
"""

import gradio as gr
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import (
    AcceleratorDevice,
    PdfPipelineOptions,
    AcceleratorOptions,
)
import spaces
from docling.datamodel.base_models import InputFormat
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered

# Docling: CPU execution with 8 threads; OCR and table-structure recognition
# (with cell matching) are enabled for PDF input.
accelerator_options = AcceleratorOptions(
    num_threads=8,
    device=AcceleratorDevice.CPU,
)
pipeline_options = PdfPipelineOptions()
pipeline_options.accelerator_options = accelerator_options
pipeline_options.do_ocr = True
pipeline_options.do_table_structure = True
pipeline_options.table_structure_options.do_cell_matching = True

docling_converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_options=pipeline_options,
        ),
    },
)

# Marker: built with the artifact dictionary returned by create_model_dict().
# NOTE(review): presumably this loads model weights at import time -- confirm.
marker_converter = PdfConverter(
    artifact_dict=create_model_dict(),
)


def convert_document(file, method):
    """Convert the uploaded document to text with the selected backend.

    Args:
        file: The value supplied by the ``gr.File`` input. Either an object
            exposing the on-disk path as ``.name`` or a plain path string;
            ``None`` when nothing has been uploaded.
        method: Backend name chosen in the UI: ``"Docling"`` or ``"Marker"``.

    Returns:
        The converted document text, or the string ``'unknown method'`` when
        ``method`` is not one of the supported backends.

    Raises:
        gr.Error: If a supported backend is selected but no file was uploaded.
    """
    if method not in ("Docling", "Marker"):
        return 'unknown method'

    if file is None:
        # Fail with a readable message instead of an AttributeError on
        # ``None.name`` when the user clicks Convert without uploading.
        raise gr.Error("No document uploaded. Please upload a file first.")

    # Accept both file-like objects (path in ``.name``) and bare path strings.
    source_path = getattr(file, "name", file)

    if method == "Docling":
        result = docling_converter.convert(source_path)
        return result.document.export_to_markdown()

    # method == "Marker"
    rendered = marker_converter(source_path)
    # Only the text is shown in the UI; the other two returned values are
    # not used here.
    text, _unused_meta, _unused_images = text_from_rendered(rendered)
    return text


with gr.Blocks() as app:
    gr.Markdown("# Document Converter")
    gr.Markdown("Upload a document, choose the backend, and get the converted text with metadata.")

    file_input = gr.File(label="Upload Document")
    method_input = gr.Radio(["Docling", "Marker"], label="Choose Conversion Backend")
    output_text = gr.Textbox(label="Converted Document")

    convert_button = gr.Button("Convert")
    convert_button.click(
        convert_document,
        inputs=[file_input, method_input],
        outputs=[output_text],
    )

app.launch(debug=True, show_error=True)