Spaces:
Running
Running
import gradio as gr | |
from docling.document_converter import DocumentConverter | |
import spaces | |
from marker.converters.pdf import PdfConverter | |
from marker.models import create_model_dict | |
from marker.output import text_from_rendered | |
# Both backends are instantiated once at module load and reused by every
# conversion request.

# Docling backend.
docling_converter = DocumentConverter()

# Marker backend, built from the dictionary returned by create_model_dict().
marker_converter = PdfConverter(artifact_dict=create_model_dict())
def convert_document(file, method):
    """Convert an uploaded document to text with the selected backend.

    Args:
        file: The uploaded file as handed over by the ``gr.File`` input.
            Either an object exposing the on-disk path as ``.name`` or a
            plain path string is accepted; ``None`` means nothing was
            uploaded.
        method: Backend name, ``"Docling"`` or ``"Marker"``.

    Returns:
        The converted text, or a short message string when the method is
        not recognised or no file was provided.
    """
    # Reject unknown backends first so this result never depends on `file`.
    if method not in ("Docling", "Marker"):
        return 'unknown method'

    # Clicking "Convert" without an upload passes None; report it instead of
    # failing with AttributeError on `file.name`.
    if file is None:
        return 'no file uploaded'

    # Use `.name` when present, otherwise treat the value itself as the path.
    path = getattr(file, "name", file)

    if method == "Docling":
        result = docling_converter.convert(path)
        return result.document.export_to_markdown()

    # Only the text element of the rendered output is returned; the other
    # two elements are discarded.
    rendered = marker_converter(path)
    text, _, _ = text_from_rendered(rendered)
    return text
# Build the Gradio interface.
with gr.Blocks() as app:
    # Page title followed by a one-line usage hint.
    gr.Markdown("# Document Converter")
    gr.Markdown(
        "Upload a document, choose the backend, and get the converted text with metadata."
    )

    # Inputs: the document to convert and the backend selector.
    file_input = gr.File(label="Upload Document")
    method_input = gr.Radio(
        choices=["Docling", "Marker"],
        label="Choose Conversion Backend",
    )

    # Output area and the button that triggers a conversion.
    output_text = gr.Textbox(label="Converted Document")
    convert_button = gr.Button("Convert")

    # Clicking the button passes both inputs to convert_document and writes
    # its return value into the output textbox.
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, method_input],
        outputs=[output_text],
    )

# Start the app once the interface has been defined.
app.launch(show_error=True, debug=True)