File size: 1,591 Bytes
e2d728a
 
8e024f6
e2d728a
8e024f6
e2d728a
 
 
 
4678d36
 
 
 
e2d728a
 
 
 
 
 
 
 
4678d36
e2d728a
4678d36
e2d728a
 
 
 
4678d36
a07d796
 
 
e2d728a
a07d796
 
 
 
 
 
 
 
 
 
 
 
e2d728a
f5a56aa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
from docling.document_converter import DocumentConverter
import spaces

@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded document with Docling and return text plus metadata.

    Args:
        file: Value delivered by the file-upload input. Either an object that
            exposes the on-disk path as ``.name`` or a plain path string;
            ``None`` when nothing was uploaded.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value
            (including ``None`` when no option is selected) is rejected.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``converted_text`` is the
        exported document, or a short message when the input is unusable.
        ``metadata`` is a dict whose ``"Available Attributes"`` entry lists
        ``dir()`` of the converted document (empty when no conversion ran).
    """
    import json  # local import: only needed for the JSON fallback below

    supported_formats = ("Markdown", "JSON")

    # Validate inputs before doing any work: the conversion is the expensive
    # step, so there is no point running it for a request that cannot succeed.
    if file is None:
        return "No document uploaded", {"Available Attributes": []}
    if output_format not in supported_formats:
        return "Unsupported format", {"Available Attributes": []}

    # Accept both an object carrying the path in ``.name`` and a bare path.
    source_path = getattr(file, "name", file)

    # Load document and convert it using Docling
    converter = DocumentConverter()
    result = converter.convert(source_path)
    document = result.document

    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    else:
        # NOTE(review): ``export_to_json`` does not appear to be part of the
        # documented DoclingDocument export API; keep using it when present,
        # otherwise serialise the documented ``export_to_dict()`` result.
        export_json = getattr(document, "export_to_json", None)
        if callable(export_json):
            converted_text = export_json()
        else:
            converted_text = json.dumps(
                document.export_to_dict(), indent=2, ensure_ascii=False
            )

    # Placeholder metadata: expose the document's attribute names for inspection.
    metadata = {"Available Attributes": dir(document)}

    return converted_text, metadata

# Define the Gradio interface with the Blocks API and start the app.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter with Docling")
    gr.Markdown("Upload a document, choose the output format, and get the converted text with metadata.")

    file_input = gr.File(label="Upload Document")
    # Pre-select a format so clicking "Convert" without touching the radio
    # does not send None and come back as "Unsupported format".
    format_input = gr.Radio(
        ["Markdown", "JSON"],
        value="Markdown",
        label="Choose Output Format",
    )
    output_text = gr.Textbox(label="Converted Document")
    output_metadata = gr.JSON(label="Metadata")

    # Bind the button to the conversion function: two inputs in, two outputs
    # back, in the same order as convert_document's parameters and return tuple.
    convert_button = gr.Button("Convert")
    convert_button.click(
        convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch(debug=True)