Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,591 Bytes
e2d728a 8e024f6 e2d728a 8e024f6 e2d728a 4678d36 e2d728a 4678d36 e2d728a 4678d36 e2d728a 4678d36 a07d796 e2d728a a07d796 e2d728a f5a56aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
from docling.document_converter import DocumentConverter
import spaces
@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded document with Docling and return text plus metadata.

    Args:
        file: Upload value from the Gradio file component. Its ``name``
            attribute is handed to Docling as the path to convert. ``None``
            when nothing was uploaded.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value yields
            the text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``metadata`` is a dict whose
        ``"Available Attributes"`` entry lists ``dir()`` of the converted
        document; it is a placeholder until real metadata is extracted. When
        no file was uploaded the tuple is ``("No file uploaded", {})``.
    """
    import json

    # Clicking "Convert" without an upload passes None; report that instead
    # of failing with AttributeError on ``file.name``.
    if file is None:
        return "No file uploaded", {}

    # Load document and convert it using Docling
    converter = DocumentConverter()
    result = converter.convert(file.name)
    document = result.document

    # Check available attributes in DoclingDocument
    available_attributes = dir(document)

    # Choose the output format
    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        # NOTE(review): ``export_to_json`` does not appear to be available on
        # every Docling document class -- confirm against the installed
        # version. Use it when present, otherwise serialise the dict export.
        export_json = getattr(document, "export_to_json", None)
        if callable(export_json):
            converted_text = export_json()
        else:
            converted_text = json.dumps(
                document.export_to_dict(),
                ensure_ascii=False,
                indent=2,
            )
    else:
        converted_text = "Unsupported format"

    # Placeholder metadata extraction based on available attributes
    metadata = {
        "Available Attributes": available_attributes,
    }
    return converted_text, metadata
# Build the Gradio UI: a file upload and format selector feed
# ``convert_document``; its two return values fill the text and JSON outputs.
with gr.Blocks() as app:
    # Page heading and short usage hint.
    gr.Markdown("# Document Converter with Docling")
    gr.Markdown("Upload a document, choose the output format, and get the converted text with metadata.")

    # Input widgets.
    file_input = gr.File(label="Upload Document")
    format_input = gr.Radio(
        choices=["Markdown", "JSON"],
        label="Choose Output Format",
    )

    # Output widgets.
    output_text = gr.Textbox(label="Converted Document")
    output_metadata = gr.JSON(label="Metadata")

    # The button runs the conversion and routes the results to the outputs.
    convert_button = gr.Button("Convert")
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

# Start the web server; debug mode keeps the process attached and prints errors.
app.launch(debug=True)