# Spaces: Running on Zero
import json

import gradio as gr
from docling.document_converter import DocumentConverter
import spaces
def _export_document_json(document):
    """Serialize a converted document to a JSON string."""
    # Keep using the document's own JSON exporter whenever it provides one.
    exporter = getattr(document, "export_to_json", None)
    if callable(exporter):
        return exporter()
    # NOTE(review): fallback assumes the document offers export_to_dict(),
    # which is Docling's documented dict export — confirm against the
    # installed Docling version.
    return json.dumps(document.export_to_dict(), ensure_ascii=False, indent=2)


def convert_document(file, output_format):
    """Convert an uploaded document with Docling and list its attributes.

    Args:
        file: The uploaded document: either an object exposing its path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value yields
            the text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``metadata`` is a dict whose
        ``"Available Attributes"`` entry holds ``dir()`` of the converted
        document. When no file was uploaded the result is
        ``("No document uploaded", {})``.
    """
    # Triggering a conversion without an upload passes None; report that
    # instead of failing on the path lookup below.
    if file is None:
        return "No document uploaded", {}

    # Accept both file wrappers (path stored in ``.name``) and bare paths.
    source_path = getattr(file, "name", file)

    converter = DocumentConverter()
    document = converter.convert(source_path).document

    # Placeholder metadata: expose whatever the document object offers.
    available_attributes = dir(document)

    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        converted_text = _export_document_json(document)
    else:
        converted_text = "Unsupported format"

    return converted_text, {"Available Attributes": available_attributes}
# Build the UI: the upload widget and format selector feed convert_document,
# and its text and metadata results are shown in the two output components.
with gr.Blocks() as app:
    gr.Markdown(value="# Document Converter with Docling")
    gr.Markdown(
        value="Upload a document, choose the output format, and get the converted text with metadata."
    )

    # Inputs
    file_input = gr.File(label="Upload Document")
    format_input = gr.Radio(
        choices=["Markdown", "JSON"],
        label="Choose Output Format",
    )

    # Outputs
    output_text = gr.Textbox(label="Converted Document")
    output_metadata = gr.JSON(label="Metadata")

    # Clicking the button runs the conversion on the two inputs above.
    convert_button = gr.Button(value="Convert")
    convert_button.click(
        fn=convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch(debug=True)