import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image

# Checkpoint name shared by the processor and the model, kept in one place
# so the two from_pretrained calls cannot drift apart.
_MODEL_ID = "Pisethan/sangapac_ocr"

# Both objects are built once at import time and reused by recognize_text.
processor = TrOCRProcessor.from_pretrained(_MODEL_ID)
model = VisionEncoderDecoderModel.from_pretrained(_MODEL_ID)

# Define the prediction function
def recognize_text(image_path) -> str:
    """Run the OCR model on one image file and return the decoded text.

    Args:
        image_path: Filesystem path of the image to read. May be ``None``
            or empty, which Gradio passes when the form is submitted
            without an uploaded image.

    Returns:
        The text decoded from the first generated sequence with special
        tokens skipped, or an empty string when no image was supplied.
    """
    # Nothing to recognise: return early instead of letting Image.open
    # fail on a missing path.
    if not image_path:
        return ""

    # The context manager closes the underlying file handle; convert()
    # returns a separate in-memory image, so it stays usable afterwards.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    # A single image goes in, so only the first sequence is decoded.
    return processor.tokenizer.decode(
        generated_ids[0], skip_special_tokens=True
    )

# Web UI: a single image upload feeding recognize_text, with the result
# shown in a text box. type="filepath" makes Gradio hand the function a
# path on disk rather than pixel data.
interface = gr.Interface(
    recognize_text,
    gr.Image(label="Upload an Image", type="filepath"),
    gr.Textbox(label="Recognized Text"),
    description="Upload an image containing text to test the OCR model.",
    title="OCR Model Demo",
)

# Start the Gradio app with its default launch settings.
interface.launch()