Spaces:

Hammedalmodel
/

handwritten_to_text

Sleeping

App Files Files Community

handwritten_to_text / app.py

Hammedalmodel

Update app.py

0e323a0 verified 12 days ago

raw

history blame contribute delete

2.04 kB

	from transformers import MllamaForConditionalGeneration, AutoProcessor
	from PIL import Image
	import torch
	import gradio as gr
	import spaces

	# Initialize model and processor
	ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
	model = MllamaForConditionalGeneration.from_pretrained(
	ckpt,
	torch_dtype=torch.bfloat16
	).to("cuda")
	processor = AutoProcessor.from_pretrained(ckpt)

	@spaces.GPU
	def extract_text(image):
	# Convert image to RGB
	image = Image.open(image).convert("RGB")

	# Create message structure
	messages = [
	{
	"role": "user",
	"content": [
	{"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
	{"type": "image"}
	]
	}
	]

	# Process input
	texts = processor.apply_chat_template(messages, add_generation_prompt=True)
	inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")


	# Generate output
	outputs = model.generate(**inputs, max_new_tokens=250)
	result = processor.decode(outputs[0], skip_special_tokens=True)

	print(result)

	# Clean up the output to remove the prompt and assistant text
	if "assistant" in result.lower():
	result = result[result.lower().find("assistant") + len("assistant"):].strip()

	# Remove any remaining conversation markers
	result = result.replace("user", "").replace("Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output", "").strip()

	print(result)

	return result

	# Create Gradio interface
	demo = gr.Interface(
	fn=extract_text,
	inputs=gr.Image(type="filepath", label="Upload Image"),
	outputs=gr.Textbox(label="Extracted Text"),
	title="Handwritten Text Extractor",
	description="Upload an image containing handwritten text to extract its content.",
	)

	# Launch the app
	demo.launch(debug=True)