# NIH1.2_Llama3.2 / app.py
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor, TextStreamer
# Load the model and processor (the processor handles both image preprocessing and tokenization)
MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,  # bf16 keeps the 11B model within GPU memory
).to(device)
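# Note (optional alternative): on hardware with limited GPU memory, the model can instead be
# sharded across available devices automatically (requires the accelerate package), e.g.:
#   model = AutoModelForVision2Seq.from_pretrained(
#       MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto"
#   )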
# Inference function
def generate_caption(image: Image.Image, instruction: str):
    # Prepare input data: a chat-style prompt with one image placeholder followed by the text instruction
    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # The processor pairs the image with the rendered prompt and returns model-ready tensors
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(device)
    # Text generation (the streamer echoes tokens to stdout / the Space logs as they are produced)
    text_streamer = TextStreamer(processor.tokenizer, skip_prompt=True)
    output = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        do_sample=True,  # sampling must be enabled for temperature / min_p to take effect
        temperature=1.5,
        min_p=0.1
    )
    # Strip the prompt tokens so only the newly generated caption is returned
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_tokens, skip_special_tokens=True)
# Gradio interface
def gradio_interface(image):
    instruction = "You are an expert radiographer. Describe accurately what you see in this image."
    caption = generate_caption(image, instruction)
    return caption
# Create Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    live=True,
    title="Radiograph Image Captioning",
    description="Upload a radiograph image, and the model will generate a caption describing it.",
)
# Launch the Gradio app
interface.launch()
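# Local sanity-check sketch (not run by the Space): call the captioning function directly,
# bypassing the UI. "sample_xray.png" is only a placeholder path for any local radiograph.
#   img = Image.open("sample_xray.png").convert("RGB")
#   print(generate_caption(img, "You are an expert radiographer. Describe accurately what you see in this image."))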