# Hugging Face Spaces page-header residue (scrape artifact). The Space
# reported "Runtime error" at capture time.
import gradio as gr
import torch
from PIL import Image
from transformers import (
    AutoModelForVision2Seq,
    AutoProcessor,
    AutoTokenizer,
    TextStreamer,
)
# Load model, multimodal processor, and tokenizer.
MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
# A bare AutoTokenizer cannot encode images for a Vision2Seq model; the
# AutoProcessor bundles the image processor together with the tokenizer.
processor = AutoProcessor.from_pretrained(MODEL_NAME)
# Kept for backward compatibility with code that references `tokenizer`.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)
# Inference function
def generate_caption(image: Image.Image, instruction: str) -> str:
    """Generate a caption for *image* by prompting the vision model with *instruction*.

    Args:
        image: The input image (PIL).
        instruction: The user instruction embedded in the chat prompt.

    Returns:
        The decoded model output (prompt included, special tokens stripped).
    """
    # Use the module-level multimodal processor if one exists; otherwise build
    # and cache it here. A plain tokenizer cannot encode images, which is why
    # the original `tokenizer(image, input_text, ...)` call failed at runtime.
    if "processor" not in globals():
        globals()["processor"] = AutoProcessor.from_pretrained(MODEL_NAME)
    proc = globals()["processor"]

    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction},
    ]}]
    # tokenize=False: we need the rendered prompt as a *string* here, because
    # the processor tokenizes it together with the image just below. Without
    # it, apply_chat_template returns token ids.
    input_text = proc.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=False
    )
    inputs = proc(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(device)

    # Stream tokens to stdout as they are generated (prompt suppressed).
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    output = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        # NOTE(review): temperature/min_p are ignored by `generate` unless
        # do_sample=True is also passed — confirm the intended decoding mode.
        temperature=1.5,
        min_p=0.1,
    )
    return proc.decode(output[0], skip_special_tokens=True)
# Gradio callback: applies a fixed radiology instruction and delegates to
# generate_caption.
def gradio_interface(image):
    """Return a model-generated caption for an uploaded radiograph image."""
    instruction = "You are an expert radiographer. Describe accurately what you see in this image."
    return generate_caption(image, instruction)
# Create Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    # NOTE(review): live=True re-runs an 11B-parameter model on every input
    # change; a submit button (live=False) would be far cheaper. Left as-is
    # to preserve behavior.
    live=True,
    title="Radiograph Image Captioning",
    description="Upload a radiograph image, and the model will generate a caption describing it.",
)

# Launch the Gradio app
interface.launch()