# NIH1.2_Llama3.2 / app.py
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor, TextStreamer
# Load the model and processor (the processor handles both image preprocessing and tokenization)
MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,  # bf16 keeps the 11B model within GPU memory
).to(device)
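# Note (optional alternative): on hardware with limited GPU memory, the model can instead be
# sharded across available devices automatically (requires the accelerate package), e.g.:
#   model = AutoModelForVision2Seq.from_pretrained(
#       MODEL_NAME, torch_dtype=torch.bfloat16, device_map="auto"
#   )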
# Inference function
def generate_caption(image: Image.Image, instruction: str):
    # Prepare input data: a chat-style prompt with one image placeholder followed by the text instruction
    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # The processor pairs the image with the rendered prompt and returns model-ready tensors
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(device)
    # Text generation (the streamer echoes tokens to stdout / the Space logs as they are produced)
    text_streamer = TextStreamer(processor.tokenizer, skip_prompt=True)
    output = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        do_sample=True,  # sampling must be enabled for temperature / min_p to take effect
        temperature=1.5,
        min_p=0.1
    )
    # Strip the prompt tokens so only the newly generated caption is returned
    generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return processor.decode(generated_tokens, skip_special_tokens=True)
# Gradio interface
def gradio_interface(image):
    instruction = "You are an expert radiographer. Describe accurately what you see in this image."
    caption = generate_caption(image, instruction)
    return caption
# Create Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    live=True,
    title="Radiograph Image Captioning",
    description="Upload a radiograph image, and the model will generate a caption describing it.",
)
# Launch the Gradio app
interface.launch()
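# Local sanity-check sketch (not run by the Space): call the captioning function directly,
# bypassing the UI. "sample_xray.png" is only a placeholder path for any local radiograph.
#   img = Image.open("sample_xray.png").convert("RGB")
#   print(generate_caption(img, "You are an expert radiographer. Describe accurately what you see in this image."))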