RPW committed
Commit e85dbaa · verified · 1 parent: 70fbe17

Update app.py

Files changed (1)
  1. app.py +55 -39
app.py CHANGED
@@ -1,39 +1,55 @@
- import gradio as gr
- from transformers import pipeline
- from PIL import Image
-
- # Load the model and pipeline
- model, tokenizer = FastVisionModel.from_pretrained(
-     "unsloth/Llama-3.2-11B-Vision-Instruct",
-     load_in_4bit=True,
-     use_gradient_checkpointing="unsloth",
-     device_map="auto"
- )
-
- vision_pipeline = pipeline(
-     "image-to-text",
-     model=model,
-     tokenizer=tokenizer,
-     device=0
- )
-
- # Function for Gradio
- def analyze_image(image, instruction):
-     result = vision_pipeline(images=image, instruction=instruction, max_new_tokens=128)
-     return result[0]["generated_text"]
-
- # Build the UI with Gradio
- interface = gr.Interface(
-     fn=analyze_image,
-     inputs=[
-         gr.Image(type="pil"),
-         gr.Textbox(label="Instruction", value="You are an expert radiographer. Describe accurately what you see in this image.")
-     ],
-     outputs=gr.Textbox(label="Generated Text"),
-     title="Medical Image Analysis",
-     description="Analyze X-ray images and generate a diagnostic report."
- )
-
- # Run the app
- if __name__ == "__main__":
-     interface.launch()
+ import gradio as gr
+ from PIL import Image
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextStreamer
+ import torch
+
+ # Load model and processor (the processor handles both image and text inputs)
+ MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ processor = AutoProcessor.from_pretrained(MODEL_NAME)
+ model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)
+
+ # Inference function
+ def generate_caption(image: Image.Image, instruction: str):
+     # Prepare input data: a chat-style prompt with an image slot and the instruction
+     messages = [{"role": "user", "content": [
+         {"type": "image"},
+         {"type": "text", "text": instruction}
+     ]}]
+     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+     inputs = processor(
+         image,
+         input_text,
+         add_special_tokens=False,
+         return_tensors="pt"
+     ).to(device)
+
+     # Text generation (the streamer also prints tokens to stdout as they arrive)
+     text_streamer = TextStreamer(processor.tokenizer, skip_prompt=True)
+     output = model.generate(
+         **inputs, streamer=text_streamer,
+         max_new_tokens=128,
+         use_cache=True, temperature=1.5, min_p=0.1
+     )
+     # Decode only the newly generated tokens, not the echoed prompt
+     new_tokens = output[0][inputs["input_ids"].shape[-1]:]
+     return processor.decode(new_tokens, skip_special_tokens=True)
+
+ # Gradio interface
+ def gradio_interface(image):
+     instruction = "You are an expert radiographer. Describe accurately what you see in this image."
+     caption = generate_caption(image, instruction)
+     return caption
+
+ # Create Gradio interface
+ interface = gr.Interface(
+     fn=gradio_interface,
+     inputs=gr.Image(type="pil"),
+     outputs=gr.Textbox(),
+     live=True,
+     title="Radiograph Image Captioning",
+     description="Upload a radiograph image, and the model will generate a caption describing it.",
+ )
+
+ # Launch the Gradio app
+ interface.launch()
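
Once the Space is running, the updated endpoint can be exercised end to end from Python with gradio_client. A minimal sketch, assuming the app is hosted in a Space; the Space id and image filename below are placeholders, not taken from this commit:

from gradio_client import Client, handle_file

# Hypothetical Space id -- substitute the Space that actually hosts this app
client = Client("RPW/radiograph-captioning")

# handle_file uploads the local image; /predict is gr.Interface's default endpoint
result = client.predict(handle_file("chest_xray.png"), api_name="/predict")
print(result)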