RPW committed
Commit e85dbaa · verified · 1 parent: 70fbe17

Update app.py

Files changed (1)
  1. app.py +55 -39
app.py CHANGED
@@ -1,39 +1,55 @@
- import gradio as gr
- from transformers import pipeline
- from PIL import Image
-
- # Load the model and pipeline
- model, tokenizer = FastVisionModel.from_pretrained(
-     "unsloth/Llama-3.2-11B-Vision-Instruct",
-     load_in_4bit=True,
-     use_gradient_checkpointing="unsloth",
-     device_map="auto"
- )
-
- vision_pipeline = pipeline(
-     "image-to-text",
-     model=model,
-     tokenizer=tokenizer,
-     device=0
- )
-
- # Function for Gradio
- def analyze_image(image, instruction):
-     result = vision_pipeline(images=image, instruction=instruction, max_new_tokens=128)
-     return result[0]["generated_text"]
-
- # Build the UI with Gradio
- interface = gr.Interface(
-     fn=analyze_image,
-     inputs=[
-         gr.Image(type="pil"),
-         gr.Textbox(label="Instruction", value="You are an expert radiographer. Describe accurately what you see in this image.")
-     ],
-     outputs=gr.Textbox(label="Generated Text"),
-     title="Medical Image Analysis",
-     description="Analyze X-ray images and generate a diagnostic report."
- )
-
- # Run the app
- if __name__ == "__main__":
-     interface.launch()
+ import gradio as gr
+ from PIL import Image
+ from transformers import AutoProcessor, AutoModelForVision2Seq, TextStreamer
+ import torch
+
+ # Load model and processor (the processor handles both image and text inputs)
+ MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ processor = AutoProcessor.from_pretrained(MODEL_NAME)
+ model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)
+
+ # Inference function
+ def generate_caption(image: Image.Image, instruction: str):
+     # Prepare input data: a chat-style prompt with an image slot and the instruction
+     messages = [{"role": "user", "content": [
+         {"type": "image"},
+         {"type": "text", "text": instruction}
+     ]}]
+     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+     inputs = processor(
+         image,
+         input_text,
+         add_special_tokens=False,
+         return_tensors="pt"
+     ).to(device)
+
+     # Text generation (the streamer also prints tokens to stdout as they arrive)
+     text_streamer = TextStreamer(processor.tokenizer, skip_prompt=True)
+     output = model.generate(
+         **inputs, streamer=text_streamer,
+         max_new_tokens=128,
+         use_cache=True, temperature=1.5, min_p=0.1
+     )
+     # Decode only the newly generated tokens, not the echoed prompt
+     new_tokens = output[0][inputs["input_ids"].shape[-1]:]
+     return processor.decode(new_tokens, skip_special_tokens=True)
+
+ # Gradio interface
+ def gradio_interface(image):
+     instruction = "You are an expert radiographer. Describe accurately what you see in this image."
+     caption = generate_caption(image, instruction)
+     return caption
+
+ # Create Gradio interface
+ interface = gr.Interface(
+     fn=gradio_interface,
+     inputs=gr.Image(type="pil"),
+     outputs=gr.Textbox(),
+     live=True,
+     title="Radiograph Image Captioning",
+     description="Upload a radiograph image, and the model will generate a caption describing it.",
+ )
+
+ # Launch the Gradio app
+ interface.launch()
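
Once the Space is running, the updated endpoint can be exercised end to end from Python with gradio_client. A minimal sketch, assuming the app is hosted in a Space; the Space id and image filename below are placeholders, not taken from this commit:

from gradio_client import Client, handle_file

# Hypothetical Space id -- substitute the Space that actually hosts this app
client = Client("RPW/radiograph-captioning")

# handle_file uploads the local image; /predict is gr.Interface's default endpoint
result = client.predict(handle_file("chest_xray.png"), api_name="/predict")
print(result)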