RPW committed on
Commit
0dde4ee
·
verified ·
1 Parent(s): 78e7a1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -46
app.py CHANGED
@@ -1,55 +1,18 @@
1
- import gradio as gr
2
- from PIL import Image
3
- from transformers import AutoTokenizer, AutoModelForVision2Seq, TextStreamer
4
  import torch
5
 
6
- # Load model and tokenizer
7
  MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
 
 
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
11
  model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)
12
 
13
- # Inference function
14
- def generate_caption(image: Image.Image, instruction: str):
15
- # Prepare input data
16
- messages = [{"role": "user", "content": [
17
- {"type": "image"},
18
- {"type": "text", "text": instruction}
19
- ]}]
20
- input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
21
-
22
- inputs = tokenizer(
23
- image,
24
- input_text,
25
- add_special_tokens=False,
26
- return_tensors="pt"
27
- ).to(device)
28
-
29
- # Text generation
30
- text_streamer = TextStreamer(tokenizer, skip_prompt=True)
31
- output = model.generate(
32
- **inputs, streamer=text_streamer,
33
- max_new_tokens=128,
34
- use_cache=True, temperature=1.5, min_p=0.1
35
- )
36
- return tokenizer.decode(output[0], skip_special_tokens=True)
37
 
38
- # Gradio interface
39
- def gradio_interface(image):
40
- instruction = "You are an expert radiographer. Describe accurately what you see in this image."
41
- caption = generate_caption(image, instruction)
42
- return caption
43
-
44
- # Create Gradio interface
45
- interface = gr.Interface(
46
- fn=gradio_interface,
47
- inputs=gr.Image(type="pil"),
48
- outputs=gr.Textbox(),
49
- live=True,
50
- title="Radiograph Image Captioning",
51
- description="Upload a radiograph image, and the model will generate a caption describing it.",
52
- )
53
-
54
- # Launch the Gradio app
55
- interface.launch()
 
1
+ from transformers import AutoTokenizer, AutoModelForVision2Seq
 
 
2
  import torch
3
 
4
+ # Define the model path
5
  MODEL_NAME = "RPW/NIH-1.2_Llama-3.2-11B-Vision-Instruct"
6
  device = "cuda" if torch.cuda.is_available() else "cpu"
7
 
8
+ # Load the tokenizer and model
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
10
  model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME).to(device)
11
 
12
+ # Example usage: Tokenizing some input text
13
+ input_text = "Describe this image."
14
+ inputs = tokenizer(input_text, return_tensors="pt").to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # Generating output
17
+ output = model.generate(**inputs)
18
+ print(tokenizer.decode(output[0], skip_special_tokens=True))