passing2961
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -34,6 +34,146 @@ tags:
|
|
34 |
|
35 |
## How to Use
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
## License and Recommendations
|
38 |
|
39 |
🚨 Ultron-11B is intended to be used for research purposes only.
|
|
|
34 |
|
35 |
## How to Use
|
36 |
|
37 |
+
```python
|
38 |
+
import logging
|
39 |
+
from PIL import Image
|
40 |
+
import torch
|
41 |
+
from transformers import (
|
42 |
+
AutoModelForVision2Seq,
|
43 |
+
BitsAndBytesConfig,
|
44 |
+
AutoProcessor,
|
45 |
+
)
|
46 |
+
|
47 |
+
# Define Ultron template
|
48 |
+
# Prompt template sent to the model as the user turn. It has three
# str.format placeholders -- {name} (used twice), {date} and {dialogue} --
# and instructs the model to answer in the form
# "<RET> <h> image description </h>".
# Note that the template text itself contains the <RET>, <h> and </h>
# markers, so any text that still includes the prompt also includes them.
ULTRON_TEMPLATE = 'You are an excellent image sharing system that generates <RET> token with the following image description. The image description must be provided with the following format: <RET> <h> image description </h>. The following conversation is between {name} and AI assistant on {date}. The given image is {name}\'s appearance.\n{dialogue}'
|
49 |
+
|
50 |
+
# Ultron model initialization
|
51 |
+
def load_ultron_model(model_path, load_in_4bit=False):
    """
    Loads the Ultron model and processor.

    The processor is always loaded from the base
    'meta-llama/Llama-3.2-11B-Vision-Instruct' repository, while the model
    weights are loaded from ``model_path`` in bfloat16 with
    ``device_map="auto"``.

    Args:
        model_path (str): Path or Hub id of the pre-trained model.
        load_in_4bit (bool): When True, load the weights with 4-bit NF4
            quantization (double quantization, bfloat16 compute) through a
            ``BitsAndBytesConfig``. Defaults to False, which keeps the
            plain bfloat16 load.

    Returns:
        model: Loaded Vision-to-Seq model, switched to eval mode.
        processor: Corresponding processor for the model.
    """
    logging.info("Loading Ultron model from %s...", model_path)
    model_kwargs = dict(
        torch_dtype=torch.bfloat16,
        low_cpu_mem_usage=True,
        # NOTE(review): trust_remote_code lets the checkpoint repository run
        # its own Python code at load time -- only use with trusted repos.
        trust_remote_code=True,
        device_map="auto",
    )
    if load_in_4bit:
        # Previously this config was built unconditionally but never handed
        # to from_pretrained, so it had no effect. It is now only created
        # when requested, and actually passed to the model loader.
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type='nf4',
        )
    processor = AutoProcessor.from_pretrained(
        'meta-llama/Llama-3.2-11B-Vision-Instruct', torch_dtype=torch.bfloat16
    )
    model = AutoModelForVision2Seq.from_pretrained(
        model_path,
        **model_kwargs
    ).eval()
    logging.info("Ultron model loaded successfully.")
    return model, processor
|
84 |
+
|
85 |
+
# Run Ultron model
|
86 |
+
def run_ultron_model(model, processor, dialogue, name='Tom', date='2023.04.20', face_image_path='sample_face.png'):
    """
    Runs the Ultron model with a given dialogue, name, and image.

    Args:
        model: Pre-trained model instance.
        processor: Processor for model input.
        dialogue (str): Input dialogue for the assistant.
        name (str): Name of the user.
        date (str): Date of the conversation.
        face_image_path (str): Path to the face image file.

    Returns:
        str: Description of the shared image, as extracted by
        ``parse_ultron_output`` from the newly generated text.
    """
    logging.info("Running Ultron model...")
    # Open inside a context manager so the file handle is released;
    # convert() returns an independent, fully loaded copy.
    with Image.open(face_image_path) as raw_image:
        face_image = raw_image.convert("RGB")

    prompt = ULTRON_TEMPLATE.format(
        dialogue=dialogue,
        name=name,
        date=date
    )
    messages = [
        {
            "content": [
                {"text": prompt, "type": "text"},
                {"type": "image"}
            ],
            "role": "user"
        },
    ]

    logging.info("Preparing input for Ultron model...")
    prompt_input = processor.apply_chat_template(messages, add_generation_prompt=True)
    # NOTE(review): the chat template may already insert the begin-of-text
    # token; confirm whether add_special_tokens=False is wanted here.
    # Follow the model's own device instead of assuming 'cuda', since the
    # model may have been placed by device_map="auto".
    inputs = processor(face_image, prompt_input, return_tensors='pt').to(model.device)

    with torch.inference_mode():
        logging.info("Generating output from Ultron model...")
        output = model.generate(
            **inputs,
            do_sample=True,
            temperature=0.9,
            max_new_tokens=512,
            top_p=1.0,
            use_cache=True,
            num_beams=1,
        )

    # generate() returns prompt + continuation. The prompt itself contains
    # "<RET> <h> image description </h>", so decode only the new tokens to
    # keep the parser from picking up the template's placeholder text.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = output[0][prompt_length:]
    # The keyword is `skip_special_tokens` (plural); the misspelled singular
    # form was silently ignored and left special tokens in the text.
    output_text = processor.decode(generated_ids, skip_special_tokens=True)
    logging.info("Output generated successfully from Ultron model.")
    return parse_ultron_output(output_text)
|
138 |
+
|
139 |
+
# Parse Ultron output
|
140 |
+
def parse_ultron_output(output):
    """
    Extracts the image description from generated text.

    When the text contains '<RET>', the result is whatever follows the last
    '<h>' marker (or the whole text if there is none), cut at the first
    '</h>' after it, with surrounding whitespace removed. When '<RET>' is
    absent a warning is logged and the text is returned unchanged.

    Args:
        output (str): The generated output text from the model.

    Returns:
        str: Extracted image description, or the untouched input when no
        '<RET>' marker is present.
    """
    logging.info("Parsing output from Ultron model...")
    if '<RET>' not in output:
        logging.warning("<RET> not found in output.")
        return output

    # rpartition yields the full string as the tail when '<h>' is missing.
    _, _, after_open_tag = output.rpartition('<h>')
    # partition yields the full string as the head when '</h>' is missing.
    description, _, _ = after_open_tag.partition('</h>')
    return description.strip()
|
156 |
+
|
157 |
+
# Example usage
|
158 |
+
def main():
    """
    Example usage of Ultron model.

    Loads the model, runs it on a short sample dialogue with the default
    name, date and face image, and logs the resulting image description.
    """
    # The root logger defaults to WARNING, which would hide every
    # logging.info call in this example -- including the final result.
    # basicConfig does nothing if logging has already been configured.
    logging.basicConfig(level=logging.INFO)

    model_path = "passing2961/Ultron-11B"
    model, processor = load_ultron_model(model_path)

    dialogue = """Tom: I have so much work at the office, I'm exhausted...
Personal AI Assistant: How can I help you feel less tired?
Tom: Hmm.. I miss my dog Star at home.
Personal AI Assistant: """

    image_description = run_ultron_model(model, processor, dialogue)
    logging.info(f"Image description generated: {image_description}")


if __name__ == "__main__":
    main()
|
175 |
+
```
|
176 |
+
|
177 |
## License and Recommendations
|
178 |
|
179 |
🚨 Ultron-11B is intended to be used for research purposes only.
|