Commit dec7b74 (verified) by passing2961 · 1 parent: d1bcbec

Update README.md

Files changed (1):
  1. README.md +140 -0
README.md CHANGED
@@ -34,6 +34,146 @@ tags:
 
 ## How to Use
 
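+Below is a minimal example of loading Ultron-11B and generating an image description for a dialogue. It assumes `transformers`, `accelerate`, `bitsandbytes` (needed for the 4-bit `BitsAndBytesConfig`), and `Pillow` are installed, a CUDA GPU is available, and `sample_face.png` is replaced with a path to the speaker's face image.
+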
+```python
+import logging
+from PIL import Image
+import torch
+from transformers import (
+    AutoModelForVision2Seq,
+    BitsAndBytesConfig,
+    AutoProcessor,
+)
+
+# Define Ultron template
+ULTRON_TEMPLATE = 'You are an excellent image sharing system that generates <RET> token with the following image description. The image description must be provided with the following format: <RET> <h> image description </h>. The following conversation is between {name} and AI assistant on {date}. The given image is {name}\'s appearance.\n{dialogue}'
+
+# Ultron model initialization
+def load_ultron_model(model_path):
+    """
+    Loads the Ultron model and processor.
+
+    Args:
+        model_path (str): Path to the pre-trained model.
+
+    Returns:
+        model: Loaded Vision-to-Seq model.
+        processor: Corresponding processor for the model.
+    """
+    logging.info(f"Loading Ultron model from {model_path}...")
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.bfloat16,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type='nf4'
+    )
+    model_kwargs = dict(
+        quantization_config=quantization_config,  # load the weights in 4-bit as configured above
+        torch_dtype=torch.bfloat16,
+        low_cpu_mem_usage=True,
+        trust_remote_code=True,
+        device_map="auto",
+    )
+    processor = AutoProcessor.from_pretrained(
+        'meta-llama/Llama-3.2-11B-Vision-Instruct', torch_dtype=torch.bfloat16
+    )
+    model = AutoModelForVision2Seq.from_pretrained(
+        model_path,
+        **model_kwargs
+    ).eval()
+    logging.info("Ultron model loaded successfully.")
+    return model, processor
+
+# Run Ultron model
+def run_ultron_model(model, processor, dialogue, name='Tom', date='2023.04.20', face_image_path='sample_face.png'):
+    """
+    Runs the Ultron model with a given dialogue, name, and image.
+
+    Args:
+        model: Pre-trained model instance.
+        processor: Processor for model input.
+        dialogue (str): Input dialogue for the assistant.
+        name (str): Name of the user.
+        date (str): Date of the conversation.
+        face_image_path (str): Path to the face image file.
+
+    Returns:
+        str: Description of the shared image.
+    """
+    logging.info("Running Ultron model...")
+    face_image = Image.open(face_image_path).convert("RGB")
+
+    prompt = ULTRON_TEMPLATE.format(
+        dialogue=dialogue,
+        name=name,
+        date=date
+    )
+    messages = [
+        {
+            "content": [
+                {"text": prompt, "type": "text"},
+                {"type": "image"}
+            ],
+            "role": "user"
+        },
+    ]
+
+    logging.info("Preparing input for Ultron model...")
+    prompt_input = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = processor(face_image, prompt_input, return_tensors='pt').to('cuda')
+
+    with torch.inference_mode():
+        logging.info("Generating output from Ultron model...")
+        output = model.generate(
+            **inputs,
+            do_sample=True,
+            temperature=0.9,
+            max_new_tokens=512,
+            top_p=1.0,
+            use_cache=True,
+            num_beams=1,
+        )
+
+    output_text = processor.decode(output[0], skip_special_tokens=True)
+    logging.info("Output generated successfully from Ultron model.")
+    return parse_ultron_output(output_text)
+
+# Parse Ultron output
+def parse_ultron_output(output):
+    """
+    Parses the output to extract the image description.
+
+    Args:
+        output (str): The generated output text from the model.
+
+    Returns:
+        str: Extracted image description.
+    """
+    logging.info("Parsing output from Ultron model...")
+    if '<RET>' in output:
+        return output.split('<h>')[-1].split('</h>')[0].strip()
+    else:
+        logging.warning("<RET> not found in output.")
+        return output
+
+# Example usage
+def main():
+    """
+    Example usage of Ultron model.
+    """
+    logging.basicConfig(level=logging.INFO)
+
+    model_path = "passing2961/Ultron-11B"
+    model, processor = load_ultron_model(model_path)
+
+    dialogue = """Tom: I have so much work at the office, I'm exhausted...
+Personal AI Assistant: How can I help you feel less tired?
+Tom: Hmm.. I miss my dog Star at home.
+Personal AI Assistant: """
+
+    image_description = run_ultron_model(model, processor, dialogue)
+    logging.info(f"Image description generated: {image_description}")
+
+if __name__ == "__main__":
+    main()
+```
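+
+The assistant's image-sharing turn follows the format requested in `ULTRON_TEMPLATE`, i.e. `<RET> <h> image description </h>`, and `parse_ultron_output` keeps only the text between `<h>` and `</h>`. As a rough illustration using the helper defined above (the raw string below is made up, not an actual model response):
+
+```python
+raw = "Personal AI Assistant: <RET> <h> a photo of Tom's dog Star resting at home </h>"
+print(parse_ultron_output(raw))  # -> a photo of Tom's dog Star resting at home
+```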
+
 ## License and Recommendations
 
 🚨 Ultron-11B is intended to be used for research purposes only.