Maverick17
committed on
Update README.md
Browse files
Added sample description for inference
README.md
CHANGED
@@ -16,17 +16,68 @@ should probably proofread and complete it, then remove this comment. -->
|
|
16 |
|
17 |
This model is a fine-tuned version of [HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) on https://huggingface.co/datasets/Agent-Eval-Refine/GUI-Dense-Descriptions dataset
|
18 |
|
19 |
-
##
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
## Training procedure
|
32 |
|
@@ -44,10 +95,6 @@ The following hyperparameters were used during training:
|
|
44 |
- lr_scheduler_warmup_steps: 50
|
45 |
- num_epochs: 1
|
46 |
|
47 |
-
### Training results
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
### Framework versions
|
52 |
|
53 |
- PEFT 0.13.0
|
|
|
16 |
|
17 |
This model is a fine-tuned version of [HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) on https://huggingface.co/datasets/Agent-Eval-Refine/GUI-Dense-Descriptions dataset
|
18 |
|
19 |
+
## Intended usage
|
20 |
+
|
21 |
+
```python
|
22 |
+
from peft import PeftModel
|
23 |
+
from transformers import AutoProcessor, Idefics3ForConditionalGeneration
|
24 |
+
from transformers.image_utils import load_image
|
25 |
+
import torch
|
26 |
+
|
27 |
+
adapter_path = "Maverick17/idefics3-llama-gui-dense-descriptions"
|
28 |
+
base_model_id = "HuggingFaceM4/Idefics3-8B-Llama3"
|
29 |
+
|
30 |
+
# Load base model
|
31 |
+
model = Idefics3ForConditionalGeneration.from_pretrained(
|
32 |
+
base_model_id,
|
33 |
+
_attn_implementation="flash_attention_2",
|
34 |
+
device_map="auto",
|
35 |
+
torch_dtype=torch.bfloat16,
|
36 |
+
)
|
37 |
+
|
38 |
+
# Merge LoRA and base model
|
39 |
+
peft_model = PeftModel.from_pretrained(model, adapter_path)
|
40 |
+
merged_model = peft_model.merge_and_unload()
|
41 |
+
|
42 |
+
processor = AutoProcessor.from_pretrained(base_model_id)
|
43 |
+
|
44 |
+
image = load_image("path/to/ui/image.png")
|
45 |
+
|
46 |
+
# Create inputs
|
47 |
+
messages = [
|
48 |
+
{
|
49 |
+
"role": "user",
|
50 |
+
"content": [
|
51 |
+
{"type": "image"},
|
52 |
+
{
|
53 |
+
"type": "text",
|
54 |
+
"text": "Provide a detailed description of the image.",
|
55 |
+
},
|
56 |
+
],
|
57 |
+
},
|
58 |
+
]
|
59 |
+
|
60 |
+
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
|
61 |
+
inputs = processor(text=prompt, images=[image], return_tensors="pt")
|
62 |
+
inputs = {k: v.to("cuda") for k, v in inputs.items()}
|
63 |
+
|
64 |
+
generation_args = {
|
65 |
+
"max_new_tokens": 1024,
|
66 |
+
"repetition_penalty": 1,
|
67 |
+
}
|
68 |
+
|
69 |
+
generation_args["do_sample"] = False
|
70 |
+
generation_args.update(inputs)
|
71 |
+
|
72 |
+
# Generate
|
73 |
+
generated_ids = merged_model.generate(**generation_args)
|
74 |
+
|
75 |
+
generated_texts = processor.batch_decode(
|
76 |
+
generated_ids[:, generation_args["input_ids"].size(1) :], skip_special_tokens=True
|
77 |
+
)
|
78 |
+
|
79 |
+
print(generated_texts[0].strip())
|
80 |
+
```
|
81 |
|
82 |
## Training procedure
|
83 |
|
|
|
95 |
- lr_scheduler_warmup_steps: 50
|
96 |
- num_epochs: 1
|
97 |
|
|
|
|
|
|
|
|
|
98 |
### Framework versions
|
99 |
|
100 |
- PEFT 0.13.0
|