vishwa2488 committed
Commit 73814b7 · verified
1 Parent(s): 51ef034

(Trained with Unsloth)
added_tokens.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
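These added tokens are the Qwen2-VL special tokens (chat markers, vision delimiters, image/video pads) mapped to IDs 151643–151656. A minimal sanity check, assuming the files in this commit are loaded from a placeholder repo id (substitute the real one):

```python
from transformers import AutoTokenizer

# Placeholder repo id -- not the actual repository name.
tokenizer = AutoTokenizer.from_pretrained("your-username/your-qwen2-vl-finetune")

# Every entry in added_tokens.json should round-trip to its listed ID.
for token, expected_id in [("<|im_start|>", 151644), ("<|im_end|>", 151645),
                           ("<|image_pad|>", 151655), ("<|video_pad|>", 151656)]:
    assert tokenizer.convert_tokens_to_ids(token) == expected_id, token
```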
chat_template.json CHANGED
@@ -1,3 +1,3 @@
  {
- "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- Find out if there are any images #}\n{% set image_ns = namespace(has_images=false) %} \n{%- for message in messages %}\n {%- for content in message['content'] %}\n {%- if content['type'] == 'image' %}\n {%- set image_ns.has_images = true %}\n {%- endif %}\n {%- endfor %}\n{%- endfor %}\n\n{#- Error out if there are images and system message #}\n{%- if image_ns.has_images and not system_message == \"\" %}\n {{- raise_exception(\"Prompting with images is incompatible with system messages.\") }}\n{%- endif %}\n\n{#- System message if there are no images #}\n{%- if not image_ns.has_images %}\n {{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n {%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n {%- endif %}\n {{- \"Cutting Knowledge Date: December 2023\\n\" }}\n {{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n {%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {%- endif %}\n {{- system_message }}\n {{- \"<|eot_id|>\" }}\n{%- endif %}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' }}\n {%- if message['content'] is string %}\n {{- message['content'] }}\n {%- else %}\n {%- for content in message['content'] %}\n {%- if content['type'] == 'image' %}\n {{- '<|image|>' }}\n {%- elif content['type'] == 'text' %}\n {{- content['text'] }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n"
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
  }
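The replacement is the standard Qwen2-VL ChatML-style template: it injects a default system prompt when none is given, wraps each turn in `<|im_start|>…<|im_end|>`, and renders image/video content items as `<|vision_start|><|image_pad|><|vision_end|>` or `<|vision_start|><|video_pad|><|vision_end|>`. A rendering sketch under the same placeholder repo id assumption:

```python
from transformers import AutoProcessor

# Placeholder repo id -- not the actual repository name.
processor = AutoProcessor.from_pretrained("your-username/your-qwen2-vl-finetune")

messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image."},
    ]},
]

# Expected shape of the rendered prompt:
#   <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n
#   <|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>\n
#   <|im_start|>assistant\n
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
print(prompt)
```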
config.json CHANGED
@@ -1,227 +1,49 @@
  {
- "_name_or_path": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+ "_name_or_path": "Qwen/Qwen2-VL-7B-Instruct",
  "architectures": [
- "MllamaForConditionalGeneration"
+ "Qwen2VLForConditionalGeneration"
  ],
- "image_token_index": 128256,
- "model_type": "mllama",
- "text_config": {
- "_attn_implementation_autoset": false,
- "_name_or_path": "",
- "add_cross_attention": false,
- "architectures": null,
- "bad_words_ids": null,
- "begin_suppress_tokens": null,
- "bos_token_id": 128000,
- "chunk_size_feed_forward": 0,
- "cross_attention_hidden_size": null,
- "cross_attention_layers": [
- 3,
- 8,
- 13,
- 18,
- 23,
- 28,
- 33,
- 38
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151645,
+ "hidden_act": "silu",
+ "hidden_size": 3584,
+ "image_token_id": 151655,
+ "initializer_range": 0.02,
+ "intermediate_size": 18944,
+ "max_position_embeddings": 32768,
+ "max_window_layers": 28,
+ "model_type": "qwen2_vl",
+ "num_attention_heads": 28,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 4,
+ "pad_token_id": 151654,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "mrope_section": [
+ 16,
+ 24,
+ 24
  ],
- "decoder_start_token_id": null,
- "diversity_penalty": 0.0,
- "do_sample": false,
- "dropout": 0,
- "early_stopping": false,
- "encoder_no_repeat_ngram_size": 0,
- "eos_token_id": [
- 128001,
- 128008,
- 128009
- ],
- "exponential_decay_length_penalty": null,
- "finetuning_task": null,
- "forced_bos_token_id": null,
- "forced_eos_token_id": null,
- "hidden_act": "silu",
- "hidden_size": 4096,
- "id2label": {
- "0": "LABEL_0",
- "1": "LABEL_1"
- },
- "initializer_range": 0.02,
- "intermediate_size": 14336,
- "is_decoder": false,
- "is_encoder_decoder": false,
- "label2id": {
- "LABEL_0": 0,
- "LABEL_1": 1
- },
- "length_penalty": 1.0,
- "max_length": 20,
- "max_position_embeddings": 131072,
- "min_length": 0,
- "model_type": "mllama_text_model",
- "no_repeat_ngram_size": 0,
- "num_attention_heads": 32,
- "num_beam_groups": 1,
- "num_beams": 1,
- "num_hidden_layers": 40,
- "num_key_value_heads": 8,
- "num_return_sequences": 1,
- "output_attentions": false,
- "output_hidden_states": false,
- "output_scores": false,
- "pad_token_id": 128004,
- "prefix": null,
- "problem_type": null,
- "pruned_heads": {},
- "remove_invalid_values": false,
- "repetition_penalty": 1.0,
- "return_dict": true,
- "return_dict_in_generate": false,
- "rms_norm_eps": 1e-05,
- "rope_scaling": {
- "factor": 8.0,
- "high_freq_factor": 4.0,
- "low_freq_factor": 1.0,
- "original_max_position_embeddings": 8192,
- "rope_type": "llama3"
- },
- "rope_theta": 500000.0,
- "sep_token_id": null,
- "suppress_tokens": null,
- "task_specific_params": null,
- "temperature": 1.0,
- "tf_legacy_loss": false,
- "tie_encoder_decoder": false,
- "tie_word_embeddings": false,
- "tokenizer_class": null,
- "top_k": 50,
- "top_p": 1.0,
- "torch_dtype": "bfloat16",
- "torchscript": false,
- "typical_p": 1.0,
- "use_bfloat16": false,
- "use_cache": true,
- "vocab_size": 128256
+ "rope_type": "default",
+ "type": "default"
  },
+ "rope_theta": 1000000.0,
+ "sliding_window": 32768,
+ "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.3",
  "unsloth_fixed": true,
+ "use_cache": true,
+ "use_sliding_window": false,
+ "video_token_id": 151656,
  "vision_config": {
- "_attn_implementation_autoset": false,
- "_name_or_path": "",
- "add_cross_attention": false,
- "architectures": null,
- "attention_heads": 16,
- "bad_words_ids": null,
- "begin_suppress_tokens": null,
- "bos_token_id": null,
- "chunk_size_feed_forward": 0,
- "cross_attention_hidden_size": null,
- "decoder_start_token_id": null,
- "diversity_penalty": 0.0,
- "do_sample": false,
- "early_stopping": false,
- "encoder_no_repeat_ngram_size": 0,
- "eos_token_id": null,
- "exponential_decay_length_penalty": null,
- "finetuning_task": null,
- "forced_bos_token_id": null,
- "forced_eos_token_id": null,
- "hidden_act": "gelu",
- "hidden_size": 1280,
- "id2label": {
- "0": "LABEL_0",
- "1": "LABEL_1"
- },
- "image_size": 560,
- "initializer_range": 0.02,
- "intermediate_layers_indices": [
- 3,
- 7,
- 15,
- 23,
- 30
- ],
- "intermediate_size": 5120,
- "is_decoder": false,
- "is_encoder_decoder": false,
- "label2id": {
- "LABEL_0": 0,
- "LABEL_1": 1
- },
- "length_penalty": 1.0,
- "max_length": 20,
- "max_num_tiles": 4,
- "min_length": 0,
- "model_type": "mllama_vision_model",
- "no_repeat_ngram_size": 0,
- "norm_eps": 1e-05,
- "num_beam_groups": 1,
- "num_beams": 1,
- "num_channels": 3,
- "num_global_layers": 8,
- "num_hidden_layers": 32,
- "num_return_sequences": 1,
- "output_attentions": false,
- "output_hidden_states": false,
- "output_scores": false,
- "pad_token_id": null,
- "patch_size": 14,
- "prefix": null,
- "problem_type": null,
- "pruned_heads": {},
- "remove_invalid_values": false,
- "repetition_penalty": 1.0,
- "return_dict": true,
- "return_dict_in_generate": false,
- "sep_token_id": null,
- "supported_aspect_ratios": [
- [
- 1,
- 1
- ],
- [
- 1,
- 2
- ],
- [
- 1,
- 3
- ],
- [
- 1,
- 4
- ],
- [
- 2,
- 1
- ],
- [
- 2,
- 2
- ],
- [
- 3,
- 1
- ],
- [
- 4,
- 1
- ]
- ],
- "suppress_tokens": null,
- "task_specific_params": null,
- "temperature": 1.0,
- "tf_legacy_loss": false,
- "tie_encoder_decoder": false,
- "tie_word_embeddings": true,
- "tokenizer_class": null,
- "top_k": 50,
- "top_p": 1.0,
- "torch_dtype": "bfloat16",
- "torchscript": false,
- "typical_p": 1.0,
- "use_bfloat16": false,
- "vision_output_dim": 7680
- }
+ "in_chans": 3,
+ "model_type": "qwen2_vl",
+ "spatial_patch_size": 14
+ },
+ "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652,
+ "vision_token_id": 151654,
+ "vocab_size": 152064
  }
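The new config.json swaps the Mllama (Llama 3.2 Vision) configuration for a 7B Qwen2-VL decoder: 28 layers, hidden size 3584, grouped-query attention with 4 KV heads, multimodal rotary embeddings (`mrope_section: [16, 24, 24]`), and a Qwen2-VL vision tower with 14-pixel spatial patches. A loading sketch under the same placeholder repo id assumption:

```python
import torch
from transformers import Qwen2VLForConditionalGeneration

# Placeholder repo id -- not the actual repository name.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "your-username/your-qwen2-vl-finetune",
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
    device_map="auto",
)

print(model.config.model_type)         # qwen2_vl
print(model.config.num_hidden_layers)  # 28
print(model.config.rope_scaling)       # {'mrope_section': [16, 24, 24], ...}
```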
generation_config.json CHANGED
@@ -1,13 +1,14 @@
  {
- "bos_token_id": 128000,
+ "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
- 128001,
- 128008,
- 128009
+ 151645,
+ 151643
  ],
- "pad_token_id": 128004,
- "temperature": 0.6,
- "top_p": 0.9,
+ "max_length": 32768,
+ "pad_token_id": 151654,
+ "temperature": 0.01,
+ "top_k": 1,
+ "top_p": 0.001,
  "transformers_version": "4.46.3"
  }
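The new generation defaults are effectively greedy: `do_sample` stays true, but `temperature=0.01`, `top_k=1`, and `top_p=0.001` collapse the sampling distribution to the most likely token. A quick inspection and override sketch (placeholder repo id):

```python
from transformers import GenerationConfig

# Placeholder repo id -- not the actual repository name.
gen_cfg = GenerationConfig.from_pretrained("your-username/your-qwen2-vl-finetune")
print(gen_cfg.temperature, gen_cfg.top_k, gen_cfg.top_p)  # 0.01 1 0.001 -> near-greedy
print(gen_cfg.eos_token_id)                               # [151645, 151643]

# For more diverse sampling, pass explicit kwargs at call time, e.g.:
# model.generate(**inputs, do_sample=True, temperature=0.7, top_p=0.9, max_new_tokens=256)
```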
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json CHANGED
@@ -1,7 +1,6 @@
  {
  "do_convert_rgb": true,
  "do_normalize": true,
- "do_pad": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
@@ -9,18 +8,22 @@
  0.4578275,
  0.40821073
  ],
- "image_processor_type": "MllamaImageProcessor",
+ "image_processor_type": "Qwen2VLImageProcessor",
  "image_std": [
  0.26862954,
  0.26130258,
  0.27577711
  ],
- "max_image_tiles": 4,
- "processor_class": "MllamaProcessor",
- "resample": 2,
+ "max_pixels": 12845056,
+ "merge_size": 2,
+ "min_pixels": 3136,
+ "patch_size": 14,
+ "processor_class": "Qwen2VLProcessor",
+ "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
- "height": 560,
- "width": 560
- }
+ "max_pixels": 12845056,
+ "min_pixels": 3136
+ },
+ "temporal_patch_size": 2
  }
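Instead of Mllama's fixed 560x560 tiling, the Qwen2-VL image processor resizes each image so its pixel count falls between `min_pixels` and `max_pixels`, cuts it into 14x14 patches (duplicated along time via `temporal_patch_size=2`), and merges 2x2 patch groups into single visual tokens. A usage sketch (placeholder repo id; the shapes in the comments follow from those settings):

```python
from PIL import Image
from transformers import AutoProcessor

# Placeholder repo id -- not the actual repository name.
processor = AutoProcessor.from_pretrained("your-username/your-qwen2-vl-finetune")

image = Image.new("RGB", (1024, 768))  # any RGB image
inputs = processor(
    text=["<|vision_start|><|image_pad|><|vision_end|>Describe the image."],
    images=[image],
    return_tensors="pt",
)
print(inputs["pixel_values"].shape)  # (num_patches, 3 * temporal_patch_size * 14 * 14)
print(inputs["image_grid_thw"])      # per-image (temporal, height, width) patch grid
```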
vocab.json ADDED
The diff for this file is too large to render. See raw diff
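merges.txt and vocab.json together define the byte-level BPE vocabulary and merge rules behind the ~152k-token vocabulary referenced by `vocab_size` in config.json. A small sketch, assuming local copies of the two files, just to inspect them with the `tokenizers` library:

```python
from tokenizers import Tokenizer
from tokenizers.models import BPE

# Hypothetical local paths to the two files added in this commit.
bpe = BPE.from_file("vocab.json", "merges.txt")
tokenizer = Tokenizer(bpe)
print(tokenizer.get_vocab_size())  # base BPE vocab, before the added special tokens
```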