ivanfioravanti committed on 25 days ago

Commit

82b2de9

•

1 Parent(s): 2d23e3f

Upload folder using huggingface_hub (#1)

- 9e25ccad259193076f9edee558a5a9e3c6b12b7c8c286b301999040f2967d297 (ecddcd87586fa792442edb8905d4565d57cee75b)
- 047d05130ef9c5e903d1265d159a500e87ff45bd3438fa28553d0500308382c6 (edf12b52832a39dd94af854acb54b7b4a2e671f1)
- 28e829034ffd91ee045fc96503d3069424b0aeb0fb421956be4af748eb71cef6 (7479960efbc731a4334feda361cf36328b0449c7)
- c561065bc101eee3a9ac067b4e4f7b89edbc375d785f3e75379c5540c336148e (52c30bb14a604cdc4cfb627e0e5e4dc889fe8aa6)
- 96ce2550713792f5663ca48c4bcd28e491a35b6d047454fa05cd84694659d181 (8f943fa320636f77a46187322cd9b6c4e03a0c57)
- 010b382c793a0301de34bba1548adb19239f1f6fcfb58b05bfa9921ee52a263f (8619403f3b984accd93951de90d6f2b9187dff41)
- b51b3b5ceaf9ddf674b1bf32635367d8499009589b22b72209d13fea56343be6 (e067c76df410998d4b2447f8d7d2a4546ffb6dd6)
- e49f81a019d15887a70d187c239050bf8dd490148d8e556ade09b1d5a74e085b (c9e67db89cb0805c6aaaf2377379e4f726bcf6d2)
- ecb00aec31b87db721869d4bedffc19cd9616507132ece8dc62804a53e56db0a (9f9e7ffc99155dcf7af7940735fc99e45aa17287)

Files changed (19) hide show

.gitattributes +1 -0
README.md +143 -0
added_tokens.json +5 -0
config.json +35 -0
merges.txt +0 -0
model-00001-of-00009.safetensors +3 -0
model-00002-of-00009.safetensors +3 -0
model-00003-of-00009.safetensors +3 -0
model-00004-of-00009.safetensors +3 -0
model-00005-of-00009.safetensors +3 -0
model-00006-of-00009.safetensors +3 -0
model-00007-of-00009.safetensors +3 -0
model-00008-of-00009.safetensors +3 -0
model-00009-of-00009.safetensors +3 -0
model.safetensors.index.json +0 -0
special_tokens_map.json +20 -0
tokenizer.json +3 -0
tokenizer_config.json +43 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,143 @@

+---
+language:
+- en
+license: other
+library_name: transformers
+tags:
+- chat
+- qwen
+- qwen2.5
+- finetune
+- english
+- mlx
+base_model: MaziyarPanahi/calme-3.2-instruct-78b
+license_name: qwen
+license_link: https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE
+pipeline_tag: text-generation
+inference: false
+model_creator: MaziyarPanahi
+quantized_by: MaziyarPanahi
+model-index:
+- name: calme-3.2-instruct-78b
+  results:
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: IFEval (0-Shot)
+      type: HuggingFaceH4/ifeval
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: inst_level_strict_acc and prompt_level_strict_acc
+      value: 80.63
+      name: strict accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=MaziyarPanahi/calme-3.2-instruct-78b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: BBH (3-Shot)
+      type: BBH
+      args:
+        num_few_shot: 3
+    metrics:
+    - type: acc_norm
+      value: 62.61
+      name: normalized accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=MaziyarPanahi/calme-3.2-instruct-78b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MATH Lvl 5 (4-Shot)
+      type: hendrycks/competition_math
+      args:
+        num_few_shot: 4
+    metrics:
+    - type: exact_match
+      value: 39.95
+      name: exact match
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=MaziyarPanahi/calme-3.2-instruct-78b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: GPQA (0-shot)
+      type: Idavidrein/gpqa
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 20.36
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=MaziyarPanahi/calme-3.2-instruct-78b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MuSR (0-shot)
+      type: TAUR-Lab/MuSR
+      args:
+        num_few_shot: 0
+    metrics:
+    - type: acc_norm
+      value: 38.53
+      name: acc_norm
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=MaziyarPanahi/calme-3.2-instruct-78b
+      name: Open LLM Leaderboard
+  - task:
+      type: text-generation
+      name: Text Generation
+    dataset:
+      name: MMLU-PRO (5-shot)
+      type: TIGER-Lab/MMLU-Pro
+      config: main
+      split: test
+      args:
+        num_few_shot: 5
+    metrics:
+    - type: acc
+      value: 70.03
+      name: accuracy
+    source:
+      url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=MaziyarPanahi/calme-3.2-instruct-78b
+      name: Open LLM Leaderboard
+---
+# mlx-community/calme-3.2-instruct-78b-4bit
+The model [mlx-community/calme-3.2-instruct-78b-4bit](https://huggingface.co/mlx-community/calme-3.2-instruct-78b-4bit) was
+converted to MLX format from [MaziyarPanahi/calme-3.2-instruct-78b](https://huggingface.co/MaziyarPanahi/calme-3.2-instruct-78b)
+using mlx-lm version **0.20.4**.
+## Use with mlx
+```bash
+pip install mlx-lm
+```
+```python
+from mlx_lm import load, generate
+model, tokenizer = load("mlx-community/calme-3.2-instruct-78b-4bit")
+prompt="hello"
+if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
+    messages = [{"role": "user", "content": prompt}]
+    prompt = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+response = generate(model, tokenizer, prompt=prompt, verbose=True)
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+    "architectures": [
+        "Qwen2ForCausalLM"
+    ],
+    "attention_dropout": 0.0,
+    "eos_token_id": 151645,
+    "hidden_act": "silu",
+    "hidden_size": 8192,
+    "initializer_range": 0.02,
+    "intermediate_size": 29568,
+    "max_position_embeddings": 32768,
+    "max_window_layers": 80,
+    "model_type": "qwen2",
+    "num_attention_heads": 64,
+    "num_hidden_layers": 86,
+    "num_key_value_heads": 8,
+    "quantization": {
+        "group_size": 64,
+        "bits": 4
+    },
+    "quantization_config": {
+        "group_size": 64,
+        "bits": 4
+    },
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 1000000.0,
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.46.1",
+    "use_cache": false,
+    "use_sliding_window": false,
+    "vocab_size": 151646
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ac22c44b4a543804cc29b1f12c069fb2c0fa7382b0c1c18ed38853fbb76c0f9
+size 5363641525

model-00002-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c23c0281aaf795a1546b74f1c7e237a473a435f9a012dc9163b0b0aca8c8cf7e
+size 5294878254

model-00003-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5fef18350fdf1c3731ec3c28919449909eeccdf175d53c0eb01fc5817a63694
+size 5346171097

model-00004-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e42db1da98530c18167a574f7add8f9c70be84a863549a22b6c9eafd7e99e1f
+size 5294845297

model-00005-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c2124ed1e3d64af7cd735cfdc1ca6b00e981c37d4f5149ba7010c0220debc10
+size 5294878217

model-00006-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03f853b0bdfcd45643d36119649071ec9795ce91ad2b620c0e49865fd6e64292
+size 5294878204

model-00007-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb0fbc66626a24d6b7ee3110fe764fbfc8de6b1fe553f233cecaf9e33dd7c1f3
+size 5346171091

model-00008-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb8923d58954eff606fc372077b97528786f3ab0b1b72bb68c41c4264909b88b
+size 5294845297

model-00009-of-00009.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45871f3e3f05882272e5315bf2551c8f39b960a962b582f509e18ccde69bbe93
+size 1328823071

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
+size 11418266

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff