Upload 14 files

Files changed (14) hide show

config.json ADDED Viewed

+{
+  "architectures": [
+    "MistralForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 12288,
+  "initializer_range": 0.02,
+  "intermediate_size": 28672,
+  "max_position_embeddings": 32768,
+  "model_type": "mistral",
+  "num_attention_heads": 96,
+  "num_hidden_layers": 88,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.42.3",
+  "use_cache": true,
+  "vocab_size": 32768
+}

generation_config.json ADDED Viewed

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.42.3"
+}

output-00001-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce856eacee738cbdbd3fe514245a54ae3788c7161a435dadbdfa0095d6df4b3f
+size 8583689644

output-00002-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:33cafb63f8f04b9a173706ed184f0dd56a9db76d2260b63f646b7064c2f48e7c
+size 8504228506

output-00003-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:56769a07ec18c87451ebbecf70aba1eb0f6008024ae2fb5f0f6ffb9ef4b1618c
+size 8588679832

output-00004-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:33db0d1b6d2ae8954c5ff076b0bacdddc385bf495aa730b07c419d0472bd0246
+size 8462536444

output-00005-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8abef56b803d3d2bc4d885383883fa66eda1fe3ce96428f08eddf70d6e22f4aa
+size 8488630568

output-00006-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4c579b5c69c29dc584c11f930b23aff610873751694b5d0c828175e459e3197
+size 8571493588

output-00007-of-00007.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c42159d8f76eb273881ad205b55bcaaa9ac36b3bb8033a5606c1ca73b78ba0f
+size 7021565176

params.json ADDED Viewed

+{
+    "dim": 12288,
+    "n_layers": 88,
+    "head_dim": 128,
+    "hidden_dim": 28672,
+    "n_heads": 96,
+    "n_kv_heads": 8,
+    "norm_eps": 1e-05,
+    "vocab_size": 32768,
+    "rope_theta": 1000000.0
+}

test.py ADDED Viewed

+import json
+from typing import Dict
+from safetensors.torch import load_file, save_file
+from huggingface_hub import split_torch_state_dict_into_shards
+import torch
+import os
+def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str):
+    state_dict_split = split_torch_state_dict_into_shards(state_dict, filename_pattern='consolidated{suffix}.safetensors')
+    for filename, tensors in state_dict_split.filename_to_tensors.items():
+        shard = {tensor: state_dict[tensor] for tensor in tensors}
+        print("Saving", save_directory, filename)
+        save_file(shard, os.path.join(save_directory, filename))
+    if state_dict_split.is_sharded:
+        index = {
+            "metadata": state_dict_split.metadata,
+            "weight_map": state_dict_split.tensor_to_filename,
+        }
+        with open(os.path.join(save_directory, "consolidated.safetensors.index.json"), "w") as f:
+            f.write(json.dumps(index, indent=2))
+big_file = 'consolidated.safetensors'
+loaded = load_file(big_file)
+save_state_dict(loaded, save_directory=f'.')

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model.v3 ADDED Viewed

Binary file (588 kB). View file

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff