Upload 14 files
Browse files- config.json +25 -0
- generation_config.json +6 -0
- output-00001-of-00007.safetensors +3 -0
- output-00002-of-00007.safetensors +3 -0
- output-00003-of-00007.safetensors +3 -0
- output-00004-of-00007.safetensors +3 -0
- output-00005-of-00007.safetensors +3 -0
- output-00006-of-00007.safetensors +3 -0
- output-00007-of-00007.safetensors +3 -0
- params.json +11 -0
- test.py +26 -0
- tokenizer.json +0 -0
- tokenizer.model.v3 +0 -0
- tokenizer_config.json +0 -0
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MistralForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"bos_token_id": 1,
|
7 |
+
"eos_token_id": 2,
|
8 |
+
"hidden_act": "silu",
|
9 |
+
"hidden_size": 12288,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 28672,
|
12 |
+
"max_position_embeddings": 32768,
|
13 |
+
"model_type": "mistral",
|
14 |
+
"num_attention_heads": 96,
|
15 |
+
"num_hidden_layers": 88,
|
16 |
+
"num_key_value_heads": 8,
|
17 |
+
"rms_norm_eps": 1e-05,
|
18 |
+
"rope_theta": 1000000.0,
|
19 |
+
"sliding_window": null,
|
20 |
+
"tie_word_embeddings": false,
|
21 |
+
"torch_dtype": "bfloat16",
|
22 |
+
"transformers_version": "4.42.3",
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 32768
|
25 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.42.3"
|
6 |
+
}
|
output-00001-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce856eacee738cbdbd3fe514245a54ae3788c7161a435dadbdfa0095d6df4b3f
|
3 |
+
size 8583689644
|
output-00002-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33cafb63f8f04b9a173706ed184f0dd56a9db76d2260b63f646b7064c2f48e7c
|
3 |
+
size 8504228506
|
output-00003-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56769a07ec18c87451ebbecf70aba1eb0f6008024ae2fb5f0f6ffb9ef4b1618c
|
3 |
+
size 8588679832
|
output-00004-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33db0d1b6d2ae8954c5ff076b0bacdddc385bf495aa730b07c419d0472bd0246
|
3 |
+
size 8462536444
|
output-00005-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8abef56b803d3d2bc4d885383883fa66eda1fe3ce96428f08eddf70d6e22f4aa
|
3 |
+
size 8488630568
|
output-00006-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4c579b5c69c29dc584c11f930b23aff610873751694b5d0c828175e459e3197
|
3 |
+
size 8571493588
|
output-00007-of-00007.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c42159d8f76eb273881ad205b55bcaaa9ac36b3bb8033a5606c1ca73b78ba0f
|
3 |
+
size 7021565176
|
params.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dim": 12288,
|
3 |
+
"n_layers": 88,
|
4 |
+
"head_dim": 128,
|
5 |
+
"hidden_dim": 28672,
|
6 |
+
"n_heads": 96,
|
7 |
+
"n_kv_heads": 8,
|
8 |
+
"norm_eps": 1e-05,
|
9 |
+
"vocab_size": 32768,
|
10 |
+
"rope_theta": 1000000.0
|
11 |
+
}
|
test.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from typing import Dict
|
3 |
+
|
4 |
+
from safetensors.torch import load_file, save_file
|
5 |
+
from huggingface_hub import split_torch_state_dict_into_shards
|
6 |
+
import torch
|
7 |
+
import os
|
8 |
+
|
9 |
+
def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str):
    """Split ``state_dict`` into shards and write them as safetensors files.

    The shard layout is computed by ``split_torch_state_dict_into_shards``
    using the filename pattern ``consolidated{suffix}.safetensors``. When the
    result is sharded, a ``consolidated.safetensors.index.json`` file holding
    the split metadata and the tensor-name -> filename map is written next to
    the shards.

    Args:
        state_dict: Mapping from tensor name to tensor.
        save_directory: Directory that receives the shard files and the
            index. It is created if it does not exist yet.
    """
    # An empty string means "current directory" for os.path.join, but
    # os.makedirs('') raises, so only create a directory when one is named.
    if save_directory:
        os.makedirs(save_directory, exist_ok=True)

    state_dict_split = split_torch_state_dict_into_shards(
        state_dict, filename_pattern='consolidated{suffix}.safetensors'
    )

    for filename, tensors in state_dict_split.filename_to_tensors.items():
        shard = {tensor: state_dict[tensor] for tensor in tensors}
        print("Saving", save_directory, filename)
        # Tag each shard as a PyTorch checkpoint, as the huggingface_hub
        # reference example does.
        # NOTE(review): some loaders reject safetensors files lacking this
        # "format" entry - confirm against the consumer of these files.
        save_file(
            shard,
            os.path.join(save_directory, filename),
            metadata={"format": "pt"},
        )

    # The index is only meaningful when more than one shard was produced.
    if state_dict_split.is_sharded:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        index_path = os.path.join(save_directory, "consolidated.safetensors.index.json")
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2)
|
22 |
+
|
23 |
+
# Script entry: load the single consolidated checkpoint from the current
# working directory and re-save it via save_state_dict into that same directory.
big_file = 'consolidated.safetensors'
loaded = load_file(big_file)

# Plain string literal: the original f-string had no placeholders.
save_state_dict(loaded, save_directory='.')
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model.v3
ADDED
Binary file (588 kB). View file
|
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|