joelb committed on
Commit
7f78f28
·
verified ·
1 Parent(s): 33e3969

Upload model

Browse files
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
3
  "architectures": [
4
- "MyModel"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
@@ -21,11 +21,10 @@
21
  "rms_norm_eps": 1e-05,
22
  "rope_theta": 1000000.0,
23
  "router_aux_loss_coef": 0.02,
24
- "router_jitter_noise": 0.0,
25
  "sliding_window": null,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "float32",
28
- "transformers_version": "4.40.2",
29
  "use_cache": true,
30
  "vocab_size": 32000
31
  }
 
1
  {
2
  "_name_or_path": "mistralai/Mixtral-8x7B-v0.1",
3
  "architectures": [
4
+ "MyModelForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
 
21
  "rms_norm_eps": 1e-05,
22
  "rope_theta": 1000000.0,
23
  "router_aux_loss_coef": 0.02,
 
24
  "sliding_window": null,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "float32",
27
+ "transformers_version": "4.39.3",
28
  "use_cache": true,
29
  "vocab_size": 32000
30
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad28dc81f970598a6152b029a9834c4ddb73709f7a80766b75048104500a6aa7
3
- size 4920052576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0befbb0f84e298c5eed4fc1ffebb19ce6855dd57431d2b4e6a0e917809b87656
3
+ size 4920052720
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098d3a70edc491f5a049e2ff8b053013d72dfae9eb74e0eead26623004964b9f
3
- size 1409336368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9f788219adf59dc4aab4f4b54f331f4a6481ffb17b96ae6c002e65b8469614
3
+ size 1409336424
model.safetensors.index.json CHANGED
@@ -3,38 +3,38 @@
3
  "total_size": 6329384960
4
  },
5
  "weight_map": {
6
- "embed_tokens.weight": "model-00001-of-00002.safetensors",
7
- "layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00002.safetensors",
8
- "layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00002.safetensors",
9
- "layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00002.safetensors",
10
- "layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00002.safetensors",
11
- "layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00002.safetensors",
12
- "layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00002.safetensors",
13
- "layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00002.safetensors",
14
- "layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00002.safetensors",
15
- "layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00002.safetensors",
16
- "layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00002.safetensors",
17
- "layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00002.safetensors",
18
- "layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00002.safetensors",
19
- "layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00002.safetensors",
20
- "layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00002.safetensors",
21
- "layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00002.safetensors",
22
- "layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00002.safetensors",
23
- "layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00002.safetensors",
24
- "layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00002.safetensors",
25
- "layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00002.safetensors",
26
- "layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00002.safetensors",
27
- "layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00002.safetensors",
28
- "layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00002.safetensors",
29
- "layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00002.safetensors",
30
- "layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00002.safetensors",
31
- "layers.0.block_sparse_moe.gate.weight": "model-00001-of-00002.safetensors",
32
- "layers.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
33
- "layers.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
34
- "layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
35
- "layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
36
- "layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
37
- "layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
38
- "norm.weight": "model-00002-of-00002.safetensors"
39
  }
40
  }
 
3
  "total_size": 6329384960
4
  },
5
  "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
7
+ "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00002.safetensors",
8
+ "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00002.safetensors",
9
+ "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00002.safetensors",
10
+ "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00002.safetensors",
11
+ "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00002.safetensors",
12
+ "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00002.safetensors",
13
+ "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00002.safetensors",
14
+ "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00002.safetensors",
15
+ "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00002.safetensors",
16
+ "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00002.safetensors",
17
+ "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00002.safetensors",
18
+ "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00002.safetensors",
19
+ "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00002.safetensors",
20
+ "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00002.safetensors",
21
+ "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00002.safetensors",
22
+ "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00002.safetensors",
23
+ "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00002.safetensors",
24
+ "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00002.safetensors",
25
+ "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00002.safetensors",
26
+ "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00002.safetensors",
27
+ "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00002.safetensors",
28
+ "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00002.safetensors",
29
+ "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00002.safetensors",
30
+ "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00002.safetensors",
31
+ "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00002.safetensors",
32
+ "model.layers.0.input_layernorm.weight": "model-00002-of-00002.safetensors",
33
+ "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
34
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
35
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
36
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.norm.weight": "model-00002-of-00002.safetensors"
39
  }
40
  }