danielhanchen committed on
Commit
6142200
·
verified ·
1 Parent(s): 0784d26

Upload LlamaForCausalLM

Browse files
config.json CHANGED
@@ -1,17 +1,21 @@
1
  {
2
- "_name_or_path": "mattshumer/Reflection-Llama-3.1-70B",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
- "eos_token_id": 128009,
 
 
 
 
10
  "hidden_act": "silu",
11
  "hidden_size": 8192,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 28672,
14
- "max_position_embeddings": 8192,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
  "num_attention_heads": 64,
@@ -34,7 +38,13 @@
34
  "quant_method": "bitsandbytes"
35
  },
36
  "rms_norm_eps": 1e-05,
37
- "rope_scaling": null,
 
 
 
 
 
 
38
  "rope_theta": 500000.0,
39
  "tie_word_embeddings": false,
40
  "torch_dtype": "bfloat16",
 
1
  {
2
+ "_name_or_path": "mattshumer/ref_70_e3",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
  "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
  "hidden_act": "silu",
15
  "hidden_size": 8192,
16
  "initializer_range": 0.02,
17
  "intermediate_size": 28672,
18
+ "max_position_embeddings": 131072,
19
  "mlp_bias": false,
20
  "model_type": "llama",
21
  "num_attention_heads": 64,
 
38
  "quant_method": "bitsandbytes"
39
  },
40
  "rms_norm_eps": 1e-05,
41
+ "rope_scaling": {
42
+ "factor": 8.0,
43
+ "high_freq_factor": 4.0,
44
+ "low_freq_factor": 1.0,
45
+ "original_max_position_embeddings": 8192,
46
+ "rope_type": "llama3"
47
+ },
48
  "rope_theta": 500000.0,
49
  "tie_word_embeddings": false,
50
  "torch_dtype": "bfloat16",
generation_config.json CHANGED
@@ -3,9 +3,9 @@
3
  "do_sample": true,
4
  "eos_token_id": [
5
  128001,
 
6
  128009
7
  ],
8
- "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
  "transformers_version": "4.44.2"
 
3
  "do_sample": true,
4
  "eos_token_id": [
5
  128001,
6
+ 128008,
7
  128009
8
  ],
 
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
  "transformers_version": "4.44.2"
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f25b44e5e89a38d84a1df21a4cc48250c405b1bb06ca263914f03d5d12d718c
3
- size 4949180592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9306fc59ddfd0906cc4a554a6f87dd61252f88839e592e3e2db403feec1876a2
3
+ size 4949180596
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38ee9630f9113b81603505a416b3bde0cb0faa45f0271a17135c75a8c11d24cd
3
- size 4977054937
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891abb23a0059c9f13becd5037ae96a9e25d895a8b2d425553320f474378e8ab
3
+ size 4977054932
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1f201406c98cb5ddb6556c7590972a2eafa1dd0a5a9b12388f2415aa83316ec
3
- size 4977088089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7e9969ead51d756d3e9bcddf36aa96b21307ed104ad38be9de32467f7d3c43
3
+ size 4977088085
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:571409702b8e5805a3e29eba02a81e6c48c927a5661d280574f9a8b23a3e5e60
3
- size 4933786851
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5f8bd882253c2add028f060f4ad80a1ce5fd4c5488b76da92b73946d0aef84
3
+ size 4933786843
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e24219198f4136b092ed88f637f91c5777ece2a488833bb9cf3727b494e016a4
3
- size 4977055135
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52398ba994433fe561a2801d1867223758b38d04a4209e65c86a015ff1a39ab
3
+ size 4977055140
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecaa5c823f92772e338093a6fc8356a3ee7c05d3fcfe5770d6b01879ad4d7a23
3
- size 4977055104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4eaae7f54f4f3b9223cd73d08d89875575651ce099b8d539021a3e897073d3
3
+ size 4977055097
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeaabf66442e56253508eefce1a29eaee70cee84fb67616a3ac8dc78b6163da0
3
- size 4977088092
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92340d511e752837b70b3839d1389896dee68f5772d8fa459698987c90c130d8
3
+ size 4977088087
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9a19adb72fba3822d66ac50c358914299fc86ca07cd9e9c983f87e3216df84e
3
- size 4750125895
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dca28e2728bfaf1c224bfbe5664bbce1c03fb9a4b3c939e4012d7011ae23909
3
+ size 4750125896
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 39518012903
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00008-of-00008.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 39518012884
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00008-of-00008.safetensors",