bowenbaoamd committed on
Commit
e5899f6
·
verified ·
1 Parent(s): 975dbc8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. config.json +43 -0
  2. configuration_grok1.py +62 -0
  3. generation_config.json +7 -0
  4. model-00001-of-00065.safetensors +3 -0
  5. model-00002-of-00065.safetensors +3 -0
  6. model-00003-of-00065.safetensors +3 -0
  7. model-00004-of-00065.safetensors +3 -0
  8. model-00005-of-00065.safetensors +3 -0
  9. model-00006-of-00065.safetensors +3 -0
  10. model-00007-of-00065.safetensors +3 -0
  11. model-00008-of-00065.safetensors +3 -0
  12. model-00009-of-00065.safetensors +3 -0
  13. model-00010-of-00065.safetensors +3 -0
  14. model-00011-of-00065.safetensors +3 -0
  15. model-00012-of-00065.safetensors +3 -0
  16. model-00013-of-00065.safetensors +3 -0
  17. model-00014-of-00065.safetensors +3 -0
  18. model-00015-of-00065.safetensors +3 -0
  19. model-00016-of-00065.safetensors +3 -0
  20. model-00017-of-00065.safetensors +3 -0
  21. model-00018-of-00065.safetensors +3 -0
  22. model-00019-of-00065.safetensors +3 -0
  23. model-00020-of-00065.safetensors +3 -0
  24. model-00021-of-00065.safetensors +3 -0
  25. model-00022-of-00065.safetensors +3 -0
  26. model-00023-of-00065.safetensors +3 -0
  27. model-00024-of-00065.safetensors +3 -0
  28. model-00025-of-00065.safetensors +3 -0
  29. model-00026-of-00065.safetensors +3 -0
  30. model-00027-of-00065.safetensors +3 -0
  31. model-00028-of-00065.safetensors +3 -0
  32. model-00029-of-00065.safetensors +3 -0
  33. model-00030-of-00065.safetensors +3 -0
  34. model-00031-of-00065.safetensors +3 -0
  35. model-00032-of-00065.safetensors +3 -0
  36. model-00033-of-00065.safetensors +3 -0
  37. model-00034-of-00065.safetensors +3 -0
  38. model-00035-of-00065.safetensors +3 -0
  39. model-00036-of-00065.safetensors +3 -0
  40. model-00037-of-00065.safetensors +3 -0
  41. model-00038-of-00065.safetensors +3 -0
  42. model-00039-of-00065.safetensors +3 -0
  43. model-00040-of-00065.safetensors +3 -0
  44. model-00041-of-00065.safetensors +3 -0
  45. model-00042-of-00065.safetensors +3 -0
  46. model-00043-of-00065.safetensors +3 -0
  47. model-00044-of-00065.safetensors +3 -0
  48. model-00045-of-00065.safetensors +3 -0
  49. model-00046-of-00065.safetensors +3 -0
  50. model-00047-of-00065.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/llm/grok-1",
3
+ "architectures": [
4
+ "Grok1ModelForCausalLM"
5
+ ],
6
+ "attn_output_multiplier": 0.08838834764831845,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_grok1.Grok1Config",
9
+ "AutoModel": "modeling_grok1.Grok1Model",
10
+ "AutoModelForCausalLM": "modeling_grok1.Grok1ModelForCausalLM"
11
+ },
12
+ "bos_token_id": 1,
13
+ "embedding_multiplier_scale": 78.38367176906169,
14
+ "eos_token_id": 2,
15
+ "hidden_size": 6144,
16
+ "intermediate_size": 32768,
17
+ "max_attn_value": 30.0,
18
+ "max_position_embeddings": 8192,
19
+ "model_type": "grok-1",
20
+ "num_attention_heads": 48,
21
+ "num_experts": 8,
22
+ "num_experts_per_tok": 2,
23
+ "num_hidden_layers": 64,
24
+ "num_key_value_heads": 8,
25
+ "output_multiplier_scale": 0.5773502691896257,
26
+ "output_router_logits": false,
27
+ "pad_token_id": 0,
28
+ "quantization_config": {
29
+ "activation_scheme": "static",
30
+ "ignored_layers": [
31
+ "lm_head",
32
+ "*.gate"
33
+ ],
34
+ "kv_cache_scheme": "static",
35
+ "quant_method": "fp8"
36
+ },
37
+ "rms_norm_eps": 1e-05,
38
+ "router_aux_loss_coef": 0.001,
39
+ "torch_dtype": "bfloat16",
40
+ "transformers_version": "4.45.0.dev0",
41
+ "use_cache": true,
42
+ "vocab_size": 131072
43
+ }
configuration_grok1.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from transformers.configuration_utils import PretrainedConfig


class Grok1Config(PretrainedConfig):
    """Configuration container for the ``grok-1`` model type.

    Every constructor argument listed below is stored unchanged as an
    attribute of the same name, with one exception: ``num_key_value_heads``
    is replaced by ``num_attention_heads`` when it is passed as ``None``.

    ``pad_token_id``, ``bos_token_id``, ``eos_token_id``,
    ``tie_word_embeddings`` and any extra ``**kwargs`` are not stored here
    directly; they are forwarded to ``PretrainedConfig.__init__``.

    Args:
        vocab_size: Stored as ``self.vocab_size``.
        hidden_size: Stored as ``self.hidden_size``.
        intermediate_size: Stored as ``self.intermediate_size``.
        num_hidden_layers: Stored as ``self.num_hidden_layers``.
        num_attention_heads: Stored as ``self.num_attention_heads``.
        num_key_value_heads: Stored as ``self.num_key_value_heads``; an
            explicit ``None`` falls back to ``num_attention_heads``.
        attn_output_multiplier: Stored as ``self.attn_output_multiplier``.
        max_attn_value: Stored as ``self.max_attn_value``.
        max_position_embeddings: Stored as ``self.max_position_embeddings``.
        embedding_multiplier_scale: Stored as
            ``self.embedding_multiplier_scale``.
        output_multiplier_scale: Stored as ``self.output_multiplier_scale``.
        rms_norm_eps: Stored as ``self.rms_norm_eps``.
        use_cache: Stored as ``self.use_cache``.
        pad_token_id: Forwarded to the base class.
        bos_token_id: Forwarded to the base class.
        eos_token_id: Forwarded to the base class.
        tie_word_embeddings: Forwarded to the base class.
        num_experts_per_tok: Stored as ``self.num_experts_per_tok``.
        num_experts: Stored as ``self.num_experts``.
        output_router_logits: Stored as ``self.output_router_logits``.
        router_aux_loss_coef: Stored as ``self.router_aux_loss_coef``.
        **kwargs: Forwarded to the base class.
    """

    model_type = "grok-1"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=32768,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=32,
        attn_output_multiplier=1.0,
        max_attn_value=1.0,
        max_position_embeddings=4096,
        embedding_multiplier_scale: float = 1.0,
        output_multiplier_scale: float = 1.0,
        rms_norm_eps=1e-5,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=True,
        num_experts_per_tok=2,
        num_experts=8,
        output_router_logits=False,
        router_aux_loss_coef=0.001,
        **kwargs
    ):
        self.vocab_size = vocab_size
        self.attn_output_multiplier = attn_output_multiplier
        self.max_attn_value = max_attn_value
        self.max_position_embeddings = max_position_embeddings
        self.embedding_multiplier_scale = embedding_multiplier_scale
        self.output_multiplier_scale = output_multiplier_scale
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # for backward compatibility
        # NOTE: the parameter default is 32, not None, so this fallback only
        # fires when a caller (or a loaded config) passes None explicitly.
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache

        self.num_experts_per_tok = num_experts_per_tok
        self.num_experts = num_experts
        self.output_router_logits = output_router_logits
        self.router_aux_loss_coef = router_aux_loss_coef
        # The base initializer runs last, after all model-specific attributes
        # have been set (same ordering as the original code).
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.45.0.dev0"
7
+ }
model-00001-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda24ec446dbe40759227ae09067cd019ee612eade23899b1ef470aaf18b0950
3
+ size 4920024188
model-00002-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb9a3a7b57fc186bf32db9bd365bd2d5dc1af879cdf7715241a48a2d2531c005
3
+ size 4920076484
model-00003-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0225396d8f112473b96ecc90f94babdd845f471d2fcef5e46a9853ebe9003f3
3
+ size 4920076484
model-00004-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:969ef7ee0efdcf041854a6a868fc5f836cf09d38268a04125e24df2c5aa15f76
3
+ size 4920076484
model-00005-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26cc7a73f3b66d7e0c3b63ea277b1cc5830d3a6281ea65c71e21b23f569e669f
3
+ size 4920076484
model-00006-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59095dd43f042b68097f289434a6faababa23aaa637ad88428fab8b5fcde8e9b
3
+ size 4920076484
model-00007-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703ee997d6ccaa7511d63fb7bfb49405617db3dd67081ea9ec44e50e051b8a32
3
+ size 4920076484
model-00008-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add2bffa2ef05500713969763684cd215d5cbfd020dc81aa9e29bd63f8aa96b6
3
+ size 4920076484
model-00009-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a86e9da6434f9e333fa2ff43e6a1a3830145a6d67e65e30baf4be7116c6111b
3
+ size 4920076484
model-00010-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc5df4e54a6c832c5e05a08c3244f3bd750d8917d18137edf1e9d3fba521dad
3
+ size 4920076484
model-00011-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5fb367d7567e8b1b974e6bd8a78bdddfc5c8a0ea1ca285c7dc6e61336db30c2
3
+ size 4920076532
model-00012-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39afef49c6b99ee82476a5ade327adfd4c2fc2dbe25e98094436c8adcd1c805c
3
+ size 4920076572
model-00013-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd82d64b0a94c8f0992032a3497fab10ed9981c8760cb188ffa6d613b8a2f24
3
+ size 4920076572
model-00014-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96c438278bc8252695d3ce1a27a87d9926c2752418828b8206c5619db56e5abe
3
+ size 4920076572
model-00015-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f55e448b2e13dd2d43fd92a1fc28a0f1e210c315b219443c3303da975c8b94a4
3
+ size 4920076572
model-00016-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6725e4004736d15a5c7cc2c40ffdb1a2d69a0ea4650411068564f4780de75eac
3
+ size 4920076572
model-00017-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07a13a28c495cb58cfd16300c40e3d1c02ca858b5150e32d8c73873b7a5d1bb2
3
+ size 4920076572
model-00018-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9210824c61e7930e89f9fa760a26652658936dd5676159a0e35baeba10b688
3
+ size 4920076572
model-00019-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a013e73a315fdedc0a47ada83ad784e516c94beb3ae093876e6cd6f98fe7698c
3
+ size 4920076572
model-00020-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:662dccaad758ebdd37d07ec3fc7b78d33de2c6260eb6e3c09a9a24ede908d15f
3
+ size 4920076572
model-00021-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1a58427a05f2922b5c7a3b7bfe065bb53ee9d8e3543a71c1d366f2482a45297
3
+ size 4920076572
model-00022-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce3e09867b361f0cd1fdd929778bda3fbef4681f5c34eb162eb5676933040935
3
+ size 4920076572
model-00023-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c50d684bd86338a2771affc056edbd5e413f8dd09499c375b0fda78b4976ff70
3
+ size 4920076572
model-00024-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbad6ecb99f5f4757205c3d7e62a844fd2ea2bd4fc6feddabecdc2cb5bf3492c
3
+ size 4920076572
model-00025-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b324660b2316c80eabad87f4c9b96be6c0aca7e8e01be7714c5e317d74eb45d
3
+ size 4920076572
model-00026-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a7ff9cf7a31c1d214ce107e213f553f32c5f0e17eefd676771bc7127dfe18f
3
+ size 4920076572
model-00027-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad438a3ea8b6dc705a23276d1b4881e6ba24d7eb49fb1472d2cbe3bda23a81d
3
+ size 4920076572
model-00028-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ead4bb474ee298467513c58fd6287531384a03dd4ec003d75c226c0a97bdb5f
3
+ size 4920076572
model-00029-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c156ce342140ee1ac4152c8fde3ee47cfe4b2e1b6f7a99396ed2095fad6a8d
3
+ size 4920076572
model-00030-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63062598b8751f57809f0003cee7b07a011a0af44380174b5b7bbfde16f1463
3
+ size 4920076572
model-00031-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df295ec76d6c28b1929778a048f76f20f55d70951c63d2cb075d832133d399ab
3
+ size 4920076572
model-00032-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3259a66291a22d443df0c2f3e65670524fac9949ed63999b35f144fb5f826be
3
+ size 4920076572
model-00033-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4fedccae57354211f1bb51b05cc96b0f00de440ddfe849796774cfc29cb9259
3
+ size 4920076572
model-00034-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e03eada9e998e3b6949def3de1e245636894e4fb7953ef38af1ae611b79fe6c9
3
+ size 4920076572
model-00035-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:834c4091064d4cb02db4f715a04aba32336e60f35e57bde3e48c009a8e5bef80
3
+ size 4920076572
model-00036-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9411192f798395a1a5c1a8d30dbb20fcb159abfbebe4edf8709d78239f32bd8
3
+ size 4920076572
model-00037-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a85f5e750b04fa9d18f8eea14f4257317818866a37947c6acd90563eac67af8
3
+ size 4920076572
model-00038-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54052bbd8e690f2d76bf063dff2217e4373dd2133699974117f33fa40f89fea4
3
+ size 4920076572
model-00039-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5deddffb1322af90a2757e52a574fa8829c9de1e3ae595a31edff8a30804429
3
+ size 4920076572
model-00040-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6838bb2ef6bd810172602bd9c3fb46986e9c71b23436a48aca7a5db4acd7ad6
3
+ size 4920076572
model-00041-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:792cbd13676d17644f2e69c04dfd0f941ce0c1458c4c01b4852f9fd55c5acc09
3
+ size 4920076572
model-00042-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f4de30aafb6010274c26ccf47504f07dc9806e65c862eec50d21811e865a781
3
+ size 4920076572
model-00043-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779bf3942cf2385a8ca4f578499fd39777ed72134c6b363e93e410ec4c1cf5d1
3
+ size 4920076572
model-00044-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfadf6e880742cb20ba3bcccc4dd7a5cf5f9ae7b0f9ec4164b99b0930d66e39f
3
+ size 4920076572
model-00045-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f4a7b282d25da5d9f93b9814af83985b28d20a515ec821e13aa85bf8a7c97f
3
+ size 4920076572
model-00046-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f35d607ffe8557d873c61fb807be4850348f1b6cd9fd1505e4d33daa0056a1
3
+ size 4920076572
model-00047-of-00065.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0b1ee692b4b1e9c73b5f92b7842e270572db0a35b509b84afda2a832e5f52c
3
+ size 4920076572