georgeyw committed on Mar 14, 2024

Commit

743a576

verified ·

1 Parent(s): 3e6365b

Training in progress, step 62000, checkpoint

Browse files

Files changed (30) hide show

checkpoint-61100/config.json +31 -0
checkpoint-61100/model.safetensors +3 -0
checkpoint-61100/training_args.bin +3 -0
checkpoint-61200/config.json +31 -0
checkpoint-61200/model.safetensors +3 -0
checkpoint-61200/training_args.bin +3 -0
checkpoint-61300/config.json +31 -0
checkpoint-61300/model.safetensors +3 -0
checkpoint-61300/training_args.bin +3 -0
checkpoint-61400/config.json +31 -0
checkpoint-61400/model.safetensors +3 -0
checkpoint-61400/training_args.bin +3 -0
checkpoint-61500/config.json +31 -0
checkpoint-61500/model.safetensors +3 -0
checkpoint-61500/training_args.bin +3 -0
checkpoint-61600/config.json +31 -0
checkpoint-61600/model.safetensors +3 -0
checkpoint-61600/training_args.bin +3 -0
checkpoint-61700/config.json +31 -0
checkpoint-61700/model.safetensors +3 -0
checkpoint-61700/training_args.bin +3 -0
checkpoint-61800/config.json +31 -0
checkpoint-61800/model.safetensors +3 -0
checkpoint-61800/training_args.bin +3 -0
checkpoint-61900/config.json +31 -0
checkpoint-61900/model.safetensors +3 -0
checkpoint-61900/training_args.bin +3 -0
checkpoint-62000/config.json +31 -0
checkpoint-62000/model.safetensors +3 -0
checkpoint-62000/training_args.bin +3 -0

checkpoint-61100/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61100/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:342fe41e9417a7ab74d99ce274fbdf85a843c084d87ae246936f33eea870d493
+size 324662984

checkpoint-61100/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61200/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61200/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00395e3db3a514c10bf2525a69a4062b7a161101b8416774dd7ca48fbc743022
+size 324662984

checkpoint-61200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61300/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61300/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:372ef5633f0282baf296c7476eaf59cff26656a343c32ab1ae30102ba49b79d9
+size 324662984

checkpoint-61300/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61400/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61400/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9543ccbb118a2d0d7bd1f0f66bb02d53f544d85edd6cee9cb12f5fde4ecbebaa
+size 324662984

checkpoint-61400/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61500/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7203b09741192de73184d653e52eaf6e997c22ce892d99fd93e4436a135afa9
+size 324662984

checkpoint-61500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61600/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61600/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a77a5f2c71fc3fce2dc0ec58c72090d51fe8ca7f516f03bb67f2f5fddb06d6ae
+size 324662984

checkpoint-61600/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61700/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61700/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc9657e987f27b818370545e4d71bc16749940026f9b8ab38b2f789b078dd1eb
+size 324662984

checkpoint-61700/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61800/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61800/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:728a97406b9d639b80419d1f970a6583f885b3720c8b0705c0044354b2e8b44e
+size 324662984

checkpoint-61800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-61900/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-61900/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7befbe582ee93cc3391f82280df65603a84bdad1bac30357fc4a1874d230a2e
+size 324662984

checkpoint-61900/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-62000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-62000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0990b41b6b2e15ffd481af42406ef2cd237f3370aa6a2f78c5631be262b8697
+size 324662984

checkpoint-62000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520