georgeyw committed on Mar 14, 2024

Commit

2c18d86

verified ·

1 Parent(s): 3c766fc

Training in progress, step 1000, checkpoint

Browse files

Files changed (30) hide show

checkpoint-100/config.json +31 -0
checkpoint-100/model.safetensors +3 -0
checkpoint-100/training_args.bin +3 -0
checkpoint-1000/config.json +31 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-200/config.json +31 -0
checkpoint-200/model.safetensors +3 -0
checkpoint-200/training_args.bin +3 -0
checkpoint-300/config.json +31 -0
checkpoint-300/model.safetensors +3 -0
checkpoint-300/training_args.bin +3 -0
checkpoint-400/config.json +31 -0
checkpoint-400/model.safetensors +3 -0
checkpoint-400/training_args.bin +3 -0
checkpoint-500/config.json +31 -0
checkpoint-500/model.safetensors +3 -0
checkpoint-500/training_args.bin +3 -0
checkpoint-600/config.json +31 -0
checkpoint-600/model.safetensors +3 -0
checkpoint-600/training_args.bin +3 -0
checkpoint-700/config.json +31 -0
checkpoint-700/model.safetensors +3 -0
checkpoint-700/training_args.bin +3 -0
checkpoint-800/config.json +31 -0
checkpoint-800/model.safetensors +3 -0
checkpoint-800/training_args.bin +3 -0
checkpoint-900/config.json +31 -0
checkpoint-900/model.safetensors +3 -0
checkpoint-900/training_args.bin +3 -0

checkpoint-100/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-100/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03e56f5b3dd4afc21190625e190827309ea556064666d746669af2409569127a
+size 324662984

checkpoint-100/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a56aaeb73fd5a850495c644f8711e0fd2770f3e773c4f808af9f7d9b344fd53f
+size 324662984

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-200/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-200/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd23533d50f604282bb0a5f22d02f3f22b956993282389c73cafa912b443f37b
+size 324662984

checkpoint-200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-300/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-300/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36f18c740a272ca7e32993e375d9b59d0a23be68c1052f6cb99f88c68dd8e9d9
+size 324662984

checkpoint-300/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-400/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-400/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c52b1b0659f152c1779a66ada1c70277cd1169830e5defefe2ebb2b81bc1e54
+size 324662984

checkpoint-400/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d46c2ed38a318af855fe6a7b7e3bd8d23f9c90968d76ea49c9c04a19a340fe6
+size 324662984

checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-600/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-600/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1aeacc2c448ac0651561ddccb1ae81763195f7fdf4bd769ac1edbbe4b2be2597
+size 324662984

checkpoint-600/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-700/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-700/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:724bb60a2c9fe997583beba8959b4f2c8e454faacea8001b80b930465e690fed
+size 324662984

checkpoint-700/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-800/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-800/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de87fc2b26d177e393555124766b0d1959f2ee409b61a092812128ab7e82fd73
+size 324662984

checkpoint-800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520

checkpoint-900/config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "georgeyw/gpt-2-small-init-seed-5",
+  "architectures": [
+    "GPTNeoXForCausalLM"
+  ],
+  "attention_bias": true,
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classifier_dropout": 0.1,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 1024,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "rope_scaling": null,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.38.2",
+  "use_cache": true,
+  "use_parallel_residual": true,
+  "vocab_size": 50304
+}

checkpoint-900/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef4d020683dcf2ce71ffebf9c5c3b3414691acb36632c56bf2296cd12a9d9ad4
+size 324662984

checkpoint-900/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f522b6b895157d4ae37816ea2b39e4b24555bc3782f9f18492c6709abd779
+size 6520