Electricarchmage
committed on
Upload folder using huggingface_hub
- checkpoint-1000/config.json +36 -0
- checkpoint-1000/optimizer.pt +3 -0
- checkpoint-1000/pytorch_model.bin +3 -0
- checkpoint-1000/scheduler.pt +3 -0
- checkpoint-1000/trainer_state.json +28 -0
- checkpoint-1000/training_args.bin +3 -0
- checkpoint-1500/config.json +36 -0
- checkpoint-1500/optimizer.pt +3 -0
- checkpoint-1500/pytorch_model.bin +3 -0
- checkpoint-1500/scheduler.pt +3 -0
- checkpoint-1500/trainer_state.json +34 -0
- checkpoint-1500/training_args.bin +3 -0
- checkpoint-2000/config.json +36 -0
- checkpoint-2000/optimizer.pt +3 -0
- checkpoint-2000/pytorch_model.bin +3 -0
- checkpoint-2000/scheduler.pt +3 -0
- checkpoint-2000/trainer_state.json +40 -0
- checkpoint-2000/training_args.bin +3 -0
- checkpoint-2500/config.json +36 -0
- checkpoint-2500/optimizer.pt +3 -0
- checkpoint-2500/pytorch_model.bin +3 -0
- checkpoint-2500/scheduler.pt +3 -0
- checkpoint-2500/trainer_state.json +46 -0
- checkpoint-2500/training_args.bin +3 -0
- checkpoint-500/config.json +36 -0
- checkpoint-500/optimizer.pt +3 -0
- checkpoint-500/pytorch_model.bin +3 -0
- checkpoint-500/scheduler.pt +3 -0
- checkpoint-500/trainer_state.json +22 -0
- checkpoint-500/training_args.bin +3 -0
- config.json +36 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
- vocab.json +0 -0
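The commit message says the whole output folder was pushed with huggingface_hub. A minimal sketch of how such an upload can be reproduced is shown below; the repo_id and folder path are placeholders rather than values taken from this commit, and the call assumes a huggingface_hub release that provides `HfApi.upload_folder`.

```python
# Sketch: push a local training-output folder to the Hugging Face Hub.
# Assumes you are logged in (`huggingface-cli login`) and the target repo exists;
# folder_path and repo_id below are placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="/kaggle/working/result",   # local folder with config, weights, tokenizer
    repo_id="your-username/your-model",     # placeholder repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```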
checkpoint-1000/config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
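Each checkpoint folder is a self-contained Transformers save: the config above plus the folder's pytorch_model.bin can be loaded directly. A minimal sketch, assuming the transformers library is installed and the checkpoint folder has been downloaded locally (the path is a placeholder):

```python
# Sketch: load the GPT-2 LM head model saved in a checkpoint folder.
# "checkpoint-1000" is assumed to be a local copy of the folder in this repo.
from transformers import GPT2Config, GPT2LMHeadModel

config = GPT2Config.from_pretrained("checkpoint-1000")
model = GPT2LMHeadModel.from_pretrained("checkpoint-1000", config=config)
print(model.config.n_layer, model.config.n_embd)  # 12 layers, 768 hidden size
```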
checkpoint-1000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f63e66a1a26ffcfebf1314aeb9abbd3abf6c6629386cfd2bd3138bd7fd87698a
+size 995611287
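The optimizer.pt, pytorch_model.bin, scheduler.pt and training_args.bin entries are tracked with Git LFS, so the repository itself only stores three-line pointer files (spec version, SHA-256 object id, byte size) while the actual binaries live in LFS storage. A small sketch that parses such a pointer file, assuming a local clone in which the pointer has not yet been replaced by the real file:

```python
# Sketch: read a Git LFS pointer file and report the object it references.
# The path is a placeholder for any of the *.pt / *.bin pointers in this repo.
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = read_lfs_pointer("checkpoint-1000/optimizer.pt")
print(ptr["oid"])        # sha256:f63e66a1...
print(int(ptr["size"]))  # 995611287 bytes of optimizer state
```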
checkpoint-1000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e133321d38bcf7e8c19bc6a8e99e06fb28b31ee6f3a6835288ee7eabf8e7e4a
+size 510408315
checkpoint-1000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ff041ee588bb7c24b268940aa60323de14015cae4a35073615ec0ebea8861cb
+size 623
checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.721170395869191,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.1394148020654047e-05,
+      "loss": 2.054,
+      "step": 500
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 3.278829604130809e-05,
+      "loss": 1.8443,
+      "step": 1000
+    }
+  ],
+  "max_steps": 2905,
+  "num_train_epochs": 5,
+  "total_flos": 1528925221158912.0,
+  "trial_name": null,
+  "trial_params": null
+}
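trainer_state.json records the schedule position and the logged loss curve, so it is useful both for inspecting training progress and for resuming with Trainer.train(resume_from_checkpoint=...). A minimal sketch that prints the logged losses, assuming a local copy of the file:

```python
# Sketch: inspect the loss history logged in a checkpoint's trainer_state.json.
import json

with open("checkpoint-1000/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    print(f"step {entry['step']:>4}  epoch {entry['epoch']:.2f}  "
          f"loss {entry['loss']:.4f}  lr {entry['learning_rate']:.2e}")
# step  500  epoch 0.86  loss 2.0540  lr 4.14e-05
# step 1000  epoch 1.72  loss 1.8443  lr 3.28e-05
```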
checkpoint-1000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b512c2246ea8ce40f0e79ca9339f773b42d9398b27fc07181baaad57bd109ea4
+size 2351
checkpoint-1500/config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-1500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b2daac124e824e49bd31b399ca087659d346c1a1ac7aa9193295bf9ec45f156
+size 995611287
checkpoint-1500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74b1ab44b3ce1d07a755f7cce767d178844e6e30b6a2fbb6fca3b8ba6f4382a5
+size 510408315
checkpoint-1500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eab5b6b8d802a03af35dcd062a20071bed7b0ec5c9e12fd7537d44ffd562b2c2
+size 623
checkpoint-1500/trainer_state.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.581755593803787,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.1394148020654047e-05,
+      "loss": 2.054,
+      "step": 500
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 3.278829604130809e-05,
+      "loss": 1.8443,
+      "step": 1000
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 2.418244406196214e-05,
+      "loss": 1.7694,
+      "step": 1500
+    }
+  ],
+  "max_steps": 2905,
+  "num_train_epochs": 5,
+  "total_flos": 2293292261965824.0,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-1500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b512c2246ea8ce40f0e79ca9339f773b42d9398b27fc07181baaad57bd109ea4
+size 2351
checkpoint-2000/config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-2000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f909c1e62849153ee15e900f8c0433705cf7c7ac783525575114c8af6e581e4
+size 995611287
checkpoint-2000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21e693fab4d7c534dcb42baf90cdc6cfc35c8cdd3d9b205d243ae649b32987bf
+size 510408315
checkpoint-2000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e350a5dc2daebb8a63b7dfbb20a7c162dfff62f2ce389205d7579ecb8c28e1fc
+size 623
checkpoint-2000/trainer_state.json
ADDED
@@ -0,0 +1,40 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.442340791738382,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.1394148020654047e-05,
+      "loss": 2.054,
+      "step": 500
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 3.278829604130809e-05,
+      "loss": 1.8443,
+      "step": 1000
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 2.418244406196214e-05,
+      "loss": 1.7694,
+      "step": 1500
+    },
+    {
+      "epoch": 3.44,
+      "learning_rate": 1.557659208261618e-05,
+      "loss": 1.7264,
+      "step": 2000
+    }
+  ],
+  "max_steps": 2905,
+  "num_train_epochs": 5,
+  "total_flos": 3057659302772736.0,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-2000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b512c2246ea8ce40f0e79ca9339f773b42d9398b27fc07181baaad57bd109ea4
+size 2351
checkpoint-2500/config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-2500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae01e5fdedb27d8e1360ef6ca388e1ecc14ece34b7abaecdc945723834975b10
+size 995611287
checkpoint-2500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5574596717a6a554450d509244cb62d156371157ed091dc252107832b8377e0
+size 510408315
checkpoint-2500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f3ad3f2c4a67c5b749fc34f16f20e3ea27e487d311ad4aab625cc68487af8e3
+size 623
checkpoint-2500/trainer_state.json
ADDED
@@ -0,0 +1,46 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.3029259896729775,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.1394148020654047e-05,
+      "loss": 2.054,
+      "step": 500
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 3.278829604130809e-05,
+      "loss": 1.8443,
+      "step": 1000
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 2.418244406196214e-05,
+      "loss": 1.7694,
+      "step": 1500
+    },
+    {
+      "epoch": 3.44,
+      "learning_rate": 1.557659208261618e-05,
+      "loss": 1.7264,
+      "step": 2000
+    },
+    {
+      "epoch": 4.3,
+      "learning_rate": 6.970740103270223e-06,
+      "loss": 1.6912,
+      "step": 2500
+    }
+  ],
+  "max_steps": 2905,
+  "num_train_epochs": 5,
+  "total_flos": 3822026343579648.0,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-2500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b512c2246ea8ce40f0e79ca9339f773b42d9398b27fc07181baaad57bd109ea4
+size 2351
checkpoint-500/config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
checkpoint-500/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34da1a820dc6c05c132c9ed9f1fe0507f0f06a6e525bccfe0086ed2e8ebaa872
+size 995611287
checkpoint-500/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6583f64305d2c1c7030b8912d53d166aa4187ab2ed0815f36e6923edf41d6028
+size 510408315
checkpoint-500/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:856361f9b6a54898158931d388acfb0680a58a59bcd141d39c8cc219ff523d60
+size 623
checkpoint-500/trainer_state.json
ADDED
@@ -0,0 +1,22 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.8605851979345955,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.1394148020654047e-05,
+      "loss": 2.054,
+      "step": 500
+    }
+  ],
+  "max_steps": 2905,
+  "num_train_epochs": 5,
+  "total_flos": 764558180352000.0,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-500/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b512c2246ea8ce40f0e79ca9339f773b42d9398b27fc07181baaad57bd109ea4
+size 2351
config.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "_name_or_path": "/kaggle/working/result",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "transformers_version": "4.5.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
merges.txt
ADDED
The diff for this file is too large to render.
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb2feeb9158acb7d5822361bb81b4b9132324399f6fe4213710c304ae4457f48
+size 510405883
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "/kaggle/working/result"}
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b512c2246ea8ce40f0e79ca9339f773b42d9398b27fc07181baaad57bd109ea4
+size 2351
vocab.json
ADDED
The diff for this file is too large to render.
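Taken together, the root-level config.json, pytorch_model.bin, vocab.json, merges.txt and the tokenizer JSON files form the exported fine-tuned model. A minimal sketch of loading it and sampling text, assuming the files have been fetched locally (the "path-or-repo-id" argument is a placeholder) and mirroring the do_sample/max_length=50 settings from task_specific_params:

```python
# Sketch: load the exported GPT-2 model and tokenizer and generate a short sample.
# "path-or-repo-id" is a placeholder; with the Hub repo id, from_pretrained would
# also pull the LFS-stored weights automatically.
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("path-or-repo-id")
model = GPT2LMHeadModel.from_pretrained("path-or-repo-id")

inputs = tokenizer("Once upon a time", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,      # matches task_specific_params["text-generation"]
    max_length=50,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```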