AlekseyKorshuk committed
Commit a8807a1
Parent(s): 1c8741c

huggingartists

Files changed:
- README.md +3 -3
- config.json +1 -1
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +195 -7
- training_args.bin +2 -2
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
dataset = load_dataset("huggingartists/og-buda")
```

- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2ic775kv/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.

## Training procedure

The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.

- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1g4193mx) for full transparency and reproducibility.

- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1g4193mx/artifacts) is logged and versioned.

## How to use

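The hunk ends at the `## How to use` heading, whose body lies outside this diff. For orientation, a minimal usage sketch (an assumption on my part, based on the standard transformers text-generation pipeline rather than on the truncated README section):

```python
# Minimal sketch of using the model published by this commit (assumed
# standard transformers API; this snippet is not part of the diff).
from transformers import pipeline

# The checkpoint name matches the repo id used in the README above.
generator = pipeline("text-generation", model="huggingartists/og-buda")

# Sample a few continuations from a short prompt.
outputs = generator(
    "I am",
    max_length=64,
    num_return_sequences=3,
    do_sample=True,
)
for out in outputs:
    print(out["generated_text"])
```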
config.json CHANGED
@@ -35,7 +35,7 @@
}
},
"torch_dtype": "float32",
- "transformers_version": "4.
+ "transformers_version": "4.11.1",
"use_cache": true,
"vocab_size": 50257
}
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 1.
+ {"eval_loss": 1.5331557989120483, "eval_runtime": 11.1146, "eval_samples_per_second": 21.233, "eval_steps_per_second": 2.699, "epoch": 5.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:b6ca1185ef4c01671071a96d4a6bc08a99dfb851bc262d5b345d99c44c8777b8
size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:66b59554fe6cde8204954f2fd3c802193fa21f23c03e5477bc320159740bc767
size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:acc420fc59da46155e275e30ee4f33e49901084326e259ee2daba1b1bc168d40
size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
- size
+ oid sha256:151664dce0a47953ee30b57a0b85f89039a9cddc1f9c94e479fa890f7dfb89e4
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:9a5b7a99e2b2ebcfb7b31d39b7daabc1659b5909216616127ab13d8f6b3cc088
size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
{
- "best_metric": 1.
- "best_model_checkpoint": "output/og-buda/checkpoint-
- "epoch":
- "global_step":
+ "best_metric": 1.5331557989120483,
+ "best_model_checkpoint": "output/og-buda/checkpoint-750",
+ "epoch": 5.0,
+ "global_step": 750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
@@ -766,11 +766,199 @@
"eval_samples_per_second": 22.942,
"eval_steps_per_second": 2.893,
"step": 600
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00013682420202226357,
+ "loss": 1.7067,
+ "step": 605
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001357009254103391,
+ "loss": 1.7111,
+ "step": 610
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001338424770178476,
+ "loss": 1.6736,
+ "step": 615
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00013126921839428241,
+ "loss": 1.7152,
+ "step": 620
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012800934269961248,
+ "loss": 1.7257,
+ "step": 625
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012409856581412142,
+ "loss": 1.7207,
+ "step": 630
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00011957973502774922,
+ "loss": 1.6457,
+ "step": 635
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00011450235959621773,
+ "loss": 1.713,
+ "step": 640
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00010892206830726369,
+ "loss": 1.7242,
+ "step": 645
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00010290000000000009,
+ "loss": 1.6386,
+ "step": 650
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 9.650213371499996e-05,
+ "loss": 1.6539,
+ "step": 655
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 8.97985658141213e-05,
+ "loss": 1.7034,
+ "step": 660
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 8.286274199009828e-05,
+ "loss": 1.6681,
+ "step": 665
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 7.577065258016099e-05,
+ "loss": 1.6742,
+ "step": 670
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 6.860000000000003e-05,
+ "loss": 1.687,
+ "step": 675
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 6.14293474198391e-05,
+ "loss": 1.7066,
+ "step": 680
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 5.433725800990179e-05,
+ "loss": 1.6622,
+ "step": 685
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 4.740143418587876e-05,
+ "loss": 1.6647,
+ "step": 690
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 4.069786628500011e-05,
+ "loss": 1.6251,
+ "step": 695
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 3.429999999999996e-05,
+ "loss": 1.6732,
+ "step": 700
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 2.8277931692736372e-05,
+ "loss": 1.5917,
+ "step": 705
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 2.2697640403782324e-05,
+ "loss": 1.5938,
+ "step": 710
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 1.762026497225081e-05,
+ "loss": 1.7145,
+ "step": 715
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 1.3101434185878628e-05,
+ "loss": 1.6437,
+ "step": 720
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 9.190657300387574e-06,
+ "loss": 1.5361,
+ "step": 725
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 5.930781605717611e-06,
+ "loss": 1.5239,
+ "step": 730
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 3.3575229821524373e-06,
+ "loss": 1.656,
+ "step": 735
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 1.4990745896609297e-06,
+ "loss": 1.6033,
+ "step": 740
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 3.757979777364447e-07,
+ "loss": 1.6277,
+ "step": 745
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 0.0,
+ "loss": 1.5745,
+ "step": 750
+ },
+ {
+ "epoch": 5.0,
+ "eval_loss": 1.5331557989120483,
+ "eval_runtime": 11.0656,
+ "eval_samples_per_second": 21.327,
+ "eval_steps_per_second": 2.711,
+ "step": 750
}
],
- "max_steps":
- "num_train_epochs":
- "total_flos":
+ "max_steps": 750,
+ "num_train_epochs": 5,
+ "total_flos": 781001883648000.0,
"trial_name": null,
"trial_params": null
}
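The appended log_history records above trace the per-step training loss and learning-rate decay over the final epoch. A small sketch (an assumption, not part of this commit; it presumes a local copy of the updated file) of how those records can be pulled out for inspection:

```python
# Sketch: extract the training curve from the updated trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# log_history holds the per-step records shown in the hunk above; training
# records carry "loss", evaluation records carry "eval_loss".
train_curve = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_curve = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("last training step:", train_curve[-1])   # (750, 1.5745) in this commit
print("best eval loss:", state["best_metric"])  # 1.5331557989120483
```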
training_args.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
- size
+ oid sha256:09831668545d53e1902c5d27a1cebb81afa6b22d9d6f902498224fa69e5221ac
+ size 2863