huggingartists

Browse files

Files changed (9) hide show

README.md +3 -3
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +182 -6
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/og-buda")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/19koddag/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2pjx3dty) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2pjx3dty/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/og-buda")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11co51jr/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0/artifacts) is logged and versioned.
 ## How to use

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.6688779592514038, "eval_runtime": 9.5584, "eval_samples_per_second": 20.819, "eval_steps_per_second": 2.616, "epoch": 4.0}


1	+ {"eval_loss": 1.6418354511260986, "eval_runtime": 10.1368, "eval_samples_per_second": 22.69, "eval_steps_per_second": 2.861, "epoch": 4.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e333fb388e02210c1f3ff984ad6c4d21e6d008bcab46dfd83e150235f59d89bc
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:87d33c0069e8b72372c9e6dfc7f9a8187c137ac978f25b032d134b7ef5fd064f
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ebbd0aa38a33ae4017b166fd8f1134893410206219de2130750e6d7d5f39b3e
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3160fb582d67d179d5ee50aa5804c6359d28643e90fcd54e9bd4c89655dfc68
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cb6cef5d2d7cbde31a058ffa3aefb7f08a7a2e4806e882a70db75aeff20abb1
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:911581e98e184bd3b5eb8b1cf8a6bf2e4ec0ff4f88f7ea521640e2dd07b94f03
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a598a815349f8b1988c86709fdd881dafa589aaad49c7f20d98d3860abe8f36f
 size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:7dbbbccbd02e27dbdb79311ae0bdd36d65163767a8d048823783db7230b9c01f
 size 14567

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e86c0f8b5c2b1ad48d13df79807268e5d50299fba1287501961674ecf5c92d8c
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1b342b9aa08984a7d22e3d2af7c55d45d34bb20259c62266df710a4afe559ae
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.6688779592514038,
-  "best_model_checkpoint": "output/og-buda/checkpoint-462",
-  "epoch": 3.0,
-  "global_step": 462,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -590,11 +590,187 @@
       "eval_samples_per_second": 20.576,
       "eval_steps_per_second": 2.585,
       "step": 462
     }
   ],
-  "max_steps": 616,
   "num_train_epochs": 4,
-  "total_flos": 481038630912000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.6418354511260986,
+  "best_model_checkpoint": "output/og-buda/checkpoint-600",
+  "epoch": 4.0,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.576,
       "eval_steps_per_second": 2.585,
       "step": 462
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 3.35752298215246e-06,
+      "loss": 1.6773,
+      "step": 465
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.930781605717588e-06,
+      "loss": 1.7743,
+      "step": 470
+    },
+    {
+      "epoch": 3.17,
+      "learning_rate": 9.190657300387474e-06,
+      "loss": 1.6782,
+      "step": 475
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 1.310143418587859e-05,
+      "loss": 1.7254,
+      "step": 480
+    },
+    {
+      "epoch": 3.23,
+      "learning_rate": 1.7620264972250762e-05,
+      "loss": 1.7116,
+      "step": 485
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 2.269764040378228e-05,
+      "loss": 1.737,
+      "step": 490
+    },
+    {
+      "epoch": 3.3,
+      "learning_rate": 2.827793169273632e-05,
+      "loss": 1.6298,
+      "step": 495
+    },
+    {
+      "epoch": 3.33,
+      "learning_rate": 3.4300000000000014e-05,
+      "loss": 1.784,
+      "step": 500
+    },
+    {
+      "epoch": 3.37,
+      "learning_rate": 4.069786628500004e-05,
+      "loss": 1.7561,
+      "step": 505
+    },
+    {
+      "epoch": 3.4,
+      "learning_rate": 4.740143418587858e-05,
+      "loss": 1.7454,
+      "step": 510
+    },
+    {
+      "epoch": 3.43,
+      "learning_rate": 5.4337258009901596e-05,
+      "loss": 1.8009,
+      "step": 515
+    },
+    {
+      "epoch": 3.47,
+      "learning_rate": 6.142934741983902e-05,
+      "loss": 1.7686,
+      "step": 520
+    },
+    {
+      "epoch": 3.5,
+      "learning_rate": 6.859999999999997e-05,
+      "loss": 1.697,
+      "step": 525
+    },
+    {
+      "epoch": 3.53,
+      "learning_rate": 7.577065258016093e-05,
+      "loss": 1.6804,
+      "step": 530
+    },
+    {
+      "epoch": 3.57,
+      "learning_rate": 8.286274199009834e-05,
+      "loss": 1.7485,
+      "step": 535
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 8.979856581412137e-05,
+      "loss": 1.7389,
+      "step": 540
+    },
+    {
+      "epoch": 3.63,
+      "learning_rate": 9.650213371499991e-05,
+      "loss": 1.6993,
+      "step": 545
+    },
+    {
+      "epoch": 3.67,
+      "learning_rate": 0.00010289999999999994,
+      "loss": 1.7811,
+      "step": 550
+    },
+    {
+      "epoch": 3.7,
+      "learning_rate": 0.00010892206830726364,
+      "loss": 1.6838,
+      "step": 555
+    },
+    {
+      "epoch": 3.73,
+      "learning_rate": 0.00011450235959621768,
+      "loss": 1.76,
+      "step": 560
+    },
+    {
+      "epoch": 3.77,
+      "learning_rate": 0.0001195797350277492,
+      "loss": 1.7419,
+      "step": 565
+    },
+    {
+      "epoch": 3.8,
+      "learning_rate": 0.00012409856581412136,
+      "loss": 1.7739,
+      "step": 570
+    },
+    {
+      "epoch": 3.83,
+      "learning_rate": 0.0001280093426996125,
+      "loss": 1.7108,
+      "step": 575
+    },
+    {
+      "epoch": 3.87,
+      "learning_rate": 0.0001312692183942824,
+      "loss": 1.7828,
+      "step": 580
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 0.00013384247701784751,
+      "loss": 1.7588,
+      "step": 585
+    },
+    {
+      "epoch": 3.93,
+      "learning_rate": 0.00013570092541033904,
+      "loss": 1.6845,
+      "step": 590
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 0.00013682420202226357,
+      "loss": 1.7817,
+      "step": 595
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0.0001372,
+      "loss": 1.7611,
+      "step": 600
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.6418354511260986,
+      "eval_runtime": 10.0255,
+      "eval_samples_per_second": 22.942,
+      "eval_steps_per_second": 2.893,
+      "step": 600
     }
   ],
+  "max_steps": 600,
   "num_train_epochs": 4,
+  "total_flos": 625141186560000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14e6c3bf99fa49e34d3e8e2fd8f9080f73be0573bb5ad5de841304e2c8bb6fc0
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:241f4fc691c28aaa2ddb496ddc8870ce626761c7412d518a7188ac1aaea6de47
 size 2671