huggingartists

Browse files

Files changed (10) hide show

README.md +4 -4
config.json +4 -2
evaluation.txt +1 -0
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +321 -7
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ widget:
 <div class="inline-flex flex-col" style="line-height: 1.5;">
     <div class="flex">
         <div
-			style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/e525200b65911e414a9d38c348eb1c6b.667x667x1.jpg&#39;)">
         </div>
     </div>
     <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/oxxxymiron")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/296e4zy2/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/lyd324n8) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/lyd324n8/artifacts) is logged and versioned.
 ## How to use

 <div class="inline-flex flex-col" style="line-height: 1.5;">
     <div class="flex">
         <div
+			style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/57ecbbdaf70c671be2d8b7bd39112db0.1000x1000x1.jpg&#39;)">
         </div>
     </div>
     <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
 dataset = load_dataset("huggingartists/oxxxymiron")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/35c25tqd/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Oxxxymiron's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1z3u6lod) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1z3u6lod/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "huggingartists/oxxxymiron",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.9.2",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "oxxxymiron",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.15.0",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eval_loss": 1.3626197576522827, "eval_runtime": 22.1222, "eval_samples_per_second": 20.839, "eval_steps_per_second": 2.622, "epoch": 16.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf9a57b36b9276338603832189c301268f56bae13bf00fb4ac5da15b09879d59
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb82b94c7d34decc95ea0c0ed51c0cccd4dc0cf1f0a8925648cdfafaeb1ad6e6
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fcbd0efc1ba3f1cf85a1ad153a13999c85e76f9f2b7c9440f87ec591788eaa1
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb0cfb7ad5f79a151980f8e8fcb534c9dbe0dc5ca44d80132aa39db60c590550
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa19aa16a103bc8d557dbe011a86e4135bafaa6acf98d21f7c8a1f0ef4362155
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:94f6e5d2d51fc4962178d6141bcb4e442c8a4b69e91f35e541dc52137ea1701b
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44cd6576738e9780ee196c4c60bbd639b2c9174b22df17c2b94e485513761999
 size 14439

 version https://git-lfs.github.com/spec/v1
+oid sha256:2072ed4e05f726a0eacce3eb003550040e7f5d45ef05ee6a83533a4c6834a56f
 size 14439

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3757bed4046f1195d42cf6b407ad0ea93daf46be4a6bcc490f1daeb7bdf87f8c
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b130d72fae4cb24993ff792bae18a70e194b6d74fdcb623b38c9f59180e61d5
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 15.0,
-  "global_step": 3885,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4668,11 +4668,325 @@
       "learning_rate": 0.0,
       "loss": 1.42,
       "step": 3885
     }
   ],
-  "max_steps": 3885,
-  "num_train_epochs": 15,
-  "total_flos": 4058518487040000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.3626197576522827,
+  "best_model_checkpoint": "output/oxxxymiron/checkpoint-4144",
+  "epoch": 16.0,
+  "global_step": 4144,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0,
       "loss": 1.42,
       "step": 3885
+    },
+    {
+      "epoch": 15.02,
+      "learning_rate": 1.261250123775442e-07,
+      "loss": 1.4818,
+      "step": 3890
+    },
+    {
+      "epoch": 15.04,
+      "learning_rate": 5.040362734534007e-07,
+      "loss": 1.4719,
+      "step": 3895
+    },
+    {
+      "epoch": 15.06,
+      "learning_rate": 1.132344160414715e-06,
+      "loss": 1.3957,
+      "step": 3900
+    },
+    {
+      "epoch": 15.08,
+      "learning_rate": 2.008738313494259e-06,
+      "loss": 1.4262,
+      "step": 3905
+    },
+    {
+      "epoch": 15.1,
+      "learning_rate": 3.1299961314263817e-06,
+      "loss": 1.3789,
+      "step": 3910
+    },
+    {
+      "epoch": 15.12,
+      "learning_rate": 4.491994621320232e-06,
+      "loss": 1.4566,
+      "step": 3915
+    },
+    {
+      "epoch": 15.14,
+      "learning_rate": 6.089725559373869e-06,
+      "loss": 1.4144,
+      "step": 3920
+    },
+    {
+      "epoch": 15.15,
+      "learning_rate": 7.91731390668544e-06,
+      "loss": 1.4525,
+      "step": 3925
+    },
+    {
+      "epoch": 15.17,
+      "learning_rate": 9.968039412440788e-06,
+      "loss": 1.4557,
+      "step": 3930
+    },
+    {
+      "epoch": 15.19,
+      "learning_rate": 1.2234361325042642e-05,
+      "loss": 1.4918,
+      "step": 3935
+    },
+    {
+      "epoch": 15.21,
+      "learning_rate": 1.4707946120313543e-05,
+      "loss": 1.5974,
+      "step": 3940
+    },
+    {
+      "epoch": 15.23,
+      "learning_rate": 1.7379698144815265e-05,
+      "loss": 1.3931,
+      "step": 3945
+    },
+    {
+      "epoch": 15.25,
+      "learning_rate": 2.0239793061604638e-05,
+      "loss": 1.3826,
+      "step": 3950
+    },
+    {
+      "epoch": 15.27,
+      "learning_rate": 2.3277713975440236e-05,
+      "loss": 1.4445,
+      "step": 3955
+    },
+    {
+      "epoch": 15.29,
+      "learning_rate": 2.6482290104606033e-05,
+      "loss": 1.4149,
+      "step": 3960
+    },
+    {
+      "epoch": 15.31,
+      "learning_rate": 2.984173785715038e-05,
+      "loss": 1.3804,
+      "step": 3965
+    },
+    {
+      "epoch": 15.33,
+      "learning_rate": 3.334370416049605e-05,
+      "loss": 1.3937,
+      "step": 3970
+    },
+    {
+      "epoch": 15.35,
+      "learning_rate": 3.697531188509998e-05,
+      "loss": 1.4221,
+      "step": 3975
+    },
+    {
+      "epoch": 15.37,
+      "learning_rate": 4.072320719512414e-05,
+      "loss": 1.4485,
+      "step": 3980
+    },
+    {
+      "epoch": 15.39,
+      "learning_rate": 4.4573608652015956e-05,
+      "loss": 1.5171,
+      "step": 3985
+    },
+    {
+      "epoch": 15.41,
+      "learning_rate": 4.851235789042871e-05,
+      "loss": 1.4849,
+      "step": 3990
+    },
+    {
+      "epoch": 15.42,
+      "learning_rate": 5.2524971680144367e-05,
+      "loss": 1.4614,
+      "step": 3995
+    },
+    {
+      "epoch": 15.44,
+      "learning_rate": 5.659669518256589e-05,
+      "loss": 1.413,
+      "step": 4000
+    },
+    {
+      "epoch": 15.46,
+      "learning_rate": 6.071255620594038e-05,
+      "loss": 1.4743,
+      "step": 4005
+    },
+    {
+      "epoch": 15.48,
+      "learning_rate": 6.485742025981448e-05,
+      "loss": 1.4832,
+      "step": 4010
+    },
+    {
+      "epoch": 15.5,
+      "learning_rate": 6.901604620628492e-05,
+      "loss": 1.5146,
+      "step": 4015
+    },
+    {
+      "epoch": 15.52,
+      "learning_rate": 7.317314230339967e-05,
+      "loss": 1.5513,
+      "step": 4020
+    },
+    {
+      "epoch": 15.54,
+      "learning_rate": 7.731342243463577e-05,
+      "loss": 1.5379,
+      "step": 4025
+    },
+    {
+      "epoch": 15.56,
+      "learning_rate": 8.142166231769639e-05,
+      "loss": 1.4753,
+      "step": 4030
+    },
+    {
+      "epoch": 15.58,
+      "learning_rate": 8.548275548593135e-05,
+      "loss": 1.5384,
+      "step": 4035
+    },
+    {
+      "epoch": 15.6,
+      "learning_rate": 8.948176883653908e-05,
+      "loss": 1.5967,
+      "step": 4040
+    },
+    {
+      "epoch": 15.62,
+      "learning_rate": 9.340399754128752e-05,
+      "loss": 1.4906,
+      "step": 4045
+    },
+    {
+      "epoch": 15.64,
+      "learning_rate": 9.723501911784575e-05,
+      "loss": 1.5322,
+      "step": 4050
+    },
+    {
+      "epoch": 15.66,
+      "learning_rate": 0.0001009607464628976,
+      "loss": 1.5223,
+      "step": 4055
+    },
+    {
+      "epoch": 15.68,
+      "learning_rate": 0.00010456747965202585,
+      "loss": 1.4992,
+      "step": 4060
+    },
+    {
+      "epoch": 15.69,
+      "learning_rate": 0.00010804195631589752,
+      "loss": 1.5217,
+      "step": 4065
+    },
+    {
+      "epoch": 15.71,
+      "learning_rate": 0.00011137140040750902,
+      "loss": 1.4526,
+      "step": 4070
+    },
+    {
+      "epoch": 15.73,
+      "learning_rate": 0.00011454356918116707,
+      "loss": 1.5553,
+      "step": 4075
+    },
+    {
+      "epoch": 15.75,
+      "learning_rate": 0.00011754679821046198,
+      "loss": 1.4297,
+      "step": 4080
+    },
+    {
+      "epoch": 15.77,
+      "learning_rate": 0.00012037004427969463,
+      "loss": 1.4843,
+      "step": 4085
+    },
+    {
+      "epoch": 15.79,
+      "learning_rate": 0.00012300292599103915,
+      "loss": 1.4636,
+      "step": 4090
+    },
+    {
+      "epoch": 15.81,
+      "learning_rate": 0.00012543576193812758,
+      "loss": 1.4776,
+      "step": 4095
+    },
+    {
+      "epoch": 15.83,
+      "learning_rate": 0.00012765960630568412,
+      "loss": 1.514,
+      "step": 4100
+    },
+    {
+      "epoch": 15.85,
+      "learning_rate": 0.00012966628176431014,
+      "loss": 1.4759,
+      "step": 4105
+    },
+    {
+      "epoch": 15.87,
+      "learning_rate": 0.00013144840953945605,
+      "loss": 1.396,
+      "step": 4110
+    },
+    {
+      "epoch": 15.89,
+      "learning_rate": 0.00013299943654401656,
+      "loss": 1.5696,
+      "step": 4115
+    },
+    {
+      "epoch": 15.91,
+      "learning_rate": 0.0001343136594747805,
+      "loss": 1.5059,
+      "step": 4120
+    },
+    {
+      "epoch": 15.93,
+      "learning_rate": 0.0001353862457841268,
+      "loss": 1.4919,
+      "step": 4125
+    },
+    {
+      "epoch": 15.95,
+      "learning_rate": 0.00013621325144985277,
+      "loss": 1.515,
+      "step": 4130
+    },
+    {
+      "epoch": 15.97,
+      "learning_rate": 0.00013679163547779453,
+      "loss": 1.4448,
+      "step": 4135
+    },
+    {
+      "epoch": 15.98,
+      "learning_rate": 0.00013711927108390882,
+      "loss": 1.5143,
+      "step": 4140
+    },
+    {
+      "epoch": 16.0,
+      "eval_loss": 1.3626197576522827,
+      "eval_runtime": 22.0604,
+      "eval_samples_per_second": 20.897,
+      "eval_steps_per_second": 2.629,
+      "step": 4144
     }
   ],
+  "max_steps": 4144,
+  "num_train_epochs": 16,
+  "total_flos": 4329217032192000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c8781197db3a3403466497c5da7316ba5318202ed62c8d7147bd3a02f7bd353
-size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa22e711f18ef9c494e076522c9fc6c23c5e5db9e44a9588da998b4d53d50ce7
+size 2991