Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0eeefc0953974bc6182f9295ec367311fa09efcbbaf8a8ab168c32d15f25f808
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f27b98f6cf0f23eab77e3d54b5a7f589dce996ac161e0893df2660837e38ed6a
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1df59c04fddedcf73dfce705890c7e47e311eb0086c1c02b12b30d61216ce28e
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:620c1fdc3c9b39083b8f370f6580946a66b53e2567231209749bc2443392a45d
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2a6b3e0c3ff5893f84a90a6bf08c86c55e142c9337e1326b6f66fd9467659ae
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:01d13d0ea722ac4b27a5252dde4d9e2a0755a70348289e0361a2b12030351906
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.045548081398010254,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.1342756183745584,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 18.832,
       "eval_steps_per_second": 9.495,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.53625152733184e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.02226005494594574,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 2.840989399293286,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 18.832,
       "eval_steps_per_second": 9.495,
       "step": 150
+    },
+    {
+      "epoch": 2.148409893992933,
+      "grad_norm": 0.12965770065784454,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 0.0034,
+      "step": 151
+    },
+    {
+      "epoch": 2.1625441696113072,
+      "grad_norm": 0.06591859459877014,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.0023,
+      "step": 152
+    },
+    {
+      "epoch": 2.176678445229682,
+      "grad_norm": 0.05958675220608711,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 0.0021,
+      "step": 153
+    },
+    {
+      "epoch": 2.1908127208480566,
+      "grad_norm": 0.863341748714447,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 0.0085,
+      "step": 154
+    },
+    {
+      "epoch": 2.204946996466431,
+      "grad_norm": 0.31874069571495056,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.0094,
+      "step": 155
+    },
+    {
+      "epoch": 2.2190812720848054,
+      "grad_norm": 0.22433172166347504,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.0084,
+      "step": 156
+    },
+    {
+      "epoch": 2.23321554770318,
+      "grad_norm": 0.03787798061966896,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 0.0014,
+      "step": 157
+    },
+    {
+      "epoch": 2.2473498233215548,
+      "grad_norm": 0.09445440024137497,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 0.0028,
+      "step": 158
+    },
+    {
+      "epoch": 2.2614840989399294,
+      "grad_norm": 1.9811441898345947,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 0.0709,
+      "step": 159
+    },
+    {
+      "epoch": 2.275618374558304,
+      "grad_norm": 0.33604347705841064,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.0134,
+      "step": 160
+    },
+    {
+      "epoch": 2.2897526501766783,
+      "grad_norm": 0.4111025035381317,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 0.0376,
+      "step": 161
+    },
+    {
+      "epoch": 2.303886925795053,
+      "grad_norm": 0.5210508108139038,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 0.0372,
+      "step": 162
+    },
+    {
+      "epoch": 2.3180212014134276,
+      "grad_norm": 0.09174925088882446,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 0.0026,
+      "step": 163
+    },
+    {
+      "epoch": 2.3321554770318023,
+      "grad_norm": 0.04885992035269737,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.0016,
+      "step": 164
+    },
+    {
+      "epoch": 2.3462897526501765,
+      "grad_norm": 0.0838780626654625,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 0.0028,
+      "step": 165
+    },
+    {
+      "epoch": 2.360424028268551,
+      "grad_norm": 0.06478317826986313,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 0.0021,
+      "step": 166
+    },
+    {
+      "epoch": 2.374558303886926,
+      "grad_norm": 0.09824801236391068,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.0026,
+      "step": 167
+    },
+    {
+      "epoch": 2.3886925795053005,
+      "grad_norm": 0.06248147413134575,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.0018,
+      "step": 168
+    },
+    {
+      "epoch": 2.402826855123675,
+      "grad_norm": 0.1567322015762329,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 0.0062,
+      "step": 169
+    },
+    {
+      "epoch": 2.4169611307420493,
+      "grad_norm": 0.28212109208106995,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.0068,
+      "step": 170
+    },
+    {
+      "epoch": 2.431095406360424,
+      "grad_norm": 0.11935470998287201,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 0.0023,
+      "step": 171
+    },
+    {
+      "epoch": 2.4452296819787986,
+      "grad_norm": 0.15177536010742188,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.0026,
+      "step": 172
+    },
+    {
+      "epoch": 2.4593639575971733,
+      "grad_norm": 0.22410543262958527,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.0029,
+      "step": 173
+    },
+    {
+      "epoch": 2.4734982332155475,
+      "grad_norm": 0.2390422374010086,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.0027,
+      "step": 174
+    },
+    {
+      "epoch": 2.487632508833922,
+      "grad_norm": 0.3170912563800812,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 0.0106,
+      "step": 175
+    },
+    {
+      "epoch": 2.501766784452297,
+      "grad_norm": 0.2848634421825409,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.0198,
+      "step": 176
+    },
+    {
+      "epoch": 2.5159010600706715,
+      "grad_norm": 0.3001430630683899,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 0.0082,
+      "step": 177
+    },
+    {
+      "epoch": 2.530035335689046,
+      "grad_norm": 0.15458659827709198,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.0055,
+      "step": 178
+    },
+    {
+      "epoch": 2.5441696113074204,
+      "grad_norm": 0.19815073907375336,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.0108,
+      "step": 179
+    },
+    {
+      "epoch": 2.558303886925795,
+      "grad_norm": 0.06913748383522034,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.0024,
+      "step": 180
+    },
+    {
+      "epoch": 2.5724381625441697,
+      "grad_norm": 0.6175816059112549,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 0.017,
+      "step": 181
+    },
+    {
+      "epoch": 2.586572438162544,
+      "grad_norm": 0.17493540048599243,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.0062,
+      "step": 182
+    },
+    {
+      "epoch": 2.6007067137809186,
+      "grad_norm": 0.030856192111968994,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 0.0011,
+      "step": 183
+    },
+    {
+      "epoch": 2.614840989399293,
+      "grad_norm": 0.1881757527589798,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 0.0075,
+      "step": 184
+    },
+    {
+      "epoch": 2.628975265017668,
+      "grad_norm": 0.03700214624404907,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.0014,
+      "step": 185
+    },
+    {
+      "epoch": 2.6431095406360425,
+      "grad_norm": 0.1864822953939438,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 0.0089,
+      "step": 186
+    },
+    {
+      "epoch": 2.657243816254417,
+      "grad_norm": 0.07615940272808075,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 0.0026,
+      "step": 187
+    },
+    {
+      "epoch": 2.6713780918727914,
+      "grad_norm": 0.04182976111769676,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.0014,
+      "step": 188
+    },
+    {
+      "epoch": 2.685512367491166,
+      "grad_norm": 0.11160821467638016,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 0.0019,
+      "step": 189
+    },
+    {
+      "epoch": 2.6996466431095407,
+      "grad_norm": 0.030893972143530846,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.0012,
+      "step": 190
+    },
+    {
+      "epoch": 2.713780918727915,
+      "grad_norm": 0.03732634335756302,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.0012,
+      "step": 191
+    },
+    {
+      "epoch": 2.7279151943462896,
+      "grad_norm": 0.1454208493232727,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.01,
+      "step": 192
+    },
+    {
+      "epoch": 2.7420494699646643,
+      "grad_norm": 0.11776148527860641,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 0.0078,
+      "step": 193
+    },
+    {
+      "epoch": 2.756183745583039,
+      "grad_norm": 0.1260043829679489,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.0033,
+      "step": 194
+    },
+    {
+      "epoch": 2.7703180212014136,
+      "grad_norm": 0.5196786522865295,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 0.0244,
+      "step": 195
+    },
+    {
+      "epoch": 2.7844522968197882,
+      "grad_norm": 0.4225069284439087,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 0.0241,
+      "step": 196
+    },
+    {
+      "epoch": 2.7985865724381624,
+      "grad_norm": 0.1836262196302414,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.0072,
+      "step": 197
+    },
+    {
+      "epoch": 2.812720848056537,
+      "grad_norm": 0.04496635124087334,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 0.0018,
+      "step": 198
+    },
+    {
+      "epoch": 2.8268551236749118,
+      "grad_norm": 0.3070247769355774,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 0.0095,
+      "step": 199
+    },
+    {
+      "epoch": 2.840989399293286,
+      "grad_norm": 0.13280314207077026,
+      "learning_rate": 0.0,
+      "loss": 0.0015,
+      "step": 200
+    },
+    {
+      "epoch": 2.840989399293286,
+      "eval_loss": 0.02226005494594574,
+      "eval_runtime": 6.3135,
+      "eval_samples_per_second": 18.848,
+      "eval_steps_per_second": 9.503,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.138166870310912e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null