Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99c8ade0af856785e19b65fc998cc03ea920bac8d4a412cf1fb5f277f5d6397e
 size 134235712

 version https://git-lfs.github.com/spec/v1
+oid sha256:0df6756b059f16aa07f7d79123376b38f052a910e6945be9aa722410007449b3
 size 134235712

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:734337b12337c6baf1b23268266eb1f1de114d39a143bdd3e8b49580e2c54023
 size 68309690

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d63b56671fb9a4c6c73d5ed13f8a2b0a244a69851a7431ee23bbdf3aeb47967
 size 68309690

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:945aadf6174da231a93134fe3bd703be19228e08fa5c2afe1a14824d79db10e3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0be0051830674b0e43f75c7df4507df2503b349daa02c6eec6b2010069e4815b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:970068ebf9c0dc6a40c93653c563bb0b2ba5296a6c46496b504a3f1343bf3a62
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7df65c8f4b426598b0abc4173b1983dcf7411aee63ea4061d980eae7a1af2363
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8573229908943176,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.42796005706134094,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 14.677,
       "eval_steps_per_second": 3.682,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.77899550654464e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8437546491622925,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.5706134094151213,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.677,
       "eval_steps_per_second": 3.682,
       "step": 150
+    },
+    {
+      "epoch": 0.43081312410841655,
+      "grad_norm": 3.604283332824707,
+      "learning_rate": 1.7085562498478883e-05,
+      "loss": 4.3125,
+      "step": 151
+    },
+    {
+      "epoch": 0.43366619115549215,
+      "grad_norm": 3.603677988052368,
+      "learning_rate": 1.6431976865013128e-05,
+      "loss": 3.4679,
+      "step": 152
+    },
+    {
+      "epoch": 0.43651925820256776,
+      "grad_norm": 3.7253434658050537,
+      "learning_rate": 1.5788935343973164e-05,
+      "loss": 3.146,
+      "step": 153
+    },
+    {
+      "epoch": 0.43937232524964337,
+      "grad_norm": 3.057725667953491,
+      "learning_rate": 1.5156613736490108e-05,
+      "loss": 2.8557,
+      "step": 154
+    },
+    {
+      "epoch": 0.442225392296719,
+      "grad_norm": 3.019693374633789,
+      "learning_rate": 1.4535184912977763e-05,
+      "loss": 3.3619,
+      "step": 155
+    },
+    {
+      "epoch": 0.4450784593437946,
+      "grad_norm": 3.0786960124969482,
+      "learning_rate": 1.3924818765871553e-05,
+      "loss": 3.4618,
+      "step": 156
+    },
+    {
+      "epoch": 0.4479315263908702,
+      "grad_norm": 2.744488000869751,
+      "learning_rate": 1.3325682163181601e-05,
+      "loss": 3.319,
+      "step": 157
+    },
+    {
+      "epoch": 0.4507845934379458,
+      "grad_norm": 2.7371444702148438,
+      "learning_rate": 1.2737938902872767e-05,
+      "loss": 3.0077,
+      "step": 158
+    },
+    {
+      "epoch": 0.4536376604850214,
+      "grad_norm": 2.9462661743164062,
+      "learning_rate": 1.2161749668083823e-05,
+      "loss": 3.0789,
+      "step": 159
+    },
+    {
+      "epoch": 0.456490727532097,
+      "grad_norm": 2.8661088943481445,
+      "learning_rate": 1.159727198319836e-05,
+      "loss": 3.1991,
+      "step": 160
+    },
+    {
+      "epoch": 0.4593437945791726,
+      "grad_norm": 3.022242307662964,
+      "learning_rate": 1.1044660170779142e-05,
+      "loss": 3.4119,
+      "step": 161
+    },
+    {
+      "epoch": 0.46219686162624823,
+      "grad_norm": 3.091960906982422,
+      "learning_rate": 1.0504065309377897e-05,
+      "loss": 3.6683,
+      "step": 162
+    },
+    {
+      "epoch": 0.46504992867332384,
+      "grad_norm": 2.757582426071167,
+      "learning_rate": 9.97563519223192e-06,
+      "loss": 3.0697,
+      "step": 163
+    },
+    {
+      "epoch": 0.46790299572039945,
+      "grad_norm": 3.1551263332366943,
+      "learning_rate": 9.459514286858898e-06,
+      "loss": 3.1847,
+      "step": 164
+    },
+    {
+      "epoch": 0.47075606276747506,
+      "grad_norm": 2.770595073699951,
+      "learning_rate": 8.95584369556093e-06,
+      "loss": 3.0202,
+      "step": 165
+    },
+    {
+      "epoch": 0.47360912981455067,
+      "grad_norm": 2.8422300815582275,
+      "learning_rate": 8.464761116848546e-06,
+      "loss": 3.0087,
+      "step": 166
+    },
+    {
+      "epoch": 0.4764621968616263,
+      "grad_norm": 2.983774185180664,
+      "learning_rate": 7.986400807795349e-06,
+      "loss": 3.0064,
+      "step": 167
+    },
+    {
+      "epoch": 0.4793152639087018,
+      "grad_norm": 3.055002212524414,
+      "learning_rate": 7.520893547333436e-06,
+      "loss": 3.3407,
+      "step": 168
+    },
+    {
+      "epoch": 0.48216833095577744,
+      "grad_norm": 2.7819559574127197,
+      "learning_rate": 7.068366600499744e-06,
+      "loss": 2.991,
+      "step": 169
+    },
+    {
+      "epoch": 0.48502139800285304,
+      "grad_norm": 2.748218059539795,
+      "learning_rate": 6.6289436836431076e-06,
+      "loss": 2.8718,
+      "step": 170
+    },
+    {
+      "epoch": 0.48787446504992865,
+      "grad_norm": 2.8351593017578125,
+      "learning_rate": 6.20274493060135e-06,
+      "loss": 3.1991,
+      "step": 171
+    },
+    {
+      "epoch": 0.49072753209700426,
+      "grad_norm": 2.8751378059387207,
+      "learning_rate": 5.789886859858009e-06,
+      "loss": 3.3775,
+      "step": 172
+    },
+    {
+      "epoch": 0.49358059914407987,
+      "grad_norm": 3.1689553260803223,
+      "learning_rate": 5.3904823426872605e-06,
+      "loss": 3.2741,
+      "step": 173
+    },
+    {
+      "epoch": 0.4964336661911555,
+      "grad_norm": 3.32289457321167,
+      "learning_rate": 5.004640572296062e-06,
+      "loss": 3.173,
+      "step": 174
+    },
+    {
+      "epoch": 0.4992867332382311,
+      "grad_norm": 2.96350359916687,
+      "learning_rate": 4.632467033971838e-06,
+      "loss": 3.0591,
+      "step": 175
+    },
+    {
+      "epoch": 0.5021398002853067,
+      "grad_norm": 3.0512301921844482,
+      "learning_rate": 4.274063476243839e-06,
+      "loss": 3.3743,
+      "step": 176
+    },
+    {
+      "epoch": 0.5049928673323824,
+      "grad_norm": 3.3975963592529297,
+      "learning_rate": 3.929527883066117e-06,
+      "loss": 3.2659,
+      "step": 177
+    },
+    {
+      "epoch": 0.5078459343794579,
+      "grad_norm": 2.7579398155212402,
+      "learning_rate": 3.5989544470296595e-06,
+      "loss": 3.1854,
+      "step": 178
+    },
+    {
+      "epoch": 0.5106990014265336,
+      "grad_norm": 3.1428921222686768,
+      "learning_rate": 3.282433543611136e-06,
+      "loss": 3.2512,
+      "step": 179
+    },
+    {
+      "epoch": 0.5135520684736091,
+      "grad_norm": 3.0572800636291504,
+      "learning_rate": 2.980051706465095e-06,
+      "loss": 3.3415,
+      "step": 180
+    },
+    {
+      "epoch": 0.5164051355206848,
+      "grad_norm": 3.4359803199768066,
+      "learning_rate": 2.691891603766556e-06,
+      "loss": 3.2923,
+      "step": 181
+    },
+    {
+      "epoch": 0.5192582025677603,
+      "grad_norm": 2.7140772342681885,
+      "learning_rate": 2.4180320156103298e-06,
+      "loss": 2.7363,
+      "step": 182
+    },
+    {
+      "epoch": 0.5221112696148359,
+      "grad_norm": 3.058346748352051,
+      "learning_rate": 2.158547812473352e-06,
+      "loss": 2.8894,
+      "step": 183
+    },
+    {
+      "epoch": 0.5249643366619116,
+      "grad_norm": 3.607133150100708,
+      "learning_rate": 1.9135099347458293e-06,
+      "loss": 3.2894,
+      "step": 184
+    },
+    {
+      "epoch": 0.5278174037089871,
+      "grad_norm": 3.3340420722961426,
+      "learning_rate": 1.6829853733368294e-06,
+      "loss": 3.4845,
+      "step": 185
+    },
+    {
+      "epoch": 0.5306704707560628,
+      "grad_norm": 3.4746623039245605,
+      "learning_rate": 1.4670371513596842e-06,
+      "loss": 3.5554,
+      "step": 186
+    },
+    {
+      "epoch": 0.5335235378031383,
+      "grad_norm": 2.9171152114868164,
+      "learning_rate": 1.2657243069020402e-06,
+      "loss": 3.1133,
+      "step": 187
+    },
+    {
+      "epoch": 0.536376604850214,
+      "grad_norm": 3.17696213722229,
+      "learning_rate": 1.0791018768854855e-06,
+      "loss": 3.0668,
+      "step": 188
+    },
+    {
+      "epoch": 0.5392296718972895,
+      "grad_norm": 3.4017016887664795,
+      "learning_rate": 9.072208820189698e-07,
+      "loss": 3.7365,
+      "step": 189
+    },
+    {
+      "epoch": 0.5420827389443652,
+      "grad_norm": 3.1811530590057373,
+      "learning_rate": 7.501283128502722e-07,
+      "loss": 3.3977,
+      "step": 190
+    },
+    {
+      "epoch": 0.5449358059914408,
+      "grad_norm": 3.294144630432129,
+      "learning_rate": 6.07867116919233e-07,
+      "loss": 3.4082,
+      "step": 191
+    },
+    {
+      "epoch": 0.5477888730385164,
+      "grad_norm": 3.589067220687866,
+      "learning_rate": 4.804761870163643e-07,
+      "loss": 3.5386,
+      "step": 192
+    },
+    {
+      "epoch": 0.550641940085592,
+      "grad_norm": 3.937700033187866,
+      "learning_rate": 3.6799035054990215e-07,
+      "loss": 3.4633,
+      "step": 193
+    },
+    {
+      "epoch": 0.5534950071326676,
+      "grad_norm": 3.4960124492645264,
+      "learning_rate": 2.704403600243721e-07,
+      "loss": 3.1967,
+      "step": 194
+    },
+    {
+      "epoch": 0.5563480741797432,
+      "grad_norm": 3.7518258094787598,
+      "learning_rate": 1.878528846331584e-07,
+      "loss": 3.3838,
+      "step": 195
+    },
+    {
+      "epoch": 0.5592011412268189,
+      "grad_norm": 3.488942861557007,
+      "learning_rate": 1.202505029674006e-07,
+      "loss": 3.4166,
+      "step": 196
+    },
+    {
+      "epoch": 0.5620542082738944,
+      "grad_norm": 3.753591775894165,
+      "learning_rate": 6.765169684323947e-08,
+      "loss": 3.4096,
+      "step": 197
+    },
+    {
+      "epoch": 0.5649072753209701,
+      "grad_norm": 4.523373126983643,
+      "learning_rate": 3.007084624906731e-08,
+      "loss": 3.9432,
+      "step": 198
+    },
+    {
+      "epoch": 0.5677603423680456,
+      "grad_norm": 3.705737829208374,
+      "learning_rate": 7.518225414204771e-09,
+      "loss": 3.5566,
+      "step": 199
+    },
+    {
+      "epoch": 0.5706134094151213,
+      "grad_norm": 4.9319071769714355,
+      "learning_rate": 0.0,
+      "loss": 3.8969,
+      "step": 200
+    },
+    {
+      "epoch": 0.5706134094151213,
+      "eval_loss": 0.8437546491622925,
+      "eval_runtime": 39.0271,
+      "eval_samples_per_second": 15.118,
+      "eval_steps_per_second": 3.792,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.70532734205952e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null