Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b26069ee1422ac74d8588b1f3dcd92e3e60f8dab7c4d0b5ed63b0e125030bd16
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:a361909fa9ba0b32c255a2f4770b30e5a77063056f4e91e451999d62d0475634
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8eea2c994f4350f9c4253748a2d13d473a737f9ea84991d4def15bed075cecd3
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:e40fab486d78153122d08fc5ca0f612e9e35973b05db00b808386ecd37daa91f
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b69cdb707aa3f39d657a7d2544a45f3c7caee2569520a86497cccb1d8ed619f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:64a009b1323d64363b75244b78b716415959290dc9584ccc9af208a99d43f280
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.1532428115606308,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.11033468186833395,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.868,
       "eval_steps_per_second": 5.934,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.274084219959378e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.15221890807151794,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14711290915777858,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.868,
       "eval_steps_per_second": 5.934,
       "step": 150
+    },
+    {
+      "epoch": 0.11107024641412284,
+      "grad_norm": 0.12797152996063232,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.1411,
+      "step": 151
+    },
+    {
+      "epoch": 0.11180581095991174,
+      "grad_norm": 0.1103443130850792,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.1444,
+      "step": 152
+    },
+    {
+      "epoch": 0.11254137550570062,
+      "grad_norm": 0.08198443800210953,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.1422,
+      "step": 153
+    },
+    {
+      "epoch": 0.11327694005148951,
+      "grad_norm": 0.10905614495277405,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.1533,
+      "step": 154
+    },
+    {
+      "epoch": 0.11401250459727841,
+      "grad_norm": 0.13996587693691254,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.1418,
+      "step": 155
+    },
+    {
+      "epoch": 0.1147480691430673,
+      "grad_norm": 0.07795163244009018,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.1514,
+      "step": 156
+    },
+    {
+      "epoch": 0.1154836336888562,
+      "grad_norm": 0.07931917160749435,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.1464,
+      "step": 157
+    },
+    {
+      "epoch": 0.11621919823464509,
+      "grad_norm": 0.0686430111527443,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.1472,
+      "step": 158
+    },
+    {
+      "epoch": 0.11695476278043399,
+      "grad_norm": 0.083328977227211,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.1532,
+      "step": 159
+    },
+    {
+      "epoch": 0.11769032732622288,
+      "grad_norm": 0.09695445746183395,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.1645,
+      "step": 160
+    },
+    {
+      "epoch": 0.11842589187201177,
+      "grad_norm": 0.1066075786948204,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.1596,
+      "step": 161
+    },
+    {
+      "epoch": 0.11916145641780067,
+      "grad_norm": 0.08189854025840759,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.1734,
+      "step": 162
+    },
+    {
+      "epoch": 0.11989702096358955,
+      "grad_norm": 0.06908301264047623,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.1726,
+      "step": 163
+    },
+    {
+      "epoch": 0.12063258550937844,
+      "grad_norm": 0.08635183423757553,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.1892,
+      "step": 164
+    },
+    {
+      "epoch": 0.12136815005516734,
+      "grad_norm": 0.1024232879281044,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.174,
+      "step": 165
+    },
+    {
+      "epoch": 0.12210371460095623,
+      "grad_norm": 0.09103770554065704,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.1606,
+      "step": 166
+    },
+    {
+      "epoch": 0.12283927914674513,
+      "grad_norm": 0.07270883768796921,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.1572,
+      "step": 167
+    },
+    {
+      "epoch": 0.12357484369253402,
+      "grad_norm": 0.0831652358174324,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.172,
+      "step": 168
+    },
+    {
+      "epoch": 0.12431040823832291,
+      "grad_norm": 0.11702089756727219,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.1667,
+      "step": 169
+    },
+    {
+      "epoch": 0.1250459727841118,
+      "grad_norm": 0.08802536129951477,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.1639,
+      "step": 170
+    },
+    {
+      "epoch": 0.1257815373299007,
+      "grad_norm": 0.08707013726234436,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.1817,
+      "step": 171
+    },
+    {
+      "epoch": 0.1265171018756896,
+      "grad_norm": 0.0758250504732132,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.1584,
+      "step": 172
+    },
+    {
+      "epoch": 0.12725266642147848,
+      "grad_norm": 0.08350931107997894,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.1696,
+      "step": 173
+    },
+    {
+      "epoch": 0.12798823096726739,
+      "grad_norm": 0.07942811399698257,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.1441,
+      "step": 174
+    },
+    {
+      "epoch": 0.12872379551305627,
+      "grad_norm": 0.062362104654312134,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.1323,
+      "step": 175
+    },
+    {
+      "epoch": 0.12945936005884517,
+      "grad_norm": 0.07514508813619614,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.1618,
+      "step": 176
+    },
+    {
+      "epoch": 0.13019492460463405,
+      "grad_norm": 0.07202325016260147,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.1222,
+      "step": 177
+    },
+    {
+      "epoch": 0.13093048915042296,
+      "grad_norm": 0.0819399282336235,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.1395,
+      "step": 178
+    },
+    {
+      "epoch": 0.13166605369621184,
+      "grad_norm": 0.092143714427948,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.1463,
+      "step": 179
+    },
+    {
+      "epoch": 0.13240161824200072,
+      "grad_norm": 0.0711476281285286,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.1356,
+      "step": 180
+    },
+    {
+      "epoch": 0.13313718278778963,
+      "grad_norm": 0.06504477560520172,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.1335,
+      "step": 181
+    },
+    {
+      "epoch": 0.1338727473335785,
+      "grad_norm": 0.07897264510393143,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.1623,
+      "step": 182
+    },
+    {
+      "epoch": 0.13460831187936742,
+      "grad_norm": 0.08557404577732086,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.1515,
+      "step": 183
+    },
+    {
+      "epoch": 0.1353438764251563,
+      "grad_norm": 0.06471476703882217,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.134,
+      "step": 184
+    },
+    {
+      "epoch": 0.1360794409709452,
+      "grad_norm": 0.07636171579360962,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.1438,
+      "step": 185
+    },
+    {
+      "epoch": 0.1368150055167341,
+      "grad_norm": 0.05042177066206932,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.1344,
+      "step": 186
+    },
+    {
+      "epoch": 0.137550570062523,
+      "grad_norm": 0.05630411207675934,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.1498,
+      "step": 187
+    },
+    {
+      "epoch": 0.13828613460831188,
+      "grad_norm": 0.04595191031694412,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.1455,
+      "step": 188
+    },
+    {
+      "epoch": 0.13902169915410076,
+      "grad_norm": 0.07169954478740692,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.1542,
+      "step": 189
+    },
+    {
+      "epoch": 0.13975726369988967,
+      "grad_norm": 0.05570828542113304,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.1326,
+      "step": 190
+    },
+    {
+      "epoch": 0.14049282824567855,
+      "grad_norm": 0.09222900122404099,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.1656,
+      "step": 191
+    },
+    {
+      "epoch": 0.14122839279146746,
+      "grad_norm": 0.09059873968362808,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.1732,
+      "step": 192
+    },
+    {
+      "epoch": 0.14196395733725634,
+      "grad_norm": 0.11539854854345322,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.157,
+      "step": 193
+    },
+    {
+      "epoch": 0.14269952188304524,
+      "grad_norm": 0.08133802562952042,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.1733,
+      "step": 194
+    },
+    {
+      "epoch": 0.14343508642883412,
+      "grad_norm": 0.0870978981256485,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.1564,
+      "step": 195
+    },
+    {
+      "epoch": 0.14417065097462303,
+      "grad_norm": 0.07082478702068329,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.1379,
+      "step": 196
+    },
+    {
+      "epoch": 0.1449062155204119,
+      "grad_norm": 0.11655426025390625,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.1102,
+      "step": 197
+    },
+    {
+      "epoch": 0.14564178006620082,
+      "grad_norm": 0.07025150209665298,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.1203,
+      "step": 198
+    },
+    {
+      "epoch": 0.1463773446119897,
+      "grad_norm": 0.08077063411474228,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.1903,
+      "step": 199
+    },
+    {
+      "epoch": 0.14711290915777858,
+      "grad_norm": 0.10279268771409988,
+      "learning_rate": 0.0,
+      "loss": 0.194,
+      "step": 200
+    },
+    {
+      "epoch": 0.14711290915777858,
+      "eval_loss": 0.15221890807151794,
+      "eval_runtime": 193.1181,
+      "eval_samples_per_second": 11.858,
+      "eval_steps_per_second": 5.929,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.0321122932791706e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null