Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40f29cc3fd1011bc57055603035bbba1f4431d4e253a521435ce2e3da69f21fb
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:be5dbdc32e7445c42a512cf4c17e5e5e33f9aa0999a712bd731f78cf66e4dab6
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ac38274e56e9cfc0f25d093e807ce8008d28348dc2a180f6aba5ef65d428730
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:55bd09d29bddb55b195b56bb697c19145fbe7de4cde97acade6d5a55473608b9
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7d4990bd536f1641f898e1713d3de7b91391d52505ffcd9bd57efd82cc5d05e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8480e8b46ee0b9b64f328ba50ce5041930bd6768d3c8910a21e7038018f33a05
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6340785026550293,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.09751340809361288,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 14.049,
       "eval_steps_per_second": 3.514,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0023961402631782e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.6038893461227417,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1300178774581505,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.049,
       "eval_steps_per_second": 3.514,
       "step": 150
+    },
+    {
+      "epoch": 0.09816349748090362,
+      "grad_norm": 0.2876153588294983,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 1.5118,
+      "step": 151
+    },
+    {
+      "epoch": 0.09881358686819437,
+      "grad_norm": 0.34358447790145874,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 1.6048,
+      "step": 152
+    },
+    {
+      "epoch": 0.09946367625548513,
+      "grad_norm": 0.37248337268829346,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 1.6351,
+      "step": 153
+    },
+    {
+      "epoch": 0.10011376564277588,
+      "grad_norm": 0.39405882358551025,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 1.6198,
+      "step": 154
+    },
+    {
+      "epoch": 0.10076385503006663,
+      "grad_norm": 0.40504729747772217,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 1.6154,
+      "step": 155
+    },
+    {
+      "epoch": 0.10141394441735739,
+      "grad_norm": 0.41873466968536377,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 1.6639,
+      "step": 156
+    },
+    {
+      "epoch": 0.10206403380464814,
+      "grad_norm": 0.40044283866882324,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 1.5751,
+      "step": 157
+    },
+    {
+      "epoch": 0.10271412319193889,
+      "grad_norm": 0.3957197368144989,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 1.6357,
+      "step": 158
+    },
+    {
+      "epoch": 0.10336421257922965,
+      "grad_norm": 0.4234384298324585,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 1.599,
+      "step": 159
+    },
+    {
+      "epoch": 0.1040143019665204,
+      "grad_norm": 0.4401893615722656,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 1.7311,
+      "step": 160
+    },
+    {
+      "epoch": 0.10466439135381114,
+      "grad_norm": 0.4216889441013336,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 1.6892,
+      "step": 161
+    },
+    {
+      "epoch": 0.1053144807411019,
+      "grad_norm": 0.4414180517196655,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.6253,
+      "step": 162
+    },
+    {
+      "epoch": 0.10596457012839265,
+      "grad_norm": 0.4374978542327881,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 1.6735,
+      "step": 163
+    },
+    {
+      "epoch": 0.10661465951568341,
+      "grad_norm": 0.4437277615070343,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 1.6566,
+      "step": 164
+    },
+    {
+      "epoch": 0.10726474890297416,
+      "grad_norm": 0.43293166160583496,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 1.6214,
+      "step": 165
+    },
+    {
+      "epoch": 0.10791483829026491,
+      "grad_norm": 0.43972572684288025,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 1.6663,
+      "step": 166
+    },
+    {
+      "epoch": 0.10856492767755567,
+      "grad_norm": 0.46562856435775757,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 1.6176,
+      "step": 167
+    },
+    {
+      "epoch": 0.10921501706484642,
+      "grad_norm": 0.45943161845207214,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 1.617,
+      "step": 168
+    },
+    {
+      "epoch": 0.10986510645213717,
+      "grad_norm": 0.46964287757873535,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 1.6882,
+      "step": 169
+    },
+    {
+      "epoch": 0.11051519583942793,
+      "grad_norm": 0.5158145427703857,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 1.6663,
+      "step": 170
+    },
+    {
+      "epoch": 0.11116528522671867,
+      "grad_norm": 0.5057429671287537,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 1.6674,
+      "step": 171
+    },
+    {
+      "epoch": 0.11181537461400942,
+      "grad_norm": 0.5682820081710815,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 1.776,
+      "step": 172
+    },
+    {
+      "epoch": 0.11246546400130018,
+      "grad_norm": 0.5342135429382324,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 1.6675,
+      "step": 173
+    },
+    {
+      "epoch": 0.11311555338859093,
+      "grad_norm": 0.5700634121894836,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 1.6312,
+      "step": 174
+    },
+    {
+      "epoch": 0.11376564277588168,
+      "grad_norm": 0.6046521663665771,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 1.7924,
+      "step": 175
+    },
+    {
+      "epoch": 0.11441573216317244,
+      "grad_norm": 0.6340796947479248,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 1.5288,
+      "step": 176
+    },
+    {
+      "epoch": 0.11506582155046319,
+      "grad_norm": 0.6920514106750488,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 1.746,
+      "step": 177
+    },
+    {
+      "epoch": 0.11571591093775394,
+      "grad_norm": 0.6686057448387146,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 1.5386,
+      "step": 178
+    },
+    {
+      "epoch": 0.1163660003250447,
+      "grad_norm": 0.7893428802490234,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 1.5343,
+      "step": 179
+    },
+    {
+      "epoch": 0.11701608971233544,
+      "grad_norm": 0.8433814644813538,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 1.5279,
+      "step": 180
+    },
+    {
+      "epoch": 0.11766617909962619,
+      "grad_norm": 0.8944865465164185,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.7916,
+      "step": 181
+    },
+    {
+      "epoch": 0.11831626848691695,
+      "grad_norm": 0.8914141058921814,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 1.4167,
+      "step": 182
+    },
+    {
+      "epoch": 0.1189663578742077,
+      "grad_norm": 0.8821380734443665,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 1.4686,
+      "step": 183
+    },
+    {
+      "epoch": 0.11961644726149846,
+      "grad_norm": 1.094041109085083,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 1.4481,
+      "step": 184
+    },
+    {
+      "epoch": 0.12026653664878921,
+      "grad_norm": 0.951711893081665,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 1.4217,
+      "step": 185
+    },
+    {
+      "epoch": 0.12091662603607996,
+      "grad_norm": 0.9843878149986267,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 1.3521,
+      "step": 186
+    },
+    {
+      "epoch": 0.12156671542337072,
+      "grad_norm": 1.0316113233566284,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 1.343,
+      "step": 187
+    },
+    {
+      "epoch": 0.12221680481066147,
+      "grad_norm": 1.122435212135315,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 1.5465,
+      "step": 188
+    },
+    {
+      "epoch": 0.12286689419795221,
+      "grad_norm": 1.0913511514663696,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 1.647,
+      "step": 189
+    },
+    {
+      "epoch": 0.12351698358524298,
+      "grad_norm": 1.1212204694747925,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 1.4082,
+      "step": 190
+    },
+    {
+      "epoch": 0.12416707297253372,
+      "grad_norm": 1.4503830671310425,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 2.0094,
+      "step": 191
+    },
+    {
+      "epoch": 0.12481716235982447,
+      "grad_norm": 1.1698222160339355,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 1.3171,
+      "step": 192
+    },
+    {
+      "epoch": 0.12546725174711523,
+      "grad_norm": 1.3957828283309937,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 1.7542,
+      "step": 193
+    },
+    {
+      "epoch": 0.12611734113440598,
+      "grad_norm": 1.4261584281921387,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 1.7136,
+      "step": 194
+    },
+    {
+      "epoch": 0.12676743052169673,
+      "grad_norm": 1.3807520866394043,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 1.4914,
+      "step": 195
+    },
+    {
+      "epoch": 0.12741751990898748,
+      "grad_norm": 1.4543288946151733,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 1.5193,
+      "step": 196
+    },
+    {
+      "epoch": 0.12806760929627825,
+      "grad_norm": 1.5122958421707153,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 1.49,
+      "step": 197
+    },
+    {
+      "epoch": 0.128717698683569,
+      "grad_norm": 1.9588783979415894,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 1.6422,
+      "step": 198
+    },
+    {
+      "epoch": 0.12936778807085975,
+      "grad_norm": 1.8873589038848877,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 1.8841,
+      "step": 199
+    },
+    {
+      "epoch": 0.1300178774581505,
+      "grad_norm": 3.451848268508911,
+      "learning_rate": 0.0,
+      "loss": 2.0363,
+      "step": 200
+    },
+    {
+      "epoch": 0.1300178774581505,
+      "eval_loss": 1.6038893461227417,
+      "eval_runtime": 184.177,
+      "eval_samples_per_second": 14.068,
+      "eval_steps_per_second": 3.518,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.6709702668294554e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null