Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:147088364087c3fad831775bc4469f46b950d182162ba02099669db1fe55f4af
 size 113864

 version https://git-lfs.github.com/spec/v1
+oid sha256:4aace6758aa333819f1d4668b2d13633c6c2718433ec6dbc84c79e9b4712b6ea
 size 113864

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ceba59b68425b47fb51eefdfe5c5aa4595e5a3557307d13e74b2314aff76372
 size 244554

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a3dd285134fb12f75c1b167173fe8cf7c2a29221bbb324e8cf314dfe19f52e0
 size 244554

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9cd81c56c60d7508b1b3f30d02a538a526d8fbeceb1c089c3417001c05ccc1d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:e09cc71e727f796256be622d90a4d3d7f00f1b4922fdd76300ccc5819359e922
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:430a0fac64bfcd46ac91ef4a7b278a6834898a13d105fde4623d6c1e2515fa17
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e27910f8d144b5cddf12f704ada3c8509971bf1c6aaf0d219a83b4cdc30c4841
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f86b25789c082b2fd448f1034ad4a179d965deca6e8b4c22aa5bbe3df85d6bd6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1ccdb4caecd9cae2621f3f792975b146d7ecc5d6593b1955a0d774c67bdb21b
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f589e486dc0a192b5a542614dbf6b8b5f161b4f5ea456e379174bcdfd96cb9b0
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:53635b930d2269330cdda6f49cd5c3974b1acf8ffb2673e19621d554c8c3b59c
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.364595413208008,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.04056795131845842,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 617.126,
       "eval_steps_per_second": 77.141,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 41877189427200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.35922622680664,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.08113590263691683,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 617.126,
       "eval_steps_per_second": 77.141,
       "step": 25
+    },
+    {
+      "epoch": 0.042190669371196754,
+      "grad_norm": 3.5818777084350586,
+      "learning_rate": 5e-05,
+      "loss": 10.3649,
+      "step": 26
+    },
+    {
+      "epoch": 0.04381338742393509,
+      "grad_norm": 3.4645745754241943,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 10.364,
+      "step": 27
+    },
+    {
+      "epoch": 0.04543610547667343,
+      "grad_norm": 3.6278162002563477,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 10.3643,
+      "step": 28
+    },
+    {
+      "epoch": 0.047058823529411764,
+      "grad_norm": 3.793539524078369,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 10.3632,
+      "step": 29
+    },
+    {
+      "epoch": 0.0486815415821501,
+      "grad_norm": 3.703801393508911,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 10.3633,
+      "step": 30
+    },
+    {
+      "epoch": 0.05030425963488844,
+      "grad_norm": 3.6949877738952637,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 10.3622,
+      "step": 31
+    },
+    {
+      "epoch": 0.051926977687626774,
+      "grad_norm": 3.5052080154418945,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 10.3632,
+      "step": 32
+    },
+    {
+      "epoch": 0.05354969574036511,
+      "grad_norm": 3.9035098552703857,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 10.3618,
+      "step": 33
+    },
+    {
+      "epoch": 0.05517241379310345,
+      "grad_norm": 3.7165908813476562,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 10.3643,
+      "step": 34
+    },
+    {
+      "epoch": 0.056795131845841784,
+      "grad_norm": 3.94091534614563,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 10.3604,
+      "step": 35
+    },
+    {
+      "epoch": 0.05841784989858012,
+      "grad_norm": 3.948307752609253,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 10.3584,
+      "step": 36
+    },
+    {
+      "epoch": 0.06004056795131846,
+      "grad_norm": 4.001682758331299,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 10.361,
+      "step": 37
+    },
+    {
+      "epoch": 0.061663286004056794,
+      "grad_norm": 4.184301853179932,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 10.3577,
+      "step": 38
+    },
+    {
+      "epoch": 0.06328600405679513,
+      "grad_norm": 4.08730411529541,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 10.3604,
+      "step": 39
+    },
+    {
+      "epoch": 0.06490872210953347,
+      "grad_norm": 3.9339916706085205,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 10.3606,
+      "step": 40
+    },
+    {
+      "epoch": 0.0665314401622718,
+      "grad_norm": 4.170976161956787,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 10.3583,
+      "step": 41
+    },
+    {
+      "epoch": 0.06815415821501014,
+      "grad_norm": 4.157967567443848,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 10.3596,
+      "step": 42
+    },
+    {
+      "epoch": 0.06977687626774848,
+      "grad_norm": 3.94195556640625,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 10.361,
+      "step": 43
+    },
+    {
+      "epoch": 0.07139959432048681,
+      "grad_norm": 3.8017725944519043,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 10.3617,
+      "step": 44
+    },
+    {
+      "epoch": 0.07302231237322515,
+      "grad_norm": 4.163522720336914,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 10.361,
+      "step": 45
+    },
+    {
+      "epoch": 0.07464503042596349,
+      "grad_norm": 3.995657444000244,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 10.3593,
+      "step": 46
+    },
+    {
+      "epoch": 0.07626774847870182,
+      "grad_norm": 4.11703634262085,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 10.3603,
+      "step": 47
+    },
+    {
+      "epoch": 0.07789046653144016,
+      "grad_norm": 4.220155715942383,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 10.3574,
+      "step": 48
+    },
+    {
+      "epoch": 0.0795131845841785,
+      "grad_norm": 4.050826549530029,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 10.3566,
+      "step": 49
+    },
+    {
+      "epoch": 0.08113590263691683,
+      "grad_norm": 4.6649274826049805,
+      "learning_rate": 0.0,
+      "loss": 10.3551,
+      "step": 50
+    },
+    {
+      "epoch": 0.08113590263691683,
+      "eval_loss": 10.35922622680664,
+      "eval_runtime": 3.6442,
+      "eval_samples_per_second": 1139.359,
+      "eval_steps_per_second": 142.42,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 83754378854400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null