Training in progress, step 59808, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +410 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5bd47bd97c3be53d8c53262c25bb1d1459da7779b48ee7dabe13d9939834d3f4
 size 1484196216

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbb44ad69600872ef0bb443d3b43f12a5f06e90608b73f03ff4028fa72c313ed
 size 1484196216

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2271c4f336470e82e9f78edd39a4c7d5b223f2b5eb75c296d4ab288085f9335
 size 2968683840

 version https://git-lfs.github.com/spec/v1
+oid sha256:af01e6e7f36514043e3ce67e600b1d8d265d938af879be15564a49cf1e029d6f
 size 2968683840

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fffaaeb63cee162b502936a8086aa019d07c502f0d331d152aac98a58f846ddf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae79343f53c51d705aaa2e789fb3a4d88d0a467b2f986398151d95d4faac2c55
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67fb714e7769791dd1cf39d1f0bc2dae6d2eef2bc4029c1bc0bca242452dd77c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:023b85de6d1b039ecc813c0216bc60820d589f64ee137f692f288c5c4fc4729e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.9970281013705846,
   "eval_steps": 500,
-  "global_step": 59750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -418273,6 +418273,412 @@
       "learning_rate": 2.3282539897695464e-10,
       "loss": 1.4932,
       "step": 59750
     }
   ],
   "logging_steps": 1,
@@ -418287,12 +418693,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.399461518976745e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.9999373017166793,
   "eval_steps": 500,
+  "global_step": 59808,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.3282539897695464e-10,
       "loss": 1.4932,
       "step": 59750
+    },
+    {
+      "epoch": 2.997078259997241,
+      "grad_norm": 2.9519903659820557,
+      "learning_rate": 2.2486615378625176e-10,
+      "loss": 1.1974,
+      "step": 59751
+    },
+    {
+      "epoch": 2.9971284186238982,
+      "grad_norm": 2.8692264556884766,
+      "learning_rate": 2.1704532984223592e-10,
+      "loss": 1.525,
+      "step": 59752
+    },
+    {
+      "epoch": 2.997178577250555,
+      "grad_norm": 2.799671173095703,
+      "learning_rate": 2.0936292716711158e-10,
+      "loss": 1.2972,
+      "step": 59753
+    },
+    {
+      "epoch": 2.997228735877212,
+      "grad_norm": 3.265291690826416,
+      "learning_rate": 2.0181894577198103e-10,
+      "loss": 1.5709,
+      "step": 59754
+    },
+    {
+      "epoch": 2.9972788945038684,
+      "grad_norm": 2.9593746662139893,
+      "learning_rate": 1.944133856901509e-10,
+      "loss": 1.4483,
+      "step": 59755
+    },
+    {
+      "epoch": 2.9973290531305254,
+      "grad_norm": 2.9063448905944824,
+      "learning_rate": 1.8714624693272342e-10,
+      "loss": 1.113,
+      "step": 59756
+    },
+    {
+      "epoch": 2.997379211757182,
+      "grad_norm": 2.510237693786621,
+      "learning_rate": 1.800175295274542e-10,
+      "loss": 0.9191,
+      "step": 59757
+    },
+    {
+      "epoch": 2.997429370383839,
+      "grad_norm": 3.348395824432373,
+      "learning_rate": 1.730272334909966e-10,
+      "loss": 1.4245,
+      "step": 59758
+    },
+    {
+      "epoch": 2.9974795290104956,
+      "grad_norm": 3.254422426223755,
+      "learning_rate": 1.661753588400039e-10,
+      "loss": 1.3145,
+      "step": 59759
+    },
+    {
+      "epoch": 2.9975296876371527,
+      "grad_norm": 3.8515803813934326,
+      "learning_rate": 1.5946190559668063e-10,
+      "loss": 1.3135,
+      "step": 59760
+    },
+    {
+      "epoch": 2.9975798462638092,
+      "grad_norm": 3.0799078941345215,
+      "learning_rate": 1.5288687377768008e-10,
+      "loss": 1.2047,
+      "step": 59761
+    },
+    {
+      "epoch": 2.997630004890466,
+      "grad_norm": 3.328263998031616,
+      "learning_rate": 1.4645026339965562e-10,
+      "loss": 1.4763,
+      "step": 59762
+    },
+    {
+      "epoch": 2.997680163517123,
+      "grad_norm": 3.2846860885620117,
+      "learning_rate": 1.4015207448481172e-10,
+      "loss": 1.349,
+      "step": 59763
+    },
+    {
+      "epoch": 2.99773032214378,
+      "grad_norm": 3.0045347213745117,
+      "learning_rate": 1.339923070498017e-10,
+      "loss": 1.3809,
+      "step": 59764
+    },
+    {
+      "epoch": 2.9977804807704365,
+      "grad_norm": 3.032623052597046,
+      "learning_rate": 1.2797096111127893e-10,
+      "loss": 1.155,
+      "step": 59765
+    },
+    {
+      "epoch": 2.997830639397093,
+      "grad_norm": 3.425898313522339,
+      "learning_rate": 1.2208803668034563e-10,
+      "loss": 1.4988,
+      "step": 59766
+    },
+    {
+      "epoch": 2.99788079802375,
+      "grad_norm": 3.9673779010772705,
+      "learning_rate": 1.1634353377920626e-10,
+      "loss": 1.5579,
+      "step": 59767
+    },
+    {
+      "epoch": 2.997930956650407,
+      "grad_norm": 3.9234225749969482,
+      "learning_rate": 1.1073745242451417e-10,
+      "loss": 1.2646,
+      "step": 59768
+    },
+    {
+      "epoch": 2.9979811152770637,
+      "grad_norm": 4.005583763122559,
+      "learning_rate": 1.052697926329227e-10,
+      "loss": 1.1601,
+      "step": 59769
+    },
+    {
+      "epoch": 2.9980312739037203,
+      "grad_norm": 4.037759304046631,
+      "learning_rate": 9.994055440998296e-11,
+      "loss": 1.0708,
+      "step": 59770
+    },
+    {
+      "epoch": 2.9980814325303773,
+      "grad_norm": 4.791607856750488,
+      "learning_rate": 9.474973778345053e-11,
+      "loss": 1.1979,
+      "step": 59771
+    },
+    {
+      "epoch": 2.9981315911570343,
+      "grad_norm": 5.135807514190674,
+      "learning_rate": 8.969734275332542e-11,
+      "loss": 1.0534,
+      "step": 59772
+    },
+    {
+      "epoch": 2.998181749783691,
+      "grad_norm": 3.4575726985931396,
+      "learning_rate": 8.478336934736319e-11,
+      "loss": 1.4946,
+      "step": 59773
+    },
+    {
+      "epoch": 2.9982319084103475,
+      "grad_norm": 1.5949138402938843,
+      "learning_rate": 8.000781757111498e-11,
+      "loss": 1.1375,
+      "step": 59774
+    },
+    {
+      "epoch": 2.9982820670370045,
+      "grad_norm": 2.864654779434204,
+      "learning_rate": 7.53706874412341e-11,
+      "loss": 1.275,
+      "step": 59775
+    },
+    {
+      "epoch": 2.9983322256636615,
+      "grad_norm": 2.6163508892059326,
+      "learning_rate": 7.087197896882281e-11,
+      "loss": 1.0021,
+      "step": 59776
+    },
+    {
+      "epoch": 2.998382384290318,
+      "grad_norm": 3.790781259536743,
+      "learning_rate": 6.651169217053443e-11,
+      "loss": 1.2672,
+      "step": 59777
+    },
+    {
+      "epoch": 2.9984325429169747,
+      "grad_norm": 2.571331262588501,
+      "learning_rate": 6.228982704636899e-11,
+      "loss": 0.5672,
+      "step": 59778
+    },
+    {
+      "epoch": 2.9984827015436317,
+      "grad_norm": 3.5116817951202393,
+      "learning_rate": 5.820638362408204e-11,
+      "loss": 2.0333,
+      "step": 59779
+    },
+    {
+      "epoch": 2.9985328601702887,
+      "grad_norm": 3.2379603385925293,
+      "learning_rate": 5.426136190367359e-11,
+      "loss": 1.7546,
+      "step": 59780
+    },
+    {
+      "epoch": 2.9985830187969453,
+      "grad_norm": 2.421940803527832,
+      "learning_rate": 5.045476190179699e-11,
+      "loss": 1.0303,
+      "step": 59781
+    },
+    {
+      "epoch": 2.998633177423602,
+      "grad_norm": 3.13572359085083,
+      "learning_rate": 4.6786583624003346e-11,
+      "loss": 1.247,
+      "step": 59782
+    },
+    {
+      "epoch": 2.998683336050259,
+      "grad_norm": 2.828307628631592,
+      "learning_rate": 4.3256827081394894e-11,
+      "loss": 1.2053,
+      "step": 59783
+    },
+    {
+      "epoch": 2.998733494676916,
+      "grad_norm": 3.0084455013275146,
+      "learning_rate": 3.9865492290624973e-11,
+      "loss": 1.4298,
+      "step": 59784
+    },
+    {
+      "epoch": 2.9987836533035725,
+      "grad_norm": 3.4063498973846436,
+      "learning_rate": 3.6612579257244704e-11,
+      "loss": 1.7157,
+      "step": 59785
+    },
+    {
+      "epoch": 2.998833811930229,
+      "grad_norm": 2.2077646255493164,
+      "learning_rate": 3.349808798125409e-11,
+      "loss": 1.1338,
+      "step": 59786
+    },
+    {
+      "epoch": 2.998883970556886,
+      "grad_norm": 2.179716110229492,
+      "learning_rate": 3.052201848485759e-11,
+      "loss": 1.272,
+      "step": 59787
+    },
+    {
+      "epoch": 2.998934129183543,
+      "grad_norm": 2.593482255935669,
+      "learning_rate": 2.768437076805519e-11,
+      "loss": 1.3971,
+      "step": 59788
+    },
+    {
+      "epoch": 2.9989842878101998,
+      "grad_norm": 2.6535258293151855,
+      "learning_rate": 2.4985144847500253e-11,
+      "loss": 1.2945,
+      "step": 59789
+    },
+    {
+      "epoch": 2.9990344464368563,
+      "grad_norm": 3.077737808227539,
+      "learning_rate": 2.2424340717641656e-11,
+      "loss": 2.0234,
+      "step": 59790
+    },
+    {
+      "epoch": 2.9990846050635134,
+      "grad_norm": 2.6477880477905273,
+      "learning_rate": 2.0001958395132748e-11,
+      "loss": 1.1044,
+      "step": 59791
+    },
+    {
+      "epoch": 2.9991347636901704,
+      "grad_norm": 3.315833568572998,
+      "learning_rate": 1.7717997879973523e-11,
+      "loss": 1.578,
+      "step": 59792
+    },
+    {
+      "epoch": 2.999184922316827,
+      "grad_norm": 2.8919079303741455,
+      "learning_rate": 1.5572459188817334e-11,
+      "loss": 1.1892,
+      "step": 59793
+    },
+    {
+      "epoch": 2.9992350809434836,
+      "grad_norm": 2.9915194511413574,
+      "learning_rate": 1.3565342316113061e-11,
+      "loss": 1.4405,
+      "step": 59794
+    },
+    {
+      "epoch": 2.9992852395701406,
+      "grad_norm": 2.5658187866210938,
+      "learning_rate": 1.1696647272962935e-11,
+      "loss": 1.0032,
+      "step": 59795
+    },
+    {
+      "epoch": 2.9993353981967976,
+      "grad_norm": 3.2243688106536865,
+      "learning_rate": 9.966374064918071e-12,
+      "loss": 1.459,
+      "step": 59796
+    },
+    {
+      "epoch": 2.999385556823454,
+      "grad_norm": 3.0176734924316406,
+      "learning_rate": 8.374522697529586e-12,
+      "loss": 1.1177,
+      "step": 59797
+    },
+    {
+      "epoch": 2.999435715450111,
+      "grad_norm": 3.0600199699401855,
+      "learning_rate": 6.921093170797477e-12,
+      "loss": 1.6016,
+      "step": 59798
+    },
+    {
+      "epoch": 2.999485874076768,
+      "grad_norm": 3.130082607269287,
+      "learning_rate": 5.6060854958239765e-12,
+      "loss": 1.7026,
+      "step": 59799
+    },
+    {
+      "epoch": 2.999536032703425,
+      "grad_norm": 2.7928144931793213,
+      "learning_rate": 4.429499667057968e-12,
+      "loss": 1.1228,
+      "step": 59800
+    },
+    {
+      "epoch": 2.9995861913300814,
+      "grad_norm": 3.355782985687256,
+      "learning_rate": 3.3913356956016827e-12,
+      "loss": 1.6128,
+      "step": 59801
+    },
+    {
+      "epoch": 2.999636349956738,
+      "grad_norm": 3.621020555496216,
+      "learning_rate": 2.4915935759040053e-12,
+      "loss": 1.3013,
+      "step": 59802
+    },
+    {
+      "epoch": 2.999686508583395,
+      "grad_norm": 3.6413798332214355,
+      "learning_rate": 1.7302733246182811e-12,
+      "loss": 0.9964,
+      "step": 59803
+    },
+    {
+      "epoch": 2.999736667210052,
+      "grad_norm": 5.63632345199585,
+      "learning_rate": 1.10737493064228e-12,
+      "loss": 1.7908,
+      "step": 59804
+    },
+    {
+      "epoch": 2.9997868258367086,
+      "grad_norm": 3.6524646282196045,
+      "learning_rate": 6.228983995271165e-13,
+      "loss": 1.058,
+      "step": 59805
+    },
+    {
+      "epoch": 2.999836984463365,
+      "grad_norm": 4.080533981323242,
+      "learning_rate": 2.768437312727912e-13,
+      "loss": 1.2011,
+      "step": 59806
+    },
+    {
+      "epoch": 2.9998871430900222,
+      "grad_norm": 5.047213554382324,
+      "learning_rate": 6.921093143041901e-14,
+      "loss": 1.139,
+      "step": 59807
+    },
+    {
+      "epoch": 2.9999373017166793,
+      "grad_norm": 4.863078594207764,
+      "learning_rate": 0.0,
+      "loss": 1.1164,
+      "step": 59808
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.404390115877323e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null