Upload folder using huggingface_hub
- adapter_model.safetensors +1 -1
- optimizer.pt +2 -2
- scheduler.pt +1 -1
- trainer_state.json +4 -536
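The commit title indicates the files were pushed with the `huggingface_hub` client. A minimal sketch of how such a checkpoint folder is typically uploaded, assuming a hypothetical repo id and local path (large binaries like the `.safetensors` and `.pt` files below are routed through Git LFS automatically):

```python
# Minimal sketch: push a local checkpoint folder to the Hub with huggingface_hub.
# The repo id and folder path are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from HF_TOKEN or the cached login
api.upload_folder(
    repo_id="user/my-model",          # hypothetical target repo
    folder_path="./checkpoint-200",   # hypothetical local checkpoint dir
    commit_message="Upload folder using huggingface_hub",
)
```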
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:91c399c7fab17d481c5bbf1dfbd90dbeebecebbd329a17a2dde1aa51acda686a
 size 3313653480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0fb50d16fc9c5c8e728744bd029ccecdc61a224d890aaf064309981ad5194cb9
+size 1661301780
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3b6ef9ad0d92f6fffee2bdaedbc1e0b68b977b45a2ed7ec889f6406883a665cf
 size 1064
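Each of the three binary files above is stored as a Git LFS pointer: a small text stub recording the spec version, the sha256 of the real payload, and its byte size (the old oids were not captured in this view). A sketch for checking a downloaded artifact against its pointer, with hypothetical local paths:

```python
# Sketch: verify a downloaded artifact against its Git LFS pointer file.
# Both file paths are hypothetical placeholders.
import hashlib

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer file."""
    fields = {}
    with open(pointer_path, "r") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB payloads don't load into memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

pointer = parse_lfs_pointer("adapter_model.safetensors.pointer")  # hypothetical
digest = sha256_of("adapter_model.safetensors")                   # hypothetical
assert pointer["oid"] == f"sha256:{digest}", "payload does not match pointer"
```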
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.4466546112115732,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
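The new header is internally consistent with the log below: 1.4466546112115732 / 200 steps gives an epoch increment of about 0.00723327 per optimizer step, which equals 4/553 exactly. That would fit, for example, 553 micro-batches per epoch with gradient accumulation of 4, though any equivalent ratio (e.g. 1106 batches with accumulation 8) fits equally well; the diff itself does not confirm either. A quick check:

```python
# Quick consistency check of the logged epoch against global_step.
# The 4/553 decomposition is one plausible reading, not stated in the diff.
from fractions import Fraction

epoch_per_step = Fraction(4, 553)   # hypothetical: 553 micro-batches, grad accum 4
print(float(200 * epoch_per_step))  # 1.4466546112115732, matching the header
```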
@@ -1407,538 +1407,6 @@
       "learning_rate": 2.8044280442804427e-05,
       "loss": 0.3215,
       "step": 200
-    },
-    {
-      "epoch": 1.453887884267631,
-      "grad_norm": 0.13975730538368225,
-      "learning_rate": 2.767527675276753e-05,
-      "loss": 0.3254,
-      "step": 201
-    },
-    {
-      "epoch": 1.461121157323689,
-      "grad_norm": 0.12640978395938873,
-      "learning_rate": 2.730627306273063e-05,
-      "loss": 0.3525,
-      "step": 202
-    },
-    {
-      "epoch": 1.4683544303797469,
-      "grad_norm": 0.11131294071674347,
-      "learning_rate": 2.693726937269373e-05,
-      "loss": 0.3132,
-      "step": 203
-    },
-    {
-      "epoch": 1.4755877034358047,
-      "grad_norm": 0.12206707894802094,
-      "learning_rate": 2.6568265682656828e-05,
-      "loss": 0.3561,
-      "step": 204
-    },
-    {
-      "epoch": 1.4828209764918625,
-      "grad_norm": 0.11679227650165558,
-      "learning_rate": 2.619926199261993e-05,
-      "loss": 0.3393,
-      "step": 205
-    },
-    {
-      "epoch": 1.4900542495479203,
-      "grad_norm": 0.12166301161050797,
-      "learning_rate": 2.5830258302583026e-05,
-      "loss": 0.3436,
-      "step": 206
-    },
-    {
-      "epoch": 1.4972875226039783,
-      "grad_norm": 0.13859513401985168,
-      "learning_rate": 2.5461254612546127e-05,
-      "loss": 0.3722,
-      "step": 207
-    },
-    {
-      "epoch": 1.5045207956600362,
-      "grad_norm": 0.11228498816490173,
-      "learning_rate": 2.5092250922509224e-05,
-      "loss": 0.3189,
-      "step": 208
-    },
-    {
-      "epoch": 1.511754068716094,
-      "grad_norm": 0.11623143404722214,
-      "learning_rate": 2.472324723247233e-05,
-      "loss": 0.3731,
-      "step": 209
-    },
-    {
-      "epoch": 1.518987341772152,
-      "grad_norm": 0.1316087245941162,
-      "learning_rate": 2.4354243542435426e-05,
-      "loss": 0.3364,
-      "step": 210
-    },
-    {
-      "epoch": 1.5262206148282098,
-      "grad_norm": 0.12064289301633835,
-      "learning_rate": 2.3985239852398524e-05,
-      "loss": 0.3511,
-      "step": 211
-    },
-    {
-      "epoch": 1.5334538878842676,
-      "grad_norm": 0.14924070239067078,
-      "learning_rate": 2.3616236162361624e-05,
-      "loss": 0.3313,
-      "step": 212
-    },
-    {
-      "epoch": 1.5406871609403257,
-      "grad_norm": 0.10872308164834976,
-      "learning_rate": 2.3247232472324722e-05,
-      "loss": 0.3186,
-      "step": 213
-    },
-    {
-      "epoch": 1.5479204339963832,
-      "grad_norm": 0.12435383349657059,
-      "learning_rate": 2.2878228782287826e-05,
-      "loss": 0.3382,
-      "step": 214
-    },
-    {
-      "epoch": 1.5551537070524413,
-      "grad_norm": 0.12237284332513809,
-      "learning_rate": 2.2509225092250924e-05,
-      "loss": 0.3427,
-      "step": 215
-    },
-    {
-      "epoch": 1.562386980108499,
-      "grad_norm": 0.1082320362329483,
-      "learning_rate": 2.2140221402214025e-05,
-      "loss": 0.3141,
-      "step": 216
-    },
-    {
-      "epoch": 1.5696202531645569,
-      "grad_norm": 0.12488240003585815,
-      "learning_rate": 2.1771217712177122e-05,
-      "loss": 0.3062,
-      "step": 217
-    },
-    {
-      "epoch": 1.576853526220615,
-      "grad_norm": 0.1263773888349533,
-      "learning_rate": 2.140221402214022e-05,
-      "loss": 0.3477,
-      "step": 218
-    },
-    {
-      "epoch": 1.5840867992766727,
-      "grad_norm": 0.11632055044174194,
-      "learning_rate": 2.1033210332103324e-05,
-      "loss": 0.3558,
-      "step": 219
-    },
-    {
-      "epoch": 1.5913200723327305,
-      "grad_norm": 0.13615989685058594,
-      "learning_rate": 2.066420664206642e-05,
-      "loss": 0.3806,
-      "step": 220
-    },
-    {
-      "epoch": 1.5985533453887886,
-      "grad_norm": 0.17589685320854187,
-      "learning_rate": 2.0295202952029522e-05,
-      "loss": 0.3327,
-      "step": 221
-    },
-    {
-      "epoch": 1.6057866184448462,
-      "grad_norm": 0.1255197674036026,
-      "learning_rate": 1.992619926199262e-05,
-      "loss": 0.3582,
-      "step": 222
-    },
-    {
-      "epoch": 1.6130198915009042,
-      "grad_norm": 0.29970669746398926,
-      "learning_rate": 1.955719557195572e-05,
-      "loss": 0.3587,
-      "step": 223
-    },
-    {
-      "epoch": 1.620253164556962,
-      "grad_norm": 0.12951691448688507,
-      "learning_rate": 1.918819188191882e-05,
-      "loss": 0.3527,
-      "step": 224
-    },
-    {
-      "epoch": 1.6274864376130198,
-      "grad_norm": 0.2785731256008148,
-      "learning_rate": 1.881918819188192e-05,
-      "loss": 0.338,
-      "step": 225
-    },
-    {
-      "epoch": 1.6347197106690778,
-      "grad_norm": 0.12442605197429657,
-      "learning_rate": 1.845018450184502e-05,
-      "loss": 0.3606,
-      "step": 226
-    },
-    {
-      "epoch": 1.6419529837251357,
-      "grad_norm": 0.12413132935762405,
-      "learning_rate": 1.8081180811808117e-05,
-      "loss": 0.3164,
-      "step": 227
-    },
-    {
-      "epoch": 1.6491862567811935,
-      "grad_norm": 0.1774081587791443,
-      "learning_rate": 1.771217712177122e-05,
-      "loss": 0.3408,
-      "step": 228
-    },
-    {
-      "epoch": 1.6564195298372515,
-      "grad_norm": 0.12615852057933807,
-      "learning_rate": 1.734317343173432e-05,
-      "loss": 0.3433,
-      "step": 229
-    },
-    {
-      "epoch": 1.663652802893309,
-      "grad_norm": 0.1367713063955307,
-      "learning_rate": 1.6974169741697417e-05,
-      "loss": 0.3642,
-      "step": 230
-    },
-    {
-      "epoch": 1.6708860759493671,
-      "grad_norm": 0.12680459022521973,
-      "learning_rate": 1.6605166051660518e-05,
-      "loss": 0.3828,
-      "step": 231
-    },
-    {
-      "epoch": 1.678119349005425,
-      "grad_norm": 0.12927737832069397,
-      "learning_rate": 1.6236162361623615e-05,
-      "loss": 0.3253,
-      "step": 232
-    },
-    {
-      "epoch": 1.6853526220614827,
-      "grad_norm": 0.11796507984399796,
-      "learning_rate": 1.5867158671586716e-05,
-      "loss": 0.3763,
-      "step": 233
-    },
-    {
-      "epoch": 1.6925858951175408,
-      "grad_norm": 0.12181632965803146,
-      "learning_rate": 1.5498154981549817e-05,
-      "loss": 0.3311,
-      "step": 234
-    },
-    {
-      "epoch": 1.6998191681735986,
-      "grad_norm": 0.11845839768648148,
-      "learning_rate": 1.5129151291512916e-05,
-      "loss": 0.3718,
-      "step": 235
-    },
-    {
-      "epoch": 1.7070524412296564,
-      "grad_norm": 0.11736506223678589,
-      "learning_rate": 1.4760147601476015e-05,
-      "loss": 0.3225,
-      "step": 236
-    },
-    {
-      "epoch": 1.7142857142857144,
-      "grad_norm": 0.12600649893283844,
-      "learning_rate": 1.4391143911439114e-05,
-      "loss": 0.3309,
-      "step": 237
-    },
-    {
-      "epoch": 1.721518987341772,
-      "grad_norm": 0.12421372532844543,
-      "learning_rate": 1.4022140221402214e-05,
-      "loss": 0.3516,
-      "step": 238
-    },
-    {
-      "epoch": 1.72875226039783,
-      "grad_norm": 0.1250220090150833,
-      "learning_rate": 1.3653136531365315e-05,
-      "loss": 0.3634,
-      "step": 239
-    },
-    {
-      "epoch": 1.7359855334538878,
-      "grad_norm": 0.12365727126598358,
-      "learning_rate": 1.3284132841328414e-05,
-      "loss": 0.3827,
-      "step": 240
-    },
-    {
-      "epoch": 1.7432188065099457,
-      "grad_norm": 0.12409546226263046,
-      "learning_rate": 1.2915129151291513e-05,
-      "loss": 0.3443,
-      "step": 241
-    },
-    {
-      "epoch": 1.7504520795660037,
-      "grad_norm": 0.1293025016784668,
-      "learning_rate": 1.2546125461254612e-05,
-      "loss": 0.3284,
-      "step": 242
-    },
-    {
-      "epoch": 1.7576853526220615,
-      "grad_norm": 0.12537458539009094,
-      "learning_rate": 1.2177121771217713e-05,
-      "loss": 0.3196,
-      "step": 243
-    },
-    {
-      "epoch": 1.7649186256781193,
-      "grad_norm": 0.13035526871681213,
-      "learning_rate": 1.1808118081180812e-05,
-      "loss": 0.3114,
-      "step": 244
-    },
-    {
-      "epoch": 1.7721518987341773,
-      "grad_norm": 0.15101519227027893,
-      "learning_rate": 1.1439114391143913e-05,
-      "loss": 0.3607,
-      "step": 245
-    },
-    {
-      "epoch": 1.779385171790235,
-      "grad_norm": 0.12607994675636292,
-      "learning_rate": 1.1070110701107012e-05,
-      "loss": 0.3202,
-      "step": 246
-    },
-    {
-      "epoch": 1.786618444846293,
-      "grad_norm": 0.12627242505550385,
-      "learning_rate": 1.070110701107011e-05,
-      "loss": 0.3394,
-      "step": 247
-    },
-    {
-      "epoch": 1.7938517179023508,
-      "grad_norm": 0.12351588159799576,
-      "learning_rate": 1.033210332103321e-05,
-      "loss": 0.3222,
-      "step": 248
-    },
-    {
-      "epoch": 1.8010849909584086,
-      "grad_norm": 0.12709592282772064,
-      "learning_rate": 9.96309963099631e-06,
-      "loss": 0.3392,
-      "step": 249
-    },
-    {
-      "epoch": 1.8083182640144666,
-      "grad_norm": 0.20409362018108368,
-      "learning_rate": 9.59409594095941e-06,
-      "loss": 0.3541,
-      "step": 250
-    },
-    {
-      "epoch": 1.8155515370705244,
-      "grad_norm": 0.13211952149868011,
-      "learning_rate": 9.22509225092251e-06,
-      "loss": 0.3798,
-      "step": 251
-    },
-    {
-      "epoch": 1.8227848101265822,
-      "grad_norm": 0.1471939980983734,
-      "learning_rate": 8.85608856088561e-06,
-      "loss": 0.3716,
-      "step": 252
-    },
-    {
-      "epoch": 1.8300180831826403,
-      "grad_norm": 0.12940147519111633,
-      "learning_rate": 8.487084870848708e-06,
-      "loss": 0.3324,
-      "step": 253
-    },
-    {
-      "epoch": 1.837251356238698,
-      "grad_norm": 0.1352042704820633,
-      "learning_rate": 8.118081180811808e-06,
-      "loss": 0.357,
-      "step": 254
-    },
-    {
-      "epoch": 1.8444846292947559,
-      "grad_norm": 0.12222684174776077,
-      "learning_rate": 7.749077490774908e-06,
-      "loss": 0.3262,
-      "step": 255
-    },
-    {
-      "epoch": 1.851717902350814,
-      "grad_norm": 0.12854433059692383,
-      "learning_rate": 7.380073800738008e-06,
-      "loss": 0.3452,
-      "step": 256
-    },
-    {
-      "epoch": 1.8589511754068715,
-      "grad_norm": 0.1557794213294983,
-      "learning_rate": 7.011070110701107e-06,
-      "loss": 0.3443,
-      "step": 257
-    },
-    {
-      "epoch": 1.8661844484629295,
-      "grad_norm": 0.12235873192548752,
-      "learning_rate": 6.642066420664207e-06,
-      "loss": 0.3185,
-      "step": 258
-    },
-    {
-      "epoch": 1.8734177215189873,
-      "grad_norm": 0.12504766881465912,
-      "learning_rate": 6.273062730627306e-06,
-      "loss": 0.356,
-      "step": 259
-    },
-    {
-      "epoch": 1.8806509945750451,
-      "grad_norm": 0.1318463236093521,
-      "learning_rate": 5.904059040590406e-06,
-      "loss": 0.3276,
-      "step": 260
-    },
-    {
-      "epoch": 1.8878842676311032,
-      "grad_norm": 0.12830232083797455,
-      "learning_rate": 5.535055350553506e-06,
-      "loss": 0.3242,
-      "step": 261
-    },
-    {
-      "epoch": 1.895117540687161,
-      "grad_norm": 0.12111414223909378,
-      "learning_rate": 5.166051660516605e-06,
-      "loss": 0.3703,
-      "step": 262
-    },
-    {
-      "epoch": 1.9023508137432188,
-      "grad_norm": 0.12544532120227814,
-      "learning_rate": 4.797047970479705e-06,
-      "loss": 0.3375,
-      "step": 263
-    },
-    {
-      "epoch": 1.9095840867992768,
-      "grad_norm": 0.12667147815227509,
-      "learning_rate": 4.428044280442805e-06,
-      "loss": 0.326,
-      "step": 264
-    },
-    {
-      "epoch": 1.9168173598553344,
-      "grad_norm": 0.11932243406772614,
-      "learning_rate": 4.059040590405904e-06,
-      "loss": 0.372,
-      "step": 265
-    },
-    {
-      "epoch": 1.9240506329113924,
-      "grad_norm": 0.12806957960128784,
-      "learning_rate": 3.690036900369004e-06,
-      "loss": 0.3406,
-      "step": 266
-    },
-    {
-      "epoch": 1.9312839059674503,
-      "grad_norm": 0.11929921805858612,
-      "learning_rate": 3.3210332103321034e-06,
-      "loss": 0.3481,
-      "step": 267
-    },
-    {
-      "epoch": 1.938517179023508,
-      "grad_norm": 0.12515687942504883,
-      "learning_rate": 2.952029520295203e-06,
-      "loss": 0.345,
-      "step": 268
-    },
-    {
-      "epoch": 1.945750452079566,
-      "grad_norm": 0.11791153252124786,
-      "learning_rate": 2.5830258302583027e-06,
-      "loss": 0.3297,
-      "step": 269
-    },
-    {
-      "epoch": 1.952983725135624,
-      "grad_norm": 0.13056673109531403,
-      "learning_rate": 2.2140221402214023e-06,
-      "loss": 0.3939,
-      "step": 270
-    },
-    {
-      "epoch": 1.9602169981916817,
-      "grad_norm": 0.13385014235973358,
-      "learning_rate": 1.845018450184502e-06,
-      "loss": 0.3902,
-      "step": 271
-    },
-    {
-      "epoch": 1.9674502712477397,
-      "grad_norm": 0.1214594915509224,
-      "learning_rate": 1.4760147601476015e-06,
-      "loss": 0.3336,
-      "step": 272
-    },
-    {
-      "epoch": 1.9746835443037973,
-      "grad_norm": 0.1306677609682083,
-      "learning_rate": 1.1070110701107011e-06,
-      "loss": 0.3614,
-      "step": 273
-    },
-    {
-      "epoch": 1.9819168173598554,
-      "grad_norm": 0.12312816828489304,
-      "learning_rate": 7.380073800738008e-07,
-      "loss": 0.3337,
-      "step": 274
-    },
-    {
-      "epoch": 1.9891500904159132,
-      "grad_norm": 0.11654796451330185,
-      "learning_rate": 3.690036900369004e-07,
-      "loss": 0.3406,
-      "step": 275
-    },
-    {
-      "epoch": 1.996383363471971,
-      "grad_norm": 0.12927745282649994,
-      "learning_rate": 0.0,
-      "loss": 0.3392,
-      "step": 276
     }
   ],
   "logging_steps": 1,
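The removed learning-rate column decays in exact steps of 3.690036900369e-07 = 1e-4 / 271 and reaches 0.0 at step 276, which is consistent with a linear schedule peaking at 1e-4 over 276 total steps with 5 warmup steps (276 - 271 = 5). A sketch reproducing that shape with the standard transformers scheduler; the peak LR, warmup, and total steps are inferred from the log, not stated in the diff:

```python
# Sketch: the logged learning rates match a linear schedule with warmup.
# Peak LR 1e-4, 5 warmup steps, and 276 total steps are inferred assumptions.
import torch
from transformers import get_linear_schedule_with_warmup

params = [torch.nn.Parameter(torch.zeros(1))]  # dummy parameter
optimizer = torch.optim.AdamW(params, lr=1e-4)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=5, num_training_steps=276
)

for step in range(1, 277):
    optimizer.step()
    scheduler.step()
    if step == 200:
        # ~2.8044280442804427e-05, matching the logged value at step 200
        print(scheduler.get_last_lr()[0])
```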
@@ -1953,12 +1421,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 8.61836422398301e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
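Net effect: the uploaded trainer_state.json rewinds the saved run from its end (step 276, where the learning rate hits 0.0) back to the step-200 state, with the log history truncated accordingly and should_training_stop reset to false. A quick way to sanity-check such a state file before resuming, assuming a hypothetical local checkpoint path:

```python
# Sketch: inspect a checkpoint's trainer_state.json before resuming training.
# The checkpoint path is a hypothetical placeholder.
import json

with open("checkpoint-200/trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

print(state["global_step"])              # 200
print(state["epoch"])                    # 1.4466546112115732
print(state["log_history"][-1]["step"])  # 200, history truncated at the checkpoint
control = state["stateful_callbacks"]["TrainerControl"]["args"]
print(control["should_training_stop"])   # False, so training can continue
```

Pointing `Trainer.train(resume_from_checkpoint=...)` at such a folder would pick the run back up from step 200, which is presumably the intent of re-uploading this earlier state.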