{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9990884229717412,
"eval_steps": 500,
"global_step": 274,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0036463081130355514,
"grad_norm": 1.5564329067676301,
"learning_rate": 1.818181818181818e-06,
"loss": 1.699,
"step": 1
},
{
"epoch": 0.007292616226071103,
"grad_norm": 44902.29063456451,
"learning_rate": 3.636363636363636e-06,
"loss": 1.7642,
"step": 2
},
{
"epoch": 0.014585232452142206,
"grad_norm": 1.8353451125445739,
"learning_rate": 7.272727272727272e-06,
"loss": 1.7172,
"step": 4
},
{
"epoch": 0.02187784867821331,
"grad_norm": 1.8226533138713261,
"learning_rate": 1.0909090909090909e-05,
"loss": 1.7472,
"step": 6
},
{
"epoch": 0.02917046490428441,
"grad_norm": 51.31275799965548,
"learning_rate": 1.4545454545454545e-05,
"loss": 1.6743,
"step": 8
},
{
"epoch": 0.03646308113035551,
"grad_norm": 2.4177896118244924,
"learning_rate": 1.8181818181818182e-05,
"loss": 1.6505,
"step": 10
},
{
"epoch": 0.04375569735642662,
"grad_norm": 1.4056780909392894,
"learning_rate": 2.1818181818181818e-05,
"loss": 1.662,
"step": 12
},
{
"epoch": 0.05104831358249772,
"grad_norm": 1.123663560927214,
"learning_rate": 2.5454545454545454e-05,
"loss": 1.6014,
"step": 14
},
{
"epoch": 0.05834092980856882,
"grad_norm": 0.660170892307739,
"learning_rate": 2.909090909090909e-05,
"loss": 1.5094,
"step": 16
},
{
"epoch": 0.06563354603463993,
"grad_norm": 0.42755156023092894,
"learning_rate": 3.272727272727273e-05,
"loss": 1.5272,
"step": 18
},
{
"epoch": 0.07292616226071102,
"grad_norm": 0.3829807576012777,
"learning_rate": 3.6363636363636364e-05,
"loss": 1.4067,
"step": 20
},
{
"epoch": 0.08021877848678213,
"grad_norm": 0.41242113134912567,
"learning_rate": 4e-05,
"loss": 1.3437,
"step": 22
},
{
"epoch": 0.08751139471285324,
"grad_norm": 0.4213871682292563,
"learning_rate": 4.3636363636363636e-05,
"loss": 1.3318,
"step": 24
},
{
"epoch": 0.09480401093892434,
"grad_norm": 0.3915162267949715,
"learning_rate": 4.7272727272727275e-05,
"loss": 1.3009,
"step": 26
},
{
"epoch": 0.10209662716499544,
"grad_norm": 0.37294398118323746,
"learning_rate": 5.090909090909091e-05,
"loss": 1.2709,
"step": 28
},
{
"epoch": 0.10938924339106655,
"grad_norm": 0.3180941760614004,
"learning_rate": 5.4545454545454546e-05,
"loss": 1.1872,
"step": 30
},
{
"epoch": 0.11668185961713765,
"grad_norm": 0.22743090816603856,
"learning_rate": 5.818181818181818e-05,
"loss": 1.1793,
"step": 32
},
{
"epoch": 0.12397447584320875,
"grad_norm": 0.15136600844382636,
"learning_rate": 6.181818181818182e-05,
"loss": 1.137,
"step": 34
},
{
"epoch": 0.13126709206927986,
"grad_norm": 0.14922343265678947,
"learning_rate": 6.545454545454546e-05,
"loss": 1.1545,
"step": 36
},
{
"epoch": 0.13855970829535097,
"grad_norm": 0.11950915139776307,
"learning_rate": 6.90909090909091e-05,
"loss": 1.0923,
"step": 38
},
{
"epoch": 0.14585232452142205,
"grad_norm": 0.12975286429339442,
"learning_rate": 7.272727272727273e-05,
"loss": 1.0946,
"step": 40
},
{
"epoch": 0.15314494074749316,
"grad_norm": 0.11636352912910496,
"learning_rate": 7.636363636363637e-05,
"loss": 1.0651,
"step": 42
},
{
"epoch": 0.16043755697356427,
"grad_norm": 0.12013810386339561,
"learning_rate": 8e-05,
"loss": 1.0725,
"step": 44
},
{
"epoch": 0.16773017319963537,
"grad_norm": 0.09545625176758893,
"learning_rate": 8.363636363636364e-05,
"loss": 1.1092,
"step": 46
},
{
"epoch": 0.17502278942570648,
"grad_norm": 0.08240688428637283,
"learning_rate": 8.727272727272727e-05,
"loss": 1.0449,
"step": 48
},
{
"epoch": 0.18231540565177756,
"grad_norm": 0.08336026564156031,
"learning_rate": 9.090909090909092e-05,
"loss": 1.053,
"step": 50
},
{
"epoch": 0.18960802187784867,
"grad_norm": 0.08733239972775936,
"learning_rate": 9.454545454545455e-05,
"loss": 1.0629,
"step": 52
},
{
"epoch": 0.19690063810391978,
"grad_norm": 0.07845494536138678,
"learning_rate": 9.818181818181818e-05,
"loss": 1.0642,
"step": 54
},
{
"epoch": 0.2041932543299909,
"grad_norm": 0.0792882467275035,
"learning_rate": 9.999977231314127e-05,
"loss": 1.0824,
"step": 56
},
{
"epoch": 0.211485870556062,
"grad_norm": 0.07831448467807114,
"learning_rate": 9.999795083071328e-05,
"loss": 1.0554,
"step": 58
},
{
"epoch": 0.2187784867821331,
"grad_norm": 0.09325192568500952,
"learning_rate": 9.999430793221355e-05,
"loss": 1.0361,
"step": 60
},
{
"epoch": 0.22607110300820418,
"grad_norm": 0.08001023877218082,
"learning_rate": 9.998884375035221e-05,
"loss": 1.0501,
"step": 62
},
{
"epoch": 0.2333637192342753,
"grad_norm": 0.08525503053749453,
"learning_rate": 9.99815584841884e-05,
"loss": 1.0421,
"step": 64
},
{
"epoch": 0.2406563354603464,
"grad_norm": 0.18952595666861716,
"learning_rate": 9.997245239912299e-05,
"loss": 1.0598,
"step": 66
},
{
"epoch": 0.2479489516864175,
"grad_norm": 0.5356478609043368,
"learning_rate": 9.996152582688898e-05,
"loss": 1.0195,
"step": 68
},
{
"epoch": 0.2552415679124886,
"grad_norm": 0.07439723408480747,
"learning_rate": 9.994877916553938e-05,
"loss": 1.0119,
"step": 70
},
{
"epoch": 0.2625341841385597,
"grad_norm": 0.08769208744835676,
"learning_rate": 9.993421287943269e-05,
"loss": 1.0367,
"step": 72
},
{
"epoch": 0.2698268003646308,
"grad_norm": 0.07727464499454006,
"learning_rate": 9.991782749921601e-05,
"loss": 1.0222,
"step": 74
},
{
"epoch": 0.27711941659070194,
"grad_norm": 0.07684989155110282,
"learning_rate": 9.98996236218057e-05,
"loss": 1.0291,
"step": 76
},
{
"epoch": 0.284412032816773,
"grad_norm": 0.074797273929808,
"learning_rate": 9.987960191036562e-05,
"loss": 1.0265,
"step": 78
},
{
"epoch": 0.2917046490428441,
"grad_norm": 0.07808867527853167,
"learning_rate": 9.985776309428305e-05,
"loss": 1.0394,
"step": 80
},
{
"epoch": 0.29899726526891524,
"grad_norm": 0.08366367602394825,
"learning_rate": 9.983410796914196e-05,
"loss": 0.9918,
"step": 82
},
{
"epoch": 0.3062898814949863,
"grad_norm": 0.08235936275195418,
"learning_rate": 9.98086373966942e-05,
"loss": 1.0093,
"step": 84
},
{
"epoch": 0.31358249772105745,
"grad_norm": 0.07114879422193107,
"learning_rate": 9.978135230482797e-05,
"loss": 1.0331,
"step": 86
},
{
"epoch": 0.32087511394712853,
"grad_norm": 0.0789659528845,
"learning_rate": 9.975225368753412e-05,
"loss": 1.0083,
"step": 88
},
{
"epoch": 0.3281677301731996,
"grad_norm": 0.12722043213844833,
"learning_rate": 9.972134260486988e-05,
"loss": 0.9868,
"step": 90
},
{
"epoch": 0.33546034639927075,
"grad_norm": 0.07460212363414274,
"learning_rate": 9.968862018292026e-05,
"loss": 0.9787,
"step": 92
},
{
"epoch": 0.34275296262534183,
"grad_norm": 0.0794228233950649,
"learning_rate": 9.965408761375701e-05,
"loss": 0.9983,
"step": 94
},
{
"epoch": 0.35004557885141296,
"grad_norm": 0.07805234354253066,
"learning_rate": 9.961774615539522e-05,
"loss": 0.9712,
"step": 96
},
{
"epoch": 0.35733819507748404,
"grad_norm": 0.07531557083466454,
"learning_rate": 9.957959713174748e-05,
"loss": 0.9888,
"step": 98
},
{
"epoch": 0.3646308113035551,
"grad_norm": 0.07667315677213174,
"learning_rate": 9.953964193257563e-05,
"loss": 0.9782,
"step": 100
},
{
"epoch": 0.37192342752962626,
"grad_norm": 0.0769553697010856,
"learning_rate": 9.949788201344019e-05,
"loss": 0.9615,
"step": 102
},
{
"epoch": 0.37921604375569734,
"grad_norm": 0.08212042393870912,
"learning_rate": 9.945431889564723e-05,
"loss": 0.9755,
"step": 104
},
{
"epoch": 0.3865086599817685,
"grad_norm": 0.08314540306193914,
"learning_rate": 9.940895416619309e-05,
"loss": 1.0069,
"step": 106
},
{
"epoch": 0.39380127620783956,
"grad_norm": 0.07970050714561497,
"learning_rate": 9.936178947770641e-05,
"loss": 0.9686,
"step": 108
},
{
"epoch": 0.4010938924339107,
"grad_norm": 0.2747778373047559,
"learning_rate": 9.931282654838803e-05,
"loss": 0.9878,
"step": 110
},
{
"epoch": 0.4083865086599818,
"grad_norm": 0.08106771126002589,
"learning_rate": 9.926206716194842e-05,
"loss": 0.9948,
"step": 112
},
{
"epoch": 0.41567912488605285,
"grad_norm": 0.08133105923791639,
"learning_rate": 9.920951316754259e-05,
"loss": 0.9621,
"step": 114
},
{
"epoch": 0.422971741112124,
"grad_norm": 0.07751316025392309,
"learning_rate": 9.915516647970282e-05,
"loss": 1.009,
"step": 116
},
{
"epoch": 0.43026435733819507,
"grad_norm": 0.07429775187060293,
"learning_rate": 9.909902907826884e-05,
"loss": 0.9564,
"step": 118
},
{
"epoch": 0.4375569735642662,
"grad_norm": 0.06885396447375916,
"learning_rate": 9.904110300831577e-05,
"loss": 0.9516,
"step": 120
},
{
"epoch": 0.4448495897903373,
"grad_norm": 0.11911641611028377,
"learning_rate": 9.898139038007961e-05,
"loss": 0.9501,
"step": 122
},
{
"epoch": 0.45214220601640837,
"grad_norm": 0.08134204746867607,
"learning_rate": 9.891989336888032e-05,
"loss": 0.9787,
"step": 124
},
{
"epoch": 0.4594348222424795,
"grad_norm": 0.06703677306759141,
"learning_rate": 9.88566142150426e-05,
"loss": 0.9383,
"step": 126
},
{
"epoch": 0.4667274384685506,
"grad_norm": 0.08875939081498214,
"learning_rate": 9.87915552238143e-05,
"loss": 0.9565,
"step": 128
},
{
"epoch": 0.4740200546946217,
"grad_norm": 0.06975398482601901,
"learning_rate": 9.872471876528236e-05,
"loss": 0.9351,
"step": 130
},
{
"epoch": 0.4813126709206928,
"grad_norm": 0.08668686890512382,
"learning_rate": 9.865610727428661e-05,
"loss": 0.9619,
"step": 132
},
{
"epoch": 0.4886052871467639,
"grad_norm": 0.07299762770901298,
"learning_rate": 9.858572325033089e-05,
"loss": 0.9666,
"step": 134
},
{
"epoch": 0.495897903372835,
"grad_norm": 0.22619188588419292,
"learning_rate": 9.851356925749217e-05,
"loss": 0.9564,
"step": 136
},
{
"epoch": 0.5031905195989061,
"grad_norm": 0.06929553898073587,
"learning_rate": 9.843964792432702e-05,
"loss": 0.9416,
"step": 138
},
{
"epoch": 0.5104831358249772,
"grad_norm": 0.07296219603236209,
"learning_rate": 9.836396194377586e-05,
"loss": 0.9606,
"step": 140
},
{
"epoch": 0.5177757520510483,
"grad_norm": 0.17258152427782444,
"learning_rate": 9.828651407306495e-05,
"loss": 0.9405,
"step": 142
},
{
"epoch": 0.5250683682771194,
"grad_norm": 0.08048053517750381,
"learning_rate": 9.820730713360584e-05,
"loss": 0.9308,
"step": 144
},
{
"epoch": 0.5323609845031905,
"grad_norm": 0.07478421518396797,
"learning_rate": 9.812634401089265e-05,
"loss": 0.9433,
"step": 146
},
{
"epoch": 0.5396536007292616,
"grad_norm": 0.0744608512059185,
"learning_rate": 9.804362765439688e-05,
"loss": 0.9545,
"step": 148
},
{
"epoch": 0.5469462169553327,
"grad_norm": 0.07446299592606315,
"learning_rate": 9.795916107746009e-05,
"loss": 0.925,
"step": 150
},
{
"epoch": 0.5542388331814039,
"grad_norm": 0.07071717466745271,
"learning_rate": 9.787294735718397e-05,
"loss": 0.9173,
"step": 152
},
{
"epoch": 0.5615314494074749,
"grad_norm": 0.0701984606480579,
"learning_rate": 9.778498963431837e-05,
"loss": 0.9082,
"step": 154
},
{
"epoch": 0.568824065633546,
"grad_norm": 0.07669702930168845,
"learning_rate": 9.769529111314682e-05,
"loss": 0.9497,
"step": 156
},
{
"epoch": 0.5761166818596172,
"grad_norm": 0.11697404287375447,
"learning_rate": 9.76038550613698e-05,
"loss": 0.9144,
"step": 158
},
{
"epoch": 0.5834092980856882,
"grad_norm": 0.07825369109874582,
"learning_rate": 9.75106848099857e-05,
"loss": 0.8793,
"step": 160
},
{
"epoch": 0.5907019143117593,
"grad_norm": 1.1005245093691354,
"learning_rate": 9.741578375316952e-05,
"loss": 0.9145,
"step": 162
},
{
"epoch": 0.5979945305378305,
"grad_norm": 0.08370330168443421,
"learning_rate": 9.731915534814912e-05,
"loss": 0.9511,
"step": 164
},
{
"epoch": 0.6052871467639015,
"grad_norm": 0.08122736176843745,
"learning_rate": 9.722080311507937e-05,
"loss": 0.9584,
"step": 166
},
{
"epoch": 0.6125797629899726,
"grad_norm": 0.07648634858832534,
"learning_rate": 9.712073063691386e-05,
"loss": 0.9262,
"step": 168
},
{
"epoch": 0.6198723792160438,
"grad_norm": 0.08053849014890098,
"learning_rate": 9.701894155927445e-05,
"loss": 0.9313,
"step": 170
},
{
"epoch": 0.6271649954421149,
"grad_norm": 0.07210396245886411,
"learning_rate": 9.69154395903183e-05,
"loss": 0.9174,
"step": 172
},
{
"epoch": 0.6344576116681859,
"grad_norm": 0.07286947087570095,
"learning_rate": 9.681022850060296e-05,
"loss": 0.8893,
"step": 174
},
{
"epoch": 0.6417502278942571,
"grad_norm": 0.06923686161728522,
"learning_rate": 9.670331212294889e-05,
"loss": 0.9395,
"step": 176
},
{
"epoch": 0.6490428441203282,
"grad_norm": 0.0673865610450944,
"learning_rate": 9.659469435229992e-05,
"loss": 0.91,
"step": 178
},
{
"epoch": 0.6563354603463992,
"grad_norm": 0.06948906848590711,
"learning_rate": 9.648437914558124e-05,
"loss": 0.9168,
"step": 180
},
{
"epoch": 0.6636280765724704,
"grad_norm": 0.07289843532121563,
"learning_rate": 9.63723705215554e-05,
"loss": 0.9552,
"step": 182
},
{
"epoch": 0.6709206927985415,
"grad_norm": 0.06990802284713049,
"learning_rate": 9.625867256067578e-05,
"loss": 0.9033,
"step": 184
},
{
"epoch": 0.6782133090246126,
"grad_norm": 0.07555680912326869,
"learning_rate": 9.614328940493798e-05,
"loss": 0.9206,
"step": 186
},
{
"epoch": 0.6855059252506837,
"grad_norm": 0.07743540693217674,
"learning_rate": 9.602622525772895e-05,
"loss": 0.9005,
"step": 188
},
{
"epoch": 0.6927985414767548,
"grad_norm": 0.07177992266999077,
"learning_rate": 9.590748438367388e-05,
"loss": 0.9041,
"step": 190
},
{
"epoch": 0.7000911577028259,
"grad_norm": 0.06957828378926412,
"learning_rate": 9.578707110848078e-05,
"loss": 0.8957,
"step": 192
},
{
"epoch": 0.707383773928897,
"grad_norm": 0.06622017193744313,
"learning_rate": 9.56649898187829e-05,
"loss": 0.9108,
"step": 194
},
{
"epoch": 0.7146763901549681,
"grad_norm": 0.06939738922827116,
"learning_rate": 9.554124496197898e-05,
"loss": 0.9467,
"step": 196
},
{
"epoch": 0.7219690063810392,
"grad_norm": 0.06510687273754218,
"learning_rate": 9.54158410460712e-05,
"loss": 0.9101,
"step": 198
},
{
"epoch": 0.7292616226071102,
"grad_norm": 0.12122407982819745,
"learning_rate": 9.528878263950094e-05,
"loss": 0.9271,
"step": 200
},
{
"epoch": 0.7365542388331814,
"grad_norm": 0.0703901780763089,
"learning_rate": 9.516007437098237e-05,
"loss": 0.9162,
"step": 202
},
{
"epoch": 0.7438468550592525,
"grad_norm": 0.13768411678101625,
"learning_rate": 9.502972092933384e-05,
"loss": 0.8917,
"step": 204
},
{
"epoch": 0.7511394712853237,
"grad_norm": 0.13103063463158354,
"learning_rate": 9.489772706330706e-05,
"loss": 0.9074,
"step": 206
},
{
"epoch": 0.7584320875113947,
"grad_norm": 0.07056505425629164,
"learning_rate": 9.476409758141405e-05,
"loss": 0.9288,
"step": 208
},
{
"epoch": 0.7657247037374658,
"grad_norm": 0.07383435280021707,
"learning_rate": 9.462883735175205e-05,
"loss": 0.9059,
"step": 210
},
{
"epoch": 0.773017319963537,
"grad_norm": 0.1015687322518938,
"learning_rate": 9.449195130182613e-05,
"loss": 0.9193,
"step": 212
},
{
"epoch": 0.780309936189608,
"grad_norm": 0.07104451767439372,
"learning_rate": 9.435344441836968e-05,
"loss": 0.9097,
"step": 214
},
{
"epoch": 0.7876025524156791,
"grad_norm": 0.06852107601664209,
"learning_rate": 9.42133217471628e-05,
"loss": 0.9126,
"step": 216
},
{
"epoch": 0.7948951686417502,
"grad_norm": 0.07368568637779982,
"learning_rate": 9.407158839284835e-05,
"loss": 0.945,
"step": 218
},
{
"epoch": 0.8021877848678214,
"grad_norm": 0.0648178325805628,
"learning_rate": 9.392824951874617e-05,
"loss": 0.8912,
"step": 220
},
{
"epoch": 0.8094804010938924,
"grad_norm": 0.06456581944586823,
"learning_rate": 9.378331034666484e-05,
"loss": 0.8899,
"step": 222
},
{
"epoch": 0.8167730173199635,
"grad_norm": 0.06900542842249857,
"learning_rate": 9.363677615671148e-05,
"loss": 0.9119,
"step": 224
},
{
"epoch": 0.8240656335460347,
"grad_norm": 0.06967833214883962,
"learning_rate": 9.348865228709947e-05,
"loss": 0.889,
"step": 226
},
{
"epoch": 0.8313582497721057,
"grad_norm": 0.08360540263683244,
"learning_rate": 9.333894413395387e-05,
"loss": 0.865,
"step": 228
},
{
"epoch": 0.8386508659981768,
"grad_norm": 0.07306384866137224,
"learning_rate": 9.318765715111497e-05,
"loss": 0.9074,
"step": 230
},
{
"epoch": 0.845943482224248,
"grad_norm": 0.07245184365008314,
"learning_rate": 9.303479684993942e-05,
"loss": 0.908,
"step": 232
},
{
"epoch": 0.853236098450319,
"grad_norm": 0.08363970365051233,
"learning_rate": 9.288036879909968e-05,
"loss": 0.8873,
"step": 234
},
{
"epoch": 0.8605287146763901,
"grad_norm": 0.07030389813918717,
"learning_rate": 9.272437862438094e-05,
"loss": 0.8869,
"step": 236
},
{
"epoch": 0.8678213309024613,
"grad_norm": 0.08293652025190876,
"learning_rate": 9.256683200847638e-05,
"loss": 0.871,
"step": 238
},
{
"epoch": 0.8751139471285324,
"grad_norm": 0.07471078064157549,
"learning_rate": 9.240773469077993e-05,
"loss": 0.8742,
"step": 240
},
{
"epoch": 0.8824065633546034,
"grad_norm": 0.07710454753144116,
"learning_rate": 9.22470924671774e-05,
"loss": 0.8743,
"step": 242
},
{
"epoch": 0.8896991795806746,
"grad_norm": 0.06889403226940291,
"learning_rate": 9.208491118983514e-05,
"loss": 0.8367,
"step": 244
},
{
"epoch": 0.8969917958067457,
"grad_norm": 0.07079796419035034,
"learning_rate": 9.192119676698703e-05,
"loss": 0.8699,
"step": 246
},
{
"epoch": 0.9042844120328167,
"grad_norm": 0.08107811116365189,
"learning_rate": 9.17559551627191e-05,
"loss": 0.8794,
"step": 248
},
{
"epoch": 0.9115770282588879,
"grad_norm": 0.07964800640935288,
"learning_rate": 9.158919239675236e-05,
"loss": 0.9364,
"step": 250
},
{
"epoch": 0.918869644484959,
"grad_norm": 0.07802559563260855,
"learning_rate": 9.14209145442234e-05,
"loss": 0.8561,
"step": 252
},
{
"epoch": 0.92616226071103,
"grad_norm": 0.08673021093662099,
"learning_rate": 9.125112773546315e-05,
"loss": 0.8854,
"step": 254
},
{
"epoch": 0.9334548769371012,
"grad_norm": 0.073859691158192,
"learning_rate": 9.107983815577359e-05,
"loss": 0.8949,
"step": 256
},
{
"epoch": 0.9407474931631723,
"grad_norm": 0.06585824434560679,
"learning_rate": 9.090705204520231e-05,
"loss": 0.8642,
"step": 258
},
{
"epoch": 0.9480401093892434,
"grad_norm": 0.07928660195604606,
"learning_rate": 9.073277569831526e-05,
"loss": 0.8936,
"step": 260
},
{
"epoch": 0.9553327256153145,
"grad_norm": 0.07629458882620191,
"learning_rate": 9.05570154639674e-05,
"loss": 0.889,
"step": 262
},
{
"epoch": 0.9626253418413856,
"grad_norm": 0.12177477305372565,
"learning_rate": 9.03797777450715e-05,
"loss": 0.8869,
"step": 264
},
{
"epoch": 0.9699179580674567,
"grad_norm": 0.07512333956042923,
"learning_rate": 9.020106899836472e-05,
"loss": 0.8821,
"step": 266
},
{
"epoch": 0.9772105742935278,
"grad_norm": 0.07673096729175252,
"learning_rate": 9.002089573417356e-05,
"loss": 0.8406,
"step": 268
},
{
"epoch": 0.9845031905195989,
"grad_norm": 0.13567692134785442,
"learning_rate": 8.983926451617664e-05,
"loss": 0.8644,
"step": 270
},
{
"epoch": 0.99179580674567,
"grad_norm": 0.08759393012824235,
"learning_rate": 8.965618196116549e-05,
"loss": 0.844,
"step": 272
},
{
"epoch": 0.9990884229717412,
"grad_norm": 0.08369780561981159,
"learning_rate": 8.947165473880363e-05,
"loss": 0.8516,
"step": 274
}
],
"logging_steps": 2,
"max_steps": 1096,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1698152759427072.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}