{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.988183161004431,
"eval_steps": 500,
"global_step": 1352,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.059084194977843424,
"grad_norm": 0.6387189059092414,
"learning_rate": 9.998650208062712e-05,
"loss": 0.6972,
"step": 10
},
{
"epoch": 0.11816838995568685,
"grad_norm": 0.48601036335500336,
"learning_rate": 9.994601561026155e-05,
"loss": 0.5423,
"step": 20
},
{
"epoch": 0.17725258493353027,
"grad_norm": 0.40437591619558344,
"learning_rate": 9.98785624482278e-05,
"loss": 0.5149,
"step": 30
},
{
"epoch": 0.2363367799113737,
"grad_norm": 0.5030211090458666,
"learning_rate": 9.978417901361958e-05,
"loss": 0.513,
"step": 40
},
{
"epoch": 0.29542097488921715,
"grad_norm": 0.49098083558596206,
"learning_rate": 9.96629162656365e-05,
"loss": 0.4957,
"step": 50
},
{
"epoch": 0.35450516986706054,
"grad_norm": 0.6329012343929873,
"learning_rate": 9.951483967607041e-05,
"loss": 0.495,
"step": 60
},
{
"epoch": 0.413589364844904,
"grad_norm": 0.48252149699962754,
"learning_rate": 9.934002919395592e-05,
"loss": 0.4943,
"step": 70
},
{
"epoch": 0.4726735598227474,
"grad_norm": 0.5221971068930835,
"learning_rate": 9.91385792024048e-05,
"loss": 0.4738,
"step": 80
},
{
"epoch": 0.5317577548005908,
"grad_norm": 0.42549092560536134,
"learning_rate": 9.891059846764679e-05,
"loss": 0.4565,
"step": 90
},
{
"epoch": 0.5908419497784343,
"grad_norm": 0.4606171719652433,
"learning_rate": 9.865621008030492e-05,
"loss": 0.4674,
"step": 100
},
{
"epoch": 0.6499261447562777,
"grad_norm": 0.46353540699946943,
"learning_rate": 9.83755513889369e-05,
"loss": 0.4727,
"step": 110
},
{
"epoch": 0.7090103397341211,
"grad_norm": 0.4815928480268326,
"learning_rate": 9.80687739258782e-05,
"loss": 0.4736,
"step": 120
},
{
"epoch": 0.7680945347119645,
"grad_norm": 0.485838392040906,
"learning_rate": 9.773604332542729e-05,
"loss": 0.47,
"step": 130
},
{
"epoch": 0.827178729689808,
"grad_norm": 0.5025850953484241,
"learning_rate": 9.737753923441688e-05,
"loss": 0.467,
"step": 140
},
{
"epoch": 0.8862629246676514,
"grad_norm": 0.45997226542102815,
"learning_rate": 9.69934552152196e-05,
"loss": 0.4522,
"step": 150
},
{
"epoch": 0.9453471196454948,
"grad_norm": 0.4546309634161405,
"learning_rate": 9.658399864124037e-05,
"loss": 0.4613,
"step": 160
},
{
"epoch": 1.0044313146233383,
"grad_norm": 0.6577670848472944,
"learning_rate": 9.61493905849521e-05,
"loss": 0.4641,
"step": 170
},
{
"epoch": 1.0635155096011817,
"grad_norm": 0.5743711541659764,
"learning_rate": 9.568986569853487e-05,
"loss": 0.3946,
"step": 180
},
{
"epoch": 1.122599704579025,
"grad_norm": 0.5374571463070855,
"learning_rate": 9.520567208718337e-05,
"loss": 0.3882,
"step": 190
},
{
"epoch": 1.1816838995568686,
"grad_norm": 0.6531134738909915,
"learning_rate": 9.469707117515067e-05,
"loss": 0.4205,
"step": 200
},
{
"epoch": 1.240768094534712,
"grad_norm": 0.5201345622237219,
"learning_rate": 9.416433756460091e-05,
"loss": 0.386,
"step": 210
},
{
"epoch": 1.2998522895125553,
"grad_norm": 0.7267530776250423,
"learning_rate": 9.360775888734698e-05,
"loss": 0.4096,
"step": 220
},
{
"epoch": 1.3589364844903988,
"grad_norm": 0.5480147887972682,
"learning_rate": 9.302763564955331e-05,
"loss": 0.3921,
"step": 230
},
{
"epoch": 1.4180206794682422,
"grad_norm": 0.6228502280992079,
"learning_rate": 9.242428106948749e-05,
"loss": 0.3788,
"step": 240
},
{
"epoch": 1.4771048744460857,
"grad_norm": 0.7119116813192373,
"learning_rate": 9.179802090840853e-05,
"loss": 0.3894,
"step": 250
},
{
"epoch": 1.536189069423929,
"grad_norm": 0.5987495739217404,
"learning_rate": 9.114919329468282e-05,
"loss": 0.3707,
"step": 260
},
{
"epoch": 1.5952732644017726,
"grad_norm": 0.6278387167291306,
"learning_rate": 9.04781485412231e-05,
"loss": 0.3938,
"step": 270
},
{
"epoch": 1.654357459379616,
"grad_norm": 0.5312391005336979,
"learning_rate": 8.978524895634842e-05,
"loss": 0.3799,
"step": 280
},
{
"epoch": 1.7134416543574593,
"grad_norm": 0.7329625126762797,
"learning_rate": 8.907086864816803e-05,
"loss": 0.403,
"step": 290
},
{
"epoch": 1.7725258493353029,
"grad_norm": 0.5600401734580108,
"learning_rate": 8.833539332259398e-05,
"loss": 0.3758,
"step": 300
},
{
"epoch": 1.8316100443131462,
"grad_norm": 0.503395394356135,
"learning_rate": 8.757922007509207e-05,
"loss": 0.3963,
"step": 310
},
{
"epoch": 1.8906942392909896,
"grad_norm": 0.6324775953771359,
"learning_rate": 8.680275717628337e-05,
"loss": 0.3858,
"step": 320
},
{
"epoch": 1.9497784342688331,
"grad_norm": 0.5574107174736728,
"learning_rate": 8.600642385151205e-05,
"loss": 0.3799,
"step": 330
},
{
"epoch": 2.0088626292466767,
"grad_norm": 0.5250864362886176,
"learning_rate": 8.519065005449858e-05,
"loss": 0.3763,
"step": 340
},
{
"epoch": 2.06794682422452,
"grad_norm": 0.7754155552722259,
"learning_rate": 8.43558762352005e-05,
"loss": 0.2934,
"step": 350
},
{
"epoch": 2.1270310192023634,
"grad_norm": 0.6889925166837503,
"learning_rate": 8.350255310200612e-05,
"loss": 0.3078,
"step": 360
},
{
"epoch": 2.186115214180207,
"grad_norm": 0.6281519102970855,
"learning_rate": 8.263114137838947e-05,
"loss": 0.3028,
"step": 370
},
{
"epoch": 2.24519940915805,
"grad_norm": 0.7619069663070173,
"learning_rate": 8.174211155415799e-05,
"loss": 0.2972,
"step": 380
},
{
"epoch": 2.3042836041358936,
"grad_norm": 0.6469214076306162,
"learning_rate": 8.083594363142717e-05,
"loss": 0.2995,
"step": 390
},
{
"epoch": 2.363367799113737,
"grad_norm": 0.7761443706661119,
"learning_rate": 7.991312686545937e-05,
"loss": 0.2963,
"step": 400
},
{
"epoch": 2.4224519940915803,
"grad_norm": 0.6652219653359559,
"learning_rate": 7.897415950050676e-05,
"loss": 0.2987,
"step": 410
},
{
"epoch": 2.481536189069424,
"grad_norm": 0.7586700003856579,
"learning_rate": 7.801954850080075e-05,
"loss": 0.3092,
"step": 420
},
{
"epoch": 2.5406203840472674,
"grad_norm": 0.6663785963305503,
"learning_rate": 7.704980927683359e-05,
"loss": 0.2951,
"step": 430
},
{
"epoch": 2.5997045790251105,
"grad_norm": 0.6913254608481674,
"learning_rate": 7.60654654070796e-05,
"loss": 0.3097,
"step": 440
},
{
"epoch": 2.658788774002954,
"grad_norm": 0.6943652391419598,
"learning_rate": 7.506704835530634e-05,
"loss": 0.2999,
"step": 450
},
{
"epoch": 2.7178729689807977,
"grad_norm": 0.644096596125316,
"learning_rate": 7.405509718362842e-05,
"loss": 0.2905,
"step": 460
},
{
"epoch": 2.7769571639586412,
"grad_norm": 0.7660517390503399,
"learning_rate": 7.303015826145885e-05,
"loss": 0.309,
"step": 470
},
{
"epoch": 2.8360413589364843,
"grad_norm": 0.7708918433168639,
"learning_rate": 7.199278497051498e-05,
"loss": 0.302,
"step": 480
},
{
"epoch": 2.895125553914328,
"grad_norm": 0.626692120748867,
"learning_rate": 7.094353740603839e-05,
"loss": 0.297,
"step": 490
},
{
"epoch": 2.9542097488921715,
"grad_norm": 0.7926892798861292,
"learning_rate": 6.988298207439021e-05,
"loss": 0.3101,
"step": 500
},
{
"epoch": 2.9542097488921715,
"eval_loss": 0.4939613938331604,
"eval_runtime": 53.5552,
"eval_samples_per_second": 2.073,
"eval_steps_per_second": 0.523,
"step": 500
},
{
"epoch": 3.0132939438700146,
"grad_norm": 0.5747285632843896,
"learning_rate": 6.881169158718474e-05,
"loss": 0.2736,
"step": 510
},
{
"epoch": 3.072378138847858,
"grad_norm": 0.7661842329328333,
"learning_rate": 6.773024435212678e-05,
"loss": 0.2187,
"step": 520
},
{
"epoch": 3.1314623338257017,
"grad_norm": 0.8401289392682185,
"learning_rate": 6.663922426071977e-05,
"loss": 0.2057,
"step": 530
},
{
"epoch": 3.1905465288035453,
"grad_norm": 0.7506650372127406,
"learning_rate": 6.553922037301283e-05,
"loss": 0.2067,
"step": 540
},
{
"epoch": 3.2496307237813884,
"grad_norm": 0.7842941173009434,
"learning_rate": 6.443082659955738e-05,
"loss": 0.1989,
"step": 550
},
{
"epoch": 3.308714918759232,
"grad_norm": 0.7593744868915973,
"learning_rate": 6.331464138074493e-05,
"loss": 0.2179,
"step": 560
},
{
"epoch": 3.3677991137370755,
"grad_norm": 0.756091123285611,
"learning_rate": 6.219126736369903e-05,
"loss": 0.2176,
"step": 570
},
{
"epoch": 3.4268833087149186,
"grad_norm": 0.7869512689224245,
"learning_rate": 6.106131107689599e-05,
"loss": 0.2215,
"step": 580
},
{
"epoch": 3.485967503692762,
"grad_norm": 0.7636724676762149,
"learning_rate": 5.9925382602689974e-05,
"loss": 0.2153,
"step": 590
},
{
"epoch": 3.5450516986706058,
"grad_norm": 0.6815318566179079,
"learning_rate": 5.8784095247919305e-05,
"loss": 0.2133,
"step": 600
},
{
"epoch": 3.604135893648449,
"grad_norm": 0.7848948829508617,
"learning_rate": 5.763806521277184e-05,
"loss": 0.2109,
"step": 610
},
{
"epoch": 3.6632200886262924,
"grad_norm": 0.7715200213470335,
"learning_rate": 5.648791125808809e-05,
"loss": 0.2214,
"step": 620
},
{
"epoch": 3.722304283604136,
"grad_norm": 0.6480973448749833,
"learning_rate": 5.5334254371281934e-05,
"loss": 0.212,
"step": 630
},
{
"epoch": 3.781388478581979,
"grad_norm": 0.6618796234383102,
"learning_rate": 5.417771743105907e-05,
"loss": 0.2196,
"step": 640
},
{
"epoch": 3.8404726735598227,
"grad_norm": 0.9462710180746308,
"learning_rate": 5.3018924871114305e-05,
"loss": 0.2145,
"step": 650
},
{
"epoch": 3.8995568685376663,
"grad_norm": 0.7674586984793125,
"learning_rate": 5.185850234298942e-05,
"loss": 0.2199,
"step": 660
},
{
"epoch": 3.9586410635155094,
"grad_norm": 0.8420752213128357,
"learning_rate": 5.0697076378273354e-05,
"loss": 0.218,
"step": 670
},
{
"epoch": 4.017725258493353,
"grad_norm": 0.6235542870137168,
"learning_rate": 4.953527405032723e-05,
"loss": 0.1987,
"step": 680
},
{
"epoch": 4.0768094534711965,
"grad_norm": 0.8247620064480204,
"learning_rate": 4.8373722635717086e-05,
"loss": 0.1425,
"step": 690
},
{
"epoch": 4.13589364844904,
"grad_norm": 0.806070554149328,
"learning_rate": 4.721304927553658e-05,
"loss": 0.1313,
"step": 700
},
{
"epoch": 4.194977843426884,
"grad_norm": 1.1960115947293068,
"learning_rate": 4.60538806368031e-05,
"loss": 0.1397,
"step": 710
},
{
"epoch": 4.254062038404727,
"grad_norm": 0.7571134276917469,
"learning_rate": 4.489684257410958e-05,
"loss": 0.1421,
"step": 720
},
{
"epoch": 4.31314623338257,
"grad_norm": 0.8865025891479803,
"learning_rate": 4.374255979171538e-05,
"loss": 0.1386,
"step": 730
},
{
"epoch": 4.372230428360414,
"grad_norm": 0.9290220213968287,
"learning_rate": 4.2591655506257645e-05,
"loss": 0.1444,
"step": 740
},
{
"epoch": 4.431314623338257,
"grad_norm": 0.8896418597603776,
"learning_rate": 4.144475111026643e-05,
"loss": 0.1391,
"step": 750
},
{
"epoch": 4.4903988183161,
"grad_norm": 0.9053211649213782,
"learning_rate": 4.030246583666437e-05,
"loss": 0.1438,
"step": 760
},
{
"epoch": 4.549483013293944,
"grad_norm": 0.8453862385052026,
"learning_rate": 3.9165416424432414e-05,
"loss": 0.1415,
"step": 770
},
{
"epoch": 4.608567208271787,
"grad_norm": 0.8724405655899441,
"learning_rate": 3.803421678562213e-05,
"loss": 0.1492,
"step": 780
},
{
"epoch": 4.66765140324963,
"grad_norm": 0.98302153579186,
"learning_rate": 3.690947767389426e-05,
"loss": 0.1512,
"step": 790
},
{
"epoch": 4.726735598227474,
"grad_norm": 0.8137008256198869,
"learning_rate": 3.57918063547627e-05,
"loss": 0.1481,
"step": 800
},
{
"epoch": 4.7858197932053175,
"grad_norm": 0.8604546990181478,
"learning_rate": 3.468180627772144e-05,
"loss": 0.1418,
"step": 810
},
{
"epoch": 4.844903988183161,
"grad_norm": 0.8028241587093687,
"learning_rate": 3.358007675043224e-05,
"loss": 0.146,
"step": 820
},
{
"epoch": 4.903988183161005,
"grad_norm": 0.8209334713352179,
"learning_rate": 3.2487212615148316e-05,
"loss": 0.1407,
"step": 830
},
{
"epoch": 4.963072378138848,
"grad_norm": 0.9318035909918932,
"learning_rate": 3.1403803927549006e-05,
"loss": 0.1502,
"step": 840
},
{
"epoch": 5.022156573116692,
"grad_norm": 0.6982555180872486,
"learning_rate": 3.0330435638158806e-05,
"loss": 0.1322,
"step": 850
},
{
"epoch": 5.081240768094535,
"grad_norm": 0.9099529668785485,
"learning_rate": 2.9267687276522876e-05,
"loss": 0.0985,
"step": 860
},
{
"epoch": 5.140324963072378,
"grad_norm": 0.8093436219539469,
"learning_rate": 2.821613263830912e-05,
"loss": 0.0929,
"step": 870
},
{
"epoch": 5.199409158050222,
"grad_norm": 0.908675419764728,
"learning_rate": 2.717633947550651e-05,
"loss": 0.0941,
"step": 880
},
{
"epoch": 5.258493353028065,
"grad_norm": 0.9144782506889362,
"learning_rate": 2.614886918988604e-05,
"loss": 0.0951,
"step": 890
},
{
"epoch": 5.317577548005908,
"grad_norm": 0.8302439773379418,
"learning_rate": 2.5134276529890644e-05,
"loss": 0.0926,
"step": 900
},
{
"epoch": 5.376661742983752,
"grad_norm": 0.8145999759475107,
"learning_rate": 2.4133109291117156e-05,
"loss": 0.095,
"step": 910
},
{
"epoch": 5.435745937961595,
"grad_norm": 0.7459552675334057,
"learning_rate": 2.314590802055232e-05,
"loss": 0.0886,
"step": 920
},
{
"epoch": 5.4948301329394384,
"grad_norm": 0.9033419203896008,
"learning_rate": 2.2173205724722318e-05,
"loss": 0.096,
"step": 930
},
{
"epoch": 5.5539143279172825,
"grad_norm": 0.882377183289936,
"learning_rate": 2.121552758191366e-05,
"loss": 0.0962,
"step": 940
},
{
"epoch": 5.612998522895126,
"grad_norm": 0.8033415953079253,
"learning_rate": 2.027339065862064e-05,
"loss": 0.0985,
"step": 950
},
{
"epoch": 5.672082717872969,
"grad_norm": 0.9269892443045443,
"learning_rate": 1.934730363037237e-05,
"loss": 0.0939,
"step": 960
},
{
"epoch": 5.731166912850813,
"grad_norm": 0.8483635995759108,
"learning_rate": 1.843776650709046e-05,
"loss": 0.0975,
"step": 970
},
{
"epoch": 5.790251107828656,
"grad_norm": 0.7664349269568396,
"learning_rate": 1.7545270363125153e-05,
"loss": 0.093,
"step": 980
},
{
"epoch": 5.849335302806499,
"grad_norm": 0.7270945706246518,
"learning_rate": 1.6670297072116165e-05,
"loss": 0.0959,
"step": 990
},
{
"epoch": 5.908419497784343,
"grad_norm": 0.4763779090409354,
"learning_rate": 1.581331904682089e-05,
"loss": 0.0918,
"step": 1000
},
{
"epoch": 5.908419497784343,
"eval_loss": 0.7311862707138062,
"eval_runtime": 52.641,
"eval_samples_per_second": 2.109,
"eval_steps_per_second": 0.532,
"step": 1000
},
{
"epoch": 5.967503692762186,
"grad_norm": 0.7880059330281542,
"learning_rate": 1.4974798984050942e-05,
"loss": 0.0933,
"step": 1010
},
{
"epoch": 6.026587887740029,
"grad_norm": 0.5434228078562925,
"learning_rate": 1.4155189614854275e-05,
"loss": 0.0828,
"step": 1020
},
{
"epoch": 6.085672082717873,
"grad_norm": 0.6734629959345333,
"learning_rate": 1.3354933460078217e-05,
"loss": 0.0678,
"step": 1030
},
{
"epoch": 6.144756277695716,
"grad_norm": 0.6754051572707525,
"learning_rate": 1.257446259144494e-05,
"loss": 0.0629,
"step": 1040
},
{
"epoch": 6.203840472673559,
"grad_norm": 0.6799650760230072,
"learning_rate": 1.1814198398268794e-05,
"loss": 0.0697,
"step": 1050
},
{
"epoch": 6.262924667651403,
"grad_norm": 0.6540795313123376,
"learning_rate": 1.1074551359941021e-05,
"loss": 0.0644,
"step": 1060
},
{
"epoch": 6.3220088626292466,
"grad_norm": 0.814973115457644,
"learning_rate": 1.0355920824305127e-05,
"loss": 0.069,
"step": 1070
},
{
"epoch": 6.381093057607091,
"grad_norm": 0.8090154097885933,
"learning_rate": 9.658694792042284e-06,
"loss": 0.0666,
"step": 1080
},
{
"epoch": 6.440177252584934,
"grad_norm": 0.7016078216972328,
"learning_rate": 8.98324970718319e-06,
"loss": 0.0679,
"step": 1090
},
{
"epoch": 6.499261447562777,
"grad_norm": 0.7288879724763213,
"learning_rate": 8.329950253859703e-06,
"loss": 0.0656,
"step": 1100
},
{
"epoch": 6.558345642540621,
"grad_norm": 0.7247263147770627,
"learning_rate": 7.699149159405734e-06,
"loss": 0.0664,
"step": 1110
},
{
"epoch": 6.617429837518464,
"grad_norm": 0.6219352284185693,
"learning_rate": 7.0911870039138015e-06,
"loss": 0.0673,
"step": 1120
},
{
"epoch": 6.676514032496307,
"grad_norm": 0.7481718513639776,
"learning_rate": 6.506392036350167e-06,
"loss": 0.0697,
"step": 1130
},
{
"epoch": 6.735598227474151,
"grad_norm": 0.7642809598885449,
"learning_rate": 5.945079997327713e-06,
"loss": 0.0669,
"step": 1140
},
{
"epoch": 6.794682422451994,
"grad_norm": 0.7060296135322504,
"learning_rate": 5.407553948632277e-06,
"loss": 0.0683,
"step": 1150
},
{
"epoch": 6.853766617429837,
"grad_norm": 0.7218214547563072,
"learning_rate": 4.894104109594466e-06,
"loss": 0.0684,
"step": 1160
},
{
"epoch": 6.912850812407681,
"grad_norm": 0.6944520001481999,
"learning_rate": 4.405007700395497e-06,
"loss": 0.0687,
"step": 1170
},
{
"epoch": 6.971935007385524,
"grad_norm": 0.8260640922228497,
"learning_rate": 3.940528792391223e-06,
"loss": 0.0721,
"step": 1180
},
{
"epoch": 7.0310192023633675,
"grad_norm": 0.6707004455651014,
"learning_rate": 3.5009181655356826e-06,
"loss": 0.0653,
"step": 1190
},
{
"epoch": 7.0901033973412115,
"grad_norm": 0.6805257469848828,
"learning_rate": 3.0864131729807398e-06,
"loss": 0.0579,
"step": 1200
},
{
"epoch": 7.149187592319055,
"grad_norm": 0.5495171159123228,
"learning_rate": 2.6972376129251686e-06,
"loss": 0.0585,
"step": 1210
},
{
"epoch": 7.208271787296898,
"grad_norm": 0.6136512130241976,
"learning_rate": 2.3336016077822154e-06,
"loss": 0.0562,
"step": 1220
},
{
"epoch": 7.267355982274742,
"grad_norm": 0.7591755724568,
"learning_rate": 1.9957014907310224e-06,
"loss": 0.0572,
"step": 1230
},
{
"epoch": 7.326440177252585,
"grad_norm": 0.6022538817881757,
"learning_rate": 1.6837196997130434e-06,
"loss": 0.06,
"step": 1240
},
{
"epoch": 7.385524372230428,
"grad_norm": 0.774444698651241,
"learning_rate": 1.3978246789307149e-06,
"loss": 0.0565,
"step": 1250
},
{
"epoch": 7.444608567208272,
"grad_norm": 0.7442025153147653,
"learning_rate": 1.1381707879016157e-06,
"loss": 0.0562,
"step": 1260
},
{
"epoch": 7.503692762186115,
"grad_norm": 0.5700289390004409,
"learning_rate": 9.048982181171894e-07,
"loss": 0.0556,
"step": 1270
},
{
"epoch": 7.562776957163958,
"grad_norm": 0.695724523693832,
"learning_rate": 6.98132917350991e-07,
"loss": 0.0525,
"step": 1280
},
{
"epoch": 7.621861152141802,
"grad_norm": 0.6867233808424225,
"learning_rate": 5.179865216573654e-07,
"loss": 0.0549,
"step": 1290
},
{
"epoch": 7.680945347119645,
"grad_norm": 0.7051791342497011,
"learning_rate": 3.6455629509730136e-07,
"loss": 0.0583,
"step": 1300
},
{
"epoch": 7.7400295420974885,
"grad_norm": 0.6772707185024253,
"learning_rate": 2.3792507722388835e-07,
"loss": 0.0553,
"step": 1310
},
{
"epoch": 7.7991137370753325,
"grad_norm": 0.6815226755163512,
"learning_rate": 1.3816123835588834e-07,
"loss": 0.0556,
"step": 1320
},
{
"epoch": 7.858197932053176,
"grad_norm": 0.6475756485632287,
"learning_rate": 6.531864266343113e-08,
"loss": 0.0589,
"step": 1330
},
{
"epoch": 7.917282127031019,
"grad_norm": 0.7007100620520975,
"learning_rate": 1.943661908586636e-08,
"loss": 0.0603,
"step": 1340
},
{
"epoch": 7.976366322008863,
"grad_norm": 0.7030879428421536,
"learning_rate": 5.399400973882251e-10,
"loss": 0.0568,
"step": 1350
},
{
"epoch": 7.988183161004431,
"step": 1352,
"total_flos": 1435586865135616.0,
"train_loss": 0.22072081432606167,
"train_runtime": 22818.9262,
"train_samples_per_second": 1.896,
"train_steps_per_second": 0.059
}
],
"logging_steps": 10,
"max_steps": 1352,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1435586865135616.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}