{
"best_metric": 0.26649972796440125,
"best_model_checkpoint": "classify-google-basic-3/checkpoint-7857",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 7857,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009545628102329133,
"grad_norm": 10.965401649475098,
"learning_rate": 1.4631043256997457e-06,
"loss": 2.2469,
"step": 25
},
{
"epoch": 0.019091256204658267,
"grad_norm": 10.05473804473877,
"learning_rate": 3.053435114503817e-06,
"loss": 1.6956,
"step": 50
},
{
"epoch": 0.0286368843069874,
"grad_norm": 16.2789306640625,
"learning_rate": 4.643765903307888e-06,
"loss": 1.2291,
"step": 75
},
{
"epoch": 0.038182512409316534,
"grad_norm": 11.012917518615723,
"learning_rate": 6.2340966921119596e-06,
"loss": 1.1946,
"step": 100
},
{
"epoch": 0.047728140511645666,
"grad_norm": 5.637146949768066,
"learning_rate": 7.824427480916032e-06,
"loss": 1.1144,
"step": 125
},
{
"epoch": 0.0572737686139748,
"grad_norm": 13.986495971679688,
"learning_rate": 9.414758269720102e-06,
"loss": 0.9237,
"step": 150
},
{
"epoch": 0.06681939671630394,
"grad_norm": 10.580368995666504,
"learning_rate": 1.1005089058524173e-05,
"loss": 0.9343,
"step": 175
},
{
"epoch": 0.07636502481863307,
"grad_norm": 18.909542083740234,
"learning_rate": 1.2595419847328243e-05,
"loss": 0.7544,
"step": 200
},
{
"epoch": 0.0859106529209622,
"grad_norm": 9.856317520141602,
"learning_rate": 1.4185750636132317e-05,
"loss": 0.7901,
"step": 225
},
{
"epoch": 0.09545628102329133,
"grad_norm": 11.235512733459473,
"learning_rate": 1.5776081424936386e-05,
"loss": 0.7968,
"step": 250
},
{
"epoch": 0.10500190912562046,
"grad_norm": 11.741060256958008,
"learning_rate": 1.736641221374046e-05,
"loss": 0.8511,
"step": 275
},
{
"epoch": 0.1145475372279496,
"grad_norm": 10.903525352478027,
"learning_rate": 1.895674300254453e-05,
"loss": 0.5989,
"step": 300
},
{
"epoch": 0.12409316533027873,
"grad_norm": 18.30093002319336,
"learning_rate": 2.05470737913486e-05,
"loss": 0.862,
"step": 325
},
{
"epoch": 0.13363879343260787,
"grad_norm": 14.775749206542969,
"learning_rate": 2.2137404580152673e-05,
"loss": 0.8368,
"step": 350
},
{
"epoch": 0.143184421534937,
"grad_norm": 13.009576797485352,
"learning_rate": 2.3727735368956743e-05,
"loss": 0.7068,
"step": 375
},
{
"epoch": 0.15273004963726614,
"grad_norm": 10.315067291259766,
"learning_rate": 2.5318066157760816e-05,
"loss": 0.6387,
"step": 400
},
{
"epoch": 0.16227567773959525,
"grad_norm": 9.280678749084473,
"learning_rate": 2.6908396946564886e-05,
"loss": 0.6895,
"step": 425
},
{
"epoch": 0.1718213058419244,
"grad_norm": 11.16199016571045,
"learning_rate": 2.849872773536896e-05,
"loss": 0.7864,
"step": 450
},
{
"epoch": 0.18136693394425354,
"grad_norm": 13.201117515563965,
"learning_rate": 3.008905852417303e-05,
"loss": 0.7781,
"step": 475
},
{
"epoch": 0.19091256204658266,
"grad_norm": 0.5530382394790649,
"learning_rate": 3.16793893129771e-05,
"loss": 0.6258,
"step": 500
},
{
"epoch": 0.2004581901489118,
"grad_norm": 11.698955535888672,
"learning_rate": 3.326972010178117e-05,
"loss": 0.7465,
"step": 525
},
{
"epoch": 0.21000381825124093,
"grad_norm": 11.760673522949219,
"learning_rate": 3.4860050890585245e-05,
"loss": 0.96,
"step": 550
},
{
"epoch": 0.21954944635357007,
"grad_norm": 9.295748710632324,
"learning_rate": 3.645038167938932e-05,
"loss": 0.6984,
"step": 575
},
{
"epoch": 0.2290950744558992,
"grad_norm": 4.5281548500061035,
"learning_rate": 3.8040712468193385e-05,
"loss": 0.6741,
"step": 600
},
{
"epoch": 0.23864070255822833,
"grad_norm": 5.818964004516602,
"learning_rate": 3.963104325699746e-05,
"loss": 0.7867,
"step": 625
},
{
"epoch": 0.24818633066055745,
"grad_norm": 4.941270351409912,
"learning_rate": 4.122137404580153e-05,
"loss": 0.7967,
"step": 650
},
{
"epoch": 0.25773195876288657,
"grad_norm": 1.6748733520507812,
"learning_rate": 4.28117048346056e-05,
"loss": 0.7384,
"step": 675
},
{
"epoch": 0.26727758686521574,
"grad_norm": 4.042880535125732,
"learning_rate": 4.440203562340967e-05,
"loss": 0.8029,
"step": 700
},
{
"epoch": 0.27682321496754486,
"grad_norm": 8.729666709899902,
"learning_rate": 4.5992366412213745e-05,
"loss": 0.6758,
"step": 725
},
{
"epoch": 0.286368843069874,
"grad_norm": 5.898663520812988,
"learning_rate": 4.758269720101781e-05,
"loss": 0.8116,
"step": 750
},
{
"epoch": 0.29591447117220315,
"grad_norm": 14.431380271911621,
"learning_rate": 4.9173027989821884e-05,
"loss": 0.7054,
"step": 775
},
{
"epoch": 0.30546009927453227,
"grad_norm": 9.767568588256836,
"learning_rate": 4.991514637250743e-05,
"loss": 0.6299,
"step": 800
},
{
"epoch": 0.3150057273768614,
"grad_norm": 2.037874698638916,
"learning_rate": 4.973836798189789e-05,
"loss": 0.574,
"step": 825
},
{
"epoch": 0.3245513554791905,
"grad_norm": 1.7174073457717896,
"learning_rate": 4.9561589591288364e-05,
"loss": 0.8259,
"step": 850
},
{
"epoch": 0.3340969835815197,
"grad_norm": 6.078500270843506,
"learning_rate": 4.9384811200678834e-05,
"loss": 0.5543,
"step": 875
},
{
"epoch": 0.3436426116838488,
"grad_norm": 6.436212062835693,
"learning_rate": 4.92080328100693e-05,
"loss": 0.6127,
"step": 900
},
{
"epoch": 0.3531882397861779,
"grad_norm": 10.703376770019531,
"learning_rate": 4.903125441945977e-05,
"loss": 0.5309,
"step": 925
},
{
"epoch": 0.3627338678885071,
"grad_norm": 4.9388651847839355,
"learning_rate": 4.885447602885024e-05,
"loss": 0.8011,
"step": 950
},
{
"epoch": 0.3722794959908362,
"grad_norm": 7.828670978546143,
"learning_rate": 4.86776976382407e-05,
"loss": 1.0022,
"step": 975
},
{
"epoch": 0.3818251240931653,
"grad_norm": 9.149444580078125,
"learning_rate": 4.8500919247631174e-05,
"loss": 0.6282,
"step": 1000
},
{
"epoch": 0.39137075219549444,
"grad_norm": 10.579568862915039,
"learning_rate": 4.832414085702164e-05,
"loss": 0.7651,
"step": 1025
},
{
"epoch": 0.4009163802978236,
"grad_norm": 5.5516815185546875,
"learning_rate": 4.814736246641211e-05,
"loss": 0.7698,
"step": 1050
},
{
"epoch": 0.41046200840015273,
"grad_norm": 9.71567153930664,
"learning_rate": 4.797058407580258e-05,
"loss": 0.626,
"step": 1075
},
{
"epoch": 0.42000763650248185,
"grad_norm": 8.423727989196777,
"learning_rate": 4.779380568519304e-05,
"loss": 0.6038,
"step": 1100
},
{
"epoch": 0.42955326460481097,
"grad_norm": 17.86850357055664,
"learning_rate": 4.761702729458351e-05,
"loss": 0.8356,
"step": 1125
},
{
"epoch": 0.43909889270714014,
"grad_norm": 11.408333778381348,
"learning_rate": 4.744024890397398e-05,
"loss": 0.6385,
"step": 1150
},
{
"epoch": 0.44864452080946926,
"grad_norm": 9.420711517333984,
"learning_rate": 4.726347051336445e-05,
"loss": 0.5861,
"step": 1175
},
{
"epoch": 0.4581901489117984,
"grad_norm": 5.354550838470459,
"learning_rate": 4.708669212275492e-05,
"loss": 0.7066,
"step": 1200
},
{
"epoch": 0.46773577701412755,
"grad_norm": 8.641571998596191,
"learning_rate": 4.690991373214538e-05,
"loss": 0.5686,
"step": 1225
},
{
"epoch": 0.47728140511645667,
"grad_norm": 9.010872840881348,
"learning_rate": 4.673313534153586e-05,
"loss": 0.6195,
"step": 1250
},
{
"epoch": 0.4868270332187858,
"grad_norm": 2.913404941558838,
"learning_rate": 4.655635695092632e-05,
"loss": 0.7095,
"step": 1275
},
{
"epoch": 0.4963726613211149,
"grad_norm": 8.326951026916504,
"learning_rate": 4.6379578560316786e-05,
"loss": 0.6289,
"step": 1300
},
{
"epoch": 0.5059182894234441,
"grad_norm": 5.571317672729492,
"learning_rate": 4.6202800169707257e-05,
"loss": 0.6153,
"step": 1325
},
{
"epoch": 0.5154639175257731,
"grad_norm": 12.953869819641113,
"learning_rate": 4.602602177909773e-05,
"loss": 0.538,
"step": 1350
},
{
"epoch": 0.5250095456281023,
"grad_norm": 11.328479766845703,
"learning_rate": 4.58492433884882e-05,
"loss": 0.6928,
"step": 1375
},
{
"epoch": 0.5345551737304315,
"grad_norm": 7.584653854370117,
"learning_rate": 4.567246499787866e-05,
"loss": 0.6932,
"step": 1400
},
{
"epoch": 0.5441008018327605,
"grad_norm": 5.772885799407959,
"learning_rate": 4.5495686607269125e-05,
"loss": 0.7512,
"step": 1425
},
{
"epoch": 0.5536464299350897,
"grad_norm": 1.1656907796859741,
"learning_rate": 4.53189082166596e-05,
"loss": 0.5952,
"step": 1450
},
{
"epoch": 0.5631920580374189,
"grad_norm": 6.189632892608643,
"learning_rate": 4.5142129826050066e-05,
"loss": 0.5763,
"step": 1475
},
{
"epoch": 0.572737686139748,
"grad_norm": 5.039992332458496,
"learning_rate": 4.496535143544054e-05,
"loss": 0.6456,
"step": 1500
},
{
"epoch": 0.5822833142420771,
"grad_norm": 4.58499813079834,
"learning_rate": 4.4788573044831e-05,
"loss": 0.662,
"step": 1525
},
{
"epoch": 0.5918289423444063,
"grad_norm": 9.357542991638184,
"learning_rate": 4.461179465422147e-05,
"loss": 0.6355,
"step": 1550
},
{
"epoch": 0.6013745704467354,
"grad_norm": 4.846512794494629,
"learning_rate": 4.443501626361194e-05,
"loss": 0.5721,
"step": 1575
},
{
"epoch": 0.6109201985490645,
"grad_norm": 3.824963331222534,
"learning_rate": 4.4258237873002405e-05,
"loss": 0.611,
"step": 1600
},
{
"epoch": 0.6204658266513937,
"grad_norm": 7.103879928588867,
"learning_rate": 4.4081459482392876e-05,
"loss": 0.3302,
"step": 1625
},
{
"epoch": 0.6300114547537228,
"grad_norm": 4.977944374084473,
"learning_rate": 4.3904681091783346e-05,
"loss": 0.7139,
"step": 1650
},
{
"epoch": 0.639557082856052,
"grad_norm": 0.8024571537971497,
"learning_rate": 4.372790270117381e-05,
"loss": 0.6078,
"step": 1675
},
{
"epoch": 0.649102710958381,
"grad_norm": 3.4169600009918213,
"learning_rate": 4.355112431056428e-05,
"loss": 0.5165,
"step": 1700
},
{
"epoch": 0.6586483390607102,
"grad_norm": 7.830475330352783,
"learning_rate": 4.3374345919954744e-05,
"loss": 0.7304,
"step": 1725
},
{
"epoch": 0.6681939671630394,
"grad_norm": 6.655337810516357,
"learning_rate": 4.3197567529345215e-05,
"loss": 0.5174,
"step": 1750
},
{
"epoch": 0.6777395952653684,
"grad_norm": 3.1303579807281494,
"learning_rate": 4.3020789138735685e-05,
"loss": 0.6751,
"step": 1775
},
{
"epoch": 0.6872852233676976,
"grad_norm": 4.309839248657227,
"learning_rate": 4.284401074812615e-05,
"loss": 0.5188,
"step": 1800
},
{
"epoch": 0.6968308514700268,
"grad_norm": 4.880556583404541,
"learning_rate": 4.266723235751662e-05,
"loss": 0.5401,
"step": 1825
},
{
"epoch": 0.7063764795723558,
"grad_norm": 7.556941509246826,
"learning_rate": 4.249045396690709e-05,
"loss": 0.4667,
"step": 1850
},
{
"epoch": 0.715922107674685,
"grad_norm": 6.895393371582031,
"learning_rate": 4.2313675576297554e-05,
"loss": 0.7513,
"step": 1875
},
{
"epoch": 0.7254677357770142,
"grad_norm": 2.2813351154327393,
"learning_rate": 4.2136897185688025e-05,
"loss": 0.6731,
"step": 1900
},
{
"epoch": 0.7350133638793432,
"grad_norm": 8.803994178771973,
"learning_rate": 4.196011879507849e-05,
"loss": 0.5013,
"step": 1925
},
{
"epoch": 0.7445589919816724,
"grad_norm": 1.5111371278762817,
"learning_rate": 4.1783340404468966e-05,
"loss": 0.6204,
"step": 1950
},
{
"epoch": 0.7541046200840015,
"grad_norm": 5.194613933563232,
"learning_rate": 4.160656201385943e-05,
"loss": 0.5988,
"step": 1975
},
{
"epoch": 0.7636502481863306,
"grad_norm": 9.08621597290039,
"learning_rate": 4.142978362324989e-05,
"loss": 0.5617,
"step": 2000
},
{
"epoch": 0.7731958762886598,
"grad_norm": 9.078089714050293,
"learning_rate": 4.1253005232640364e-05,
"loss": 0.567,
"step": 2025
},
{
"epoch": 0.7827415043909889,
"grad_norm": 9.430150985717773,
"learning_rate": 4.107622684203083e-05,
"loss": 0.5309,
"step": 2050
},
{
"epoch": 0.7922871324933181,
"grad_norm": 4.572437286376953,
"learning_rate": 4.0899448451421305e-05,
"loss": 0.5597,
"step": 2075
},
{
"epoch": 0.8018327605956472,
"grad_norm": 5.556495189666748,
"learning_rate": 4.072267006081177e-05,
"loss": 0.5909,
"step": 2100
},
{
"epoch": 0.8113783886979763,
"grad_norm": 6.426826477050781,
"learning_rate": 4.054589167020223e-05,
"loss": 0.5907,
"step": 2125
},
{
"epoch": 0.8209240168003055,
"grad_norm": 5.4699530601501465,
"learning_rate": 4.03691132795927e-05,
"loss": 0.7061,
"step": 2150
},
{
"epoch": 0.8304696449026346,
"grad_norm": 3.550435781478882,
"learning_rate": 4.019233488898317e-05,
"loss": 0.6254,
"step": 2175
},
{
"epoch": 0.8400152730049637,
"grad_norm": 3.2830183506011963,
"learning_rate": 4.0015556498373644e-05,
"loss": 0.551,
"step": 2200
},
{
"epoch": 0.8495609011072929,
"grad_norm": 16.018274307250977,
"learning_rate": 3.983877810776411e-05,
"loss": 0.4898,
"step": 2225
},
{
"epoch": 0.8591065292096219,
"grad_norm": 1.2912545204162598,
"learning_rate": 3.966199971715457e-05,
"loss": 0.4058,
"step": 2250
},
{
"epoch": 0.8686521573119511,
"grad_norm": 6.0288896560668945,
"learning_rate": 3.948522132654505e-05,
"loss": 0.6408,
"step": 2275
},
{
"epoch": 0.8781977854142803,
"grad_norm": 3.8952457904815674,
"learning_rate": 3.930844293593551e-05,
"loss": 0.7051,
"step": 2300
},
{
"epoch": 0.8877434135166093,
"grad_norm": 0.5457330346107483,
"learning_rate": 3.913166454532598e-05,
"loss": 0.6625,
"step": 2325
},
{
"epoch": 0.8972890416189385,
"grad_norm": 9.08011245727539,
"learning_rate": 3.895488615471645e-05,
"loss": 0.4587,
"step": 2350
},
{
"epoch": 0.9068346697212677,
"grad_norm": 27.94516372680664,
"learning_rate": 3.877810776410692e-05,
"loss": 0.4202,
"step": 2375
},
{
"epoch": 0.9163802978235968,
"grad_norm": 4.565572261810303,
"learning_rate": 3.860132937349739e-05,
"loss": 0.5311,
"step": 2400
},
{
"epoch": 0.9259259259259259,
"grad_norm": 5.967700004577637,
"learning_rate": 3.842455098288785e-05,
"loss": 0.5012,
"step": 2425
},
{
"epoch": 0.9354715540282551,
"grad_norm": 7.316773414611816,
"learning_rate": 3.824777259227832e-05,
"loss": 0.4585,
"step": 2450
},
{
"epoch": 0.9450171821305842,
"grad_norm": 0.7666218280792236,
"learning_rate": 3.807099420166879e-05,
"loss": 0.5148,
"step": 2475
},
{
"epoch": 0.9545628102329133,
"grad_norm": 8.033583641052246,
"learning_rate": 3.7894215811059256e-05,
"loss": 0.5568,
"step": 2500
},
{
"epoch": 0.9641084383352425,
"grad_norm": 3.06626558303833,
"learning_rate": 3.771743742044973e-05,
"loss": 0.5371,
"step": 2525
},
{
"epoch": 0.9736540664375716,
"grad_norm": 4.116180896759033,
"learning_rate": 3.754065902984019e-05,
"loss": 0.5567,
"step": 2550
},
{
"epoch": 0.9831996945399007,
"grad_norm": 5.341053485870361,
"learning_rate": 3.736388063923066e-05,
"loss": 0.5501,
"step": 2575
},
{
"epoch": 0.9927453226422298,
"grad_norm": 0.6019271016120911,
"learning_rate": 3.718710224862113e-05,
"loss": 0.5312,
"step": 2600
},
{
"epoch": 1.0,
"eval_accuracy": 0.8806796487208859,
"eval_f1_macro": 0.5481103080327878,
"eval_f1_micro": 0.8806796487208859,
"eval_f1_weighted": 0.8693359028593166,
"eval_loss": 0.45116129517555237,
"eval_precision_macro": 0.676151706940216,
"eval_precision_micro": 0.8806796487208859,
"eval_precision_weighted": 0.8732388021312899,
"eval_recall_macro": 0.5439759320123948,
"eval_recall_micro": 0.8806796487208859,
"eval_recall_weighted": 0.8806796487208859,
"eval_runtime": 30.8839,
"eval_samples_per_second": 169.603,
"eval_steps_per_second": 10.62,
"step": 2619
},
{
"epoch": 1.002290950744559,
"grad_norm": 6.413674354553223,
"learning_rate": 3.7010323858011595e-05,
"loss": 0.5644,
"step": 2625
},
{
"epoch": 1.0118365788468882,
"grad_norm": 3.4250919818878174,
"learning_rate": 3.6833545467402066e-05,
"loss": 0.4757,
"step": 2650
},
{
"epoch": 1.0213822069492173,
"grad_norm": 10.983148574829102,
"learning_rate": 3.6656767076792536e-05,
"loss": 0.4419,
"step": 2675
},
{
"epoch": 1.0309278350515463,
"grad_norm": 7.499892234802246,
"learning_rate": 3.6479988686183e-05,
"loss": 0.6204,
"step": 2700
},
{
"epoch": 1.0404734631538755,
"grad_norm": 3.595353603363037,
"learning_rate": 3.630321029557347e-05,
"loss": 0.5175,
"step": 2725
},
{
"epoch": 1.0500190912562046,
"grad_norm": 7.1321539878845215,
"learning_rate": 3.6126431904963934e-05,
"loss": 0.5092,
"step": 2750
},
{
"epoch": 1.0595647193585338,
"grad_norm": 6.408048152923584,
"learning_rate": 3.594965351435441e-05,
"loss": 0.6063,
"step": 2775
},
{
"epoch": 1.069110347460863,
"grad_norm": 6.020789623260498,
"learning_rate": 3.5772875123744875e-05,
"loss": 0.6509,
"step": 2800
},
{
"epoch": 1.0786559755631921,
"grad_norm": 5.091155529022217,
"learning_rate": 3.559609673313534e-05,
"loss": 0.5431,
"step": 2825
},
{
"epoch": 1.088201603665521,
"grad_norm": 6.301450729370117,
"learning_rate": 3.541931834252581e-05,
"loss": 0.5184,
"step": 2850
},
{
"epoch": 1.0977472317678503,
"grad_norm": 3.534942626953125,
"learning_rate": 3.524253995191628e-05,
"loss": 0.3056,
"step": 2875
},
{
"epoch": 1.1072928598701794,
"grad_norm": 3.7696588039398193,
"learning_rate": 3.506576156130675e-05,
"loss": 0.5706,
"step": 2900
},
{
"epoch": 1.1168384879725086,
"grad_norm": 7.325591564178467,
"learning_rate": 3.4888983170697215e-05,
"loss": 0.5182,
"step": 2925
},
{
"epoch": 1.1263841160748378,
"grad_norm": 1.0723004341125488,
"learning_rate": 3.471220478008768e-05,
"loss": 0.4222,
"step": 2950
},
{
"epoch": 1.135929744177167,
"grad_norm": 9.240309715270996,
"learning_rate": 3.4535426389478156e-05,
"loss": 0.4109,
"step": 2975
},
{
"epoch": 1.145475372279496,
"grad_norm": 4.310164451599121,
"learning_rate": 3.435864799886862e-05,
"loss": 0.6056,
"step": 3000
},
{
"epoch": 1.155021000381825,
"grad_norm": 0.06030479073524475,
"learning_rate": 3.418186960825909e-05,
"loss": 0.3311,
"step": 3025
},
{
"epoch": 1.1645666284841543,
"grad_norm": 5.938070297241211,
"learning_rate": 3.4005091217649554e-05,
"loss": 0.5636,
"step": 3050
},
{
"epoch": 1.1741122565864834,
"grad_norm": 3.0062716007232666,
"learning_rate": 3.3828312827040024e-05,
"loss": 0.3369,
"step": 3075
},
{
"epoch": 1.1836578846888126,
"grad_norm": 9.84875202178955,
"learning_rate": 3.3651534436430495e-05,
"loss": 0.3655,
"step": 3100
},
{
"epoch": 1.1932035127911416,
"grad_norm": 0.40218663215637207,
"learning_rate": 3.347475604582096e-05,
"loss": 0.3761,
"step": 3125
},
{
"epoch": 1.2027491408934707,
"grad_norm": 6.9253973960876465,
"learning_rate": 3.329797765521143e-05,
"loss": 0.4421,
"step": 3150
},
{
"epoch": 1.2122947689958,
"grad_norm": 12.239662170410156,
"learning_rate": 3.31211992646019e-05,
"loss": 0.63,
"step": 3175
},
{
"epoch": 1.221840397098129,
"grad_norm": 6.089052200317383,
"learning_rate": 3.294442087399236e-05,
"loss": 0.6012,
"step": 3200
},
{
"epoch": 1.2313860252004583,
"grad_norm": 3.271571159362793,
"learning_rate": 3.2767642483382834e-05,
"loss": 0.4335,
"step": 3225
},
{
"epoch": 1.2409316533027872,
"grad_norm": 10.282990455627441,
"learning_rate": 3.25908640927733e-05,
"loss": 0.573,
"step": 3250
},
{
"epoch": 1.2504772814051164,
"grad_norm": 4.201958179473877,
"learning_rate": 3.241408570216377e-05,
"loss": 0.4124,
"step": 3275
},
{
"epoch": 1.2600229095074456,
"grad_norm": 14.91673469543457,
"learning_rate": 3.223730731155424e-05,
"loss": 0.363,
"step": 3300
},
{
"epoch": 1.2695685376097747,
"grad_norm": 3.084709644317627,
"learning_rate": 3.20605289209447e-05,
"loss": 0.5874,
"step": 3325
},
{
"epoch": 1.279114165712104,
"grad_norm": 9.723969459533691,
"learning_rate": 3.188375053033517e-05,
"loss": 0.4661,
"step": 3350
},
{
"epoch": 1.2886597938144329,
"grad_norm": 0.48647192120552063,
"learning_rate": 3.1706972139725643e-05,
"loss": 0.5431,
"step": 3375
},
{
"epoch": 1.2982054219167622,
"grad_norm": 1.6292872428894043,
"learning_rate": 3.153019374911611e-05,
"loss": 0.535,
"step": 3400
},
{
"epoch": 1.3077510500190912,
"grad_norm": 7.161569595336914,
"learning_rate": 3.135341535850658e-05,
"loss": 0.4232,
"step": 3425
},
{
"epoch": 1.3172966781214204,
"grad_norm": 7.734975337982178,
"learning_rate": 3.117663696789704e-05,
"loss": 0.5533,
"step": 3450
},
{
"epoch": 1.3268423062237495,
"grad_norm": 8.083685874938965,
"learning_rate": 3.099985857728752e-05,
"loss": 0.5145,
"step": 3475
},
{
"epoch": 1.3363879343260787,
"grad_norm": 0.11425119638442993,
"learning_rate": 3.082308018667798e-05,
"loss": 0.3885,
"step": 3500
},
{
"epoch": 1.345933562428408,
"grad_norm": 10.0468168258667,
"learning_rate": 3.0646301796068446e-05,
"loss": 0.5237,
"step": 3525
},
{
"epoch": 1.3554791905307368,
"grad_norm": 5.5162763595581055,
"learning_rate": 3.0469523405458917e-05,
"loss": 0.5081,
"step": 3550
},
{
"epoch": 1.365024818633066,
"grad_norm": 0.6519923210144043,
"learning_rate": 3.0292745014849387e-05,
"loss": 0.6263,
"step": 3575
},
{
"epoch": 1.3745704467353952,
"grad_norm": 5.725049018859863,
"learning_rate": 3.0115966624239855e-05,
"loss": 0.4986,
"step": 3600
},
{
"epoch": 1.3841160748377244,
"grad_norm": 5.0273661613464355,
"learning_rate": 2.993918823363032e-05,
"loss": 0.3676,
"step": 3625
},
{
"epoch": 1.3936617029400535,
"grad_norm": 5.7512288093566895,
"learning_rate": 2.976240984302079e-05,
"loss": 0.5586,
"step": 3650
},
{
"epoch": 1.4032073310423825,
"grad_norm": 6.943084716796875,
"learning_rate": 2.958563145241126e-05,
"loss": 0.5734,
"step": 3675
},
{
"epoch": 1.4127529591447117,
"grad_norm": 6.364502906799316,
"learning_rate": 2.9408853061801726e-05,
"loss": 0.6174,
"step": 3700
},
{
"epoch": 1.4222985872470408,
"grad_norm": 9.310466766357422,
"learning_rate": 2.9232074671192194e-05,
"loss": 0.4916,
"step": 3725
},
{
"epoch": 1.43184421534937,
"grad_norm": 4.276609897613525,
"learning_rate": 2.905529628058266e-05,
"loss": 0.4982,
"step": 3750
},
{
"epoch": 1.4413898434516992,
"grad_norm": 8.0429105758667,
"learning_rate": 2.8878517889973135e-05,
"loss": 0.4593,
"step": 3775
},
{
"epoch": 1.4509354715540281,
"grad_norm": 2.093593120574951,
"learning_rate": 2.8701739499363602e-05,
"loss": 0.5528,
"step": 3800
},
{
"epoch": 1.4604810996563573,
"grad_norm": 0.39132124185562134,
"learning_rate": 2.8524961108754066e-05,
"loss": 0.4153,
"step": 3825
},
{
"epoch": 1.4700267277586865,
"grad_norm": 4.517558574676514,
"learning_rate": 2.8348182718144533e-05,
"loss": 0.4191,
"step": 3850
},
{
"epoch": 1.4795723558610157,
"grad_norm": 3.886760950088501,
"learning_rate": 2.8171404327535007e-05,
"loss": 0.3378,
"step": 3875
},
{
"epoch": 1.4891179839633448,
"grad_norm": 10.147708892822266,
"learning_rate": 2.7994625936925474e-05,
"loss": 0.4821,
"step": 3900
},
{
"epoch": 1.498663612065674,
"grad_norm": 9.77696704864502,
"learning_rate": 2.781784754631594e-05,
"loss": 0.5534,
"step": 3925
},
{
"epoch": 1.5082092401680032,
"grad_norm": 8.54476547241211,
"learning_rate": 2.7641069155706405e-05,
"loss": 0.5286,
"step": 3950
},
{
"epoch": 1.5177548682703321,
"grad_norm": 4.897209644317627,
"learning_rate": 2.746429076509688e-05,
"loss": 0.5163,
"step": 3975
},
{
"epoch": 1.5273004963726613,
"grad_norm": 6.026910781860352,
"learning_rate": 2.7294583510111726e-05,
"loss": 0.5637,
"step": 4000
},
{
"epoch": 1.5368461244749905,
"grad_norm": 1.9394190311431885,
"learning_rate": 2.7117805119502193e-05,
"loss": 0.4337,
"step": 4025
},
{
"epoch": 1.5463917525773194,
"grad_norm": 10.731663703918457,
"learning_rate": 2.694102672889266e-05,
"loss": 0.365,
"step": 4050
},
{
"epoch": 1.5559373806796488,
"grad_norm": 2.1111578941345215,
"learning_rate": 2.676424833828313e-05,
"loss": 0.4874,
"step": 4075
},
{
"epoch": 1.5654830087819778,
"grad_norm": 8.03298282623291,
"learning_rate": 2.6587469947673598e-05,
"loss": 0.5184,
"step": 4100
},
{
"epoch": 1.575028636884307,
"grad_norm": 4.874744415283203,
"learning_rate": 2.6410691557064065e-05,
"loss": 0.6084,
"step": 4125
},
{
"epoch": 1.5845742649866361,
"grad_norm": 1.2749981880187988,
"learning_rate": 2.6233913166454532e-05,
"loss": 0.5066,
"step": 4150
},
{
"epoch": 1.5941198930889653,
"grad_norm": 6.089448928833008,
"learning_rate": 2.6057134775845003e-05,
"loss": 0.4774,
"step": 4175
},
{
"epoch": 1.6036655211912945,
"grad_norm": 6.618624687194824,
"learning_rate": 2.588035638523547e-05,
"loss": 0.5735,
"step": 4200
},
{
"epoch": 1.6132111492936234,
"grad_norm": 3.0154378414154053,
"learning_rate": 2.5703577994625937e-05,
"loss": 0.4776,
"step": 4225
},
{
"epoch": 1.6227567773959528,
"grad_norm": 8.030562400817871,
"learning_rate": 2.5526799604016404e-05,
"loss": 0.4864,
"step": 4250
},
{
"epoch": 1.6323024054982818,
"grad_norm": 8.929983139038086,
"learning_rate": 2.535002121340688e-05,
"loss": 0.5598,
"step": 4275
},
{
"epoch": 1.641848033600611,
"grad_norm": 2.6678271293640137,
"learning_rate": 2.5173242822797345e-05,
"loss": 0.5868,
"step": 4300
},
{
"epoch": 1.65139366170294,
"grad_norm": 6.8634819984436035,
"learning_rate": 2.499646443218781e-05,
"loss": 0.4856,
"step": 4325
},
{
"epoch": 1.660939289805269,
"grad_norm": 1.4916170835494995,
"learning_rate": 2.481968604157828e-05,
"loss": 0.3447,
"step": 4350
},
{
"epoch": 1.6704849179075985,
"grad_norm": 5.674633979797363,
"learning_rate": 2.4642907650968747e-05,
"loss": 0.3216,
"step": 4375
},
{
"epoch": 1.6800305460099274,
"grad_norm": 9.564282417297363,
"learning_rate": 2.4466129260359214e-05,
"loss": 0.4884,
"step": 4400
},
{
"epoch": 1.6895761741122566,
"grad_norm": 6.336232662200928,
"learning_rate": 2.4289350869749685e-05,
"loss": 0.4915,
"step": 4425
},
{
"epoch": 1.6991218022145858,
"grad_norm": 4.912391185760498,
"learning_rate": 2.411257247914015e-05,
"loss": 0.3279,
"step": 4450
},
{
"epoch": 1.7086674303169147,
"grad_norm": 2.542475938796997,
"learning_rate": 2.393579408853062e-05,
"loss": 0.5385,
"step": 4475
},
{
"epoch": 1.718213058419244,
"grad_norm": 6.017487049102783,
"learning_rate": 2.3759015697921086e-05,
"loss": 0.5516,
"step": 4500
},
{
"epoch": 1.727758686521573,
"grad_norm": 7.965273380279541,
"learning_rate": 2.3582237307311557e-05,
"loss": 0.3421,
"step": 4525
},
{
"epoch": 1.7373043146239022,
"grad_norm": 0.6928049325942993,
"learning_rate": 2.3405458916702024e-05,
"loss": 0.5857,
"step": 4550
},
{
"epoch": 1.7468499427262314,
"grad_norm": 2.849928379058838,
"learning_rate": 2.322868052609249e-05,
"loss": 0.3379,
"step": 4575
},
{
"epoch": 1.7563955708285606,
"grad_norm": 17.360977172851562,
"learning_rate": 2.3051902135482958e-05,
"loss": 0.5657,
"step": 4600
},
{
"epoch": 1.7659411989308897,
"grad_norm": 4.397252559661865,
"learning_rate": 2.287512374487343e-05,
"loss": 0.4145,
"step": 4625
},
{
"epoch": 1.7754868270332187,
"grad_norm": 1.0898733139038086,
"learning_rate": 2.2698345354263896e-05,
"loss": 0.3977,
"step": 4650
},
{
"epoch": 1.7850324551355479,
"grad_norm": 5.723209381103516,
"learning_rate": 2.2521566963654363e-05,
"loss": 0.3961,
"step": 4675
},
{
"epoch": 1.794578083237877,
"grad_norm": 6.174077033996582,
"learning_rate": 2.234478857304483e-05,
"loss": 0.3459,
"step": 4700
},
{
"epoch": 1.8041237113402062,
"grad_norm": 4.179036617279053,
"learning_rate": 2.21680101824353e-05,
"loss": 0.3503,
"step": 4725
},
{
"epoch": 1.8136693394425354,
"grad_norm": 13.33944320678711,
"learning_rate": 2.1991231791825768e-05,
"loss": 0.5015,
"step": 4750
},
{
"epoch": 1.8232149675448643,
"grad_norm": 3.4436299800872803,
"learning_rate": 2.1814453401216238e-05,
"loss": 0.5311,
"step": 4775
},
{
"epoch": 1.8327605956471937,
"grad_norm": 4.949787616729736,
"learning_rate": 2.1637675010606702e-05,
"loss": 0.3274,
"step": 4800
},
{
"epoch": 1.8423062237495227,
"grad_norm": 3.8653883934020996,
"learning_rate": 2.1460896619997172e-05,
"loss": 0.4668,
"step": 4825
},
{
"epoch": 1.8518518518518519,
"grad_norm": 3.763760566711426,
"learning_rate": 2.128411822938764e-05,
"loss": 0.3929,
"step": 4850
},
{
"epoch": 1.861397479954181,
"grad_norm": 0.6590569019317627,
"learning_rate": 2.110733983877811e-05,
"loss": 0.3992,
"step": 4875
},
{
"epoch": 1.87094310805651,
"grad_norm": 4.360814571380615,
"learning_rate": 2.0930561448168577e-05,
"loss": 0.4132,
"step": 4900
},
{
"epoch": 1.8804887361588394,
"grad_norm": 0.8837212324142456,
"learning_rate": 2.0753783057559044e-05,
"loss": 0.559,
"step": 4925
},
{
"epoch": 1.8900343642611683,
"grad_norm": 4.778452396392822,
"learning_rate": 2.057700466694951e-05,
"loss": 0.5726,
"step": 4950
},
{
"epoch": 1.8995799923634975,
"grad_norm": 4.478198528289795,
"learning_rate": 2.0400226276339982e-05,
"loss": 0.4385,
"step": 4975
},
{
"epoch": 1.9091256204658267,
"grad_norm": 1.186051607131958,
"learning_rate": 2.022344788573045e-05,
"loss": 0.4041,
"step": 5000
},
{
"epoch": 1.9186712485681556,
"grad_norm": 10.195874214172363,
"learning_rate": 2.0046669495120916e-05,
"loss": 0.3768,
"step": 5025
},
{
"epoch": 1.928216876670485,
"grad_norm": 4.346467018127441,
"learning_rate": 1.9869891104511383e-05,
"loss": 0.601,
"step": 5050
},
{
"epoch": 1.937762504772814,
"grad_norm": 3.2331788539886475,
"learning_rate": 1.9693112713901854e-05,
"loss": 0.4731,
"step": 5075
},
{
"epoch": 1.9473081328751431,
"grad_norm": 4.1959381103515625,
"learning_rate": 1.951633432329232e-05,
"loss": 0.3686,
"step": 5100
},
{
"epoch": 1.9568537609774723,
"grad_norm": 0.6196494698524475,
"learning_rate": 1.933955593268279e-05,
"loss": 0.3442,
"step": 5125
},
{
"epoch": 1.9663993890798015,
"grad_norm": 0.898938000202179,
"learning_rate": 1.9162777542073255e-05,
"loss": 0.2922,
"step": 5150
},
{
"epoch": 1.9759450171821307,
"grad_norm": 6.5429768562316895,
"learning_rate": 1.8985999151463726e-05,
"loss": 0.4482,
"step": 5175
},
{
"epoch": 1.9854906452844596,
"grad_norm": 6.415786266326904,
"learning_rate": 1.8809220760854193e-05,
"loss": 0.5297,
"step": 5200
},
{
"epoch": 1.995036273386789,
"grad_norm": 5.376886367797852,
"learning_rate": 1.8632442370244664e-05,
"loss": 0.4509,
"step": 5225
},
{
"epoch": 2.0,
"eval_accuracy": 0.9177166857579229,
"eval_f1_macro": 0.6752206415980724,
"eval_f1_micro": 0.9177166857579229,
"eval_f1_weighted": 0.9116556748785886,
"eval_loss": 0.302744597196579,
"eval_precision_macro": 0.7389088670621011,
"eval_precision_micro": 0.9177166857579229,
"eval_precision_weighted": 0.9087828192763279,
"eval_recall_macro": 0.6430310667333932,
"eval_recall_micro": 0.9177166857579229,
"eval_recall_weighted": 0.9177166857579229,
"eval_runtime": 30.839,
"eval_samples_per_second": 169.85,
"eval_steps_per_second": 10.636,
"step": 5238
},
{
"epoch": 2.004581901489118,
"grad_norm": 4.516357898712158,
"learning_rate": 1.845566397963513e-05,
"loss": 0.5612,
"step": 5250
},
{
"epoch": 2.014127529591447,
"grad_norm": 0.7673831582069397,
"learning_rate": 1.8278885589025598e-05,
"loss": 0.2938,
"step": 5275
},
{
"epoch": 2.0236731576937763,
"grad_norm": 5.432100296020508,
"learning_rate": 1.8102107198416065e-05,
"loss": 0.4495,
"step": 5300
},
{
"epoch": 2.0332187857961053,
"grad_norm": 0.7653512954711914,
"learning_rate": 1.7925328807806536e-05,
"loss": 0.2454,
"step": 5325
},
{
"epoch": 2.0427644138984347,
"grad_norm": 0.0857914537191391,
"learning_rate": 1.7748550417197003e-05,
"loss": 0.3485,
"step": 5350
},
{
"epoch": 2.0523100420007636,
"grad_norm": 7.614650249481201,
"learning_rate": 1.757177202658747e-05,
"loss": 0.44,
"step": 5375
},
{
"epoch": 2.0618556701030926,
"grad_norm": 7.014926433563232,
"learning_rate": 1.7394993635977937e-05,
"loss": 0.462,
"step": 5400
},
{
"epoch": 2.071401298205422,
"grad_norm": 3.3274178504943848,
"learning_rate": 1.7218215245368408e-05,
"loss": 0.3796,
"step": 5425
},
{
"epoch": 2.080946926307751,
"grad_norm": 1.7487058639526367,
"learning_rate": 1.7041436854758875e-05,
"loss": 0.4075,
"step": 5450
},
{
"epoch": 2.0904925544100803,
"grad_norm": 1.2080419063568115,
"learning_rate": 1.6864658464149345e-05,
"loss": 0.3875,
"step": 5475
},
{
"epoch": 2.1000381825124093,
"grad_norm": 10.531205177307129,
"learning_rate": 1.668788007353981e-05,
"loss": 0.4767,
"step": 5500
},
{
"epoch": 2.1095838106147387,
"grad_norm": 6.064085960388184,
"learning_rate": 1.651110168293028e-05,
"loss": 0.5239,
"step": 5525
},
{
"epoch": 2.1191294387170676,
"grad_norm": 0.7669031620025635,
"learning_rate": 1.6334323292320747e-05,
"loss": 0.4222,
"step": 5550
},
{
"epoch": 2.1286750668193966,
"grad_norm": 1.6597367525100708,
"learning_rate": 1.6157544901711217e-05,
"loss": 0.378,
"step": 5575
},
{
"epoch": 2.138220694921726,
"grad_norm": 5.749689102172852,
"learning_rate": 1.5980766511101684e-05,
"loss": 0.409,
"step": 5600
},
{
"epoch": 2.147766323024055,
"grad_norm": 3.9973812103271484,
"learning_rate": 1.580398812049215e-05,
"loss": 0.387,
"step": 5625
},
{
"epoch": 2.1573119511263843,
"grad_norm": 5.586762428283691,
"learning_rate": 1.562720972988262e-05,
"loss": 0.476,
"step": 5650
},
{
"epoch": 2.1668575792287132,
"grad_norm": 4.630229949951172,
"learning_rate": 1.545043133927309e-05,
"loss": 0.4483,
"step": 5675
},
{
"epoch": 2.176403207331042,
"grad_norm": 10.38274097442627,
"learning_rate": 1.5273652948663556e-05,
"loss": 0.3905,
"step": 5700
},
{
"epoch": 2.1859488354333716,
"grad_norm": 6.280734539031982,
"learning_rate": 1.5096874558054025e-05,
"loss": 0.4345,
"step": 5725
},
{
"epoch": 2.1954944635357005,
"grad_norm": 0.5489762425422668,
"learning_rate": 1.4920096167444492e-05,
"loss": 0.3986,
"step": 5750
},
{
"epoch": 2.20504009163803,
"grad_norm": 7.297660827636719,
"learning_rate": 1.4743317776834961e-05,
"loss": 0.5698,
"step": 5775
},
{
"epoch": 2.214585719740359,
"grad_norm": 1.4060806035995483,
"learning_rate": 1.4566539386225428e-05,
"loss": 0.4576,
"step": 5800
},
{
"epoch": 2.224131347842688,
"grad_norm": 4.472266674041748,
"learning_rate": 1.4389760995615897e-05,
"loss": 0.3482,
"step": 5825
},
{
"epoch": 2.2336769759450172,
"grad_norm": 15.711027145385742,
"learning_rate": 1.4212982605006364e-05,
"loss": 0.3594,
"step": 5850
},
{
"epoch": 2.243222604047346,
"grad_norm": 4.598949909210205,
"learning_rate": 1.4036204214396833e-05,
"loss": 0.4257,
"step": 5875
},
{
"epoch": 2.2527682321496756,
"grad_norm": 3.651421308517456,
"learning_rate": 1.38594258237873e-05,
"loss": 0.2769,
"step": 5900
},
{
"epoch": 2.2623138602520045,
"grad_norm": 0.5090247392654419,
"learning_rate": 1.368264743317777e-05,
"loss": 0.3132,
"step": 5925
},
{
"epoch": 2.271859488354334,
"grad_norm": 0.6600648760795593,
"learning_rate": 1.3505869042568236e-05,
"loss": 0.3831,
"step": 5950
},
{
"epoch": 2.281405116456663,
"grad_norm": 8.323454856872559,
"learning_rate": 1.3329090651958707e-05,
"loss": 0.2981,
"step": 5975
},
{
"epoch": 2.290950744558992,
"grad_norm": 1.1440905332565308,
"learning_rate": 1.3152312261349172e-05,
"loss": 0.399,
"step": 6000
},
{
"epoch": 2.3004963726613212,
"grad_norm": 12.562394142150879,
"learning_rate": 1.2975533870739643e-05,
"loss": 0.5276,
"step": 6025
},
{
"epoch": 2.31004200076365,
"grad_norm": 2.5904836654663086,
"learning_rate": 1.279875548013011e-05,
"loss": 0.3681,
"step": 6050
},
{
"epoch": 2.319587628865979,
"grad_norm": 0.3445684611797333,
"learning_rate": 1.2621977089520579e-05,
"loss": 0.2997,
"step": 6075
},
{
"epoch": 2.3291332569683085,
"grad_norm": 8.17843246459961,
"learning_rate": 1.2445198698911046e-05,
"loss": 0.339,
"step": 6100
},
{
"epoch": 2.3386788850706375,
"grad_norm": 11.221162796020508,
"learning_rate": 1.2268420308301513e-05,
"loss": 0.4197,
"step": 6125
},
{
"epoch": 2.348224513172967,
"grad_norm": 3.742429494857788,
"learning_rate": 1.2091641917691982e-05,
"loss": 0.3571,
"step": 6150
},
{
"epoch": 2.357770141275296,
"grad_norm": 1.143278956413269,
"learning_rate": 1.1914863527082449e-05,
"loss": 0.3652,
"step": 6175
},
{
"epoch": 2.3673157693776252,
"grad_norm": 4.214432239532471,
"learning_rate": 1.1738085136472918e-05,
"loss": 0.3808,
"step": 6200
},
{
"epoch": 2.376861397479954,
"grad_norm": 2.011171579360962,
"learning_rate": 1.1561306745863387e-05,
"loss": 0.3548,
"step": 6225
},
{
"epoch": 2.386407025582283,
"grad_norm": 9.829928398132324,
"learning_rate": 1.1384528355253854e-05,
"loss": 0.4195,
"step": 6250
},
{
"epoch": 2.3959526536846125,
"grad_norm": 3.113661050796509,
"learning_rate": 1.1207749964644323e-05,
"loss": 0.4596,
"step": 6275
},
{
"epoch": 2.4054982817869415,
"grad_norm": 5.8311357498168945,
"learning_rate": 1.103097157403479e-05,
"loss": 0.2321,
"step": 6300
},
{
"epoch": 2.415043909889271,
"grad_norm": 3.50929856300354,
"learning_rate": 1.0854193183425259e-05,
"loss": 0.3387,
"step": 6325
},
{
"epoch": 2.4245895379916,
"grad_norm": 7.452309608459473,
"learning_rate": 1.0677414792815726e-05,
"loss": 0.3926,
"step": 6350
},
{
"epoch": 2.434135166093929,
"grad_norm": 5.029876708984375,
"learning_rate": 1.0500636402206195e-05,
"loss": 0.5649,
"step": 6375
},
{
"epoch": 2.443680794196258,
"grad_norm": 2.944840431213379,
"learning_rate": 1.0323858011596663e-05,
"loss": 0.3464,
"step": 6400
},
{
"epoch": 2.453226422298587,
"grad_norm": 0.7697826027870178,
"learning_rate": 1.014707962098713e-05,
"loss": 0.2758,
"step": 6425
},
{
"epoch": 2.4627720504009165,
"grad_norm": 1.96221125125885,
"learning_rate": 9.9703012303776e-06,
"loss": 0.4873,
"step": 6450
},
{
"epoch": 2.4723176785032455,
"grad_norm": 1.6927087306976318,
"learning_rate": 9.793522839768066e-06,
"loss": 0.3795,
"step": 6475
},
{
"epoch": 2.4818633066055744,
"grad_norm": 7.51122522354126,
"learning_rate": 9.616744449158535e-06,
"loss": 0.4095,
"step": 6500
},
{
"epoch": 2.491408934707904,
"grad_norm": 0.06873871386051178,
"learning_rate": 9.439966058549002e-06,
"loss": 0.4527,
"step": 6525
},
{
"epoch": 2.5009545628102328,
"grad_norm": 0.3129890263080597,
"learning_rate": 9.263187667939471e-06,
"loss": 0.3683,
"step": 6550
},
{
"epoch": 2.510500190912562,
"grad_norm": 0.21196362376213074,
"learning_rate": 9.08640927732994e-06,
"loss": 0.4153,
"step": 6575
},
{
"epoch": 2.520045819014891,
"grad_norm": 0.36541563272476196,
"learning_rate": 8.909630886720407e-06,
"loss": 0.3903,
"step": 6600
},
{
"epoch": 2.5295914471172205,
"grad_norm": 8.781929969787598,
"learning_rate": 8.732852496110876e-06,
"loss": 0.37,
"step": 6625
},
{
"epoch": 2.5391370752195495,
"grad_norm": 3.3782460689544678,
"learning_rate": 8.556074105501343e-06,
"loss": 0.394,
"step": 6650
},
{
"epoch": 2.5486827033218784,
"grad_norm": 2.571878671646118,
"learning_rate": 8.379295714891812e-06,
"loss": 0.3002,
"step": 6675
},
{
"epoch": 2.558228331424208,
"grad_norm": 0.6651304960250854,
"learning_rate": 8.20251732428228e-06,
"loss": 0.2164,
"step": 6700
},
{
"epoch": 2.5677739595265368,
"grad_norm": 0.8912906050682068,
"learning_rate": 8.025738933672748e-06,
"loss": 0.4621,
"step": 6725
},
{
"epoch": 2.5773195876288657,
"grad_norm": 7.837925434112549,
"learning_rate": 7.848960543063217e-06,
"loss": 0.3876,
"step": 6750
},
{
"epoch": 2.586865215731195,
"grad_norm": 12.685746192932129,
"learning_rate": 7.672182152453684e-06,
"loss": 0.3219,
"step": 6775
},
{
"epoch": 2.5964108438335245,
"grad_norm": 0.4601318836212158,
"learning_rate": 7.495403761844153e-06,
"loss": 0.3114,
"step": 6800
},
{
"epoch": 2.6059564719358534,
"grad_norm": 14.409846305847168,
"learning_rate": 7.318625371234621e-06,
"loss": 0.3973,
"step": 6825
},
{
"epoch": 2.6155021000381824,
"grad_norm": 3.2858171463012695,
"learning_rate": 7.141846980625089e-06,
"loss": 0.2882,
"step": 6850
},
{
"epoch": 2.625047728140512,
"grad_norm": 6.5941901206970215,
"learning_rate": 6.965068590015557e-06,
"loss": 0.4302,
"step": 6875
},
{
"epoch": 2.6345933562428407,
"grad_norm": 3.348790407180786,
"learning_rate": 6.788290199406025e-06,
"loss": 0.3969,
"step": 6900
},
{
"epoch": 2.6441389843451697,
"grad_norm": 4.6731672286987305,
"learning_rate": 6.611511808796493e-06,
"loss": 0.4622,
"step": 6925
},
{
"epoch": 2.653684612447499,
"grad_norm": 6.919814586639404,
"learning_rate": 6.434733418186961e-06,
"loss": 0.4073,
"step": 6950
},
{
"epoch": 2.663230240549828,
"grad_norm": 0.3117620646953583,
"learning_rate": 6.25795502757743e-06,
"loss": 0.2941,
"step": 6975
},
{
"epoch": 2.6727758686521574,
"grad_norm": 9.325462341308594,
"learning_rate": 6.081176636967898e-06,
"loss": 0.185,
"step": 7000
},
{
"epoch": 2.6823214967544864,
"grad_norm": 5.677446365356445,
"learning_rate": 5.904398246358366e-06,
"loss": 0.3638,
"step": 7025
},
{
"epoch": 2.691867124856816,
"grad_norm": 9.070378303527832,
"learning_rate": 5.727619855748834e-06,
"loss": 0.2228,
"step": 7050
},
{
"epoch": 2.7014127529591447,
"grad_norm": 0.04487950727343559,
"learning_rate": 5.550841465139302e-06,
"loss": 0.4335,
"step": 7075
},
{
"epoch": 2.7109583810614737,
"grad_norm": 5.6081013679504395,
"learning_rate": 5.37406307452977e-06,
"loss": 0.3926,
"step": 7100
},
{
"epoch": 2.720504009163803,
"grad_norm": 9.620038986206055,
"learning_rate": 5.1972846839202376e-06,
"loss": 0.3399,
"step": 7125
},
{
"epoch": 2.730049637266132,
"grad_norm": 6.807074546813965,
"learning_rate": 5.020506293310706e-06,
"loss": 0.325,
"step": 7150
},
{
"epoch": 2.739595265368461,
"grad_norm": 4.536885738372803,
"learning_rate": 4.843727902701174e-06,
"loss": 0.2597,
"step": 7175
},
{
"epoch": 2.7491408934707904,
"grad_norm": 1.0145533084869385,
"learning_rate": 4.666949512091642e-06,
"loss": 0.3867,
"step": 7200
},
{
"epoch": 2.7586865215731198,
"grad_norm": 7.323050022125244,
"learning_rate": 4.49017112148211e-06,
"loss": 0.3497,
"step": 7225
},
{
"epoch": 2.7682321496754487,
"grad_norm": 4.958881855010986,
"learning_rate": 4.313392730872578e-06,
"loss": 0.3986,
"step": 7250
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.2114107459783554,
"learning_rate": 4.136614340263046e-06,
"loss": 0.5498,
"step": 7275
},
{
"epoch": 2.787323405880107,
"grad_norm": 5.643067836761475,
"learning_rate": 3.959835949653515e-06,
"loss": 0.4156,
"step": 7300
},
{
"epoch": 2.796869033982436,
"grad_norm": 4.4545392990112305,
"learning_rate": 3.7830575590439827e-06,
"loss": 0.3753,
"step": 7325
},
{
"epoch": 2.806414662084765,
"grad_norm": 1.9792596101760864,
"learning_rate": 3.606279168434451e-06,
"loss": 0.3298,
"step": 7350
},
{
"epoch": 2.8159602901870944,
"grad_norm": 5.517035484313965,
"learning_rate": 3.429500777824919e-06,
"loss": 0.3346,
"step": 7375
},
{
"epoch": 2.8255059182894233,
"grad_norm": 0.44272342324256897,
"learning_rate": 3.252722387215387e-06,
"loss": 0.3044,
"step": 7400
},
{
"epoch": 2.8350515463917527,
"grad_norm": 0.38276228308677673,
"learning_rate": 3.075943996605855e-06,
"loss": 0.4889,
"step": 7425
},
{
"epoch": 2.8445971744940817,
"grad_norm": 8.356722831726074,
"learning_rate": 2.899165605996323e-06,
"loss": 0.4638,
"step": 7450
},
{
"epoch": 2.854142802596411,
"grad_norm": 4.638859748840332,
"learning_rate": 2.722387215386791e-06,
"loss": 0.2617,
"step": 7475
},
{
"epoch": 2.86368843069874,
"grad_norm": 3.0936622619628906,
"learning_rate": 2.5456088247772595e-06,
"loss": 0.2356,
"step": 7500
},
{
"epoch": 2.873234058801069,
"grad_norm": 0.17107409238815308,
"learning_rate": 2.3688304341677275e-06,
"loss": 0.369,
"step": 7525
},
{
"epoch": 2.8827796869033984,
"grad_norm": 3.591745376586914,
"learning_rate": 2.1920520435581955e-06,
"loss": 0.3562,
"step": 7550
},
{
"epoch": 2.8923253150057273,
"grad_norm": 5.612440586090088,
"learning_rate": 2.015273652948664e-06,
"loss": 0.313,
"step": 7575
},
{
"epoch": 2.9018709431080563,
"grad_norm": 6.346649169921875,
"learning_rate": 1.838495262339132e-06,
"loss": 0.3335,
"step": 7600
},
{
"epoch": 2.9114165712103857,
"grad_norm": 5.032191753387451,
"learning_rate": 1.6617168717295999e-06,
"loss": 0.3034,
"step": 7625
},
{
"epoch": 2.9209621993127146,
"grad_norm": 0.9331425428390503,
"learning_rate": 1.4849384811200679e-06,
"loss": 0.3184,
"step": 7650
},
{
"epoch": 2.930507827415044,
"grad_norm": 0.6821147203445435,
"learning_rate": 1.308160090510536e-06,
"loss": 0.4349,
"step": 7675
},
{
"epoch": 2.940053455517373,
"grad_norm": 1.1358468532562256,
"learning_rate": 1.131381699901004e-06,
"loss": 0.3712,
"step": 7700
},
{
"epoch": 2.9495990836197024,
"grad_norm": 0.07542699575424194,
"learning_rate": 9.546033092914723e-07,
"loss": 0.2718,
"step": 7725
},
{
"epoch": 2.9591447117220313,
"grad_norm": 4.9422607421875,
"learning_rate": 7.778249186819404e-07,
"loss": 0.1391,
"step": 7750
},
{
"epoch": 2.9686903398243603,
"grad_norm": 0.34055715799331665,
"learning_rate": 6.010465280724085e-07,
"loss": 0.288,
"step": 7775
},
{
"epoch": 2.9782359679266897,
"grad_norm": 6.950995445251465,
"learning_rate": 4.2426813746287655e-07,
"loss": 0.213,
"step": 7800
},
{
"epoch": 2.9877815960290186,
"grad_norm": 7.761420726776123,
"learning_rate": 2.474897468533447e-07,
"loss": 0.3362,
"step": 7825
},
{
"epoch": 2.997327224131348,
"grad_norm": 0.21115273237228394,
"learning_rate": 7.071135624381275e-08,
"loss": 0.3279,
"step": 7850
},
{
"epoch": 3.0,
"eval_accuracy": 0.9289805269186713,
"eval_f1_macro": 0.7242775388539527,
"eval_f1_micro": 0.9289805269186713,
"eval_f1_weighted": 0.9228868300266074,
"eval_loss": 0.26649972796440125,
"eval_precision_macro": 0.8358131483686612,
"eval_precision_micro": 0.9289805269186713,
"eval_precision_weighted": 0.9228054152229083,
"eval_recall_macro": 0.687057008455895,
"eval_recall_micro": 0.9289805269186713,
"eval_recall_weighted": 0.9289805269186713,
"eval_runtime": 30.8008,
"eval_samples_per_second": 170.061,
"eval_steps_per_second": 10.649,
"step": 7857
}
],
"logging_steps": 25,
"max_steps": 7857,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7216523206291792e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}