|
{ |
|
"best_metric": 0.26649972796440125, |
|
"best_model_checkpoint": "classify-google-basic-3/checkpoint-7857", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 7857, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.009545628102329133, |
|
"grad_norm": 10.965401649475098, |
|
"learning_rate": 1.4631043256997457e-06, |
|
"loss": 2.2469, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.019091256204658267, |
|
"grad_norm": 10.05473804473877, |
|
"learning_rate": 3.053435114503817e-06, |
|
"loss": 1.6956, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0286368843069874, |
|
"grad_norm": 16.2789306640625, |
|
"learning_rate": 4.643765903307888e-06, |
|
"loss": 1.2291, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.038182512409316534, |
|
"grad_norm": 11.012917518615723, |
|
"learning_rate": 6.2340966921119596e-06, |
|
"loss": 1.1946, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.047728140511645666, |
|
"grad_norm": 5.637146949768066, |
|
"learning_rate": 7.824427480916032e-06, |
|
"loss": 1.1144, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0572737686139748, |
|
"grad_norm": 13.986495971679688, |
|
"learning_rate": 9.414758269720102e-06, |
|
"loss": 0.9237, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06681939671630394, |
|
"grad_norm": 10.580368995666504, |
|
"learning_rate": 1.1005089058524173e-05, |
|
"loss": 0.9343, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07636502481863307, |
|
"grad_norm": 18.909542083740234, |
|
"learning_rate": 1.2595419847328243e-05, |
|
"loss": 0.7544, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0859106529209622, |
|
"grad_norm": 9.856317520141602, |
|
"learning_rate": 1.4185750636132317e-05, |
|
"loss": 0.7901, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.09545628102329133, |
|
"grad_norm": 11.235512733459473, |
|
"learning_rate": 1.5776081424936386e-05, |
|
"loss": 0.7968, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.10500190912562046, |
|
"grad_norm": 11.741060256958008, |
|
"learning_rate": 1.736641221374046e-05, |
|
"loss": 0.8511, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.1145475372279496, |
|
"grad_norm": 10.903525352478027, |
|
"learning_rate": 1.895674300254453e-05, |
|
"loss": 0.5989, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12409316533027873, |
|
"grad_norm": 18.30093002319336, |
|
"learning_rate": 2.05470737913486e-05, |
|
"loss": 0.862, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.13363879343260787, |
|
"grad_norm": 14.775749206542969, |
|
"learning_rate": 2.2137404580152673e-05, |
|
"loss": 0.8368, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.143184421534937, |
|
"grad_norm": 13.009576797485352, |
|
"learning_rate": 2.3727735368956743e-05, |
|
"loss": 0.7068, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.15273004963726614, |
|
"grad_norm": 10.315067291259766, |
|
"learning_rate": 2.5318066157760816e-05, |
|
"loss": 0.6387, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16227567773959525, |
|
"grad_norm": 9.280678749084473, |
|
"learning_rate": 2.6908396946564886e-05, |
|
"loss": 0.6895, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.1718213058419244, |
|
"grad_norm": 11.16199016571045, |
|
"learning_rate": 2.849872773536896e-05, |
|
"loss": 0.7864, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.18136693394425354, |
|
"grad_norm": 13.201117515563965, |
|
"learning_rate": 3.008905852417303e-05, |
|
"loss": 0.7781, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.19091256204658266, |
|
"grad_norm": 0.5530382394790649, |
|
"learning_rate": 3.16793893129771e-05, |
|
"loss": 0.6258, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2004581901489118, |
|
"grad_norm": 11.698955535888672, |
|
"learning_rate": 3.326972010178117e-05, |
|
"loss": 0.7465, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.21000381825124093, |
|
"grad_norm": 11.760673522949219, |
|
"learning_rate": 3.4860050890585245e-05, |
|
"loss": 0.96, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.21954944635357007, |
|
"grad_norm": 9.295748710632324, |
|
"learning_rate": 3.645038167938932e-05, |
|
"loss": 0.6984, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.2290950744558992, |
|
"grad_norm": 4.5281548500061035, |
|
"learning_rate": 3.8040712468193385e-05, |
|
"loss": 0.6741, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23864070255822833, |
|
"grad_norm": 5.818964004516602, |
|
"learning_rate": 3.963104325699746e-05, |
|
"loss": 0.7867, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.24818633066055745, |
|
"grad_norm": 4.941270351409912, |
|
"learning_rate": 4.122137404580153e-05, |
|
"loss": 0.7967, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.25773195876288657, |
|
"grad_norm": 1.6748733520507812, |
|
"learning_rate": 4.28117048346056e-05, |
|
"loss": 0.7384, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.26727758686521574, |
|
"grad_norm": 4.042880535125732, |
|
"learning_rate": 4.440203562340967e-05, |
|
"loss": 0.8029, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.27682321496754486, |
|
"grad_norm": 8.729666709899902, |
|
"learning_rate": 4.5992366412213745e-05, |
|
"loss": 0.6758, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.286368843069874, |
|
"grad_norm": 5.898663520812988, |
|
"learning_rate": 4.758269720101781e-05, |
|
"loss": 0.8116, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.29591447117220315, |
|
"grad_norm": 14.431380271911621, |
|
"learning_rate": 4.9173027989821884e-05, |
|
"loss": 0.7054, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.30546009927453227, |
|
"grad_norm": 9.767568588256836, |
|
"learning_rate": 4.991514637250743e-05, |
|
"loss": 0.6299, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3150057273768614, |
|
"grad_norm": 2.037874698638916, |
|
"learning_rate": 4.973836798189789e-05, |
|
"loss": 0.574, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3245513554791905, |
|
"grad_norm": 1.7174073457717896, |
|
"learning_rate": 4.9561589591288364e-05, |
|
"loss": 0.8259, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3340969835815197, |
|
"grad_norm": 6.078500270843506, |
|
"learning_rate": 4.9384811200678834e-05, |
|
"loss": 0.5543, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3436426116838488, |
|
"grad_norm": 6.436212062835693, |
|
"learning_rate": 4.92080328100693e-05, |
|
"loss": 0.6127, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3531882397861779, |
|
"grad_norm": 10.703376770019531, |
|
"learning_rate": 4.903125441945977e-05, |
|
"loss": 0.5309, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.3627338678885071, |
|
"grad_norm": 4.9388651847839355, |
|
"learning_rate": 4.885447602885024e-05, |
|
"loss": 0.8011, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3722794959908362, |
|
"grad_norm": 7.828670978546143, |
|
"learning_rate": 4.86776976382407e-05, |
|
"loss": 1.0022, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.3818251240931653, |
|
"grad_norm": 9.149444580078125, |
|
"learning_rate": 4.8500919247631174e-05, |
|
"loss": 0.6282, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.39137075219549444, |
|
"grad_norm": 10.579568862915039, |
|
"learning_rate": 4.832414085702164e-05, |
|
"loss": 0.7651, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.4009163802978236, |
|
"grad_norm": 5.5516815185546875, |
|
"learning_rate": 4.814736246641211e-05, |
|
"loss": 0.7698, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.41046200840015273, |
|
"grad_norm": 9.71567153930664, |
|
"learning_rate": 4.797058407580258e-05, |
|
"loss": 0.626, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.42000763650248185, |
|
"grad_norm": 8.423727989196777, |
|
"learning_rate": 4.779380568519304e-05, |
|
"loss": 0.6038, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.42955326460481097, |
|
"grad_norm": 17.86850357055664, |
|
"learning_rate": 4.761702729458351e-05, |
|
"loss": 0.8356, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.43909889270714014, |
|
"grad_norm": 11.408333778381348, |
|
"learning_rate": 4.744024890397398e-05, |
|
"loss": 0.6385, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.44864452080946926, |
|
"grad_norm": 9.420711517333984, |
|
"learning_rate": 4.726347051336445e-05, |
|
"loss": 0.5861, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.4581901489117984, |
|
"grad_norm": 5.354550838470459, |
|
"learning_rate": 4.708669212275492e-05, |
|
"loss": 0.7066, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.46773577701412755, |
|
"grad_norm": 8.641571998596191, |
|
"learning_rate": 4.690991373214538e-05, |
|
"loss": 0.5686, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.47728140511645667, |
|
"grad_norm": 9.010872840881348, |
|
"learning_rate": 4.673313534153586e-05, |
|
"loss": 0.6195, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4868270332187858, |
|
"grad_norm": 2.913404941558838, |
|
"learning_rate": 4.655635695092632e-05, |
|
"loss": 0.7095, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.4963726613211149, |
|
"grad_norm": 8.326951026916504, |
|
"learning_rate": 4.6379578560316786e-05, |
|
"loss": 0.6289, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5059182894234441, |
|
"grad_norm": 5.571317672729492, |
|
"learning_rate": 4.6202800169707257e-05, |
|
"loss": 0.6153, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.5154639175257731, |
|
"grad_norm": 12.953869819641113, |
|
"learning_rate": 4.602602177909773e-05, |
|
"loss": 0.538, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5250095456281023, |
|
"grad_norm": 11.328479766845703, |
|
"learning_rate": 4.58492433884882e-05, |
|
"loss": 0.6928, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.5345551737304315, |
|
"grad_norm": 7.584653854370117, |
|
"learning_rate": 4.567246499787866e-05, |
|
"loss": 0.6932, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5441008018327605, |
|
"grad_norm": 5.772885799407959, |
|
"learning_rate": 4.5495686607269125e-05, |
|
"loss": 0.7512, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.5536464299350897, |
|
"grad_norm": 1.1656907796859741, |
|
"learning_rate": 4.53189082166596e-05, |
|
"loss": 0.5952, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5631920580374189, |
|
"grad_norm": 6.189632892608643, |
|
"learning_rate": 4.5142129826050066e-05, |
|
"loss": 0.5763, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.572737686139748, |
|
"grad_norm": 5.039992332458496, |
|
"learning_rate": 4.496535143544054e-05, |
|
"loss": 0.6456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5822833142420771, |
|
"grad_norm": 4.58499813079834, |
|
"learning_rate": 4.4788573044831e-05, |
|
"loss": 0.662, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.5918289423444063, |
|
"grad_norm": 9.357542991638184, |
|
"learning_rate": 4.461179465422147e-05, |
|
"loss": 0.6355, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6013745704467354, |
|
"grad_norm": 4.846512794494629, |
|
"learning_rate": 4.443501626361194e-05, |
|
"loss": 0.5721, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.6109201985490645, |
|
"grad_norm": 3.824963331222534, |
|
"learning_rate": 4.4258237873002405e-05, |
|
"loss": 0.611, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6204658266513937, |
|
"grad_norm": 7.103879928588867, |
|
"learning_rate": 4.4081459482392876e-05, |
|
"loss": 0.3302, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.6300114547537228, |
|
"grad_norm": 4.977944374084473, |
|
"learning_rate": 4.3904681091783346e-05, |
|
"loss": 0.7139, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.639557082856052, |
|
"grad_norm": 0.8024571537971497, |
|
"learning_rate": 4.372790270117381e-05, |
|
"loss": 0.6078, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.649102710958381, |
|
"grad_norm": 3.4169600009918213, |
|
"learning_rate": 4.355112431056428e-05, |
|
"loss": 0.5165, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6586483390607102, |
|
"grad_norm": 7.830475330352783, |
|
"learning_rate": 4.3374345919954744e-05, |
|
"loss": 0.7304, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.6681939671630394, |
|
"grad_norm": 6.655337810516357, |
|
"learning_rate": 4.3197567529345215e-05, |
|
"loss": 0.5174, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.6777395952653684, |
|
"grad_norm": 3.1303579807281494, |
|
"learning_rate": 4.3020789138735685e-05, |
|
"loss": 0.6751, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.6872852233676976, |
|
"grad_norm": 4.309839248657227, |
|
"learning_rate": 4.284401074812615e-05, |
|
"loss": 0.5188, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6968308514700268, |
|
"grad_norm": 4.880556583404541, |
|
"learning_rate": 4.266723235751662e-05, |
|
"loss": 0.5401, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.7063764795723558, |
|
"grad_norm": 7.556941509246826, |
|
"learning_rate": 4.249045396690709e-05, |
|
"loss": 0.4667, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.715922107674685, |
|
"grad_norm": 6.895393371582031, |
|
"learning_rate": 4.2313675576297554e-05, |
|
"loss": 0.7513, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.7254677357770142, |
|
"grad_norm": 2.2813351154327393, |
|
"learning_rate": 4.2136897185688025e-05, |
|
"loss": 0.6731, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7350133638793432, |
|
"grad_norm": 8.803994178771973, |
|
"learning_rate": 4.196011879507849e-05, |
|
"loss": 0.5013, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.7445589919816724, |
|
"grad_norm": 1.5111371278762817, |
|
"learning_rate": 4.1783340404468966e-05, |
|
"loss": 0.6204, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.7541046200840015, |
|
"grad_norm": 5.194613933563232, |
|
"learning_rate": 4.160656201385943e-05, |
|
"loss": 0.5988, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.7636502481863306, |
|
"grad_norm": 9.08621597290039, |
|
"learning_rate": 4.142978362324989e-05, |
|
"loss": 0.5617, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7731958762886598, |
|
"grad_norm": 9.078089714050293, |
|
"learning_rate": 4.1253005232640364e-05, |
|
"loss": 0.567, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.7827415043909889, |
|
"grad_norm": 9.430150985717773, |
|
"learning_rate": 4.107622684203083e-05, |
|
"loss": 0.5309, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.7922871324933181, |
|
"grad_norm": 4.572437286376953, |
|
"learning_rate": 4.0899448451421305e-05, |
|
"loss": 0.5597, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.8018327605956472, |
|
"grad_norm": 5.556495189666748, |
|
"learning_rate": 4.072267006081177e-05, |
|
"loss": 0.5909, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.8113783886979763, |
|
"grad_norm": 6.426826477050781, |
|
"learning_rate": 4.054589167020223e-05, |
|
"loss": 0.5907, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.8209240168003055, |
|
"grad_norm": 5.4699530601501465, |
|
"learning_rate": 4.03691132795927e-05, |
|
"loss": 0.7061, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.8304696449026346, |
|
"grad_norm": 3.550435781478882, |
|
"learning_rate": 4.019233488898317e-05, |
|
"loss": 0.6254, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.8400152730049637, |
|
"grad_norm": 3.2830183506011963, |
|
"learning_rate": 4.0015556498373644e-05, |
|
"loss": 0.551, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8495609011072929, |
|
"grad_norm": 16.018274307250977, |
|
"learning_rate": 3.983877810776411e-05, |
|
"loss": 0.4898, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.8591065292096219, |
|
"grad_norm": 1.2912545204162598, |
|
"learning_rate": 3.966199971715457e-05, |
|
"loss": 0.4058, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8686521573119511, |
|
"grad_norm": 6.0288896560668945, |
|
"learning_rate": 3.948522132654505e-05, |
|
"loss": 0.6408, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.8781977854142803, |
|
"grad_norm": 3.8952457904815674, |
|
"learning_rate": 3.930844293593551e-05, |
|
"loss": 0.7051, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8877434135166093, |
|
"grad_norm": 0.5457330346107483, |
|
"learning_rate": 3.913166454532598e-05, |
|
"loss": 0.6625, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.8972890416189385, |
|
"grad_norm": 9.08011245727539, |
|
"learning_rate": 3.895488615471645e-05, |
|
"loss": 0.4587, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.9068346697212677, |
|
"grad_norm": 27.94516372680664, |
|
"learning_rate": 3.877810776410692e-05, |
|
"loss": 0.4202, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.9163802978235968, |
|
"grad_norm": 4.565572261810303, |
|
"learning_rate": 3.860132937349739e-05, |
|
"loss": 0.5311, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 5.967700004577637, |
|
"learning_rate": 3.842455098288785e-05, |
|
"loss": 0.5012, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.9354715540282551, |
|
"grad_norm": 7.316773414611816, |
|
"learning_rate": 3.824777259227832e-05, |
|
"loss": 0.4585, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.9450171821305842, |
|
"grad_norm": 0.7666218280792236, |
|
"learning_rate": 3.807099420166879e-05, |
|
"loss": 0.5148, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.9545628102329133, |
|
"grad_norm": 8.033583641052246, |
|
"learning_rate": 3.7894215811059256e-05, |
|
"loss": 0.5568, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9641084383352425, |
|
"grad_norm": 3.06626558303833, |
|
"learning_rate": 3.771743742044973e-05, |
|
"loss": 0.5371, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.9736540664375716, |
|
"grad_norm": 4.116180896759033, |
|
"learning_rate": 3.754065902984019e-05, |
|
"loss": 0.5567, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9831996945399007, |
|
"grad_norm": 5.341053485870361, |
|
"learning_rate": 3.736388063923066e-05, |
|
"loss": 0.5501, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.9927453226422298, |
|
"grad_norm": 0.6019271016120911, |
|
"learning_rate": 3.718710224862113e-05, |
|
"loss": 0.5312, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8806796487208859, |
|
"eval_f1_macro": 0.5481103080327878, |
|
"eval_f1_micro": 0.8806796487208859, |
|
"eval_f1_weighted": 0.8693359028593166, |
|
"eval_loss": 0.45116129517555237, |
|
"eval_precision_macro": 0.676151706940216, |
|
"eval_precision_micro": 0.8806796487208859, |
|
"eval_precision_weighted": 0.8732388021312899, |
|
"eval_recall_macro": 0.5439759320123948, |
|
"eval_recall_micro": 0.8806796487208859, |
|
"eval_recall_weighted": 0.8806796487208859, |
|
"eval_runtime": 30.8839, |
|
"eval_samples_per_second": 169.603, |
|
"eval_steps_per_second": 10.62, |
|
"step": 2619 |
|
}, |
|
{ |
|
"epoch": 1.002290950744559, |
|
"grad_norm": 6.413674354553223, |
|
"learning_rate": 3.7010323858011595e-05, |
|
"loss": 0.5644, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.0118365788468882, |
|
"grad_norm": 3.4250919818878174, |
|
"learning_rate": 3.6833545467402066e-05, |
|
"loss": 0.4757, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.0213822069492173, |
|
"grad_norm": 10.983148574829102, |
|
"learning_rate": 3.6656767076792536e-05, |
|
"loss": 0.4419, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"grad_norm": 7.499892234802246, |
|
"learning_rate": 3.6479988686183e-05, |
|
"loss": 0.6204, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.0404734631538755, |
|
"grad_norm": 3.595353603363037, |
|
"learning_rate": 3.630321029557347e-05, |
|
"loss": 0.5175, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.0500190912562046, |
|
"grad_norm": 7.1321539878845215, |
|
"learning_rate": 3.6126431904963934e-05, |
|
"loss": 0.5092, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.0595647193585338, |
|
"grad_norm": 6.408048152923584, |
|
"learning_rate": 3.594965351435441e-05, |
|
"loss": 0.6063, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.069110347460863, |
|
"grad_norm": 6.020789623260498, |
|
"learning_rate": 3.5772875123744875e-05, |
|
"loss": 0.6509, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0786559755631921, |
|
"grad_norm": 5.091155529022217, |
|
"learning_rate": 3.559609673313534e-05, |
|
"loss": 0.5431, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.088201603665521, |
|
"grad_norm": 6.301450729370117, |
|
"learning_rate": 3.541931834252581e-05, |
|
"loss": 0.5184, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.0977472317678503, |
|
"grad_norm": 3.534942626953125, |
|
"learning_rate": 3.524253995191628e-05, |
|
"loss": 0.3056, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.1072928598701794, |
|
"grad_norm": 3.7696588039398193, |
|
"learning_rate": 3.506576156130675e-05, |
|
"loss": 0.5706, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.1168384879725086, |
|
"grad_norm": 7.325591564178467, |
|
"learning_rate": 3.4888983170697215e-05, |
|
"loss": 0.5182, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.1263841160748378, |
|
"grad_norm": 1.0723004341125488, |
|
"learning_rate": 3.471220478008768e-05, |
|
"loss": 0.4222, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.135929744177167, |
|
"grad_norm": 9.240309715270996, |
|
"learning_rate": 3.4535426389478156e-05, |
|
"loss": 0.4109, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.145475372279496, |
|
"grad_norm": 4.310164451599121, |
|
"learning_rate": 3.435864799886862e-05, |
|
"loss": 0.6056, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.155021000381825, |
|
"grad_norm": 0.06030479073524475, |
|
"learning_rate": 3.418186960825909e-05, |
|
"loss": 0.3311, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.1645666284841543, |
|
"grad_norm": 5.938070297241211, |
|
"learning_rate": 3.4005091217649554e-05, |
|
"loss": 0.5636, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.1741122565864834, |
|
"grad_norm": 3.0062716007232666, |
|
"learning_rate": 3.3828312827040024e-05, |
|
"loss": 0.3369, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.1836578846888126, |
|
"grad_norm": 9.84875202178955, |
|
"learning_rate": 3.3651534436430495e-05, |
|
"loss": 0.3655, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.1932035127911416, |
|
"grad_norm": 0.40218663215637207, |
|
"learning_rate": 3.347475604582096e-05, |
|
"loss": 0.3761, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.2027491408934707, |
|
"grad_norm": 6.9253973960876465, |
|
"learning_rate": 3.329797765521143e-05, |
|
"loss": 0.4421, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.2122947689958, |
|
"grad_norm": 12.239662170410156, |
|
"learning_rate": 3.31211992646019e-05, |
|
"loss": 0.63, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.221840397098129, |
|
"grad_norm": 6.089052200317383, |
|
"learning_rate": 3.294442087399236e-05, |
|
"loss": 0.6012, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.2313860252004583, |
|
"grad_norm": 3.271571159362793, |
|
"learning_rate": 3.2767642483382834e-05, |
|
"loss": 0.4335, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.2409316533027872, |
|
"grad_norm": 10.282990455627441, |
|
"learning_rate": 3.25908640927733e-05, |
|
"loss": 0.573, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.2504772814051164, |
|
"grad_norm": 4.201958179473877, |
|
"learning_rate": 3.241408570216377e-05, |
|
"loss": 0.4124, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.2600229095074456, |
|
"grad_norm": 14.91673469543457, |
|
"learning_rate": 3.223730731155424e-05, |
|
"loss": 0.363, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.2695685376097747, |
|
"grad_norm": 3.084709644317627, |
|
"learning_rate": 3.20605289209447e-05, |
|
"loss": 0.5874, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.279114165712104, |
|
"grad_norm": 9.723969459533691, |
|
"learning_rate": 3.188375053033517e-05, |
|
"loss": 0.4661, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.2886597938144329, |
|
"grad_norm": 0.48647192120552063, |
|
"learning_rate": 3.1706972139725643e-05, |
|
"loss": 0.5431, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.2982054219167622, |
|
"grad_norm": 1.6292872428894043, |
|
"learning_rate": 3.153019374911611e-05, |
|
"loss": 0.535, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.3077510500190912, |
|
"grad_norm": 7.161569595336914, |
|
"learning_rate": 3.135341535850658e-05, |
|
"loss": 0.4232, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.3172966781214204, |
|
"grad_norm": 7.734975337982178, |
|
"learning_rate": 3.117663696789704e-05, |
|
"loss": 0.5533, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.3268423062237495, |
|
"grad_norm": 8.083685874938965, |
|
"learning_rate": 3.099985857728752e-05, |
|
"loss": 0.5145, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.3363879343260787, |
|
"grad_norm": 0.11425119638442993, |
|
"learning_rate": 3.082308018667798e-05, |
|
"loss": 0.3885, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.345933562428408, |
|
"grad_norm": 10.0468168258667, |
|
"learning_rate": 3.0646301796068446e-05, |
|
"loss": 0.5237, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.3554791905307368, |
|
"grad_norm": 5.5162763595581055, |
|
"learning_rate": 3.0469523405458917e-05, |
|
"loss": 0.5081, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.365024818633066, |
|
"grad_norm": 0.6519923210144043, |
|
"learning_rate": 3.0292745014849387e-05, |
|
"loss": 0.6263, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.3745704467353952, |
|
"grad_norm": 5.725049018859863, |
|
"learning_rate": 3.0115966624239855e-05, |
|
"loss": 0.4986, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.3841160748377244, |
|
"grad_norm": 5.0273661613464355, |
|
"learning_rate": 2.993918823363032e-05, |
|
"loss": 0.3676, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.3936617029400535, |
|
"grad_norm": 5.7512288093566895, |
|
"learning_rate": 2.976240984302079e-05, |
|
"loss": 0.5586, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.4032073310423825, |
|
"grad_norm": 6.943084716796875, |
|
"learning_rate": 2.958563145241126e-05, |
|
"loss": 0.5734, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.4127529591447117, |
|
"grad_norm": 6.364502906799316, |
|
"learning_rate": 2.9408853061801726e-05, |
|
"loss": 0.6174, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.4222985872470408, |
|
"grad_norm": 9.310466766357422, |
|
"learning_rate": 2.9232074671192194e-05, |
|
"loss": 0.4916, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.43184421534937, |
|
"grad_norm": 4.276609897613525, |
|
"learning_rate": 2.905529628058266e-05, |
|
"loss": 0.4982, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.4413898434516992, |
|
"grad_norm": 8.0429105758667, |
|
"learning_rate": 2.8878517889973135e-05, |
|
"loss": 0.4593, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.4509354715540281, |
|
"grad_norm": 2.093593120574951, |
|
"learning_rate": 2.8701739499363602e-05, |
|
"loss": 0.5528, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.4604810996563573, |
|
"grad_norm": 0.39132124185562134, |
|
"learning_rate": 2.8524961108754066e-05, |
|
"loss": 0.4153, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.4700267277586865, |
|
"grad_norm": 4.517558574676514, |
|
"learning_rate": 2.8348182718144533e-05, |
|
"loss": 0.4191, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.4795723558610157, |
|
"grad_norm": 3.886760950088501, |
|
"learning_rate": 2.8171404327535007e-05, |
|
"loss": 0.3378, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.4891179839633448, |
|
"grad_norm": 10.147708892822266, |
|
"learning_rate": 2.7994625936925474e-05, |
|
"loss": 0.4821, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.498663612065674, |
|
"grad_norm": 9.77696704864502, |
|
"learning_rate": 2.781784754631594e-05, |
|
"loss": 0.5534, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.5082092401680032, |
|
"grad_norm": 8.54476547241211, |
|
"learning_rate": 2.7641069155706405e-05, |
|
"loss": 0.5286, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.5177548682703321, |
|
"grad_norm": 4.897209644317627, |
|
"learning_rate": 2.746429076509688e-05, |
|
"loss": 0.5163, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.5273004963726613, |
|
"grad_norm": 6.026910781860352, |
|
"learning_rate": 2.7294583510111726e-05, |
|
"loss": 0.5637, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.5368461244749905, |
|
"grad_norm": 1.9394190311431885, |
|
"learning_rate": 2.7117805119502193e-05, |
|
"loss": 0.4337, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.5463917525773194, |
|
"grad_norm": 10.731663703918457, |
|
"learning_rate": 2.694102672889266e-05, |
|
"loss": 0.365, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.5559373806796488, |
|
"grad_norm": 2.1111578941345215, |
|
"learning_rate": 2.676424833828313e-05, |
|
"loss": 0.4874, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.5654830087819778, |
|
"grad_norm": 8.03298282623291, |
|
"learning_rate": 2.6587469947673598e-05, |
|
"loss": 0.5184, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.575028636884307, |
|
"grad_norm": 4.874744415283203, |
|
"learning_rate": 2.6410691557064065e-05, |
|
"loss": 0.6084, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.5845742649866361, |
|
"grad_norm": 1.2749981880187988, |
|
"learning_rate": 2.6233913166454532e-05, |
|
"loss": 0.5066, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.5941198930889653, |
|
"grad_norm": 6.089448928833008, |
|
"learning_rate": 2.6057134775845003e-05, |
|
"loss": 0.4774, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.6036655211912945, |
|
"grad_norm": 6.618624687194824, |
|
"learning_rate": 2.588035638523547e-05, |
|
"loss": 0.5735, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.6132111492936234, |
|
"grad_norm": 3.0154378414154053, |
|
"learning_rate": 2.5703577994625937e-05, |
|
"loss": 0.4776, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.6227567773959528, |
|
"grad_norm": 8.030562400817871, |
|
"learning_rate": 2.5526799604016404e-05, |
|
"loss": 0.4864, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.6323024054982818, |
|
"grad_norm": 8.929983139038086, |
|
"learning_rate": 2.535002121340688e-05, |
|
"loss": 0.5598, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.641848033600611, |
|
"grad_norm": 2.6678271293640137, |
|
"learning_rate": 2.5173242822797345e-05, |
|
"loss": 0.5868, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.65139366170294, |
|
"grad_norm": 6.8634819984436035, |
|
"learning_rate": 2.499646443218781e-05, |
|
"loss": 0.4856, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.660939289805269, |
|
"grad_norm": 1.4916170835494995, |
|
"learning_rate": 2.481968604157828e-05, |
|
"loss": 0.3447, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.6704849179075985, |
|
"grad_norm": 5.674633979797363, |
|
"learning_rate": 2.4642907650968747e-05, |
|
"loss": 0.3216, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.6800305460099274, |
|
"grad_norm": 9.564282417297363, |
|
"learning_rate": 2.4466129260359214e-05, |
|
"loss": 0.4884, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.6895761741122566, |
|
"grad_norm": 6.336232662200928, |
|
"learning_rate": 2.4289350869749685e-05, |
|
"loss": 0.4915, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.6991218022145858, |
|
"grad_norm": 4.912391185760498, |
|
"learning_rate": 2.411257247914015e-05, |
|
"loss": 0.3279, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.7086674303169147, |
|
"grad_norm": 2.542475938796997, |
|
"learning_rate": 2.393579408853062e-05, |
|
"loss": 0.5385, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.718213058419244, |
|
"grad_norm": 6.017487049102783, |
|
"learning_rate": 2.3759015697921086e-05, |
|
"loss": 0.5516, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.727758686521573, |
|
"grad_norm": 7.965273380279541, |
|
"learning_rate": 2.3582237307311557e-05, |
|
"loss": 0.3421, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.7373043146239022, |
|
"grad_norm": 0.6928049325942993, |
|
"learning_rate": 2.3405458916702024e-05, |
|
"loss": 0.5857, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.7468499427262314, |
|
"grad_norm": 2.849928379058838, |
|
"learning_rate": 2.322868052609249e-05, |
|
"loss": 0.3379, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.7563955708285606, |
|
"grad_norm": 17.360977172851562, |
|
"learning_rate": 2.3051902135482958e-05, |
|
"loss": 0.5657, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.7659411989308897, |
|
"grad_norm": 4.397252559661865, |
|
"learning_rate": 2.287512374487343e-05, |
|
"loss": 0.4145, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.7754868270332187, |
|
"grad_norm": 1.0898733139038086, |
|
"learning_rate": 2.2698345354263896e-05, |
|
"loss": 0.3977, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.7850324551355479, |
|
"grad_norm": 5.723209381103516, |
|
"learning_rate": 2.2521566963654363e-05, |
|
"loss": 0.3961, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.794578083237877, |
|
"grad_norm": 6.174077033996582, |
|
"learning_rate": 2.234478857304483e-05, |
|
"loss": 0.3459, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.8041237113402062, |
|
"grad_norm": 4.179036617279053, |
|
"learning_rate": 2.21680101824353e-05, |
|
"loss": 0.3503, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.8136693394425354, |
|
"grad_norm": 13.33944320678711, |
|
"learning_rate": 2.1991231791825768e-05, |
|
"loss": 0.5015, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.8232149675448643, |
|
"grad_norm": 3.4436299800872803, |
|
"learning_rate": 2.1814453401216238e-05, |
|
"loss": 0.5311, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.8327605956471937, |
|
"grad_norm": 4.949787616729736, |
|
"learning_rate": 2.1637675010606702e-05, |
|
"loss": 0.3274, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.8423062237495227, |
|
"grad_norm": 3.8653883934020996, |
|
"learning_rate": 2.1460896619997172e-05, |
|
"loss": 0.4668, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 3.763760566711426, |
|
"learning_rate": 2.128411822938764e-05, |
|
"loss": 0.3929, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.861397479954181, |
|
"grad_norm": 0.6590569019317627, |
|
"learning_rate": 2.110733983877811e-05, |
|
"loss": 0.3992, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.87094310805651, |
|
"grad_norm": 4.360814571380615, |
|
"learning_rate": 2.0930561448168577e-05, |
|
"loss": 0.4132, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.8804887361588394, |
|
"grad_norm": 0.8837212324142456, |
|
"learning_rate": 2.0753783057559044e-05, |
|
"loss": 0.559, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.8900343642611683, |
|
"grad_norm": 4.778452396392822, |
|
"learning_rate": 2.057700466694951e-05, |
|
"loss": 0.5726, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.8995799923634975, |
|
"grad_norm": 4.478198528289795, |
|
"learning_rate": 2.0400226276339982e-05, |
|
"loss": 0.4385, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.9091256204658267, |
|
"grad_norm": 1.186051607131958, |
|
"learning_rate": 2.022344788573045e-05, |
|
"loss": 0.4041, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.9186712485681556, |
|
"grad_norm": 10.195874214172363, |
|
"learning_rate": 2.0046669495120916e-05, |
|
"loss": 0.3768, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.928216876670485, |
|
"grad_norm": 4.346467018127441, |
|
"learning_rate": 1.9869891104511383e-05, |
|
"loss": 0.601, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.937762504772814, |
|
"grad_norm": 3.2331788539886475, |
|
"learning_rate": 1.9693112713901854e-05, |
|
"loss": 0.4731, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.9473081328751431, |
|
"grad_norm": 4.1959381103515625, |
|
"learning_rate": 1.951633432329232e-05, |
|
"loss": 0.3686, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.9568537609774723, |
|
"grad_norm": 0.6196494698524475, |
|
"learning_rate": 1.933955593268279e-05, |
|
"loss": 0.3442, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.9663993890798015, |
|
"grad_norm": 0.898938000202179, |
|
"learning_rate": 1.9162777542073255e-05, |
|
"loss": 0.2922, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.9759450171821307, |
|
"grad_norm": 6.5429768562316895, |
|
"learning_rate": 1.8985999151463726e-05, |
|
"loss": 0.4482, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.9854906452844596, |
|
"grad_norm": 6.415786266326904, |
|
"learning_rate": 1.8809220760854193e-05, |
|
"loss": 0.5297, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.995036273386789, |
|
"grad_norm": 5.376886367797852, |
|
"learning_rate": 1.8632442370244664e-05, |
|
"loss": 0.4509, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9177166857579229, |
|
"eval_f1_macro": 0.6752206415980724, |
|
"eval_f1_micro": 0.9177166857579229, |
|
"eval_f1_weighted": 0.9116556748785886, |
|
"eval_loss": 0.302744597196579, |
|
"eval_precision_macro": 0.7389088670621011, |
|
"eval_precision_micro": 0.9177166857579229, |
|
"eval_precision_weighted": 0.9087828192763279, |
|
"eval_recall_macro": 0.6430310667333932, |
|
"eval_recall_micro": 0.9177166857579229, |
|
"eval_recall_weighted": 0.9177166857579229, |
|
"eval_runtime": 30.839, |
|
"eval_samples_per_second": 169.85, |
|
"eval_steps_per_second": 10.636, |
|
"step": 5238 |
|
}, |
|
{ |
|
"epoch": 2.004581901489118, |
|
"grad_norm": 4.516357898712158, |
|
"learning_rate": 1.845566397963513e-05, |
|
"loss": 0.5612, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.014127529591447, |
|
"grad_norm": 0.7673831582069397, |
|
"learning_rate": 1.8278885589025598e-05, |
|
"loss": 0.2938, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.0236731576937763, |
|
"grad_norm": 5.432100296020508, |
|
"learning_rate": 1.8102107198416065e-05, |
|
"loss": 0.4495, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.0332187857961053, |
|
"grad_norm": 0.7653512954711914, |
|
"learning_rate": 1.7925328807806536e-05, |
|
"loss": 0.2454, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.0427644138984347, |
|
"grad_norm": 0.0857914537191391, |
|
"learning_rate": 1.7748550417197003e-05, |
|
"loss": 0.3485, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.0523100420007636, |
|
"grad_norm": 7.614650249481201, |
|
"learning_rate": 1.757177202658747e-05, |
|
"loss": 0.44, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 7.014926433563232, |
|
"learning_rate": 1.7394993635977937e-05, |
|
"loss": 0.462, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.071401298205422, |
|
"grad_norm": 3.3274178504943848, |
|
"learning_rate": 1.7218215245368408e-05, |
|
"loss": 0.3796, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.080946926307751, |
|
"grad_norm": 1.7487058639526367, |
|
"learning_rate": 1.7041436854758875e-05, |
|
"loss": 0.4075, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.0904925544100803, |
|
"grad_norm": 1.2080419063568115, |
|
"learning_rate": 1.6864658464149345e-05, |
|
"loss": 0.3875, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.1000381825124093, |
|
"grad_norm": 10.531205177307129, |
|
"learning_rate": 1.668788007353981e-05, |
|
"loss": 0.4767, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.1095838106147387, |
|
"grad_norm": 6.064085960388184, |
|
"learning_rate": 1.651110168293028e-05, |
|
"loss": 0.5239, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.1191294387170676, |
|
"grad_norm": 0.7669031620025635, |
|
"learning_rate": 1.6334323292320747e-05, |
|
"loss": 0.4222, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.1286750668193966, |
|
"grad_norm": 1.6597367525100708, |
|
"learning_rate": 1.6157544901711217e-05, |
|
"loss": 0.378, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.138220694921726, |
|
"grad_norm": 5.749689102172852, |
|
"learning_rate": 1.5980766511101684e-05, |
|
"loss": 0.409, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.147766323024055, |
|
"grad_norm": 3.9973812103271484, |
|
"learning_rate": 1.580398812049215e-05, |
|
"loss": 0.387, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.1573119511263843, |
|
"grad_norm": 5.586762428283691, |
|
"learning_rate": 1.562720972988262e-05, |
|
"loss": 0.476, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.1668575792287132, |
|
"grad_norm": 4.630229949951172, |
|
"learning_rate": 1.545043133927309e-05, |
|
"loss": 0.4483, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.176403207331042, |
|
"grad_norm": 10.38274097442627, |
|
"learning_rate": 1.5273652948663556e-05, |
|
"loss": 0.3905, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.1859488354333716, |
|
"grad_norm": 6.280734539031982, |
|
"learning_rate": 1.5096874558054025e-05, |
|
"loss": 0.4345, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.1954944635357005, |
|
"grad_norm": 0.5489762425422668, |
|
"learning_rate": 1.4920096167444492e-05, |
|
"loss": 0.3986, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.20504009163803, |
|
"grad_norm": 7.297660827636719, |
|
"learning_rate": 1.4743317776834961e-05, |
|
"loss": 0.5698, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.214585719740359, |
|
"grad_norm": 1.4060806035995483, |
|
"learning_rate": 1.4566539386225428e-05, |
|
"loss": 0.4576, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.224131347842688, |
|
"grad_norm": 4.472266674041748, |
|
"learning_rate": 1.4389760995615897e-05, |
|
"loss": 0.3482, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.2336769759450172, |
|
"grad_norm": 15.711027145385742, |
|
"learning_rate": 1.4212982605006364e-05, |
|
"loss": 0.3594, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.243222604047346, |
|
"grad_norm": 4.598949909210205, |
|
"learning_rate": 1.4036204214396833e-05, |
|
"loss": 0.4257, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.2527682321496756, |
|
"grad_norm": 3.651421308517456, |
|
"learning_rate": 1.38594258237873e-05, |
|
"loss": 0.2769, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.2623138602520045, |
|
"grad_norm": 0.5090247392654419, |
|
"learning_rate": 1.368264743317777e-05, |
|
"loss": 0.3132, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.271859488354334, |
|
"grad_norm": 0.6600648760795593, |
|
"learning_rate": 1.3505869042568236e-05, |
|
"loss": 0.3831, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.281405116456663, |
|
"grad_norm": 8.323454856872559, |
|
"learning_rate": 1.3329090651958707e-05, |
|
"loss": 0.2981, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.290950744558992, |
|
"grad_norm": 1.1440905332565308, |
|
"learning_rate": 1.3152312261349172e-05, |
|
"loss": 0.399, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.3004963726613212, |
|
"grad_norm": 12.562394142150879, |
|
"learning_rate": 1.2975533870739643e-05, |
|
"loss": 0.5276, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.31004200076365, |
|
"grad_norm": 2.5904836654663086, |
|
"learning_rate": 1.279875548013011e-05, |
|
"loss": 0.3681, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.319587628865979, |
|
"grad_norm": 0.3445684611797333, |
|
"learning_rate": 1.2621977089520579e-05, |
|
"loss": 0.2997, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.3291332569683085, |
|
"grad_norm": 8.17843246459961, |
|
"learning_rate": 1.2445198698911046e-05, |
|
"loss": 0.339, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.3386788850706375, |
|
"grad_norm": 11.221162796020508, |
|
"learning_rate": 1.2268420308301513e-05, |
|
"loss": 0.4197, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.348224513172967, |
|
"grad_norm": 3.742429494857788, |
|
"learning_rate": 1.2091641917691982e-05, |
|
"loss": 0.3571, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.357770141275296, |
|
"grad_norm": 1.143278956413269, |
|
"learning_rate": 1.1914863527082449e-05, |
|
"loss": 0.3652, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.3673157693776252, |
|
"grad_norm": 4.214432239532471, |
|
"learning_rate": 1.1738085136472918e-05, |
|
"loss": 0.3808, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.376861397479954, |
|
"grad_norm": 2.011171579360962, |
|
"learning_rate": 1.1561306745863387e-05, |
|
"loss": 0.3548, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.386407025582283, |
|
"grad_norm": 9.829928398132324, |
|
"learning_rate": 1.1384528355253854e-05, |
|
"loss": 0.4195, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.3959526536846125, |
|
"grad_norm": 3.113661050796509, |
|
"learning_rate": 1.1207749964644323e-05, |
|
"loss": 0.4596, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.4054982817869415, |
|
"grad_norm": 5.8311357498168945, |
|
"learning_rate": 1.103097157403479e-05, |
|
"loss": 0.2321, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.415043909889271, |
|
"grad_norm": 3.50929856300354, |
|
"learning_rate": 1.0854193183425259e-05, |
|
"loss": 0.3387, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.4245895379916, |
|
"grad_norm": 7.452309608459473, |
|
"learning_rate": 1.0677414792815726e-05, |
|
"loss": 0.3926, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.434135166093929, |
|
"grad_norm": 5.029876708984375, |
|
"learning_rate": 1.0500636402206195e-05, |
|
"loss": 0.5649, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.443680794196258, |
|
"grad_norm": 2.944840431213379, |
|
"learning_rate": 1.0323858011596663e-05, |
|
"loss": 0.3464, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.453226422298587, |
|
"grad_norm": 0.7697826027870178, |
|
"learning_rate": 1.014707962098713e-05, |
|
"loss": 0.2758, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.4627720504009165, |
|
"grad_norm": 1.96221125125885, |
|
"learning_rate": 9.9703012303776e-06, |
|
"loss": 0.4873, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.4723176785032455, |
|
"grad_norm": 1.6927087306976318, |
|
"learning_rate": 9.793522839768066e-06, |
|
"loss": 0.3795, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.4818633066055744, |
|
"grad_norm": 7.51122522354126, |
|
"learning_rate": 9.616744449158535e-06, |
|
"loss": 0.4095, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.491408934707904, |
|
"grad_norm": 0.06873871386051178, |
|
"learning_rate": 9.439966058549002e-06, |
|
"loss": 0.4527, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.5009545628102328, |
|
"grad_norm": 0.3129890263080597, |
|
"learning_rate": 9.263187667939471e-06, |
|
"loss": 0.3683, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.510500190912562, |
|
"grad_norm": 0.21196362376213074, |
|
"learning_rate": 9.08640927732994e-06, |
|
"loss": 0.4153, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.520045819014891, |
|
"grad_norm": 0.36541563272476196, |
|
"learning_rate": 8.909630886720407e-06, |
|
"loss": 0.3903, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.5295914471172205, |
|
"grad_norm": 8.781929969787598, |
|
"learning_rate": 8.732852496110876e-06, |
|
"loss": 0.37, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.5391370752195495, |
|
"grad_norm": 3.3782460689544678, |
|
"learning_rate": 8.556074105501343e-06, |
|
"loss": 0.394, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.5486827033218784, |
|
"grad_norm": 2.571878671646118, |
|
"learning_rate": 8.379295714891812e-06, |
|
"loss": 0.3002, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.558228331424208, |
|
"grad_norm": 0.6651304960250854, |
|
"learning_rate": 8.20251732428228e-06, |
|
"loss": 0.2164, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.5677739595265368, |
|
"grad_norm": 0.8912906050682068, |
|
"learning_rate": 8.025738933672748e-06, |
|
"loss": 0.4621, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.5773195876288657, |
|
"grad_norm": 7.837925434112549, |
|
"learning_rate": 7.848960543063217e-06, |
|
"loss": 0.3876, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.586865215731195, |
|
"grad_norm": 12.685746192932129, |
|
"learning_rate": 7.672182152453684e-06, |
|
"loss": 0.3219, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.5964108438335245, |
|
"grad_norm": 0.4601318836212158, |
|
"learning_rate": 7.495403761844153e-06, |
|
"loss": 0.3114, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.6059564719358534, |
|
"grad_norm": 14.409846305847168, |
|
"learning_rate": 7.318625371234621e-06, |
|
"loss": 0.3973, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.6155021000381824, |
|
"grad_norm": 3.2858171463012695, |
|
"learning_rate": 7.141846980625089e-06, |
|
"loss": 0.2882, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.625047728140512, |
|
"grad_norm": 6.5941901206970215, |
|
"learning_rate": 6.965068590015557e-06, |
|
"loss": 0.4302, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.6345933562428407, |
|
"grad_norm": 3.348790407180786, |
|
"learning_rate": 6.788290199406025e-06, |
|
"loss": 0.3969, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.6441389843451697, |
|
"grad_norm": 4.6731672286987305, |
|
"learning_rate": 6.611511808796493e-06, |
|
"loss": 0.4622, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 2.653684612447499, |
|
"grad_norm": 6.919814586639404, |
|
"learning_rate": 6.434733418186961e-06, |
|
"loss": 0.4073, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.663230240549828, |
|
"grad_norm": 0.3117620646953583, |
|
"learning_rate": 6.25795502757743e-06, |
|
"loss": 0.2941, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 2.6727758686521574, |
|
"grad_norm": 9.325462341308594, |
|
"learning_rate": 6.081176636967898e-06, |
|
"loss": 0.185, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.6823214967544864, |
|
"grad_norm": 5.677446365356445, |
|
"learning_rate": 5.904398246358366e-06, |
|
"loss": 0.3638, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 2.691867124856816, |
|
"grad_norm": 9.070378303527832, |
|
"learning_rate": 5.727619855748834e-06, |
|
"loss": 0.2228, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.7014127529591447, |
|
"grad_norm": 0.04487950727343559, |
|
"learning_rate": 5.550841465139302e-06, |
|
"loss": 0.4335, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 2.7109583810614737, |
|
"grad_norm": 5.6081013679504395, |
|
"learning_rate": 5.37406307452977e-06, |
|
"loss": 0.3926, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.720504009163803, |
|
"grad_norm": 9.620038986206055, |
|
"learning_rate": 5.1972846839202376e-06, |
|
"loss": 0.3399, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 2.730049637266132, |
|
"grad_norm": 6.807074546813965, |
|
"learning_rate": 5.020506293310706e-06, |
|
"loss": 0.325, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.739595265368461, |
|
"grad_norm": 4.536885738372803, |
|
"learning_rate": 4.843727902701174e-06, |
|
"loss": 0.2597, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 2.7491408934707904, |
|
"grad_norm": 1.0145533084869385, |
|
"learning_rate": 4.666949512091642e-06, |
|
"loss": 0.3867, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.7586865215731198, |
|
"grad_norm": 7.323050022125244, |
|
"learning_rate": 4.49017112148211e-06, |
|
"loss": 0.3497, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 2.7682321496754487, |
|
"grad_norm": 4.958881855010986, |
|
"learning_rate": 4.313392730872578e-06, |
|
"loss": 0.3986, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 0.2114107459783554, |
|
"learning_rate": 4.136614340263046e-06, |
|
"loss": 0.5498, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 2.787323405880107, |
|
"grad_norm": 5.643067836761475, |
|
"learning_rate": 3.959835949653515e-06, |
|
"loss": 0.4156, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.796869033982436, |
|
"grad_norm": 4.4545392990112305, |
|
"learning_rate": 3.7830575590439827e-06, |
|
"loss": 0.3753, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 2.806414662084765, |
|
"grad_norm": 1.9792596101760864, |
|
"learning_rate": 3.606279168434451e-06, |
|
"loss": 0.3298, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.8159602901870944, |
|
"grad_norm": 5.517035484313965, |
|
"learning_rate": 3.429500777824919e-06, |
|
"loss": 0.3346, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 2.8255059182894233, |
|
"grad_norm": 0.44272342324256897, |
|
"learning_rate": 3.252722387215387e-06, |
|
"loss": 0.3044, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.8350515463917527, |
|
"grad_norm": 0.38276228308677673, |
|
"learning_rate": 3.075943996605855e-06, |
|
"loss": 0.4889, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 2.8445971744940817, |
|
"grad_norm": 8.356722831726074, |
|
"learning_rate": 2.899165605996323e-06, |
|
"loss": 0.4638, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.854142802596411, |
|
"grad_norm": 4.638859748840332, |
|
"learning_rate": 2.722387215386791e-06, |
|
"loss": 0.2617, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 2.86368843069874, |
|
"grad_norm": 3.0936622619628906, |
|
"learning_rate": 2.5456088247772595e-06, |
|
"loss": 0.2356, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.873234058801069, |
|
"grad_norm": 0.17107409238815308, |
|
"learning_rate": 2.3688304341677275e-06, |
|
"loss": 0.369, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 2.8827796869033984, |
|
"grad_norm": 3.591745376586914, |
|
"learning_rate": 2.1920520435581955e-06, |
|
"loss": 0.3562, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.8923253150057273, |
|
"grad_norm": 5.612440586090088, |
|
"learning_rate": 2.015273652948664e-06, |
|
"loss": 0.313, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 2.9018709431080563, |
|
"grad_norm": 6.346649169921875, |
|
"learning_rate": 1.838495262339132e-06, |
|
"loss": 0.3335, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.9114165712103857, |
|
"grad_norm": 5.032191753387451, |
|
"learning_rate": 1.6617168717295999e-06, |
|
"loss": 0.3034, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 2.9209621993127146, |
|
"grad_norm": 0.9331425428390503, |
|
"learning_rate": 1.4849384811200679e-06, |
|
"loss": 0.3184, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.930507827415044, |
|
"grad_norm": 0.6821147203445435, |
|
"learning_rate": 1.308160090510536e-06, |
|
"loss": 0.4349, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 2.940053455517373, |
|
"grad_norm": 1.1358468532562256, |
|
"learning_rate": 1.131381699901004e-06, |
|
"loss": 0.3712, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.9495990836197024, |
|
"grad_norm": 0.07542699575424194, |
|
"learning_rate": 9.546033092914723e-07, |
|
"loss": 0.2718, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 2.9591447117220313, |
|
"grad_norm": 4.9422607421875, |
|
"learning_rate": 7.778249186819404e-07, |
|
"loss": 0.1391, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.9686903398243603, |
|
"grad_norm": 0.34055715799331665, |
|
"learning_rate": 6.010465280724085e-07, |
|
"loss": 0.288, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 2.9782359679266897, |
|
"grad_norm": 6.950995445251465, |
|
"learning_rate": 4.2426813746287655e-07, |
|
"loss": 0.213, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.9877815960290186, |
|
"grad_norm": 7.761420726776123, |
|
"learning_rate": 2.474897468533447e-07, |
|
"loss": 0.3362, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 2.997327224131348, |
|
"grad_norm": 0.21115273237228394, |
|
"learning_rate": 7.071135624381275e-08, |
|
"loss": 0.3279, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9289805269186713, |
|
"eval_f1_macro": 0.7242775388539527, |
|
"eval_f1_micro": 0.9289805269186713, |
|
"eval_f1_weighted": 0.9228868300266074, |
|
"eval_loss": 0.26649972796440125, |
|
"eval_precision_macro": 0.8358131483686612, |
|
"eval_precision_micro": 0.9289805269186713, |
|
"eval_precision_weighted": 0.9228054152229083, |
|
"eval_recall_macro": 0.687057008455895, |
|
"eval_recall_micro": 0.9289805269186713, |
|
"eval_recall_weighted": 0.9289805269186713, |
|
"eval_runtime": 30.8008, |
|
"eval_samples_per_second": 170.061, |
|
"eval_steps_per_second": 10.649, |
|
"step": 7857 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 7857, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7216523206291792e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|