|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500.0, |
|
"global_step": 18789, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003193357815743254, |
|
"grad_norm": 1.9077140880050831, |
|
"learning_rate": 2.1287919105907397e-07, |
|
"loss": 0.293, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006386715631486508, |
|
"grad_norm": 1.2741126203759185, |
|
"learning_rate": 4.2575838211814794e-07, |
|
"loss": 0.2755, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.009580073447229762, |
|
"grad_norm": 0.9579957272491677, |
|
"learning_rate": 6.38637573177222e-07, |
|
"loss": 0.2663, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.012773431262973017, |
|
"grad_norm": 0.7572791753798676, |
|
"learning_rate": 8.515167642362959e-07, |
|
"loss": 0.2483, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01596678907871627, |
|
"grad_norm": 0.7631906150781687, |
|
"learning_rate": 1.0643959552953699e-06, |
|
"loss": 0.2345, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.019160146894459523, |
|
"grad_norm": 0.6910231975995771, |
|
"learning_rate": 1.277275146354444e-06, |
|
"loss": 0.2325, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02235350471020278, |
|
"grad_norm": 0.6148039205762914, |
|
"learning_rate": 1.490154337413518e-06, |
|
"loss": 0.2204, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.025546862525946033, |
|
"grad_norm": 0.5789874935423447, |
|
"learning_rate": 1.7030335284725918e-06, |
|
"loss": 0.2098, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.028740220341689285, |
|
"grad_norm": 0.7476463496285388, |
|
"learning_rate": 1.915912719531666e-06, |
|
"loss": 0.2032, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03193357815743254, |
|
"grad_norm": 0.7428740533196578, |
|
"learning_rate": 2.1287919105907398e-06, |
|
"loss": 0.2012, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.035126935973175795, |
|
"grad_norm": 0.7037948631808884, |
|
"learning_rate": 2.341671101649814e-06, |
|
"loss": 0.2009, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.038320293788919046, |
|
"grad_norm": 0.7550244833274011, |
|
"learning_rate": 2.554550292708888e-06, |
|
"loss": 0.1909, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.041513651604662305, |
|
"grad_norm": 0.6563323462206586, |
|
"learning_rate": 2.7674294837679623e-06, |
|
"loss": 0.1921, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04470700942040556, |
|
"grad_norm": 0.6164020880535914, |
|
"learning_rate": 2.980308674827036e-06, |
|
"loss": 0.18, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.04790036723614881, |
|
"grad_norm": 0.6690736448965662, |
|
"learning_rate": 3.1931878658861097e-06, |
|
"loss": 0.1775, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05109372505189207, |
|
"grad_norm": 0.6290006787340572, |
|
"learning_rate": 3.4060670569451835e-06, |
|
"loss": 0.1806, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05428708286763532, |
|
"grad_norm": 0.6635212199407985, |
|
"learning_rate": 3.6189462480042583e-06, |
|
"loss": 0.1779, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05748044068337857, |
|
"grad_norm": 0.6197593777818997, |
|
"learning_rate": 3.831825439063332e-06, |
|
"loss": 0.1702, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06067379849912183, |
|
"grad_norm": 0.7111109613175086, |
|
"learning_rate": 4.044704630122406e-06, |
|
"loss": 0.1726, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06386715631486509, |
|
"grad_norm": 0.7014844198325911, |
|
"learning_rate": 4.2575838211814795e-06, |
|
"loss": 0.1677, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06706051413060833, |
|
"grad_norm": 0.6960545421354288, |
|
"learning_rate": 4.470463012240554e-06, |
|
"loss": 0.1661, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07025387194635159, |
|
"grad_norm": 0.6526527280390234, |
|
"learning_rate": 4.683342203299628e-06, |
|
"loss": 0.1599, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07344722976209485, |
|
"grad_norm": 0.6231986280201518, |
|
"learning_rate": 4.896221394358702e-06, |
|
"loss": 0.1608, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.07664058757783809, |
|
"grad_norm": 0.6963837819044139, |
|
"learning_rate": 5.109100585417776e-06, |
|
"loss": 0.1622, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.07983394539358135, |
|
"grad_norm": 0.6109671730909707, |
|
"learning_rate": 5.32197977647685e-06, |
|
"loss": 0.1598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08302730320932461, |
|
"grad_norm": 0.5371960923403704, |
|
"learning_rate": 5.534858967535925e-06, |
|
"loss": 0.1594, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.08622066102506785, |
|
"grad_norm": 0.5367820828152228, |
|
"learning_rate": 5.747738158594997e-06, |
|
"loss": 0.1596, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.08941401884081111, |
|
"grad_norm": 0.5470627592374788, |
|
"learning_rate": 5.960617349654072e-06, |
|
"loss": 0.1572, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.09260737665655437, |
|
"grad_norm": 0.6499813395079859, |
|
"learning_rate": 6.173496540713145e-06, |
|
"loss": 0.1608, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.09580073447229762, |
|
"grad_norm": 0.5979118456987372, |
|
"learning_rate": 6.386375731772219e-06, |
|
"loss": 0.1556, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09899409228804087, |
|
"grad_norm": 0.6192365185802242, |
|
"learning_rate": 6.5992549228312945e-06, |
|
"loss": 0.1558, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.10218745010378413, |
|
"grad_norm": 0.6113365486687661, |
|
"learning_rate": 6.812134113890367e-06, |
|
"loss": 0.1529, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.10538080791952738, |
|
"grad_norm": 0.5734345240030044, |
|
"learning_rate": 7.025013304949441e-06, |
|
"loss": 0.1569, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.10857416573527064, |
|
"grad_norm": 0.5661084962098065, |
|
"learning_rate": 7.2378924960085166e-06, |
|
"loss": 0.1516, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1117675235510139, |
|
"grad_norm": 0.5846067384703525, |
|
"learning_rate": 7.450771687067589e-06, |
|
"loss": 0.15, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.11496088136675714, |
|
"grad_norm": 0.6374604468076045, |
|
"learning_rate": 7.663650878126664e-06, |
|
"loss": 0.1595, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1181542391825004, |
|
"grad_norm": 0.6518179456002492, |
|
"learning_rate": 7.876530069185738e-06, |
|
"loss": 0.1534, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.12134759699824366, |
|
"grad_norm": 0.6418033051046158, |
|
"learning_rate": 8.089409260244812e-06, |
|
"loss": 0.1544, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1245409548139869, |
|
"grad_norm": 0.5560485727454112, |
|
"learning_rate": 8.302288451303886e-06, |
|
"loss": 0.1519, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.12773431262973017, |
|
"grad_norm": 0.5218192022156695, |
|
"learning_rate": 8.515167642362959e-06, |
|
"loss": 0.1526, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1309276704454734, |
|
"grad_norm": 0.5873485030073047, |
|
"learning_rate": 8.728046833422033e-06, |
|
"loss": 0.1477, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.13412102826121666, |
|
"grad_norm": 0.52521133386056, |
|
"learning_rate": 8.940926024481108e-06, |
|
"loss": 0.1462, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.13731438607695992, |
|
"grad_norm": 0.49868364100795765, |
|
"learning_rate": 9.153805215540182e-06, |
|
"loss": 0.1459, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.14050774389270318, |
|
"grad_norm": 0.46689167597503883, |
|
"learning_rate": 9.366684406599256e-06, |
|
"loss": 0.1522, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.14370110170844644, |
|
"grad_norm": 0.5355408722325465, |
|
"learning_rate": 9.57956359765833e-06, |
|
"loss": 0.1512, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1468944595241897, |
|
"grad_norm": 0.46406202651388007, |
|
"learning_rate": 9.792442788717403e-06, |
|
"loss": 0.1503, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.15008781733993293, |
|
"grad_norm": 0.5480845186900385, |
|
"learning_rate": 1.0005321979776476e-05, |
|
"loss": 0.1474, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.15328117515567619, |
|
"grad_norm": 0.5541284715722858, |
|
"learning_rate": 1.0218201170835552e-05, |
|
"loss": 0.1469, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15647453297141944, |
|
"grad_norm": 0.6186186072342443, |
|
"learning_rate": 1.0431080361894626e-05, |
|
"loss": 0.1495, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.1596678907871627, |
|
"grad_norm": 0.5921353515589192, |
|
"learning_rate": 1.06439595529537e-05, |
|
"loss": 0.1463, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16286124860290596, |
|
"grad_norm": 0.5433507613364311, |
|
"learning_rate": 1.0856838744012775e-05, |
|
"loss": 0.1472, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.16605460641864922, |
|
"grad_norm": 0.5979565391588779, |
|
"learning_rate": 1.106971793507185e-05, |
|
"loss": 0.1506, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.16924796423439245, |
|
"grad_norm": 0.48287899522708827, |
|
"learning_rate": 1.128259712613092e-05, |
|
"loss": 0.1486, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.1724413220501357, |
|
"grad_norm": 0.5655815878985752, |
|
"learning_rate": 1.1495476317189994e-05, |
|
"loss": 0.1457, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.17563467986587897, |
|
"grad_norm": 0.512570124033007, |
|
"learning_rate": 1.170835550824907e-05, |
|
"loss": 0.1461, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17882803768162223, |
|
"grad_norm": 0.46185823489920147, |
|
"learning_rate": 1.1921234699308145e-05, |
|
"loss": 0.144, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.18202139549736548, |
|
"grad_norm": 0.44757029208547805, |
|
"learning_rate": 1.2134113890367219e-05, |
|
"loss": 0.1473, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.18521475331310874, |
|
"grad_norm": 0.4396528603456714, |
|
"learning_rate": 1.234699308142629e-05, |
|
"loss": 0.142, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.188408111128852, |
|
"grad_norm": 0.4916233270385887, |
|
"learning_rate": 1.2559872272485364e-05, |
|
"loss": 0.1459, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.19160146894459523, |
|
"grad_norm": 0.49164646615860214, |
|
"learning_rate": 1.2772751463544439e-05, |
|
"loss": 0.1442, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1947948267603385, |
|
"grad_norm": 0.47947460832408273, |
|
"learning_rate": 1.2985630654603515e-05, |
|
"loss": 0.1491, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.19798818457608175, |
|
"grad_norm": 0.5469424153214216, |
|
"learning_rate": 1.3198509845662589e-05, |
|
"loss": 0.1502, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.201181542391825, |
|
"grad_norm": 0.4638562794822929, |
|
"learning_rate": 1.3411389036721663e-05, |
|
"loss": 0.1442, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.20437490020756827, |
|
"grad_norm": 0.44344070089234844, |
|
"learning_rate": 1.3624268227780734e-05, |
|
"loss": 0.1488, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.20756825802331152, |
|
"grad_norm": 0.47717361703152655, |
|
"learning_rate": 1.3837147418839808e-05, |
|
"loss": 0.1488, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.21076161583905476, |
|
"grad_norm": 0.4703261039559369, |
|
"learning_rate": 1.4050026609898883e-05, |
|
"loss": 0.145, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.21395497365479801, |
|
"grad_norm": 0.45451915177321617, |
|
"learning_rate": 1.4262905800957957e-05, |
|
"loss": 0.1514, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.21714833147054127, |
|
"grad_norm": 0.4705441248128481, |
|
"learning_rate": 1.4475784992017033e-05, |
|
"loss": 0.1487, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.22034168928628453, |
|
"grad_norm": 0.4369632636042999, |
|
"learning_rate": 1.4688664183076104e-05, |
|
"loss": 0.1459, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.2235350471020278, |
|
"grad_norm": 0.42608257790275605, |
|
"learning_rate": 1.4901543374135178e-05, |
|
"loss": 0.1455, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22672840491777105, |
|
"grad_norm": 0.50356002082837, |
|
"learning_rate": 1.5114422565194253e-05, |
|
"loss": 0.1451, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.22992176273351428, |
|
"grad_norm": 0.4561937833231143, |
|
"learning_rate": 1.5327301756253327e-05, |
|
"loss": 0.1477, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.23311512054925754, |
|
"grad_norm": 0.40765594909388037, |
|
"learning_rate": 1.55401809473124e-05, |
|
"loss": 0.1425, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2363084783650008, |
|
"grad_norm": 0.487476872013174, |
|
"learning_rate": 1.5753060138371476e-05, |
|
"loss": 0.1412, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.23950183618074405, |
|
"grad_norm": 0.4680001690322545, |
|
"learning_rate": 1.596593932943055e-05, |
|
"loss": 0.1443, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2426951939964873, |
|
"grad_norm": 0.41230879655966063, |
|
"learning_rate": 1.6178818520489624e-05, |
|
"loss": 0.1455, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.24588855181223057, |
|
"grad_norm": 0.4315075010200903, |
|
"learning_rate": 1.63916977115487e-05, |
|
"loss": 0.1453, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.2490819096279738, |
|
"grad_norm": 0.3880821656792041, |
|
"learning_rate": 1.6604576902607773e-05, |
|
"loss": 0.1367, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.2522752674437171, |
|
"grad_norm": 0.4170991591966089, |
|
"learning_rate": 1.6817456093666847e-05, |
|
"loss": 0.1444, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.25546862525946035, |
|
"grad_norm": 0.4352470317730404, |
|
"learning_rate": 1.7030335284725918e-05, |
|
"loss": 0.1462, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25866198307520355, |
|
"grad_norm": 0.41926976953754025, |
|
"learning_rate": 1.7243214475784992e-05, |
|
"loss": 0.1427, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.2618553408909468, |
|
"grad_norm": 0.4067020140616968, |
|
"learning_rate": 1.7456093666844067e-05, |
|
"loss": 0.1435, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.26504869870669007, |
|
"grad_norm": 0.4568169742722482, |
|
"learning_rate": 1.766897285790314e-05, |
|
"loss": 0.1425, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.2682420565224333, |
|
"grad_norm": 0.4952678328950158, |
|
"learning_rate": 1.7881852048962215e-05, |
|
"loss": 0.1411, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.2714354143381766, |
|
"grad_norm": 0.36574600840843885, |
|
"learning_rate": 1.809473124002129e-05, |
|
"loss": 0.1424, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.27462877215391984, |
|
"grad_norm": 0.40710244186170225, |
|
"learning_rate": 1.8307610431080364e-05, |
|
"loss": 0.1435, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.2778221299696631, |
|
"grad_norm": 0.41415797524036474, |
|
"learning_rate": 1.852048962213944e-05, |
|
"loss": 0.1443, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.28101548778540636, |
|
"grad_norm": 0.38093938436737673, |
|
"learning_rate": 1.8733368813198513e-05, |
|
"loss": 0.1459, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2842088456011496, |
|
"grad_norm": 0.36699157301783514, |
|
"learning_rate": 1.8946248004257587e-05, |
|
"loss": 0.1503, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2874022034168929, |
|
"grad_norm": 0.4426133669933364, |
|
"learning_rate": 1.915912719531666e-05, |
|
"loss": 0.1458, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.29059556123263613, |
|
"grad_norm": 0.37577866094305634, |
|
"learning_rate": 1.9372006386375732e-05, |
|
"loss": 0.1437, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.2937889190483794, |
|
"grad_norm": 0.3853315977661372, |
|
"learning_rate": 1.9584885577434807e-05, |
|
"loss": 0.1424, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.29698227686412265, |
|
"grad_norm": 0.39658703817733554, |
|
"learning_rate": 1.979776476849388e-05, |
|
"loss": 0.143, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.30017563467986585, |
|
"grad_norm": 0.34168487028906286, |
|
"learning_rate": 1.9999999827423154e-05, |
|
"loss": 0.1472, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3033689924956091, |
|
"grad_norm": 0.428099056379712, |
|
"learning_rate": 1.9999923893706236e-05, |
|
"loss": 0.1424, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.30656235031135237, |
|
"grad_norm": 0.47981664372403626, |
|
"learning_rate": 1.9999709899719893e-05, |
|
"loss": 0.1414, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.30975570812709563, |
|
"grad_norm": 0.4495236935209742, |
|
"learning_rate": 1.9999357848418547e-05, |
|
"loss": 0.1432, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.3129490659428389, |
|
"grad_norm": 0.4335827442743115, |
|
"learning_rate": 1.999886774466267e-05, |
|
"loss": 0.1449, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.31614242375858215, |
|
"grad_norm": 0.3740214770732922, |
|
"learning_rate": 1.9998239595218693e-05, |
|
"loss": 0.1455, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.3193357815743254, |
|
"grad_norm": 0.35431822792110484, |
|
"learning_rate": 1.999747340875894e-05, |
|
"loss": 0.14, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32252913939006866, |
|
"grad_norm": 0.37271856793106084, |
|
"learning_rate": 1.9996569195861474e-05, |
|
"loss": 0.1433, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.3257224972058119, |
|
"grad_norm": 0.36904721824612496, |
|
"learning_rate": 1.999552696900998e-05, |
|
"loss": 0.1474, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.3289158550215552, |
|
"grad_norm": 0.4329302625174645, |
|
"learning_rate": 1.9994346742593577e-05, |
|
"loss": 0.1409, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.33210921283729844, |
|
"grad_norm": 0.4659341494260738, |
|
"learning_rate": 1.999302853290663e-05, |
|
"loss": 0.1453, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.3353025706530417, |
|
"grad_norm": 0.40127911103988617, |
|
"learning_rate": 1.9991572358148522e-05, |
|
"loss": 0.1396, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3384959284687849, |
|
"grad_norm": 0.3087442313177786, |
|
"learning_rate": 1.9989978238423383e-05, |
|
"loss": 0.1474, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.34168928628452816, |
|
"grad_norm": 0.37193584969289195, |
|
"learning_rate": 1.9988246195739846e-05, |
|
"loss": 0.1422, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.3448826441002714, |
|
"grad_norm": 0.3814913494874711, |
|
"learning_rate": 1.998637625401072e-05, |
|
"loss": 0.1422, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.3480760019160147, |
|
"grad_norm": 0.34898690836821306, |
|
"learning_rate": 1.9984368439052668e-05, |
|
"loss": 0.1396, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.35126935973175794, |
|
"grad_norm": 0.3951747534549505, |
|
"learning_rate": 1.9982222778585845e-05, |
|
"loss": 0.1458, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3544627175475012, |
|
"grad_norm": 0.34562618895160807, |
|
"learning_rate": 1.9979939302233524e-05, |
|
"loss": 0.1402, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.35765607536324445, |
|
"grad_norm": 0.388573729018997, |
|
"learning_rate": 1.9977518041521683e-05, |
|
"loss": 0.1402, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.3608494331789877, |
|
"grad_norm": 0.37200937634013864, |
|
"learning_rate": 1.9974959029878568e-05, |
|
"loss": 0.1438, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.36404279099473097, |
|
"grad_norm": 0.3965761915716373, |
|
"learning_rate": 1.9972262302634228e-05, |
|
"loss": 0.1401, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.3672361488104742, |
|
"grad_norm": 0.3173011648096044, |
|
"learning_rate": 1.996942789702004e-05, |
|
"loss": 0.1392, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.3704295066262175, |
|
"grad_norm": 0.3017733588197737, |
|
"learning_rate": 1.996645585216818e-05, |
|
"loss": 0.1424, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.37362286444196074, |
|
"grad_norm": 0.3363017850364413, |
|
"learning_rate": 1.9963346209111084e-05, |
|
"loss": 0.1396, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.376816222257704, |
|
"grad_norm": 0.36015352029461045, |
|
"learning_rate": 1.9960099010780906e-05, |
|
"loss": 0.1364, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.3800095800734472, |
|
"grad_norm": 0.3944315476618534, |
|
"learning_rate": 1.995671430200889e-05, |
|
"loss": 0.1367, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.38320293788919046, |
|
"grad_norm": 0.3511161782236592, |
|
"learning_rate": 1.9953192129524774e-05, |
|
"loss": 0.134, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3863962957049337, |
|
"grad_norm": 0.28417767585244963, |
|
"learning_rate": 1.994953254195613e-05, |
|
"loss": 0.1345, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.389589653520677, |
|
"grad_norm": 0.33674300015583525, |
|
"learning_rate": 1.9945735589827714e-05, |
|
"loss": 0.1414, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.39278301133642024, |
|
"grad_norm": 0.35738285989377994, |
|
"learning_rate": 1.9941801325560748e-05, |
|
"loss": 0.1379, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.3959763691521635, |
|
"grad_norm": 0.3281943522856012, |
|
"learning_rate": 1.9937729803472198e-05, |
|
"loss": 0.1377, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.39916972696790676, |
|
"grad_norm": 0.45986080956623454, |
|
"learning_rate": 1.9933521079774043e-05, |
|
"loss": 0.1375, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.40236308478365, |
|
"grad_norm": 0.3345998745429948, |
|
"learning_rate": 1.9929175212572473e-05, |
|
"loss": 0.1376, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.4055564425993933, |
|
"grad_norm": 0.3589364581224636, |
|
"learning_rate": 1.9924692261867107e-05, |
|
"loss": 0.136, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.40874980041513653, |
|
"grad_norm": 0.3214892613419847, |
|
"learning_rate": 1.9920072289550152e-05, |
|
"loss": 0.1375, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4119431582308798, |
|
"grad_norm": 0.293317969518762, |
|
"learning_rate": 1.9915315359405556e-05, |
|
"loss": 0.1396, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.41513651604662305, |
|
"grad_norm": 0.30656393646036983, |
|
"learning_rate": 1.9910421537108124e-05, |
|
"loss": 0.1417, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.41832987386236625, |
|
"grad_norm": 0.3145662089610958, |
|
"learning_rate": 1.990539089022262e-05, |
|
"loss": 0.1361, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.4215232316781095, |
|
"grad_norm": 0.3321283029025178, |
|
"learning_rate": 1.9900223488202807e-05, |
|
"loss": 0.1374, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.42471658949385277, |
|
"grad_norm": 0.3121994972039942, |
|
"learning_rate": 1.9894919402390527e-05, |
|
"loss": 0.1369, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.42790994730959603, |
|
"grad_norm": 0.3207245571484729, |
|
"learning_rate": 1.9889478706014687e-05, |
|
"loss": 0.1365, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.4311033051253393, |
|
"grad_norm": 0.3214891191577016, |
|
"learning_rate": 1.9883901474190258e-05, |
|
"loss": 0.134, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.43429666294108255, |
|
"grad_norm": 0.2948703624055813, |
|
"learning_rate": 1.9878187783917246e-05, |
|
"loss": 0.1358, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.4374900207568258, |
|
"grad_norm": 0.2962758480800377, |
|
"learning_rate": 1.9872337714079604e-05, |
|
"loss": 0.1353, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.44068337857256906, |
|
"grad_norm": 0.28750701401056433, |
|
"learning_rate": 1.9866351345444172e-05, |
|
"loss": 0.1397, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.4438767363883123, |
|
"grad_norm": 0.33961478398235684, |
|
"learning_rate": 1.9860228760659547e-05, |
|
"loss": 0.1395, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.4470700942040556, |
|
"grad_norm": 0.3327701106038422, |
|
"learning_rate": 1.9853970044254942e-05, |
|
"loss": 0.1362, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.45026345201979884, |
|
"grad_norm": 0.34046480079798697, |
|
"learning_rate": 1.9847575282639022e-05, |
|
"loss": 0.1357, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.4534568098355421, |
|
"grad_norm": 0.2591827835319709, |
|
"learning_rate": 1.984104456409871e-05, |
|
"loss": 0.1319, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.4566501676512853, |
|
"grad_norm": 0.31099418106495114, |
|
"learning_rate": 1.983437797879797e-05, |
|
"loss": 0.134, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.45984352546702856, |
|
"grad_norm": 0.34942376213984855, |
|
"learning_rate": 1.9827575618776556e-05, |
|
"loss": 0.1353, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.4630368832827718, |
|
"grad_norm": 0.29857742338407706, |
|
"learning_rate": 1.9820637577948746e-05, |
|
"loss": 0.1336, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.4662302410985151, |
|
"grad_norm": 0.2701149477986023, |
|
"learning_rate": 1.9813563952102056e-05, |
|
"loss": 0.1338, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.46942359891425833, |
|
"grad_norm": 0.35582085328111446, |
|
"learning_rate": 1.980635483889589e-05, |
|
"loss": 0.1325, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.4726169567300016, |
|
"grad_norm": 0.36536478089468427, |
|
"learning_rate": 1.979901033786022e-05, |
|
"loss": 0.138, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.47581031454574485, |
|
"grad_norm": 0.34482414871566835, |
|
"learning_rate": 1.9791530550394197e-05, |
|
"loss": 0.14, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.4790036723614881, |
|
"grad_norm": 0.313925122152452, |
|
"learning_rate": 1.9783915579764755e-05, |
|
"loss": 0.1349, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48219703017723137, |
|
"grad_norm": 0.33065001108381514, |
|
"learning_rate": 1.9776165531105182e-05, |
|
"loss": 0.1334, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.4853903879929746, |
|
"grad_norm": 0.33106961743791363, |
|
"learning_rate": 1.9768280511413676e-05, |
|
"loss": 0.1346, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.4885837458087179, |
|
"grad_norm": 0.3038455922499442, |
|
"learning_rate": 1.9760260629551856e-05, |
|
"loss": 0.13, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.49177710362446114, |
|
"grad_norm": 0.32774568750571736, |
|
"learning_rate": 1.975210599624327e-05, |
|
"loss": 0.1317, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.4949704614402044, |
|
"grad_norm": 0.27913297393743014, |
|
"learning_rate": 1.9743816724071864e-05, |
|
"loss": 0.1299, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.4981638192559476, |
|
"grad_norm": 0.25535801906865635, |
|
"learning_rate": 1.9735392927480425e-05, |
|
"loss": 0.1341, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5013571770716909, |
|
"grad_norm": 0.3450201878469047, |
|
"learning_rate": 1.9726834722768998e-05, |
|
"loss": 0.1307, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.5045505348874342, |
|
"grad_norm": 0.3355377854047922, |
|
"learning_rate": 1.9718142228093286e-05, |
|
"loss": 0.1373, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.5077438927031774, |
|
"grad_norm": 0.29501763605746917, |
|
"learning_rate": 1.9709315563463022e-05, |
|
"loss": 0.1329, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.5109372505189207, |
|
"grad_norm": 0.29498443847446687, |
|
"learning_rate": 1.9700354850740305e-05, |
|
"loss": 0.1302, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.514130608334664, |
|
"grad_norm": 0.3374549804904556, |
|
"learning_rate": 1.969126021363791e-05, |
|
"loss": 0.1332, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.5173239661504071, |
|
"grad_norm": 0.2937151476643792, |
|
"learning_rate": 1.9682031777717602e-05, |
|
"loss": 0.1289, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.5205173239661504, |
|
"grad_norm": 0.34027338318157424, |
|
"learning_rate": 1.9672669670388387e-05, |
|
"loss": 0.1335, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.5237106817818936, |
|
"grad_norm": 0.2958186800446919, |
|
"learning_rate": 1.966317402090475e-05, |
|
"loss": 0.1321, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.5269040395976369, |
|
"grad_norm": 0.2937191900726174, |
|
"learning_rate": 1.9653544960364886e-05, |
|
"loss": 0.132, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5300973974133801, |
|
"grad_norm": 0.3133245335540435, |
|
"learning_rate": 1.9643782621708875e-05, |
|
"loss": 0.1311, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.5332907552291234, |
|
"grad_norm": 0.29304130620982005, |
|
"learning_rate": 1.963388713971685e-05, |
|
"loss": 0.1355, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.5364841130448667, |
|
"grad_norm": 0.31292116477262744, |
|
"learning_rate": 1.962385865100715e-05, |
|
"loss": 0.1351, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5396774708606099, |
|
"grad_norm": 0.26493353801679925, |
|
"learning_rate": 1.9613697294034403e-05, |
|
"loss": 0.1315, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.5428708286763532, |
|
"grad_norm": 0.2630906644626646, |
|
"learning_rate": 1.9603403209087655e-05, |
|
"loss": 0.1312, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5460641864920964, |
|
"grad_norm": 0.26085033107366945, |
|
"learning_rate": 1.9592976538288392e-05, |
|
"loss": 0.1296, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.5492575443078397, |
|
"grad_norm": 0.2940107915040809, |
|
"learning_rate": 1.9582417425588615e-05, |
|
"loss": 0.1305, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.5524509021235829, |
|
"grad_norm": 0.2648782222390229, |
|
"learning_rate": 1.9571726016768825e-05, |
|
"loss": 0.1298, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.5556442599393262, |
|
"grad_norm": 0.25767016449009617, |
|
"learning_rate": 1.9560902459436027e-05, |
|
"loss": 0.1287, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.5588376177550695, |
|
"grad_norm": 0.304832191804209, |
|
"learning_rate": 1.9549946903021676e-05, |
|
"loss": 0.1335, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5620309755708127, |
|
"grad_norm": 0.2814622371172937, |
|
"learning_rate": 1.953885949877963e-05, |
|
"loss": 0.1287, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.565224333386556, |
|
"grad_norm": 0.27565323140470793, |
|
"learning_rate": 1.9527640399784066e-05, |
|
"loss": 0.132, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.5684176912022992, |
|
"grad_norm": 0.2874659718873625, |
|
"learning_rate": 1.9516289760927337e-05, |
|
"loss": 0.1306, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.5716110490180425, |
|
"grad_norm": 0.24637256127265056, |
|
"learning_rate": 1.9504807738917864e-05, |
|
"loss": 0.1294, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.5748044068337858, |
|
"grad_norm": 0.2683166797652062, |
|
"learning_rate": 1.949319449227796e-05, |
|
"loss": 0.1265, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.577997764649529, |
|
"grad_norm": 0.2991655914571407, |
|
"learning_rate": 1.9481450181341636e-05, |
|
"loss": 0.1307, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.5811911224652723, |
|
"grad_norm": 0.2629061135815468, |
|
"learning_rate": 1.9469574968252405e-05, |
|
"loss": 0.131, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.5843844802810155, |
|
"grad_norm": 0.30352941453895776, |
|
"learning_rate": 1.9457569016961025e-05, |
|
"loss": 0.1315, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.5875778380967588, |
|
"grad_norm": 0.32189790257315865, |
|
"learning_rate": 1.9445432493223243e-05, |
|
"loss": 0.1301, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.590771195912502, |
|
"grad_norm": 0.2262924484205468, |
|
"learning_rate": 1.943316556459751e-05, |
|
"loss": 0.1265, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5939645537282453, |
|
"grad_norm": 0.2711892071402863, |
|
"learning_rate": 1.9420768400442657e-05, |
|
"loss": 0.1271, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.5971579115439885, |
|
"grad_norm": 0.256185445894437, |
|
"learning_rate": 1.9408241171915576e-05, |
|
"loss": 0.1277, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6003512693597317, |
|
"grad_norm": 0.25593240031460607, |
|
"learning_rate": 1.9395584051968833e-05, |
|
"loss": 0.1287, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.603544627175475, |
|
"grad_norm": 0.2979762688925845, |
|
"learning_rate": 1.9382797215348303e-05, |
|
"loss": 0.1287, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.6067379849912182, |
|
"grad_norm": 0.2761523818504427, |
|
"learning_rate": 1.936988083859073e-05, |
|
"loss": 0.1289, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6099313428069615, |
|
"grad_norm": 0.31322754272354847, |
|
"learning_rate": 1.935683510002133e-05, |
|
"loss": 0.1289, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.6131247006227047, |
|
"grad_norm": 0.32118979161692, |
|
"learning_rate": 1.934366017975128e-05, |
|
"loss": 0.1291, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.616318058438448, |
|
"grad_norm": 0.3965221736956701, |
|
"learning_rate": 1.9330356259675277e-05, |
|
"loss": 0.1291, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.6195114162541913, |
|
"grad_norm": 0.23124317796079472, |
|
"learning_rate": 1.9316923523468988e-05, |
|
"loss": 0.127, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.6227047740699345, |
|
"grad_norm": 0.26107711518189003, |
|
"learning_rate": 1.9303362156586554e-05, |
|
"loss": 0.1267, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6258981318856778, |
|
"grad_norm": 0.23776916842759366, |
|
"learning_rate": 1.9289672346257988e-05, |
|
"loss": 0.1246, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.629091489701421, |
|
"grad_norm": 0.26149208748799935, |
|
"learning_rate": 1.9275854281486626e-05, |
|
"loss": 0.1251, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.6322848475171643, |
|
"grad_norm": 0.2488232391306922, |
|
"learning_rate": 1.9261908153046485e-05, |
|
"loss": 0.1268, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.6354782053329076, |
|
"grad_norm": 0.2541408784103856, |
|
"learning_rate": 1.924783415347966e-05, |
|
"loss": 0.1271, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.6386715631486508, |
|
"grad_norm": 0.2731460017360242, |
|
"learning_rate": 1.9233632477093655e-05, |
|
"loss": 0.1255, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6418649209643941, |
|
"grad_norm": 0.22543278383772555, |
|
"learning_rate": 1.9219303319958675e-05, |
|
"loss": 0.1252, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.6450582787801373, |
|
"grad_norm": 0.3043429192344086, |
|
"learning_rate": 1.9204846879904966e-05, |
|
"loss": 0.1261, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.6482516365958806, |
|
"grad_norm": 0.2787988968661325, |
|
"learning_rate": 1.9190263356520044e-05, |
|
"loss": 0.1285, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.6514449944116238, |
|
"grad_norm": 0.28334459179072036, |
|
"learning_rate": 1.9175552951145953e-05, |
|
"loss": 0.1312, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.6546383522273671, |
|
"grad_norm": 0.2699681672265312, |
|
"learning_rate": 1.91607158668765e-05, |
|
"loss": 0.128, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.6578317100431104, |
|
"grad_norm": 0.2653884535783852, |
|
"learning_rate": 1.9145752308554422e-05, |
|
"loss": 0.1236, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.6610250678588536, |
|
"grad_norm": 0.24370062543889062, |
|
"learning_rate": 1.913066248276859e-05, |
|
"loss": 0.1267, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.6642184256745969, |
|
"grad_norm": 0.268136522123535, |
|
"learning_rate": 1.911544659785112e-05, |
|
"loss": 0.1251, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.6674117834903401, |
|
"grad_norm": 0.2804716904650792, |
|
"learning_rate": 1.9100104863874535e-05, |
|
"loss": 0.1282, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.6706051413060834, |
|
"grad_norm": 0.25256532175596885, |
|
"learning_rate": 1.9084637492648834e-05, |
|
"loss": 0.1291, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.6737984991218267, |
|
"grad_norm": 0.20654144385500267, |
|
"learning_rate": 1.9069044697718596e-05, |
|
"loss": 0.1275, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.6769918569375698, |
|
"grad_norm": 0.3170119063036349, |
|
"learning_rate": 1.9053326694359996e-05, |
|
"loss": 0.1252, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.6801852147533131, |
|
"grad_norm": 0.2518310103396095, |
|
"learning_rate": 1.9037483699577866e-05, |
|
"loss": 0.1252, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.6833785725690563, |
|
"grad_norm": 0.24576567016775977, |
|
"learning_rate": 1.9021515932102687e-05, |
|
"loss": 0.1262, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.6865719303847996, |
|
"grad_norm": 0.2272326194356311, |
|
"learning_rate": 1.9005423612387564e-05, |
|
"loss": 0.1277, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.6897652882005428, |
|
"grad_norm": 0.2241851322819629, |
|
"learning_rate": 1.8989206962605183e-05, |
|
"loss": 0.1254, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.6929586460162861, |
|
"grad_norm": 0.28963794959769024, |
|
"learning_rate": 1.8972866206644756e-05, |
|
"loss": 0.1269, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.6961520038320294, |
|
"grad_norm": 0.27244182001640865, |
|
"learning_rate": 1.8956401570108918e-05, |
|
"loss": 0.1268, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.6993453616477726, |
|
"grad_norm": 0.23505587827589292, |
|
"learning_rate": 1.893981328031061e-05, |
|
"loss": 0.128, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.7025387194635159, |
|
"grad_norm": 0.2636460746314892, |
|
"learning_rate": 1.8923101566269956e-05, |
|
"loss": 0.1268, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7057320772792591, |
|
"grad_norm": 0.287020211559549, |
|
"learning_rate": 1.890626665871108e-05, |
|
"loss": 0.1251, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.7089254350950024, |
|
"grad_norm": 0.3666813610337495, |
|
"learning_rate": 1.8889308790058944e-05, |
|
"loss": 0.122, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.7121187929107456, |
|
"grad_norm": 0.24200632888509, |
|
"learning_rate": 1.887222819443612e-05, |
|
"loss": 0.1234, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.7153121507264889, |
|
"grad_norm": 0.3142721600257018, |
|
"learning_rate": 1.8855025107659565e-05, |
|
"loss": 0.1247, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.7185055085422322, |
|
"grad_norm": 0.2542404530052441, |
|
"learning_rate": 1.8837699767237363e-05, |
|
"loss": 0.1267, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7216988663579754, |
|
"grad_norm": 0.2513575844512111, |
|
"learning_rate": 1.882025241236546e-05, |
|
"loss": 0.1254, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.7248922241737187, |
|
"grad_norm": 0.24131168314941073, |
|
"learning_rate": 1.880268328392433e-05, |
|
"loss": 0.1251, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.7280855819894619, |
|
"grad_norm": 0.22534176261187136, |
|
"learning_rate": 1.878499262447569e-05, |
|
"loss": 0.1241, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.7312789398052052, |
|
"grad_norm": 0.2812964686320165, |
|
"learning_rate": 1.8767180678259113e-05, |
|
"loss": 0.1257, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.7344722976209485, |
|
"grad_norm": 0.23889076217882216, |
|
"learning_rate": 1.874924769118868e-05, |
|
"loss": 0.1273, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.7376656554366917, |
|
"grad_norm": 0.27177520658222915, |
|
"learning_rate": 1.873119391084958e-05, |
|
"loss": 0.125, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.740859013252435, |
|
"grad_norm": 0.21614884950104765, |
|
"learning_rate": 1.8713019586494687e-05, |
|
"loss": 0.1244, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.7440523710681782, |
|
"grad_norm": 0.26972504495310423, |
|
"learning_rate": 1.869472496904112e-05, |
|
"loss": 0.1278, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.7472457288839215, |
|
"grad_norm": 0.26832330471480753, |
|
"learning_rate": 1.867631031106679e-05, |
|
"loss": 0.1217, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.7504390866996647, |
|
"grad_norm": 0.2204440405656476, |
|
"learning_rate": 1.8657775866806885e-05, |
|
"loss": 0.1226, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.753632444515408, |
|
"grad_norm": 0.25422716012201274, |
|
"learning_rate": 1.86391218921504e-05, |
|
"loss": 0.1264, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.7568258023311512, |
|
"grad_norm": 0.2334817914407686, |
|
"learning_rate": 1.8620348644636572e-05, |
|
"loss": 0.123, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.7600191601468944, |
|
"grad_norm": 0.24736892038169794, |
|
"learning_rate": 1.8601456383451325e-05, |
|
"loss": 0.1245, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.7632125179626377, |
|
"grad_norm": 0.23376798290995154, |
|
"learning_rate": 1.8582445369423716e-05, |
|
"loss": 0.1259, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.7664058757783809, |
|
"grad_norm": 0.24649571282123517, |
|
"learning_rate": 1.8563315865022318e-05, |
|
"loss": 0.125, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.7695992335941242, |
|
"grad_norm": 0.24228210919101548, |
|
"learning_rate": 1.8544068134351585e-05, |
|
"loss": 0.1225, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.7727925914098674, |
|
"grad_norm": 0.25429442512742784, |
|
"learning_rate": 1.852470244314824e-05, |
|
"loss": 0.1261, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.7759859492256107, |
|
"grad_norm": 0.2309045224127907, |
|
"learning_rate": 1.850521905877756e-05, |
|
"loss": 0.1249, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.779179307041354, |
|
"grad_norm": 0.25672801367790765, |
|
"learning_rate": 1.848561825022973e-05, |
|
"loss": 0.1234, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.7823726648570972, |
|
"grad_norm": 0.2473205806486083, |
|
"learning_rate": 1.8465900288116098e-05, |
|
"loss": 0.1284, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.7855660226728405, |
|
"grad_norm": 0.3035165882865362, |
|
"learning_rate": 1.844606544466545e-05, |
|
"loss": 0.1237, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.7887593804885837, |
|
"grad_norm": 0.26837139940976074, |
|
"learning_rate": 1.8426113993720255e-05, |
|
"loss": 0.1252, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.791952738304327, |
|
"grad_norm": 0.26373147498792854, |
|
"learning_rate": 1.840604621073288e-05, |
|
"loss": 0.1227, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.7951460961200703, |
|
"grad_norm": 0.2581673321881109, |
|
"learning_rate": 1.8385862372761784e-05, |
|
"loss": 0.1273, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.7983394539358135, |
|
"grad_norm": 0.26439250344256154, |
|
"learning_rate": 1.83655627584677e-05, |
|
"loss": 0.1218, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8015328117515568, |
|
"grad_norm": 0.2816537144327537, |
|
"learning_rate": 1.8345147648109784e-05, |
|
"loss": 0.1263, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.8047261695673, |
|
"grad_norm": 0.2647977758183829, |
|
"learning_rate": 1.8324617323541738e-05, |
|
"loss": 0.1238, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.8079195273830433, |
|
"grad_norm": 0.2593258946289472, |
|
"learning_rate": 1.830397206820794e-05, |
|
"loss": 0.1246, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.8111128851987865, |
|
"grad_norm": 0.22990124735756534, |
|
"learning_rate": 1.8283212167139513e-05, |
|
"loss": 0.1226, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.8143062430145298, |
|
"grad_norm": 0.27455958743278586, |
|
"learning_rate": 1.8262337906950385e-05, |
|
"loss": 0.1261, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.8174996008302731, |
|
"grad_norm": 0.2608809929482469, |
|
"learning_rate": 1.8241349575833352e-05, |
|
"loss": 0.1226, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.8206929586460163, |
|
"grad_norm": 0.2640419564306298, |
|
"learning_rate": 1.822024746355608e-05, |
|
"loss": 0.1381, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.8238863164617596, |
|
"grad_norm": 0.29262015087553245, |
|
"learning_rate": 1.8199031861457123e-05, |
|
"loss": 0.1214, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.8270796742775028, |
|
"grad_norm": 0.2319619995331439, |
|
"learning_rate": 1.8177703062441882e-05, |
|
"loss": 0.1232, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.8302730320932461, |
|
"grad_norm": 0.26293647732336844, |
|
"learning_rate": 1.815626136097857e-05, |
|
"loss": 0.1233, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.8334663899089892, |
|
"grad_norm": 0.24081197327765444, |
|
"learning_rate": 1.8134707053094146e-05, |
|
"loss": 0.1202, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.8366597477247325, |
|
"grad_norm": 0.2736597574126886, |
|
"learning_rate": 1.8113040436370236e-05, |
|
"loss": 0.1189, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.8398531055404758, |
|
"grad_norm": 0.22867160064093073, |
|
"learning_rate": 1.809126180993901e-05, |
|
"loss": 0.1227, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.843046463356219, |
|
"grad_norm": 0.20241019354224027, |
|
"learning_rate": 1.8069371474479055e-05, |
|
"loss": 0.1207, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.8462398211719623, |
|
"grad_norm": 0.23512641329119113, |
|
"learning_rate": 1.8047369732211236e-05, |
|
"loss": 0.1227, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.8494331789877055, |
|
"grad_norm": 0.21831678736014193, |
|
"learning_rate": 1.8025256886894512e-05, |
|
"loss": 0.1263, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.8526265368034488, |
|
"grad_norm": 0.22942586598137038, |
|
"learning_rate": 1.800303324382174e-05, |
|
"loss": 0.1226, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.8558198946191921, |
|
"grad_norm": 0.22565630953315605, |
|
"learning_rate": 1.7980699109815476e-05, |
|
"loss": 0.1227, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.8590132524349353, |
|
"grad_norm": 0.2110233708822902, |
|
"learning_rate": 1.795825479322372e-05, |
|
"loss": 0.123, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.8622066102506786, |
|
"grad_norm": 0.2588140422630483, |
|
"learning_rate": 1.793570060391567e-05, |
|
"loss": 0.1233, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.8653999680664218, |
|
"grad_norm": 0.20643049269214508, |
|
"learning_rate": 1.791303685327744e-05, |
|
"loss": 0.1216, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.8685933258821651, |
|
"grad_norm": 0.2450716780518527, |
|
"learning_rate": 1.7890263854207766e-05, |
|
"loss": 0.1187, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.8717866836979083, |
|
"grad_norm": 0.2626908104568787, |
|
"learning_rate": 1.7867381921113672e-05, |
|
"loss": 0.1318, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.8749800415136516, |
|
"grad_norm": 0.21046084433838286, |
|
"learning_rate": 1.784439136990616e-05, |
|
"loss": 0.1216, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.8781733993293949, |
|
"grad_norm": 0.22390590052286838, |
|
"learning_rate": 1.7821292517995802e-05, |
|
"loss": 0.1222, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8813667571451381, |
|
"grad_norm": 0.21545360667161884, |
|
"learning_rate": 1.7798085684288408e-05, |
|
"loss": 0.1245, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.8845601149608814, |
|
"grad_norm": 0.23969169247272867, |
|
"learning_rate": 1.777477118918058e-05, |
|
"loss": 0.1199, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.8877534727766246, |
|
"grad_norm": 0.25616719983123853, |
|
"learning_rate": 1.7751349354555315e-05, |
|
"loss": 0.12, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.8909468305923679, |
|
"grad_norm": 0.2327465548031593, |
|
"learning_rate": 1.7727820503777563e-05, |
|
"loss": 0.1188, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.8941401884081112, |
|
"grad_norm": 0.2704312448776363, |
|
"learning_rate": 1.770418496168973e-05, |
|
"loss": 0.1266, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.8973335462238544, |
|
"grad_norm": 0.280731488755357, |
|
"learning_rate": 1.7680443054607247e-05, |
|
"loss": 0.1186, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.9005269040395977, |
|
"grad_norm": 0.2190704544630761, |
|
"learning_rate": 1.7656595110314003e-05, |
|
"loss": 0.1227, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.9037202618553409, |
|
"grad_norm": 0.2676299073758256, |
|
"learning_rate": 1.7632641458057874e-05, |
|
"loss": 0.1166, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.9069136196710842, |
|
"grad_norm": 0.2699663729747412, |
|
"learning_rate": 1.7608582428546142e-05, |
|
"loss": 0.1245, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.9101069774868275, |
|
"grad_norm": 0.38105163760645616, |
|
"learning_rate": 1.7584418353940943e-05, |
|
"loss": 0.1218, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.9133003353025706, |
|
"grad_norm": 0.23014658636555574, |
|
"learning_rate": 1.756014956785468e-05, |
|
"loss": 0.1181, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.9164936931183139, |
|
"grad_norm": 0.24389786019248447, |
|
"learning_rate": 1.7535776405345428e-05, |
|
"loss": 0.1196, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.9196870509340571, |
|
"grad_norm": 0.26113050468693977, |
|
"learning_rate": 1.7511299202912275e-05, |
|
"loss": 0.1202, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.9228804087498004, |
|
"grad_norm": 0.2078740201372768, |
|
"learning_rate": 1.7486718298490713e-05, |
|
"loss": 0.124, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.9260737665655436, |
|
"grad_norm": 0.3157327866928938, |
|
"learning_rate": 1.7462034031447954e-05, |
|
"loss": 0.1252, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.9292671243812869, |
|
"grad_norm": 0.21114581099853116, |
|
"learning_rate": 1.7437246742578246e-05, |
|
"loss": 0.1204, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.9324604821970301, |
|
"grad_norm": 0.2200062852329027, |
|
"learning_rate": 1.7412356774098175e-05, |
|
"loss": 0.1249, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.9356538400127734, |
|
"grad_norm": 0.2739829354403811, |
|
"learning_rate": 1.7387364469641928e-05, |
|
"loss": 0.1207, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.9388471978285167, |
|
"grad_norm": 0.22036300962797467, |
|
"learning_rate": 1.736227017425656e-05, |
|
"loss": 0.1182, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.9420405556442599, |
|
"grad_norm": 0.2010246775840929, |
|
"learning_rate": 1.7337074234397228e-05, |
|
"loss": 0.1199, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.9452339134600032, |
|
"grad_norm": 0.22961494443205888, |
|
"learning_rate": 1.7311776997922404e-05, |
|
"loss": 0.1207, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.9484272712757464, |
|
"grad_norm": 0.26165957694875003, |
|
"learning_rate": 1.7286378814089072e-05, |
|
"loss": 0.1188, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.9516206290914897, |
|
"grad_norm": 0.22131834255107544, |
|
"learning_rate": 1.726088003354791e-05, |
|
"loss": 0.1205, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.954813986907233, |
|
"grad_norm": 0.2549539175287136, |
|
"learning_rate": 1.7235281008338452e-05, |
|
"loss": 0.1213, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.9580073447229762, |
|
"grad_norm": 0.2427772520556814, |
|
"learning_rate": 1.720958209188422e-05, |
|
"loss": 0.1211, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.9612007025387195, |
|
"grad_norm": 0.2442539895798861, |
|
"learning_rate": 1.7183783638987845e-05, |
|
"loss": 0.1193, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.9643940603544627, |
|
"grad_norm": 0.23954523978335746, |
|
"learning_rate": 1.7157886005826173e-05, |
|
"loss": 0.1196, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.967587418170206, |
|
"grad_norm": 0.20571373812832114, |
|
"learning_rate": 1.7131889549945348e-05, |
|
"loss": 0.1149, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.9707807759859493, |
|
"grad_norm": 0.22749917178842363, |
|
"learning_rate": 1.710579463025587e-05, |
|
"loss": 0.1176, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.9739741338016925, |
|
"grad_norm": 0.23012462875837292, |
|
"learning_rate": 1.7079601607027643e-05, |
|
"loss": 0.1186, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9771674916174358, |
|
"grad_norm": 0.20338632953694447, |
|
"learning_rate": 1.7053310841885012e-05, |
|
"loss": 0.1187, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.980360849433179, |
|
"grad_norm": 0.23280208486194112, |
|
"learning_rate": 1.7026922697801746e-05, |
|
"loss": 0.1196, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.9835542072489223, |
|
"grad_norm": 0.20786109950948006, |
|
"learning_rate": 1.7000437539096046e-05, |
|
"loss": 0.1202, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.9867475650646655, |
|
"grad_norm": 0.21375986615043702, |
|
"learning_rate": 1.6973855731425507e-05, |
|
"loss": 0.1159, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.9899409228804088, |
|
"grad_norm": 0.20748661803980806, |
|
"learning_rate": 1.694717764178208e-05, |
|
"loss": 0.1153, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.993134280696152, |
|
"grad_norm": 0.22516009929996467, |
|
"learning_rate": 1.692040363848699e-05, |
|
"loss": 0.1204, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.9963276385118952, |
|
"grad_norm": 0.2595564019615457, |
|
"learning_rate": 1.6893534091185658e-05, |
|
"loss": 0.1197, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.9995209963276385, |
|
"grad_norm": 0.18297342882482412, |
|
"learning_rate": 1.686656937084261e-05, |
|
"loss": 0.1151, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.0027143541433818, |
|
"grad_norm": 0.22852815920466457, |
|
"learning_rate": 1.6839509849736326e-05, |
|
"loss": 0.0949, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.005907711959125, |
|
"grad_norm": 0.19728357385077158, |
|
"learning_rate": 1.6812355901454132e-05, |
|
"loss": 0.0872, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.0091010697748684, |
|
"grad_norm": 0.2623149708691154, |
|
"learning_rate": 1.678510790088702e-05, |
|
"loss": 0.0887, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.0122944275906116, |
|
"grad_norm": 0.18893451371595926, |
|
"learning_rate": 1.6757766224224483e-05, |
|
"loss": 0.0919, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.0154877854063549, |
|
"grad_norm": 0.21837196710349846, |
|
"learning_rate": 1.673033124894932e-05, |
|
"loss": 0.0871, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.0186811432220981, |
|
"grad_norm": 0.19258941847945746, |
|
"learning_rate": 1.670280335383242e-05, |
|
"loss": 0.0885, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.0218745010378414, |
|
"grad_norm": 0.19005062378076065, |
|
"learning_rate": 1.667518291892754e-05, |
|
"loss": 0.0893, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.0250678588535846, |
|
"grad_norm": 0.20663392660314553, |
|
"learning_rate": 1.6647470325566045e-05, |
|
"loss": 0.0891, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.028261216669328, |
|
"grad_norm": 0.22234403999553295, |
|
"learning_rate": 1.6619665956351664e-05, |
|
"loss": 0.0881, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.0314545744850712, |
|
"grad_norm": 0.2218548835051233, |
|
"learning_rate": 1.6591770195155185e-05, |
|
"loss": 0.0891, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.0346479323008142, |
|
"grad_norm": 0.19448202424429442, |
|
"learning_rate": 1.6563783427109173e-05, |
|
"loss": 0.0882, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.0378412901165575, |
|
"grad_norm": 0.2042849289860482, |
|
"learning_rate": 1.6535706038602637e-05, |
|
"loss": 0.0878, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.0410346479323007, |
|
"grad_norm": 0.2512755796704539, |
|
"learning_rate": 1.6507538417275716e-05, |
|
"loss": 0.0875, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.044228005748044, |
|
"grad_norm": 0.2131890646498463, |
|
"learning_rate": 1.6479280952014304e-05, |
|
"loss": 0.0898, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.0474213635637872, |
|
"grad_norm": 0.21427122055121073, |
|
"learning_rate": 1.6450934032944698e-05, |
|
"loss": 0.088, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.0506147213795305, |
|
"grad_norm": 0.2110500487102777, |
|
"learning_rate": 1.64224980514282e-05, |
|
"loss": 0.0877, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.0538080791952738, |
|
"grad_norm": 0.21674633072630997, |
|
"learning_rate": 1.6393973400055737e-05, |
|
"loss": 0.0919, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.057001437011017, |
|
"grad_norm": 0.20250013575431305, |
|
"learning_rate": 1.63653604726424e-05, |
|
"loss": 0.0878, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.0601947948267603, |
|
"grad_norm": 0.22853386096908568, |
|
"learning_rate": 1.6336659664222048e-05, |
|
"loss": 0.0865, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.0633881526425035, |
|
"grad_norm": 0.23371366704528887, |
|
"learning_rate": 1.630787137104183e-05, |
|
"loss": 0.0917, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.0665815104582468, |
|
"grad_norm": 0.2520515744099512, |
|
"learning_rate": 1.6278995990556725e-05, |
|
"loss": 0.0885, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.06977486827399, |
|
"grad_norm": 0.226518466734716, |
|
"learning_rate": 1.6250033921424038e-05, |
|
"loss": 0.089, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.0729682260897333, |
|
"grad_norm": 0.19588721298026593, |
|
"learning_rate": 1.6220985563497933e-05, |
|
"loss": 0.0893, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.0761615839054766, |
|
"grad_norm": 0.20545809450126928, |
|
"learning_rate": 1.6191851317823864e-05, |
|
"loss": 0.0878, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.0793549417212198, |
|
"grad_norm": 0.19233602078710613, |
|
"learning_rate": 1.6162631586633076e-05, |
|
"loss": 0.0866, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.082548299536963, |
|
"grad_norm": 0.16678814329219444, |
|
"learning_rate": 1.6133326773337033e-05, |
|
"loss": 0.0871, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.0857416573527063, |
|
"grad_norm": 0.1872528998042832, |
|
"learning_rate": 1.610393728252186e-05, |
|
"loss": 0.0855, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.0889350151684496, |
|
"grad_norm": 0.2125566089494784, |
|
"learning_rate": 1.6074463519942747e-05, |
|
"loss": 0.0868, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.0921283729841929, |
|
"grad_norm": 0.2174911829451179, |
|
"learning_rate": 1.604490589251835e-05, |
|
"loss": 0.0883, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.095321730799936, |
|
"grad_norm": 0.18461972367391402, |
|
"learning_rate": 1.6015264808325172e-05, |
|
"loss": 0.0866, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.0985150886156794, |
|
"grad_norm": 0.21622527426814506, |
|
"learning_rate": 1.5985540676591938e-05, |
|
"loss": 0.0863, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.1017084464314226, |
|
"grad_norm": 0.22055823564651658, |
|
"learning_rate": 1.5955733907693938e-05, |
|
"loss": 0.0864, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.1049018042471659, |
|
"grad_norm": 0.21748955927958816, |
|
"learning_rate": 1.592584491314735e-05, |
|
"loss": 0.0914, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.1080951620629091, |
|
"grad_norm": 0.19288286925997916, |
|
"learning_rate": 1.589587410560359e-05, |
|
"loss": 0.0886, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.1112885198786524, |
|
"grad_norm": 0.22073550271753697, |
|
"learning_rate": 1.586582189884357e-05, |
|
"loss": 0.0874, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.1144818776943957, |
|
"grad_norm": 0.19094293529375386, |
|
"learning_rate": 1.5835688707772035e-05, |
|
"loss": 0.0855, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.117675235510139, |
|
"grad_norm": 0.21947645518408387, |
|
"learning_rate": 1.5805474948411792e-05, |
|
"loss": 0.0891, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.1208685933258822, |
|
"grad_norm": 0.19228306320542188, |
|
"learning_rate": 1.5775181037897995e-05, |
|
"loss": 0.0864, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.1240619511416254, |
|
"grad_norm": 0.2416878479220072, |
|
"learning_rate": 1.5744807394472372e-05, |
|
"loss": 0.0892, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.1272553089573687, |
|
"grad_norm": 0.2763423491442259, |
|
"learning_rate": 1.5714354437477454e-05, |
|
"loss": 0.0903, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.130448666773112, |
|
"grad_norm": 9.009817853561485, |
|
"learning_rate": 1.568382258735078e-05, |
|
"loss": 0.0896, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.1336420245888552, |
|
"grad_norm": 0.21069452452749907, |
|
"learning_rate": 1.5653212265619114e-05, |
|
"loss": 0.0908, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.1368353824045985, |
|
"grad_norm": 0.20407807891775565, |
|
"learning_rate": 1.5622523894892587e-05, |
|
"loss": 0.0908, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.1400287402203417, |
|
"grad_norm": 0.2619102068507488, |
|
"learning_rate": 1.5591757898858907e-05, |
|
"loss": 0.0872, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.143222098036085, |
|
"grad_norm": 0.20634106575751654, |
|
"learning_rate": 1.556091470227747e-05, |
|
"loss": 0.0875, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.1464154558518282, |
|
"grad_norm": 0.23775033570197862, |
|
"learning_rate": 1.5529994730973522e-05, |
|
"loss": 0.0868, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.1496088136675715, |
|
"grad_norm": 0.20245603598906314, |
|
"learning_rate": 1.549899841183227e-05, |
|
"loss": 0.0868, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.1528021714833148, |
|
"grad_norm": 0.19815804657454472, |
|
"learning_rate": 1.546792617279299e-05, |
|
"loss": 0.0899, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.155995529299058, |
|
"grad_norm": 0.18751806743751373, |
|
"learning_rate": 1.5436778442843107e-05, |
|
"loss": 0.0884, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.1591888871148013, |
|
"grad_norm": 0.22312780655020503, |
|
"learning_rate": 1.5405555652012302e-05, |
|
"loss": 0.0895, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.1623822449305445, |
|
"grad_norm": 0.1924743563793643, |
|
"learning_rate": 1.5374258231366546e-05, |
|
"loss": 0.0881, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.1655756027462878, |
|
"grad_norm": 0.20844406290416265, |
|
"learning_rate": 1.5342886613002155e-05, |
|
"loss": 0.0867, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.168768960562031, |
|
"grad_norm": 0.1761650680293785, |
|
"learning_rate": 1.531144123003984e-05, |
|
"loss": 0.087, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.1719623183777743, |
|
"grad_norm": 0.1914806702266616, |
|
"learning_rate": 1.5279922516618702e-05, |
|
"loss": 0.0866, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.1751556761935176, |
|
"grad_norm": 0.2112719185689836, |
|
"learning_rate": 1.5248330907890272e-05, |
|
"loss": 0.0867, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.1783490340092608, |
|
"grad_norm": 0.20744289591360074, |
|
"learning_rate": 1.5216666840012455e-05, |
|
"loss": 0.0848, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.1815423918250039, |
|
"grad_norm": 0.21602516707177483, |
|
"learning_rate": 1.5184930750143565e-05, |
|
"loss": 0.0889, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.1847357496407471, |
|
"grad_norm": 0.1942180064010259, |
|
"learning_rate": 1.515312307643624e-05, |
|
"loss": 0.0871, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.1879291074564904, |
|
"grad_norm": 0.1809045891368503, |
|
"learning_rate": 1.5121244258031427e-05, |
|
"loss": 0.0887, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.1911224652722336, |
|
"grad_norm": 0.21509016663666897, |
|
"learning_rate": 1.50892947350523e-05, |
|
"loss": 0.0875, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.194315823087977, |
|
"grad_norm": 0.22222425875493532, |
|
"learning_rate": 1.5057274948598192e-05, |
|
"loss": 0.0904, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.1975091809037202, |
|
"grad_norm": 0.17436626344650585, |
|
"learning_rate": 1.5025185340738499e-05, |
|
"loss": 0.0869, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.2007025387194634, |
|
"grad_norm": 0.2315956494531892, |
|
"learning_rate": 1.4993026354506588e-05, |
|
"loss": 0.0893, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.2038958965352067, |
|
"grad_norm": 0.19438867498932094, |
|
"learning_rate": 1.4960798433893664e-05, |
|
"loss": 0.0898, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.20708925435095, |
|
"grad_norm": 0.21507570120321423, |
|
"learning_rate": 1.492850202384266e-05, |
|
"loss": 0.0888, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.2102826121666932, |
|
"grad_norm": 0.1756005064132717, |
|
"learning_rate": 1.4896137570242068e-05, |
|
"loss": 0.0886, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.2134759699824365, |
|
"grad_norm": 0.21082827374254784, |
|
"learning_rate": 1.486370551991981e-05, |
|
"loss": 0.0877, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.2166693277981797, |
|
"grad_norm": 0.25062287626591706, |
|
"learning_rate": 1.483120632063706e-05, |
|
"loss": 0.0889, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.219862685613923, |
|
"grad_norm": 0.18123970615998264, |
|
"learning_rate": 1.4798640421082047e-05, |
|
"loss": 0.0886, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.2230560434296662, |
|
"grad_norm": 0.21468260494577018, |
|
"learning_rate": 1.4766008270863883e-05, |
|
"loss": 0.0906, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.2262494012454095, |
|
"grad_norm": 0.18876901647341507, |
|
"learning_rate": 1.4733310320506343e-05, |
|
"loss": 0.0882, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.2294427590611527, |
|
"grad_norm": 0.19790235853542382, |
|
"learning_rate": 1.4700547021441642e-05, |
|
"loss": 0.0877, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.232636116876896, |
|
"grad_norm": 0.18688689214473558, |
|
"learning_rate": 1.4667718826004214e-05, |
|
"loss": 0.0882, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.2358294746926393, |
|
"grad_norm": 0.1951758945258833, |
|
"learning_rate": 1.463482618742446e-05, |
|
"loss": 0.0869, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.2390228325083825, |
|
"grad_norm": 0.19995389074426362, |
|
"learning_rate": 1.4601869559822488e-05, |
|
"loss": 0.0872, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.2422161903241258, |
|
"grad_norm": 0.2218492641305999, |
|
"learning_rate": 1.4568849398201855e-05, |
|
"loss": 0.0883, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.245409548139869, |
|
"grad_norm": 0.18443852015389814, |
|
"learning_rate": 1.4535766158443265e-05, |
|
"loss": 0.087, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.2486029059556123, |
|
"grad_norm": 0.19503753956864983, |
|
"learning_rate": 1.45026202972983e-05, |
|
"loss": 0.0885, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.2517962637713556, |
|
"grad_norm": 0.19853902671151866, |
|
"learning_rate": 1.446941227238309e-05, |
|
"loss": 0.0861, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.2549896215870988, |
|
"grad_norm": 0.21865153532249126, |
|
"learning_rate": 1.4436142542172009e-05, |
|
"loss": 0.0886, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.258182979402842, |
|
"grad_norm": 0.20818634190936489, |
|
"learning_rate": 1.4402811565991353e-05, |
|
"loss": 0.0889, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.2613763372185853, |
|
"grad_norm": 0.23080624800369903, |
|
"learning_rate": 1.436941980401297e-05, |
|
"loss": 0.0858, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.2645696950343286, |
|
"grad_norm": 0.19862256058128666, |
|
"learning_rate": 1.4335967717247941e-05, |
|
"loss": 0.0865, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.2677630528500718, |
|
"grad_norm": 0.18954472715597112, |
|
"learning_rate": 1.4302455767540189e-05, |
|
"loss": 0.0886, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.270956410665815, |
|
"grad_norm": 0.18922957380652522, |
|
"learning_rate": 1.4268884417560119e-05, |
|
"loss": 0.0881, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.2741497684815584, |
|
"grad_norm": 0.23661467243107595, |
|
"learning_rate": 1.4235254130798213e-05, |
|
"loss": 0.0884, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.2773431262973016, |
|
"grad_norm": 0.21028360452170922, |
|
"learning_rate": 1.4201565371558657e-05, |
|
"loss": 0.0858, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.2805364841130449, |
|
"grad_norm": 0.1857031394163611, |
|
"learning_rate": 1.4167818604952906e-05, |
|
"loss": 0.0865, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.2837298419287881, |
|
"grad_norm": 0.227255800263239, |
|
"learning_rate": 1.4134014296893275e-05, |
|
"loss": 0.0884, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.2869231997445314, |
|
"grad_norm": 0.26940362233973403, |
|
"learning_rate": 1.4100152914086504e-05, |
|
"loss": 0.0845, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.2901165575602747, |
|
"grad_norm": 0.22762705633128913, |
|
"learning_rate": 1.4066234924027318e-05, |
|
"loss": 0.0863, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.293309915376018, |
|
"grad_norm": 0.24522046661200322, |
|
"learning_rate": 1.4032260794991956e-05, |
|
"loss": 0.0854, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.2965032731917612, |
|
"grad_norm": 0.17298541823238414, |
|
"learning_rate": 1.3998230996031736e-05, |
|
"loss": 0.0884, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.2996966310075044, |
|
"grad_norm": 0.21973030306429478, |
|
"learning_rate": 1.3964145996966555e-05, |
|
"loss": 0.0879, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.3028899888232477, |
|
"grad_norm": 0.18077115808310013, |
|
"learning_rate": 1.3930006268378407e-05, |
|
"loss": 0.089, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.306083346638991, |
|
"grad_norm": 0.18437205616695954, |
|
"learning_rate": 1.3895812281604895e-05, |
|
"loss": 0.0887, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.3092767044547342, |
|
"grad_norm": 0.22324698589088907, |
|
"learning_rate": 1.386156450873271e-05, |
|
"loss": 0.1099, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.3124700622704775, |
|
"grad_norm": 0.1866174959700542, |
|
"learning_rate": 1.382726342259113e-05, |
|
"loss": 0.0899, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.3156634200862207, |
|
"grad_norm": 0.22011208651394024, |
|
"learning_rate": 1.3792909496745475e-05, |
|
"loss": 0.0869, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.318856777901964, |
|
"grad_norm": 0.21878645198323823, |
|
"learning_rate": 1.3758503205490583e-05, |
|
"loss": 0.0859, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.3220501357177072, |
|
"grad_norm": 0.1869477105143079, |
|
"learning_rate": 1.3724045023844253e-05, |
|
"loss": 0.0898, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.3252434935334505, |
|
"grad_norm": 0.21199782150015953, |
|
"learning_rate": 1.3689535427540687e-05, |
|
"loss": 0.0861, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.3284368513491938, |
|
"grad_norm": 0.18518093738165986, |
|
"learning_rate": 1.3654974893023934e-05, |
|
"loss": 0.0908, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.331630209164937, |
|
"grad_norm": 0.18688147397601756, |
|
"learning_rate": 1.3620363897441289e-05, |
|
"loss": 0.0868, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.3348235669806803, |
|
"grad_norm": 0.2067483479178462, |
|
"learning_rate": 1.358570291863673e-05, |
|
"loss": 0.0884, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.3380169247964235, |
|
"grad_norm": 0.21329007217550264, |
|
"learning_rate": 1.3550992435144304e-05, |
|
"loss": 0.086, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.3412102826121668, |
|
"grad_norm": 0.18073209909106028, |
|
"learning_rate": 1.3516232926181529e-05, |
|
"loss": 0.0868, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.34440364042791, |
|
"grad_norm": 0.23014446893395585, |
|
"learning_rate": 1.3481424871642778e-05, |
|
"loss": 0.088, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.3475969982436533, |
|
"grad_norm": 0.3028280433486724, |
|
"learning_rate": 1.3446568752092643e-05, |
|
"loss": 0.0848, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.3507903560593966, |
|
"grad_norm": 0.20888924306544646, |
|
"learning_rate": 1.3411665048759313e-05, |
|
"loss": 0.0885, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.3539837138751398, |
|
"grad_norm": 0.22324045695426223, |
|
"learning_rate": 1.3376714243527925e-05, |
|
"loss": 0.0901, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.357177071690883, |
|
"grad_norm": 0.19474459814659545, |
|
"learning_rate": 1.3341716818933912e-05, |
|
"loss": 0.088, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.3603704295066263, |
|
"grad_norm": 0.22602725655780065, |
|
"learning_rate": 1.3306673258156334e-05, |
|
"loss": 0.0867, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.3635637873223696, |
|
"grad_norm": 0.23360209320607728, |
|
"learning_rate": 1.3271584045011217e-05, |
|
"loss": 0.0886, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.3667571451381129, |
|
"grad_norm": 0.1873427703628018, |
|
"learning_rate": 1.3236449663944875e-05, |
|
"loss": 0.0866, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.369950502953856, |
|
"grad_norm": 0.1985433902478951, |
|
"learning_rate": 1.3201270600027208e-05, |
|
"loss": 0.0876, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.3731438607695992, |
|
"grad_norm": 0.18896595210872472, |
|
"learning_rate": 1.3166047338945019e-05, |
|
"loss": 0.0861, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.3763372185853424, |
|
"grad_norm": 0.22957720239257226, |
|
"learning_rate": 1.3130780366995297e-05, |
|
"loss": 0.0853, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.3795305764010857, |
|
"grad_norm": 0.1933824287848287, |
|
"learning_rate": 1.3095470171078512e-05, |
|
"loss": 0.0867, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.382723934216829, |
|
"grad_norm": 0.22324019535172776, |
|
"learning_rate": 1.3060117238691894e-05, |
|
"loss": 0.085, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.3859172920325722, |
|
"grad_norm": 0.2316030267030887, |
|
"learning_rate": 1.3024722057922696e-05, |
|
"loss": 0.0841, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.3891106498483154, |
|
"grad_norm": 0.1973247421696361, |
|
"learning_rate": 1.2989285117441452e-05, |
|
"loss": 0.0878, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.3923040076640587, |
|
"grad_norm": 0.2080002656842217, |
|
"learning_rate": 1.2953806906495244e-05, |
|
"loss": 0.0883, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.395497365479802, |
|
"grad_norm": 0.18517780070734782, |
|
"learning_rate": 1.2918287914900933e-05, |
|
"loss": 0.0852, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.3986907232955452, |
|
"grad_norm": 0.19697224463698385, |
|
"learning_rate": 1.2882728633038406e-05, |
|
"loss": 0.0855, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.4018840811112885, |
|
"grad_norm": 0.19736259450538857, |
|
"learning_rate": 1.2847129551843807e-05, |
|
"loss": 0.0876, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.4050774389270317, |
|
"grad_norm": 0.18942542996017805, |
|
"learning_rate": 1.2811491162802744e-05, |
|
"loss": 0.0884, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.408270796742775, |
|
"grad_norm": 0.19254196108878727, |
|
"learning_rate": 1.277581395794353e-05, |
|
"loss": 0.088, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.4114641545585183, |
|
"grad_norm": 0.24282865106690285, |
|
"learning_rate": 1.2740098429830357e-05, |
|
"loss": 0.0891, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.4146575123742615, |
|
"grad_norm": 0.23984915406072307, |
|
"learning_rate": 1.2704345071556525e-05, |
|
"loss": 0.0886, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.4178508701900048, |
|
"grad_norm": 0.2184606228075661, |
|
"learning_rate": 1.2668554376737619e-05, |
|
"loss": 0.087, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.421044228005748, |
|
"grad_norm": 0.19798737334853378, |
|
"learning_rate": 1.2632726839504693e-05, |
|
"loss": 0.0875, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.4242375858214913, |
|
"grad_norm": 0.23442081669151446, |
|
"learning_rate": 1.2596862954497458e-05, |
|
"loss": 0.0849, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.4274309436372346, |
|
"grad_norm": 0.21286909537115775, |
|
"learning_rate": 1.2560963216857447e-05, |
|
"loss": 0.0845, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.4306243014529778, |
|
"grad_norm": 0.19037684375350825, |
|
"learning_rate": 1.2525028122221172e-05, |
|
"loss": 0.0857, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.433817659268721, |
|
"grad_norm": 0.18725372186680364, |
|
"learning_rate": 1.24890581667133e-05, |
|
"loss": 0.0875, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.4370110170844643, |
|
"grad_norm": 0.20844623553872596, |
|
"learning_rate": 1.2453053846939783e-05, |
|
"loss": 0.0898, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.4402043749002076, |
|
"grad_norm": 0.21140506066201004, |
|
"learning_rate": 1.2417015659981007e-05, |
|
"loss": 0.0883, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.4433977327159508, |
|
"grad_norm": 0.2064339774677841, |
|
"learning_rate": 1.2380944103384946e-05, |
|
"loss": 0.0849, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.446591090531694, |
|
"grad_norm": 0.17652746458033255, |
|
"learning_rate": 1.2344839675160271e-05, |
|
"loss": 0.0867, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.4497844483474374, |
|
"grad_norm": 0.19101046403484023, |
|
"learning_rate": 1.2308702873769486e-05, |
|
"loss": 0.0865, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.4529778061631806, |
|
"grad_norm": 0.19778410360898788, |
|
"learning_rate": 1.227253419812204e-05, |
|
"loss": 0.0876, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.4561711639789239, |
|
"grad_norm": 0.1884773288145621, |
|
"learning_rate": 1.2236334147567442e-05, |
|
"loss": 0.0873, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.4593645217946671, |
|
"grad_norm": 0.22741564087867433, |
|
"learning_rate": 1.2200103221888365e-05, |
|
"loss": 0.0842, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.4625578796104104, |
|
"grad_norm": 0.19382271044214394, |
|
"learning_rate": 1.2163841921293761e-05, |
|
"loss": 0.0846, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.4657512374261537, |
|
"grad_norm": 0.2225438873976966, |
|
"learning_rate": 1.2127550746411932e-05, |
|
"loss": 0.086, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.468944595241897, |
|
"grad_norm": 0.20309796630710175, |
|
"learning_rate": 1.2091230198283626e-05, |
|
"loss": 0.0872, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.4721379530576402, |
|
"grad_norm": 0.21309103603253518, |
|
"learning_rate": 1.2054880778355122e-05, |
|
"loss": 0.0856, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.4753313108733834, |
|
"grad_norm": 0.20007800804028458, |
|
"learning_rate": 1.201850298847132e-05, |
|
"loss": 0.0843, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.4785246686891267, |
|
"grad_norm": 0.22102981325152446, |
|
"learning_rate": 1.198209733086878e-05, |
|
"loss": 0.0865, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.48171802650487, |
|
"grad_norm": 0.2509432577302147, |
|
"learning_rate": 1.194566430816882e-05, |
|
"loss": 0.0872, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.4849113843206132, |
|
"grad_norm": 0.21078643240774367, |
|
"learning_rate": 1.1909204423370564e-05, |
|
"loss": 0.0856, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.4881047421363562, |
|
"grad_norm": 0.22252302888210984, |
|
"learning_rate": 1.1872718179843994e-05, |
|
"loss": 0.0838, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.4912980999520995, |
|
"grad_norm": 0.18987560853570382, |
|
"learning_rate": 1.1836206081323003e-05, |
|
"loss": 0.085, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.4944914577678428, |
|
"grad_norm": 0.19549774907184778, |
|
"learning_rate": 1.1799668631898445e-05, |
|
"loss": 0.0877, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.497684815583586, |
|
"grad_norm": 0.19228104758868642, |
|
"learning_rate": 1.176310633601117e-05, |
|
"loss": 0.0956, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.5008781733993293, |
|
"grad_norm": 0.20819820045783494, |
|
"learning_rate": 1.1726519698445056e-05, |
|
"loss": 0.0867, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.5040715312150725, |
|
"grad_norm": 0.20733767509582143, |
|
"learning_rate": 1.1689909224320062e-05, |
|
"loss": 0.0863, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.5072648890308158, |
|
"grad_norm": 0.20925265086202188, |
|
"learning_rate": 1.165327541908522e-05, |
|
"loss": 0.0861, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.510458246846559, |
|
"grad_norm": 0.18493554321077676, |
|
"learning_rate": 1.1616618788511684e-05, |
|
"loss": 0.0849, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.5136516046623023, |
|
"grad_norm": 0.18797864341732143, |
|
"learning_rate": 1.1579939838685731e-05, |
|
"loss": 0.085, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.5168449624780456, |
|
"grad_norm": 0.2242101441050116, |
|
"learning_rate": 1.154323907600179e-05, |
|
"loss": 0.0867, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.5200383202937888, |
|
"grad_norm": 0.17084103768025352, |
|
"learning_rate": 1.1506517007155432e-05, |
|
"loss": 0.0838, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.523231678109532, |
|
"grad_norm": 0.18934207218377755, |
|
"learning_rate": 1.1469774139136389e-05, |
|
"loss": 0.0857, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.5264250359252753, |
|
"grad_norm": 0.2265706734312821, |
|
"learning_rate": 1.1433010979221545e-05, |
|
"loss": 0.0866, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.5296183937410186, |
|
"grad_norm": 0.22302910930406783, |
|
"learning_rate": 1.1396228034967942e-05, |
|
"loss": 0.0841, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.5328117515567619, |
|
"grad_norm": 0.20180278303765992, |
|
"learning_rate": 1.1359425814205767e-05, |
|
"loss": 0.0863, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.5360051093725051, |
|
"grad_norm": 0.22800639526769467, |
|
"learning_rate": 1.132260482503133e-05, |
|
"loss": 0.0873, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.5391984671882484, |
|
"grad_norm": 0.21277101714684102, |
|
"learning_rate": 1.1285765575800076e-05, |
|
"loss": 0.0874, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.5423918250039916, |
|
"grad_norm": 0.18816604414097163, |
|
"learning_rate": 1.1248908575119539e-05, |
|
"loss": 0.0862, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.545585182819735, |
|
"grad_norm": 0.20138026843291984, |
|
"learning_rate": 1.1212034331842338e-05, |
|
"loss": 0.0856, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.5487785406354782, |
|
"grad_norm": 0.18862474943057217, |
|
"learning_rate": 1.1175143355059144e-05, |
|
"loss": 0.085, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.5519718984512214, |
|
"grad_norm": 0.18561382698856643, |
|
"learning_rate": 1.1138236154091656e-05, |
|
"loss": 0.0852, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.5551652562669647, |
|
"grad_norm": 0.18884644793283215, |
|
"learning_rate": 1.1101313238485552e-05, |
|
"loss": 0.0839, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.558358614082708, |
|
"grad_norm": 0.17345642894126198, |
|
"learning_rate": 1.1064375118003487e-05, |
|
"loss": 0.0844, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.5615519718984512, |
|
"grad_norm": 0.1991026940192444, |
|
"learning_rate": 1.1027422302618032e-05, |
|
"loss": 0.0846, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.5647453297141944, |
|
"grad_norm": 0.22201127220587602, |
|
"learning_rate": 1.099045530250463e-05, |
|
"loss": 0.0823, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.5679386875299377, |
|
"grad_norm": 0.23679974637337212, |
|
"learning_rate": 1.0953474628034562e-05, |
|
"loss": 0.087, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.571132045345681, |
|
"grad_norm": 0.18945453405405135, |
|
"learning_rate": 1.0916480789767907e-05, |
|
"loss": 0.0861, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.5743254031614242, |
|
"grad_norm": 0.18943349755537386, |
|
"learning_rate": 1.0879474298446479e-05, |
|
"loss": 0.0831, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.5775187609771675, |
|
"grad_norm": 0.20905996320818215, |
|
"learning_rate": 1.0842455664986782e-05, |
|
"loss": 0.0858, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.5807121187929107, |
|
"grad_norm": 0.1863924849638652, |
|
"learning_rate": 1.0805425400472956e-05, |
|
"loss": 0.0856, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.583905476608654, |
|
"grad_norm": 0.2091352813903984, |
|
"learning_rate": 1.076838401614972e-05, |
|
"loss": 0.0857, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.5870988344243973, |
|
"grad_norm": 0.24371561015345014, |
|
"learning_rate": 1.0731332023415319e-05, |
|
"loss": 0.089, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.5902921922401405, |
|
"grad_norm": 0.2128926213672918, |
|
"learning_rate": 1.0694269933814456e-05, |
|
"loss": 0.084, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.5934855500558838, |
|
"grad_norm": 0.21916373322291655, |
|
"learning_rate": 1.0657198259031232e-05, |
|
"loss": 0.0826, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.596678907871627, |
|
"grad_norm": 0.1824350216961259, |
|
"learning_rate": 1.0620117510882083e-05, |
|
"loss": 0.0864, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.5998722656873703, |
|
"grad_norm": 0.17881824213547054, |
|
"learning_rate": 1.058302820130871e-05, |
|
"loss": 0.0839, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.6030656235031135, |
|
"grad_norm": 0.19729212364378013, |
|
"learning_rate": 1.0545930842371022e-05, |
|
"loss": 0.0854, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.6062589813188568, |
|
"grad_norm": 0.2087951067289451, |
|
"learning_rate": 1.0508825946240053e-05, |
|
"loss": 0.085, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.6094523391346, |
|
"grad_norm": 0.19718155636666373, |
|
"learning_rate": 1.0471714025190897e-05, |
|
"loss": 0.0856, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.6126456969503433, |
|
"grad_norm": 0.20228614287118912, |
|
"learning_rate": 1.0434595591595635e-05, |
|
"loss": 0.0853, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.6158390547660866, |
|
"grad_norm": 0.18693736149298812, |
|
"learning_rate": 1.0397471157916263e-05, |
|
"loss": 0.0849, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.6190324125818298, |
|
"grad_norm": 0.191886465159157, |
|
"learning_rate": 1.0360341236697611e-05, |
|
"loss": 0.0838, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.622225770397573, |
|
"grad_norm": 0.20503489329364863, |
|
"learning_rate": 1.0323206340560275e-05, |
|
"loss": 0.0856, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.6254191282133164, |
|
"grad_norm": 0.24260576208421464, |
|
"learning_rate": 1.028606698219353e-05, |
|
"loss": 0.0865, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.6286124860290596, |
|
"grad_norm": 0.22639324906871056, |
|
"learning_rate": 1.0248923674348268e-05, |
|
"loss": 0.0859, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.6318058438448029, |
|
"grad_norm": 0.176153514574258, |
|
"learning_rate": 1.0211776929829893e-05, |
|
"loss": 0.0867, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.6349992016605461, |
|
"grad_norm": 0.1877599319113198, |
|
"learning_rate": 1.0174627261491268e-05, |
|
"loss": 0.0829, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.6381925594762894, |
|
"grad_norm": 0.19998890698860952, |
|
"learning_rate": 1.0137475182225617e-05, |
|
"loss": 0.0841, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.6413859172920326, |
|
"grad_norm": 0.21610758072730218, |
|
"learning_rate": 1.0100321204959449e-05, |
|
"loss": 0.0841, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.644579275107776, |
|
"grad_norm": 0.18193308572064754, |
|
"learning_rate": 1.0063165842645484e-05, |
|
"loss": 0.0849, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.6477726329235192, |
|
"grad_norm": 0.20248453381225345, |
|
"learning_rate": 1.0026009608255555e-05, |
|
"loss": 0.0845, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.6509659907392624, |
|
"grad_norm": 0.2320711692298343, |
|
"learning_rate": 9.988853014773542e-06, |
|
"loss": 0.0852, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.6541593485550057, |
|
"grad_norm": 0.19584194025576318, |
|
"learning_rate": 9.951696575188278e-06, |
|
"loss": 0.085, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.657352706370749, |
|
"grad_norm": 0.21553283351451755, |
|
"learning_rate": 9.914540802486474e-06, |
|
"loss": 0.0856, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.6605460641864922, |
|
"grad_norm": 0.18489531692862257, |
|
"learning_rate": 9.877386209645633e-06, |
|
"loss": 0.0858, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.6637394220022355, |
|
"grad_norm": 0.20763123012361048, |
|
"learning_rate": 9.84023330962697e-06, |
|
"loss": 0.0852, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.6669327798179787, |
|
"grad_norm": 0.19596129486655178, |
|
"learning_rate": 9.803082615368323e-06, |
|
"loss": 0.0835, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.670126137633722, |
|
"grad_norm": 0.18532596629268455, |
|
"learning_rate": 9.765934639777087e-06, |
|
"loss": 0.0841, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.6733194954494652, |
|
"grad_norm": 0.17232132477688097, |
|
"learning_rate": 9.728789895723109e-06, |
|
"loss": 0.0835, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.6765128532652085, |
|
"grad_norm": 0.19279722790694026, |
|
"learning_rate": 9.691648896031642e-06, |
|
"loss": 0.0877, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.6797062110809517, |
|
"grad_norm": 0.20508136042678382, |
|
"learning_rate": 9.65451215347622e-06, |
|
"loss": 0.0849, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.682899568896695, |
|
"grad_norm": 0.2030859465238483, |
|
"learning_rate": 9.61738018077162e-06, |
|
"loss": 0.0828, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.6860929267124383, |
|
"grad_norm": 0.21547506058080174, |
|
"learning_rate": 9.580253490566753e-06, |
|
"loss": 0.0837, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.6892862845281815, |
|
"grad_norm": 0.22700049094169877, |
|
"learning_rate": 9.543132595437612e-06, |
|
"loss": 0.0849, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.6924796423439248, |
|
"grad_norm": 0.19256501341459278, |
|
"learning_rate": 9.506018007880169e-06, |
|
"loss": 0.0845, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.695673000159668, |
|
"grad_norm": 0.20795374910309583, |
|
"learning_rate": 9.468910240303324e-06, |
|
"loss": 0.0819, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.6988663579754113, |
|
"grad_norm": 0.19009887752439592, |
|
"learning_rate": 9.431809805021815e-06, |
|
"loss": 0.0816, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.7020597157911546, |
|
"grad_norm": 0.19799389204125842, |
|
"learning_rate": 9.394717214249147e-06, |
|
"loss": 0.0851, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.7052530736068978, |
|
"grad_norm": 0.2271148145004972, |
|
"learning_rate": 9.357632980090528e-06, |
|
"loss": 0.0852, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.7084464314226409, |
|
"grad_norm": 0.2344519009086231, |
|
"learning_rate": 9.320557614535787e-06, |
|
"loss": 0.0831, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.7116397892383841, |
|
"grad_norm": 0.23925944266837562, |
|
"learning_rate": 9.283491629452315e-06, |
|
"loss": 0.0853, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 1.7148331470541274, |
|
"grad_norm": 0.20081263527645807, |
|
"learning_rate": 9.246435536577999e-06, |
|
"loss": 0.085, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 1.7180265048698706, |
|
"grad_norm": 0.20700627503253236, |
|
"learning_rate": 9.20938984751415e-06, |
|
"loss": 0.0851, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 1.7212198626856139, |
|
"grad_norm": 0.201866541369534, |
|
"learning_rate": 9.172355073718439e-06, |
|
"loss": 0.0842, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 1.7244132205013571, |
|
"grad_norm": 0.20280888722283474, |
|
"learning_rate": 9.135331726497843e-06, |
|
"loss": 0.0822, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.7276065783171004, |
|
"grad_norm": 0.19344393506408358, |
|
"learning_rate": 9.09832031700158e-06, |
|
"loss": 0.0828, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 1.7307999361328437, |
|
"grad_norm": 0.16624847494447237, |
|
"learning_rate": 9.06132135621406e-06, |
|
"loss": 0.0829, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 1.733993293948587, |
|
"grad_norm": 0.21724180904413368, |
|
"learning_rate": 9.024335354947812e-06, |
|
"loss": 0.0838, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 1.7371866517643302, |
|
"grad_norm": 0.23846515088949718, |
|
"learning_rate": 8.987362823836461e-06, |
|
"loss": 0.0852, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 1.7403800095800734, |
|
"grad_norm": 0.20925184991512286, |
|
"learning_rate": 8.950404273327646e-06, |
|
"loss": 0.0834, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.7435733673958167, |
|
"grad_norm": 0.1716514985288543, |
|
"learning_rate": 8.913460213675998e-06, |
|
"loss": 0.0836, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 1.74676672521156, |
|
"grad_norm": 0.20494704853492865, |
|
"learning_rate": 8.876531154936084e-06, |
|
"loss": 0.0817, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 1.7499600830273032, |
|
"grad_norm": 0.24261768980108364, |
|
"learning_rate": 8.839617606955355e-06, |
|
"loss": 0.0842, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 1.7531534408430465, |
|
"grad_norm": 0.2090462161591236, |
|
"learning_rate": 8.802720079367136e-06, |
|
"loss": 0.0828, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 1.7563467986587897, |
|
"grad_norm": 0.18696550737565137, |
|
"learning_rate": 8.765839081583564e-06, |
|
"loss": 0.082, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.759540156474533, |
|
"grad_norm": 0.19121809093030445, |
|
"learning_rate": 8.72897512278856e-06, |
|
"loss": 0.0848, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 1.7627335142902762, |
|
"grad_norm": 0.2102957076954447, |
|
"learning_rate": 8.692128711930805e-06, |
|
"loss": 0.084, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 1.7659268721060195, |
|
"grad_norm": 0.20622666368626175, |
|
"learning_rate": 8.655300357716716e-06, |
|
"loss": 0.0845, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 1.7691202299217628, |
|
"grad_norm": 0.2091346262679367, |
|
"learning_rate": 8.618490568603409e-06, |
|
"loss": 0.0821, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 1.772313587737506, |
|
"grad_norm": 0.18255707260911555, |
|
"learning_rate": 8.581699852791696e-06, |
|
"loss": 0.0824, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.7755069455532493, |
|
"grad_norm": 0.2201418888200012, |
|
"learning_rate": 8.54492871821905e-06, |
|
"loss": 0.0836, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 1.7787003033689925, |
|
"grad_norm": 0.1875915274082898, |
|
"learning_rate": 8.508177672552617e-06, |
|
"loss": 0.0842, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 1.7818936611847358, |
|
"grad_norm": 0.19600313792607987, |
|
"learning_rate": 8.471447223182179e-06, |
|
"loss": 0.0836, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 1.785087019000479, |
|
"grad_norm": 0.19719362419525954, |
|
"learning_rate": 8.434737877213172e-06, |
|
"loss": 0.0856, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 1.788280376816222, |
|
"grad_norm": 0.17156639629201742, |
|
"learning_rate": 8.398050141459674e-06, |
|
"loss": 0.0819, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.7914737346319654, |
|
"grad_norm": 0.2039792577946715, |
|
"learning_rate": 8.361384522437402e-06, |
|
"loss": 0.0827, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 1.7946670924477086, |
|
"grad_norm": 0.19179744785660258, |
|
"learning_rate": 8.324741526356738e-06, |
|
"loss": 0.0826, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 1.7978604502634519, |
|
"grad_norm": 0.18215189474588084, |
|
"learning_rate": 8.288121659115727e-06, |
|
"loss": 0.0819, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 1.8010538080791951, |
|
"grad_norm": 0.1644377928850563, |
|
"learning_rate": 8.251525426293084e-06, |
|
"loss": 0.0827, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 1.8042471658949384, |
|
"grad_norm": 0.21222246533392128, |
|
"learning_rate": 8.21495333314123e-06, |
|
"loss": 0.0843, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.8074405237106816, |
|
"grad_norm": 0.25181863269369087, |
|
"learning_rate": 8.178405884579317e-06, |
|
"loss": 0.0842, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 1.810633881526425, |
|
"grad_norm": 0.2109399815982731, |
|
"learning_rate": 8.141883585186241e-06, |
|
"loss": 0.0829, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 1.8138272393421682, |
|
"grad_norm": 0.18073042845539122, |
|
"learning_rate": 8.10538693919369e-06, |
|
"loss": 0.0834, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 1.8170205971579114, |
|
"grad_norm": 0.20526943895282074, |
|
"learning_rate": 8.068916450479174e-06, |
|
"loss": 0.081, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 1.8202139549736547, |
|
"grad_norm": 0.19361555670993416, |
|
"learning_rate": 8.03247262255908e-06, |
|
"loss": 0.0836, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.823407312789398, |
|
"grad_norm": 0.24389934893406925, |
|
"learning_rate": 7.996055958581703e-06, |
|
"loss": 0.0828, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 1.8266006706051412, |
|
"grad_norm": 0.1877153126969613, |
|
"learning_rate": 7.959666961320314e-06, |
|
"loss": 0.0823, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 1.8297940284208845, |
|
"grad_norm": 0.19815842442257633, |
|
"learning_rate": 7.923306133166218e-06, |
|
"loss": 0.0827, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 1.8329873862366277, |
|
"grad_norm": 0.21678547999171613, |
|
"learning_rate": 7.886973976121797e-06, |
|
"loss": 0.0821, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 1.836180744052371, |
|
"grad_norm": 0.21618607294885436, |
|
"learning_rate": 7.850670991793621e-06, |
|
"loss": 0.0847, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.8393741018681142, |
|
"grad_norm": 0.1704593983368394, |
|
"learning_rate": 7.81439768138548e-06, |
|
"loss": 0.082, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 1.8425674596838575, |
|
"grad_norm": 0.18606341720829214, |
|
"learning_rate": 7.778154545691481e-06, |
|
"loss": 0.0812, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 1.8457608174996007, |
|
"grad_norm": 0.21208825422427718, |
|
"learning_rate": 7.741942085089146e-06, |
|
"loss": 0.083, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 1.848954175315344, |
|
"grad_norm": 0.18782574055868467, |
|
"learning_rate": 7.705760799532485e-06, |
|
"loss": 0.0828, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 1.8521475331310873, |
|
"grad_norm": 0.19574167645932028, |
|
"learning_rate": 7.669611188545103e-06, |
|
"loss": 0.083, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.8553408909468305, |
|
"grad_norm": 0.2065298678199762, |
|
"learning_rate": 7.6334937512133e-06, |
|
"loss": 0.0825, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 1.8585342487625738, |
|
"grad_norm": 0.1977503317300438, |
|
"learning_rate": 7.597408986179184e-06, |
|
"loss": 0.0806, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 1.861727606578317, |
|
"grad_norm": 0.20586182397186595, |
|
"learning_rate": 7.561357391633789e-06, |
|
"loss": 0.0824, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 1.8649209643940603, |
|
"grad_norm": 0.21998998145214102, |
|
"learning_rate": 7.525339465310183e-06, |
|
"loss": 0.0838, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 1.8681143222098036, |
|
"grad_norm": 0.24487809053970366, |
|
"learning_rate": 7.4893557044766145e-06, |
|
"loss": 0.0821, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.8713076800255468, |
|
"grad_norm": 0.18687218223534408, |
|
"learning_rate": 7.453406605929637e-06, |
|
"loss": 0.0806, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 1.87450103784129, |
|
"grad_norm": 0.17318503959159254, |
|
"learning_rate": 7.417492665987247e-06, |
|
"loss": 0.0819, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 1.8776943956570333, |
|
"grad_norm": 0.18945197729794094, |
|
"learning_rate": 7.3816143804820454e-06, |
|
"loss": 0.0835, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 1.8808877534727766, |
|
"grad_norm": 0.20142501192350587, |
|
"learning_rate": 7.345772244754377e-06, |
|
"loss": 0.0844, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 1.8840811112885198, |
|
"grad_norm": 0.20568732816869706, |
|
"learning_rate": 7.309966753645496e-06, |
|
"loss": 0.0801, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.887274469104263, |
|
"grad_norm": 0.20182816399217324, |
|
"learning_rate": 7.274198401490744e-06, |
|
"loss": 0.0846, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 1.8904678269200064, |
|
"grad_norm": 0.20018924573509358, |
|
"learning_rate": 7.2384676821127135e-06, |
|
"loss": 0.0798, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 1.8936611847357496, |
|
"grad_norm": 0.28199792560782483, |
|
"learning_rate": 7.202775088814429e-06, |
|
"loss": 0.0815, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 1.8968545425514929, |
|
"grad_norm": 0.22764478972933266, |
|
"learning_rate": 7.1671211143725485e-06, |
|
"loss": 0.0815, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 1.9000479003672361, |
|
"grad_norm": 0.1981593984765646, |
|
"learning_rate": 7.131506251030547e-06, |
|
"loss": 0.0809, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.9032412581829794, |
|
"grad_norm": 0.20992169378762218, |
|
"learning_rate": 7.095930990491933e-06, |
|
"loss": 0.0809, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 1.9064346159987227, |
|
"grad_norm": 0.19005910859773092, |
|
"learning_rate": 7.060395823913447e-06, |
|
"loss": 0.0842, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 1.909627973814466, |
|
"grad_norm": 0.19205175219083725, |
|
"learning_rate": 7.024901241898292e-06, |
|
"loss": 0.0819, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 1.9128213316302092, |
|
"grad_norm": 0.20008872943717196, |
|
"learning_rate": 6.9894477344893505e-06, |
|
"loss": 0.0819, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 1.9160146894459524, |
|
"grad_norm": 0.1773872749793287, |
|
"learning_rate": 6.9540357911624336e-06, |
|
"loss": 0.0823, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.9192080472616957, |
|
"grad_norm": 0.19417086960624413, |
|
"learning_rate": 6.918665900819497e-06, |
|
"loss": 0.0791, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 1.922401405077439, |
|
"grad_norm": 0.1814650138072353, |
|
"learning_rate": 6.883338551781923e-06, |
|
"loss": 0.0811, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 1.9255947628931822, |
|
"grad_norm": 0.1702657944804681, |
|
"learning_rate": 6.8480542317837505e-06, |
|
"loss": 0.0803, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 1.9287881207089255, |
|
"grad_norm": 0.18416550882743182, |
|
"learning_rate": 6.812813427964963e-06, |
|
"loss": 0.081, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 1.9319814785246687, |
|
"grad_norm": 0.21054620503327667, |
|
"learning_rate": 6.77761662686475e-06, |
|
"loss": 0.0837, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.935174836340412, |
|
"grad_norm": 0.1788773690242681, |
|
"learning_rate": 6.742464314414791e-06, |
|
"loss": 0.0809, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 1.9383681941561552, |
|
"grad_norm": 0.19629223674022553, |
|
"learning_rate": 6.707356975932559e-06, |
|
"loss": 0.0821, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 1.9415615519718985, |
|
"grad_norm": 0.17739114236704748, |
|
"learning_rate": 6.672295096114597e-06, |
|
"loss": 0.0816, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 1.9447549097876418, |
|
"grad_norm": 0.20468934483234205, |
|
"learning_rate": 6.637279159029851e-06, |
|
"loss": 0.0827, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 1.947948267603385, |
|
"grad_norm": 0.16608032221866548, |
|
"learning_rate": 6.602309648112968e-06, |
|
"loss": 0.0792, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.9511416254191283, |
|
"grad_norm": 0.1759677545684069, |
|
"learning_rate": 6.567387046157632e-06, |
|
"loss": 0.0785, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 1.9543349832348715, |
|
"grad_norm": 0.18405948214393053, |
|
"learning_rate": 6.532511835309896e-06, |
|
"loss": 0.0822, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 1.9575283410506148, |
|
"grad_norm": 0.2012173937759783, |
|
"learning_rate": 6.497684497061531e-06, |
|
"loss": 0.0818, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 1.960721698866358, |
|
"grad_norm": 0.2057906504416338, |
|
"learning_rate": 6.462905512243359e-06, |
|
"loss": 0.0806, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 1.9639150566821013, |
|
"grad_norm": 0.20687177701805626, |
|
"learning_rate": 6.428175361018643e-06, |
|
"loss": 0.0794, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.9671084144978446, |
|
"grad_norm": 0.2064196549144857, |
|
"learning_rate": 6.393494522876428e-06, |
|
"loss": 0.0816, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.9703017723135878, |
|
"grad_norm": 0.2133102540844893, |
|
"learning_rate": 6.358863476624948e-06, |
|
"loss": 0.0821, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 1.973495130129331, |
|
"grad_norm": 0.18497415279048168, |
|
"learning_rate": 6.324282700385e-06, |
|
"loss": 0.0824, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 1.9766884879450743, |
|
"grad_norm": 0.19520821054839646, |
|
"learning_rate": 6.289752671583344e-06, |
|
"loss": 0.0792, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 1.9798818457608176, |
|
"grad_norm": 0.18726221094986775, |
|
"learning_rate": 6.255273866946119e-06, |
|
"loss": 0.0799, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.9830752035765609, |
|
"grad_norm": 0.19525199269461027, |
|
"learning_rate": 6.22084676249225e-06, |
|
"loss": 0.0796, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 1.9862685613923041, |
|
"grad_norm": 0.16345775381577554, |
|
"learning_rate": 6.186471833526888e-06, |
|
"loss": 0.082, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 1.9894619192080474, |
|
"grad_norm": 0.1972221294843483, |
|
"learning_rate": 6.15214955463484e-06, |
|
"loss": 0.0787, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 1.9926552770237906, |
|
"grad_norm": 0.1935374722805669, |
|
"learning_rate": 6.117880399674016e-06, |
|
"loss": 0.0827, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 1.995848634839534, |
|
"grad_norm": 0.18315518408993714, |
|
"learning_rate": 6.083664841768901e-06, |
|
"loss": 0.0816, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.9990419926552772, |
|
"grad_norm": 0.16860052008855017, |
|
"learning_rate": 6.049503353304e-06, |
|
"loss": 0.0844, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.0022353504710204, |
|
"grad_norm": 0.18498027675472176, |
|
"learning_rate": 6.015396405917333e-06, |
|
"loss": 0.061, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.0054287082867637, |
|
"grad_norm": 0.20247862079416473, |
|
"learning_rate": 5.98134447049392e-06, |
|
"loss": 0.0494, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.008622066102507, |
|
"grad_norm": 0.17717972255777836, |
|
"learning_rate": 5.947348017159272e-06, |
|
"loss": 0.0496, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.01181542391825, |
|
"grad_norm": 0.17560899509079128, |
|
"learning_rate": 5.913407515272918e-06, |
|
"loss": 0.0484, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.0150087817339934, |
|
"grad_norm": 0.2107019559801837, |
|
"learning_rate": 5.879523433421903e-06, |
|
"loss": 0.0455, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.0182021395497367, |
|
"grad_norm": 0.17228228604398835, |
|
"learning_rate": 5.845696239414336e-06, |
|
"loss": 0.0481, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.02139549736548, |
|
"grad_norm": 0.16576058508327604, |
|
"learning_rate": 5.8119264002729244e-06, |
|
"loss": 0.0484, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.0245888551812232, |
|
"grad_norm": 0.17885300287909717, |
|
"learning_rate": 5.778214382228524e-06, |
|
"loss": 0.047, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.0277822129969665, |
|
"grad_norm": 0.20671449403256986, |
|
"learning_rate": 5.744560650713704e-06, |
|
"loss": 0.0471, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.0309755708127097, |
|
"grad_norm": 0.20083359478447635, |
|
"learning_rate": 5.710965670356332e-06, |
|
"loss": 0.0479, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.034168928628453, |
|
"grad_norm": 0.18961936533749266, |
|
"learning_rate": 5.6774299049731325e-06, |
|
"loss": 0.0478, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.0373622864441963, |
|
"grad_norm": 0.21979140727547378, |
|
"learning_rate": 5.643953817563318e-06, |
|
"loss": 0.0453, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.0405556442599395, |
|
"grad_norm": 0.16165099720000836, |
|
"learning_rate": 5.610537870302164e-06, |
|
"loss": 0.0476, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.0437490020756828, |
|
"grad_norm": 0.18343428699528758, |
|
"learning_rate": 5.577182524534657e-06, |
|
"loss": 0.0478, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.046942359891426, |
|
"grad_norm": 0.17215552651589366, |
|
"learning_rate": 5.5438882407691e-06, |
|
"loss": 0.0472, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.0501357177071693, |
|
"grad_norm": 0.1624976046442029, |
|
"learning_rate": 5.510655478670769e-06, |
|
"loss": 0.0478, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.0533290755229126, |
|
"grad_norm": 0.22026015940397797, |
|
"learning_rate": 5.4774846970555615e-06, |
|
"loss": 0.0461, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.056522433338656, |
|
"grad_norm": 0.17519613837123435, |
|
"learning_rate": 5.444376353883678e-06, |
|
"loss": 0.0462, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.059715791154399, |
|
"grad_norm": 0.18277575133361915, |
|
"learning_rate": 5.411330906253269e-06, |
|
"loss": 0.0455, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.0629091489701423, |
|
"grad_norm": 0.18787731365044255, |
|
"learning_rate": 5.378348810394143e-06, |
|
"loss": 0.0462, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.066102506785885, |
|
"grad_norm": 0.18201430894959444, |
|
"learning_rate": 5.3454305216614766e-06, |
|
"loss": 0.0473, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.0692958646016284, |
|
"grad_norm": 0.1904233887751224, |
|
"learning_rate": 5.312576494529507e-06, |
|
"loss": 0.0494, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.0724892224173717, |
|
"grad_norm": 0.18985642952053444, |
|
"learning_rate": 5.279787182585271e-06, |
|
"loss": 0.0462, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.075682580233115, |
|
"grad_norm": 0.1582812242047444, |
|
"learning_rate": 5.247063038522329e-06, |
|
"loss": 0.0469, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.078875938048858, |
|
"grad_norm": 0.19286531510895663, |
|
"learning_rate": 5.21440451413455e-06, |
|
"loss": 0.0465, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.0820692958646014, |
|
"grad_norm": 0.22047888942684946, |
|
"learning_rate": 5.181812060309825e-06, |
|
"loss": 0.0463, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.0852626536803447, |
|
"grad_norm": 0.22499631209380672, |
|
"learning_rate": 5.149286127023874e-06, |
|
"loss": 0.0467, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.088456011496088, |
|
"grad_norm": 0.18796568419290619, |
|
"learning_rate": 5.1168271633340235e-06, |
|
"loss": 0.0471, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.091649369311831, |
|
"grad_norm": 0.1796719273681106, |
|
"learning_rate": 5.084435617373018e-06, |
|
"loss": 0.048, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.0948427271275745, |
|
"grad_norm": 0.1916078526748605, |
|
"learning_rate": 5.052111936342812e-06, |
|
"loss": 0.0467, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.0980360849433177, |
|
"grad_norm": 0.19878847514842057, |
|
"learning_rate": 5.019856566508412e-06, |
|
"loss": 0.0478, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.101229442759061, |
|
"grad_norm": 0.2088933392167675, |
|
"learning_rate": 4.9876699531917186e-06, |
|
"loss": 0.0473, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.1044228005748042, |
|
"grad_norm": 0.20402583213332395, |
|
"learning_rate": 4.95555254076536e-06, |
|
"loss": 0.0457, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.1076161583905475, |
|
"grad_norm": 0.16605435030952836, |
|
"learning_rate": 4.923504772646573e-06, |
|
"loss": 0.0473, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.1108095162062908, |
|
"grad_norm": 0.17651776985556464, |
|
"learning_rate": 4.891527091291071e-06, |
|
"loss": 0.0477, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.114002874022034, |
|
"grad_norm": 0.1763790661182835, |
|
"learning_rate": 4.859619938186947e-06, |
|
"loss": 0.0456, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.1171962318377773, |
|
"grad_norm": 0.18886660022445972, |
|
"learning_rate": 4.827783753848575e-06, |
|
"loss": 0.0455, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.1203895896535205, |
|
"grad_norm": 0.2059211240085781, |
|
"learning_rate": 4.796018977810514e-06, |
|
"loss": 0.0457, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.123582947469264, |
|
"grad_norm": 0.19168043665328116, |
|
"learning_rate": 4.76432604862145e-06, |
|
"loss": 0.046, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.126776305285007, |
|
"grad_norm": 0.17778767466228898, |
|
"learning_rate": 4.732705403838159e-06, |
|
"loss": 0.0465, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.1299696631007503, |
|
"grad_norm": 0.170308319213917, |
|
"learning_rate": 4.701157480019429e-06, |
|
"loss": 0.0474, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.1331630209164936, |
|
"grad_norm": 0.1711104888651996, |
|
"learning_rate": 4.669682712720065e-06, |
|
"loss": 0.0462, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.136356378732237, |
|
"grad_norm": 0.1825464435577293, |
|
"learning_rate": 4.638281536484854e-06, |
|
"loss": 0.0485, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.13954973654798, |
|
"grad_norm": 0.1835185156049789, |
|
"learning_rate": 4.606954384842587e-06, |
|
"loss": 0.0455, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.1427430943637233, |
|
"grad_norm": 0.19538449656271248, |
|
"learning_rate": 4.575701690300051e-06, |
|
"loss": 0.0457, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.1459364521794666, |
|
"grad_norm": 0.20119853731280407, |
|
"learning_rate": 4.544523884336073e-06, |
|
"loss": 0.0462, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.14912980999521, |
|
"grad_norm": 0.19230165287264112, |
|
"learning_rate": 4.513421397395563e-06, |
|
"loss": 0.0449, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.152323167810953, |
|
"grad_norm": 0.19371541515972485, |
|
"learning_rate": 4.482394658883557e-06, |
|
"loss": 0.0465, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.1555165256266964, |
|
"grad_norm": 0.2749584429863373, |
|
"learning_rate": 4.451444097159301e-06, |
|
"loss": 0.0465, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.1587098834424396, |
|
"grad_norm": 0.181430213502962, |
|
"learning_rate": 4.4205701395303424e-06, |
|
"loss": 0.0469, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.161903241258183, |
|
"grad_norm": 0.21832000463916046, |
|
"learning_rate": 4.38977321224661e-06, |
|
"loss": 0.0472, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.165096599073926, |
|
"grad_norm": 0.36594927042777403, |
|
"learning_rate": 4.3590537404945535e-06, |
|
"loss": 0.0471, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.1682899568896694, |
|
"grad_norm": 0.19062769875876745, |
|
"learning_rate": 4.3284121483912525e-06, |
|
"loss": 0.0464, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.1714833147054127, |
|
"grad_norm": 0.18521477830070004, |
|
"learning_rate": 4.297848858978569e-06, |
|
"loss": 0.0461, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.174676672521156, |
|
"grad_norm": 0.2064934921930085, |
|
"learning_rate": 4.2673642942173184e-06, |
|
"loss": 0.0451, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.177870030336899, |
|
"grad_norm": 0.19089143723142035, |
|
"learning_rate": 4.236958874981423e-06, |
|
"loss": 0.0448, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.1810633881526424, |
|
"grad_norm": 0.17162658742427372, |
|
"learning_rate": 4.206633021052115e-06, |
|
"loss": 0.0453, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.1842567459683857, |
|
"grad_norm": 0.18039037729927956, |
|
"learning_rate": 4.176387151112134e-06, |
|
"loss": 0.0455, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.187450103784129, |
|
"grad_norm": 0.16510411035975564, |
|
"learning_rate": 4.1462216827399585e-06, |
|
"loss": 0.0446, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.190643461599872, |
|
"grad_norm": 0.2215703230886645, |
|
"learning_rate": 4.116137032404026e-06, |
|
"loss": 0.0453, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.1938368194156155, |
|
"grad_norm": 0.18140462418275824, |
|
"learning_rate": 4.0861336154569855e-06, |
|
"loss": 0.0446, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.1970301772313587, |
|
"grad_norm": 0.164963005058681, |
|
"learning_rate": 4.056211846129977e-06, |
|
"loss": 0.0451, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.200223535047102, |
|
"grad_norm": 0.22161978868062865, |
|
"learning_rate": 4.0263721375269e-06, |
|
"loss": 0.0439, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.2034168928628453, |
|
"grad_norm": 0.18997163122166422, |
|
"learning_rate": 3.99661490161871e-06, |
|
"loss": 0.0452, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.2066102506785885, |
|
"grad_norm": 0.19721572060634018, |
|
"learning_rate": 3.966940549237728e-06, |
|
"loss": 0.046, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 2.2098036084943318, |
|
"grad_norm": 0.1613696871656721, |
|
"learning_rate": 3.937349490071989e-06, |
|
"loss": 0.0451, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 2.212996966310075, |
|
"grad_norm": 0.23649764683113925, |
|
"learning_rate": 3.9078421326595575e-06, |
|
"loss": 0.0473, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 2.2161903241258183, |
|
"grad_norm": 0.15900455957581072, |
|
"learning_rate": 3.8784188843829075e-06, |
|
"loss": 0.0467, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 2.2193836819415615, |
|
"grad_norm": 0.16623211370488078, |
|
"learning_rate": 3.849080151463284e-06, |
|
"loss": 0.0447, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.222577039757305, |
|
"grad_norm": 0.23855246445899472, |
|
"learning_rate": 3.819826338955115e-06, |
|
"loss": 0.045, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 2.225770397573048, |
|
"grad_norm": 0.16852273819977373, |
|
"learning_rate": 3.7906578507403925e-06, |
|
"loss": 0.044, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 2.2289637553887913, |
|
"grad_norm": 0.19176422233347587, |
|
"learning_rate": 3.761575089523114e-06, |
|
"loss": 0.0451, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 2.2321571132045346, |
|
"grad_norm": 0.19217003400101632, |
|
"learning_rate": 3.7325784568237267e-06, |
|
"loss": 0.0456, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 2.235350471020278, |
|
"grad_norm": 0.2142815186061357, |
|
"learning_rate": 3.7036683529735616e-06, |
|
"loss": 0.0438, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.238543828836021, |
|
"grad_norm": 0.16980952681099654, |
|
"learning_rate": 3.6748451771093386e-06, |
|
"loss": 0.0456, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 2.2417371866517644, |
|
"grad_norm": 0.20792979968816608, |
|
"learning_rate": 3.6461093271676216e-06, |
|
"loss": 0.045, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 2.2449305444675076, |
|
"grad_norm": 0.19749481308114683, |
|
"learning_rate": 3.6174611998793486e-06, |
|
"loss": 0.0455, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 2.248123902283251, |
|
"grad_norm": 0.208757882997406, |
|
"learning_rate": 3.5889011907643523e-06, |
|
"loss": 0.0468, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 2.251317260098994, |
|
"grad_norm": 0.18603971145921822, |
|
"learning_rate": 3.5604296941258854e-06, |
|
"loss": 0.0456, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.2545106179147374, |
|
"grad_norm": 0.24232186850665094, |
|
"learning_rate": 3.532047103045185e-06, |
|
"loss": 0.0442, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 2.2577039757304806, |
|
"grad_norm": 0.24810029826855062, |
|
"learning_rate": 3.503753809376059e-06, |
|
"loss": 0.0463, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 2.260897333546224, |
|
"grad_norm": 0.23406287255675895, |
|
"learning_rate": 3.475550203739452e-06, |
|
"loss": 0.0451, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 2.264090691361967, |
|
"grad_norm": 0.17282967387502232, |
|
"learning_rate": 3.4474366755180644e-06, |
|
"loss": 0.0453, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 2.2672840491777104, |
|
"grad_norm": 0.21126534883401732, |
|
"learning_rate": 3.419413612850976e-06, |
|
"loss": 0.0461, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.2704774069934537, |
|
"grad_norm": 0.16104640464566056, |
|
"learning_rate": 3.391481402628297e-06, |
|
"loss": 0.0476, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 2.273670764809197, |
|
"grad_norm": 0.21435527733602905, |
|
"learning_rate": 3.363640430485804e-06, |
|
"loss": 0.0446, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 2.27686412262494, |
|
"grad_norm": 0.18548507359762656, |
|
"learning_rate": 3.3358910807996325e-06, |
|
"loss": 0.0451, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 2.2800574804406835, |
|
"grad_norm": 0.19423383437023095, |
|
"learning_rate": 3.3082337366809704e-06, |
|
"loss": 0.0448, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 2.2832508382564267, |
|
"grad_norm": 0.17237074664312235, |
|
"learning_rate": 3.2806687799707647e-06, |
|
"loss": 0.0459, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.28644419607217, |
|
"grad_norm": 0.22791506612179063, |
|
"learning_rate": 3.253196591234443e-06, |
|
"loss": 0.0449, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 2.2896375538879132, |
|
"grad_norm": 0.18890323777751128, |
|
"learning_rate": 3.2258175497566678e-06, |
|
"loss": 0.0449, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 2.2928309117036565, |
|
"grad_norm": 0.22098418299523961, |
|
"learning_rate": 3.198532033536107e-06, |
|
"loss": 0.0437, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 2.2960242695193998, |
|
"grad_norm": 0.22834203263219127, |
|
"learning_rate": 3.1713404192801945e-06, |
|
"loss": 0.0462, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 2.299217627335143, |
|
"grad_norm": 0.19033969048906568, |
|
"learning_rate": 3.144243082399947e-06, |
|
"loss": 0.0454, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.3024109851508863, |
|
"grad_norm": 0.1772642418355086, |
|
"learning_rate": 3.1172403970047725e-06, |
|
"loss": 0.0441, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 2.3056043429666295, |
|
"grad_norm": 0.2048657544909403, |
|
"learning_rate": 3.0903327358973168e-06, |
|
"loss": 0.0446, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 2.308797700782373, |
|
"grad_norm": 0.18540450076918674, |
|
"learning_rate": 3.0635204705682976e-06, |
|
"loss": 0.0451, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 2.311991058598116, |
|
"grad_norm": 0.18445665460036134, |
|
"learning_rate": 3.0368039711913867e-06, |
|
"loss": 0.0459, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 2.3151844164138593, |
|
"grad_norm": 0.22336940402363192, |
|
"learning_rate": 3.0101836066181033e-06, |
|
"loss": 0.0455, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.3183777742296026, |
|
"grad_norm": 0.16285692399794796, |
|
"learning_rate": 2.983659744372721e-06, |
|
"loss": 0.045, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 2.321571132045346, |
|
"grad_norm": 0.19697000745739243, |
|
"learning_rate": 2.9572327506471775e-06, |
|
"loss": 0.0454, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 2.324764489861089, |
|
"grad_norm": 0.1950278510185452, |
|
"learning_rate": 2.9309029902960395e-06, |
|
"loss": 0.0452, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 2.3279578476768323, |
|
"grad_norm": 0.1926073736357789, |
|
"learning_rate": 2.9046708268314494e-06, |
|
"loss": 0.0455, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 2.3311512054925756, |
|
"grad_norm": 0.5787988360468825, |
|
"learning_rate": 2.8785366224181265e-06, |
|
"loss": 0.047, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.334344563308319, |
|
"grad_norm": 0.19178497872154512, |
|
"learning_rate": 2.8525007378683433e-06, |
|
"loss": 0.0441, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 2.337537921124062, |
|
"grad_norm": 0.20463851817417028, |
|
"learning_rate": 2.8265635326369557e-06, |
|
"loss": 0.0443, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 2.3407312789398054, |
|
"grad_norm": 0.18832526122080892, |
|
"learning_rate": 2.8007253648164502e-06, |
|
"loss": 0.0447, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 2.3439246367555486, |
|
"grad_norm": 0.25535504048141416, |
|
"learning_rate": 2.7749865911319786e-06, |
|
"loss": 0.0462, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 2.347117994571292, |
|
"grad_norm": 0.2783926831983617, |
|
"learning_rate": 2.74934756693645e-06, |
|
"loss": 0.0461, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.350311352387035, |
|
"grad_norm": 0.1799001156488928, |
|
"learning_rate": 2.7238086462056125e-06, |
|
"loss": 0.0451, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 2.3535047102027784, |
|
"grad_norm": 0.22749744937087824, |
|
"learning_rate": 2.6983701815331844e-06, |
|
"loss": 0.0449, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 2.3566980680185217, |
|
"grad_norm": 0.192235427214562, |
|
"learning_rate": 2.6730325241259605e-06, |
|
"loss": 0.0447, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 2.359891425834265, |
|
"grad_norm": 0.1779393552771597, |
|
"learning_rate": 2.647796023798991e-06, |
|
"loss": 0.0455, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 2.3630847836500077, |
|
"grad_norm": 0.17636063193070986, |
|
"learning_rate": 2.6226610289707235e-06, |
|
"loss": 0.0453, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.3662781414657514, |
|
"grad_norm": 0.17751151289004394, |
|
"learning_rate": 2.5976278866582226e-06, |
|
"loss": 0.0439, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 2.3694714992814943, |
|
"grad_norm": 0.1612714192997329, |
|
"learning_rate": 2.5726969424723514e-06, |
|
"loss": 0.0451, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 2.372664857097238, |
|
"grad_norm": 0.19257379967637422, |
|
"learning_rate": 2.5478685406130143e-06, |
|
"loss": 0.0535, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 2.3758582149129808, |
|
"grad_norm": 0.18593345377491236, |
|
"learning_rate": 2.5231430238644106e-06, |
|
"loss": 0.045, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 2.3790515727287245, |
|
"grad_norm": 0.19051880160399431, |
|
"learning_rate": 2.4985207335902863e-06, |
|
"loss": 0.0451, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.3822449305444673, |
|
"grad_norm": 0.18531119849649635, |
|
"learning_rate": 2.4740020097292318e-06, |
|
"loss": 0.0426, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 2.385438288360211, |
|
"grad_norm": 0.23011458580940014, |
|
"learning_rate": 2.4495871907899816e-06, |
|
"loss": 0.0456, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 2.388631646175954, |
|
"grad_norm": 0.22814782369226178, |
|
"learning_rate": 2.425276613846755e-06, |
|
"loss": 0.0458, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 2.3918250039916975, |
|
"grad_norm": 0.18964633782059312, |
|
"learning_rate": 2.401070614534585e-06, |
|
"loss": 0.0445, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 2.3950183618074403, |
|
"grad_norm": 0.18585844070460122, |
|
"learning_rate": 2.3769695270446903e-06, |
|
"loss": 0.0433, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.398211719623184, |
|
"grad_norm": 0.2173023589979796, |
|
"learning_rate": 2.352973684119868e-06, |
|
"loss": 0.0452, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 2.401405077438927, |
|
"grad_norm": 0.1888223260670983, |
|
"learning_rate": 2.329083417049899e-06, |
|
"loss": 0.0453, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 2.40459843525467, |
|
"grad_norm": 0.2000345304946633, |
|
"learning_rate": 2.3052990556669587e-06, |
|
"loss": 0.0443, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 2.4077917930704134, |
|
"grad_norm": 0.350402818921811, |
|
"learning_rate": 2.2816209283410815e-06, |
|
"loss": 0.0446, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 2.4109851508861566, |
|
"grad_norm": 0.17540258992531277, |
|
"learning_rate": 2.258049361975616e-06, |
|
"loss": 0.0448, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.4141785087019, |
|
"grad_norm": 0.2240022668610996, |
|
"learning_rate": 2.234584682002726e-06, |
|
"loss": 0.0436, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 2.417371866517643, |
|
"grad_norm": 0.19377910419185784, |
|
"learning_rate": 2.211227212378877e-06, |
|
"loss": 0.0449, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 2.4205652243333864, |
|
"grad_norm": 0.18307979574559963, |
|
"learning_rate": 2.1879772755803763e-06, |
|
"loss": 0.0437, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 2.4237585821491296, |
|
"grad_norm": 0.18479960232316164, |
|
"learning_rate": 2.1648351925989253e-06, |
|
"loss": 0.0469, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 2.426951939964873, |
|
"grad_norm": 0.19121025995799099, |
|
"learning_rate": 2.1418012829371735e-06, |
|
"loss": 0.0438, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.430145297780616, |
|
"grad_norm": 0.19858616833926596, |
|
"learning_rate": 2.1188758646043206e-06, |
|
"loss": 0.044, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 2.4333386555963594, |
|
"grad_norm": 0.18772227683807235, |
|
"learning_rate": 2.0960592541117143e-06, |
|
"loss": 0.0452, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 2.4365320134121027, |
|
"grad_norm": 0.1743929147084694, |
|
"learning_rate": 2.0733517664684944e-06, |
|
"loss": 0.0438, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 2.439725371227846, |
|
"grad_norm": 0.18605377215327853, |
|
"learning_rate": 2.050753715177236e-06, |
|
"loss": 0.0464, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 2.442918729043589, |
|
"grad_norm": 0.19099944392969617, |
|
"learning_rate": 2.0282654122296154e-06, |
|
"loss": 0.0434, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.4461120868593325, |
|
"grad_norm": 0.19579885958359836, |
|
"learning_rate": 2.0058871681021087e-06, |
|
"loss": 0.0433, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 2.4493054446750757, |
|
"grad_norm": 0.2037719797424841, |
|
"learning_rate": 1.983619291751716e-06, |
|
"loss": 0.0445, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 2.452498802490819, |
|
"grad_norm": 0.2288507482341902, |
|
"learning_rate": 1.961462090611673e-06, |
|
"loss": 0.0445, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 2.4556921603065622, |
|
"grad_norm": 0.18192991033918157, |
|
"learning_rate": 1.9394158705872244e-06, |
|
"loss": 0.0453, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 2.4588855181223055, |
|
"grad_norm": 0.2180936188857526, |
|
"learning_rate": 1.9174809360513935e-06, |
|
"loss": 0.045, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.4620788759380487, |
|
"grad_norm": 0.1894861914106852, |
|
"learning_rate": 1.8956575898407847e-06, |
|
"loss": 0.0464, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 2.465272233753792, |
|
"grad_norm": 0.2021847245639915, |
|
"learning_rate": 1.8739461332513953e-06, |
|
"loss": 0.0459, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 2.4684655915695353, |
|
"grad_norm": 0.1992201840351267, |
|
"learning_rate": 1.85234686603446e-06, |
|
"loss": 0.044, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 2.4716589493852785, |
|
"grad_norm": 0.18202769181733872, |
|
"learning_rate": 1.8308600863923164e-06, |
|
"loss": 0.0464, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 2.474852307201022, |
|
"grad_norm": 0.17956705043459079, |
|
"learning_rate": 1.8094860909742795e-06, |
|
"loss": 0.0457, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.478045665016765, |
|
"grad_norm": 0.1780847660838803, |
|
"learning_rate": 1.78822517487255e-06, |
|
"loss": 0.044, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 2.4812390228325083, |
|
"grad_norm": 0.19200813107543122, |
|
"learning_rate": 1.7670776316181427e-06, |
|
"loss": 0.0432, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 2.4844323806482516, |
|
"grad_norm": 0.2516917996505797, |
|
"learning_rate": 1.746043753176836e-06, |
|
"loss": 0.0448, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 2.487625738463995, |
|
"grad_norm": 0.17194174394098138, |
|
"learning_rate": 1.7251238299451301e-06, |
|
"loss": 0.0449, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 2.490819096279738, |
|
"grad_norm": 0.17011442140145003, |
|
"learning_rate": 1.7043181507462448e-06, |
|
"loss": 0.0457, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.4940124540954813, |
|
"grad_norm": 0.17376564573157416, |
|
"learning_rate": 1.6836270028261326e-06, |
|
"loss": 0.0446, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 2.4972058119112246, |
|
"grad_norm": 0.2600424543600025, |
|
"learning_rate": 1.66305067184952e-06, |
|
"loss": 0.0435, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 2.500399169726968, |
|
"grad_norm": 0.1728773334170149, |
|
"learning_rate": 1.6425894418959433e-06, |
|
"loss": 0.0444, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 2.503592527542711, |
|
"grad_norm": 0.2117397902480935, |
|
"learning_rate": 1.6222435954558435e-06, |
|
"loss": 0.0424, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 2.5067858853584544, |
|
"grad_norm": 0.20379918000728395, |
|
"learning_rate": 1.6020134134266674e-06, |
|
"loss": 0.0449, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.5099792431741976, |
|
"grad_norm": 0.3110350981628874, |
|
"learning_rate": 1.5818991751089762e-06, |
|
"loss": 0.0434, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 2.513172600989941, |
|
"grad_norm": 0.18429144606858047, |
|
"learning_rate": 1.5619011582025988e-06, |
|
"loss": 0.0439, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 2.516365958805684, |
|
"grad_norm": 0.1756584956115843, |
|
"learning_rate": 1.5420196388027963e-06, |
|
"loss": 0.0423, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 2.5195593166214274, |
|
"grad_norm": 0.18747969624165203, |
|
"learning_rate": 1.5222548913964508e-06, |
|
"loss": 0.0432, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 2.5227526744371707, |
|
"grad_norm": 0.17351521964113906, |
|
"learning_rate": 1.5026071888582771e-06, |
|
"loss": 0.0428, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.525946032252914, |
|
"grad_norm": 0.1763855716931325, |
|
"learning_rate": 1.4830768024470487e-06, |
|
"loss": 0.0437, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 2.529139390068657, |
|
"grad_norm": 0.19172367578038851, |
|
"learning_rate": 1.4636640018018556e-06, |
|
"loss": 0.0436, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 2.5323327478844004, |
|
"grad_norm": 0.18955098367053075, |
|
"learning_rate": 1.4443690549383904e-06, |
|
"loss": 0.0422, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 2.5355261057001437, |
|
"grad_norm": 0.2062297852474484, |
|
"learning_rate": 1.4251922282452356e-06, |
|
"loss": 0.0423, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 2.538719463515887, |
|
"grad_norm": 0.184016665131291, |
|
"learning_rate": 1.4061337864801916e-06, |
|
"loss": 0.0441, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.54191282133163, |
|
"grad_norm": 0.21880976113017805, |
|
"learning_rate": 1.3871939927666189e-06, |
|
"loss": 0.046, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 2.5451061791473735, |
|
"grad_norm": 0.17335074095350983, |
|
"learning_rate": 1.3683731085898144e-06, |
|
"loss": 0.0441, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 2.5482995369631167, |
|
"grad_norm": 0.19234479041549446, |
|
"learning_rate": 1.349671393793388e-06, |
|
"loss": 0.0427, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 2.55149289477886, |
|
"grad_norm": 0.18631232012636342, |
|
"learning_rate": 1.3310891065756814e-06, |
|
"loss": 0.0435, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 2.5546862525946032, |
|
"grad_norm": 0.19243767802224285, |
|
"learning_rate": 1.3126265034862084e-06, |
|
"loss": 0.0441, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.5578796104103465, |
|
"grad_norm": 0.22553668043830372, |
|
"learning_rate": 1.2942838394221002e-06, |
|
"loss": 0.0438, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 2.5610729682260898, |
|
"grad_norm": 0.2414806098978672, |
|
"learning_rate": 1.2760613676246037e-06, |
|
"loss": 0.0455, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 2.564266326041833, |
|
"grad_norm": 0.17562297042382372, |
|
"learning_rate": 1.2579593396755652e-06, |
|
"loss": 0.0437, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 2.5674596838575763, |
|
"grad_norm": 0.1714929007989254, |
|
"learning_rate": 1.2399780054939758e-06, |
|
"loss": 0.0435, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 2.5706530416733195, |
|
"grad_norm": 0.18944429187488632, |
|
"learning_rate": 1.2221176133325097e-06, |
|
"loss": 0.0432, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.573846399489063, |
|
"grad_norm": 0.18830587754770226, |
|
"learning_rate": 1.2043784097740951e-06, |
|
"loss": 0.044, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 2.577039757304806, |
|
"grad_norm": 0.20515213794452525, |
|
"learning_rate": 1.1867606397285191e-06, |
|
"loss": 0.0444, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 2.5802331151205493, |
|
"grad_norm": 0.2068320912840683, |
|
"learning_rate": 1.1692645464290441e-06, |
|
"loss": 0.0443, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 2.5834264729362926, |
|
"grad_norm": 0.2065451583149461, |
|
"learning_rate": 1.151890371429042e-06, |
|
"loss": 0.0447, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 2.586619830752036, |
|
"grad_norm": 0.20955876801496184, |
|
"learning_rate": 1.1346383545986629e-06, |
|
"loss": 0.043, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.589813188567779, |
|
"grad_norm": 0.18475336946843543, |
|
"learning_rate": 1.117508734121535e-06, |
|
"loss": 0.0439, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 2.5930065463835223, |
|
"grad_norm": 0.19250755490602636, |
|
"learning_rate": 1.1005017464914568e-06, |
|
"loss": 0.0431, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 2.5961999041992656, |
|
"grad_norm": 0.2138444193531275, |
|
"learning_rate": 1.0836176265091448e-06, |
|
"loss": 0.0447, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 2.599393262015009, |
|
"grad_norm": 0.19283181561318452, |
|
"learning_rate": 1.0668566072789876e-06, |
|
"loss": 0.0434, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 2.602586619830752, |
|
"grad_norm": 0.19258136254237682, |
|
"learning_rate": 1.05021892020583e-06, |
|
"loss": 0.0452, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.6057799776464954, |
|
"grad_norm": 0.239296573931001, |
|
"learning_rate": 1.0337047949917777e-06, |
|
"loss": 0.0432, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 2.6089733354622386, |
|
"grad_norm": 0.18442185794546465, |
|
"learning_rate": 1.0173144596330231e-06, |
|
"loss": 0.0439, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 2.612166693277982, |
|
"grad_norm": 0.17759720874685755, |
|
"learning_rate": 1.0010481404166972e-06, |
|
"loss": 0.0434, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 2.615360051093725, |
|
"grad_norm": 0.1999834786965281, |
|
"learning_rate": 9.849060619177553e-07, |
|
"loss": 0.0446, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 2.6185534089094684, |
|
"grad_norm": 0.21313365667220596, |
|
"learning_rate": 9.688884469958604e-07, |
|
"loss": 0.0434, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.6217467667252117, |
|
"grad_norm": 0.19320209419752543, |
|
"learning_rate": 9.5299551679232e-07, |
|
"loss": 0.0445, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 2.624940124540955, |
|
"grad_norm": 0.17847623577962735, |
|
"learning_rate": 9.372274907270251e-07, |
|
"loss": 0.0437, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 2.628133482356698, |
|
"grad_norm": 0.23166885515187532, |
|
"learning_rate": 9.215845864954287e-07, |
|
"loss": 0.0419, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 2.6313268401724415, |
|
"grad_norm": 0.18325681984081477, |
|
"learning_rate": 9.060670200655286e-07, |
|
"loss": 0.0439, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 2.6345201979881847, |
|
"grad_norm": 0.20540975477642068, |
|
"learning_rate": 8.906750056748947e-07, |
|
"loss": 0.0448, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.637713555803928, |
|
"grad_norm": 0.1786617783763284, |
|
"learning_rate": 8.754087558277113e-07, |
|
"loss": 0.0444, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 2.6409069136196712, |
|
"grad_norm": 0.1901267431080617, |
|
"learning_rate": 8.602684812918416e-07, |
|
"loss": 0.0438, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 2.6441002714354145, |
|
"grad_norm": 0.18259614623005302, |
|
"learning_rate": 8.452543910959121e-07, |
|
"loss": 0.0432, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 2.6472936292511577, |
|
"grad_norm": 0.18713135077039142, |
|
"learning_rate": 8.303666925264331e-07, |
|
"loss": 0.0437, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 2.650486987066901, |
|
"grad_norm": 0.1801858452235725, |
|
"learning_rate": 8.156055911249394e-07, |
|
"loss": 0.0448, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.6536803448826443, |
|
"grad_norm": 0.17771380124624228, |
|
"learning_rate": 8.00971290685143e-07, |
|
"loss": 0.0445, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 2.6568737026983875, |
|
"grad_norm": 0.22250062270982698, |
|
"learning_rate": 7.864639932501294e-07, |
|
"loss": 0.0427, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 2.6600670605141303, |
|
"grad_norm": 0.20866465188062733, |
|
"learning_rate": 7.720838991095602e-07, |
|
"loss": 0.0427, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 2.663260418329874, |
|
"grad_norm": 0.2055356708135395, |
|
"learning_rate": 7.578312067969162e-07, |
|
"loss": 0.043, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 2.666453776145617, |
|
"grad_norm": 0.20698005060615937, |
|
"learning_rate": 7.437061130867473e-07, |
|
"loss": 0.0442, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.6696471339613606, |
|
"grad_norm": 0.20876117607511466, |
|
"learning_rate": 7.297088129919616e-07, |
|
"loss": 0.0498, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 2.6728404917771034, |
|
"grad_norm": 0.24032862358776724, |
|
"learning_rate": 7.158394997611329e-07, |
|
"loss": 0.0429, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 2.676033849592847, |
|
"grad_norm": 0.20969273760927634, |
|
"learning_rate": 7.020983648758318e-07, |
|
"loss": 0.0447, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 2.67922720740859, |
|
"grad_norm": 0.2174374325052259, |
|
"learning_rate": 6.884855980479777e-07, |
|
"loss": 0.0452, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 2.6824205652243336, |
|
"grad_norm": 0.18004577133887417, |
|
"learning_rate": 6.750013872172301e-07, |
|
"loss": 0.0438, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.6856139230400764, |
|
"grad_norm": 0.2035569950209219, |
|
"learning_rate": 6.616459185483793e-07, |
|
"loss": 0.0438, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 2.68880728085582, |
|
"grad_norm": 0.20132465630515528, |
|
"learning_rate": 6.484193764287938e-07, |
|
"loss": 0.0445, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 2.692000638671563, |
|
"grad_norm": 0.1712570311869676, |
|
"learning_rate": 6.353219434658587e-07, |
|
"loss": 0.0432, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 2.6951939964873066, |
|
"grad_norm": 0.19144286472815933, |
|
"learning_rate": 6.223538004844587e-07, |
|
"loss": 0.0426, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 2.6983873543030494, |
|
"grad_norm": 0.1761969500556086, |
|
"learning_rate": 6.095151265244937e-07, |
|
"loss": 0.0436, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.701580712118793, |
|
"grad_norm": 0.18412941719997428, |
|
"learning_rate": 5.968060988383884e-07, |
|
"loss": 0.0419, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 2.704774069934536, |
|
"grad_norm": 0.2088468477123862, |
|
"learning_rate": 5.842268928886563e-07, |
|
"loss": 0.0435, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 2.7079674277502797, |
|
"grad_norm": 0.21087568774149862, |
|
"learning_rate": 5.717776823454746e-07, |
|
"loss": 0.0434, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 2.7111607855660225, |
|
"grad_norm": 0.20533012449268137, |
|
"learning_rate": 5.594586390842915e-07, |
|
"loss": 0.0436, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 2.714354143381766, |
|
"grad_norm": 0.23130477787372275, |
|
"learning_rate": 5.472699331834408e-07, |
|
"loss": 0.0434, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.717547501197509, |
|
"grad_norm": 0.19246797825033052, |
|
"learning_rate": 5.352117329218065e-07, |
|
"loss": 0.0443, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 2.7207408590132527, |
|
"grad_norm": 0.19825650332574749, |
|
"learning_rate": 5.23284204776493e-07, |
|
"loss": 0.0432, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 2.7239342168289955, |
|
"grad_norm": 0.19435989820475502, |
|
"learning_rate": 5.1148751342053e-07, |
|
"loss": 0.0437, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 2.727127574644739, |
|
"grad_norm": 0.17105286427984273, |
|
"learning_rate": 4.998218217205941e-07, |
|
"loss": 0.0431, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 2.730320932460482, |
|
"grad_norm": 0.2076555517606956, |
|
"learning_rate": 4.882872907347657e-07, |
|
"loss": 0.0441, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.7335142902762257, |
|
"grad_norm": 0.17467573768445724, |
|
"learning_rate": 4.768840797103014e-07, |
|
"loss": 0.0426, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 2.7367076480919685, |
|
"grad_norm": 0.23656714472082974, |
|
"learning_rate": 4.6561234608143993e-07, |
|
"loss": 0.0442, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 2.739901005907712, |
|
"grad_norm": 0.1991265479506836, |
|
"learning_rate": 4.544722454672223e-07, |
|
"loss": 0.0443, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 2.743094363723455, |
|
"grad_norm": 0.16764542580219924, |
|
"learning_rate": 4.434639316693479e-07, |
|
"loss": 0.0441, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 2.7462877215391983, |
|
"grad_norm": 0.18540914909816514, |
|
"learning_rate": 4.3258755667005104e-07, |
|
"loss": 0.0427, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.7494810793549416, |
|
"grad_norm": 0.16756011986354746, |
|
"learning_rate": 4.218432706300013e-07, |
|
"loss": 0.0442, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 2.752674437170685, |
|
"grad_norm": 0.19477880662403543, |
|
"learning_rate": 4.1123122188623024e-07, |
|
"loss": 0.0419, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 2.755867794986428, |
|
"grad_norm": 0.16692137735923454, |
|
"learning_rate": 4.0075155695008193e-07, |
|
"loss": 0.0439, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 2.7590611528021713, |
|
"grad_norm": 0.27371092487152754, |
|
"learning_rate": 3.904044205051938e-07, |
|
"loss": 0.0415, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 2.7622545106179146, |
|
"grad_norm": 0.1730044575542229, |
|
"learning_rate": 3.801899554055011e-07, |
|
"loss": 0.0434, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.765447868433658, |
|
"grad_norm": 0.2957249889697754, |
|
"learning_rate": 3.7010830267325546e-07, |
|
"loss": 0.0432, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 2.768641226249401, |
|
"grad_norm": 0.20211132503418788, |
|
"learning_rate": 3.601596014970843e-07, |
|
"loss": 0.0448, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 2.7718345840651444, |
|
"grad_norm": 0.2192148080396869, |
|
"learning_rate": 3.5034398923007195e-07, |
|
"loss": 0.0429, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 2.7750279418808876, |
|
"grad_norm": 0.19416701667619607, |
|
"learning_rate": 3.40661601387855e-07, |
|
"loss": 0.0442, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 2.778221299696631, |
|
"grad_norm": 0.2194341949029401, |
|
"learning_rate": 3.311125716467578e-07, |
|
"loss": 0.0451, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.781414657512374, |
|
"grad_norm": 0.23997919053006325, |
|
"learning_rate": 3.216970318419488e-07, |
|
"loss": 0.0433, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 2.7846080153281174, |
|
"grad_norm": 0.20048137685529088, |
|
"learning_rate": 3.1241511196561045e-07, |
|
"loss": 0.0436, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 2.7878013731438607, |
|
"grad_norm": 0.18418386343058352, |
|
"learning_rate": 3.0326694016515555e-07, |
|
"loss": 0.0431, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 2.790994730959604, |
|
"grad_norm": 0.18647531186123847, |
|
"learning_rate": 2.9425264274144937e-07, |
|
"loss": 0.0441, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 2.794188088775347, |
|
"grad_norm": 0.18103520276457064, |
|
"learning_rate": 2.8537234414707573e-07, |
|
"loss": 0.0424, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.7973814465910904, |
|
"grad_norm": 0.175838788085868, |
|
"learning_rate": 2.766261669846071e-07, |
|
"loss": 0.0428, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 2.8005748044068337, |
|
"grad_norm": 0.18597288140297774, |
|
"learning_rate": 2.680142320049195e-07, |
|
"loss": 0.0461, |
|
"step": 17540 |
|
}, |
|
{ |
|
"epoch": 2.803768162222577, |
|
"grad_norm": 0.19306825995055335, |
|
"learning_rate": 2.5953665810552586e-07, |
|
"loss": 0.0432, |
|
"step": 17560 |
|
}, |
|
{ |
|
"epoch": 2.8069615200383202, |
|
"grad_norm": 0.19244074182083917, |
|
"learning_rate": 2.5119356232892965e-07, |
|
"loss": 0.0447, |
|
"step": 17580 |
|
}, |
|
{ |
|
"epoch": 2.8101548778540635, |
|
"grad_norm": 0.20041935845397732, |
|
"learning_rate": 2.4298505986101397e-07, |
|
"loss": 0.0417, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.8133482356698067, |
|
"grad_norm": 0.1897352035064278, |
|
"learning_rate": 2.3491126402944597e-07, |
|
"loss": 0.0447, |
|
"step": 17620 |
|
}, |
|
{ |
|
"epoch": 2.81654159348555, |
|
"grad_norm": 0.1859749113332233, |
|
"learning_rate": 2.269722863021162e-07, |
|
"loss": 0.0441, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 2.8197349513012933, |
|
"grad_norm": 0.18154530556190202, |
|
"learning_rate": 2.191682362856018e-07, |
|
"loss": 0.0449, |
|
"step": 17660 |
|
}, |
|
{ |
|
"epoch": 2.8229283091170365, |
|
"grad_norm": 0.19576462753720822, |
|
"learning_rate": 2.1149922172364557e-07, |
|
"loss": 0.043, |
|
"step": 17680 |
|
}, |
|
{ |
|
"epoch": 2.8261216669327798, |
|
"grad_norm": 0.19317600637380156, |
|
"learning_rate": 2.0396534849567384e-07, |
|
"loss": 0.0435, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.829315024748523, |
|
"grad_norm": 0.18270539963789229, |
|
"learning_rate": 1.9656672061533876e-07, |
|
"loss": 0.0448, |
|
"step": 17720 |
|
}, |
|
{ |
|
"epoch": 2.8325083825642663, |
|
"grad_norm": 0.25190362174641373, |
|
"learning_rate": 1.8930344022907055e-07, |
|
"loss": 0.0433, |
|
"step": 17740 |
|
}, |
|
{ |
|
"epoch": 2.8357017403800096, |
|
"grad_norm": 0.19271629305777457, |
|
"learning_rate": 1.8217560761467744e-07, |
|
"loss": 0.0442, |
|
"step": 17760 |
|
}, |
|
{ |
|
"epoch": 2.838895098195753, |
|
"grad_norm": 0.6386981477198299, |
|
"learning_rate": 1.7518332117995695e-07, |
|
"loss": 0.0431, |
|
"step": 17780 |
|
}, |
|
{ |
|
"epoch": 2.842088456011496, |
|
"grad_norm": 0.20346250081845807, |
|
"learning_rate": 1.6832667746134236e-07, |
|
"loss": 0.0422, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.8452818138272393, |
|
"grad_norm": 0.17777460027714007, |
|
"learning_rate": 1.6160577112255827e-07, |
|
"loss": 0.0425, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 2.8484751716429826, |
|
"grad_norm": 0.255413137859645, |
|
"learning_rate": 1.5502069495332616e-07, |
|
"loss": 0.0435, |
|
"step": 17840 |
|
}, |
|
{ |
|
"epoch": 2.851668529458726, |
|
"grad_norm": 0.19607428087584267, |
|
"learning_rate": 1.4857153986807649e-07, |
|
"loss": 0.0418, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 2.854861887274469, |
|
"grad_norm": 0.1780772888139799, |
|
"learning_rate": 1.4225839490469628e-07, |
|
"loss": 0.0427, |
|
"step": 17880 |
|
}, |
|
{ |
|
"epoch": 2.8580552450902124, |
|
"grad_norm": 0.21241047060680943, |
|
"learning_rate": 1.3608134722329803e-07, |
|
"loss": 0.0437, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.8612486029059556, |
|
"grad_norm": 0.19239115510673255, |
|
"learning_rate": 1.3004048210501718e-07, |
|
"loss": 0.0434, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 2.864441960721699, |
|
"grad_norm": 0.18795522932841213, |
|
"learning_rate": 1.2413588295083656e-07, |
|
"loss": 0.0431, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 2.867635318537442, |
|
"grad_norm": 0.18585931164828967, |
|
"learning_rate": 1.183676312804305e-07, |
|
"loss": 0.0442, |
|
"step": 17960 |
|
}, |
|
{ |
|
"epoch": 2.8708286763531854, |
|
"grad_norm": 0.18075501501439709, |
|
"learning_rate": 1.1273580673104245e-07, |
|
"loss": 0.0444, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 2.8740220341689287, |
|
"grad_norm": 0.19563735408076433, |
|
"learning_rate": 1.072404870563859e-07, |
|
"loss": 0.0447, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.877215391984672, |
|
"grad_norm": 0.19825850897569677, |
|
"learning_rate": 1.0188174812557073e-07, |
|
"loss": 0.0439, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 2.880408749800415, |
|
"grad_norm": 0.17410835997084562, |
|
"learning_rate": 9.665966392205295e-08, |
|
"loss": 0.0446, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 2.8836021076161584, |
|
"grad_norm": 0.17894750425194603, |
|
"learning_rate": 9.157430654261778e-08, |
|
"loss": 0.0444, |
|
"step": 18060 |
|
}, |
|
{ |
|
"epoch": 2.8867954654319017, |
|
"grad_norm": 0.1932898053763739, |
|
"learning_rate": 8.662574619637931e-08, |
|
"loss": 0.043, |
|
"step": 18080 |
|
}, |
|
{ |
|
"epoch": 2.889988823247645, |
|
"grad_norm": 0.19451425195215136, |
|
"learning_rate": 8.18140512038157e-08, |
|
"loss": 0.0428, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.893182181063388, |
|
"grad_norm": 0.18451759369547344, |
|
"learning_rate": 7.713928799582215e-08, |
|
"loss": 0.0443, |
|
"step": 18120 |
|
}, |
|
{ |
|
"epoch": 2.8963755388791315, |
|
"grad_norm": 0.21235909068408473, |
|
"learning_rate": 7.260152111279839e-08, |
|
"loss": 0.0443, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 2.8995688966948747, |
|
"grad_norm": 0.18028750928095402, |
|
"learning_rate": 6.82008132037515e-08, |
|
"loss": 0.0425, |
|
"step": 18160 |
|
}, |
|
{ |
|
"epoch": 2.902762254510618, |
|
"grad_norm": 0.1865997727595832, |
|
"learning_rate": 6.393722502543665e-08, |
|
"loss": 0.045, |
|
"step": 18180 |
|
}, |
|
{ |
|
"epoch": 2.9059556123263612, |
|
"grad_norm": 0.18553943543624984, |
|
"learning_rate": 5.981081544151446e-08, |
|
"loss": 0.0428, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.9091489701421045, |
|
"grad_norm": 0.19032355954882516, |
|
"learning_rate": 5.5821641421741625e-08, |
|
"loss": 0.0443, |
|
"step": 18220 |
|
}, |
|
{ |
|
"epoch": 2.9123423279578478, |
|
"grad_norm": 0.18084808651831624, |
|
"learning_rate": 5.196975804117932e-08, |
|
"loss": 0.0435, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 2.915535685773591, |
|
"grad_norm": 0.21753074838538441, |
|
"learning_rate": 4.825521847944048e-08, |
|
"loss": 0.0418, |
|
"step": 18260 |
|
}, |
|
{ |
|
"epoch": 2.9187290435893343, |
|
"grad_norm": 0.1883119176872824, |
|
"learning_rate": 4.467807401994706e-08, |
|
"loss": 0.0426, |
|
"step": 18280 |
|
}, |
|
{ |
|
"epoch": 2.9219224014050775, |
|
"grad_norm": 0.17894355455146954, |
|
"learning_rate": 4.123837404922726e-08, |
|
"loss": 0.0429, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.925115759220821, |
|
"grad_norm": 0.20477512942702414, |
|
"learning_rate": 3.7936166056233845e-08, |
|
"loss": 0.0421, |
|
"step": 18320 |
|
}, |
|
{ |
|
"epoch": 2.928309117036564, |
|
"grad_norm": 0.17982986336579576, |
|
"learning_rate": 3.4771495631686914e-08, |
|
"loss": 0.0433, |
|
"step": 18340 |
|
}, |
|
{ |
|
"epoch": 2.9315024748523073, |
|
"grad_norm": 0.19778942398473365, |
|
"learning_rate": 3.174440646744326e-08, |
|
"loss": 0.0434, |
|
"step": 18360 |
|
}, |
|
{ |
|
"epoch": 2.9346958326680506, |
|
"grad_norm": 0.1840797815880338, |
|
"learning_rate": 2.8854940355895756e-08, |
|
"loss": 0.0422, |
|
"step": 18380 |
|
}, |
|
{ |
|
"epoch": 2.937889190483794, |
|
"grad_norm": 0.20492139151779767, |
|
"learning_rate": 2.6103137189394945e-08, |
|
"loss": 0.0433, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.941082548299537, |
|
"grad_norm": 0.18649980327789625, |
|
"learning_rate": 2.3489034959698342e-08, |
|
"loss": 0.0423, |
|
"step": 18420 |
|
}, |
|
{ |
|
"epoch": 2.9442759061152803, |
|
"grad_norm": 0.18710560587786274, |
|
"learning_rate": 2.1012669757446423e-08, |
|
"loss": 0.0447, |
|
"step": 18440 |
|
}, |
|
{ |
|
"epoch": 2.9474692639310236, |
|
"grad_norm": 0.1950435200815635, |
|
"learning_rate": 1.8674075771665246e-08, |
|
"loss": 0.0441, |
|
"step": 18460 |
|
}, |
|
{ |
|
"epoch": 2.950662621746767, |
|
"grad_norm": 0.23718279280034166, |
|
"learning_rate": 1.647328528929126e-08, |
|
"loss": 0.0443, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 2.95385597956251, |
|
"grad_norm": 0.1828813035697597, |
|
"learning_rate": 1.441032869472725e-08, |
|
"loss": 0.0434, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.9570493373782534, |
|
"grad_norm": 0.18330846523906766, |
|
"learning_rate": 1.2485234469425955e-08, |
|
"loss": 0.0447, |
|
"step": 18520 |
|
}, |
|
{ |
|
"epoch": 2.9602426951939966, |
|
"grad_norm": 0.19409145822202675, |
|
"learning_rate": 1.0698029191491543e-08, |
|
"loss": 0.0424, |
|
"step": 18540 |
|
}, |
|
{ |
|
"epoch": 2.96343605300974, |
|
"grad_norm": 0.19044949836984276, |
|
"learning_rate": 9.048737535317654e-09, |
|
"loss": 0.0421, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 2.966629410825483, |
|
"grad_norm": 0.24441457630679606, |
|
"learning_rate": 7.5373822712399e-09, |
|
"loss": 0.0429, |
|
"step": 18580 |
|
}, |
|
{ |
|
"epoch": 2.9698227686412264, |
|
"grad_norm": 0.17238090085354812, |
|
"learning_rate": 6.163984265230571e-09, |
|
"loss": 0.0436, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.9730161264569697, |
|
"grad_norm": 0.17724705729907833, |
|
"learning_rate": 4.928562478603294e-09, |
|
"loss": 0.0438, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 2.9762094842727125, |
|
"grad_norm": 0.17813664105789478, |
|
"learning_rate": 3.831133967754363e-09, |
|
"loss": 0.0443, |
|
"step": 18640 |
|
}, |
|
{ |
|
"epoch": 2.979402842088456, |
|
"grad_norm": 0.19807197965691153, |
|
"learning_rate": 2.8717138839262638e-09, |
|
"loss": 0.0423, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 2.982596199904199, |
|
"grad_norm": 0.20792602007313574, |
|
"learning_rate": 2.050315473000053e-09, |
|
"loss": 0.0437, |
|
"step": 18680 |
|
}, |
|
{ |
|
"epoch": 2.9857895577199427, |
|
"grad_norm": 0.18039436629311245, |
|
"learning_rate": 1.3669500753099586e-09, |
|
"loss": 0.0449, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.9889829155356855, |
|
"grad_norm": 0.16777557295223433, |
|
"learning_rate": 8.216271254901653e-10, |
|
"loss": 0.0433, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 2.992176273351429, |
|
"grad_norm": 0.21329586917274732, |
|
"learning_rate": 4.1435415233936903e-10, |
|
"loss": 0.0437, |
|
"step": 18740 |
|
}, |
|
{ |
|
"epoch": 2.995369631167172, |
|
"grad_norm": 0.17382850175198178, |
|
"learning_rate": 1.451367787230762e-10, |
|
"loss": 0.0434, |
|
"step": 18760 |
|
}, |
|
{ |
|
"epoch": 2.9985629889829157, |
|
"grad_norm": 0.19059012257580193, |
|
"learning_rate": 1.3978721492557968e-11, |
|
"loss": 0.0444, |
|
"step": 18780 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 18789, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6401980526886912.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|