|
{ |
|
"best_metric": 1.613356113433838, |
|
"best_model_checkpoint": "output/checkpoint-6000", |
|
"epoch": 3.0, |
|
"eval_steps": 2000, |
|
"global_step": 9984, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.030048076923076924, |
|
"grad_norm": 2.3787589073181152, |
|
"learning_rate": 2e-05, |
|
"loss": 3.6173, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06009615384615385, |
|
"grad_norm": 1.996222734451294, |
|
"learning_rate": 4e-05, |
|
"loss": 1.7498, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09014423076923077, |
|
"grad_norm": 1.5797070264816284, |
|
"learning_rate": 6e-05, |
|
"loss": 1.7224, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1201923076923077, |
|
"grad_norm": 1.4705523252487183, |
|
"learning_rate": 8e-05, |
|
"loss": 1.6887, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1502403846153846, |
|
"grad_norm": 1.807113766670227, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6883, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18028846153846154, |
|
"grad_norm": 1.211004376411438, |
|
"learning_rate": 9.99725705593595e-05, |
|
"loss": 1.6908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21033653846153846, |
|
"grad_norm": 1.194908857345581, |
|
"learning_rate": 9.989031233240653e-05, |
|
"loss": 1.6865, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2403846153846154, |
|
"grad_norm": 1.125349998474121, |
|
"learning_rate": 9.975331557102723e-05, |
|
"loss": 1.6796, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2704326923076923, |
|
"grad_norm": 1.2960246801376343, |
|
"learning_rate": 9.9561730585003e-05, |
|
"loss": 1.6697, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3004807692307692, |
|
"grad_norm": 1.1631627082824707, |
|
"learning_rate": 9.931576757709384e-05, |
|
"loss": 1.6675, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33052884615384615, |
|
"grad_norm": 1.0823206901550293, |
|
"learning_rate": 9.901569641240883e-05, |
|
"loss": 1.6532, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3605769230769231, |
|
"grad_norm": 1.1251226663589478, |
|
"learning_rate": 9.866184632231592e-05, |
|
"loss": 1.6625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 1.27994704246521, |
|
"learning_rate": 9.825460554321679e-05, |
|
"loss": 1.6463, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4206730769230769, |
|
"grad_norm": 1.3841296434402466, |
|
"learning_rate": 9.779442089058252e-05, |
|
"loss": 1.6462, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.45072115384615385, |
|
"grad_norm": 1.1315901279449463, |
|
"learning_rate": 9.728179726871762e-05, |
|
"loss": 1.6452, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 1.2269665002822876, |
|
"learning_rate": 9.671729711679036e-05, |
|
"loss": 1.6449, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5108173076923077, |
|
"grad_norm": 1.135764479637146, |
|
"learning_rate": 9.610153979173711e-05, |
|
"loss": 1.6367, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5408653846153846, |
|
"grad_norm": 1.2129323482513428, |
|
"learning_rate": 9.543520088871773e-05, |
|
"loss": 1.6271, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5709134615384616, |
|
"grad_norm": 1.2092684507369995, |
|
"learning_rate": 9.471901149986767e-05, |
|
"loss": 1.6353, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6009615384615384, |
|
"grad_norm": 1.2054263353347778, |
|
"learning_rate": 9.39537574121601e-05, |
|
"loss": 1.6386, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6009615384615384, |
|
"eval_loss": 1.627854585647583, |
|
"eval_runtime": 242.6417, |
|
"eval_samples_per_second": 73.141, |
|
"eval_steps_per_second": 9.145, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6310096153846154, |
|
"grad_norm": 1.4155864715576172, |
|
"learning_rate": 9.314027824525798e-05, |
|
"loss": 1.6322, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6610576923076923, |
|
"grad_norm": 1.2875721454620361, |
|
"learning_rate": 9.22794665303021e-05, |
|
"loss": 1.6205, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6911057692307693, |
|
"grad_norm": 1.2036750316619873, |
|
"learning_rate": 9.137226673064603e-05, |
|
"loss": 1.6201, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7211538461538461, |
|
"grad_norm": 1.3741754293441772, |
|
"learning_rate": 9.04196742056119e-05, |
|
"loss": 1.6197, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7512019230769231, |
|
"grad_norm": 1.3001148700714111, |
|
"learning_rate": 8.942273411840452e-05, |
|
"loss": 1.6285, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 1.3223652839660645, |
|
"learning_rate": 8.838254028938162e-05, |
|
"loss": 1.6323, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8112980769230769, |
|
"grad_norm": 1.399418592453003, |
|
"learning_rate": 8.730023399593876e-05, |
|
"loss": 1.6184, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8413461538461539, |
|
"grad_norm": 1.2166377305984497, |
|
"learning_rate": 8.617700272032516e-05, |
|
"loss": 1.6165, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8713942307692307, |
|
"grad_norm": 1.208473563194275, |
|
"learning_rate": 8.501407884676479e-05, |
|
"loss": 1.616, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9014423076923077, |
|
"grad_norm": 1.2277066707611084, |
|
"learning_rate": 8.381273830931207e-05, |
|
"loss": 1.6122, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9314903846153846, |
|
"grad_norm": 1.204302191734314, |
|
"learning_rate": 8.257429919192542e-05, |
|
"loss": 1.6186, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 1.2529523372650146, |
|
"learning_rate": 8.130012028229512e-05, |
|
"loss": 1.6164, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9915865384615384, |
|
"grad_norm": 1.513980507850647, |
|
"learning_rate": 7.999159958101186e-05, |
|
"loss": 1.5971, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.0216346153846154, |
|
"grad_norm": 1.420433521270752, |
|
"learning_rate": 7.865017276771173e-05, |
|
"loss": 1.4976, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.0516826923076923, |
|
"grad_norm": 1.4616764783859253, |
|
"learning_rate": 7.727731162588074e-05, |
|
"loss": 1.4486, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0817307692307692, |
|
"grad_norm": 1.4156601428985596, |
|
"learning_rate": 7.587452242804676e-05, |
|
"loss": 1.4467, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.1117788461538463, |
|
"grad_norm": 1.3352571725845337, |
|
"learning_rate": 7.444334428313112e-05, |
|
"loss": 1.4516, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.1418269230769231, |
|
"grad_norm": 1.4259686470031738, |
|
"learning_rate": 7.298534744777267e-05, |
|
"loss": 1.4466, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"grad_norm": 1.4755374193191528, |
|
"learning_rate": 7.150213160347743e-05, |
|
"loss": 1.446, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.2019230769230769, |
|
"grad_norm": 1.4399892091751099, |
|
"learning_rate": 6.999532410148371e-05, |
|
"loss": 1.4331, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.2019230769230769, |
|
"eval_loss": 1.6244958639144897, |
|
"eval_runtime": 248.0586, |
|
"eval_samples_per_second": 71.544, |
|
"eval_steps_per_second": 8.945, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.2319711538461537, |
|
"grad_norm": 1.629622220993042, |
|
"learning_rate": 6.846657817726882e-05, |
|
"loss": 1.4356, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.2620192307692308, |
|
"grad_norm": 1.5240803956985474, |
|
"learning_rate": 6.691757113665606e-05, |
|
"loss": 1.4403, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.2920673076923077, |
|
"grad_norm": 1.947218894958496, |
|
"learning_rate": 6.535000251551231e-05, |
|
"loss": 1.452, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.3221153846153846, |
|
"grad_norm": 1.6493359804153442, |
|
"learning_rate": 6.376559221505535e-05, |
|
"loss": 1.4435, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.3521634615384617, |
|
"grad_norm": 1.6366957426071167, |
|
"learning_rate": 6.216607861481659e-05, |
|
"loss": 1.4385, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.3822115384615383, |
|
"grad_norm": 1.679699182510376, |
|
"learning_rate": 6.055321666533013e-05, |
|
"loss": 1.4509, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.4122596153846154, |
|
"grad_norm": 1.5405994653701782, |
|
"learning_rate": 5.8928775962640146e-05, |
|
"loss": 1.4375, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"grad_norm": 1.5734689235687256, |
|
"learning_rate": 5.7294538806739775e-05, |
|
"loss": 1.4315, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.4723557692307692, |
|
"grad_norm": 1.6284011602401733, |
|
"learning_rate": 5.565229824607143e-05, |
|
"loss": 1.4457, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.5024038461538463, |
|
"grad_norm": 1.5765283107757568, |
|
"learning_rate": 5.400385611023416e-05, |
|
"loss": 1.4374, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.5324519230769231, |
|
"grad_norm": 1.7722498178482056, |
|
"learning_rate": 5.235102103305654e-05, |
|
"loss": 1.4513, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 1.6080434322357178, |
|
"learning_rate": 5.0695606468204095e-05, |
|
"loss": 1.4322, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.5925480769230769, |
|
"grad_norm": 1.7113045454025269, |
|
"learning_rate": 4.90394286994985e-05, |
|
"loss": 1.4372, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.6225961538461537, |
|
"grad_norm": 1.595045566558838, |
|
"learning_rate": 4.738430484813162e-05, |
|
"loss": 1.4391, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.6526442307692308, |
|
"grad_norm": 1.5261282920837402, |
|
"learning_rate": 4.5732050878960816e-05, |
|
"loss": 1.4375, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.6826923076923077, |
|
"grad_norm": 1.6319518089294434, |
|
"learning_rate": 4.40844796080729e-05, |
|
"loss": 1.4269, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.7127403846153846, |
|
"grad_norm": 1.6978216171264648, |
|
"learning_rate": 4.244339871380291e-05, |
|
"loss": 1.4261, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.7427884615384617, |
|
"grad_norm": 1.748810887336731, |
|
"learning_rate": 4.0810608753389864e-05, |
|
"loss": 1.4349, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.7728365384615383, |
|
"grad_norm": 1.4340012073516846, |
|
"learning_rate": 3.9187901187445675e-05, |
|
"loss": 1.4349, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.8028846153846154, |
|
"grad_norm": 1.7568167448043823, |
|
"learning_rate": 3.757705641440461e-05, |
|
"loss": 1.4318, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.8028846153846154, |
|
"eval_loss": 1.613356113433838, |
|
"eval_runtime": 246.395, |
|
"eval_samples_per_second": 72.027, |
|
"eval_steps_per_second": 9.006, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.8329326923076923, |
|
"grad_norm": 1.7349011898040771, |
|
"learning_rate": 3.5979841817110014e-05, |
|
"loss": 1.4335, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.8629807692307692, |
|
"grad_norm": 1.9418445825576782, |
|
"learning_rate": 3.439800982368133e-05, |
|
"loss": 1.4282, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.8930288461538463, |
|
"grad_norm": 1.6687694787979126, |
|
"learning_rate": 3.283329598478926e-05, |
|
"loss": 1.4309, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 1.694411039352417, |
|
"learning_rate": 3.128741706944832e-05, |
|
"loss": 1.4178, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.953125, |
|
"grad_norm": 1.5258992910385132, |
|
"learning_rate": 2.976206918141635e-05, |
|
"loss": 1.4322, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.9831730769230769, |
|
"grad_norm": 1.8099026679992676, |
|
"learning_rate": 2.8258925898267385e-05, |
|
"loss": 1.416, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.0132211538461537, |
|
"grad_norm": 2.1050777435302734, |
|
"learning_rate": 2.6779636435179777e-05, |
|
"loss": 1.3215, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.043269230769231, |
|
"grad_norm": 2.225262403488159, |
|
"learning_rate": 2.5325823835454278e-05, |
|
"loss": 1.1716, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.0733173076923075, |
|
"grad_norm": 2.9141433238983154, |
|
"learning_rate": 2.3899083189747123e-05, |
|
"loss": 1.1695, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.1033653846153846, |
|
"grad_norm": 2.4766488075256348, |
|
"learning_rate": 2.250097988597234e-05, |
|
"loss": 1.1692, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.1334134615384617, |
|
"grad_norm": 2.1980690956115723, |
|
"learning_rate": 2.1133047891793174e-05, |
|
"loss": 1.1755, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.1634615384615383, |
|
"grad_norm": 2.393937587738037, |
|
"learning_rate": 1.979678807158698e-05, |
|
"loss": 1.1536, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.1935096153846154, |
|
"grad_norm": 2.4240365028381348, |
|
"learning_rate": 1.8493666539730515e-05, |
|
"loss": 1.169, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.2235576923076925, |
|
"grad_norm": 2.4492228031158447, |
|
"learning_rate": 1.7225113052011964e-05, |
|
"loss": 1.1532, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.253605769230769, |
|
"grad_norm": 2.1071839332580566, |
|
"learning_rate": 1.5992519436935022e-05, |
|
"loss": 1.1595, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.2836538461538463, |
|
"grad_norm": 2.5761914253234863, |
|
"learning_rate": 1.4797238068635566e-05, |
|
"loss": 1.1628, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.313701923076923, |
|
"grad_norm": 2.590533971786499, |
|
"learning_rate": 1.3640580383087232e-05, |
|
"loss": 1.1634, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.34375, |
|
"grad_norm": 2.3521673679351807, |
|
"learning_rate": 1.252381543922313e-05, |
|
"loss": 1.1545, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.373798076923077, |
|
"grad_norm": 2.7083418369293213, |
|
"learning_rate": 1.1448168526552727e-05, |
|
"loss": 1.1542, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"grad_norm": 2.427485227584839, |
|
"learning_rate": 1.0414819820801663e-05, |
|
"loss": 1.1633, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"eval_loss": 1.730972409248352, |
|
"eval_runtime": 246.4517, |
|
"eval_samples_per_second": 72.01, |
|
"eval_steps_per_second": 9.004, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.433894230769231, |
|
"grad_norm": 2.347303628921509, |
|
"learning_rate": 9.424903089049375e-06, |
|
"loss": 1.1631, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.4639423076923075, |
|
"grad_norm": 2.697683095932007, |
|
"learning_rate": 8.479504445785158e-06, |
|
"loss": 1.1569, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.4939903846153846, |
|
"grad_norm": 2.401367425918579, |
|
"learning_rate": 7.5796611612476916e-06, |
|
"loss": 1.1482, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.5240384615384617, |
|
"grad_norm": 2.610416889190674, |
|
"learning_rate": 6.726360523355324e-06, |
|
"loss": 1.1614, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.5540865384615383, |
|
"grad_norm": 2.711500644683838, |
|
"learning_rate": 5.920538754475901e-06, |
|
"loss": 1.1555, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.5841346153846154, |
|
"grad_norm": 2.241981029510498, |
|
"learning_rate": 5.163079984224467e-06, |
|
"loss": 1.1707, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.6141826923076925, |
|
"grad_norm": 2.55198073387146, |
|
"learning_rate": 4.454815279416058e-06, |
|
"loss": 1.1605, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.644230769230769, |
|
"grad_norm": 2.5612592697143555, |
|
"learning_rate": 3.7965217322378287e-06, |
|
"loss": 1.1629, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.6742788461538463, |
|
"grad_norm": 2.2738687992095947, |
|
"learning_rate": 3.188921607640816e-06, |
|
"loss": 1.1419, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.7043269230769234, |
|
"grad_norm": 2.611133575439453, |
|
"learning_rate": 2.6326815508870616e-06, |
|
"loss": 1.1618, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.734375, |
|
"grad_norm": 3.1775176525115967, |
|
"learning_rate": 2.1284118561212986e-06, |
|
"loss": 1.1419, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.7644230769230766, |
|
"grad_norm": 2.3005447387695312, |
|
"learning_rate": 1.6766657967699163e-06, |
|
"loss": 1.1591, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.7944711538461537, |
|
"grad_norm": 2.749657154083252, |
|
"learning_rate": 1.2779390185016838e-06, |
|
"loss": 1.1565, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.824519230769231, |
|
"grad_norm": 2.717308521270752, |
|
"learning_rate": 9.326689954164636e-07, |
|
"loss": 1.157, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.8545673076923075, |
|
"grad_norm": 2.5376195907592773, |
|
"learning_rate": 6.412345500583783e-07, |
|
"loss": 1.1587, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.8846153846153846, |
|
"grad_norm": 2.3610806465148926, |
|
"learning_rate": 4.0395543778020686e-07, |
|
"loss": 1.1494, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.9146634615384617, |
|
"grad_norm": 2.4964439868927, |
|
"learning_rate": 2.210919959149682e-07, |
|
"loss": 1.1545, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.9447115384615383, |
|
"grad_norm": 2.600806951522827, |
|
"learning_rate": 9.284485813962906e-08, |
|
"loss": 1.1585, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.9747596153846154, |
|
"grad_norm": 2.45125412940979, |
|
"learning_rate": 1.9354734344295687e-08, |
|
"loss": 1.1554, |
|
"step": 9900 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9984, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0448513983410995e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|