|
{ |
|
"best_metric": 2.5792043209075928, |
|
"best_model_checkpoint": "./clip-roberta-finetuned/checkpoint-48000", |
|
"epoch": 10.0, |
|
"global_step": 68710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.963760733517683e-05, |
|
"loss": 2.9841, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 3.411221504211426, |
|
"eval_runtime": 218.2214, |
|
"eval_samples_per_second": 447.83, |
|
"eval_steps_per_second": 1.751, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.927521467035366e-05, |
|
"loss": 2.72, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 3.3430113792419434, |
|
"eval_runtime": 214.9656, |
|
"eval_samples_per_second": 454.612, |
|
"eval_steps_per_second": 1.777, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.891209430941639e-05, |
|
"loss": 2.6319, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 3.2295451164245605, |
|
"eval_runtime": 250.7246, |
|
"eval_samples_per_second": 389.774, |
|
"eval_steps_per_second": 1.524, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.854824625236501e-05, |
|
"loss": 2.5781, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 3.1644504070281982, |
|
"eval_runtime": 249.2113, |
|
"eval_samples_per_second": 392.141, |
|
"eval_steps_per_second": 1.533, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.818439819531364e-05, |
|
"loss": 2.5339, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 3.1226284503936768, |
|
"eval_runtime": 249.7319, |
|
"eval_samples_per_second": 391.324, |
|
"eval_steps_per_second": 1.53, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.782055013826226e-05, |
|
"loss": 2.503, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 3.0856029987335205, |
|
"eval_runtime": 260.6356, |
|
"eval_samples_per_second": 374.953, |
|
"eval_steps_per_second": 1.466, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.745670208121089e-05, |
|
"loss": 2.4581, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 3.063863515853882, |
|
"eval_runtime": 246.5877, |
|
"eval_samples_per_second": 396.313, |
|
"eval_steps_per_second": 1.549, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.709285402415951e-05, |
|
"loss": 2.4494, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 3.0415244102478027, |
|
"eval_runtime": 244.6941, |
|
"eval_samples_per_second": 399.38, |
|
"eval_steps_per_second": 1.561, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.6729005967108134e-05, |
|
"loss": 2.4275, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 3.0244903564453125, |
|
"eval_runtime": 210.3742, |
|
"eval_samples_per_second": 464.534, |
|
"eval_steps_per_second": 1.816, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.636515791005676e-05, |
|
"loss": 2.3909, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.999117851257324, |
|
"eval_runtime": 210.4024, |
|
"eval_samples_per_second": 464.472, |
|
"eval_steps_per_second": 1.816, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.6001309853005384e-05, |
|
"loss": 2.3902, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.9931323528289795, |
|
"eval_runtime": 208.7009, |
|
"eval_samples_per_second": 468.259, |
|
"eval_steps_per_second": 1.83, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.563746179595401e-05, |
|
"loss": 2.3741, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 2.9612369537353516, |
|
"eval_runtime": 212.7001, |
|
"eval_samples_per_second": 459.454, |
|
"eval_steps_per_second": 1.796, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.5273613738902634e-05, |
|
"loss": 2.3536, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.9508631229400635, |
|
"eval_runtime": 210.803, |
|
"eval_samples_per_second": 463.589, |
|
"eval_steps_per_second": 1.812, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.490976568185126e-05, |
|
"loss": 2.3392, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.9288971424102783, |
|
"eval_runtime": 210.6758, |
|
"eval_samples_per_second": 463.869, |
|
"eval_steps_per_second": 1.813, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.454591762479989e-05, |
|
"loss": 2.3083, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 2.9214062690734863, |
|
"eval_runtime": 211.6271, |
|
"eval_samples_per_second": 461.784, |
|
"eval_steps_per_second": 1.805, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.418206956774851e-05, |
|
"loss": 2.3094, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 2.915283441543579, |
|
"eval_runtime": 210.1146, |
|
"eval_samples_per_second": 465.108, |
|
"eval_steps_per_second": 1.818, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.3818221510697134e-05, |
|
"loss": 2.2864, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.903420925140381, |
|
"eval_runtime": 214.0395, |
|
"eval_samples_per_second": 456.579, |
|
"eval_steps_per_second": 1.785, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.3454373453645755e-05, |
|
"loss": 2.2893, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.8963093757629395, |
|
"eval_runtime": 218.4194, |
|
"eval_samples_per_second": 447.424, |
|
"eval_steps_per_second": 1.749, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.3090525396594384e-05, |
|
"loss": 2.2697, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.884676456451416, |
|
"eval_runtime": 207.7625, |
|
"eval_samples_per_second": 470.374, |
|
"eval_steps_per_second": 1.839, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.2726677339543005e-05, |
|
"loss": 2.2762, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.866511106491089, |
|
"eval_runtime": 207.4714, |
|
"eval_samples_per_second": 471.034, |
|
"eval_steps_per_second": 1.841, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.2363556978605734e-05, |
|
"loss": 2.2667, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.853637456893921, |
|
"eval_runtime": 209.4841, |
|
"eval_samples_per_second": 466.508, |
|
"eval_steps_per_second": 1.824, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.1999708921554356e-05, |
|
"loss": 2.2548, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 2.8472321033477783, |
|
"eval_runtime": 207.7935, |
|
"eval_samples_per_second": 470.303, |
|
"eval_steps_per_second": 1.838, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.1635860864502984e-05, |
|
"loss": 2.238, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 2.849086284637451, |
|
"eval_runtime": 207.5183, |
|
"eval_samples_per_second": 470.927, |
|
"eval_steps_per_second": 1.841, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.127201280745161e-05, |
|
"loss": 2.2423, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.825746774673462, |
|
"eval_runtime": 218.1498, |
|
"eval_samples_per_second": 447.977, |
|
"eval_steps_per_second": 1.751, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.0908164750400234e-05, |
|
"loss": 2.2406, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.82869029045105, |
|
"eval_runtime": 208.9781, |
|
"eval_samples_per_second": 467.637, |
|
"eval_steps_per_second": 1.828, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.054431669334886e-05, |
|
"loss": 2.2248, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.81931734085083, |
|
"eval_runtime": 210.3496, |
|
"eval_samples_per_second": 464.588, |
|
"eval_steps_per_second": 1.816, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.0181196332411585e-05, |
|
"loss": 2.223, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.810143232345581, |
|
"eval_runtime": 211.5447, |
|
"eval_samples_per_second": 461.964, |
|
"eval_steps_per_second": 1.806, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.981734827536021e-05, |
|
"loss": 2.1995, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.802741527557373, |
|
"eval_runtime": 210.9696, |
|
"eval_samples_per_second": 463.223, |
|
"eval_steps_per_second": 1.811, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.9453500218308835e-05, |
|
"loss": 2.1834, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.787959098815918, |
|
"eval_runtime": 207.5007, |
|
"eval_samples_per_second": 470.967, |
|
"eval_steps_per_second": 1.841, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.908965216125746e-05, |
|
"loss": 2.1723, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 2.778273582458496, |
|
"eval_runtime": 217.4843, |
|
"eval_samples_per_second": 449.347, |
|
"eval_steps_per_second": 1.756, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.8725804104206085e-05, |
|
"loss": 2.1651, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.773916721343994, |
|
"eval_runtime": 211.3325, |
|
"eval_samples_per_second": 462.428, |
|
"eval_steps_per_second": 1.808, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.836195604715471e-05, |
|
"loss": 2.1575, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.782458543777466, |
|
"eval_runtime": 214.023, |
|
"eval_samples_per_second": 456.615, |
|
"eval_steps_per_second": 1.785, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.7998107990103335e-05, |
|
"loss": 2.1598, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.7659904956817627, |
|
"eval_runtime": 211.2594, |
|
"eval_samples_per_second": 462.588, |
|
"eval_steps_per_second": 1.808, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7634259933051956e-05, |
|
"loss": 2.1667, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 2.75777530670166, |
|
"eval_runtime": 209.9442, |
|
"eval_samples_per_second": 465.486, |
|
"eval_steps_per_second": 1.82, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.7271139572114685e-05, |
|
"loss": 2.1565, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 2.757976770401001, |
|
"eval_runtime": 211.0178, |
|
"eval_samples_per_second": 463.117, |
|
"eval_steps_per_second": 1.81, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.6907291515063314e-05, |
|
"loss": 2.1558, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.7561423778533936, |
|
"eval_runtime": 210.4961, |
|
"eval_samples_per_second": 464.265, |
|
"eval_steps_per_second": 1.815, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.6543443458011935e-05, |
|
"loss": 2.1642, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.751215934753418, |
|
"eval_runtime": 210.4158, |
|
"eval_samples_per_second": 464.442, |
|
"eval_steps_per_second": 1.815, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.617959540096056e-05, |
|
"loss": 2.1374, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 2.736060619354248, |
|
"eval_runtime": 214.104, |
|
"eval_samples_per_second": 456.442, |
|
"eval_steps_per_second": 1.784, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.5815747343909185e-05, |
|
"loss": 2.1402, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 2.7384564876556396, |
|
"eval_runtime": 210.1981, |
|
"eval_samples_per_second": 464.923, |
|
"eval_steps_per_second": 1.817, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.545189928685781e-05, |
|
"loss": 2.1326, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 2.723484516143799, |
|
"eval_runtime": 210.8719, |
|
"eval_samples_per_second": 463.438, |
|
"eval_steps_per_second": 1.812, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.5088051229806435e-05, |
|
"loss": 2.1272, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 2.7183401584625244, |
|
"eval_runtime": 239.7397, |
|
"eval_samples_per_second": 407.634, |
|
"eval_steps_per_second": 1.593, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.4724203172755057e-05, |
|
"loss": 2.0954, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_loss": 2.7156314849853516, |
|
"eval_runtime": 211.1012, |
|
"eval_samples_per_second": 462.934, |
|
"eval_steps_per_second": 1.81, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.4360355115703685e-05, |
|
"loss": 2.0842, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.7065327167510986, |
|
"eval_runtime": 210.8515, |
|
"eval_samples_per_second": 463.483, |
|
"eval_steps_per_second": 1.812, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.399650705865231e-05, |
|
"loss": 2.0859, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 2.7088747024536133, |
|
"eval_runtime": 215.2076, |
|
"eval_samples_per_second": 454.101, |
|
"eval_steps_per_second": 1.775, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.3632659001600935e-05, |
|
"loss": 2.0856, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 2.6962101459503174, |
|
"eval_runtime": 210.6214, |
|
"eval_samples_per_second": 463.989, |
|
"eval_steps_per_second": 1.814, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.3268810944549556e-05, |
|
"loss": 2.0775, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 2.693091630935669, |
|
"eval_runtime": 210.6882, |
|
"eval_samples_per_second": 463.842, |
|
"eval_steps_per_second": 1.813, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.2905690583612286e-05, |
|
"loss": 2.0821, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 2.693345069885254, |
|
"eval_runtime": 217.6654, |
|
"eval_samples_per_second": 448.973, |
|
"eval_steps_per_second": 1.755, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.2541842526560914e-05, |
|
"loss": 2.0706, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_loss": 2.70108699798584, |
|
"eval_runtime": 210.1763, |
|
"eval_samples_per_second": 464.971, |
|
"eval_steps_per_second": 1.818, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.2177994469509535e-05, |
|
"loss": 2.0689, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.7009191513061523, |
|
"eval_runtime": 207.9861, |
|
"eval_samples_per_second": 469.868, |
|
"eval_steps_per_second": 1.837, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.181414641245816e-05, |
|
"loss": 2.0807, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.682542324066162, |
|
"eval_runtime": 214.5749, |
|
"eval_samples_per_second": 455.44, |
|
"eval_steps_per_second": 1.78, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 3.1450298355406785e-05, |
|
"loss": 2.0639, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.674436330795288, |
|
"eval_runtime": 245.508, |
|
"eval_samples_per_second": 398.056, |
|
"eval_steps_per_second": 1.556, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 3.108645029835541e-05, |
|
"loss": 2.0742, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 2.677746295928955, |
|
"eval_runtime": 245.3374, |
|
"eval_samples_per_second": 398.333, |
|
"eval_steps_per_second": 1.557, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.0722602241304035e-05, |
|
"loss": 2.0789, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"eval_loss": 2.6688921451568604, |
|
"eval_runtime": 246.4423, |
|
"eval_samples_per_second": 396.547, |
|
"eval_steps_per_second": 1.55, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.0359481880366758e-05, |
|
"loss": 2.0594, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 2.6566038131713867, |
|
"eval_runtime": 252.2995, |
|
"eval_samples_per_second": 387.341, |
|
"eval_steps_per_second": 1.514, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.9995633823315383e-05, |
|
"loss": 2.056, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.667599678039551, |
|
"eval_runtime": 245.2202, |
|
"eval_samples_per_second": 398.523, |
|
"eval_steps_per_second": 1.558, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.9631785766264007e-05, |
|
"loss": 2.0223, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 2.6711361408233643, |
|
"eval_runtime": 245.2433, |
|
"eval_samples_per_second": 398.486, |
|
"eval_steps_per_second": 1.558, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 2.9267937709212632e-05, |
|
"loss": 2.0185, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 2.65678071975708, |
|
"eval_runtime": 208.848, |
|
"eval_samples_per_second": 467.929, |
|
"eval_steps_per_second": 1.829, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.890408965216126e-05, |
|
"loss": 2.018, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 2.656717538833618, |
|
"eval_runtime": 209.2801, |
|
"eval_samples_per_second": 466.963, |
|
"eval_steps_per_second": 1.825, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.8540241595109886e-05, |
|
"loss": 2.0036, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 2.6545379161834717, |
|
"eval_runtime": 210.0273, |
|
"eval_samples_per_second": 465.301, |
|
"eval_steps_per_second": 1.819, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.817639353805851e-05, |
|
"loss": 2.0238, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 2.6558964252471924, |
|
"eval_runtime": 211.3486, |
|
"eval_samples_per_second": 462.392, |
|
"eval_steps_per_second": 1.807, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.781472856934944e-05, |
|
"loss": 2.0091, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 2.6450281143188477, |
|
"eval_runtime": 208.486, |
|
"eval_samples_per_second": 468.741, |
|
"eval_steps_per_second": 1.832, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.7450880512298066e-05, |
|
"loss": 2.0096, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.6388843059539795, |
|
"eval_runtime": 210.8413, |
|
"eval_samples_per_second": 463.505, |
|
"eval_steps_per_second": 1.812, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.708703245524669e-05, |
|
"loss": 2.0083, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.6401255130767822, |
|
"eval_runtime": 213.1597, |
|
"eval_samples_per_second": 458.464, |
|
"eval_steps_per_second": 1.792, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 2.6723184398195316e-05, |
|
"loss": 2.0012, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_loss": 2.639948844909668, |
|
"eval_runtime": 234.1271, |
|
"eval_samples_per_second": 417.406, |
|
"eval_steps_per_second": 1.632, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.635933634114394e-05, |
|
"loss": 2.0166, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_loss": 2.628899097442627, |
|
"eval_runtime": 242.1039, |
|
"eval_samples_per_second": 403.653, |
|
"eval_steps_per_second": 1.578, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.5995488284092563e-05, |
|
"loss": 1.9963, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 2.634817361831665, |
|
"eval_runtime": 275.7387, |
|
"eval_samples_per_second": 354.415, |
|
"eval_steps_per_second": 1.385, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.5631640227041188e-05, |
|
"loss": 1.9943, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 2.6239511966705322, |
|
"eval_runtime": 223.4038, |
|
"eval_samples_per_second": 437.441, |
|
"eval_steps_per_second": 1.71, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.5267792169989813e-05, |
|
"loss": 2.0099, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 2.618997812271118, |
|
"eval_runtime": 215.7939, |
|
"eval_samples_per_second": 452.867, |
|
"eval_steps_per_second": 1.77, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.4903944112938438e-05, |
|
"loss": 1.9895, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 2.630808115005493, |
|
"eval_runtime": 235.2775, |
|
"eval_samples_per_second": 415.365, |
|
"eval_steps_per_second": 1.624, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.4540096055887063e-05, |
|
"loss": 1.9581, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 2.638457775115967, |
|
"eval_runtime": 232.5729, |
|
"eval_samples_per_second": 420.195, |
|
"eval_steps_per_second": 1.642, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 2.4176247998835687e-05, |
|
"loss": 1.9502, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_loss": 2.6236515045166016, |
|
"eval_runtime": 233.0168, |
|
"eval_samples_per_second": 419.395, |
|
"eval_steps_per_second": 1.639, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.3812399941784312e-05, |
|
"loss": 1.9485, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 2.624785900115967, |
|
"eval_runtime": 246.6057, |
|
"eval_samples_per_second": 396.284, |
|
"eval_steps_per_second": 1.549, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 2.3448551884732937e-05, |
|
"loss": 1.9643, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 2.627931833267212, |
|
"eval_runtime": 212.8412, |
|
"eval_samples_per_second": 459.15, |
|
"eval_steps_per_second": 1.795, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 2.3084703827681562e-05, |
|
"loss": 1.9535, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_loss": 2.6185333728790283, |
|
"eval_runtime": 210.7296, |
|
"eval_samples_per_second": 463.751, |
|
"eval_steps_per_second": 1.813, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 2.2720855770630187e-05, |
|
"loss": 1.9575, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.614642381668091, |
|
"eval_runtime": 211.3001, |
|
"eval_samples_per_second": 462.499, |
|
"eval_steps_per_second": 1.808, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 2.235700771357881e-05, |
|
"loss": 1.9475, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_loss": 2.6092729568481445, |
|
"eval_runtime": 212.2513, |
|
"eval_samples_per_second": 460.426, |
|
"eval_steps_per_second": 1.8, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 2.1993159656527434e-05, |
|
"loss": 1.9434, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 2.60904598236084, |
|
"eval_runtime": 209.1736, |
|
"eval_samples_per_second": 467.2, |
|
"eval_steps_per_second": 1.826, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 2.1630039295590163e-05, |
|
"loss": 1.954, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 2.60274338722229, |
|
"eval_runtime": 223.8437, |
|
"eval_samples_per_second": 436.581, |
|
"eval_steps_per_second": 1.707, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 2.1266191238538788e-05, |
|
"loss": 1.9509, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 2.6107161045074463, |
|
"eval_runtime": 213.2878, |
|
"eval_samples_per_second": 458.188, |
|
"eval_steps_per_second": 1.791, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 2.0902343181487413e-05, |
|
"loss": 1.9454, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_loss": 2.59796142578125, |
|
"eval_runtime": 214.4371, |
|
"eval_samples_per_second": 455.733, |
|
"eval_steps_per_second": 1.781, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 2.053922282055014e-05, |
|
"loss": 1.9479, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 2.6016438007354736, |
|
"eval_runtime": 218.6403, |
|
"eval_samples_per_second": 446.972, |
|
"eval_steps_per_second": 1.747, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 2.0175374763498764e-05, |
|
"loss": 1.9539, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 2.5970652103424072, |
|
"eval_runtime": 214.0818, |
|
"eval_samples_per_second": 456.489, |
|
"eval_steps_per_second": 1.784, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.981152670644739e-05, |
|
"loss": 1.9119, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 2.622750759124756, |
|
"eval_runtime": 211.471, |
|
"eval_samples_per_second": 462.125, |
|
"eval_steps_per_second": 1.806, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 1.9447678649396013e-05, |
|
"loss": 1.8974, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_loss": 2.6169052124023438, |
|
"eval_runtime": 221.6976, |
|
"eval_samples_per_second": 440.808, |
|
"eval_steps_per_second": 1.723, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.908383059234464e-05, |
|
"loss": 1.9038, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_loss": 2.6027112007141113, |
|
"eval_runtime": 212.7185, |
|
"eval_samples_per_second": 459.415, |
|
"eval_steps_per_second": 1.796, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.8719982535293263e-05, |
|
"loss": 1.9008, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 2.602651357650757, |
|
"eval_runtime": 212.6929, |
|
"eval_samples_per_second": 459.47, |
|
"eval_steps_per_second": 1.796, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8356134478241888e-05, |
|
"loss": 1.9142, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.6011383533477783, |
|
"eval_runtime": 217.9682, |
|
"eval_samples_per_second": 448.35, |
|
"eval_steps_per_second": 1.753, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.7992286421190513e-05, |
|
"loss": 1.8783, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.595999002456665, |
|
"eval_runtime": 216.5321, |
|
"eval_samples_per_second": 451.323, |
|
"eval_steps_per_second": 1.764, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.7628438364139135e-05, |
|
"loss": 1.8896, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"eval_loss": 2.6111366748809814, |
|
"eval_runtime": 209.5809, |
|
"eval_samples_per_second": 466.293, |
|
"eval_steps_per_second": 1.823, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.7265318003201864e-05, |
|
"loss": 1.8975, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 2.588871955871582, |
|
"eval_runtime": 215.4187, |
|
"eval_samples_per_second": 453.656, |
|
"eval_steps_per_second": 1.773, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1.690146994615049e-05, |
|
"loss": 1.9048, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"eval_loss": 2.600691556930542, |
|
"eval_runtime": 225.7312, |
|
"eval_samples_per_second": 432.931, |
|
"eval_steps_per_second": 1.692, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.6537621889099114e-05, |
|
"loss": 1.9049, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 2.5971837043762207, |
|
"eval_runtime": 213.1258, |
|
"eval_samples_per_second": 458.537, |
|
"eval_steps_per_second": 1.792, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 1.6173773832047735e-05, |
|
"loss": 1.8969, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 2.605257987976074, |
|
"eval_runtime": 212.2604, |
|
"eval_samples_per_second": 460.406, |
|
"eval_steps_per_second": 1.8, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 1.580992577499636e-05, |
|
"loss": 1.9105, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 2.589334726333618, |
|
"eval_runtime": 211.6138, |
|
"eval_samples_per_second": 461.813, |
|
"eval_steps_per_second": 1.805, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 1.544680541405909e-05, |
|
"loss": 1.8921, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 2.5882816314697266, |
|
"eval_runtime": 211.2215, |
|
"eval_samples_per_second": 462.671, |
|
"eval_steps_per_second": 1.809, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 1.5083685053121819e-05, |
|
"loss": 1.8918, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_loss": 2.5792043209075928, |
|
"eval_runtime": 211.5529, |
|
"eval_samples_per_second": 461.946, |
|
"eval_steps_per_second": 1.806, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.471983699607044e-05, |
|
"loss": 1.8671, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_loss": 2.604069232940674, |
|
"eval_runtime": 212.2008, |
|
"eval_samples_per_second": 460.536, |
|
"eval_steps_per_second": 1.8, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 1.4355988939019067e-05, |
|
"loss": 1.8551, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"eval_loss": 2.6070237159729004, |
|
"eval_runtime": 220.205, |
|
"eval_samples_per_second": 443.795, |
|
"eval_steps_per_second": 1.735, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.3992140881967692e-05, |
|
"loss": 1.8555, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 2.614821434020996, |
|
"eval_runtime": 263.5514, |
|
"eval_samples_per_second": 370.804, |
|
"eval_steps_per_second": 1.449, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.3628292824916317e-05, |
|
"loss": 1.8543, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 2.607656955718994, |
|
"eval_runtime": 264.5295, |
|
"eval_samples_per_second": 369.433, |
|
"eval_steps_per_second": 1.444, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.326444476786494e-05, |
|
"loss": 1.8485, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_loss": 2.613083839416504, |
|
"eval_runtime": 263.0661, |
|
"eval_samples_per_second": 371.488, |
|
"eval_steps_per_second": 1.452, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 1.2900596710813565e-05, |
|
"loss": 1.8474, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"eval_loss": 2.603851079940796, |
|
"eval_runtime": 285.6161, |
|
"eval_samples_per_second": 342.159, |
|
"eval_steps_per_second": 1.337, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.253674865376219e-05, |
|
"loss": 1.8474, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 2.5973451137542725, |
|
"eval_runtime": 273.9949, |
|
"eval_samples_per_second": 356.671, |
|
"eval_steps_per_second": 1.394, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 1.2172900596710813e-05, |
|
"loss": 1.8442, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 2.5946028232574463, |
|
"eval_runtime": 244.7175, |
|
"eval_samples_per_second": 399.342, |
|
"eval_steps_per_second": 1.561, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 1.1809780235773542e-05, |
|
"loss": 1.8329, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_loss": 2.606858253479004, |
|
"eval_runtime": 275.6967, |
|
"eval_samples_per_second": 354.469, |
|
"eval_steps_per_second": 1.386, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 1.1445932178722165e-05, |
|
"loss": 1.8551, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 2.592348337173462, |
|
"eval_runtime": 253.7347, |
|
"eval_samples_per_second": 385.15, |
|
"eval_steps_per_second": 1.506, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.108208412167079e-05, |
|
"loss": 1.8433, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"eval_loss": 2.592151641845703, |
|
"eval_runtime": 250.8033, |
|
"eval_samples_per_second": 389.652, |
|
"eval_steps_per_second": 1.523, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 1.0718236064619415e-05, |
|
"loss": 1.851, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_loss": 2.5993497371673584, |
|
"eval_runtime": 244.9443, |
|
"eval_samples_per_second": 398.972, |
|
"eval_steps_per_second": 1.56, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.035438800756804e-05, |
|
"loss": 1.8313, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"eval_loss": 2.59601092338562, |
|
"eval_runtime": 247.9824, |
|
"eval_samples_per_second": 394.084, |
|
"eval_steps_per_second": 1.54, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.991267646630768e-06, |
|
"loss": 1.8298, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.6058406829833984, |
|
"eval_runtime": 248.1822, |
|
"eval_samples_per_second": 393.767, |
|
"eval_steps_per_second": 1.539, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 9.628147285693495e-06, |
|
"loss": 1.8159, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 2.6286239624023438, |
|
"eval_runtime": 249.0138, |
|
"eval_samples_per_second": 392.452, |
|
"eval_steps_per_second": 1.534, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.264299228642118e-06, |
|
"loss": 1.817, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 2.634847402572632, |
|
"eval_runtime": 257.4805, |
|
"eval_samples_per_second": 379.547, |
|
"eval_steps_per_second": 1.484, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 8.900451171590745e-06, |
|
"loss": 1.8066, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 2.6410584449768066, |
|
"eval_runtime": 307.9556, |
|
"eval_samples_per_second": 317.338, |
|
"eval_steps_per_second": 1.24, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.536603114539368e-06, |
|
"loss": 1.7935, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"eval_loss": 2.633836269378662, |
|
"eval_runtime": 253.2423, |
|
"eval_samples_per_second": 385.899, |
|
"eval_steps_per_second": 1.508, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 8.172755057487993e-06, |
|
"loss": 1.809, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"eval_loss": 2.629018783569336, |
|
"eval_runtime": 250.9739, |
|
"eval_samples_per_second": 389.387, |
|
"eval_steps_per_second": 1.522, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 7.808907000436618e-06, |
|
"loss": 1.812, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"eval_loss": 2.6257762908935547, |
|
"eval_runtime": 253.0757, |
|
"eval_samples_per_second": 386.153, |
|
"eval_steps_per_second": 1.509, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 7.445058943385242e-06, |
|
"loss": 1.79, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"eval_loss": 2.6320676803588867, |
|
"eval_runtime": 250.9004, |
|
"eval_samples_per_second": 389.501, |
|
"eval_steps_per_second": 1.523, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.0812108863338665e-06, |
|
"loss": 1.8046, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 2.6290555000305176, |
|
"eval_runtime": 248.8573, |
|
"eval_samples_per_second": 392.699, |
|
"eval_steps_per_second": 1.535, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 6.718090525396595e-06, |
|
"loss": 1.7975, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_loss": 2.6282989978790283, |
|
"eval_runtime": 245.3595, |
|
"eval_samples_per_second": 398.297, |
|
"eval_steps_per_second": 1.557, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 6.354970164459321e-06, |
|
"loss": 1.7968, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_loss": 2.628397226333618, |
|
"eval_runtime": 253.0259, |
|
"eval_samples_per_second": 386.229, |
|
"eval_steps_per_second": 1.51, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 5.991122107407947e-06, |
|
"loss": 1.7779, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_loss": 2.625650405883789, |
|
"eval_runtime": 247.6056, |
|
"eval_samples_per_second": 394.684, |
|
"eval_steps_per_second": 1.543, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 5.627274050356571e-06, |
|
"loss": 1.7664, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"eval_loss": 2.623215675354004, |
|
"eval_runtime": 268.5188, |
|
"eval_samples_per_second": 363.945, |
|
"eval_steps_per_second": 1.423, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 5.263425993305196e-06, |
|
"loss": 1.792, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_loss": 2.63053297996521, |
|
"eval_runtime": 248.0445, |
|
"eval_samples_per_second": 393.986, |
|
"eval_steps_per_second": 1.54, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.89957793625382e-06, |
|
"loss": 1.7725, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_loss": 2.6525118350982666, |
|
"eval_runtime": 247.425, |
|
"eval_samples_per_second": 394.972, |
|
"eval_steps_per_second": 1.544, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 4.535729879202445e-06, |
|
"loss": 1.7563, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_loss": 2.679419755935669, |
|
"eval_runtime": 249.9688, |
|
"eval_samples_per_second": 390.953, |
|
"eval_steps_per_second": 1.528, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 4.17188182215107e-06, |
|
"loss": 1.7606, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"eval_loss": 2.6783671379089355, |
|
"eval_runtime": 248.5877, |
|
"eval_samples_per_second": 393.125, |
|
"eval_steps_per_second": 1.537, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 3.8080337650996943e-06, |
|
"loss": 1.7666, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"eval_loss": 2.679766893386841, |
|
"eval_runtime": 253.2315, |
|
"eval_samples_per_second": 385.916, |
|
"eval_steps_per_second": 1.509, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.4449134041624217e-06, |
|
"loss": 1.7551, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"eval_loss": 2.6813337802886963, |
|
"eval_runtime": 248.5037, |
|
"eval_samples_per_second": 393.258, |
|
"eval_steps_per_second": 1.537, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 3.0810653471110467e-06, |
|
"loss": 1.7578, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_loss": 2.683032751083374, |
|
"eval_runtime": 245.3373, |
|
"eval_samples_per_second": 398.333, |
|
"eval_steps_per_second": 1.557, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.717217290059671e-06, |
|
"loss": 1.7483, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"eval_loss": 2.6832828521728516, |
|
"eval_runtime": 266.4744, |
|
"eval_samples_per_second": 366.737, |
|
"eval_steps_per_second": 1.434, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 2.3533692330082957e-06, |
|
"loss": 1.7431, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_loss": 2.6883933544158936, |
|
"eval_runtime": 256.629, |
|
"eval_samples_per_second": 380.807, |
|
"eval_steps_per_second": 1.489, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 1.9895211759569207e-06, |
|
"loss": 1.743, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"eval_loss": 2.6931965351104736, |
|
"eval_runtime": 260.7406, |
|
"eval_samples_per_second": 374.802, |
|
"eval_steps_per_second": 1.465, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.6264008150196477e-06, |
|
"loss": 1.7395, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"eval_loss": 2.6927101612091064, |
|
"eval_runtime": 254.3781, |
|
"eval_samples_per_second": 384.176, |
|
"eval_steps_per_second": 1.502, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.2625527579682726e-06, |
|
"loss": 1.7473, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"eval_loss": 2.6903834342956543, |
|
"eval_runtime": 221.4662, |
|
"eval_samples_per_second": 441.268, |
|
"eval_steps_per_second": 1.725, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 8.987047009168971e-07, |
|
"loss": 1.7413, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"eval_loss": 2.6892080307006836, |
|
"eval_runtime": 233.1215, |
|
"eval_samples_per_second": 419.206, |
|
"eval_steps_per_second": 1.639, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.348566438655218e-07, |
|
"loss": 1.7437, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 2.6897966861724854, |
|
"eval_runtime": 217.2083, |
|
"eval_samples_per_second": 449.918, |
|
"eval_steps_per_second": 1.759, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.7173628292824918e-07, |
|
"loss": 1.7546, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 2.689425468444824, |
|
"eval_runtime": 214.7197, |
|
"eval_samples_per_second": 455.133, |
|
"eval_steps_per_second": 1.779, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 68710, |
|
"total_flos": 1.1742684555264e+18, |
|
"train_loss": 2.026226943438967, |
|
"train_runtime": 84552.1773, |
|
"train_samples_per_second": 104.022, |
|
"train_steps_per_second": 0.813 |
|
} |
|
], |
|
"max_steps": 68710, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.1742684555264e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|