|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.2130013831258646, |
|
"eval_steps": 50, |
|
"global_step": 2700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04098150709492342, |
|
"grad_norm": 0.5333279371261597, |
|
"learning_rate": 3.663003663003663e-06, |
|
"loss": 2.2117, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04098150709492342, |
|
"eval_loss": 2.7143771648406982, |
|
"eval_runtime": 282.6413, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08196301418984683, |
|
"grad_norm": 0.42295441031455994, |
|
"learning_rate": 7.326007326007326e-06, |
|
"loss": 2.1609, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08196301418984683, |
|
"eval_loss": 2.6825826168060303, |
|
"eval_runtime": 283.0578, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12294452128477025, |
|
"grad_norm": 0.277256041765213, |
|
"learning_rate": 1.098901098901099e-05, |
|
"loss": 2.0212, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12294452128477025, |
|
"eval_loss": 2.602968215942383, |
|
"eval_runtime": 282.5654, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16392602837969367, |
|
"grad_norm": 0.2273157685995102, |
|
"learning_rate": 1.4652014652014653e-05, |
|
"loss": 1.8938, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16392602837969367, |
|
"eval_loss": 2.506340980529785, |
|
"eval_runtime": 283.8897, |
|
"eval_samples_per_second": 0.764, |
|
"eval_steps_per_second": 0.099, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20490753547461707, |
|
"grad_norm": 0.21983110904693604, |
|
"learning_rate": 1.8315018315018315e-05, |
|
"loss": 1.7918, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.20490753547461707, |
|
"eval_loss": 2.4188530445098877, |
|
"eval_runtime": 283.2047, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2458890425695405, |
|
"grad_norm": 0.20148395001888275, |
|
"learning_rate": 1.999406558079547e-05, |
|
"loss": 1.7115, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2458890425695405, |
|
"eval_loss": 2.3436834812164307, |
|
"eval_runtime": 282.2728, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2868705496644639, |
|
"grad_norm": 0.20601870119571686, |
|
"learning_rate": 1.9951769064396967e-05, |
|
"loss": 1.6595, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2868705496644639, |
|
"eval_loss": 2.3192338943481445, |
|
"eval_runtime": 282.1213, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.32785205675938733, |
|
"grad_norm": 0.19198212027549744, |
|
"learning_rate": 1.986897612915546e-05, |
|
"loss": 1.6081, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.32785205675938733, |
|
"eval_loss": 2.3058478832244873, |
|
"eval_runtime": 282.7227, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36883356385431076, |
|
"grad_norm": 0.19531460106372833, |
|
"learning_rate": 1.9746023681741606e-05, |
|
"loss": 1.6127, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.36883356385431076, |
|
"eval_loss": 2.2954719066619873, |
|
"eval_runtime": 283.3445, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.40981507094923414, |
|
"grad_norm": 0.2188873440027237, |
|
"learning_rate": 1.9583412048657773e-05, |
|
"loss": 1.5999, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.40981507094923414, |
|
"eval_loss": 2.2856028079986572, |
|
"eval_runtime": 282.6955, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.45079657804415757, |
|
"grad_norm": 0.2119743824005127, |
|
"learning_rate": 1.9381802940275198e-05, |
|
"loss": 1.6074, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.45079657804415757, |
|
"eval_loss": 2.2773895263671875, |
|
"eval_runtime": 283.317, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.491778085139081, |
|
"grad_norm": 0.21725280582904816, |
|
"learning_rate": 1.914201675815694e-05, |
|
"loss": 1.5989, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.491778085139081, |
|
"eval_loss": 2.2710676193237305, |
|
"eval_runtime": 283.5744, |
|
"eval_samples_per_second": 0.765, |
|
"eval_steps_per_second": 0.099, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5327595922340044, |
|
"grad_norm": 0.22087362408638, |
|
"learning_rate": 1.8865029256623765e-05, |
|
"loss": 1.5708, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5327595922340044, |
|
"eval_loss": 2.2653110027313232, |
|
"eval_runtime": 282.6403, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5737410993289278, |
|
"grad_norm": 0.24597273766994476, |
|
"learning_rate": 1.855196757214796e-05, |
|
"loss": 1.5981, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5737410993289278, |
|
"eval_loss": 2.2589895725250244, |
|
"eval_runtime": 282.6053, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6147226064238512, |
|
"grad_norm": 0.20500172674655914, |
|
"learning_rate": 1.8204105636732604e-05, |
|
"loss": 1.5859, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6147226064238512, |
|
"eval_loss": 2.2537271976470947, |
|
"eval_runtime": 282.8109, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6557041135187747, |
|
"grad_norm": 0.24009792506694794, |
|
"learning_rate": 1.782285899394034e-05, |
|
"loss": 1.5765, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6557041135187747, |
|
"eval_loss": 2.250108242034912, |
|
"eval_runtime": 282.295, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6966856206136981, |
|
"grad_norm": 0.23266170918941498, |
|
"learning_rate": 1.74097790386668e-05, |
|
"loss": 1.5676, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6966856206136981, |
|
"eval_loss": 2.2453207969665527, |
|
"eval_runtime": 281.9055, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7376671277086215, |
|
"grad_norm": 0.2351984679698944, |
|
"learning_rate": 1.6966546704098455e-05, |
|
"loss": 1.5688, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7376671277086215, |
|
"eval_loss": 2.2418150901794434, |
|
"eval_runtime": 281.9821, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7786486348035448, |
|
"grad_norm": 0.237622931599617, |
|
"learning_rate": 1.6494965621544403e-05, |
|
"loss": 1.5643, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7786486348035448, |
|
"eval_loss": 2.2379164695739746, |
|
"eval_runtime": 282.2084, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8196301418984683, |
|
"grad_norm": 0.24924997985363007, |
|
"learning_rate": 1.5996954780976568e-05, |
|
"loss": 1.5346, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8196301418984683, |
|
"eval_loss": 2.2346854209899902, |
|
"eval_runtime": 283.4225, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8606116489933917, |
|
"grad_norm": 0.24776747822761536, |
|
"learning_rate": 1.547454072214457e-05, |
|
"loss": 1.5507, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8606116489933917, |
|
"eval_loss": 2.2320806980133057, |
|
"eval_runtime": 282.9227, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9015931560883151, |
|
"grad_norm": 0.2700960040092468, |
|
"learning_rate": 1.4929849288041656e-05, |
|
"loss": 1.5582, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9015931560883151, |
|
"eval_loss": 2.2285408973693848, |
|
"eval_runtime": 283.0683, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9425746631832386, |
|
"grad_norm": 0.26280835270881653, |
|
"learning_rate": 1.4365096974279093e-05, |
|
"loss": 1.5275, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9425746631832386, |
|
"eval_loss": 2.2259719371795654, |
|
"eval_runtime": 282.6889, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.983556170278162, |
|
"grad_norm": 0.2555929124355316, |
|
"learning_rate": 1.3782581909570757e-05, |
|
"loss": 1.523, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.983556170278162, |
|
"eval_loss": 2.2228543758392334, |
|
"eval_runtime": 284.1011, |
|
"eval_samples_per_second": 0.764, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0245376773730854, |
|
"grad_norm": 0.27351436018943787, |
|
"learning_rate": 1.3184674504030679e-05, |
|
"loss": 1.5354, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0245376773730854, |
|
"eval_loss": 2.2217090129852295, |
|
"eval_runtime": 284.3948, |
|
"eval_samples_per_second": 0.763, |
|
"eval_steps_per_second": 0.098, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0655191844680088, |
|
"grad_norm": 0.2628322243690491, |
|
"learning_rate": 1.2573807803338216e-05, |
|
"loss": 1.5386, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.0655191844680088, |
|
"eval_loss": 2.2189579010009766, |
|
"eval_runtime": 282.9448, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.1065006915629323, |
|
"grad_norm": 0.27614346146583557, |
|
"learning_rate": 1.1952467588022282e-05, |
|
"loss": 1.5338, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.1065006915629323, |
|
"eval_loss": 2.21720814704895, |
|
"eval_runtime": 282.859, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.1474821986578556, |
|
"grad_norm": 0.27579498291015625, |
|
"learning_rate": 1.1323182258153314e-05, |
|
"loss": 1.5292, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1474821986578556, |
|
"eval_loss": 2.2148287296295166, |
|
"eval_runtime": 283.7073, |
|
"eval_samples_per_second": 0.765, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1884637057527792, |
|
"grad_norm": 0.2812260687351227, |
|
"learning_rate": 1.0688512544604915e-05, |
|
"loss": 1.5376, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.1884637057527792, |
|
"eval_loss": 2.2129642963409424, |
|
"eval_runtime": 281.4833, |
|
"eval_samples_per_second": 0.771, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.2294452128477025, |
|
"grad_norm": 0.28713124990463257, |
|
"learning_rate": 1.005104108875275e-05, |
|
"loss": 1.5273, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.2294452128477025, |
|
"eval_loss": 2.2116787433624268, |
|
"eval_runtime": 283.0969, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.270426719942626, |
|
"grad_norm": 0.27820634841918945, |
|
"learning_rate": 9.41336193301377e-06, |
|
"loss": 1.526, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.270426719942626, |
|
"eval_loss": 2.2110674381256104, |
|
"eval_runtime": 281.8783, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.3114082270375493, |
|
"grad_norm": 0.26735347509384155, |
|
"learning_rate": 8.778069964991484e-06, |
|
"loss": 1.537, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.3114082270375493, |
|
"eval_loss": 2.2088816165924072, |
|
"eval_runtime": 281.9837, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.3523897341324727, |
|
"grad_norm": 0.2734984755516052, |
|
"learning_rate": 8.147750358182e-06, |
|
"loss": 1.5431, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.3523897341324727, |
|
"eval_loss": 2.207979917526245, |
|
"eval_runtime": 282.6878, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.3933712412273962, |
|
"grad_norm": 0.3110925257205963, |
|
"learning_rate": 7.524968052209331e-06, |
|
"loss": 1.5401, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.3933712412273962, |
|
"eval_loss": 2.2074739933013916, |
|
"eval_runtime": 282.1594, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.4343527483223195, |
|
"grad_norm": 0.3202177882194519, |
|
"learning_rate": 6.912257315397784e-06, |
|
"loss": 1.5331, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.4343527483223195, |
|
"eval_loss": 2.205904245376587, |
|
"eval_runtime": 280.7829, |
|
"eval_samples_per_second": 0.773, |
|
"eval_steps_per_second": 0.1, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.475334255417243, |
|
"grad_norm": 0.28285861015319824, |
|
"learning_rate": 6.312111432154074e-06, |
|
"loss": 1.5395, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.475334255417243, |
|
"eval_loss": 2.2046620845794678, |
|
"eval_runtime": 282.5413, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.5163157625121664, |
|
"grad_norm": 0.2836764454841614, |
|
"learning_rate": 5.726972557124022e-06, |
|
"loss": 1.542, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.5163157625121664, |
|
"eval_loss": 2.203850507736206, |
|
"eval_runtime": 283.3938, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.5572972696070897, |
|
"grad_norm": 0.2900823652744293, |
|
"learning_rate": 5.159221777409953e-06, |
|
"loss": 1.502, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.5572972696070897, |
|
"eval_loss": 2.2032361030578613, |
|
"eval_runtime": 281.099, |
|
"eval_samples_per_second": 0.772, |
|
"eval_steps_per_second": 0.1, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.5982787767020132, |
|
"grad_norm": 0.282176673412323, |
|
"learning_rate": 4.611169423288323e-06, |
|
"loss": 1.5267, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.5982787767020132, |
|
"eval_loss": 2.202617883682251, |
|
"eval_runtime": 283.2145, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.6392602837969368, |
|
"grad_norm": 0.29563507437705994, |
|
"learning_rate": 4.085045666855846e-06, |
|
"loss": 1.51, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.6392602837969368, |
|
"eval_loss": 2.2023086547851562, |
|
"eval_runtime": 282.4408, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.68024179089186, |
|
"grad_norm": 0.2923285663127899, |
|
"learning_rate": 3.5829914468607874e-06, |
|
"loss": 1.5319, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.68024179089186, |
|
"eval_loss": 2.2018585205078125, |
|
"eval_runtime": 281.9707, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.7212232979867834, |
|
"grad_norm": 0.2978394031524658, |
|
"learning_rate": 3.1070497566486825e-06, |
|
"loss": 1.5267, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.7212232979867834, |
|
"eval_loss": 2.2010927200317383, |
|
"eval_runtime": 282.5046, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.7622048050817067, |
|
"grad_norm": 0.2987017035484314, |
|
"learning_rate": 2.6591573306741704e-06, |
|
"loss": 1.5201, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.7622048050817067, |
|
"eval_loss": 2.200831174850464, |
|
"eval_runtime": 283.0038, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.8031863121766303, |
|
"grad_norm": 0.28967171907424927, |
|
"learning_rate": 2.241136763408801e-06, |
|
"loss": 1.5204, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.8031863121766303, |
|
"eval_loss": 2.2002053260803223, |
|
"eval_runtime": 281.6629, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.8441678192715538, |
|
"grad_norm": 0.321614146232605, |
|
"learning_rate": 1.8546890927150273e-06, |
|
"loss": 1.5094, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.8441678192715538, |
|
"eval_loss": 2.2001166343688965, |
|
"eval_runtime": 282.843, |
|
"eval_samples_per_second": 0.767, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.8851493263664771, |
|
"grad_norm": 0.2887614965438843, |
|
"learning_rate": 1.501386877866694e-06, |
|
"loss": 1.5268, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.8851493263664771, |
|
"eval_loss": 2.199751615524292, |
|
"eval_runtime": 282.0209, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.9261308334614005, |
|
"grad_norm": 0.30499428510665894, |
|
"learning_rate": 1.1826678003833402e-06, |
|
"loss": 1.513, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.9261308334614005, |
|
"eval_loss": 2.1995420455932617, |
|
"eval_runtime": 283.2079, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.967112340556324, |
|
"grad_norm": 0.29722264409065247, |
|
"learning_rate": 8.998288137183209e-07, |
|
"loss": 1.5263, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.967112340556324, |
|
"eval_loss": 2.199302911758423, |
|
"eval_runtime": 281.4307, |
|
"eval_samples_per_second": 0.771, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.0080938476512475, |
|
"grad_norm": 0.30117130279541016, |
|
"learning_rate": 6.540208656071601e-07, |
|
"loss": 1.5291, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.0080938476512475, |
|
"eval_loss": 2.199272394180298, |
|
"eval_runtime": 281.4783, |
|
"eval_samples_per_second": 0.771, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.049075354746171, |
|
"grad_norm": 0.29823198914527893, |
|
"learning_rate": 4.4624421455236156e-07, |
|
"loss": 1.5187, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.049075354746171, |
|
"eval_loss": 2.1992440223693848, |
|
"eval_runtime": 280.9197, |
|
"eval_samples_per_second": 0.772, |
|
"eval_steps_per_second": 0.1, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.090056861841094, |
|
"grad_norm": 0.3053443729877472, |
|
"learning_rate": 2.7734435950315663e-07, |
|
"loss": 1.5324, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.090056861841094, |
|
"eval_loss": 2.1991806030273438, |
|
"eval_runtime": 281.8919, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.1310383689360175, |
|
"grad_norm": 0.31491023302078247, |
|
"learning_rate": 1.4800859929338218e-07, |
|
"loss": 1.5314, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.1310383689360175, |
|
"eval_loss": 2.199115037918091, |
|
"eval_runtime": 282.019, |
|
"eval_samples_per_second": 0.769, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.172019876030941, |
|
"grad_norm": 0.300465852022171, |
|
"learning_rate": 5.876323583810184e-08, |
|
"loss": 1.5357, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.172019876030941, |
|
"eval_loss": 2.19909930229187, |
|
"eval_runtime": 281.0522, |
|
"eval_samples_per_second": 0.772, |
|
"eval_steps_per_second": 0.1, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.2130013831258646, |
|
"grad_norm": 0.3176944851875305, |
|
"learning_rate": 9.971432469871866e-09, |
|
"loss": 1.507, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.2130013831258646, |
|
"eval_loss": 2.1991000175476074, |
|
"eval_runtime": 281.6411, |
|
"eval_samples_per_second": 0.77, |
|
"eval_steps_per_second": 0.099, |
|
"step": 2700 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 2735, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"total_flos": 1.100581152473088e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|