|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 300.0, |
|
"global_step": 11700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1.24e-05, |
|
"loss": 4.0279, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"eval_bleu": 49.841, |
|
"eval_em": 0.0, |
|
"eval_gen_len": 51.6403, |
|
"eval_loss": 2.4031472206115723, |
|
"eval_rm": 0.0, |
|
"eval_runtime": 118.4236, |
|
"eval_samples_per_second": 3.521, |
|
"eval_steps_per_second": 0.448, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 2.4900000000000002e-05, |
|
"loss": 1.3442, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"eval_bleu": 85.0177, |
|
"eval_em": 0.0, |
|
"eval_gen_len": 57.9784, |
|
"eval_loss": 0.501366138458252, |
|
"eval_rm": 0.0, |
|
"eval_runtime": 163.8536, |
|
"eval_samples_per_second": 2.545, |
|
"eval_steps_per_second": 0.323, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 3.74e-05, |
|
"loss": 0.2522, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_bleu": 94.0714, |
|
"eval_em": 0.0168, |
|
"eval_gen_len": 57.9137, |
|
"eval_loss": 0.3293180763721466, |
|
"eval_rm": 0.0216, |
|
"eval_runtime": 113.0226, |
|
"eval_samples_per_second": 3.69, |
|
"eval_steps_per_second": 0.469, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"learning_rate": 4.99e-05, |
|
"loss": 0.1534, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 51.28, |
|
"eval_bleu": 94.4328, |
|
"eval_em": 0.0024, |
|
"eval_gen_len": 58.9448, |
|
"eval_loss": 0.320736825466156, |
|
"eval_rm": 0.0072, |
|
"eval_runtime": 116.3746, |
|
"eval_samples_per_second": 3.583, |
|
"eval_steps_per_second": 0.455, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"learning_rate": 4.744329896907217e-05, |
|
"loss": 0.1305, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 64.1, |
|
"eval_bleu": 94.0708, |
|
"eval_em": 0.0, |
|
"eval_gen_len": 59.6115, |
|
"eval_loss": 0.3247060477733612, |
|
"eval_rm": 0.0, |
|
"eval_runtime": 117.0173, |
|
"eval_samples_per_second": 3.564, |
|
"eval_steps_per_second": 0.453, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 4.48659793814433e-05, |
|
"loss": 0.1226, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_bleu": 94.3143, |
|
"eval_em": 0.0024, |
|
"eval_gen_len": 58.235, |
|
"eval_loss": 0.33251264691352844, |
|
"eval_rm": 0.0024, |
|
"eval_runtime": 119.1624, |
|
"eval_samples_per_second": 3.499, |
|
"eval_steps_per_second": 0.445, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 89.74, |
|
"learning_rate": 4.228865979381443e-05, |
|
"loss": 0.1131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 89.74, |
|
"eval_bleu": 94.5678, |
|
"eval_em": 0.0048, |
|
"eval_gen_len": 59.6811, |
|
"eval_loss": 0.3400600850582123, |
|
"eval_rm": 0.0144, |
|
"eval_runtime": 116.7251, |
|
"eval_samples_per_second": 3.572, |
|
"eval_steps_per_second": 0.454, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 102.56, |
|
"learning_rate": 3.971134020618557e-05, |
|
"loss": 0.1053, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 102.56, |
|
"eval_bleu": 94.4738, |
|
"eval_em": 0.0168, |
|
"eval_gen_len": 59.0288, |
|
"eval_loss": 0.3373829424381256, |
|
"eval_rm": 0.0552, |
|
"eval_runtime": 118.4954, |
|
"eval_samples_per_second": 3.519, |
|
"eval_steps_per_second": 0.447, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"learning_rate": 3.71340206185567e-05, |
|
"loss": 0.0999, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"eval_bleu": 94.6291, |
|
"eval_em": 0.0336, |
|
"eval_gen_len": 58.6283, |
|
"eval_loss": 0.3437003791332245, |
|
"eval_rm": 0.0624, |
|
"eval_runtime": 119.5949, |
|
"eval_samples_per_second": 3.487, |
|
"eval_steps_per_second": 0.443, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 128.21, |
|
"learning_rate": 3.455670103092783e-05, |
|
"loss": 0.0941, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 128.21, |
|
"eval_bleu": 94.7896, |
|
"eval_em": 0.0695, |
|
"eval_gen_len": 58.4149, |
|
"eval_loss": 0.351246178150177, |
|
"eval_rm": 0.1271, |
|
"eval_runtime": 121.3634, |
|
"eval_samples_per_second": 3.436, |
|
"eval_steps_per_second": 0.437, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 141.03, |
|
"learning_rate": 3.197938144329897e-05, |
|
"loss": 0.0904, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 141.03, |
|
"eval_bleu": 94.4101, |
|
"eval_em": 0.0719, |
|
"eval_gen_len": 58.2518, |
|
"eval_loss": 0.34235823154449463, |
|
"eval_rm": 0.1439, |
|
"eval_runtime": 118.818, |
|
"eval_samples_per_second": 3.51, |
|
"eval_steps_per_second": 0.446, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"learning_rate": 2.9402061855670106e-05, |
|
"loss": 0.0833, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_bleu": 94.7141, |
|
"eval_em": 0.0887, |
|
"eval_gen_len": 59.0312, |
|
"eval_loss": 0.3461511433124542, |
|
"eval_rm": 0.1775, |
|
"eval_runtime": 116.2495, |
|
"eval_samples_per_second": 3.587, |
|
"eval_steps_per_second": 0.456, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 2.6824742268041237e-05, |
|
"loss": 0.0772, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"eval_bleu": 94.6758, |
|
"eval_em": 0.0911, |
|
"eval_gen_len": 59.0767, |
|
"eval_loss": 0.34671926498413086, |
|
"eval_rm": 0.2062, |
|
"eval_runtime": 116.1647, |
|
"eval_samples_per_second": 3.59, |
|
"eval_steps_per_second": 0.456, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"learning_rate": 2.4247422680412372e-05, |
|
"loss": 0.0722, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 179.49, |
|
"eval_bleu": 94.5698, |
|
"eval_em": 0.1055, |
|
"eval_gen_len": 58.1415, |
|
"eval_loss": 0.3461613953113556, |
|
"eval_rm": 0.2398, |
|
"eval_runtime": 119.2771, |
|
"eval_samples_per_second": 3.496, |
|
"eval_steps_per_second": 0.444, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"learning_rate": 2.1670103092783507e-05, |
|
"loss": 0.0669, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"eval_bleu": 95.0365, |
|
"eval_em": 0.1223, |
|
"eval_gen_len": 58.7794, |
|
"eval_loss": 0.35367459058761597, |
|
"eval_rm": 0.2782, |
|
"eval_runtime": 115.018, |
|
"eval_samples_per_second": 3.626, |
|
"eval_steps_per_second": 0.461, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 205.13, |
|
"learning_rate": 1.9092783505154642e-05, |
|
"loss": 0.062, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 205.13, |
|
"eval_bleu": 94.8694, |
|
"eval_em": 0.1247, |
|
"eval_gen_len": 58.211, |
|
"eval_loss": 0.35051023960113525, |
|
"eval_rm": 0.2686, |
|
"eval_runtime": 113.7476, |
|
"eval_samples_per_second": 3.666, |
|
"eval_steps_per_second": 0.466, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 217.95, |
|
"learning_rate": 1.6515463917525774e-05, |
|
"loss": 0.0576, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 217.95, |
|
"eval_bleu": 94.8168, |
|
"eval_em": 0.1271, |
|
"eval_gen_len": 59.0791, |
|
"eval_loss": 0.3510896563529968, |
|
"eval_rm": 0.2926, |
|
"eval_runtime": 117.1223, |
|
"eval_samples_per_second": 3.56, |
|
"eval_steps_per_second": 0.453, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"learning_rate": 1.3938144329896907e-05, |
|
"loss": 0.0539, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"eval_bleu": 95.1935, |
|
"eval_em": 0.1367, |
|
"eval_gen_len": 58.6787, |
|
"eval_loss": 0.34899094700813293, |
|
"eval_rm": 0.3046, |
|
"eval_runtime": 117.1796, |
|
"eval_samples_per_second": 3.559, |
|
"eval_steps_per_second": 0.452, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 243.59, |
|
"learning_rate": 1.1360824742268042e-05, |
|
"loss": 0.0502, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 243.59, |
|
"eval_bleu": 95.1882, |
|
"eval_em": 0.1319, |
|
"eval_gen_len": 58.5228, |
|
"eval_loss": 0.3490062654018402, |
|
"eval_rm": 0.3141, |
|
"eval_runtime": 118.559, |
|
"eval_samples_per_second": 3.517, |
|
"eval_steps_per_second": 0.447, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"learning_rate": 8.783505154639175e-06, |
|
"loss": 0.0473, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 256.41, |
|
"eval_bleu": 95.1198, |
|
"eval_em": 0.1319, |
|
"eval_gen_len": 58.4245, |
|
"eval_loss": 0.3504057824611664, |
|
"eval_rm": 0.307, |
|
"eval_runtime": 118.462, |
|
"eval_samples_per_second": 3.52, |
|
"eval_steps_per_second": 0.447, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 269.23, |
|
"learning_rate": 6.206185567010309e-06, |
|
"loss": 0.045, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 269.23, |
|
"eval_bleu": 95.047, |
|
"eval_em": 0.1343, |
|
"eval_gen_len": 58.3213, |
|
"eval_loss": 0.35046613216400146, |
|
"eval_rm": 0.307, |
|
"eval_runtime": 118.1147, |
|
"eval_samples_per_second": 3.53, |
|
"eval_steps_per_second": 0.449, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 282.05, |
|
"learning_rate": 3.6288659793814435e-06, |
|
"loss": 0.0429, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 282.05, |
|
"eval_bleu": 95.2397, |
|
"eval_em": 0.1391, |
|
"eval_gen_len": 58.7242, |
|
"eval_loss": 0.3522409200668335, |
|
"eval_rm": 0.3046, |
|
"eval_runtime": 119.4326, |
|
"eval_samples_per_second": 3.492, |
|
"eval_steps_per_second": 0.444, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 294.87, |
|
"learning_rate": 1.0515463917525774e-06, |
|
"loss": 0.0416, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 294.87, |
|
"eval_bleu": 95.2821, |
|
"eval_em": 0.1415, |
|
"eval_gen_len": 58.7746, |
|
"eval_loss": 0.3522770404815674, |
|
"eval_rm": 0.3046, |
|
"eval_runtime": 119.2922, |
|
"eval_samples_per_second": 3.496, |
|
"eval_steps_per_second": 0.444, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"step": 11700, |
|
"total_flos": 9455798374608960.0, |
|
"train_loss": 0.006238417584671934, |
|
"train_runtime": 1626.7385, |
|
"train_samples_per_second": 226.097, |
|
"train_steps_per_second": 7.192 |
|
} |
|
], |
|
"max_steps": 11700, |
|
"num_train_epochs": 300, |
|
"total_flos": 9455798374608960.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|