|
{ |
|
"best_metric": 0.7927327277273106, |
|
"best_model_checkpoint": "./XLM-V_96-multi-outputs/checkpoint-9000", |
|
"epoch": 14.8619957537155, |
|
"eval_steps": 1000, |
|
"global_step": 14000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0615711252653928, |
|
"grad_norm": 3.787013530731201, |
|
"learning_rate": 1.4154281670205236e-06, |
|
"loss": 0.6909, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0615711252653928, |
|
"eval_accuracy": 0.6001194862093, |
|
"eval_f1": 0.7023421286688408, |
|
"eval_loss": 0.659694254398346, |
|
"eval_precision": 0.5653937947494033, |
|
"eval_recall": 0.9268388106416275, |
|
"eval_runtime": 59.6001, |
|
"eval_samples_per_second": 168.506, |
|
"eval_steps_per_second": 1.762, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.1231422505307855, |
|
"grad_norm": 10.855875015258789, |
|
"learning_rate": 1.907682629550995e-06, |
|
"loss": 0.6595, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.1231422505307855, |
|
"eval_accuracy": 0.6375584984566365, |
|
"eval_f1": 0.727830118139674, |
|
"eval_loss": 0.6210178136825562, |
|
"eval_precision": 0.5890825465988865, |
|
"eval_recall": 0.952073552425665, |
|
"eval_runtime": 59.4085, |
|
"eval_samples_per_second": 169.05, |
|
"eval_steps_per_second": 1.767, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.1847133757961785, |
|
"grad_norm": 1.80464768409729, |
|
"learning_rate": 1.7504128332153809e-06, |
|
"loss": 0.6382, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.1847133757961785, |
|
"eval_accuracy": 0.6707159215373892, |
|
"eval_f1": 0.7504339295147536, |
|
"eval_loss": 0.5838897228240967, |
|
"eval_precision": 0.6108858582135398, |
|
"eval_recall": 0.9726134585289515, |
|
"eval_runtime": 59.4982, |
|
"eval_samples_per_second": 168.795, |
|
"eval_steps_per_second": 1.765, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.246284501061571, |
|
"grad_norm": 3.50860595703125, |
|
"learning_rate": 1.593143036879767e-06, |
|
"loss": 0.6155, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.246284501061571, |
|
"eval_accuracy": 0.6774868067310564, |
|
"eval_f1": 0.752426813421998, |
|
"eval_loss": 0.5712079405784607, |
|
"eval_precision": 0.6174883954334462, |
|
"eval_recall": 0.9628325508607198, |
|
"eval_runtime": 59.4454, |
|
"eval_samples_per_second": 168.945, |
|
"eval_steps_per_second": 1.766, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.307855626326964, |
|
"grad_norm": 10.334639549255371, |
|
"learning_rate": 1.4358732405441535e-06, |
|
"loss": 0.5986, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.307855626326964, |
|
"eval_accuracy": 0.7027780543662252, |
|
"eval_f1": 0.7685867121482285, |
|
"eval_loss": 0.5451286435127258, |
|
"eval_precision": 0.6365737768074997, |
|
"eval_recall": 0.969679186228482, |
|
"eval_runtime": 59.5608, |
|
"eval_samples_per_second": 168.618, |
|
"eval_steps_per_second": 1.763, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.369426751592357, |
|
"grad_norm": 4.514008045196533, |
|
"learning_rate": 1.2786034442085396e-06, |
|
"loss": 0.5815, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.369426751592357, |
|
"eval_accuracy": 0.7189086926217266, |
|
"eval_f1": 0.7648479800083299, |
|
"eval_loss": 0.527675986289978, |
|
"eval_precision": 0.6660380095749311, |
|
"eval_recall": 0.8980829420970265, |
|
"eval_runtime": 59.5312, |
|
"eval_samples_per_second": 168.702, |
|
"eval_steps_per_second": 1.764, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.43099787685775, |
|
"grad_norm": 15.843857765197754, |
|
"learning_rate": 1.121333647872926e-06, |
|
"loss": 0.5596, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.43099787685775, |
|
"eval_accuracy": 0.7367320521756447, |
|
"eval_f1": 0.7817401353805514, |
|
"eval_loss": 0.5013874769210815, |
|
"eval_precision": 0.6762353613253356, |
|
"eval_recall": 0.9262519561815337, |
|
"eval_runtime": 59.9272, |
|
"eval_samples_per_second": 167.587, |
|
"eval_steps_per_second": 1.752, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.492569002123142, |
|
"grad_norm": 26.940380096435547, |
|
"learning_rate": 9.640638515373122e-07, |
|
"loss": 0.5332, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.492569002123142, |
|
"eval_accuracy": 0.7425072189584785, |
|
"eval_f1": 0.7898244473342002, |
|
"eval_loss": 0.4773547947406769, |
|
"eval_precision": 0.6756117908787542, |
|
"eval_recall": 0.9505086071987481, |
|
"eval_runtime": 59.4845, |
|
"eval_samples_per_second": 168.834, |
|
"eval_steps_per_second": 1.765, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.554140127388536, |
|
"grad_norm": 15.124056816101074, |
|
"learning_rate": 8.067940552016985e-07, |
|
"loss": 0.4836, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.554140127388536, |
|
"eval_accuracy": 0.7523648312257294, |
|
"eval_f1": 0.7927327277273106, |
|
"eval_loss": 0.4266820549964905, |
|
"eval_precision": 0.6905764483810077, |
|
"eval_recall": 0.9303599374021909, |
|
"eval_runtime": 59.8735, |
|
"eval_samples_per_second": 167.737, |
|
"eval_steps_per_second": 1.754, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.615711252653927, |
|
"grad_norm": 11.718687057495117, |
|
"learning_rate": 6.495242588660847e-07, |
|
"loss": 0.4479, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 10.615711252653927, |
|
"eval_accuracy": 0.7561485611868963, |
|
"eval_f1": 0.7909874541264829, |
|
"eval_loss": 0.40309131145477295, |
|
"eval_precision": 0.7015897047691143, |
|
"eval_recall": 0.9064945226917058, |
|
"eval_runtime": 59.4764, |
|
"eval_samples_per_second": 168.857, |
|
"eval_steps_per_second": 1.765, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 11.67728237791932, |
|
"grad_norm": 10.142501831054688, |
|
"learning_rate": 4.92254462530471e-07, |
|
"loss": 0.4328, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 11.67728237791932, |
|
"eval_accuracy": 0.7591357164193966, |
|
"eval_f1": 0.7869285651369682, |
|
"eval_loss": 0.3995474874973297, |
|
"eval_precision": 0.7157506809806121, |
|
"eval_recall": 0.8738262910798122, |
|
"eval_runtime": 59.6214, |
|
"eval_samples_per_second": 168.446, |
|
"eval_steps_per_second": 1.761, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 12.738853503184714, |
|
"grad_norm": 5.893862247467041, |
|
"learning_rate": 3.3498466619485727e-07, |
|
"loss": 0.4217, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 12.738853503184714, |
|
"eval_accuracy": 0.7629194463805635, |
|
"eval_f1": 0.783683110747706, |
|
"eval_loss": 0.3929121494293213, |
|
"eval_precision": 0.7316369804919424, |
|
"eval_recall": 0.8437010954616588, |
|
"eval_runtime": 59.4335, |
|
"eval_samples_per_second": 168.979, |
|
"eval_steps_per_second": 1.767, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 13.800424628450106, |
|
"grad_norm": 18.15483283996582, |
|
"learning_rate": 1.7771486985924354e-07, |
|
"loss": 0.4159, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 13.800424628450106, |
|
"eval_accuracy": 0.7652095987254804, |
|
"eval_f1": 0.7918799646954987, |
|
"eval_loss": 0.39117059111595154, |
|
"eval_precision": 0.7214538436796397, |
|
"eval_recall": 0.8775430359937402, |
|
"eval_runtime": 59.605, |
|
"eval_samples_per_second": 168.493, |
|
"eval_steps_per_second": 1.762, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 14.8619957537155, |
|
"grad_norm": 9.832246780395508, |
|
"learning_rate": 2.0445073523629785e-08, |
|
"loss": 0.4121, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 14.8619957537155, |
|
"eval_accuracy": 0.7667031763417306, |
|
"eval_f1": 0.7908222480135703, |
|
"eval_loss": 0.3898768126964569, |
|
"eval_precision": 0.7273772376416489, |
|
"eval_recall": 0.8663928012519562, |
|
"eval_runtime": 59.4519, |
|
"eval_samples_per_second": 168.926, |
|
"eval_steps_per_second": 1.766, |
|
"step": 14000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 14130, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.2803191582601536e+17, |
|
"train_batch_size": 96, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|