|
{ |
|
"best_metric": 0.996, |
|
"best_model_checkpoint": "xlm-roberta-base-finetuned-enron/checkpoint-7932", |
|
"epoch": 4.0, |
|
"global_step": 7932, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8999495713565305e-05, |
|
"loss": 0.1679, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.799293998991427e-05, |
|
"loss": 0.0773, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.698436712052446e-05, |
|
"loss": 0.0504, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.995, |
|
"eval_loss": 0.022810379043221474, |
|
"eval_runtime": 7.2177, |
|
"eval_samples_per_second": 277.095, |
|
"eval_steps_per_second": 17.318, |
|
"step": 1983 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.5977811396873426e-05, |
|
"loss": 0.0436, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4969238527483611e-05, |
|
"loss": 0.0377, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.39606656580938e-05, |
|
"loss": 0.0283, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2952092788703986e-05, |
|
"loss": 0.0211, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.991, |
|
"eval_loss": 0.05412155017256737, |
|
"eval_runtime": 8.3181, |
|
"eval_samples_per_second": 240.441, |
|
"eval_steps_per_second": 15.028, |
|
"step": 3966 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.194351991931417e-05, |
|
"loss": 0.0201, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.0936964195663137e-05, |
|
"loss": 0.0073, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.928391326273324e-06, |
|
"loss": 0.0094, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.919818456883512e-06, |
|
"loss": 0.0118, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.993, |
|
"eval_loss": 0.0558784045279026, |
|
"eval_runtime": 7.2776, |
|
"eval_samples_per_second": 274.815, |
|
"eval_steps_per_second": 17.176, |
|
"step": 5949 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 7.911245587493697e-06, |
|
"loss": 0.0122, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 6.902672718103883e-06, |
|
"loss": 0.0034, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 5.89409984871407e-06, |
|
"loss": 0.0091, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.885526979324257e-06, |
|
"loss": 0.0027, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.996, |
|
"eval_loss": 0.031464751809835434, |
|
"eval_runtime": 7.3051, |
|
"eval_samples_per_second": 273.782, |
|
"eval_steps_per_second": 17.111, |
|
"step": 7932 |
|
} |
|
], |
|
"max_steps": 9915, |
|
"num_train_epochs": 5, |
|
"total_flos": 3.326409472938432e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|