|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 120.0, |
|
"global_step": 2040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 1.455e-05, |
|
"loss": 16.0345, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 2.955e-05, |
|
"loss": 6.2463, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 4.454999999999999e-05, |
|
"loss": 4.2277, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"learning_rate": 5.955e-05, |
|
"loss": 3.4915, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"learning_rate": 7.455e-05, |
|
"loss": 3.3231, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"eval_loss": 3.352241277694702, |
|
"eval_runtime": 5.0475, |
|
"eval_samples_per_second": 22.189, |
|
"eval_steps_per_second": 1.387, |
|
"eval_wer": 0.998972250770812, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"learning_rate": 7.027597402597401e-05, |
|
"loss": 3.2264, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"learning_rate": 6.540584415584416e-05, |
|
"loss": 3.1652, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"learning_rate": 6.0535714285714285e-05, |
|
"loss": 3.019, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 52.94, |
|
"learning_rate": 5.566558441558441e-05, |
|
"loss": 2.6429, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"learning_rate": 5.0795454545454536e-05, |
|
"loss": 2.1146, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"eval_loss": 1.3367875814437866, |
|
"eval_runtime": 5.0633, |
|
"eval_samples_per_second": 22.12, |
|
"eval_steps_per_second": 1.382, |
|
"eval_wer": 0.9383350462487153, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 64.71, |
|
"learning_rate": 4.592532467532467e-05, |
|
"loss": 1.7769, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 70.59, |
|
"learning_rate": 4.10551948051948e-05, |
|
"loss": 1.5937, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 76.47, |
|
"learning_rate": 3.6185064935064934e-05, |
|
"loss": 1.4694, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 82.35, |
|
"learning_rate": 3.131493506493506e-05, |
|
"loss": 1.3781, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 88.24, |
|
"learning_rate": 2.6444805194805193e-05, |
|
"loss": 1.3134, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 88.24, |
|
"eval_loss": 0.7372016310691833, |
|
"eval_runtime": 5.0913, |
|
"eval_samples_per_second": 21.998, |
|
"eval_steps_per_second": 1.375, |
|
"eval_wer": 0.9578622816032888, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 94.12, |
|
"learning_rate": 2.162337662337662e-05, |
|
"loss": 1.271, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 1.6753246753246752e-05, |
|
"loss": 1.2273, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 105.88, |
|
"learning_rate": 1.1883116883116881e-05, |
|
"loss": 1.2121, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 111.76, |
|
"learning_rate": 7.012987012987012e-06, |
|
"loss": 1.169, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 117.65, |
|
"learning_rate": 2.1428571428571427e-06, |
|
"loss": 1.1506, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 117.65, |
|
"eval_loss": 0.6582115292549133, |
|
"eval_runtime": 5.0953, |
|
"eval_samples_per_second": 21.981, |
|
"eval_steps_per_second": 1.374, |
|
"eval_wer": 0.9681397738951696, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"step": 2040, |
|
"total_flos": 1.105779936504619e+19, |
|
"train_loss": 3.015083034365785, |
|
"train_runtime": 4204.6915, |
|
"train_samples_per_second": 15.326, |
|
"train_steps_per_second": 0.485 |
|
} |
|
], |
|
"max_steps": 2040, |
|
"num_train_epochs": 120, |
|
"total_flos": 1.105779936504619e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|