|
{ |
|
"best_metric": 18.683812405446293, |
|
"best_model_checkpoint": "./checkpoint-1000", |
|
"epoch": 42.10526315789474, |
|
"eval_steps": 1000, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 115.67640686035156, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 6.5238, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 27.986173629760742, |
|
"learning_rate": 9.800000000000001e-07, |
|
"loss": 4.874, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 15.896848678588867, |
|
"learning_rate": 1.48e-06, |
|
"loss": 3.0956, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 13.427591323852539, |
|
"learning_rate": 1.98e-06, |
|
"loss": 2.0652, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"grad_norm": 13.170661926269531, |
|
"learning_rate": 2.4800000000000004e-06, |
|
"loss": 1.5245, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 12.342823028564453, |
|
"learning_rate": 2.9800000000000003e-06, |
|
"loss": 1.1716, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 12.604913711547852, |
|
"learning_rate": 3.46e-06, |
|
"loss": 0.8993, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"grad_norm": 11.273714065551758, |
|
"learning_rate": 3.96e-06, |
|
"loss": 0.6749, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"grad_norm": 10.14577579498291, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.4599, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"grad_norm": 6.002610683441162, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.1846, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"grad_norm": 2.2945444583892822, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.1001, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"grad_norm": 2.423794984817505, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.0509, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"grad_norm": 1.5324437618255615, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.0319, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"grad_norm": 2.4710068702697754, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.0197, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"grad_norm": 2.177290916442871, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.016, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"grad_norm": 1.212317705154419, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.0111, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"grad_norm": 1.4730677604675293, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.0093, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"grad_norm": 0.6026259064674377, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.0062, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.0059149265289307, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.0072, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"grad_norm": 1.2911185026168823, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.0066, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"grad_norm": 1.1619590520858765, |
|
"learning_rate": 9.94888888888889e-06, |
|
"loss": 0.0062, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"grad_norm": 1.0487051010131836, |
|
"learning_rate": 9.893333333333334e-06, |
|
"loss": 0.0039, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"grad_norm": 0.25514963269233704, |
|
"learning_rate": 9.837777777777778e-06, |
|
"loss": 0.0031, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"grad_norm": 0.05470306798815727, |
|
"learning_rate": 9.782222222222222e-06, |
|
"loss": 0.0021, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"grad_norm": 0.060789547860622406, |
|
"learning_rate": 9.726666666666668e-06, |
|
"loss": 0.0018, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"grad_norm": 0.03411395475268364, |
|
"learning_rate": 9.671111111111112e-06, |
|
"loss": 0.0017, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"grad_norm": 0.019400257617235184, |
|
"learning_rate": 9.615555555555558e-06, |
|
"loss": 0.0009, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"grad_norm": 0.015261244960129261, |
|
"learning_rate": 9.56e-06, |
|
"loss": 0.0006, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"grad_norm": 0.01666310615837574, |
|
"learning_rate": 9.504444444444446e-06, |
|
"loss": 0.0007, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 31.58, |
|
"grad_norm": 0.37447166442871094, |
|
"learning_rate": 9.44888888888889e-06, |
|
"loss": 0.0007, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 32.63, |
|
"grad_norm": 0.013453792780637741, |
|
"learning_rate": 9.393333333333334e-06, |
|
"loss": 0.0006, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 33.68, |
|
"grad_norm": 0.011604133062064648, |
|
"learning_rate": 9.33777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 34.74, |
|
"grad_norm": 0.010738051496446133, |
|
"learning_rate": 9.282222222222222e-06, |
|
"loss": 0.0005, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 35.79, |
|
"grad_norm": 0.010525302961468697, |
|
"learning_rate": 9.226666666666668e-06, |
|
"loss": 0.0005, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 36.84, |
|
"grad_norm": 0.009034625254571438, |
|
"learning_rate": 9.171111111111112e-06, |
|
"loss": 0.0005, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"grad_norm": 0.009822655469179153, |
|
"learning_rate": 9.115555555555556e-06, |
|
"loss": 0.0004, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"grad_norm": 0.008321026340126991, |
|
"learning_rate": 9.060000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.008215431123971939, |
|
"learning_rate": 9.004444444444445e-06, |
|
"loss": 0.0004, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 41.05, |
|
"grad_norm": 0.007949727587401867, |
|
"learning_rate": 8.94888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"grad_norm": 0.007843004539608955, |
|
"learning_rate": 8.893333333333333e-06, |
|
"loss": 0.0003, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"eval_loss": 0.29718565940856934, |
|
"eval_runtime": 131.8407, |
|
"eval_samples_per_second": 2.89, |
|
"eval_steps_per_second": 0.728, |
|
"eval_wer": 18.683812405446293, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 218, |
|
"save_steps": 1000, |
|
"total_flos": 1.846946562048e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|