|
{ |
|
"best_metric": 0.32343700528144836, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-toigen-balanced-model/checkpoint-2000", |
|
"epoch": 21.238938053097346, |
|
"eval_steps": 100, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 3.8822503089904785, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 14.2297, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"eval_loss": 3.483584403991699, |
|
"eval_runtime": 18.7703, |
|
"eval_samples_per_second": 11.081, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 1.0055555555555555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"grad_norm": 3.1678736209869385, |
|
"learning_rate": 0.0002998980169971671, |
|
"loss": 4.1389, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"eval_loss": 0.5561802983283997, |
|
"eval_runtime": 18.75, |
|
"eval_samples_per_second": 11.093, |
|
"eval_steps_per_second": 2.773, |
|
"eval_wer": 0.5694444444444444, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.6548672566371683, |
|
"grad_norm": 1.6712620258331299, |
|
"learning_rate": 0.00029979178470254956, |
|
"loss": 1.3643, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.6548672566371683, |
|
"eval_loss": 0.4360348582267761, |
|
"eval_runtime": 18.7668, |
|
"eval_samples_per_second": 11.083, |
|
"eval_steps_per_second": 2.771, |
|
"eval_wer": 0.49583333333333335, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.5398230088495577, |
|
"grad_norm": 1.3147025108337402, |
|
"learning_rate": 0.00029968555240793195, |
|
"loss": 1.1715, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.5398230088495577, |
|
"eval_loss": 0.3980385661125183, |
|
"eval_runtime": 18.8024, |
|
"eval_samples_per_second": 11.062, |
|
"eval_steps_per_second": 2.766, |
|
"eval_wer": 0.4824074074074074, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.424778761061947, |
|
"grad_norm": 1.7749208211898804, |
|
"learning_rate": 0.00029957932011331445, |
|
"loss": 1.1309, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.424778761061947, |
|
"eval_loss": 0.37851694226264954, |
|
"eval_runtime": 19.1098, |
|
"eval_samples_per_second": 10.884, |
|
"eval_steps_per_second": 2.721, |
|
"eval_wer": 0.4583333333333333, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.3097345132743365, |
|
"grad_norm": 1.853244662284851, |
|
"learning_rate": 0.0002994730878186969, |
|
"loss": 1.0283, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.3097345132743365, |
|
"eval_loss": 0.37413156032562256, |
|
"eval_runtime": 18.6437, |
|
"eval_samples_per_second": 11.157, |
|
"eval_steps_per_second": 2.789, |
|
"eval_wer": 0.4476851851851852, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.1946902654867255, |
|
"grad_norm": 1.4990218877792358, |
|
"learning_rate": 0.0002993668555240793, |
|
"loss": 1.0148, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.1946902654867255, |
|
"eval_loss": 0.36694276332855225, |
|
"eval_runtime": 18.6699, |
|
"eval_samples_per_second": 11.141, |
|
"eval_steps_per_second": 2.785, |
|
"eval_wer": 0.44027777777777777, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.079646017699115, |
|
"grad_norm": 2.7431230545043945, |
|
"learning_rate": 0.0002992606232294617, |
|
"loss": 0.9961, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.079646017699115, |
|
"eval_loss": 0.36071425676345825, |
|
"eval_runtime": 18.6525, |
|
"eval_samples_per_second": 11.151, |
|
"eval_steps_per_second": 2.788, |
|
"eval_wer": 0.4356481481481482, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.964601769911504, |
|
"grad_norm": 2.0985348224639893, |
|
"learning_rate": 0.00029915439093484416, |
|
"loss": 0.9248, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.964601769911504, |
|
"eval_loss": 0.3580877482891083, |
|
"eval_runtime": 18.871, |
|
"eval_samples_per_second": 11.022, |
|
"eval_steps_per_second": 2.756, |
|
"eval_wer": 0.4236111111111111, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.849557522123893, |
|
"grad_norm": 1.3972795009613037, |
|
"learning_rate": 0.0002990481586402266, |
|
"loss": 0.9482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.849557522123893, |
|
"eval_loss": 0.3462725281715393, |
|
"eval_runtime": 18.815, |
|
"eval_samples_per_second": 11.055, |
|
"eval_steps_per_second": 2.764, |
|
"eval_wer": 0.4356481481481482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.734513274336283, |
|
"grad_norm": 3.017667293548584, |
|
"learning_rate": 0.00029894192634560905, |
|
"loss": 0.8815, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.734513274336283, |
|
"eval_loss": 0.3487873673439026, |
|
"eval_runtime": 18.8526, |
|
"eval_samples_per_second": 11.033, |
|
"eval_steps_per_second": 2.758, |
|
"eval_wer": 0.4273148148148148, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.619469026548673, |
|
"grad_norm": 1.357649803161621, |
|
"learning_rate": 0.0002988356940509915, |
|
"loss": 0.8209, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.619469026548673, |
|
"eval_loss": 0.33840110898017883, |
|
"eval_runtime": 18.6886, |
|
"eval_samples_per_second": 11.13, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.4, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.504424778761061, |
|
"grad_norm": 1.8951735496520996, |
|
"learning_rate": 0.00029872946175637393, |
|
"loss": 0.8754, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.504424778761061, |
|
"eval_loss": 0.3459264636039734, |
|
"eval_runtime": 18.8754, |
|
"eval_samples_per_second": 11.02, |
|
"eval_steps_per_second": 2.755, |
|
"eval_wer": 0.4050925925925926, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.389380530973451, |
|
"grad_norm": 1.3216720819473267, |
|
"learning_rate": 0.0002986232294617563, |
|
"loss": 0.8454, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.389380530973451, |
|
"eval_loss": 0.33166107535362244, |
|
"eval_runtime": 18.7736, |
|
"eval_samples_per_second": 11.079, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.38842592592592595, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.274336283185841, |
|
"grad_norm": 2.39943528175354, |
|
"learning_rate": 0.00029851699716713876, |
|
"loss": 0.8164, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.274336283185841, |
|
"eval_loss": 0.33193060755729675, |
|
"eval_runtime": 18.8153, |
|
"eval_samples_per_second": 11.055, |
|
"eval_steps_per_second": 2.764, |
|
"eval_wer": 0.40324074074074073, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.15929203539823, |
|
"grad_norm": 6.335964202880859, |
|
"learning_rate": 0.00029841076487252126, |
|
"loss": 0.7673, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.15929203539823, |
|
"eval_loss": 0.33113545179367065, |
|
"eval_runtime": 18.711, |
|
"eval_samples_per_second": 11.116, |
|
"eval_steps_per_second": 2.779, |
|
"eval_wer": 0.3921296296296296, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.044247787610619, |
|
"grad_norm": 1.1695411205291748, |
|
"learning_rate": 0.00029830559490084984, |
|
"loss": 0.7953, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 15.044247787610619, |
|
"eval_loss": 0.33329564332962036, |
|
"eval_runtime": 18.8623, |
|
"eval_samples_per_second": 11.027, |
|
"eval_steps_per_second": 2.757, |
|
"eval_wer": 0.39444444444444443, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 15.929203539823009, |
|
"grad_norm": 13.718667030334473, |
|
"learning_rate": 0.0002981993626062323, |
|
"loss": 0.7527, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.929203539823009, |
|
"eval_loss": 0.3312545120716095, |
|
"eval_runtime": 18.8046, |
|
"eval_samples_per_second": 11.061, |
|
"eval_steps_per_second": 2.765, |
|
"eval_wer": 0.39166666666666666, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.8141592920354, |
|
"grad_norm": 1.9348554611206055, |
|
"learning_rate": 0.0002980931303116147, |
|
"loss": 0.763, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 16.8141592920354, |
|
"eval_loss": 0.3277539610862732, |
|
"eval_runtime": 18.7599, |
|
"eval_samples_per_second": 11.087, |
|
"eval_steps_per_second": 2.772, |
|
"eval_wer": 0.39305555555555555, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.699115044247787, |
|
"grad_norm": 18.476669311523438, |
|
"learning_rate": 0.0002979868980169971, |
|
"loss": 0.7319, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.699115044247787, |
|
"eval_loss": 0.32343700528144836, |
|
"eval_runtime": 18.8597, |
|
"eval_samples_per_second": 11.029, |
|
"eval_steps_per_second": 2.757, |
|
"eval_wer": 0.37546296296296294, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.58407079646018, |
|
"grad_norm": 2.2555744647979736, |
|
"learning_rate": 0.00029788066572237955, |
|
"loss": 0.7352, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 18.58407079646018, |
|
"eval_loss": 0.3248392343521118, |
|
"eval_runtime": 18.8863, |
|
"eval_samples_per_second": 11.013, |
|
"eval_steps_per_second": 2.753, |
|
"eval_wer": 0.38055555555555554, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.469026548672566, |
|
"grad_norm": 2.2022745609283447, |
|
"learning_rate": 0.00029777443342776205, |
|
"loss": 0.7017, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 19.469026548672566, |
|
"eval_loss": 0.3333507776260376, |
|
"eval_runtime": 18.7742, |
|
"eval_samples_per_second": 11.079, |
|
"eval_steps_per_second": 2.77, |
|
"eval_wer": 0.3851851851851852, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.353982300884955, |
|
"grad_norm": 7.710162162780762, |
|
"learning_rate": 0.00029766820113314444, |
|
"loss": 0.6902, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.353982300884955, |
|
"eval_loss": 0.330443412065506, |
|
"eval_runtime": 18.756, |
|
"eval_samples_per_second": 11.09, |
|
"eval_steps_per_second": 2.772, |
|
"eval_wer": 0.3888888888888889, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.238938053097346, |
|
"grad_norm": 3.896944522857666, |
|
"learning_rate": 0.0002975619688385269, |
|
"loss": 0.707, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 21.238938053097346, |
|
"eval_loss": 0.3313958942890167, |
|
"eval_runtime": 18.7923, |
|
"eval_samples_per_second": 11.068, |
|
"eval_steps_per_second": 2.767, |
|
"eval_wer": 0.38564814814814813, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 21.238938053097346, |
|
"step": 2400, |
|
"total_flos": 1.5969122077347269e+19, |
|
"train_loss": 1.5763085651397706, |
|
"train_runtime": 3424.9125, |
|
"train_samples_per_second": 657.681, |
|
"train_steps_per_second": 82.484 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 282500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2500, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.5969122077347269e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|