{ "best_metric": 0.32343700528144836, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-toigen-balanced-model/checkpoint-2000", "epoch": 21.238938053097346, "eval_steps": 100, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8849557522123894, "grad_norm": 3.8822503089904785, "learning_rate": 0.00028799999999999995, "loss": 14.2297, "step": 100 }, { "epoch": 0.8849557522123894, "eval_loss": 3.483584403991699, "eval_runtime": 18.7703, "eval_samples_per_second": 11.081, "eval_steps_per_second": 2.77, "eval_wer": 1.0055555555555555, "step": 100 }, { "epoch": 1.7699115044247788, "grad_norm": 3.1678736209869385, "learning_rate": 0.0002998980169971671, "loss": 4.1389, "step": 200 }, { "epoch": 1.7699115044247788, "eval_loss": 0.5561802983283997, "eval_runtime": 18.75, "eval_samples_per_second": 11.093, "eval_steps_per_second": 2.773, "eval_wer": 0.5694444444444444, "step": 200 }, { "epoch": 2.6548672566371683, "grad_norm": 1.6712620258331299, "learning_rate": 0.00029979178470254956, "loss": 1.3643, "step": 300 }, { "epoch": 2.6548672566371683, "eval_loss": 0.4360348582267761, "eval_runtime": 18.7668, "eval_samples_per_second": 11.083, "eval_steps_per_second": 2.771, "eval_wer": 0.49583333333333335, "step": 300 }, { "epoch": 3.5398230088495577, "grad_norm": 1.3147025108337402, "learning_rate": 0.00029968555240793195, "loss": 1.1715, "step": 400 }, { "epoch": 3.5398230088495577, "eval_loss": 0.3980385661125183, "eval_runtime": 18.8024, "eval_samples_per_second": 11.062, "eval_steps_per_second": 2.766, "eval_wer": 0.4824074074074074, "step": 400 }, { "epoch": 4.424778761061947, "grad_norm": 1.7749208211898804, "learning_rate": 0.00029957932011331445, "loss": 1.1309, "step": 500 }, { "epoch": 4.424778761061947, "eval_loss": 0.37851694226264954, "eval_runtime": 19.1098, "eval_samples_per_second": 10.884, "eval_steps_per_second": 2.721, "eval_wer": 0.4583333333333333, "step": 500 }, { "epoch": 5.3097345132743365, "grad_norm": 1.853244662284851, "learning_rate": 0.0002994730878186969, "loss": 1.0283, "step": 600 }, { "epoch": 5.3097345132743365, "eval_loss": 0.37413156032562256, "eval_runtime": 18.6437, "eval_samples_per_second": 11.157, "eval_steps_per_second": 2.789, "eval_wer": 0.4476851851851852, "step": 600 }, { "epoch": 6.1946902654867255, "grad_norm": 1.4990218877792358, "learning_rate": 0.0002993668555240793, "loss": 1.0148, "step": 700 }, { "epoch": 6.1946902654867255, "eval_loss": 0.36694276332855225, "eval_runtime": 18.6699, "eval_samples_per_second": 11.141, "eval_steps_per_second": 2.785, "eval_wer": 0.44027777777777777, "step": 700 }, { "epoch": 7.079646017699115, "grad_norm": 2.7431230545043945, "learning_rate": 0.0002992606232294617, "loss": 0.9961, "step": 800 }, { "epoch": 7.079646017699115, "eval_loss": 0.36071425676345825, "eval_runtime": 18.6525, "eval_samples_per_second": 11.151, "eval_steps_per_second": 2.788, "eval_wer": 0.4356481481481482, "step": 800 }, { "epoch": 7.964601769911504, "grad_norm": 2.0985348224639893, "learning_rate": 0.00029915439093484416, "loss": 0.9248, "step": 900 }, { "epoch": 7.964601769911504, "eval_loss": 0.3580877482891083, "eval_runtime": 18.871, "eval_samples_per_second": 11.022, "eval_steps_per_second": 2.756, "eval_wer": 0.4236111111111111, "step": 900 }, { "epoch": 8.849557522123893, "grad_norm": 1.3972795009613037, "learning_rate": 0.0002990481586402266, "loss": 0.9482, "step": 1000 }, { "epoch": 8.849557522123893, "eval_loss": 0.3462725281715393, "eval_runtime": 18.815, "eval_samples_per_second": 11.055, "eval_steps_per_second": 2.764, "eval_wer": 0.4356481481481482, "step": 1000 }, { "epoch": 9.734513274336283, "grad_norm": 3.017667293548584, "learning_rate": 0.00029894192634560905, "loss": 0.8815, "step": 1100 }, { "epoch": 9.734513274336283, "eval_loss": 0.3487873673439026, "eval_runtime": 18.8526, "eval_samples_per_second": 11.033, "eval_steps_per_second": 2.758, "eval_wer": 0.4273148148148148, "step": 1100 }, { "epoch": 10.619469026548673, "grad_norm": 1.357649803161621, "learning_rate": 0.0002988356940509915, "loss": 0.8209, "step": 1200 }, { "epoch": 10.619469026548673, "eval_loss": 0.33840110898017883, "eval_runtime": 18.6886, "eval_samples_per_second": 11.13, "eval_steps_per_second": 2.782, "eval_wer": 0.4, "step": 1200 }, { "epoch": 11.504424778761061, "grad_norm": 1.8951735496520996, "learning_rate": 0.00029872946175637393, "loss": 0.8754, "step": 1300 }, { "epoch": 11.504424778761061, "eval_loss": 0.3459264636039734, "eval_runtime": 18.8754, "eval_samples_per_second": 11.02, "eval_steps_per_second": 2.755, "eval_wer": 0.4050925925925926, "step": 1300 }, { "epoch": 12.389380530973451, "grad_norm": 1.3216720819473267, "learning_rate": 0.0002986232294617563, "loss": 0.8454, "step": 1400 }, { "epoch": 12.389380530973451, "eval_loss": 0.33166107535362244, "eval_runtime": 18.7736, "eval_samples_per_second": 11.079, "eval_steps_per_second": 2.77, "eval_wer": 0.38842592592592595, "step": 1400 }, { "epoch": 13.274336283185841, "grad_norm": 2.39943528175354, "learning_rate": 0.00029851699716713876, "loss": 0.8164, "step": 1500 }, { "epoch": 13.274336283185841, "eval_loss": 0.33193060755729675, "eval_runtime": 18.8153, "eval_samples_per_second": 11.055, "eval_steps_per_second": 2.764, "eval_wer": 0.40324074074074073, "step": 1500 }, { "epoch": 14.15929203539823, "grad_norm": 6.335964202880859, "learning_rate": 0.00029841076487252126, "loss": 0.7673, "step": 1600 }, { "epoch": 14.15929203539823, "eval_loss": 0.33113545179367065, "eval_runtime": 18.711, "eval_samples_per_second": 11.116, "eval_steps_per_second": 2.779, "eval_wer": 0.3921296296296296, "step": 1600 }, { "epoch": 15.044247787610619, "grad_norm": 1.1695411205291748, "learning_rate": 0.00029830559490084984, "loss": 0.7953, "step": 1700 }, { "epoch": 15.044247787610619, "eval_loss": 0.33329564332962036, "eval_runtime": 18.8623, "eval_samples_per_second": 11.027, "eval_steps_per_second": 2.757, "eval_wer": 0.39444444444444443, "step": 1700 }, { "epoch": 15.929203539823009, "grad_norm": 13.718667030334473, "learning_rate": 0.0002981993626062323, "loss": 0.7527, "step": 1800 }, { "epoch": 15.929203539823009, "eval_loss": 0.3312545120716095, "eval_runtime": 18.8046, "eval_samples_per_second": 11.061, "eval_steps_per_second": 2.765, "eval_wer": 0.39166666666666666, "step": 1800 }, { "epoch": 16.8141592920354, "grad_norm": 1.9348554611206055, "learning_rate": 0.0002980931303116147, "loss": 0.763, "step": 1900 }, { "epoch": 16.8141592920354, "eval_loss": 0.3277539610862732, "eval_runtime": 18.7599, "eval_samples_per_second": 11.087, "eval_steps_per_second": 2.772, "eval_wer": 0.39305555555555555, "step": 1900 }, { "epoch": 17.699115044247787, "grad_norm": 18.476669311523438, "learning_rate": 0.0002979868980169971, "loss": 0.7319, "step": 2000 }, { "epoch": 17.699115044247787, "eval_loss": 0.32343700528144836, "eval_runtime": 18.8597, "eval_samples_per_second": 11.029, "eval_steps_per_second": 2.757, "eval_wer": 0.37546296296296294, "step": 2000 }, { "epoch": 18.58407079646018, "grad_norm": 2.2555744647979736, "learning_rate": 0.00029788066572237955, "loss": 0.7352, "step": 2100 }, { "epoch": 18.58407079646018, "eval_loss": 0.3248392343521118, "eval_runtime": 18.8863, "eval_samples_per_second": 11.013, "eval_steps_per_second": 2.753, "eval_wer": 0.38055555555555554, "step": 2100 }, { "epoch": 19.469026548672566, "grad_norm": 2.2022745609283447, "learning_rate": 0.00029777443342776205, "loss": 0.7017, "step": 2200 }, { "epoch": 19.469026548672566, "eval_loss": 0.3333507776260376, "eval_runtime": 18.7742, "eval_samples_per_second": 11.079, "eval_steps_per_second": 2.77, "eval_wer": 0.3851851851851852, "step": 2200 }, { "epoch": 20.353982300884955, "grad_norm": 7.710162162780762, "learning_rate": 0.00029766820113314444, "loss": 0.6902, "step": 2300 }, { "epoch": 20.353982300884955, "eval_loss": 0.330443412065506, "eval_runtime": 18.756, "eval_samples_per_second": 11.09, "eval_steps_per_second": 2.772, "eval_wer": 0.3888888888888889, "step": 2300 }, { "epoch": 21.238938053097346, "grad_norm": 3.896944522857666, "learning_rate": 0.0002975619688385269, "loss": 0.707, "step": 2400 }, { "epoch": 21.238938053097346, "eval_loss": 0.3313958942890167, "eval_runtime": 18.7923, "eval_samples_per_second": 11.068, "eval_steps_per_second": 2.767, "eval_wer": 0.38564814814814813, "step": 2400 }, { "epoch": 21.238938053097346, "step": 2400, "total_flos": 1.5969122077347269e+19, "train_loss": 1.5763085651397706, "train_runtime": 3424.9125, "train_samples_per_second": 657.681, "train_steps_per_second": 82.484 } ], "logging_steps": 100, "max_steps": 282500, "num_input_tokens_seen": 0, "num_train_epochs": 2500, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5969122077347269e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }