{ "best_metric": 0.3741886615753174, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-toigen-balanced-model/checkpoint-2400", "epoch": 12.053571428571429, "eval_steps": 100, "global_step": 2700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44642857142857145, "grad_norm": 3.282467842102051, "learning_rate": 0.00028199999999999997, "loss": 7.7726, "step": 100 }, { "epoch": 0.44642857142857145, "eval_loss": 3.8109493255615234, "eval_runtime": 17.8235, "eval_samples_per_second": 11.446, "eval_steps_per_second": 2.861, "eval_wer": 0.9938476100331283, "step": 100 }, { "epoch": 0.8928571428571429, "grad_norm": 2.164923667907715, "learning_rate": 0.00029574018126888213, "loss": 2.5726, "step": 200 }, { "epoch": 0.8928571428571429, "eval_loss": 0.8106288313865662, "eval_runtime": 17.6933, "eval_samples_per_second": 11.53, "eval_steps_per_second": 2.882, "eval_wer": 0.616658778987222, "step": 200 }, { "epoch": 1.3392857142857144, "grad_norm": 1.3534202575683594, "learning_rate": 0.0002912084592145015, "loss": 0.7986, "step": 300 }, { "epoch": 1.3392857142857144, "eval_loss": 0.5409455299377441, "eval_runtime": 17.7815, "eval_samples_per_second": 11.473, "eval_steps_per_second": 2.868, "eval_wer": 0.5257927117841931, "step": 300 }, { "epoch": 1.7857142857142856, "grad_norm": 0.9854668378829956, "learning_rate": 0.00028667673716012085, "loss": 0.6324, "step": 400 }, { "epoch": 1.7857142857142856, "eval_loss": 0.5256258845329285, "eval_runtime": 17.7084, "eval_samples_per_second": 11.52, "eval_steps_per_second": 2.88, "eval_wer": 0.5054424988168481, "step": 400 }, { "epoch": 2.232142857142857, "grad_norm": 18.835981369018555, "learning_rate": 0.00028214501510574015, "loss": 0.603, "step": 500 }, { "epoch": 2.232142857142857, "eval_loss": 0.4854464828968048, "eval_runtime": 17.7046, "eval_samples_per_second": 11.522, "eval_steps_per_second": 2.881, "eval_wer": 0.4831992427827733, "step": 500 }, { "epoch": 2.678571428571429, "grad_norm": 32.54256820678711, "learning_rate": 0.0002776132930513595, "loss": 0.59, "step": 600 }, { "epoch": 2.678571428571429, "eval_loss": 0.47332894802093506, "eval_runtime": 17.6431, "eval_samples_per_second": 11.563, "eval_steps_per_second": 2.891, "eval_wer": 0.4846190250828206, "step": 600 }, { "epoch": 3.125, "grad_norm": 1.3252086639404297, "learning_rate": 0.0002730815709969788, "loss": 0.5489, "step": 700 }, { "epoch": 3.125, "eval_loss": 0.4439888894557953, "eval_runtime": 17.8297, "eval_samples_per_second": 11.442, "eval_steps_per_second": 2.86, "eval_wer": 0.46568859441552296, "step": 700 }, { "epoch": 3.571428571428571, "grad_norm": 0.5452375411987305, "learning_rate": 0.00026854984894259817, "loss": 0.5173, "step": 800 }, { "epoch": 3.571428571428571, "eval_loss": 0.43219566345214844, "eval_runtime": 17.6382, "eval_samples_per_second": 11.566, "eval_steps_per_second": 2.891, "eval_wer": 0.45764316138192146, "step": 800 }, { "epoch": 4.017857142857143, "grad_norm": 0.7151035070419312, "learning_rate": 0.0002640181268882175, "loss": 0.5315, "step": 900 }, { "epoch": 4.017857142857143, "eval_loss": 0.4285721480846405, "eval_runtime": 17.7542, "eval_samples_per_second": 11.49, "eval_steps_per_second": 2.873, "eval_wer": 0.44533838144817794, "step": 900 }, { "epoch": 4.464285714285714, "grad_norm": 1.8268319368362427, "learning_rate": 0.0002594864048338368, "loss": 0.4912, "step": 1000 }, { "epoch": 4.464285714285714, "eval_loss": 0.42536306381225586, "eval_runtime": 17.765, "eval_samples_per_second": 11.483, "eval_steps_per_second": 2.871, "eval_wer": 0.4458116422148604, "step": 1000 }, { "epoch": 4.910714285714286, "grad_norm": 0.850709080696106, "learning_rate": 0.0002549546827794562, "loss": 0.4728, "step": 1100 }, { "epoch": 4.910714285714286, "eval_loss": 0.43455594778060913, "eval_runtime": 17.7563, "eval_samples_per_second": 11.489, "eval_steps_per_second": 2.872, "eval_wer": 0.44297207761476576, "step": 1100 }, { "epoch": 5.357142857142857, "grad_norm": 0.7361202836036682, "learning_rate": 0.00025042296072507554, "loss": 0.4989, "step": 1200 }, { "epoch": 5.357142857142857, "eval_loss": 0.40502411127090454, "eval_runtime": 17.6139, "eval_samples_per_second": 11.582, "eval_steps_per_second": 2.895, "eval_wer": 0.42924751538097494, "step": 1200 }, { "epoch": 5.803571428571429, "grad_norm": 1.305498719215393, "learning_rate": 0.00024589123867069484, "loss": 0.4661, "step": 1300 }, { "epoch": 5.803571428571429, "eval_loss": 0.4019148647785187, "eval_runtime": 17.792, "eval_samples_per_second": 11.466, "eval_steps_per_second": 2.866, "eval_wer": 0.4254614292475154, "step": 1300 }, { "epoch": 6.25, "grad_norm": 1.875386357307434, "learning_rate": 0.00024135951661631417, "loss": 0.4755, "step": 1400 }, { "epoch": 6.25, "eval_loss": 0.4128676652908325, "eval_runtime": 17.7386, "eval_samples_per_second": 11.5, "eval_steps_per_second": 2.875, "eval_wer": 0.44486512068149553, "step": 1400 }, { "epoch": 6.696428571428571, "grad_norm": 1.3318761587142944, "learning_rate": 0.0002368277945619335, "loss": 0.4603, "step": 1500 }, { "epoch": 6.696428571428571, "eval_loss": 0.40455254912376404, "eval_runtime": 17.8115, "eval_samples_per_second": 11.453, "eval_steps_per_second": 2.863, "eval_wer": 0.4254614292475154, "step": 1500 }, { "epoch": 7.142857142857143, "grad_norm": 1.7303593158721924, "learning_rate": 0.00023229607250755283, "loss": 0.4229, "step": 1600 }, { "epoch": 7.142857142857143, "eval_loss": 0.3939039707183838, "eval_runtime": 17.7505, "eval_samples_per_second": 11.493, "eval_steps_per_second": 2.873, "eval_wer": 0.41504969238050166, "step": 1600 }, { "epoch": 7.589285714285714, "grad_norm": 0.9812105894088745, "learning_rate": 0.0002277643504531722, "loss": 0.455, "step": 1700 }, { "epoch": 7.589285714285714, "eval_loss": 0.41328728199005127, "eval_runtime": 17.7719, "eval_samples_per_second": 11.479, "eval_steps_per_second": 2.87, "eval_wer": 0.41552295314718407, "step": 1700 }, { "epoch": 8.035714285714286, "grad_norm": 0.6829022765159607, "learning_rate": 0.00022323262839879152, "loss": 0.4501, "step": 1800 }, { "epoch": 8.035714285714286, "eval_loss": 0.3978167176246643, "eval_runtime": 17.743, "eval_samples_per_second": 11.497, "eval_steps_per_second": 2.874, "eval_wer": 0.4065309985802177, "step": 1800 }, { "epoch": 8.482142857142858, "grad_norm": 1.3150678873062134, "learning_rate": 0.00021870090634441088, "loss": 0.45, "step": 1900 }, { "epoch": 8.482142857142858, "eval_loss": 0.3925248682498932, "eval_runtime": 17.6795, "eval_samples_per_second": 11.539, "eval_steps_per_second": 2.885, "eval_wer": 0.42309512541410316, "step": 1900 }, { "epoch": 8.928571428571429, "grad_norm": 1.0976217985153198, "learning_rate": 0.0002141691842900302, "loss": 0.4226, "step": 2000 }, { "epoch": 8.928571428571429, "eval_loss": 0.3901020586490631, "eval_runtime": 17.7714, "eval_samples_per_second": 11.479, "eval_steps_per_second": 2.87, "eval_wer": 0.4098438239469948, "step": 2000 }, { "epoch": 9.375, "grad_norm": 0.727407693862915, "learning_rate": 0.00020963746223564954, "loss": 0.3973, "step": 2100 }, { "epoch": 9.375, "eval_loss": 0.38098010420799255, "eval_runtime": 17.825, "eval_samples_per_second": 11.445, "eval_steps_per_second": 2.861, "eval_wer": 0.4055844770468528, "step": 2100 }, { "epoch": 9.821428571428571, "grad_norm": 2.031233072280884, "learning_rate": 0.00020510574018126884, "loss": 0.4038, "step": 2200 }, { "epoch": 9.821428571428571, "eval_loss": 0.41775575280189514, "eval_runtime": 17.8559, "eval_samples_per_second": 11.425, "eval_steps_per_second": 2.856, "eval_wer": 0.4117368670137246, "step": 2200 }, { "epoch": 10.267857142857142, "grad_norm": 1.3557627201080322, "learning_rate": 0.0002005740181268882, "loss": 0.4559, "step": 2300 }, { "epoch": 10.267857142857142, "eval_loss": 0.38752201199531555, "eval_runtime": 17.7454, "eval_samples_per_second": 11.496, "eval_steps_per_second": 2.874, "eval_wer": 0.40747752011358257, "step": 2300 }, { "epoch": 10.714285714285714, "grad_norm": 0.7480702996253967, "learning_rate": 0.00019604229607250753, "loss": 0.4399, "step": 2400 }, { "epoch": 10.714285714285714, "eval_loss": 0.3741886615753174, "eval_runtime": 17.7283, "eval_samples_per_second": 11.507, "eval_steps_per_second": 2.877, "eval_wer": 0.39895882631329865, "step": 2400 }, { "epoch": 11.160714285714286, "grad_norm": 1.065514087677002, "learning_rate": 0.00019151057401812688, "loss": 0.3545, "step": 2500 }, { "epoch": 11.160714285714286, "eval_loss": 0.38181087374687195, "eval_runtime": 17.8745, "eval_samples_per_second": 11.413, "eval_steps_per_second": 2.853, "eval_wer": 0.40132513014671084, "step": 2500 }, { "epoch": 11.607142857142858, "grad_norm": 1.19502854347229, "learning_rate": 0.0001869788519637462, "loss": 0.4452, "step": 2600 }, { "epoch": 11.607142857142858, "eval_loss": 0.3905617594718933, "eval_runtime": 17.8748, "eval_samples_per_second": 11.413, "eval_steps_per_second": 2.853, "eval_wer": 0.39801230477993377, "step": 2600 }, { "epoch": 12.053571428571429, "grad_norm": 0.8653120994567871, "learning_rate": 0.00018244712990936554, "loss": 0.4014, "step": 2700 }, { "epoch": 12.053571428571429, "eval_loss": 0.3751629889011383, "eval_runtime": 17.7431, "eval_samples_per_second": 11.497, "eval_steps_per_second": 2.874, "eval_wer": 0.39990534784666354, "step": 2700 }, { "epoch": 12.053571428571429, "step": 2700, "total_flos": 9.163482510982138e+18, "train_loss": 0.8344545293737341, "train_runtime": 2390.0152, "train_samples_per_second": 11.222, "train_steps_per_second": 2.812 } ], "logging_steps": 100, "max_steps": 6720, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.163482510982138e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }