|
{ |
|
"best_metric": 0.3741886615753174, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-toigen-balanced-model/checkpoint-2400", |
|
"epoch": 12.053571428571429, |
|
"eval_steps": 100, |
|
"global_step": 2700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"grad_norm": 3.282467842102051, |
|
"learning_rate": 0.00028199999999999997, |
|
"loss": 7.7726, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"eval_loss": 3.8109493255615234, |
|
"eval_runtime": 17.8235, |
|
"eval_samples_per_second": 11.446, |
|
"eval_steps_per_second": 2.861, |
|
"eval_wer": 0.9938476100331283, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 2.164923667907715, |
|
"learning_rate": 0.00029574018126888213, |
|
"loss": 2.5726, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"eval_loss": 0.8106288313865662, |
|
"eval_runtime": 17.6933, |
|
"eval_samples_per_second": 11.53, |
|
"eval_steps_per_second": 2.882, |
|
"eval_wer": 0.616658778987222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3392857142857144, |
|
"grad_norm": 1.3534202575683594, |
|
"learning_rate": 0.0002912084592145015, |
|
"loss": 0.7986, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3392857142857144, |
|
"eval_loss": 0.5409455299377441, |
|
"eval_runtime": 17.7815, |
|
"eval_samples_per_second": 11.473, |
|
"eval_steps_per_second": 2.868, |
|
"eval_wer": 0.5257927117841931, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 0.9854668378829956, |
|
"learning_rate": 0.00028667673716012085, |
|
"loss": 0.6324, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"eval_loss": 0.5256258845329285, |
|
"eval_runtime": 17.7084, |
|
"eval_samples_per_second": 11.52, |
|
"eval_steps_per_second": 2.88, |
|
"eval_wer": 0.5054424988168481, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 18.835981369018555, |
|
"learning_rate": 0.00028214501510574015, |
|
"loss": 0.603, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"eval_loss": 0.4854464828968048, |
|
"eval_runtime": 17.7046, |
|
"eval_samples_per_second": 11.522, |
|
"eval_steps_per_second": 2.881, |
|
"eval_wer": 0.4831992427827733, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 32.54256820678711, |
|
"learning_rate": 0.0002776132930513595, |
|
"loss": 0.59, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"eval_loss": 0.47332894802093506, |
|
"eval_runtime": 17.6431, |
|
"eval_samples_per_second": 11.563, |
|
"eval_steps_per_second": 2.891, |
|
"eval_wer": 0.4846190250828206, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 1.3252086639404297, |
|
"learning_rate": 0.0002730815709969788, |
|
"loss": 0.5489, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"eval_loss": 0.4439888894557953, |
|
"eval_runtime": 17.8297, |
|
"eval_samples_per_second": 11.442, |
|
"eval_steps_per_second": 2.86, |
|
"eval_wer": 0.46568859441552296, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.5452375411987305, |
|
"learning_rate": 0.00026854984894259817, |
|
"loss": 0.5173, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"eval_loss": 0.43219566345214844, |
|
"eval_runtime": 17.6382, |
|
"eval_samples_per_second": 11.566, |
|
"eval_steps_per_second": 2.891, |
|
"eval_wer": 0.45764316138192146, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.017857142857143, |
|
"grad_norm": 0.7151035070419312, |
|
"learning_rate": 0.0002640181268882175, |
|
"loss": 0.5315, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.017857142857143, |
|
"eval_loss": 0.4285721480846405, |
|
"eval_runtime": 17.7542, |
|
"eval_samples_per_second": 11.49, |
|
"eval_steps_per_second": 2.873, |
|
"eval_wer": 0.44533838144817794, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.464285714285714, |
|
"grad_norm": 1.8268319368362427, |
|
"learning_rate": 0.0002594864048338368, |
|
"loss": 0.4912, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.464285714285714, |
|
"eval_loss": 0.42536306381225586, |
|
"eval_runtime": 17.765, |
|
"eval_samples_per_second": 11.483, |
|
"eval_steps_per_second": 2.871, |
|
"eval_wer": 0.4458116422148604, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.910714285714286, |
|
"grad_norm": 0.850709080696106, |
|
"learning_rate": 0.0002549546827794562, |
|
"loss": 0.4728, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.910714285714286, |
|
"eval_loss": 0.43455594778060913, |
|
"eval_runtime": 17.7563, |
|
"eval_samples_per_second": 11.489, |
|
"eval_steps_per_second": 2.872, |
|
"eval_wer": 0.44297207761476576, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.357142857142857, |
|
"grad_norm": 0.7361202836036682, |
|
"learning_rate": 0.00025042296072507554, |
|
"loss": 0.4989, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.357142857142857, |
|
"eval_loss": 0.40502411127090454, |
|
"eval_runtime": 17.6139, |
|
"eval_samples_per_second": 11.582, |
|
"eval_steps_per_second": 2.895, |
|
"eval_wer": 0.42924751538097494, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.803571428571429, |
|
"grad_norm": 1.305498719215393, |
|
"learning_rate": 0.00024589123867069484, |
|
"loss": 0.4661, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.803571428571429, |
|
"eval_loss": 0.4019148647785187, |
|
"eval_runtime": 17.792, |
|
"eval_samples_per_second": 11.466, |
|
"eval_steps_per_second": 2.866, |
|
"eval_wer": 0.4254614292475154, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 1.875386357307434, |
|
"learning_rate": 0.00024135951661631417, |
|
"loss": 0.4755, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_loss": 0.4128676652908325, |
|
"eval_runtime": 17.7386, |
|
"eval_samples_per_second": 11.5, |
|
"eval_steps_per_second": 2.875, |
|
"eval_wer": 0.44486512068149553, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.696428571428571, |
|
"grad_norm": 1.3318761587142944, |
|
"learning_rate": 0.0002368277945619335, |
|
"loss": 0.4603, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.696428571428571, |
|
"eval_loss": 0.40455254912376404, |
|
"eval_runtime": 17.8115, |
|
"eval_samples_per_second": 11.453, |
|
"eval_steps_per_second": 2.863, |
|
"eval_wer": 0.4254614292475154, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 1.7303593158721924, |
|
"learning_rate": 0.00023229607250755283, |
|
"loss": 0.4229, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"eval_loss": 0.3939039707183838, |
|
"eval_runtime": 17.7505, |
|
"eval_samples_per_second": 11.493, |
|
"eval_steps_per_second": 2.873, |
|
"eval_wer": 0.41504969238050166, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.589285714285714, |
|
"grad_norm": 0.9812105894088745, |
|
"learning_rate": 0.0002277643504531722, |
|
"loss": 0.455, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.589285714285714, |
|
"eval_loss": 0.41328728199005127, |
|
"eval_runtime": 17.7719, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 2.87, |
|
"eval_wer": 0.41552295314718407, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.035714285714286, |
|
"grad_norm": 0.6829022765159607, |
|
"learning_rate": 0.00022323262839879152, |
|
"loss": 0.4501, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.035714285714286, |
|
"eval_loss": 0.3978167176246643, |
|
"eval_runtime": 17.743, |
|
"eval_samples_per_second": 11.497, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.4065309985802177, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.482142857142858, |
|
"grad_norm": 1.3150678873062134, |
|
"learning_rate": 0.00021870090634441088, |
|
"loss": 0.45, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.482142857142858, |
|
"eval_loss": 0.3925248682498932, |
|
"eval_runtime": 17.6795, |
|
"eval_samples_per_second": 11.539, |
|
"eval_steps_per_second": 2.885, |
|
"eval_wer": 0.42309512541410316, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.928571428571429, |
|
"grad_norm": 1.0976217985153198, |
|
"learning_rate": 0.0002141691842900302, |
|
"loss": 0.4226, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.928571428571429, |
|
"eval_loss": 0.3901020586490631, |
|
"eval_runtime": 17.7714, |
|
"eval_samples_per_second": 11.479, |
|
"eval_steps_per_second": 2.87, |
|
"eval_wer": 0.4098438239469948, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 0.727407693862915, |
|
"learning_rate": 0.00020963746223564954, |
|
"loss": 0.3973, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"eval_loss": 0.38098010420799255, |
|
"eval_runtime": 17.825, |
|
"eval_samples_per_second": 11.445, |
|
"eval_steps_per_second": 2.861, |
|
"eval_wer": 0.4055844770468528, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.821428571428571, |
|
"grad_norm": 2.031233072280884, |
|
"learning_rate": 0.00020510574018126884, |
|
"loss": 0.4038, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.821428571428571, |
|
"eval_loss": 0.41775575280189514, |
|
"eval_runtime": 17.8559, |
|
"eval_samples_per_second": 11.425, |
|
"eval_steps_per_second": 2.856, |
|
"eval_wer": 0.4117368670137246, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.267857142857142, |
|
"grad_norm": 1.3557627201080322, |
|
"learning_rate": 0.0002005740181268882, |
|
"loss": 0.4559, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.267857142857142, |
|
"eval_loss": 0.38752201199531555, |
|
"eval_runtime": 17.7454, |
|
"eval_samples_per_second": 11.496, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.40747752011358257, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.714285714285714, |
|
"grad_norm": 0.7480702996253967, |
|
"learning_rate": 0.00019604229607250753, |
|
"loss": 0.4399, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.714285714285714, |
|
"eval_loss": 0.3741886615753174, |
|
"eval_runtime": 17.7283, |
|
"eval_samples_per_second": 11.507, |
|
"eval_steps_per_second": 2.877, |
|
"eval_wer": 0.39895882631329865, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.160714285714286, |
|
"grad_norm": 1.065514087677002, |
|
"learning_rate": 0.00019151057401812688, |
|
"loss": 0.3545, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.160714285714286, |
|
"eval_loss": 0.38181087374687195, |
|
"eval_runtime": 17.8745, |
|
"eval_samples_per_second": 11.413, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 0.40132513014671084, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.607142857142858, |
|
"grad_norm": 1.19502854347229, |
|
"learning_rate": 0.0001869788519637462, |
|
"loss": 0.4452, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11.607142857142858, |
|
"eval_loss": 0.3905617594718933, |
|
"eval_runtime": 17.8748, |
|
"eval_samples_per_second": 11.413, |
|
"eval_steps_per_second": 2.853, |
|
"eval_wer": 0.39801230477993377, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 12.053571428571429, |
|
"grad_norm": 0.8653120994567871, |
|
"learning_rate": 0.00018244712990936554, |
|
"loss": 0.4014, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 12.053571428571429, |
|
"eval_loss": 0.3751629889011383, |
|
"eval_runtime": 17.7431, |
|
"eval_samples_per_second": 11.497, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.39990534784666354, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 12.053571428571429, |
|
"step": 2700, |
|
"total_flos": 9.163482510982138e+18, |
|
"train_loss": 0.8344545293737341, |
|
"train_runtime": 2390.0152, |
|
"train_samples_per_second": 11.222, |
|
"train_steps_per_second": 2.812 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 6720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.163482510982138e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|