{ "best_metric": 48.88501742160279, "best_model_checkpoint": "./whisper-small-ar/checkpoint-60", "epoch": 34.285714285714285, "eval_steps": 10, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5714285714285714, "grad_norm": 58.168914794921875, "learning_rate": 1e-07, "loss": 2.3005, "step": 1 }, { "epoch": 1.1428571428571428, "grad_norm": 20.130443572998047, "learning_rate": 1.0080000000000002e-05, "loss": 1.829, "step": 2 }, { "epoch": 1.7142857142857144, "grad_norm": 21.1949405670166, "learning_rate": 2.006e-05, "loss": 1.8278, "step": 3 }, { "epoch": 2.2857142857142856, "grad_norm": 17.402198791503906, "learning_rate": 3.0039999999999997e-05, "loss": 1.6203, "step": 4 }, { "epoch": 2.857142857142857, "grad_norm": 8.070812225341797, "learning_rate": 4.0020000000000006e-05, "loss": 1.3838, "step": 5 }, { "epoch": 3.4285714285714284, "grad_norm": 10.189924240112305, "learning_rate": 5e-05, "loss": 1.2624, "step": 6 }, { "epoch": 4.0, "grad_norm": 11.481833457946777, "learning_rate": 4.9744102564102566e-05, "loss": 1.1801, "step": 7 }, { "epoch": 4.571428571428571, "grad_norm": 8.908258438110352, "learning_rate": 4.948820512820513e-05, "loss": 1.036, "step": 8 }, { "epoch": 5.142857142857143, "grad_norm": 6.949155807495117, "learning_rate": 4.92323076923077e-05, "loss": 0.9218, "step": 9 }, { "epoch": 5.714285714285714, "grad_norm": 5.213706016540527, "learning_rate": 4.897641025641026e-05, "loss": 0.821, "step": 10 }, { "epoch": 5.714285714285714, "eval_loss": 1.0209691524505615, "eval_runtime": 36.4465, "eval_samples_per_second": 2.771, "eval_steps_per_second": 0.192, "eval_wer": 93.58885017421603, "step": 10 }, { "epoch": 6.285714285714286, "grad_norm": 5.321272850036621, "learning_rate": 4.872051282051282e-05, "loss": 0.7761, "step": 11 }, { "epoch": 6.857142857142857, "grad_norm": 4.693880081176758, "learning_rate": 4.8464615384615386e-05, "loss": 0.7099, "step": 12 }, { "epoch": 7.428571428571429, "grad_norm": 4.722415924072266, "learning_rate": 4.820871794871795e-05, "loss": 0.6289, "step": 13 }, { "epoch": 8.0, "grad_norm": 3.9184811115264893, "learning_rate": 4.795282051282052e-05, "loss": 0.5725, "step": 14 }, { "epoch": 8.571428571428571, "grad_norm": 3.7839274406433105, "learning_rate": 4.7696923076923084e-05, "loss": 0.5235, "step": 15 }, { "epoch": 9.142857142857142, "grad_norm": 4.117933750152588, "learning_rate": 4.744102564102564e-05, "loss": 0.4812, "step": 16 }, { "epoch": 9.714285714285714, "grad_norm": 3.781254529953003, "learning_rate": 4.7185128205128205e-05, "loss": 0.4397, "step": 17 }, { "epoch": 10.285714285714286, "grad_norm": 3.4398269653320312, "learning_rate": 4.692923076923077e-05, "loss": 0.3749, "step": 18 }, { "epoch": 10.857142857142858, "grad_norm": 3.5433449745178223, "learning_rate": 4.667333333333334e-05, "loss": 0.3506, "step": 19 }, { "epoch": 11.428571428571429, "grad_norm": 3.3172426223754883, "learning_rate": 4.64174358974359e-05, "loss": 0.3185, "step": 20 }, { "epoch": 11.428571428571429, "eval_loss": 0.83645099401474, "eval_runtime": 27.2985, "eval_samples_per_second": 3.7, "eval_steps_per_second": 0.256, "eval_wer": 109.26829268292684, "step": 20 }, { "epoch": 12.0, "grad_norm": 3.2160873413085938, "learning_rate": 4.616153846153847e-05, "loss": 0.2779, "step": 21 }, { "epoch": 12.571428571428571, "grad_norm": 3.265079975128174, "learning_rate": 4.5905641025641024e-05, "loss": 0.2553, "step": 22 }, { "epoch": 13.142857142857142, "grad_norm": 3.1832408905029297, "learning_rate": 4.564974358974359e-05, "loss": 0.2157, "step": 23 }, { "epoch": 13.714285714285714, "grad_norm": 2.9767119884490967, "learning_rate": 4.539384615384616e-05, "loss": 0.1921, "step": 24 }, { "epoch": 14.285714285714286, "grad_norm": 2.942561388015747, "learning_rate": 4.513794871794872e-05, "loss": 0.1741, "step": 25 }, { "epoch": 14.857142857142858, "grad_norm": 2.837989091873169, "learning_rate": 4.4882051282051286e-05, "loss": 0.1422, "step": 26 }, { "epoch": 15.428571428571429, "grad_norm": 2.894479513168335, "learning_rate": 4.462615384615385e-05, "loss": 0.1257, "step": 27 }, { "epoch": 16.0, "grad_norm": 2.841754198074341, "learning_rate": 4.437025641025641e-05, "loss": 0.1039, "step": 28 }, { "epoch": 16.571428571428573, "grad_norm": 2.616755485534668, "learning_rate": 4.411435897435898e-05, "loss": 0.0847, "step": 29 }, { "epoch": 17.142857142857142, "grad_norm": 2.05346417427063, "learning_rate": 4.385846153846154e-05, "loss": 0.0606, "step": 30 }, { "epoch": 17.142857142857142, "eval_loss": 0.8186278939247131, "eval_runtime": 26.7925, "eval_samples_per_second": 3.77, "eval_steps_per_second": 0.261, "eval_wer": 54.843205574912886, "step": 30 }, { "epoch": 17.714285714285715, "grad_norm": 1.4241214990615845, "learning_rate": 4.3602564102564106e-05, "loss": 0.051, "step": 31 }, { "epoch": 18.285714285714285, "grad_norm": 0.806209921836853, "learning_rate": 4.334666666666667e-05, "loss": 0.0336, "step": 32 }, { "epoch": 18.857142857142858, "grad_norm": 0.7847088575363159, "learning_rate": 4.309076923076923e-05, "loss": 0.0282, "step": 33 }, { "epoch": 19.428571428571427, "grad_norm": 0.9520515203475952, "learning_rate": 4.28348717948718e-05, "loss": 0.0204, "step": 34 }, { "epoch": 20.0, "grad_norm": 0.37412479519844055, "learning_rate": 4.257897435897436e-05, "loss": 0.0163, "step": 35 }, { "epoch": 20.571428571428573, "grad_norm": 0.3346174657344818, "learning_rate": 4.2323076923076925e-05, "loss": 0.0122, "step": 36 }, { "epoch": 21.142857142857142, "grad_norm": 0.23975922167301178, "learning_rate": 4.206717948717949e-05, "loss": 0.0105, "step": 37 }, { "epoch": 21.714285714285715, "grad_norm": 0.2158885896205902, "learning_rate": 4.181128205128205e-05, "loss": 0.0078, "step": 38 }, { "epoch": 22.285714285714285, "grad_norm": 0.15689243376255035, "learning_rate": 4.155538461538462e-05, "loss": 0.0065, "step": 39 }, { "epoch": 22.857142857142858, "grad_norm": 0.10141926258802414, "learning_rate": 4.129948717948718e-05, "loss": 0.0056, "step": 40 }, { "epoch": 22.857142857142858, "eval_loss": 0.9213815927505493, "eval_runtime": 26.8113, "eval_samples_per_second": 3.767, "eval_steps_per_second": 0.261, "eval_wer": 50.139372822299656, "step": 40 }, { "epoch": 23.428571428571427, "grad_norm": 0.07463113218545914, "learning_rate": 4.1043589743589744e-05, "loss": 0.0047, "step": 41 }, { "epoch": 24.0, "grad_norm": 0.09193433821201324, "learning_rate": 4.078769230769231e-05, "loss": 0.0041, "step": 42 }, { "epoch": 24.571428571428573, "grad_norm": 0.06787554919719696, "learning_rate": 4.053179487179487e-05, "loss": 0.0035, "step": 43 }, { "epoch": 25.142857142857142, "grad_norm": 0.07034426182508469, "learning_rate": 4.027589743589744e-05, "loss": 0.0031, "step": 44 }, { "epoch": 25.714285714285715, "grad_norm": 0.06088101118803024, "learning_rate": 4.0020000000000006e-05, "loss": 0.0028, "step": 45 }, { "epoch": 26.285714285714285, "grad_norm": 0.03634655103087425, "learning_rate": 3.9764102564102564e-05, "loss": 0.0024, "step": 46 }, { "epoch": 26.857142857142858, "grad_norm": 0.13105067610740662, "learning_rate": 3.950820512820513e-05, "loss": 0.0022, "step": 47 }, { "epoch": 27.428571428571427, "grad_norm": 0.02541457489132881, "learning_rate": 3.925230769230769e-05, "loss": 0.002, "step": 48 }, { "epoch": 28.0, "grad_norm": 0.04503984376788139, "learning_rate": 3.899641025641026e-05, "loss": 0.0017, "step": 49 }, { "epoch": 28.571428571428573, "grad_norm": 0.08539522439241409, "learning_rate": 3.8740512820512826e-05, "loss": 0.0019, "step": 50 }, { "epoch": 28.571428571428573, "eval_loss": 1.0016592741012573, "eval_runtime": 26.5804, "eval_samples_per_second": 3.8, "eval_steps_per_second": 0.263, "eval_wer": 50.87108013937283, "step": 50 }, { "epoch": 29.142857142857142, "grad_norm": 0.06402863562107086, "learning_rate": 3.848461538461539e-05, "loss": 0.0017, "step": 51 }, { "epoch": 29.714285714285715, "grad_norm": 0.016784947365522385, "learning_rate": 3.822871794871795e-05, "loss": 0.0013, "step": 52 }, { "epoch": 30.285714285714285, "grad_norm": 0.030725400894880295, "learning_rate": 3.797282051282051e-05, "loss": 0.0013, "step": 53 }, { "epoch": 30.857142857142858, "grad_norm": 0.015844004228711128, "learning_rate": 3.771692307692308e-05, "loss": 0.0011, "step": 54 }, { "epoch": 31.428571428571427, "grad_norm": 0.015942782163619995, "learning_rate": 3.7461025641025645e-05, "loss": 0.0011, "step": 55 }, { "epoch": 32.0, "grad_norm": 0.18212947249412537, "learning_rate": 3.720512820512821e-05, "loss": 0.0012, "step": 56 }, { "epoch": 32.57142857142857, "grad_norm": 0.016346724703907967, "learning_rate": 3.694923076923077e-05, "loss": 0.0009, "step": 57 }, { "epoch": 33.142857142857146, "grad_norm": 0.010927367024123669, "learning_rate": 3.669333333333333e-05, "loss": 0.0009, "step": 58 }, { "epoch": 33.714285714285715, "grad_norm": 0.013612424023449421, "learning_rate": 3.64374358974359e-05, "loss": 0.0008, "step": 59 }, { "epoch": 34.285714285714285, "grad_norm": 0.10505015403032303, "learning_rate": 3.6181538461538464e-05, "loss": 0.0009, "step": 60 }, { "epoch": 34.285714285714285, "eval_loss": 1.0416876077651978, "eval_runtime": 26.3087, "eval_samples_per_second": 3.839, "eval_steps_per_second": 0.266, "eval_wer": 48.88501742160279, "step": 60 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 200, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.432671678450893e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }