|
{ |
|
"best_metric": 23.09402795425667, |
|
"best_model_checkpoint": "whisper4/checkpoint-130", |
|
"epoch": 8.333333333333334, |
|
"eval_steps": 10, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 45.38771057128906, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 3.9755, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 42.970829010009766, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 3.8231, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"eval_loss": 3.7088065147399902, |
|
"eval_runtime": 261.4715, |
|
"eval_samples_per_second": 1.912, |
|
"eval_steps_per_second": 0.241, |
|
"eval_wer": 76.93773824650572, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 42.34159469604492, |
|
"learning_rate": 3e-06, |
|
"loss": 3.5982, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 41.4112548828125, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 3.1925, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"eval_loss": 2.9438674449920654, |
|
"eval_runtime": 254.9124, |
|
"eval_samples_per_second": 1.961, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 65.56543837357052, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 39.323768615722656, |
|
"learning_rate": 5e-06, |
|
"loss": 2.7453, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 30.337932586669922, |
|
"learning_rate": 6e-06, |
|
"loss": 2.1383, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"eval_loss": 1.722086787223816, |
|
"eval_runtime": 256.1025, |
|
"eval_samples_per_second": 1.952, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 61.53113087674714, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 18.886234283447266, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 1.4936, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 10.311893463134766, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.0671, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"eval_loss": 0.8320145606994629, |
|
"eval_runtime": 254.169, |
|
"eval_samples_per_second": 1.967, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 50.69885641677255, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 6.484711647033691, |
|
"learning_rate": 9e-06, |
|
"loss": 0.7616, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 5.488741874694824, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6947, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"eval_loss": 0.6587409377098083, |
|
"eval_runtime": 256.6295, |
|
"eval_samples_per_second": 1.948, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 41.01016518424396, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5277777777777777, |
|
"grad_norm": 5.178290843963623, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.6099, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 5.149004936218262, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.6263, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"eval_loss": 0.5873834490776062, |
|
"eval_runtime": 254.3491, |
|
"eval_samples_per_second": 1.966, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 29.796696315120712, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.8055555555555556, |
|
"grad_norm": 5.192800521850586, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.5654, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 4.883419036865234, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.5827, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"eval_loss": 0.5402054190635681, |
|
"eval_runtime": 256.4853, |
|
"eval_samples_per_second": 1.949, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 27.38246505717916, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 4.878260612487793, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.5057, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 4.442017555236816, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.4222, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"eval_loss": 0.5154020190238953, |
|
"eval_runtime": 256.4153, |
|
"eval_samples_per_second": 1.95, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 32.05209656925032, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.361111111111111, |
|
"grad_norm": 4.793032646179199, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.3807, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 4.618255138397217, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.4065, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.49971044063568115, |
|
"eval_runtime": 254.4642, |
|
"eval_samples_per_second": 1.965, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 25.698856416772554, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.638888888888889, |
|
"grad_norm": 4.196300983428955, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.3807, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 3.9986796379089355, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3959, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_loss": 0.48037058115005493, |
|
"eval_runtime": 254.5231, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 23.824650571791615, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 4.1782402992248535, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.3847, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 3.125694751739502, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.3081, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"eval_loss": 0.46701571345329285, |
|
"eval_runtime": 254.757, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 24.841168996188056, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.1944444444444446, |
|
"grad_norm": 3.2956347465515137, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.2701, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 3.4233288764953613, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.2497, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"eval_loss": 0.4687294661998749, |
|
"eval_runtime": 255.0619, |
|
"eval_samples_per_second": 1.96, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 23.284625158831005, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.4722222222222223, |
|
"grad_norm": 3.383148193359375, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.268, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 3.4522266387939453, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.2535, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"eval_loss": 0.4594000279903412, |
|
"eval_runtime": 254.9771, |
|
"eval_samples_per_second": 1.961, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 23.09402795425667, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 3.3171420097351074, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.2174, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 3.3734307289123535, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.2428, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"eval_loss": 0.45448967814445496, |
|
"eval_runtime": 252.6636, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 23.506988564167724, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.027777777777778, |
|
"grad_norm": 3.152697801589966, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.2136, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 3.5505619049072266, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1627, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"eval_loss": 0.46508893370628357, |
|
"eval_runtime": 253.2857, |
|
"eval_samples_per_second": 1.974, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 24.49174078780178, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.305555555555555, |
|
"grad_norm": 2.473661184310913, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.1302, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 2.767871379852295, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.1224, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"eval_loss": 0.46860620379447937, |
|
"eval_runtime": 252.3676, |
|
"eval_samples_per_second": 1.981, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 23.69758576874206, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.583333333333333, |
|
"grad_norm": 2.4746224880218506, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.1386, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"grad_norm": 2.802751302719116, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.1326, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.722222222222222, |
|
"eval_loss": 0.46528080105781555, |
|
"eval_runtime": 252.901, |
|
"eval_samples_per_second": 1.977, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 23.69758576874206, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.861111111111111, |
|
"grad_norm": 3.2861883640289307, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1416, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.430758953094482, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1334, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.474071204662323, |
|
"eval_runtime": 253.2666, |
|
"eval_samples_per_second": 1.974, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 24.74587039390089, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.138888888888889, |
|
"grad_norm": 2.6109585762023926, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.0807, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"grad_norm": 1.921268343925476, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.0659, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.277777777777778, |
|
"eval_loss": 0.4791569113731384, |
|
"eval_runtime": 252.5034, |
|
"eval_samples_per_second": 1.98, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 24.68233799237611, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.416666666666667, |
|
"grad_norm": 1.5123528242111206, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.0548, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 1.7394624948501587, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0639, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"eval_loss": 0.4760441780090332, |
|
"eval_runtime": 254.7055, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 33.38627700127065, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.694444444444445, |
|
"grad_norm": 2.006833553314209, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.0718, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"grad_norm": 1.9101831912994385, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.0667, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.833333333333333, |
|
"eval_loss": 0.48197290301322937, |
|
"eval_runtime": 252.8934, |
|
"eval_samples_per_second": 1.977, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 25.47649301143583, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.972222222222222, |
|
"grad_norm": 2.15120267868042, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.0833, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"grad_norm": 1.230398178100586, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.042, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.111111111111111, |
|
"eval_loss": 0.4932812750339508, |
|
"eval_runtime": 254.0189, |
|
"eval_samples_per_second": 1.968, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 29.415501905972047, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 1.1129037141799927, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0285, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"grad_norm": 1.4872666597366333, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.0325, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.388888888888889, |
|
"eval_loss": 0.5065749883651733, |
|
"eval_runtime": 253.3018, |
|
"eval_samples_per_second": 1.974, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 29.987293519695047, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.527777777777778, |
|
"grad_norm": 1.527269959449768, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.0322, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 1.6628929376602173, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.0333, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"eval_loss": 0.5125746130943298, |
|
"eval_runtime": 252.7719, |
|
"eval_samples_per_second": 1.978, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 26.08005082592122, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.805555555555555, |
|
"grad_norm": 2.2532899379730225, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.0314, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 3.415356397628784, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0333, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"eval_loss": 0.5072929859161377, |
|
"eval_runtime": 252.6089, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 24.618805590851334, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.083333333333333, |
|
"grad_norm": 0.6166062951087952, |
|
"learning_rate": 5.1000000000000006e-05, |
|
"loss": 0.0251, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"grad_norm": 1.0601465702056885, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 0.0187, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.222222222222222, |
|
"eval_loss": 0.5128570795059204, |
|
"eval_runtime": 253.8562, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 27.350698856416773, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 7.361111111111111, |
|
"grad_norm": 1.31247878074646, |
|
"learning_rate": 5.300000000000001e-05, |
|
"loss": 0.0219, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 0.7731389999389648, |
|
"learning_rate": 5.4000000000000005e-05, |
|
"loss": 0.0214, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.5208793878555298, |
|
"eval_runtime": 254.7138, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 28.208386277001267, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.638888888888889, |
|
"grad_norm": 2.0568604469299316, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.0248, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 1.7497611045837402, |
|
"learning_rate": 5.6000000000000006e-05, |
|
"loss": 0.0187, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"eval_loss": 0.5212948322296143, |
|
"eval_runtime": 254.6913, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 29.320203303684877, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.916666666666667, |
|
"grad_norm": 2.408604383468628, |
|
"learning_rate": 5.6999999999999996e-05, |
|
"loss": 0.0303, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"grad_norm": 1.3855714797973633, |
|
"learning_rate": 5.8e-05, |
|
"loss": 0.0312, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.055555555555555, |
|
"eval_loss": 0.5274094939231873, |
|
"eval_runtime": 253.5749, |
|
"eval_samples_per_second": 1.972, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 34.6569250317662, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.194444444444445, |
|
"grad_norm": 1.87900972366333, |
|
"learning_rate": 5.9e-05, |
|
"loss": 0.0153, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 1.318336009979248, |
|
"learning_rate": 6e-05, |
|
"loss": 0.0172, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"eval_loss": 0.5408744812011719, |
|
"eval_runtime": 252.2148, |
|
"eval_samples_per_second": 1.982, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 28.27191867852605, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"step": 300, |
|
"total_flos": 9.2409447186432e+17, |
|
"train_loss": 0.5446730978041887, |
|
"train_runtime": 8175.9362, |
|
"train_samples_per_second": 4.697, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 10, |
|
"total_flos": 9.2409447186432e+17, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|