|
{ |
|
"best_metric": 0.946596004891969, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-zh/checkpoint-3000", |
|
"epoch": 3.4066713981547196, |
|
"global_step": 4800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy_score": 0.9769949905874095, |
|
"eval_f1": 0.8151797603195738, |
|
"eval_loss": 0.06385169178247452, |
|
"eval_precision": 0.7889175257731958, |
|
"eval_recall": 0.8432506887052341, |
|
"eval_runtime": 24.1345, |
|
"eval_samples_per_second": 71.309, |
|
"eval_steps_per_second": 8.95, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy_score": 0.9891728619593079, |
|
"eval_f1": 0.9067463010723497, |
|
"eval_loss": 0.03630797192454338, |
|
"eval_precision": 0.8937650521808937, |
|
"eval_recall": 0.9201101928374655, |
|
"eval_runtime": 24.1366, |
|
"eval_samples_per_second": 71.302, |
|
"eval_steps_per_second": 8.949, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.1392, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy_score": 0.9874924220669411, |
|
"eval_f1": 0.9033054450692636, |
|
"eval_loss": 0.03671281412243843, |
|
"eval_precision": 0.8994810161158153, |
|
"eval_recall": 0.9071625344352617, |
|
"eval_runtime": 24.1358, |
|
"eval_samples_per_second": 71.305, |
|
"eval_steps_per_second": 8.949, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy_score": 0.9906405879412484, |
|
"eval_f1": 0.9202271498107085, |
|
"eval_loss": 0.030743952840566635, |
|
"eval_precision": 0.903611258629846, |
|
"eval_recall": 0.937465564738292, |
|
"eval_runtime": 24.1206, |
|
"eval_samples_per_second": 71.35, |
|
"eval_steps_per_second": 8.955, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0391, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy_score": 0.987524329153505, |
|
"eval_f1": 0.9087680961485933, |
|
"eval_loss": 0.03989069163799286, |
|
"eval_precision": 0.9011375947995667, |
|
"eval_recall": 0.9165289256198347, |
|
"eval_runtime": 24.1074, |
|
"eval_samples_per_second": 71.389, |
|
"eval_steps_per_second": 8.96, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy_score": 0.9905555023770779, |
|
"eval_f1": 0.926657645466847, |
|
"eval_loss": 0.032413773238658905, |
|
"eval_precision": 0.9106382978723404, |
|
"eval_recall": 0.9432506887052342, |
|
"eval_runtime": 24.1368, |
|
"eval_samples_per_second": 71.302, |
|
"eval_steps_per_second": 8.949, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy_score": 0.989800367995065, |
|
"eval_f1": 0.9251387572762962, |
|
"eval_loss": 0.036592062562704086, |
|
"eval_precision": 0.9095022624434389, |
|
"eval_recall": 0.9413223140495868, |
|
"eval_runtime": 24.1374, |
|
"eval_samples_per_second": 71.3, |
|
"eval_steps_per_second": 8.949, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0319, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy_score": 0.9906724950278123, |
|
"eval_f1": 0.9408690869086909, |
|
"eval_loss": 0.03387833759188652, |
|
"eval_precision": 0.9393190554640307, |
|
"eval_recall": 0.9424242424242424, |
|
"eval_runtime": 24.1207, |
|
"eval_samples_per_second": 71.35, |
|
"eval_steps_per_second": 8.955, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy_score": 0.9921508567052743, |
|
"eval_f1": 0.9472521402927369, |
|
"eval_loss": 0.031188521534204483, |
|
"eval_precision": 0.9496124031007752, |
|
"eval_recall": 0.9449035812672176, |
|
"eval_runtime": 24.0966, |
|
"eval_samples_per_second": 71.421, |
|
"eval_steps_per_second": 8.964, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0215, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy_score": 0.9917041574933793, |
|
"eval_f1": 0.9353570944572435, |
|
"eval_loss": 0.028981657698750496, |
|
"eval_precision": 0.920512136569752, |
|
"eval_recall": 0.9506887052341598, |
|
"eval_runtime": 24.134, |
|
"eval_samples_per_second": 71.31, |
|
"eval_steps_per_second": 8.95, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy_score": 0.991278729672527, |
|
"eval_f1": 0.9291854277527629, |
|
"eval_loss": 0.027085134759545326, |
|
"eval_precision": 0.9205190592051906, |
|
"eval_recall": 0.9380165289256198, |
|
"eval_runtime": 24.1322, |
|
"eval_samples_per_second": 71.316, |
|
"eval_steps_per_second": 8.951, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy_score": 0.9881624708847835, |
|
"eval_f1": 0.9310910582444627, |
|
"eval_loss": 0.04595605656504631, |
|
"eval_precision": 0.9242671009771987, |
|
"eval_recall": 0.9380165289256198, |
|
"eval_runtime": 24.1249, |
|
"eval_samples_per_second": 71.337, |
|
"eval_steps_per_second": 8.953, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0212, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy_score": 0.9916509790157727, |
|
"eval_f1": 0.9383054565396216, |
|
"eval_loss": 0.02704950049519539, |
|
"eval_precision": 0.9339519650655022, |
|
"eval_recall": 0.9426997245179063, |
|
"eval_runtime": 24.1289, |
|
"eval_samples_per_second": 71.325, |
|
"eval_steps_per_second": 8.952, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy_score": 0.9914701721919105, |
|
"eval_f1": 0.9384028345598255, |
|
"eval_loss": 0.031768567860126495, |
|
"eval_precision": 0.9285329018338727, |
|
"eval_recall": 0.9484848484848485, |
|
"eval_runtime": 24.1114, |
|
"eval_samples_per_second": 71.377, |
|
"eval_steps_per_second": 8.958, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0183, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy_score": 0.9919806855769333, |
|
"eval_f1": 0.946596004891969, |
|
"eval_loss": 0.033798202872276306, |
|
"eval_precision": 0.9340305711987128, |
|
"eval_recall": 0.959504132231405, |
|
"eval_runtime": 24.1185, |
|
"eval_samples_per_second": 71.356, |
|
"eval_steps_per_second": 8.956, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy_score": 0.9928740840007232, |
|
"eval_f1": 0.9457025920873124, |
|
"eval_loss": 0.02753218449652195, |
|
"eval_precision": 0.9367567567567567, |
|
"eval_recall": 0.9548209366391185, |
|
"eval_runtime": 24.1012, |
|
"eval_samples_per_second": 71.407, |
|
"eval_steps_per_second": 8.962, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy_score": 0.9928421769141593, |
|
"eval_f1": 0.9465940054495913, |
|
"eval_loss": 0.026147814467549324, |
|
"eval_precision": 0.9363881401617251, |
|
"eval_recall": 0.9570247933884297, |
|
"eval_runtime": 24.1545, |
|
"eval_samples_per_second": 71.25, |
|
"eval_steps_per_second": 8.942, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0128, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy_score": 0.9918105144485924, |
|
"eval_f1": 0.9442399454669393, |
|
"eval_loss": 0.030992617830634117, |
|
"eval_precision": 0.9346828609986505, |
|
"eval_recall": 0.9539944903581267, |
|
"eval_runtime": 24.1093, |
|
"eval_samples_per_second": 71.383, |
|
"eval_steps_per_second": 8.959, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy_score": 0.9916828861023367, |
|
"eval_f1": 0.9474550692824805, |
|
"eval_loss": 0.03291752561926842, |
|
"eval_precision": 0.9437004646078163, |
|
"eval_recall": 0.9512396694214876, |
|
"eval_runtime": 24.1311, |
|
"eval_samples_per_second": 71.319, |
|
"eval_steps_per_second": 8.951, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0134, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy_score": 0.9919168714038055, |
|
"eval_f1": 0.9425444596443229, |
|
"eval_loss": 0.029299931600689888, |
|
"eval_precision": 0.936141304347826, |
|
"eval_recall": 0.9490358126721763, |
|
"eval_runtime": 24.0874, |
|
"eval_samples_per_second": 71.448, |
|
"eval_steps_per_second": 8.967, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy_score": 0.9920125926634973, |
|
"eval_f1": 0.9384928716904277, |
|
"eval_loss": 0.03006567806005478, |
|
"eval_precision": 0.9253012048192771, |
|
"eval_recall": 0.9520661157024793, |
|
"eval_runtime": 24.1243, |
|
"eval_samples_per_second": 71.339, |
|
"eval_steps_per_second": 8.954, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy_score": 0.9918530572306776, |
|
"eval_f1": 0.943499725726824, |
|
"eval_loss": 0.03543129563331604, |
|
"eval_precision": 0.9393773894046968, |
|
"eval_recall": 0.9476584022038568, |
|
"eval_runtime": 24.0895, |
|
"eval_samples_per_second": 71.442, |
|
"eval_steps_per_second": 8.967, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.01, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy_score": 0.9917147931889005, |
|
"eval_f1": 0.9457131136332558, |
|
"eval_loss": 0.03731205314397812, |
|
"eval_precision": 0.9389084985066521, |
|
"eval_recall": 0.9526170798898072, |
|
"eval_runtime": 24.1136, |
|
"eval_samples_per_second": 71.371, |
|
"eval_steps_per_second": 8.958, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_accuracy_score": 0.9919594141858907, |
|
"eval_f1": 0.9425412856557936, |
|
"eval_loss": 0.034975141286849976, |
|
"eval_precision": 0.9340005409791723, |
|
"eval_recall": 0.9512396694214876, |
|
"eval_runtime": 24.1047, |
|
"eval_samples_per_second": 71.397, |
|
"eval_steps_per_second": 8.961, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"step": 4800, |
|
"total_flos": 2.005670985822259e+16, |
|
"train_loss": 0.03252722313006719, |
|
"train_runtime": 5922.6834, |
|
"train_samples_per_second": 81.044, |
|
"train_steps_per_second": 2.533 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 11, |
|
"total_flos": 2.005670985822259e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|