|
{ |
|
"best_metric": 0.8802370452039692, |
|
"best_model_checkpoint": "test/checkpoint-3000", |
|
"epoch": 12.539184952978056, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.6750517260154634, |
|
"eval_f1": 0.38478707258280515, |
|
"eval_loss": 1.2847243547439575, |
|
"eval_precision": 0.5487649605296664, |
|
"eval_recall": 0.29626065438548255, |
|
"eval_runtime": 48.6236, |
|
"eval_samples_per_second": 9.728, |
|
"eval_steps_per_second": 2.447, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.7829140803658935, |
|
"eval_f1": 0.589244320815948, |
|
"eval_loss": 0.8382514119148254, |
|
"eval_precision": 0.6727240649258998, |
|
"eval_recall": 0.5241957657409954, |
|
"eval_runtime": 47.451, |
|
"eval_samples_per_second": 9.968, |
|
"eval_steps_per_second": 2.508, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.8379070020690407, |
|
"eval_f1": 0.7082447783070723, |
|
"eval_loss": 0.6504582166671753, |
|
"eval_precision": 0.7584366661434626, |
|
"eval_recall": 0.6642837503436898, |
|
"eval_runtime": 47.1785, |
|
"eval_samples_per_second": 10.026, |
|
"eval_steps_per_second": 2.522, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.8560383316998802, |
|
"eval_f1": 0.74649361523969, |
|
"eval_loss": 0.5616394877433777, |
|
"eval_precision": 0.7579708091256908, |
|
"eval_recall": 0.7353588122078636, |
|
"eval_runtime": 47.4319, |
|
"eval_samples_per_second": 9.972, |
|
"eval_steps_per_second": 2.509, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 1.0044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.8640967004246978, |
|
"eval_f1": 0.7635898529098273, |
|
"eval_loss": 0.48347049951553345, |
|
"eval_precision": 0.7902632740108839, |
|
"eval_recall": 0.7386582348089085, |
|
"eval_runtime": 47.4677, |
|
"eval_samples_per_second": 9.965, |
|
"eval_steps_per_second": 2.507, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.8795600566263748, |
|
"eval_f1": 0.7909914858555341, |
|
"eval_loss": 0.4531821012496948, |
|
"eval_precision": 0.7901234567901234, |
|
"eval_recall": 0.7918614242507561, |
|
"eval_runtime": 48.4837, |
|
"eval_samples_per_second": 9.756, |
|
"eval_steps_per_second": 2.454, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.8883262550364804, |
|
"eval_f1": 0.8112951398316589, |
|
"eval_loss": 0.4272719919681549, |
|
"eval_precision": 0.8012872083668544, |
|
"eval_recall": 0.8215562276601595, |
|
"eval_runtime": 47.6145, |
|
"eval_samples_per_second": 9.934, |
|
"eval_steps_per_second": 2.499, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.8954590003266907, |
|
"eval_f1": 0.8212751449028298, |
|
"eval_loss": 0.3849042057991028, |
|
"eval_precision": 0.8147747260181302, |
|
"eval_recall": 0.8278801209788287, |
|
"eval_runtime": 47.9087, |
|
"eval_samples_per_second": 9.873, |
|
"eval_steps_per_second": 2.484, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.8950234128280519, |
|
"eval_f1": 0.8168306752629793, |
|
"eval_loss": 0.3808012306690216, |
|
"eval_precision": 0.8065942903096099, |
|
"eval_recall": 0.8273302172119879, |
|
"eval_runtime": 47.5595, |
|
"eval_samples_per_second": 9.945, |
|
"eval_steps_per_second": 2.502, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.4068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_accuracy": 0.9042796471741261, |
|
"eval_f1": 0.838237306692213, |
|
"eval_loss": 0.35156166553497314, |
|
"eval_precision": 0.8344686648501363, |
|
"eval_recall": 0.8420401429749794, |
|
"eval_runtime": 48.1704, |
|
"eval_samples_per_second": 9.819, |
|
"eval_steps_per_second": 2.47, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.9095066971577915, |
|
"eval_f1": 0.8464466126911634, |
|
"eval_loss": 0.33900508284568787, |
|
"eval_precision": 0.8521666434443361, |
|
"eval_recall": 0.8408028594995876, |
|
"eval_runtime": 48.0055, |
|
"eval_samples_per_second": 9.853, |
|
"eval_steps_per_second": 2.479, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"eval_accuracy": 0.9087444190351737, |
|
"eval_f1": 0.8463414634146341, |
|
"eval_loss": 0.32527512311935425, |
|
"eval_precision": 0.8343574672722415, |
|
"eval_recall": 0.8586747319219137, |
|
"eval_runtime": 48.0035, |
|
"eval_samples_per_second": 9.853, |
|
"eval_steps_per_second": 2.479, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.9081999346618752, |
|
"eval_f1": 0.8466865833785986, |
|
"eval_loss": 0.33180123567581177, |
|
"eval_precision": 0.8364636436812449, |
|
"eval_recall": 0.8571624965631015, |
|
"eval_runtime": 47.8611, |
|
"eval_samples_per_second": 9.883, |
|
"eval_steps_per_second": 2.486, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_accuracy": 0.9187629315038658, |
|
"eval_f1": 0.8616123499142367, |
|
"eval_loss": 0.3064488470554352, |
|
"eval_precision": 0.8600191754554171, |
|
"eval_recall": 0.8632114379983503, |
|
"eval_runtime": 47.6825, |
|
"eval_samples_per_second": 9.92, |
|
"eval_steps_per_second": 2.496, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.2678, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_accuracy": 0.9184906893172166, |
|
"eval_f1": 0.8611149536588739, |
|
"eval_loss": 0.30125683546066284, |
|
"eval_precision": 0.8665089086859689, |
|
"eval_recall": 0.8557877371459994, |
|
"eval_runtime": 47.6577, |
|
"eval_samples_per_second": 9.925, |
|
"eval_steps_per_second": 2.497, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_accuracy": 0.9157138190133943, |
|
"eval_f1": 0.8587268993839836, |
|
"eval_loss": 0.30989399552345276, |
|
"eval_precision": 0.8550981461286805, |
|
"eval_recall": 0.8623865823480891, |
|
"eval_runtime": 47.7978, |
|
"eval_samples_per_second": 9.896, |
|
"eval_steps_per_second": 2.49, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_accuracy": 0.9191440705651748, |
|
"eval_f1": 0.8643950889777902, |
|
"eval_loss": 0.3114277124404907, |
|
"eval_precision": 0.8673864894795127, |
|
"eval_recall": 0.8614242507561177, |
|
"eval_runtime": 47.8331, |
|
"eval_samples_per_second": 9.889, |
|
"eval_steps_per_second": 2.488, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.9207230752477404, |
|
"eval_f1": 0.8651245795867938, |
|
"eval_loss": 0.3051866888999939, |
|
"eval_precision": 0.8638793694311172, |
|
"eval_recall": 0.8663733846576849, |
|
"eval_runtime": 47.7044, |
|
"eval_samples_per_second": 9.915, |
|
"eval_steps_per_second": 2.495, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_accuracy": 0.9236088424262223, |
|
"eval_f1": 0.8706572930175415, |
|
"eval_loss": 0.2927141785621643, |
|
"eval_precision": 0.8645790971939813, |
|
"eval_recall": 0.8768215562276601, |
|
"eval_runtime": 47.4403, |
|
"eval_samples_per_second": 9.97, |
|
"eval_steps_per_second": 2.508, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2068, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_accuracy": 0.9194707611891538, |
|
"eval_f1": 0.8640943047083819, |
|
"eval_loss": 0.30293476581573486, |
|
"eval_precision": 0.8615552822194888, |
|
"eval_recall": 0.8666483365411053, |
|
"eval_runtime": 47.7322, |
|
"eval_samples_per_second": 9.909, |
|
"eval_steps_per_second": 2.493, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_accuracy": 0.9211042143090493, |
|
"eval_f1": 0.8684515293957353, |
|
"eval_loss": 0.3005865216255188, |
|
"eval_precision": 0.8607697501688049, |
|
"eval_recall": 0.8762716524608194, |
|
"eval_runtime": 47.8357, |
|
"eval_samples_per_second": 9.888, |
|
"eval_steps_per_second": 2.488, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_accuracy": 0.923391048676903, |
|
"eval_f1": 0.8718300205620287, |
|
"eval_loss": 0.2952803373336792, |
|
"eval_precision": 0.8693275013668671, |
|
"eval_recall": 0.8743469892768766, |
|
"eval_runtime": 47.6233, |
|
"eval_samples_per_second": 9.932, |
|
"eval_steps_per_second": 2.499, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_accuracy": 0.9212675596210389, |
|
"eval_f1": 0.8683849739511927, |
|
"eval_loss": 0.3120380938053131, |
|
"eval_precision": 0.8660103910308996, |
|
"eval_recall": 0.8707726147924113, |
|
"eval_runtime": 48.4779, |
|
"eval_samples_per_second": 9.757, |
|
"eval_steps_per_second": 2.455, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"eval_accuracy": 0.9225198736796254, |
|
"eval_f1": 0.8689645739296951, |
|
"eval_loss": 0.3099469840526581, |
|
"eval_precision": 0.8645890038105607, |
|
"eval_recall": 0.8733846576849051, |
|
"eval_runtime": 47.5215, |
|
"eval_samples_per_second": 9.953, |
|
"eval_steps_per_second": 2.504, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.16, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_accuracy": 0.9236088424262223, |
|
"eval_f1": 0.8730713844887883, |
|
"eval_loss": 0.299545019865036, |
|
"eval_precision": 0.8709809823505268, |
|
"eval_recall": 0.8751718449271377, |
|
"eval_runtime": 47.9228, |
|
"eval_samples_per_second": 9.87, |
|
"eval_steps_per_second": 2.483, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_accuracy": 0.9233366002395731, |
|
"eval_f1": 0.8713222745579903, |
|
"eval_loss": 0.3018472492694855, |
|
"eval_precision": 0.8653559322033898, |
|
"eval_recall": 0.877371459994501, |
|
"eval_runtime": 47.7011, |
|
"eval_samples_per_second": 9.916, |
|
"eval_steps_per_second": 2.495, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.9234999455515627, |
|
"eval_f1": 0.8743416102332581, |
|
"eval_loss": 0.305177241563797, |
|
"eval_precision": 0.8701157249829816, |
|
"eval_recall": 0.8786087434698928, |
|
"eval_runtime": 48.329, |
|
"eval_samples_per_second": 9.787, |
|
"eval_steps_per_second": 2.462, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_accuracy": 0.9252967439834476, |
|
"eval_f1": 0.8783895960154953, |
|
"eval_loss": 0.2957703769207001, |
|
"eval_precision": 0.8840155945419104, |
|
"eval_recall": 0.8728347539180643, |
|
"eval_runtime": 47.6638, |
|
"eval_samples_per_second": 9.924, |
|
"eval_steps_per_second": 2.497, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.9270935424153327, |
|
"eval_f1": 0.8792522850663185, |
|
"eval_loss": 0.2976396679878235, |
|
"eval_precision": 0.8790710457606157, |
|
"eval_recall": 0.879433599120154, |
|
"eval_runtime": 48.2883, |
|
"eval_samples_per_second": 9.795, |
|
"eval_steps_per_second": 2.464, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1364, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_accuracy": 0.9288358924098878, |
|
"eval_f1": 0.8802370452039692, |
|
"eval_loss": 0.30077481269836426, |
|
"eval_precision": 0.8824260845537442, |
|
"eval_recall": 0.878058839703052, |
|
"eval_runtime": 47.4506, |
|
"eval_samples_per_second": 9.968, |
|
"eval_steps_per_second": 2.508, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_accuracy": 0.9276380267886312, |
|
"eval_f1": 0.8794581144594968, |
|
"eval_loss": 0.295279324054718, |
|
"eval_precision": 0.8843480678343064, |
|
"eval_recall": 0.8746219411602969, |
|
"eval_runtime": 48.0211, |
|
"eval_samples_per_second": 9.85, |
|
"eval_steps_per_second": 2.478, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"eval_accuracy": 0.9261134705433954, |
|
"eval_f1": 0.8766407807023572, |
|
"eval_loss": 0.30134841799736023, |
|
"eval_precision": 0.8764600797031744, |
|
"eval_recall": 0.8768215562276601, |
|
"eval_runtime": 47.9205, |
|
"eval_samples_per_second": 9.871, |
|
"eval_steps_per_second": 2.483, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"eval_accuracy": 0.9256234346074268, |
|
"eval_f1": 0.876704818038517, |
|
"eval_loss": 0.306386798620224, |
|
"eval_precision": 0.8741287412874129, |
|
"eval_recall": 0.8792961231784437, |
|
"eval_runtime": 47.48, |
|
"eval_samples_per_second": 9.962, |
|
"eval_steps_per_second": 2.506, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"eval_accuracy": 0.9298159642818251, |
|
"eval_f1": 0.8808639427706698, |
|
"eval_loss": 0.2949816882610321, |
|
"eval_precision": 0.8814702643171806, |
|
"eval_recall": 0.8802584547704152, |
|
"eval_runtime": 47.5486, |
|
"eval_samples_per_second": 9.948, |
|
"eval_steps_per_second": 2.503, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.1192, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.9262223674180551, |
|
"eval_f1": 0.8796156485929993, |
|
"eval_loss": 0.3046295940876007, |
|
"eval_precision": 0.8782894736842105, |
|
"eval_recall": 0.8809458344789661, |
|
"eval_runtime": 47.8942, |
|
"eval_samples_per_second": 9.876, |
|
"eval_steps_per_second": 2.485, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"eval_accuracy": 0.9261134705433954, |
|
"eval_f1": 0.8789643126241523, |
|
"eval_loss": 0.30884549021720886, |
|
"eval_precision": 0.8759044368600682, |
|
"eval_recall": 0.8820456420126478, |
|
"eval_runtime": 48.0682, |
|
"eval_samples_per_second": 9.84, |
|
"eval_steps_per_second": 2.476, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"eval_accuracy": 0.9268757486660133, |
|
"eval_f1": 0.8796163069544365, |
|
"eval_loss": 0.30328136682510376, |
|
"eval_precision": 0.8767927878705095, |
|
"eval_recall": 0.8824580698377784, |
|
"eval_runtime": 47.6224, |
|
"eval_samples_per_second": 9.932, |
|
"eval_steps_per_second": 2.499, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"eval_accuracy": 0.9269301971033431, |
|
"eval_f1": 0.8791058081327573, |
|
"eval_loss": 0.30441808700561523, |
|
"eval_precision": 0.8770009577233547, |
|
"eval_recall": 0.8812207863623865, |
|
"eval_runtime": 47.6174, |
|
"eval_samples_per_second": 9.933, |
|
"eval_steps_per_second": 2.499, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"eval_accuracy": 0.9272568877273222, |
|
"eval_f1": 0.8795933786661172, |
|
"eval_loss": 0.3058357238769531, |
|
"eval_precision": 0.8789293067947838, |
|
"eval_recall": 0.8802584547704152, |
|
"eval_runtime": 47.5523, |
|
"eval_samples_per_second": 9.947, |
|
"eval_steps_per_second": 2.503, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 0.0, |
|
"loss": 0.1044, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"eval_accuracy": 0.9272024392899924, |
|
"eval_f1": 0.8791826659352715, |
|
"eval_loss": 0.3065986931324005, |
|
"eval_precision": 0.8770177838577291, |
|
"eval_recall": 0.8813582623040968, |
|
"eval_runtime": 47.8877, |
|
"eval_samples_per_second": 9.877, |
|
"eval_steps_per_second": 2.485, |
|
"step": 4000 |
|
} |
|
], |
|
"max_steps": 4000, |
|
"num_train_epochs": 13, |
|
"total_flos": 4247432429568000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|