{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3147128245476003,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003147128245476003,
      "eval_loss": 3.6290574073791504,
      "eval_runtime": 23.1819,
      "eval_samples_per_second": 23.078,
      "eval_steps_per_second": 2.89,
      "step": 1
    },
    {
      "epoch": 0.00944138473642801,
      "grad_norm": 3.3081445693969727,
      "learning_rate": 1.5e-05,
      "loss": 3.6087,
      "step": 3
    },
    {
      "epoch": 0.01888276947285602,
      "grad_norm": 3.533874034881592,
      "learning_rate": 3e-05,
      "loss": 3.8267,
      "step": 6
    },
    {
      "epoch": 0.02832415420928403,
      "grad_norm": 3.927079200744629,
      "learning_rate": 4.5e-05,
      "loss": 3.5564,
      "step": 9
    },
    {
      "epoch": 0.02832415420928403,
      "eval_loss": 3.374492883682251,
      "eval_runtime": 23.4572,
      "eval_samples_per_second": 22.808,
      "eval_steps_per_second": 2.856,
      "step": 9
    },
    {
      "epoch": 0.03776553894571204,
      "grad_norm": 3.0393035411834717,
      "learning_rate": 4.993910125649561e-05,
      "loss": 3.1098,
      "step": 12
    },
    {
      "epoch": 0.04720692368214005,
      "grad_norm": 3.247901439666748,
      "learning_rate": 4.962019382530521e-05,
      "loss": 2.8185,
      "step": 15
    },
    {
      "epoch": 0.05664830841856806,
      "grad_norm": 3.1178035736083984,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 2.3526,
      "step": 18
    },
    {
      "epoch": 0.05664830841856806,
      "eval_loss": 2.1767749786376953,
      "eval_runtime": 23.5223,
      "eval_samples_per_second": 22.744,
      "eval_steps_per_second": 2.848,
      "step": 18
    },
    {
      "epoch": 0.06608969315499606,
      "grad_norm": 3.0339815616607666,
      "learning_rate": 4.817959636416969e-05,
      "loss": 2.0888,
      "step": 21
    },
    {
      "epoch": 0.07553107789142408,
      "grad_norm": 2.707477569580078,
      "learning_rate": 4.707368982147318e-05,
      "loss": 1.9323,
      "step": 24
    },
    {
      "epoch": 0.08497246262785209,
      "grad_norm": 2.7072622776031494,
      "learning_rate": 4.572593931387604e-05,
      "loss": 1.5336,
      "step": 27
    },
    {
      "epoch": 0.08497246262785209,
      "eval_loss": 1.420081377029419,
      "eval_runtime": 23.535,
      "eval_samples_per_second": 22.732,
      "eval_steps_per_second": 2.847,
      "step": 27
    },
    {
      "epoch": 0.0944138473642801,
      "grad_norm": 2.5003926753997803,
      "learning_rate": 4.415111107797445e-05,
      "loss": 1.3794,
      "step": 30
    },
    {
      "epoch": 0.1038552321007081,
      "grad_norm": 2.235706329345703,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 1.1922,
      "step": 33
    },
    {
      "epoch": 0.11329661683713611,
      "grad_norm": 1.9673322439193726,
      "learning_rate": 4.039153688314145e-05,
      "loss": 0.9896,
      "step": 36
    },
    {
      "epoch": 0.11329661683713611,
      "eval_loss": 1.117264986038208,
      "eval_runtime": 23.5414,
      "eval_samples_per_second": 22.726,
      "eval_steps_per_second": 2.846,
      "step": 36
    },
    {
      "epoch": 0.12273800157356413,
      "grad_norm": 2.4774956703186035,
      "learning_rate": 3.824798160583012e-05,
      "loss": 1.0046,
      "step": 39
    },
    {
      "epoch": 0.13217938630999213,
      "grad_norm": 2.5677967071533203,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 1.1025,
      "step": 42
    },
    {
      "epoch": 0.14162077104642015,
      "grad_norm": 1.922568678855896,
      "learning_rate": 3.355050358314172e-05,
      "loss": 0.9316,
      "step": 45
    },
    {
      "epoch": 0.14162077104642015,
      "eval_loss": 0.97767573595047,
      "eval_runtime": 23.5651,
      "eval_samples_per_second": 22.703,
      "eval_steps_per_second": 2.843,
      "step": 45
    },
    {
      "epoch": 0.15106215578284815,
      "grad_norm": 1.8747705221176147,
      "learning_rate": 3.104804738999169e-05,
      "loss": 0.818,
      "step": 48
    },
    {
      "epoch": 0.16050354051927615,
      "grad_norm": 2.5462067127227783,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 1.0874,
      "step": 51
    },
    {
      "epoch": 0.16994492525570418,
      "grad_norm": 2.608792781829834,
      "learning_rate": 2.587248741756253e-05,
      "loss": 0.859,
      "step": 54
    },
    {
      "epoch": 0.16994492525570418,
      "eval_loss": 0.9035796523094177,
      "eval_runtime": 23.5717,
      "eval_samples_per_second": 22.697,
      "eval_steps_per_second": 2.842,
      "step": 54
    },
    {
      "epoch": 0.17938630999213218,
      "grad_norm": 1.9697829484939575,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 0.7758,
      "step": 57
    },
    {
      "epoch": 0.1888276947285602,
      "grad_norm": 1.9587650299072266,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 0.8138,
      "step": 60
    },
    {
      "epoch": 0.1982690794649882,
      "grad_norm": 1.9131907224655151,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 0.8053,
      "step": 63
    },
    {
      "epoch": 0.1982690794649882,
      "eval_loss": 0.8657433986663818,
      "eval_runtime": 23.5867,
      "eval_samples_per_second": 22.682,
      "eval_steps_per_second": 2.841,
      "step": 63
    },
    {
      "epoch": 0.2077104642014162,
      "grad_norm": 2.0765535831451416,
      "learning_rate": 1.56348351646022e-05,
      "loss": 0.7012,
      "step": 66
    },
    {
      "epoch": 0.21715184893784423,
      "grad_norm": 2.891166925430298,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 0.7901,
      "step": 69
    },
    {
      "epoch": 0.22659323367427223,
      "grad_norm": 2.2231130599975586,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 0.7215,
      "step": 72
    },
    {
      "epoch": 0.22659323367427223,
      "eval_loss": 0.8475407958030701,
      "eval_runtime": 23.4992,
      "eval_samples_per_second": 22.767,
      "eval_steps_per_second": 2.851,
      "step": 72
    },
    {
      "epoch": 0.23603461841070023,
      "grad_norm": 2.564457654953003,
      "learning_rate": 8.930309757836517e-06,
      "loss": 0.894,
      "step": 75
    },
    {
      "epoch": 0.24547600314712825,
      "grad_norm": 2.3753879070281982,
      "learning_rate": 7.016504991533726e-06,
      "loss": 0.8234,
      "step": 78
    },
    {
      "epoch": 0.2549173878835563,
      "grad_norm": 1.8506582975387573,
      "learning_rate": 5.299731159831953e-06,
      "loss": 0.7381,
      "step": 81
    },
    {
      "epoch": 0.2549173878835563,
      "eval_loss": 0.8388699293136597,
      "eval_runtime": 23.5662,
      "eval_samples_per_second": 22.702,
      "eval_steps_per_second": 2.843,
      "step": 81
    },
    {
      "epoch": 0.26435877261998425,
      "grad_norm": 2.3776350021362305,
      "learning_rate": 3.798797596089351e-06,
      "loss": 0.7022,
      "step": 84
    },
    {
      "epoch": 0.2738001573564123,
      "grad_norm": 3.1713807582855225,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 0.847,
      "step": 87
    },
    {
      "epoch": 0.2832415420928403,
      "grad_norm": 2.261918783187866,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 0.7808,
      "step": 90
    },
    {
      "epoch": 0.2832415420928403,
      "eval_loss": 0.8342262506484985,
      "eval_runtime": 23.5671,
      "eval_samples_per_second": 22.701,
      "eval_steps_per_second": 2.843,
      "step": 90
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 2.4710519313812256,
      "learning_rate": 7.426068431000882e-07,
      "loss": 0.784,
      "step": 93
    },
    {
      "epoch": 0.3021243115656963,
      "grad_norm": 2.067655563354492,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 0.7981,
      "step": 96
    },
    {
      "epoch": 0.31156569630212433,
      "grad_norm": 2.4378199577331543,
      "learning_rate": 1.522932452260595e-08,
      "loss": 0.8747,
      "step": 99
    },
    {
      "epoch": 0.31156569630212433,
      "eval_loss": 0.8323006629943848,
      "eval_runtime": 23.5479,
      "eval_samples_per_second": 22.72,
      "eval_steps_per_second": 2.845,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.51427898343424e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}