{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.009493745994825909,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 9.493745994825909e-05,
      "eval_loss": 10.594181060791016,
      "eval_runtime": 113.8111,
      "eval_samples_per_second": 155.872,
      "eval_steps_per_second": 19.488,
      "step": 1
    },
    {
      "epoch": 0.0002848123798447773,
      "grad_norm": 0.8785746097564697,
      "learning_rate": 1.5e-05,
      "loss": 10.5968,
      "step": 3
    },
    {
      "epoch": 0.0005696247596895546,
      "grad_norm": 0.7744207382202148,
      "learning_rate": 3e-05,
      "loss": 10.5731,
      "step": 6
    },
    {
      "epoch": 0.0008544371395343318,
      "grad_norm": 0.8253825306892395,
      "learning_rate": 4.5e-05,
      "loss": 10.5738,
      "step": 9
    },
    {
      "epoch": 0.0008544371395343318,
      "eval_loss": 10.563873291015625,
      "eval_runtime": 113.9599,
      "eval_samples_per_second": 155.669,
      "eval_steps_per_second": 19.463,
      "step": 9
    },
    {
      "epoch": 0.0011392495193791091,
      "grad_norm": 0.9217737913131714,
      "learning_rate": 4.993910125649561e-05,
      "loss": 10.5507,
      "step": 12
    },
    {
      "epoch": 0.0014240618992238862,
      "grad_norm": 1.0646398067474365,
      "learning_rate": 4.962019382530521e-05,
      "loss": 10.503,
      "step": 15
    },
    {
      "epoch": 0.0017088742790686636,
      "grad_norm": 1.061179757118225,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 10.4435,
      "step": 18
    },
    {
      "epoch": 0.0017088742790686636,
      "eval_loss": 10.36979866027832,
      "eval_runtime": 113.6826,
      "eval_samples_per_second": 156.049,
      "eval_steps_per_second": 19.51,
      "step": 18
    },
    {
      "epoch": 0.001993686658913441,
      "grad_norm": 1.328324556350708,
      "learning_rate": 4.817959636416969e-05,
      "loss": 10.3437,
      "step": 21
    },
    {
      "epoch": 0.0022784990387582182,
      "grad_norm": 1.2739237546920776,
      "learning_rate": 4.707368982147318e-05,
      "loss": 10.264,
      "step": 24
    },
    {
      "epoch": 0.002563311418602995,
      "grad_norm": 1.499740719795227,
      "learning_rate": 4.572593931387604e-05,
      "loss": 10.1468,
      "step": 27
    },
    {
      "epoch": 0.002563311418602995,
      "eval_loss": 10.06716537475586,
      "eval_runtime": 113.41,
      "eval_samples_per_second": 156.424,
      "eval_steps_per_second": 19.557,
      "step": 27
    },
    {
      "epoch": 0.0028481237984477725,
      "grad_norm": 1.6156394481658936,
      "learning_rate": 4.415111107797445e-05,
      "loss": 10.0428,
      "step": 30
    },
    {
      "epoch": 0.00313293617829255,
      "grad_norm": 1.7073334455490112,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 9.9061,
      "step": 33
    },
    {
      "epoch": 0.003417748558137327,
      "grad_norm": 1.6044425964355469,
      "learning_rate": 4.039153688314145e-05,
      "loss": 9.8311,
      "step": 36
    },
    {
      "epoch": 0.003417748558137327,
      "eval_loss": 9.738450050354004,
      "eval_runtime": 114.0416,
      "eval_samples_per_second": 155.557,
      "eval_steps_per_second": 19.449,
      "step": 36
    },
    {
      "epoch": 0.0037025609379821045,
      "grad_norm": 1.611886739730835,
      "learning_rate": 3.824798160583012e-05,
      "loss": 9.7576,
      "step": 39
    },
    {
      "epoch": 0.003987373317826882,
      "grad_norm": 1.7682589292526245,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 9.5977,
      "step": 42
    },
    {
      "epoch": 0.004272185697671659,
      "grad_norm": 1.7930411100387573,
      "learning_rate": 3.355050358314172e-05,
      "loss": 9.4899,
      "step": 45
    },
    {
      "epoch": 0.004272185697671659,
      "eval_loss": 9.44653034210205,
      "eval_runtime": 113.5607,
      "eval_samples_per_second": 156.216,
      "eval_steps_per_second": 19.531,
      "step": 45
    },
    {
      "epoch": 0.0045569980775164365,
      "grad_norm": 1.6953896284103394,
      "learning_rate": 3.104804738999169e-05,
      "loss": 9.4481,
      "step": 48
    },
    {
      "epoch": 0.004841810457361213,
      "grad_norm": 1.7754610776901245,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 9.363,
      "step": 51
    },
    {
      "epoch": 0.00512662283720599,
      "grad_norm": 1.6957319974899292,
      "learning_rate": 2.587248741756253e-05,
      "loss": 9.2884,
      "step": 54
    },
    {
      "epoch": 0.00512662283720599,
      "eval_loss": 9.21696949005127,
      "eval_runtime": 113.9372,
      "eval_samples_per_second": 155.7,
      "eval_steps_per_second": 19.467,
      "step": 54
    },
    {
      "epoch": 0.005411435217050768,
      "grad_norm": 1.9713038206100464,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 9.1375,
      "step": 57
    },
    {
      "epoch": 0.005696247596895545,
      "grad_norm": 1.8748242855072021,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 9.1021,
      "step": 60
    },
    {
      "epoch": 0.005981059976740323,
      "grad_norm": 1.7590270042419434,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 9.1081,
      "step": 63
    },
    {
      "epoch": 0.005981059976740323,
      "eval_loss": 9.055033683776855,
      "eval_runtime": 113.6017,
      "eval_samples_per_second": 156.16,
      "eval_steps_per_second": 19.524,
      "step": 63
    },
    {
      "epoch": 0.0062658723565851,
      "grad_norm": 1.6782749891281128,
      "learning_rate": 1.56348351646022e-05,
      "loss": 8.9914,
      "step": 66
    },
    {
      "epoch": 0.0065506847364298765,
      "grad_norm": 1.5772511959075928,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 9.0474,
      "step": 69
    },
    {
      "epoch": 0.006835497116274654,
      "grad_norm": 1.980194091796875,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 8.957,
      "step": 72
    },
    {
      "epoch": 0.006835497116274654,
      "eval_loss": 8.955224990844727,
      "eval_runtime": 114.5372,
      "eval_samples_per_second": 154.884,
      "eval_steps_per_second": 19.365,
      "step": 72
    },
    {
      "epoch": 0.007120309496119431,
      "grad_norm": 1.7712100744247437,
      "learning_rate": 8.930309757836517e-06,
      "loss": 8.9299,
      "step": 75
    },
    {
      "epoch": 0.007405121875964209,
      "grad_norm": 1.6637734174728394,
      "learning_rate": 7.016504991533726e-06,
      "loss": 8.8943,
      "step": 78
    },
    {
      "epoch": 0.007689934255808986,
      "grad_norm": 1.4766582250595093,
      "learning_rate": 5.299731159831953e-06,
      "loss": 8.9262,
      "step": 81
    },
    {
      "epoch": 0.007689934255808986,
      "eval_loss": 8.90304183959961,
      "eval_runtime": 113.7323,
      "eval_samples_per_second": 155.98,
      "eval_steps_per_second": 19.502,
      "step": 81
    },
    {
      "epoch": 0.007974746635653764,
      "grad_norm": 1.5637836456298828,
      "learning_rate": 3.798797596089351e-06,
      "loss": 8.8611,
      "step": 84
    },
    {
      "epoch": 0.00825955901549854,
      "grad_norm": 1.6171281337738037,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 8.9338,
      "step": 87
    },
    {
      "epoch": 0.008544371395343317,
      "grad_norm": 1.5863943099975586,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 8.9591,
      "step": 90
    },
    {
      "epoch": 0.008544371395343317,
      "eval_loss": 8.882512092590332,
      "eval_runtime": 113.4322,
      "eval_samples_per_second": 156.393,
      "eval_steps_per_second": 19.554,
      "step": 90
    },
    {
      "epoch": 0.008829183775188095,
      "grad_norm": 1.6321587562561035,
      "learning_rate": 7.426068431000882e-07,
      "loss": 8.9392,
      "step": 93
    },
    {
      "epoch": 0.009113996155032873,
      "grad_norm": 1.5234466791152954,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 8.8882,
      "step": 96
    },
    {
      "epoch": 0.009398808534877649,
      "grad_norm": 1.6211401224136353,
      "learning_rate": 1.522932452260595e-08,
      "loss": 8.8878,
      "step": 99
    },
    {
      "epoch": 0.009398808534877649,
      "eval_loss": 8.878786087036133,
      "eval_runtime": 114.8285,
      "eval_samples_per_second": 154.491,
      "eval_steps_per_second": 19.316,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8571248128819200.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}