|
{ |
|
"best_metric": 0.6209476309226932, |
|
"best_model_checkpoint": "./experiment/t5-sparc-0312/checkpoint-2112", |
|
"epoch": 270.2149003147954, |
|
"global_step": 2432, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001, |
|
"loss": 3.9936, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001, |
|
"loss": 2.032, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9708, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6016, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4405, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3827, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2745, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2728, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2312, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1982, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2122, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1494, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1485, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1209, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1364, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1182, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1055, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_exact_match": 0.44472152950955945, |
|
"eval_exec": 0.49542809642560265, |
|
"eval_loss": 0.17247992753982544, |
|
"eval_runtime": 539.6836, |
|
"eval_samples_per_second": 3.011, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0938, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0889, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1041, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0821, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0969, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0664, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0788, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0722, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0657, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0613, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0551, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0584, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0474, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0516, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0461, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0486, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"eval_exact_match": 0.5544472152950956, |
|
"eval_exec": 0.6093100581878637, |
|
"eval_loss": 0.18642085790634155, |
|
"eval_runtime": 697.2854, |
|
"eval_samples_per_second": 2.33, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0421, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0398, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0436, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0358, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0373, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0323, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0349, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0285, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.041, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0403, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0378, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0297, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 19.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0236, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0233, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.022, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 21.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0214, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 21.32, |
|
"eval_exact_match": 0.5685785536159601, |
|
"eval_exec": 0.6226101413133832, |
|
"eval_loss": 0.21789631247520447, |
|
"eval_runtime": 744.163, |
|
"eval_samples_per_second": 2.184, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 21.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0191, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 22.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0223, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0198, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 23.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0192, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 23.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0183, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0167, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0163, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 24.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0184, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0155, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 25.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0155, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0217, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0766, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 27.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0221, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0141, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0138, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 28.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0116, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 28.43, |
|
"eval_exact_match": 0.5885286783042394, |
|
"eval_exec": 0.6425602660016625, |
|
"eval_loss": 0.22936737537384033, |
|
"eval_runtime": 755.0825, |
|
"eval_samples_per_second": 2.152, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 29.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 29.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.01, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0107, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 30.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0099, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 31.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0103, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 31.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0097, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 31.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0094, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0097, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 32.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0084, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 33.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.01, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 33.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0083, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 34.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0082, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 34.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0077, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0077, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 35.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0082, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 35.54, |
|
"eval_exact_match": 0.600997506234414, |
|
"eval_exec": 0.655860349127182, |
|
"eval_loss": 0.25818198919296265, |
|
"eval_runtime": 756.517, |
|
"eval_samples_per_second": 2.148, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 35.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0072, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 36.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0074, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 36.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0067, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 37.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0061, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 37.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0066, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 38.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0073, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 38.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 39.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 39.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0185, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 39.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0061, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 41.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0061, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 41.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 42.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 42.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0104, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 42.64, |
|
"eval_exact_match": 0.5876974231088944, |
|
"eval_exec": 0.6433915211970075, |
|
"eval_loss": 0.26233145594596863, |
|
"eval_runtime": 745.8121, |
|
"eval_samples_per_second": 2.179, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0057, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 43.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0053, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 43.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 44.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0047, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 45.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 45.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 46.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 46.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0256, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 47.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 47.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 47.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0044, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 48.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 48.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0043, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 49.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 49.75, |
|
"eval_exact_match": 0.6076475477971738, |
|
"eval_exec": 0.6541978387364921, |
|
"eval_loss": 0.3040614724159241, |
|
"eval_runtime": 745.4384, |
|
"eval_samples_per_second": 2.18, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 50.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 50.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 51.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0041, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 51.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 51.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 52.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0037, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 52.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 53.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0035, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 53.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 54.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0038, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 54.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 55.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 55.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0362, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 55.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 56.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"eval_exact_match": 0.6051537822111388, |
|
"eval_exec": 0.656691604322527, |
|
"eval_loss": 0.30350586771965027, |
|
"eval_runtime": 754.5149, |
|
"eval_samples_per_second": 2.154, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 57.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.003, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 57.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 58.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 58.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 59.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 59.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 59.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 60.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 60.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 61.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 62.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0033, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 62.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0088, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 63.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 63.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 63.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 63.97, |
|
"eval_exact_match": 0.5960099750623441, |
|
"eval_exec": 0.6492103075644223, |
|
"eval_loss": 0.30405256152153015, |
|
"eval_runtime": 759.5993, |
|
"eval_samples_per_second": 2.139, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 64.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 64.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0046, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 65.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0086, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 65.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 66.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 66.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 67.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 67.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 67.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 68.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 68.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 69.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 69.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 70.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 70.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"eval_exact_match": 0.5818786367414797, |
|
"eval_exec": 0.6317539484621779, |
|
"eval_loss": 0.3286347985267639, |
|
"eval_runtime": 754.2184, |
|
"eval_samples_per_second": 2.155, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 71.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 71.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 72.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 72.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 73.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 73.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0025, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 74.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 74.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 75.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 75.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 75.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 76.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 76.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 77.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 77.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 78.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 78.21, |
|
"eval_exact_match": 0.5926849542809642, |
|
"eval_exec": 0.6359102244389028, |
|
"eval_loss": 0.3365001380443573, |
|
"eval_runtime": 744.2096, |
|
"eval_samples_per_second": 2.184, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 78.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 79.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 79.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 79.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 80.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 80.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 81.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0031, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 81.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 82.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 82.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 83.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 83.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 83.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 84.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 84.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0026, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 85.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 85.32, |
|
"eval_exact_match": 0.5993349958437241, |
|
"eval_exec": 0.6533665835411472, |
|
"eval_loss": 0.33010169863700867, |
|
"eval_runtime": 749.4582, |
|
"eval_samples_per_second": 2.168, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 85.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 86.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 86.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 87.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0024, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 87.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 87.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 88.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 88.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 89.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 89.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 90.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0022, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 90.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 91.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0042, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 91.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 91.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 92.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 92.43, |
|
"eval_exact_match": 0.5960099750623441, |
|
"eval_exec": 0.657522859517872, |
|
"eval_loss": 0.3371131718158722, |
|
"eval_runtime": 749.5299, |
|
"eval_samples_per_second": 2.168, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 92.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 93.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 93.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 94.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 94.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 95.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 95.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 95.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 96.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 96.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.012, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 97.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.011, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 97.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.002, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 98.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 98.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 99.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 99.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 99.54, |
|
"eval_exact_match": 0.5951787198669992, |
|
"eval_exec": 0.6492103075644223, |
|
"eval_loss": 0.35070350766181946, |
|
"eval_runtime": 738.8339, |
|
"eval_samples_per_second": 2.199, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 99.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0159, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 100.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0076, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 100.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 101.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 101.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 102.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 102.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 103.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 103.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 103.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 104.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 104.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 105.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 105.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 106.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 106.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 106.64, |
|
"eval_exact_match": 0.6076475477971738, |
|
"eval_exec": 0.6674979218620116, |
|
"eval_loss": 0.36282214522361755, |
|
"eval_runtime": 748.9956, |
|
"eval_samples_per_second": 2.17, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 107.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 107.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 107.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 108.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 108.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 109.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 109.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 110.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 110.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 111.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 111.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 112.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 112.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 113.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 113.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 113.75, |
|
"eval_exact_match": 0.6151288445552785, |
|
"eval_exec": 0.6708229426433915, |
|
"eval_loss": 0.351724237203598, |
|
"eval_runtime": 774.4973, |
|
"eval_samples_per_second": 2.098, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 114.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 114.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 115.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 115.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 115.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 116.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0049, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 116.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0045, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 117.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 117.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 118.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 118.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 119.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 119.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 119.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 120.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 120.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 120.86, |
|
"eval_exact_match": 0.5993349958437241, |
|
"eval_exec": 0.6608478802992519, |
|
"eval_loss": 0.35145050287246704, |
|
"eval_runtime": 749.1176, |
|
"eval_samples_per_second": 2.169, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 121.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 121.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 122.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 122.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 123.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 123.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0065, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 123.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 124.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 124.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 125.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 125.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 126.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 126.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 127.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 127.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 127.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 127.97, |
|
"eval_exact_match": 0.6109725685785536, |
|
"eval_exec": 0.6699916874480466, |
|
"eval_loss": 0.35655492544174194, |
|
"eval_runtime": 756.087, |
|
"eval_samples_per_second": 2.149, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 128.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 128.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 129.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 129.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 130.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 130.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 131.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 131.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 131.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 132.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 132.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 133.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 133.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 134.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 134.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 135.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 135.11, |
|
"eval_exact_match": 0.5985037406483791, |
|
"eval_exec": 0.6583541147132169, |
|
"eval_loss": 0.3626713454723358, |
|
"eval_runtime": 771.5036, |
|
"eval_samples_per_second": 2.106, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 135.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0052, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 135.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0032, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 136.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0076, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 136.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 137.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 137.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 138.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 138.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 139.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 139.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 139.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 140.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 140.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 141.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 141.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 142.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 142.21, |
|
"eval_exact_match": 0.6068162926018288, |
|
"eval_exec": 0.6625103906899418, |
|
"eval_loss": 0.37810268998146057, |
|
"eval_runtime": 744.2729, |
|
"eval_samples_per_second": 2.183, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 142.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 143.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 143.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 143.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 144.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 144.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 145.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 145.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 146.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 146.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 147.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 147.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0058, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 147.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 148.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 148.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 149.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 149.32, |
|
"eval_exact_match": 0.6184538653366584, |
|
"eval_exec": 0.6749792186201163, |
|
"eval_loss": 0.37218576669692993, |
|
"eval_runtime": 745.0951, |
|
"eval_samples_per_second": 2.181, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 149.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 150.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 150.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 151.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 151.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 151.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 152.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 152.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0048, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 153.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 153.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 154.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 154.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 155.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0059, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 155.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 155.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 156.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 156.43, |
|
"eval_exact_match": 0.6059850374064838, |
|
"eval_exec": 0.6633416458852868, |
|
"eval_loss": 0.3731986880302429, |
|
"eval_runtime": 751.4238, |
|
"eval_samples_per_second": 2.163, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 156.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 157.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0017, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 157.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 158.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 158.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 159.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 159.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 159.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 160.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 160.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 161.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 161.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 162.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 162.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 163.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 163.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 163.54, |
|
"eval_exact_match": 0.6101413133832086, |
|
"eval_exec": 0.6674979218620116, |
|
"eval_loss": 0.38048475980758667, |
|
"eval_runtime": 813.5844, |
|
"eval_samples_per_second": 1.997, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 163.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 164.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0134, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 164.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 165.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 165.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 166.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 166.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 167.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 167.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 167.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 168.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 168.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 169.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 169.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 170.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 170.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 170.64, |
|
"eval_exact_match": 0.6101413133832086, |
|
"eval_exec": 0.6683291770573566, |
|
"eval_loss": 0.38680797815322876, |
|
"eval_runtime": 781.939, |
|
"eval_samples_per_second": 2.078, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 171.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 171.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 171.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 172.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 172.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0013, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 173.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 173.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 174.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 174.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 175.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 175.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 175.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 176.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 176.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 177.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 177.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 177.75, |
|
"eval_exact_match": 0.6068162926018288, |
|
"eval_exec": 0.6650041562759768, |
|
"eval_loss": 0.4018971621990204, |
|
"eval_runtime": 775.9241, |
|
"eval_samples_per_second": 2.094, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 178.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 178.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0015, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 179.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 179.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0079, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 179.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 180.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 180.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 181.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 181.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 182.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 182.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 183.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 183.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 183.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 184.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 184.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 184.86, |
|
"eval_exact_match": 0.6026600166251039, |
|
"eval_exec": 0.6641729010806318, |
|
"eval_loss": 0.385076105594635, |
|
"eval_runtime": 773.5213, |
|
"eval_samples_per_second": 2.101, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 185.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 185.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 186.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1676 |
|
}, |
|
{ |
|
"epoch": 186.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0019, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 187.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1684 |
|
}, |
|
{ |
|
"epoch": 187.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1688 |
|
}, |
|
{ |
|
"epoch": 187.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0014, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 188.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 188.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 189.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 189.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 190.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 190.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1716 |
|
}, |
|
{ |
|
"epoch": 191.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 191.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1724 |
|
}, |
|
{ |
|
"epoch": 191.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 191.97, |
|
"eval_exact_match": 0.6018287614297589, |
|
"eval_exec": 0.655860349127182, |
|
"eval_loss": 0.3692590594291687, |
|
"eval_runtime": 780.5695, |
|
"eval_samples_per_second": 2.082, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 192.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1732 |
|
}, |
|
{ |
|
"epoch": 192.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 193.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 193.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 194.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 194.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1752 |
|
}, |
|
{ |
|
"epoch": 195.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1756 |
|
}, |
|
{ |
|
"epoch": 195.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 195.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1764 |
|
}, |
|
{ |
|
"epoch": 196.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 196.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1772 |
|
}, |
|
{ |
|
"epoch": 197.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1776 |
|
}, |
|
{ |
|
"epoch": 197.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 198.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 198.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1788 |
|
}, |
|
{ |
|
"epoch": 199.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 199.11, |
|
"eval_exact_match": 0.6159600997506235, |
|
"eval_exec": 0.6758104738154613, |
|
"eval_loss": 0.39024052023887634, |
|
"eval_runtime": 765.2545, |
|
"eval_samples_per_second": 2.123, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 199.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1796 |
|
}, |
|
{ |
|
"epoch": 199.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 200.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1804 |
|
}, |
|
{ |
|
"epoch": 200.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1808 |
|
}, |
|
{ |
|
"epoch": 201.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1812 |
|
}, |
|
{ |
|
"epoch": 201.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1816 |
|
}, |
|
{ |
|
"epoch": 202.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 202.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 203.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1828 |
|
}, |
|
{ |
|
"epoch": 203.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1832 |
|
}, |
|
{ |
|
"epoch": 203.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 204.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 204.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 1844 |
|
}, |
|
{ |
|
"epoch": 205.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 205.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0075, |
|
"step": 1852 |
|
}, |
|
{ |
|
"epoch": 206.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0083, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 206.21, |
|
"eval_exact_match": 0.5960099750623441, |
|
"eval_exec": 0.6483790523690773, |
|
"eval_loss": 0.3317241966724396, |
|
"eval_runtime": 755.6025, |
|
"eval_samples_per_second": 2.151, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 206.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0065, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 207.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1864 |
|
}, |
|
{ |
|
"epoch": 207.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1868 |
|
}, |
|
{ |
|
"epoch": 207.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 208.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 208.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 209.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1884 |
|
}, |
|
{ |
|
"epoch": 209.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 210.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 210.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1896 |
|
}, |
|
{ |
|
"epoch": 211.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 211.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 211.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 212.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 212.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0016, |
|
"step": 1916 |
|
}, |
|
{ |
|
"epoch": 213.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.004, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 213.32, |
|
"eval_exact_match": 0.5669160432252701, |
|
"eval_exec": 0.6242726517040732, |
|
"eval_loss": 0.34172990918159485, |
|
"eval_runtime": 754.9365, |
|
"eval_samples_per_second": 2.152, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 213.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0056, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 214.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1928 |
|
}, |
|
{ |
|
"epoch": 214.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 215.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 215.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 215.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 216.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 1948 |
|
}, |
|
{ |
|
"epoch": 216.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 217.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 217.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 218.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 218.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 219.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 219.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 1976 |
|
}, |
|
{ |
|
"epoch": 219.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 220.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 220.43, |
|
"eval_exact_match": 0.6142975893599335, |
|
"eval_exec": 0.6758104738154613, |
|
"eval_loss": 0.4046369194984436, |
|
"eval_runtime": 757.7323, |
|
"eval_samples_per_second": 2.145, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 220.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 221.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 1992 |
|
}, |
|
{ |
|
"epoch": 221.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 1996 |
|
}, |
|
{ |
|
"epoch": 222.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 222.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2004 |
|
}, |
|
{ |
|
"epoch": 223.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2008 |
|
}, |
|
{ |
|
"epoch": 223.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0039, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 223.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 224.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 224.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 225.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2028 |
|
}, |
|
{ |
|
"epoch": 225.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2032 |
|
}, |
|
{ |
|
"epoch": 226.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2036 |
|
}, |
|
{ |
|
"epoch": 226.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 227.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 2044 |
|
}, |
|
{ |
|
"epoch": 227.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 227.54, |
|
"eval_exact_match": 0.6118038237738986, |
|
"eval_exec": 0.6708229426433915, |
|
"eval_loss": 0.4125141203403473, |
|
"eval_runtime": 756.5464, |
|
"eval_samples_per_second": 2.148, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 227.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 228.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2056 |
|
}, |
|
{ |
|
"epoch": 228.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 229.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 229.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2068 |
|
}, |
|
{ |
|
"epoch": 230.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2072 |
|
}, |
|
{ |
|
"epoch": 230.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 231.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 231.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0028, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 231.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 232.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 232.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 233.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 233.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 234.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 234.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 234.64, |
|
"eval_exact_match": 0.6209476309226932, |
|
"eval_exec": 0.6749792186201163, |
|
"eval_loss": 0.3990643322467804, |
|
"eval_runtime": 757.1625, |
|
"eval_samples_per_second": 2.146, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 235.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 235.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 235.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 236.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 236.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0023, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 237.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0029, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 237.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 238.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 238.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 239.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 239.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 239.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 240.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 240.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 241.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 241.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 241.75, |
|
"eval_exact_match": 0.6126350789692435, |
|
"eval_exec": 0.6758104738154613, |
|
"eval_loss": 0.4011194407939911, |
|
"eval_runtime": 757.6531, |
|
"eval_samples_per_second": 2.145, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 242.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 242.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 243.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 243.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 243.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 244.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 244.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 245.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 245.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 246.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 246.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.005, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 247.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 247.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 247.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 248.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 248.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0021, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 248.86, |
|
"eval_exact_match": 0.5968412302576891, |
|
"eval_exec": 0.6658354114713217, |
|
"eval_loss": 0.35461094975471497, |
|
"eval_runtime": 763.816, |
|
"eval_samples_per_second": 2.127, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 249.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 249.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 250.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 250.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 251.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 251.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0005, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 251.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 252.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 252.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 253.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 253.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 254.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 254.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 255.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 255.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 255.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 255.97, |
|
"eval_exact_match": 0.6159600997506235, |
|
"eval_exec": 0.6733167082294265, |
|
"eval_loss": 0.4026164412498474, |
|
"eval_runtime": 775.2221, |
|
"eval_samples_per_second": 2.096, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 256.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 256.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 257.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 257.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 258.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 258.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 259.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 259.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 259.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 260.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 260.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 261.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 261.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 262.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 262.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0018, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 263.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.001, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 263.11, |
|
"eval_exact_match": 0.6018287614297589, |
|
"eval_exec": 0.6641729010806318, |
|
"eval_loss": 0.3777616024017334, |
|
"eval_runtime": 772.9319, |
|
"eval_samples_per_second": 2.102, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 263.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 263.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 264.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 264.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 265.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 265.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 266.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 266.64, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 267.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0006, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 267.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 267.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0008, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 268.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0009, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 268.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0007, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 269.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0011, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 269.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0012, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 270.21, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0034, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 270.21, |
|
"eval_exact_match": 0.6101413133832086, |
|
"eval_exec": 0.6791354945968412, |
|
"eval_loss": 0.34622296690940857, |
|
"eval_runtime": 764.1562, |
|
"eval_samples_per_second": 2.127, |
|
"step": 2432 |
|
} |
|
], |
|
"max_steps": 27648, |
|
"num_train_epochs": 3072, |
|
"total_flos": 2.8779670279190217e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|