{ "best_metric": 0.6209476309226932, "best_model_checkpoint": "./experiment/t5-sparc-0312/checkpoint-2112", "epoch": 270.2149003147954, "global_step": 2432, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.0001, "loss": 3.9936, "step": 1 }, { "epoch": 0.43, "learning_rate": 0.0001, "loss": 2.032, "step": 4 }, { "epoch": 0.86, "learning_rate": 0.0001, "loss": 0.9708, "step": 8 }, { "epoch": 1.32, "learning_rate": 0.0001, "loss": 0.6016, "step": 12 }, { "epoch": 1.75, "learning_rate": 0.0001, "loss": 0.4405, "step": 16 }, { "epoch": 2.21, "learning_rate": 0.0001, "loss": 0.3827, "step": 20 }, { "epoch": 2.64, "learning_rate": 0.0001, "loss": 0.2745, "step": 24 }, { "epoch": 3.11, "learning_rate": 0.0001, "loss": 0.2728, "step": 28 }, { "epoch": 3.54, "learning_rate": 0.0001, "loss": 0.2312, "step": 32 }, { "epoch": 3.97, "learning_rate": 0.0001, "loss": 0.1982, "step": 36 }, { "epoch": 4.43, "learning_rate": 0.0001, "loss": 0.2122, "step": 40 }, { "epoch": 4.86, "learning_rate": 0.0001, "loss": 0.1494, "step": 44 }, { "epoch": 5.32, "learning_rate": 0.0001, "loss": 0.1485, "step": 48 }, { "epoch": 5.75, "learning_rate": 0.0001, "loss": 0.1209, "step": 52 }, { "epoch": 6.21, "learning_rate": 0.0001, "loss": 0.1364, "step": 56 }, { "epoch": 6.64, "learning_rate": 0.0001, "loss": 0.1182, "step": 60 }, { "epoch": 7.11, "learning_rate": 0.0001, "loss": 0.1055, "step": 64 }, { "epoch": 7.11, "eval_exact_match": 0.44472152950955945, "eval_exec": 0.49542809642560265, "eval_loss": 0.17247992753982544, "eval_runtime": 539.6836, "eval_samples_per_second": 3.011, "step": 64 }, { "epoch": 7.54, "learning_rate": 0.0001, "loss": 0.0938, "step": 68 }, { "epoch": 7.97, "learning_rate": 0.0001, "loss": 0.0889, "step": 72 }, { "epoch": 8.43, "learning_rate": 0.0001, "loss": 0.1041, "step": 76 }, { "epoch": 8.86, "learning_rate": 0.0001, "loss": 0.0821, "step": 80 }, { "epoch": 9.32, "learning_rate": 0.0001, "loss": 0.0969, "step": 84 }, { "epoch": 9.75, "learning_rate": 0.0001, "loss": 0.0664, "step": 88 }, { "epoch": 10.21, "learning_rate": 0.0001, "loss": 0.0788, "step": 92 }, { "epoch": 10.64, "learning_rate": 0.0001, "loss": 0.0722, "step": 96 }, { "epoch": 11.11, "learning_rate": 0.0001, "loss": 0.0657, "step": 100 }, { "epoch": 11.54, "learning_rate": 0.0001, "loss": 0.0613, "step": 104 }, { "epoch": 11.97, "learning_rate": 0.0001, "loss": 0.0551, "step": 108 }, { "epoch": 12.43, "learning_rate": 0.0001, "loss": 0.0584, "step": 112 }, { "epoch": 12.86, "learning_rate": 0.0001, "loss": 0.0474, "step": 116 }, { "epoch": 13.32, "learning_rate": 0.0001, "loss": 0.0516, "step": 120 }, { "epoch": 13.75, "learning_rate": 0.0001, "loss": 0.0461, "step": 124 }, { "epoch": 14.21, "learning_rate": 0.0001, "loss": 0.0486, "step": 128 }, { "epoch": 14.21, "eval_exact_match": 0.5544472152950956, "eval_exec": 0.6093100581878637, "eval_loss": 0.18642085790634155, "eval_runtime": 697.2854, "eval_samples_per_second": 2.33, "step": 128 }, { "epoch": 14.64, "learning_rate": 0.0001, "loss": 0.0421, "step": 132 }, { "epoch": 15.11, "learning_rate": 0.0001, "loss": 0.0398, "step": 136 }, { "epoch": 15.54, "learning_rate": 0.0001, "loss": 0.0436, "step": 140 }, { "epoch": 15.97, "learning_rate": 0.0001, "loss": 0.0358, "step": 144 }, { "epoch": 16.43, "learning_rate": 0.0001, "loss": 0.0373, "step": 148 }, { "epoch": 16.86, "learning_rate": 0.0001, "loss": 0.0323, "step": 152 }, { "epoch": 17.32, "learning_rate": 0.0001, "loss": 0.0349, "step": 156 }, { "epoch": 17.75, "learning_rate": 0.0001, "loss": 0.0285, "step": 160 }, { "epoch": 18.21, "learning_rate": 0.0001, "loss": 0.041, "step": 164 }, { "epoch": 18.64, "learning_rate": 0.0001, "loss": 0.0403, "step": 168 }, { "epoch": 19.11, "learning_rate": 0.0001, "loss": 0.0378, "step": 172 }, { "epoch": 19.54, "learning_rate": 0.0001, "loss": 0.0297, "step": 176 }, { "epoch": 19.97, "learning_rate": 0.0001, "loss": 0.0236, "step": 180 }, { "epoch": 20.43, "learning_rate": 0.0001, "loss": 0.0233, "step": 184 }, { "epoch": 20.86, "learning_rate": 0.0001, "loss": 0.022, "step": 188 }, { "epoch": 21.32, "learning_rate": 0.0001, "loss": 0.0214, "step": 192 }, { "epoch": 21.32, "eval_exact_match": 0.5685785536159601, "eval_exec": 0.6226101413133832, "eval_loss": 0.21789631247520447, "eval_runtime": 744.163, "eval_samples_per_second": 2.184, "step": 192 }, { "epoch": 21.75, "learning_rate": 0.0001, "loss": 0.0191, "step": 196 }, { "epoch": 22.21, "learning_rate": 0.0001, "loss": 0.0223, "step": 200 }, { "epoch": 22.64, "learning_rate": 0.0001, "loss": 0.0198, "step": 204 }, { "epoch": 23.11, "learning_rate": 0.0001, "loss": 0.0192, "step": 208 }, { "epoch": 23.54, "learning_rate": 0.0001, "loss": 0.0183, "step": 212 }, { "epoch": 23.97, "learning_rate": 0.0001, "loss": 0.0167, "step": 216 }, { "epoch": 24.43, "learning_rate": 0.0001, "loss": 0.0163, "step": 220 }, { "epoch": 24.86, "learning_rate": 0.0001, "loss": 0.0184, "step": 224 }, { "epoch": 25.32, "learning_rate": 0.0001, "loss": 0.0155, "step": 228 }, { "epoch": 25.75, "learning_rate": 0.0001, "loss": 0.0155, "step": 232 }, { "epoch": 26.21, "learning_rate": 0.0001, "loss": 0.0217, "step": 236 }, { "epoch": 26.64, "learning_rate": 0.0001, "loss": 0.0766, "step": 240 }, { "epoch": 27.11, "learning_rate": 0.0001, "loss": 0.0221, "step": 244 }, { "epoch": 27.54, "learning_rate": 0.0001, "loss": 0.0141, "step": 248 }, { "epoch": 27.97, "learning_rate": 0.0001, "loss": 0.0138, "step": 252 }, { "epoch": 28.43, "learning_rate": 0.0001, "loss": 0.0116, "step": 256 }, { "epoch": 28.43, "eval_exact_match": 0.5885286783042394, "eval_exec": 0.6425602660016625, "eval_loss": 0.22936737537384033, "eval_runtime": 755.0825, "eval_samples_per_second": 2.152, "step": 256 }, { "epoch": 28.86, "learning_rate": 0.0001, "loss": 0.0114, "step": 260 }, { "epoch": 29.32, "learning_rate": 0.0001, "loss": 0.0114, "step": 264 }, { "epoch": 29.75, "learning_rate": 0.0001, "loss": 0.01, "step": 268 }, { "epoch": 30.21, "learning_rate": 0.0001, "loss": 0.0107, "step": 272 }, { "epoch": 30.64, "learning_rate": 0.0001, "loss": 0.0099, "step": 276 }, { "epoch": 31.11, "learning_rate": 0.0001, "loss": 0.0103, "step": 280 }, { "epoch": 31.54, "learning_rate": 0.0001, "loss": 0.0097, "step": 284 }, { "epoch": 31.97, "learning_rate": 0.0001, "loss": 0.0094, "step": 288 }, { "epoch": 32.43, "learning_rate": 0.0001, "loss": 0.0097, "step": 292 }, { "epoch": 32.86, "learning_rate": 0.0001, "loss": 0.0084, "step": 296 }, { "epoch": 33.32, "learning_rate": 0.0001, "loss": 0.01, "step": 300 }, { "epoch": 33.75, "learning_rate": 0.0001, "loss": 0.0083, "step": 304 }, { "epoch": 34.21, "learning_rate": 0.0001, "loss": 0.0082, "step": 308 }, { "epoch": 34.64, "learning_rate": 0.0001, "loss": 0.0077, "step": 312 }, { "epoch": 35.11, "learning_rate": 0.0001, "loss": 0.0077, "step": 316 }, { "epoch": 35.54, "learning_rate": 0.0001, "loss": 0.0082, "step": 320 }, { "epoch": 35.54, "eval_exact_match": 0.600997506234414, "eval_exec": 0.655860349127182, "eval_loss": 0.25818198919296265, "eval_runtime": 756.517, "eval_samples_per_second": 2.148, "step": 320 }, { "epoch": 35.97, "learning_rate": 0.0001, "loss": 0.0072, "step": 324 }, { "epoch": 36.43, "learning_rate": 0.0001, "loss": 0.0074, "step": 328 }, { "epoch": 36.86, "learning_rate": 0.0001, "loss": 0.0067, "step": 332 }, { "epoch": 37.32, "learning_rate": 0.0001, "loss": 0.0061, "step": 336 }, { "epoch": 37.75, "learning_rate": 0.0001, "loss": 0.0066, "step": 340 }, { "epoch": 38.21, "learning_rate": 0.0001, "loss": 0.0073, "step": 344 }, { "epoch": 38.64, "learning_rate": 0.0001, "loss": 0.0075, "step": 348 }, { "epoch": 39.11, "learning_rate": 0.0001, "loss": 0.0075, "step": 352 }, { "epoch": 39.54, "learning_rate": 0.0001, "loss": 0.0185, "step": 356 }, { "epoch": 39.97, "learning_rate": 0.0001, "loss": 0.0075, "step": 360 }, { "epoch": 40.43, "learning_rate": 0.0001, "loss": 0.0061, "step": 364 }, { "epoch": 40.86, "learning_rate": 0.0001, "loss": 0.0058, "step": 368 }, { "epoch": 41.32, "learning_rate": 0.0001, "loss": 0.0061, "step": 372 }, { "epoch": 41.75, "learning_rate": 0.0001, "loss": 0.0056, "step": 376 }, { "epoch": 42.21, "learning_rate": 0.0001, "loss": 0.0059, "step": 380 }, { "epoch": 42.64, "learning_rate": 0.0001, "loss": 0.0104, "step": 384 }, { "epoch": 42.64, "eval_exact_match": 0.5876974231088944, "eval_exec": 0.6433915211970075, "eval_loss": 0.26233145594596863, "eval_runtime": 745.8121, "eval_samples_per_second": 2.179, "step": 384 }, { "epoch": 43.11, "learning_rate": 0.0001, "loss": 0.0057, "step": 388 }, { "epoch": 43.54, "learning_rate": 0.0001, "loss": 0.0053, "step": 392 }, { "epoch": 43.97, "learning_rate": 0.0001, "loss": 0.0049, "step": 396 }, { "epoch": 44.43, "learning_rate": 0.0001, "loss": 0.0049, "step": 400 }, { "epoch": 44.86, "learning_rate": 0.0001, "loss": 0.0047, "step": 404 }, { "epoch": 45.32, "learning_rate": 0.0001, "loss": 0.0045, "step": 408 }, { "epoch": 45.75, "learning_rate": 0.0001, "loss": 0.0046, "step": 412 }, { "epoch": 46.21, "learning_rate": 0.0001, "loss": 0.0048, "step": 416 }, { "epoch": 46.64, "learning_rate": 0.0001, "loss": 0.0256, "step": 420 }, { "epoch": 47.11, "learning_rate": 0.0001, "loss": 0.0058, "step": 424 }, { "epoch": 47.54, "learning_rate": 0.0001, "loss": 0.0043, "step": 428 }, { "epoch": 47.97, "learning_rate": 0.0001, "loss": 0.0044, "step": 432 }, { "epoch": 48.43, "learning_rate": 0.0001, "loss": 0.0038, "step": 436 }, { "epoch": 48.86, "learning_rate": 0.0001, "loss": 0.0041, "step": 440 }, { "epoch": 49.32, "learning_rate": 0.0001, "loss": 0.0043, "step": 444 }, { "epoch": 49.75, "learning_rate": 0.0001, "loss": 0.0033, "step": 448 }, { "epoch": 49.75, "eval_exact_match": 0.6076475477971738, "eval_exec": 0.6541978387364921, "eval_loss": 0.3040614724159241, "eval_runtime": 745.4384, "eval_samples_per_second": 2.18, "step": 448 }, { "epoch": 50.21, "learning_rate": 0.0001, "loss": 0.0037, "step": 452 }, { "epoch": 50.64, "learning_rate": 0.0001, "loss": 0.0037, "step": 456 }, { "epoch": 51.11, "learning_rate": 0.0001, "loss": 0.0041, "step": 460 }, { "epoch": 51.54, "learning_rate": 0.0001, "loss": 0.0033, "step": 464 }, { "epoch": 51.97, "learning_rate": 0.0001, "loss": 0.0033, "step": 468 }, { "epoch": 52.43, "learning_rate": 0.0001, "loss": 0.0037, "step": 472 }, { "epoch": 52.86, "learning_rate": 0.0001, "loss": 0.0034, "step": 476 }, { "epoch": 53.32, "learning_rate": 0.0001, "loss": 0.0035, "step": 480 }, { "epoch": 53.75, "learning_rate": 0.0001, "loss": 0.0031, "step": 484 }, { "epoch": 54.21, "learning_rate": 0.0001, "loss": 0.0038, "step": 488 }, { "epoch": 54.64, "learning_rate": 0.0001, "loss": 0.0039, "step": 492 }, { "epoch": 55.11, "learning_rate": 0.0001, "loss": 0.0059, "step": 496 }, { "epoch": 55.54, "learning_rate": 0.0001, "loss": 0.0362, "step": 500 }, { "epoch": 55.97, "learning_rate": 0.0001, "loss": 0.0034, "step": 504 }, { "epoch": 56.43, "learning_rate": 0.0001, "loss": 0.0034, "step": 508 }, { "epoch": 56.86, "learning_rate": 0.0001, "loss": 0.003, "step": 512 }, { "epoch": 56.86, "eval_exact_match": 0.6051537822111388, "eval_exec": 0.656691604322527, "eval_loss": 0.30350586771965027, "eval_runtime": 754.5149, "eval_samples_per_second": 2.154, "step": 512 }, { "epoch": 57.32, "learning_rate": 0.0001, "loss": 0.003, "step": 516 }, { "epoch": 57.75, "learning_rate": 0.0001, "loss": 0.0026, "step": 520 }, { "epoch": 58.21, "learning_rate": 0.0001, "loss": 0.0028, "step": 524 }, { "epoch": 58.64, "learning_rate": 0.0001, "loss": 0.0025, "step": 528 }, { "epoch": 59.11, "learning_rate": 0.0001, "loss": 0.0028, "step": 532 }, { "epoch": 59.54, "learning_rate": 0.0001, "loss": 0.0026, "step": 536 }, { "epoch": 59.97, "learning_rate": 0.0001, "loss": 0.0026, "step": 540 }, { "epoch": 60.43, "learning_rate": 0.0001, "loss": 0.0024, "step": 544 }, { "epoch": 60.86, "learning_rate": 0.0001, "loss": 0.0024, "step": 548 }, { "epoch": 61.32, "learning_rate": 0.0001, "loss": 0.0028, "step": 552 }, { "epoch": 61.75, "learning_rate": 0.0001, "loss": 0.0025, "step": 556 }, { "epoch": 62.21, "learning_rate": 0.0001, "loss": 0.0033, "step": 560 }, { "epoch": 62.64, "learning_rate": 0.0001, "loss": 0.0088, "step": 564 }, { "epoch": 63.11, "learning_rate": 0.0001, "loss": 0.0031, "step": 568 }, { "epoch": 63.54, "learning_rate": 0.0001, "loss": 0.0024, "step": 572 }, { "epoch": 63.97, "learning_rate": 0.0001, "loss": 0.0024, "step": 576 }, { "epoch": 63.97, "eval_exact_match": 0.5960099750623441, "eval_exec": 0.6492103075644223, "eval_loss": 0.30405256152153015, "eval_runtime": 759.5993, "eval_samples_per_second": 2.139, "step": 576 }, { "epoch": 64.43, "learning_rate": 0.0001, "loss": 0.0023, "step": 580 }, { "epoch": 64.86, "learning_rate": 0.0001, "loss": 0.0046, "step": 584 }, { "epoch": 65.32, "learning_rate": 0.0001, "loss": 0.0086, "step": 588 }, { "epoch": 65.75, "learning_rate": 0.0001, "loss": 0.0023, "step": 592 }, { "epoch": 66.21, "learning_rate": 0.0001, "loss": 0.0026, "step": 596 }, { "epoch": 66.64, "learning_rate": 0.0001, "loss": 0.0021, "step": 600 }, { "epoch": 67.11, "learning_rate": 0.0001, "loss": 0.0024, "step": 604 }, { "epoch": 67.54, "learning_rate": 0.0001, "loss": 0.0032, "step": 608 }, { "epoch": 67.97, "learning_rate": 0.0001, "loss": 0.0028, "step": 612 }, { "epoch": 68.43, "learning_rate": 0.0001, "loss": 0.0022, "step": 616 }, { "epoch": 68.86, "learning_rate": 0.0001, "loss": 0.0019, "step": 620 }, { "epoch": 69.32, "learning_rate": 0.0001, "loss": 0.0022, "step": 624 }, { "epoch": 69.75, "learning_rate": 0.0001, "loss": 0.0022, "step": 628 }, { "epoch": 70.21, "learning_rate": 0.0001, "loss": 0.0019, "step": 632 }, { "epoch": 70.64, "learning_rate": 0.0001, "loss": 0.002, "step": 636 }, { "epoch": 71.11, "learning_rate": 0.0001, "loss": 0.0024, "step": 640 }, { "epoch": 71.11, "eval_exact_match": 0.5818786367414797, "eval_exec": 0.6317539484621779, "eval_loss": 0.3286347985267639, "eval_runtime": 754.2184, "eval_samples_per_second": 2.155, "step": 640 }, { "epoch": 71.54, "learning_rate": 0.0001, "loss": 0.0019, "step": 644 }, { "epoch": 71.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 648 }, { "epoch": 72.43, "learning_rate": 0.0001, "loss": 0.0022, "step": 652 }, { "epoch": 72.86, "learning_rate": 0.0001, "loss": 0.002, "step": 656 }, { "epoch": 73.32, "learning_rate": 0.0001, "loss": 0.0029, "step": 660 }, { "epoch": 73.75, "learning_rate": 0.0001, "loss": 0.0025, "step": 664 }, { "epoch": 74.21, "learning_rate": 0.0001, "loss": 0.0018, "step": 668 }, { "epoch": 74.64, "learning_rate": 0.0001, "loss": 0.0019, "step": 672 }, { "epoch": 75.11, "learning_rate": 0.0001, "loss": 0.0022, "step": 676 }, { "epoch": 75.54, "learning_rate": 0.0001, "loss": 0.0018, "step": 680 }, { "epoch": 75.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 684 }, { "epoch": 76.43, "learning_rate": 0.0001, "loss": 0.002, "step": 688 }, { "epoch": 76.86, "learning_rate": 0.0001, "loss": 0.002, "step": 692 }, { "epoch": 77.32, "learning_rate": 0.0001, "loss": 0.0024, "step": 696 }, { "epoch": 77.75, "learning_rate": 0.0001, "loss": 0.002, "step": 700 }, { "epoch": 78.21, "learning_rate": 0.0001, "loss": 0.002, "step": 704 }, { "epoch": 78.21, "eval_exact_match": 0.5926849542809642, "eval_exec": 0.6359102244389028, "eval_loss": 0.3365001380443573, "eval_runtime": 744.2096, "eval_samples_per_second": 2.184, "step": 704 }, { "epoch": 78.64, "learning_rate": 0.0001, "loss": 0.0016, "step": 708 }, { "epoch": 79.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 712 }, { "epoch": 79.54, "learning_rate": 0.0001, "loss": 0.0022, "step": 716 }, { "epoch": 79.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 720 }, { "epoch": 80.43, "learning_rate": 0.0001, "loss": 0.002, "step": 724 }, { "epoch": 80.86, "learning_rate": 0.0001, "loss": 0.0021, "step": 728 }, { "epoch": 81.32, "learning_rate": 0.0001, "loss": 0.0031, "step": 732 }, { "epoch": 81.75, "learning_rate": 0.0001, "loss": 0.0021, "step": 736 }, { "epoch": 82.21, "learning_rate": 0.0001, "loss": 0.0018, "step": 740 }, { "epoch": 82.64, "learning_rate": 0.0001, "loss": 0.0021, "step": 744 }, { "epoch": 83.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 748 }, { "epoch": 83.54, "learning_rate": 0.0001, "loss": 0.0018, "step": 752 }, { "epoch": 83.97, "learning_rate": 0.0001, "loss": 0.0018, "step": 756 }, { "epoch": 84.43, "learning_rate": 0.0001, "loss": 0.0015, "step": 760 }, { "epoch": 84.86, "learning_rate": 0.0001, "loss": 0.0026, "step": 764 }, { "epoch": 85.32, "learning_rate": 0.0001, "loss": 0.002, "step": 768 }, { "epoch": 85.32, "eval_exact_match": 0.5993349958437241, "eval_exec": 0.6533665835411472, "eval_loss": 0.33010169863700867, "eval_runtime": 749.4582, "eval_samples_per_second": 2.168, "step": 768 }, { "epoch": 85.75, "learning_rate": 0.0001, "loss": 0.0019, "step": 772 }, { "epoch": 86.21, "learning_rate": 0.0001, "loss": 0.0022, "step": 776 }, { "epoch": 86.64, "learning_rate": 0.0001, "loss": 0.0021, "step": 780 }, { "epoch": 87.11, "learning_rate": 0.0001, "loss": 0.0024, "step": 784 }, { "epoch": 87.54, "learning_rate": 0.0001, "loss": 0.0018, "step": 788 }, { "epoch": 87.97, "learning_rate": 0.0001, "loss": 0.0017, "step": 792 }, { "epoch": 88.43, "learning_rate": 0.0001, "loss": 0.0017, "step": 796 }, { "epoch": 88.86, "learning_rate": 0.0001, "loss": 0.0015, "step": 800 }, { "epoch": 89.32, "learning_rate": 0.0001, "loss": 0.0018, "step": 804 }, { "epoch": 89.75, "learning_rate": 0.0001, "loss": 0.0019, "step": 808 }, { "epoch": 90.21, "learning_rate": 0.0001, "loss": 0.0022, "step": 812 }, { "epoch": 90.64, "learning_rate": 0.0001, "loss": 0.0018, "step": 816 }, { "epoch": 91.11, "learning_rate": 0.0001, "loss": 0.0042, "step": 820 }, { "epoch": 91.54, "learning_rate": 0.0001, "loss": 0.0017, "step": 824 }, { "epoch": 91.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 828 }, { "epoch": 92.43, "learning_rate": 0.0001, "loss": 0.0017, "step": 832 }, { "epoch": 92.43, "eval_exact_match": 0.5960099750623441, "eval_exec": 0.657522859517872, "eval_loss": 0.3371131718158722, "eval_runtime": 749.5299, "eval_samples_per_second": 2.168, "step": 832 }, { "epoch": 92.86, "learning_rate": 0.0001, "loss": 0.0019, "step": 836 }, { "epoch": 93.32, "learning_rate": 0.0001, "loss": 0.002, "step": 840 }, { "epoch": 93.75, "learning_rate": 0.0001, "loss": 0.0019, "step": 844 }, { "epoch": 94.21, "learning_rate": 0.0001, "loss": 0.0018, "step": 848 }, { "epoch": 94.64, "learning_rate": 0.0001, "loss": 0.0014, "step": 852 }, { "epoch": 95.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 856 }, { "epoch": 95.54, "learning_rate": 0.0001, "loss": 0.0018, "step": 860 }, { "epoch": 95.97, "learning_rate": 0.0001, "loss": 0.0018, "step": 864 }, { "epoch": 96.43, "learning_rate": 0.0001, "loss": 0.0058, "step": 868 }, { "epoch": 96.86, "learning_rate": 0.0001, "loss": 0.012, "step": 872 }, { "epoch": 97.32, "learning_rate": 0.0001, "loss": 0.011, "step": 876 }, { "epoch": 97.75, "learning_rate": 0.0001, "loss": 0.002, "step": 880 }, { "epoch": 98.21, "learning_rate": 0.0001, "loss": 0.0017, "step": 884 }, { "epoch": 98.64, "learning_rate": 0.0001, "loss": 0.0014, "step": 888 }, { "epoch": 99.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 892 }, { "epoch": 99.54, "learning_rate": 0.0001, "loss": 0.0017, "step": 896 }, { "epoch": 99.54, "eval_exact_match": 0.5951787198669992, "eval_exec": 0.6492103075644223, "eval_loss": 0.35070350766181946, "eval_runtime": 738.8339, "eval_samples_per_second": 2.199, "step": 896 }, { "epoch": 99.97, "learning_rate": 0.0001, "loss": 0.0159, "step": 900 }, { "epoch": 100.43, "learning_rate": 0.0001, "loss": 0.0076, "step": 904 }, { "epoch": 100.86, "learning_rate": 0.0001, "loss": 0.0019, "step": 908 }, { "epoch": 101.32, "learning_rate": 0.0001, "loss": 0.0016, "step": 912 }, { "epoch": 101.75, "learning_rate": 0.0001, "loss": 0.0016, "step": 916 }, { "epoch": 102.21, "learning_rate": 0.0001, "loss": 0.0016, "step": 920 }, { "epoch": 102.64, "learning_rate": 0.0001, "loss": 0.0014, "step": 924 }, { "epoch": 103.11, "learning_rate": 0.0001, "loss": 0.0015, "step": 928 }, { "epoch": 103.54, "learning_rate": 0.0001, "loss": 0.0012, "step": 932 }, { "epoch": 103.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 936 }, { "epoch": 104.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 940 }, { "epoch": 104.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 944 }, { "epoch": 105.32, "learning_rate": 0.0001, "loss": 0.0014, "step": 948 }, { "epoch": 105.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 952 }, { "epoch": 106.21, "learning_rate": 0.0001, "loss": 0.0013, "step": 956 }, { "epoch": 106.64, "learning_rate": 0.0001, "loss": 0.0013, "step": 960 }, { "epoch": 106.64, "eval_exact_match": 0.6076475477971738, "eval_exec": 0.6674979218620116, "eval_loss": 0.36282214522361755, "eval_runtime": 748.9956, "eval_samples_per_second": 2.17, "step": 960 }, { "epoch": 107.11, "learning_rate": 0.0001, "loss": 0.0013, "step": 964 }, { "epoch": 107.54, "learning_rate": 0.0001, "loss": 0.0013, "step": 968 }, { "epoch": 107.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 972 }, { "epoch": 108.43, "learning_rate": 0.0001, "loss": 0.0014, "step": 976 }, { "epoch": 108.86, "learning_rate": 0.0001, "loss": 0.0015, "step": 980 }, { "epoch": 109.32, "learning_rate": 0.0001, "loss": 0.0015, "step": 984 }, { "epoch": 109.75, "learning_rate": 0.0001, "loss": 0.0014, "step": 988 }, { "epoch": 110.21, "learning_rate": 0.0001, "loss": 0.0016, "step": 992 }, { "epoch": 110.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 996 }, { "epoch": 111.11, "learning_rate": 0.0001, "loss": 0.0014, "step": 1000 }, { "epoch": 111.54, "learning_rate": 0.0001, "loss": 0.0012, "step": 1004 }, { "epoch": 111.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1008 }, { "epoch": 112.43, "learning_rate": 0.0001, "loss": 0.0012, "step": 1012 }, { "epoch": 112.86, "learning_rate": 0.0001, "loss": 0.0015, "step": 1016 }, { "epoch": 113.32, "learning_rate": 0.0001, "loss": 0.0015, "step": 1020 }, { "epoch": 113.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1024 }, { "epoch": 113.75, "eval_exact_match": 0.6151288445552785, "eval_exec": 0.6708229426433915, "eval_loss": 0.351724237203598, "eval_runtime": 774.4973, "eval_samples_per_second": 2.098, "step": 1024 }, { "epoch": 114.21, "learning_rate": 0.0001, "loss": 0.0012, "step": 1028 }, { "epoch": 114.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 1032 }, { "epoch": 115.11, "learning_rate": 0.0001, "loss": 0.0015, "step": 1036 }, { "epoch": 115.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 1040 }, { "epoch": 115.97, "learning_rate": 0.0001, "loss": 0.0019, "step": 1044 }, { "epoch": 116.43, "learning_rate": 0.0001, "loss": 0.0049, "step": 1048 }, { "epoch": 116.86, "learning_rate": 0.0001, "loss": 0.0045, "step": 1052 }, { "epoch": 117.32, "learning_rate": 0.0001, "loss": 0.0014, "step": 1056 }, { "epoch": 117.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1060 }, { "epoch": 118.21, "learning_rate": 0.0001, "loss": 0.0014, "step": 1064 }, { "epoch": 118.64, "learning_rate": 0.0001, "loss": 0.0017, "step": 1068 }, { "epoch": 119.11, "learning_rate": 0.0001, "loss": 0.0016, "step": 1072 }, { "epoch": 119.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 1076 }, { "epoch": 119.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1080 }, { "epoch": 120.43, "learning_rate": 0.0001, "loss": 0.0012, "step": 1084 }, { "epoch": 120.86, "learning_rate": 0.0001, "loss": 0.0013, "step": 1088 }, { "epoch": 120.86, "eval_exact_match": 0.5993349958437241, "eval_exec": 0.6608478802992519, "eval_loss": 0.35145050287246704, "eval_runtime": 749.1176, "eval_samples_per_second": 2.169, "step": 1088 }, { "epoch": 121.32, "learning_rate": 0.0001, "loss": 0.0015, "step": 1092 }, { "epoch": 121.75, "learning_rate": 0.0001, "loss": 0.0013, "step": 1096 }, { "epoch": 122.21, "learning_rate": 0.0001, "loss": 0.0015, "step": 1100 }, { "epoch": 122.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 1104 }, { "epoch": 123.11, "learning_rate": 0.0001, "loss": 0.0023, "step": 1108 }, { "epoch": 123.54, "learning_rate": 0.0001, "loss": 0.0065, "step": 1112 }, { "epoch": 123.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1116 }, { "epoch": 124.43, "learning_rate": 0.0001, "loss": 0.0011, "step": 1120 }, { "epoch": 124.86, "learning_rate": 0.0001, "loss": 0.0011, "step": 1124 }, { "epoch": 125.32, "learning_rate": 0.0001, "loss": 0.0013, "step": 1128 }, { "epoch": 125.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1132 }, { "epoch": 126.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1136 }, { "epoch": 126.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 1140 }, { "epoch": 127.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 1144 }, { "epoch": 127.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 1148 }, { "epoch": 127.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1152 }, { "epoch": 127.97, "eval_exact_match": 0.6109725685785536, "eval_exec": 0.6699916874480466, "eval_loss": 0.35655492544174194, "eval_runtime": 756.087, "eval_samples_per_second": 2.149, "step": 1152 }, { "epoch": 128.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1156 }, { "epoch": 128.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 1160 }, { "epoch": 129.32, "learning_rate": 0.0001, "loss": 0.0013, "step": 1164 }, { "epoch": 129.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1168 }, { "epoch": 130.21, "learning_rate": 0.0001, "loss": 0.0012, "step": 1172 }, { "epoch": 130.64, "learning_rate": 0.0001, "loss": 0.0009, "step": 1176 }, { "epoch": 131.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 1180 }, { "epoch": 131.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1184 }, { "epoch": 131.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1188 }, { "epoch": 132.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1192 }, { "epoch": 132.86, "learning_rate": 0.0001, "loss": 0.0011, "step": 1196 }, { "epoch": 133.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 1200 }, { "epoch": 133.75, "learning_rate": 0.0001, "loss": 0.0016, "step": 1204 }, { "epoch": 134.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1208 }, { "epoch": 134.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 1212 }, { "epoch": 135.11, "learning_rate": 0.0001, "loss": 0.0014, "step": 1216 }, { "epoch": 135.11, "eval_exact_match": 0.5985037406483791, "eval_exec": 0.6583541147132169, "eval_loss": 0.3626713454723358, "eval_runtime": 771.5036, "eval_samples_per_second": 2.106, "step": 1216 }, { "epoch": 135.54, "learning_rate": 0.0001, "loss": 0.0052, "step": 1220 }, { "epoch": 135.97, "learning_rate": 0.0001, "loss": 0.0032, "step": 1224 }, { "epoch": 136.43, "learning_rate": 0.0001, "loss": 0.0076, "step": 1228 }, { "epoch": 136.86, "learning_rate": 0.0001, "loss": 0.0021, "step": 1232 }, { "epoch": 137.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 1236 }, { "epoch": 137.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1240 }, { "epoch": 138.21, "learning_rate": 0.0001, "loss": 0.0012, "step": 1244 }, { "epoch": 138.64, "learning_rate": 0.0001, "loss": 0.001, "step": 1248 }, { "epoch": 139.11, "learning_rate": 0.0001, "loss": 0.0014, "step": 1252 }, { "epoch": 139.54, "learning_rate": 0.0001, "loss": 0.001, "step": 1256 }, { "epoch": 139.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1260 }, { "epoch": 140.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1264 }, { "epoch": 140.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1268 }, { "epoch": 141.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1272 }, { "epoch": 141.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1276 }, { "epoch": 142.21, "learning_rate": 0.0001, "loss": 0.001, "step": 1280 }, { "epoch": 142.21, "eval_exact_match": 0.6068162926018288, "eval_exec": 0.6625103906899418, "eval_loss": 0.37810268998146057, "eval_runtime": 744.2729, "eval_samples_per_second": 2.183, "step": 1280 }, { "epoch": 142.64, "learning_rate": 0.0001, "loss": 0.0009, "step": 1284 }, { "epoch": 143.11, "learning_rate": 0.0001, "loss": 0.001, "step": 1288 }, { "epoch": 143.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 1292 }, { "epoch": 143.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 1296 }, { "epoch": 144.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1300 }, { "epoch": 144.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1304 }, { "epoch": 145.32, "learning_rate": 0.0001, "loss": 0.0013, "step": 1308 }, { "epoch": 145.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1312 }, { "epoch": 146.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1316 }, { "epoch": 146.64, "learning_rate": 0.0001, "loss": 0.0013, "step": 1320 }, { "epoch": 147.11, "learning_rate": 0.0001, "loss": 0.0016, "step": 1324 }, { "epoch": 147.54, "learning_rate": 0.0001, "loss": 0.0058, "step": 1328 }, { "epoch": 147.97, "learning_rate": 0.0001, "loss": 0.0015, "step": 1332 }, { "epoch": 148.43, "learning_rate": 0.0001, "loss": 0.0011, "step": 1336 }, { "epoch": 148.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1340 }, { "epoch": 149.32, "learning_rate": 0.0001, "loss": 0.0007, "step": 1344 }, { "epoch": 149.32, "eval_exact_match": 0.6184538653366584, "eval_exec": 0.6749792186201163, "eval_loss": 0.37218576669692993, "eval_runtime": 745.0951, "eval_samples_per_second": 2.181, "step": 1344 }, { "epoch": 149.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1348 }, { "epoch": 150.21, "learning_rate": 0.0001, "loss": 0.001, "step": 1352 }, { "epoch": 150.64, "learning_rate": 0.0001, "loss": 0.0013, "step": 1356 }, { "epoch": 151.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 1360 }, { "epoch": 151.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 1364 }, { "epoch": 151.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1368 }, { "epoch": 152.43, "learning_rate": 0.0001, "loss": 0.0011, "step": 1372 }, { "epoch": 152.86, "learning_rate": 0.0001, "loss": 0.0048, "step": 1376 }, { "epoch": 153.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 1380 }, { "epoch": 153.75, "learning_rate": 0.0001, "loss": 0.001, "step": 1384 }, { "epoch": 154.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1388 }, { "epoch": 154.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 1392 }, { "epoch": 155.11, "learning_rate": 0.0001, "loss": 0.0059, "step": 1396 }, { "epoch": 155.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 1400 }, { "epoch": 155.97, "learning_rate": 0.0001, "loss": 0.0013, "step": 1404 }, { "epoch": 156.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1408 }, { "epoch": 156.43, "eval_exact_match": 0.6059850374064838, "eval_exec": 0.6633416458852868, "eval_loss": 0.3731986880302429, "eval_runtime": 751.4238, "eval_samples_per_second": 2.163, "step": 1408 }, { "epoch": 156.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1412 }, { "epoch": 157.32, "learning_rate": 0.0001, "loss": 0.0017, "step": 1416 }, { "epoch": 157.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1420 }, { "epoch": 158.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1424 }, { "epoch": 158.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1428 }, { "epoch": 159.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 1432 }, { "epoch": 159.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1436 }, { "epoch": 159.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1440 }, { "epoch": 160.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1444 }, { "epoch": 160.86, "learning_rate": 0.0001, "loss": 0.001, "step": 1448 }, { "epoch": 161.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1452 }, { "epoch": 161.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1456 }, { "epoch": 162.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1460 }, { "epoch": 162.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1464 }, { "epoch": 163.11, "learning_rate": 0.0001, "loss": 0.001, "step": 1468 }, { "epoch": 163.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1472 }, { "epoch": 163.54, "eval_exact_match": 0.6101413133832086, "eval_exec": 0.6674979218620116, "eval_loss": 0.38048475980758667, "eval_runtime": 813.5844, "eval_samples_per_second": 1.997, "step": 1472 }, { "epoch": 163.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1476 }, { "epoch": 164.43, "learning_rate": 0.0001, "loss": 0.0134, "step": 1480 }, { "epoch": 164.86, "learning_rate": 0.0001, "loss": 0.0011, "step": 1484 }, { "epoch": 165.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1488 }, { "epoch": 165.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1492 }, { "epoch": 166.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1496 }, { "epoch": 166.64, "learning_rate": 0.0001, "loss": 0.001, "step": 1500 }, { "epoch": 167.11, "learning_rate": 0.0001, "loss": 0.001, "step": 1504 }, { "epoch": 167.54, "learning_rate": 0.0001, "loss": 0.001, "step": 1508 }, { "epoch": 167.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1512 }, { "epoch": 168.43, "learning_rate": 0.0001, "loss": 0.0016, "step": 1516 }, { "epoch": 168.86, "learning_rate": 0.0001, "loss": 0.0012, "step": 1520 }, { "epoch": 169.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 1524 }, { "epoch": 169.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1528 }, { "epoch": 170.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1532 }, { "epoch": 170.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 1536 }, { "epoch": 170.64, "eval_exact_match": 0.6101413133832086, "eval_exec": 0.6683291770573566, "eval_loss": 0.38680797815322876, "eval_runtime": 781.939, "eval_samples_per_second": 2.078, "step": 1536 }, { "epoch": 171.11, "learning_rate": 0.0001, "loss": 0.0012, "step": 1540 }, { "epoch": 171.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1544 }, { "epoch": 171.97, "learning_rate": 0.0001, "loss": 0.0028, "step": 1548 }, { "epoch": 172.43, "learning_rate": 0.0001, "loss": 0.0012, "step": 1552 }, { "epoch": 172.86, "learning_rate": 0.0001, "loss": 0.0013, "step": 1556 }, { "epoch": 173.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 1560 }, { "epoch": 173.75, "learning_rate": 0.0001, "loss": 0.001, "step": 1564 }, { "epoch": 174.21, "learning_rate": 0.0001, "loss": 0.0011, "step": 1568 }, { "epoch": 174.64, "learning_rate": 0.0001, "loss": 0.001, "step": 1572 }, { "epoch": 175.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1576 }, { "epoch": 175.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 1580 }, { "epoch": 175.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1584 }, { "epoch": 176.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 1588 }, { "epoch": 176.86, "learning_rate": 0.0001, "loss": 0.0009, "step": 1592 }, { "epoch": 177.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1596 }, { "epoch": 177.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1600 }, { "epoch": 177.75, "eval_exact_match": 0.6068162926018288, "eval_exec": 0.6650041562759768, "eval_loss": 0.4018971621990204, "eval_runtime": 775.9241, "eval_samples_per_second": 2.094, "step": 1600 }, { "epoch": 178.21, "learning_rate": 0.0001, "loss": 0.001, "step": 1604 }, { "epoch": 178.64, "learning_rate": 0.0001, "loss": 0.0015, "step": 1608 }, { "epoch": 179.11, "learning_rate": 0.0001, "loss": 0.0018, "step": 1612 }, { "epoch": 179.54, "learning_rate": 0.0001, "loss": 0.0079, "step": 1616 }, { "epoch": 179.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1620 }, { "epoch": 180.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1624 }, { "epoch": 180.86, "learning_rate": 0.0001, "loss": 0.0009, "step": 1628 }, { "epoch": 181.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1632 }, { "epoch": 181.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1636 }, { "epoch": 182.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1640 }, { "epoch": 182.64, "learning_rate": 0.0001, "loss": 0.0009, "step": 1644 }, { "epoch": 183.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 1648 }, { "epoch": 183.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 1652 }, { "epoch": 183.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1656 }, { "epoch": 184.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1660 }, { "epoch": 184.86, "learning_rate": 0.0001, "loss": 0.0009, "step": 1664 }, { "epoch": 184.86, "eval_exact_match": 0.6026600166251039, "eval_exec": 0.6641729010806318, "eval_loss": 0.385076105594635, "eval_runtime": 773.5213, "eval_samples_per_second": 2.101, "step": 1664 }, { "epoch": 185.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 1668 }, { "epoch": 185.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 1672 }, { "epoch": 186.21, "learning_rate": 0.0001, "loss": 0.0019, "step": 1676 }, { "epoch": 186.64, "learning_rate": 0.0001, "loss": 0.0019, "step": 1680 }, { "epoch": 187.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 1684 }, { "epoch": 187.54, "learning_rate": 0.0001, "loss": 0.0016, "step": 1688 }, { "epoch": 187.97, "learning_rate": 0.0001, "loss": 0.0014, "step": 1692 }, { "epoch": 188.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1696 }, { "epoch": 188.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1700 }, { "epoch": 189.32, "learning_rate": 0.0001, "loss": 0.0012, "step": 1704 }, { "epoch": 189.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1708 }, { "epoch": 190.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1712 }, { "epoch": 190.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1716 }, { "epoch": 191.11, "learning_rate": 0.0001, "loss": 0.001, "step": 1720 }, { "epoch": 191.54, "learning_rate": 0.0001, "loss": 0.001, "step": 1724 }, { "epoch": 191.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1728 }, { "epoch": 191.97, "eval_exact_match": 0.6018287614297589, "eval_exec": 0.655860349127182, "eval_loss": 0.3692590594291687, "eval_runtime": 780.5695, "eval_samples_per_second": 2.082, "step": 1728 }, { "epoch": 192.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1732 }, { "epoch": 192.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 1736 }, { "epoch": 193.32, "learning_rate": 0.0001, "loss": 0.001, "step": 1740 }, { "epoch": 193.75, "learning_rate": 0.0001, "loss": 0.001, "step": 1744 }, { "epoch": 194.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1748 }, { "epoch": 194.64, "learning_rate": 0.0001, "loss": 0.0009, "step": 1752 }, { "epoch": 195.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 1756 }, { "epoch": 195.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1760 }, { "epoch": 195.97, "learning_rate": 0.0001, "loss": 0.0011, "step": 1764 }, { "epoch": 196.43, "learning_rate": 0.0001, "loss": 0.0011, "step": 1768 }, { "epoch": 196.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1772 }, { "epoch": 197.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1776 }, { "epoch": 197.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 1780 }, { "epoch": 198.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1784 }, { "epoch": 198.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1788 }, { "epoch": 199.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1792 }, { "epoch": 199.11, "eval_exact_match": 0.6159600997506235, "eval_exec": 0.6758104738154613, "eval_loss": 0.39024052023887634, "eval_runtime": 765.2545, "eval_samples_per_second": 2.123, "step": 1792 }, { "epoch": 199.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 1796 }, { "epoch": 199.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1800 }, { "epoch": 200.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 1804 }, { "epoch": 200.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 1808 }, { "epoch": 201.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1812 }, { "epoch": 201.75, "learning_rate": 0.0001, "loss": 0.0011, "step": 1816 }, { "epoch": 202.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 1820 }, { "epoch": 202.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1824 }, { "epoch": 203.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 1828 }, { "epoch": 203.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 1832 }, { "epoch": 203.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1836 }, { "epoch": 204.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1840 }, { "epoch": 204.86, "learning_rate": 0.0001, "loss": 0.0006, "step": 1844 }, { "epoch": 205.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1848 }, { "epoch": 205.75, "learning_rate": 0.0001, "loss": 0.0075, "step": 1852 }, { "epoch": 206.21, "learning_rate": 0.0001, "loss": 0.0083, "step": 1856 }, { "epoch": 206.21, "eval_exact_match": 0.5960099750623441, "eval_exec": 0.6483790523690773, "eval_loss": 0.3317241966724396, "eval_runtime": 755.6025, "eval_samples_per_second": 2.151, "step": 1856 }, { "epoch": 206.64, "learning_rate": 0.0001, "loss": 0.0065, "step": 1860 }, { "epoch": 207.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 1864 }, { "epoch": 207.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 1868 }, { "epoch": 207.97, "learning_rate": 0.0001, "loss": 0.001, "step": 1872 }, { "epoch": 208.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1876 }, { "epoch": 208.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 1880 }, { "epoch": 209.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 1884 }, { "epoch": 209.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1888 }, { "epoch": 210.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 1892 }, { "epoch": 210.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1896 }, { "epoch": 211.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 1900 }, { "epoch": 211.54, "learning_rate": 0.0001, "loss": 0.001, "step": 1904 }, { "epoch": 211.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1908 }, { "epoch": 212.43, "learning_rate": 0.0001, "loss": 0.001, "step": 1912 }, { "epoch": 212.86, "learning_rate": 0.0001, "loss": 0.0016, "step": 1916 }, { "epoch": 213.32, "learning_rate": 0.0001, "loss": 0.004, "step": 1920 }, { "epoch": 213.32, "eval_exact_match": 0.5669160432252701, "eval_exec": 0.6242726517040732, "eval_loss": 0.34172990918159485, "eval_runtime": 754.9365, "eval_samples_per_second": 2.152, "step": 1920 }, { "epoch": 213.75, "learning_rate": 0.0001, "loss": 0.0056, "step": 1924 }, { "epoch": 214.21, "learning_rate": 0.0001, "loss": 0.0009, "step": 1928 }, { "epoch": 214.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 1932 }, { "epoch": 215.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1936 }, { "epoch": 215.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 1940 }, { "epoch": 215.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 1944 }, { "epoch": 216.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 1948 }, { "epoch": 216.86, "learning_rate": 0.0001, "loss": 0.0006, "step": 1952 }, { "epoch": 217.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 1956 }, { "epoch": 217.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 1960 }, { "epoch": 218.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 1964 }, { "epoch": 218.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 1968 }, { "epoch": 219.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 1972 }, { "epoch": 219.54, "learning_rate": 0.0001, "loss": 0.0006, "step": 1976 }, { "epoch": 219.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 1980 }, { "epoch": 220.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 1984 }, { "epoch": 220.43, "eval_exact_match": 0.6142975893599335, "eval_exec": 0.6758104738154613, "eval_loss": 0.4046369194984436, "eval_runtime": 757.7323, "eval_samples_per_second": 2.145, "step": 1984 }, { "epoch": 220.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 1988 }, { "epoch": 221.32, "learning_rate": 0.0001, "loss": 0.0006, "step": 1992 }, { "epoch": 221.75, "learning_rate": 0.0001, "loss": 0.0008, "step": 1996 }, { "epoch": 222.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2000 }, { "epoch": 222.64, "learning_rate": 0.0001, "loss": 0.0006, "step": 2004 }, { "epoch": 223.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 2008 }, { "epoch": 223.54, "learning_rate": 0.0001, "loss": 0.0039, "step": 2012 }, { "epoch": 223.97, "learning_rate": 0.0001, "loss": 0.0012, "step": 2016 }, { "epoch": 224.43, "learning_rate": 0.0001, "loss": 0.0011, "step": 2020 }, { "epoch": 224.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2024 }, { "epoch": 225.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 2028 }, { "epoch": 225.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 2032 }, { "epoch": 226.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 2036 }, { "epoch": 226.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 2040 }, { "epoch": 227.11, "learning_rate": 0.0001, "loss": 0.0005, "step": 2044 }, { "epoch": 227.54, "learning_rate": 0.0001, "loss": 0.0009, "step": 2048 }, { "epoch": 227.54, "eval_exact_match": 0.6118038237738986, "eval_exec": 0.6708229426433915, "eval_loss": 0.4125141203403473, "eval_runtime": 756.5464, "eval_samples_per_second": 2.148, "step": 2048 }, { "epoch": 227.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2052 }, { "epoch": 228.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 2056 }, { "epoch": 228.86, "learning_rate": 0.0001, "loss": 0.0009, "step": 2060 }, { "epoch": 229.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 2064 }, { "epoch": 229.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 2068 }, { "epoch": 230.21, "learning_rate": 0.0001, "loss": 0.001, "step": 2072 }, { "epoch": 230.64, "learning_rate": 0.0001, "loss": 0.001, "step": 2076 }, { "epoch": 231.11, "learning_rate": 0.0001, "loss": 0.0011, "step": 2080 }, { "epoch": 231.54, "learning_rate": 0.0001, "loss": 0.0028, "step": 2084 }, { "epoch": 231.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2088 }, { "epoch": 232.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 2092 }, { "epoch": 232.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2096 }, { "epoch": 233.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 2100 }, { "epoch": 233.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 2104 }, { "epoch": 234.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 2108 }, { "epoch": 234.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 2112 }, { "epoch": 234.64, "eval_exact_match": 0.6209476309226932, "eval_exec": 0.6749792186201163, "eval_loss": 0.3990643322467804, "eval_runtime": 757.1625, "eval_samples_per_second": 2.146, "step": 2112 }, { "epoch": 235.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 2116 }, { "epoch": 235.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 2120 }, { "epoch": 235.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2124 }, { "epoch": 236.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 2128 }, { "epoch": 236.86, "learning_rate": 0.0001, "loss": 0.0023, "step": 2132 }, { "epoch": 237.32, "learning_rate": 0.0001, "loss": 0.0029, "step": 2136 }, { "epoch": 237.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 2140 }, { "epoch": 238.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2144 }, { "epoch": 238.64, "learning_rate": 0.0001, "loss": 0.0008, "step": 2148 }, { "epoch": 239.11, "learning_rate": 0.0001, "loss": 0.0006, "step": 2152 }, { "epoch": 239.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 2156 }, { "epoch": 239.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2160 }, { "epoch": 240.43, "learning_rate": 0.0001, "loss": 0.0008, "step": 2164 }, { "epoch": 240.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2168 }, { "epoch": 241.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 2172 }, { "epoch": 241.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 2176 }, { "epoch": 241.75, "eval_exact_match": 0.6126350789692435, "eval_exec": 0.6758104738154613, "eval_loss": 0.4011194407939911, "eval_runtime": 757.6531, "eval_samples_per_second": 2.145, "step": 2176 }, { "epoch": 242.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 2180 }, { "epoch": 242.64, "learning_rate": 0.0001, "loss": 0.001, "step": 2184 }, { "epoch": 243.11, "learning_rate": 0.0001, "loss": 0.0007, "step": 2188 }, { "epoch": 243.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 2192 }, { "epoch": 243.97, "learning_rate": 0.0001, "loss": 0.0009, "step": 2196 }, { "epoch": 244.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 2200 }, { "epoch": 244.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 2204 }, { "epoch": 245.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 2208 }, { "epoch": 245.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 2212 }, { "epoch": 246.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2216 }, { "epoch": 246.64, "learning_rate": 0.0001, "loss": 0.005, "step": 2220 }, { "epoch": 247.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 2224 }, { "epoch": 247.54, "learning_rate": 0.0001, "loss": 0.0011, "step": 2228 }, { "epoch": 247.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2232 }, { "epoch": 248.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 2236 }, { "epoch": 248.86, "learning_rate": 0.0001, "loss": 0.0021, "step": 2240 }, { "epoch": 248.86, "eval_exact_match": 0.5968412302576891, "eval_exec": 0.6658354114713217, "eval_loss": 0.35461094975471497, "eval_runtime": 763.816, "eval_samples_per_second": 2.127, "step": 2240 }, { "epoch": 249.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 2244 }, { "epoch": 249.75, "learning_rate": 0.0001, "loss": 0.0007, "step": 2248 }, { "epoch": 250.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2252 }, { "epoch": 250.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 2256 }, { "epoch": 251.11, "learning_rate": 0.0001, "loss": 0.0009, "step": 2260 }, { "epoch": 251.54, "learning_rate": 0.0001, "loss": 0.0005, "step": 2264 }, { "epoch": 251.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2268 }, { "epoch": 252.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 2272 }, { "epoch": 252.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 2276 }, { "epoch": 253.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 2280 }, { "epoch": 253.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 2284 }, { "epoch": 254.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2288 }, { "epoch": 254.64, "learning_rate": 0.0001, "loss": 0.0006, "step": 2292 }, { "epoch": 255.11, "learning_rate": 0.0001, "loss": 0.0008, "step": 2296 }, { "epoch": 255.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 2300 }, { "epoch": 255.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2304 }, { "epoch": 255.97, "eval_exact_match": 0.6159600997506235, "eval_exec": 0.6733167082294265, "eval_loss": 0.4026164412498474, "eval_runtime": 775.2221, "eval_samples_per_second": 2.096, "step": 2304 }, { "epoch": 256.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 2308 }, { "epoch": 256.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2312 }, { "epoch": 257.32, "learning_rate": 0.0001, "loss": 0.0009, "step": 2316 }, { "epoch": 257.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 2320 }, { "epoch": 258.21, "learning_rate": 0.0001, "loss": 0.0008, "step": 2324 }, { "epoch": 258.64, "learning_rate": 0.0001, "loss": 0.0011, "step": 2328 }, { "epoch": 259.11, "learning_rate": 0.0001, "loss": 0.001, "step": 2332 }, { "epoch": 259.54, "learning_rate": 0.0001, "loss": 0.0008, "step": 2336 }, { "epoch": 259.97, "learning_rate": 0.0001, "loss": 0.0007, "step": 2340 }, { "epoch": 260.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 2344 }, { "epoch": 260.86, "learning_rate": 0.0001, "loss": 0.0008, "step": 2348 }, { "epoch": 261.32, "learning_rate": 0.0001, "loss": 0.001, "step": 2352 }, { "epoch": 261.75, "learning_rate": 0.0001, "loss": 0.0009, "step": 2356 }, { "epoch": 262.21, "learning_rate": 0.0001, "loss": 0.0007, "step": 2360 }, { "epoch": 262.64, "learning_rate": 0.0001, "loss": 0.0018, "step": 2364 }, { "epoch": 263.11, "learning_rate": 0.0001, "loss": 0.001, "step": 2368 }, { "epoch": 263.11, "eval_exact_match": 0.6018287614297589, "eval_exec": 0.6641729010806318, "eval_loss": 0.3777616024017334, "eval_runtime": 772.9319, "eval_samples_per_second": 2.102, "step": 2368 }, { "epoch": 263.54, "learning_rate": 0.0001, "loss": 0.0006, "step": 2372 }, { "epoch": 263.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2376 }, { "epoch": 264.43, "learning_rate": 0.0001, "loss": 0.0007, "step": 2380 }, { "epoch": 264.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 2384 }, { "epoch": 265.32, "learning_rate": 0.0001, "loss": 0.0008, "step": 2388 }, { "epoch": 265.75, "learning_rate": 0.0001, "loss": 0.0006, "step": 2392 }, { "epoch": 266.21, "learning_rate": 0.0001, "loss": 0.0006, "step": 2396 }, { "epoch": 266.64, "learning_rate": 0.0001, "loss": 0.0007, "step": 2400 }, { "epoch": 267.11, "learning_rate": 0.0001, "loss": 0.0006, "step": 2404 }, { "epoch": 267.54, "learning_rate": 0.0001, "loss": 0.0007, "step": 2408 }, { "epoch": 267.97, "learning_rate": 0.0001, "loss": 0.0008, "step": 2412 }, { "epoch": 268.43, "learning_rate": 0.0001, "loss": 0.0009, "step": 2416 }, { "epoch": 268.86, "learning_rate": 0.0001, "loss": 0.0007, "step": 2420 }, { "epoch": 269.32, "learning_rate": 0.0001, "loss": 0.0011, "step": 2424 }, { "epoch": 269.75, "learning_rate": 0.0001, "loss": 0.0012, "step": 2428 }, { "epoch": 270.21, "learning_rate": 0.0001, "loss": 0.0034, "step": 2432 }, { "epoch": 270.21, "eval_exact_match": 0.6101413133832086, "eval_exec": 0.6791354945968412, "eval_loss": 0.34622296690940857, "eval_runtime": 764.1562, "eval_samples_per_second": 2.127, "step": 2432 } ], "max_steps": 27648, "num_train_epochs": 3072, "total_flos": 2.8779670279190217e+19, "trial_name": null, "trial_params": null }