{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.7790697674418605, "global_step": 1300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "eval_loss": 2.1321089267730713, "eval_runtime": 5.3447, "eval_samples_per_second": 7.11, "eval_steps_per_second": 1.871, "step": 5 }, { "epoch": 0.03, "eval_loss": 1.7755846977233887, "eval_runtime": 6.2618, "eval_samples_per_second": 6.069, "eval_steps_per_second": 1.597, "step": 10 }, { "epoch": 0.04, "eval_loss": 1.661117672920227, "eval_runtime": 5.1457, "eval_samples_per_second": 7.385, "eval_steps_per_second": 1.943, "step": 15 }, { "epoch": 0.06, "eval_loss": 1.6071234941482544, "eval_runtime": 6.3033, "eval_samples_per_second": 6.029, "eval_steps_per_second": 1.586, "step": 20 }, { "epoch": 0.07, "eval_loss": 1.5763286352157593, "eval_runtime": 5.1664, "eval_samples_per_second": 7.355, "eval_steps_per_second": 1.936, "step": 25 }, { "epoch": 0.09, "eval_loss": 1.54438316822052, "eval_runtime": 12.625, "eval_samples_per_second": 3.01, "eval_steps_per_second": 0.792, "step": 30 }, { "epoch": 0.1, "eval_loss": 1.529874563217163, "eval_runtime": 5.1855, "eval_samples_per_second": 7.328, "eval_steps_per_second": 1.928, "step": 35 }, { "epoch": 0.12, "eval_loss": 1.511551856994629, "eval_runtime": 5.1943, "eval_samples_per_second": 7.316, "eval_steps_per_second": 1.925, "step": 40 }, { "epoch": 0.13, "eval_loss": 1.499739170074463, "eval_runtime": 5.1845, "eval_samples_per_second": 7.33, "eval_steps_per_second": 1.929, "step": 45 }, { "epoch": 0.15, "eval_loss": 1.482990026473999, "eval_runtime": 5.191, "eval_samples_per_second": 7.32, "eval_steps_per_second": 1.926, "step": 50 }, { "epoch": 0.16, "eval_loss": 1.4811052083969116, "eval_runtime": 5.2095, "eval_samples_per_second": 7.294, "eval_steps_per_second": 1.92, "step": 55 }, { "epoch": 0.17, "eval_loss": 1.470807671546936, "eval_runtime": 5.1982, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 60 }, { "epoch": 0.19, "eval_loss": 1.4635131359100342, "eval_runtime": 5.22, "eval_samples_per_second": 7.28, "eval_steps_per_second": 1.916, "step": 65 }, { "epoch": 0.2, "eval_loss": 1.4525114297866821, "eval_runtime": 5.22, "eval_samples_per_second": 7.28, "eval_steps_per_second": 1.916, "step": 70 }, { "epoch": 0.22, "eval_loss": 1.4475783109664917, "eval_runtime": 5.2098, "eval_samples_per_second": 7.294, "eval_steps_per_second": 1.919, "step": 75 }, { "epoch": 0.23, "eval_loss": 1.4456068277359009, "eval_runtime": 5.2045, "eval_samples_per_second": 7.301, "eval_steps_per_second": 1.921, "step": 80 }, { "epoch": 0.25, "eval_loss": 1.4422187805175781, "eval_runtime": 5.2064, "eval_samples_per_second": 7.299, "eval_steps_per_second": 1.921, "step": 85 }, { "epoch": 0.26, "eval_loss": 1.4375722408294678, "eval_runtime": 5.2062, "eval_samples_per_second": 7.299, "eval_steps_per_second": 1.921, "step": 90 }, { "epoch": 0.28, "eval_loss": 1.435781717300415, "eval_runtime": 5.2273, "eval_samples_per_second": 7.27, "eval_steps_per_second": 1.913, "step": 95 }, { "epoch": 0.29, "learning_rate": 9.854651162790698e-06, "loss": 1.5857, "step": 100 }, { "epoch": 0.29, "eval_loss": 1.4250125885009766, "eval_runtime": 5.2302, "eval_samples_per_second": 7.266, "eval_steps_per_second": 1.912, "step": 100 }, { "epoch": 0.31, "eval_loss": 1.4228482246398926, "eval_runtime": 5.1419, "eval_samples_per_second": 7.39, "eval_steps_per_second": 1.945, "step": 105 }, { "epoch": 0.32, "eval_loss": 1.41592538356781, "eval_runtime": 5.1705, "eval_samples_per_second": 7.349, "eval_steps_per_second": 1.934, "step": 110 }, { "epoch": 0.33, "eval_loss": 1.4095944166183472, "eval_runtime": 5.1762, "eval_samples_per_second": 7.341, "eval_steps_per_second": 1.932, "step": 115 }, { "epoch": 0.35, "eval_loss": 1.405040979385376, "eval_runtime": 5.1734, "eval_samples_per_second": 7.345, "eval_steps_per_second": 1.933, "step": 120 }, { "epoch": 0.36, "eval_loss": 1.4017434120178223, "eval_runtime": 5.1936, "eval_samples_per_second": 7.317, "eval_steps_per_second": 1.925, "step": 125 }, { "epoch": 0.38, "eval_loss": 1.3985930681228638, "eval_runtime": 5.1856, "eval_samples_per_second": 7.328, "eval_steps_per_second": 1.928, "step": 130 }, { "epoch": 0.39, "eval_loss": 1.3949148654937744, "eval_runtime": 6.2986, "eval_samples_per_second": 6.033, "eval_steps_per_second": 1.588, "step": 135 }, { "epoch": 0.41, "eval_loss": 1.3919830322265625, "eval_runtime": 5.193, "eval_samples_per_second": 7.318, "eval_steps_per_second": 1.926, "step": 140 }, { "epoch": 0.42, "eval_loss": 1.389172077178955, "eval_runtime": 5.1972, "eval_samples_per_second": 7.312, "eval_steps_per_second": 1.924, "step": 145 }, { "epoch": 0.44, "eval_loss": 1.3876895904541016, "eval_runtime": 5.215, "eval_samples_per_second": 7.287, "eval_steps_per_second": 1.918, "step": 150 }, { "epoch": 0.45, "eval_loss": 1.387901782989502, "eval_runtime": 5.2122, "eval_samples_per_second": 7.291, "eval_steps_per_second": 1.919, "step": 155 }, { "epoch": 0.47, "eval_loss": 1.3818577527999878, "eval_runtime": 5.2068, "eval_samples_per_second": 7.298, "eval_steps_per_second": 1.921, "step": 160 }, { "epoch": 0.48, "eval_loss": 1.3780750036239624, "eval_runtime": 5.2225, "eval_samples_per_second": 7.276, "eval_steps_per_second": 1.915, "step": 165 }, { "epoch": 0.49, "eval_loss": 1.374315857887268, "eval_runtime": 5.2084, "eval_samples_per_second": 7.296, "eval_steps_per_second": 1.92, "step": 170 }, { "epoch": 0.51, "eval_loss": 1.372006893157959, "eval_runtime": 5.209, "eval_samples_per_second": 7.295, "eval_steps_per_second": 1.92, "step": 175 }, { "epoch": 0.52, "eval_loss": 1.3736425638198853, "eval_runtime": 5.2311, "eval_samples_per_second": 7.264, "eval_steps_per_second": 1.912, "step": 180 }, { "epoch": 0.54, "eval_loss": 1.3681703805923462, "eval_runtime": 5.2032, "eval_samples_per_second": 7.303, "eval_steps_per_second": 1.922, "step": 185 }, { "epoch": 0.55, "eval_loss": 1.3626172542572021, "eval_runtime": 5.2168, "eval_samples_per_second": 7.284, "eval_steps_per_second": 1.917, "step": 190 }, { "epoch": 0.57, "eval_loss": 1.3621835708618164, "eval_runtime": 5.2269, "eval_samples_per_second": 7.27, "eval_steps_per_second": 1.913, "step": 195 }, { "epoch": 0.58, "learning_rate": 9.709302325581395e-06, "loss": 1.4034, "step": 200 }, { "epoch": 0.58, "eval_loss": 1.364233374595642, "eval_runtime": 5.3231, "eval_samples_per_second": 7.139, "eval_steps_per_second": 1.879, "step": 200 }, { "epoch": 0.6, "eval_loss": 1.3565268516540527, "eval_runtime": 5.1533, "eval_samples_per_second": 7.374, "eval_steps_per_second": 1.941, "step": 205 }, { "epoch": 0.61, "eval_loss": 1.35232412815094, "eval_runtime": 14.9101, "eval_samples_per_second": 2.549, "eval_steps_per_second": 0.671, "step": 210 }, { "epoch": 0.62, "eval_loss": 1.3503801822662354, "eval_runtime": 5.1561, "eval_samples_per_second": 7.37, "eval_steps_per_second": 1.939, "step": 215 }, { "epoch": 0.64, "eval_loss": 1.3449465036392212, "eval_runtime": 5.1639, "eval_samples_per_second": 7.359, "eval_steps_per_second": 1.937, "step": 220 }, { "epoch": 0.65, "eval_loss": 1.3519093990325928, "eval_runtime": 5.1765, "eval_samples_per_second": 7.341, "eval_steps_per_second": 1.932, "step": 225 }, { "epoch": 0.67, "eval_loss": 1.352266550064087, "eval_runtime": 17.2964, "eval_samples_per_second": 2.197, "eval_steps_per_second": 0.578, "step": 230 }, { "epoch": 0.68, "eval_loss": 1.3380497694015503, "eval_runtime": 5.1906, "eval_samples_per_second": 7.321, "eval_steps_per_second": 1.927, "step": 235 }, { "epoch": 0.7, "eval_loss": 1.340211033821106, "eval_runtime": 6.4362, "eval_samples_per_second": 5.904, "eval_steps_per_second": 1.554, "step": 240 }, { "epoch": 0.71, "eval_loss": 1.335599660873413, "eval_runtime": 5.1978, "eval_samples_per_second": 7.311, "eval_steps_per_second": 1.924, "step": 245 }, { "epoch": 0.73, "eval_loss": 1.3313099145889282, "eval_runtime": 5.1628, "eval_samples_per_second": 7.36, "eval_steps_per_second": 1.937, "step": 250 }, { "epoch": 0.74, "eval_loss": 1.334537386894226, "eval_runtime": 7.3524, "eval_samples_per_second": 5.168, "eval_steps_per_second": 1.36, "step": 255 }, { "epoch": 0.76, "eval_loss": 1.3304086923599243, "eval_runtime": 5.1618, "eval_samples_per_second": 7.362, "eval_steps_per_second": 1.937, "step": 260 }, { "epoch": 0.77, "eval_loss": 1.3271808624267578, "eval_runtime": 5.1719, "eval_samples_per_second": 7.347, "eval_steps_per_second": 1.934, "step": 265 }, { "epoch": 0.78, "eval_loss": 1.324944019317627, "eval_runtime": 5.1801, "eval_samples_per_second": 7.336, "eval_steps_per_second": 1.93, "step": 270 }, { "epoch": 0.8, "eval_loss": 1.3218427896499634, "eval_runtime": 5.1851, "eval_samples_per_second": 7.329, "eval_steps_per_second": 1.929, "step": 275 }, { "epoch": 0.81, "eval_loss": 1.3217554092407227, "eval_runtime": 5.2075, "eval_samples_per_second": 7.297, "eval_steps_per_second": 1.92, "step": 280 }, { "epoch": 0.83, "eval_loss": 1.3211233615875244, "eval_runtime": 5.198, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 285 }, { "epoch": 0.84, "eval_loss": 1.3166344165802002, "eval_runtime": 6.1478, "eval_samples_per_second": 6.181, "eval_steps_per_second": 1.627, "step": 290 }, { "epoch": 0.86, "eval_loss": 1.3159008026123047, "eval_runtime": 5.2103, "eval_samples_per_second": 7.293, "eval_steps_per_second": 1.919, "step": 295 }, { "epoch": 0.87, "learning_rate": 9.563953488372094e-06, "loss": 1.3513, "step": 300 }, { "epoch": 0.87, "eval_loss": 1.3075319528579712, "eval_runtime": 5.2012, "eval_samples_per_second": 7.306, "eval_steps_per_second": 1.923, "step": 300 }, { "epoch": 0.89, "eval_loss": 1.3096569776535034, "eval_runtime": 5.144, "eval_samples_per_second": 7.387, "eval_steps_per_second": 1.944, "step": 305 }, { "epoch": 0.9, "eval_loss": 1.3026896715164185, "eval_runtime": 5.1657, "eval_samples_per_second": 7.356, "eval_steps_per_second": 1.936, "step": 310 }, { "epoch": 0.92, "eval_loss": 1.2962864637374878, "eval_runtime": 5.1706, "eval_samples_per_second": 7.349, "eval_steps_per_second": 1.934, "step": 315 }, { "epoch": 0.93, "eval_loss": 1.294777512550354, "eval_runtime": 5.1816, "eval_samples_per_second": 7.334, "eval_steps_per_second": 1.93, "step": 320 }, { "epoch": 0.94, "eval_loss": 1.2898850440979004, "eval_runtime": 6.0458, "eval_samples_per_second": 6.285, "eval_steps_per_second": 1.654, "step": 325 }, { "epoch": 0.96, "eval_loss": 1.2885679006576538, "eval_runtime": 5.1898, "eval_samples_per_second": 7.322, "eval_steps_per_second": 1.927, "step": 330 }, { "epoch": 0.97, "eval_loss": 1.288139820098877, "eval_runtime": 5.1979, "eval_samples_per_second": 7.311, "eval_steps_per_second": 1.924, "step": 335 }, { "epoch": 0.99, "eval_loss": 1.281440258026123, "eval_runtime": 5.1891, "eval_samples_per_second": 7.323, "eval_steps_per_second": 1.927, "step": 340 }, { "epoch": 1.0, "eval_loss": 1.2789274454116821, "eval_runtime": 5.2144, "eval_samples_per_second": 7.288, "eval_steps_per_second": 1.918, "step": 345 }, { "epoch": 1.02, "eval_loss": 1.273104190826416, "eval_runtime": 5.1983, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 350 }, { "epoch": 1.03, "eval_loss": 1.2735443115234375, "eval_runtime": 5.2215, "eval_samples_per_second": 7.278, "eval_steps_per_second": 1.915, "step": 355 }, { "epoch": 1.05, "eval_loss": 1.2782071828842163, "eval_runtime": 5.6853, "eval_samples_per_second": 6.684, "eval_steps_per_second": 1.759, "step": 360 }, { "epoch": 1.06, "eval_loss": 1.2720850706100464, "eval_runtime": 5.222, "eval_samples_per_second": 7.277, "eval_steps_per_second": 1.915, "step": 365 }, { "epoch": 1.08, "eval_loss": 1.2730863094329834, "eval_runtime": 5.2055, "eval_samples_per_second": 7.3, "eval_steps_per_second": 1.921, "step": 370 }, { "epoch": 1.09, "eval_loss": 1.2692456245422363, "eval_runtime": 7.9591, "eval_samples_per_second": 4.774, "eval_steps_per_second": 1.256, "step": 375 }, { "epoch": 1.1, "eval_loss": 1.2623660564422607, "eval_runtime": 5.1922, "eval_samples_per_second": 7.319, "eval_steps_per_second": 1.926, "step": 380 }, { "epoch": 1.12, "eval_loss": 1.264005422592163, "eval_runtime": 5.2073, "eval_samples_per_second": 7.297, "eval_steps_per_second": 1.92, "step": 385 }, { "epoch": 1.13, "eval_loss": 1.264898419380188, "eval_runtime": 5.1964, "eval_samples_per_second": 7.313, "eval_steps_per_second": 1.924, "step": 390 }, { "epoch": 1.15, "eval_loss": 1.2602505683898926, "eval_runtime": 5.2183, "eval_samples_per_second": 7.282, "eval_steps_per_second": 1.916, "step": 395 }, { "epoch": 1.16, "learning_rate": 9.418604651162791e-06, "loss": 1.2432, "step": 400 }, { "epoch": 1.16, "eval_loss": 1.2597508430480957, "eval_runtime": 5.2008, "eval_samples_per_second": 7.307, "eval_steps_per_second": 1.923, "step": 400 }, { "epoch": 1.18, "eval_loss": 1.256094217300415, "eval_runtime": 5.1612, "eval_samples_per_second": 7.363, "eval_steps_per_second": 1.938, "step": 405 }, { "epoch": 1.19, "eval_loss": 1.2484766244888306, "eval_runtime": 5.1459, "eval_samples_per_second": 7.384, "eval_steps_per_second": 1.943, "step": 410 }, { "epoch": 1.21, "eval_loss": 1.2463735342025757, "eval_runtime": 5.1578, "eval_samples_per_second": 7.367, "eval_steps_per_second": 1.939, "step": 415 }, { "epoch": 1.22, "eval_loss": 1.244008183479309, "eval_runtime": 5.1699, "eval_samples_per_second": 7.35, "eval_steps_per_second": 1.934, "step": 420 }, { "epoch": 1.24, "eval_loss": 1.2463507652282715, "eval_runtime": 5.5355, "eval_samples_per_second": 6.865, "eval_steps_per_second": 1.807, "step": 425 }, { "epoch": 1.25, "eval_loss": 1.23945152759552, "eval_runtime": 5.1986, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 430 }, { "epoch": 1.26, "eval_loss": 1.2383465766906738, "eval_runtime": 5.2615, "eval_samples_per_second": 7.222, "eval_steps_per_second": 1.901, "step": 435 }, { "epoch": 1.28, "eval_loss": 1.240387201309204, "eval_runtime": 5.2049, "eval_samples_per_second": 7.301, "eval_steps_per_second": 1.921, "step": 440 }, { "epoch": 1.29, "eval_loss": 1.2330259084701538, "eval_runtime": 5.1959, "eval_samples_per_second": 7.313, "eval_steps_per_second": 1.925, "step": 445 }, { "epoch": 1.31, "eval_loss": 1.2376052141189575, "eval_runtime": 5.1991, "eval_samples_per_second": 7.309, "eval_steps_per_second": 1.923, "step": 450 }, { "epoch": 1.32, "eval_loss": 1.2420060634613037, "eval_runtime": 14.5358, "eval_samples_per_second": 2.614, "eval_steps_per_second": 0.688, "step": 455 }, { "epoch": 1.34, "eval_loss": 1.2296521663665771, "eval_runtime": 7.0505, "eval_samples_per_second": 5.39, "eval_steps_per_second": 1.418, "step": 460 }, { "epoch": 1.35, "eval_loss": 1.226386547088623, "eval_runtime": 5.1853, "eval_samples_per_second": 7.328, "eval_steps_per_second": 1.929, "step": 465 }, { "epoch": 1.37, "eval_loss": 1.2350796461105347, "eval_runtime": 5.1891, "eval_samples_per_second": 7.323, "eval_steps_per_second": 1.927, "step": 470 }, { "epoch": 1.38, "eval_loss": 1.2271411418914795, "eval_runtime": 5.187, "eval_samples_per_second": 7.326, "eval_steps_per_second": 1.928, "step": 475 }, { "epoch": 1.4, "eval_loss": 1.2260706424713135, "eval_runtime": 5.1939, "eval_samples_per_second": 7.316, "eval_steps_per_second": 1.925, "step": 480 }, { "epoch": 1.41, "eval_loss": 1.2366853952407837, "eval_runtime": 5.2167, "eval_samples_per_second": 7.284, "eval_steps_per_second": 1.917, "step": 485 }, { "epoch": 1.42, "eval_loss": 1.2304943799972534, "eval_runtime": 5.2057, "eval_samples_per_second": 7.3, "eval_steps_per_second": 1.921, "step": 490 }, { "epoch": 1.44, "eval_loss": 1.219275951385498, "eval_runtime": 5.1983, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 495 }, { "epoch": 1.45, "learning_rate": 9.273255813953488e-06, "loss": 1.1933, "step": 500 }, { "epoch": 1.45, "eval_loss": 1.220719575881958, "eval_runtime": 8.1461, "eval_samples_per_second": 4.665, "eval_steps_per_second": 1.228, "step": 500 }, { "epoch": 1.47, "eval_loss": 1.2208986282348633, "eval_runtime": 5.1444, "eval_samples_per_second": 7.387, "eval_steps_per_second": 1.944, "step": 505 }, { "epoch": 1.48, "eval_loss": 1.2238763570785522, "eval_runtime": 5.1637, "eval_samples_per_second": 7.359, "eval_steps_per_second": 1.937, "step": 510 }, { "epoch": 1.5, "eval_loss": 1.219651699066162, "eval_runtime": 5.177, "eval_samples_per_second": 7.34, "eval_steps_per_second": 1.932, "step": 515 }, { "epoch": 1.51, "eval_loss": 1.2128971815109253, "eval_runtime": 5.1833, "eval_samples_per_second": 7.331, "eval_steps_per_second": 1.929, "step": 520 }, { "epoch": 1.53, "eval_loss": 1.2134791612625122, "eval_runtime": 6.8202, "eval_samples_per_second": 5.572, "eval_steps_per_second": 1.466, "step": 525 }, { "epoch": 1.54, "eval_loss": 1.2144546508789062, "eval_runtime": 5.1795, "eval_samples_per_second": 7.337, "eval_steps_per_second": 1.931, "step": 530 }, { "epoch": 1.56, "eval_loss": 1.2091519832611084, "eval_runtime": 5.1715, "eval_samples_per_second": 7.348, "eval_steps_per_second": 1.934, "step": 535 }, { "epoch": 1.57, "eval_loss": 1.2074944972991943, "eval_runtime": 5.1783, "eval_samples_per_second": 7.338, "eval_steps_per_second": 1.931, "step": 540 }, { "epoch": 1.58, "eval_loss": 1.2082455158233643, "eval_runtime": 5.1756, "eval_samples_per_second": 7.342, "eval_steps_per_second": 1.932, "step": 545 }, { "epoch": 1.6, "eval_loss": 1.2062183618545532, "eval_runtime": 5.183, "eval_samples_per_second": 7.332, "eval_steps_per_second": 1.929, "step": 550 }, { "epoch": 1.61, "eval_loss": 1.2009626626968384, "eval_runtime": 5.6305, "eval_samples_per_second": 6.749, "eval_steps_per_second": 1.776, "step": 555 }, { "epoch": 1.63, "eval_loss": 1.200234293937683, "eval_runtime": 5.2064, "eval_samples_per_second": 7.299, "eval_steps_per_second": 1.921, "step": 560 }, { "epoch": 1.64, "eval_loss": 1.1972352266311646, "eval_runtime": 5.3956, "eval_samples_per_second": 7.043, "eval_steps_per_second": 1.853, "step": 565 }, { "epoch": 1.66, "eval_loss": 1.199525237083435, "eval_runtime": 5.2172, "eval_samples_per_second": 7.284, "eval_steps_per_second": 1.917, "step": 570 }, { "epoch": 1.67, "eval_loss": 1.1978554725646973, "eval_runtime": 5.5745, "eval_samples_per_second": 6.817, "eval_steps_per_second": 1.794, "step": 575 }, { "epoch": 1.69, "eval_loss": 1.1939880847930908, "eval_runtime": 5.2146, "eval_samples_per_second": 7.287, "eval_steps_per_second": 1.918, "step": 580 }, { "epoch": 1.7, "eval_loss": 1.1886717081069946, "eval_runtime": 5.2166, "eval_samples_per_second": 7.284, "eval_steps_per_second": 1.917, "step": 585 }, { "epoch": 1.72, "eval_loss": 1.1879463195800781, "eval_runtime": 5.2066, "eval_samples_per_second": 7.298, "eval_steps_per_second": 1.921, "step": 590 }, { "epoch": 1.73, "eval_loss": 1.1874936819076538, "eval_runtime": 5.2058, "eval_samples_per_second": 7.3, "eval_steps_per_second": 1.921, "step": 595 }, { "epoch": 1.74, "learning_rate": 9.127906976744186e-06, "loss": 1.1598, "step": 600 }, { "epoch": 1.74, "eval_loss": 1.184380054473877, "eval_runtime": 5.2949, "eval_samples_per_second": 7.177, "eval_steps_per_second": 1.889, "step": 600 }, { "epoch": 1.76, "eval_loss": 1.1831018924713135, "eval_runtime": 5.1436, "eval_samples_per_second": 7.388, "eval_steps_per_second": 1.944, "step": 605 }, { "epoch": 1.77, "eval_loss": 1.1859960556030273, "eval_runtime": 5.1652, "eval_samples_per_second": 7.357, "eval_steps_per_second": 1.936, "step": 610 }, { "epoch": 1.79, "eval_loss": 1.186689853668213, "eval_runtime": 5.1749, "eval_samples_per_second": 7.343, "eval_steps_per_second": 1.932, "step": 615 }, { "epoch": 1.8, "eval_loss": 1.1849379539489746, "eval_runtime": 5.8205, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.718, "step": 620 }, { "epoch": 1.82, "eval_loss": 1.1795214414596558, "eval_runtime": 5.1765, "eval_samples_per_second": 7.341, "eval_steps_per_second": 1.932, "step": 625 }, { "epoch": 1.83, "eval_loss": 1.1802735328674316, "eval_runtime": 7.6116, "eval_samples_per_second": 4.992, "eval_steps_per_second": 1.314, "step": 630 }, { "epoch": 1.85, "eval_loss": 1.179026484489441, "eval_runtime": 5.2022, "eval_samples_per_second": 7.305, "eval_steps_per_second": 1.922, "step": 635 }, { "epoch": 1.86, "eval_loss": 1.1783987283706665, "eval_runtime": 5.1881, "eval_samples_per_second": 7.324, "eval_steps_per_second": 1.927, "step": 640 }, { "epoch": 1.88, "eval_loss": 1.1762430667877197, "eval_runtime": 5.2146, "eval_samples_per_second": 7.287, "eval_steps_per_second": 1.918, "step": 645 }, { "epoch": 1.89, "eval_loss": 1.180951476097107, "eval_runtime": 5.1987, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 650 }, { "epoch": 1.9, "eval_loss": 1.175215244293213, "eval_runtime": 5.222, "eval_samples_per_second": 7.277, "eval_steps_per_second": 1.915, "step": 655 }, { "epoch": 1.92, "eval_loss": 1.1738007068634033, "eval_runtime": 5.2057, "eval_samples_per_second": 7.3, "eval_steps_per_second": 1.921, "step": 660 }, { "epoch": 1.93, "eval_loss": 1.1722891330718994, "eval_runtime": 5.1993, "eval_samples_per_second": 7.309, "eval_steps_per_second": 1.923, "step": 665 }, { "epoch": 1.95, "eval_loss": 1.1736668348312378, "eval_runtime": 5.202, "eval_samples_per_second": 7.305, "eval_steps_per_second": 1.922, "step": 670 }, { "epoch": 1.96, "eval_loss": 1.1691397428512573, "eval_runtime": 5.2043, "eval_samples_per_second": 7.302, "eval_steps_per_second": 1.922, "step": 675 }, { "epoch": 1.98, "eval_loss": 1.1707478761672974, "eval_runtime": 5.2058, "eval_samples_per_second": 7.3, "eval_steps_per_second": 1.921, "step": 680 }, { "epoch": 1.99, "eval_loss": 1.17750084400177, "eval_runtime": 5.2084, "eval_samples_per_second": 7.296, "eval_steps_per_second": 1.92, "step": 685 }, { "epoch": 2.01, "eval_loss": 1.1693485975265503, "eval_runtime": 5.2105, "eval_samples_per_second": 7.293, "eval_steps_per_second": 1.919, "step": 690 }, { "epoch": 2.02, "eval_loss": 1.174012541770935, "eval_runtime": 8.4066, "eval_samples_per_second": 4.52, "eval_steps_per_second": 1.19, "step": 695 }, { "epoch": 2.03, "learning_rate": 8.982558139534884e-06, "loss": 1.1203, "step": 700 }, { "epoch": 2.03, "eval_loss": 1.1877881288528442, "eval_runtime": 5.1939, "eval_samples_per_second": 7.316, "eval_steps_per_second": 1.925, "step": 700 }, { "epoch": 2.05, "eval_loss": 1.1876509189605713, "eval_runtime": 5.1556, "eval_samples_per_second": 7.371, "eval_steps_per_second": 1.94, "step": 705 }, { "epoch": 2.06, "eval_loss": 1.1674307584762573, "eval_runtime": 5.149, "eval_samples_per_second": 7.38, "eval_steps_per_second": 1.942, "step": 710 }, { "epoch": 2.08, "eval_loss": 1.167423129081726, "eval_runtime": 5.1595, "eval_samples_per_second": 7.365, "eval_steps_per_second": 1.938, "step": 715 }, { "epoch": 2.09, "eval_loss": 1.177182674407959, "eval_runtime": 8.3224, "eval_samples_per_second": 4.566, "eval_steps_per_second": 1.202, "step": 720 }, { "epoch": 2.11, "eval_loss": 1.1748428344726562, "eval_runtime": 5.1694, "eval_samples_per_second": 7.351, "eval_steps_per_second": 1.934, "step": 725 }, { "epoch": 2.12, "eval_loss": 1.1770687103271484, "eval_runtime": 6.4645, "eval_samples_per_second": 5.878, "eval_steps_per_second": 1.547, "step": 730 }, { "epoch": 2.14, "eval_loss": 1.1737899780273438, "eval_runtime": 5.1852, "eval_samples_per_second": 7.329, "eval_steps_per_second": 1.929, "step": 735 }, { "epoch": 2.15, "eval_loss": 1.164935827255249, "eval_runtime": 5.1999, "eval_samples_per_second": 7.308, "eval_steps_per_second": 1.923, "step": 740 }, { "epoch": 2.17, "eval_loss": 1.1611236333847046, "eval_runtime": 5.1943, "eval_samples_per_second": 7.316, "eval_steps_per_second": 1.925, "step": 745 }, { "epoch": 2.18, "eval_loss": 1.16485595703125, "eval_runtime": 5.2046, "eval_samples_per_second": 7.301, "eval_steps_per_second": 1.921, "step": 750 }, { "epoch": 2.19, "eval_loss": 1.1628483533859253, "eval_runtime": 5.1977, "eval_samples_per_second": 7.311, "eval_steps_per_second": 1.924, "step": 755 }, { "epoch": 2.21, "eval_loss": 1.1594696044921875, "eval_runtime": 5.1698, "eval_samples_per_second": 7.35, "eval_steps_per_second": 1.934, "step": 760 }, { "epoch": 2.22, "eval_loss": 1.1594704389572144, "eval_runtime": 5.1839, "eval_samples_per_second": 7.33, "eval_steps_per_second": 1.929, "step": 765 }, { "epoch": 2.24, "eval_loss": 1.1618760824203491, "eval_runtime": 5.1971, "eval_samples_per_second": 7.312, "eval_steps_per_second": 1.924, "step": 770 }, { "epoch": 2.25, "eval_loss": 1.1532135009765625, "eval_runtime": 5.1859, "eval_samples_per_second": 7.327, "eval_steps_per_second": 1.928, "step": 775 }, { "epoch": 2.27, "eval_loss": 1.148254156112671, "eval_runtime": 5.1949, "eval_samples_per_second": 7.315, "eval_steps_per_second": 1.925, "step": 780 }, { "epoch": 2.28, "eval_loss": 1.1526851654052734, "eval_runtime": 5.2153, "eval_samples_per_second": 7.286, "eval_steps_per_second": 1.917, "step": 785 }, { "epoch": 2.3, "eval_loss": 1.1553150415420532, "eval_runtime": 7.2932, "eval_samples_per_second": 5.21, "eval_steps_per_second": 1.371, "step": 790 }, { "epoch": 2.31, "eval_loss": 1.1568715572357178, "eval_runtime": 5.2156, "eval_samples_per_second": 7.286, "eval_steps_per_second": 1.917, "step": 795 }, { "epoch": 2.33, "learning_rate": 8.837209302325582e-06, "loss": 1.0236, "step": 800 }, { "epoch": 2.33, "eval_loss": 1.1593568325042725, "eval_runtime": 15.4054, "eval_samples_per_second": 2.467, "eval_steps_per_second": 0.649, "step": 800 }, { "epoch": 2.34, "eval_loss": 1.1579800844192505, "eval_runtime": 5.1382, "eval_samples_per_second": 7.396, "eval_steps_per_second": 1.946, "step": 805 }, { "epoch": 2.35, "eval_loss": 1.1529394388198853, "eval_runtime": 5.1435, "eval_samples_per_second": 7.388, "eval_steps_per_second": 1.944, "step": 810 }, { "epoch": 2.37, "eval_loss": 1.1465649604797363, "eval_runtime": 5.1543, "eval_samples_per_second": 7.372, "eval_steps_per_second": 1.94, "step": 815 }, { "epoch": 2.38, "eval_loss": 1.1523265838623047, "eval_runtime": 5.1668, "eval_samples_per_second": 7.355, "eval_steps_per_second": 1.935, "step": 820 }, { "epoch": 2.4, "eval_loss": 1.1473236083984375, "eval_runtime": 5.1929, "eval_samples_per_second": 7.318, "eval_steps_per_second": 1.926, "step": 825 }, { "epoch": 2.41, "eval_loss": 1.1426836252212524, "eval_runtime": 5.1805, "eval_samples_per_second": 7.335, "eval_steps_per_second": 1.93, "step": 830 }, { "epoch": 2.43, "eval_loss": 1.1456120014190674, "eval_runtime": 5.1879, "eval_samples_per_second": 7.325, "eval_steps_per_second": 1.928, "step": 835 }, { "epoch": 2.44, "eval_loss": 1.1546478271484375, "eval_runtime": 6.9177, "eval_samples_per_second": 5.493, "eval_steps_per_second": 1.446, "step": 840 }, { "epoch": 2.46, "eval_loss": 1.150542140007019, "eval_runtime": 5.2006, "eval_samples_per_second": 7.307, "eval_steps_per_second": 1.923, "step": 845 }, { "epoch": 2.47, "eval_loss": 1.1451619863510132, "eval_runtime": 5.209, "eval_samples_per_second": 7.295, "eval_steps_per_second": 1.92, "step": 850 }, { "epoch": 2.49, "eval_loss": 1.1448664665222168, "eval_runtime": 5.1984, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 855 }, { "epoch": 2.5, "eval_loss": 1.142190933227539, "eval_runtime": 5.2014, "eval_samples_per_second": 7.306, "eval_steps_per_second": 1.923, "step": 860 }, { "epoch": 2.51, "eval_loss": 1.1360749006271362, "eval_runtime": 5.213, "eval_samples_per_second": 7.289, "eval_steps_per_second": 1.918, "step": 865 }, { "epoch": 2.53, "eval_loss": 1.1361669301986694, "eval_runtime": 5.225, "eval_samples_per_second": 7.273, "eval_steps_per_second": 1.914, "step": 870 }, { "epoch": 2.54, "eval_loss": 1.152858018875122, "eval_runtime": 5.2072, "eval_samples_per_second": 7.298, "eval_steps_per_second": 1.92, "step": 875 }, { "epoch": 2.56, "eval_loss": 1.1414388418197632, "eval_runtime": 5.2264, "eval_samples_per_second": 7.271, "eval_steps_per_second": 1.913, "step": 880 }, { "epoch": 2.57, "eval_loss": 1.1355433464050293, "eval_runtime": 5.2274, "eval_samples_per_second": 7.269, "eval_steps_per_second": 1.913, "step": 885 }, { "epoch": 2.59, "eval_loss": 1.140533447265625, "eval_runtime": 5.2091, "eval_samples_per_second": 7.295, "eval_steps_per_second": 1.92, "step": 890 }, { "epoch": 2.6, "eval_loss": 1.1398884057998657, "eval_runtime": 5.2089, "eval_samples_per_second": 7.295, "eval_steps_per_second": 1.92, "step": 895 }, { "epoch": 2.62, "learning_rate": 8.69186046511628e-06, "loss": 1.005, "step": 900 }, { "epoch": 2.62, "eval_loss": 1.1313834190368652, "eval_runtime": 7.9714, "eval_samples_per_second": 4.767, "eval_steps_per_second": 1.254, "step": 900 }, { "epoch": 2.63, "eval_loss": 1.1292839050292969, "eval_runtime": 5.1391, "eval_samples_per_second": 7.394, "eval_steps_per_second": 1.946, "step": 905 }, { "epoch": 2.65, "eval_loss": 1.1339421272277832, "eval_runtime": 5.1448, "eval_samples_per_second": 7.386, "eval_steps_per_second": 1.944, "step": 910 }, { "epoch": 2.66, "eval_loss": 1.1322623491287231, "eval_runtime": 5.1737, "eval_samples_per_second": 7.345, "eval_steps_per_second": 1.933, "step": 915 }, { "epoch": 2.67, "eval_loss": 1.1225138902664185, "eval_runtime": 5.1822, "eval_samples_per_second": 7.333, "eval_steps_per_second": 1.93, "step": 920 }, { "epoch": 2.69, "eval_loss": 1.117431640625, "eval_runtime": 5.1739, "eval_samples_per_second": 7.345, "eval_steps_per_second": 1.933, "step": 925 }, { "epoch": 2.7, "eval_loss": 1.1242172718048096, "eval_runtime": 7.2847, "eval_samples_per_second": 5.216, "eval_steps_per_second": 1.373, "step": 930 }, { "epoch": 2.72, "eval_loss": 1.1318007707595825, "eval_runtime": 5.4547, "eval_samples_per_second": 6.966, "eval_steps_per_second": 1.833, "step": 935 }, { "epoch": 2.73, "eval_loss": 1.1215720176696777, "eval_runtime": 5.1989, "eval_samples_per_second": 7.309, "eval_steps_per_second": 1.923, "step": 940 }, { "epoch": 2.75, "eval_loss": 1.1199665069580078, "eval_runtime": 5.1934, "eval_samples_per_second": 7.317, "eval_steps_per_second": 1.926, "step": 945 }, { "epoch": 2.76, "eval_loss": 1.1283185482025146, "eval_runtime": 5.2047, "eval_samples_per_second": 7.301, "eval_steps_per_second": 1.921, "step": 950 }, { "epoch": 2.78, "eval_loss": 1.1272627115249634, "eval_runtime": 5.2028, "eval_samples_per_second": 7.304, "eval_steps_per_second": 1.922, "step": 955 }, { "epoch": 2.79, "eval_loss": 1.116969347000122, "eval_runtime": 5.2048, "eval_samples_per_second": 7.301, "eval_steps_per_second": 1.921, "step": 960 }, { "epoch": 2.81, "eval_loss": 1.1125539541244507, "eval_runtime": 5.2168, "eval_samples_per_second": 7.284, "eval_steps_per_second": 1.917, "step": 965 }, { "epoch": 2.82, "eval_loss": 1.1195616722106934, "eval_runtime": 5.1986, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 970 }, { "epoch": 2.83, "eval_loss": 1.1192817687988281, "eval_runtime": 5.2203, "eval_samples_per_second": 7.279, "eval_steps_per_second": 1.916, "step": 975 }, { "epoch": 2.85, "eval_loss": 1.1130948066711426, "eval_runtime": 9.4606, "eval_samples_per_second": 4.017, "eval_steps_per_second": 1.057, "step": 980 }, { "epoch": 2.86, "eval_loss": 1.1146584749221802, "eval_runtime": 5.2013, "eval_samples_per_second": 7.306, "eval_steps_per_second": 1.923, "step": 985 }, { "epoch": 2.88, "eval_loss": 1.118652582168579, "eval_runtime": 5.201, "eval_samples_per_second": 7.306, "eval_steps_per_second": 1.923, "step": 990 }, { "epoch": 2.89, "eval_loss": 1.1133984327316284, "eval_runtime": 5.2035, "eval_samples_per_second": 7.303, "eval_steps_per_second": 1.922, "step": 995 }, { "epoch": 2.91, "learning_rate": 8.546511627906978e-06, "loss": 1.0003, "step": 1000 }, { "epoch": 2.91, "eval_loss": 1.1122275590896606, "eval_runtime": 5.5704, "eval_samples_per_second": 6.822, "eval_steps_per_second": 1.795, "step": 1000 }, { "epoch": 2.92, "eval_loss": 1.1242709159851074, "eval_runtime": 5.1423, "eval_samples_per_second": 7.39, "eval_steps_per_second": 1.945, "step": 1005 }, { "epoch": 2.94, "eval_loss": 1.1163734197616577, "eval_runtime": 5.159, "eval_samples_per_second": 7.366, "eval_steps_per_second": 1.938, "step": 1010 }, { "epoch": 2.95, "eval_loss": 1.1083950996398926, "eval_runtime": 5.1716, "eval_samples_per_second": 7.348, "eval_steps_per_second": 1.934, "step": 1015 }, { "epoch": 2.97, "eval_loss": 1.1112630367279053, "eval_runtime": 5.1839, "eval_samples_per_second": 7.33, "eval_steps_per_second": 1.929, "step": 1020 }, { "epoch": 2.98, "eval_loss": 1.1141904592514038, "eval_runtime": 5.1752, "eval_samples_per_second": 7.343, "eval_steps_per_second": 1.932, "step": 1025 }, { "epoch": 2.99, "eval_loss": 1.109470248222351, "eval_runtime": 5.1964, "eval_samples_per_second": 7.313, "eval_steps_per_second": 1.924, "step": 1030 }, { "epoch": 3.01, "eval_loss": 1.114129662513733, "eval_runtime": 5.1874, "eval_samples_per_second": 7.325, "eval_steps_per_second": 1.928, "step": 1035 }, { "epoch": 3.02, "eval_loss": 1.1438744068145752, "eval_runtime": 6.8038, "eval_samples_per_second": 5.585, "eval_steps_per_second": 1.47, "step": 1040 }, { "epoch": 3.04, "eval_loss": 1.1356358528137207, "eval_runtime": 5.186, "eval_samples_per_second": 7.327, "eval_steps_per_second": 1.928, "step": 1045 }, { "epoch": 3.05, "eval_loss": 1.1280732154846191, "eval_runtime": 5.1897, "eval_samples_per_second": 7.322, "eval_steps_per_second": 1.927, "step": 1050 }, { "epoch": 3.07, "eval_loss": 1.130995750427246, "eval_runtime": 5.1866, "eval_samples_per_second": 7.327, "eval_steps_per_second": 1.928, "step": 1055 }, { "epoch": 3.08, "eval_loss": 1.1382439136505127, "eval_runtime": 5.2111, "eval_samples_per_second": 7.292, "eval_steps_per_second": 1.919, "step": 1060 }, { "epoch": 3.1, "eval_loss": 1.142386555671692, "eval_runtime": 5.1981, "eval_samples_per_second": 7.31, "eval_steps_per_second": 1.924, "step": 1065 }, { "epoch": 3.11, "eval_loss": 1.126651406288147, "eval_runtime": 5.2225, "eval_samples_per_second": 7.276, "eval_steps_per_second": 1.915, "step": 1070 }, { "epoch": 3.12, "eval_loss": 1.1165131330490112, "eval_runtime": 5.2221, "eval_samples_per_second": 7.277, "eval_steps_per_second": 1.915, "step": 1075 }, { "epoch": 3.14, "eval_loss": 1.1212615966796875, "eval_runtime": 5.2824, "eval_samples_per_second": 7.194, "eval_steps_per_second": 1.893, "step": 1080 }, { "epoch": 3.15, "eval_loss": 1.134791374206543, "eval_runtime": 5.2245, "eval_samples_per_second": 7.273, "eval_steps_per_second": 1.914, "step": 1085 }, { "epoch": 3.17, "eval_loss": 1.1270241737365723, "eval_runtime": 5.3762, "eval_samples_per_second": 7.068, "eval_steps_per_second": 1.86, "step": 1090 }, { "epoch": 3.18, "eval_loss": 1.130293846130371, "eval_runtime": 5.2263, "eval_samples_per_second": 7.271, "eval_steps_per_second": 1.913, "step": 1095 }, { "epoch": 3.2, "learning_rate": 8.401162790697675e-06, "loss": 0.9125, "step": 1100 }, { "epoch": 3.2, "eval_loss": 1.1280182600021362, "eval_runtime": 5.2073, "eval_samples_per_second": 7.297, "eval_steps_per_second": 1.92, "step": 1100 }, { "epoch": 3.21, "eval_loss": 1.1197805404663086, "eval_runtime": 5.1419, "eval_samples_per_second": 7.39, "eval_steps_per_second": 1.945, "step": 1105 }, { "epoch": 3.23, "eval_loss": 1.1133369207382202, "eval_runtime": 5.1481, "eval_samples_per_second": 7.381, "eval_steps_per_second": 1.942, "step": 1110 }, { "epoch": 3.24, "eval_loss": 1.1289596557617188, "eval_runtime": 5.1768, "eval_samples_per_second": 7.34, "eval_steps_per_second": 1.932, "step": 1115 }, { "epoch": 3.26, "eval_loss": 1.1368350982666016, "eval_runtime": 5.1857, "eval_samples_per_second": 7.328, "eval_steps_per_second": 1.928, "step": 1120 }, { "epoch": 3.27, "eval_loss": 1.1226954460144043, "eval_runtime": 5.4937, "eval_samples_per_second": 6.917, "eval_steps_per_second": 1.82, "step": 1125 }, { "epoch": 3.28, "eval_loss": 1.1116739511489868, "eval_runtime": 5.1807, "eval_samples_per_second": 7.335, "eval_steps_per_second": 1.93, "step": 1130 }, { "epoch": 3.3, "eval_loss": 1.1072471141815186, "eval_runtime": 5.89, "eval_samples_per_second": 6.452, "eval_steps_per_second": 1.698, "step": 1135 }, { "epoch": 3.31, "eval_loss": 1.1166975498199463, "eval_runtime": 5.2072, "eval_samples_per_second": 7.298, "eval_steps_per_second": 1.92, "step": 1140 }, { "epoch": 3.33, "eval_loss": 1.119249939918518, "eval_runtime": 5.7107, "eval_samples_per_second": 6.654, "eval_steps_per_second": 1.751, "step": 1145 }, { "epoch": 3.34, "eval_loss": 1.1184831857681274, "eval_runtime": 5.2137, "eval_samples_per_second": 7.289, "eval_steps_per_second": 1.918, "step": 1150 }, { "epoch": 3.36, "eval_loss": 1.1111912727355957, "eval_runtime": 5.9367, "eval_samples_per_second": 6.401, "eval_steps_per_second": 1.684, "step": 1155 }, { "epoch": 3.37, "eval_loss": 1.105357050895691, "eval_runtime": 5.2105, "eval_samples_per_second": 7.293, "eval_steps_per_second": 1.919, "step": 1160 }, { "epoch": 3.39, "eval_loss": 1.1098227500915527, "eval_runtime": 5.2189, "eval_samples_per_second": 7.281, "eval_steps_per_second": 1.916, "step": 1165 }, { "epoch": 3.4, "eval_loss": 1.1147123575210571, "eval_runtime": 5.2081, "eval_samples_per_second": 7.296, "eval_steps_per_second": 1.92, "step": 1170 }, { "epoch": 3.42, "eval_loss": 1.1088005304336548, "eval_runtime": 7.6087, "eval_samples_per_second": 4.994, "eval_steps_per_second": 1.314, "step": 1175 }, { "epoch": 3.43, "eval_loss": 1.1164624691009521, "eval_runtime": 5.2125, "eval_samples_per_second": 7.29, "eval_steps_per_second": 1.918, "step": 1180 }, { "epoch": 3.44, "eval_loss": 1.121812105178833, "eval_runtime": 5.2191, "eval_samples_per_second": 7.281, "eval_steps_per_second": 1.916, "step": 1185 }, { "epoch": 3.46, "eval_loss": 1.1162291765213013, "eval_runtime": 5.2203, "eval_samples_per_second": 7.279, "eval_steps_per_second": 1.916, "step": 1190 }, { "epoch": 3.47, "eval_loss": 1.1079984903335571, "eval_runtime": 5.2071, "eval_samples_per_second": 7.298, "eval_steps_per_second": 1.92, "step": 1195 }, { "epoch": 3.49, "learning_rate": 8.255813953488374e-06, "loss": 0.8748, "step": 1200 }, { "epoch": 3.49, "eval_loss": 1.105452060699463, "eval_runtime": 5.2094, "eval_samples_per_second": 7.294, "eval_steps_per_second": 1.92, "step": 1200 }, { "epoch": 3.5, "eval_loss": 1.106619119644165, "eval_runtime": 5.1432, "eval_samples_per_second": 7.388, "eval_steps_per_second": 1.944, "step": 1205 }, { "epoch": 3.52, "eval_loss": 1.106793761253357, "eval_runtime": 5.1604, "eval_samples_per_second": 7.364, "eval_steps_per_second": 1.938, "step": 1210 }, { "epoch": 3.53, "eval_loss": 1.1173338890075684, "eval_runtime": 5.1605, "eval_samples_per_second": 7.364, "eval_steps_per_second": 1.938, "step": 1215 }, { "epoch": 3.55, "eval_loss": 1.114292025566101, "eval_runtime": 5.1855, "eval_samples_per_second": 7.328, "eval_steps_per_second": 1.928, "step": 1220 }, { "epoch": 3.56, "eval_loss": 1.1030842065811157, "eval_runtime": 6.5892, "eval_samples_per_second": 5.767, "eval_steps_per_second": 1.518, "step": 1225 }, { "epoch": 3.58, "eval_loss": 1.1062043905258179, "eval_runtime": 5.1841, "eval_samples_per_second": 7.33, "eval_steps_per_second": 1.929, "step": 1230 }, { "epoch": 3.59, "eval_loss": 1.1063551902770996, "eval_runtime": 5.1918, "eval_samples_per_second": 7.319, "eval_steps_per_second": 1.926, "step": 1235 }, { "epoch": 3.6, "eval_loss": 1.1138606071472168, "eval_runtime": 5.2119, "eval_samples_per_second": 7.291, "eval_steps_per_second": 1.919, "step": 1240 }, { "epoch": 3.62, "eval_loss": 1.110581874847412, "eval_runtime": 5.1939, "eval_samples_per_second": 7.316, "eval_steps_per_second": 1.925, "step": 1245 }, { "epoch": 3.63, "eval_loss": 1.100100040435791, "eval_runtime": 5.1966, "eval_samples_per_second": 7.312, "eval_steps_per_second": 1.924, "step": 1250 }, { "epoch": 3.65, "eval_loss": 1.0952939987182617, "eval_runtime": 5.1974, "eval_samples_per_second": 7.311, "eval_steps_per_second": 1.924, "step": 1255 }, { "epoch": 3.66, "eval_loss": 1.0998646020889282, "eval_runtime": 5.19, "eval_samples_per_second": 7.322, "eval_steps_per_second": 1.927, "step": 1260 }, { "epoch": 3.68, "eval_loss": 1.1082687377929688, "eval_runtime": 5.4201, "eval_samples_per_second": 7.011, "eval_steps_per_second": 1.845, "step": 1265 }, { "epoch": 3.69, "eval_loss": 1.1146087646484375, "eval_runtime": 5.1949, "eval_samples_per_second": 7.315, "eval_steps_per_second": 1.925, "step": 1270 }, { "epoch": 3.71, "eval_loss": 1.1090608835220337, "eval_runtime": 5.2206, "eval_samples_per_second": 7.279, "eval_steps_per_second": 1.915, "step": 1275 }, { "epoch": 3.72, "eval_loss": 1.1007745265960693, "eval_runtime": 5.2062, "eval_samples_per_second": 7.299, "eval_steps_per_second": 1.921, "step": 1280 }, { "epoch": 3.74, "eval_loss": 1.0957188606262207, "eval_runtime": 5.2196, "eval_samples_per_second": 7.28, "eval_steps_per_second": 1.916, "step": 1285 }, { "epoch": 3.75, "eval_loss": 1.1012167930603027, "eval_runtime": 5.2206, "eval_samples_per_second": 7.279, "eval_steps_per_second": 1.915, "step": 1290 }, { "epoch": 3.76, "eval_loss": 1.09498929977417, "eval_runtime": 5.2178, "eval_samples_per_second": 7.283, "eval_steps_per_second": 1.917, "step": 1295 }, { "epoch": 3.78, "learning_rate": 8.110465116279071e-06, "loss": 0.8599, "step": 1300 }, { "epoch": 3.78, "eval_loss": 1.0983973741531372, "eval_runtime": 5.203, "eval_samples_per_second": 7.303, "eval_steps_per_second": 1.922, "step": 1300 } ], "max_steps": 6880, "num_train_epochs": 20, "total_flos": 5658055802880000.0, "trial_name": null, "trial_params": null }