diff --git "a/trainer_state.json" "b/trainer_state.json" deleted file mode 100644--- "a/trainer_state.json" +++ /dev/null @@ -1,5590 +0,0 @@ -{ - "best_metric": 0.1484375, - "best_model_checkpoint": "/mnt/vdc/metamath_leaderboard/checkpoint-6168", - "epoch": 3.0, - "global_step": 9252, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 7.194244604316547e-07, - "loss": 0.7531, - "step": 10 - }, - { - "epoch": 0.01, - "learning_rate": 1.4388489208633094e-06, - "loss": 0.6605, - "step": 20 - }, - { - "epoch": 0.01, - "learning_rate": 2.158273381294964e-06, - "loss": 0.4646, - "step": 30 - }, - { - "epoch": 0.01, - "learning_rate": 2.877697841726619e-06, - "loss": 0.3787, - "step": 40 - }, - { - "epoch": 0.02, - "learning_rate": 3.5971223021582737e-06, - "loss": 0.3369, - "step": 50 - }, - { - "epoch": 0.02, - "learning_rate": 4.316546762589928e-06, - "loss": 0.3264, - "step": 60 - }, - { - "epoch": 0.02, - "learning_rate": 5.035971223021583e-06, - "loss": 0.3008, - "step": 70 - }, - { - "epoch": 0.03, - "learning_rate": 5.755395683453238e-06, - "loss": 0.2889, - "step": 80 - }, - { - "epoch": 0.03, - "learning_rate": 6.474820143884892e-06, - "loss": 0.2497, - "step": 90 - }, - { - "epoch": 0.03, - "learning_rate": 7.194244604316547e-06, - "loss": 0.2774, - "step": 100 - }, - { - "epoch": 0.04, - "learning_rate": 7.913669064748202e-06, - "loss": 0.2653, - "step": 110 - }, - { - "epoch": 0.04, - "learning_rate": 8.633093525179856e-06, - "loss": 0.2691, - "step": 120 - }, - { - "epoch": 0.04, - "learning_rate": 9.35251798561151e-06, - "loss": 0.2661, - "step": 130 - }, - { - "epoch": 0.05, - "learning_rate": 1.0071942446043167e-05, - "loss": 0.2732, - "step": 140 - }, - { - "epoch": 0.05, - "learning_rate": 1.0791366906474821e-05, - "loss": 0.2507, - "step": 150 - }, - { - "epoch": 0.05, - "learning_rate": 1.1510791366906475e-05, - "loss": 0.2482, - "step": 160 - }, - { - "epoch": 0.06, - "learning_rate": 1.223021582733813e-05, - "loss": 0.2368, - "step": 170 - }, - { - "epoch": 0.06, - "learning_rate": 1.2949640287769784e-05, - "loss": 0.2357, - "step": 180 - }, - { - "epoch": 0.06, - "learning_rate": 1.3669064748201439e-05, - "loss": 0.2404, - "step": 190 - }, - { - "epoch": 0.06, - "learning_rate": 1.4388489208633095e-05, - "loss": 0.2479, - "step": 200 - }, - { - "epoch": 0.07, - "learning_rate": 1.5107913669064749e-05, - "loss": 0.2501, - "step": 210 - }, - { - "epoch": 0.07, - "learning_rate": 1.5827338129496403e-05, - "loss": 0.2502, - "step": 220 - }, - { - "epoch": 0.07, - "learning_rate": 1.6546762589928058e-05, - "loss": 0.232, - "step": 230 - }, - { - "epoch": 0.08, - "learning_rate": 1.7266187050359712e-05, - "loss": 0.237, - "step": 240 - }, - { - "epoch": 0.08, - "learning_rate": 1.7985611510791367e-05, - "loss": 0.2469, - "step": 250 - }, - { - "epoch": 0.08, - "learning_rate": 1.870503597122302e-05, - "loss": 0.2302, - "step": 260 - }, - { - "epoch": 0.09, - "learning_rate": 1.9424460431654675e-05, - "loss": 0.2409, - "step": 270 - }, - { - "epoch": 0.09, - "learning_rate": 1.99999975489194e-05, - "loss": 0.2106, - "step": 280 - }, - { - "epoch": 0.09, - "learning_rate": 1.9999911761224496e-05, - "loss": 0.2308, - "step": 290 - }, - { - "epoch": 0.1, - "learning_rate": 1.999970342070106e-05, - "loss": 0.2361, - "step": 300 - }, - { - "epoch": 0.1, - "learning_rate": 1.9999372529902386e-05, - "loss": 0.2277, - "step": 310 - }, - { - "epoch": 0.1, - "learning_rate": 1.9998919092883666e-05, - "loss": 0.2204, - "step": 320 - }, - { - "epoch": 0.11, - "learning_rate": 1.9998343115201945e-05, - "loss": 0.2374, - "step": 330 - }, - { - "epoch": 0.11, - "learning_rate": 1.999764460391606e-05, - "loss": 0.2073, - "step": 340 - }, - { - "epoch": 0.11, - "learning_rate": 1.999682356758654e-05, - "loss": 0.2217, - "step": 350 - }, - { - "epoch": 0.12, - "learning_rate": 1.9995880016275502e-05, - "loss": 0.2327, - "step": 360 - }, - { - "epoch": 0.12, - "learning_rate": 1.9994813961546543e-05, - "loss": 0.2303, - "step": 370 - }, - { - "epoch": 0.12, - "learning_rate": 1.9993625416464575e-05, - "loss": 0.2229, - "step": 380 - }, - { - "epoch": 0.13, - "learning_rate": 1.9992314395595686e-05, - "loss": 0.2188, - "step": 390 - }, - { - "epoch": 0.13, - "learning_rate": 1.9990880915006945e-05, - "loss": 0.2244, - "step": 400 - }, - { - "epoch": 0.13, - "learning_rate": 1.998932499226622e-05, - "loss": 0.2188, - "step": 410 - }, - { - "epoch": 0.14, - "learning_rate": 1.9987646646441956e-05, - "loss": 0.2196, - "step": 420 - }, - { - "epoch": 0.14, - "learning_rate": 1.9985845898102933e-05, - "loss": 0.2022, - "step": 430 - }, - { - "epoch": 0.14, - "learning_rate": 1.9983922769318024e-05, - "loss": 0.2219, - "step": 440 - }, - { - "epoch": 0.15, - "learning_rate": 1.9981877283655924e-05, - "loss": 0.2014, - "step": 450 - }, - { - "epoch": 0.15, - "learning_rate": 1.997970946618487e-05, - "loss": 0.1935, - "step": 460 - }, - { - "epoch": 0.15, - "learning_rate": 1.99774193434723e-05, - "loss": 0.2011, - "step": 470 - }, - { - "epoch": 0.16, - "learning_rate": 1.997500694358457e-05, - "loss": 0.2003, - "step": 480 - }, - { - "epoch": 0.16, - "learning_rate": 1.9972472296086583e-05, - "loss": 0.1996, - "step": 490 - }, - { - "epoch": 0.16, - "learning_rate": 1.9969815432041434e-05, - "loss": 0.2131, - "step": 500 - }, - { - "epoch": 0.17, - "learning_rate": 1.996703638401003e-05, - "loss": 0.2119, - "step": 510 - }, - { - "epoch": 0.17, - "learning_rate": 1.9964135186050692e-05, - "loss": 0.2192, - "step": 520 - }, - { - "epoch": 0.17, - "learning_rate": 1.996111187371874e-05, - "loss": 0.2051, - "step": 530 - }, - { - "epoch": 0.18, - "learning_rate": 1.995796648406604e-05, - "loss": 0.1961, - "step": 540 - }, - { - "epoch": 0.18, - "learning_rate": 1.9954699055640576e-05, - "loss": 0.2017, - "step": 550 - }, - { - "epoch": 0.18, - "learning_rate": 1.9951309628485963e-05, - "loss": 0.1997, - "step": 560 - }, - { - "epoch": 0.18, - "learning_rate": 1.9947798244140954e-05, - "loss": 0.2003, - "step": 570 - }, - { - "epoch": 0.19, - "learning_rate": 1.994416494563894e-05, - "loss": 0.2025, - "step": 580 - }, - { - "epoch": 0.19, - "learning_rate": 1.9940409777507407e-05, - "loss": 0.2038, - "step": 590 - }, - { - "epoch": 0.19, - "learning_rate": 1.9936532785767416e-05, - "loss": 0.2068, - "step": 600 - }, - { - "epoch": 0.2, - "learning_rate": 1.9932534017933015e-05, - "loss": 0.205, - "step": 610 - }, - { - "epoch": 0.2, - "learning_rate": 1.9928413523010667e-05, - "loss": 0.2122, - "step": 620 - }, - { - "epoch": 0.2, - "learning_rate": 1.9924171351498645e-05, - "loss": 0.1979, - "step": 630 - }, - { - "epoch": 0.21, - "learning_rate": 1.9919807555386426e-05, - "loss": 0.1921, - "step": 640 - }, - { - "epoch": 0.21, - "learning_rate": 1.9915322188154033e-05, - "loss": 0.2027, - "step": 650 - }, - { - "epoch": 0.21, - "learning_rate": 1.9910715304771396e-05, - "loss": 0.1852, - "step": 660 - }, - { - "epoch": 0.22, - "learning_rate": 1.9905986961697675e-05, - "loss": 0.1957, - "step": 670 - }, - { - "epoch": 0.22, - "learning_rate": 1.9901137216880556e-05, - "loss": 0.1945, - "step": 680 - }, - { - "epoch": 0.22, - "learning_rate": 1.989616612975557e-05, - "loss": 0.209, - "step": 690 - }, - { - "epoch": 0.23, - "learning_rate": 1.9891073761245318e-05, - "loss": 0.1963, - "step": 700 - }, - { - "epoch": 0.23, - "learning_rate": 1.988586017375878e-05, - "loss": 0.1757, - "step": 710 - }, - { - "epoch": 0.23, - "learning_rate": 1.9880525431190503e-05, - "loss": 0.1856, - "step": 720 - }, - { - "epoch": 0.24, - "learning_rate": 1.9875069598919844e-05, - "loss": 0.179, - "step": 730 - }, - { - "epoch": 0.24, - "learning_rate": 1.9869492743810163e-05, - "loss": 0.1891, - "step": 740 - }, - { - "epoch": 0.24, - "learning_rate": 1.9863794934207994e-05, - "loss": 0.1975, - "step": 750 - }, - { - "epoch": 0.25, - "learning_rate": 1.9857976239942228e-05, - "loss": 0.1819, - "step": 760 - }, - { - "epoch": 0.25, - "learning_rate": 1.9852036732323237e-05, - "loss": 0.2062, - "step": 770 - }, - { - "epoch": 0.25, - "learning_rate": 1.9845976484142003e-05, - "loss": 0.1912, - "step": 780 - }, - { - "epoch": 0.26, - "learning_rate": 1.9839795569669246e-05, - "loss": 0.1938, - "step": 790 - }, - { - "epoch": 0.26, - "learning_rate": 1.9833494064654485e-05, - "loss": 0.1864, - "step": 800 - }, - { - "epoch": 0.26, - "learning_rate": 1.982707204632513e-05, - "loss": 0.1907, - "step": 810 - }, - { - "epoch": 0.27, - "learning_rate": 1.9820529593385516e-05, - "loss": 0.188, - "step": 820 - }, - { - "epoch": 0.27, - "learning_rate": 1.981386678601598e-05, - "loss": 0.1889, - "step": 830 - }, - { - "epoch": 0.27, - "learning_rate": 1.980708370587182e-05, - "loss": 0.1871, - "step": 840 - }, - { - "epoch": 0.28, - "learning_rate": 1.9800180436082335e-05, - "loss": 0.1772, - "step": 850 - }, - { - "epoch": 0.28, - "learning_rate": 1.97931570612498e-05, - "loss": 0.1817, - "step": 860 - }, - { - "epoch": 0.28, - "learning_rate": 1.9786013667448416e-05, - "loss": 0.1765, - "step": 870 - }, - { - "epoch": 0.29, - "learning_rate": 1.977875034222327e-05, - "loss": 0.1987, - "step": 880 - }, - { - "epoch": 0.29, - "learning_rate": 1.977136717458925e-05, - "loss": 0.2069, - "step": 890 - }, - { - "epoch": 0.29, - "learning_rate": 1.9763864255029962e-05, - "loss": 0.1817, - "step": 900 - }, - { - "epoch": 0.3, - "learning_rate": 1.975624167549662e-05, - "loss": 0.1883, - "step": 910 - }, - { - "epoch": 0.3, - "learning_rate": 1.9748499529406918e-05, - "loss": 0.1738, - "step": 920 - }, - { - "epoch": 0.3, - "learning_rate": 1.9740637911643882e-05, - "loss": 0.1873, - "step": 930 - }, - { - "epoch": 0.3, - "learning_rate": 1.973265691855471e-05, - "loss": 0.193, - "step": 940 - }, - { - "epoch": 0.31, - "learning_rate": 1.9724556647949597e-05, - "loss": 0.1725, - "step": 950 - }, - { - "epoch": 0.31, - "learning_rate": 1.971633719910052e-05, - "loss": 0.1896, - "step": 960 - }, - { - "epoch": 0.31, - "learning_rate": 1.9707998672740045e-05, - "loss": 0.185, - "step": 970 - }, - { - "epoch": 0.32, - "learning_rate": 1.9699541171060068e-05, - "loss": 0.1745, - "step": 980 - }, - { - "epoch": 0.32, - "learning_rate": 1.9690964797710585e-05, - "loss": 0.1862, - "step": 990 - }, - { - "epoch": 0.32, - "learning_rate": 1.9682269657798395e-05, - "loss": 0.1801, - "step": 1000 - }, - { - "epoch": 0.33, - "learning_rate": 1.9673455857885846e-05, - "loss": 0.1754, - "step": 1010 - }, - { - "epoch": 0.33, - "learning_rate": 1.9664523505989498e-05, - "loss": 0.1882, - "step": 1020 - }, - { - "epoch": 0.33, - "learning_rate": 1.965547271157882e-05, - "loss": 0.1888, - "step": 1030 - }, - { - "epoch": 0.34, - "learning_rate": 1.9646303585574832e-05, - "loss": 0.1965, - "step": 1040 - }, - { - "epoch": 0.34, - "learning_rate": 1.9637016240348755e-05, - "loss": 0.1785, - "step": 1050 - }, - { - "epoch": 0.34, - "learning_rate": 1.9627610789720647e-05, - "loss": 0.19, - "step": 1060 - }, - { - "epoch": 0.35, - "learning_rate": 1.9618087348957973e-05, - "loss": 0.1789, - "step": 1070 - }, - { - "epoch": 0.35, - "learning_rate": 1.9608446034774225e-05, - "loss": 0.1785, - "step": 1080 - }, - { - "epoch": 0.35, - "learning_rate": 1.9598686965327483e-05, - "loss": 0.2006, - "step": 1090 - }, - { - "epoch": 0.36, - "learning_rate": 1.9588810260218955e-05, - "loss": 0.1937, - "step": 1100 - }, - { - "epoch": 0.36, - "learning_rate": 1.9578816040491526e-05, - "loss": 0.183, - "step": 1110 - }, - { - "epoch": 0.36, - "learning_rate": 1.956870442862826e-05, - "loss": 0.1834, - "step": 1120 - }, - { - "epoch": 0.37, - "learning_rate": 1.9558475548550924e-05, - "loss": 0.1784, - "step": 1130 - }, - { - "epoch": 0.37, - "learning_rate": 1.9548129525618434e-05, - "loss": 0.1753, - "step": 1140 - }, - { - "epoch": 0.37, - "learning_rate": 1.9537666486625352e-05, - "loss": 0.1813, - "step": 1150 - }, - { - "epoch": 0.38, - "learning_rate": 1.9527086559800307e-05, - "loss": 0.191, - "step": 1160 - }, - { - "epoch": 0.38, - "learning_rate": 1.9516389874804442e-05, - "loss": 0.1749, - "step": 1170 - }, - { - "epoch": 0.38, - "learning_rate": 1.9505576562729818e-05, - "loss": 0.184, - "step": 1180 - }, - { - "epoch": 0.39, - "learning_rate": 1.949464675609779e-05, - "loss": 0.1711, - "step": 1190 - }, - { - "epoch": 0.39, - "learning_rate": 1.9483600588857428e-05, - "loss": 0.1784, - "step": 1200 - }, - { - "epoch": 0.39, - "learning_rate": 1.9472438196383817e-05, - "loss": 0.1721, - "step": 1210 - }, - { - "epoch": 0.4, - "learning_rate": 1.946115971547645e-05, - "loss": 0.1883, - "step": 1220 - }, - { - "epoch": 0.4, - "learning_rate": 1.9449765284357514e-05, - "loss": 0.181, - "step": 1230 - }, - { - "epoch": 0.4, - "learning_rate": 1.943825504267022e-05, - "loss": 0.1884, - "step": 1240 - }, - { - "epoch": 0.41, - "learning_rate": 1.942662913147708e-05, - "loss": 0.1586, - "step": 1250 - }, - { - "epoch": 0.41, - "learning_rate": 1.9414887693258185e-05, - "loss": 0.1689, - "step": 1260 - }, - { - "epoch": 0.41, - "learning_rate": 1.9403030871909443e-05, - "loss": 0.1663, - "step": 1270 - }, - { - "epoch": 0.42, - "learning_rate": 1.9391058812740845e-05, - "loss": 0.1652, - "step": 1280 - }, - { - "epoch": 0.42, - "learning_rate": 1.9378971662474652e-05, - "loss": 0.1728, - "step": 1290 - }, - { - "epoch": 0.42, - "learning_rate": 1.9366769569243614e-05, - "loss": 0.1883, - "step": 1300 - }, - { - "epoch": 0.42, - "learning_rate": 1.9354452682589162e-05, - "loss": 0.183, - "step": 1310 - }, - { - "epoch": 0.43, - "learning_rate": 1.9342021153459554e-05, - "loss": 0.1786, - "step": 1320 - }, - { - "epoch": 0.43, - "learning_rate": 1.9329475134208037e-05, - "loss": 0.158, - "step": 1330 - }, - { - "epoch": 0.43, - "learning_rate": 1.9316814778590984e-05, - "loss": 0.1811, - "step": 1340 - }, - { - "epoch": 0.44, - "learning_rate": 1.9304040241766008e-05, - "loss": 0.1834, - "step": 1350 - }, - { - "epoch": 0.44, - "learning_rate": 1.9291151680290045e-05, - "loss": 0.1691, - "step": 1360 - }, - { - "epoch": 0.44, - "learning_rate": 1.927814925211746e-05, - "loss": 0.1707, - "step": 1370 - }, - { - "epoch": 0.45, - "learning_rate": 1.9265033116598096e-05, - "loss": 0.1738, - "step": 1380 - }, - { - "epoch": 0.45, - "learning_rate": 1.9251803434475317e-05, - "loss": 0.1783, - "step": 1390 - }, - { - "epoch": 0.45, - "learning_rate": 1.923846036788405e-05, - "loss": 0.1722, - "step": 1400 - }, - { - "epoch": 0.46, - "learning_rate": 1.92250040803488e-05, - "loss": 0.1752, - "step": 1410 - }, - { - "epoch": 0.46, - "learning_rate": 1.9211434736781624e-05, - "loss": 0.1702, - "step": 1420 - }, - { - "epoch": 0.46, - "learning_rate": 1.919775250348014e-05, - "loss": 0.1676, - "step": 1430 - }, - { - "epoch": 0.47, - "learning_rate": 1.918395754812546e-05, - "loss": 0.1676, - "step": 1440 - }, - { - "epoch": 0.47, - "learning_rate": 1.9170050039780158e-05, - "loss": 0.1753, - "step": 1450 - }, - { - "epoch": 0.47, - "learning_rate": 1.9156030148886193e-05, - "loss": 0.1604, - "step": 1460 - }, - { - "epoch": 0.48, - "learning_rate": 1.91418980472628e-05, - "loss": 0.184, - "step": 1470 - }, - { - "epoch": 0.48, - "learning_rate": 1.9127653908104414e-05, - "loss": 0.1724, - "step": 1480 - }, - { - "epoch": 0.48, - "learning_rate": 1.911329790597853e-05, - "loss": 0.1765, - "step": 1490 - }, - { - "epoch": 0.49, - "learning_rate": 1.9098830216823568e-05, - "loss": 0.1708, - "step": 1500 - }, - { - "epoch": 0.49, - "learning_rate": 1.9084251017946713e-05, - "loss": 0.1725, - "step": 1510 - }, - { - "epoch": 0.49, - "learning_rate": 1.9069560488021744e-05, - "loss": 0.178, - "step": 1520 - }, - { - "epoch": 0.5, - "learning_rate": 1.905475880708686e-05, - "loss": 0.1853, - "step": 1530 - }, - { - "epoch": 0.5, - "learning_rate": 1.9039846156542442e-05, - "loss": 0.1619, - "step": 1540 - }, - { - "epoch": 0.5, - "learning_rate": 1.9024822719148853e-05, - "loss": 0.1616, - "step": 1550 - }, - { - "epoch": 0.51, - "learning_rate": 1.900968867902419e-05, - "loss": 0.1689, - "step": 1560 - }, - { - "epoch": 0.51, - "learning_rate": 1.899444422164204e-05, - "loss": 0.1567, - "step": 1570 - }, - { - "epoch": 0.51, - "learning_rate": 1.8979089533829182e-05, - "loss": 0.1683, - "step": 1580 - }, - { - "epoch": 0.52, - "learning_rate": 1.8963624803763318e-05, - "loss": 0.1677, - "step": 1590 - }, - { - "epoch": 0.52, - "learning_rate": 1.8948050220970763e-05, - "loss": 0.1642, - "step": 1600 - }, - { - "epoch": 0.52, - "learning_rate": 1.893236597632412e-05, - "loss": 0.1792, - "step": 1610 - }, - { - "epoch": 0.53, - "learning_rate": 1.891657226203994e-05, - "loss": 0.1805, - "step": 1620 - }, - { - "epoch": 0.53, - "learning_rate": 1.8900669271676367e-05, - "loss": 0.1573, - "step": 1630 - }, - { - "epoch": 0.53, - "learning_rate": 1.8884657200130763e-05, - "loss": 0.1696, - "step": 1640 - }, - { - "epoch": 0.54, - "learning_rate": 1.8868536243637327e-05, - "loss": 0.1725, - "step": 1650 - }, - { - "epoch": 0.54, - "learning_rate": 1.8852306599764683e-05, - "loss": 0.1755, - "step": 1660 - }, - { - "epoch": 0.54, - "learning_rate": 1.8835968467413465e-05, - "loss": 0.1597, - "step": 1670 - }, - { - "epoch": 0.54, - "learning_rate": 1.8819522046813873e-05, - "loss": 0.1741, - "step": 1680 - }, - { - "epoch": 0.55, - "learning_rate": 1.8802967539523215e-05, - "loss": 0.1712, - "step": 1690 - }, - { - "epoch": 0.55, - "learning_rate": 1.8786305148423463e-05, - "loss": 0.1759, - "step": 1700 - }, - { - "epoch": 0.55, - "learning_rate": 1.8769535077718725e-05, - "loss": 0.1602, - "step": 1710 - }, - { - "epoch": 0.56, - "learning_rate": 1.8752657532932774e-05, - "loss": 0.1693, - "step": 1720 - }, - { - "epoch": 0.56, - "learning_rate": 1.8735672720906527e-05, - "loss": 0.1539, - "step": 1730 - }, - { - "epoch": 0.56, - "learning_rate": 1.8718580849795494e-05, - "loss": 0.166, - "step": 1740 - }, - { - "epoch": 0.57, - "learning_rate": 1.8701382129067232e-05, - "loss": 0.1695, - "step": 1750 - }, - { - "epoch": 0.57, - "learning_rate": 1.86840767694988e-05, - "loss": 0.1664, - "step": 1760 - }, - { - "epoch": 0.57, - "learning_rate": 1.8666664983174137e-05, - "loss": 0.1693, - "step": 1770 - }, - { - "epoch": 0.58, - "learning_rate": 1.864914698348149e-05, - "loss": 0.168, - "step": 1780 - }, - { - "epoch": 0.58, - "learning_rate": 1.8631522985110803e-05, - "loss": 0.161, - "step": 1790 - }, - { - "epoch": 0.58, - "learning_rate": 1.8613793204051066e-05, - "loss": 0.1825, - "step": 1800 - }, - { - "epoch": 0.59, - "learning_rate": 1.859595785758767e-05, - "loss": 0.1688, - "step": 1810 - }, - { - "epoch": 0.59, - "learning_rate": 1.8578017164299767e-05, - "loss": 0.1584, - "step": 1820 - }, - { - "epoch": 0.59, - "learning_rate": 1.8559971344057562e-05, - "loss": 0.1602, - "step": 1830 - }, - { - "epoch": 0.6, - "learning_rate": 1.8541820618019647e-05, - "loss": 0.1773, - "step": 1840 - }, - { - "epoch": 0.6, - "learning_rate": 1.8523565208630257e-05, - "loss": 0.1665, - "step": 1850 - }, - { - "epoch": 0.6, - "learning_rate": 1.8505205339616577e-05, - "loss": 0.1706, - "step": 1860 - }, - { - "epoch": 0.61, - "learning_rate": 1.848674123598598e-05, - "loss": 0.1699, - "step": 1870 - }, - { - "epoch": 0.61, - "learning_rate": 1.846817312402327e-05, - "loss": 0.1613, - "step": 1880 - }, - { - "epoch": 0.61, - "learning_rate": 1.8449501231287926e-05, - "loss": 0.1678, - "step": 1890 - }, - { - "epoch": 0.62, - "learning_rate": 1.8430725786611293e-05, - "loss": 0.1777, - "step": 1900 - }, - { - "epoch": 0.62, - "learning_rate": 1.8411847020093784e-05, - "loss": 0.1729, - "step": 1910 - }, - { - "epoch": 0.62, - "learning_rate": 1.8392865163102065e-05, - "loss": 0.1619, - "step": 1920 - }, - { - "epoch": 0.63, - "learning_rate": 1.8373780448266213e-05, - "loss": 0.1723, - "step": 1930 - }, - { - "epoch": 0.63, - "learning_rate": 1.8354593109476877e-05, - "loss": 0.1561, - "step": 1940 - }, - { - "epoch": 0.63, - "learning_rate": 1.833530338188239e-05, - "loss": 0.166, - "step": 1950 - }, - { - "epoch": 0.64, - "learning_rate": 1.8315911501885905e-05, - "loss": 0.1684, - "step": 1960 - }, - { - "epoch": 0.64, - "learning_rate": 1.82964177071425e-05, - "loss": 0.1627, - "step": 1970 - }, - { - "epoch": 0.64, - "learning_rate": 1.8276822236556246e-05, - "loss": 0.171, - "step": 1980 - }, - { - "epoch": 0.65, - "learning_rate": 1.82571253302773e-05, - "loss": 0.1604, - "step": 1990 - }, - { - "epoch": 0.65, - "learning_rate": 1.8237327229698943e-05, - "loss": 0.176, - "step": 2000 - }, - { - "epoch": 0.65, - "learning_rate": 1.821742817745465e-05, - "loss": 0.1724, - "step": 2010 - }, - { - "epoch": 0.65, - "learning_rate": 1.8197428417415075e-05, - "loss": 0.1688, - "step": 2020 - }, - { - "epoch": 0.66, - "learning_rate": 1.8177328194685108e-05, - "loss": 0.1579, - "step": 2030 - }, - { - "epoch": 0.66, - "learning_rate": 1.8157127755600826e-05, - "loss": 0.1561, - "step": 2040 - }, - { - "epoch": 0.66, - "learning_rate": 1.8136827347726516e-05, - "loss": 0.1663, - "step": 2050 - }, - { - "epoch": 0.67, - "learning_rate": 1.8116427219851615e-05, - "loss": 0.1621, - "step": 2060 - }, - { - "epoch": 0.67, - "learning_rate": 1.8095927621987658e-05, - "loss": 0.1647, - "step": 2070 - }, - { - "epoch": 0.67, - "learning_rate": 1.807532880536524e-05, - "loss": 0.1773, - "step": 2080 - }, - { - "epoch": 0.68, - "learning_rate": 1.8054631022430913e-05, - "loss": 0.1668, - "step": 2090 - }, - { - "epoch": 0.68, - "learning_rate": 1.8033834526844095e-05, - "loss": 0.1496, - "step": 2100 - }, - { - "epoch": 0.68, - "learning_rate": 1.8012939573473972e-05, - "loss": 0.169, - "step": 2110 - }, - { - "epoch": 0.69, - "learning_rate": 1.7991946418396365e-05, - "loss": 0.1706, - "step": 2120 - }, - { - "epoch": 0.69, - "learning_rate": 1.7970855318890606e-05, - "loss": 0.1599, - "step": 2130 - }, - { - "epoch": 0.69, - "learning_rate": 1.7949666533436358e-05, - "loss": 0.1673, - "step": 2140 - }, - { - "epoch": 0.7, - "learning_rate": 1.792838032171047e-05, - "loss": 0.1586, - "step": 2150 - }, - { - "epoch": 0.7, - "learning_rate": 1.79069969445838e-05, - "loss": 0.1623, - "step": 2160 - }, - { - "epoch": 0.7, - "learning_rate": 1.7885516664117982e-05, - "loss": 0.1572, - "step": 2170 - }, - { - "epoch": 0.71, - "learning_rate": 1.7863939743562266e-05, - "loss": 0.1637, - "step": 2180 - }, - { - "epoch": 0.71, - "learning_rate": 1.7842266447350236e-05, - "loss": 0.1637, - "step": 2190 - }, - { - "epoch": 0.71, - "learning_rate": 1.782049704109662e-05, - "loss": 0.1568, - "step": 2200 - }, - { - "epoch": 0.72, - "learning_rate": 1.7798631791594e-05, - "loss": 0.1585, - "step": 2210 - }, - { - "epoch": 0.72, - "learning_rate": 1.777667096680956e-05, - "loss": 0.1649, - "step": 2220 - }, - { - "epoch": 0.72, - "learning_rate": 1.7754614835881795e-05, - "loss": 0.1646, - "step": 2230 - }, - { - "epoch": 0.73, - "learning_rate": 1.7732463669117206e-05, - "loss": 0.1605, - "step": 2240 - }, - { - "epoch": 0.73, - "learning_rate": 1.7710217737987008e-05, - "loss": 0.1515, - "step": 2250 - }, - { - "epoch": 0.73, - "learning_rate": 1.768787731512379e-05, - "loss": 0.1458, - "step": 2260 - }, - { - "epoch": 0.74, - "learning_rate": 1.766544267431816e-05, - "loss": 0.1674, - "step": 2270 - }, - { - "epoch": 0.74, - "learning_rate": 1.7642914090515423e-05, - "loss": 0.1659, - "step": 2280 - }, - { - "epoch": 0.74, - "learning_rate": 1.762029183981217e-05, - "loss": 0.1512, - "step": 2290 - }, - { - "epoch": 0.75, - "learning_rate": 1.759757619945294e-05, - "loss": 0.1736, - "step": 2300 - }, - { - "epoch": 0.75, - "learning_rate": 1.7574767447826776e-05, - "loss": 0.1656, - "step": 2310 - }, - { - "epoch": 0.75, - "learning_rate": 1.7551865864463857e-05, - "loss": 0.157, - "step": 2320 - }, - { - "epoch": 0.76, - "learning_rate": 1.7528871730032034e-05, - "loss": 0.1588, - "step": 2330 - }, - { - "epoch": 0.76, - "learning_rate": 1.750578532633342e-05, - "loss": 0.1547, - "step": 2340 - }, - { - "epoch": 0.76, - "learning_rate": 1.748260693630092e-05, - "loss": 0.1528, - "step": 2350 - }, - { - "epoch": 0.77, - "learning_rate": 1.7459336843994758e-05, - "loss": 0.1541, - "step": 2360 - }, - { - "epoch": 0.77, - "learning_rate": 1.7435975334599026e-05, - "loss": 0.1554, - "step": 2370 - }, - { - "epoch": 0.77, - "learning_rate": 1.741252269441815e-05, - "loss": 0.1728, - "step": 2380 - }, - { - "epoch": 0.77, - "learning_rate": 1.73889792108734e-05, - "loss": 0.1678, - "step": 2390 - }, - { - "epoch": 0.78, - "learning_rate": 1.736534517249938e-05, - "loss": 0.1586, - "step": 2400 - }, - { - "epoch": 0.78, - "learning_rate": 1.7341620868940467e-05, - "loss": 0.1549, - "step": 2410 - }, - { - "epoch": 0.78, - "learning_rate": 1.731780659094728e-05, - "loss": 0.1561, - "step": 2420 - }, - { - "epoch": 0.79, - "learning_rate": 1.7293902630373103e-05, - "loss": 0.1624, - "step": 2430 - }, - { - "epoch": 0.79, - "learning_rate": 1.726990928017032e-05, - "loss": 0.1561, - "step": 2440 - }, - { - "epoch": 0.79, - "learning_rate": 1.7245826834386825e-05, - "loss": 0.1424, - "step": 2450 - }, - { - "epoch": 0.8, - "learning_rate": 1.7221655588162397e-05, - "loss": 0.1605, - "step": 2460 - }, - { - "epoch": 0.8, - "learning_rate": 1.7197395837725118e-05, - "loss": 0.1547, - "step": 2470 - }, - { - "epoch": 0.8, - "learning_rate": 1.717304788038771e-05, - "loss": 0.164, - "step": 2480 - }, - { - "epoch": 0.81, - "learning_rate": 1.7148612014543915e-05, - "loss": 0.1569, - "step": 2490 - }, - { - "epoch": 0.81, - "learning_rate": 1.712408853966482e-05, - "loss": 0.1527, - "step": 2500 - }, - { - "epoch": 0.81, - "learning_rate": 1.7099477756295195e-05, - "loss": 0.154, - "step": 2510 - }, - { - "epoch": 0.82, - "learning_rate": 1.7074779966049818e-05, - "loss": 0.1588, - "step": 2520 - }, - { - "epoch": 0.82, - "learning_rate": 1.7049995471609765e-05, - "loss": 0.1595, - "step": 2530 - }, - { - "epoch": 0.82, - "learning_rate": 1.70251245767187e-05, - "loss": 0.1761, - "step": 2540 - }, - { - "epoch": 0.83, - "learning_rate": 1.7000167586179173e-05, - "loss": 0.1563, - "step": 2550 - }, - { - "epoch": 0.83, - "learning_rate": 1.6975124805848852e-05, - "loss": 0.1592, - "step": 2560 - }, - { - "epoch": 0.83, - "learning_rate": 1.694999654263681e-05, - "loss": 0.1597, - "step": 2570 - }, - { - "epoch": 0.84, - "learning_rate": 1.692478310449973e-05, - "loss": 0.1611, - "step": 2580 - }, - { - "epoch": 0.84, - "learning_rate": 1.689948480043816e-05, - "loss": 0.1712, - "step": 2590 - }, - { - "epoch": 0.84, - "learning_rate": 1.6874101940492707e-05, - "loss": 0.1603, - "step": 2600 - }, - { - "epoch": 0.85, - "learning_rate": 1.684863483574024e-05, - "loss": 0.1666, - "step": 2610 - }, - { - "epoch": 0.85, - "learning_rate": 1.6823083798290092e-05, - "loss": 0.1599, - "step": 2620 - }, - { - "epoch": 0.85, - "learning_rate": 1.6797449141280213e-05, - "loss": 0.1468, - "step": 2630 - }, - { - "epoch": 0.86, - "learning_rate": 1.6771731178873344e-05, - "loss": 0.1519, - "step": 2640 - }, - { - "epoch": 0.86, - "learning_rate": 1.674593022625318e-05, - "loss": 0.1565, - "step": 2650 - }, - { - "epoch": 0.86, - "learning_rate": 1.6720046599620476e-05, - "loss": 0.1513, - "step": 2660 - }, - { - "epoch": 0.87, - "learning_rate": 1.6694080616189197e-05, - "loss": 0.1616, - "step": 2670 - }, - { - "epoch": 0.87, - "learning_rate": 1.6668032594182623e-05, - "loss": 0.1642, - "step": 2680 - }, - { - "epoch": 0.87, - "learning_rate": 1.664190285282945e-05, - "loss": 0.1564, - "step": 2690 - }, - { - "epoch": 0.88, - "learning_rate": 1.661569171235988e-05, - "loss": 0.1604, - "step": 2700 - }, - { - "epoch": 0.88, - "learning_rate": 1.658939949400167e-05, - "loss": 0.1552, - "step": 2710 - }, - { - "epoch": 0.88, - "learning_rate": 1.656302651997626e-05, - "loss": 0.1526, - "step": 2720 - }, - { - "epoch": 0.89, - "learning_rate": 1.6536573113494737e-05, - "loss": 0.16, - "step": 2730 - }, - { - "epoch": 0.89, - "learning_rate": 1.6510039598753953e-05, - "loss": 0.155, - "step": 2740 - }, - { - "epoch": 0.89, - "learning_rate": 1.64834263009325e-05, - "loss": 0.1641, - "step": 2750 - }, - { - "epoch": 0.89, - "learning_rate": 1.6456733546186755e-05, - "loss": 0.1423, - "step": 2760 - }, - { - "epoch": 0.9, - "learning_rate": 1.6429961661646858e-05, - "loss": 0.1604, - "step": 2770 - }, - { - "epoch": 0.9, - "learning_rate": 1.6403110975412723e-05, - "loss": 0.1698, - "step": 2780 - }, - { - "epoch": 0.9, - "learning_rate": 1.637618181655001e-05, - "loss": 0.1537, - "step": 2790 - }, - { - "epoch": 0.91, - "learning_rate": 1.6349174515086087e-05, - "loss": 0.158, - "step": 2800 - }, - { - "epoch": 0.91, - "learning_rate": 1.6322089402005995e-05, - "loss": 0.145, - "step": 2810 - }, - { - "epoch": 0.91, - "learning_rate": 1.629492680924839e-05, - "loss": 0.1462, - "step": 2820 - }, - { - "epoch": 0.92, - "learning_rate": 1.6267687069701455e-05, - "loss": 0.1536, - "step": 2830 - }, - { - "epoch": 0.92, - "learning_rate": 1.6240370517198855e-05, - "loss": 0.1456, - "step": 2840 - }, - { - "epoch": 0.92, - "learning_rate": 1.6212977486515626e-05, - "loss": 0.1576, - "step": 2850 - }, - { - "epoch": 0.93, - "learning_rate": 1.618550831336406e-05, - "loss": 0.1555, - "step": 2860 - }, - { - "epoch": 0.93, - "learning_rate": 1.6157963334389623e-05, - "loss": 0.1593, - "step": 2870 - }, - { - "epoch": 0.93, - "learning_rate": 1.61303428871668e-05, - "loss": 0.155, - "step": 2880 - }, - { - "epoch": 0.94, - "learning_rate": 1.6102647310194964e-05, - "loss": 0.1502, - "step": 2890 - }, - { - "epoch": 0.94, - "learning_rate": 1.607487694289425e-05, - "loss": 0.144, - "step": 2900 - }, - { - "epoch": 0.94, - "learning_rate": 1.6047032125601364e-05, - "loss": 0.1422, - "step": 2910 - }, - { - "epoch": 0.95, - "learning_rate": 1.6019113199565424e-05, - "loss": 0.1594, - "step": 2920 - }, - { - "epoch": 0.95, - "learning_rate": 1.599112050694379e-05, - "loss": 0.1488, - "step": 2930 - }, - { - "epoch": 0.95, - "learning_rate": 1.596305439079785e-05, - "loss": 0.1631, - "step": 2940 - }, - { - "epoch": 0.96, - "learning_rate": 1.5934915195088842e-05, - "loss": 0.1401, - "step": 2950 - }, - { - "epoch": 0.96, - "learning_rate": 1.5906703264673598e-05, - "loss": 0.1526, - "step": 2960 - }, - { - "epoch": 0.96, - "learning_rate": 1.5878418945300363e-05, - "loss": 0.15, - "step": 2970 - }, - { - "epoch": 0.97, - "learning_rate": 1.5850062583604534e-05, - "loss": 0.1589, - "step": 2980 - }, - { - "epoch": 0.97, - "learning_rate": 1.58216345271044e-05, - "loss": 0.1551, - "step": 2990 - }, - { - "epoch": 0.97, - "learning_rate": 1.5793135124196916e-05, - "loss": 0.1482, - "step": 3000 - }, - { - "epoch": 0.98, - "learning_rate": 1.5764564724153406e-05, - "loss": 0.1518, - "step": 3010 - }, - { - "epoch": 0.98, - "learning_rate": 1.5735923677115298e-05, - "loss": 0.1495, - "step": 3020 - }, - { - "epoch": 0.98, - "learning_rate": 1.570721233408981e-05, - "loss": 0.1492, - "step": 3030 - }, - { - "epoch": 0.99, - "learning_rate": 1.567843104694569e-05, - "loss": 0.1538, - "step": 3040 - }, - { - "epoch": 0.99, - "learning_rate": 1.5649580168408854e-05, - "loss": 0.1521, - "step": 3050 - }, - { - "epoch": 0.99, - "learning_rate": 1.5620660052058108e-05, - "loss": 0.1593, - "step": 3060 - }, - { - "epoch": 1.0, - "learning_rate": 1.5591671052320784e-05, - "loss": 0.1604, - "step": 3070 - }, - { - "epoch": 1.0, - "learning_rate": 1.55626135244684e-05, - "loss": 0.1405, - "step": 3080 - }, - { - "epoch": 1.0, - "eval_loss": 0.1611328125, - "eval_runtime": 6.2849, - "eval_samples_per_second": 20.366, - "eval_steps_per_second": 0.159, - "step": 3084 - }, - { - "epoch": 1.0, - "learning_rate": 1.553348782461233e-05, - "loss": 0.1398, - "step": 3090 - }, - { - "epoch": 1.01, - "learning_rate": 1.550429430969941e-05, - "loss": 0.1198, - "step": 3100 - }, - { - "epoch": 1.01, - "learning_rate": 1.5475033337507583e-05, - "loss": 0.109, - "step": 3110 - }, - { - "epoch": 1.01, - "learning_rate": 1.54457052666415e-05, - "loss": 0.1167, - "step": 3120 - }, - { - "epoch": 1.01, - "learning_rate": 1.541631045652814e-05, - "loss": 0.108, - "step": 3130 - }, - { - "epoch": 1.02, - "learning_rate": 1.5386849267412388e-05, - "loss": 0.1184, - "step": 3140 - }, - { - "epoch": 1.02, - "learning_rate": 1.5357322060352646e-05, - "loss": 0.1193, - "step": 3150 - }, - { - "epoch": 1.02, - "learning_rate": 1.5327729197216373e-05, - "loss": 0.1218, - "step": 3160 - }, - { - "epoch": 1.03, - "learning_rate": 1.529807104067568e-05, - "loss": 0.1152, - "step": 3170 - }, - { - "epoch": 1.03, - "learning_rate": 1.5268347954202872e-05, - "loss": 0.1079, - "step": 3180 - }, - { - "epoch": 1.03, - "learning_rate": 1.5238560302065992e-05, - "loss": 0.1128, - "step": 3190 - }, - { - "epoch": 1.04, - "learning_rate": 1.5208708449324369e-05, - "loss": 0.1158, - "step": 3200 - }, - { - "epoch": 1.04, - "learning_rate": 1.5178792761824129e-05, - "loss": 0.1204, - "step": 3210 - }, - { - "epoch": 1.04, - "learning_rate": 1.5148813606193715e-05, - "loss": 0.111, - "step": 3220 - }, - { - "epoch": 1.05, - "learning_rate": 1.5118771349839402e-05, - "loss": 0.1161, - "step": 3230 - }, - { - "epoch": 1.05, - "learning_rate": 1.5088666360940795e-05, - "loss": 0.1158, - "step": 3240 - }, - { - "epoch": 1.05, - "learning_rate": 1.5058499008446296e-05, - "loss": 0.1143, - "step": 3250 - }, - { - "epoch": 1.06, - "learning_rate": 1.502826966206861e-05, - "loss": 0.113, - "step": 3260 - }, - { - "epoch": 1.06, - "learning_rate": 1.4997978692280191e-05, - "loss": 0.122, - "step": 3270 - }, - { - "epoch": 1.06, - "learning_rate": 1.496762647030872e-05, - "loss": 0.1213, - "step": 3280 - }, - { - "epoch": 1.07, - "learning_rate": 1.4937213368132549e-05, - "loss": 0.125, - "step": 3290 - }, - { - "epoch": 1.07, - "learning_rate": 1.490673975847613e-05, - "loss": 0.1162, - "step": 3300 - }, - { - "epoch": 1.07, - "learning_rate": 1.4876206014805465e-05, - "loss": 0.1181, - "step": 3310 - }, - { - "epoch": 1.08, - "learning_rate": 1.4845612511323526e-05, - "loss": 0.1216, - "step": 3320 - }, - { - "epoch": 1.08, - "learning_rate": 1.4814959622965657e-05, - "loss": 0.1216, - "step": 3330 - }, - { - "epoch": 1.08, - "learning_rate": 1.478424772539499e-05, - "loss": 0.106, - "step": 3340 - }, - { - "epoch": 1.09, - "learning_rate": 1.4753477194997836e-05, - "loss": 0.1239, - "step": 3350 - }, - { - "epoch": 1.09, - "learning_rate": 1.4722648408879078e-05, - "loss": 0.1101, - "step": 3360 - }, - { - "epoch": 1.09, - "learning_rate": 1.4691761744857545e-05, - "loss": 0.1233, - "step": 3370 - }, - { - "epoch": 1.1, - "learning_rate": 1.466081758146138e-05, - "loss": 0.117, - "step": 3380 - }, - { - "epoch": 1.1, - "learning_rate": 1.4629816297923404e-05, - "loss": 0.1162, - "step": 3390 - }, - { - "epoch": 1.1, - "learning_rate": 1.4598758274176467e-05, - "loss": 0.1214, - "step": 3400 - }, - { - "epoch": 1.11, - "learning_rate": 1.4567643890848796e-05, - "loss": 0.1139, - "step": 3410 - }, - { - "epoch": 1.11, - "learning_rate": 1.4536473529259325e-05, - "loss": 0.1191, - "step": 3420 - }, - { - "epoch": 1.11, - "learning_rate": 1.4505247571413019e-05, - "loss": 0.1132, - "step": 3430 - }, - { - "epoch": 1.12, - "learning_rate": 1.4473966399996203e-05, - "loss": 0.1151, - "step": 3440 - }, - { - "epoch": 1.12, - "learning_rate": 1.444263039837186e-05, - "loss": 0.1244, - "step": 3450 - }, - { - "epoch": 1.12, - "learning_rate": 1.4411239950574946e-05, - "loss": 0.113, - "step": 3460 - }, - { - "epoch": 1.13, - "learning_rate": 1.4379795441307673e-05, - "loss": 0.1155, - "step": 3470 - }, - { - "epoch": 1.13, - "learning_rate": 1.4348297255934793e-05, - "loss": 0.12, - "step": 3480 - }, - { - "epoch": 1.13, - "learning_rate": 1.4316745780478885e-05, - "loss": 0.1129, - "step": 3490 - }, - { - "epoch": 1.13, - "learning_rate": 1.4285141401615619e-05, - "loss": 0.1191, - "step": 3500 - }, - { - "epoch": 1.14, - "learning_rate": 1.4253484506669012e-05, - "loss": 0.1143, - "step": 3510 - }, - { - "epoch": 1.14, - "learning_rate": 1.422177548360669e-05, - "loss": 0.124, - "step": 3520 - }, - { - "epoch": 1.14, - "learning_rate": 1.4190014721035127e-05, - "loss": 0.1236, - "step": 3530 - }, - { - "epoch": 1.15, - "learning_rate": 1.4158202608194893e-05, - "loss": 0.116, - "step": 3540 - }, - { - "epoch": 1.15, - "learning_rate": 1.4126339534955863e-05, - "loss": 0.1128, - "step": 3550 - }, - { - "epoch": 1.15, - "learning_rate": 1.4094425891812457e-05, - "loss": 0.1196, - "step": 3560 - }, - { - "epoch": 1.16, - "learning_rate": 1.4062462069878855e-05, - "loss": 0.1128, - "step": 3570 - }, - { - "epoch": 1.16, - "learning_rate": 1.4030448460884191e-05, - "loss": 0.1163, - "step": 3580 - }, - { - "epoch": 1.16, - "learning_rate": 1.3998385457167758e-05, - "loss": 0.1178, - "step": 3590 - }, - { - "epoch": 1.17, - "learning_rate": 1.3966273451674203e-05, - "loss": 0.1128, - "step": 3600 - }, - { - "epoch": 1.17, - "learning_rate": 1.3934112837948712e-05, - "loss": 0.1167, - "step": 3610 - }, - { - "epoch": 1.17, - "learning_rate": 1.3901904010132178e-05, - "loss": 0.1181, - "step": 3620 - }, - { - "epoch": 1.18, - "learning_rate": 1.3869647362956381e-05, - "loss": 0.1124, - "step": 3630 - }, - { - "epoch": 1.18, - "learning_rate": 1.3837343291739143e-05, - "loss": 0.1189, - "step": 3640 - }, - { - "epoch": 1.18, - "learning_rate": 1.3804992192379487e-05, - "loss": 0.121, - "step": 3650 - }, - { - "epoch": 1.19, - "learning_rate": 1.3772594461352786e-05, - "loss": 0.1185, - "step": 3660 - }, - { - "epoch": 1.19, - "learning_rate": 1.3740150495705904e-05, - "loss": 0.1208, - "step": 3670 - }, - { - "epoch": 1.19, - "learning_rate": 1.3707660693052318e-05, - "loss": 0.1214, - "step": 3680 - }, - { - "epoch": 1.2, - "learning_rate": 1.3675125451567268e-05, - "loss": 0.1103, - "step": 3690 - }, - { - "epoch": 1.2, - "learning_rate": 1.364254516998286e-05, - "loss": 0.1119, - "step": 3700 - }, - { - "epoch": 1.2, - "learning_rate": 1.3609920247583182e-05, - "loss": 0.1192, - "step": 3710 - }, - { - "epoch": 1.21, - "learning_rate": 1.3577251084199412e-05, - "loss": 0.1249, - "step": 3720 - }, - { - "epoch": 1.21, - "learning_rate": 1.3544538080204922e-05, - "loss": 0.1212, - "step": 3730 - }, - { - "epoch": 1.21, - "learning_rate": 1.351178163651037e-05, - "loss": 0.115, - "step": 3740 - }, - { - "epoch": 1.22, - "learning_rate": 1.3478982154558778e-05, - "loss": 0.1195, - "step": 3750 - }, - { - "epoch": 1.22, - "learning_rate": 1.3446140036320621e-05, - "loss": 0.1264, - "step": 3760 - }, - { - "epoch": 1.22, - "learning_rate": 1.34132556842889e-05, - "loss": 0.1165, - "step": 3770 - }, - { - "epoch": 1.23, - "learning_rate": 1.3380329501474207e-05, - "loss": 0.1211, - "step": 3780 - }, - { - "epoch": 1.23, - "learning_rate": 1.3347361891399786e-05, - "loss": 0.113, - "step": 3790 - }, - { - "epoch": 1.23, - "learning_rate": 1.3314353258096588e-05, - "loss": 0.1135, - "step": 3800 - }, - { - "epoch": 1.24, - "learning_rate": 1.3281304006098324e-05, - "loss": 0.1125, - "step": 3810 - }, - { - "epoch": 1.24, - "learning_rate": 1.3248214540436495e-05, - "loss": 0.1245, - "step": 3820 - }, - { - "epoch": 1.24, - "learning_rate": 1.3215085266635442e-05, - "loss": 0.1112, - "step": 3830 - }, - { - "epoch": 1.25, - "learning_rate": 1.3181916590707366e-05, - "loss": 0.1209, - "step": 3840 - }, - { - "epoch": 1.25, - "learning_rate": 1.3148708919147364e-05, - "loss": 0.117, - "step": 3850 - }, - { - "epoch": 1.25, - "learning_rate": 1.3115462658928434e-05, - "loss": 0.1164, - "step": 3860 - }, - { - "epoch": 1.25, - "learning_rate": 1.3082178217496488e-05, - "loss": 0.1148, - "step": 3870 - }, - { - "epoch": 1.26, - "learning_rate": 1.304885600276538e-05, - "loss": 0.1159, - "step": 3880 - }, - { - "epoch": 1.26, - "learning_rate": 1.3015496423111871e-05, - "loss": 0.1198, - "step": 3890 - }, - { - "epoch": 1.26, - "learning_rate": 1.298209988737066e-05, - "loss": 0.1186, - "step": 3900 - }, - { - "epoch": 1.27, - "learning_rate": 1.2948666804829345e-05, - "loss": 0.1093, - "step": 3910 - }, - { - "epoch": 1.27, - "learning_rate": 1.2915197585223427e-05, - "loss": 0.1189, - "step": 3920 - }, - { - "epoch": 1.27, - "learning_rate": 1.288169263873128e-05, - "loss": 0.1027, - "step": 3930 - }, - { - "epoch": 1.28, - "learning_rate": 1.284815237596912e-05, - "loss": 0.114, - "step": 3940 - }, - { - "epoch": 1.28, - "learning_rate": 1.2814577207985984e-05, - "loss": 0.11, - "step": 3950 - }, - { - "epoch": 1.28, - "learning_rate": 1.2780967546258683e-05, - "loss": 0.1129, - "step": 3960 - }, - { - "epoch": 1.29, - "learning_rate": 1.2747323802686761e-05, - "loss": 0.1159, - "step": 3970 - }, - { - "epoch": 1.29, - "learning_rate": 1.2713646389587453e-05, - "loss": 0.1213, - "step": 3980 - }, - { - "epoch": 1.29, - "learning_rate": 1.267993571969062e-05, - "loss": 0.1117, - "step": 3990 - }, - { - "epoch": 1.3, - "learning_rate": 1.2646192206133705e-05, - "loss": 0.1187, - "step": 4000 - }, - { - "epoch": 1.3, - "learning_rate": 1.2612416262456659e-05, - "loss": 0.1165, - "step": 4010 - }, - { - "epoch": 1.3, - "learning_rate": 1.2578608302596878e-05, - "loss": 0.1277, - "step": 4020 - }, - { - "epoch": 1.31, - "learning_rate": 1.254476874088413e-05, - "loss": 0.1223, - "step": 4030 - }, - { - "epoch": 1.31, - "learning_rate": 1.2510897992035475e-05, - "loss": 0.1187, - "step": 4040 - }, - { - "epoch": 1.31, - "learning_rate": 1.2476996471150183e-05, - "loss": 0.1177, - "step": 4050 - }, - { - "epoch": 1.32, - "learning_rate": 1.2443064593704645e-05, - "loss": 0.1202, - "step": 4060 - }, - { - "epoch": 1.32, - "learning_rate": 1.240910277554729e-05, - "loss": 0.1163, - "step": 4070 - }, - { - "epoch": 1.32, - "learning_rate": 1.2375111432893479e-05, - "loss": 0.1062, - "step": 4080 - }, - { - "epoch": 1.33, - "learning_rate": 1.2341090982320398e-05, - "loss": 0.1186, - "step": 4090 - }, - { - "epoch": 1.33, - "learning_rate": 1.2307041840761983e-05, - "loss": 0.1193, - "step": 4100 - }, - { - "epoch": 1.33, - "learning_rate": 1.2272964425503768e-05, - "loss": 0.1174, - "step": 4110 - }, - { - "epoch": 1.34, - "learning_rate": 1.2238859154177805e-05, - "loss": 0.109, - "step": 4120 - }, - { - "epoch": 1.34, - "learning_rate": 1.2204726444757527e-05, - "loss": 0.1251, - "step": 4130 - }, - { - "epoch": 1.34, - "learning_rate": 1.2170566715552634e-05, - "loss": 0.1166, - "step": 4140 - }, - { - "epoch": 1.35, - "learning_rate": 1.2136380385203965e-05, - "loss": 0.1123, - "step": 4150 - }, - { - "epoch": 1.35, - "learning_rate": 1.2102167872678366e-05, - "loss": 0.1273, - "step": 4160 - }, - { - "epoch": 1.35, - "learning_rate": 1.2067929597263552e-05, - "loss": 0.1201, - "step": 4170 - }, - { - "epoch": 1.36, - "learning_rate": 1.2033665978562973e-05, - "loss": 0.1197, - "step": 4180 - }, - { - "epoch": 1.36, - "learning_rate": 1.1999377436490682e-05, - "loss": 0.1126, - "step": 4190 - }, - { - "epoch": 1.36, - "learning_rate": 1.1965064391266158e-05, - "loss": 0.1264, - "step": 4200 - }, - { - "epoch": 1.37, - "learning_rate": 1.1930727263409194e-05, - "loss": 0.1153, - "step": 4210 - }, - { - "epoch": 1.37, - "learning_rate": 1.1896366473734715e-05, - "loss": 0.1085, - "step": 4220 - }, - { - "epoch": 1.37, - "learning_rate": 1.1861982443347633e-05, - "loss": 0.1116, - "step": 4230 - }, - { - "epoch": 1.37, - "learning_rate": 1.1827575593637683e-05, - "loss": 0.1107, - "step": 4240 - }, - { - "epoch": 1.38, - "learning_rate": 1.1793146346274262e-05, - "loss": 0.121, - "step": 4250 - }, - { - "epoch": 1.38, - "learning_rate": 1.1758695123201262e-05, - "loss": 0.1179, - "step": 4260 - }, - { - "epoch": 1.38, - "learning_rate": 1.1724222346631886e-05, - "loss": 0.1118, - "step": 4270 - }, - { - "epoch": 1.39, - "learning_rate": 1.1689728439043495e-05, - "loss": 0.1135, - "step": 4280 - }, - { - "epoch": 1.39, - "learning_rate": 1.1655213823172407e-05, - "loss": 0.1168, - "step": 4290 - }, - { - "epoch": 1.39, - "learning_rate": 1.1620678922008736e-05, - "loss": 0.1076, - "step": 4300 - }, - { - "epoch": 1.4, - "learning_rate": 1.1586124158791205e-05, - "loss": 0.1145, - "step": 4310 - }, - { - "epoch": 1.4, - "learning_rate": 1.1551549957001944e-05, - "loss": 0.1222, - "step": 4320 - }, - { - "epoch": 1.4, - "learning_rate": 1.151695674036131e-05, - "loss": 0.1219, - "step": 4330 - }, - { - "epoch": 1.41, - "learning_rate": 1.1482344932822706e-05, - "loss": 0.1145, - "step": 4340 - }, - { - "epoch": 1.41, - "learning_rate": 1.1447714958567361e-05, - "loss": 0.1201, - "step": 4350 - }, - { - "epoch": 1.41, - "learning_rate": 1.1413067241999153e-05, - "loss": 0.1203, - "step": 4360 - }, - { - "epoch": 1.42, - "learning_rate": 1.1378402207739394e-05, - "loss": 0.1135, - "step": 4370 - }, - { - "epoch": 1.42, - "learning_rate": 1.134372028062163e-05, - "loss": 0.1151, - "step": 4380 - }, - { - "epoch": 1.42, - "learning_rate": 1.1309021885686446e-05, - "loss": 0.1167, - "step": 4390 - }, - { - "epoch": 1.43, - "learning_rate": 1.1274307448176227e-05, - "loss": 0.1125, - "step": 4400 - }, - { - "epoch": 1.43, - "learning_rate": 1.1239577393529988e-05, - "loss": 0.1128, - "step": 4410 - }, - { - "epoch": 1.43, - "learning_rate": 1.1204832147378125e-05, - "loss": 0.1201, - "step": 4420 - }, - { - "epoch": 1.44, - "learning_rate": 1.1170072135537213e-05, - "loss": 0.1081, - "step": 4430 - }, - { - "epoch": 1.44, - "learning_rate": 1.113529778400479e-05, - "loss": 0.1055, - "step": 4440 - }, - { - "epoch": 1.44, - "learning_rate": 1.110050951895413e-05, - "loss": 0.1167, - "step": 4450 - }, - { - "epoch": 1.45, - "learning_rate": 1.1065707766729024e-05, - "loss": 0.1257, - "step": 4460 - }, - { - "epoch": 1.45, - "learning_rate": 1.1030892953838548e-05, - "loss": 0.1137, - "step": 4470 - }, - { - "epoch": 1.45, - "learning_rate": 1.0996065506951854e-05, - "loss": 0.1106, - "step": 4480 - }, - { - "epoch": 1.46, - "learning_rate": 1.0961225852892914e-05, - "loss": 0.111, - "step": 4490 - }, - { - "epoch": 1.46, - "learning_rate": 1.0926374418635317e-05, - "loss": 0.107, - "step": 4500 - }, - { - "epoch": 1.46, - "learning_rate": 1.0891511631297009e-05, - "loss": 0.117, - "step": 4510 - }, - { - "epoch": 1.47, - "learning_rate": 1.0856637918135087e-05, - "loss": 0.1237, - "step": 4520 - }, - { - "epoch": 1.47, - "learning_rate": 1.0821753706540539e-05, - "loss": 0.1168, - "step": 4530 - }, - { - "epoch": 1.47, - "learning_rate": 1.0786859424033014e-05, - "loss": 0.1055, - "step": 4540 - }, - { - "epoch": 1.48, - "learning_rate": 1.0751955498255595e-05, - "loss": 0.1207, - "step": 4550 - }, - { - "epoch": 1.48, - "learning_rate": 1.0717042356969529e-05, - "loss": 0.1104, - "step": 4560 - }, - { - "epoch": 1.48, - "learning_rate": 1.0682120428049025e-05, - "loss": 0.1231, - "step": 4570 - }, - { - "epoch": 1.49, - "learning_rate": 1.0647190139475967e-05, - "loss": 0.1176, - "step": 4580 - }, - { - "epoch": 1.49, - "learning_rate": 1.0612251919334703e-05, - "loss": 0.1168, - "step": 4590 - }, - { - "epoch": 1.49, - "learning_rate": 1.057730619580678e-05, - "loss": 0.1098, - "step": 4600 - }, - { - "epoch": 1.49, - "learning_rate": 1.0542353397165706e-05, - "loss": 0.1119, - "step": 4610 - }, - { - "epoch": 1.5, - "learning_rate": 1.0507393951771695e-05, - "loss": 0.111, - "step": 4620 - }, - { - "epoch": 1.5, - "learning_rate": 1.0472428288066413e-05, - "loss": 0.1134, - "step": 4630 - }, - { - "epoch": 1.5, - "learning_rate": 1.043745683456775e-05, - "loss": 0.1146, - "step": 4640 - }, - { - "epoch": 1.51, - "learning_rate": 1.040248001986453e-05, - "loss": 0.1133, - "step": 4650 - }, - { - "epoch": 1.51, - "learning_rate": 1.0367498272611303e-05, - "loss": 0.1121, - "step": 4660 - }, - { - "epoch": 1.51, - "learning_rate": 1.0332512021523054e-05, - "loss": 0.1174, - "step": 4670 - }, - { - "epoch": 1.52, - "learning_rate": 1.0297521695369974e-05, - "loss": 0.1161, - "step": 4680 - }, - { - "epoch": 1.52, - "learning_rate": 1.0262527722972185e-05, - "loss": 0.1004, - "step": 4690 - }, - { - "epoch": 1.52, - "learning_rate": 1.0227530533194508e-05, - "loss": 0.1155, - "step": 4700 - }, - { - "epoch": 1.53, - "learning_rate": 1.0192530554941177e-05, - "loss": 0.1261, - "step": 4710 - }, - { - "epoch": 1.53, - "learning_rate": 1.0157528217150624e-05, - "loss": 0.1201, - "step": 4720 - }, - { - "epoch": 1.53, - "learning_rate": 1.0122523948790174e-05, - "loss": 0.1192, - "step": 4730 - }, - { - "epoch": 1.54, - "learning_rate": 1.0087518178850824e-05, - "loss": 0.1115, - "step": 4740 - }, - { - "epoch": 1.54, - "learning_rate": 1.005251133634198e-05, - "loss": 0.1127, - "step": 4750 - }, - { - "epoch": 1.54, - "learning_rate": 1.0017503850286167e-05, - "loss": 0.1117, - "step": 4760 - }, - { - "epoch": 1.55, - "learning_rate": 9.982496149713835e-06, - "loss": 0.1112, - "step": 4770 - }, - { - "epoch": 1.55, - "learning_rate": 9.947488663658027e-06, - "loss": 0.1084, - "step": 4780 - }, - { - "epoch": 1.55, - "learning_rate": 9.912481821149176e-06, - "loss": 0.1109, - "step": 4790 - }, - { - "epoch": 1.56, - "learning_rate": 9.877476051209827e-06, - "loss": 0.1051, - "step": 4800 - }, - { - "epoch": 1.56, - "learning_rate": 9.842471782849381e-06, - "loss": 0.1187, - "step": 4810 - }, - { - "epoch": 1.56, - "learning_rate": 9.807469445058824e-06, - "loss": 0.1246, - "step": 4820 - }, - { - "epoch": 1.57, - "learning_rate": 9.772469466805499e-06, - "loss": 0.1111, - "step": 4830 - }, - { - "epoch": 1.57, - "learning_rate": 9.737472277027817e-06, - "loss": 0.112, - "step": 4840 - }, - { - "epoch": 1.57, - "learning_rate": 9.702478304630028e-06, - "loss": 0.112, - "step": 4850 - }, - { - "epoch": 1.58, - "learning_rate": 9.66748797847695e-06, - "loss": 0.115, - "step": 4860 - }, - { - "epoch": 1.58, - "learning_rate": 9.6325017273887e-06, - "loss": 0.1166, - "step": 4870 - }, - { - "epoch": 1.58, - "learning_rate": 9.597519980135472e-06, - "loss": 0.1186, - "step": 4880 - }, - { - "epoch": 1.59, - "learning_rate": 9.562543165432255e-06, - "loss": 0.1185, - "step": 4890 - }, - { - "epoch": 1.59, - "learning_rate": 9.52757171193359e-06, - "loss": 0.1133, - "step": 4900 - }, - { - "epoch": 1.59, - "learning_rate": 9.49260604822831e-06, - "loss": 0.1193, - "step": 4910 - }, - { - "epoch": 1.6, - "learning_rate": 9.457646602834295e-06, - "loss": 0.1076, - "step": 4920 - }, - { - "epoch": 1.6, - "learning_rate": 9.42269380419322e-06, - "loss": 0.1147, - "step": 4930 - }, - { - "epoch": 1.6, - "learning_rate": 9.387748080665298e-06, - "loss": 0.1067, - "step": 4940 - }, - { - "epoch": 1.61, - "learning_rate": 9.352809860524037e-06, - "loss": 0.1146, - "step": 4950 - }, - { - "epoch": 1.61, - "learning_rate": 9.31787957195098e-06, - "loss": 0.1094, - "step": 4960 - }, - { - "epoch": 1.61, - "learning_rate": 9.28295764303047e-06, - "loss": 0.1011, - "step": 4970 - }, - { - "epoch": 1.61, - "learning_rate": 9.248044501744409e-06, - "loss": 0.1108, - "step": 4980 - }, - { - "epoch": 1.62, - "learning_rate": 9.21314057596699e-06, - "loss": 0.1108, - "step": 4990 - }, - { - "epoch": 1.62, - "learning_rate": 9.178246293459466e-06, - "loss": 0.1078, - "step": 5000 - }, - { - "epoch": 1.62, - "learning_rate": 9.143362081864917e-06, - "loss": 0.1123, - "step": 5010 - }, - { - "epoch": 1.63, - "learning_rate": 9.108488368702991e-06, - "loss": 0.1079, - "step": 5020 - }, - { - "epoch": 1.63, - "learning_rate": 9.073625581364686e-06, - "loss": 0.1053, - "step": 5030 - }, - { - "epoch": 1.63, - "learning_rate": 9.03877414710709e-06, - "loss": 0.1116, - "step": 5040 - }, - { - "epoch": 1.64, - "learning_rate": 9.00393449304815e-06, - "loss": 0.1056, - "step": 5050 - }, - { - "epoch": 1.64, - "learning_rate": 8.969107046161452e-06, - "loss": 0.1082, - "step": 5060 - }, - { - "epoch": 1.64, - "learning_rate": 8.93429223327098e-06, - "loss": 0.1106, - "step": 5070 - }, - { - "epoch": 1.65, - "learning_rate": 8.899490481045873e-06, - "loss": 0.1157, - "step": 5080 - }, - { - "epoch": 1.65, - "learning_rate": 8.864702215995213e-06, - "loss": 0.1134, - "step": 5090 - }, - { - "epoch": 1.65, - "learning_rate": 8.82992786446279e-06, - "loss": 0.111, - "step": 5100 - }, - { - "epoch": 1.66, - "learning_rate": 8.795167852621877e-06, - "loss": 0.1267, - "step": 5110 - }, - { - "epoch": 1.66, - "learning_rate": 8.760422606470015e-06, - "loss": 0.1096, - "step": 5120 - }, - { - "epoch": 1.66, - "learning_rate": 8.725692551823776e-06, - "loss": 0.111, - "step": 5130 - }, - { - "epoch": 1.67, - "learning_rate": 8.69097811431356e-06, - "loss": 0.1127, - "step": 5140 - }, - { - "epoch": 1.67, - "learning_rate": 8.65627971937837e-06, - "loss": 0.1062, - "step": 5150 - }, - { - "epoch": 1.67, - "learning_rate": 8.621597792260608e-06, - "loss": 0.1128, - "step": 5160 - }, - { - "epoch": 1.68, - "learning_rate": 8.58693275800085e-06, - "loss": 0.1089, - "step": 5170 - }, - { - "epoch": 1.68, - "learning_rate": 8.55228504143264e-06, - "loss": 0.1109, - "step": 5180 - }, - { - "epoch": 1.68, - "learning_rate": 8.517655067177295e-06, - "loss": 0.1125, - "step": 5190 - }, - { - "epoch": 1.69, - "learning_rate": 8.48304325963869e-06, - "loss": 0.1176, - "step": 5200 - }, - { - "epoch": 1.69, - "learning_rate": 8.44845004299806e-06, - "loss": 0.1101, - "step": 5210 - }, - { - "epoch": 1.69, - "learning_rate": 8.413875841208797e-06, - "loss": 0.1122, - "step": 5220 - }, - { - "epoch": 1.7, - "learning_rate": 8.379321077991265e-06, - "loss": 0.1115, - "step": 5230 - }, - { - "epoch": 1.7, - "learning_rate": 8.344786176827594e-06, - "loss": 0.1139, - "step": 5240 - }, - { - "epoch": 1.7, - "learning_rate": 8.310271560956509e-06, - "loss": 0.1117, - "step": 5250 - }, - { - "epoch": 1.71, - "learning_rate": 8.275777653368119e-06, - "loss": 0.1073, - "step": 5260 - }, - { - "epoch": 1.71, - "learning_rate": 8.241304876798742e-06, - "loss": 0.1193, - "step": 5270 - }, - { - "epoch": 1.71, - "learning_rate": 8.20685365372574e-06, - "loss": 0.1171, - "step": 5280 - }, - { - "epoch": 1.72, - "learning_rate": 8.172424406362319e-06, - "loss": 0.1189, - "step": 5290 - }, - { - "epoch": 1.72, - "learning_rate": 8.13801755665237e-06, - "loss": 0.1183, - "step": 5300 - }, - { - "epoch": 1.72, - "learning_rate": 8.103633526265289e-06, - "loss": 0.1169, - "step": 5310 - }, - { - "epoch": 1.73, - "learning_rate": 8.069272736590809e-06, - "loss": 0.1044, - "step": 5320 - }, - { - "epoch": 1.73, - "learning_rate": 8.034935608733843e-06, - "loss": 0.1128, - "step": 5330 - }, - { - "epoch": 1.73, - "learning_rate": 8.00062256350932e-06, - "loss": 0.1086, - "step": 5340 - }, - { - "epoch": 1.73, - "learning_rate": 7.966334021437028e-06, - "loss": 0.1181, - "step": 5350 - }, - { - "epoch": 1.74, - "learning_rate": 7.932070402736451e-06, - "loss": 0.1153, - "step": 5360 - }, - { - "epoch": 1.74, - "learning_rate": 7.897832127321639e-06, - "loss": 0.1158, - "step": 5370 - }, - { - "epoch": 1.74, - "learning_rate": 7.863619614796035e-06, - "loss": 0.1068, - "step": 5380 - }, - { - "epoch": 1.75, - "learning_rate": 7.829433284447367e-06, - "loss": 0.1138, - "step": 5390 - }, - { - "epoch": 1.75, - "learning_rate": 7.795273555242476e-06, - "loss": 0.1123, - "step": 5400 - }, - { - "epoch": 1.75, - "learning_rate": 7.761140845822199e-06, - "loss": 0.1093, - "step": 5410 - }, - { - "epoch": 1.76, - "learning_rate": 7.727035574496234e-06, - "loss": 0.1094, - "step": 5420 - }, - { - "epoch": 1.76, - "learning_rate": 7.69295815923802e-06, - "loss": 0.113, - "step": 5430 - }, - { - "epoch": 1.76, - "learning_rate": 7.658909017679604e-06, - "loss": 0.1124, - "step": 5440 - }, - { - "epoch": 1.77, - "learning_rate": 7.6248885671065264e-06, - "loss": 0.1058, - "step": 5450 - }, - { - "epoch": 1.77, - "learning_rate": 7.590897224452716e-06, - "loss": 0.1107, - "step": 5460 - }, - { - "epoch": 1.77, - "learning_rate": 7.556935406295356e-06, - "loss": 0.106, - "step": 5470 - }, - { - "epoch": 1.78, - "learning_rate": 7.5230035288498204e-06, - "loss": 0.115, - "step": 5480 - }, - { - "epoch": 1.78, - "learning_rate": 7.4891020079645285e-06, - "loss": 0.1082, - "step": 5490 - }, - { - "epoch": 1.78, - "learning_rate": 7.455231259115872e-06, - "loss": 0.1146, - "step": 5500 - }, - { - "epoch": 1.79, - "learning_rate": 7.421391697403122e-06, - "loss": 0.1126, - "step": 5510 - }, - { - "epoch": 1.79, - "learning_rate": 7.3875837375433445e-06, - "loss": 0.1119, - "step": 5520 - }, - { - "epoch": 1.79, - "learning_rate": 7.353807793866299e-06, - "loss": 0.1081, - "step": 5530 - }, - { - "epoch": 1.8, - "learning_rate": 7.3200642803093835e-06, - "loss": 0.1127, - "step": 5540 - }, - { - "epoch": 1.8, - "learning_rate": 7.286353610412553e-06, - "loss": 0.1146, - "step": 5550 - }, - { - "epoch": 1.8, - "learning_rate": 7.2526761973132395e-06, - "loss": 0.1079, - "step": 5560 - }, - { - "epoch": 1.81, - "learning_rate": 7.2190324537413196e-06, - "loss": 0.1059, - "step": 5570 - }, - { - "epoch": 1.81, - "learning_rate": 7.185422792014019e-06, - "loss": 0.1072, - "step": 5580 - }, - { - "epoch": 1.81, - "learning_rate": 7.151847624030882e-06, - "loss": 0.1123, - "step": 5590 - }, - { - "epoch": 1.82, - "learning_rate": 7.118307361268721e-06, - "loss": 0.108, - "step": 5600 - }, - { - "epoch": 1.82, - "learning_rate": 7.084802414776575e-06, - "loss": 0.1056, - "step": 5610 - }, - { - "epoch": 1.82, - "learning_rate": 7.051333195170658e-06, - "loss": 0.099, - "step": 5620 - }, - { - "epoch": 1.83, - "learning_rate": 7.0179001126293435e-06, - "loss": 0.1123, - "step": 5630 - }, - { - "epoch": 1.83, - "learning_rate": 6.9845035768881285e-06, - "loss": 0.1089, - "step": 5640 - }, - { - "epoch": 1.83, - "learning_rate": 6.951143997234622e-06, - "loss": 0.1123, - "step": 5650 - }, - { - "epoch": 1.84, - "learning_rate": 6.917821782503513e-06, - "loss": 0.1081, - "step": 5660 - }, - { - "epoch": 1.84, - "learning_rate": 6.884537341071571e-06, - "loss": 0.1112, - "step": 5670 - }, - { - "epoch": 1.84, - "learning_rate": 6.85129108085264e-06, - "loss": 0.1064, - "step": 5680 - }, - { - "epoch": 1.85, - "learning_rate": 6.818083409292634e-06, - "loss": 0.1145, - "step": 5690 - }, - { - "epoch": 1.85, - "learning_rate": 6.784914733364563e-06, - "loss": 0.1083, - "step": 5700 - }, - { - "epoch": 1.85, - "learning_rate": 6.751785459563509e-06, - "loss": 0.119, - "step": 5710 - }, - { - "epoch": 1.85, - "learning_rate": 6.718695993901678e-06, - "loss": 0.1134, - "step": 5720 - }, - { - "epoch": 1.86, - "learning_rate": 6.685646741903411e-06, - "loss": 0.1154, - "step": 5730 - }, - { - "epoch": 1.86, - "learning_rate": 6.652638108600215e-06, - "loss": 0.1128, - "step": 5740 - }, - { - "epoch": 1.86, - "learning_rate": 6.619670498525796e-06, - "loss": 0.1043, - "step": 5750 - }, - { - "epoch": 1.87, - "learning_rate": 6.586744315711102e-06, - "loss": 0.1103, - "step": 5760 - }, - { - "epoch": 1.87, - "learning_rate": 6.5538599636793846e-06, - "loss": 0.1063, - "step": 5770 - }, - { - "epoch": 1.87, - "learning_rate": 6.521017845441225e-06, - "loss": 0.1125, - "step": 5780 - }, - { - "epoch": 1.88, - "learning_rate": 6.488218363489633e-06, - "loss": 0.105, - "step": 5790 - }, - { - "epoch": 1.88, - "learning_rate": 6.455461919795079e-06, - "loss": 0.1096, - "step": 5800 - }, - { - "epoch": 1.88, - "learning_rate": 6.422748915800592e-06, - "loss": 0.1126, - "step": 5810 - }, - { - "epoch": 1.89, - "learning_rate": 6.39007975241682e-06, - "loss": 0.1078, - "step": 5820 - }, - { - "epoch": 1.89, - "learning_rate": 6.357454830017143e-06, - "loss": 0.1161, - "step": 5830 - }, - { - "epoch": 1.89, - "learning_rate": 6.324874548432734e-06, - "loss": 0.1121, - "step": 5840 - }, - { - "epoch": 1.9, - "learning_rate": 6.292339306947685e-06, - "loss": 0.1067, - "step": 5850 - }, - { - "epoch": 1.9, - "learning_rate": 6.259849504294102e-06, - "loss": 0.1119, - "step": 5860 - }, - { - "epoch": 1.9, - "learning_rate": 6.227405538647213e-06, - "loss": 0.1046, - "step": 5870 - }, - { - "epoch": 1.91, - "learning_rate": 6.195007807620514e-06, - "loss": 0.1049, - "step": 5880 - }, - { - "epoch": 1.91, - "learning_rate": 6.16265670826086e-06, - "loss": 0.1111, - "step": 5890 - }, - { - "epoch": 1.91, - "learning_rate": 6.130352637043622e-06, - "loss": 0.0993, - "step": 5900 - }, - { - "epoch": 1.92, - "learning_rate": 6.098095989867822e-06, - "loss": 0.1073, - "step": 5910 - }, - { - "epoch": 1.92, - "learning_rate": 6.065887162051291e-06, - "loss": 0.1219, - "step": 5920 - }, - { - "epoch": 1.92, - "learning_rate": 6.033726548325798e-06, - "loss": 0.1139, - "step": 5930 - }, - { - "epoch": 1.93, - "learning_rate": 6.0016145428322445e-06, - "loss": 0.1108, - "step": 5940 - }, - { - "epoch": 1.93, - "learning_rate": 5.969551539115814e-06, - "loss": 0.1118, - "step": 5950 - }, - { - "epoch": 1.93, - "learning_rate": 5.937537930121145e-06, - "loss": 0.1002, - "step": 5960 - }, - { - "epoch": 1.94, - "learning_rate": 5.905574108187544e-06, - "loss": 0.1038, - "step": 5970 - }, - { - "epoch": 1.94, - "learning_rate": 5.873660465044141e-06, - "loss": 0.1023, - "step": 5980 - }, - { - "epoch": 1.94, - "learning_rate": 5.841797391805113e-06, - "loss": 0.1099, - "step": 5990 - }, - { - "epoch": 1.95, - "learning_rate": 5.809985278964875e-06, - "loss": 0.104, - "step": 6000 - }, - { - "epoch": 1.95, - "learning_rate": 5.778224516393312e-06, - "loss": 0.1036, - "step": 6010 - }, - { - "epoch": 1.95, - "learning_rate": 5.746515493330992e-06, - "loss": 0.1053, - "step": 6020 - }, - { - "epoch": 1.96, - "learning_rate": 5.714858598384387e-06, - "loss": 0.1099, - "step": 6030 - }, - { - "epoch": 1.96, - "learning_rate": 5.683254219521117e-06, - "loss": 0.1014, - "step": 6040 - }, - { - "epoch": 1.96, - "learning_rate": 5.651702744065207e-06, - "loss": 0.1054, - "step": 6050 - }, - { - "epoch": 1.96, - "learning_rate": 5.620204558692331e-06, - "loss": 0.102, - "step": 6060 - }, - { - "epoch": 1.97, - "learning_rate": 5.588760049425057e-06, - "loss": 0.1084, - "step": 6070 - }, - { - "epoch": 1.97, - "learning_rate": 5.557369601628142e-06, - "loss": 0.1095, - "step": 6080 - }, - { - "epoch": 1.97, - "learning_rate": 5.5260336000038e-06, - "loss": 0.1104, - "step": 6090 - }, - { - "epoch": 1.98, - "learning_rate": 5.494752428586985e-06, - "loss": 0.1011, - "step": 6100 - }, - { - "epoch": 1.98, - "learning_rate": 5.46352647074068e-06, - "loss": 0.1108, - "step": 6110 - }, - { - "epoch": 1.98, - "learning_rate": 5.4323561091512045e-06, - "loss": 0.1034, - "step": 6120 - }, - { - "epoch": 1.99, - "learning_rate": 5.401241725823536e-06, - "loss": 0.1085, - "step": 6130 - }, - { - "epoch": 1.99, - "learning_rate": 5.370183702076599e-06, - "loss": 0.116, - "step": 6140 - }, - { - "epoch": 1.99, - "learning_rate": 5.33918241853862e-06, - "loss": 0.1138, - "step": 6150 - }, - { - "epoch": 2.0, - "learning_rate": 5.308238255142457e-06, - "loss": 0.112, - "step": 6160 - }, - { - "epoch": 2.0, - "eval_loss": 0.1484375, - "eval_runtime": 6.4637, - "eval_samples_per_second": 19.803, - "eval_steps_per_second": 0.155, - "step": 6168 - }, - { - "epoch": 2.0, - "learning_rate": 5.277351591120926e-06, - "loss": 0.0985, - "step": 6170 - }, - { - "epoch": 2.0, - "learning_rate": 5.246522805002168e-06, - "loss": 0.0714, - "step": 6180 - }, - { - "epoch": 2.01, - "learning_rate": 5.215752274605012e-06, - "loss": 0.0702, - "step": 6190 - }, - { - "epoch": 2.01, - "learning_rate": 5.185040377034347e-06, - "loss": 0.0655, - "step": 6200 - }, - { - "epoch": 2.01, - "learning_rate": 5.1543874886764774e-06, - "loss": 0.0648, - "step": 6210 - }, - { - "epoch": 2.02, - "learning_rate": 5.123793985194536e-06, - "loss": 0.0666, - "step": 6220 - }, - { - "epoch": 2.02, - "learning_rate": 5.093260241523872e-06, - "loss": 0.0688, - "step": 6230 - }, - { - "epoch": 2.02, - "learning_rate": 5.0627866318674544e-06, - "loss": 0.0657, - "step": 6240 - }, - { - "epoch": 2.03, - "learning_rate": 5.032373529691283e-06, - "loss": 0.0696, - "step": 6250 - }, - { - "epoch": 2.03, - "learning_rate": 5.002021307719811e-06, - "loss": 0.0691, - "step": 6260 - }, - { - "epoch": 2.03, - "learning_rate": 4.971730337931391e-06, - "loss": 0.065, - "step": 6270 - }, - { - "epoch": 2.04, - "learning_rate": 4.9415009915537045e-06, - "loss": 0.0648, - "step": 6280 - }, - { - "epoch": 2.04, - "learning_rate": 4.911333639059208e-06, - "loss": 0.0624, - "step": 6290 - }, - { - "epoch": 2.04, - "learning_rate": 4.881228650160598e-06, - "loss": 0.0708, - "step": 6300 - }, - { - "epoch": 2.05, - "learning_rate": 4.85118639380629e-06, - "loss": 0.0691, - "step": 6310 - }, - { - "epoch": 2.05, - "learning_rate": 4.8212072381758744e-06, - "loss": 0.0708, - "step": 6320 - }, - { - "epoch": 2.05, - "learning_rate": 4.791291550675635e-06, - "loss": 0.0716, - "step": 6330 - }, - { - "epoch": 2.06, - "learning_rate": 4.761439697934009e-06, - "loss": 0.0712, - "step": 6340 - }, - { - "epoch": 2.06, - "learning_rate": 4.731652045797134e-06, - "loss": 0.0689, - "step": 6350 - }, - { - "epoch": 2.06, - "learning_rate": 4.701928959324323e-06, - "loss": 0.0662, - "step": 6360 - }, - { - "epoch": 2.07, - "learning_rate": 4.672270802783628e-06, - "loss": 0.0718, - "step": 6370 - }, - { - "epoch": 2.07, - "learning_rate": 4.642677939647356e-06, - "loss": 0.0733, - "step": 6380 - }, - { - "epoch": 2.07, - "learning_rate": 4.6131507325876144e-06, - "loss": 0.0686, - "step": 6390 - }, - { - "epoch": 2.08, - "learning_rate": 4.583689543471863e-06, - "loss": 0.0706, - "step": 6400 - }, - { - "epoch": 2.08, - "learning_rate": 4.5542947333585e-06, - "loss": 0.0649, - "step": 6410 - }, - { - "epoch": 2.08, - "learning_rate": 4.5249666624924195e-06, - "loss": 0.0677, - "step": 6420 - }, - { - "epoch": 2.08, - "learning_rate": 4.495705690300593e-06, - "loss": 0.0675, - "step": 6430 - }, - { - "epoch": 2.09, - "learning_rate": 4.466512175387672e-06, - "loss": 0.0642, - "step": 6440 - }, - { - "epoch": 2.09, - "learning_rate": 4.437386475531601e-06, - "loss": 0.0714, - "step": 6450 - }, - { - "epoch": 2.09, - "learning_rate": 4.408328947679221e-06, - "loss": 0.0693, - "step": 6460 - }, - { - "epoch": 2.1, - "learning_rate": 4.379339947941896e-06, - "loss": 0.0676, - "step": 6470 - }, - { - "epoch": 2.1, - "learning_rate": 4.350419831591147e-06, - "loss": 0.068, - "step": 6480 - }, - { - "epoch": 2.1, - "learning_rate": 4.321568953054316e-06, - "loss": 0.0696, - "step": 6490 - }, - { - "epoch": 2.11, - "learning_rate": 4.2927876659101905e-06, - "loss": 0.0699, - "step": 6500 - }, - { - "epoch": 2.11, - "learning_rate": 4.264076322884708e-06, - "loss": 0.0683, - "step": 6510 - }, - { - "epoch": 2.11, - "learning_rate": 4.2354352758465945e-06, - "loss": 0.0673, - "step": 6520 - }, - { - "epoch": 2.12, - "learning_rate": 4.206864875803086e-06, - "loss": 0.0702, - "step": 6530 - }, - { - "epoch": 2.12, - "learning_rate": 4.178365472895602e-06, - "loss": 0.0692, - "step": 6540 - }, - { - "epoch": 2.12, - "learning_rate": 4.149937416395468e-06, - "loss": 0.0699, - "step": 6550 - }, - { - "epoch": 2.13, - "learning_rate": 4.121581054699636e-06, - "loss": 0.0651, - "step": 6560 - }, - { - "epoch": 2.13, - "learning_rate": 4.093296735326404e-06, - "loss": 0.07, - "step": 6570 - }, - { - "epoch": 2.13, - "learning_rate": 4.065084804911165e-06, - "loss": 0.0689, - "step": 6580 - }, - { - "epoch": 2.14, - "learning_rate": 4.036945609202146e-06, - "loss": 0.0759, - "step": 6590 - }, - { - "epoch": 2.14, - "learning_rate": 4.008879493056212e-06, - "loss": 0.0721, - "step": 6600 - }, - { - "epoch": 2.14, - "learning_rate": 3.98088680043458e-06, - "loss": 0.0662, - "step": 6610 - }, - { - "epoch": 2.15, - "learning_rate": 3.95296787439864e-06, - "loss": 0.0758, - "step": 6620 - }, - { - "epoch": 2.15, - "learning_rate": 3.9251230571057495e-06, - "loss": 0.0663, - "step": 6630 - }, - { - "epoch": 2.15, - "learning_rate": 3.897352689805036e-06, - "loss": 0.069, - "step": 6640 - }, - { - "epoch": 2.16, - "learning_rate": 3.869657112833206e-06, - "loss": 0.069, - "step": 6650 - }, - { - "epoch": 2.16, - "learning_rate": 3.842036665610379e-06, - "loss": 0.0711, - "step": 6660 - }, - { - "epoch": 2.16, - "learning_rate": 3.814491686635943e-06, - "loss": 0.0671, - "step": 6670 - }, - { - "epoch": 2.17, - "learning_rate": 3.7870225134843776e-06, - "loss": 0.0706, - "step": 6680 - }, - { - "epoch": 2.17, - "learning_rate": 3.7596294828011483e-06, - "loss": 0.0685, - "step": 6690 - }, - { - "epoch": 2.17, - "learning_rate": 3.7323129302985485e-06, - "loss": 0.0659, - "step": 6700 - }, - { - "epoch": 2.18, - "learning_rate": 3.705073190751617e-06, - "loss": 0.0664, - "step": 6710 - }, - { - "epoch": 2.18, - "learning_rate": 3.6779105979940056e-06, - "loss": 0.0702, - "step": 6720 - }, - { - "epoch": 2.18, - "learning_rate": 3.650825484913916e-06, - "loss": 0.0657, - "step": 6730 - }, - { - "epoch": 2.19, - "learning_rate": 3.623818183449992e-06, - "loss": 0.0666, - "step": 6740 - }, - { - "epoch": 2.19, - "learning_rate": 3.59688902458728e-06, - "loss": 0.0668, - "step": 6750 - }, - { - "epoch": 2.19, - "learning_rate": 3.5700383383531467e-06, - "loss": 0.0643, - "step": 6760 - }, - { - "epoch": 2.2, - "learning_rate": 3.5432664538132446e-06, - "loss": 0.0618, - "step": 6770 - }, - { - "epoch": 2.2, - "learning_rate": 3.516573699067499e-06, - "loss": 0.0685, - "step": 6780 - }, - { - "epoch": 2.2, - "learning_rate": 3.48996040124605e-06, - "loss": 0.0664, - "step": 6790 - }, - { - "epoch": 2.2, - "learning_rate": 3.463426886505268e-06, - "loss": 0.0704, - "step": 6800 - }, - { - "epoch": 2.21, - "learning_rate": 3.436973480023743e-06, - "loss": 0.0671, - "step": 6810 - }, - { - "epoch": 2.21, - "learning_rate": 3.4106005059983283e-06, - "loss": 0.068, - "step": 6820 - }, - { - "epoch": 2.21, - "learning_rate": 3.3843082876401265e-06, - "loss": 0.0685, - "step": 6830 - }, - { - "epoch": 2.22, - "learning_rate": 3.3580971471705492e-06, - "loss": 0.0677, - "step": 6840 - }, - { - "epoch": 2.22, - "learning_rate": 3.331967405817379e-06, - "loss": 0.07, - "step": 6850 - }, - { - "epoch": 2.22, - "learning_rate": 3.3059193838108037e-06, - "loss": 0.0684, - "step": 6860 - }, - { - "epoch": 2.23, - "learning_rate": 3.2799534003795274e-06, - "loss": 0.0677, - "step": 6870 - }, - { - "epoch": 2.23, - "learning_rate": 3.254069773746822e-06, - "loss": 0.0719, - "step": 6880 - }, - { - "epoch": 2.23, - "learning_rate": 3.2282688211266568e-06, - "loss": 0.0706, - "step": 6890 - }, - { - "epoch": 2.24, - "learning_rate": 3.2025508587197907e-06, - "loss": 0.067, - "step": 6900 - }, - { - "epoch": 2.24, - "learning_rate": 3.176916201709912e-06, - "loss": 0.069, - "step": 6910 - }, - { - "epoch": 2.24, - "learning_rate": 3.1513651642597607e-06, - "loss": 0.065, - "step": 6920 - }, - { - "epoch": 2.25, - "learning_rate": 3.1258980595072976e-06, - "loss": 0.0708, - "step": 6930 - }, - { - "epoch": 2.25, - "learning_rate": 3.1005151995618454e-06, - "loss": 0.0684, - "step": 6940 - }, - { - "epoch": 2.25, - "learning_rate": 3.0752168955002735e-06, - "loss": 0.068, - "step": 6950 - }, - { - "epoch": 2.26, - "learning_rate": 3.0500034573631943e-06, - "loss": 0.0661, - "step": 6960 - }, - { - "epoch": 2.26, - "learning_rate": 3.024875194151151e-06, - "loss": 0.0669, - "step": 6970 - }, - { - "epoch": 2.26, - "learning_rate": 2.9998324138208336e-06, - "loss": 0.0672, - "step": 6980 - }, - { - "epoch": 2.27, - "learning_rate": 2.974875423281299e-06, - "loss": 0.0702, - "step": 6990 - }, - { - "epoch": 2.27, - "learning_rate": 2.950004528390238e-06, - "loss": 0.0674, - "step": 7000 - }, - { - "epoch": 2.27, - "learning_rate": 2.9252200339501847e-06, - "loss": 0.0687, - "step": 7010 - }, - { - "epoch": 2.28, - "learning_rate": 2.9005222437048054e-06, - "loss": 0.0689, - "step": 7020 - }, - { - "epoch": 2.28, - "learning_rate": 2.8759114603351836e-06, - "loss": 0.0695, - "step": 7030 - }, - { - "epoch": 2.28, - "learning_rate": 2.8513879854560856e-06, - "loss": 0.0667, - "step": 7040 - }, - { - "epoch": 2.29, - "learning_rate": 2.8269521196122907e-06, - "loss": 0.072, - "step": 7050 - }, - { - "epoch": 2.29, - "learning_rate": 2.8026041622748822e-06, - "loss": 0.0665, - "step": 7060 - }, - { - "epoch": 2.29, - "learning_rate": 2.7783444118376046e-06, - "loss": 0.0633, - "step": 7070 - }, - { - "epoch": 2.3, - "learning_rate": 2.754173165613179e-06, - "loss": 0.0663, - "step": 7080 - }, - { - "epoch": 2.3, - "learning_rate": 2.730090719829682e-06, - "loss": 0.0682, - "step": 7090 - }, - { - "epoch": 2.3, - "learning_rate": 2.7060973696269e-06, - "loss": 0.0688, - "step": 7100 - }, - { - "epoch": 2.31, - "learning_rate": 2.6821934090527245e-06, - "loss": 0.0683, - "step": 7110 - }, - { - "epoch": 2.31, - "learning_rate": 2.6583791310595376e-06, - "loss": 0.0666, - "step": 7120 - }, - { - "epoch": 2.31, - "learning_rate": 2.6346548275006232e-06, - "loss": 0.0664, - "step": 7130 - }, - { - "epoch": 2.32, - "learning_rate": 2.6110207891266013e-06, - "loss": 0.0667, - "step": 7140 - }, - { - "epoch": 2.32, - "learning_rate": 2.5874773055818557e-06, - "loss": 0.0663, - "step": 7150 - }, - { - "epoch": 2.32, - "learning_rate": 2.564024665400978e-06, - "loss": 0.0671, - "step": 7160 - }, - { - "epoch": 2.32, - "learning_rate": 2.5406631560052396e-06, - "loss": 0.0703, - "step": 7170 - }, - { - "epoch": 2.33, - "learning_rate": 2.517393063699084e-06, - "loss": 0.0665, - "step": 7180 - }, - { - "epoch": 2.33, - "learning_rate": 2.4942146736665827e-06, - "loss": 0.0667, - "step": 7190 - }, - { - "epoch": 2.33, - "learning_rate": 2.4711282699679718e-06, - "loss": 0.0665, - "step": 7200 - }, - { - "epoch": 2.34, - "learning_rate": 2.4481341355361487e-06, - "loss": 0.0656, - "step": 7210 - }, - { - "epoch": 2.34, - "learning_rate": 2.4252325521732267e-06, - "loss": 0.0676, - "step": 7220 - }, - { - "epoch": 2.34, - "learning_rate": 2.402423800547067e-06, - "loss": 0.0703, - "step": 7230 - }, - { - "epoch": 2.35, - "learning_rate": 2.3797081601878315e-06, - "loss": 0.0702, - "step": 7240 - }, - { - "epoch": 2.35, - "learning_rate": 2.3570859094845823e-06, - "loss": 0.0661, - "step": 7250 - }, - { - "epoch": 2.35, - "learning_rate": 2.33455732568184e-06, - "loss": 0.0714, - "step": 7260 - }, - { - "epoch": 2.36, - "learning_rate": 2.3121226848762124e-06, - "loss": 0.0687, - "step": 7270 - }, - { - "epoch": 2.36, - "learning_rate": 2.2897822620129904e-06, - "loss": 0.0669, - "step": 7280 - }, - { - "epoch": 2.36, - "learning_rate": 2.267536330882797e-06, - "loss": 0.0683, - "step": 7290 - }, - { - "epoch": 2.37, - "learning_rate": 2.2453851641182124e-06, - "loss": 0.0663, - "step": 7300 - }, - { - "epoch": 2.37, - "learning_rate": 2.2233290331904432e-06, - "loss": 0.0669, - "step": 7310 - }, - { - "epoch": 2.37, - "learning_rate": 2.2013682084060008e-06, - "loss": 0.0673, - "step": 7320 - }, - { - "epoch": 2.38, - "learning_rate": 2.1795029589033835e-06, - "loss": 0.061, - "step": 7330 - }, - { - "epoch": 2.38, - "learning_rate": 2.1577335526497677e-06, - "loss": 0.065, - "step": 7340 - }, - { - "epoch": 2.38, - "learning_rate": 2.1360602564377386e-06, - "loss": 0.0653, - "step": 7350 - }, - { - "epoch": 2.39, - "learning_rate": 2.114483335882017e-06, - "loss": 0.0653, - "step": 7360 - }, - { - "epoch": 2.39, - "learning_rate": 2.093003055416204e-06, - "loss": 0.0671, - "step": 7370 - }, - { - "epoch": 2.39, - "learning_rate": 2.0716196782895326e-06, - "loss": 0.0707, - "step": 7380 - }, - { - "epoch": 2.4, - "learning_rate": 2.050333466563643e-06, - "loss": 0.0639, - "step": 7390 - }, - { - "epoch": 2.4, - "learning_rate": 2.0291446811093964e-06, - "loss": 0.0665, - "step": 7400 - }, - { - "epoch": 2.4, - "learning_rate": 2.0080535816036363e-06, - "loss": 0.0657, - "step": 7410 - }, - { - "epoch": 2.41, - "learning_rate": 1.987060426526033e-06, - "loss": 0.0671, - "step": 7420 - }, - { - "epoch": 2.41, - "learning_rate": 1.9661654731559086e-06, - "loss": 0.0618, - "step": 7430 - }, - { - "epoch": 2.41, - "learning_rate": 1.945368977569089e-06, - "loss": 0.0697, - "step": 7440 - }, - { - "epoch": 2.42, - "learning_rate": 1.924671194634761e-06, - "loss": 0.0662, - "step": 7450 - }, - { - "epoch": 2.42, - "learning_rate": 1.9040723780123416e-06, - "loss": 0.0629, - "step": 7460 - }, - { - "epoch": 2.42, - "learning_rate": 1.8835727801483894e-06, - "loss": 0.0706, - "step": 7470 - }, - { - "epoch": 2.43, - "learning_rate": 1.863172652273485e-06, - "loss": 0.065, - "step": 7480 - }, - { - "epoch": 2.43, - "learning_rate": 1.8428722443991764e-06, - "loss": 0.0634, - "step": 7490 - }, - { - "epoch": 2.43, - "learning_rate": 1.8226718053148951e-06, - "loss": 0.0637, - "step": 7500 - }, - { - "epoch": 2.44, - "learning_rate": 1.8025715825849266e-06, - "loss": 0.0683, - "step": 7510 - }, - { - "epoch": 2.44, - "learning_rate": 1.7825718225453547e-06, - "loss": 0.0688, - "step": 7520 - }, - { - "epoch": 2.44, - "learning_rate": 1.762672770301057e-06, - "loss": 0.0666, - "step": 7530 - }, - { - "epoch": 2.44, - "learning_rate": 1.742874669722703e-06, - "loss": 0.0679, - "step": 7540 - }, - { - "epoch": 2.45, - "learning_rate": 1.7231777634437563e-06, - "loss": 0.0645, - "step": 7550 - }, - { - "epoch": 2.45, - "learning_rate": 1.703582292857503e-06, - "loss": 0.0667, - "step": 7560 - }, - { - "epoch": 2.45, - "learning_rate": 1.6840884981140948e-06, - "loss": 0.0674, - "step": 7570 - }, - { - "epoch": 2.46, - "learning_rate": 1.6646966181176117e-06, - "loss": 0.0671, - "step": 7580 - }, - { - "epoch": 2.46, - "learning_rate": 1.6454068905231258e-06, - "loss": 0.0687, - "step": 7590 - }, - { - "epoch": 2.46, - "learning_rate": 1.6262195517337887e-06, - "loss": 0.0637, - "step": 7600 - }, - { - "epoch": 2.47, - "learning_rate": 1.6071348368979377e-06, - "loss": 0.0636, - "step": 7610 - }, - { - "epoch": 2.47, - "learning_rate": 1.5881529799062167e-06, - "loss": 0.0675, - "step": 7620 - }, - { - "epoch": 2.47, - "learning_rate": 1.5692742133887095e-06, - "loss": 0.0637, - "step": 7630 - }, - { - "epoch": 2.48, - "learning_rate": 1.550498768712073e-06, - "loss": 0.065, - "step": 7640 - }, - { - "epoch": 2.48, - "learning_rate": 1.5318268759767307e-06, - "loss": 0.0623, - "step": 7650 - }, - { - "epoch": 2.48, - "learning_rate": 1.5132587640140227e-06, - "loss": 0.0681, - "step": 7660 - }, - { - "epoch": 2.49, - "learning_rate": 1.494794660383425e-06, - "loss": 0.0692, - "step": 7670 - }, - { - "epoch": 2.49, - "learning_rate": 1.4764347913697441e-06, - "loss": 0.0678, - "step": 7680 - }, - { - "epoch": 2.49, - "learning_rate": 1.4581793819803559e-06, - "loss": 0.0617, - "step": 7690 - }, - { - "epoch": 2.5, - "learning_rate": 1.4400286559424392e-06, - "loss": 0.0675, - "step": 7700 - }, - { - "epoch": 2.5, - "learning_rate": 1.4219828357002351e-06, - "loss": 0.0681, - "step": 7710 - }, - { - "epoch": 2.5, - "learning_rate": 1.4040421424123308e-06, - "loss": 0.0651, - "step": 7720 - }, - { - "epoch": 2.51, - "learning_rate": 1.3862067959489377e-06, - "loss": 0.0666, - "step": 7730 - }, - { - "epoch": 2.51, - "learning_rate": 1.368477014889199e-06, - "loss": 0.0637, - "step": 7740 - }, - { - "epoch": 2.51, - "learning_rate": 1.3508530165185096e-06, - "loss": 0.0677, - "step": 7750 - }, - { - "epoch": 2.52, - "learning_rate": 1.3333350168258651e-06, - "loss": 0.0639, - "step": 7760 - }, - { - "epoch": 2.52, - "learning_rate": 1.3159232305012027e-06, - "loss": 0.064, - "step": 7770 - }, - { - "epoch": 2.52, - "learning_rate": 1.298617870932769e-06, - "loss": 0.0643, - "step": 7780 - }, - { - "epoch": 2.53, - "learning_rate": 1.2814191502045093e-06, - "loss": 0.0655, - "step": 7790 - }, - { - "epoch": 2.53, - "learning_rate": 1.2643272790934735e-06, - "loss": 0.0666, - "step": 7800 - }, - { - "epoch": 2.53, - "learning_rate": 1.2473424670672264e-06, - "loss": 0.0645, - "step": 7810 - }, - { - "epoch": 2.54, - "learning_rate": 1.2304649222812792e-06, - "loss": 0.0662, - "step": 7820 - }, - { - "epoch": 2.54, - "learning_rate": 1.2136948515765402e-06, - "loss": 0.0688, - "step": 7830 - }, - { - "epoch": 2.54, - "learning_rate": 1.1970324604767836e-06, - "loss": 0.0655, - "step": 7840 - }, - { - "epoch": 2.55, - "learning_rate": 1.180477953186131e-06, - "loss": 0.0646, - "step": 7850 - }, - { - "epoch": 2.55, - "learning_rate": 1.1640315325865358e-06, - "loss": 0.069, - "step": 7860 - }, - { - "epoch": 2.55, - "learning_rate": 1.1476934002353191e-06, - "loss": 0.0635, - "step": 7870 - }, - { - "epoch": 2.56, - "learning_rate": 1.1314637563626774e-06, - "loss": 0.0638, - "step": 7880 - }, - { - "epoch": 2.56, - "learning_rate": 1.1153427998692401e-06, - "loss": 0.0656, - "step": 7890 - }, - { - "epoch": 2.56, - "learning_rate": 1.0993307283236355e-06, - "loss": 0.0647, - "step": 7900 - }, - { - "epoch": 2.56, - "learning_rate": 1.083427737960062e-06, - "loss": 0.0664, - "step": 7910 - }, - { - "epoch": 2.57, - "learning_rate": 1.067634023675882e-06, - "loss": 0.0653, - "step": 7920 - }, - { - "epoch": 2.57, - "learning_rate": 1.0519497790292388e-06, - "loss": 0.0662, - "step": 7930 - }, - { - "epoch": 2.57, - "learning_rate": 1.036375196236684e-06, - "loss": 0.0632, - "step": 7940 - }, - { - "epoch": 2.58, - "learning_rate": 1.0209104661708225e-06, - "loss": 0.0627, - "step": 7950 - }, - { - "epoch": 2.58, - "learning_rate": 1.0055557783579627e-06, - "loss": 0.0646, - "step": 7960 - }, - { - "epoch": 2.58, - "learning_rate": 9.903113209758098e-07, - "loss": 0.0689, - "step": 7970 - }, - { - "epoch": 2.59, - "learning_rate": 9.751772808511474e-07, - "loss": 0.0667, - "step": 7980 - }, - { - "epoch": 2.59, - "learning_rate": 9.601538434575586e-07, - "loss": 0.0589, - "step": 7990 - }, - { - "epoch": 2.59, - "learning_rate": 9.452411929131411e-07, - "loss": 0.0668, - "step": 8000 - }, - { - "epoch": 2.6, - "learning_rate": 9.30439511978255e-07, - "loss": 0.0633, - "step": 8010 - }, - { - "epoch": 2.6, - "learning_rate": 9.157489820532905e-07, - "loss": 0.0669, - "step": 8020 - }, - { - "epoch": 2.6, - "learning_rate": 9.011697831764366e-07, - "loss": 0.0614, - "step": 8030 - }, - { - "epoch": 2.61, - "learning_rate": 8.867020940214743e-07, - "loss": 0.0641, - "step": 8040 - }, - { - "epoch": 2.61, - "learning_rate": 8.723460918955895e-07, - "loss": 0.0676, - "step": 8050 - }, - { - "epoch": 2.61, - "learning_rate": 8.581019527372037e-07, - "loss": 0.0687, - "step": 8060 - }, - { - "epoch": 2.62, - "learning_rate": 8.439698511138106e-07, - "loss": 0.0665, - "step": 8070 - }, - { - "epoch": 2.62, - "learning_rate": 8.299499602198413e-07, - "loss": 0.0664, - "step": 8080 - }, - { - "epoch": 2.62, - "learning_rate": 8.160424518745425e-07, - "loss": 0.0693, - "step": 8090 - }, - { - "epoch": 2.63, - "learning_rate": 8.022474965198635e-07, - "loss": 0.0628, - "step": 8100 - }, - { - "epoch": 2.63, - "learning_rate": 7.885652632183771e-07, - "loss": 0.0626, - "step": 8110 - }, - { - "epoch": 2.63, - "learning_rate": 7.749959196512014e-07, - "loss": 0.0674, - "step": 8120 - }, - { - "epoch": 2.64, - "learning_rate": 7.615396321159496e-07, - "loss": 0.0685, - "step": 8130 - }, - { - "epoch": 2.64, - "learning_rate": 7.481965655246859e-07, - "loss": 0.0663, - "step": 8140 - }, - { - "epoch": 2.64, - "learning_rate": 7.349668834019063e-07, - "loss": 0.0663, - "step": 8150 - }, - { - "epoch": 2.65, - "learning_rate": 7.218507478825387e-07, - "loss": 0.0636, - "step": 8160 - }, - { - "epoch": 2.65, - "learning_rate": 7.088483197099561e-07, - "loss": 0.0643, - "step": 8170 - }, - { - "epoch": 2.65, - "learning_rate": 6.95959758233995e-07, - "loss": 0.0625, - "step": 8180 - }, - { - "epoch": 2.66, - "learning_rate": 6.831852214090163e-07, - "loss": 0.064, - "step": 8190 - }, - { - "epoch": 2.66, - "learning_rate": 6.705248657919638e-07, - "loss": 0.064, - "step": 8200 - }, - { - "epoch": 2.66, - "learning_rate": 6.579788465404491e-07, - "loss": 0.064, - "step": 8210 - }, - { - "epoch": 2.67, - "learning_rate": 6.455473174108396e-07, - "loss": 0.0641, - "step": 8220 - }, - { - "epoch": 2.67, - "learning_rate": 6.332304307563853e-07, - "loss": 0.0647, - "step": 8230 - }, - { - "epoch": 2.67, - "learning_rate": 6.210283375253512e-07, - "loss": 0.0653, - "step": 8240 - }, - { - "epoch": 2.68, - "learning_rate": 6.089411872591566e-07, - "loss": 0.0643, - "step": 8250 - }, - { - "epoch": 2.68, - "learning_rate": 5.969691280905565e-07, - "loss": 0.0635, - "step": 8260 - }, - { - "epoch": 2.68, - "learning_rate": 5.851123067418185e-07, - "loss": 0.0685, - "step": 8270 - }, - { - "epoch": 2.68, - "learning_rate": 5.733708685229222e-07, - "loss": 0.0644, - "step": 8280 - }, - { - "epoch": 2.69, - "learning_rate": 5.617449573297828e-07, - "loss": 0.0665, - "step": 8290 - }, - { - "epoch": 2.69, - "learning_rate": 5.502347156424881e-07, - "loss": 0.064, - "step": 8300 - }, - { - "epoch": 2.69, - "learning_rate": 5.388402845235541e-07, - "loss": 0.0673, - "step": 8310 - }, - { - "epoch": 2.7, - "learning_rate": 5.275618036161856e-07, - "loss": 0.0619, - "step": 8320 - }, - { - "epoch": 2.7, - "learning_rate": 5.163994111425752e-07, - "loss": 0.0654, - "step": 8330 - }, - { - "epoch": 2.7, - "learning_rate": 5.05353243902208e-07, - "loss": 0.0627, - "step": 8340 - }, - { - "epoch": 2.71, - "learning_rate": 4.944234372701851e-07, - "loss": 0.067, - "step": 8350 - }, - { - "epoch": 2.71, - "learning_rate": 4.836101251955583e-07, - "loss": 0.0639, - "step": 8360 - }, - { - "epoch": 2.71, - "learning_rate": 4.7291344019969374e-07, - "loss": 0.0671, - "step": 8370 - }, - { - "epoch": 2.72, - "learning_rate": 4.6233351337464984e-07, - "loss": 0.066, - "step": 8380 - }, - { - "epoch": 2.72, - "learning_rate": 4.518704743815672e-07, - "loss": 0.0688, - "step": 8390 - }, - { - "epoch": 2.72, - "learning_rate": 4.415244514490791e-07, - "loss": 0.0644, - "step": 8400 - }, - { - "epoch": 2.73, - "learning_rate": 4.312955713717404e-07, - "loss": 0.0632, - "step": 8410 - }, - { - "epoch": 2.73, - "learning_rate": 4.2118395950847767e-07, - "loss": 0.0641, - "step": 8420 - }, - { - "epoch": 2.73, - "learning_rate": 4.1118973978104603e-07, - "loss": 0.0665, - "step": 8430 - }, - { - "epoch": 2.74, - "learning_rate": 4.0131303467251804e-07, - "loss": 0.0643, - "step": 8440 - }, - { - "epoch": 2.74, - "learning_rate": 3.9155396522577496e-07, - "loss": 0.0628, - "step": 8450 - }, - { - "epoch": 2.74, - "learning_rate": 3.8191265104203014e-07, - "loss": 0.0638, - "step": 8460 - }, - { - "epoch": 2.75, - "learning_rate": 3.723892102793558e-07, - "loss": 0.0648, - "step": 8470 - }, - { - "epoch": 2.75, - "learning_rate": 3.629837596512453e-07, - "loss": 0.0665, - "step": 8480 - }, - { - "epoch": 2.75, - "learning_rate": 3.53696414425172e-07, - "loss": 0.0661, - "step": 8490 - }, - { - "epoch": 2.76, - "learning_rate": 3.445272884211837e-07, - "loss": 0.0691, - "step": 8500 - }, - { - "epoch": 2.76, - "learning_rate": 3.3547649401050265e-07, - "loss": 0.0628, - "step": 8510 - }, - { - "epoch": 2.76, - "learning_rate": 3.2654414211415463e-07, - "loss": 0.0624, - "step": 8520 - }, - { - "epoch": 2.77, - "learning_rate": 3.177303422016065e-07, - "loss": 0.0662, - "step": 8530 - }, - { - "epoch": 2.77, - "learning_rate": 3.0903520228941944e-07, - "loss": 0.0636, - "step": 8540 - }, - { - "epoch": 2.77, - "learning_rate": 3.004588289399324e-07, - "loss": 0.0657, - "step": 8550 - }, - { - "epoch": 2.78, - "learning_rate": 2.9200132725995644e-07, - "loss": 0.0639, - "step": 8560 - }, - { - "epoch": 2.78, - "learning_rate": 2.8366280089948126e-07, - "loss": 0.0648, - "step": 8570 - }, - { - "epoch": 2.78, - "learning_rate": 2.7544335205040626e-07, - "loss": 0.0648, - "step": 8580 - }, - { - "epoch": 2.79, - "learning_rate": 2.6734308144529154e-07, - "loss": 0.0644, - "step": 8590 - }, - { - "epoch": 2.79, - "learning_rate": 2.59362088356121e-07, - "loss": 0.0631, - "step": 8600 - }, - { - "epoch": 2.79, - "learning_rate": 2.515004705930835e-07, - "loss": 0.0628, - "step": 8610 - }, - { - "epoch": 2.8, - "learning_rate": 2.437583245033814e-07, - "loss": 0.0631, - "step": 8620 - }, - { - "epoch": 2.8, - "learning_rate": 2.3613574497003967e-07, - "loss": 0.0624, - "step": 8630 - }, - { - "epoch": 2.8, - "learning_rate": 2.2863282541075394e-07, - "loss": 0.0617, - "step": 8640 - }, - { - "epoch": 2.8, - "learning_rate": 2.2124965777673313e-07, - "loss": 0.067, - "step": 8650 - }, - { - "epoch": 2.81, - "learning_rate": 2.1398633255158675e-07, - "loss": 0.0618, - "step": 8660 - }, - { - "epoch": 2.81, - "learning_rate": 2.0684293875020245e-07, - "loss": 0.0644, - "step": 8670 - }, - { - "epoch": 2.81, - "learning_rate": 1.99819563917667e-07, - "loss": 0.0698, - "step": 8680 - }, - { - "epoch": 2.82, - "learning_rate": 1.9291629412818368e-07, - "loss": 0.0672, - "step": 8690 - }, - { - "epoch": 2.82, - "learning_rate": 1.8613321398402107e-07, - "loss": 0.0694, - "step": 8700 - }, - { - "epoch": 2.82, - "learning_rate": 1.7947040661448256e-07, - "loss": 0.0661, - "step": 8710 - }, - { - "epoch": 2.83, - "learning_rate": 1.7292795367487513e-07, - "loss": 0.0628, - "step": 8720 - }, - { - "epoch": 2.83, - "learning_rate": 1.6650593534551673e-07, - "loss": 0.0646, - "step": 8730 - }, - { - "epoch": 2.83, - "learning_rate": 1.6020443033075485e-07, - "loss": 0.0627, - "step": 8740 - }, - { - "epoch": 2.84, - "learning_rate": 1.5402351585799725e-07, - "loss": 0.0647, - "step": 8750 - }, - { - "epoch": 2.84, - "learning_rate": 1.4796326767676617e-07, - "loss": 0.0675, - "step": 8760 - }, - { - "epoch": 2.84, - "learning_rate": 1.420237600577734e-07, - "loss": 0.0627, - "step": 8770 - }, - { - "epoch": 2.85, - "learning_rate": 1.3620506579200777e-07, - "loss": 0.0614, - "step": 8780 - }, - { - "epoch": 2.85, - "learning_rate": 1.3050725618984017e-07, - "loss": 0.0644, - "step": 8790 - }, - { - "epoch": 2.85, - "learning_rate": 1.2493040108015774e-07, - "loss": 0.0634, - "step": 8800 - }, - { - "epoch": 2.86, - "learning_rate": 1.1947456880949893e-07, - "loss": 0.0649, - "step": 8810 - }, - { - "epoch": 2.86, - "learning_rate": 1.1413982624122189e-07, - "loss": 0.0632, - "step": 8820 - }, - { - "epoch": 2.86, - "learning_rate": 1.08926238754683e-07, - "loss": 0.0639, - "step": 8830 - }, - { - "epoch": 2.87, - "learning_rate": 1.0383387024443414e-07, - "loss": 0.0619, - "step": 8840 - }, - { - "epoch": 2.87, - "learning_rate": 9.88627831194433e-08, - "loss": 0.0627, - "step": 8850 - }, - { - "epoch": 2.87, - "learning_rate": 9.401303830232855e-08, - "loss": 0.0657, - "step": 8860 - }, - { - "epoch": 2.88, - "learning_rate": 8.928469522860527e-08, - "loss": 0.0646, - "step": 8870 - }, - { - "epoch": 2.88, - "learning_rate": 8.467781184596901e-08, - "loss": 0.0635, - "step": 8880 - }, - { - "epoch": 2.88, - "learning_rate": 8.0192444613576e-08, - "loss": 0.0671, - "step": 8890 - }, - { - "epoch": 2.89, - "learning_rate": 7.582864850135707e-08, - "loss": 0.0652, - "step": 8900 - }, - { - "epoch": 2.89, - "learning_rate": 7.158647698933707e-08, - "loss": 0.0676, - "step": 8910 - }, - { - "epoch": 2.89, - "learning_rate": 6.746598206698762e-08, - "loss": 0.0631, - "step": 8920 - }, - { - "epoch": 2.9, - "learning_rate": 6.34672142325865e-08, - "loss": 0.065, - "step": 8930 - }, - { - "epoch": 2.9, - "learning_rate": 5.959022249259594e-08, - "loss": 0.0669, - "step": 8940 - }, - { - "epoch": 2.9, - "learning_rate": 5.583505436106529e-08, - "loss": 0.067, - "step": 8950 - }, - { - "epoch": 2.91, - "learning_rate": 5.220175585904819e-08, - "loss": 0.061, - "step": 8960 - }, - { - "epoch": 2.91, - "learning_rate": 4.8690371514039656e-08, - "loss": 0.0661, - "step": 8970 - }, - { - "epoch": 2.91, - "learning_rate": 4.5300944359425446e-08, - "loss": 0.0626, - "step": 8980 - }, - { - "epoch": 2.92, - "learning_rate": 4.203351593396354e-08, - "loss": 0.0633, - "step": 8990 - }, - { - "epoch": 2.92, - "learning_rate": 3.8888126281264593e-08, - "loss": 0.0652, - "step": 9000 - }, - { - "epoch": 2.92, - "learning_rate": 3.586481394930896e-08, - "loss": 0.065, - "step": 9010 - }, - { - "epoch": 2.92, - "learning_rate": 3.2963615989971553e-08, - "loss": 0.0652, - "step": 9020 - }, - { - "epoch": 2.93, - "learning_rate": 3.0184567958567724e-08, - "loss": 0.0655, - "step": 9030 - }, - { - "epoch": 2.93, - "learning_rate": 2.752770391341919e-08, - "loss": 0.066, - "step": 9040 - }, - { - "epoch": 2.93, - "learning_rate": 2.499305641543104e-08, - "loss": 0.0654, - "step": 9050 - }, - { - "epoch": 2.94, - "learning_rate": 2.2580656527700916e-08, - "loss": 0.0673, - "step": 9060 - }, - { - "epoch": 2.94, - "learning_rate": 2.0290533815132683e-08, - "loss": 0.0658, - "step": 9070 - }, - { - "epoch": 2.94, - "learning_rate": 1.8122716344074476e-08, - "loss": 0.0628, - "step": 9080 - }, - { - "epoch": 2.95, - "learning_rate": 1.6077230681978972e-08, - "loss": 0.0635, - "step": 9090 - }, - { - "epoch": 2.95, - "learning_rate": 1.4154101897070338e-08, - "loss": 0.0635, - "step": 9100 - }, - { - "epoch": 2.95, - "learning_rate": 1.2353353558045566e-08, - "loss": 0.0679, - "step": 9110 - }, - { - "epoch": 2.96, - "learning_rate": 1.0675007733780273e-08, - "loss": 0.0656, - "step": 9120 - }, - { - "epoch": 2.96, - "learning_rate": 9.119084993055583e-09, - "loss": 0.0652, - "step": 9130 - }, - { - "epoch": 2.96, - "learning_rate": 7.685604404316094e-09, - "loss": 0.0594, - "step": 9140 - }, - { - "epoch": 2.97, - "learning_rate": 6.374583535426748e-09, - "loss": 0.0645, - "step": 9150 - }, - { - "epoch": 2.97, - "learning_rate": 5.186038453458553e-09, - "loss": 0.0657, - "step": 9160 - }, - { - "epoch": 2.97, - "learning_rate": 4.119983724497623e-09, - "loss": 0.0668, - "step": 9170 - }, - { - "epoch": 2.98, - "learning_rate": 3.1764324134631043e-09, - "loss": 0.0617, - "step": 9180 - }, - { - "epoch": 2.98, - "learning_rate": 2.355396083941752e-09, - "loss": 0.0632, - "step": 9190 - }, - { - "epoch": 2.98, - "learning_rate": 1.656884798058034e-09, - "loss": 0.0633, - "step": 9200 - }, - { - "epoch": 2.99, - "learning_rate": 1.0809071163386808e-09, - "loss": 0.0661, - "step": 9210 - }, - { - "epoch": 2.99, - "learning_rate": 6.274700976161008e-10, - "loss": 0.0656, - "step": 9220 - }, - { - "epoch": 2.99, - "learning_rate": 2.9657929893955886e-10, - "loss": 0.0652, - "step": 9230 - }, - { - "epoch": 3.0, - "learning_rate": 8.823877550301341e-11, - "loss": 0.0603, - "step": 9240 - }, - { - "epoch": 3.0, - "learning_rate": 2.4510806018174237e-12, - "loss": 0.06, - "step": 9250 - }, - { - "epoch": 3.0, - "eval_loss": 0.1728515625, - "eval_runtime": 6.4727, - "eval_samples_per_second": 19.775, - "eval_steps_per_second": 0.154, - "step": 9252 - } - ], - "max_steps": 9252, - "num_train_epochs": 3, - "total_flos": 9.615097431625354e+19, - "trial_name": null, - "trial_params": null -}