Upload folder using huggingface_hub

Changed files:
- adapter_model.safetensors +1 -1
- optimizer.pt +2 -2
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +5 -1721
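A commit titled "Upload folder using huggingface_hub" is what `HfApi.upload_folder` produces when no custom commit message is given; that call uploads every file in a local directory as a single commit. A minimal sketch follows; the local path and repo id are hypothetical placeholders, not taken from this commit.

```python
from huggingface_hub import HfApi

api = HfApi()

# Upload the whole checkpoint directory in one commit. The default
# commit message is "Upload folder using huggingface_hub", matching
# the commit title above.
api.upload_folder(
    folder_path="outputs/lora/checkpoint-244",   # hypothetical local path
    repo_id="your-username/your-adapter-repo",   # hypothetical repo id
)
```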
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aa98c40609798e6f4cfcbbafb151e75cdef708c54c4a3564ef5296d75d28b5f1
 size 13648432
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:59092ceb72403b0d9977c2a0ba1db9172168abed4967107f331983008560aa41
+size 27338682
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3fe596a702809ef49c023daf531ae82bd3c0eb2758ce76d6aab47acc5b60f1de
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:88f7006546ae39d71f2a19fc4c83914c37ff6ff09beb58f4d4b6e6094751adc3
 size 1064
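Each of the four binary files above is stored as a Git LFS pointer: three `key value` lines giving the spec version, the `oid sha256:` of the blob, and its `size` in bytes. A downloaded blob can be checked against its pointer with nothing but the standard library; a minimal sketch, with hypothetical file names:

```python
import hashlib
from pathlib import Path

def lfs_pointer_fields(pointer_text: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer file."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(pointer_path: str, blob_path: str) -> bool:
    """True if the blob matches the pointer's sha256 oid and size."""
    fields = lfs_pointer_fields(Path(pointer_path).read_text())
    expected = fields["oid"].removeprefix("sha256:")
    data = Path(blob_path).read_bytes()
    return (hashlib.sha256(data).hexdigest() == expected
            and len(data) == int(fields["size"]))

# verify("scheduler.pt.pointer", "scheduler.pt")  # hypothetical filenames
```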
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.
-  "best_model_checkpoint": "/home/sunggeunan/data/ICL/outputs/lora/SKIML-ICL_mrqa_nq_v3/Meta-Llama-3-8B-Instruct-unanswerable-0Q-0U-0C-qa_first/checkpoint-
-  "epoch":
+  "best_metric": 2.192843198776245,
+  "best_model_checkpoint": "/home/sunggeunan/data/ICL/outputs/lora/SKIML-ICL_mrqa_nq_v3/Meta-Llama-3-8B-Instruct-unanswerable-0Q-0U-0C-qa_first/checkpoint-244",
+  "epoch": 0.9994879672299027,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 244,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1723,1722 +1723,6 @@
       "eval_samples_per_second": 1.06,
       "eval_steps_per_second": 0.267,
       "step": 244
-    },
-    {
-      "epoch": 1.003584229390681,
-      "grad_norm": 0.3930998742580414,
-      "learning_rate": 3.503597122302158e-07,
-      "loss": 2.1367,
-      "step": 245
-    },
-    {
-      "epoch": 1.0076804915514592,
-      "grad_norm": 0.4150741696357727,
-      "learning_rate": 3.4964028776978416e-07,
-      "loss": 2.1955,
-      "step": 246
-    },
-    {
-      "epoch": 1.0117767537122375,
-      "grad_norm": 0.4133647680282593,
-      "learning_rate": 3.489208633093525e-07,
-      "loss": 2.2012,
-      "step": 247
-    },
-    {
-      "epoch": 1.0158730158730158,
-      "grad_norm": 0.4073452055454254,
-      "learning_rate": 3.4820143884892086e-07,
-      "loss": 2.2049,
-      "step": 248
-    },
-    {
-      "epoch": 1.0199692780337941,
-      "grad_norm": 0.38356712460517883,
-      "learning_rate": 3.4748201438848916e-07,
-      "loss": 2.041,
-      "step": 249
-    },
-    {
-      "epoch": 1.0240655401945724,
-      "grad_norm": 0.41012680530548096,
-      "learning_rate": 3.467625899280575e-07,
-      "loss": 2.1309,
-      "step": 250
-    },
-    {
-      "epoch": 1.0281618023553507,
-      "grad_norm": 0.403844952583313,
-      "learning_rate": 3.460431654676259e-07,
-      "loss": 2.1805,
-      "step": 251
-    },
-    {
-      "epoch": 1.032258064516129,
-      "grad_norm": 0.4057970643043518,
-      "learning_rate": 3.4532374100719426e-07,
-      "loss": 2.2623,
-      "step": 252
-    },
-    {
-      "epoch": 1.0363543266769073,
-      "grad_norm": 0.40142911672592163,
-      "learning_rate": 3.4460431654676256e-07,
-      "loss": 2.2811,
-      "step": 253
-    },
-    {
-      "epoch": 1.0404505888376856,
-      "grad_norm": 0.41189393401145935,
-      "learning_rate": 3.438848920863309e-07,
-      "loss": 2.1732,
-      "step": 254
-    },
-    {
-      "epoch": 1.044546850998464,
-      "grad_norm": 0.41403740644454956,
-      "learning_rate": 3.4316546762589926e-07,
-      "loss": 2.228,
-      "step": 255
-    },
-    {
-      "epoch": 1.0486431131592422,
-      "grad_norm": 0.39605844020843506,
-      "learning_rate": 3.424460431654676e-07,
-      "loss": 2.1744,
-      "step": 256
-    },
-    {
-      "epoch": 1.0527393753200205,
-      "grad_norm": 0.4222925007343292,
-      "learning_rate": 3.4172661870503596e-07,
-      "loss": 2.1764,
-      "step": 257
-    },
-    {
-      "epoch": 1.0568356374807988,
-      "grad_norm": 0.41609424352645874,
-      "learning_rate": 3.4100719424460426e-07,
-      "loss": 2.1652,
-      "step": 258
-    },
-    {
-      "epoch": 1.060931899641577,
-      "grad_norm": 0.40381646156311035,
-      "learning_rate": 3.4028776978417267e-07,
-      "loss": 2.1973,
-      "step": 259
-    },
-    {
-      "epoch": 1.0650281618023554,
-      "grad_norm": 0.4225187599658966,
-      "learning_rate": 3.39568345323741e-07,
-      "loss": 2.2157,
-      "step": 260
-    },
-    {
-      "epoch": 1.0691244239631337,
-      "grad_norm": 0.41316112875938416,
-      "learning_rate": 3.3884892086330937e-07,
-      "loss": 2.2225,
-      "step": 261
-    },
-    {
-      "epoch": 1.073220686123912,
-      "grad_norm": 0.40172699093818665,
-      "learning_rate": 3.3812949640287766e-07,
-      "loss": 2.1289,
-      "step": 262
-    },
-    {
-      "epoch": 1.0773169482846903,
-      "grad_norm": 0.4120056927204132,
-      "learning_rate": 3.37410071942446e-07,
-      "loss": 2.1729,
-      "step": 263
-    },
-    {
-      "epoch": 1.0814132104454686,
-      "grad_norm": 0.40594205260276794,
-      "learning_rate": 3.3669064748201437e-07,
-      "loss": 2.2475,
-      "step": 264
-    },
-    {
-      "epoch": 1.0855094726062469,
-      "grad_norm": 0.41929081082344055,
-      "learning_rate": 3.359712230215827e-07,
-      "loss": 2.1487,
-      "step": 265
-    },
-    {
-      "epoch": 1.0896057347670252,
-      "grad_norm": 0.4305683672428131,
-      "learning_rate": 3.3525179856115107e-07,
-      "loss": 2.1619,
-      "step": 266
-    },
-    {
-      "epoch": 1.0937019969278035,
-      "grad_norm": 0.4111643433570862,
-      "learning_rate": 3.3453237410071937e-07,
-      "loss": 2.2187,
-      "step": 267
-    },
-    {
-      "epoch": 1.0977982590885818,
-      "grad_norm": 0.416690468788147,
-      "learning_rate": 3.3381294964028777e-07,
-      "loss": 2.1601,
-      "step": 268
-    },
-    {
-      "epoch": 1.10189452124936,
-      "grad_norm": 0.3775942623615265,
-      "learning_rate": 3.330935251798561e-07,
-      "loss": 2.2014,
-      "step": 269
-    },
-    {
-      "epoch": 1.1059907834101383,
-      "grad_norm": 0.41255277395248413,
-      "learning_rate": 3.3237410071942447e-07,
-      "loss": 2.1528,
-      "step": 270
-    },
-    {
-      "epoch": 1.1100870455709166,
-      "grad_norm": 0.40753600001335144,
-      "learning_rate": 3.3165467625899277e-07,
-      "loss": 2.1237,
-      "step": 271
-    },
-    {
-      "epoch": 1.114183307731695,
-      "grad_norm": 0.3956006169319153,
-      "learning_rate": 3.309352517985611e-07,
-      "loss": 2.1724,
-      "step": 272
-    },
-    {
-      "epoch": 1.118279569892473,
-      "grad_norm": 0.40840500593185425,
-      "learning_rate": 3.3021582733812947e-07,
-      "loss": 2.1518,
-      "step": 273
-    },
-    {
-      "epoch": 1.1223758320532513,
-      "grad_norm": 0.4007817506790161,
-      "learning_rate": 3.294964028776978e-07,
-      "loss": 2.1971,
-      "step": 274
-    },
-    {
-      "epoch": 1.1264720942140296,
-      "grad_norm": 0.3911168575286865,
-      "learning_rate": 3.2877697841726617e-07,
-      "loss": 2.1539,
-      "step": 275
-    },
-    {
-      "epoch": 1.130568356374808,
-      "grad_norm": 0.3956235349178314,
-      "learning_rate": 3.280575539568345e-07,
-      "loss": 2.1144,
-      "step": 276
-    },
-    {
-      "epoch": 1.1346646185355862,
-      "grad_norm": 0.41161370277404785,
-      "learning_rate": 3.2733812949640287e-07,
-      "loss": 2.2036,
-      "step": 277
-    },
-    {
-      "epoch": 1.1387608806963645,
-      "grad_norm": 0.41950327157974243,
-      "learning_rate": 3.266187050359712e-07,
-      "loss": 2.1087,
-      "step": 278
-    },
-    {
-      "epoch": 1.1428571428571428,
-      "grad_norm": 0.40638211369514465,
-      "learning_rate": 3.2589928057553957e-07,
-      "loss": 2.1663,
-      "step": 279
-    },
-    {
-      "epoch": 1.146953405017921,
-      "grad_norm": 0.41957443952560425,
-      "learning_rate": 3.251798561151079e-07,
-      "loss": 2.1168,
-      "step": 280
-    },
-    {
-      "epoch": 1.1510496671786994,
-      "grad_norm": 0.42328333854675293,
-      "learning_rate": 3.244604316546762e-07,
-      "loss": 2.2045,
-      "step": 281
-    },
-    {
-      "epoch": 1.1551459293394777,
-      "grad_norm": 0.41651472449302673,
-      "learning_rate": 3.2374100719424457e-07,
-      "loss": 2.2012,
-      "step": 282
-    },
-    {
-      "epoch": 1.159242191500256,
-      "grad_norm": 0.42932000756263733,
-      "learning_rate": 3.230215827338129e-07,
-      "loss": 2.1443,
-      "step": 283
-    },
-    {
-      "epoch": 1.1633384536610343,
-      "grad_norm": 0.428455114364624,
-      "learning_rate": 3.223021582733813e-07,
-      "loss": 2.1448,
-      "step": 284
-    },
-    {
-      "epoch": 1.1674347158218126,
-      "grad_norm": 0.4306034445762634,
-      "learning_rate": 3.215827338129496e-07,
-      "loss": 2.1642,
-      "step": 285
-    },
-    {
-      "epoch": 1.1715309779825909,
-      "grad_norm": 0.40823614597320557,
-      "learning_rate": 3.20863309352518e-07,
-      "loss": 2.2248,
-      "step": 286
-    },
-    {
-      "epoch": 1.1756272401433692,
-      "grad_norm": 0.40993741154670715,
-      "learning_rate": 3.201438848920863e-07,
-      "loss": 2.1691,
-      "step": 287
-    },
-    {
-      "epoch": 1.1797235023041475,
-      "grad_norm": 0.3925482928752899,
-      "learning_rate": 3.194244604316547e-07,
-      "loss": 2.1062,
-      "step": 288
-    },
-    {
-      "epoch": 1.1838197644649258,
-      "grad_norm": 0.3945087790489197,
-      "learning_rate": 3.1870503597122303e-07,
-      "loss": 2.1627,
-      "step": 289
-    },
-    {
-      "epoch": 1.187916026625704,
-      "grad_norm": 0.3984071612358093,
-      "learning_rate": 3.179856115107913e-07,
-      "loss": 2.2023,
-      "step": 290
-    },
-    {
-      "epoch": 1.1920122887864824,
-      "grad_norm": 0.40720757842063904,
-      "learning_rate": 3.172661870503597e-07,
-      "loss": 2.1707,
-      "step": 291
-    },
-    {
-      "epoch": 1.1961085509472607,
-      "grad_norm": 0.41563913226127625,
-      "learning_rate": 3.1654676258992803e-07,
-      "loss": 2.2008,
-      "step": 292
-    },
-    {
-      "epoch": 1.200204813108039,
-      "grad_norm": 0.40958184003829956,
-      "learning_rate": 3.1582733812949643e-07,
-      "loss": 2.1686,
-      "step": 293
-    },
-    {
-      "epoch": 1.2043010752688172,
-      "grad_norm": 0.39920181035995483,
-      "learning_rate": 3.1510791366906473e-07,
-      "loss": 2.1179,
-      "step": 294
-    },
-    {
-      "epoch": 1.2083973374295955,
-      "grad_norm": 0.40914613008499146,
-      "learning_rate": 3.143884892086331e-07,
-      "loss": 2.1923,
-      "step": 295
-    },
-    {
-      "epoch": 1.2124935995903738,
-      "grad_norm": 0.41489359736442566,
-      "learning_rate": 3.1366906474820143e-07,
-      "loss": 2.2239,
-      "step": 296
-    },
-    {
-      "epoch": 1.2165898617511521,
-      "grad_norm": 0.4089359939098358,
-      "learning_rate": 3.129496402877698e-07,
-      "loss": 2.1533,
-      "step": 297
-    },
-    {
-      "epoch": 1.2206861239119304,
-      "grad_norm": 0.41259390115737915,
-      "learning_rate": 3.1223021582733813e-07,
-      "loss": 2.1626,
-      "step": 298
-    },
-    {
-      "epoch": 1.2247823860727087,
-      "grad_norm": 0.41184961795806885,
-      "learning_rate": 3.1151079136690643e-07,
-      "loss": 2.1428,
-      "step": 299
-    },
-    {
-      "epoch": 1.228878648233487,
-      "grad_norm": 0.41699355840682983,
-      "learning_rate": 3.107913669064748e-07,
-      "loss": 2.1351,
-      "step": 300
-    },
-    {
-      "epoch": 1.2329749103942653,
-      "grad_norm": 0.4199273884296417,
-      "learning_rate": 3.100719424460432e-07,
-      "loss": 2.236,
-      "step": 301
-    },
-    {
-      "epoch": 1.2370711725550436,
-      "grad_norm": 0.40552636981010437,
-      "learning_rate": 3.0935251798561153e-07,
-      "loss": 2.1816,
-      "step": 302
-    },
-    {
-      "epoch": 1.241167434715822,
-      "grad_norm": 0.409997820854187,
-      "learning_rate": 3.0863309352517983e-07,
-      "loss": 2.171,
-      "step": 303
-    },
-    {
-      "epoch": 1.2452636968766,
-      "grad_norm": 0.41188836097717285,
-      "learning_rate": 3.079136690647482e-07,
-      "loss": 2.2222,
-      "step": 304
-    },
-    {
-      "epoch": 1.2493599590373785,
-      "grad_norm": 0.41902589797973633,
-      "learning_rate": 3.0719424460431653e-07,
-      "loss": 2.1499,
-      "step": 305
-    },
-    {
-      "epoch": 1.2534562211981566,
-      "grad_norm": 0.4427715241909027,
-      "learning_rate": 3.064748201438849e-07,
-      "loss": 2.1765,
-      "step": 306
-    },
-    {
-      "epoch": 1.257552483358935,
-      "grad_norm": 0.4119572341442108,
-      "learning_rate": 3.0575539568345323e-07,
-      "loss": 2.1972,
-      "step": 307
-    },
-    {
-      "epoch": 1.2616487455197132,
-      "grad_norm": 0.42796561121940613,
-      "learning_rate": 3.0503597122302153e-07,
-      "loss": 2.1539,
-      "step": 308
-    },
-    {
-      "epoch": 1.2657450076804915,
-      "grad_norm": 0.4017777740955353,
-      "learning_rate": 3.043165467625899e-07,
-      "loss": 2.117,
-      "step": 309
-    },
-    {
-      "epoch": 1.2698412698412698,
-      "grad_norm": 0.43004101514816284,
-      "learning_rate": 3.035971223021583e-07,
-      "loss": 2.1607,
-      "step": 310
-    },
-    {
-      "epoch": 1.273937532002048,
-      "grad_norm": 0.41391560435295105,
-      "learning_rate": 3.0287769784172664e-07,
-      "loss": 2.1634,
-      "step": 311
-    },
-    {
-      "epoch": 1.2780337941628264,
-      "grad_norm": 0.40540462732315063,
-      "learning_rate": 3.0215827338129493e-07,
-      "loss": 2.1965,
-      "step": 312
-    },
-    {
-      "epoch": 1.2821300563236047,
-      "grad_norm": 0.4336321949958801,
-      "learning_rate": 3.014388489208633e-07,
-      "loss": 2.1836,
-      "step": 313
-    },
-    {
-      "epoch": 1.286226318484383,
-      "grad_norm": 0.42947065830230713,
-      "learning_rate": 3.0071942446043164e-07,
-      "loss": 2.1795,
-      "step": 314
-    },
-    {
-      "epoch": 1.2903225806451613,
-      "grad_norm": 0.4341902434825897,
-      "learning_rate": 3e-07,
-      "loss": 2.2515,
-      "step": 315
-    },
-    {
-      "epoch": 1.2944188428059396,
-      "grad_norm": 0.42092180252075195,
-      "learning_rate": 2.9928057553956834e-07,
-      "loss": 2.1852,
-      "step": 316
-    },
-    {
-      "epoch": 1.2985151049667178,
-      "grad_norm": 0.42151835560798645,
-      "learning_rate": 2.9856115107913663e-07,
-      "loss": 2.1826,
-      "step": 317
-    },
-    {
-      "epoch": 1.3026113671274961,
-      "grad_norm": 0.42949485778808594,
-      "learning_rate": 2.9784172661870504e-07,
-      "loss": 2.2645,
-      "step": 318
-    },
-    {
-      "epoch": 1.3067076292882744,
-      "grad_norm": 0.4163380265235901,
-      "learning_rate": 2.971223021582734e-07,
-      "loss": 2.1581,
-      "step": 319
-    },
-    {
-      "epoch": 1.3108038914490527,
-      "grad_norm": 0.42143404483795166,
-      "learning_rate": 2.9640287769784174e-07,
-      "loss": 2.1059,
-      "step": 320
-    },
-    {
-      "epoch": 1.314900153609831,
-      "grad_norm": 0.40830960869789124,
-      "learning_rate": 2.9568345323741004e-07,
-      "loss": 2.1475,
-      "step": 321
-    },
-    {
-      "epoch": 1.3189964157706093,
-      "grad_norm": 0.428190141916275,
-      "learning_rate": 2.949640287769784e-07,
-      "loss": 2.1596,
-      "step": 322
-    },
-    {
-      "epoch": 1.3230926779313876,
-      "grad_norm": 0.421878844499588,
-      "learning_rate": 2.9424460431654674e-07,
-      "loss": 2.2263,
-      "step": 323
-    },
-    {
-      "epoch": 1.327188940092166,
-      "grad_norm": 0.42208564281463623,
-      "learning_rate": 2.935251798561151e-07,
-      "loss": 2.1505,
-      "step": 324
-    },
-    {
-      "epoch": 1.3312852022529442,
-      "grad_norm": 0.40618959069252014,
-      "learning_rate": 2.9280575539568344e-07,
-      "loss": 2.2077,
-      "step": 325
-    },
-    {
-      "epoch": 1.3353814644137225,
-      "grad_norm": 0.442332923412323,
-      "learning_rate": 2.920863309352518e-07,
-      "loss": 2.1896,
-      "step": 326
-    },
-    {
-      "epoch": 1.3394777265745008,
-      "grad_norm": 0.42952120304107666,
-      "learning_rate": 2.9136690647482014e-07,
-      "loss": 2.2024,
-      "step": 327
-    },
-    {
-      "epoch": 1.3435739887352791,
-      "grad_norm": 0.4050465226173401,
-      "learning_rate": 2.906474820143885e-07,
-      "loss": 2.195,
-      "step": 328
-    },
-    {
-      "epoch": 1.3476702508960574,
-      "grad_norm": 0.42554470896720886,
-      "learning_rate": 2.8992805755395684e-07,
-      "loss": 2.1964,
-      "step": 329
-    },
-    {
-      "epoch": 1.3517665130568357,
-      "grad_norm": 0.41254305839538574,
-      "learning_rate": 2.8920863309352514e-07,
-      "loss": 2.0905,
-      "step": 330
-    },
-    {
-      "epoch": 1.355862775217614,
-      "grad_norm": 0.41307729482650757,
-      "learning_rate": 2.884892086330935e-07,
-      "loss": 2.1318,
-      "step": 331
-    },
-    {
-      "epoch": 1.3599590373783923,
-      "grad_norm": 0.44530072808265686,
-      "learning_rate": 2.8776978417266184e-07,
-      "loss": 2.2702,
-      "step": 332
-    },
-    {
-      "epoch": 1.3640552995391704,
-      "grad_norm": 0.43894681334495544,
-      "learning_rate": 2.870503597122302e-07,
-      "loss": 2.1374,
-      "step": 333
-    },
-    {
-      "epoch": 1.368151561699949,
-      "grad_norm": 0.4340452551841736,
-      "learning_rate": 2.8633093525179854e-07,
-      "loss": 2.1686,
-      "step": 334
-    },
-    {
-      "epoch": 1.372247823860727,
-      "grad_norm": 0.4140284061431885,
-      "learning_rate": 2.856115107913669e-07,
-      "loss": 2.0931,
-      "step": 335
-    },
-    {
-      "epoch": 1.3763440860215055,
-      "grad_norm": 0.44661757349967957,
-      "learning_rate": 2.8489208633093525e-07,
-      "loss": 2.2074,
-      "step": 336
-    },
-    {
-      "epoch": 1.3804403481822836,
-      "grad_norm": 0.44463956356048584,
-      "learning_rate": 2.841726618705036e-07,
-      "loss": 2.1452,
-      "step": 337
-    },
-    {
-      "epoch": 1.384536610343062,
-      "grad_norm": 0.4229423701763153,
-      "learning_rate": 2.8345323741007195e-07,
-      "loss": 2.146,
-      "step": 338
-    },
-    {
-      "epoch": 1.3886328725038402,
-      "grad_norm": 0.4193302392959595,
-      "learning_rate": 2.827338129496403e-07,
-      "loss": 2.1487,
-      "step": 339
-    },
-    {
-      "epoch": 1.3927291346646187,
-      "grad_norm": 0.4283891022205353,
-      "learning_rate": 2.820143884892086e-07,
-      "loss": 2.166,
-      "step": 340
-    },
-    {
-      "epoch": 1.3968253968253967,
-      "grad_norm": 0.43441158533096313,
-      "learning_rate": 2.8129496402877695e-07,
-      "loss": 2.1737,
-      "step": 341
-    },
-    {
-      "epoch": 1.400921658986175,
-      "grad_norm": 0.4387691020965576,
-      "learning_rate": 2.805755395683453e-07,
-      "loss": 2.1342,
-      "step": 342
-    },
-    {
-      "epoch": 1.4050179211469533,
-      "grad_norm": 0.4267665147781372,
-      "learning_rate": 2.798561151079137e-07,
-      "loss": 2.207,
-      "step": 343
-    },
-    {
-      "epoch": 1.4091141833077316,
-      "grad_norm": 0.436928927898407,
-      "learning_rate": 2.79136690647482e-07,
-      "loss": 2.223,
-      "step": 344
-    },
-    {
-      "epoch": 1.41321044546851,
-      "grad_norm": 0.435537189245224,
-      "learning_rate": 2.7841726618705035e-07,
-      "loss": 2.1663,
-      "step": 345
-    },
-    {
-      "epoch": 1.4173067076292882,
-      "grad_norm": 0.43269649147987366,
-      "learning_rate": 2.776978417266187e-07,
-      "loss": 2.1686,
-      "step": 346
-    },
-    {
-      "epoch": 1.4214029697900665,
-      "grad_norm": 0.44341838359832764,
-      "learning_rate": 2.7697841726618705e-07,
-      "loss": 2.1223,
-      "step": 347
-    },
-    {
-      "epoch": 1.4254992319508448,
-      "grad_norm": 0.44788920879364014,
-      "learning_rate": 2.762589928057554e-07,
-      "loss": 2.1691,
-      "step": 348
-    },
-    {
-      "epoch": 1.4295954941116231,
-      "grad_norm": 0.41909247636795044,
-      "learning_rate": 2.755395683453237e-07,
-      "loss": 2.0696,
-      "step": 349
-    },
-    {
-      "epoch": 1.4336917562724014,
-      "grad_norm": 0.42750445008277893,
-      "learning_rate": 2.7482014388489205e-07,
-      "loss": 2.1584,
-      "step": 350
-    },
-    {
-      "epoch": 1.4377880184331797,
-      "grad_norm": 0.4520404636859894,
-      "learning_rate": 2.741007194244604e-07,
-      "loss": 2.2197,
-      "step": 351
-    },
-    {
-      "epoch": 1.441884280593958,
-      "grad_norm": 0.44759395718574524,
-      "learning_rate": 2.733812949640288e-07,
-      "loss": 2.1204,
-      "step": 352
-    },
-    {
-      "epoch": 1.4459805427547363,
-      "grad_norm": 0.41428056359291077,
-      "learning_rate": 2.726618705035971e-07,
-      "loss": 2.1472,
-      "step": 353
-    },
-    {
-      "epoch": 1.4500768049155146,
-      "grad_norm": 0.4151472747325897,
-      "learning_rate": 2.7194244604316545e-07,
-      "loss": 2.1502,
-      "step": 354
-    },
-    {
-      "epoch": 1.454173067076293,
-      "grad_norm": 0.4091090261936188,
-      "learning_rate": 2.712230215827338e-07,
-      "loss": 2.2055,
-      "step": 355
-    },
-    {
-      "epoch": 1.4582693292370712,
-      "grad_norm": 0.43719011545181274,
-      "learning_rate": 2.7050359712230215e-07,
-      "loss": 2.1249,
-      "step": 356
-    },
-    {
-      "epoch": 1.4623655913978495,
-      "grad_norm": 0.4476158916950226,
-      "learning_rate": 2.697841726618705e-07,
-      "loss": 2.2397,
-      "step": 357
-    },
-    {
-      "epoch": 1.4664618535586278,
-      "grad_norm": 0.43802887201309204,
-      "learning_rate": 2.690647482014388e-07,
-      "loss": 2.0942,
-      "step": 358
-    },
-    {
-      "epoch": 1.470558115719406,
-      "grad_norm": 0.4598129987716675,
-      "learning_rate": 2.6834532374100715e-07,
-      "loss": 2.2245,
-      "step": 359
-    },
-    {
-      "epoch": 1.4746543778801844,
-      "grad_norm": 0.451800137758255,
-      "learning_rate": 2.6762589928057556e-07,
-      "loss": 2.2199,
-      "step": 360
-    },
-    {
-      "epoch": 1.4787506400409627,
-      "grad_norm": 0.4362044334411621,
-      "learning_rate": 2.669064748201439e-07,
-      "loss": 2.182,
-      "step": 361
-    },
-    {
-      "epoch": 1.482846902201741,
-      "grad_norm": 0.42909565567970276,
-      "learning_rate": 2.661870503597122e-07,
-      "loss": 2.1693,
-      "step": 362
-    },
-    {
-      "epoch": 1.4869431643625193,
-      "grad_norm": 0.43067359924316406,
-      "learning_rate": 2.6546762589928055e-07,
-      "loss": 2.1917,
-      "step": 363
-    },
-    {
-      "epoch": 1.4910394265232976,
-      "grad_norm": 0.4296717941761017,
-      "learning_rate": 2.647482014388489e-07,
-      "loss": 2.2075,
-      "step": 364
-    },
-    {
-      "epoch": 1.4951356886840759,
-      "grad_norm": 0.426975816488266,
-      "learning_rate": 2.6402877697841726e-07,
-      "loss": 2.1604,
-      "step": 365
-    },
-    {
-      "epoch": 1.499231950844854,
-      "grad_norm": 0.43292105197906494,
-      "learning_rate": 2.633093525179856e-07,
-      "loss": 2.2458,
-      "step": 366
-    },
-    {
-      "epoch": 1.5033282130056325,
-      "grad_norm": 0.44505226612091064,
-      "learning_rate": 2.625899280575539e-07,
-      "loss": 2.1828,
-      "step": 367
-    },
-    {
-      "epoch": 1.5074244751664105,
-      "grad_norm": 0.44908809661865234,
-      "learning_rate": 2.618705035971223e-07,
-      "loss": 2.1756,
-      "step": 368
-    },
-    {
-      "epoch": 1.511520737327189,
-      "grad_norm": 0.4480590224266052,
-      "learning_rate": 2.6115107913669066e-07,
-      "loss": 2.1625,
-      "step": 369
-    },
-    {
-      "epoch": 1.5156169994879671,
-      "grad_norm": 0.4370006024837494,
-      "learning_rate": 2.60431654676259e-07,
-      "loss": 2.2316,
-      "step": 370
-    },
-    {
-      "epoch": 1.5197132616487457,
-      "grad_norm": 0.43972134590148926,
-      "learning_rate": 2.597122302158273e-07,
-      "loss": 2.1986,
-      "step": 371
-    },
-    {
-      "epoch": 1.5238095238095237,
-      "grad_norm": 0.44018790125846863,
-      "learning_rate": 2.5899280575539566e-07,
-      "loss": 2.2074,
-      "step": 372
-    },
-    {
-      "epoch": 1.5279057859703022,
-      "grad_norm": 0.43901485204696655,
-      "learning_rate": 2.58273381294964e-07,
-      "loss": 2.1769,
-      "step": 373
-    },
-    {
-      "epoch": 1.5320020481310803,
-      "grad_norm": 0.4321320950984955,
-      "learning_rate": 2.5755395683453236e-07,
-      "loss": 2.214,
-      "step": 374
-    },
-    {
-      "epoch": 1.5360983102918588,
-      "grad_norm": 0.4323144257068634,
-      "learning_rate": 2.568345323741007e-07,
-      "loss": 2.1692,
-      "step": 375
-    },
-    {
-      "epoch": 1.540194572452637,
-      "grad_norm": 0.4142138361930847,
-      "learning_rate": 2.56115107913669e-07,
-      "loss": 2.1387,
-      "step": 376
-    },
-    {
-      "epoch": 1.5442908346134152,
-      "grad_norm": 0.45704299211502075,
-      "learning_rate": 2.553956834532374e-07,
-      "loss": 2.2468,
-      "step": 377
-    },
-    {
-      "epoch": 1.5483870967741935,
-      "grad_norm": 0.43568485975265503,
-      "learning_rate": 2.5467625899280576e-07,
-      "loss": 2.1166,
-      "step": 378
-    },
-    {
-      "epoch": 1.5524833589349718,
-      "grad_norm": 0.4460386633872986,
-      "learning_rate": 2.539568345323741e-07,
-      "loss": 2.135,
-      "step": 379
-    },
-    {
-      "epoch": 1.55657962109575,
-      "grad_norm": 0.42913010716438293,
-      "learning_rate": 2.532374100719424e-07,
-      "loss": 2.1493,
-      "step": 380
-    },
-    {
-      "epoch": 1.5606758832565284,
-      "grad_norm": 0.47101569175720215,
-      "learning_rate": 2.5251798561151076e-07,
-      "loss": 2.1889,
-      "step": 381
-    },
-    {
-      "epoch": 1.5647721454173067,
-      "grad_norm": 0.44840678572654724,
-      "learning_rate": 2.517985611510791e-07,
-      "loss": 2.1642,
-      "step": 382
-    },
-    {
-      "epoch": 1.568868407578085,
-      "grad_norm": 0.43295708298683167,
-      "learning_rate": 2.5107913669064746e-07,
-      "loss": 2.1544,
-      "step": 383
-    },
-    {
-      "epoch": 1.5729646697388633,
-      "grad_norm": 0.41599521040916443,
-      "learning_rate": 2.503597122302158e-07,
-      "loss": 2.1753,
-      "step": 384
-    },
-    {
-      "epoch": 1.5770609318996416,
-      "grad_norm": 0.4268709719181061,
-      "learning_rate": 2.4964028776978416e-07,
-      "loss": 2.0926,
-      "step": 385
-    },
-    {
-      "epoch": 1.5811571940604199,
-      "grad_norm": 0.4657405614852905,
-      "learning_rate": 2.489208633093525e-07,
-      "loss": 2.1476,
-      "step": 386
-    },
-    {
-      "epoch": 1.5852534562211982,
-      "grad_norm": 0.4485984742641449,
-      "learning_rate": 2.4820143884892087e-07,
-      "loss": 2.1548,
-      "step": 387
-    },
-    {
-      "epoch": 1.5893497183819765,
-      "grad_norm": 0.45854660868644714,
-      "learning_rate": 2.474820143884892e-07,
-      "loss": 2.2286,
-      "step": 388
-    },
-    {
-      "epoch": 1.5934459805427548,
-      "grad_norm": 0.45758455991744995,
-      "learning_rate": 2.4676258992805757e-07,
-      "loss": 2.1227,
-      "step": 389
-    },
-    {
-      "epoch": 1.597542242703533,
-      "grad_norm": 0.43784299492836,
-      "learning_rate": 2.4604316546762586e-07,
-      "loss": 2.1464,
-      "step": 390
-    },
-    {
-      "epoch": 1.6016385048643114,
-      "grad_norm": 0.4363243877887726,
-      "learning_rate": 2.4532374100719427e-07,
-      "loss": 2.1742,
-      "step": 391
-    },
-    {
-      "epoch": 1.6057347670250897,
-      "grad_norm": 0.43903884291648865,
-      "learning_rate": 2.4460431654676257e-07,
-      "loss": 2.1671,
-      "step": 392
-    },
-    {
-      "epoch": 1.6098310291858677,
-      "grad_norm": 0.4562489986419678,
-      "learning_rate": 2.438848920863309e-07,
-      "loss": 2.1712,
-      "step": 393
-    },
-    {
-      "epoch": 1.6139272913466463,
-      "grad_norm": 0.4432866871356964,
-      "learning_rate": 2.4316546762589927e-07,
-      "loss": 2.2507,
-      "step": 394
-    },
-    {
-      "epoch": 1.6180235535074243,
-      "grad_norm": 0.43589603900909424,
-      "learning_rate": 2.424460431654676e-07,
-      "loss": 2.2042,
-      "step": 395
-    },
-    {
-      "epoch": 1.6221198156682028,
-      "grad_norm": 0.4364205002784729,
-      "learning_rate": 2.4172661870503597e-07,
-      "loss": 2.1516,
-      "step": 396
-    },
-    {
-      "epoch": 1.626216077828981,
-      "grad_norm": 0.46468114852905273,
-      "learning_rate": 2.410071942446043e-07,
-      "loss": 2.2033,
-      "step": 397
-    },
-    {
-      "epoch": 1.6303123399897594,
-      "grad_norm": 0.4613543748855591,
-      "learning_rate": 2.4028776978417267e-07,
-      "loss": 2.1611,
-      "step": 398
-    },
-    {
-      "epoch": 1.6344086021505375,
-      "grad_norm": 0.44671180844306946,
-      "learning_rate": 2.3956834532374097e-07,
-      "loss": 2.1348,
-      "step": 399
-    },
-    {
-      "epoch": 1.638504864311316,
-      "grad_norm": 0.44752389192581177,
-      "learning_rate": 2.3884892086330937e-07,
-      "loss": 2.1847,
-      "step": 400
-    },
-    {
-      "epoch": 1.642601126472094,
-      "grad_norm": 0.4505477249622345,
-      "learning_rate": 2.381294964028777e-07,
-      "loss": 2.0769,
-      "step": 401
-    },
-    {
-      "epoch": 1.6466973886328726,
-      "grad_norm": 0.43532052636146545,
-      "learning_rate": 2.3741007194244602e-07,
-      "loss": 2.2235,
-      "step": 402
-    },
-    {
-      "epoch": 1.6507936507936507,
-      "grad_norm": 0.486672043800354,
-      "learning_rate": 2.3669064748201437e-07,
-      "loss": 2.1498,
-      "step": 403
-    },
-    {
-      "epoch": 1.6548899129544292,
-      "grad_norm": 0.4452424943447113,
-      "learning_rate": 2.3597122302158272e-07,
-      "loss": 2.1934,
-      "step": 404
-    },
-    {
-      "epoch": 1.6589861751152073,
-      "grad_norm": 0.450989305973053,
-      "learning_rate": 2.3525179856115107e-07,
-      "loss": 2.2262,
-      "step": 405
-    },
-    {
-      "epoch": 1.6630824372759858,
-      "grad_norm": 0.4479716420173645,
-      "learning_rate": 2.3453237410071942e-07,
-      "loss": 2.1605,
-      "step": 406
-    },
-    {
-      "epoch": 1.667178699436764,
-      "grad_norm": 0.4486874043941498,
-      "learning_rate": 2.3381294964028775e-07,
-      "loss": 2.1718,
-      "step": 407
-    },
-    {
-      "epoch": 1.6712749615975424,
-      "grad_norm": 0.45162245631217957,
-      "learning_rate": 2.3309352517985612e-07,
-      "loss": 2.1607,
-      "step": 408
-    },
-    {
-      "epoch": 1.6753712237583205,
-      "grad_norm": 0.4415742754936218,
-      "learning_rate": 2.3237410071942445e-07,
-      "loss": 2.2471,
-      "step": 409
-    },
-    {
-      "epoch": 1.6794674859190988,
-      "grad_norm": 0.43303442001342773,
-      "learning_rate": 2.316546762589928e-07,
-      "loss": 2.1657,
-      "step": 410
-    },
-    {
-      "epoch": 1.683563748079877,
-      "grad_norm": 0.43201303482055664,
-      "learning_rate": 2.3093525179856112e-07,
-      "loss": 2.1187,
-      "step": 411
-    },
-    {
-      "epoch": 1.6876600102406554,
-      "grad_norm": 0.44855329394340515,
-      "learning_rate": 2.302158273381295e-07,
-      "loss": 2.1382,
-      "step": 412
-    },
-    {
-      "epoch": 1.6917562724014337,
-      "grad_norm": 0.4393642842769623,
-      "learning_rate": 2.2949640287769782e-07,
-      "loss": 2.2042,
-      "step": 413
-    },
-    {
-      "epoch": 1.695852534562212,
-      "grad_norm": 0.4404168128967285,
-      "learning_rate": 2.2877697841726618e-07,
-      "loss": 2.1707,
-      "step": 414
-    },
-    {
-      "epoch": 1.6999487967229903,
-      "grad_norm": 0.456390380859375,
-      "learning_rate": 2.2805755395683453e-07,
-      "loss": 2.1967,
-      "step": 415
-    },
-    {
-      "epoch": 1.7040450588837686,
-      "grad_norm": 0.42096608877182007,
-      "learning_rate": 2.2733812949640288e-07,
-      "loss": 2.123,
-      "step": 416
-    },
-    {
-      "epoch": 1.7081413210445469,
-      "grad_norm": 0.44943612813949585,
-      "learning_rate": 2.2661870503597123e-07,
-      "loss": 2.1185,
-      "step": 417
-    },
-    {
-      "epoch": 1.7122375832053252,
-      "grad_norm": 0.46540147066116333,
-      "learning_rate": 2.2589928057553955e-07,
-      "loss": 2.1738,
-      "step": 418
-    },
-    {
-      "epoch": 1.7163338453661034,
-      "grad_norm": 0.45654571056365967,
-      "learning_rate": 2.251798561151079e-07,
-      "loss": 2.1686,
-      "step": 419
-    },
-    {
-      "epoch": 1.7204301075268817,
-      "grad_norm": 0.440854012966156,
-      "learning_rate": 2.2446043165467623e-07,
-      "loss": 2.1839,
-      "step": 420
-    },
-    {
-      "epoch": 1.72452636968766,
-      "grad_norm": 0.4610227048397064,
-      "learning_rate": 2.237410071942446e-07,
-      "loss": 2.1793,
-      "step": 421
-    },
-    {
-      "epoch": 1.7286226318484383,
-      "grad_norm": 0.4351898431777954,
-      "learning_rate": 2.2302158273381293e-07,
-      "loss": 2.1694,
-      "step": 422
-    },
-    {
-      "epoch": 1.7327188940092166,
-      "grad_norm": 0.4405521750450134,
-      "learning_rate": 2.2230215827338128e-07,
-      "loss": 2.1842,
-      "step": 423
-    },
-    {
-      "epoch": 1.736815156169995,
-      "grad_norm": 0.4544859528541565,
-      "learning_rate": 2.2158273381294963e-07,
-      "loss": 2.1987,
-      "step": 424
-    },
-    {
-      "epoch": 1.7409114183307732,
-      "grad_norm": 0.45099693536758423,
-      "learning_rate": 2.2086330935251798e-07,
-      "loss": 2.1647,
-      "step": 425
-    },
-    {
-      "epoch": 1.7450076804915513,
-      "grad_norm": 0.44613611698150635,
-      "learning_rate": 2.2014388489208633e-07,
-      "loss": 2.1923,
-      "step": 426
-    },
-    {
-      "epoch": 1.7491039426523298,
-      "grad_norm": 0.4412600100040436,
-      "learning_rate": 2.1942446043165465e-07,
-      "loss": 2.1434,
-      "step": 427
-    },
-    {
-      "epoch": 1.753200204813108,
-      "grad_norm": 0.43077391386032104,
-      "learning_rate": 2.18705035971223e-07,
-      "loss": 2.1382,
-      "step": 428
-    },
-    {
-      "epoch": 1.7572964669738864,
-      "grad_norm": 0.4740639626979828,
-      "learning_rate": 2.1798561151079136e-07,
-      "loss": 2.1449,
-      "step": 429
-    },
-    {
-      "epoch": 1.7613927291346645,
-      "grad_norm": 0.4652875065803528,
-      "learning_rate": 2.172661870503597e-07,
-      "loss": 2.1219,
-      "step": 430
-    },
-    {
-      "epoch": 1.765488991295443,
-      "grad_norm": 0.4421563744544983,
-      "learning_rate": 2.1654676258992806e-07,
-      "loss": 2.1368,
-      "step": 431
-    },
-    {
-      "epoch": 1.769585253456221,
-      "grad_norm": 0.4524243175983429,
-      "learning_rate": 2.1582733812949638e-07,
-      "loss": 2.1554,
-      "step": 432
-    },
-    {
-      "epoch": 1.7736815156169996,
-      "grad_norm": 0.46135076880455017,
-      "learning_rate": 2.1510791366906476e-07,
-      "loss": 2.1913,
-      "step": 433
-    },
-    {
-      "epoch": 1.7777777777777777,
-      "grad_norm": 0.43038061261177063,
-      "learning_rate": 2.1438848920863308e-07,
-      "loss": 2.1957,
-      "step": 434
-    },
-    {
-      "epoch": 1.7818740399385562,
-      "grad_norm": 0.43895038962364197,
-      "learning_rate": 2.1366906474820143e-07,
-      "loss": 2.0823,
-      "step": 435
-    },
-    {
-      "epoch": 1.7859703020993343,
-      "grad_norm": 0.4570868909358978,
-      "learning_rate": 2.1294964028776976e-07,
-      "loss": 2.1822,
-      "step": 436
-    },
-    {
-      "epoch": 1.7900665642601128,
-      "grad_norm": 0.42809584736824036,
-      "learning_rate": 2.1223021582733814e-07,
-      "loss": 2.1772,
-      "step": 437
-    },
-    {
-      "epoch": 1.7941628264208909,
-      "grad_norm": 0.45852047204971313,
-      "learning_rate": 2.1151079136690646e-07,
-      "loss": 2.1472,
-      "step": 438
-    },
-    {
-      "epoch": 1.7982590885816694,
-      "grad_norm": 0.4554458260536194,
-      "learning_rate": 2.107913669064748e-07,
-      "loss": 2.1843,
-      "step": 439
-    },
-    {
-      "epoch": 1.8023553507424475,
-      "grad_norm": 0.4390040934085846,
-      "learning_rate": 2.1007194244604316e-07,
-      "loss": 2.1083,
-      "step": 440
-    },
-    {
-      "epoch": 1.8064516129032258,
-      "grad_norm": 0.4527462124824524,
-      "learning_rate": 2.0935251798561148e-07,
-      "loss": 2.0939,
-      "step": 441
-    },
-    {
-      "epoch": 1.810547875064004,
-      "grad_norm": 0.4538320004940033,
-      "learning_rate": 2.0863309352517986e-07,
-      "loss": 2.1896,
-      "step": 442
-    },
-    {
-      "epoch": 1.8146441372247823,
-      "grad_norm": 0.44015762209892273,
-      "learning_rate": 2.0791366906474819e-07,
-      "loss": 2.1689,
-      "step": 443
-    },
-    {
-      "epoch": 1.8187403993855606,
-      "grad_norm": 0.45545652508735657,
-      "learning_rate": 2.0719424460431654e-07,
-      "loss": 2.2726,
-      "step": 444
-    },
-    {
-      "epoch": 1.822836661546339,
-      "grad_norm": 0.4651506245136261,
-      "learning_rate": 2.0647482014388486e-07,
-      "loss": 2.1771,
-      "step": 445
-    },
-    {
-      "epoch": 1.8269329237071172,
-      "grad_norm": 0.4575079083442688,
-      "learning_rate": 2.0575539568345324e-07,
-      "loss": 2.2199,
-      "step": 446
-    },
-    {
-      "epoch": 1.8310291858678955,
-      "grad_norm": 0.4568464457988739,
-      "learning_rate": 2.0503597122302156e-07,
-      "loss": 2.1224,
-      "step": 447
-    },
-    {
-      "epoch": 1.8351254480286738,
-      "grad_norm": 0.4724454879760742,
-      "learning_rate": 2.0431654676258991e-07,
-      "loss": 2.1746,
-      "step": 448
-    },
-    {
-      "epoch": 1.8392217101894521,
-      "grad_norm": 0.455714613199234,
-      "learning_rate": 2.0359712230215826e-07,
-      "loss": 2.1329,
-      "step": 449
-    },
-    {
-      "epoch": 1.8433179723502304,
-      "grad_norm": 0.4729180634021759,
-      "learning_rate": 2.0287769784172661e-07,
-      "loss": 2.2226,
-      "step": 450
-    },
-    {
-      "epoch": 1.8474142345110087,
-      "grad_norm": 0.4588530957698822,
-      "learning_rate": 2.0215827338129497e-07,
-      "loss": 2.1673,
-      "step": 451
-    },
-    {
-      "epoch": 1.851510496671787,
-      "grad_norm": 0.4554830491542816,
-      "learning_rate": 2.014388489208633e-07,
-      "loss": 2.1832,
-      "step": 452
-    },
-    {
-      "epoch": 1.8556067588325653,
-      "grad_norm": 0.4510416090488434,
-      "learning_rate": 2.0071942446043164e-07,
-      "loss": 2.2874,
-      "step": 453
-    },
-    {
-      "epoch": 1.8597030209933436,
-      "grad_norm": 0.45428797602653503,
-      "learning_rate": 2e-07,
-      "loss": 2.1852,
-      "step": 454
-    },
-    {
-      "epoch": 1.863799283154122,
-      "grad_norm": 0.46545976400375366,
-      "learning_rate": 1.9928057553956834e-07,
-      "loss": 2.1722,
-      "step": 455
-    },
-    {
-      "epoch": 1.8678955453149002,
-      "grad_norm": 0.42648664116859436,
-      "learning_rate": 1.985611510791367e-07,
-      "loss": 2.2262,
-      "step": 456
-    },
-    {
-      "epoch": 1.8719918074756783,
-      "grad_norm": 0.45800670981407166,
-      "learning_rate": 1.9784172661870502e-07,
-      "loss": 2.1138,
-      "step": 457
-    },
-    {
-      "epoch": 1.8760880696364568,
-      "grad_norm": 0.4593866765499115,
-      "learning_rate": 1.971223021582734e-07,
-      "loss": 2.2131,
-      "step": 458
-    },
-    {
-      "epoch": 1.8801843317972349,
-      "grad_norm": 0.4596412181854248,
-      "learning_rate": 1.9640287769784172e-07,
-      "loss": 2.1818,
-      "step": 459
-    },
-    {
-      "epoch": 1.8842805939580134,
-      "grad_norm": 0.46211445331573486,
-      "learning_rate": 1.9568345323741007e-07,
-      "loss": 2.1449,
-      "step": 460
-    },
-    {
-      "epoch": 1.8883768561187915,
-      "grad_norm": 0.47017550468444824,
-      "learning_rate": 1.949640287769784e-07,
-      "loss": 2.252,
-      "step": 461
-    },
-    {
-      "epoch": 1.89247311827957,
-      "grad_norm": 0.4683350622653961,
-      "learning_rate": 1.9424460431654677e-07,
-      "loss": 2.2191,
-      "step": 462
-    },
-    {
-      "epoch": 1.896569380440348,
-      "grad_norm": 0.4557268023490906,
-      "learning_rate": 1.935251798561151e-07,
-      "loss": 2.2006,
-      "step": 463
-    },
-    {
-      "epoch": 1.9006656426011266,
-      "grad_norm": 0.46528077125549316,
-      "learning_rate": 1.9280575539568344e-07,
-      "loss": 2.1315,
-      "step": 464
-    },
-    {
-      "epoch": 1.9047619047619047,
-      "grad_norm": 0.4454066753387451,
-      "learning_rate": 1.920863309352518e-07,
-      "loss": 2.1661,
-      "step": 465
-    },
-    {
-      "epoch": 1.9088581669226832,
-      "grad_norm": 0.43077215552330017,
-      "learning_rate": 1.9136690647482012e-07,
-      "loss": 2.1052,
-      "step": 466
-    },
-    {
-      "epoch": 1.9129544290834612,
-      "grad_norm": 0.4632050693035126,
-      "learning_rate": 1.906474820143885e-07,
-      "loss": 2.216,
-      "step": 467
-    },
-    {
-      "epoch": 1.9170506912442398,
-      "grad_norm": 0.4669223725795746,
-      "learning_rate": 1.8992805755395682e-07,
-      "loss": 2.1975,
-      "step": 468
-    },
-    {
-      "epoch": 1.9211469534050178,
-      "grad_norm": 0.4701545834541321,
-      "learning_rate": 1.8920863309352517e-07,
-      "loss": 2.2016,
-      "step": 469
-    },
-    {
-      "epoch": 1.9252432155657964,
-      "grad_norm": 0.4669329524040222,
-      "learning_rate": 1.884892086330935e-07,
-      "loss": 2.2006,
-      "step": 470
-    },
-    {
-      "epoch": 1.9293394777265744,
-      "grad_norm": 0.4742164611816406,
-      "learning_rate": 1.8776978417266187e-07,
-      "loss": 2.1532,
-      "step": 471
-    },
-    {
-      "epoch": 1.933435739887353,
-      "grad_norm": 0.4579870104789734,
-      "learning_rate": 1.870503597122302e-07,
-      "loss": 2.1853,
-      "step": 472
-    },
-    {
-      "epoch": 1.937532002048131,
-      "grad_norm": 0.44689640402793884,
-      "learning_rate": 1.8633093525179855e-07,
-      "loss": 2.1818,
-      "step": 473
-    },
-    {
-      "epoch": 1.9416282642089093,
-      "grad_norm": 0.4678220748901367,
-      "learning_rate": 1.856115107913669e-07,
-      "loss": 2.1414,
-      "step": 474
-    },
-    {
-      "epoch": 1.9457245263696876,
-      "grad_norm": 0.47106945514678955,
-      "learning_rate": 1.8489208633093525e-07,
-      "loss": 2.1421,
-      "step": 475
-    },
-    {
-      "epoch": 1.949820788530466,
-      "grad_norm": 0.44147756695747375,
-      "learning_rate": 1.841726618705036e-07,
-      "loss": 2.1849,
-      "step": 476
-    },
-    {
-      "epoch": 1.9539170506912442,
-      "grad_norm": 0.4621652662754059,
-      "learning_rate": 1.8345323741007192e-07,
-      "loss": 2.2051,
-      "step": 477
-    },
-    {
-      "epoch": 1.9580133128520225,
-      "grad_norm": 0.4599338173866272,
-      "learning_rate": 1.8273381294964028e-07,
-      "loss": 2.1471,
-      "step": 478
-    },
-    {
-      "epoch": 1.9621095750128008,
-      "grad_norm": 0.46919938921928406,
-      "learning_rate": 1.8201438848920863e-07,
-      "loss": 2.2248,
-      "step": 479
-    },
-    {
-      "epoch": 1.966205837173579,
-      "grad_norm": 0.4454819858074188,
-      "learning_rate": 1.8129496402877698e-07,
-      "loss": 2.2012,
-      "step": 480
-    },
-    {
-      "epoch": 1.9703020993343574,
-      "grad_norm": 0.45672351121902466,
-      "learning_rate": 1.8057553956834533e-07,
-      "loss": 2.2469,
-      "step": 481
-    },
-    {
-      "epoch": 1.9743983614951357,
-      "grad_norm": 0.461247980594635,
-      "learning_rate": 1.7985611510791365e-07,
-      "loss": 2.1643,
-      "step": 482
-    },
-    {
-      "epoch": 1.978494623655914,
-      "grad_norm": 0.46405044198036194,
-      "learning_rate": 1.7913669064748203e-07,
-      "loss": 2.114,
-      "step": 483
-    },
-    {
-      "epoch": 1.9825908858166923,
-      "grad_norm": 0.47663643956184387,
-      "learning_rate": 1.7841726618705035e-07,
-      "loss": 2.1491,
-      "step": 484
-    },
-    {
-      "epoch": 1.9866871479774706,
-      "grad_norm": 0.4617835581302643,
-      "learning_rate": 1.776978417266187e-07,
-      "loss": 2.1637,
-      "step": 485
-    },
-    {
-      "epoch": 1.9907834101382489,
-      "grad_norm": 0.4443981349468231,
-      "learning_rate": 1.7697841726618703e-07,
-      "loss": 2.0994,
-      "step": 486
-    },
-    {
-      "epoch": 1.9948796722990272,
-      "grad_norm": 0.46476656198501587,
-      "learning_rate": 1.7625899280575538e-07,
-      "loss": 2.1024,
-      "step": 487
-    },
-    {
-      "epoch": 1.9989759344598055,
-      "grad_norm": 0.4705009460449219,
-      "learning_rate": 1.7553956834532373e-07,
-      "loss": 2.1419,
-      "step": 488
-    },
-    {
-      "epoch": 1.9989759344598055,
-      "eval_loss": 2.1822314262390137,
-      "eval_runtime": 281.8918,
-      "eval_samples_per_second": 1.057,
-      "eval_steps_per_second": 0.266,
-      "step": 488
     }
   ],
   "logging_steps": 1,
@@ -3458,7 +1742,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 7.207537930288497e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
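The trainer_state.json diff above shows the state file written by transformers.Trainer: scalar fields such as "best_metric", "best_model_checkpoint", "epoch", and "global_step" at the top, a "log_history" list with one dict per logged step (eval entries carry the "eval_*" keys), and "total_flos" near the end. A minimal sketch of reading those fields back, assuming only the keys visible in the diff:

```python
import json

# trainer_state.json as written by transformers.Trainer; every key used
# below appears in the diff above.
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 2.192843198776245 after this commit
print(state["best_model_checkpoint"])  # .../checkpoint-244
print(state["global_step"])            # 244

# Training-loss curve: one (step, loss) pair per logged training step.
train_losses = [(e["step"], e["loss"])
                for e in state["log_history"] if "loss" in e]

# Eval entries are distinguished by their eval_* keys.
eval_entries = [e for e in state["log_history"] if "eval_loss" in e]
```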