hf-100 committed on
Commit
6933b63
·
verified ·
1 Parent(s): 00d7b00

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +2 -2
  3. scheduler.pt +1 -1
  4. trainer_state.json +4 -536
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afbc6e043d0ab7befad92a4106f25dc191ef8714ecd6a6257e278b655eecfe64
3
  size 3313653480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c399c7fab17d481c5bbf1dfbd90dbeebecebbd329a17a2dde1aa51acda686a
3
  size 3313653480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7e689976e4312255cb93f037fa286a639a87e0cc535b9f73a7563fe1d87e3d3
3
- size 1661302932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb50d16fc9c5c8e728744bd029ccecdc61a224d890aaf064309981ad5194cb9
3
+ size 1661301780
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b94353295745dabde6ca38201fb34d47c6e72a69c913f73ae1c8ca60f3423acf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b6ef9ad0d92f6fffee2bdaedbc1e0b68b977b45a2ed7ec889f6406883a665cf
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.996383363471971,
5
  "eval_steps": 1000,
6
- "global_step": 276,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1407,538 +1407,6 @@
1407
  "learning_rate": 2.8044280442804427e-05,
1408
  "loss": 0.3215,
1409
  "step": 200
1410
- },
1411
- {
1412
- "epoch": 1.453887884267631,
1413
- "grad_norm": 0.13975730538368225,
1414
- "learning_rate": 2.767527675276753e-05,
1415
- "loss": 0.3254,
1416
- "step": 201
1417
- },
1418
- {
1419
- "epoch": 1.461121157323689,
1420
- "grad_norm": 0.12640978395938873,
1421
- "learning_rate": 2.730627306273063e-05,
1422
- "loss": 0.3525,
1423
- "step": 202
1424
- },
1425
- {
1426
- "epoch": 1.4683544303797469,
1427
- "grad_norm": 0.11131294071674347,
1428
- "learning_rate": 2.693726937269373e-05,
1429
- "loss": 0.3132,
1430
- "step": 203
1431
- },
1432
- {
1433
- "epoch": 1.4755877034358047,
1434
- "grad_norm": 0.12206707894802094,
1435
- "learning_rate": 2.6568265682656828e-05,
1436
- "loss": 0.3561,
1437
- "step": 204
1438
- },
1439
- {
1440
- "epoch": 1.4828209764918625,
1441
- "grad_norm": 0.11679227650165558,
1442
- "learning_rate": 2.619926199261993e-05,
1443
- "loss": 0.3393,
1444
- "step": 205
1445
- },
1446
- {
1447
- "epoch": 1.4900542495479203,
1448
- "grad_norm": 0.12166301161050797,
1449
- "learning_rate": 2.5830258302583026e-05,
1450
- "loss": 0.3436,
1451
- "step": 206
1452
- },
1453
- {
1454
- "epoch": 1.4972875226039783,
1455
- "grad_norm": 0.13859513401985168,
1456
- "learning_rate": 2.5461254612546127e-05,
1457
- "loss": 0.3722,
1458
- "step": 207
1459
- },
1460
- {
1461
- "epoch": 1.5045207956600362,
1462
- "grad_norm": 0.11228498816490173,
1463
- "learning_rate": 2.5092250922509224e-05,
1464
- "loss": 0.3189,
1465
- "step": 208
1466
- },
1467
- {
1468
- "epoch": 1.511754068716094,
1469
- "grad_norm": 0.11623143404722214,
1470
- "learning_rate": 2.472324723247233e-05,
1471
- "loss": 0.3731,
1472
- "step": 209
1473
- },
1474
- {
1475
- "epoch": 1.518987341772152,
1476
- "grad_norm": 0.1316087245941162,
1477
- "learning_rate": 2.4354243542435426e-05,
1478
- "loss": 0.3364,
1479
- "step": 210
1480
- },
1481
- {
1482
- "epoch": 1.5262206148282098,
1483
- "grad_norm": 0.12064289301633835,
1484
- "learning_rate": 2.3985239852398524e-05,
1485
- "loss": 0.3511,
1486
- "step": 211
1487
- },
1488
- {
1489
- "epoch": 1.5334538878842676,
1490
- "grad_norm": 0.14924070239067078,
1491
- "learning_rate": 2.3616236162361624e-05,
1492
- "loss": 0.3313,
1493
- "step": 212
1494
- },
1495
- {
1496
- "epoch": 1.5406871609403257,
1497
- "grad_norm": 0.10872308164834976,
1498
- "learning_rate": 2.3247232472324722e-05,
1499
- "loss": 0.3186,
1500
- "step": 213
1501
- },
1502
- {
1503
- "epoch": 1.5479204339963832,
1504
- "grad_norm": 0.12435383349657059,
1505
- "learning_rate": 2.2878228782287826e-05,
1506
- "loss": 0.3382,
1507
- "step": 214
1508
- },
1509
- {
1510
- "epoch": 1.5551537070524413,
1511
- "grad_norm": 0.12237284332513809,
1512
- "learning_rate": 2.2509225092250924e-05,
1513
- "loss": 0.3427,
1514
- "step": 215
1515
- },
1516
- {
1517
- "epoch": 1.562386980108499,
1518
- "grad_norm": 0.1082320362329483,
1519
- "learning_rate": 2.2140221402214025e-05,
1520
- "loss": 0.3141,
1521
- "step": 216
1522
- },
1523
- {
1524
- "epoch": 1.5696202531645569,
1525
- "grad_norm": 0.12488240003585815,
1526
- "learning_rate": 2.1771217712177122e-05,
1527
- "loss": 0.3062,
1528
- "step": 217
1529
- },
1530
- {
1531
- "epoch": 1.576853526220615,
1532
- "grad_norm": 0.1263773888349533,
1533
- "learning_rate": 2.140221402214022e-05,
1534
- "loss": 0.3477,
1535
- "step": 218
1536
- },
1537
- {
1538
- "epoch": 1.5840867992766727,
1539
- "grad_norm": 0.11632055044174194,
1540
- "learning_rate": 2.1033210332103324e-05,
1541
- "loss": 0.3558,
1542
- "step": 219
1543
- },
1544
- {
1545
- "epoch": 1.5913200723327305,
1546
- "grad_norm": 0.13615989685058594,
1547
- "learning_rate": 2.066420664206642e-05,
1548
- "loss": 0.3806,
1549
- "step": 220
1550
- },
1551
- {
1552
- "epoch": 1.5985533453887886,
1553
- "grad_norm": 0.17589685320854187,
1554
- "learning_rate": 2.0295202952029522e-05,
1555
- "loss": 0.3327,
1556
- "step": 221
1557
- },
1558
- {
1559
- "epoch": 1.6057866184448462,
1560
- "grad_norm": 0.1255197674036026,
1561
- "learning_rate": 1.992619926199262e-05,
1562
- "loss": 0.3582,
1563
- "step": 222
1564
- },
1565
- {
1566
- "epoch": 1.6130198915009042,
1567
- "grad_norm": 0.29970669746398926,
1568
- "learning_rate": 1.955719557195572e-05,
1569
- "loss": 0.3587,
1570
- "step": 223
1571
- },
1572
- {
1573
- "epoch": 1.620253164556962,
1574
- "grad_norm": 0.12951691448688507,
1575
- "learning_rate": 1.918819188191882e-05,
1576
- "loss": 0.3527,
1577
- "step": 224
1578
- },
1579
- {
1580
- "epoch": 1.6274864376130198,
1581
- "grad_norm": 0.2785731256008148,
1582
- "learning_rate": 1.881918819188192e-05,
1583
- "loss": 0.338,
1584
- "step": 225
1585
- },
1586
- {
1587
- "epoch": 1.6347197106690778,
1588
- "grad_norm": 0.12442605197429657,
1589
- "learning_rate": 1.845018450184502e-05,
1590
- "loss": 0.3606,
1591
- "step": 226
1592
- },
1593
- {
1594
- "epoch": 1.6419529837251357,
1595
- "grad_norm": 0.12413132935762405,
1596
- "learning_rate": 1.8081180811808117e-05,
1597
- "loss": 0.3164,
1598
- "step": 227
1599
- },
1600
- {
1601
- "epoch": 1.6491862567811935,
1602
- "grad_norm": 0.1774081587791443,
1603
- "learning_rate": 1.771217712177122e-05,
1604
- "loss": 0.3408,
1605
- "step": 228
1606
- },
1607
- {
1608
- "epoch": 1.6564195298372515,
1609
- "grad_norm": 0.12615852057933807,
1610
- "learning_rate": 1.734317343173432e-05,
1611
- "loss": 0.3433,
1612
- "step": 229
1613
- },
1614
- {
1615
- "epoch": 1.663652802893309,
1616
- "grad_norm": 0.1367713063955307,
1617
- "learning_rate": 1.6974169741697417e-05,
1618
- "loss": 0.3642,
1619
- "step": 230
1620
- },
1621
- {
1622
- "epoch": 1.6708860759493671,
1623
- "grad_norm": 0.12680459022521973,
1624
- "learning_rate": 1.6605166051660518e-05,
1625
- "loss": 0.3828,
1626
- "step": 231
1627
- },
1628
- {
1629
- "epoch": 1.678119349005425,
1630
- "grad_norm": 0.12927737832069397,
1631
- "learning_rate": 1.6236162361623615e-05,
1632
- "loss": 0.3253,
1633
- "step": 232
1634
- },
1635
- {
1636
- "epoch": 1.6853526220614827,
1637
- "grad_norm": 0.11796507984399796,
1638
- "learning_rate": 1.5867158671586716e-05,
1639
- "loss": 0.3763,
1640
- "step": 233
1641
- },
1642
- {
1643
- "epoch": 1.6925858951175408,
1644
- "grad_norm": 0.12181632965803146,
1645
- "learning_rate": 1.5498154981549817e-05,
1646
- "loss": 0.3311,
1647
- "step": 234
1648
- },
1649
- {
1650
- "epoch": 1.6998191681735986,
1651
- "grad_norm": 0.11845839768648148,
1652
- "learning_rate": 1.5129151291512916e-05,
1653
- "loss": 0.3718,
1654
- "step": 235
1655
- },
1656
- {
1657
- "epoch": 1.7070524412296564,
1658
- "grad_norm": 0.11736506223678589,
1659
- "learning_rate": 1.4760147601476015e-05,
1660
- "loss": 0.3225,
1661
- "step": 236
1662
- },
1663
- {
1664
- "epoch": 1.7142857142857144,
1665
- "grad_norm": 0.12600649893283844,
1666
- "learning_rate": 1.4391143911439114e-05,
1667
- "loss": 0.3309,
1668
- "step": 237
1669
- },
1670
- {
1671
- "epoch": 1.721518987341772,
1672
- "grad_norm": 0.12421372532844543,
1673
- "learning_rate": 1.4022140221402214e-05,
1674
- "loss": 0.3516,
1675
- "step": 238
1676
- },
1677
- {
1678
- "epoch": 1.72875226039783,
1679
- "grad_norm": 0.1250220090150833,
1680
- "learning_rate": 1.3653136531365315e-05,
1681
- "loss": 0.3634,
1682
- "step": 239
1683
- },
1684
- {
1685
- "epoch": 1.7359855334538878,
1686
- "grad_norm": 0.12365727126598358,
1687
- "learning_rate": 1.3284132841328414e-05,
1688
- "loss": 0.3827,
1689
- "step": 240
1690
- },
1691
- {
1692
- "epoch": 1.7432188065099457,
1693
- "grad_norm": 0.12409546226263046,
1694
- "learning_rate": 1.2915129151291513e-05,
1695
- "loss": 0.3443,
1696
- "step": 241
1697
- },
1698
- {
1699
- "epoch": 1.7504520795660037,
1700
- "grad_norm": 0.1293025016784668,
1701
- "learning_rate": 1.2546125461254612e-05,
1702
- "loss": 0.3284,
1703
- "step": 242
1704
- },
1705
- {
1706
- "epoch": 1.7576853526220615,
1707
- "grad_norm": 0.12537458539009094,
1708
- "learning_rate": 1.2177121771217713e-05,
1709
- "loss": 0.3196,
1710
- "step": 243
1711
- },
1712
- {
1713
- "epoch": 1.7649186256781193,
1714
- "grad_norm": 0.13035526871681213,
1715
- "learning_rate": 1.1808118081180812e-05,
1716
- "loss": 0.3114,
1717
- "step": 244
1718
- },
1719
- {
1720
- "epoch": 1.7721518987341773,
1721
- "grad_norm": 0.15101519227027893,
1722
- "learning_rate": 1.1439114391143913e-05,
1723
- "loss": 0.3607,
1724
- "step": 245
1725
- },
1726
- {
1727
- "epoch": 1.779385171790235,
1728
- "grad_norm": 0.12607994675636292,
1729
- "learning_rate": 1.1070110701107012e-05,
1730
- "loss": 0.3202,
1731
- "step": 246
1732
- },
1733
- {
1734
- "epoch": 1.786618444846293,
1735
- "grad_norm": 0.12627242505550385,
1736
- "learning_rate": 1.070110701107011e-05,
1737
- "loss": 0.3394,
1738
- "step": 247
1739
- },
1740
- {
1741
- "epoch": 1.7938517179023508,
1742
- "grad_norm": 0.12351588159799576,
1743
- "learning_rate": 1.033210332103321e-05,
1744
- "loss": 0.3222,
1745
- "step": 248
1746
- },
1747
- {
1748
- "epoch": 1.8010849909584086,
1749
- "grad_norm": 0.12709592282772064,
1750
- "learning_rate": 9.96309963099631e-06,
1751
- "loss": 0.3392,
1752
- "step": 249
1753
- },
1754
- {
1755
- "epoch": 1.8083182640144666,
1756
- "grad_norm": 0.20409362018108368,
1757
- "learning_rate": 9.59409594095941e-06,
1758
- "loss": 0.3541,
1759
- "step": 250
1760
- },
1761
- {
1762
- "epoch": 1.8155515370705244,
1763
- "grad_norm": 0.13211952149868011,
1764
- "learning_rate": 9.22509225092251e-06,
1765
- "loss": 0.3798,
1766
- "step": 251
1767
- },
1768
- {
1769
- "epoch": 1.8227848101265822,
1770
- "grad_norm": 0.1471939980983734,
1771
- "learning_rate": 8.85608856088561e-06,
1772
- "loss": 0.3716,
1773
- "step": 252
1774
- },
1775
- {
1776
- "epoch": 1.8300180831826403,
1777
- "grad_norm": 0.12940147519111633,
1778
- "learning_rate": 8.487084870848708e-06,
1779
- "loss": 0.3324,
1780
- "step": 253
1781
- },
1782
- {
1783
- "epoch": 1.837251356238698,
1784
- "grad_norm": 0.1352042704820633,
1785
- "learning_rate": 8.118081180811808e-06,
1786
- "loss": 0.357,
1787
- "step": 254
1788
- },
1789
- {
1790
- "epoch": 1.8444846292947559,
1791
- "grad_norm": 0.12222684174776077,
1792
- "learning_rate": 7.749077490774908e-06,
1793
- "loss": 0.3262,
1794
- "step": 255
1795
- },
1796
- {
1797
- "epoch": 1.851717902350814,
1798
- "grad_norm": 0.12854433059692383,
1799
- "learning_rate": 7.380073800738008e-06,
1800
- "loss": 0.3452,
1801
- "step": 256
1802
- },
1803
- {
1804
- "epoch": 1.8589511754068715,
1805
- "grad_norm": 0.1557794213294983,
1806
- "learning_rate": 7.011070110701107e-06,
1807
- "loss": 0.3443,
1808
- "step": 257
1809
- },
1810
- {
1811
- "epoch": 1.8661844484629295,
1812
- "grad_norm": 0.12235873192548752,
1813
- "learning_rate": 6.642066420664207e-06,
1814
- "loss": 0.3185,
1815
- "step": 258
1816
- },
1817
- {
1818
- "epoch": 1.8734177215189873,
1819
- "grad_norm": 0.12504766881465912,
1820
- "learning_rate": 6.273062730627306e-06,
1821
- "loss": 0.356,
1822
- "step": 259
1823
- },
1824
- {
1825
- "epoch": 1.8806509945750451,
1826
- "grad_norm": 0.1318463236093521,
1827
- "learning_rate": 5.904059040590406e-06,
1828
- "loss": 0.3276,
1829
- "step": 260
1830
- },
1831
- {
1832
- "epoch": 1.8878842676311032,
1833
- "grad_norm": 0.12830232083797455,
1834
- "learning_rate": 5.535055350553506e-06,
1835
- "loss": 0.3242,
1836
- "step": 261
1837
- },
1838
- {
1839
- "epoch": 1.895117540687161,
1840
- "grad_norm": 0.12111414223909378,
1841
- "learning_rate": 5.166051660516605e-06,
1842
- "loss": 0.3703,
1843
- "step": 262
1844
- },
1845
- {
1846
- "epoch": 1.9023508137432188,
1847
- "grad_norm": 0.12544532120227814,
1848
- "learning_rate": 4.797047970479705e-06,
1849
- "loss": 0.3375,
1850
- "step": 263
1851
- },
1852
- {
1853
- "epoch": 1.9095840867992768,
1854
- "grad_norm": 0.12667147815227509,
1855
- "learning_rate": 4.428044280442805e-06,
1856
- "loss": 0.326,
1857
- "step": 264
1858
- },
1859
- {
1860
- "epoch": 1.9168173598553344,
1861
- "grad_norm": 0.11932243406772614,
1862
- "learning_rate": 4.059040590405904e-06,
1863
- "loss": 0.372,
1864
- "step": 265
1865
- },
1866
- {
1867
- "epoch": 1.9240506329113924,
1868
- "grad_norm": 0.12806957960128784,
1869
- "learning_rate": 3.690036900369004e-06,
1870
- "loss": 0.3406,
1871
- "step": 266
1872
- },
1873
- {
1874
- "epoch": 1.9312839059674503,
1875
- "grad_norm": 0.11929921805858612,
1876
- "learning_rate": 3.3210332103321034e-06,
1877
- "loss": 0.3481,
1878
- "step": 267
1879
- },
1880
- {
1881
- "epoch": 1.938517179023508,
1882
- "grad_norm": 0.12515687942504883,
1883
- "learning_rate": 2.952029520295203e-06,
1884
- "loss": 0.345,
1885
- "step": 268
1886
- },
1887
- {
1888
- "epoch": 1.945750452079566,
1889
- "grad_norm": 0.11791153252124786,
1890
- "learning_rate": 2.5830258302583027e-06,
1891
- "loss": 0.3297,
1892
- "step": 269
1893
- },
1894
- {
1895
- "epoch": 1.952983725135624,
1896
- "grad_norm": 0.13056673109531403,
1897
- "learning_rate": 2.2140221402214023e-06,
1898
- "loss": 0.3939,
1899
- "step": 270
1900
- },
1901
- {
1902
- "epoch": 1.9602169981916817,
1903
- "grad_norm": 0.13385014235973358,
1904
- "learning_rate": 1.845018450184502e-06,
1905
- "loss": 0.3902,
1906
- "step": 271
1907
- },
1908
- {
1909
- "epoch": 1.9674502712477397,
1910
- "grad_norm": 0.1214594915509224,
1911
- "learning_rate": 1.4760147601476015e-06,
1912
- "loss": 0.3336,
1913
- "step": 272
1914
- },
1915
- {
1916
- "epoch": 1.9746835443037973,
1917
- "grad_norm": 0.1306677609682083,
1918
- "learning_rate": 1.1070110701107011e-06,
1919
- "loss": 0.3614,
1920
- "step": 273
1921
- },
1922
- {
1923
- "epoch": 1.9819168173598554,
1924
- "grad_norm": 0.12312816828489304,
1925
- "learning_rate": 7.380073800738008e-07,
1926
- "loss": 0.3337,
1927
- "step": 274
1928
- },
1929
- {
1930
- "epoch": 1.9891500904159132,
1931
- "grad_norm": 0.11654796451330185,
1932
- "learning_rate": 3.690036900369004e-07,
1933
- "loss": 0.3406,
1934
- "step": 275
1935
- },
1936
- {
1937
- "epoch": 1.996383363471971,
1938
- "grad_norm": 0.12927745282649994,
1939
- "learning_rate": 0.0,
1940
- "loss": 0.3392,
1941
- "step": 276
1942
  }
1943
  ],
1944
  "logging_steps": 1,
@@ -1953,12 +1421,12 @@
1953
  "should_evaluate": false,
1954
  "should_log": false,
1955
  "should_save": true,
1956
- "should_training_stop": true
1957
  },
1958
  "attributes": {}
1959
  }
1960
  },
1961
- "total_flos": 1.1916352503043277e+19,
1962
  "train_batch_size": 4,
1963
  "trial_name": null,
1964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4466546112115732,
5
  "eval_steps": 1000,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1407
  "learning_rate": 2.8044280442804427e-05,
1408
  "loss": 0.3215,
1409
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1410
  }
1411
  ],
1412
  "logging_steps": 1,
 
1421
  "should_evaluate": false,
1422
  "should_log": false,
1423
  "should_save": true,
1424
+ "should_training_stop": false
1425
  },
1426
  "attributes": {}
1427
  }
1428
  },
1429
+ "total_flos": 8.61836422398301e+18,
1430
  "train_batch_size": 4,
1431
  "trial_name": null,
1432
  "trial_params": null