Upload folder using huggingface_hub
- adapter_model.safetensors +1 -1
- optimizer.pt +2 -2
- scheduler.pt +1 -1
- trainer_state.json +4 -536
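The commit title indicates the files were pushed with the `huggingface_hub` client. A minimal sketch of how such a checkpoint folder is typically uploaded, assuming a hypothetical repo id and local path (large binaries like the `.safetensors` and `.pt` files below are routed through Git LFS automatically):

```python
# Minimal sketch: push a local checkpoint folder to the Hub with huggingface_hub.
# The repo id and folder path are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from HF_TOKEN or the cached login
api.upload_folder(
    repo_id="user/my-model",          # hypothetical target repo
    folder_path="./checkpoint-200",   # hypothetical local checkpoint dir
    commit_message="Upload folder using huggingface_hub",
)
```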
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:91c399c7fab17d481c5bbf1dfbd90dbeebecebbd329a17a2dde1aa51acda686a
 size 3313653480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0fb50d16fc9c5c8e728744bd029ccecdc61a224d890aaf064309981ad5194cb9
+size 1661301780
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3b6ef9ad0d92f6fffee2bdaedbc1e0b68b977b45a2ed7ec889f6406883a665cf
 size 1064
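Each of the three binary files above is stored as a Git LFS pointer: a small text stub recording the spec version, the sha256 of the real payload, and its byte size (the old oids were not captured in this view). A sketch for checking a downloaded artifact against its pointer, with hypothetical local paths:

```python
# Sketch: verify a downloaded artifact against its Git LFS pointer file.
# Both file paths are hypothetical placeholders.
import hashlib

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse the 'key value' lines of a Git LFS pointer file."""
    fields = {}
    with open(pointer_path, "r") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB payloads don't load into memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

pointer = parse_lfs_pointer("adapter_model.safetensors.pointer")  # hypothetical
digest = sha256_of("adapter_model.safetensors")                   # hypothetical
assert pointer["oid"] == f"sha256:{digest}", "payload does not match pointer"
```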
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.4466546112115732,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
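The new header is internally consistent with the log below: 1.4466546112115732 / 200 steps gives an epoch increment of about 0.00723327 per optimizer step, which equals 4/553 exactly. That would fit, for example, 553 micro-batches per epoch with gradient accumulation of 4, though any equivalent ratio (e.g. 1106 batches with accumulation 8) fits equally well; the diff itself does not confirm either. A quick check:

```python
# Quick consistency check of the logged epoch against global_step.
# The 4/553 decomposition is one plausible reading, not stated in the diff.
from fractions import Fraction

epoch_per_step = Fraction(4, 553)   # hypothetical: 553 micro-batches, grad accum 4
print(float(200 * epoch_per_step))  # 1.4466546112115732, matching the header
```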
@@ -1407,538 +1407,6 @@
       "learning_rate": 2.8044280442804427e-05,
       "loss": 0.3215,
       "step": 200
-    },
-    {
-      "epoch": 1.453887884267631,
-      "grad_norm": 0.13975730538368225,
-      "learning_rate": 2.767527675276753e-05,
-      "loss": 0.3254,
-      "step": 201
-    },
-    {
-      "epoch": 1.461121157323689,
-      "grad_norm": 0.12640978395938873,
-      "learning_rate": 2.730627306273063e-05,
-      "loss": 0.3525,
-      "step": 202
-    },
-    {
-      "epoch": 1.4683544303797469,
-      "grad_norm": 0.11131294071674347,
-      "learning_rate": 2.693726937269373e-05,
-      "loss": 0.3132,
-      "step": 203
-    },
-    {
-      "epoch": 1.4755877034358047,
-      "grad_norm": 0.12206707894802094,
-      "learning_rate": 2.6568265682656828e-05,
-      "loss": 0.3561,
-      "step": 204
-    },
-    {
-      "epoch": 1.4828209764918625,
-      "grad_norm": 0.11679227650165558,
-      "learning_rate": 2.619926199261993e-05,
-      "loss": 0.3393,
-      "step": 205
-    },
-    {
-      "epoch": 1.4900542495479203,
-      "grad_norm": 0.12166301161050797,
-      "learning_rate": 2.5830258302583026e-05,
-      "loss": 0.3436,
-      "step": 206
-    },
-    {
-      "epoch": 1.4972875226039783,
-      "grad_norm": 0.13859513401985168,
-      "learning_rate": 2.5461254612546127e-05,
-      "loss": 0.3722,
-      "step": 207
-    },
-    {
-      "epoch": 1.5045207956600362,
-      "grad_norm": 0.11228498816490173,
-      "learning_rate": 2.5092250922509224e-05,
-      "loss": 0.3189,
-      "step": 208
-    },
-    {
-      "epoch": 1.511754068716094,
-      "grad_norm": 0.11623143404722214,
-      "learning_rate": 2.472324723247233e-05,
-      "loss": 0.3731,
-      "step": 209
-    },
-    {
-      "epoch": 1.518987341772152,
-      "grad_norm": 0.1316087245941162,
-      "learning_rate": 2.4354243542435426e-05,
-      "loss": 0.3364,
-      "step": 210
-    },
-    {
-      "epoch": 1.5262206148282098,
-      "grad_norm": 0.12064289301633835,
-      "learning_rate": 2.3985239852398524e-05,
-      "loss": 0.3511,
-      "step": 211
-    },
-    {
-      "epoch": 1.5334538878842676,
-      "grad_norm": 0.14924070239067078,
-      "learning_rate": 2.3616236162361624e-05,
-      "loss": 0.3313,
-      "step": 212
-    },
-    {
-      "epoch": 1.5406871609403257,
-      "grad_norm": 0.10872308164834976,
-      "learning_rate": 2.3247232472324722e-05,
-      "loss": 0.3186,
-      "step": 213
-    },
-    {
-      "epoch": 1.5479204339963832,
-      "grad_norm": 0.12435383349657059,
-      "learning_rate": 2.2878228782287826e-05,
-      "loss": 0.3382,
-      "step": 214
-    },
-    {
-      "epoch": 1.5551537070524413,
-      "grad_norm": 0.12237284332513809,
-      "learning_rate": 2.2509225092250924e-05,
-      "loss": 0.3427,
-      "step": 215
-    },
-    {
-      "epoch": 1.562386980108499,
-      "grad_norm": 0.1082320362329483,
-      "learning_rate": 2.2140221402214025e-05,
-      "loss": 0.3141,
-      "step": 216
-    },
-    {
-      "epoch": 1.5696202531645569,
-      "grad_norm": 0.12488240003585815,
-      "learning_rate": 2.1771217712177122e-05,
-      "loss": 0.3062,
-      "step": 217
-    },
-    {
-      "epoch": 1.576853526220615,
-      "grad_norm": 0.1263773888349533,
-      "learning_rate": 2.140221402214022e-05,
-      "loss": 0.3477,
-      "step": 218
-    },
-    {
-      "epoch": 1.5840867992766727,
-      "grad_norm": 0.11632055044174194,
-      "learning_rate": 2.1033210332103324e-05,
-      "loss": 0.3558,
-      "step": 219
-    },
-    {
-      "epoch": 1.5913200723327305,
-      "grad_norm": 0.13615989685058594,
-      "learning_rate": 2.066420664206642e-05,
-      "loss": 0.3806,
-      "step": 220
-    },
-    {
-      "epoch": 1.5985533453887886,
-      "grad_norm": 0.17589685320854187,
-      "learning_rate": 2.0295202952029522e-05,
-      "loss": 0.3327,
-      "step": 221
-    },
-    {
-      "epoch": 1.6057866184448462,
-      "grad_norm": 0.1255197674036026,
-      "learning_rate": 1.992619926199262e-05,
-      "loss": 0.3582,
-      "step": 222
-    },
-    {
-      "epoch": 1.6130198915009042,
-      "grad_norm": 0.29970669746398926,
-      "learning_rate": 1.955719557195572e-05,
-      "loss": 0.3587,
-      "step": 223
-    },
-    {
-      "epoch": 1.620253164556962,
-      "grad_norm": 0.12951691448688507,
-      "learning_rate": 1.918819188191882e-05,
-      "loss": 0.3527,
-      "step": 224
-    },
-    {
-      "epoch": 1.6274864376130198,
-      "grad_norm": 0.2785731256008148,
-      "learning_rate": 1.881918819188192e-05,
-      "loss": 0.338,
-      "step": 225
-    },
-    {
-      "epoch": 1.6347197106690778,
-      "grad_norm": 0.12442605197429657,
-      "learning_rate": 1.845018450184502e-05,
-      "loss": 0.3606,
-      "step": 226
-    },
-    {
-      "epoch": 1.6419529837251357,
-      "grad_norm": 0.12413132935762405,
-      "learning_rate": 1.8081180811808117e-05,
-      "loss": 0.3164,
-      "step": 227
-    },
-    {
-      "epoch": 1.6491862567811935,
-      "grad_norm": 0.1774081587791443,
-      "learning_rate": 1.771217712177122e-05,
-      "loss": 0.3408,
-      "step": 228
-    },
-    {
-      "epoch": 1.6564195298372515,
-      "grad_norm": 0.12615852057933807,
-      "learning_rate": 1.734317343173432e-05,
-      "loss": 0.3433,
-      "step": 229
-    },
-    {
-      "epoch": 1.663652802893309,
-      "grad_norm": 0.1367713063955307,
-      "learning_rate": 1.6974169741697417e-05,
-      "loss": 0.3642,
-      "step": 230
-    },
-    {
-      "epoch": 1.6708860759493671,
-      "grad_norm": 0.12680459022521973,
-      "learning_rate": 1.6605166051660518e-05,
-      "loss": 0.3828,
-      "step": 231
-    },
-    {
-      "epoch": 1.678119349005425,
-      "grad_norm": 0.12927737832069397,
-      "learning_rate": 1.6236162361623615e-05,
-      "loss": 0.3253,
-      "step": 232
-    },
-    {
-      "epoch": 1.6853526220614827,
-      "grad_norm": 0.11796507984399796,
-      "learning_rate": 1.5867158671586716e-05,
-      "loss": 0.3763,
-      "step": 233
-    },
-    {
-      "epoch": 1.6925858951175408,
-      "grad_norm": 0.12181632965803146,
-      "learning_rate": 1.5498154981549817e-05,
-      "loss": 0.3311,
-      "step": 234
-    },
-    {
-      "epoch": 1.6998191681735986,
-      "grad_norm": 0.11845839768648148,
-      "learning_rate": 1.5129151291512916e-05,
-      "loss": 0.3718,
-      "step": 235
-    },
-    {
-      "epoch": 1.7070524412296564,
-      "grad_norm": 0.11736506223678589,
-      "learning_rate": 1.4760147601476015e-05,
-      "loss": 0.3225,
-      "step": 236
-    },
-    {
-      "epoch": 1.7142857142857144,
-      "grad_norm": 0.12600649893283844,
-      "learning_rate": 1.4391143911439114e-05,
-      "loss": 0.3309,
-      "step": 237
-    },
-    {
-      "epoch": 1.721518987341772,
-      "grad_norm": 0.12421372532844543,
-      "learning_rate": 1.4022140221402214e-05,
-      "loss": 0.3516,
-      "step": 238
-    },
-    {
-      "epoch": 1.72875226039783,
-      "grad_norm": 0.1250220090150833,
-      "learning_rate": 1.3653136531365315e-05,
-      "loss": 0.3634,
-      "step": 239
-    },
-    {
-      "epoch": 1.7359855334538878,
-      "grad_norm": 0.12365727126598358,
-      "learning_rate": 1.3284132841328414e-05,
-      "loss": 0.3827,
-      "step": 240
-    },
-    {
-      "epoch": 1.7432188065099457,
-      "grad_norm": 0.12409546226263046,
-      "learning_rate": 1.2915129151291513e-05,
-      "loss": 0.3443,
-      "step": 241
-    },
-    {
-      "epoch": 1.7504520795660037,
-      "grad_norm": 0.1293025016784668,
-      "learning_rate": 1.2546125461254612e-05,
-      "loss": 0.3284,
-      "step": 242
-    },
-    {
-      "epoch": 1.7576853526220615,
-      "grad_norm": 0.12537458539009094,
-      "learning_rate": 1.2177121771217713e-05,
-      "loss": 0.3196,
-      "step": 243
-    },
-    {
-      "epoch": 1.7649186256781193,
-      "grad_norm": 0.13035526871681213,
-      "learning_rate": 1.1808118081180812e-05,
-      "loss": 0.3114,
-      "step": 244
-    },
-    {
-      "epoch": 1.7721518987341773,
-      "grad_norm": 0.15101519227027893,
-      "learning_rate": 1.1439114391143913e-05,
-      "loss": 0.3607,
-      "step": 245
-    },
-    {
-      "epoch": 1.779385171790235,
-      "grad_norm": 0.12607994675636292,
-      "learning_rate": 1.1070110701107012e-05,
-      "loss": 0.3202,
-      "step": 246
-    },
-    {
-      "epoch": 1.786618444846293,
-      "grad_norm": 0.12627242505550385,
-      "learning_rate": 1.070110701107011e-05,
-      "loss": 0.3394,
-      "step": 247
-    },
-    {
-      "epoch": 1.7938517179023508,
-      "grad_norm": 0.12351588159799576,
-      "learning_rate": 1.033210332103321e-05,
-      "loss": 0.3222,
-      "step": 248
-    },
-    {
-      "epoch": 1.8010849909584086,
-      "grad_norm": 0.12709592282772064,
-      "learning_rate": 9.96309963099631e-06,
-      "loss": 0.3392,
-      "step": 249
-    },
-    {
-      "epoch": 1.8083182640144666,
-      "grad_norm": 0.20409362018108368,
-      "learning_rate": 9.59409594095941e-06,
-      "loss": 0.3541,
-      "step": 250
-    },
-    {
-      "epoch": 1.8155515370705244,
-      "grad_norm": 0.13211952149868011,
-      "learning_rate": 9.22509225092251e-06,
-      "loss": 0.3798,
-      "step": 251
-    },
-    {
-      "epoch": 1.8227848101265822,
-      "grad_norm": 0.1471939980983734,
-      "learning_rate": 8.85608856088561e-06,
-      "loss": 0.3716,
-      "step": 252
-    },
-    {
-      "epoch": 1.8300180831826403,
-      "grad_norm": 0.12940147519111633,
-      "learning_rate": 8.487084870848708e-06,
-      "loss": 0.3324,
-      "step": 253
-    },
-    {
-      "epoch": 1.837251356238698,
-      "grad_norm": 0.1352042704820633,
-      "learning_rate": 8.118081180811808e-06,
-      "loss": 0.357,
-      "step": 254
-    },
-    {
-      "epoch": 1.8444846292947559,
-      "grad_norm": 0.12222684174776077,
-      "learning_rate": 7.749077490774908e-06,
-      "loss": 0.3262,
-      "step": 255
-    },
-    {
-      "epoch": 1.851717902350814,
-      "grad_norm": 0.12854433059692383,
-      "learning_rate": 7.380073800738008e-06,
-      "loss": 0.3452,
-      "step": 256
-    },
-    {
-      "epoch": 1.8589511754068715,
-      "grad_norm": 0.1557794213294983,
-      "learning_rate": 7.011070110701107e-06,
-      "loss": 0.3443,
-      "step": 257
-    },
-    {
-      "epoch": 1.8661844484629295,
-      "grad_norm": 0.12235873192548752,
-      "learning_rate": 6.642066420664207e-06,
-      "loss": 0.3185,
-      "step": 258
-    },
-    {
-      "epoch": 1.8734177215189873,
-      "grad_norm": 0.12504766881465912,
-      "learning_rate": 6.273062730627306e-06,
-      "loss": 0.356,
-      "step": 259
-    },
-    {
-      "epoch": 1.8806509945750451,
-      "grad_norm": 0.1318463236093521,
-      "learning_rate": 5.904059040590406e-06,
-      "loss": 0.3276,
-      "step": 260
-    },
-    {
-      "epoch": 1.8878842676311032,
-      "grad_norm": 0.12830232083797455,
-      "learning_rate": 5.535055350553506e-06,
-      "loss": 0.3242,
-      "step": 261
-    },
-    {
-      "epoch": 1.895117540687161,
-      "grad_norm": 0.12111414223909378,
-      "learning_rate": 5.166051660516605e-06,
-      "loss": 0.3703,
-      "step": 262
-    },
-    {
-      "epoch": 1.9023508137432188,
-      "grad_norm": 0.12544532120227814,
-      "learning_rate": 4.797047970479705e-06,
-      "loss": 0.3375,
-      "step": 263
-    },
-    {
-      "epoch": 1.9095840867992768,
-      "grad_norm": 0.12667147815227509,
-      "learning_rate": 4.428044280442805e-06,
-      "loss": 0.326,
-      "step": 264
-    },
-    {
-      "epoch": 1.9168173598553344,
-      "grad_norm": 0.11932243406772614,
-      "learning_rate": 4.059040590405904e-06,
-      "loss": 0.372,
-      "step": 265
-    },
-    {
-      "epoch": 1.9240506329113924,
-      "grad_norm": 0.12806957960128784,
-      "learning_rate": 3.690036900369004e-06,
-      "loss": 0.3406,
-      "step": 266
-    },
-    {
-      "epoch": 1.9312839059674503,
-      "grad_norm": 0.11929921805858612,
-      "learning_rate": 3.3210332103321034e-06,
-      "loss": 0.3481,
-      "step": 267
-    },
-    {
-      "epoch": 1.938517179023508,
-      "grad_norm": 0.12515687942504883,
-      "learning_rate": 2.952029520295203e-06,
-      "loss": 0.345,
-      "step": 268
-    },
-    {
-      "epoch": 1.945750452079566,
-      "grad_norm": 0.11791153252124786,
-      "learning_rate": 2.5830258302583027e-06,
-      "loss": 0.3297,
-      "step": 269
-    },
-    {
-      "epoch": 1.952983725135624,
-      "grad_norm": 0.13056673109531403,
-      "learning_rate": 2.2140221402214023e-06,
-      "loss": 0.3939,
-      "step": 270
-    },
-    {
-      "epoch": 1.9602169981916817,
-      "grad_norm": 0.13385014235973358,
-      "learning_rate": 1.845018450184502e-06,
-      "loss": 0.3902,
-      "step": 271
-    },
-    {
-      "epoch": 1.9674502712477397,
-      "grad_norm": 0.1214594915509224,
-      "learning_rate": 1.4760147601476015e-06,
-      "loss": 0.3336,
-      "step": 272
-    },
-    {
-      "epoch": 1.9746835443037973,
-      "grad_norm": 0.1306677609682083,
-      "learning_rate": 1.1070110701107011e-06,
-      "loss": 0.3614,
-      "step": 273
-    },
-    {
-      "epoch": 1.9819168173598554,
-      "grad_norm": 0.12312816828489304,
-      "learning_rate": 7.380073800738008e-07,
-      "loss": 0.3337,
-      "step": 274
-    },
-    {
-      "epoch": 1.9891500904159132,
-      "grad_norm": 0.11654796451330185,
-      "learning_rate": 3.690036900369004e-07,
-      "loss": 0.3406,
-      "step": 275
-    },
-    {
-      "epoch": 1.996383363471971,
-      "grad_norm": 0.12927745282649994,
-      "learning_rate": 0.0,
-      "loss": 0.3392,
-      "step": 276
     }
   ],
   "logging_steps": 1,
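The removed learning-rate column decays in exact steps of 3.690036900369e-07 = 1e-4 / 271 and reaches 0.0 at step 276, which is consistent with a linear schedule peaking at 1e-4 over 276 total steps with 5 warmup steps (276 - 271 = 5). A sketch reproducing that shape with the standard transformers scheduler; the peak LR, warmup, and total steps are inferred from the log, not stated in the diff:

```python
# Sketch: the logged learning rates match a linear schedule with warmup.
# Peak LR 1e-4, 5 warmup steps, and 276 total steps are inferred assumptions.
import torch
from transformers import get_linear_schedule_with_warmup

params = [torch.nn.Parameter(torch.zeros(1))]  # dummy parameter
optimizer = torch.optim.AdamW(params, lr=1e-4)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=5, num_training_steps=276
)

for step in range(1, 277):
    optimizer.step()
    scheduler.step()
    if step == 200:
        # ~2.8044280442804427e-05, matching the logged value at step 200
        print(scheduler.get_last_lr()[0])
```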
@@ -1953,12 +1421,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 8.61836422398301e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
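Net effect: the uploaded trainer_state.json rewinds the saved run from its end (step 276, where the learning rate hits 0.0) back to the step-200 state, with the log history truncated accordingly and should_training_stop reset to false. A quick way to sanity-check such a state file before resuming, assuming a hypothetical local checkpoint path:

```python
# Sketch: inspect a checkpoint's trainer_state.json before resuming training.
# The checkpoint path is a hypothetical placeholder.
import json

with open("checkpoint-200/trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

print(state["global_step"])              # 200
print(state["epoch"])                    # 1.4466546112115732
print(state["log_history"][-1]["step"])  # 200, history truncated at the checkpoint
control = state["stateful_callbacks"]["TrainerControl"]["args"]
print(control["should_training_stop"])   # False, so training can continue
```

Pointing `Trainer.train(resume_from_checkpoint=...)` at such a folder would pick the run back up from step 200, which is presumably the intent of re-uploading this earlier state.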