nbtpj commited on
Commit
1c53bf1
·
1 Parent(s): 39a1712

Training in progress, step 12500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:695ea22c16d59b8f8e7d8bcd0620d0c0f0cb7c3578a0fb43c564437f3ba1279f
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa988a042b2cdca0db941098a7b0889a7f6ef2a60d3c2eecc5665835671501f
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73a52c17999eaef626df679c5f03ad9c53b61713db7706f9c3432222c1b6c6a3
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bd601333f9b24ae84ddecb08d08fa20af94e90b0ef9fe6d30560e699c3000f
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e5f8f9f68a550cc99dfdaed2042005f3b6ddc4eafa0827c34c6b5a1e335d304
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f4192ba426584b0e6f477cc4c524f9890045d0a7e9f4c59e75fdc81e08123b7
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d52951fc965683e697127d8227f41545dee9b14abb397312476c1d018a09dfd
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2094fcc97958c7c57904b53d4a0a0469e8a2a4a35877adb27094a24fdfc3798
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.149029070435482,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -606,11 +606,161 @@
606
  "learning_rate": 1.8083748244606157e-05,
607
  "loss": 2.3654,
608
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
  "max_steps": 15666,
612
  "num_train_epochs": 2,
613
- "total_flos": 3.953048407805952e+16,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4362863380443525,
5
+ "global_step": 12500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
606
  "learning_rate": 1.8083748244606157e-05,
607
  "loss": 2.3654,
608
  "step": 10000
609
+ },
610
+ {
611
+ "epoch": 1.16,
612
+ "learning_rate": 1.7764585727052216e-05,
613
+ "loss": 2.3825,
614
+ "step": 10100
615
+ },
616
+ {
617
+ "epoch": 1.17,
618
+ "learning_rate": 1.7445423209498275e-05,
619
+ "loss": 2.3245,
620
+ "step": 10200
621
+ },
622
+ {
623
+ "epoch": 1.18,
624
+ "learning_rate": 1.712626069194434e-05,
625
+ "loss": 2.2869,
626
+ "step": 10300
627
+ },
628
+ {
629
+ "epoch": 1.19,
630
+ "learning_rate": 1.68070981743904e-05,
631
+ "loss": 2.3207,
632
+ "step": 10400
633
+ },
634
+ {
635
+ "epoch": 1.21,
636
+ "learning_rate": 1.648793565683646e-05,
637
+ "loss": 2.3041,
638
+ "step": 10500
639
+ },
640
+ {
641
+ "epoch": 1.22,
642
+ "learning_rate": 1.6168773139282523e-05,
643
+ "loss": 2.3543,
644
+ "step": 10600
645
+ },
646
+ {
647
+ "epoch": 1.23,
648
+ "learning_rate": 1.5849610621728586e-05,
649
+ "loss": 2.3026,
650
+ "step": 10700
651
+ },
652
+ {
653
+ "epoch": 1.24,
654
+ "learning_rate": 1.553044810417465e-05,
655
+ "loss": 2.2919,
656
+ "step": 10800
657
+ },
658
+ {
659
+ "epoch": 1.25,
660
+ "learning_rate": 1.5211285586620708e-05,
661
+ "loss": 2.3805,
662
+ "step": 10900
663
+ },
664
+ {
665
+ "epoch": 1.26,
666
+ "learning_rate": 1.4892123069066769e-05,
667
+ "loss": 2.3523,
668
+ "step": 11000
669
+ },
670
+ {
671
+ "epoch": 1.28,
672
+ "learning_rate": 1.4572960551512832e-05,
673
+ "loss": 2.2931,
674
+ "step": 11100
675
+ },
676
+ {
677
+ "epoch": 1.29,
678
+ "learning_rate": 1.4253798033958893e-05,
679
+ "loss": 2.2623,
680
+ "step": 11200
681
+ },
682
+ {
683
+ "epoch": 1.3,
684
+ "learning_rate": 1.3934635516404954e-05,
685
+ "loss": 2.28,
686
+ "step": 11300
687
+ },
688
+ {
689
+ "epoch": 1.31,
690
+ "learning_rate": 1.3615472998851015e-05,
691
+ "loss": 2.3121,
692
+ "step": 11400
693
+ },
694
+ {
695
+ "epoch": 1.32,
696
+ "learning_rate": 1.3296310481297078e-05,
697
+ "loss": 2.3111,
698
+ "step": 11500
699
+ },
700
+ {
701
+ "epoch": 1.33,
702
+ "learning_rate": 1.2977147963743139e-05,
703
+ "loss": 2.332,
704
+ "step": 11600
705
+ },
706
+ {
707
+ "epoch": 1.34,
708
+ "learning_rate": 1.26579854461892e-05,
709
+ "loss": 2.3495,
710
+ "step": 11700
711
+ },
712
+ {
713
+ "epoch": 1.36,
714
+ "learning_rate": 1.233882292863526e-05,
715
+ "loss": 2.2864,
716
+ "step": 11800
717
+ },
718
+ {
719
+ "epoch": 1.37,
720
+ "learning_rate": 1.2019660411081324e-05,
721
+ "loss": 2.3482,
722
+ "step": 11900
723
+ },
724
+ {
725
+ "epoch": 1.38,
726
+ "learning_rate": 1.1700497893527385e-05,
727
+ "loss": 2.2843,
728
+ "step": 12000
729
+ },
730
+ {
731
+ "epoch": 1.39,
732
+ "learning_rate": 1.1381335375973447e-05,
733
+ "loss": 2.3075,
734
+ "step": 12100
735
+ },
736
+ {
737
+ "epoch": 1.4,
738
+ "learning_rate": 1.1062172858419507e-05,
739
+ "loss": 2.3899,
740
+ "step": 12200
741
+ },
742
+ {
743
+ "epoch": 1.41,
744
+ "learning_rate": 1.074301034086557e-05,
745
+ "loss": 2.3496,
746
+ "step": 12300
747
+ },
748
+ {
749
+ "epoch": 1.42,
750
+ "learning_rate": 1.042384782331163e-05,
751
+ "loss": 2.3482,
752
+ "step": 12400
753
+ },
754
+ {
755
+ "epoch": 1.44,
756
+ "learning_rate": 1.0104685305757693e-05,
757
+ "loss": 2.2579,
758
+ "step": 12500
759
  }
760
  ],
761
  "max_steps": 15666,
762
  "num_train_epochs": 2,
763
+ "total_flos": 4.940417519751168e+16,
764
  "trial_name": null,
765
  "trial_params": null
766
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73a52c17999eaef626df679c5f03ad9c53b61713db7706f9c3432222c1b6c6a3
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24bd601333f9b24ae84ddecb08d08fa20af94e90b0ef9fe6d30560e699c3000f
3
  size 557969145
runs/Jan01_04-28-26_6ea5fb75c867/events.out.tfevents.1672547319.6ea5fb75c867.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f511a742a4b5f180697e41094bff1017383d78f0e31e1610835895acb5b91dcc
3
- size 20535
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b418208d85d0b6e99ed4edaaf8699502243c81bf86dcb36a87f1a130a498c62a
3
+ size 24460