nbtpj commited on
Commit
3780e03
·
1 Parent(s): 1c53bf1

Training in progress, step 15000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fa988a042b2cdca0db941098a7b0889a7f6ef2a60d3c2eecc5665835671501f
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fadf2874e53143ee977132af8fb5bd7d426229c919e522908fe884ecb84f4345
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24bd601333f9b24ae84ddecb08d08fa20af94e90b0ef9fe6d30560e699c3000f
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c09b8c06d2814b8003bcbda40c13a2533f7e29d29db2312c80a18a4f8d1cdf9
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f4192ba426584b0e6f477cc4c524f9890045d0a7e9f4c59e75fdc81e08123b7
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3006cbdb81bbb28698a27ec8607059ef2927a18060ae014f8abac7d512aa09fb
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2094fcc97958c7c57904b53d4a0a0469e8a2a4a35877adb27094a24fdfc3798
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f33807aed54b7d234a4b5d087ac80aee63260ad15758612d4571a720e74319ce
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4362863380443525,
5
- "global_step": 12500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -756,11 +756,161 @@
756
  "learning_rate": 1.0104685305757693e-05,
757
  "loss": 2.2579,
758
  "step": 12500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
  }
760
  ],
761
  "max_steps": 15666,
762
  "num_train_epochs": 2,
763
- "total_flos": 4.940417519751168e+16,
764
  "trial_name": null,
765
  "trial_params": null
766
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.723543605653223,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
756
  "learning_rate": 1.0104685305757693e-05,
757
  "loss": 2.2579,
758
  "step": 12500
759
+ },
760
+ {
761
+ "epoch": 1.45,
762
+ "learning_rate": 9.785522788203753e-06,
763
+ "loss": 2.2689,
764
+ "step": 12600
765
+ },
766
+ {
767
+ "epoch": 1.46,
768
+ "learning_rate": 9.466360270649816e-06,
769
+ "loss": 2.3111,
770
+ "step": 12700
771
+ },
772
+ {
773
+ "epoch": 1.47,
774
+ "learning_rate": 9.147197753095877e-06,
775
+ "loss": 2.2794,
776
+ "step": 12800
777
+ },
778
+ {
779
+ "epoch": 1.48,
780
+ "learning_rate": 8.82803523554194e-06,
781
+ "loss": 2.2754,
782
+ "step": 12900
783
+ },
784
+ {
785
+ "epoch": 1.49,
786
+ "learning_rate": 8.508872717987999e-06,
787
+ "loss": 2.2837,
788
+ "step": 13000
789
+ },
790
+ {
791
+ "epoch": 1.51,
792
+ "learning_rate": 8.189710200434061e-06,
793
+ "loss": 2.2297,
794
+ "step": 13100
795
+ },
796
+ {
797
+ "epoch": 1.52,
798
+ "learning_rate": 7.870547682880124e-06,
799
+ "loss": 2.2969,
800
+ "step": 13200
801
+ },
802
+ {
803
+ "epoch": 1.53,
804
+ "learning_rate": 7.5513851653261844e-06,
805
+ "loss": 2.3363,
806
+ "step": 13300
807
+ },
808
+ {
809
+ "epoch": 1.54,
810
+ "learning_rate": 7.232222647772246e-06,
811
+ "loss": 2.2079,
812
+ "step": 13400
813
+ },
814
+ {
815
+ "epoch": 1.55,
816
+ "learning_rate": 6.9130601302183074e-06,
817
+ "loss": 2.3057,
818
+ "step": 13500
819
+ },
820
+ {
821
+ "epoch": 1.56,
822
+ "learning_rate": 6.593897612664369e-06,
823
+ "loss": 2.2665,
824
+ "step": 13600
825
+ },
826
+ {
827
+ "epoch": 1.57,
828
+ "learning_rate": 6.27473509511043e-06,
829
+ "loss": 2.2934,
830
+ "step": 13700
831
+ },
832
+ {
833
+ "epoch": 1.59,
834
+ "learning_rate": 5.9555725775564915e-06,
835
+ "loss": 2.2407,
836
+ "step": 13800
837
+ },
838
+ {
839
+ "epoch": 1.6,
840
+ "learning_rate": 5.636410060002553e-06,
841
+ "loss": 2.3309,
842
+ "step": 13900
843
+ },
844
+ {
845
+ "epoch": 1.61,
846
+ "learning_rate": 5.317247542448615e-06,
847
+ "loss": 2.3257,
848
+ "step": 14000
849
+ },
850
+ {
851
+ "epoch": 1.62,
852
+ "learning_rate": 4.998085024894677e-06,
853
+ "loss": 2.2518,
854
+ "step": 14100
855
+ },
856
+ {
857
+ "epoch": 1.63,
858
+ "learning_rate": 4.678922507340738e-06,
859
+ "loss": 2.2359,
860
+ "step": 14200
861
+ },
862
+ {
863
+ "epoch": 1.64,
864
+ "learning_rate": 4.3597599897868e-06,
865
+ "loss": 2.287,
866
+ "step": 14300
867
+ },
868
+ {
869
+ "epoch": 1.65,
870
+ "learning_rate": 4.040597472232861e-06,
871
+ "loss": 2.2462,
872
+ "step": 14400
873
+ },
874
+ {
875
+ "epoch": 1.67,
876
+ "learning_rate": 3.7214349546789228e-06,
877
+ "loss": 2.2815,
878
+ "step": 14500
879
+ },
880
+ {
881
+ "epoch": 1.68,
882
+ "learning_rate": 3.4022724371249842e-06,
883
+ "loss": 2.3361,
884
+ "step": 14600
885
+ },
886
+ {
887
+ "epoch": 1.69,
888
+ "learning_rate": 3.0831099195710457e-06,
889
+ "loss": 2.2774,
890
+ "step": 14700
891
+ },
892
+ {
893
+ "epoch": 1.7,
894
+ "learning_rate": 2.7639474020171072e-06,
895
+ "loss": 2.2895,
896
+ "step": 14800
897
+ },
898
+ {
899
+ "epoch": 1.71,
900
+ "learning_rate": 2.4447848844631687e-06,
901
+ "loss": 2.3155,
902
+ "step": 14900
903
+ },
904
+ {
905
+ "epoch": 1.72,
906
+ "learning_rate": 2.12562236690923e-06,
907
+ "loss": 2.2698,
908
+ "step": 15000
909
  }
910
  ],
911
  "max_steps": 15666,
912
  "num_train_epochs": 2,
913
+ "total_flos": 5.928520220872704e+16,
914
  "trial_name": null,
915
  "trial_params": null
916
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24bd601333f9b24ae84ddecb08d08fa20af94e90b0ef9fe6d30560e699c3000f
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c09b8c06d2814b8003bcbda40c13a2533f7e29d29db2312c80a18a4f8d1cdf9
3
  size 557969145
runs/Jan01_04-28-26_6ea5fb75c867/events.out.tfevents.1672547319.6ea5fb75c867.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b418208d85d0b6e99ed4edaaf8699502243c81bf86dcb36a87f1a130a498c62a
3
- size 24460
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b718e5a7190544c3a629ebc3b75370d8445066ffb8392744f21c6550117883a
3
+ size 28385