nbtpj committed on
Commit
55321c0
·
1 Parent(s): 5a153b1

Training in progress, step 15000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcf16c77fc4c696d5ecec66bb94ab1a72596ecc42da185a1509a0a37aa93d820
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38bbc7cd108d4066f57dece94d18666a0619186fb83ead8f178ba5374f85c4d4
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001276adc811be2b4e3383d1bfd82644ee01b63c562240704981e80abf57ea78
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3390a170051de185cec86c8c4131be12aeb79afd4f2697385bcd3c5fe72132
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ea4cb9a85921bdde5a8523300b183bff3f70a1aeb0368e6a4c669b696d1922c
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d8425ca758719b66e03746ec5ac1a9a95c444f7ea14cfc4567c5f50fa51c740
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af29429f2a56160c0a3cb3b03c7b28cc9e5f472234bebefa86da284747a0d6d1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03778c2e3b15a96003ceff9d06ee5b9ecdadc81dcfa36477cad7912647bf1ff9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4356265074078327,
5
- "global_step": 12500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -756,11 +756,161 @@
756
  "learning_rate": 1.0122503668729662e-05,
757
  "loss": 0.6705,
758
  "step": 12500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
  }
760
  ],
761
  "max_steps": 15673,
762
  "num_train_epochs": 2,
763
- "total_flos": 4.939903709604864e+16,
764
  "trial_name": null,
765
  "trial_params": null
766
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.7227518088893994,
5
+ "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
756
  "learning_rate": 1.0122503668729662e-05,
757
  "loss": 0.6705,
758
  "step": 12500
759
+ },
760
+ {
761
+ "epoch": 1.45,
762
+ "learning_rate": 9.8034836980795e-06,
763
+ "loss": 0.5761,
764
+ "step": 12600
765
+ },
766
+ {
767
+ "epoch": 1.46,
768
+ "learning_rate": 9.484463727429338e-06,
769
+ "loss": 0.5709,
770
+ "step": 12700
771
+ },
772
+ {
773
+ "epoch": 1.47,
774
+ "learning_rate": 9.165443756779176e-06,
775
+ "loss": 0.562,
776
+ "step": 12800
777
+ },
778
+ {
779
+ "epoch": 1.48,
780
+ "learning_rate": 8.846423786129014e-06,
781
+ "loss": 0.6113,
782
+ "step": 12900
783
+ },
784
+ {
785
+ "epoch": 1.49,
786
+ "learning_rate": 8.52740381547885e-06,
787
+ "loss": 0.6137,
788
+ "step": 13000
789
+ },
790
+ {
791
+ "epoch": 1.5,
792
+ "learning_rate": 8.208383844828686e-06,
793
+ "loss": 0.5669,
794
+ "step": 13100
795
+ },
796
+ {
797
+ "epoch": 1.52,
798
+ "learning_rate": 7.889363874178523e-06,
799
+ "loss": 0.6175,
800
+ "step": 13200
801
+ },
802
+ {
803
+ "epoch": 1.53,
804
+ "learning_rate": 7.570343903528361e-06,
805
+ "loss": 0.6069,
806
+ "step": 13300
807
+ },
808
+ {
809
+ "epoch": 1.54,
810
+ "learning_rate": 7.251323932878198e-06,
811
+ "loss": 0.5949,
812
+ "step": 13400
813
+ },
814
+ {
815
+ "epoch": 1.55,
816
+ "learning_rate": 6.932303962228036e-06,
817
+ "loss": 0.6207,
818
+ "step": 13500
819
+ },
820
+ {
821
+ "epoch": 1.56,
822
+ "learning_rate": 6.613283991577873e-06,
823
+ "loss": 0.596,
824
+ "step": 13600
825
+ },
826
+ {
827
+ "epoch": 1.57,
828
+ "learning_rate": 6.294264020927711e-06,
829
+ "loss": 0.5972,
830
+ "step": 13700
831
+ },
832
+ {
833
+ "epoch": 1.58,
834
+ "learning_rate": 5.975244050277548e-06,
835
+ "loss": 0.6127,
836
+ "step": 13800
837
+ },
838
+ {
839
+ "epoch": 1.6,
840
+ "learning_rate": 5.6562240796273845e-06,
841
+ "loss": 0.6214,
842
+ "step": 13900
843
+ },
844
+ {
845
+ "epoch": 1.61,
846
+ "learning_rate": 5.337204108977222e-06,
847
+ "loss": 0.6009,
848
+ "step": 14000
849
+ },
850
+ {
851
+ "epoch": 1.62,
852
+ "learning_rate": 5.018184138327059e-06,
853
+ "loss": 0.628,
854
+ "step": 14100
855
+ },
856
+ {
857
+ "epoch": 1.63,
858
+ "learning_rate": 4.699164167676897e-06,
859
+ "loss": 0.6096,
860
+ "step": 14200
861
+ },
862
+ {
863
+ "epoch": 1.64,
864
+ "learning_rate": 4.380144197026735e-06,
865
+ "loss": 0.5396,
866
+ "step": 14300
867
+ },
868
+ {
869
+ "epoch": 1.65,
870
+ "learning_rate": 4.061124226376571e-06,
871
+ "loss": 0.554,
872
+ "step": 14400
873
+ },
874
+ {
875
+ "epoch": 1.67,
876
+ "learning_rate": 3.7421042557264087e-06,
877
+ "loss": 0.5867,
878
+ "step": 14500
879
+ },
880
+ {
881
+ "epoch": 1.68,
882
+ "learning_rate": 3.423084285076246e-06,
883
+ "loss": 0.584,
884
+ "step": 14600
885
+ },
886
+ {
887
+ "epoch": 1.69,
888
+ "learning_rate": 3.1040643144260834e-06,
889
+ "loss": 0.587,
890
+ "step": 14700
891
+ },
892
+ {
893
+ "epoch": 1.7,
894
+ "learning_rate": 2.7850443437759203e-06,
895
+ "loss": 0.605,
896
+ "step": 14800
897
+ },
898
+ {
899
+ "epoch": 1.71,
900
+ "learning_rate": 2.4660243731257577e-06,
901
+ "loss": 0.5905,
902
+ "step": 14900
903
+ },
904
+ {
905
+ "epoch": 1.72,
906
+ "learning_rate": 2.147004402475595e-06,
907
+ "loss": 0.6208,
908
+ "step": 15000
909
  }
910
  ],
911
  "max_steps": 15673,
912
  "num_train_epochs": 2,
913
+ "total_flos": 5.929193491393536e+16,
914
  "trial_name": null,
915
  "trial_params": null
916
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001276adc811be2b4e3383d1bfd82644ee01b63c562240704981e80abf57ea78
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b3390a170051de185cec86c8c4131be12aeb79afd4f2697385bcd3c5fe72132
3
  size 557969145
runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f77dd77e2e1de62f98d722747a220a5ac16ebff7589985b9d63fb7e5c44b6c35
3
- size 24460
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42156792ecc5fb337e2c5b5d1ed6fdc0fe4bf653273b9d8e8c643e6dabea8931
3
+ size 28385