Training in progress, step 15000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +153 -3
- pytorch_model.bin +1 -1
- runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1115513717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38bbc7cd108d4066f57dece94d18666a0619186fb83ead8f178ba5374f85c4d4
|
3 |
size 1115513717
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557969145
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b3390a170051de185cec86c8c4131be12aeb79afd4f2697385bcd3c5fe72132
|
3 |
size 557969145
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15523
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d8425ca758719b66e03746ec5ac1a9a95c444f7ea14cfc4567c5f50fa51c740
|
3 |
size 15523
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03778c2e3b15a96003ceff9d06ee5b9ecdadc81dcfa36477cad7912647bf1ff9
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -756,11 +756,161 @@
|
|
756 |
"learning_rate": 1.0122503668729662e-05,
|
757 |
"loss": 0.6705,
|
758 |
"step": 12500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
759 |
}
|
760 |
],
|
761 |
"max_steps": 15673,
|
762 |
"num_train_epochs": 2,
|
763 |
-
"total_flos":
|
764 |
"trial_name": null,
|
765 |
"trial_params": null
|
766 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.7227518088893994,
|
5 |
+
"global_step": 15000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
756 |
"learning_rate": 1.0122503668729662e-05,
|
757 |
"loss": 0.6705,
|
758 |
"step": 12500
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 1.45,
|
762 |
+
"learning_rate": 9.8034836980795e-06,
|
763 |
+
"loss": 0.5761,
|
764 |
+
"step": 12600
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 1.46,
|
768 |
+
"learning_rate": 9.484463727429338e-06,
|
769 |
+
"loss": 0.5709,
|
770 |
+
"step": 12700
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"epoch": 1.47,
|
774 |
+
"learning_rate": 9.165443756779176e-06,
|
775 |
+
"loss": 0.562,
|
776 |
+
"step": 12800
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 1.48,
|
780 |
+
"learning_rate": 8.846423786129014e-06,
|
781 |
+
"loss": 0.6113,
|
782 |
+
"step": 12900
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"epoch": 1.49,
|
786 |
+
"learning_rate": 8.52740381547885e-06,
|
787 |
+
"loss": 0.6137,
|
788 |
+
"step": 13000
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"epoch": 1.5,
|
792 |
+
"learning_rate": 8.208383844828686e-06,
|
793 |
+
"loss": 0.5669,
|
794 |
+
"step": 13100
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 1.52,
|
798 |
+
"learning_rate": 7.889363874178523e-06,
|
799 |
+
"loss": 0.6175,
|
800 |
+
"step": 13200
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 1.53,
|
804 |
+
"learning_rate": 7.570343903528361e-06,
|
805 |
+
"loss": 0.6069,
|
806 |
+
"step": 13300
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 1.54,
|
810 |
+
"learning_rate": 7.251323932878198e-06,
|
811 |
+
"loss": 0.5949,
|
812 |
+
"step": 13400
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"epoch": 1.55,
|
816 |
+
"learning_rate": 6.932303962228036e-06,
|
817 |
+
"loss": 0.6207,
|
818 |
+
"step": 13500
|
819 |
+
},
|
820 |
+
{
|
821 |
+
"epoch": 1.56,
|
822 |
+
"learning_rate": 6.613283991577873e-06,
|
823 |
+
"loss": 0.596,
|
824 |
+
"step": 13600
|
825 |
+
},
|
826 |
+
{
|
827 |
+
"epoch": 1.57,
|
828 |
+
"learning_rate": 6.294264020927711e-06,
|
829 |
+
"loss": 0.5972,
|
830 |
+
"step": 13700
|
831 |
+
},
|
832 |
+
{
|
833 |
+
"epoch": 1.58,
|
834 |
+
"learning_rate": 5.975244050277548e-06,
|
835 |
+
"loss": 0.6127,
|
836 |
+
"step": 13800
|
837 |
+
},
|
838 |
+
{
|
839 |
+
"epoch": 1.6,
|
840 |
+
"learning_rate": 5.6562240796273845e-06,
|
841 |
+
"loss": 0.6214,
|
842 |
+
"step": 13900
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"epoch": 1.61,
|
846 |
+
"learning_rate": 5.337204108977222e-06,
|
847 |
+
"loss": 0.6009,
|
848 |
+
"step": 14000
|
849 |
+
},
|
850 |
+
{
|
851 |
+
"epoch": 1.62,
|
852 |
+
"learning_rate": 5.018184138327059e-06,
|
853 |
+
"loss": 0.628,
|
854 |
+
"step": 14100
|
855 |
+
},
|
856 |
+
{
|
857 |
+
"epoch": 1.63,
|
858 |
+
"learning_rate": 4.699164167676897e-06,
|
859 |
+
"loss": 0.6096,
|
860 |
+
"step": 14200
|
861 |
+
},
|
862 |
+
{
|
863 |
+
"epoch": 1.64,
|
864 |
+
"learning_rate": 4.380144197026735e-06,
|
865 |
+
"loss": 0.5396,
|
866 |
+
"step": 14300
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 1.65,
|
870 |
+
"learning_rate": 4.061124226376571e-06,
|
871 |
+
"loss": 0.554,
|
872 |
+
"step": 14400
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"epoch": 1.67,
|
876 |
+
"learning_rate": 3.7421042557264087e-06,
|
877 |
+
"loss": 0.5867,
|
878 |
+
"step": 14500
|
879 |
+
},
|
880 |
+
{
|
881 |
+
"epoch": 1.68,
|
882 |
+
"learning_rate": 3.423084285076246e-06,
|
883 |
+
"loss": 0.584,
|
884 |
+
"step": 14600
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 1.69,
|
888 |
+
"learning_rate": 3.1040643144260834e-06,
|
889 |
+
"loss": 0.587,
|
890 |
+
"step": 14700
|
891 |
+
},
|
892 |
+
{
|
893 |
+
"epoch": 1.7,
|
894 |
+
"learning_rate": 2.7850443437759203e-06,
|
895 |
+
"loss": 0.605,
|
896 |
+
"step": 14800
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"epoch": 1.71,
|
900 |
+
"learning_rate": 2.4660243731257577e-06,
|
901 |
+
"loss": 0.5905,
|
902 |
+
"step": 14900
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 1.72,
|
906 |
+
"learning_rate": 2.147004402475595e-06,
|
907 |
+
"loss": 0.6208,
|
908 |
+
"step": 15000
|
909 |
}
|
910 |
],
|
911 |
"max_steps": 15673,
|
912 |
"num_train_epochs": 2,
|
913 |
+
"total_flos": 5.929193491393536e+16,
|
914 |
"trial_name": null,
|
915 |
"trial_params": null
|
916 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557969145
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b3390a170051de185cec86c8c4131be12aeb79afd4f2697385bcd3c5fe72132
|
3 |
size 557969145
|
runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42156792ecc5fb337e2c5b5d1ed6fdc0fe4bf653273b9d8e8c643e6dabea8931
|
3 |
+
size 28385
|