Training in progress, step 15000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +153 -3
- pytorch_model.bin +1 -1
- runs/Jan01_04-28-26_6ea5fb75c867/events.out.tfevents.1672547319.6ea5fb75c867.24.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1115513717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fadf2874e53143ee977132af8fb5bd7d426229c919e522908fe884ecb84f4345
|
3 |
size 1115513717
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557969145
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c09b8c06d2814b8003bcbda40c13a2533f7e29d29db2312c80a18a4f8d1cdf9
|
3 |
size 557969145
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15523
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3006cbdb81bbb28698a27ec8607059ef2927a18060ae014f8abac7d512aa09fb
|
3 |
size 15523
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f33807aed54b7d234a4b5d087ac80aee63260ad15758612d4571a720e74319ce
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -756,11 +756,161 @@
|
|
756 |
"learning_rate": 1.0104685305757693e-05,
|
757 |
"loss": 2.2579,
|
758 |
"step": 12500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
759 |
}
|
760 |
],
|
761 |
"max_steps": 15666,
|
762 |
"num_train_epochs": 2,
|
763 |
-
"total_flos":
|
764 |
"trial_name": null,
|
765 |
"trial_params": null
|
766 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.723543605653223,
|
5 |
+
"global_step": 15000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
756 |
"learning_rate": 1.0104685305757693e-05,
|
757 |
"loss": 2.2579,
|
758 |
"step": 12500
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 1.45,
|
762 |
+
"learning_rate": 9.785522788203753e-06,
|
763 |
+
"loss": 2.2689,
|
764 |
+
"step": 12600
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 1.46,
|
768 |
+
"learning_rate": 9.466360270649816e-06,
|
769 |
+
"loss": 2.3111,
|
770 |
+
"step": 12700
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"epoch": 1.47,
|
774 |
+
"learning_rate": 9.147197753095877e-06,
|
775 |
+
"loss": 2.2794,
|
776 |
+
"step": 12800
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 1.48,
|
780 |
+
"learning_rate": 8.82803523554194e-06,
|
781 |
+
"loss": 2.2754,
|
782 |
+
"step": 12900
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"epoch": 1.49,
|
786 |
+
"learning_rate": 8.508872717987999e-06,
|
787 |
+
"loss": 2.2837,
|
788 |
+
"step": 13000
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"epoch": 1.51,
|
792 |
+
"learning_rate": 8.189710200434061e-06,
|
793 |
+
"loss": 2.2297,
|
794 |
+
"step": 13100
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 1.52,
|
798 |
+
"learning_rate": 7.870547682880124e-06,
|
799 |
+
"loss": 2.2969,
|
800 |
+
"step": 13200
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 1.53,
|
804 |
+
"learning_rate": 7.5513851653261844e-06,
|
805 |
+
"loss": 2.3363,
|
806 |
+
"step": 13300
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 1.54,
|
810 |
+
"learning_rate": 7.232222647772246e-06,
|
811 |
+
"loss": 2.2079,
|
812 |
+
"step": 13400
|
813 |
+
},
|
814 |
+
{
|
815 |
+
"epoch": 1.55,
|
816 |
+
"learning_rate": 6.9130601302183074e-06,
|
817 |
+
"loss": 2.3057,
|
818 |
+
"step": 13500
|
819 |
+
},
|
820 |
+
{
|
821 |
+
"epoch": 1.56,
|
822 |
+
"learning_rate": 6.593897612664369e-06,
|
823 |
+
"loss": 2.2665,
|
824 |
+
"step": 13600
|
825 |
+
},
|
826 |
+
{
|
827 |
+
"epoch": 1.57,
|
828 |
+
"learning_rate": 6.27473509511043e-06,
|
829 |
+
"loss": 2.2934,
|
830 |
+
"step": 13700
|
831 |
+
},
|
832 |
+
{
|
833 |
+
"epoch": 1.59,
|
834 |
+
"learning_rate": 5.9555725775564915e-06,
|
835 |
+
"loss": 2.2407,
|
836 |
+
"step": 13800
|
837 |
+
},
|
838 |
+
{
|
839 |
+
"epoch": 1.6,
|
840 |
+
"learning_rate": 5.636410060002553e-06,
|
841 |
+
"loss": 2.3309,
|
842 |
+
"step": 13900
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"epoch": 1.61,
|
846 |
+
"learning_rate": 5.317247542448615e-06,
|
847 |
+
"loss": 2.3257,
|
848 |
+
"step": 14000
|
849 |
+
},
|
850 |
+
{
|
851 |
+
"epoch": 1.62,
|
852 |
+
"learning_rate": 4.998085024894677e-06,
|
853 |
+
"loss": 2.2518,
|
854 |
+
"step": 14100
|
855 |
+
},
|
856 |
+
{
|
857 |
+
"epoch": 1.63,
|
858 |
+
"learning_rate": 4.678922507340738e-06,
|
859 |
+
"loss": 2.2359,
|
860 |
+
"step": 14200
|
861 |
+
},
|
862 |
+
{
|
863 |
+
"epoch": 1.64,
|
864 |
+
"learning_rate": 4.3597599897868e-06,
|
865 |
+
"loss": 2.287,
|
866 |
+
"step": 14300
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 1.65,
|
870 |
+
"learning_rate": 4.040597472232861e-06,
|
871 |
+
"loss": 2.2462,
|
872 |
+
"step": 14400
|
873 |
+
},
|
874 |
+
{
|
875 |
+
"epoch": 1.67,
|
876 |
+
"learning_rate": 3.7214349546789228e-06,
|
877 |
+
"loss": 2.2815,
|
878 |
+
"step": 14500
|
879 |
+
},
|
880 |
+
{
|
881 |
+
"epoch": 1.68,
|
882 |
+
"learning_rate": 3.4022724371249842e-06,
|
883 |
+
"loss": 2.3361,
|
884 |
+
"step": 14600
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 1.69,
|
888 |
+
"learning_rate": 3.0831099195710457e-06,
|
889 |
+
"loss": 2.2774,
|
890 |
+
"step": 14700
|
891 |
+
},
|
892 |
+
{
|
893 |
+
"epoch": 1.7,
|
894 |
+
"learning_rate": 2.7639474020171072e-06,
|
895 |
+
"loss": 2.2895,
|
896 |
+
"step": 14800
|
897 |
+
},
|
898 |
+
{
|
899 |
+
"epoch": 1.71,
|
900 |
+
"learning_rate": 2.4447848844631687e-06,
|
901 |
+
"loss": 2.3155,
|
902 |
+
"step": 14900
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 1.72,
|
906 |
+
"learning_rate": 2.12562236690923e-06,
|
907 |
+
"loss": 2.2698,
|
908 |
+
"step": 15000
|
909 |
}
|
910 |
],
|
911 |
"max_steps": 15666,
|
912 |
"num_train_epochs": 2,
|
913 |
+
"total_flos": 5.928520220872704e+16,
|
914 |
"trial_name": null,
|
915 |
"trial_params": null
|
916 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557969145
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c09b8c06d2814b8003bcbda40c13a2533f7e29d29db2312c80a18a4f8d1cdf9
|
3 |
size 557969145
|
runs/Jan01_04-28-26_6ea5fb75c867/events.out.tfevents.1672547319.6ea5fb75c867.24.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b718e5a7190544c3a629ebc3b75370d8445066ffb8392744f21c6550117883a
|
3 |
+
size 28385
|