AlekseyKorshuk committed
Commit a8807a1
Parent(s): 1c8741c

huggingartists

Files changed:
- README.md +3 -3
- config.json +1 -1
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +195 -7
- training_args.bin +2 -2
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
dataset = load_dataset("huggingartists/og-buda")
```

- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2ic775kv/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.

## Training procedure

The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.

- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1g4193mx) for full transparency and reproducibility.

- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1g4193mx/artifacts) is logged and versioned.

## How to use

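The hunk ends at the `## How to use` heading, whose body lies outside this diff. For orientation, a minimal usage sketch (an assumption on my part, based on the standard transformers text-generation pipeline rather than on the truncated README section):

```python
# Minimal sketch of using the model published by this commit (assumed
# standard transformers API; this snippet is not part of the diff).
from transformers import pipeline

# The checkpoint name matches the repo id used in the README above.
generator = pipeline("text-generation", model="huggingartists/og-buda")

# Sample a few continuations from a short prompt.
outputs = generator(
    "I am",
    max_length=64,
    num_return_sequences=3,
    do_sample=True,
)
for out in outputs:
    print(out["generated_text"])
```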
config.json CHANGED
@@ -35,7 +35,7 @@
}
},
"torch_dtype": "float32",
- "transformers_version": "4.
+ "transformers_version": "4.11.1",
"use_cache": true,
"vocab_size": 50257
}
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 1.
+ {"eval_loss": 1.5331557989120483, "eval_runtime": 11.1146, "eval_samples_per_second": 21.233, "eval_steps_per_second": 2.699, "epoch": 5.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:b6ca1185ef4c01671071a96d4a6bc08a99dfb851bc262d5b345d99c44c8777b8
size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:66b59554fe6cde8204954f2fd3c802193fa21f23c03e5477bc320159740bc767
size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:acc420fc59da46155e275e30ee4f33e49901084326e259ee2daba1b1bc168d40
size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
- size
+ oid sha256:151664dce0a47953ee30b57a0b85f89039a9cddc1f9c94e479fa890f7dfb89e4
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
+ oid sha256:9a5b7a99e2b2ebcfb7b31d39b7daabc1659b5909216616127ab13d8f6b3cc088
size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
{
- "best_metric": 1.
- "best_model_checkpoint": "output/og-buda/checkpoint-
- "epoch":
- "global_step":
+ "best_metric": 1.5331557989120483,
+ "best_model_checkpoint": "output/og-buda/checkpoint-750",
+ "epoch": 5.0,
+ "global_step": 750,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
@@ -766,11 +766,199 @@
"eval_samples_per_second": 22.942,
"eval_steps_per_second": 2.893,
"step": 600
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00013682420202226357,
+ "loss": 1.7067,
+ "step": 605
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001357009254103391,
+ "loss": 1.7111,
+ "step": 610
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.0001338424770178476,
+ "loss": 1.6736,
+ "step": 615
+ },
+ {
+ "epoch": 4.13,
+ "learning_rate": 0.00013126921839428241,
+ "loss": 1.7152,
+ "step": 620
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00012800934269961248,
+ "loss": 1.7257,
+ "step": 625
+ },
+ {
+ "epoch": 4.2,
+ "learning_rate": 0.00012409856581412142,
+ "loss": 1.7207,
+ "step": 630
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 0.00011957973502774922,
+ "loss": 1.6457,
+ "step": 635
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 0.00011450235959621773,
+ "loss": 1.713,
+ "step": 640
+ },
+ {
+ "epoch": 4.3,
+ "learning_rate": 0.00010892206830726369,
+ "loss": 1.7242,
+ "step": 645
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.00010290000000000009,
+ "loss": 1.6386,
+ "step": 650
+ },
+ {
+ "epoch": 4.37,
+ "learning_rate": 9.650213371499996e-05,
+ "loss": 1.6539,
+ "step": 655
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 8.97985658141213e-05,
+ "loss": 1.7034,
+ "step": 660
+ },
+ {
+ "epoch": 4.43,
+ "learning_rate": 8.286274199009828e-05,
+ "loss": 1.6681,
+ "step": 665
+ },
+ {
+ "epoch": 4.47,
+ "learning_rate": 7.577065258016099e-05,
+ "loss": 1.6742,
+ "step": 670
+ },
+ {
+ "epoch": 4.5,
+ "learning_rate": 6.860000000000003e-05,
+ "loss": 1.687,
+ "step": 675
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 6.14293474198391e-05,
+ "loss": 1.7066,
+ "step": 680
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 5.433725800990179e-05,
+ "loss": 1.6622,
+ "step": 685
+ },
+ {
+ "epoch": 4.6,
+ "learning_rate": 4.740143418587876e-05,
+ "loss": 1.6647,
+ "step": 690
+ },
+ {
+ "epoch": 4.63,
+ "learning_rate": 4.069786628500011e-05,
+ "loss": 1.6251,
+ "step": 695
+ },
+ {
+ "epoch": 4.67,
+ "learning_rate": 3.429999999999996e-05,
+ "loss": 1.6732,
+ "step": 700
+ },
+ {
+ "epoch": 4.7,
+ "learning_rate": 2.8277931692736372e-05,
+ "loss": 1.5917,
+ "step": 705
+ },
+ {
+ "epoch": 4.73,
+ "learning_rate": 2.2697640403782324e-05,
+ "loss": 1.5938,
+ "step": 710
+ },
+ {
+ "epoch": 4.77,
+ "learning_rate": 1.762026497225081e-05,
+ "loss": 1.7145,
+ "step": 715
+ },
+ {
+ "epoch": 4.8,
+ "learning_rate": 1.3101434185878628e-05,
+ "loss": 1.6437,
+ "step": 720
+ },
+ {
+ "epoch": 4.83,
+ "learning_rate": 9.190657300387574e-06,
+ "loss": 1.5361,
+ "step": 725
+ },
+ {
+ "epoch": 4.87,
+ "learning_rate": 5.930781605717611e-06,
+ "loss": 1.5239,
+ "step": 730
+ },
+ {
+ "epoch": 4.9,
+ "learning_rate": 3.3575229821524373e-06,
+ "loss": 1.656,
+ "step": 735
+ },
+ {
+ "epoch": 4.93,
+ "learning_rate": 1.4990745896609297e-06,
+ "loss": 1.6033,
+ "step": 740
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 3.757979777364447e-07,
+ "loss": 1.6277,
+ "step": 745
+ },
+ {
+ "epoch": 5.0,
+ "learning_rate": 0.0,
+ "loss": 1.5745,
+ "step": 750
+ },
+ {
+ "epoch": 5.0,
+ "eval_loss": 1.5331557989120483,
+ "eval_runtime": 11.0656,
+ "eval_samples_per_second": 21.327,
+ "eval_steps_per_second": 2.711,
+ "step": 750
}
],
- "max_steps":
- "num_train_epochs":
- "total_flos":
+ "max_steps": 750,
+ "num_train_epochs": 5,
+ "total_flos": 781001883648000.0,
"trial_name": null,
"trial_params": null
}
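The appended log_history records above trace the per-step training loss and learning-rate decay over the final epoch. A small sketch (an assumption, not part of this commit; it presumes a local copy of the updated file) of how those records can be pulled out for inspection:

```python
# Sketch: extract the training curve from the updated trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# log_history holds the per-step records shown in the hunk above; training
# records carry "loss", evaluation records carry "eval_loss".
train_curve = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_curve = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("last training step:", train_curve[-1])   # (750, 1.5745) in this commit
print("best eval loss:", state["best_metric"])  # 1.5331557989120483
```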
training_args.bin CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:
- size
+ oid sha256:09831668545d53e1902c5d27a1cebb81afa6b22d9d6f902498224fa69e5221ac
+ size 2863