AlekseyKorshuk
commited on
Commit
·
1c8741c
1
Parent(s):
c134d7b
huggingartists
Browse files- README.md +3 -3
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +182 -6
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/og-buda")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/og-buda")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11co51jr/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 1.
|
|
|
1 |
+
{"eval_loss": 1.6418354511260986, "eval_runtime": 10.1368, "eval_samples_per_second": 22.69, "eval_steps_per_second": 2.861, "epoch": 4.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87d33c0069e8b72372c9e6dfc7f9a8187c137ac978f25b032d134b7ef5fd064f
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3160fb582d67d179d5ee50aa5804c6359d28643e90fcd54e9bd4c89655dfc68
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510403817
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:911581e98e184bd3b5eb8b1cf8a6bf2e4ec0ff4f88f7ea521640e2dd07b94f03
|
3 |
size 510403817
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7dbbbccbd02e27dbdb79311ae0bdd36d65163767a8d048823783db7230b9c01f
|
3 |
size 14567
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b342b9aa08984a7d22e3d2af7c55d45d34bb20259c62266df710a4afe559ae
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/og-buda/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -590,11 +590,187 @@
|
|
590 |
"eval_samples_per_second": 20.576,
|
591 |
"eval_steps_per_second": 2.585,
|
592 |
"step": 462
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
593 |
}
|
594 |
],
|
595 |
-
"max_steps":
|
596 |
"num_train_epochs": 4,
|
597 |
-
"total_flos":
|
598 |
"trial_name": null,
|
599 |
"trial_params": null
|
600 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.6418354511260986,
|
3 |
+
"best_model_checkpoint": "output/og-buda/checkpoint-600",
|
4 |
+
"epoch": 4.0,
|
5 |
+
"global_step": 600,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
590 |
"eval_samples_per_second": 20.576,
|
591 |
"eval_steps_per_second": 2.585,
|
592 |
"step": 462
|
593 |
+
},
|
594 |
+
{
|
595 |
+
"epoch": 3.1,
|
596 |
+
"learning_rate": 3.35752298215246e-06,
|
597 |
+
"loss": 1.6773,
|
598 |
+
"step": 465
|
599 |
+
},
|
600 |
+
{
|
601 |
+
"epoch": 3.13,
|
602 |
+
"learning_rate": 5.930781605717588e-06,
|
603 |
+
"loss": 1.7743,
|
604 |
+
"step": 470
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"epoch": 3.17,
|
608 |
+
"learning_rate": 9.190657300387474e-06,
|
609 |
+
"loss": 1.6782,
|
610 |
+
"step": 475
|
611 |
+
},
|
612 |
+
{
|
613 |
+
"epoch": 3.2,
|
614 |
+
"learning_rate": 1.310143418587859e-05,
|
615 |
+
"loss": 1.7254,
|
616 |
+
"step": 480
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"epoch": 3.23,
|
620 |
+
"learning_rate": 1.7620264972250762e-05,
|
621 |
+
"loss": 1.7116,
|
622 |
+
"step": 485
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"epoch": 3.27,
|
626 |
+
"learning_rate": 2.269764040378228e-05,
|
627 |
+
"loss": 1.737,
|
628 |
+
"step": 490
|
629 |
+
},
|
630 |
+
{
|
631 |
+
"epoch": 3.3,
|
632 |
+
"learning_rate": 2.827793169273632e-05,
|
633 |
+
"loss": 1.6298,
|
634 |
+
"step": 495
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 3.33,
|
638 |
+
"learning_rate": 3.4300000000000014e-05,
|
639 |
+
"loss": 1.784,
|
640 |
+
"step": 500
|
641 |
+
},
|
642 |
+
{
|
643 |
+
"epoch": 3.37,
|
644 |
+
"learning_rate": 4.069786628500004e-05,
|
645 |
+
"loss": 1.7561,
|
646 |
+
"step": 505
|
647 |
+
},
|
648 |
+
{
|
649 |
+
"epoch": 3.4,
|
650 |
+
"learning_rate": 4.740143418587858e-05,
|
651 |
+
"loss": 1.7454,
|
652 |
+
"step": 510
|
653 |
+
},
|
654 |
+
{
|
655 |
+
"epoch": 3.43,
|
656 |
+
"learning_rate": 5.4337258009901596e-05,
|
657 |
+
"loss": 1.8009,
|
658 |
+
"step": 515
|
659 |
+
},
|
660 |
+
{
|
661 |
+
"epoch": 3.47,
|
662 |
+
"learning_rate": 6.142934741983902e-05,
|
663 |
+
"loss": 1.7686,
|
664 |
+
"step": 520
|
665 |
+
},
|
666 |
+
{
|
667 |
+
"epoch": 3.5,
|
668 |
+
"learning_rate": 6.859999999999997e-05,
|
669 |
+
"loss": 1.697,
|
670 |
+
"step": 525
|
671 |
+
},
|
672 |
+
{
|
673 |
+
"epoch": 3.53,
|
674 |
+
"learning_rate": 7.577065258016093e-05,
|
675 |
+
"loss": 1.6804,
|
676 |
+
"step": 530
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 3.57,
|
680 |
+
"learning_rate": 8.286274199009834e-05,
|
681 |
+
"loss": 1.7485,
|
682 |
+
"step": 535
|
683 |
+
},
|
684 |
+
{
|
685 |
+
"epoch": 3.6,
|
686 |
+
"learning_rate": 8.979856581412137e-05,
|
687 |
+
"loss": 1.7389,
|
688 |
+
"step": 540
|
689 |
+
},
|
690 |
+
{
|
691 |
+
"epoch": 3.63,
|
692 |
+
"learning_rate": 9.650213371499991e-05,
|
693 |
+
"loss": 1.6993,
|
694 |
+
"step": 545
|
695 |
+
},
|
696 |
+
{
|
697 |
+
"epoch": 3.67,
|
698 |
+
"learning_rate": 0.00010289999999999994,
|
699 |
+
"loss": 1.7811,
|
700 |
+
"step": 550
|
701 |
+
},
|
702 |
+
{
|
703 |
+
"epoch": 3.7,
|
704 |
+
"learning_rate": 0.00010892206830726364,
|
705 |
+
"loss": 1.6838,
|
706 |
+
"step": 555
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"epoch": 3.73,
|
710 |
+
"learning_rate": 0.00011450235959621768,
|
711 |
+
"loss": 1.76,
|
712 |
+
"step": 560
|
713 |
+
},
|
714 |
+
{
|
715 |
+
"epoch": 3.77,
|
716 |
+
"learning_rate": 0.0001195797350277492,
|
717 |
+
"loss": 1.7419,
|
718 |
+
"step": 565
|
719 |
+
},
|
720 |
+
{
|
721 |
+
"epoch": 3.8,
|
722 |
+
"learning_rate": 0.00012409856581412136,
|
723 |
+
"loss": 1.7739,
|
724 |
+
"step": 570
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"epoch": 3.83,
|
728 |
+
"learning_rate": 0.0001280093426996125,
|
729 |
+
"loss": 1.7108,
|
730 |
+
"step": 575
|
731 |
+
},
|
732 |
+
{
|
733 |
+
"epoch": 3.87,
|
734 |
+
"learning_rate": 0.0001312692183942824,
|
735 |
+
"loss": 1.7828,
|
736 |
+
"step": 580
|
737 |
+
},
|
738 |
+
{
|
739 |
+
"epoch": 3.9,
|
740 |
+
"learning_rate": 0.00013384247701784751,
|
741 |
+
"loss": 1.7588,
|
742 |
+
"step": 585
|
743 |
+
},
|
744 |
+
{
|
745 |
+
"epoch": 3.93,
|
746 |
+
"learning_rate": 0.00013570092541033904,
|
747 |
+
"loss": 1.6845,
|
748 |
+
"step": 590
|
749 |
+
},
|
750 |
+
{
|
751 |
+
"epoch": 3.97,
|
752 |
+
"learning_rate": 0.00013682420202226357,
|
753 |
+
"loss": 1.7817,
|
754 |
+
"step": 595
|
755 |
+
},
|
756 |
+
{
|
757 |
+
"epoch": 4.0,
|
758 |
+
"learning_rate": 0.0001372,
|
759 |
+
"loss": 1.7611,
|
760 |
+
"step": 600
|
761 |
+
},
|
762 |
+
{
|
763 |
+
"epoch": 4.0,
|
764 |
+
"eval_loss": 1.6418354511260986,
|
765 |
+
"eval_runtime": 10.0255,
|
766 |
+
"eval_samples_per_second": 22.942,
|
767 |
+
"eval_steps_per_second": 2.893,
|
768 |
+
"step": 600
|
769 |
}
|
770 |
],
|
771 |
+
"max_steps": 600,
|
772 |
"num_train_epochs": 4,
|
773 |
+
"total_flos": 625141186560000.0,
|
774 |
"trial_name": null,
|
775 |
"trial_params": null
|
776 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2671
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:241f4fc691c28aaa2ddb496ddc8870ce626761c7412d518a7188ac1aaea6de47
|
3 |
size 2671
|