AlekseyKorshuk committed on
Commit
1c8741c
·
1 Parent(s): c134d7b

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/og-buda")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/19koddag/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2pjx3dty) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2pjx3dty/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/og-buda")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11co51jr/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on OG Buda's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/10khp2s0/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.6688779592514038, "eval_runtime": 9.5584, "eval_samples_per_second": 20.819, "eval_steps_per_second": 2.616, "epoch": 4.0}
 
1
+ {"eval_loss": 1.6418354511260986, "eval_runtime": 10.1368, "eval_samples_per_second": 22.69, "eval_steps_per_second": 2.861, "epoch": 4.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e333fb388e02210c1f3ff984ad6c4d21e6d008bcab46dfd83e150235f59d89bc
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d33c0069e8b72372c9e6dfc7f9a8187c137ac978f25b032d134b7ef5fd064f
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ebbd0aa38a33ae4017b166fd8f1134893410206219de2130750e6d7d5f39b3e
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3160fb582d67d179d5ee50aa5804c6359d28643e90fcd54e9bd4c89655dfc68
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cb6cef5d2d7cbde31a058ffa3aefb7f08a7a2e4806e882a70db75aeff20abb1
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:911581e98e184bd3b5eb8b1cf8a6bf2e4ec0ff4f88f7ea521640e2dd07b94f03
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a598a815349f8b1988c86709fdd881dafa589aaad49c7f20d98d3860abe8f36f
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbbbccbd02e27dbdb79311ae0bdd36d65163767a8d048823783db7230b9c01f
3
  size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e86c0f8b5c2b1ad48d13df79807268e5d50299fba1287501961674ecf5c92d8c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1b342b9aa08984a7d22e3d2af7c55d45d34bb20259c62266df710a4afe559ae
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.6688779592514038,
3
- "best_model_checkpoint": "output/og-buda/checkpoint-462",
4
- "epoch": 3.0,
5
- "global_step": 462,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -590,11 +590,187 @@
590
  "eval_samples_per_second": 20.576,
591
  "eval_steps_per_second": 2.585,
592
  "step": 462
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593
  }
594
  ],
595
- "max_steps": 616,
596
  "num_train_epochs": 4,
597
- "total_flos": 481038630912000.0,
598
  "trial_name": null,
599
  "trial_params": null
600
  }
 
1
  {
2
+ "best_metric": 1.6418354511260986,
3
+ "best_model_checkpoint": "output/og-buda/checkpoint-600",
4
+ "epoch": 4.0,
5
+ "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
590
  "eval_samples_per_second": 20.576,
591
  "eval_steps_per_second": 2.585,
592
  "step": 462
593
+ },
594
+ {
595
+ "epoch": 3.1,
596
+ "learning_rate": 3.35752298215246e-06,
597
+ "loss": 1.6773,
598
+ "step": 465
599
+ },
600
+ {
601
+ "epoch": 3.13,
602
+ "learning_rate": 5.930781605717588e-06,
603
+ "loss": 1.7743,
604
+ "step": 470
605
+ },
606
+ {
607
+ "epoch": 3.17,
608
+ "learning_rate": 9.190657300387474e-06,
609
+ "loss": 1.6782,
610
+ "step": 475
611
+ },
612
+ {
613
+ "epoch": 3.2,
614
+ "learning_rate": 1.310143418587859e-05,
615
+ "loss": 1.7254,
616
+ "step": 480
617
+ },
618
+ {
619
+ "epoch": 3.23,
620
+ "learning_rate": 1.7620264972250762e-05,
621
+ "loss": 1.7116,
622
+ "step": 485
623
+ },
624
+ {
625
+ "epoch": 3.27,
626
+ "learning_rate": 2.269764040378228e-05,
627
+ "loss": 1.737,
628
+ "step": 490
629
+ },
630
+ {
631
+ "epoch": 3.3,
632
+ "learning_rate": 2.827793169273632e-05,
633
+ "loss": 1.6298,
634
+ "step": 495
635
+ },
636
+ {
637
+ "epoch": 3.33,
638
+ "learning_rate": 3.4300000000000014e-05,
639
+ "loss": 1.784,
640
+ "step": 500
641
+ },
642
+ {
643
+ "epoch": 3.37,
644
+ "learning_rate": 4.069786628500004e-05,
645
+ "loss": 1.7561,
646
+ "step": 505
647
+ },
648
+ {
649
+ "epoch": 3.4,
650
+ "learning_rate": 4.740143418587858e-05,
651
+ "loss": 1.7454,
652
+ "step": 510
653
+ },
654
+ {
655
+ "epoch": 3.43,
656
+ "learning_rate": 5.4337258009901596e-05,
657
+ "loss": 1.8009,
658
+ "step": 515
659
+ },
660
+ {
661
+ "epoch": 3.47,
662
+ "learning_rate": 6.142934741983902e-05,
663
+ "loss": 1.7686,
664
+ "step": 520
665
+ },
666
+ {
667
+ "epoch": 3.5,
668
+ "learning_rate": 6.859999999999997e-05,
669
+ "loss": 1.697,
670
+ "step": 525
671
+ },
672
+ {
673
+ "epoch": 3.53,
674
+ "learning_rate": 7.577065258016093e-05,
675
+ "loss": 1.6804,
676
+ "step": 530
677
+ },
678
+ {
679
+ "epoch": 3.57,
680
+ "learning_rate": 8.286274199009834e-05,
681
+ "loss": 1.7485,
682
+ "step": 535
683
+ },
684
+ {
685
+ "epoch": 3.6,
686
+ "learning_rate": 8.979856581412137e-05,
687
+ "loss": 1.7389,
688
+ "step": 540
689
+ },
690
+ {
691
+ "epoch": 3.63,
692
+ "learning_rate": 9.650213371499991e-05,
693
+ "loss": 1.6993,
694
+ "step": 545
695
+ },
696
+ {
697
+ "epoch": 3.67,
698
+ "learning_rate": 0.00010289999999999994,
699
+ "loss": 1.7811,
700
+ "step": 550
701
+ },
702
+ {
703
+ "epoch": 3.7,
704
+ "learning_rate": 0.00010892206830726364,
705
+ "loss": 1.6838,
706
+ "step": 555
707
+ },
708
+ {
709
+ "epoch": 3.73,
710
+ "learning_rate": 0.00011450235959621768,
711
+ "loss": 1.76,
712
+ "step": 560
713
+ },
714
+ {
715
+ "epoch": 3.77,
716
+ "learning_rate": 0.0001195797350277492,
717
+ "loss": 1.7419,
718
+ "step": 565
719
+ },
720
+ {
721
+ "epoch": 3.8,
722
+ "learning_rate": 0.00012409856581412136,
723
+ "loss": 1.7739,
724
+ "step": 570
725
+ },
726
+ {
727
+ "epoch": 3.83,
728
+ "learning_rate": 0.0001280093426996125,
729
+ "loss": 1.7108,
730
+ "step": 575
731
+ },
732
+ {
733
+ "epoch": 3.87,
734
+ "learning_rate": 0.0001312692183942824,
735
+ "loss": 1.7828,
736
+ "step": 580
737
+ },
738
+ {
739
+ "epoch": 3.9,
740
+ "learning_rate": 0.00013384247701784751,
741
+ "loss": 1.7588,
742
+ "step": 585
743
+ },
744
+ {
745
+ "epoch": 3.93,
746
+ "learning_rate": 0.00013570092541033904,
747
+ "loss": 1.6845,
748
+ "step": 590
749
+ },
750
+ {
751
+ "epoch": 3.97,
752
+ "learning_rate": 0.00013682420202226357,
753
+ "loss": 1.7817,
754
+ "step": 595
755
+ },
756
+ {
757
+ "epoch": 4.0,
758
+ "learning_rate": 0.0001372,
759
+ "loss": 1.7611,
760
+ "step": 600
761
+ },
762
+ {
763
+ "epoch": 4.0,
764
+ "eval_loss": 1.6418354511260986,
765
+ "eval_runtime": 10.0255,
766
+ "eval_samples_per_second": 22.942,
767
+ "eval_steps_per_second": 2.893,
768
+ "step": 600
769
  }
770
  ],
771
+ "max_steps": 600,
772
  "num_train_epochs": 4,
773
+ "total_flos": 625141186560000.0,
774
  "trial_name": null,
775
  "trial_params": null
776
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14e6c3bf99fa49e34d3e8e2fd8f9080f73be0573bb5ad5de841304e2c8bb6fc0
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:241f4fc691c28aaa2ddb496ddc8870ce626761c7412d518a7188ac1aaea6de47
3
  size 2671