nbtpj committed on
Commit
5a153b1
·
1 Parent(s): a26b5c7

Training in progress, step 12500

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0270219bbf3b9d435456104809f3c23a41c4cd1fe51a03b95efb36b90021a044
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcf16c77fc4c696d5ecec66bb94ab1a72596ecc42da185a1509a0a37aa93d820
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b3509860f8b9090ce37e24daf382d92def3c20605929d105fc7709bfdf4fa92
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001276adc811be2b4e3383d1bfd82644ee01b63c562240704981e80abf57ea78
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bd78237a8bff2399c24e5136a001794a3e1c14fbfcbde9fa5704a4fcf3d3828
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ea4cb9a85921bdde5a8523300b183bff3f70a1aeb0368e6a4c669b696d1922c
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6cc4eff0bedca1e1ffd7c8824dc4da7fe034832dbf96d075af2be5a88f8f30
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af29429f2a56160c0a3cb3b03c7b28cc9e5f472234bebefa86da284747a0d6d1
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1485012059262663,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -606,11 +606,161 @@
606
  "learning_rate": 1.8098002934983733e-05,
607
  "loss": 0.5978,
608
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
  "max_steps": 15673,
612
  "num_train_epochs": 2,
613
- "total_flos": 3.954111338032128e+16,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4356265074078327,
5
+ "global_step": 12500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
606
  "learning_rate": 1.8098002934983733e-05,
607
  "loss": 0.5978,
608
  "step": 10000
609
+ },
610
+ {
611
+ "epoch": 1.16,
612
+ "learning_rate": 1.7778982964333567e-05,
613
+ "loss": 0.6034,
614
+ "step": 10100
615
+ },
616
+ {
617
+ "epoch": 1.17,
618
+ "learning_rate": 1.7459962993683405e-05,
619
+ "loss": 0.6037,
620
+ "step": 10200
621
+ },
622
+ {
623
+ "epoch": 1.18,
624
+ "learning_rate": 1.7140943023033243e-05,
625
+ "loss": 0.6251,
626
+ "step": 10300
627
+ },
628
+ {
629
+ "epoch": 1.19,
630
+ "learning_rate": 1.6821923052383077e-05,
631
+ "loss": 0.581,
632
+ "step": 10400
633
+ },
634
+ {
635
+ "epoch": 1.21,
636
+ "learning_rate": 1.650290308173292e-05,
637
+ "loss": 0.5919,
638
+ "step": 10500
639
+ },
640
+ {
641
+ "epoch": 1.22,
642
+ "learning_rate": 1.6183883111082753e-05,
643
+ "loss": 0.6073,
644
+ "step": 10600
645
+ },
646
+ {
647
+ "epoch": 1.23,
648
+ "learning_rate": 1.5864863140432594e-05,
649
+ "loss": 0.6273,
650
+ "step": 10700
651
+ },
652
+ {
653
+ "epoch": 1.24,
654
+ "learning_rate": 1.554584316978243e-05,
655
+ "loss": 0.6198,
656
+ "step": 10800
657
+ },
658
+ {
659
+ "epoch": 1.25,
660
+ "learning_rate": 1.5226823199132268e-05,
661
+ "loss": 0.5993,
662
+ "step": 10900
663
+ },
664
+ {
665
+ "epoch": 1.26,
666
+ "learning_rate": 1.4907803228482104e-05,
667
+ "loss": 0.5519,
668
+ "step": 11000
669
+ },
670
+ {
671
+ "epoch": 1.27,
672
+ "learning_rate": 1.4588783257831942e-05,
673
+ "loss": 0.6152,
674
+ "step": 11100
675
+ },
676
+ {
677
+ "epoch": 1.29,
678
+ "learning_rate": 1.4269763287181778e-05,
679
+ "loss": 0.6251,
680
+ "step": 11200
681
+ },
682
+ {
683
+ "epoch": 1.3,
684
+ "learning_rate": 1.3950743316531614e-05,
685
+ "loss": 0.6747,
686
+ "step": 11300
687
+ },
688
+ {
689
+ "epoch": 1.31,
690
+ "learning_rate": 1.3631723345881454e-05,
691
+ "loss": 0.6253,
692
+ "step": 11400
693
+ },
694
+ {
695
+ "epoch": 1.32,
696
+ "learning_rate": 1.331270337523129e-05,
697
+ "loss": 0.6009,
698
+ "step": 11500
699
+ },
700
+ {
701
+ "epoch": 1.33,
702
+ "learning_rate": 1.2993683404581127e-05,
703
+ "loss": 0.6019,
704
+ "step": 11600
705
+ },
706
+ {
707
+ "epoch": 1.34,
708
+ "learning_rate": 1.2674663433930964e-05,
709
+ "loss": 0.6286,
710
+ "step": 11700
711
+ },
712
+ {
713
+ "epoch": 1.36,
714
+ "learning_rate": 1.2355643463280801e-05,
715
+ "loss": 0.5796,
716
+ "step": 11800
717
+ },
718
+ {
719
+ "epoch": 1.37,
720
+ "learning_rate": 1.2036623492630639e-05,
721
+ "loss": 0.6077,
722
+ "step": 11900
723
+ },
724
+ {
725
+ "epoch": 1.38,
726
+ "learning_rate": 1.1717603521980477e-05,
727
+ "loss": 0.6186,
728
+ "step": 12000
729
+ },
730
+ {
731
+ "epoch": 1.39,
732
+ "learning_rate": 1.1398583551330313e-05,
733
+ "loss": 0.6632,
734
+ "step": 12100
735
+ },
736
+ {
737
+ "epoch": 1.4,
738
+ "learning_rate": 1.107956358068015e-05,
739
+ "loss": 0.5796,
740
+ "step": 12200
741
+ },
742
+ {
743
+ "epoch": 1.41,
744
+ "learning_rate": 1.0760543610029989e-05,
745
+ "loss": 0.5951,
746
+ "step": 12300
747
+ },
748
+ {
749
+ "epoch": 1.42,
750
+ "learning_rate": 1.0441523639379826e-05,
751
+ "loss": 0.5743,
752
+ "step": 12400
753
+ },
754
+ {
755
+ "epoch": 1.44,
756
+ "learning_rate": 1.0122503668729662e-05,
757
+ "loss": 0.6705,
758
+ "step": 12500
759
  }
760
  ],
761
  "max_steps": 15673,
762
  "num_train_epochs": 2,
763
+ "total_flos": 4.939903709604864e+16,
764
  "trial_name": null,
765
  "trial_params": null
766
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b3509860f8b9090ce37e24daf382d92def3c20605929d105fc7709bfdf4fa92
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001276adc811be2b4e3383d1bfd82644ee01b63c562240704981e80abf57ea78
3
  size 557969145
runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e582d90f7535fdd56e7dc87ed69608bed0f439782a1664c33152ca1b8568bf2f
3
- size 20535
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77dd77e2e1de62f98d722747a220a5ac16ebff7589985b9d63fb7e5c44b6c35
3
+ size 24460