AmberYifan committed
Commit 94ac3d2 · verified · 1 Parent(s): 363d178

Training in progress, epoch 2, checkpoint

last-checkpoint/global_step834/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f4d631a1b650315fa7f47d494d5dc90813cfa1a228bbc9829c7e772fc0ee414
+ size 20308318462
last-checkpoint/global_step834/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bd981a3eca97cc976039dc6467c9712d528ec4aea67c25a590460f07981139e
+ size 20308318462
last-checkpoint/global_step834/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:763444484d0640da859eaac1055fd9670e227fdab7063816c1ab3f418674fd5f
+ size 20308318462
last-checkpoint/global_step834/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:024ace79dd673f7972bcda314a22238789f9b2c7ef7d25eb76567e5d3f324fa1
+ size 168021
last-checkpoint/global_step834/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:46717b755c898ee645bde65296e2b1b0172ede54f0775e47bb2278d05db803f8
+ size 168021
last-checkpoint/global_step834/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8cc94c6743e0f17155029d5e411e13115bbbc756d14639250e19dcd6d3fcf17
+ size 168021
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step417
+ global_step834
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35fcd90d2fa8518d79b9f906e70f89d1da45a2e10a94e0cb7d84fdb271149d5c
+ oid sha256:503a0542de7434fd94cf507a1454667cc4db309bcb5768fe92cc7048e5cf709b
  size 4877660776
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cebf0418a5b9b8e11615f33221f45b6524cd7081ec5d6c5a8fcb30cd8649362f
+ oid sha256:5435bf5d9f48362da28a7f0483244712eab1583416c1d93ba4ff4f8c9571990f
  size 4932751008
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a17c0412083715ca51a28dd276cb04145564a6d4f3e608be7ee38fdceb4c4488
+ oid sha256:c64867fad1455680c0ccba908bdb82f5ef2f574e48845eb1662eec4b64a7838f
  size 4330865200
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3c10aaab0ab89e8f297bb3356fae83ca80ee5cbd747690e5f05d88586c38d58b
+ oid sha256:473a3519933c9b22c048af564bfcbc7f216e05e95e6fd261ec5743f527a9163b
  size 1089994880
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:631f61dad47c5631344141603fb1c02a0ce0b3b06dfee24d9478b7b6f7a84be9
+ oid sha256:122ac94678b6917d31dc059510d8313aa8631655cb9d780b749ba1ea53548879
  size 14768
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d9d92931a182cd49df186048006139276c5d8b789e1cb6ac8b24b15490df8cda
+ oid sha256:911b3814b631205d77beb4597874c7b3e54a28967f5a2afad57483b742364341
  size 14768
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c2f3fbe989a553c9191c882a60380a9a85e77cec0d291a6bbc785dcea37d7325
+ oid sha256:132ff809344340444ac3d547eaa795a9ce6b4dbf6ac473db7e4ccf506c2d41e9
  size 14768
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b69703daf02b3ddebef74c7f1e3ec0fbb6f4cfd0f928900ca26fcee4ae6f77f3
+ oid sha256:3b827597f074228bbdf970b7a8c86f735e50cc45cc1d15e8574037a53c30f9d7
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.0,
+ "epoch": 2.0,
  "eval_steps": 500,
- "global_step": 417,
+ "global_step": 834,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -653,6 +653,652 @@
  "eval_samples_per_second": 17.17,
  "eval_steps_per_second": 0.773,
  "step": 417
+ },
+ {
+ "epoch": 1.0071942446043165,
+ "grad_norm": 3.0018225466088073,
+ "learning_rate": 3.693333333333333e-07,
+ "logits/chosen": -1.28125,
+ "logits/rejected": -1.765625,
+ "logps/chosen": -235.0,
+ "logps/rejected": -342.0,
+ "loss": 0.0515,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.4453125,
+ "rewards/margins": 9.0,
+ "rewards/rejected": -7.53125,
+ "step": 420
+ },
+ {
+ "epoch": 1.0311750599520384,
+ "grad_norm": 0.5745094685784482,
+ "learning_rate": 3.6488888888888884e-07,
+ "logits/chosen": -1.34375,
+ "logits/rejected": -1.78125,
+ "logps/chosen": -268.0,
+ "logps/rejected": -356.0,
+ "loss": 0.0099,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.703125,
+ "rewards/margins": 9.75,
+ "rewards/rejected": -8.0,
+ "step": 430
+ },
+ {
+ "epoch": 1.0551558752997603,
+ "grad_norm": 0.23864223290524259,
+ "learning_rate": 3.604444444444444e-07,
+ "logits/chosen": -1.2734375,
+ "logits/rejected": -1.734375,
+ "logps/chosen": -237.0,
+ "logps/rejected": -372.0,
+ "loss": 0.0096,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.34375,
+ "rewards/margins": 9.625,
+ "rewards/rejected": -8.25,
+ "step": 440
+ },
+ {
+ "epoch": 1.079136690647482,
+ "grad_norm": 7.163267004525079,
+ "learning_rate": 3.5599999999999996e-07,
+ "logits/chosen": -1.3359375,
+ "logits/rejected": -1.71875,
+ "logps/chosen": -247.0,
+ "logps/rejected": -372.0,
+ "loss": 0.0294,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.296875,
+ "rewards/margins": 10.0,
+ "rewards/rejected": -8.6875,
+ "step": 450
+ },
+ {
+ "epoch": 1.1031175059952039,
+ "grad_norm": 5.519803193762622,
+ "learning_rate": 3.5155555555555554e-07,
+ "logits/chosen": -1.3125,
+ "logits/rejected": -1.8046875,
+ "logps/chosen": -235.0,
+ "logps/rejected": -354.0,
+ "loss": 0.0251,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/chosen": 1.75,
+ "rewards/margins": 9.125,
+ "rewards/rejected": -7.40625,
+ "step": 460
+ },
+ {
+ "epoch": 1.1270983213429258,
+ "grad_norm": 0.5909316496529458,
+ "learning_rate": 3.471111111111111e-07,
+ "logits/chosen": -1.3125,
+ "logits/rejected": -1.703125,
+ "logps/chosen": -247.0,
+ "logps/rejected": -350.0,
+ "loss": 0.0093,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.4609375,
+ "rewards/margins": 9.5,
+ "rewards/rejected": -8.0625,
+ "step": 470
+ },
+ {
+ "epoch": 1.1510791366906474,
+ "grad_norm": 3.011810128815269,
+ "learning_rate": 3.4266666666666666e-07,
+ "logits/chosen": -1.3359375,
+ "logits/rejected": -1.8046875,
+ "logps/chosen": -242.0,
+ "logps/rejected": -362.0,
+ "loss": 0.0096,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.6484375,
+ "rewards/margins": 10.1875,
+ "rewards/rejected": -8.5,
+ "step": 480
+ },
+ {
+ "epoch": 1.1750599520383693,
+ "grad_norm": 0.17942480332235972,
+ "learning_rate": 3.382222222222222e-07,
+ "logits/chosen": -1.3828125,
+ "logits/rejected": -1.71875,
+ "logps/chosen": -247.0,
+ "logps/rejected": -364.0,
+ "loss": 0.0059,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.1875,
+ "rewards/margins": 9.1875,
+ "rewards/rejected": -8.0,
+ "step": 490
+ },
+ {
+ "epoch": 1.1990407673860912,
+ "grad_norm": 0.1553054654263466,
+ "learning_rate": 3.337777777777778e-07,
+ "logits/chosen": -1.296875,
+ "logits/rejected": -1.7734375,
+ "logps/chosen": -260.0,
+ "logps/rejected": -358.0,
+ "loss": 0.0064,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.671875,
+ "rewards/margins": 11.0,
+ "rewards/rejected": -9.375,
+ "step": 500
+ },
+ {
+ "epoch": 1.223021582733813,
+ "grad_norm": 1.1092919311137273,
+ "learning_rate": 3.293333333333333e-07,
+ "logits/chosen": -1.3046875,
+ "logits/rejected": -1.6953125,
+ "logps/chosen": -232.0,
+ "logps/rejected": -376.0,
+ "loss": 0.0198,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.265625,
+ "rewards/margins": 9.9375,
+ "rewards/rejected": -8.625,
+ "step": 510
+ },
+ {
+ "epoch": 1.2470023980815348,
+ "grad_norm": 0.37821643712530456,
+ "learning_rate": 3.248888888888889e-07,
+ "logits/chosen": -1.25,
+ "logits/rejected": -1.671875,
+ "logps/chosen": -245.0,
+ "logps/rejected": -354.0,
+ "loss": 0.0078,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.15625,
+ "rewards/margins": 9.8125,
+ "rewards/rejected": -8.625,
+ "step": 520
+ },
+ {
+ "epoch": 1.2709832134292567,
+ "grad_norm": 0.3096451149240373,
+ "learning_rate": 3.204444444444444e-07,
+ "logits/chosen": -1.328125,
+ "logits/rejected": -1.7578125,
+ "logps/chosen": -278.0,
+ "logps/rejected": -358.0,
+ "loss": 0.0213,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.953125,
+ "rewards/margins": 9.625,
+ "rewards/rejected": -8.6875,
+ "step": 530
+ },
+ {
+ "epoch": 1.2949640287769784,
+ "grad_norm": 4.82912252555677,
+ "learning_rate": 3.1599999999999997e-07,
+ "logits/chosen": -1.2890625,
+ "logits/rejected": -1.734375,
+ "logps/chosen": -251.0,
+ "logps/rejected": -368.0,
+ "loss": 0.0034,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.21875,
+ "rewards/margins": 10.5,
+ "rewards/rejected": -9.3125,
+ "step": 540
+ },
+ {
+ "epoch": 1.3189448441247003,
+ "grad_norm": 0.21054005672290058,
+ "learning_rate": 3.115555555555555e-07,
+ "logits/chosen": -1.3515625,
+ "logits/rejected": -1.8671875,
+ "logps/chosen": -230.0,
+ "logps/rejected": -364.0,
+ "loss": 0.003,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.6953125,
+ "rewards/margins": 10.5,
+ "rewards/rejected": -8.8125,
+ "step": 550
+ },
+ {
+ "epoch": 1.3429256594724222,
+ "grad_norm": 0.3308230367772809,
+ "learning_rate": 3.071111111111111e-07,
+ "logits/chosen": -1.265625,
+ "logits/rejected": -1.7421875,
+ "logps/chosen": -244.0,
+ "logps/rejected": -368.0,
+ "loss": 0.0066,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.421875,
+ "rewards/margins": 10.5625,
+ "rewards/rejected": -9.1875,
+ "step": 560
+ },
+ {
+ "epoch": 1.3669064748201438,
+ "grad_norm": 2.5398850541641953,
+ "learning_rate": 3.026666666666666e-07,
+ "logits/chosen": -1.25,
+ "logits/rejected": -1.796875,
+ "logps/chosen": -234.0,
+ "logps/rejected": -366.0,
+ "loss": 0.005,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.296875,
+ "rewards/margins": 10.75,
+ "rewards/rejected": -9.5,
+ "step": 570
+ },
+ {
+ "epoch": 1.3908872901678657,
+ "grad_norm": 0.03226588556395318,
+ "learning_rate": 2.982222222222222e-07,
+ "logits/chosen": -1.2734375,
+ "logits/rejected": -1.71875,
+ "logps/chosen": -258.0,
+ "logps/rejected": -370.0,
+ "loss": 0.0039,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.5546875,
+ "rewards/margins": 11.5625,
+ "rewards/rejected": -10.0,
+ "step": 580
+ },
+ {
+ "epoch": 1.4148681055155876,
+ "grad_norm": 3.5465151461901057,
+ "learning_rate": 2.937777777777778e-07,
+ "logits/chosen": -1.3203125,
+ "logits/rejected": -1.65625,
+ "logps/chosen": -238.0,
+ "logps/rejected": -366.0,
+ "loss": 0.0054,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.421875,
+ "rewards/margins": 9.875,
+ "rewards/rejected": -8.4375,
+ "step": 590
+ },
+ {
+ "epoch": 1.4388489208633093,
+ "grad_norm": 0.9672041242288474,
+ "learning_rate": 2.8933333333333333e-07,
+ "logits/chosen": -1.2890625,
+ "logits/rejected": -1.765625,
+ "logps/chosen": -256.0,
+ "logps/rejected": -378.0,
+ "loss": 0.0091,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.2421875,
+ "rewards/margins": 11.0625,
+ "rewards/rejected": -9.8125,
+ "step": 600
+ },
+ {
+ "epoch": 1.4628297362110312,
+ "grad_norm": 0.23832756129503824,
+ "learning_rate": 2.848888888888889e-07,
+ "logits/chosen": -1.296875,
+ "logits/rejected": -1.6875,
+ "logps/chosen": -241.0,
+ "logps/rejected": -376.0,
+ "loss": 0.0177,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.015625,
+ "rewards/margins": 10.9375,
+ "rewards/rejected": -9.9375,
+ "step": 610
+ },
+ {
+ "epoch": 1.486810551558753,
+ "grad_norm": 14.900430603440345,
+ "learning_rate": 2.8044444444444445e-07,
+ "logits/chosen": -1.3515625,
+ "logits/rejected": -1.765625,
+ "logps/chosen": -274.0,
+ "logps/rejected": -372.0,
+ "loss": 0.0184,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.46875,
+ "rewards/margins": 11.6875,
+ "rewards/rejected": -10.25,
+ "step": 620
+ },
+ {
+ "epoch": 1.5107913669064748,
+ "grad_norm": 29.0158558612294,
+ "learning_rate": 2.7600000000000004e-07,
+ "logits/chosen": -1.296875,
+ "logits/rejected": -1.875,
+ "logps/chosen": -264.0,
+ "logps/rejected": -392.0,
+ "loss": 0.0258,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.8515625,
+ "rewards/margins": 11.0625,
+ "rewards/rejected": -10.1875,
+ "step": 630
+ },
+ {
+ "epoch": 1.5347721822541966,
+ "grad_norm": 0.10758431473862176,
+ "learning_rate": 2.715555555555555e-07,
+ "logits/chosen": -1.2734375,
+ "logits/rejected": -1.8125,
+ "logps/chosen": -258.0,
+ "logps/rejected": -380.0,
+ "loss": 0.0205,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.390625,
+ "rewards/margins": 12.6875,
+ "rewards/rejected": -11.3125,
+ "step": 640
+ },
+ {
+ "epoch": 1.5587529976019185,
+ "grad_norm": 0.6643883173223759,
+ "learning_rate": 2.671111111111111e-07,
+ "logits/chosen": -1.3359375,
+ "logits/rejected": -1.734375,
+ "logps/chosen": -247.0,
+ "logps/rejected": -388.0,
+ "loss": 0.001,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.34375,
+ "rewards/margins": 11.3125,
+ "rewards/rejected": -10.0,
+ "step": 650
+ },
+ {
+ "epoch": 1.5827338129496402,
+ "grad_norm": 0.19081443363615663,
+ "learning_rate": 2.6266666666666664e-07,
+ "logits/chosen": -1.359375,
+ "logits/rejected": -1.7109375,
+ "logps/chosen": -268.0,
+ "logps/rejected": -376.0,
+ "loss": 0.0085,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.1796875,
+ "rewards/margins": 10.8125,
+ "rewards/rejected": -9.625,
+ "step": 660
+ },
+ {
+ "epoch": 1.6067146282973621,
+ "grad_norm": 0.6802954971662545,
+ "learning_rate": 2.582222222222222e-07,
+ "logits/chosen": -1.2734375,
+ "logits/rejected": -1.71875,
+ "logps/chosen": -241.0,
+ "logps/rejected": -382.0,
+ "loss": 0.004,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.609375,
+ "rewards/margins": 10.6875,
+ "rewards/rejected": -9.125,
+ "step": 670
+ },
+ {
+ "epoch": 1.630695443645084,
+ "grad_norm": 0.21946623844305102,
+ "learning_rate": 2.5377777777777776e-07,
+ "logits/chosen": -1.3125,
+ "logits/rejected": -1.734375,
+ "logps/chosen": -246.0,
+ "logps/rejected": -378.0,
+ "loss": 0.0077,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.34375,
+ "rewards/margins": 12.1875,
+ "rewards/rejected": -10.875,
+ "step": 680
+ },
+ {
+ "epoch": 1.6546762589928057,
+ "grad_norm": 14.387030473360179,
+ "learning_rate": 2.493333333333333e-07,
+ "logits/chosen": -1.2890625,
+ "logits/rejected": -1.7265625,
+ "logps/chosen": -252.0,
+ "logps/rejected": -382.0,
+ "loss": 0.0055,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 0.8046875,
+ "rewards/margins": 11.0,
+ "rewards/rejected": -10.1875,
+ "step": 690
+ },
+ {
+ "epoch": 1.6786570743405276,
+ "grad_norm": 4.254572167354162,
+ "learning_rate": 2.448888888888889e-07,
+ "logits/chosen": -1.265625,
+ "logits/rejected": -1.796875,
+ "logps/chosen": -248.0,
+ "logps/rejected": -384.0,
+ "loss": 0.0027,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.578125,
+ "rewards/margins": 11.5,
+ "rewards/rejected": -9.875,
+ "step": 700
+ },
+ {
+ "epoch": 1.7026378896882495,
+ "grad_norm": 2.551492265163294,
+ "learning_rate": 2.404444444444444e-07,
+ "logits/chosen": -1.3125,
+ "logits/rejected": -1.734375,
+ "logps/chosen": -241.0,
+ "logps/rejected": -384.0,
+ "loss": 0.0037,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.546875,
+ "rewards/margins": 12.5,
+ "rewards/rejected": -10.9375,
+ "step": 710
+ },
+ {
+ "epoch": 1.7266187050359711,
+ "grad_norm": 3.5788184390591766,
+ "learning_rate": 2.3599999999999997e-07,
+ "logits/chosen": -1.296875,
+ "logits/rejected": -1.6875,
+ "logps/chosen": -270.0,
+ "logps/rejected": -370.0,
+ "loss": 0.002,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.94921875,
+ "rewards/margins": 11.9375,
+ "rewards/rejected": -11.0,
+ "step": 720
+ },
+ {
+ "epoch": 1.750599520383693,
+ "grad_norm": 0.22812646760052926,
+ "learning_rate": 2.3155555555555553e-07,
+ "logits/chosen": -1.3125,
+ "logits/rejected": -1.7265625,
+ "logps/chosen": -252.0,
+ "logps/rejected": -376.0,
+ "loss": 0.003,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.421875,
+ "rewards/margins": 11.1875,
+ "rewards/rejected": -9.8125,
+ "step": 730
+ },
+ {
+ "epoch": 1.774580335731415,
+ "grad_norm": 3.5369041443316966,
+ "learning_rate": 2.2711111111111112e-07,
+ "logits/chosen": -1.28125,
+ "logits/rejected": -1.765625,
+ "logps/chosen": -247.0,
+ "logps/rejected": -374.0,
+ "loss": 0.0019,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.453125,
+ "rewards/margins": 10.9375,
+ "rewards/rejected": -9.4375,
+ "step": 740
+ },
+ {
+ "epoch": 1.7985611510791366,
+ "grad_norm": 0.15385299522364304,
+ "learning_rate": 2.2266666666666668e-07,
+ "logits/chosen": -1.296875,
+ "logits/rejected": -1.6640625,
+ "logps/chosen": -247.0,
+ "logps/rejected": -372.0,
+ "loss": 0.0083,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.109375,
+ "rewards/margins": 11.25,
+ "rewards/rejected": -10.125,
+ "step": 750
+ },
+ {
+ "epoch": 1.8225419664268585,
+ "grad_norm": 1.8182203002712007,
+ "learning_rate": 2.1822222222222224e-07,
+ "logits/chosen": -1.34375,
+ "logits/rejected": -1.7421875,
+ "logps/chosen": -260.0,
+ "logps/rejected": -378.0,
+ "loss": 0.001,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.03125,
+ "rewards/margins": 11.5625,
+ "rewards/rejected": -10.5,
+ "step": 760
+ },
+ {
+ "epoch": 1.8465227817745804,
+ "grad_norm": 0.12014261187968397,
+ "learning_rate": 2.1377777777777777e-07,
+ "logits/chosen": -1.234375,
+ "logits/rejected": -1.71875,
+ "logps/chosen": -237.0,
+ "logps/rejected": -366.0,
+ "loss": 0.0005,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.4609375,
+ "rewards/margins": 11.75,
+ "rewards/rejected": -10.25,
+ "step": 770
+ },
+ {
+ "epoch": 1.870503597122302,
+ "grad_norm": 0.017562287950709583,
+ "learning_rate": 2.0933333333333333e-07,
+ "logits/chosen": -1.328125,
+ "logits/rejected": -1.7109375,
+ "logps/chosen": -256.0,
+ "logps/rejected": -378.0,
+ "loss": 0.0054,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.4765625,
+ "rewards/margins": 11.6875,
+ "rewards/rejected": -10.1875,
+ "step": 780
+ },
+ {
+ "epoch": 1.894484412470024,
+ "grad_norm": 0.15749847503299128,
+ "learning_rate": 2.048888888888889e-07,
+ "logits/chosen": -1.34375,
+ "logits/rejected": -1.65625,
+ "logps/chosen": -242.0,
+ "logps/rejected": -378.0,
+ "loss": 0.023,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.59375,
+ "rewards/margins": 11.0,
+ "rewards/rejected": -9.375,
+ "step": 790
+ },
+ {
+ "epoch": 1.9184652278177459,
+ "grad_norm": 0.08616776985304095,
+ "learning_rate": 2.0044444444444445e-07,
+ "logits/chosen": -1.3046875,
+ "logits/rejected": -1.6875,
+ "logps/chosen": -235.0,
+ "logps/rejected": -388.0,
+ "loss": 0.0009,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 0.68359375,
+ "rewards/margins": 11.5625,
+ "rewards/rejected": -10.875,
+ "step": 800
+ },
+ {
+ "epoch": 1.9424460431654675,
+ "grad_norm": 0.11034132303896381,
+ "learning_rate": 1.96e-07,
+ "logits/chosen": -1.3515625,
+ "logits/rejected": -1.7109375,
+ "logps/chosen": -258.0,
+ "logps/rejected": -374.0,
+ "loss": 0.0052,
+ "rewards/accuracies": 1.0,
+ "rewards/chosen": 1.375,
+ "rewards/margins": 11.9375,
+ "rewards/rejected": -10.625,
+ "step": 810
+ },
+ {
+ "epoch": 1.9664268585131894,
+ "grad_norm": 0.1125888676469132,
+ "learning_rate": 1.9155555555555554e-07,
+ "logits/chosen": -1.203125,
+ "logits/rejected": -1.7109375,
+ "logps/chosen": -264.0,
+ "logps/rejected": -380.0,
+ "loss": 0.006,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.4921875,
+ "rewards/margins": 12.1875,
+ "rewards/rejected": -10.75,
+ "step": 820
+ },
+ {
+ "epoch": 1.9904076738609113,
+ "grad_norm": 0.15670668500960064,
+ "learning_rate": 1.871111111111111e-07,
+ "logits/chosen": -1.296875,
+ "logits/rejected": -1.6953125,
+ "logps/chosen": -252.0,
+ "logps/rejected": -392.0,
+ "loss": 0.0101,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/chosen": 1.1640625,
+ "rewards/margins": 11.0625,
+ "rewards/rejected": -9.875,
+ "step": 830
+ },
+ {
+ "epoch": 2.0,
+ "eval_logits/chosen": -1.2109375,
+ "eval_logits/rejected": -1.6796875,
+ "eval_logps/chosen": -227.0,
+ "eval_logps/rejected": -376.0,
+ "eval_loss": 0.11118045449256897,
+ "eval_rewards/accuracies": 0.9583333134651184,
+ "eval_rewards/chosen": 1.1171875,
+ "eval_rewards/margins": 10.125,
+ "eval_rewards/rejected": -9.0,
+ "eval_runtime": 11.778,
+ "eval_samples_per_second": 16.981,
+ "eval_steps_per_second": 0.764,
+ "step": 834
  }
  ],
  "logging_steps": 10,