cat-searcher commited on
Commit
0990fc5
·
verified ·
1 Parent(s): bfad752

Training in progress, epoch 16, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step3160/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step3160/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step3160/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step3160/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step3160/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step3160/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step3160/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step3160/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step3160/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step3160/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step3160/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step3160/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step3160/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step3160/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step3160/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step3160/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step3160/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7df19b094e998bc270861a0d9638f0e3d610b45c1c4337c8162eb04e57a7a2
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf4f42fbcb8b45a15c03f4a5762030763f610baf0a359e2053ef909a86b2da52
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5431b1dfbf113ec2b512798b731a06e79aa451e1e628f564f1c174e70383cf0
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd6fd8f34c1c234e1aaf5f55d5ee1a115fe054ccf4ff54ec9e5103b0fea4111
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:961712083c3a0c67f9e4587d014bed9fba8af072a557fd7f240099ba5c84a605
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7d01cf37be4172699a0ed5323a6dedae08a4812420de3fd2d87a9b28a105ac
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a297f1dcf25a2802f686a7e5996c451fcdfa7815bf25dca209979111be6e9905
3
+ size 2506176112
last-checkpoint/global_step3160/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e23ce2caf12e5daef3555edec7095e39194e271cd73cf8627b8dd73442bff97
3
+ size 2506176112
last-checkpoint/global_step3160/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f86f35b2023250b5d6ea9bd4f9c3fb7c0dec434f1923a23a7d97bd8943f278c
3
+ size 85570
last-checkpoint/global_step3160/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:258251d0c79eb809b3a76b0f0c4e8d8a3e52e7d17d3e25ea91ac600ed6049250
3
+ size 85506
last-checkpoint/global_step3160/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4d29af929bcdefba5c8d5f5442eb3ae4a94175eb9ecf16c8195ee6e32792bbc
3
+ size 85506
last-checkpoint/global_step3160/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8de8db06dd1ee1a2458a8854bd81ca99c2813a34e47f7931af130fa5c281883
3
+ size 85506
last-checkpoint/global_step3160/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fa05abfe28ad70750e32b024d5ca7ba52e1c23e238a1034eb37528600791b71
3
+ size 85506
last-checkpoint/global_step3160/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:413272eae30b1ea540031b8affe42f6ba5ae9d57b787085a79e5eef1fd675e90
3
+ size 85506
last-checkpoint/global_step3160/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d41768b98b57b45674684524f1007d4b3e71f713acded1c10019b50b89e73f72
3
+ size 85506
last-checkpoint/global_step3160/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22d9138e5b10b22ccd10a820052819783437ccea3bac4a71a39897156d294c71
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2962
 
1
+ global_step3160
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6ab34130d90aebe29868ecf9b47a15403e74bd3aa5e09f06dc3ea9032f8089b
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e8cc757116e636d03d7d2362f38003ee7b34b00b1dae4f4914662ad92e7fad
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cf91018b2355dca95814934115beac2e49e42607748ab6a28986a106363bbcd
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba27efdeb5b44a8b8136905559c82e77a7f13309db0036dad3e99a470705fb98
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07d994b317c4df888a1a1aabc0c532e81f1fa34c18c8313cb2feadca3bb37194
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7c3bc1248de8b4739437317b988d953fd64a5de9736606d74f9c8277f1b485
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f5b05860618aa49c7f5d8c366d6ee73cf8b3b0d0adc17d9313b72621630d0aa
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e571d57a85eb2cdabf3f46c86e446bdb7d26aba8b1467b5e4b5bbe29ad42a7
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7262faf861e984775b4fd85bc76a11b0b8b04037690e8a08a58cf9ff5328a042
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:489e5542988617525a395c45dc83ec6bf25b473812e139122f0a3f3d92f031d0
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9479cad91150e2e266d17eb95fe678579a770f6df6b53496cf72067b186b094d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd77682efb711872c5be25e87e87a2726a2e7105422cddd00f04da7be35ca20
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:435cb6cf559e0ce3fe0d4582cac16ea40b48b7a64589952402a4c399cafbfc00
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e44d9e7d535f5fbcd7cfef16ba22d32d5f445aacceba782a05df1f97d47a608a
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f51001b0d8dc5792180c3a9705ccbfa66b61d46d7639afb6f7abf409629ed74f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a107290a0d9898930bc6abe369ee246ef7322541985fc2a5320e7775f5ea5c88
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1e87084f11088fdce293e1fbbb05e35f5c7385b00e2f9ba195bf61cb36f757d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ab49d56ee4079c2a208376064f825918f070addc8f0c58c5c594265f9e8a78
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d32e9bdd65145ae509e6c6ef4f6ea9d842f94a34c34a0d7d2ab6c248d3f2121
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d15033d06420b17d80db45c89544170faa67833d5a0d9c30a51a38a1102b073
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ef29c4eabe559fffbf188b61164c94ef6c3807ccd683770ebd49ca46d0f6823
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e02caff31fe06a664e85dd7b31b3300391f1a9f4f3b97aaaec945d54216a88e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 2962,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4462,6 +4462,306 @@
4462
  "rewards/margins": 0.49641647934913635,
4463
  "rewards/rejected": -0.32356563210487366,
4464
  "step": 2960
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4465
  }
4466
  ],
4467
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.0,
5
  "eval_steps": 100,
6
+ "global_step": 3160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4462
  "rewards/margins": 0.49641647934913635,
4463
  "rewards/rejected": -0.32356563210487366,
4464
  "step": 2960
4465
+ },
4466
+ {
4467
+ "epoch": 15.037974683544304,
4468
+ "grad_norm": 518700.7292410764,
4469
+ "learning_rate": 9.025383892196802e-08,
4470
+ "logits/chosen": 1.0516235828399658,
4471
+ "logits/rejected": 1.4486608505249023,
4472
+ "logps/chosen": -50.19924545288086,
4473
+ "logps/rejected": -568.3731689453125,
4474
+ "loss": 15371.2547,
4475
+ "rewards/accuracies": 1.0,
4476
+ "rewards/chosen": 0.1881760060787201,
4477
+ "rewards/margins": 0.5164635181427002,
4478
+ "rewards/rejected": -0.3282875716686249,
4479
+ "step": 2970
4480
+ },
4481
+ {
4482
+ "epoch": 15.08860759493671,
4483
+ "grad_norm": 331391.7564792058,
4484
+ "learning_rate": 8.868693199623942e-08,
4485
+ "logits/chosen": 2.2234063148498535,
4486
+ "logits/rejected": 2.0345654487609863,
4487
+ "logps/chosen": -52.14508819580078,
4488
+ "logps/rejected": -595.8091430664062,
4489
+ "loss": 14717.8656,
4490
+ "rewards/accuracies": 1.0,
4491
+ "rewards/chosen": 0.1904282122850418,
4492
+ "rewards/margins": 0.5425348877906799,
4493
+ "rewards/rejected": -0.3521067202091217,
4494
+ "step": 2980
4495
+ },
4496
+ {
4497
+ "epoch": 15.139240506329115,
4498
+ "grad_norm": 245591.75428222032,
4499
+ "learning_rate": 8.712002507051081e-08,
4500
+ "logits/chosen": -0.6622523069381714,
4501
+ "logits/rejected": -0.06956877559423447,
4502
+ "logps/chosen": -52.00910186767578,
4503
+ "logps/rejected": -563.0474853515625,
4504
+ "loss": 15161.7313,
4505
+ "rewards/accuracies": 0.9750000238418579,
4506
+ "rewards/chosen": 0.1884680539369583,
4507
+ "rewards/margins": 0.5108307003974915,
4508
+ "rewards/rejected": -0.32236260175704956,
4509
+ "step": 2990
4510
+ },
4511
+ {
4512
+ "epoch": 15.189873417721518,
4513
+ "grad_norm": 310549.6440256543,
4514
+ "learning_rate": 8.555311814478219e-08,
4515
+ "logits/chosen": 0.2366395890712738,
4516
+ "logits/rejected": 0.44344860315322876,
4517
+ "logps/chosen": -41.386192321777344,
4518
+ "logps/rejected": -572.7687377929688,
4519
+ "loss": 14740.5063,
4520
+ "rewards/accuracies": 0.987500011920929,
4521
+ "rewards/chosen": 0.1788499653339386,
4522
+ "rewards/margins": 0.5284099578857422,
4523
+ "rewards/rejected": -0.3495599925518036,
4524
+ "step": 3000
4525
+ },
4526
+ {
4527
+ "epoch": 15.240506329113924,
4528
+ "grad_norm": 306008.0109626414,
4529
+ "learning_rate": 8.398621121905358e-08,
4530
+ "logits/chosen": 0.007425785064697266,
4531
+ "logits/rejected": 0.6882709264755249,
4532
+ "logps/chosen": -61.54619598388672,
4533
+ "logps/rejected": -565.9954833984375,
4534
+ "loss": 14890.1531,
4535
+ "rewards/accuracies": 0.9375,
4536
+ "rewards/chosen": 0.18634898960590363,
4537
+ "rewards/margins": 0.5029118061065674,
4538
+ "rewards/rejected": -0.31656283140182495,
4539
+ "step": 3010
4540
+ },
4541
+ {
4542
+ "epoch": 15.291139240506329,
4543
+ "grad_norm": 542292.2583731171,
4544
+ "learning_rate": 8.241930429332496e-08,
4545
+ "logits/chosen": -1.8317344188690186,
4546
+ "logits/rejected": -1.2810354232788086,
4547
+ "logps/chosen": -55.94157791137695,
4548
+ "logps/rejected": -610.6949462890625,
4549
+ "loss": 14922.1328,
4550
+ "rewards/accuracies": 1.0,
4551
+ "rewards/chosen": 0.20198726654052734,
4552
+ "rewards/margins": 0.5547267198562622,
4553
+ "rewards/rejected": -0.3527393639087677,
4554
+ "step": 3020
4555
+ },
4556
+ {
4557
+ "epoch": 15.341772151898734,
4558
+ "grad_norm": 246111.44147055785,
4559
+ "learning_rate": 8.085239736759636e-08,
4560
+ "logits/chosen": 0.38201937079429626,
4561
+ "logits/rejected": 0.48218441009521484,
4562
+ "logps/chosen": -49.771148681640625,
4563
+ "logps/rejected": -579.5675048828125,
4564
+ "loss": 14315.8422,
4565
+ "rewards/accuracies": 0.9750000238418579,
4566
+ "rewards/chosen": 0.18888349831104279,
4567
+ "rewards/margins": 0.52850741147995,
4568
+ "rewards/rejected": -0.33962392807006836,
4569
+ "step": 3030
4570
+ },
4571
+ {
4572
+ "epoch": 15.39240506329114,
4573
+ "grad_norm": 365392.8501035466,
4574
+ "learning_rate": 7.928549044186775e-08,
4575
+ "logits/chosen": 0.2196371853351593,
4576
+ "logits/rejected": 0.5740281939506531,
4577
+ "logps/chosen": -37.870933532714844,
4578
+ "logps/rejected": -532.795166015625,
4579
+ "loss": 14228.8297,
4580
+ "rewards/accuracies": 1.0,
4581
+ "rewards/chosen": 0.17593248188495636,
4582
+ "rewards/margins": 0.4975932538509369,
4583
+ "rewards/rejected": -0.3216607868671417,
4584
+ "step": 3040
4585
+ },
4586
+ {
4587
+ "epoch": 15.443037974683545,
4588
+ "grad_norm": 601622.5104727764,
4589
+ "learning_rate": 7.771858351613913e-08,
4590
+ "logits/chosen": -0.6718970537185669,
4591
+ "logits/rejected": -0.666345477104187,
4592
+ "logps/chosen": -44.54059600830078,
4593
+ "logps/rejected": -578.719482421875,
4594
+ "loss": 15052.1406,
4595
+ "rewards/accuracies": 1.0,
4596
+ "rewards/chosen": 0.19072814285755157,
4597
+ "rewards/margins": 0.5325638055801392,
4598
+ "rewards/rejected": -0.3418356776237488,
4599
+ "step": 3050
4600
+ },
4601
+ {
4602
+ "epoch": 15.49367088607595,
4603
+ "grad_norm": 343253.0399713909,
4604
+ "learning_rate": 7.615167659041052e-08,
4605
+ "logits/chosen": -1.7298717498779297,
4606
+ "logits/rejected": -1.107236385345459,
4607
+ "logps/chosen": -48.916072845458984,
4608
+ "logps/rejected": -581.4259643554688,
4609
+ "loss": 15088.4312,
4610
+ "rewards/accuracies": 0.987500011920929,
4611
+ "rewards/chosen": 0.18928301334381104,
4612
+ "rewards/margins": 0.5350446701049805,
4613
+ "rewards/rejected": -0.34576165676116943,
4614
+ "step": 3060
4615
+ },
4616
+ {
4617
+ "epoch": 15.544303797468354,
4618
+ "grad_norm": 228770.67672990158,
4619
+ "learning_rate": 7.45847696646819e-08,
4620
+ "logits/chosen": 1.368043303489685,
4621
+ "logits/rejected": 2.1229677200317383,
4622
+ "logps/chosen": -49.823055267333984,
4623
+ "logps/rejected": -576.06103515625,
4624
+ "loss": 13555.7672,
4625
+ "rewards/accuracies": 0.9750000238418579,
4626
+ "rewards/chosen": 0.18895366787910461,
4627
+ "rewards/margins": 0.5293976664543152,
4628
+ "rewards/rejected": -0.34044402837753296,
4629
+ "step": 3070
4630
+ },
4631
+ {
4632
+ "epoch": 15.594936708860759,
4633
+ "grad_norm": 292818.2312129945,
4634
+ "learning_rate": 7.30178627389533e-08,
4635
+ "logits/chosen": -0.7066992521286011,
4636
+ "logits/rejected": 0.058099888265132904,
4637
+ "logps/chosen": -52.58687210083008,
4638
+ "logps/rejected": -577.005859375,
4639
+ "loss": 14893.6594,
4640
+ "rewards/accuracies": 0.9624999761581421,
4641
+ "rewards/chosen": 0.19148708879947662,
4642
+ "rewards/margins": 0.5295326113700867,
4643
+ "rewards/rejected": -0.33804553747177124,
4644
+ "step": 3080
4645
+ },
4646
+ {
4647
+ "epoch": 15.645569620253164,
4648
+ "grad_norm": 275063.1192623706,
4649
+ "learning_rate": 7.145095581322469e-08,
4650
+ "logits/chosen": 0.057862140238285065,
4651
+ "logits/rejected": -0.10827471315860748,
4652
+ "logps/chosen": -51.52691650390625,
4653
+ "logps/rejected": -598.4918212890625,
4654
+ "loss": 14740.6531,
4655
+ "rewards/accuracies": 0.987500011920929,
4656
+ "rewards/chosen": 0.1917671114206314,
4657
+ "rewards/margins": 0.5416404008865356,
4658
+ "rewards/rejected": -0.34987324476242065,
4659
+ "step": 3090
4660
+ },
4661
+ {
4662
+ "epoch": 15.69620253164557,
4663
+ "grad_norm": 270643.231235499,
4664
+ "learning_rate": 6.988404888749608e-08,
4665
+ "logits/chosen": 0.49672946333885193,
4666
+ "logits/rejected": 0.9934390187263489,
4667
+ "logps/chosen": -53.964393615722656,
4668
+ "logps/rejected": -592.7462158203125,
4669
+ "loss": 14747.2812,
4670
+ "rewards/accuracies": 0.987500011920929,
4671
+ "rewards/chosen": 0.19860555231571198,
4672
+ "rewards/margins": 0.5442546010017395,
4673
+ "rewards/rejected": -0.3456490635871887,
4674
+ "step": 3100
4675
+ },
4676
+ {
4677
+ "epoch": 15.746835443037975,
4678
+ "grad_norm": 366703.97931916115,
4679
+ "learning_rate": 6.831714196176746e-08,
4680
+ "logits/chosen": -1.272958517074585,
4681
+ "logits/rejected": -1.2677191495895386,
4682
+ "logps/chosen": -46.67731475830078,
4683
+ "logps/rejected": -578.444091796875,
4684
+ "loss": 14561.6719,
4685
+ "rewards/accuracies": 1.0,
4686
+ "rewards/chosen": 0.19132201373577118,
4687
+ "rewards/margins": 0.5392004251480103,
4688
+ "rewards/rejected": -0.3478783965110779,
4689
+ "step": 3110
4690
+ },
4691
+ {
4692
+ "epoch": 15.79746835443038,
4693
+ "grad_norm": 363431.4061189904,
4694
+ "learning_rate": 6.675023503603886e-08,
4695
+ "logits/chosen": -0.16689462959766388,
4696
+ "logits/rejected": 0.6665533781051636,
4697
+ "logps/chosen": -49.408546447753906,
4698
+ "logps/rejected": -587.0728759765625,
4699
+ "loss": 14602.2328,
4700
+ "rewards/accuracies": 1.0,
4701
+ "rewards/chosen": 0.1951448619365692,
4702
+ "rewards/margins": 0.538873553276062,
4703
+ "rewards/rejected": -0.3437287211418152,
4704
+ "step": 3120
4705
+ },
4706
+ {
4707
+ "epoch": 15.848101265822784,
4708
+ "grad_norm": 1925815.481070705,
4709
+ "learning_rate": 6.518332811031025e-08,
4710
+ "logits/chosen": -0.1888163536787033,
4711
+ "logits/rejected": -0.3901883661746979,
4712
+ "logps/chosen": -37.012611389160156,
4713
+ "logps/rejected": -553.5242919921875,
4714
+ "loss": 15093.5328,
4715
+ "rewards/accuracies": 0.987500011920929,
4716
+ "rewards/chosen": 0.18000957369804382,
4717
+ "rewards/margins": 0.5157765746116638,
4718
+ "rewards/rejected": -0.3357670307159424,
4719
+ "step": 3130
4720
+ },
4721
+ {
4722
+ "epoch": 15.89873417721519,
4723
+ "grad_norm": 406865.81368112064,
4724
+ "learning_rate": 6.361642118458163e-08,
4725
+ "logits/chosen": -1.0143232345581055,
4726
+ "logits/rejected": -1.1421440839767456,
4727
+ "logps/chosen": -39.294063568115234,
4728
+ "logps/rejected": -572.8070068359375,
4729
+ "loss": 15857.7219,
4730
+ "rewards/accuracies": 1.0,
4731
+ "rewards/chosen": 0.18329963088035583,
4732
+ "rewards/margins": 0.5344266891479492,
4733
+ "rewards/rejected": -0.351127028465271,
4734
+ "step": 3140
4735
+ },
4736
+ {
4737
+ "epoch": 15.949367088607595,
4738
+ "grad_norm": 283773.4922827141,
4739
+ "learning_rate": 6.204951425885302e-08,
4740
+ "logits/chosen": 0.45898357033729553,
4741
+ "logits/rejected": 1.1897245645523071,
4742
+ "logps/chosen": -47.45745086669922,
4743
+ "logps/rejected": -564.1045532226562,
4744
+ "loss": 15274.2656,
4745
+ "rewards/accuracies": 0.987500011920929,
4746
+ "rewards/chosen": 0.17995783686637878,
4747
+ "rewards/margins": 0.516915500164032,
4748
+ "rewards/rejected": -0.3369576930999756,
4749
+ "step": 3150
4750
+ },
4751
+ {
4752
+ "epoch": 16.0,
4753
+ "grad_norm": 338639.8303682123,
4754
+ "learning_rate": 6.04826073331244e-08,
4755
+ "logits/chosen": -1.1235512495040894,
4756
+ "logits/rejected": 0.0012889147037640214,
4757
+ "logps/chosen": -41.902889251708984,
4758
+ "logps/rejected": -569.4451293945312,
4759
+ "loss": 15055.2062,
4760
+ "rewards/accuracies": 1.0,
4761
+ "rewards/chosen": 0.18416796624660492,
4762
+ "rewards/margins": 0.5268322825431824,
4763
+ "rewards/rejected": -0.34266436100006104,
4764
+ "step": 3160
4765
  }
4766
  ],
4767
  "logging_steps": 10,