cat-searcher
commited on
Training in progress, epoch 16, checkpoint
Browse files- last-checkpoint/global_step3160/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step3160/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step3160/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d7df19b094e998bc270861a0d9638f0e3d610b45c1c4337c8162eb04e57a7a2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf4f42fbcb8b45a15c03f4a5762030763f610baf0a359e2053ef909a86b2da52
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5431b1dfbf113ec2b512798b731a06e79aa451e1e628f564f1c174e70383cf0
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcd6fd8f34c1c234e1aaf5f55d5ee1a115fe054ccf4ff54ec9e5103b0fea4111
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:961712083c3a0c67f9e4587d014bed9fba8af072a557fd7f240099ba5c84a605
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a7d01cf37be4172699a0ed5323a6dedae08a4812420de3fd2d87a9b28a105ac
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a297f1dcf25a2802f686a7e5996c451fcdfa7815bf25dca209979111be6e9905
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e23ce2caf12e5daef3555edec7095e39194e271cd73cf8627b8dd73442bff97
|
3 |
+
size 2506176112
|
last-checkpoint/global_step3160/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f86f35b2023250b5d6ea9bd4f9c3fb7c0dec434f1923a23a7d97bd8943f278c
|
3 |
+
size 85570
|
last-checkpoint/global_step3160/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:258251d0c79eb809b3a76b0f0c4e8d8a3e52e7d17d3e25ea91ac600ed6049250
|
3 |
+
size 85506
|
last-checkpoint/global_step3160/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4d29af929bcdefba5c8d5f5442eb3ae4a94175eb9ecf16c8195ee6e32792bbc
|
3 |
+
size 85506
|
last-checkpoint/global_step3160/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8de8db06dd1ee1a2458a8854bd81ca99c2813a34e47f7931af130fa5c281883
|
3 |
+
size 85506
|
last-checkpoint/global_step3160/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fa05abfe28ad70750e32b024d5ca7ba52e1c23e238a1034eb37528600791b71
|
3 |
+
size 85506
|
last-checkpoint/global_step3160/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:413272eae30b1ea540031b8affe42f6ba5ae9d57b787085a79e5eef1fd675e90
|
3 |
+
size 85506
|
last-checkpoint/global_step3160/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d41768b98b57b45674684524f1007d4b3e71f713acded1c10019b50b89e73f72
|
3 |
+
size 85506
|
last-checkpoint/global_step3160/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22d9138e5b10b22ccd10a820052819783437ccea3bac4a71a39897156d294c71
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step3160
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8e8cc757116e636d03d7d2362f38003ee7b34b00b1dae4f4914662ad92e7fad
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba27efdeb5b44a8b8136905559c82e77a7f13309db0036dad3e99a470705fb98
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb7c3bc1248de8b4739437317b988d953fd64a5de9736606d74f9c8277f1b485
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8e571d57a85eb2cdabf3f46c86e446bdb7d26aba8b1467b5e4b5bbe29ad42a7
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:489e5542988617525a395c45dc83ec6bf25b473812e139122f0a3f3d92f031d0
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cd77682efb711872c5be25e87e87a2726a2e7105422cddd00f04da7be35ca20
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e44d9e7d535f5fbcd7cfef16ba22d32d5f445aacceba782a05df1f97d47a608a
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a107290a0d9898930bc6abe369ee246ef7322541985fc2a5320e7775f5ea5c88
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88ab49d56ee4079c2a208376064f825918f070addc8f0c58c5c594265f9e8a78
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d15033d06420b17d80db45c89544170faa67833d5a0d9c30a51a38a1102b073
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e02caff31fe06a664e85dd7b31b3300391f1a9f4f3b97aaaec945d54216a88e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4462,6 +4462,306 @@
|
|
4462 |
"rewards/margins": 0.49641647934913635,
|
4463 |
"rewards/rejected": -0.32356563210487366,
|
4464 |
"step": 2960
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4465 |
}
|
4466 |
],
|
4467 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 16.0,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 3160,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4462 |
"rewards/margins": 0.49641647934913635,
|
4463 |
"rewards/rejected": -0.32356563210487366,
|
4464 |
"step": 2960
|
4465 |
+
},
|
4466 |
+
{
|
4467 |
+
"epoch": 15.037974683544304,
|
4468 |
+
"grad_norm": 518700.7292410764,
|
4469 |
+
"learning_rate": 9.025383892196802e-08,
|
4470 |
+
"logits/chosen": 1.0516235828399658,
|
4471 |
+
"logits/rejected": 1.4486608505249023,
|
4472 |
+
"logps/chosen": -50.19924545288086,
|
4473 |
+
"logps/rejected": -568.3731689453125,
|
4474 |
+
"loss": 15371.2547,
|
4475 |
+
"rewards/accuracies": 1.0,
|
4476 |
+
"rewards/chosen": 0.1881760060787201,
|
4477 |
+
"rewards/margins": 0.5164635181427002,
|
4478 |
+
"rewards/rejected": -0.3282875716686249,
|
4479 |
+
"step": 2970
|
4480 |
+
},
|
4481 |
+
{
|
4482 |
+
"epoch": 15.08860759493671,
|
4483 |
+
"grad_norm": 331391.7564792058,
|
4484 |
+
"learning_rate": 8.868693199623942e-08,
|
4485 |
+
"logits/chosen": 2.2234063148498535,
|
4486 |
+
"logits/rejected": 2.0345654487609863,
|
4487 |
+
"logps/chosen": -52.14508819580078,
|
4488 |
+
"logps/rejected": -595.8091430664062,
|
4489 |
+
"loss": 14717.8656,
|
4490 |
+
"rewards/accuracies": 1.0,
|
4491 |
+
"rewards/chosen": 0.1904282122850418,
|
4492 |
+
"rewards/margins": 0.5425348877906799,
|
4493 |
+
"rewards/rejected": -0.3521067202091217,
|
4494 |
+
"step": 2980
|
4495 |
+
},
|
4496 |
+
{
|
4497 |
+
"epoch": 15.139240506329115,
|
4498 |
+
"grad_norm": 245591.75428222032,
|
4499 |
+
"learning_rate": 8.712002507051081e-08,
|
4500 |
+
"logits/chosen": -0.6622523069381714,
|
4501 |
+
"logits/rejected": -0.06956877559423447,
|
4502 |
+
"logps/chosen": -52.00910186767578,
|
4503 |
+
"logps/rejected": -563.0474853515625,
|
4504 |
+
"loss": 15161.7313,
|
4505 |
+
"rewards/accuracies": 0.9750000238418579,
|
4506 |
+
"rewards/chosen": 0.1884680539369583,
|
4507 |
+
"rewards/margins": 0.5108307003974915,
|
4508 |
+
"rewards/rejected": -0.32236260175704956,
|
4509 |
+
"step": 2990
|
4510 |
+
},
|
4511 |
+
{
|
4512 |
+
"epoch": 15.189873417721518,
|
4513 |
+
"grad_norm": 310549.6440256543,
|
4514 |
+
"learning_rate": 8.555311814478219e-08,
|
4515 |
+
"logits/chosen": 0.2366395890712738,
|
4516 |
+
"logits/rejected": 0.44344860315322876,
|
4517 |
+
"logps/chosen": -41.386192321777344,
|
4518 |
+
"logps/rejected": -572.7687377929688,
|
4519 |
+
"loss": 14740.5063,
|
4520 |
+
"rewards/accuracies": 0.987500011920929,
|
4521 |
+
"rewards/chosen": 0.1788499653339386,
|
4522 |
+
"rewards/margins": 0.5284099578857422,
|
4523 |
+
"rewards/rejected": -0.3495599925518036,
|
4524 |
+
"step": 3000
|
4525 |
+
},
|
4526 |
+
{
|
4527 |
+
"epoch": 15.240506329113924,
|
4528 |
+
"grad_norm": 306008.0109626414,
|
4529 |
+
"learning_rate": 8.398621121905358e-08,
|
4530 |
+
"logits/chosen": 0.007425785064697266,
|
4531 |
+
"logits/rejected": 0.6882709264755249,
|
4532 |
+
"logps/chosen": -61.54619598388672,
|
4533 |
+
"logps/rejected": -565.9954833984375,
|
4534 |
+
"loss": 14890.1531,
|
4535 |
+
"rewards/accuracies": 0.9375,
|
4536 |
+
"rewards/chosen": 0.18634898960590363,
|
4537 |
+
"rewards/margins": 0.5029118061065674,
|
4538 |
+
"rewards/rejected": -0.31656283140182495,
|
4539 |
+
"step": 3010
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 15.291139240506329,
|
4543 |
+
"grad_norm": 542292.2583731171,
|
4544 |
+
"learning_rate": 8.241930429332496e-08,
|
4545 |
+
"logits/chosen": -1.8317344188690186,
|
4546 |
+
"logits/rejected": -1.2810354232788086,
|
4547 |
+
"logps/chosen": -55.94157791137695,
|
4548 |
+
"logps/rejected": -610.6949462890625,
|
4549 |
+
"loss": 14922.1328,
|
4550 |
+
"rewards/accuracies": 1.0,
|
4551 |
+
"rewards/chosen": 0.20198726654052734,
|
4552 |
+
"rewards/margins": 0.5547267198562622,
|
4553 |
+
"rewards/rejected": -0.3527393639087677,
|
4554 |
+
"step": 3020
|
4555 |
+
},
|
4556 |
+
{
|
4557 |
+
"epoch": 15.341772151898734,
|
4558 |
+
"grad_norm": 246111.44147055785,
|
4559 |
+
"learning_rate": 8.085239736759636e-08,
|
4560 |
+
"logits/chosen": 0.38201937079429626,
|
4561 |
+
"logits/rejected": 0.48218441009521484,
|
4562 |
+
"logps/chosen": -49.771148681640625,
|
4563 |
+
"logps/rejected": -579.5675048828125,
|
4564 |
+
"loss": 14315.8422,
|
4565 |
+
"rewards/accuracies": 0.9750000238418579,
|
4566 |
+
"rewards/chosen": 0.18888349831104279,
|
4567 |
+
"rewards/margins": 0.52850741147995,
|
4568 |
+
"rewards/rejected": -0.33962392807006836,
|
4569 |
+
"step": 3030
|
4570 |
+
},
|
4571 |
+
{
|
4572 |
+
"epoch": 15.39240506329114,
|
4573 |
+
"grad_norm": 365392.8501035466,
|
4574 |
+
"learning_rate": 7.928549044186775e-08,
|
4575 |
+
"logits/chosen": 0.2196371853351593,
|
4576 |
+
"logits/rejected": 0.5740281939506531,
|
4577 |
+
"logps/chosen": -37.870933532714844,
|
4578 |
+
"logps/rejected": -532.795166015625,
|
4579 |
+
"loss": 14228.8297,
|
4580 |
+
"rewards/accuracies": 1.0,
|
4581 |
+
"rewards/chosen": 0.17593248188495636,
|
4582 |
+
"rewards/margins": 0.4975932538509369,
|
4583 |
+
"rewards/rejected": -0.3216607868671417,
|
4584 |
+
"step": 3040
|
4585 |
+
},
|
4586 |
+
{
|
4587 |
+
"epoch": 15.443037974683545,
|
4588 |
+
"grad_norm": 601622.5104727764,
|
4589 |
+
"learning_rate": 7.771858351613913e-08,
|
4590 |
+
"logits/chosen": -0.6718970537185669,
|
4591 |
+
"logits/rejected": -0.666345477104187,
|
4592 |
+
"logps/chosen": -44.54059600830078,
|
4593 |
+
"logps/rejected": -578.719482421875,
|
4594 |
+
"loss": 15052.1406,
|
4595 |
+
"rewards/accuracies": 1.0,
|
4596 |
+
"rewards/chosen": 0.19072814285755157,
|
4597 |
+
"rewards/margins": 0.5325638055801392,
|
4598 |
+
"rewards/rejected": -0.3418356776237488,
|
4599 |
+
"step": 3050
|
4600 |
+
},
|
4601 |
+
{
|
4602 |
+
"epoch": 15.49367088607595,
|
4603 |
+
"grad_norm": 343253.0399713909,
|
4604 |
+
"learning_rate": 7.615167659041052e-08,
|
4605 |
+
"logits/chosen": -1.7298717498779297,
|
4606 |
+
"logits/rejected": -1.107236385345459,
|
4607 |
+
"logps/chosen": -48.916072845458984,
|
4608 |
+
"logps/rejected": -581.4259643554688,
|
4609 |
+
"loss": 15088.4312,
|
4610 |
+
"rewards/accuracies": 0.987500011920929,
|
4611 |
+
"rewards/chosen": 0.18928301334381104,
|
4612 |
+
"rewards/margins": 0.5350446701049805,
|
4613 |
+
"rewards/rejected": -0.34576165676116943,
|
4614 |
+
"step": 3060
|
4615 |
+
},
|
4616 |
+
{
|
4617 |
+
"epoch": 15.544303797468354,
|
4618 |
+
"grad_norm": 228770.67672990158,
|
4619 |
+
"learning_rate": 7.45847696646819e-08,
|
4620 |
+
"logits/chosen": 1.368043303489685,
|
4621 |
+
"logits/rejected": 2.1229677200317383,
|
4622 |
+
"logps/chosen": -49.823055267333984,
|
4623 |
+
"logps/rejected": -576.06103515625,
|
4624 |
+
"loss": 13555.7672,
|
4625 |
+
"rewards/accuracies": 0.9750000238418579,
|
4626 |
+
"rewards/chosen": 0.18895366787910461,
|
4627 |
+
"rewards/margins": 0.5293976664543152,
|
4628 |
+
"rewards/rejected": -0.34044402837753296,
|
4629 |
+
"step": 3070
|
4630 |
+
},
|
4631 |
+
{
|
4632 |
+
"epoch": 15.594936708860759,
|
4633 |
+
"grad_norm": 292818.2312129945,
|
4634 |
+
"learning_rate": 7.30178627389533e-08,
|
4635 |
+
"logits/chosen": -0.7066992521286011,
|
4636 |
+
"logits/rejected": 0.058099888265132904,
|
4637 |
+
"logps/chosen": -52.58687210083008,
|
4638 |
+
"logps/rejected": -577.005859375,
|
4639 |
+
"loss": 14893.6594,
|
4640 |
+
"rewards/accuracies": 0.9624999761581421,
|
4641 |
+
"rewards/chosen": 0.19148708879947662,
|
4642 |
+
"rewards/margins": 0.5295326113700867,
|
4643 |
+
"rewards/rejected": -0.33804553747177124,
|
4644 |
+
"step": 3080
|
4645 |
+
},
|
4646 |
+
{
|
4647 |
+
"epoch": 15.645569620253164,
|
4648 |
+
"grad_norm": 275063.1192623706,
|
4649 |
+
"learning_rate": 7.145095581322469e-08,
|
4650 |
+
"logits/chosen": 0.057862140238285065,
|
4651 |
+
"logits/rejected": -0.10827471315860748,
|
4652 |
+
"logps/chosen": -51.52691650390625,
|
4653 |
+
"logps/rejected": -598.4918212890625,
|
4654 |
+
"loss": 14740.6531,
|
4655 |
+
"rewards/accuracies": 0.987500011920929,
|
4656 |
+
"rewards/chosen": 0.1917671114206314,
|
4657 |
+
"rewards/margins": 0.5416404008865356,
|
4658 |
+
"rewards/rejected": -0.34987324476242065,
|
4659 |
+
"step": 3090
|
4660 |
+
},
|
4661 |
+
{
|
4662 |
+
"epoch": 15.69620253164557,
|
4663 |
+
"grad_norm": 270643.231235499,
|
4664 |
+
"learning_rate": 6.988404888749608e-08,
|
4665 |
+
"logits/chosen": 0.49672946333885193,
|
4666 |
+
"logits/rejected": 0.9934390187263489,
|
4667 |
+
"logps/chosen": -53.964393615722656,
|
4668 |
+
"logps/rejected": -592.7462158203125,
|
4669 |
+
"loss": 14747.2812,
|
4670 |
+
"rewards/accuracies": 0.987500011920929,
|
4671 |
+
"rewards/chosen": 0.19860555231571198,
|
4672 |
+
"rewards/margins": 0.5442546010017395,
|
4673 |
+
"rewards/rejected": -0.3456490635871887,
|
4674 |
+
"step": 3100
|
4675 |
+
},
|
4676 |
+
{
|
4677 |
+
"epoch": 15.746835443037975,
|
4678 |
+
"grad_norm": 366703.97931916115,
|
4679 |
+
"learning_rate": 6.831714196176746e-08,
|
4680 |
+
"logits/chosen": -1.272958517074585,
|
4681 |
+
"logits/rejected": -1.2677191495895386,
|
4682 |
+
"logps/chosen": -46.67731475830078,
|
4683 |
+
"logps/rejected": -578.444091796875,
|
4684 |
+
"loss": 14561.6719,
|
4685 |
+
"rewards/accuracies": 1.0,
|
4686 |
+
"rewards/chosen": 0.19132201373577118,
|
4687 |
+
"rewards/margins": 0.5392004251480103,
|
4688 |
+
"rewards/rejected": -0.3478783965110779,
|
4689 |
+
"step": 3110
|
4690 |
+
},
|
4691 |
+
{
|
4692 |
+
"epoch": 15.79746835443038,
|
4693 |
+
"grad_norm": 363431.4061189904,
|
4694 |
+
"learning_rate": 6.675023503603886e-08,
|
4695 |
+
"logits/chosen": -0.16689462959766388,
|
4696 |
+
"logits/rejected": 0.6665533781051636,
|
4697 |
+
"logps/chosen": -49.408546447753906,
|
4698 |
+
"logps/rejected": -587.0728759765625,
|
4699 |
+
"loss": 14602.2328,
|
4700 |
+
"rewards/accuracies": 1.0,
|
4701 |
+
"rewards/chosen": 0.1951448619365692,
|
4702 |
+
"rewards/margins": 0.538873553276062,
|
4703 |
+
"rewards/rejected": -0.3437287211418152,
|
4704 |
+
"step": 3120
|
4705 |
+
},
|
4706 |
+
{
|
4707 |
+
"epoch": 15.848101265822784,
|
4708 |
+
"grad_norm": 1925815.481070705,
|
4709 |
+
"learning_rate": 6.518332811031025e-08,
|
4710 |
+
"logits/chosen": -0.1888163536787033,
|
4711 |
+
"logits/rejected": -0.3901883661746979,
|
4712 |
+
"logps/chosen": -37.012611389160156,
|
4713 |
+
"logps/rejected": -553.5242919921875,
|
4714 |
+
"loss": 15093.5328,
|
4715 |
+
"rewards/accuracies": 0.987500011920929,
|
4716 |
+
"rewards/chosen": 0.18000957369804382,
|
4717 |
+
"rewards/margins": 0.5157765746116638,
|
4718 |
+
"rewards/rejected": -0.3357670307159424,
|
4719 |
+
"step": 3130
|
4720 |
+
},
|
4721 |
+
{
|
4722 |
+
"epoch": 15.89873417721519,
|
4723 |
+
"grad_norm": 406865.81368112064,
|
4724 |
+
"learning_rate": 6.361642118458163e-08,
|
4725 |
+
"logits/chosen": -1.0143232345581055,
|
4726 |
+
"logits/rejected": -1.1421440839767456,
|
4727 |
+
"logps/chosen": -39.294063568115234,
|
4728 |
+
"logps/rejected": -572.8070068359375,
|
4729 |
+
"loss": 15857.7219,
|
4730 |
+
"rewards/accuracies": 1.0,
|
4731 |
+
"rewards/chosen": 0.18329963088035583,
|
4732 |
+
"rewards/margins": 0.5344266891479492,
|
4733 |
+
"rewards/rejected": -0.351127028465271,
|
4734 |
+
"step": 3140
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 15.949367088607595,
|
4738 |
+
"grad_norm": 283773.4922827141,
|
4739 |
+
"learning_rate": 6.204951425885302e-08,
|
4740 |
+
"logits/chosen": 0.45898357033729553,
|
4741 |
+
"logits/rejected": 1.1897245645523071,
|
4742 |
+
"logps/chosen": -47.45745086669922,
|
4743 |
+
"logps/rejected": -564.1045532226562,
|
4744 |
+
"loss": 15274.2656,
|
4745 |
+
"rewards/accuracies": 0.987500011920929,
|
4746 |
+
"rewards/chosen": 0.17995783686637878,
|
4747 |
+
"rewards/margins": 0.516915500164032,
|
4748 |
+
"rewards/rejected": -0.3369576930999756,
|
4749 |
+
"step": 3150
|
4750 |
+
},
|
4751 |
+
{
|
4752 |
+
"epoch": 16.0,
|
4753 |
+
"grad_norm": 338639.8303682123,
|
4754 |
+
"learning_rate": 6.04826073331244e-08,
|
4755 |
+
"logits/chosen": -1.1235512495040894,
|
4756 |
+
"logits/rejected": 0.0012889147037640214,
|
4757 |
+
"logps/chosen": -41.902889251708984,
|
4758 |
+
"logps/rejected": -569.4451293945312,
|
4759 |
+
"loss": 15055.2062,
|
4760 |
+
"rewards/accuracies": 1.0,
|
4761 |
+
"rewards/chosen": 0.18416796624660492,
|
4762 |
+
"rewards/margins": 0.5268322825431824,
|
4763 |
+
"rewards/rejected": -0.34266436100006104,
|
4764 |
+
"step": 3160
|
4765 |
}
|
4766 |
],
|
4767 |
"logging_steps": 10,
|