cat-searcher
commited on
Training in progress, epoch 4, checkpoint
Browse files- last-checkpoint/global_step987/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step987/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step987/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e08ed8f9a3bcbcb2c4d63ac9315322a9c67d1d1853045d6a735fd71555098bc2
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9475412bd1ce86508201c127efc3e0edee6633a49299c81896146b3652959bd
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7f9e8477484da2f362a8d538703ae02607946193817e058365b45cf019a6b53
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d63379f76bfad20b1d84d0afcc1f2f2688a7af70f41e4453e65183454d9e9368
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d30dacf4855b30b73cc83e700ae2748a85a94ca8628a2feb75b206cbe909ffec
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b715c7992af463ed7ef9f47ae64199b59d1fb3a71c90915154004d4eab71ad4d
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ddce83e72ba4d49bc6bc065f2030345a82a5667f8dae4ec6fc90eb324d3f214
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2df8414c72cbc2967c0c40f2215e0bd2917efa93ede3063ed7fe54f4bcf0306e
|
3 |
+
size 2506176112
|
last-checkpoint/global_step987/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c7d49adf53cb4919d277113e5d2286f11daf9bfd0c5fc27770ad51337147826
|
3 |
+
size 85570
|
last-checkpoint/global_step987/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:523fb792a177804d3a89c3d960a8b61cb85d786b8588ca6eb40850c88b463f0d
|
3 |
+
size 85506
|
last-checkpoint/global_step987/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db467bc7ac5a73bdda871ba0cf84b28bfbe8ffb532017690ee42e00cf9027801
|
3 |
+
size 85506
|
last-checkpoint/global_step987/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:882f62fdd9c0fa0cb53f73107ad9dc72d0756d30a94587750f2dc29a67024c77
|
3 |
+
size 85506
|
last-checkpoint/global_step987/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeff6161357c3bf99eecd7fe3dad1ef8c5a95c24e2da1d2ad819e9e713e5cc38
|
3 |
+
size 85506
|
last-checkpoint/global_step987/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d772a1419efe720b7a7fdd266fcc1365156f5adebf06de57810e9a7b0f603d16
|
3 |
+
size 85506
|
last-checkpoint/global_step987/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ed060b90554fd9e0930346a0c2ae5814d8ca9d73b074ca36b52a358dd056d81
|
3 |
+
size 85506
|
last-checkpoint/global_step987/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d08f7be62362f05f62bcbbfde5442c6e140ae6a3be863c7db3bb03c2dcf1686e
|
3 |
+
size 85506
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step987
|
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4945242264
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9d6347bbdfb78d6d728cf68948d2c89598dfbfde2c1c992084431e44430796d
|
3 |
size 4945242264
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67121608
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbb9d1038339e10330e9562076f77aac42d42c8f7c5245bf246911f8ffc69ef0
|
3 |
size 67121608
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a0ef6f96a48e59aa52c4b471312c2a62378c19acc7ebbae839612b03a7d775a
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab11d533c0fdad46ea8b8e295ba5fdb705e078eeb88cc28f37d82913508766e9
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:615c168147e3465ce5bfab6da2ff4afc68566ce00ec0f0c6c9fc988038a58d0a
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79f71e8f8674ecaef9f8cdcbf7ac457a8b8ff15b12694ba2a2fffcb4b43f0f08
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88cf6d674dab5545c300a55135f08ca935730a3d35e2c419fb0b333f19482c19
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2754f2cd8824702f027870d93748b3c0491b0ecd30f1e3d8e937116b2be6151f
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1385124ac55604598f45ea6e2d141f29456647d3e7c10d12ca64ec93d312be8d
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:416538efaec7391fa8fe782fb15146b83e5612d9e1961292c34c53e964806873
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe1f41c97c016e1df7ebf5446401ec464be377a52a8190323220b8692dc187a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1207,6 +1207,291 @@
|
|
1207 |
"rewards/margins": 0.1435452550649643,
|
1208 |
"rewards/rejected": -0.11581633985042572,
|
1209 |
"step": 790
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1210 |
}
|
1211 |
],
|
1212 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.99746835443038,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 987,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1207 |
"rewards/margins": 0.1435452550649643,
|
1208 |
"rewards/rejected": -0.11581633985042572,
|
1209 |
"step": 790
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"epoch": 4.050632911392405,
|
1213 |
+
"grad_norm": 1782853.8797277175,
|
1214 |
+
"learning_rate": 4.3027264180507676e-07,
|
1215 |
+
"logits/chosen": -8.29829216003418,
|
1216 |
+
"logits/rejected": -8.205643653869629,
|
1217 |
+
"logps/chosen": -178.8797149658203,
|
1218 |
+
"logps/rejected": -378.06121826171875,
|
1219 |
+
"loss": 69143.425,
|
1220 |
+
"rewards/accuracies": 0.9375,
|
1221 |
+
"rewards/chosen": 0.05098045617341995,
|
1222 |
+
"rewards/margins": 0.1993386447429657,
|
1223 |
+
"rewards/rejected": -0.14835818111896515,
|
1224 |
+
"step": 800
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 4.10126582278481,
|
1228 |
+
"grad_norm": 1719472.9461235409,
|
1229 |
+
"learning_rate": 4.287057348793481e-07,
|
1230 |
+
"logits/chosen": -7.558290958404541,
|
1231 |
+
"logits/rejected": -7.646592617034912,
|
1232 |
+
"logps/chosen": -186.36911010742188,
|
1233 |
+
"logps/rejected": -386.6961975097656,
|
1234 |
+
"loss": 67634.3375,
|
1235 |
+
"rewards/accuracies": 0.949999988079071,
|
1236 |
+
"rewards/chosen": 0.04189852252602577,
|
1237 |
+
"rewards/margins": 0.19968575239181519,
|
1238 |
+
"rewards/rejected": -0.1577872335910797,
|
1239 |
+
"step": 810
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"epoch": 4.151898734177215,
|
1243 |
+
"grad_norm": 1571399.8942716653,
|
1244 |
+
"learning_rate": 4.2713882795361953e-07,
|
1245 |
+
"logits/chosen": -7.811161994934082,
|
1246 |
+
"logits/rejected": -7.783130645751953,
|
1247 |
+
"logps/chosen": -181.81602478027344,
|
1248 |
+
"logps/rejected": -402.1683654785156,
|
1249 |
+
"loss": 66806.9187,
|
1250 |
+
"rewards/accuracies": 0.9624999761581421,
|
1251 |
+
"rewards/chosen": 0.049001529812812805,
|
1252 |
+
"rewards/margins": 0.21849961578845978,
|
1253 |
+
"rewards/rejected": -0.16949808597564697,
|
1254 |
+
"step": 820
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"epoch": 4.2025316455696204,
|
1258 |
+
"grad_norm": 1992030.3917670588,
|
1259 |
+
"learning_rate": 4.255719210278909e-07,
|
1260 |
+
"logits/chosen": -7.349759101867676,
|
1261 |
+
"logits/rejected": -7.380797386169434,
|
1262 |
+
"logps/chosen": -175.21702575683594,
|
1263 |
+
"logps/rejected": -396.2167053222656,
|
1264 |
+
"loss": 67021.875,
|
1265 |
+
"rewards/accuracies": 0.9624999761581421,
|
1266 |
+
"rewards/chosen": 0.05283821374177933,
|
1267 |
+
"rewards/margins": 0.22190704941749573,
|
1268 |
+
"rewards/rejected": -0.169068843126297,
|
1269 |
+
"step": 830
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"epoch": 4.253164556962025,
|
1273 |
+
"grad_norm": 1859879.670487208,
|
1274 |
+
"learning_rate": 4.2400501410216235e-07,
|
1275 |
+
"logits/chosen": -7.482248783111572,
|
1276 |
+
"logits/rejected": -7.252910614013672,
|
1277 |
+
"logps/chosen": -187.070556640625,
|
1278 |
+
"logps/rejected": -401.1556701660156,
|
1279 |
+
"loss": 68463.9,
|
1280 |
+
"rewards/accuracies": 0.987500011920929,
|
1281 |
+
"rewards/chosen": 0.05697192624211311,
|
1282 |
+
"rewards/margins": 0.21645841002464294,
|
1283 |
+
"rewards/rejected": -0.15948647260665894,
|
1284 |
+
"step": 840
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 4.30379746835443,
|
1288 |
+
"grad_norm": 1688181.1410657803,
|
1289 |
+
"learning_rate": 4.224381071764337e-07,
|
1290 |
+
"logits/chosen": -5.693742275238037,
|
1291 |
+
"logits/rejected": -5.435591697692871,
|
1292 |
+
"logps/chosen": -198.21900939941406,
|
1293 |
+
"logps/rejected": -398.49981689453125,
|
1294 |
+
"loss": 67266.2,
|
1295 |
+
"rewards/accuracies": 0.9624999761581421,
|
1296 |
+
"rewards/chosen": 0.04546400159597397,
|
1297 |
+
"rewards/margins": 0.20465342700481415,
|
1298 |
+
"rewards/rejected": -0.15918943285942078,
|
1299 |
+
"step": 850
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"epoch": 4.3544303797468356,
|
1303 |
+
"grad_norm": 1750431.6432656392,
|
1304 |
+
"learning_rate": 4.208712002507051e-07,
|
1305 |
+
"logits/chosen": -8.664016723632812,
|
1306 |
+
"logits/rejected": -8.082508087158203,
|
1307 |
+
"logps/chosen": -178.05966186523438,
|
1308 |
+
"logps/rejected": -402.77093505859375,
|
1309 |
+
"loss": 65760.2625,
|
1310 |
+
"rewards/accuracies": 0.9750000238418579,
|
1311 |
+
"rewards/chosen": 0.056066203862428665,
|
1312 |
+
"rewards/margins": 0.22950176894664764,
|
1313 |
+
"rewards/rejected": -0.17343556880950928,
|
1314 |
+
"step": 860
|
1315 |
+
},
|
1316 |
+
{
|
1317 |
+
"epoch": 4.405063291139241,
|
1318 |
+
"grad_norm": 1904336.610304837,
|
1319 |
+
"learning_rate": 4.193042933249765e-07,
|
1320 |
+
"logits/chosen": -5.778517723083496,
|
1321 |
+
"logits/rejected": -5.432709693908691,
|
1322 |
+
"logps/chosen": -176.563720703125,
|
1323 |
+
"logps/rejected": -379.2276916503906,
|
1324 |
+
"loss": 67058.1125,
|
1325 |
+
"rewards/accuracies": 0.9624999761581421,
|
1326 |
+
"rewards/chosen": 0.05091014504432678,
|
1327 |
+
"rewards/margins": 0.2058809995651245,
|
1328 |
+
"rewards/rejected": -0.15497085452079773,
|
1329 |
+
"step": 870
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"epoch": 4.455696202531645,
|
1333 |
+
"grad_norm": 1779397.1811982268,
|
1334 |
+
"learning_rate": 4.177373863992479e-07,
|
1335 |
+
"logits/chosen": -6.937778472900391,
|
1336 |
+
"logits/rejected": -6.611588954925537,
|
1337 |
+
"logps/chosen": -180.23001098632812,
|
1338 |
+
"logps/rejected": -400.9800720214844,
|
1339 |
+
"loss": 67019.0875,
|
1340 |
+
"rewards/accuracies": 0.987500011920929,
|
1341 |
+
"rewards/chosen": 0.05085798352956772,
|
1342 |
+
"rewards/margins": 0.2235671728849411,
|
1343 |
+
"rewards/rejected": -0.17270918190479279,
|
1344 |
+
"step": 880
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"epoch": 4.506329113924051,
|
1348 |
+
"grad_norm": 1755630.994265544,
|
1349 |
+
"learning_rate": 4.1617047947351925e-07,
|
1350 |
+
"logits/chosen": -6.663479804992676,
|
1351 |
+
"logits/rejected": -6.144991397857666,
|
1352 |
+
"logps/chosen": -189.93707275390625,
|
1353 |
+
"logps/rejected": -383.9622802734375,
|
1354 |
+
"loss": 66060.8813,
|
1355 |
+
"rewards/accuracies": 0.987500011920929,
|
1356 |
+
"rewards/chosen": 0.053109876811504364,
|
1357 |
+
"rewards/margins": 0.20497091114521027,
|
1358 |
+
"rewards/rejected": -0.1518610268831253,
|
1359 |
+
"step": 890
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"epoch": 4.556962025316456,
|
1363 |
+
"grad_norm": 1729683.010514938,
|
1364 |
+
"learning_rate": 4.1460357254779067e-07,
|
1365 |
+
"logits/chosen": -7.10635232925415,
|
1366 |
+
"logits/rejected": -7.227837562561035,
|
1367 |
+
"logps/chosen": -184.3021240234375,
|
1368 |
+
"logps/rejected": -391.59930419921875,
|
1369 |
+
"loss": 67231.6313,
|
1370 |
+
"rewards/accuracies": 0.9750000238418579,
|
1371 |
+
"rewards/chosen": 0.050502438098192215,
|
1372 |
+
"rewards/margins": 0.20674797892570496,
|
1373 |
+
"rewards/rejected": -0.15624557435512543,
|
1374 |
+
"step": 900
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"epoch": 4.6075949367088604,
|
1378 |
+
"grad_norm": 1921064.671845176,
|
1379 |
+
"learning_rate": 4.13036665622062e-07,
|
1380 |
+
"logits/chosen": -7.409733772277832,
|
1381 |
+
"logits/rejected": -7.2668256759643555,
|
1382 |
+
"logps/chosen": -184.89645385742188,
|
1383 |
+
"logps/rejected": -395.2364501953125,
|
1384 |
+
"loss": 67370.1875,
|
1385 |
+
"rewards/accuracies": 0.9750000238418579,
|
1386 |
+
"rewards/chosen": 0.047733135521411896,
|
1387 |
+
"rewards/margins": 0.2108074128627777,
|
1388 |
+
"rewards/rejected": -0.1630742847919464,
|
1389 |
+
"step": 910
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 4.658227848101266,
|
1393 |
+
"grad_norm": 1780170.6356310213,
|
1394 |
+
"learning_rate": 4.1146975869633344e-07,
|
1395 |
+
"logits/chosen": -8.294339179992676,
|
1396 |
+
"logits/rejected": -8.312765121459961,
|
1397 |
+
"logps/chosen": -185.74949645996094,
|
1398 |
+
"logps/rejected": -405.0606689453125,
|
1399 |
+
"loss": 64484.2438,
|
1400 |
+
"rewards/accuracies": 0.925000011920929,
|
1401 |
+
"rewards/chosen": 0.05801473185420036,
|
1402 |
+
"rewards/margins": 0.21365991234779358,
|
1403 |
+
"rewards/rejected": -0.15564517676830292,
|
1404 |
+
"step": 920
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"epoch": 4.708860759493671,
|
1408 |
+
"grad_norm": 1755118.627079852,
|
1409 |
+
"learning_rate": 4.099028517706048e-07,
|
1410 |
+
"logits/chosen": -8.692441940307617,
|
1411 |
+
"logits/rejected": -8.729148864746094,
|
1412 |
+
"logps/chosen": -177.8703155517578,
|
1413 |
+
"logps/rejected": -410.15179443359375,
|
1414 |
+
"loss": 65960.6812,
|
1415 |
+
"rewards/accuracies": 0.9750000238418579,
|
1416 |
+
"rewards/chosen": 0.061922211199998856,
|
1417 |
+
"rewards/margins": 0.2333444058895111,
|
1418 |
+
"rewards/rejected": -0.17142215371131897,
|
1419 |
+
"step": 930
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"epoch": 4.759493670886076,
|
1423 |
+
"grad_norm": 1801666.0452341542,
|
1424 |
+
"learning_rate": 4.083359448448762e-07,
|
1425 |
+
"logits/chosen": -8.838138580322266,
|
1426 |
+
"logits/rejected": -8.679426193237305,
|
1427 |
+
"logps/chosen": -160.35488891601562,
|
1428 |
+
"logps/rejected": -387.3427429199219,
|
1429 |
+
"loss": 65957.3,
|
1430 |
+
"rewards/accuracies": 1.0,
|
1431 |
+
"rewards/chosen": 0.061734091490507126,
|
1432 |
+
"rewards/margins": 0.2303626835346222,
|
1433 |
+
"rewards/rejected": -0.16862855851650238,
|
1434 |
+
"step": 940
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 4.810126582278481,
|
1438 |
+
"grad_norm": 1823914.1164093877,
|
1439 |
+
"learning_rate": 4.0676903791914757e-07,
|
1440 |
+
"logits/chosen": -8.039133071899414,
|
1441 |
+
"logits/rejected": -8.235550880432129,
|
1442 |
+
"logps/chosen": -181.90818786621094,
|
1443 |
+
"logps/rejected": -390.46075439453125,
|
1444 |
+
"loss": 65100.0437,
|
1445 |
+
"rewards/accuracies": 0.9750000238418579,
|
1446 |
+
"rewards/chosen": 0.05453425645828247,
|
1447 |
+
"rewards/margins": 0.20622405409812927,
|
1448 |
+
"rewards/rejected": -0.1516897976398468,
|
1449 |
+
"step": 950
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 4.860759493670886,
|
1453 |
+
"grad_norm": 2552504.752187401,
|
1454 |
+
"learning_rate": 4.05202130993419e-07,
|
1455 |
+
"logits/chosen": -8.228861808776855,
|
1456 |
+
"logits/rejected": -8.044200897216797,
|
1457 |
+
"logps/chosen": -175.62306213378906,
|
1458 |
+
"logps/rejected": -387.7801818847656,
|
1459 |
+
"loss": 65251.5563,
|
1460 |
+
"rewards/accuracies": 0.9750000238418579,
|
1461 |
+
"rewards/chosen": 0.05643890053033829,
|
1462 |
+
"rewards/margins": 0.2162017822265625,
|
1463 |
+
"rewards/rejected": -0.15976287424564362,
|
1464 |
+
"step": 960
|
1465 |
+
},
|
1466 |
+
{
|
1467 |
+
"epoch": 4.911392405063291,
|
1468 |
+
"grad_norm": 2112562.829549655,
|
1469 |
+
"learning_rate": 4.0363522406769034e-07,
|
1470 |
+
"logits/chosen": -8.678482055664062,
|
1471 |
+
"logits/rejected": -8.680012702941895,
|
1472 |
+
"logps/chosen": -180.9581298828125,
|
1473 |
+
"logps/rejected": -402.48944091796875,
|
1474 |
+
"loss": 65731.7188,
|
1475 |
+
"rewards/accuracies": 0.987500011920929,
|
1476 |
+
"rewards/chosen": 0.05988938361406326,
|
1477 |
+
"rewards/margins": 0.22270476818084717,
|
1478 |
+
"rewards/rejected": -0.1628153920173645,
|
1479 |
+
"step": 970
|
1480 |
+
},
|
1481 |
+
{
|
1482 |
+
"epoch": 4.962025316455696,
|
1483 |
+
"grad_norm": 1800725.2761679955,
|
1484 |
+
"learning_rate": 4.0206831714196175e-07,
|
1485 |
+
"logits/chosen": -9.068916320800781,
|
1486 |
+
"logits/rejected": -8.908533096313477,
|
1487 |
+
"logps/chosen": -191.30018615722656,
|
1488 |
+
"logps/rejected": -433.2850036621094,
|
1489 |
+
"loss": 64987.5125,
|
1490 |
+
"rewards/accuracies": 0.987500011920929,
|
1491 |
+
"rewards/chosen": 0.0664498582482338,
|
1492 |
+
"rewards/margins": 0.24509286880493164,
|
1493 |
+
"rewards/rejected": -0.17864301800727844,
|
1494 |
+
"step": 980
|
1495 |
}
|
1496 |
],
|
1497 |
"logging_steps": 10,
|