cat-searcher commited on
Commit
58c9ef9
·
verified ·
1 Parent(s): eea720d

Training in progress, epoch 4, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step987/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step987/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step987/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step987/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step987/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step987/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step987/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step987/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step987/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step987/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step987/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step987/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step987/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step987/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step987/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step987/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +287 -2
last-checkpoint/global_step987/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08ed8f9a3bcbcb2c4d63ac9315322a9c67d1d1853045d6a735fd71555098bc2
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9475412bd1ce86508201c127efc3e0edee6633a49299c81896146b3652959bd
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f9e8477484da2f362a8d538703ae02607946193817e058365b45cf019a6b53
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63379f76bfad20b1d84d0afcc1f2f2688a7af70f41e4453e65183454d9e9368
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30dacf4855b30b73cc83e700ae2748a85a94ca8628a2feb75b206cbe909ffec
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b715c7992af463ed7ef9f47ae64199b59d1fb3a71c90915154004d4eab71ad4d
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddce83e72ba4d49bc6bc065f2030345a82a5667f8dae4ec6fc90eb324d3f214
3
+ size 2506176112
last-checkpoint/global_step987/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2df8414c72cbc2967c0c40f2215e0bd2917efa93ede3063ed7fe54f4bcf0306e
3
+ size 2506176112
last-checkpoint/global_step987/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c7d49adf53cb4919d277113e5d2286f11daf9bfd0c5fc27770ad51337147826
3
+ size 85570
last-checkpoint/global_step987/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523fb792a177804d3a89c3d960a8b61cb85d786b8588ca6eb40850c88b463f0d
3
+ size 85506
last-checkpoint/global_step987/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db467bc7ac5a73bdda871ba0cf84b28bfbe8ffb532017690ee42e00cf9027801
3
+ size 85506
last-checkpoint/global_step987/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:882f62fdd9c0fa0cb53f73107ad9dc72d0756d30a94587750f2dc29a67024c77
3
+ size 85506
last-checkpoint/global_step987/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeff6161357c3bf99eecd7fe3dad1ef8c5a95c24e2da1d2ad819e9e713e5cc38
3
+ size 85506
last-checkpoint/global_step987/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d772a1419efe720b7a7fdd266fcc1365156f5adebf06de57810e9a7b0f603d16
3
+ size 85506
last-checkpoint/global_step987/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed060b90554fd9e0930346a0c2ae5814d8ca9d73b074ca36b52a358dd056d81
3
+ size 85506
last-checkpoint/global_step987/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08f7be62362f05f62bcbbfde5442c6e140ae6a3be863c7db3bb03c2dcf1686e
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step790
 
1
+ global_step987
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ee3f893a00b883ea2140dd40c1fb5676a8e4b4c39e9f77ab126e1a38a9c2786
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9d6347bbdfb78d6d728cf68948d2c89598dfbfde2c1c992084431e44430796d
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:391cf44d17535a2b42d0a567444b9ca191b26d208e2891eee80e248f7f6c3747
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb9d1038339e10330e9562076f77aac42d42c8f7c5245bf246911f8ffc69ef0
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36d2a2034ebb05cb71c510897f2795b31164e50f17b270bc25d2be3ad9a17b22
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a0ef6f96a48e59aa52c4b471312c2a62378c19acc7ebbae839612b03a7d775a
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:060dfdb1c49102cbdc8868a6031e68787601b4ccd782f3fb9b137e20c1fd2c7a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab11d533c0fdad46ea8b8e295ba5fdb705e078eeb88cc28f37d82913508766e9
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af01895cb66e616591f2e4baa8dcd8151530eab133c73571ccb31c74f35422ce
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615c168147e3465ce5bfab6da2ff4afc68566ce00ec0f0c6c9fc988038a58d0a
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:677921992b1e0cef3aee776f245975003d22f51d9bd6ed20f248ded1deb72fa9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f71e8f8674ecaef9f8cdcbf7ac457a8b8ff15b12694ba2a2fffcb4b43f0f08
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d69353c629541c690c5471f8ec05fdab2bfecf3d37afaa436bc45939da6db68f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88cf6d674dab5545c300a55135f08ca935730a3d35e2c419fb0b333f19482c19
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e40ba6668cc03c9162c68a933d164bf38ae2d196a9a6fec03ae615491201185
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2754f2cd8824702f027870d93748b3c0491b0ecd30f1e3d8e937116b2be6151f
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:870968fea834e24b2e099cf3e4fe1e3fb8caf38d8f8e5b790d7d47386d4d05f5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1385124ac55604598f45ea6e2d141f29456647d3e7c10d12ca64ec93d312be8d
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9e19618bee7c6ef43256fea25abe19bca88535eb1e7dc213cde8929ae4e8180
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416538efaec7391fa8fe782fb15146b83e5612d9e1961292c34c53e964806873
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b9e01fb8119366f950b23568c9c5eaa6d3e352534620301a9291190e4d0ef8f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe1f41c97c016e1df7ebf5446401ec464be377a52a8190323220b8692dc187a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
  "eval_steps": 100,
6
- "global_step": 790,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1207,6 +1207,291 @@
1207
  "rewards/margins": 0.1435452550649643,
1208
  "rewards/rejected": -0.11581633985042572,
1209
  "step": 790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1210
  }
1211
  ],
1212
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.99746835443038,
5
  "eval_steps": 100,
6
+ "global_step": 987,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1207
  "rewards/margins": 0.1435452550649643,
1208
  "rewards/rejected": -0.11581633985042572,
1209
  "step": 790
1210
+ },
1211
+ {
1212
+ "epoch": 4.050632911392405,
1213
+ "grad_norm": 1782853.8797277175,
1214
+ "learning_rate": 4.3027264180507676e-07,
1215
+ "logits/chosen": -8.29829216003418,
1216
+ "logits/rejected": -8.205643653869629,
1217
+ "logps/chosen": -178.8797149658203,
1218
+ "logps/rejected": -378.06121826171875,
1219
+ "loss": 69143.425,
1220
+ "rewards/accuracies": 0.9375,
1221
+ "rewards/chosen": 0.05098045617341995,
1222
+ "rewards/margins": 0.1993386447429657,
1223
+ "rewards/rejected": -0.14835818111896515,
1224
+ "step": 800
1225
+ },
1226
+ {
1227
+ "epoch": 4.10126582278481,
1228
+ "grad_norm": 1719472.9461235409,
1229
+ "learning_rate": 4.287057348793481e-07,
1230
+ "logits/chosen": -7.558290958404541,
1231
+ "logits/rejected": -7.646592617034912,
1232
+ "logps/chosen": -186.36911010742188,
1233
+ "logps/rejected": -386.6961975097656,
1234
+ "loss": 67634.3375,
1235
+ "rewards/accuracies": 0.949999988079071,
1236
+ "rewards/chosen": 0.04189852252602577,
1237
+ "rewards/margins": 0.19968575239181519,
1238
+ "rewards/rejected": -0.1577872335910797,
1239
+ "step": 810
1240
+ },
1241
+ {
1242
+ "epoch": 4.151898734177215,
1243
+ "grad_norm": 1571399.8942716653,
1244
+ "learning_rate": 4.2713882795361953e-07,
1245
+ "logits/chosen": -7.811161994934082,
1246
+ "logits/rejected": -7.783130645751953,
1247
+ "logps/chosen": -181.81602478027344,
1248
+ "logps/rejected": -402.1683654785156,
1249
+ "loss": 66806.9187,
1250
+ "rewards/accuracies": 0.9624999761581421,
1251
+ "rewards/chosen": 0.049001529812812805,
1252
+ "rewards/margins": 0.21849961578845978,
1253
+ "rewards/rejected": -0.16949808597564697,
1254
+ "step": 820
1255
+ },
1256
+ {
1257
+ "epoch": 4.2025316455696204,
1258
+ "grad_norm": 1992030.3917670588,
1259
+ "learning_rate": 4.255719210278909e-07,
1260
+ "logits/chosen": -7.349759101867676,
1261
+ "logits/rejected": -7.380797386169434,
1262
+ "logps/chosen": -175.21702575683594,
1263
+ "logps/rejected": -396.2167053222656,
1264
+ "loss": 67021.875,
1265
+ "rewards/accuracies": 0.9624999761581421,
1266
+ "rewards/chosen": 0.05283821374177933,
1267
+ "rewards/margins": 0.22190704941749573,
1268
+ "rewards/rejected": -0.169068843126297,
1269
+ "step": 830
1270
+ },
1271
+ {
1272
+ "epoch": 4.253164556962025,
1273
+ "grad_norm": 1859879.670487208,
1274
+ "learning_rate": 4.2400501410216235e-07,
1275
+ "logits/chosen": -7.482248783111572,
1276
+ "logits/rejected": -7.252910614013672,
1277
+ "logps/chosen": -187.070556640625,
1278
+ "logps/rejected": -401.1556701660156,
1279
+ "loss": 68463.9,
1280
+ "rewards/accuracies": 0.987500011920929,
1281
+ "rewards/chosen": 0.05697192624211311,
1282
+ "rewards/margins": 0.21645841002464294,
1283
+ "rewards/rejected": -0.15948647260665894,
1284
+ "step": 840
1285
+ },
1286
+ {
1287
+ "epoch": 4.30379746835443,
1288
+ "grad_norm": 1688181.1410657803,
1289
+ "learning_rate": 4.224381071764337e-07,
1290
+ "logits/chosen": -5.693742275238037,
1291
+ "logits/rejected": -5.435591697692871,
1292
+ "logps/chosen": -198.21900939941406,
1293
+ "logps/rejected": -398.49981689453125,
1294
+ "loss": 67266.2,
1295
+ "rewards/accuracies": 0.9624999761581421,
1296
+ "rewards/chosen": 0.04546400159597397,
1297
+ "rewards/margins": 0.20465342700481415,
1298
+ "rewards/rejected": -0.15918943285942078,
1299
+ "step": 850
1300
+ },
1301
+ {
1302
+ "epoch": 4.3544303797468356,
1303
+ "grad_norm": 1750431.6432656392,
1304
+ "learning_rate": 4.208712002507051e-07,
1305
+ "logits/chosen": -8.664016723632812,
1306
+ "logits/rejected": -8.082508087158203,
1307
+ "logps/chosen": -178.05966186523438,
1308
+ "logps/rejected": -402.77093505859375,
1309
+ "loss": 65760.2625,
1310
+ "rewards/accuracies": 0.9750000238418579,
1311
+ "rewards/chosen": 0.056066203862428665,
1312
+ "rewards/margins": 0.22950176894664764,
1313
+ "rewards/rejected": -0.17343556880950928,
1314
+ "step": 860
1315
+ },
1316
+ {
1317
+ "epoch": 4.405063291139241,
1318
+ "grad_norm": 1904336.610304837,
1319
+ "learning_rate": 4.193042933249765e-07,
1320
+ "logits/chosen": -5.778517723083496,
1321
+ "logits/rejected": -5.432709693908691,
1322
+ "logps/chosen": -176.563720703125,
1323
+ "logps/rejected": -379.2276916503906,
1324
+ "loss": 67058.1125,
1325
+ "rewards/accuracies": 0.9624999761581421,
1326
+ "rewards/chosen": 0.05091014504432678,
1327
+ "rewards/margins": 0.2058809995651245,
1328
+ "rewards/rejected": -0.15497085452079773,
1329
+ "step": 870
1330
+ },
1331
+ {
1332
+ "epoch": 4.455696202531645,
1333
+ "grad_norm": 1779397.1811982268,
1334
+ "learning_rate": 4.177373863992479e-07,
1335
+ "logits/chosen": -6.937778472900391,
1336
+ "logits/rejected": -6.611588954925537,
1337
+ "logps/chosen": -180.23001098632812,
1338
+ "logps/rejected": -400.9800720214844,
1339
+ "loss": 67019.0875,
1340
+ "rewards/accuracies": 0.987500011920929,
1341
+ "rewards/chosen": 0.05085798352956772,
1342
+ "rewards/margins": 0.2235671728849411,
1343
+ "rewards/rejected": -0.17270918190479279,
1344
+ "step": 880
1345
+ },
1346
+ {
1347
+ "epoch": 4.506329113924051,
1348
+ "grad_norm": 1755630.994265544,
1349
+ "learning_rate": 4.1617047947351925e-07,
1350
+ "logits/chosen": -6.663479804992676,
1351
+ "logits/rejected": -6.144991397857666,
1352
+ "logps/chosen": -189.93707275390625,
1353
+ "logps/rejected": -383.9622802734375,
1354
+ "loss": 66060.8813,
1355
+ "rewards/accuracies": 0.987500011920929,
1356
+ "rewards/chosen": 0.053109876811504364,
1357
+ "rewards/margins": 0.20497091114521027,
1358
+ "rewards/rejected": -0.1518610268831253,
1359
+ "step": 890
1360
+ },
1361
+ {
1362
+ "epoch": 4.556962025316456,
1363
+ "grad_norm": 1729683.010514938,
1364
+ "learning_rate": 4.1460357254779067e-07,
1365
+ "logits/chosen": -7.10635232925415,
1366
+ "logits/rejected": -7.227837562561035,
1367
+ "logps/chosen": -184.3021240234375,
1368
+ "logps/rejected": -391.59930419921875,
1369
+ "loss": 67231.6313,
1370
+ "rewards/accuracies": 0.9750000238418579,
1371
+ "rewards/chosen": 0.050502438098192215,
1372
+ "rewards/margins": 0.20674797892570496,
1373
+ "rewards/rejected": -0.15624557435512543,
1374
+ "step": 900
1375
+ },
1376
+ {
1377
+ "epoch": 4.6075949367088604,
1378
+ "grad_norm": 1921064.671845176,
1379
+ "learning_rate": 4.13036665622062e-07,
1380
+ "logits/chosen": -7.409733772277832,
1381
+ "logits/rejected": -7.2668256759643555,
1382
+ "logps/chosen": -184.89645385742188,
1383
+ "logps/rejected": -395.2364501953125,
1384
+ "loss": 67370.1875,
1385
+ "rewards/accuracies": 0.9750000238418579,
1386
+ "rewards/chosen": 0.047733135521411896,
1387
+ "rewards/margins": 0.2108074128627777,
1388
+ "rewards/rejected": -0.1630742847919464,
1389
+ "step": 910
1390
+ },
1391
+ {
1392
+ "epoch": 4.658227848101266,
1393
+ "grad_norm": 1780170.6356310213,
1394
+ "learning_rate": 4.1146975869633344e-07,
1395
+ "logits/chosen": -8.294339179992676,
1396
+ "logits/rejected": -8.312765121459961,
1397
+ "logps/chosen": -185.74949645996094,
1398
+ "logps/rejected": -405.0606689453125,
1399
+ "loss": 64484.2438,
1400
+ "rewards/accuracies": 0.925000011920929,
1401
+ "rewards/chosen": 0.05801473185420036,
1402
+ "rewards/margins": 0.21365991234779358,
1403
+ "rewards/rejected": -0.15564517676830292,
1404
+ "step": 920
1405
+ },
1406
+ {
1407
+ "epoch": 4.708860759493671,
1408
+ "grad_norm": 1755118.627079852,
1409
+ "learning_rate": 4.099028517706048e-07,
1410
+ "logits/chosen": -8.692441940307617,
1411
+ "logits/rejected": -8.729148864746094,
1412
+ "logps/chosen": -177.8703155517578,
1413
+ "logps/rejected": -410.15179443359375,
1414
+ "loss": 65960.6812,
1415
+ "rewards/accuracies": 0.9750000238418579,
1416
+ "rewards/chosen": 0.061922211199998856,
1417
+ "rewards/margins": 0.2333444058895111,
1418
+ "rewards/rejected": -0.17142215371131897,
1419
+ "step": 930
1420
+ },
1421
+ {
1422
+ "epoch": 4.759493670886076,
1423
+ "grad_norm": 1801666.0452341542,
1424
+ "learning_rate": 4.083359448448762e-07,
1425
+ "logits/chosen": -8.838138580322266,
1426
+ "logits/rejected": -8.679426193237305,
1427
+ "logps/chosen": -160.35488891601562,
1428
+ "logps/rejected": -387.3427429199219,
1429
+ "loss": 65957.3,
1430
+ "rewards/accuracies": 1.0,
1431
+ "rewards/chosen": 0.061734091490507126,
1432
+ "rewards/margins": 0.2303626835346222,
1433
+ "rewards/rejected": -0.16862855851650238,
1434
+ "step": 940
1435
+ },
1436
+ {
1437
+ "epoch": 4.810126582278481,
1438
+ "grad_norm": 1823914.1164093877,
1439
+ "learning_rate": 4.0676903791914757e-07,
1440
+ "logits/chosen": -8.039133071899414,
1441
+ "logits/rejected": -8.235550880432129,
1442
+ "logps/chosen": -181.90818786621094,
1443
+ "logps/rejected": -390.46075439453125,
1444
+ "loss": 65100.0437,
1445
+ "rewards/accuracies": 0.9750000238418579,
1446
+ "rewards/chosen": 0.05453425645828247,
1447
+ "rewards/margins": 0.20622405409812927,
1448
+ "rewards/rejected": -0.1516897976398468,
1449
+ "step": 950
1450
+ },
1451
+ {
1452
+ "epoch": 4.860759493670886,
1453
+ "grad_norm": 2552504.752187401,
1454
+ "learning_rate": 4.05202130993419e-07,
1455
+ "logits/chosen": -8.228861808776855,
1456
+ "logits/rejected": -8.044200897216797,
1457
+ "logps/chosen": -175.62306213378906,
1458
+ "logps/rejected": -387.7801818847656,
1459
+ "loss": 65251.5563,
1460
+ "rewards/accuracies": 0.9750000238418579,
1461
+ "rewards/chosen": 0.05643890053033829,
1462
+ "rewards/margins": 0.2162017822265625,
1463
+ "rewards/rejected": -0.15976287424564362,
1464
+ "step": 960
1465
+ },
1466
+ {
1467
+ "epoch": 4.911392405063291,
1468
+ "grad_norm": 2112562.829549655,
1469
+ "learning_rate": 4.0363522406769034e-07,
1470
+ "logits/chosen": -8.678482055664062,
1471
+ "logits/rejected": -8.680012702941895,
1472
+ "logps/chosen": -180.9581298828125,
1473
+ "logps/rejected": -402.48944091796875,
1474
+ "loss": 65731.7188,
1475
+ "rewards/accuracies": 0.987500011920929,
1476
+ "rewards/chosen": 0.05988938361406326,
1477
+ "rewards/margins": 0.22270476818084717,
1478
+ "rewards/rejected": -0.1628153920173645,
1479
+ "step": 970
1480
+ },
1481
+ {
1482
+ "epoch": 4.962025316455696,
1483
+ "grad_norm": 1800725.2761679955,
1484
+ "learning_rate": 4.0206831714196175e-07,
1485
+ "logits/chosen": -9.068916320800781,
1486
+ "logits/rejected": -8.908533096313477,
1487
+ "logps/chosen": -191.30018615722656,
1488
+ "logps/rejected": -433.2850036621094,
1489
+ "loss": 64987.5125,
1490
+ "rewards/accuracies": 0.987500011920929,
1491
+ "rewards/chosen": 0.0664498582482338,
1492
+ "rewards/margins": 0.24509286880493164,
1493
+ "rewards/rejected": -0.17864301800727844,
1494
+ "step": 980
1495
  }
1496
  ],
1497
  "logging_steps": 10,