jikaixuan commited on
Commit
346a175
·
verified ·
1 Parent(s): 0eaef4a

Model save

Browse files
Files changed (5) hide show
  1. README.md +16 -19
  2. adapter_model.safetensors +1 -1
  3. all_results.json +4 -19
  4. train_results.json +4 -4
  5. trainer_state.json +596 -596
README.md CHANGED
@@ -2,13 +2,10 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - dpo
8
  - generated_from_trainer
9
  base_model: mistralai/Mistral-7B-v0.1
10
- datasets:
11
- - HuggingFaceH4/ultrafeedback_binarized
12
  model-index:
13
  - name: zephyr-7b
14
  results: []
@@ -19,19 +16,19 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # zephyr-7b
21
 
22
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.5775
25
- - Rewards/chosen: -0.6783
26
- - Rewards/rejected: -1.1531
27
- - Rewards/accuracies: 0.3672
28
- - Rewards/margins: 0.4748
29
- - Logps/rejected: -192.5041
30
- - Logps/chosen: -131.7414
31
- - Logits/rejected: 1.9235
32
- - Logits/chosen: 1.8283
33
- - Use Label: 7748.125
34
- - Pred Label: 1039.875
35
 
36
  ## Model description
37
 
@@ -68,10 +65,10 @@ The following hyperparameters were used during training:
68
 
69
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
70
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
71
- | 0.6531 | 0.21 | 100 | 0.6528 | -0.1643 | -0.2945 | 0.3633 | 0.1303 | -106.6470 | -80.3385 | -1.7198 | -1.7354 | 1725.3125 | 6.6875 |
72
- | 0.6041 | 0.42 | 200 | 0.5936 | -0.7144 | -1.1047 | 0.3516 | 0.3903 | -187.6596 | -135.3474 | 0.9784 | 0.8864 | 3420.5938 | 167.4062 |
73
- | 0.5763 | 0.63 | 300 | 0.5773 | -0.7930 | -1.2317 | 0.3516 | 0.4387 | -200.3615 | -143.2137 | 1.7526 | 1.6599 | 4991.2812 | 452.7188 |
74
- | 0.5836 | 0.84 | 400 | 0.5769 | -0.6646 | -1.1353 | 0.3711 | 0.4707 | -190.7267 | -130.3719 | 1.8500 | 1.7576 | 6517.1875 | 782.8125 |
75
 
76
 
77
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
8
  base_model: mistralai/Mistral-7B-v0.1
 
 
9
  model-index:
10
  - name: zephyr-7b
11
  results: []
 
16
 
17
  # zephyr-7b
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.4405
22
+ - Rewards/chosen: -3.0809
23
+ - Rewards/rejected: -4.8915
24
+ - Rewards/accuracies: 0.3438
25
+ - Rewards/margins: 1.8106
26
+ - Logps/rejected: -566.3419
27
+ - Logps/chosen: -371.9976
28
+ - Logits/rejected: 4.5207
29
+ - Logits/chosen: 4.3874
30
+ - Use Label: 5649.7188
31
+ - Pred Label: 1650.2812
32
 
33
  ## Model description
34
 
 
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
68
+ | 0.6535 | 0.21 | 100 | 0.6432 | -0.2049 | -0.3593 | 0.3516 | 0.1544 | -113.1259 | -84.4063 | -2.0537 | -2.0656 | 1713.5 | 18.5 |
69
+ | 0.507 | 0.42 | 200 | 0.5048 | -1.6723 | -2.3466 | 0.3594 | 0.6743 | -311.8494 | -231.1388 | 2.1626 | 2.0915 | 3214.5625 | 373.4375 |
70
+ | 0.4799 | 0.63 | 300 | 0.4885 | -1.7906 | -2.6624 | 0.3359 | 0.8718 | -343.4285 | -242.9698 | 3.2225 | 3.1511 | 4474.75 | 969.25 |
71
+ | 0.4443 | 0.84 | 400 | 0.4405 | -3.0809 | -4.8915 | 0.3438 | 1.8106 | -566.3419 | -371.9976 | 4.5207 | 4.3874 | 5649.7188 | 1650.2812 |
72
 
73
 
74
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f521d26d7e93ab0cdaddc249d4a149ab34d31a7bc562dbd07b97d28f64cc84e
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21712d278947feed96b37a5d3c57498f7fad3fa54fa540438cdb9c29a081d336
3
  size 671150064
all_results.json CHANGED
@@ -1,23 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 1.828277349472046,
4
- "eval_logits/rejected": 1.9234589338302612,
5
- "eval_logps/chosen": -131.74139404296875,
6
- "eval_logps/rejected": -192.50405883789062,
7
- "eval_loss": 0.5775244235992432,
8
- "eval_pred_label": 1039.875,
9
- "eval_rewards/accuracies": 0.3671875,
10
- "eval_rewards/chosen": -0.678291380405426,
11
- "eval_rewards/margins": 0.47481971979141235,
12
- "eval_rewards/rejected": -1.153111219406128,
13
- "eval_runtime": 125.2019,
14
- "eval_samples": 2000,
15
- "eval_samples_per_second": 15.974,
16
- "eval_steps_per_second": 0.256,
17
- "eval_use_label": 7748.125,
18
- "train_loss": 0.6110695428068533,
19
- "train_runtime": 9999.3279,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 6.114,
22
- "train_steps_per_second": 0.048
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5378267840019562,
4
+ "train_runtime": 9600.9753,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 6.368,
7
+ "train_steps_per_second": 0.05
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6110695428068533,
4
- "train_runtime": 9999.3279,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 6.114,
7
- "train_steps_per_second": 0.048
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.5378267840019562,
4
+ "train_runtime": 9600.9753,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 6.368,
7
+ "train_steps_per_second": 0.05
8
  }
trainer_state.json CHANGED
@@ -29,16 +29,16 @@
29
  "epoch": 0.02,
30
  "grad_norm": 0.4609375,
31
  "learning_rate": 1.0416666666666667e-06,
32
- "logits/chosen": -2.2421748638153076,
33
- "logits/rejected": -2.2769579887390137,
34
- "logps/chosen": -51.987098693847656,
35
- "logps/rejected": -64.96717071533203,
36
  "loss": 0.6929,
37
  "pred_label": 0.0,
38
- "rewards/accuracies": 0.25,
39
- "rewards/chosen": 0.0019227324519306421,
40
- "rewards/margins": 0.0004911368596367538,
41
- "rewards/rejected": 0.0014315954176709056,
42
  "step": 10,
43
  "use_label": 90.0
44
  },
@@ -46,16 +46,16 @@
46
  "epoch": 0.04,
47
  "grad_norm": 0.39453125,
48
  "learning_rate": 2.0833333333333334e-06,
49
- "logits/chosen": -2.2521612644195557,
50
- "logits/rejected": -2.255767822265625,
51
- "logps/chosen": -62.4937629699707,
52
- "logps/rejected": -72.63874816894531,
53
  "loss": 0.6919,
54
  "pred_label": 0.0,
55
- "rewards/accuracies": 0.2874999940395355,
56
- "rewards/chosen": 0.01600126549601555,
57
- "rewards/margins": 0.0011427802965044975,
58
- "rewards/rejected": 0.0148584870621562,
59
  "step": 20,
60
  "use_label": 242.0
61
  },
@@ -63,16 +63,16 @@
63
  "epoch": 0.06,
64
  "grad_norm": 0.51171875,
65
  "learning_rate": 3.125e-06,
66
- "logits/chosen": -2.3423426151275635,
67
- "logits/rejected": -2.3549609184265137,
68
- "logps/chosen": -79.10475158691406,
69
- "logps/rejected": -98.8157958984375,
70
- "loss": 0.6897,
71
  "pred_label": 0.0,
72
- "rewards/accuracies": 0.28125,
73
- "rewards/chosen": 0.03137165680527687,
74
- "rewards/margins": 0.0032712810207158327,
75
- "rewards/rejected": 0.028100375086069107,
76
  "step": 30,
77
  "use_label": 402.0
78
  },
@@ -80,830 +80,830 @@
80
  "epoch": 0.08,
81
  "grad_norm": 0.51953125,
82
  "learning_rate": 4.166666666666667e-06,
83
- "logits/chosen": -2.323338270187378,
84
- "logits/rejected": -2.3015079498291016,
85
- "logps/chosen": -82.85453796386719,
86
- "logps/rejected": -82.39984893798828,
87
  "loss": 0.6866,
88
  "pred_label": 0.0,
89
- "rewards/accuracies": 0.2874999940395355,
90
- "rewards/chosen": 0.03337595611810684,
91
- "rewards/margins": 0.011919925920665264,
92
- "rewards/rejected": 0.021456023678183556,
93
  "step": 40,
94
  "use_label": 562.0
95
  },
96
  {
97
  "epoch": 0.1,
98
- "grad_norm": 0.671875,
99
  "learning_rate": 4.999731868769027e-06,
100
- "logits/chosen": -2.2404515743255615,
101
- "logits/rejected": -2.262972354888916,
102
- "logps/chosen": -67.89888000488281,
103
- "logps/rejected": -81.8695068359375,
104
  "loss": 0.6805,
105
  "pred_label": 0.0,
106
  "rewards/accuracies": 0.32499998807907104,
107
- "rewards/chosen": 0.009319942444562912,
108
- "rewards/margins": 0.030618786811828613,
109
- "rewards/rejected": -0.0212988443672657,
110
  "step": 50,
111
  "use_label": 722.0
112
  },
113
  {
114
  "epoch": 0.13,
115
- "grad_norm": 1.0234375,
116
  "learning_rate": 4.9903533134293035e-06,
117
- "logits/chosen": -2.2157275676727295,
118
- "logits/rejected": -2.155928134918213,
119
- "logps/chosen": -63.64031982421875,
120
- "logps/rejected": -73.28236389160156,
121
  "loss": 0.6752,
122
  "pred_label": 0.0,
123
- "rewards/accuracies": 0.3062500059604645,
124
- "rewards/chosen": -0.03914070501923561,
125
- "rewards/margins": 0.04399287328124046,
126
- "rewards/rejected": -0.08313358575105667,
127
  "step": 60,
128
  "use_label": 882.0
129
  },
130
  {
131
  "epoch": 0.15,
132
- "grad_norm": 0.859375,
133
  "learning_rate": 4.967625656594782e-06,
134
- "logits/chosen": -2.114478588104248,
135
- "logits/rejected": -2.1126065254211426,
136
- "logps/chosen": -70.76527404785156,
137
- "logps/rejected": -83.94652557373047,
138
- "loss": 0.6712,
139
  "pred_label": 0.0,
140
- "rewards/accuracies": 0.25,
141
- "rewards/chosen": -0.15054164826869965,
142
- "rewards/margins": 0.030909737572073936,
143
- "rewards/rejected": -0.18145139515399933,
144
  "step": 70,
145
  "use_label": 1042.0
146
  },
147
  {
148
  "epoch": 0.17,
149
- "grad_norm": 1.1640625,
150
  "learning_rate": 4.93167072587771e-06,
151
- "logits/chosen": -2.2166943550109863,
152
- "logits/rejected": -2.1609182357788086,
153
- "logps/chosen": -54.8065185546875,
154
- "logps/rejected": -69.45613861083984,
155
- "loss": 0.6589,
156
- "pred_label": 0.4749999940395355,
157
- "rewards/accuracies": 0.26249998807907104,
158
- "rewards/chosen": -0.06275613605976105,
159
- "rewards/margins": 0.10003063827753067,
160
- "rewards/rejected": -0.16278676688671112,
161
  "step": 80,
162
- "use_label": 1201.5250244140625
163
  },
164
  {
165
  "epoch": 0.19,
166
- "grad_norm": 1.8125,
167
  "learning_rate": 4.882681251368549e-06,
168
- "logits/chosen": -1.9692049026489258,
169
- "logits/rejected": -1.9792039394378662,
170
- "logps/chosen": -76.60871887207031,
171
- "logps/rejected": -96.53330993652344,
172
- "loss": 0.6564,
173
- "pred_label": 2.0999999046325684,
174
- "rewards/accuracies": 0.29374998807907104,
175
- "rewards/chosen": -0.18226662278175354,
176
- "rewards/margins": 0.09542477130889893,
177
- "rewards/rejected": -0.27769142389297485,
178
  "step": 90,
179
- "use_label": 1359.9000244140625
180
  },
181
  {
182
  "epoch": 0.21,
183
- "grad_norm": 2.171875,
184
  "learning_rate": 4.8209198325401815e-06,
185
- "logits/chosen": -1.9027693271636963,
186
- "logits/rejected": -1.8775581121444702,
187
- "logps/chosen": -92.94733428955078,
188
- "logps/rejected": -84.73824310302734,
189
- "loss": 0.6531,
190
- "pred_label": 4.0,
191
- "rewards/accuracies": 0.32499998807907104,
192
- "rewards/chosen": -0.12917451560497284,
193
- "rewards/margins": 0.07954015582799911,
194
- "rewards/rejected": -0.20871467888355255,
195
  "step": 100,
196
- "use_label": 1518.0
197
  },
198
  {
199
  "epoch": 0.21,
200
- "eval_logits/chosen": -1.7353737354278564,
201
- "eval_logits/rejected": -1.7198325395584106,
202
- "eval_logps/chosen": -80.33845520019531,
203
- "eval_logps/rejected": -106.64702606201172,
204
- "eval_loss": 0.6527961492538452,
205
- "eval_pred_label": 6.6875,
206
- "eval_rewards/accuracies": 0.36328125,
207
- "eval_rewards/chosen": -0.1642620712518692,
208
- "eval_rewards/margins": 0.13027876615524292,
209
- "eval_rewards/rejected": -0.2945408225059509,
210
- "eval_runtime": 125.2319,
211
- "eval_samples_per_second": 15.97,
212
- "eval_steps_per_second": 0.256,
213
- "eval_use_label": 1725.3125,
214
  "step": 100
215
  },
216
  {
217
  "epoch": 0.23,
218
- "grad_norm": 2.0,
219
  "learning_rate": 4.746717530629565e-06,
220
- "logits/chosen": -1.7974278926849365,
221
- "logits/rejected": -1.7697474956512451,
222
- "logps/chosen": -89.79286193847656,
223
- "logps/rejected": -113.6241455078125,
224
- "loss": 0.6479,
225
- "pred_label": 9.199999809265137,
226
- "rewards/accuracies": 0.36250001192092896,
227
- "rewards/chosen": -0.18692079186439514,
228
- "rewards/margins": 0.16341358423233032,
229
- "rewards/rejected": -0.3503343462944031,
230
  "step": 110,
231
- "use_label": 1928.800048828125
232
  },
233
  {
234
  "epoch": 0.25,
235
- "grad_norm": 2.890625,
236
  "learning_rate": 4.660472094042121e-06,
237
- "logits/chosen": -1.454304814338684,
238
- "logits/rejected": -1.3457725048065186,
239
- "logps/chosen": -109.3675537109375,
240
- "logps/rejected": -133.90725708007812,
241
- "loss": 0.6432,
242
- "pred_label": 14.949999809265137,
243
- "rewards/accuracies": 0.34375,
244
- "rewards/chosen": -0.3942197263240814,
245
- "rewards/margins": 0.21566259860992432,
246
- "rewards/rejected": -0.6098822951316833,
247
  "step": 120,
248
- "use_label": 2083.050048828125
249
  },
250
  {
251
  "epoch": 0.27,
252
- "grad_norm": 2.5625,
253
  "learning_rate": 4.5626458262912745e-06,
254
- "logits/chosen": -1.0859026908874512,
255
- "logits/rejected": -1.0426993370056152,
256
- "logps/chosen": -112.0394515991211,
257
- "logps/rejected": -139.61097717285156,
258
- "loss": 0.6391,
259
- "pred_label": 21.049999237060547,
260
- "rewards/accuracies": 0.36250001192092896,
261
- "rewards/chosen": -0.4626106321811676,
262
- "rewards/margins": 0.20503444969654083,
263
- "rewards/rejected": -0.6676451563835144,
264
  "step": 130,
265
- "use_label": 2236.949951171875
266
  },
267
  {
268
  "epoch": 0.29,
269
- "grad_norm": 2.484375,
270
  "learning_rate": 4.453763107901676e-06,
271
- "logits/chosen": -0.735418975353241,
272
- "logits/rejected": -0.8380192518234253,
273
- "logps/chosen": -138.07081604003906,
274
- "logps/rejected": -150.91665649414062,
275
- "loss": 0.6252,
276
- "pred_label": 31.399999618530273,
277
  "rewards/accuracies": 0.30000001192092896,
278
- "rewards/chosen": -0.5732325315475464,
279
- "rewards/margins": 0.1448771208524704,
280
- "rewards/rejected": -0.7181096076965332,
281
  "step": 140,
282
- "use_label": 2386.60009765625
283
  },
284
  {
285
  "epoch": 0.31,
286
- "grad_norm": 3.859375,
287
  "learning_rate": 4.33440758555951e-06,
288
- "logits/chosen": -0.48231878876686096,
289
- "logits/rejected": -0.43882569670677185,
290
- "logps/chosen": -117.69664001464844,
291
- "logps/rejected": -150.86083984375,
292
- "loss": 0.6219,
293
- "pred_label": 43.45000076293945,
294
- "rewards/accuracies": 0.32499998807907104,
295
- "rewards/chosen": -0.5254992246627808,
296
- "rewards/margins": 0.3047201633453369,
297
- "rewards/rejected": -0.8302194476127625,
298
  "step": 150,
299
- "use_label": 2534.550048828125
300
  },
301
  {
302
  "epoch": 0.33,
303
- "grad_norm": 2.890625,
304
  "learning_rate": 4.205219043576955e-06,
305
- "logits/chosen": -0.15186011791229248,
306
- "logits/rejected": -0.17336201667785645,
307
- "logps/chosen": -128.78500366210938,
308
- "logps/rejected": -159.26498413085938,
309
- "loss": 0.5982,
310
- "pred_label": 58.25,
311
- "rewards/accuracies": 0.2750000059604645,
312
- "rewards/chosen": -0.6445494294166565,
313
- "rewards/margins": 0.17397476732730865,
314
- "rewards/rejected": -0.818524181842804,
315
  "step": 160,
316
- "use_label": 2679.75
317
  },
318
  {
319
  "epoch": 0.36,
320
- "grad_norm": 3.328125,
321
  "learning_rate": 4.066889974440757e-06,
322
- "logits/chosen": 0.14322622120380402,
323
- "logits/rejected": 0.18100713193416595,
324
- "logps/chosen": -108.39127349853516,
325
- "logps/rejected": -140.55824279785156,
326
- "loss": 0.5938,
327
- "pred_label": 79.57499694824219,
328
- "rewards/accuracies": 0.3062500059604645,
329
- "rewards/chosen": -0.5288320779800415,
330
- "rewards/margins": 0.23454061150550842,
331
- "rewards/rejected": -0.7633727192878723,
332
  "step": 170,
333
- "use_label": 2818.425048828125
334
  },
335
  {
336
  "epoch": 0.38,
337
- "grad_norm": 3.0,
338
  "learning_rate": 3.92016186682789e-06,
339
- "logits/chosen": -0.20601686835289001,
340
- "logits/rejected": -0.09364790469408035,
341
- "logps/chosen": -105.94217681884766,
342
- "logps/rejected": -130.695556640625,
343
- "loss": 0.6262,
344
- "pred_label": 100.2750015258789,
345
- "rewards/accuracies": 0.35624998807907104,
346
- "rewards/chosen": -0.4315454065799713,
347
- "rewards/margins": 0.3125666677951813,
348
- "rewards/rejected": -0.7441121339797974,
349
  "step": 180,
350
- "use_label": 2957.72509765625
351
  },
352
  {
353
  "epoch": 0.4,
354
- "grad_norm": 2.734375,
355
  "learning_rate": 3.7658212309857576e-06,
356
- "logits/chosen": -0.34412023425102234,
357
- "logits/rejected": -0.07299783080816269,
358
- "logps/chosen": -107.5626449584961,
359
- "logps/rejected": -141.1322479248047,
360
- "loss": 0.6092,
361
- "pred_label": 121.05000305175781,
362
- "rewards/accuracies": 0.34375,
363
- "rewards/chosen": -0.48451024293899536,
364
- "rewards/margins": 0.28280580043792725,
365
- "rewards/rejected": -0.7673160433769226,
366
  "step": 190,
367
- "use_label": 3096.949951171875
368
  },
369
  {
370
  "epoch": 0.42,
371
- "grad_norm": 6.5,
372
  "learning_rate": 3.604695382782159e-06,
373
- "logits/chosen": 0.03128425031900406,
374
- "logits/rejected": 0.20205454528331757,
375
- "logps/chosen": -145.35342407226562,
376
- "logps/rejected": -162.05667114257812,
377
- "loss": 0.6041,
378
- "pred_label": 135.89999389648438,
379
- "rewards/accuracies": 0.30000001192092896,
380
- "rewards/chosen": -0.6367710828781128,
381
- "rewards/margins": 0.25234952569007874,
382
- "rewards/rejected": -0.8891205787658691,
383
  "step": 200,
384
- "use_label": 3242.10009765625
385
  },
386
  {
387
  "epoch": 0.42,
388
- "eval_logits/chosen": 0.886444091796875,
389
- "eval_logits/rejected": 0.9784458875656128,
390
- "eval_logps/chosen": -135.34742736816406,
391
- "eval_logps/rejected": -187.65963745117188,
392
- "eval_loss": 0.5936154723167419,
393
- "eval_pred_label": 167.40625,
394
- "eval_rewards/accuracies": 0.3515625,
395
- "eval_rewards/chosen": -0.7143516540527344,
396
- "eval_rewards/margins": 0.3903152644634247,
397
- "eval_rewards/rejected": -1.1046667098999023,
398
- "eval_runtime": 125.3006,
399
- "eval_samples_per_second": 15.962,
400
  "eval_steps_per_second": 0.255,
401
- "eval_use_label": 3420.59375,
402
  "step": 200
403
  },
404
  {
405
  "epoch": 0.44,
406
- "grad_norm": 3.796875,
407
  "learning_rate": 3.437648009023905e-06,
408
- "logits/chosen": 0.6729141473770142,
409
- "logits/rejected": 0.6579598188400269,
410
- "logps/chosen": -119.19351959228516,
411
- "logps/rejected": -159.00997924804688,
412
- "loss": 0.5936,
413
- "pred_label": 201.6999969482422,
414
- "rewards/accuracies": 0.3499999940395355,
415
- "rewards/chosen": -0.63218754529953,
416
- "rewards/margins": 0.3281194567680359,
417
- "rewards/rejected": -0.9603070020675659,
418
  "step": 210,
419
- "use_label": 3592.300048828125
420
  },
421
  {
422
  "epoch": 0.46,
423
- "grad_norm": 4.5,
424
  "learning_rate": 3.265574537815398e-06,
425
- "logits/chosen": 0.2854166626930237,
426
- "logits/rejected": 0.4488348066806793,
427
- "logps/chosen": -148.92379760742188,
428
- "logps/rejected": -161.19557189941406,
429
- "loss": 0.5938,
430
- "pred_label": 225.52499389648438,
431
- "rewards/accuracies": 0.3125,
432
- "rewards/chosen": -0.6929510235786438,
433
- "rewards/margins": 0.2553243637084961,
434
- "rewards/rejected": -0.9482753872871399,
435
  "step": 220,
436
- "use_label": 3728.47509765625
437
  },
438
  {
439
  "epoch": 0.48,
440
- "grad_norm": 2.28125,
441
  "learning_rate": 3.089397338773569e-06,
442
- "logits/chosen": 0.00020002425299026072,
443
- "logits/rejected": 0.1493436098098755,
444
- "logps/chosen": -103.05213928222656,
445
- "logps/rejected": -136.05099487304688,
446
- "loss": 0.597,
447
- "pred_label": 252.02499389648438,
448
- "rewards/accuracies": 0.30000001192092896,
449
- "rewards/chosen": -0.3861756920814514,
450
- "rewards/margins": 0.3467464745044708,
451
- "rewards/rejected": -0.7329221963882446,
452
  "step": 230,
453
- "use_label": 3861.97509765625
454
  },
455
  {
456
  "epoch": 0.5,
457
- "grad_norm": 3.125,
458
  "learning_rate": 2.9100607788275547e-06,
459
- "logits/chosen": 0.49308425188064575,
460
- "logits/rejected": 0.44487372040748596,
461
- "logps/chosen": -109.46275329589844,
462
- "logps/rejected": -153.8666534423828,
463
- "loss": 0.584,
464
- "pred_label": 275.375,
465
- "rewards/accuracies": 0.3687500059604645,
466
- "rewards/chosen": -0.40430212020874023,
467
- "rewards/margins": 0.3921273946762085,
468
- "rewards/rejected": -0.7964295148849487,
469
  "step": 240,
470
- "use_label": 3998.625
471
  },
472
  {
473
  "epoch": 0.52,
474
- "grad_norm": 2.0625,
475
  "learning_rate": 2.72852616010567e-06,
476
- "logits/chosen": 0.3891890347003937,
477
- "logits/rejected": 0.47166162729263306,
478
- "logps/chosen": -122.5915298461914,
479
- "logps/rejected": -153.12493896484375,
480
- "loss": 0.5998,
481
- "pred_label": 301.875,
482
- "rewards/accuracies": 0.36250001192092896,
483
- "rewards/chosen": -0.4919242262840271,
484
- "rewards/margins": 0.3470008671283722,
485
- "rewards/rejected": -0.8389250636100769,
486
  "step": 250,
487
- "use_label": 4132.125
488
  },
489
  {
490
  "epoch": 0.54,
491
- "grad_norm": 2.171875,
492
  "learning_rate": 2.5457665670441937e-06,
493
- "logits/chosen": 0.4214790463447571,
494
- "logits/rejected": 0.4202333092689514,
495
- "logps/chosen": -116.09378814697266,
496
- "logps/rejected": -156.8458251953125,
497
- "loss": 0.592,
498
- "pred_label": 326.3500061035156,
499
- "rewards/accuracies": 0.32499998807907104,
500
- "rewards/chosen": -0.4998815953731537,
501
- "rewards/margins": 0.324097216129303,
502
- "rewards/rejected": -0.8239787817001343,
503
  "step": 260,
504
- "use_label": 4267.64990234375
505
  },
506
  {
507
  "epoch": 0.57,
508
- "grad_norm": 3.40625,
509
  "learning_rate": 2.3627616503391813e-06,
510
- "logits/chosen": 0.9609361886978149,
511
- "logits/rejected": 0.8760908246040344,
512
- "logps/chosen": -142.81573486328125,
513
- "logps/rejected": -170.10379028320312,
514
- "loss": 0.5888,
515
- "pred_label": 343.04998779296875,
516
- "rewards/accuracies": 0.3375000059604645,
517
- "rewards/chosen": -0.698999285697937,
518
- "rewards/margins": 0.30828553438186646,
519
- "rewards/rejected": -1.0072848796844482,
520
  "step": 270,
521
- "use_label": 4410.9501953125
522
  },
523
  {
524
  "epoch": 0.59,
525
- "grad_norm": 2.234375,
526
  "learning_rate": 2.1804923757009885e-06,
527
- "logits/chosen": 1.1657536029815674,
528
- "logits/rejected": 1.3259608745574951,
529
- "logps/chosen": -131.4703826904297,
530
- "logps/rejected": -156.4979248046875,
531
- "loss": 0.6007,
532
- "pred_label": 361.5,
533
- "rewards/accuracies": 0.3375000059604645,
534
- "rewards/chosen": -0.6596485376358032,
535
- "rewards/margins": 0.27613458037376404,
536
- "rewards/rejected": -0.9357832074165344,
537
  "step": 280,
538
- "use_label": 4552.5
539
  },
540
  {
541
  "epoch": 0.61,
542
- "grad_norm": 2.875,
543
  "learning_rate": 1.9999357655598894e-06,
544
- "logits/chosen": 0.9594011306762695,
545
- "logits/rejected": 0.9126796722412109,
546
- "logps/chosen": -144.55104064941406,
547
- "logps/rejected": -183.51065063476562,
548
- "loss": 0.5899,
549
- "pred_label": 386.07501220703125,
550
- "rewards/accuracies": 0.3187499940395355,
551
- "rewards/chosen": -0.7800258994102478,
552
- "rewards/margins": 0.2914672791957855,
553
- "rewards/rejected": -1.0714932680130005,
554
  "step": 290,
555
- "use_label": 4687.9248046875
556
  },
557
  {
558
  "epoch": 0.63,
559
- "grad_norm": 3.1875,
560
  "learning_rate": 1.8220596619089576e-06,
561
- "logits/chosen": 1.2753574848175049,
562
- "logits/rejected": 1.1057071685791016,
563
- "logps/chosen": -165.4674072265625,
564
- "logps/rejected": -223.6466064453125,
565
- "loss": 0.5763,
566
- "pred_label": 409.9750061035156,
567
- "rewards/accuracies": 0.4124999940395355,
568
- "rewards/chosen": -0.8787307739257812,
569
- "rewards/margins": 0.41653138399124146,
570
- "rewards/rejected": -1.295262098312378,
571
  "step": 300,
572
- "use_label": 4824.02490234375
573
  },
574
  {
575
  "epoch": 0.63,
576
- "eval_logits/chosen": 1.6598718166351318,
577
- "eval_logits/rejected": 1.7526323795318604,
578
- "eval_logps/chosen": -143.2136993408203,
579
- "eval_logps/rejected": -200.36146545410156,
580
- "eval_loss": 0.5773172974586487,
581
- "eval_pred_label": 452.71875,
582
- "eval_rewards/accuracies": 0.3515625,
583
- "eval_rewards/chosen": -0.7930145263671875,
584
- "eval_rewards/margins": 0.4386705756187439,
585
- "eval_rewards/rejected": -1.2316851615905762,
586
- "eval_runtime": 125.3512,
587
- "eval_samples_per_second": 15.955,
588
  "eval_steps_per_second": 0.255,
589
- "eval_use_label": 4991.28125,
590
  "step": 300
591
  },
592
  {
593
  "epoch": 0.65,
594
- "grad_norm": 3.0625,
595
  "learning_rate": 1.647817538357072e-06,
596
- "logits/chosen": 1.3793504238128662,
597
- "logits/rejected": 1.4072078466415405,
598
- "logps/chosen": -126.9173583984375,
599
- "logps/rejected": -186.46255493164062,
600
- "loss": 0.5633,
601
- "pred_label": 494.42498779296875,
602
- "rewards/accuracies": 0.36250001192092896,
603
- "rewards/chosen": -0.7132076025009155,
604
- "rewards/margins": 0.4689141809940338,
605
- "rewards/rejected": -1.1821218729019165,
606
  "step": 310,
607
- "use_label": 5155.5751953125
608
  },
609
  {
610
  "epoch": 0.67,
611
- "grad_norm": 4.21875,
612
  "learning_rate": 1.4781433892011132e-06,
613
- "logits/chosen": 1.2615296840667725,
614
- "logits/rejected": 1.4717950820922852,
615
- "logps/chosen": -163.67529296875,
616
- "logps/rejected": -205.421142578125,
617
- "loss": 0.5761,
618
- "pred_label": 523.5250244140625,
619
- "rewards/accuracies": 0.3812499940395355,
620
- "rewards/chosen": -0.9060484766960144,
621
- "rewards/margins": 0.4762052893638611,
622
- "rewards/rejected": -1.382253885269165,
623
  "step": 320,
624
- "use_label": 5286.47509765625
625
  },
626
  {
627
  "epoch": 0.69,
628
- "grad_norm": 2.359375,
629
  "learning_rate": 1.3139467229135999e-06,
630
- "logits/chosen": 1.4169238805770874,
631
- "logits/rejected": 1.4296729564666748,
632
- "logps/chosen": -150.2149200439453,
633
- "logps/rejected": -186.73570251464844,
634
- "loss": 0.5799,
635
- "pred_label": 550.125,
636
- "rewards/accuracies": 0.33125001192092896,
637
- "rewards/chosen": -0.8010675311088562,
638
- "rewards/margins": 0.3802093267440796,
639
- "rewards/rejected": -1.1812770366668701,
640
  "step": 330,
641
- "use_label": 5419.875
642
  },
643
  {
644
  "epoch": 0.71,
645
- "grad_norm": 3.15625,
646
  "learning_rate": 1.1561076868822756e-06,
647
- "logits/chosen": 0.9984269142150879,
648
- "logits/rejected": 0.9373771548271179,
649
- "logps/chosen": -161.85842895507812,
650
- "logps/rejected": -182.74703979492188,
651
- "loss": 0.5933,
652
- "pred_label": 567.2000122070312,
653
- "rewards/accuracies": 0.32499998807907104,
654
- "rewards/chosen": -0.771192193031311,
655
- "rewards/margins": 0.2911759614944458,
656
- "rewards/rejected": -1.0623681545257568,
657
  "step": 340,
658
- "use_label": 5562.7998046875
659
  },
660
  {
661
  "epoch": 0.73,
662
- "grad_norm": 2.59375,
663
  "learning_rate": 1.0054723495346484e-06,
664
- "logits/chosen": 0.83796626329422,
665
- "logits/rejected": 0.8520887494087219,
666
- "logps/chosen": -176.03054809570312,
667
- "logps/rejected": -217.10214233398438,
668
- "loss": 0.5863,
669
- "pred_label": 598.5750122070312,
670
- "rewards/accuracies": 0.36250001192092896,
671
- "rewards/chosen": -0.86613929271698,
672
- "rewards/margins": 0.4522012174129486,
673
- "rewards/rejected": -1.318340539932251,
674
  "step": 350,
675
- "use_label": 5691.4248046875
676
  },
677
  {
678
  "epoch": 0.75,
679
- "grad_norm": 2.234375,
680
  "learning_rate": 8.628481651367876e-07,
681
- "logits/chosen": 0.7010875940322876,
682
- "logits/rejected": 0.8413160443305969,
683
- "logps/chosen": -126.9655532836914,
684
- "logps/rejected": -182.5807342529297,
685
- "loss": 0.5885,
686
- "pred_label": 629.0,
687
- "rewards/accuracies": 0.39375001192092896,
688
- "rewards/chosen": -0.6332792043685913,
689
- "rewards/margins": 0.47024598717689514,
690
- "rewards/rejected": -1.103525161743164,
691
  "step": 360,
692
- "use_label": 5821.0
693
  },
694
  {
695
  "epoch": 0.77,
696
- "grad_norm": 2.4375,
697
  "learning_rate": 7.289996455765749e-07,
698
- "logits/chosen": 0.8454801440238953,
699
- "logits/rejected": 0.9659041166305542,
700
- "logps/chosen": -120.26502990722656,
701
- "logps/rejected": -170.44923400878906,
702
- "loss": 0.585,
703
- "pred_label": 655.625,
704
- "rewards/accuracies": 0.34375,
705
- "rewards/chosen": -0.5483022928237915,
706
- "rewards/margins": 0.4770358204841614,
707
- "rewards/rejected": -1.0253381729125977,
708
  "step": 370,
709
- "use_label": 5954.375
710
  },
711
  {
712
  "epoch": 0.8,
713
- "grad_norm": 3.5625,
714
  "learning_rate": 6.046442623320145e-07,
715
- "logits/chosen": 0.7346574664115906,
716
- "logits/rejected": 0.7028430104255676,
717
- "logps/chosen": -131.0785675048828,
718
- "logps/rejected": -188.57435607910156,
719
- "loss": 0.589,
720
- "pred_label": 685.5250244140625,
721
- "rewards/accuracies": 0.3187499940395355,
722
- "rewards/chosen": -0.6524317264556885,
723
- "rewards/margins": 0.3696710765361786,
724
- "rewards/rejected": -1.0221028327941895,
725
  "step": 380,
726
- "use_label": 6084.47509765625
727
  },
728
  {
729
  "epoch": 0.82,
730
- "grad_norm": 3.3125,
731
  "learning_rate": 4.904486005914027e-07,
732
- "logits/chosen": 1.1143369674682617,
733
- "logits/rejected": 0.8643951416015625,
734
- "logps/chosen": -179.11276245117188,
735
- "logps/rejected": -220.11068725585938,
736
- "loss": 0.5727,
737
- "pred_label": 717.625,
738
- "rewards/accuracies": 0.375,
739
- "rewards/chosen": -0.8629444241523743,
740
- "rewards/margins": 0.508463442325592,
741
- "rewards/rejected": -1.3714077472686768,
742
  "step": 390,
743
- "use_label": 6212.375
744
  },
745
  {
746
  "epoch": 0.84,
747
- "grad_norm": 3.25,
748
  "learning_rate": 3.8702478614051353e-07,
749
- "logits/chosen": 0.8043449521064758,
750
- "logits/rejected": 0.9917415380477905,
751
- "logps/chosen": -130.07017517089844,
752
- "logps/rejected": -163.469970703125,
753
- "loss": 0.5836,
754
- "pred_label": 747.5750122070312,
755
- "rewards/accuracies": 0.375,
756
- "rewards/chosen": -0.5757918953895569,
757
- "rewards/margins": 0.42427974939346313,
758
- "rewards/rejected": -1.0000715255737305,
759
  "step": 400,
760
- "use_label": 6342.4248046875
761
  },
762
  {
763
  "epoch": 0.84,
764
- "eval_logits/chosen": 1.75760817527771,
765
- "eval_logits/rejected": 1.8499951362609863,
766
- "eval_logps/chosen": -130.3719482421875,
767
- "eval_logps/rejected": -190.7267303466797,
768
- "eval_loss": 0.5768851041793823,
769
- "eval_pred_label": 782.8125,
770
- "eval_rewards/accuracies": 0.37109375,
771
- "eval_rewards/chosen": -0.6645968556404114,
772
- "eval_rewards/margins": 0.4707409739494324,
773
- "eval_rewards/rejected": -1.1353378295898438,
774
- "eval_runtime": 147.391,
775
- "eval_samples_per_second": 13.569,
776
- "eval_steps_per_second": 0.217,
777
- "eval_use_label": 6517.1875,
778
  "step": 400
779
  },
780
  {
781
  "epoch": 0.86,
782
- "grad_norm": 3.390625,
783
  "learning_rate": 2.9492720416985004e-07,
784
- "logits/chosen": 1.1002473831176758,
785
- "logits/rejected": 1.1428117752075195,
786
- "logps/chosen": -126.85247802734375,
787
- "logps/rejected": -170.77365112304688,
788
- "loss": 0.5838,
789
- "pred_label": 822.75,
790
- "rewards/accuracies": 0.3687500059604645,
791
- "rewards/chosen": -0.6542948484420776,
792
- "rewards/margins": 0.4562492370605469,
793
- "rewards/rejected": -1.1105440855026245,
794
  "step": 410,
795
- "use_label": 6683.25
796
  },
797
  {
798
  "epoch": 0.88,
799
- "grad_norm": 2.609375,
800
  "learning_rate": 2.1464952759020857e-07,
801
- "logits/chosen": 1.3246395587921143,
802
- "logits/rejected": 1.2824434041976929,
803
- "logps/chosen": -122.80003356933594,
804
- "logps/rejected": -138.56423950195312,
805
- "loss": 0.5822,
806
- "pred_label": 846.4249877929688,
807
- "rewards/accuracies": 0.26249998807907104,
808
- "rewards/chosen": -0.6186091303825378,
809
- "rewards/margins": 0.25320303440093994,
810
- "rewards/rejected": -0.8718121647834778,
811
  "step": 420,
812
- "use_label": 6819.5751953125
813
  },
814
  {
815
  "epoch": 0.9,
816
- "grad_norm": 2.09375,
817
  "learning_rate": 1.4662207078575685e-07,
818
- "logits/chosen": 1.270193099975586,
819
- "logits/rejected": 1.253873348236084,
820
- "logps/chosen": -171.46336364746094,
821
- "logps/rejected": -207.75607299804688,
822
- "loss": 0.564,
823
- "pred_label": 873.5499877929688,
824
- "rewards/accuracies": 0.4625000059604645,
825
- "rewards/chosen": -0.7219651341438293,
826
- "rewards/margins": 0.5620242357254028,
827
- "rewards/rejected": -1.283989429473877,
828
  "step": 430,
829
- "use_label": 6952.4501953125
830
  },
831
  {
832
  "epoch": 0.92,
833
- "grad_norm": 2.578125,
834
  "learning_rate": 9.120948298936422e-08,
835
- "logits/chosen": 1.221411943435669,
836
- "logits/rejected": 1.397247552871704,
837
- "logps/chosen": -136.4575653076172,
838
- "logps/rejected": -193.40870666503906,
839
- "loss": 0.5736,
840
- "pred_label": 905.5750122070312,
841
- "rewards/accuracies": 0.36250001192092896,
842
- "rewards/chosen": -0.6955360770225525,
843
- "rewards/margins": 0.4879213869571686,
844
- "rewards/rejected": -1.183457612991333,
845
  "step": 440,
846
- "use_label": 7080.4248046875
847
  },
848
  {
849
  "epoch": 0.94,
850
- "grad_norm": 4.78125,
851
  "learning_rate": 4.870879364444109e-08,
852
- "logits/chosen": 1.6054052114486694,
853
- "logits/rejected": 1.3484258651733398,
854
- "logps/chosen": -148.17161560058594,
855
- "logps/rejected": -205.789306640625,
856
- "loss": 0.583,
857
- "pred_label": 930.4749755859375,
858
  "rewards/accuracies": 0.35624998807907104,
859
- "rewards/chosen": -0.7591380476951599,
860
- "rewards/margins": 0.4158584177494049,
861
- "rewards/rejected": -1.1749964952468872,
862
  "step": 450,
863
- "use_label": 7215.52490234375
864
  },
865
  {
866
  "epoch": 0.96,
867
- "grad_norm": 2.875,
868
  "learning_rate": 1.93478202307823e-08,
869
- "logits/chosen": 1.4640157222747803,
870
- "logits/rejected": 1.4903802871704102,
871
- "logps/chosen": -96.6323471069336,
872
- "logps/rejected": -150.8868865966797,
873
- "loss": 0.5814,
874
- "pred_label": 961.25,
875
- "rewards/accuracies": 0.32499998807907104,
876
- "rewards/chosen": -0.5051247477531433,
877
- "rewards/margins": 0.3702928125858307,
878
- "rewards/rejected": -0.8754175901412964,
879
  "step": 460,
880
- "use_label": 7344.75
881
  },
882
  {
883
  "epoch": 0.98,
884
- "grad_norm": 2.75,
885
  "learning_rate": 3.283947088983663e-09,
886
- "logits/chosen": 1.464422345161438,
887
- "logits/rejected": 1.2297132015228271,
888
- "logps/chosen": -130.30838012695312,
889
- "logps/rejected": -166.67605590820312,
890
- "loss": 0.5822,
891
- "pred_label": 982.8499755859375,
892
- "rewards/accuracies": 0.30000001192092896,
893
- "rewards/chosen": -0.6297920346260071,
894
- "rewards/margins": 0.34639838337898254,
895
- "rewards/rejected": -0.9761903882026672,
896
  "step": 470,
897
- "use_label": 7483.14990234375
898
  },
899
  {
900
  "epoch": 1.0,
901
  "step": 477,
902
  "total_flos": 0.0,
903
- "train_loss": 0.6110695428068533,
904
- "train_runtime": 9999.3279,
905
- "train_samples_per_second": 6.114,
906
- "train_steps_per_second": 0.048
907
  }
908
  ],
909
  "logging_steps": 10,
 
29
  "epoch": 0.02,
30
  "grad_norm": 0.4609375,
31
  "learning_rate": 1.0416666666666667e-06,
32
+ "logits/chosen": -2.2421205043792725,
33
+ "logits/rejected": -2.2769112586975098,
34
+ "logps/chosen": -51.97997283935547,
35
+ "logps/rejected": -64.98096466064453,
36
  "loss": 0.6929,
37
  "pred_label": 0.0,
38
+ "rewards/accuracies": 0.2222222238779068,
39
+ "rewards/chosen": 0.0019939513877034187,
40
+ "rewards/margins": 0.0007003004429861903,
41
+ "rewards/rejected": 0.0012936509447172284,
42
  "step": 10,
43
  "use_label": 90.0
44
  },
 
46
  "epoch": 0.04,
47
  "grad_norm": 0.39453125,
48
  "learning_rate": 2.0833333333333334e-06,
49
+ "logits/chosen": -2.2527966499328613,
50
+ "logits/rejected": -2.256462812423706,
51
+ "logps/chosen": -62.502418518066406,
52
+ "logps/rejected": -72.6461181640625,
53
  "loss": 0.6919,
54
  "pred_label": 0.0,
55
+ "rewards/accuracies": 0.2750000059604645,
56
+ "rewards/chosen": 0.01591477356851101,
57
+ "rewards/margins": 0.0011298481840640306,
58
+ "rewards/rejected": 0.014784926548600197,
59
  "step": 20,
60
  "use_label": 242.0
61
  },
 
63
  "epoch": 0.06,
64
  "grad_norm": 0.51171875,
65
  "learning_rate": 3.125e-06,
66
+ "logits/chosen": -2.342513084411621,
67
+ "logits/rejected": -2.35528564453125,
68
+ "logps/chosen": -79.1588134765625,
69
+ "logps/rejected": -98.83000946044922,
70
+ "loss": 0.6898,
71
  "pred_label": 0.0,
72
+ "rewards/accuracies": 0.2750000059604645,
73
+ "rewards/chosen": 0.030831044539809227,
74
+ "rewards/margins": 0.002872847020626068,
75
+ "rewards/rejected": 0.027958199381828308,
76
  "step": 30,
77
  "use_label": 402.0
78
  },
 
80
  "epoch": 0.08,
81
  "grad_norm": 0.51953125,
82
  "learning_rate": 4.166666666666667e-06,
83
+ "logits/chosen": -2.322958469390869,
84
+ "logits/rejected": -2.3010201454162598,
85
+ "logps/chosen": -82.86949157714844,
86
+ "logps/rejected": -82.41117858886719,
87
  "loss": 0.6866,
88
  "pred_label": 0.0,
89
+ "rewards/accuracies": 0.29374998807907104,
90
+ "rewards/chosen": 0.03322647884488106,
91
+ "rewards/margins": 0.01188388466835022,
92
+ "rewards/rejected": 0.021342596039175987,
93
  "step": 40,
94
  "use_label": 562.0
95
  },
96
  {
97
  "epoch": 0.1,
98
+ "grad_norm": 0.66015625,
99
  "learning_rate": 4.999731868769027e-06,
100
+ "logits/chosen": -2.2394285202026367,
101
+ "logits/rejected": -2.2620723247528076,
102
+ "logps/chosen": -67.9144058227539,
103
+ "logps/rejected": -81.85662841796875,
104
  "loss": 0.6805,
105
  "pred_label": 0.0,
106
  "rewards/accuracies": 0.32499998807907104,
107
+ "rewards/chosen": 0.009164649061858654,
108
+ "rewards/margins": 0.030334800481796265,
109
+ "rewards/rejected": -0.021170150488615036,
110
  "step": 50,
111
  "use_label": 722.0
112
  },
113
  {
114
  "epoch": 0.13,
115
+ "grad_norm": 0.94921875,
116
  "learning_rate": 4.9903533134293035e-06,
117
+ "logits/chosen": -2.215353488922119,
118
+ "logits/rejected": -2.156195640563965,
119
+ "logps/chosen": -62.76350784301758,
120
+ "logps/rejected": -72.54745483398438,
121
  "loss": 0.6752,
122
  "pred_label": 0.0,
123
+ "rewards/accuracies": 0.3125,
124
+ "rewards/chosen": -0.030372655019164085,
125
+ "rewards/margins": 0.04541187360882759,
126
+ "rewards/rejected": -0.07578452676534653,
127
  "step": 60,
128
  "use_label": 882.0
129
  },
130
  {
131
  "epoch": 0.15,
132
+ "grad_norm": 1.140625,
133
  "learning_rate": 4.967625656594782e-06,
134
+ "logits/chosen": -2.1206488609313965,
135
+ "logits/rejected": -2.117661952972412,
136
+ "logps/chosen": -63.197784423828125,
137
+ "logps/rejected": -76.79959869384766,
138
+ "loss": 0.6656,
139
  "pred_label": 0.0,
140
+ "rewards/accuracies": 0.24375000596046448,
141
+ "rewards/chosen": -0.07486678659915924,
142
+ "rewards/margins": 0.03511539101600647,
143
+ "rewards/rejected": -0.10998217016458511,
144
  "step": 70,
145
  "use_label": 1042.0
146
  },
147
  {
148
  "epoch": 0.17,
149
+ "grad_norm": 1.8125,
150
  "learning_rate": 4.93167072587771e-06,
151
+ "logits/chosen": -2.209770679473877,
152
+ "logits/rejected": -2.155240058898926,
153
+ "logps/chosen": -62.25128936767578,
154
+ "logps/rejected": -75.9639663696289,
155
+ "loss": 0.6592,
156
+ "pred_label": 1.6749999523162842,
157
+ "rewards/accuracies": 0.2750000059604645,
158
+ "rewards/chosen": -0.1372038871049881,
159
+ "rewards/margins": 0.0906611904501915,
160
+ "rewards/rejected": -0.227865070104599,
161
  "step": 80,
162
+ "use_label": 1200.324951171875
163
  },
164
  {
165
  "epoch": 0.19,
166
+ "grad_norm": 1.046875,
167
  "learning_rate": 4.882681251368549e-06,
168
+ "logits/chosen": -2.0902276039123535,
169
+ "logits/rejected": -2.0773346424102783,
170
+ "logps/chosen": -77.01739501953125,
171
+ "logps/rejected": -97.2451400756836,
172
+ "loss": 0.6533,
173
+ "pred_label": 7.675000190734863,
174
+ "rewards/accuracies": 0.3062500059604645,
175
+ "rewards/chosen": -0.18635347485542297,
176
+ "rewards/margins": 0.09845630824565887,
177
+ "rewards/rejected": -0.28480976819992065,
178
  "step": 90,
179
+ "use_label": 1354.324951171875
180
  },
181
  {
182
  "epoch": 0.21,
183
+ "grad_norm": 1.0625,
184
  "learning_rate": 4.8209198325401815e-06,
185
+ "logits/chosen": -2.1706321239471436,
186
+ "logits/rejected": -2.1635570526123047,
187
+ "logps/chosen": -92.91756439208984,
188
+ "logps/rejected": -83.92691802978516,
189
+ "loss": 0.6535,
190
+ "pred_label": 10.050000190734863,
191
+ "rewards/accuracies": 0.3125,
192
+ "rewards/chosen": -0.12887680530548096,
193
+ "rewards/margins": 0.07172463834285736,
194
+ "rewards/rejected": -0.20060142874717712,
195
  "step": 100,
196
+ "use_label": 1511.949951171875
197
  },
198
  {
199
  "epoch": 0.21,
200
+ "eval_logits/chosen": -2.0656356811523438,
201
+ "eval_logits/rejected": -2.053668260574341,
202
+ "eval_logps/chosen": -84.40631866455078,
203
+ "eval_logps/rejected": -113.12586975097656,
204
+ "eval_loss": 0.6432419419288635,
205
+ "eval_pred_label": 18.5,
206
+ "eval_rewards/accuracies": 0.3515625,
207
+ "eval_rewards/chosen": -0.20494069159030914,
208
+ "eval_rewards/margins": 0.1543886363506317,
209
+ "eval_rewards/rejected": -0.35932934284210205,
210
+ "eval_runtime": 125.4389,
211
+ "eval_samples_per_second": 15.944,
212
+ "eval_steps_per_second": 0.255,
213
+ "eval_use_label": 1713.5,
214
  "step": 100
215
  },
216
  {
217
  "epoch": 0.23,
218
+ "grad_norm": 2.125,
219
  "learning_rate": 4.746717530629565e-06,
220
+ "logits/chosen": -2.043905019760132,
221
+ "logits/rejected": -2.0294950008392334,
222
+ "logps/chosen": -101.01715850830078,
223
+ "logps/rejected": -123.53236389160156,
224
+ "loss": 0.6416,
225
+ "pred_label": 28.799999237060547,
226
+ "rewards/accuracies": 0.3499999940395355,
227
+ "rewards/chosen": -0.2991637587547302,
228
+ "rewards/margins": 0.15025287866592407,
229
+ "rewards/rejected": -0.4494166374206543,
230
  "step": 110,
231
+ "use_label": 1909.199951171875
232
  },
233
  {
234
  "epoch": 0.25,
235
+ "grad_norm": 1.578125,
236
  "learning_rate": 4.660472094042121e-06,
237
+ "logits/chosen": -1.6428496837615967,
238
+ "logits/rejected": -1.5296450853347778,
239
+ "logps/chosen": -109.25709533691406,
240
+ "logps/rejected": -133.13401794433594,
241
+ "loss": 0.6325,
242
+ "pred_label": 42.32500076293945,
243
+ "rewards/accuracies": 0.375,
244
+ "rewards/chosen": -0.3931151032447815,
245
+ "rewards/margins": 0.20903488993644714,
246
+ "rewards/rejected": -0.602150022983551,
247
  "step": 120,
248
+ "use_label": 2055.675048828125
249
  },
250
  {
251
  "epoch": 0.27,
252
+ "grad_norm": 1.8203125,
253
  "learning_rate": 4.5626458262912745e-06,
254
+ "logits/chosen": -1.0915193557739258,
255
+ "logits/rejected": -1.0688496828079224,
256
+ "logps/chosen": -101.99517822265625,
257
+ "logps/rejected": -131.51425170898438,
258
+ "loss": 0.6265,
259
+ "pred_label": 60.599998474121094,
260
+ "rewards/accuracies": 0.3375000059604645,
261
+ "rewards/chosen": -0.3621678650379181,
262
+ "rewards/margins": 0.22451019287109375,
263
+ "rewards/rejected": -0.5866780877113342,
264
  "step": 130,
265
+ "use_label": 2197.39990234375
266
  },
267
  {
268
  "epoch": 0.29,
269
+ "grad_norm": 2.65625,
270
  "learning_rate": 4.453763107901676e-06,
271
+ "logits/chosen": -0.5608137845993042,
272
+ "logits/rejected": -0.7015228271484375,
273
+ "logps/chosen": -131.24168395996094,
274
+ "logps/rejected": -148.6112060546875,
275
+ "loss": 0.6032,
276
+ "pred_label": 81.6500015258789,
277
  "rewards/accuracies": 0.30000001192092896,
278
+ "rewards/chosen": -0.5049411654472351,
279
+ "rewards/margins": 0.19011390209197998,
280
+ "rewards/rejected": -0.6950551271438599,
281
  "step": 140,
282
+ "use_label": 2336.35009765625
283
  },
284
  {
285
  "epoch": 0.31,
286
+ "grad_norm": 3.828125,
287
  "learning_rate": 4.33440758555951e-06,
288
+ "logits/chosen": -0.30833983421325684,
289
+ "logits/rejected": -0.2849891781806946,
290
+ "logps/chosen": -146.22640991210938,
291
+ "logps/rejected": -189.76602172851562,
292
+ "loss": 0.5689,
293
+ "pred_label": 109.57499694824219,
294
+ "rewards/accuracies": 0.3187499940395355,
295
+ "rewards/chosen": -0.8107970952987671,
296
+ "rewards/margins": 0.4084743559360504,
297
+ "rewards/rejected": -1.2192714214324951,
298
  "step": 150,
299
+ "use_label": 2468.425048828125
300
  },
301
  {
302
  "epoch": 0.33,
303
+ "grad_norm": 3.90625,
304
  "learning_rate": 4.205219043576955e-06,
305
+ "logits/chosen": 0.23603327572345734,
306
+ "logits/rejected": 0.16418711841106415,
307
+ "logps/chosen": -161.76339721679688,
308
+ "logps/rejected": -198.48782348632812,
309
+ "loss": 0.5333,
310
+ "pred_label": 145.22500610351562,
311
+ "rewards/accuracies": 0.26875001192092896,
312
+ "rewards/chosen": -0.9743334650993347,
313
+ "rewards/margins": 0.23641912639141083,
314
+ "rewards/rejected": -1.2107526063919067,
315
  "step": 160,
316
+ "use_label": 2592.77490234375
317
  },
318
  {
319
  "epoch": 0.36,
320
+ "grad_norm": 3.453125,
321
  "learning_rate": 4.066889974440757e-06,
322
+ "logits/chosen": 0.4600732922554016,
323
+ "logits/rejected": 0.5158972144126892,
324
+ "logps/chosen": -129.09141540527344,
325
+ "logps/rejected": -170.87411499023438,
326
+ "loss": 0.5489,
327
+ "pred_label": 192.35000610351562,
328
+ "rewards/accuracies": 0.32499998807907104,
329
+ "rewards/chosen": -0.7358335256576538,
330
+ "rewards/margins": 0.3306979537010193,
331
+ "rewards/rejected": -1.0665314197540283,
332
  "step": 170,
333
+ "use_label": 2705.64990234375
334
  },
335
  {
336
  "epoch": 0.38,
337
+ "grad_norm": 3.65625,
338
  "learning_rate": 3.92016186682789e-06,
339
+ "logits/chosen": 0.6188533902168274,
340
+ "logits/rejected": 0.7289873361587524,
341
+ "logps/chosen": -150.7683868408203,
342
+ "logps/rejected": -179.30160522460938,
343
+ "loss": 0.5565,
344
+ "pred_label": 230.1750030517578,
345
+ "rewards/accuracies": 0.34375,
346
+ "rewards/chosen": -0.8798072934150696,
347
+ "rewards/margins": 0.35036540031433105,
348
+ "rewards/rejected": -1.230172872543335,
349
  "step": 180,
350
+ "use_label": 2827.824951171875
351
  },
352
  {
353
  "epoch": 0.4,
354
+ "grad_norm": 4.15625,
355
  "learning_rate": 3.7658212309857576e-06,
356
+ "logits/chosen": 0.8427504301071167,
357
+ "logits/rejected": 1.2680375576019287,
358
+ "logps/chosen": -149.38197326660156,
359
+ "logps/rejected": -201.4063262939453,
360
+ "loss": 0.5452,
361
+ "pred_label": 274.17498779296875,
362
+ "rewards/accuracies": 0.3375000059604645,
363
+ "rewards/chosen": -0.9027034044265747,
364
+ "rewards/margins": 0.46735334396362305,
365
+ "rewards/rejected": -1.3700568675994873,
366
  "step": 190,
367
+ "use_label": 2943.824951171875
368
  },
369
  {
370
  "epoch": 0.42,
371
+ "grad_norm": 3.109375,
372
  "learning_rate": 3.604695382782159e-06,
373
+ "logits/chosen": 1.0629971027374268,
374
+ "logits/rejected": 1.2417268753051758,
375
+ "logps/chosen": -202.11306762695312,
376
+ "logps/rejected": -230.6200714111328,
377
+ "loss": 0.507,
378
+ "pred_label": 315.54998779296875,
379
+ "rewards/accuracies": 0.29374998807907104,
380
+ "rewards/chosen": -1.204367756843567,
381
+ "rewards/margins": 0.37038713693618774,
382
+ "rewards/rejected": -1.5747547149658203,
383
  "step": 200,
384
+ "use_label": 3062.449951171875
385
  },
386
  {
387
  "epoch": 0.42,
388
+ "eval_logits/chosen": 2.09150767326355,
389
+ "eval_logits/rejected": 2.1625571250915527,
390
+ "eval_logps/chosen": -231.13876342773438,
391
+ "eval_logps/rejected": -311.849365234375,
392
+ "eval_loss": 0.504833996295929,
393
+ "eval_pred_label": 373.4375,
394
+ "eval_rewards/accuracies": 0.359375,
395
+ "eval_rewards/chosen": -1.6722650527954102,
396
+ "eval_rewards/margins": 0.6742992401123047,
397
+ "eval_rewards/rejected": -2.346564292907715,
398
+ "eval_runtime": 125.4772,
399
+ "eval_samples_per_second": 15.939,
400
  "eval_steps_per_second": 0.255,
401
+ "eval_use_label": 3214.5625,
402
  "step": 200
403
  },
404
  {
405
  "epoch": 0.44,
406
+ "grad_norm": 2.859375,
407
  "learning_rate": 3.437648009023905e-06,
408
+ "logits/chosen": 1.6046574115753174,
409
+ "logits/rejected": 1.5769492387771606,
410
+ "logps/chosen": -200.22195434570312,
411
+ "logps/rejected": -262.50018310546875,
412
+ "loss": 0.5179,
413
+ "pred_label": 434.75,
414
+ "rewards/accuracies": 0.3062500059604645,
415
+ "rewards/chosen": -1.4424717426300049,
416
+ "rewards/margins": 0.5527372360229492,
417
+ "rewards/rejected": -1.995208740234375,
418
  "step": 210,
419
+ "use_label": 3359.25
420
  },
421
  {
422
  "epoch": 0.46,
423
+ "grad_norm": 3.46875,
424
  "learning_rate": 3.265574537815398e-06,
425
+ "logits/chosen": 1.2694753408432007,
426
+ "logits/rejected": 1.4022200107574463,
427
+ "logps/chosen": -256.5951232910156,
428
+ "logps/rejected": -258.8177795410156,
429
+ "loss": 0.495,
430
+ "pred_label": 487.0,
431
+ "rewards/accuracies": 0.28125,
432
+ "rewards/chosen": -1.7696645259857178,
433
+ "rewards/margins": 0.1548328697681427,
434
+ "rewards/rejected": -1.9244972467422485,
435
  "step": 220,
436
+ "use_label": 3467.0
437
  },
438
  {
439
  "epoch": 0.48,
440
+ "grad_norm": 4.0,
441
  "learning_rate": 3.089397338773569e-06,
442
+ "logits/chosen": 1.3947041034698486,
443
+ "logits/rejected": 1.5894306898117065,
444
+ "logps/chosen": -181.77613830566406,
445
+ "logps/rejected": -231.12332153320312,
446
+ "loss": 0.518,
447
+ "pred_label": 532.0499877929688,
448
+ "rewards/accuracies": 0.2874999940395355,
449
+ "rewards/chosen": -1.1734154224395752,
450
+ "rewards/margins": 0.5102296471595764,
451
+ "rewards/rejected": -1.683645248413086,
452
  "step": 230,
453
+ "use_label": 3581.949951171875
454
  },
455
  {
456
  "epoch": 0.5,
457
+ "grad_norm": 3.765625,
458
  "learning_rate": 2.9100607788275547e-06,
459
+ "logits/chosen": 1.8860304355621338,
460
+ "logits/rejected": 1.7700283527374268,
461
+ "logps/chosen": -187.30068969726562,
462
+ "logps/rejected": -253.653564453125,
463
+ "loss": 0.513,
464
+ "pred_label": 577.4000244140625,
465
+ "rewards/accuracies": 0.34375,
466
+ "rewards/chosen": -1.1826814413070679,
467
+ "rewards/margins": 0.6116172671318054,
468
+ "rewards/rejected": -1.794298768043518,
469
  "step": 240,
470
+ "use_label": 3696.60009765625
471
  },
472
  {
473
  "epoch": 0.52,
474
+ "grad_norm": 2.96875,
475
  "learning_rate": 2.72852616010567e-06,
476
+ "logits/chosen": 1.8056014776229858,
477
+ "logits/rejected": 1.8892968893051147,
478
+ "logps/chosen": -222.9417266845703,
479
+ "logps/rejected": -281.18231201171875,
480
+ "loss": 0.5089,
481
+ "pred_label": 626.8250122070312,
482
+ "rewards/accuracies": 0.39375001192092896,
483
+ "rewards/chosen": -1.4954261779785156,
484
+ "rewards/margins": 0.6240721940994263,
485
+ "rewards/rejected": -2.1194984912872314,
486
  "step": 250,
487
+ "use_label": 3807.175048828125
488
  },
489
  {
490
  "epoch": 0.54,
491
+ "grad_norm": 2.78125,
492
  "learning_rate": 2.5457665670441937e-06,
493
+ "logits/chosen": 2.1560091972351074,
494
+ "logits/rejected": 2.1253867149353027,
495
+ "logps/chosen": -209.4928741455078,
496
+ "logps/rejected": -288.0691223144531,
497
+ "loss": 0.4966,
498
+ "pred_label": 678.25,
499
+ "rewards/accuracies": 0.3375000059604645,
500
+ "rewards/chosen": -1.4338725805282593,
501
+ "rewards/margins": 0.7023388743400574,
502
+ "rewards/rejected": -2.136211395263672,
503
  "step": 260,
504
+ "use_label": 3915.75
505
  },
506
  {
507
  "epoch": 0.57,
508
+ "grad_norm": 7.0625,
509
  "learning_rate": 2.3627616503391813e-06,
510
+ "logits/chosen": 2.430591583251953,
511
+ "logits/rejected": 2.3143506050109863,
512
+ "logps/chosen": -224.49380493164062,
513
+ "logps/rejected": -281.049072265625,
514
+ "loss": 0.4953,
515
+ "pred_label": 728.0750122070312,
516
+ "rewards/accuracies": 0.3062500059604645,
517
+ "rewards/chosen": -1.5157802104949951,
518
+ "rewards/margins": 0.600957453250885,
519
+ "rewards/rejected": -2.1167378425598145,
520
  "step": 270,
521
+ "use_label": 4025.925048828125
522
  },
523
  {
524
  "epoch": 0.59,
525
+ "grad_norm": 2.84375,
526
  "learning_rate": 2.1804923757009885e-06,
527
+ "logits/chosen": 2.489325523376465,
528
+ "logits/rejected": 2.6676642894744873,
529
+ "logps/chosen": -211.58285522460938,
530
+ "logps/rejected": -260.68853759765625,
531
+ "loss": 0.5164,
532
+ "pred_label": 778.6749877929688,
533
+ "rewards/accuracies": 0.32499998807907104,
534
+ "rewards/chosen": -1.4607734680175781,
535
+ "rewards/margins": 0.5169156193733215,
536
+ "rewards/rejected": -1.9776890277862549,
537
  "step": 280,
538
+ "use_label": 4135.3251953125
539
  },
540
  {
541
  "epoch": 0.61,
542
+ "grad_norm": 3.890625,
543
  "learning_rate": 1.9999357655598894e-06,
544
+ "logits/chosen": 2.1360135078430176,
545
+ "logits/rejected": 2.1046082973480225,
546
+ "logps/chosen": -214.5660400390625,
547
+ "logps/rejected": -288.2430419921875,
548
+ "loss": 0.4926,
549
+ "pred_label": 830.5,
550
+ "rewards/accuracies": 0.32499998807907104,
551
+ "rewards/chosen": -1.4801760911941528,
552
+ "rewards/margins": 0.6386412978172302,
553
+ "rewards/rejected": -2.1188173294067383,
554
  "step": 290,
555
+ "use_label": 4243.5
556
  },
557
  {
558
  "epoch": 0.63,
559
+ "grad_norm": 5.6875,
560
  "learning_rate": 1.8220596619089576e-06,
561
+ "logits/chosen": 2.7119574546813965,
562
+ "logits/rejected": 2.501838207244873,
563
+ "logps/chosen": -269.5751953125,
564
+ "logps/rejected": -370.7685852050781,
565
+ "loss": 0.4799,
566
+ "pred_label": 889.2750244140625,
567
+ "rewards/accuracies": 0.36250001192092896,
568
+ "rewards/chosen": -1.9198087453842163,
569
+ "rewards/margins": 0.8466728329658508,
570
+ "rewards/rejected": -2.766481876373291,
571
  "step": 300,
572
+ "use_label": 4344.72509765625
573
  },
574
  {
575
  "epoch": 0.63,
576
+ "eval_logits/chosen": 3.1510589122772217,
577
+ "eval_logits/rejected": 3.222506284713745,
578
+ "eval_logps/chosen": -242.9697723388672,
579
+ "eval_logps/rejected": -343.4284973144531,
580
+ "eval_loss": 0.48854950070381165,
581
+ "eval_pred_label": 969.25,
582
+ "eval_rewards/accuracies": 0.3359375,
583
+ "eval_rewards/chosen": -1.7905751466751099,
584
+ "eval_rewards/margins": 0.871780276298523,
585
+ "eval_rewards/rejected": -2.662355422973633,
586
+ "eval_runtime": 125.4501,
587
+ "eval_samples_per_second": 15.943,
588
  "eval_steps_per_second": 0.255,
589
+ "eval_use_label": 4474.75,
590
  "step": 300
591
  },
592
  {
593
  "epoch": 0.65,
594
+ "grad_norm": 2.6875,
595
  "learning_rate": 1.647817538357072e-06,
596
+ "logits/chosen": 2.556201457977295,
597
+ "logits/rejected": 2.59236478805542,
598
+ "logps/chosen": -194.0545654296875,
599
+ "logps/rejected": -294.6336975097656,
600
+ "loss": 0.4854,
601
+ "pred_label": 1040.375,
602
+ "rewards/accuracies": 0.35624998807907104,
603
+ "rewards/chosen": -1.3845796585083008,
604
+ "rewards/margins": 0.8792532682418823,
605
+ "rewards/rejected": -2.2638330459594727,
606
  "step": 310,
607
+ "use_label": 4609.625
608
  },
609
  {
610
  "epoch": 0.67,
611
+ "grad_norm": 4.75,
612
  "learning_rate": 1.4781433892011132e-06,
613
+ "logits/chosen": 2.2738680839538574,
614
+ "logits/rejected": 2.5079522132873535,
615
+ "logps/chosen": -225.6286163330078,
616
+ "logps/rejected": -287.9954833984375,
617
+ "loss": 0.4846,
618
+ "pred_label": 1091.550048828125,
619
+ "rewards/accuracies": 0.34375,
620
+ "rewards/chosen": -1.5255814790725708,
621
+ "rewards/margins": 0.6824158430099487,
622
+ "rewards/rejected": -2.2079973220825195,
623
  "step": 320,
624
+ "use_label": 4718.4501953125
625
  },
626
  {
627
  "epoch": 0.69,
628
+ "grad_norm": 5.9375,
629
  "learning_rate": 1.3139467229135999e-06,
630
+ "logits/chosen": 2.808880567550659,
631
+ "logits/rejected": 2.8045334815979004,
632
+ "logps/chosen": -228.8271026611328,
633
+ "logps/rejected": -294.6129150390625,
634
+ "loss": 0.4772,
635
+ "pred_label": 1143.199951171875,
636
+ "rewards/accuracies": 0.3499999940395355,
637
+ "rewards/chosen": -1.587189793586731,
638
+ "rewards/margins": 0.6728593111038208,
639
+ "rewards/rejected": -2.2600488662719727,
640
  "step": 330,
641
+ "use_label": 4826.7998046875
642
  },
643
  {
644
  "epoch": 0.71,
645
+ "grad_norm": 2.765625,
646
  "learning_rate": 1.1561076868822756e-06,
647
+ "logits/chosen": 2.5798306465148926,
648
+ "logits/rejected": 2.4416697025299072,
649
+ "logps/chosen": -287.6748352050781,
650
+ "logps/rejected": -322.15899658203125,
651
+ "loss": 0.4893,
652
+ "pred_label": 1191.0999755859375,
653
+ "rewards/accuracies": 0.2750000059604645,
654
+ "rewards/chosen": -2.0293564796447754,
655
+ "rewards/margins": 0.42713117599487305,
656
+ "rewards/rejected": -2.4564874172210693,
657
  "step": 340,
658
+ "use_label": 4938.89990234375
659
  },
660
  {
661
  "epoch": 0.73,
662
+ "grad_norm": 3.15625,
663
  "learning_rate": 1.0054723495346484e-06,
664
+ "logits/chosen": 2.786007881164551,
665
+ "logits/rejected": 2.829763174057007,
666
+ "logps/chosen": -366.9530944824219,
667
+ "logps/rejected": -448.0956115722656,
668
+ "loss": 0.4576,
669
+ "pred_label": 1257.5250244140625,
670
+ "rewards/accuracies": 0.3187499940395355,
671
+ "rewards/chosen": -2.775364637374878,
672
+ "rewards/margins": 0.8529101610183716,
673
+ "rewards/rejected": -3.628274440765381,
674
  "step": 350,
675
+ "use_label": 5032.47509765625
676
  },
677
  {
678
  "epoch": 0.75,
679
+ "grad_norm": 2.9375,
680
  "learning_rate": 8.628481651367876e-07,
681
+ "logits/chosen": 2.9445724487304688,
682
+ "logits/rejected": 3.1931867599487305,
683
+ "logps/chosen": -301.0486145019531,
684
+ "logps/rejected": -423.34130859375,
685
+ "loss": 0.463,
686
+ "pred_label": 1317.699951171875,
687
+ "rewards/accuracies": 0.375,
688
+ "rewards/chosen": -2.374109983444214,
689
+ "rewards/margins": 1.1370208263397217,
690
+ "rewards/rejected": -3.5111305713653564,
691
  "step": 360,
692
+ "use_label": 5132.2998046875
693
  },
694
  {
695
  "epoch": 0.77,
696
+ "grad_norm": 4.15625,
697
  "learning_rate": 7.289996455765749e-07,
698
+ "logits/chosen": 3.396423816680908,
699
+ "logits/rejected": 3.5762810707092285,
700
+ "logps/chosen": -308.2368469238281,
701
+ "logps/rejected": -430.18524169921875,
702
+ "loss": 0.4574,
703
+ "pred_label": 1379.199951171875,
704
+ "rewards/accuracies": 0.3499999940395355,
705
+ "rewards/chosen": -2.4280202388763428,
706
+ "rewards/margins": 1.1946780681610107,
707
+ "rewards/rejected": -3.6226983070373535,
708
  "step": 370,
709
+ "use_label": 5230.7998046875
710
  },
711
  {
712
  "epoch": 0.8,
713
+ "grad_norm": 5.90625,
714
  "learning_rate": 6.046442623320145e-07,
715
+ "logits/chosen": 2.96122407913208,
716
+ "logits/rejected": 2.9667248725891113,
717
+ "logps/chosen": -357.73297119140625,
718
+ "logps/rejected": -534.4653930664062,
719
+ "loss": 0.4633,
720
+ "pred_label": 1439.324951171875,
721
+ "rewards/accuracies": 0.3062500059604645,
722
+ "rewards/chosen": -2.918975830078125,
723
+ "rewards/margins": 1.5620373487472534,
724
+ "rewards/rejected": -4.481013298034668,
725
  "step": 380,
726
+ "use_label": 5330.6748046875
727
  },
728
  {
729
  "epoch": 0.82,
730
+ "grad_norm": 6.625,
731
  "learning_rate": 4.904486005914027e-07,
732
+ "logits/chosen": 3.662972927093506,
733
+ "logits/rejected": 3.2519752979278564,
734
+ "logps/chosen": -526.6204833984375,
735
+ "logps/rejected": -657.4584350585938,
736
+ "loss": 0.4314,
737
+ "pred_label": 1506.574951171875,
738
+ "rewards/accuracies": 0.32499998807907104,
739
+ "rewards/chosen": -4.338021278381348,
740
+ "rewards/margins": 1.406864047050476,
741
+ "rewards/rejected": -5.744885444641113,
742
  "step": 390,
743
+ "use_label": 5423.4248046875
744
  },
745
  {
746
  "epoch": 0.84,
747
+ "grad_norm": 3.71875,
748
  "learning_rate": 3.8702478614051353e-07,
749
+ "logits/chosen": 3.13154935836792,
750
+ "logits/rejected": 3.3134002685546875,
751
+ "logps/chosen": -389.10174560546875,
752
+ "logps/rejected": -441.57305908203125,
753
+ "loss": 0.4443,
754
+ "pred_label": 1566.699951171875,
755
+ "rewards/accuracies": 0.3125,
756
+ "rewards/chosen": -3.166107654571533,
757
+ "rewards/margins": 0.6149949431419373,
758
+ "rewards/rejected": -3.7811026573181152,
759
  "step": 400,
760
+ "use_label": 5523.2998046875
761
  },
762
  {
763
  "epoch": 0.84,
764
+ "eval_logits/chosen": 4.387378692626953,
765
+ "eval_logits/rejected": 4.5207109451293945,
766
+ "eval_logps/chosen": -371.9975891113281,
767
+ "eval_logps/rejected": -566.3418579101562,
768
+ "eval_loss": 0.44047147035598755,
769
+ "eval_pred_label": 1650.28125,
770
+ "eval_rewards/accuracies": 0.34375,
771
+ "eval_rewards/chosen": -3.080853223800659,
772
+ "eval_rewards/margins": 1.8106356859207153,
773
+ "eval_rewards/rejected": -4.891489028930664,
774
+ "eval_runtime": 125.426,
775
+ "eval_samples_per_second": 15.946,
776
+ "eval_steps_per_second": 0.255,
777
+ "eval_use_label": 5649.71875,
778
  "step": 400
779
  },
780
  {
781
  "epoch": 0.86,
782
+ "grad_norm": 4.78125,
783
  "learning_rate": 2.9492720416985004e-07,
784
+ "logits/chosen": 3.4670283794403076,
785
+ "logits/rejected": 3.4501025676727295,
786
+ "logps/chosen": -364.8808898925781,
787
+ "logps/rejected": -489.7000427246094,
788
+ "loss": 0.4407,
789
+ "pred_label": 1735.4749755859375,
790
+ "rewards/accuracies": 0.33125001192092896,
791
+ "rewards/chosen": -3.034578800201416,
792
+ "rewards/margins": 1.265229344367981,
793
+ "rewards/rejected": -4.299808025360107,
794
  "step": 410,
795
+ "use_label": 5770.52490234375
796
  },
797
  {
798
  "epoch": 0.88,
799
+ "grad_norm": 2.359375,
800
  "learning_rate": 2.1464952759020857e-07,
801
+ "logits/chosen": 3.780207872390747,
802
+ "logits/rejected": 3.6938164234161377,
803
+ "logps/chosen": -389.7974548339844,
804
+ "logps/rejected": -390.455078125,
805
+ "loss": 0.46,
806
+ "pred_label": 1791.824951171875,
807
+ "rewards/accuracies": 0.20624999701976776,
808
+ "rewards/chosen": -3.288583278656006,
809
+ "rewards/margins": 0.10213696956634521,
810
+ "rewards/rejected": -3.3907198905944824,
811
  "step": 420,
812
+ "use_label": 5874.1748046875
813
  },
814
  {
815
  "epoch": 0.9,
816
+ "grad_norm": 4.625,
817
  "learning_rate": 1.4662207078575685e-07,
818
+ "logits/chosen": 3.7266852855682373,
819
+ "logits/rejected": 3.6461174488067627,
820
+ "logps/chosen": -460.2088928222656,
821
+ "logps/rejected": -562.7196655273438,
822
+ "loss": 0.4448,
823
+ "pred_label": 1855.1500244140625,
824
+ "rewards/accuracies": 0.39375001192092896,
825
+ "rewards/chosen": -3.6094202995300293,
826
+ "rewards/margins": 1.2242047786712646,
827
+ "rewards/rejected": -4.833625316619873,
828
  "step": 430,
829
+ "use_label": 5970.85009765625
830
  },
831
  {
832
  "epoch": 0.92,
833
+ "grad_norm": 3.953125,
834
  "learning_rate": 9.120948298936422e-08,
835
+ "logits/chosen": 3.6001758575439453,
836
+ "logits/rejected": 3.7878482341766357,
837
+ "logps/chosen": -407.4084167480469,
838
+ "logps/rejected": -561.12744140625,
839
+ "loss": 0.4359,
840
+ "pred_label": 1919.800048828125,
841
+ "rewards/accuracies": 0.3125,
842
+ "rewards/chosen": -3.4050445556640625,
843
+ "rewards/margins": 1.4556002616882324,
844
+ "rewards/rejected": -4.860644817352295,
845
  "step": 440,
846
+ "use_label": 6066.2001953125
847
  },
848
  {
849
  "epoch": 0.94,
850
+ "grad_norm": 3.65625,
851
  "learning_rate": 4.870879364444109e-08,
852
+ "logits/chosen": 4.037863254547119,
853
+ "logits/rejected": 3.809945583343506,
854
+ "logps/chosen": -381.8726501464844,
855
+ "logps/rejected": -569.0548706054688,
856
+ "loss": 0.4494,
857
+ "pred_label": 1975.4000244140625,
858
  "rewards/accuracies": 0.35624998807907104,
859
+ "rewards/chosen": -3.096148729324341,
860
+ "rewards/margins": 1.7115033864974976,
861
+ "rewards/rejected": -4.807651996612549,
862
  "step": 450,
863
+ "use_label": 6170.60009765625
864
  },
865
  {
866
  "epoch": 0.96,
867
+ "grad_norm": 4.34375,
868
  "learning_rate": 1.93478202307823e-08,
869
+ "logits/chosen": 3.7793803215026855,
870
+ "logits/rejected": 3.7878577709198,
871
+ "logps/chosen": -287.76025390625,
872
+ "logps/rejected": -431.52020263671875,
873
+ "loss": 0.4442,
874
+ "pred_label": 2037.375,
875
+ "rewards/accuracies": 0.2750000059604645,
876
+ "rewards/chosen": -2.4164037704467773,
877
+ "rewards/margins": 1.265346646308899,
878
+ "rewards/rejected": -3.681750535964966,
879
  "step": 460,
880
+ "use_label": 6268.625
881
  },
882
  {
883
  "epoch": 0.98,
884
+ "grad_norm": 5.96875,
885
  "learning_rate": 3.283947088983663e-09,
886
+ "logits/chosen": 3.807328701019287,
887
+ "logits/rejected": 3.645596981048584,
888
+ "logps/chosen": -342.3217468261719,
889
+ "logps/rejected": -512.537109375,
890
+ "loss": 0.4555,
891
+ "pred_label": 2090.22509765625,
892
+ "rewards/accuracies": 0.32499998807907104,
893
+ "rewards/chosen": -2.7499260902404785,
894
+ "rewards/margins": 1.6848747730255127,
895
+ "rewards/rejected": -4.4348015785217285,
896
  "step": 470,
897
+ "use_label": 6375.77490234375
898
  },
899
  {
900
  "epoch": 1.0,
901
  "step": 477,
902
  "total_flos": 0.0,
903
+ "train_loss": 0.5378267840019562,
904
+ "train_runtime": 9600.9753,
905
+ "train_samples_per_second": 6.368,
906
+ "train_steps_per_second": 0.05
907
  }
908
  ],
909
  "logging_steps": 10,