jikaixuan committed on
Commit
3ec4a5c
·
verified ·
1 Parent(s): 945d9e5

Model save

Browse files
Files changed (5) hide show
  1. README.md +16 -23
  2. adapter_model.safetensors +1 -1
  3. all_results.json +4 -19
  4. train_results.json +4 -4
  5. trainer_state.json +600 -600
README.md CHANGED
@@ -2,17 +2,10 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - dpo
8
  - generated_from_trainer
9
- - trl
10
- - dpo
11
- - alignment-handbook
12
- - generated_from_trainer
13
  base_model: mistralai/Mistral-7B-v0.1
14
- datasets:
15
- - HuggingFaceH4/ultrafeedback_binarized
16
  model-index:
17
  - name: zephyr-7b
18
  results: []
@@ -23,19 +16,19 @@ should probably proofread and complete it, then remove this comment. -->
23
 
24
  # zephyr-7b
25
 
26
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
27
  It achieves the following results on the evaluation set:
28
- - Loss: 0.6152
29
- - Rewards/chosen: -0.5055
30
- - Rewards/rejected: -0.8740
31
- - Rewards/accuracies: 0.3789
32
- - Rewards/margins: 0.3685
33
- - Logps/rejected: -164.5882
34
- - Logps/chosen: -114.4584
35
- - Logits/rejected: 1.5984
36
- - Logits/chosen: 1.5070
37
- - Use Label: 0.0
38
- - Pred Label: 0.0
39
 
40
  ## Model description
41
 
@@ -72,10 +65,10 @@ The following hyperparameters were used during training:
72
 
73
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
74
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
75
- | 0.6551 | 0.21 | 100 | 0.6526 | -0.2364 | -0.3728 | 0.3359 | 0.1364 | -114.4721 | -87.5525 | -1.7460 | -1.7620 | 0.0 | 0.0 |
76
- | 0.6376 | 0.42 | 200 | 0.6289 | -0.3405 | -0.6072 | 0.3672 | 0.2667 | -137.9142 | -97.9614 | 0.0432 | -0.0238 | 0.0 | 0.0 |
77
- | 0.6196 | 0.63 | 300 | 0.6189 | -0.3871 | -0.7293 | 0.375 | 0.3422 | -150.1250 | -102.6218 | 1.1831 | 1.0945 | 0.0 | 0.0 |
78
- | 0.6139 | 0.84 | 400 | 0.6157 | -0.4865 | -0.8500 | 0.3711 | 0.3636 | -162.1976 | -112.5605 | 1.5453 | 1.4533 | 0.0 | 0.0 |
79
 
80
 
81
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
 
8
  base_model: mistralai/Mistral-7B-v0.1
 
 
9
  model-index:
10
  - name: zephyr-7b
11
  results: []
 
16
 
17
  # zephyr-7b
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrafeedback_binarized dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5769
22
+ - Rewards/chosen: -0.6646
23
+ - Rewards/rejected: -1.1353
24
+ - Rewards/accuracies: 0.3711
25
+ - Rewards/margins: 0.4707
26
+ - Logps/rejected: -190.7267
27
+ - Logps/chosen: -130.3719
28
+ - Logits/rejected: 1.8500
29
+ - Logits/chosen: 1.7576
30
+ - Use Label: 6517.1875
31
+ - Pred Label: 782.8125
32
 
33
  ## Model description
34
 
 
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Use Label | Pred Label |
67
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:---------:|:----------:|
68
+ | 0.6531 | 0.21 | 100 | 0.6528 | -0.1643 | -0.2945 | 0.3633 | 0.1303 | -106.6470 | -80.3385 | -1.7198 | -1.7354 | 1725.3125 | 6.6875 |
69
+ | 0.6041 | 0.42 | 200 | 0.5936 | -0.7144 | -1.1047 | 0.3516 | 0.3903 | -187.6596 | -135.3474 | 0.9784 | 0.8864 | 3420.5938 | 167.4062 |
70
+ | 0.5763 | 0.63 | 300 | 0.5773 | -0.7930 | -1.2317 | 0.3516 | 0.4387 | -200.3615 | -143.2137 | 1.7526 | 1.6599 | 4991.2812 | 452.7188 |
71
+ | 0.5836 | 0.84 | 400 | 0.5769 | -0.6646 | -1.1353 | 0.3711 | 0.4707 | -190.7267 | -130.3719 | 1.8500 | 1.7576 | 6517.1875 | 782.8125 |
72
 
73
 
74
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a370d44f4bd7644f05dddbceaabd4c6255b0c5236b8eded581cb67596ef082a
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96028cf9e913d832d0a70759eea27b9a5c849327ec7dfdcb4154ba5214730296
3
  size 671150064
all_results.json CHANGED
@@ -1,23 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_logits/chosen": 1.507018804550171,
4
- "eval_logits/rejected": 1.598363995552063,
5
- "eval_logps/chosen": -114.45843505859375,
6
- "eval_logps/rejected": -164.58822631835938,
7
- "eval_loss": 0.6152364015579224,
8
- "eval_pred_label": 0.0,
9
- "eval_rewards/accuracies": 0.37890625,
10
- "eval_rewards/chosen": -0.5054618120193481,
11
- "eval_rewards/margins": 0.3684910833835602,
12
- "eval_rewards/rejected": -0.8739528656005859,
13
- "eval_runtime": 125.1233,
14
- "eval_samples": 2000,
15
- "eval_samples_per_second": 15.984,
16
- "eval_steps_per_second": 0.256,
17
- "eval_use_label": 0.0,
18
- "train_loss": 0.6357159084743924,
19
- "train_runtime": 9601.7268,
20
  "train_samples": 61135,
21
- "train_samples_per_second": 6.367,
22
- "train_steps_per_second": 0.05
23
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6110695428068533,
4
+ "train_runtime": 9999.3279,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 6.114,
7
+ "train_steps_per_second": 0.048
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.6357159084743924,
4
- "train_runtime": 9601.7268,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 6.367,
7
- "train_steps_per_second": 0.05
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.6110695428068533,
4
+ "train_runtime": 9999.3279,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 6.114,
7
+ "train_steps_per_second": 0.048
8
  }
trainer_state.json CHANGED
@@ -23,887 +23,887 @@
23
  "rewards/margins": 0.0,
24
  "rewards/rejected": 0.0,
25
  "step": 1,
26
- "use_label": 0.0
27
  },
28
  {
29
  "epoch": 0.02,
30
  "grad_norm": 0.4609375,
31
  "learning_rate": 1.0416666666666667e-06,
32
- "logits/chosen": -2.2421462535858154,
33
- "logits/rejected": -2.2770614624023438,
34
- "logps/chosen": -51.98179626464844,
35
- "logps/rejected": -64.9604263305664,
36
  "loss": 0.6929,
37
  "pred_label": 0.0,
38
- "rewards/accuracies": 0.2222222238779068,
39
- "rewards/chosen": 0.001975727966055274,
40
- "rewards/margins": 0.00047667179023846984,
41
- "rewards/rejected": 0.001499056350439787,
42
  "step": 10,
43
- "use_label": 0.0
44
  },
45
  {
46
  "epoch": 0.04,
47
  "grad_norm": 0.39453125,
48
  "learning_rate": 2.0833333333333334e-06,
49
- "logits/chosen": -2.2520272731781006,
50
- "logits/rejected": -2.255510091781616,
51
- "logps/chosen": -62.492515563964844,
52
- "logps/rejected": -72.63607788085938,
53
  "loss": 0.6919,
54
  "pred_label": 0.0,
55
  "rewards/accuracies": 0.2874999940395355,
56
- "rewards/chosen": 0.01601376011967659,
57
- "rewards/margins": 0.0011284304782748222,
58
- "rewards/rejected": 0.014885328710079193,
59
  "step": 20,
60
- "use_label": 0.0
61
  },
62
  {
63
  "epoch": 0.06,
64
- "grad_norm": 0.5078125,
65
  "learning_rate": 3.125e-06,
66
- "logits/chosen": -2.3422012329101562,
67
- "logits/rejected": -2.3548905849456787,
68
- "logps/chosen": -79.14694213867188,
69
- "logps/rejected": -98.82722473144531,
70
- "loss": 0.6898,
71
- "pred_label": 0.0,
72
- "rewards/accuracies": 0.2874999940395355,
73
- "rewards/chosen": 0.030949687585234642,
74
- "rewards/margins": 0.0029636542312800884,
75
- "rewards/rejected": 0.027986034750938416,
76
  "step": 30,
77
- "use_label": 0.0
78
  },
79
  {
80
  "epoch": 0.08,
81
- "grad_norm": 0.515625,
82
  "learning_rate": 4.166666666666667e-06,
83
- "logits/chosen": -2.322833776473999,
84
- "logits/rejected": -2.3010501861572266,
85
- "logps/chosen": -82.85880279541016,
86
- "logps/rejected": -82.40392303466797,
87
  "loss": 0.6866,
88
  "pred_label": 0.0,
89
  "rewards/accuracies": 0.2874999940395355,
90
- "rewards/chosen": 0.033333443105220795,
91
- "rewards/margins": 0.011918319389224052,
92
- "rewards/rejected": 0.021415119990706444,
93
  "step": 40,
94
- "use_label": 0.0
95
  },
96
  {
97
  "epoch": 0.1,
98
- "grad_norm": 0.67578125,
99
  "learning_rate": 4.999731868769027e-06,
100
- "logits/chosen": -2.241189956665039,
101
- "logits/rejected": -2.263849973678589,
102
- "logps/chosen": -67.93062591552734,
103
- "logps/rejected": -81.85546875,
104
  "loss": 0.6805,
105
  "pred_label": 0.0,
106
  "rewards/accuracies": 0.32499998807907104,
107
- "rewards/chosen": 0.009002490900456905,
108
- "rewards/margins": 0.03016103245317936,
109
- "rewards/rejected": -0.02115854248404503,
110
  "step": 50,
111
- "use_label": 0.0
112
  },
113
  {
114
  "epoch": 0.13,
115
- "grad_norm": 1.09375,
116
  "learning_rate": 4.9903533134293035e-06,
117
- "logits/chosen": -2.218756914138794,
118
- "logits/rejected": -2.1594481468200684,
119
- "logps/chosen": -62.0407600402832,
120
- "logps/rejected": -71.9369888305664,
121
- "loss": 0.6748,
122
  "pred_label": 0.0,
123
  "rewards/accuracies": 0.3062500059604645,
124
- "rewards/chosen": -0.0231451578438282,
125
- "rewards/margins": 0.04653460532426834,
126
- "rewards/rejected": -0.06967976689338684,
127
  "step": 60,
128
- "use_label": 0.0
129
  },
130
  {
131
  "epoch": 0.15,
132
- "grad_norm": 0.8984375,
133
  "learning_rate": 4.967625656594782e-06,
134
- "logits/chosen": -2.08909273147583,
135
- "logits/rejected": -2.088801383972168,
136
- "logps/chosen": -68.09326171875,
137
- "logps/rejected": -81.9454116821289,
138
- "loss": 0.6684,
139
  "pred_label": 0.0,
140
  "rewards/accuracies": 0.25,
141
- "rewards/chosen": -0.12382155656814575,
142
- "rewards/margins": 0.03761869668960571,
143
- "rewards/rejected": -0.16144026815891266,
144
  "step": 70,
145
- "use_label": 0.0
146
  },
147
  {
148
  "epoch": 0.17,
149
- "grad_norm": 1.15625,
150
  "learning_rate": 4.93167072587771e-06,
151
- "logits/chosen": -2.20400071144104,
152
- "logits/rejected": -2.1452622413635254,
153
- "logps/chosen": -55.867881774902344,
154
- "logps/rejected": -70.91771697998047,
155
- "loss": 0.6588,
156
- "pred_label": 0.0,
157
- "rewards/accuracies": 0.26875001192092896,
158
- "rewards/chosen": -0.0733698159456253,
159
- "rewards/margins": 0.10403277724981308,
160
- "rewards/rejected": -0.17740261554718018,
161
  "step": 80,
162
- "use_label": 0.0
163
  },
164
  {
165
  "epoch": 0.19,
166
- "grad_norm": 1.0546875,
167
  "learning_rate": 4.882681251368549e-06,
168
- "logits/chosen": -1.991231918334961,
169
- "logits/rejected": -1.9964717626571655,
170
- "logps/chosen": -72.28443908691406,
171
- "logps/rejected": -90.79218292236328,
172
- "loss": 0.6587,
173
- "pred_label": 0.0,
174
- "rewards/accuracies": 0.30000001192092896,
175
- "rewards/chosen": -0.13902384042739868,
176
- "rewards/margins": 0.08125626295804977,
177
- "rewards/rejected": -0.22028008103370667,
178
  "step": 90,
179
- "use_label": 0.0
180
  },
181
  {
182
  "epoch": 0.21,
183
- "grad_norm": 2.359375,
184
  "learning_rate": 4.8209198325401815e-06,
185
- "logits/chosen": -1.9231764078140259,
186
- "logits/rejected": -1.9043807983398438,
187
- "logps/chosen": -103.5636978149414,
188
- "logps/rejected": -96.08602142333984,
189
- "loss": 0.6551,
190
- "pred_label": 0.0,
191
- "rewards/accuracies": 0.35624998807907104,
192
- "rewards/chosen": -0.2353379726409912,
193
- "rewards/margins": 0.08685441315174103,
194
- "rewards/rejected": -0.32219237089157104,
195
  "step": 100,
196
- "use_label": 0.0
197
  },
198
  {
199
  "epoch": 0.21,
200
- "eval_logits/chosen": -1.762041687965393,
201
- "eval_logits/rejected": -1.7460479736328125,
202
- "eval_logps/chosen": -87.55253601074219,
203
- "eval_logps/rejected": -114.47212219238281,
204
- "eval_loss": 0.652633547782898,
205
- "eval_pred_label": 0.0,
206
- "eval_rewards/accuracies": 0.3359375,
207
- "eval_rewards/chosen": -0.23640292882919312,
208
- "eval_rewards/margins": 0.136388897895813,
209
- "eval_rewards/rejected": -0.3727918267250061,
210
- "eval_runtime": 125.4491,
211
- "eval_samples_per_second": 15.943,
212
- "eval_steps_per_second": 0.255,
213
- "eval_use_label": 0.0,
214
  "step": 100
215
  },
216
  {
217
  "epoch": 0.23,
218
- "grad_norm": 1.59375,
219
  "learning_rate": 4.746717530629565e-06,
220
- "logits/chosen": -1.7847106456756592,
221
- "logits/rejected": -1.7590484619140625,
222
- "logps/chosen": -85.73925018310547,
223
- "logps/rejected": -106.20509338378906,
224
- "loss": 0.6557,
225
- "pred_label": 0.0,
226
- "rewards/accuracies": 0.3499999940395355,
227
- "rewards/chosen": -0.14638465642929077,
228
- "rewards/margins": 0.12975916266441345,
229
- "rewards/rejected": -0.2761438190937042,
230
  "step": 110,
231
- "use_label": 0.0
232
  },
233
  {
234
  "epoch": 0.25,
235
- "grad_norm": 1.828125,
236
  "learning_rate": 4.660472094042121e-06,
237
- "logits/chosen": -1.1902318000793457,
238
- "logits/rejected": -1.0542975664138794,
239
- "logps/chosen": -108.4779052734375,
240
- "logps/rejected": -127.95109558105469,
241
- "loss": 0.6493,
242
- "pred_label": 0.0,
243
- "rewards/accuracies": 0.36250001192092896,
244
- "rewards/chosen": -0.38532325625419617,
245
- "rewards/margins": 0.1649974286556244,
246
- "rewards/rejected": -0.5503206849098206,
247
  "step": 120,
248
- "use_label": 0.0
249
  },
250
  {
251
  "epoch": 0.27,
252
- "grad_norm": 1.9375,
253
  "learning_rate": 4.5626458262912745e-06,
254
- "logits/chosen": -0.818010687828064,
255
- "logits/rejected": -0.7847374081611633,
256
- "logps/chosen": -109.61775207519531,
257
- "logps/rejected": -133.42086791992188,
258
- "loss": 0.6524,
259
- "pred_label": 0.0,
260
- "rewards/accuracies": 0.33125001192092896,
261
- "rewards/chosen": -0.43839359283447266,
262
- "rewards/margins": 0.16735044121742249,
263
- "rewards/rejected": -0.6057440638542175,
264
  "step": 130,
265
- "use_label": 0.0
266
  },
267
  {
268
  "epoch": 0.29,
269
- "grad_norm": 1.71875,
270
  "learning_rate": 4.453763107901676e-06,
271
- "logits/chosen": -0.7395650148391724,
272
- "logits/rejected": -0.8444339036941528,
273
- "logps/chosen": -116.97528076171875,
274
- "logps/rejected": -130.2399139404297,
275
- "loss": 0.6381,
276
- "pred_label": 0.0,
277
- "rewards/accuracies": 0.33125001192092896,
278
- "rewards/chosen": -0.3622770607471466,
279
- "rewards/margins": 0.1490650475025177,
280
- "rewards/rejected": -0.5113420486450195,
281
  "step": 140,
282
- "use_label": 0.0
283
  },
284
  {
285
  "epoch": 0.31,
286
- "grad_norm": 2.125,
287
  "learning_rate": 4.33440758555951e-06,
288
- "logits/chosen": -0.6497868299484253,
289
- "logits/rejected": -0.6378159523010254,
290
- "logps/chosen": -89.60552978515625,
291
- "logps/rejected": -115.42192077636719,
292
- "loss": 0.6379,
293
- "pred_label": 0.0,
294
- "rewards/accuracies": 0.3187499940395355,
295
- "rewards/chosen": -0.2445882111787796,
296
- "rewards/margins": 0.23124215006828308,
297
- "rewards/rejected": -0.4758303761482239,
298
  "step": 150,
299
- "use_label": 0.0
300
  },
301
  {
302
  "epoch": 0.33,
303
- "grad_norm": 2.15625,
304
  "learning_rate": 4.205219043576955e-06,
305
- "logits/chosen": -0.3159053921699524,
306
- "logits/rejected": -0.33064812421798706,
307
- "logps/chosen": -99.68696594238281,
308
- "logps/rejected": -129.45729064941406,
309
- "loss": 0.6317,
310
- "pred_label": 0.0,
311
- "rewards/accuracies": 0.2874999940395355,
312
- "rewards/chosen": -0.35356926918029785,
313
- "rewards/margins": 0.16687795519828796,
314
- "rewards/rejected": -0.5204472541809082,
315
  "step": 160,
316
- "use_label": 0.0
317
  },
318
  {
319
  "epoch": 0.36,
320
- "grad_norm": 2.4375,
321
  "learning_rate": 4.066889974440757e-06,
322
- "logits/chosen": 0.14531800150871277,
323
- "logits/rejected": 0.18166163563728333,
324
- "logps/chosen": -95.45491027832031,
325
- "logps/rejected": -125.1463623046875,
326
- "loss": 0.6291,
327
- "pred_label": 0.0,
328
- "rewards/accuracies": 0.29374998807907104,
329
- "rewards/chosen": -0.39946848154067993,
330
- "rewards/margins": 0.20978550612926483,
331
- "rewards/rejected": -0.609254002571106,
332
  "step": 170,
333
- "use_label": 0.0
334
  },
335
  {
336
  "epoch": 0.38,
337
- "grad_norm": 2.453125,
338
  "learning_rate": 3.92016186682789e-06,
339
- "logits/chosen": -0.3282355070114136,
340
- "logits/rejected": -0.21966704726219177,
341
- "logps/chosen": -108.00712585449219,
342
- "logps/rejected": -128.67587280273438,
343
- "loss": 0.649,
344
- "pred_label": 0.0,
345
  "rewards/accuracies": 0.35624998807907104,
346
- "rewards/chosen": -0.4521949887275696,
347
- "rewards/margins": 0.27172034978866577,
348
- "rewards/rejected": -0.7239152789115906,
349
  "step": 180,
350
- "use_label": 0.0
351
  },
352
  {
353
  "epoch": 0.4,
354
- "grad_norm": 1.84375,
355
  "learning_rate": 3.7658212309857576e-06,
356
- "logits/chosen": -0.889633297920227,
357
- "logits/rejected": -0.6851574778556824,
358
- "logps/chosen": -91.25111389160156,
359
- "logps/rejected": -118.9649887084961,
360
- "loss": 0.6461,
361
- "pred_label": 0.0,
362
- "rewards/accuracies": 0.33125001192092896,
363
- "rewards/chosen": -0.32139474153518677,
364
- "rewards/margins": 0.22424864768981934,
365
- "rewards/rejected": -0.5456433892250061,
366
  "step": 190,
367
- "use_label": 0.0
368
  },
369
  {
370
  "epoch": 0.42,
371
- "grad_norm": 1.9453125,
372
  "learning_rate": 3.604695382782159e-06,
373
- "logits/chosen": -0.8204952478408813,
374
- "logits/rejected": -0.7186430096626282,
375
- "logps/chosen": -112.41142272949219,
376
- "logps/rejected": -120.7835693359375,
377
- "loss": 0.6376,
378
- "pred_label": 0.0,
379
- "rewards/accuracies": 0.3125,
380
- "rewards/chosen": -0.30735117197036743,
381
- "rewards/margins": 0.169038325548172,
382
- "rewards/rejected": -0.47638946771621704,
383
  "step": 200,
384
- "use_label": 0.0
385
  },
386
  {
387
  "epoch": 0.42,
388
- "eval_logits/chosen": -0.023804781958460808,
389
- "eval_logits/rejected": 0.04317883029580116,
390
- "eval_logps/chosen": -97.96138000488281,
391
- "eval_logps/rejected": -137.9141845703125,
392
- "eval_loss": 0.6288520693778992,
393
- "eval_pred_label": 0.0,
394
- "eval_rewards/accuracies": 0.3671875,
395
- "eval_rewards/chosen": -0.34049129486083984,
396
- "eval_rewards/margins": 0.26672109961509705,
397
- "eval_rewards/rejected": -0.6072123646736145,
398
- "eval_runtime": 125.433,
399
- "eval_samples_per_second": 15.945,
400
  "eval_steps_per_second": 0.255,
401
- "eval_use_label": 0.0,
402
  "step": 200
403
  },
404
  {
405
  "epoch": 0.44,
406
- "grad_norm": 2.265625,
407
  "learning_rate": 3.437648009023905e-06,
408
- "logits/chosen": -0.05805685371160507,
409
- "logits/rejected": -0.06056814268231392,
410
- "logps/chosen": -88.78871154785156,
411
- "logps/rejected": -124.3318862915039,
412
- "loss": 0.6218,
413
- "pred_label": 0.0,
414
- "rewards/accuracies": 0.375,
415
- "rewards/chosen": -0.3281395435333252,
416
- "rewards/margins": 0.28538644313812256,
417
- "rewards/rejected": -0.613525927066803,
418
  "step": 210,
419
- "use_label": 0.0
420
  },
421
  {
422
  "epoch": 0.46,
423
- "grad_norm": 2.21875,
424
  "learning_rate": 3.265574537815398e-06,
425
- "logits/chosen": -0.1400775909423828,
426
- "logits/rejected": -0.005620801355689764,
427
- "logps/chosen": -133.7158660888672,
428
- "logps/rejected": -136.84619140625,
429
- "loss": 0.627,
430
- "pred_label": 0.0,
431
- "rewards/accuracies": 0.30000001192092896,
432
- "rewards/chosen": -0.5408719778060913,
433
- "rewards/margins": 0.16390959918498993,
434
- "rewards/rejected": -0.7047815918922424,
435
  "step": 220,
436
- "use_label": 0.0
437
  },
438
  {
439
  "epoch": 0.48,
440
- "grad_norm": 1.8515625,
441
  "learning_rate": 3.089397338773569e-06,
442
- "logits/chosen": 0.16266627609729767,
443
- "logits/rejected": 0.2626825273036957,
444
- "logps/chosen": -93.3644027709961,
445
- "logps/rejected": -119.67996978759766,
446
- "loss": 0.6261,
447
- "pred_label": 0.0,
448
- "rewards/accuracies": 0.3187499940395355,
449
- "rewards/chosen": -0.28929832577705383,
450
- "rewards/margins": 0.27991363406181335,
451
- "rewards/rejected": -0.5692119598388672,
452
  "step": 230,
453
- "use_label": 0.0
454
  },
455
  {
456
  "epoch": 0.5,
457
- "grad_norm": 1.8984375,
458
  "learning_rate": 2.9100607788275547e-06,
459
- "logits/chosen": 0.854693591594696,
460
- "logits/rejected": 0.7261193990707397,
461
- "logps/chosen": -99.00528717041016,
462
- "logps/rejected": -135.73580932617188,
463
- "loss": 0.6295,
464
- "pred_label": 0.0,
465
  "rewards/accuracies": 0.3687500059604645,
466
- "rewards/chosen": -0.2997274696826935,
467
- "rewards/margins": 0.3153937757015228,
468
- "rewards/rejected": -0.6151211857795715,
469
  "step": 240,
470
- "use_label": 0.0
471
  },
472
  {
473
  "epoch": 0.52,
474
- "grad_norm": 2.03125,
475
  "learning_rate": 2.72852616010567e-06,
476
- "logits/chosen": 0.6816203594207764,
477
- "logits/rejected": 0.7033491134643555,
478
- "logps/chosen": -119.7255859375,
479
- "logps/rejected": -144.8857421875,
480
- "loss": 0.6376,
481
- "pred_label": 0.0,
482
- "rewards/accuracies": 0.3812499940395355,
483
- "rewards/chosen": -0.4632648825645447,
484
- "rewards/margins": 0.2932681143283844,
485
- "rewards/rejected": -0.7565330266952515,
486
  "step": 250,
487
- "use_label": 0.0
488
  },
489
  {
490
  "epoch": 0.54,
491
- "grad_norm": 1.8984375,
492
  "learning_rate": 2.5457665670441937e-06,
493
- "logits/chosen": 0.5938165187835693,
494
- "logits/rejected": 0.5592354536056519,
495
- "logps/chosen": -110.32804870605469,
496
- "logps/rejected": -146.76275634765625,
497
- "loss": 0.6162,
498
- "pred_label": 0.0,
499
- "rewards/accuracies": 0.34375,
500
- "rewards/chosen": -0.44222426414489746,
501
- "rewards/margins": 0.2809238135814667,
502
- "rewards/rejected": -0.7231480479240417,
503
  "step": 260,
504
- "use_label": 0.0
505
  },
506
  {
507
  "epoch": 0.57,
508
- "grad_norm": 2.90625,
509
  "learning_rate": 2.3627616503391813e-06,
510
- "logits/chosen": 0.6390979290008545,
511
- "logits/rejected": 0.5789315700531006,
512
- "logps/chosen": -123.83528137207031,
513
- "logps/rejected": -144.61489868164062,
514
- "loss": 0.6162,
515
- "pred_label": 0.0,
516
- "rewards/accuracies": 0.36250001192092896,
517
- "rewards/chosen": -0.5091949701309204,
518
- "rewards/margins": 0.24320097267627716,
519
- "rewards/rejected": -0.7523959279060364,
520
  "step": 270,
521
- "use_label": 0.0
522
  },
523
  {
524
  "epoch": 0.59,
525
- "grad_norm": 2.34375,
526
  "learning_rate": 2.1804923757009885e-06,
527
- "logits/chosen": 0.8771865963935852,
528
- "logits/rejected": 1.0158352851867676,
529
- "logps/chosen": -118.5296859741211,
530
- "logps/rejected": -138.31729125976562,
531
- "loss": 0.6357,
532
- "pred_label": 0.0,
533
- "rewards/accuracies": 0.30000001192092896,
534
- "rewards/chosen": -0.5302416086196899,
535
- "rewards/margins": 0.2237352430820465,
536
- "rewards/rejected": -0.7539768218994141,
537
  "step": 280,
538
- "use_label": 0.0
539
  },
540
  {
541
  "epoch": 0.61,
542
- "grad_norm": 2.59375,
543
  "learning_rate": 1.9999357655598894e-06,
544
- "logits/chosen": 0.44083184003829956,
545
- "logits/rejected": 0.41123947501182556,
546
- "logps/chosen": -112.27372741699219,
547
- "logps/rejected": -146.95498657226562,
548
- "loss": 0.6228,
549
- "pred_label": 0.0,
550
- "rewards/accuracies": 0.30000001192092896,
551
- "rewards/chosen": -0.4572528004646301,
552
- "rewards/margins": 0.24868395924568176,
553
- "rewards/rejected": -0.7059367299079895,
554
  "step": 290,
555
- "use_label": 0.0
556
  },
557
  {
558
  "epoch": 0.63,
559
- "grad_norm": 2.34375,
560
  "learning_rate": 1.8220596619089576e-06,
561
- "logits/chosen": 0.6273639798164368,
562
- "logits/rejected": 0.5140804052352905,
563
- "logps/chosen": -123.02046966552734,
564
- "logps/rejected": -168.80987548828125,
565
- "loss": 0.6196,
566
- "pred_label": 0.0,
567
- "rewards/accuracies": 0.40625,
568
- "rewards/chosen": -0.4542613625526428,
569
- "rewards/margins": 0.2926333546638489,
570
- "rewards/rejected": -0.7468947172164917,
571
  "step": 300,
572
- "use_label": 0.0
573
  },
574
  {
575
  "epoch": 0.63,
576
- "eval_logits/chosen": 1.0944873094558716,
577
- "eval_logits/rejected": 1.1831356287002563,
578
- "eval_logps/chosen": -102.62176513671875,
579
- "eval_logps/rejected": -150.12503051757812,
580
- "eval_loss": 0.618873655796051,
581
- "eval_pred_label": 0.0,
582
- "eval_rewards/accuracies": 0.375,
583
- "eval_rewards/chosen": -0.3870951533317566,
584
- "eval_rewards/margins": 0.34222573041915894,
585
- "eval_rewards/rejected": -0.7293209433555603,
586
- "eval_runtime": 125.4362,
587
- "eval_samples_per_second": 15.944,
588
  "eval_steps_per_second": 0.255,
589
- "eval_use_label": 0.0,
590
  "step": 300
591
  },
592
  {
593
  "epoch": 0.65,
594
- "grad_norm": 1.8515625,
595
  "learning_rate": 1.647817538357072e-06,
596
- "logits/chosen": 0.8131985664367676,
597
- "logits/rejected": 0.8752232789993286,
598
- "logps/chosen": -91.52378845214844,
599
- "logps/rejected": -139.95840454101562,
600
- "loss": 0.5999,
601
- "pred_label": 0.0,
602
- "rewards/accuracies": 0.3687500059604645,
603
- "rewards/chosen": -0.3592718541622162,
604
- "rewards/margins": 0.3578081727027893,
605
- "rewards/rejected": -0.7170799970626831,
606
  "step": 310,
607
- "use_label": 0.0
608
  },
609
  {
610
  "epoch": 0.67,
611
- "grad_norm": 2.40625,
612
  "learning_rate": 1.4781433892011132e-06,
613
- "logits/chosen": 0.9751952886581421,
614
- "logits/rejected": 1.1630818843841553,
615
- "logps/chosen": -135.82566833496094,
616
- "logps/rejected": -168.11805725097656,
617
- "loss": 0.6109,
618
- "pred_label": 0.0,
619
- "rewards/accuracies": 0.3687500059604645,
620
- "rewards/chosen": -0.6275521516799927,
621
- "rewards/margins": 0.3816707730293274,
622
- "rewards/rejected": -1.0092228651046753,
623
  "step": 320,
624
- "use_label": 0.0
625
  },
626
  {
627
  "epoch": 0.69,
628
- "grad_norm": 1.984375,
629
  "learning_rate": 1.3139467229135999e-06,
630
- "logits/chosen": 1.3293979167938232,
631
- "logits/rejected": 1.3260401487350464,
632
- "logps/chosen": -135.96664428710938,
633
- "logps/rejected": -166.52359008789062,
634
- "loss": 0.6295,
635
- "pred_label": 0.0,
636
  "rewards/accuracies": 0.33125001192092896,
637
- "rewards/chosen": -0.6585850715637207,
638
- "rewards/margins": 0.3205706775188446,
639
- "rewards/rejected": -0.9791557192802429,
640
  "step": 330,
641
- "use_label": 0.0
642
  },
643
  {
644
  "epoch": 0.71,
645
- "grad_norm": 2.09375,
646
  "learning_rate": 1.1561076868822756e-06,
647
- "logits/chosen": 0.7383319139480591,
648
- "logits/rejected": 0.6407849192619324,
649
- "logps/chosen": -150.60504150390625,
650
- "logps/rejected": -166.74940490722656,
651
- "loss": 0.6247,
652
- "pred_label": 0.0,
653
- "rewards/accuracies": 0.3499999940395355,
654
- "rewards/chosen": -0.658658504486084,
655
- "rewards/margins": 0.24373307824134827,
656
- "rewards/rejected": -0.9023915529251099,
657
  "step": 340,
658
- "use_label": 0.0
659
  },
660
  {
661
  "epoch": 0.73,
662
- "grad_norm": 2.21875,
663
  "learning_rate": 1.0054723495346484e-06,
664
- "logits/chosen": 0.6359546184539795,
665
- "logits/rejected": 0.7167641520500183,
666
- "logps/chosen": -163.8385772705078,
667
- "logps/rejected": -195.6297607421875,
668
- "loss": 0.6138,
669
- "pred_label": 0.0,
670
  "rewards/accuracies": 0.36250001192092896,
671
- "rewards/chosen": -0.7442194819450378,
672
- "rewards/margins": 0.3593973219394684,
673
- "rewards/rejected": -1.103616714477539,
674
  "step": 350,
675
- "use_label": 0.0
676
  },
677
  {
678
  "epoch": 0.75,
679
- "grad_norm": 1.859375,
680
  "learning_rate": 8.628481651367876e-07,
681
- "logits/chosen": 0.7298086881637573,
682
- "logits/rejected": 0.8517257571220398,
683
- "logps/chosen": -119.41548156738281,
684
- "logps/rejected": -165.3460235595703,
685
- "loss": 0.6137,
686
- "pred_label": 0.0,
687
- "rewards/accuracies": 0.3812499940395355,
688
- "rewards/chosen": -0.5577787160873413,
689
- "rewards/margins": 0.37339919805526733,
690
- "rewards/rejected": -0.9311779141426086,
691
  "step": 360,
692
- "use_label": 0.0
693
  },
694
  {
695
  "epoch": 0.77,
696
- "grad_norm": 2.421875,
697
  "learning_rate": 7.289996455765749e-07,
698
- "logits/chosen": 0.8383787274360657,
699
- "logits/rejected": 0.9305205345153809,
700
- "logps/chosen": -111.84449768066406,
701
- "logps/rejected": -153.93136596679688,
702
- "loss": 0.6125,
703
- "pred_label": 0.0,
704
- "rewards/accuracies": 0.3499999940395355,
705
- "rewards/chosen": -0.46409696340560913,
706
- "rewards/margins": 0.39606258273124695,
707
- "rewards/rejected": -0.8601595759391785,
708
  "step": 370,
709
- "use_label": 0.0
710
  },
711
  {
712
  "epoch": 0.8,
713
- "grad_norm": 1.8984375,
714
  "learning_rate": 6.046442623320145e-07,
715
- "logits/chosen": 0.5329448580741882,
716
- "logits/rejected": 0.513522744178772,
717
- "logps/chosen": -116.62841796875,
718
- "logps/rejected": -165.17893981933594,
719
- "loss": 0.6191,
720
- "pred_label": 0.0,
721
  "rewards/accuracies": 0.3187499940395355,
722
- "rewards/chosen": -0.5079302787780762,
723
- "rewards/margins": 0.2802185118198395,
724
- "rewards/rejected": -0.7881487607955933,
725
  "step": 380,
726
- "use_label": 0.0
727
  },
728
  {
729
  "epoch": 0.82,
730
- "grad_norm": 2.4375,
731
  "learning_rate": 4.904486005914027e-07,
732
- "logits/chosen": 0.8266662359237671,
733
- "logits/rejected": 0.5234752893447876,
734
- "logps/chosen": -159.83407592773438,
735
- "logps/rejected": -186.96768188476562,
736
- "loss": 0.6085,
737
- "pred_label": 0.0,
738
- "rewards/accuracies": 0.38749998807907104,
739
- "rewards/chosen": -0.6701575517654419,
740
- "rewards/margins": 0.36982032656669617,
741
- "rewards/rejected": -1.039977788925171,
742
  "step": 390,
743
- "use_label": 0.0
744
  },
745
  {
746
  "epoch": 0.84,
747
- "grad_norm": 2.46875,
748
  "learning_rate": 3.8702478614051353e-07,
749
- "logits/chosen": 0.511390745639801,
750
- "logits/rejected": 0.6720080971717834,
751
- "logps/chosen": -116.7987060546875,
752
- "logps/rejected": -141.3931884765625,
753
- "loss": 0.6139,
754
- "pred_label": 0.0,
755
- "rewards/accuracies": 0.3812499940395355,
756
- "rewards/chosen": -0.4430771768093109,
757
- "rewards/margins": 0.3362268805503845,
758
- "rewards/rejected": -0.779304027557373,
759
  "step": 400,
760
- "use_label": 0.0
761
  },
762
  {
763
  "epoch": 0.84,
764
- "eval_logits/chosen": 1.4532994031906128,
765
- "eval_logits/rejected": 1.5453113317489624,
766
- "eval_logps/chosen": -112.56050109863281,
767
- "eval_logps/rejected": -162.19764709472656,
768
- "eval_loss": 0.6157013177871704,
769
- "eval_pred_label": 0.0,
770
  "eval_rewards/accuracies": 0.37109375,
771
- "eval_rewards/chosen": -0.4864824414253235,
772
- "eval_rewards/margins": 0.36356455087661743,
773
- "eval_rewards/rejected": -0.8500469923019409,
774
- "eval_runtime": 125.4203,
775
- "eval_samples_per_second": 15.946,
776
- "eval_steps_per_second": 0.255,
777
- "eval_use_label": 0.0,
778
  "step": 400
779
  },
780
  {
781
  "epoch": 0.86,
782
- "grad_norm": 2.203125,
783
  "learning_rate": 2.9492720416985004e-07,
784
- "logits/chosen": 0.8359997868537903,
785
- "logits/rejected": 0.8144146800041199,
786
- "logps/chosen": -110.30177307128906,
787
- "logps/rejected": -143.6800079345703,
788
- "loss": 0.6222,
789
- "pred_label": 0.0,
790
- "rewards/accuracies": 0.39375001192092896,
791
- "rewards/chosen": -0.4887877404689789,
792
- "rewards/margins": 0.3508199453353882,
793
- "rewards/rejected": -0.8396075963973999,
794
  "step": 410,
795
- "use_label": 0.0
796
  },
797
  {
798
  "epoch": 0.88,
799
- "grad_norm": 1.984375,
800
  "learning_rate": 2.1464952759020857e-07,
801
- "logits/chosen": 1.027252435684204,
802
- "logits/rejected": 0.9827619791030884,
803
- "logps/chosen": -106.49784851074219,
804
- "logps/rejected": -116.97566223144531,
805
- "loss": 0.6216,
806
- "pred_label": 0.0,
807
- "rewards/accuracies": 0.2750000059604645,
808
- "rewards/chosen": -0.4555872976779938,
809
- "rewards/margins": 0.20033884048461914,
810
- "rewards/rejected": -0.6559261083602905,
811
  "step": 420,
812
- "use_label": 0.0
813
  },
814
  {
815
  "epoch": 0.9,
816
- "grad_norm": 1.96875,
817
  "learning_rate": 1.4662207078575685e-07,
818
- "logits/chosen": 0.9206047058105469,
819
- "logits/rejected": 0.8673297166824341,
820
- "logps/chosen": -151.376220703125,
821
- "logps/rejected": -178.04725646972656,
822
- "loss": 0.5986,
823
- "pred_label": 0.0,
824
- "rewards/accuracies": 0.45625001192092896,
825
- "rewards/chosen": -0.5210937261581421,
826
- "rewards/margins": 0.46580758690834045,
827
- "rewards/rejected": -0.9869012832641602,
828
  "step": 430,
829
- "use_label": 0.0
830
  },
831
  {
832
  "epoch": 0.92,
833
- "grad_norm": 2.125,
834
  "learning_rate": 9.120948298936422e-08,
835
- "logits/chosen": 0.9004503488540649,
836
- "logits/rejected": 1.0573413372039795,
837
- "logps/chosen": -119.21500396728516,
838
- "logps/rejected": -165.19241333007812,
839
- "loss": 0.6064,
840
- "pred_label": 0.0,
841
- "rewards/accuracies": 0.35624998807907104,
842
- "rewards/chosen": -0.5231102705001831,
843
- "rewards/margins": 0.37818416953086853,
844
- "rewards/rejected": -0.9012944102287292,
845
  "step": 440,
846
- "use_label": 0.0
847
  },
848
  {
849
  "epoch": 0.94,
850
- "grad_norm": 2.46875,
851
  "learning_rate": 4.870879364444109e-08,
852
- "logits/chosen": 1.300728440284729,
853
- "logits/rejected": 1.0580918788909912,
854
- "logps/chosen": -129.29281616210938,
855
- "logps/rejected": -178.3690948486328,
856
- "loss": 0.6111,
857
- "pred_label": 0.0,
858
- "rewards/accuracies": 0.3499999940395355,
859
- "rewards/chosen": -0.570349931716919,
860
- "rewards/margins": 0.3304445147514343,
861
- "rewards/rejected": -0.9007943868637085,
862
  "step": 450,
863
- "use_label": 0.0
864
  },
865
  {
866
  "epoch": 0.96,
867
- "grad_norm": 1.8359375,
868
  "learning_rate": 1.93478202307823e-08,
869
- "logits/chosen": 1.1906068325042725,
870
- "logits/rejected": 1.2149587869644165,
871
- "logps/chosen": -83.74864196777344,
872
- "logps/rejected": -130.91348266601562,
873
- "loss": 0.6154,
874
- "pred_label": 0.0,
875
- "rewards/accuracies": 0.3375000059604645,
876
- "rewards/chosen": -0.3762877583503723,
877
- "rewards/margins": 0.2993956208229065,
878
- "rewards/rejected": -0.6756833791732788,
879
  "step": 460,
880
- "use_label": 0.0
881
  },
882
  {
883
  "epoch": 0.98,
884
- "grad_norm": 2.375,
885
  "learning_rate": 3.283947088983663e-09,
886
- "logits/chosen": 1.1844379901885986,
887
- "logits/rejected": 0.9474547505378723,
888
- "logps/chosen": -113.1079330444336,
889
- "logps/rejected": -141.49147033691406,
890
- "loss": 0.6213,
891
- "pred_label": 0.0,
892
- "rewards/accuracies": 0.3125,
893
- "rewards/chosen": -0.4577876627445221,
894
- "rewards/margins": 0.26655709743499756,
895
- "rewards/rejected": -0.7243447303771973,
896
  "step": 470,
897
- "use_label": 0.0
898
  },
899
  {
900
  "epoch": 1.0,
901
  "step": 477,
902
  "total_flos": 0.0,
903
- "train_loss": 0.6357159084743924,
904
- "train_runtime": 9601.7268,
905
- "train_samples_per_second": 6.367,
906
- "train_steps_per_second": 0.05
907
  }
908
  ],
909
  "logging_steps": 10,
 
23
  "rewards/margins": 0.0,
24
  "rewards/rejected": 0.0,
25
  "step": 1,
26
+ "use_label": 10.0
27
  },
28
  {
29
  "epoch": 0.02,
30
  "grad_norm": 0.4609375,
31
  "learning_rate": 1.0416666666666667e-06,
32
+ "logits/chosen": -2.2421748638153076,
33
+ "logits/rejected": -2.2769579887390137,
34
+ "logps/chosen": -51.987098693847656,
35
+ "logps/rejected": -64.96717071533203,
36
  "loss": 0.6929,
37
  "pred_label": 0.0,
38
+ "rewards/accuracies": 0.25,
39
+ "rewards/chosen": 0.0019227324519306421,
40
+ "rewards/margins": 0.0004911368596367538,
41
+ "rewards/rejected": 0.0014315954176709056,
42
  "step": 10,
43
+ "use_label": 90.0
44
  },
45
  {
46
  "epoch": 0.04,
47
  "grad_norm": 0.39453125,
48
  "learning_rate": 2.0833333333333334e-06,
49
+ "logits/chosen": -2.2521612644195557,
50
+ "logits/rejected": -2.255767822265625,
51
+ "logps/chosen": -62.4937629699707,
52
+ "logps/rejected": -72.63874816894531,
53
  "loss": 0.6919,
54
  "pred_label": 0.0,
55
  "rewards/accuracies": 0.2874999940395355,
56
+ "rewards/chosen": 0.01600126549601555,
57
+ "rewards/margins": 0.0011427802965044975,
58
+ "rewards/rejected": 0.0148584870621562,
59
  "step": 20,
60
+ "use_label": 242.0
61
  },
62
  {
63
  "epoch": 0.06,
64
+ "grad_norm": 0.51171875,
65
  "learning_rate": 3.125e-06,
66
+ "logits/chosen": -2.3423426151275635,
67
+ "logits/rejected": -2.3549609184265137,
68
+ "logps/chosen": -79.10475158691406,
69
+ "logps/rejected": -98.8157958984375,
70
+ "loss": 0.6897,
71
+ "pred_label": 0.0,
72
+ "rewards/accuracies": 0.28125,
73
+ "rewards/chosen": 0.03137165680527687,
74
+ "rewards/margins": 0.0032712810207158327,
75
+ "rewards/rejected": 0.028100375086069107,
76
  "step": 30,
77
+ "use_label": 402.0
78
  },
79
  {
80
  "epoch": 0.08,
81
+ "grad_norm": 0.51953125,
82
  "learning_rate": 4.166666666666667e-06,
83
+ "logits/chosen": -2.323338270187378,
84
+ "logits/rejected": -2.3015079498291016,
85
+ "logps/chosen": -82.85453796386719,
86
+ "logps/rejected": -82.39984893798828,
87
  "loss": 0.6866,
88
  "pred_label": 0.0,
89
  "rewards/accuracies": 0.2874999940395355,
90
+ "rewards/chosen": 0.03337595611810684,
91
+ "rewards/margins": 0.011919925920665264,
92
+ "rewards/rejected": 0.021456023678183556,
93
  "step": 40,
94
+ "use_label": 562.0
95
  },
96
  {
97
  "epoch": 0.1,
98
+ "grad_norm": 0.671875,
99
  "learning_rate": 4.999731868769027e-06,
100
+ "logits/chosen": -2.2404515743255615,
101
+ "logits/rejected": -2.262972354888916,
102
+ "logps/chosen": -67.89888000488281,
103
+ "logps/rejected": -81.8695068359375,
104
  "loss": 0.6805,
105
  "pred_label": 0.0,
106
  "rewards/accuracies": 0.32499998807907104,
107
+ "rewards/chosen": 0.009319942444562912,
108
+ "rewards/margins": 0.030618786811828613,
109
+ "rewards/rejected": -0.0212988443672657,
110
  "step": 50,
111
+ "use_label": 722.0
112
  },
113
  {
114
  "epoch": 0.13,
115
+ "grad_norm": 1.0234375,
116
  "learning_rate": 4.9903533134293035e-06,
117
+ "logits/chosen": -2.2157275676727295,
118
+ "logits/rejected": -2.155928134918213,
119
+ "logps/chosen": -63.64031982421875,
120
+ "logps/rejected": -73.28236389160156,
121
+ "loss": 0.6752,
122
  "pred_label": 0.0,
123
  "rewards/accuracies": 0.3062500059604645,
124
+ "rewards/chosen": -0.03914070501923561,
125
+ "rewards/margins": 0.04399287328124046,
126
+ "rewards/rejected": -0.08313358575105667,
127
  "step": 60,
128
+ "use_label": 882.0
129
  },
130
  {
131
  "epoch": 0.15,
132
+ "grad_norm": 0.859375,
133
  "learning_rate": 4.967625656594782e-06,
134
+ "logits/chosen": -2.114478588104248,
135
+ "logits/rejected": -2.1126065254211426,
136
+ "logps/chosen": -70.76527404785156,
137
+ "logps/rejected": -83.94652557373047,
138
+ "loss": 0.6712,
139
  "pred_label": 0.0,
140
  "rewards/accuracies": 0.25,
141
+ "rewards/chosen": -0.15054164826869965,
142
+ "rewards/margins": 0.030909737572073936,
143
+ "rewards/rejected": -0.18145139515399933,
144
  "step": 70,
145
+ "use_label": 1042.0
146
  },
147
  {
148
  "epoch": 0.17,
149
+ "grad_norm": 1.1640625,
150
  "learning_rate": 4.93167072587771e-06,
151
+ "logits/chosen": -2.2166943550109863,
152
+ "logits/rejected": -2.1609182357788086,
153
+ "logps/chosen": -54.8065185546875,
154
+ "logps/rejected": -69.45613861083984,
155
+ "loss": 0.6589,
156
+ "pred_label": 0.4749999940395355,
157
+ "rewards/accuracies": 0.26249998807907104,
158
+ "rewards/chosen": -0.06275613605976105,
159
+ "rewards/margins": 0.10003063827753067,
160
+ "rewards/rejected": -0.16278676688671112,
161
  "step": 80,
162
+ "use_label": 1201.5250244140625
163
  },
164
  {
165
  "epoch": 0.19,
166
+ "grad_norm": 1.8125,
167
  "learning_rate": 4.882681251368549e-06,
168
+ "logits/chosen": -1.9692049026489258,
169
+ "logits/rejected": -1.9792039394378662,
170
+ "logps/chosen": -76.60871887207031,
171
+ "logps/rejected": -96.53330993652344,
172
+ "loss": 0.6564,
173
+ "pred_label": 2.0999999046325684,
174
+ "rewards/accuracies": 0.29374998807907104,
175
+ "rewards/chosen": -0.18226662278175354,
176
+ "rewards/margins": 0.09542477130889893,
177
+ "rewards/rejected": -0.27769142389297485,
178
  "step": 90,
179
+ "use_label": 1359.9000244140625
180
  },
181
  {
182
  "epoch": 0.21,
183
+ "grad_norm": 2.171875,
184
  "learning_rate": 4.8209198325401815e-06,
185
+ "logits/chosen": -1.9027693271636963,
186
+ "logits/rejected": -1.8775581121444702,
187
+ "logps/chosen": -92.94733428955078,
188
+ "logps/rejected": -84.73824310302734,
189
+ "loss": 0.6531,
190
+ "pred_label": 4.0,
191
+ "rewards/accuracies": 0.32499998807907104,
192
+ "rewards/chosen": -0.12917451560497284,
193
+ "rewards/margins": 0.07954015582799911,
194
+ "rewards/rejected": -0.20871467888355255,
195
  "step": 100,
196
+ "use_label": 1518.0
197
  },
198
  {
199
  "epoch": 0.21,
200
+ "eval_logits/chosen": -1.7353737354278564,
201
+ "eval_logits/rejected": -1.7198325395584106,
202
+ "eval_logps/chosen": -80.33845520019531,
203
+ "eval_logps/rejected": -106.64702606201172,
204
+ "eval_loss": 0.6527961492538452,
205
+ "eval_pred_label": 6.6875,
206
+ "eval_rewards/accuracies": 0.36328125,
207
+ "eval_rewards/chosen": -0.1642620712518692,
208
+ "eval_rewards/margins": 0.13027876615524292,
209
+ "eval_rewards/rejected": -0.2945408225059509,
210
+ "eval_runtime": 125.2319,
211
+ "eval_samples_per_second": 15.97,
212
+ "eval_steps_per_second": 0.256,
213
+ "eval_use_label": 1725.3125,
214
  "step": 100
215
  },
216
  {
217
  "epoch": 0.23,
218
+ "grad_norm": 2.0,
219
  "learning_rate": 4.746717530629565e-06,
220
+ "logits/chosen": -1.7974278926849365,
221
+ "logits/rejected": -1.7697474956512451,
222
+ "logps/chosen": -89.79286193847656,
223
+ "logps/rejected": -113.6241455078125,
224
+ "loss": 0.6479,
225
+ "pred_label": 9.199999809265137,
226
+ "rewards/accuracies": 0.36250001192092896,
227
+ "rewards/chosen": -0.18692079186439514,
228
+ "rewards/margins": 0.16341358423233032,
229
+ "rewards/rejected": -0.3503343462944031,
230
  "step": 110,
231
+ "use_label": 1928.800048828125
232
  },
233
  {
234
  "epoch": 0.25,
235
+ "grad_norm": 2.890625,
236
  "learning_rate": 4.660472094042121e-06,
237
+ "logits/chosen": -1.454304814338684,
238
+ "logits/rejected": -1.3457725048065186,
239
+ "logps/chosen": -109.3675537109375,
240
+ "logps/rejected": -133.90725708007812,
241
+ "loss": 0.6432,
242
+ "pred_label": 14.949999809265137,
243
+ "rewards/accuracies": 0.34375,
244
+ "rewards/chosen": -0.3942197263240814,
245
+ "rewards/margins": 0.21566259860992432,
246
+ "rewards/rejected": -0.6098822951316833,
247
  "step": 120,
248
+ "use_label": 2083.050048828125
249
  },
250
  {
251
  "epoch": 0.27,
252
+ "grad_norm": 2.5625,
253
  "learning_rate": 4.5626458262912745e-06,
254
+ "logits/chosen": -1.0859026908874512,
255
+ "logits/rejected": -1.0426993370056152,
256
+ "logps/chosen": -112.0394515991211,
257
+ "logps/rejected": -139.61097717285156,
258
+ "loss": 0.6391,
259
+ "pred_label": 21.049999237060547,
260
+ "rewards/accuracies": 0.36250001192092896,
261
+ "rewards/chosen": -0.4626106321811676,
262
+ "rewards/margins": 0.20503444969654083,
263
+ "rewards/rejected": -0.6676451563835144,
264
  "step": 130,
265
+ "use_label": 2236.949951171875
266
  },
267
  {
268
  "epoch": 0.29,
269
+ "grad_norm": 2.484375,
270
  "learning_rate": 4.453763107901676e-06,
271
+ "logits/chosen": -0.735418975353241,
272
+ "logits/rejected": -0.8380192518234253,
273
+ "logps/chosen": -138.07081604003906,
274
+ "logps/rejected": -150.91665649414062,
275
+ "loss": 0.6252,
276
+ "pred_label": 31.399999618530273,
277
+ "rewards/accuracies": 0.30000001192092896,
278
+ "rewards/chosen": -0.5732325315475464,
279
+ "rewards/margins": 0.1448771208524704,
280
+ "rewards/rejected": -0.7181096076965332,
281
  "step": 140,
282
+ "use_label": 2386.60009765625
283
  },
284
  {
285
  "epoch": 0.31,
286
+ "grad_norm": 3.859375,
287
  "learning_rate": 4.33440758555951e-06,
288
+ "logits/chosen": -0.48231878876686096,
289
+ "logits/rejected": -0.43882569670677185,
290
+ "logps/chosen": -117.69664001464844,
291
+ "logps/rejected": -150.86083984375,
292
+ "loss": 0.6219,
293
+ "pred_label": 43.45000076293945,
294
+ "rewards/accuracies": 0.32499998807907104,
295
+ "rewards/chosen": -0.5254992246627808,
296
+ "rewards/margins": 0.3047201633453369,
297
+ "rewards/rejected": -0.8302194476127625,
298
  "step": 150,
299
+ "use_label": 2534.550048828125
300
  },
301
  {
302
  "epoch": 0.33,
303
+ "grad_norm": 2.890625,
304
  "learning_rate": 4.205219043576955e-06,
305
+ "logits/chosen": -0.15186011791229248,
306
+ "logits/rejected": -0.17336201667785645,
307
+ "logps/chosen": -128.78500366210938,
308
+ "logps/rejected": -159.26498413085938,
309
+ "loss": 0.5982,
310
+ "pred_label": 58.25,
311
+ "rewards/accuracies": 0.2750000059604645,
312
+ "rewards/chosen": -0.6445494294166565,
313
+ "rewards/margins": 0.17397476732730865,
314
+ "rewards/rejected": -0.818524181842804,
315
  "step": 160,
316
+ "use_label": 2679.75
317
  },
318
  {
319
  "epoch": 0.36,
320
+ "grad_norm": 3.328125,
321
  "learning_rate": 4.066889974440757e-06,
322
+ "logits/chosen": 0.14322622120380402,
323
+ "logits/rejected": 0.18100713193416595,
324
+ "logps/chosen": -108.39127349853516,
325
+ "logps/rejected": -140.55824279785156,
326
+ "loss": 0.5938,
327
+ "pred_label": 79.57499694824219,
328
+ "rewards/accuracies": 0.3062500059604645,
329
+ "rewards/chosen": -0.5288320779800415,
330
+ "rewards/margins": 0.23454061150550842,
331
+ "rewards/rejected": -0.7633727192878723,
332
  "step": 170,
333
+ "use_label": 2818.425048828125
334
  },
335
  {
336
  "epoch": 0.38,
337
+ "grad_norm": 3.0,
338
  "learning_rate": 3.92016186682789e-06,
339
+ "logits/chosen": -0.20601686835289001,
340
+ "logits/rejected": -0.09364790469408035,
341
+ "logps/chosen": -105.94217681884766,
342
+ "logps/rejected": -130.695556640625,
343
+ "loss": 0.6262,
344
+ "pred_label": 100.2750015258789,
345
  "rewards/accuracies": 0.35624998807907104,
346
+ "rewards/chosen": -0.4315454065799713,
347
+ "rewards/margins": 0.3125666677951813,
348
+ "rewards/rejected": -0.7441121339797974,
349
  "step": 180,
350
+ "use_label": 2957.72509765625
351
  },
352
  {
353
  "epoch": 0.4,
354
+ "grad_norm": 2.734375,
355
  "learning_rate": 3.7658212309857576e-06,
356
+ "logits/chosen": -0.34412023425102234,
357
+ "logits/rejected": -0.07299783080816269,
358
+ "logps/chosen": -107.5626449584961,
359
+ "logps/rejected": -141.1322479248047,
360
+ "loss": 0.6092,
361
+ "pred_label": 121.05000305175781,
362
+ "rewards/accuracies": 0.34375,
363
+ "rewards/chosen": -0.48451024293899536,
364
+ "rewards/margins": 0.28280580043792725,
365
+ "rewards/rejected": -0.7673160433769226,
366
  "step": 190,
367
+ "use_label": 3096.949951171875
368
  },
369
  {
370
  "epoch": 0.42,
371
+ "grad_norm": 6.5,
372
  "learning_rate": 3.604695382782159e-06,
373
+ "logits/chosen": 0.03128425031900406,
374
+ "logits/rejected": 0.20205454528331757,
375
+ "logps/chosen": -145.35342407226562,
376
+ "logps/rejected": -162.05667114257812,
377
+ "loss": 0.6041,
378
+ "pred_label": 135.89999389648438,
379
+ "rewards/accuracies": 0.30000001192092896,
380
+ "rewards/chosen": -0.6367710828781128,
381
+ "rewards/margins": 0.25234952569007874,
382
+ "rewards/rejected": -0.8891205787658691,
383
  "step": 200,
384
+ "use_label": 3242.10009765625
385
  },
386
  {
387
  "epoch": 0.42,
388
+ "eval_logits/chosen": 0.886444091796875,
389
+ "eval_logits/rejected": 0.9784458875656128,
390
+ "eval_logps/chosen": -135.34742736816406,
391
+ "eval_logps/rejected": -187.65963745117188,
392
+ "eval_loss": 0.5936154723167419,
393
+ "eval_pred_label": 167.40625,
394
+ "eval_rewards/accuracies": 0.3515625,
395
+ "eval_rewards/chosen": -0.7143516540527344,
396
+ "eval_rewards/margins": 0.3903152644634247,
397
+ "eval_rewards/rejected": -1.1046667098999023,
398
+ "eval_runtime": 125.3006,
399
+ "eval_samples_per_second": 15.962,
400
  "eval_steps_per_second": 0.255,
401
+ "eval_use_label": 3420.59375,
402
  "step": 200
403
  },
404
  {
405
  "epoch": 0.44,
406
+ "grad_norm": 3.796875,
407
  "learning_rate": 3.437648009023905e-06,
408
+ "logits/chosen": 0.6729141473770142,
409
+ "logits/rejected": 0.6579598188400269,
410
+ "logps/chosen": -119.19351959228516,
411
+ "logps/rejected": -159.00997924804688,
412
+ "loss": 0.5936,
413
+ "pred_label": 201.6999969482422,
414
+ "rewards/accuracies": 0.3499999940395355,
415
+ "rewards/chosen": -0.63218754529953,
416
+ "rewards/margins": 0.3281194567680359,
417
+ "rewards/rejected": -0.9603070020675659,
418
  "step": 210,
419
+ "use_label": 3592.300048828125
420
  },
421
  {
422
  "epoch": 0.46,
423
+ "grad_norm": 4.5,
424
  "learning_rate": 3.265574537815398e-06,
425
+ "logits/chosen": 0.2854166626930237,
426
+ "logits/rejected": 0.4488348066806793,
427
+ "logps/chosen": -148.92379760742188,
428
+ "logps/rejected": -161.19557189941406,
429
+ "loss": 0.5938,
430
+ "pred_label": 225.52499389648438,
431
+ "rewards/accuracies": 0.3125,
432
+ "rewards/chosen": -0.6929510235786438,
433
+ "rewards/margins": 0.2553243637084961,
434
+ "rewards/rejected": -0.9482753872871399,
435
  "step": 220,
436
+ "use_label": 3728.47509765625
437
  },
438
  {
439
  "epoch": 0.48,
440
+ "grad_norm": 2.28125,
441
  "learning_rate": 3.089397338773569e-06,
442
+ "logits/chosen": 0.00020002425299026072,
443
+ "logits/rejected": 0.1493436098098755,
444
+ "logps/chosen": -103.05213928222656,
445
+ "logps/rejected": -136.05099487304688,
446
+ "loss": 0.597,
447
+ "pred_label": 252.02499389648438,
448
+ "rewards/accuracies": 0.30000001192092896,
449
+ "rewards/chosen": -0.3861756920814514,
450
+ "rewards/margins": 0.3467464745044708,
451
+ "rewards/rejected": -0.7329221963882446,
452
  "step": 230,
453
+ "use_label": 3861.97509765625
454
  },
455
  {
456
  "epoch": 0.5,
457
+ "grad_norm": 3.125,
458
  "learning_rate": 2.9100607788275547e-06,
459
+ "logits/chosen": 0.49308425188064575,
460
+ "logits/rejected": 0.44487372040748596,
461
+ "logps/chosen": -109.46275329589844,
462
+ "logps/rejected": -153.8666534423828,
463
+ "loss": 0.584,
464
+ "pred_label": 275.375,
465
  "rewards/accuracies": 0.3687500059604645,
466
+ "rewards/chosen": -0.40430212020874023,
467
+ "rewards/margins": 0.3921273946762085,
468
+ "rewards/rejected": -0.7964295148849487,
469
  "step": 240,
470
+ "use_label": 3998.625
471
  },
472
  {
473
  "epoch": 0.52,
474
+ "grad_norm": 2.0625,
475
  "learning_rate": 2.72852616010567e-06,
476
+ "logits/chosen": 0.3891890347003937,
477
+ "logits/rejected": 0.47166162729263306,
478
+ "logps/chosen": -122.5915298461914,
479
+ "logps/rejected": -153.12493896484375,
480
+ "loss": 0.5998,
481
+ "pred_label": 301.875,
482
+ "rewards/accuracies": 0.36250001192092896,
483
+ "rewards/chosen": -0.4919242262840271,
484
+ "rewards/margins": 0.3470008671283722,
485
+ "rewards/rejected": -0.8389250636100769,
486
  "step": 250,
487
+ "use_label": 4132.125
488
  },
489
  {
490
  "epoch": 0.54,
491
+ "grad_norm": 2.171875,
492
  "learning_rate": 2.5457665670441937e-06,
493
+ "logits/chosen": 0.4214790463447571,
494
+ "logits/rejected": 0.4202333092689514,
495
+ "logps/chosen": -116.09378814697266,
496
+ "logps/rejected": -156.8458251953125,
497
+ "loss": 0.592,
498
+ "pred_label": 326.3500061035156,
499
+ "rewards/accuracies": 0.32499998807907104,
500
+ "rewards/chosen": -0.4998815953731537,
501
+ "rewards/margins": 0.324097216129303,
502
+ "rewards/rejected": -0.8239787817001343,
503
  "step": 260,
504
+ "use_label": 4267.64990234375
505
  },
506
  {
507
  "epoch": 0.57,
508
+ "grad_norm": 3.40625,
509
  "learning_rate": 2.3627616503391813e-06,
510
+ "logits/chosen": 0.9609361886978149,
511
+ "logits/rejected": 0.8760908246040344,
512
+ "logps/chosen": -142.81573486328125,
513
+ "logps/rejected": -170.10379028320312,
514
+ "loss": 0.5888,
515
+ "pred_label": 343.04998779296875,
516
+ "rewards/accuracies": 0.3375000059604645,
517
+ "rewards/chosen": -0.698999285697937,
518
+ "rewards/margins": 0.30828553438186646,
519
+ "rewards/rejected": -1.0072848796844482,
520
  "step": 270,
521
+ "use_label": 4410.9501953125
522
  },
523
  {
524
  "epoch": 0.59,
525
+ "grad_norm": 2.234375,
526
  "learning_rate": 2.1804923757009885e-06,
527
+ "logits/chosen": 1.1657536029815674,
528
+ "logits/rejected": 1.3259608745574951,
529
+ "logps/chosen": -131.4703826904297,
530
+ "logps/rejected": -156.4979248046875,
531
+ "loss": 0.6007,
532
+ "pred_label": 361.5,
533
+ "rewards/accuracies": 0.3375000059604645,
534
+ "rewards/chosen": -0.6596485376358032,
535
+ "rewards/margins": 0.27613458037376404,
536
+ "rewards/rejected": -0.9357832074165344,
537
  "step": 280,
538
+ "use_label": 4552.5
539
  },
540
  {
541
  "epoch": 0.61,
542
+ "grad_norm": 2.875,
543
  "learning_rate": 1.9999357655598894e-06,
544
+ "logits/chosen": 0.9594011306762695,
545
+ "logits/rejected": 0.9126796722412109,
546
+ "logps/chosen": -144.55104064941406,
547
+ "logps/rejected": -183.51065063476562,
548
+ "loss": 0.5899,
549
+ "pred_label": 386.07501220703125,
550
+ "rewards/accuracies": 0.3187499940395355,
551
+ "rewards/chosen": -0.7800258994102478,
552
+ "rewards/margins": 0.2914672791957855,
553
+ "rewards/rejected": -1.0714932680130005,
554
  "step": 290,
555
+ "use_label": 4687.9248046875
556
  },
557
  {
558
  "epoch": 0.63,
559
+ "grad_norm": 3.1875,
560
  "learning_rate": 1.8220596619089576e-06,
561
+ "logits/chosen": 1.2753574848175049,
562
+ "logits/rejected": 1.1057071685791016,
563
+ "logps/chosen": -165.4674072265625,
564
+ "logps/rejected": -223.6466064453125,
565
+ "loss": 0.5763,
566
+ "pred_label": 409.9750061035156,
567
+ "rewards/accuracies": 0.4124999940395355,
568
+ "rewards/chosen": -0.8787307739257812,
569
+ "rewards/margins": 0.41653138399124146,
570
+ "rewards/rejected": -1.295262098312378,
571
  "step": 300,
572
+ "use_label": 4824.02490234375
573
  },
574
  {
575
  "epoch": 0.63,
576
+ "eval_logits/chosen": 1.6598718166351318,
577
+ "eval_logits/rejected": 1.7526323795318604,
578
+ "eval_logps/chosen": -143.2136993408203,
579
+ "eval_logps/rejected": -200.36146545410156,
580
+ "eval_loss": 0.5773172974586487,
581
+ "eval_pred_label": 452.71875,
582
+ "eval_rewards/accuracies": 0.3515625,
583
+ "eval_rewards/chosen": -0.7930145263671875,
584
+ "eval_rewards/margins": 0.4386705756187439,
585
+ "eval_rewards/rejected": -1.2316851615905762,
586
+ "eval_runtime": 125.3512,
587
+ "eval_samples_per_second": 15.955,
588
  "eval_steps_per_second": 0.255,
589
+ "eval_use_label": 4991.28125,
590
  "step": 300
591
  },
592
  {
593
  "epoch": 0.65,
594
+ "grad_norm": 3.0625,
595
  "learning_rate": 1.647817538357072e-06,
596
+ "logits/chosen": 1.3793504238128662,
597
+ "logits/rejected": 1.4072078466415405,
598
+ "logps/chosen": -126.9173583984375,
599
+ "logps/rejected": -186.46255493164062,
600
+ "loss": 0.5633,
601
+ "pred_label": 494.42498779296875,
602
+ "rewards/accuracies": 0.36250001192092896,
603
+ "rewards/chosen": -0.7132076025009155,
604
+ "rewards/margins": 0.4689141809940338,
605
+ "rewards/rejected": -1.1821218729019165,
606
  "step": 310,
607
+ "use_label": 5155.5751953125
608
  },
609
  {
610
  "epoch": 0.67,
611
+ "grad_norm": 4.21875,
612
  "learning_rate": 1.4781433892011132e-06,
613
+ "logits/chosen": 1.2615296840667725,
614
+ "logits/rejected": 1.4717950820922852,
615
+ "logps/chosen": -163.67529296875,
616
+ "logps/rejected": -205.421142578125,
617
+ "loss": 0.5761,
618
+ "pred_label": 523.5250244140625,
619
+ "rewards/accuracies": 0.3812499940395355,
620
+ "rewards/chosen": -0.9060484766960144,
621
+ "rewards/margins": 0.4762052893638611,
622
+ "rewards/rejected": -1.382253885269165,
623
  "step": 320,
624
+ "use_label": 5286.47509765625
625
  },
626
  {
627
  "epoch": 0.69,
628
+ "grad_norm": 2.359375,
629
  "learning_rate": 1.3139467229135999e-06,
630
+ "logits/chosen": 1.4169238805770874,
631
+ "logits/rejected": 1.4296729564666748,
632
+ "logps/chosen": -150.2149200439453,
633
+ "logps/rejected": -186.73570251464844,
634
+ "loss": 0.5799,
635
+ "pred_label": 550.125,
636
  "rewards/accuracies": 0.33125001192092896,
637
+ "rewards/chosen": -0.8010675311088562,
638
+ "rewards/margins": 0.3802093267440796,
639
+ "rewards/rejected": -1.1812770366668701,
640
  "step": 330,
641
+ "use_label": 5419.875
642
  },
643
  {
644
  "epoch": 0.71,
645
+ "grad_norm": 3.15625,
646
  "learning_rate": 1.1561076868822756e-06,
647
+ "logits/chosen": 0.9984269142150879,
648
+ "logits/rejected": 0.9373771548271179,
649
+ "logps/chosen": -161.85842895507812,
650
+ "logps/rejected": -182.74703979492188,
651
+ "loss": 0.5933,
652
+ "pred_label": 567.2000122070312,
653
+ "rewards/accuracies": 0.32499998807907104,
654
+ "rewards/chosen": -0.771192193031311,
655
+ "rewards/margins": 0.2911759614944458,
656
+ "rewards/rejected": -1.0623681545257568,
657
  "step": 340,
658
+ "use_label": 5562.7998046875
659
  },
660
  {
661
  "epoch": 0.73,
662
+ "grad_norm": 2.59375,
663
  "learning_rate": 1.0054723495346484e-06,
664
+ "logits/chosen": 0.83796626329422,
665
+ "logits/rejected": 0.8520887494087219,
666
+ "logps/chosen": -176.03054809570312,
667
+ "logps/rejected": -217.10214233398438,
668
+ "loss": 0.5863,
669
+ "pred_label": 598.5750122070312,
670
  "rewards/accuracies": 0.36250001192092896,
671
+ "rewards/chosen": -0.86613929271698,
672
+ "rewards/margins": 0.4522012174129486,
673
+ "rewards/rejected": -1.318340539932251,
674
  "step": 350,
675
+ "use_label": 5691.4248046875
676
  },
677
  {
678
  "epoch": 0.75,
679
+ "grad_norm": 2.234375,
680
  "learning_rate": 8.628481651367876e-07,
681
+ "logits/chosen": 0.7010875940322876,
682
+ "logits/rejected": 0.8413160443305969,
683
+ "logps/chosen": -126.9655532836914,
684
+ "logps/rejected": -182.5807342529297,
685
+ "loss": 0.5885,
686
+ "pred_label": 629.0,
687
+ "rewards/accuracies": 0.39375001192092896,
688
+ "rewards/chosen": -0.6332792043685913,
689
+ "rewards/margins": 0.47024598717689514,
690
+ "rewards/rejected": -1.103525161743164,
691
  "step": 360,
692
+ "use_label": 5821.0
693
  },
694
  {
695
  "epoch": 0.77,
696
+ "grad_norm": 2.4375,
697
  "learning_rate": 7.289996455765749e-07,
698
+ "logits/chosen": 0.8454801440238953,
699
+ "logits/rejected": 0.9659041166305542,
700
+ "logps/chosen": -120.26502990722656,
701
+ "logps/rejected": -170.44923400878906,
702
+ "loss": 0.585,
703
+ "pred_label": 655.625,
704
+ "rewards/accuracies": 0.34375,
705
+ "rewards/chosen": -0.5483022928237915,
706
+ "rewards/margins": 0.4770358204841614,
707
+ "rewards/rejected": -1.0253381729125977,
708
  "step": 370,
709
+ "use_label": 5954.375
710
  },
711
  {
712
  "epoch": 0.8,
713
+ "grad_norm": 3.5625,
714
  "learning_rate": 6.046442623320145e-07,
715
+ "logits/chosen": 0.7346574664115906,
716
+ "logits/rejected": 0.7028430104255676,
717
+ "logps/chosen": -131.0785675048828,
718
+ "logps/rejected": -188.57435607910156,
719
+ "loss": 0.589,
720
+ "pred_label": 685.5250244140625,
721
  "rewards/accuracies": 0.3187499940395355,
722
+ "rewards/chosen": -0.6524317264556885,
723
+ "rewards/margins": 0.3696710765361786,
724
+ "rewards/rejected": -1.0221028327941895,
725
  "step": 380,
726
+ "use_label": 6084.47509765625
727
  },
728
  {
729
  "epoch": 0.82,
730
+ "grad_norm": 3.3125,
731
  "learning_rate": 4.904486005914027e-07,
732
+ "logits/chosen": 1.1143369674682617,
733
+ "logits/rejected": 0.8643951416015625,
734
+ "logps/chosen": -179.11276245117188,
735
+ "logps/rejected": -220.11068725585938,
736
+ "loss": 0.5727,
737
+ "pred_label": 717.625,
738
+ "rewards/accuracies": 0.375,
739
+ "rewards/chosen": -0.8629444241523743,
740
+ "rewards/margins": 0.508463442325592,
741
+ "rewards/rejected": -1.3714077472686768,
742
  "step": 390,
743
+ "use_label": 6212.375
744
  },
745
  {
746
  "epoch": 0.84,
747
+ "grad_norm": 3.25,
748
  "learning_rate": 3.8702478614051353e-07,
749
+ "logits/chosen": 0.8043449521064758,
750
+ "logits/rejected": 0.9917415380477905,
751
+ "logps/chosen": -130.07017517089844,
752
+ "logps/rejected": -163.469970703125,
753
+ "loss": 0.5836,
754
+ "pred_label": 747.5750122070312,
755
+ "rewards/accuracies": 0.375,
756
+ "rewards/chosen": -0.5757918953895569,
757
+ "rewards/margins": 0.42427974939346313,
758
+ "rewards/rejected": -1.0000715255737305,
759
  "step": 400,
760
+ "use_label": 6342.4248046875
761
  },
762
  {
763
  "epoch": 0.84,
764
+ "eval_logits/chosen": 1.75760817527771,
765
+ "eval_logits/rejected": 1.8499951362609863,
766
+ "eval_logps/chosen": -130.3719482421875,
767
+ "eval_logps/rejected": -190.7267303466797,
768
+ "eval_loss": 0.5768851041793823,
769
+ "eval_pred_label": 782.8125,
770
  "eval_rewards/accuracies": 0.37109375,
771
+ "eval_rewards/chosen": -0.6645968556404114,
772
+ "eval_rewards/margins": 0.4707409739494324,
773
+ "eval_rewards/rejected": -1.1353378295898438,
774
+ "eval_runtime": 147.391,
775
+ "eval_samples_per_second": 13.569,
776
+ "eval_steps_per_second": 0.217,
777
+ "eval_use_label": 6517.1875,
778
  "step": 400
779
  },
780
  {
781
  "epoch": 0.86,
782
+ "grad_norm": 3.390625,
783
  "learning_rate": 2.9492720416985004e-07,
784
+ "logits/chosen": 1.1002473831176758,
785
+ "logits/rejected": 1.1428117752075195,
786
+ "logps/chosen": -126.85247802734375,
787
+ "logps/rejected": -170.77365112304688,
788
+ "loss": 0.5838,
789
+ "pred_label": 822.75,
790
+ "rewards/accuracies": 0.3687500059604645,
791
+ "rewards/chosen": -0.6542948484420776,
792
+ "rewards/margins": 0.4562492370605469,
793
+ "rewards/rejected": -1.1105440855026245,
794
  "step": 410,
795
+ "use_label": 6683.25
796
  },
797
  {
798
  "epoch": 0.88,
799
+ "grad_norm": 2.609375,
800
  "learning_rate": 2.1464952759020857e-07,
801
+ "logits/chosen": 1.3246395587921143,
802
+ "logits/rejected": 1.2824434041976929,
803
+ "logps/chosen": -122.80003356933594,
804
+ "logps/rejected": -138.56423950195312,
805
+ "loss": 0.5822,
806
+ "pred_label": 846.4249877929688,
807
+ "rewards/accuracies": 0.26249998807907104,
808
+ "rewards/chosen": -0.6186091303825378,
809
+ "rewards/margins": 0.25320303440093994,
810
+ "rewards/rejected": -0.8718121647834778,
811
  "step": 420,
812
+ "use_label": 6819.5751953125
813
  },
814
  {
815
  "epoch": 0.9,
816
+ "grad_norm": 2.09375,
817
  "learning_rate": 1.4662207078575685e-07,
818
+ "logits/chosen": 1.270193099975586,
819
+ "logits/rejected": 1.253873348236084,
820
+ "logps/chosen": -171.46336364746094,
821
+ "logps/rejected": -207.75607299804688,
822
+ "loss": 0.564,
823
+ "pred_label": 873.5499877929688,
824
+ "rewards/accuracies": 0.4625000059604645,
825
+ "rewards/chosen": -0.7219651341438293,
826
+ "rewards/margins": 0.5620242357254028,
827
+ "rewards/rejected": -1.283989429473877,
828
  "step": 430,
829
+ "use_label": 6952.4501953125
830
  },
831
  {
832
  "epoch": 0.92,
833
+ "grad_norm": 2.578125,
834
  "learning_rate": 9.120948298936422e-08,
835
+ "logits/chosen": 1.221411943435669,
836
+ "logits/rejected": 1.397247552871704,
837
+ "logps/chosen": -136.4575653076172,
838
+ "logps/rejected": -193.40870666503906,
839
+ "loss": 0.5736,
840
+ "pred_label": 905.5750122070312,
841
+ "rewards/accuracies": 0.36250001192092896,
842
+ "rewards/chosen": -0.6955360770225525,
843
+ "rewards/margins": 0.4879213869571686,
844
+ "rewards/rejected": -1.183457612991333,
845
  "step": 440,
846
+ "use_label": 7080.4248046875
847
  },
848
  {
849
  "epoch": 0.94,
850
+ "grad_norm": 4.78125,
851
  "learning_rate": 4.870879364444109e-08,
852
+ "logits/chosen": 1.6054052114486694,
853
+ "logits/rejected": 1.3484258651733398,
854
+ "logps/chosen": -148.17161560058594,
855
+ "logps/rejected": -205.789306640625,
856
+ "loss": 0.583,
857
+ "pred_label": 930.4749755859375,
858
+ "rewards/accuracies": 0.35624998807907104,
859
+ "rewards/chosen": -0.7591380476951599,
860
+ "rewards/margins": 0.4158584177494049,
861
+ "rewards/rejected": -1.1749964952468872,
862
  "step": 450,
863
+ "use_label": 7215.52490234375
864
  },
865
  {
866
  "epoch": 0.96,
867
+ "grad_norm": 2.875,
868
  "learning_rate": 1.93478202307823e-08,
869
+ "logits/chosen": 1.4640157222747803,
870
+ "logits/rejected": 1.4903802871704102,
871
+ "logps/chosen": -96.6323471069336,
872
+ "logps/rejected": -150.8868865966797,
873
+ "loss": 0.5814,
874
+ "pred_label": 961.25,
875
+ "rewards/accuracies": 0.32499998807907104,
876
+ "rewards/chosen": -0.5051247477531433,
877
+ "rewards/margins": 0.3702928125858307,
878
+ "rewards/rejected": -0.8754175901412964,
879
  "step": 460,
880
+ "use_label": 7344.75
881
  },
882
  {
883
  "epoch": 0.98,
884
+ "grad_norm": 2.75,
885
  "learning_rate": 3.283947088983663e-09,
886
+ "logits/chosen": 1.464422345161438,
887
+ "logits/rejected": 1.2297132015228271,
888
+ "logps/chosen": -130.30838012695312,
889
+ "logps/rejected": -166.67605590820312,
890
+ "loss": 0.5822,
891
+ "pred_label": 982.8499755859375,
892
+ "rewards/accuracies": 0.30000001192092896,
893
+ "rewards/chosen": -0.6297920346260071,
894
+ "rewards/margins": 0.34639838337898254,
895
+ "rewards/rejected": -0.9761903882026672,
896
  "step": 470,
897
+ "use_label": 7483.14990234375
898
  },
899
  {
900
  "epoch": 1.0,
901
  "step": 477,
902
  "total_flos": 0.0,
903
+ "train_loss": 0.6110695428068533,
904
+ "train_runtime": 9999.3279,
905
+ "train_samples_per_second": 6.114,
906
+ "train_steps_per_second": 0.048
907
  }
908
  ],
909
  "logging_steps": 10,