k-r-l committed on
Commit
ad45d66
verified
1 Parent(s): bfb8de2

Training in progress, step 12, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b1de24cbfa9f4cd9b6b61603c5c430d66f5117b0066916e6a94348617ac0ab3
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49859a687f5bcf62f01bc8c9be1882fef9ff208931820730895575f61667169
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b68b8c960390833091b8923f7bf648b58f1b9f278a335e629b104d215fc140a
3
  size 42545748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29c35731a9801a0d6782d07ac7c7beaaec9504135a62fbfbd41a6f5842ef98a5
3
  size 42545748
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a3d18e607a27cfab9cec7cfaa7384cdb877a2330c4bcd4e1efcae25be9908cb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6dc9dacb9cf3beacf8a1b58112bb95fab90581585484c32e86dfb3d4ea057b6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.163109270468302,
5
  "eval_steps": 500,
6
- "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -151,6 +151,78 @@
151
  "rewards/margins": 0.0022785186301916838,
152
  "rewards/rejected": -0.3476927876472473,
153
  "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  }
155
  ],
156
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.244663905702453,
5
  "eval_steps": 500,
6
+ "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
151
  "rewards/margins": 0.0022785186301916838,
152
  "rewards/rejected": -0.3476927876472473,
153
  "step": 8
154
+ },
155
+ {
156
+ "epoch": 0.18349792927683975,
157
+ "grad_norm": 6.6126298904418945,
158
+ "learning_rate": 9e-05,
159
+ "log_odds_chosen": 0.16790322959423065,
160
+ "log_odds_ratio": -0.6347489356994629,
161
+ "logits/chosen": -2.5679516792297363,
162
+ "logits/rejected": -2.5715692043304443,
163
+ "logps/chosen": -2.9835710525512695,
164
+ "logps/rejected": -3.140634059906006,
165
+ "loss": 3.2274,
166
+ "nll_loss": 3.163942813873291,
167
+ "rewards/accuracies": 0.625,
168
+ "rewards/chosen": -0.2983570992946625,
169
+ "rewards/margins": 0.015706289559602737,
170
+ "rewards/rejected": -0.3140634000301361,
171
+ "step": 9
172
+ },
173
+ {
174
+ "epoch": 0.20388658808537752,
175
+ "grad_norm": 6.9558281898498535,
176
+ "learning_rate": 0.0001,
177
+ "log_odds_chosen": 0.17972886562347412,
178
+ "log_odds_ratio": -0.6277650594711304,
179
+ "logits/chosen": -2.6125497817993164,
180
+ "logits/rejected": -2.6118133068084717,
181
+ "logps/chosen": -2.6501834392547607,
182
+ "logps/rejected": -2.8179259300231934,
183
+ "loss": 2.8684,
184
+ "nll_loss": 2.8056435585021973,
185
+ "rewards/accuracies": 0.703125,
186
+ "rewards/chosen": -0.26501837372779846,
187
+ "rewards/margins": 0.01677425391972065,
188
+ "rewards/rejected": -0.2817925810813904,
189
+ "step": 10
190
+ },
191
+ {
192
+ "epoch": 0.22427524689391526,
193
+ "grad_norm": 7.142885684967041,
194
+ "learning_rate": 9.743589743589744e-05,
195
+ "log_odds_chosen": 0.11056404560804367,
196
+ "log_odds_ratio": -0.662803053855896,
197
+ "logits/chosen": -2.665982723236084,
198
+ "logits/rejected": -2.6722326278686523,
199
+ "logps/chosen": -2.1527411937713623,
200
+ "logps/rejected": -2.2486300468444824,
201
+ "loss": 2.383,
202
+ "nll_loss": 2.316676378250122,
203
+ "rewards/accuracies": 0.625,
204
+ "rewards/chosen": -0.2152741551399231,
205
+ "rewards/margins": 0.009588859975337982,
206
+ "rewards/rejected": -0.2248629927635193,
207
+ "step": 11
208
+ },
209
+ {
210
+ "epoch": 0.244663905702453,
211
+ "grad_norm": 4.808487415313721,
212
+ "learning_rate": 9.487179487179487e-05,
213
+ "log_odds_chosen": 0.09381386637687683,
214
+ "log_odds_ratio": -0.6690701842308044,
215
+ "logits/chosen": -2.6697304248809814,
216
+ "logits/rejected": -2.684809446334839,
217
+ "logps/chosen": -1.6216576099395752,
218
+ "logps/rejected": -1.695598840713501,
219
+ "loss": 1.9113,
220
+ "nll_loss": 1.8443692922592163,
221
+ "rewards/accuracies": 0.53125,
222
+ "rewards/chosen": -0.16216576099395752,
223
+ "rewards/margins": 0.007394128944724798,
224
+ "rewards/rejected": -0.16955989599227905,
225
+ "step": 12
226
  }
227
  ],
228
  "logging_steps": 1,