DeepDream2045 commited on
Commit
de9212c
·
verified ·
1 Parent(s): c7d051f

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:147088364087c3fad831775bc4469f46b950d182162ba02099669db1fe55f4af
3
  size 113864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aace6758aa333819f1d4668b2d13633c6c2718433ec6dbc84c79e9b4712b6ea
3
  size 113864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ceba59b68425b47fb51eefdfe5c5aa4595e5a3557307d13e74b2314aff76372
3
  size 244554
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a3dd285134fb12f75c1b167173fe8cf7c2a29221bbb324e8cf314dfe19f52e0
3
  size 244554
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9cd81c56c60d7508b1b3f30d02a538a526d8fbeceb1c089c3417001c05ccc1d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e09cc71e727f796256be622d90a4d3d7f00f1b4922fdd76300ccc5819359e922
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:430a0fac64bfcd46ac91ef4a7b278a6834898a13d105fde4623d6c1e2515fa17
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e27910f8d144b5cddf12f704ada3c8509971bf1c6aaf0d219a83b4cdc30c4841
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f86b25789c082b2fd448f1034ad4a179d965deca6e8b4c22aa5bbe3df85d6bd6
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1ccdb4caecd9cae2621f3f792975b146d7ecc5d6593b1955a0d774c67bdb21b
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f589e486dc0a192b5a542614dbf6b8b5f161b4f5ea456e379174bcdfd96cb9b0
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53635b930d2269330cdda6f49cd5c3974b1acf8ffb2673e19621d554c8c3b59c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 10.364595413208008,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
- "epoch": 0.04056795131845842,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,189 @@
198
  "eval_samples_per_second": 617.126,
199
  "eval_steps_per_second": 77.141,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -221,12 +404,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 41877189427200.0,
230
  "train_batch_size": 2,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
+ "best_metric": 10.35922622680664,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 0.08113590263691683,
5
  "eval_steps": 25,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 617.126,
199
  "eval_steps_per_second": 77.141,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.042190669371196754,
204
+ "grad_norm": 3.5818777084350586,
205
+ "learning_rate": 5e-05,
206
+ "loss": 10.3649,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.04381338742393509,
211
+ "grad_norm": 3.4645745754241943,
212
+ "learning_rate": 4.6729843538492847e-05,
213
+ "loss": 10.364,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.04543610547667343,
218
+ "grad_norm": 3.6278162002563477,
219
+ "learning_rate": 4.347369038899744e-05,
220
+ "loss": 10.3643,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.047058823529411764,
225
+ "grad_norm": 3.793539524078369,
226
+ "learning_rate": 4.0245483899193595e-05,
227
+ "loss": 10.3632,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.0486815415821501,
232
+ "grad_norm": 3.703801393508911,
233
+ "learning_rate": 3.705904774487396e-05,
234
+ "loss": 10.3633,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.05030425963488844,
239
+ "grad_norm": 3.6949877738952637,
240
+ "learning_rate": 3.392802673484193e-05,
241
+ "loss": 10.3622,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 0.051926977687626774,
246
+ "grad_norm": 3.5052080154418945,
247
+ "learning_rate": 3.086582838174551e-05,
248
+ "loss": 10.3632,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.05354969574036511,
253
+ "grad_norm": 3.9035098552703857,
254
+ "learning_rate": 2.7885565489049946e-05,
255
+ "loss": 10.3618,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 0.05517241379310345,
260
+ "grad_norm": 3.7165908813476562,
261
+ "learning_rate": 2.500000000000001e-05,
262
+ "loss": 10.3643,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 0.056795131845841784,
267
+ "grad_norm": 3.94091534614563,
268
+ "learning_rate": 2.2221488349019903e-05,
269
+ "loss": 10.3604,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 0.05841784989858012,
274
+ "grad_norm": 3.948307752609253,
275
+ "learning_rate": 1.9561928549563968e-05,
276
+ "loss": 10.3584,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 0.06004056795131846,
281
+ "grad_norm": 4.001682758331299,
282
+ "learning_rate": 1.703270924499656e-05,
283
+ "loss": 10.361,
284
+ "step": 37
285
+ },
286
+ {
287
+ "epoch": 0.061663286004056794,
288
+ "grad_norm": 4.184301853179932,
289
+ "learning_rate": 1.4644660940672627e-05,
290
+ "loss": 10.3577,
291
+ "step": 38
292
+ },
293
+ {
294
+ "epoch": 0.06328600405679513,
295
+ "grad_norm": 4.08730411529541,
296
+ "learning_rate": 1.2408009626051137e-05,
297
+ "loss": 10.3604,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 0.06490872210953347,
302
+ "grad_norm": 3.9339916706085205,
303
+ "learning_rate": 1.0332332985438248e-05,
304
+ "loss": 10.3606,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 0.0665314401622718,
309
+ "grad_norm": 4.170976161956787,
310
+ "learning_rate": 8.426519384872733e-06,
311
+ "loss": 10.3583,
312
+ "step": 41
313
+ },
314
+ {
315
+ "epoch": 0.06815415821501014,
316
+ "grad_norm": 4.157967567443848,
317
+ "learning_rate": 6.698729810778065e-06,
318
+ "loss": 10.3596,
319
+ "step": 42
320
+ },
321
+ {
322
+ "epoch": 0.06977687626774848,
323
+ "grad_norm": 3.94195556640625,
324
+ "learning_rate": 5.156362923365588e-06,
325
+ "loss": 10.361,
326
+ "step": 43
327
+ },
328
+ {
329
+ "epoch": 0.07139959432048681,
330
+ "grad_norm": 3.8017725944519043,
331
+ "learning_rate": 3.8060233744356633e-06,
332
+ "loss": 10.3617,
333
+ "step": 44
334
+ },
335
+ {
336
+ "epoch": 0.07302231237322515,
337
+ "grad_norm": 4.163522720336914,
338
+ "learning_rate": 2.653493525244721e-06,
339
+ "loss": 10.361,
340
+ "step": 45
341
+ },
342
+ {
343
+ "epoch": 0.07464503042596349,
344
+ "grad_norm": 3.995657444000244,
345
+ "learning_rate": 1.70370868554659e-06,
346
+ "loss": 10.3593,
347
+ "step": 46
348
+ },
349
+ {
350
+ "epoch": 0.07626774847870182,
351
+ "grad_norm": 4.11703634262085,
352
+ "learning_rate": 9.607359798384785e-07,
353
+ "loss": 10.3603,
354
+ "step": 47
355
+ },
356
+ {
357
+ "epoch": 0.07789046653144016,
358
+ "grad_norm": 4.220155715942383,
359
+ "learning_rate": 4.277569313094809e-07,
360
+ "loss": 10.3574,
361
+ "step": 48
362
+ },
363
+ {
364
+ "epoch": 0.0795131845841785,
365
+ "grad_norm": 4.050826549530029,
366
+ "learning_rate": 1.0705383806982606e-07,
367
+ "loss": 10.3566,
368
+ "step": 49
369
+ },
370
+ {
371
+ "epoch": 0.08113590263691683,
372
+ "grad_norm": 4.6649274826049805,
373
+ "learning_rate": 0.0,
374
+ "loss": 10.3551,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 0.08113590263691683,
379
+ "eval_loss": 10.35922622680664,
380
+ "eval_runtime": 3.6442,
381
+ "eval_samples_per_second": 1139.359,
382
+ "eval_steps_per_second": 142.42,
383
+ "step": 50
384
  }
385
  ],
386
  "logging_steps": 1,
 
404
  "should_evaluate": false,
405
  "should_log": false,
406
  "should_save": true,
407
+ "should_training_stop": true
408
  },
409
  "attributes": {}
410
  }
411
  },
412
+ "total_flos": 83754378854400.0,
413
  "train_batch_size": 2,
414
  "trial_name": null,
415
  "trial_params": null