dada22231 committed
Commit 97aa5bd · verified · 1 Parent(s): 75f39ed

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2259d59255d1fdc1675c0b0524d722cd97fc01ba7beab4c7c6d543525a0cb704
+ oid sha256:1d6239c7e17da49c894b6f2677ee4ab56f59482de745f43820e0c683a1e16fb9
  size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c047fddd711af36e1b3aa40f7b4ed12c55fd55e49eeaddc0302fbf033428b3cf
+ oid sha256:73887fccc0e79f65dbc64a0060a90d28f27fdff3c36d6833e25595fdc9b6e1f9
  size 646253418
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3fbe7b0e2c56dd6a53333476c13f95c955977e2b58c3d045ab5a67beefef3a75
+ oid sha256:dc5e23a004b9780c6ae9fa9f4fbdcb5420e59fbb989bbee335e6208e9cac1d22
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5b60df96473e6ef83c0bd6e16377bfd2fdfeb6e5df729f23b06aaef5aec0c355
+ oid sha256:055f375cd18ca5ae032eea15fdb1794195933f706d8341b766779e9bc44c6dc8
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d10c01c66b001fcc8d24d40c093888a7ea2dcb0a20768149a5f765080a15d2fe
+ oid sha256:4c4a857d6368ea00314aff3a472dff2412db2f8a467f06d999f0fcf84e4bafdd
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5ec39118955f17c904b7c8e051801b926b756ca7f61042cce5e5a8c5942d61d
+ oid sha256:02942c51dd86f72a9ed350b3926c55e9c9d59d1fa861ba343d8b37eabab10a65
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:df0d85110df25b13ee5d42e45cc81a4d1d50e1fb0599366d48777f04979e6c88
+ oid sha256:e13907c7b4b4002600f5c88eb70a7e187a99ef52464c3c8ca52c5bbdcac0bd3d
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.907273530960083,
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
- "epoch": 1.2066365007541477,
+ "best_metric": 0.8835566639900208,
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
+ "epoch": 2.4132730015082955,
  "eval_steps": 25,
- "global_step": 25,
+ "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -198,6 +198,189 @@
  "eval_samples_per_second": 15.455,
  "eval_steps_per_second": 4.018,
  "step": 25
+ },
+ {
+ "epoch": 1.2549019607843137,
+ "grad_norm": 0.5164334177970886,
+ "learning_rate": 6.978440718598757e-05,
+ "loss": 0.9255,
+ "step": 26
+ },
+ {
+ "epoch": 1.3031674208144797,
+ "grad_norm": 0.3558405339717865,
+ "learning_rate": 6.757685356832243e-05,
+ "loss": 0.9118,
+ "step": 27
+ },
+ {
+ "epoch": 1.3514328808446456,
+ "grad_norm": 0.3763732314109802,
+ "learning_rate": 6.533594839593081e-05,
+ "loss": 0.9167,
+ "step": 28
+ },
+ {
+ "epoch": 1.3996983408748114,
+ "grad_norm": 0.41895684599876404,
+ "learning_rate": 6.306763414648311e-05,
+ "loss": 0.8792,
+ "step": 29
+ },
+ {
+ "epoch": 1.4479638009049773,
+ "grad_norm": 0.4729040861129761,
+ "learning_rate": 6.07779259815948e-05,
+ "loss": 0.8689,
+ "step": 30
+ },
+ {
+ "epoch": 1.4962292609351433,
+ "grad_norm": 0.4113113284111023,
+ "learning_rate": 5.84728957956991e-05,
+ "loss": 0.8323,
+ "step": 31
+ },
+ {
+ "epoch": 1.544494720965309,
+ "grad_norm": 0.38738012313842773,
+ "learning_rate": 5.61586561144745e-05,
+ "loss": 0.9668,
+ "step": 32
+ },
+ {
+ "epoch": 1.5927601809954752,
+ "grad_norm": 0.4025406241416931,
+ "learning_rate": 5.384134388552552e-05,
+ "loss": 0.9097,
+ "step": 33
+ },
+ {
+ "epoch": 1.641025641025641,
+ "grad_norm": 0.4479067027568817,
+ "learning_rate": 5.152710420430091e-05,
+ "loss": 0.9427,
+ "step": 34
+ },
+ {
+ "epoch": 1.689291101055807,
+ "grad_norm": 0.44481536746025085,
+ "learning_rate": 4.9222074018405206e-05,
+ "loss": 0.8624,
+ "step": 35
+ },
+ {
+ "epoch": 1.737556561085973,
+ "grad_norm": 0.46713030338287354,
+ "learning_rate": 4.693236585351691e-05,
+ "loss": 0.8882,
+ "step": 36
+ },
+ {
+ "epoch": 1.7858220211161386,
+ "grad_norm": 0.3744671642780304,
+ "learning_rate": 4.4664051604069214e-05,
+ "loss": 0.9416,
+ "step": 37
+ },
+ {
+ "epoch": 1.8340874811463048,
+ "grad_norm": 0.44846299290657043,
+ "learning_rate": 4.2423146431677585e-05,
+ "loss": 1.0138,
+ "step": 38
+ },
+ {
+ "epoch": 1.8823529411764706,
+ "grad_norm": 0.4410933256149292,
+ "learning_rate": 4.021559281401244e-05,
+ "loss": 0.8851,
+ "step": 39
+ },
+ {
+ "epoch": 1.9306184012066365,
+ "grad_norm": 0.47139236330986023,
+ "learning_rate": 3.804724478641667e-05,
+ "loss": 0.8259,
+ "step": 40
+ },
+ {
+ "epoch": 1.9788838612368025,
+ "grad_norm": 0.5577002167701721,
+ "learning_rate": 3.592385241805628e-05,
+ "loss": 1.0754,
+ "step": 41
+ },
+ {
+ "epoch": 2.0271493212669682,
+ "grad_norm": 0.4690263569355011,
+ "learning_rate": 3.385104656377062e-05,
+ "loss": 0.9935,
+ "step": 42
+ },
+ {
+ "epoch": 2.0754147812971344,
+ "grad_norm": 0.38023629784584045,
+ "learning_rate": 3.183432393205763e-05,
+ "loss": 0.8952,
+ "step": 43
+ },
+ {
+ "epoch": 2.1236802413273,
+ "grad_norm": 0.41368529200553894,
+ "learning_rate": 2.9879032508791093e-05,
+ "loss": 0.9206,
+ "step": 44
+ },
+ {
+ "epoch": 2.171945701357466,
+ "grad_norm": 0.4637509882450104,
+ "learning_rate": 2.799035737532344e-05,
+ "loss": 0.826,
+ "step": 45
+ },
+ {
+ "epoch": 2.220211161387632,
+ "grad_norm": 0.510759711265564,
+ "learning_rate": 2.6173306958582123e-05,
+ "loss": 0.8312,
+ "step": 46
+ },
+ {
+ "epoch": 2.268476621417798,
+ "grad_norm": 0.4286244511604309,
+ "learning_rate": 2.443269974962181e-05,
+ "loss": 0.7866,
+ "step": 47
+ },
+ {
+ "epoch": 2.3167420814479636,
+ "grad_norm": 0.37086257338523865,
+ "learning_rate": 2.277315152585231e-05,
+ "loss": 0.7716,
+ "step": 48
+ },
+ {
+ "epoch": 2.3650075414781297,
+ "grad_norm": 0.415539026260376,
+ "learning_rate": 2.1199063110826618e-05,
+ "loss": 0.7904,
+ "step": 49
+ },
+ {
+ "epoch": 2.4132730015082955,
+ "grad_norm": 0.47227543592453003,
+ "learning_rate": 1.9714608704048037e-05,
+ "loss": 0.7771,
+ "step": 50
+ },
+ {
+ "epoch": 2.4132730015082955,
+ "eval_loss": 0.8835566639900208,
+ "eval_runtime": 3.2406,
+ "eval_samples_per_second": 15.429,
+ "eval_steps_per_second": 4.012,
+ "step": 50
  }
  ],
  "logging_steps": 1,
@@ -226,7 +409,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.812028905324544e+17,
+ "total_flos": 5.624057810649088e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null