ardaspear commited on
Commit
5fd6a36
·
verified ·
1 Parent(s): 8b4cb4d

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ab7cafe1196ec878b8d3d2a3fc6884678d599ff025eaf88982d550d3f25d645
3
  size 58680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94fc6653a6657011149912d979ea84e5d764001724e6c10946162dfa0bb0d5d
3
  size 58680
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:546e9ffa163a869b7fbcf985cc08dc91fa5c651df09b0cd03cc29c573b6d749a
3
  size 127270
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef74568d1c054685626697a586f47b5fbb2cd890f27cd1bc19f7016bf19783e
3
  size 127270
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f6aaadfc3a2995ea3bff4c91194faec2c49a54ff830cb39f07c142f67055ad8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d1201220196b0eea01cc2e9383598f0df0fda082156bad64ee5af51ec629e09
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3da5b3ae9debb9fbcab0805794dfaab406f2d5635e0407c25e32749f5f7b455
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69e2b49ea642509f0c688c16fb190b7cf27dac0a18903a5e2d1467d0343d8b8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0073370562100028534,
5
  "eval_steps": 5,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -402,6 +402,49 @@
402
  "eval_samples_per_second": 141.981,
403
  "eval_steps_per_second": 71.018,
404
  "step": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  }
406
  ],
407
  "logging_steps": 1,
@@ -416,12 +459,12 @@
416
  "should_evaluate": false,
417
  "should_log": false,
418
  "should_save": true,
419
- "should_training_stop": false
420
  },
421
  "attributes": {}
422
  }
423
  },
424
- "total_flos": 9300344832000.0,
425
  "train_batch_size": 2,
426
  "trial_name": null,
427
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.008152284677780949,
5
  "eval_steps": 5,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
402
  "eval_samples_per_second": 141.981,
403
  "eval_steps_per_second": 71.018,
404
  "step": 45
405
+ },
406
+ {
407
+ "epoch": 0.007500101903558472,
408
+ "grad_norm": 0.8296800255775452,
409
+ "learning_rate": 4.8943483704846475e-06,
410
+ "loss": 41.4416,
411
+ "step": 46
412
+ },
413
+ {
414
+ "epoch": 0.007663147597114091,
415
+ "grad_norm": 1.1589857339859009,
416
+ "learning_rate": 2.7630079602323442e-06,
417
+ "loss": 41.4379,
418
+ "step": 47
419
+ },
420
+ {
421
+ "epoch": 0.007826193290669711,
422
+ "grad_norm": 0.8042440414428711,
423
+ "learning_rate": 1.231165940486234e-06,
424
+ "loss": 41.4556,
425
+ "step": 48
426
+ },
427
+ {
428
+ "epoch": 0.007989238984225329,
429
+ "grad_norm": 0.7898399233818054,
430
+ "learning_rate": 3.0826662668720364e-07,
431
+ "loss": 41.4265,
432
+ "step": 49
433
+ },
434
+ {
435
+ "epoch": 0.008152284677780949,
436
+ "grad_norm": 0.9658806920051575,
437
+ "learning_rate": 0.0,
438
+ "loss": 41.4237,
439
+ "step": 50
440
+ },
441
+ {
442
+ "epoch": 0.008152284677780949,
443
+ "eval_loss": 10.35954475402832,
444
+ "eval_runtime": 18.3291,
445
+ "eval_samples_per_second": 140.923,
446
+ "eval_steps_per_second": 70.489,
447
+ "step": 50
448
  }
449
  ],
450
  "logging_steps": 1,
 
459
  "should_evaluate": false,
460
  "should_log": false,
461
  "should_save": true,
462
+ "should_training_stop": true
463
  },
464
  "attributes": {}
465
  }
466
  },
467
+ "total_flos": 10333716480000.0,
468
  "train_batch_size": 2,
469
  "trial_name": null,
470
  "trial_params": null