diaenra commited on
Commit
bf6323d
·
verified ·
1 Parent(s): dbbcaf2

Training in progress, step 59808, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bd47bd97c3be53d8c53262c25bb1d1459da7779b48ee7dabe13d9939834d3f4
3
  size 1484196216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb44ad69600872ef0bb443d3b43f12a5f06e90608b73f03ff4028fa72c313ed
3
  size 1484196216
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2271c4f336470e82e9f78edd39a4c7d5b223f2b5eb75c296d4ab288085f9335
3
  size 2968683840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af01e6e7f36514043e3ce67e600b1d8d265d938af879be15564a49cf1e029d6f
3
  size 2968683840
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fffaaeb63cee162b502936a8086aa019d07c502f0d331d152aac98a58f846ddf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae79343f53c51d705aaa2e789fb3a4d88d0a467b2f986398151d95d4faac2c55
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67fb714e7769791dd1cf39d1f0bc2dae6d2eef2bc4029c1bc0bca242452dd77c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:023b85de6d1b039ecc813c0216bc60820d589f64ee137f692f288c5c4fc4729e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9970281013705846,
5
  "eval_steps": 500,
6
- "global_step": 59750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -418273,6 +418273,412 @@
418273
  "learning_rate": 2.3282539897695464e-10,
418274
  "loss": 1.4932,
418275
  "step": 59750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418276
  }
418277
  ],
418278
  "logging_steps": 1,
@@ -418287,12 +418693,12 @@
418287
  "should_evaluate": false,
418288
  "should_log": false,
418289
  "should_save": true,
418290
- "should_training_stop": false
418291
  },
418292
  "attributes": {}
418293
  }
418294
  },
418295
- "total_flos": 5.399461518976745e+18,
418296
  "train_batch_size": 2,
418297
  "trial_name": null,
418298
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9999373017166793,
5
  "eval_steps": 500,
6
+ "global_step": 59808,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
418273
  "learning_rate": 2.3282539897695464e-10,
418274
  "loss": 1.4932,
418275
  "step": 59750
418276
+ },
418277
+ {
418278
+ "epoch": 2.997078259997241,
418279
+ "grad_norm": 2.9519903659820557,
418280
+ "learning_rate": 2.2486615378625176e-10,
418281
+ "loss": 1.1974,
418282
+ "step": 59751
418283
+ },
418284
+ {
418285
+ "epoch": 2.9971284186238982,
418286
+ "grad_norm": 2.8692264556884766,
418287
+ "learning_rate": 2.1704532984223592e-10,
418288
+ "loss": 1.525,
418289
+ "step": 59752
418290
+ },
418291
+ {
418292
+ "epoch": 2.997178577250555,
418293
+ "grad_norm": 2.799671173095703,
418294
+ "learning_rate": 2.0936292716711158e-10,
418295
+ "loss": 1.2972,
418296
+ "step": 59753
418297
+ },
418298
+ {
418299
+ "epoch": 2.997228735877212,
418300
+ "grad_norm": 3.265291690826416,
418301
+ "learning_rate": 2.0181894577198103e-10,
418302
+ "loss": 1.5709,
418303
+ "step": 59754
418304
+ },
418305
+ {
418306
+ "epoch": 2.9972788945038684,
418307
+ "grad_norm": 2.9593746662139893,
418308
+ "learning_rate": 1.944133856901509e-10,
418309
+ "loss": 1.4483,
418310
+ "step": 59755
418311
+ },
418312
+ {
418313
+ "epoch": 2.9973290531305254,
418314
+ "grad_norm": 2.9063448905944824,
418315
+ "learning_rate": 1.8714624693272342e-10,
418316
+ "loss": 1.113,
418317
+ "step": 59756
418318
+ },
418319
+ {
418320
+ "epoch": 2.997379211757182,
418321
+ "grad_norm": 2.510237693786621,
418322
+ "learning_rate": 1.800175295274542e-10,
418323
+ "loss": 0.9191,
418324
+ "step": 59757
418325
+ },
418326
+ {
418327
+ "epoch": 2.997429370383839,
418328
+ "grad_norm": 3.348395824432373,
418329
+ "learning_rate": 1.730272334909966e-10,
418330
+ "loss": 1.4245,
418331
+ "step": 59758
418332
+ },
418333
+ {
418334
+ "epoch": 2.9974795290104956,
418335
+ "grad_norm": 3.254422426223755,
418336
+ "learning_rate": 1.661753588400039e-10,
418337
+ "loss": 1.3145,
418338
+ "step": 59759
418339
+ },
418340
+ {
418341
+ "epoch": 2.9975296876371527,
418342
+ "grad_norm": 3.8515803813934326,
418343
+ "learning_rate": 1.5946190559668063e-10,
418344
+ "loss": 1.3135,
418345
+ "step": 59760
418346
+ },
418347
+ {
418348
+ "epoch": 2.9975798462638092,
418349
+ "grad_norm": 3.0799078941345215,
418350
+ "learning_rate": 1.5288687377768008e-10,
418351
+ "loss": 1.2047,
418352
+ "step": 59761
418353
+ },
418354
+ {
418355
+ "epoch": 2.997630004890466,
418356
+ "grad_norm": 3.328263998031616,
418357
+ "learning_rate": 1.4645026339965562e-10,
418358
+ "loss": 1.4763,
418359
+ "step": 59762
418360
+ },
418361
+ {
418362
+ "epoch": 2.997680163517123,
418363
+ "grad_norm": 3.2846860885620117,
418364
+ "learning_rate": 1.4015207448481172e-10,
418365
+ "loss": 1.349,
418366
+ "step": 59763
418367
+ },
418368
+ {
418369
+ "epoch": 2.99773032214378,
418370
+ "grad_norm": 3.0045347213745117,
418371
+ "learning_rate": 1.339923070498017e-10,
418372
+ "loss": 1.3809,
418373
+ "step": 59764
418374
+ },
418375
+ {
418376
+ "epoch": 2.9977804807704365,
418377
+ "grad_norm": 3.032623052597046,
418378
+ "learning_rate": 1.2797096111127893e-10,
418379
+ "loss": 1.155,
418380
+ "step": 59765
418381
+ },
418382
+ {
418383
+ "epoch": 2.997830639397093,
418384
+ "grad_norm": 3.425898313522339,
418385
+ "learning_rate": 1.2208803668034563e-10,
418386
+ "loss": 1.4988,
418387
+ "step": 59766
418388
+ },
418389
+ {
418390
+ "epoch": 2.99788079802375,
418391
+ "grad_norm": 3.9673779010772705,
418392
+ "learning_rate": 1.1634353377920626e-10,
418393
+ "loss": 1.5579,
418394
+ "step": 59767
418395
+ },
418396
+ {
418397
+ "epoch": 2.997930956650407,
418398
+ "grad_norm": 3.9234225749969482,
418399
+ "learning_rate": 1.1073745242451417e-10,
418400
+ "loss": 1.2646,
418401
+ "step": 59768
418402
+ },
418403
+ {
418404
+ "epoch": 2.9979811152770637,
418405
+ "grad_norm": 4.005583763122559,
418406
+ "learning_rate": 1.052697926329227e-10,
418407
+ "loss": 1.1601,
418408
+ "step": 59769
418409
+ },
418410
+ {
418411
+ "epoch": 2.9980312739037203,
418412
+ "grad_norm": 4.037759304046631,
418413
+ "learning_rate": 9.994055440998296e-11,
418414
+ "loss": 1.0708,
418415
+ "step": 59770
418416
+ },
418417
+ {
418418
+ "epoch": 2.9980814325303773,
418419
+ "grad_norm": 4.791607856750488,
418420
+ "learning_rate": 9.474973778345053e-11,
418421
+ "loss": 1.1979,
418422
+ "step": 59771
418423
+ },
418424
+ {
418425
+ "epoch": 2.9981315911570343,
418426
+ "grad_norm": 5.135807514190674,
418427
+ "learning_rate": 8.969734275332542e-11,
418428
+ "loss": 1.0534,
418429
+ "step": 59772
418430
+ },
418431
+ {
418432
+ "epoch": 2.998181749783691,
418433
+ "grad_norm": 3.4575726985931396,
418434
+ "learning_rate": 8.478336934736319e-11,
418435
+ "loss": 1.4946,
418436
+ "step": 59773
418437
+ },
418438
+ {
418439
+ "epoch": 2.9982319084103475,
418440
+ "grad_norm": 1.5949138402938843,
418441
+ "learning_rate": 8.000781757111498e-11,
418442
+ "loss": 1.1375,
418443
+ "step": 59774
418444
+ },
418445
+ {
418446
+ "epoch": 2.9982820670370045,
418447
+ "grad_norm": 2.864654779434204,
418448
+ "learning_rate": 7.53706874412341e-11,
418449
+ "loss": 1.275,
418450
+ "step": 59775
418451
+ },
418452
+ {
418453
+ "epoch": 2.9983322256636615,
418454
+ "grad_norm": 2.6163508892059326,
418455
+ "learning_rate": 7.087197896882281e-11,
418456
+ "loss": 1.0021,
418457
+ "step": 59776
418458
+ },
418459
+ {
418460
+ "epoch": 2.998382384290318,
418461
+ "grad_norm": 3.790781259536743,
418462
+ "learning_rate": 6.651169217053443e-11,
418463
+ "loss": 1.2672,
418464
+ "step": 59777
418465
+ },
418466
+ {
418467
+ "epoch": 2.9984325429169747,
418468
+ "grad_norm": 2.571331262588501,
418469
+ "learning_rate": 6.228982704636899e-11,
418470
+ "loss": 0.5672,
418471
+ "step": 59778
418472
+ },
418473
+ {
418474
+ "epoch": 2.9984827015436317,
418475
+ "grad_norm": 3.5116817951202393,
418476
+ "learning_rate": 5.820638362408204e-11,
418477
+ "loss": 2.0333,
418478
+ "step": 59779
418479
+ },
418480
+ {
418481
+ "epoch": 2.9985328601702887,
418482
+ "grad_norm": 3.2379603385925293,
418483
+ "learning_rate": 5.426136190367359e-11,
418484
+ "loss": 1.7546,
418485
+ "step": 59780
418486
+ },
418487
+ {
418488
+ "epoch": 2.9985830187969453,
418489
+ "grad_norm": 2.421940803527832,
418490
+ "learning_rate": 5.045476190179699e-11,
418491
+ "loss": 1.0303,
418492
+ "step": 59781
418493
+ },
418494
+ {
418495
+ "epoch": 2.998633177423602,
418496
+ "grad_norm": 3.13572359085083,
418497
+ "learning_rate": 4.6786583624003346e-11,
418498
+ "loss": 1.247,
418499
+ "step": 59782
418500
+ },
418501
+ {
418502
+ "epoch": 2.998683336050259,
418503
+ "grad_norm": 2.828307628631592,
418504
+ "learning_rate": 4.3256827081394894e-11,
418505
+ "loss": 1.2053,
418506
+ "step": 59783
418507
+ },
418508
+ {
418509
+ "epoch": 2.998733494676916,
418510
+ "grad_norm": 3.0084455013275146,
418511
+ "learning_rate": 3.9865492290624973e-11,
418512
+ "loss": 1.4298,
418513
+ "step": 59784
418514
+ },
418515
+ {
418516
+ "epoch": 2.9987836533035725,
418517
+ "grad_norm": 3.4063498973846436,
418518
+ "learning_rate": 3.6612579257244704e-11,
418519
+ "loss": 1.7157,
418520
+ "step": 59785
418521
+ },
418522
+ {
418523
+ "epoch": 2.998833811930229,
418524
+ "grad_norm": 2.2077646255493164,
418525
+ "learning_rate": 3.349808798125409e-11,
418526
+ "loss": 1.1338,
418527
+ "step": 59786
418528
+ },
418529
+ {
418530
+ "epoch": 2.998883970556886,
418531
+ "grad_norm": 2.179716110229492,
418532
+ "learning_rate": 3.052201848485759e-11,
418533
+ "loss": 1.272,
418534
+ "step": 59787
418535
+ },
418536
+ {
418537
+ "epoch": 2.998934129183543,
418538
+ "grad_norm": 2.593482255935669,
418539
+ "learning_rate": 2.768437076805519e-11,
418540
+ "loss": 1.3971,
418541
+ "step": 59788
418542
+ },
418543
+ {
418544
+ "epoch": 2.9989842878101998,
418545
+ "grad_norm": 2.6535258293151855,
418546
+ "learning_rate": 2.4985144847500253e-11,
418547
+ "loss": 1.2945,
418548
+ "step": 59789
418549
+ },
418550
+ {
418551
+ "epoch": 2.9990344464368563,
418552
+ "grad_norm": 3.077737808227539,
418553
+ "learning_rate": 2.2424340717641656e-11,
418554
+ "loss": 2.0234,
418555
+ "step": 59790
418556
+ },
418557
+ {
418558
+ "epoch": 2.9990846050635134,
418559
+ "grad_norm": 2.6477880477905273,
418560
+ "learning_rate": 2.0001958395132748e-11,
418561
+ "loss": 1.1044,
418562
+ "step": 59791
418563
+ },
418564
+ {
418565
+ "epoch": 2.9991347636901704,
418566
+ "grad_norm": 3.315833568572998,
418567
+ "learning_rate": 1.7717997879973523e-11,
418568
+ "loss": 1.578,
418569
+ "step": 59792
418570
+ },
418571
+ {
418572
+ "epoch": 2.999184922316827,
418573
+ "grad_norm": 2.8919079303741455,
418574
+ "learning_rate": 1.5572459188817334e-11,
418575
+ "loss": 1.1892,
418576
+ "step": 59793
418577
+ },
418578
+ {
418579
+ "epoch": 2.9992350809434836,
418580
+ "grad_norm": 2.9915194511413574,
418581
+ "learning_rate": 1.3565342316113061e-11,
418582
+ "loss": 1.4405,
418583
+ "step": 59794
418584
+ },
418585
+ {
418586
+ "epoch": 2.9992852395701406,
418587
+ "grad_norm": 2.5658187866210938,
418588
+ "learning_rate": 1.1696647272962935e-11,
418589
+ "loss": 1.0032,
418590
+ "step": 59795
418591
+ },
418592
+ {
418593
+ "epoch": 2.9993353981967976,
418594
+ "grad_norm": 3.2243688106536865,
418595
+ "learning_rate": 9.966374064918071e-12,
418596
+ "loss": 1.459,
418597
+ "step": 59796
418598
+ },
418599
+ {
418600
+ "epoch": 2.999385556823454,
418601
+ "grad_norm": 3.0176734924316406,
418602
+ "learning_rate": 8.374522697529586e-12,
418603
+ "loss": 1.1177,
418604
+ "step": 59797
418605
+ },
418606
+ {
418607
+ "epoch": 2.999435715450111,
418608
+ "grad_norm": 3.0600199699401855,
418609
+ "learning_rate": 6.921093170797477e-12,
418610
+ "loss": 1.6016,
418611
+ "step": 59798
418612
+ },
418613
+ {
418614
+ "epoch": 2.999485874076768,
418615
+ "grad_norm": 3.130082607269287,
418616
+ "learning_rate": 5.6060854958239765e-12,
418617
+ "loss": 1.7026,
418618
+ "step": 59799
418619
+ },
418620
+ {
418621
+ "epoch": 2.999536032703425,
418622
+ "grad_norm": 2.7928144931793213,
418623
+ "learning_rate": 4.429499667057968e-12,
418624
+ "loss": 1.1228,
418625
+ "step": 59800
418626
+ },
418627
+ {
418628
+ "epoch": 2.9995861913300814,
418629
+ "grad_norm": 3.355782985687256,
418630
+ "learning_rate": 3.3913356956016827e-12,
418631
+ "loss": 1.6128,
418632
+ "step": 59801
418633
+ },
418634
+ {
418635
+ "epoch": 2.999636349956738,
418636
+ "grad_norm": 3.621020555496216,
418637
+ "learning_rate": 2.4915935759040053e-12,
418638
+ "loss": 1.3013,
418639
+ "step": 59802
418640
+ },
418641
+ {
418642
+ "epoch": 2.999686508583395,
418643
+ "grad_norm": 3.6413798332214355,
418644
+ "learning_rate": 1.7302733246182811e-12,
418645
+ "loss": 0.9964,
418646
+ "step": 59803
418647
+ },
418648
+ {
418649
+ "epoch": 2.999736667210052,
418650
+ "grad_norm": 5.63632345199585,
418651
+ "learning_rate": 1.10737493064228e-12,
418652
+ "loss": 1.7908,
418653
+ "step": 59804
418654
+ },
418655
+ {
418656
+ "epoch": 2.9997868258367086,
418657
+ "grad_norm": 3.6524646282196045,
418658
+ "learning_rate": 6.228983995271165e-13,
418659
+ "loss": 1.058,
418660
+ "step": 59805
418661
+ },
418662
+ {
418663
+ "epoch": 2.999836984463365,
418664
+ "grad_norm": 4.080533981323242,
418665
+ "learning_rate": 2.768437312727912e-13,
418666
+ "loss": 1.2011,
418667
+ "step": 59806
418668
+ },
418669
+ {
418670
+ "epoch": 2.9998871430900222,
418671
+ "grad_norm": 5.047213554382324,
418672
+ "learning_rate": 6.921093143041901e-14,
418673
+ "loss": 1.139,
418674
+ "step": 59807
418675
+ },
418676
+ {
418677
+ "epoch": 2.9999373017166793,
418678
+ "grad_norm": 4.863078594207764,
418679
+ "learning_rate": 0.0,
418680
+ "loss": 1.1164,
418681
+ "step": 59808
418682
  }
418683
  ],
418684
  "logging_steps": 1,
 
418693
  "should_evaluate": false,
418694
  "should_log": false,
418695
  "should_save": true,
418696
+ "should_training_stop": true
418697
  },
418698
  "attributes": {}
418699
  }
418700
  },
418701
+ "total_flos": 5.404390115877323e+18,
418702
  "train_batch_size": 2,
418703
  "trial_name": null,
418704
  "trial_params": null