nbtpj commited on
Commit
a26b5c7
·
1 Parent(s): 7090c6e

Training in progress, step 10000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8246a7bdffa31a2780601cff44a208fa66a42423f08dc44f96fc441f90dcd063
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0270219bbf3b9d435456104809f3c23a41c4cd1fe51a03b95efb36b90021a044
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f6ee17edbe5be6c0db0fc26e011e3cfca90c6d942052701e91b647fb79172d
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b3509860f8b9090ce37e24daf382d92def3c20605929d105fc7709bfdf4fa92
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c55dab6106e884ae561d5ed424b58463798e65d5293313f2bcf0598e4e0cd039
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd78237a8bff2399c24e5136a001794a3e1c14fbfcbde9fa5704a4fcf3d3828
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007eb7be8567224005d4825ac66cab713ae55402dc507574403696925ab53db1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6cc4eff0bedca1e1ffd7c8824dc4da7fe034832dbf96d075af2be5a88f8f30
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8613759044446997,
5
- "global_step": 7500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -456,11 +456,161 @@
456
  "learning_rate": 2.60735022012378e-05,
457
  "loss": 0.6286,
458
  "step": 7500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  }
460
  ],
461
  "max_steps": 15673,
462
  "num_train_epochs": 2,
463
- "total_flos": 2.969453707444224e+16,
464
  "trial_name": null,
465
  "trial_params": null
466
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1485012059262663,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
456
  "learning_rate": 2.60735022012378e-05,
457
  "loss": 0.6286,
458
  "step": 7500
459
+ },
460
+ {
461
+ "epoch": 0.87,
462
+ "learning_rate": 2.5754482230587633e-05,
463
+ "loss": 0.6284,
464
+ "step": 7600
465
+ },
466
+ {
467
+ "epoch": 0.88,
468
+ "learning_rate": 2.5435462259937474e-05,
469
+ "loss": 0.6169,
470
+ "step": 7700
471
+ },
472
+ {
473
+ "epoch": 0.9,
474
+ "learning_rate": 2.5116442289287312e-05,
475
+ "loss": 0.6334,
476
+ "step": 7800
477
+ },
478
+ {
479
+ "epoch": 0.91,
480
+ "learning_rate": 2.4797422318637146e-05,
481
+ "loss": 0.6573,
482
+ "step": 7900
483
+ },
484
+ {
485
+ "epoch": 0.92,
486
+ "learning_rate": 2.4478402347986984e-05,
487
+ "loss": 0.6026,
488
+ "step": 8000
489
+ },
490
+ {
491
+ "epoch": 0.93,
492
+ "learning_rate": 2.4159382377336822e-05,
493
+ "loss": 0.6836,
494
+ "step": 8100
495
+ },
496
+ {
497
+ "epoch": 0.94,
498
+ "learning_rate": 2.384036240668666e-05,
499
+ "loss": 0.6093,
500
+ "step": 8200
501
+ },
502
+ {
503
+ "epoch": 0.95,
504
+ "learning_rate": 2.3521342436036498e-05,
505
+ "loss": 0.6603,
506
+ "step": 8300
507
+ },
508
+ {
509
+ "epoch": 0.96,
510
+ "learning_rate": 2.3202322465386332e-05,
511
+ "loss": 0.6312,
512
+ "step": 8400
513
+ },
514
+ {
515
+ "epoch": 0.98,
516
+ "learning_rate": 2.2883302494736173e-05,
517
+ "loss": 0.6312,
518
+ "step": 8500
519
+ },
520
+ {
521
+ "epoch": 0.99,
522
+ "learning_rate": 2.2564282524086008e-05,
523
+ "loss": 0.6278,
524
+ "step": 8600
525
+ },
526
+ {
527
+ "epoch": 1.0,
528
+ "learning_rate": 2.224526255343585e-05,
529
+ "loss": 0.6115,
530
+ "step": 8700
531
+ },
532
+ {
533
+ "epoch": 1.01,
534
+ "learning_rate": 2.1926242582785683e-05,
535
+ "loss": 0.6666,
536
+ "step": 8800
537
+ },
538
+ {
539
+ "epoch": 1.02,
540
+ "learning_rate": 2.160722261213552e-05,
541
+ "loss": 0.6047,
542
+ "step": 8900
543
+ },
544
+ {
545
+ "epoch": 1.03,
546
+ "learning_rate": 2.128820264148536e-05,
547
+ "loss": 0.6174,
548
+ "step": 9000
549
+ },
550
+ {
551
+ "epoch": 1.05,
552
+ "learning_rate": 2.0969182670835193e-05,
553
+ "loss": 0.5977,
554
+ "step": 9100
555
+ },
556
+ {
557
+ "epoch": 1.06,
558
+ "learning_rate": 2.0650162700185034e-05,
559
+ "loss": 0.553,
560
+ "step": 9200
561
+ },
562
+ {
563
+ "epoch": 1.07,
564
+ "learning_rate": 2.033114272953487e-05,
565
+ "loss": 0.6447,
566
+ "step": 9300
567
+ },
568
+ {
569
+ "epoch": 1.08,
570
+ "learning_rate": 2.0012122758884706e-05,
571
+ "loss": 0.5979,
572
+ "step": 9400
573
+ },
574
+ {
575
+ "epoch": 1.09,
576
+ "learning_rate": 1.9693102788234544e-05,
577
+ "loss": 0.6234,
578
+ "step": 9500
579
+ },
580
+ {
581
+ "epoch": 1.1,
582
+ "learning_rate": 1.9374082817584382e-05,
583
+ "loss": 0.5939,
584
+ "step": 9600
585
+ },
586
+ {
587
+ "epoch": 1.11,
588
+ "learning_rate": 1.905506284693422e-05,
589
+ "loss": 0.6481,
590
+ "step": 9700
591
+ },
592
+ {
593
+ "epoch": 1.13,
594
+ "learning_rate": 1.8736042876284058e-05,
595
+ "loss": 0.592,
596
+ "step": 9800
597
+ },
598
+ {
599
+ "epoch": 1.14,
600
+ "learning_rate": 1.8417022905633892e-05,
601
+ "loss": 0.6491,
602
+ "step": 9900
603
+ },
604
+ {
605
+ "epoch": 1.15,
606
+ "learning_rate": 1.8098002934983733e-05,
607
+ "loss": 0.5978,
608
+ "step": 10000
609
  }
610
  ],
611
  "max_steps": 15673,
612
  "num_train_epochs": 2,
613
+ "total_flos": 3.954111338032128e+16,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0f6ee17edbe5be6c0db0fc26e011e3cfca90c6d942052701e91b647fb79172d
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b3509860f8b9090ce37e24daf382d92def3c20605929d105fc7709bfdf4fa92
3
  size 557969145
runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1e19a8b9d0aee47e4149819f08638c1d2effc4c31ead54c6fa4747637024e00
3
- size 16610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e582d90f7535fdd56e7dc87ed69608bed0f439782a1664c33152ca1b8568bf2f
3
+ size 20535