nbtpj commited on
Commit
39a1712
·
1 Parent(s): 3d71eca

Training in progress, step 10000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7993740ed5e437b04353de4ecb526bd9918a96a4e9eaf258d67af6f961bd6cf
3
  size 1115513717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:695ea22c16d59b8f8e7d8bcd0620d0c0f0cb7c3578a0fb43c564437f3ba1279f
3
  size 1115513717
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97db51ff697a7e7e17772ffa00db91d65d9b5000341e4cb4899d26c9fe90c8e2
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a52c17999eaef626df679c5f03ad9c53b61713db7706f9c3432222c1b6c6a3
3
  size 557969145
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faec401c00d51234b05bffba3c32fc98159e150f1de70dde51eaeeef3f15563a
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e5f8f9f68a550cc99dfdaed2042005f3b6ddc4eafa0827c34c6b5a1e335d304
3
  size 15523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f22490afe27492170ee18653712e55a82f1d2a2adbc61025e6296be8d30c2663
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d52951fc965683e697127d8227f41545dee9b14abb397312476c1d018a09dfd
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8617718028266115,
5
- "global_step": 7500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -456,11 +456,161 @@
456
  "learning_rate": 2.6062811183454615e-05,
457
  "loss": 2.5496,
458
  "step": 7500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  }
460
  ],
461
  "max_steps": 15666,
462
  "num_train_epochs": 2,
463
- "total_flos": 2.963322045186048e+16,
464
  "trial_name": null,
465
  "trial_params": null
466
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.149029070435482,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
456
  "learning_rate": 2.6062811183454615e-05,
457
  "loss": 2.5496,
458
  "step": 7500
459
+ },
460
+ {
461
+ "epoch": 0.87,
462
+ "learning_rate": 2.5743648665900677e-05,
463
+ "loss": 2.4191,
464
+ "step": 7600
465
+ },
466
+ {
467
+ "epoch": 0.88,
468
+ "learning_rate": 2.5424486148346737e-05,
469
+ "loss": 2.4895,
470
+ "step": 7700
471
+ },
472
+ {
473
+ "epoch": 0.9,
474
+ "learning_rate": 2.51053236307928e-05,
475
+ "loss": 2.4726,
476
+ "step": 7800
477
+ },
478
+ {
479
+ "epoch": 0.91,
480
+ "learning_rate": 2.4786161113238862e-05,
481
+ "loss": 2.3287,
482
+ "step": 7900
483
+ },
484
+ {
485
+ "epoch": 0.92,
486
+ "learning_rate": 2.446699859568492e-05,
487
+ "loss": 2.4278,
488
+ "step": 8000
489
+ },
490
+ {
491
+ "epoch": 0.93,
492
+ "learning_rate": 2.4147836078130988e-05,
493
+ "loss": 2.4548,
494
+ "step": 8100
495
+ },
496
+ {
497
+ "epoch": 0.94,
498
+ "learning_rate": 2.3828673560577047e-05,
499
+ "loss": 2.5173,
500
+ "step": 8200
501
+ },
502
+ {
503
+ "epoch": 0.95,
504
+ "learning_rate": 2.3509511043023107e-05,
505
+ "loss": 2.4883,
506
+ "step": 8300
507
+ },
508
+ {
509
+ "epoch": 0.97,
510
+ "learning_rate": 2.319034852546917e-05,
511
+ "loss": 2.4435,
512
+ "step": 8400
513
+ },
514
+ {
515
+ "epoch": 0.98,
516
+ "learning_rate": 2.2871186007915232e-05,
517
+ "loss": 2.4825,
518
+ "step": 8500
519
+ },
520
+ {
521
+ "epoch": 0.99,
522
+ "learning_rate": 2.2552023490361295e-05,
523
+ "loss": 2.4539,
524
+ "step": 8600
525
+ },
526
+ {
527
+ "epoch": 1.0,
528
+ "learning_rate": 2.2232860972807354e-05,
529
+ "loss": 2.4115,
530
+ "step": 8700
531
+ },
532
+ {
533
+ "epoch": 1.01,
534
+ "learning_rate": 2.1913698455253417e-05,
535
+ "loss": 2.3452,
536
+ "step": 8800
537
+ },
538
+ {
539
+ "epoch": 1.02,
540
+ "learning_rate": 2.159453593769948e-05,
541
+ "loss": 2.3799,
542
+ "step": 8900
543
+ },
544
+ {
545
+ "epoch": 1.03,
546
+ "learning_rate": 2.127537342014554e-05,
547
+ "loss": 2.4019,
548
+ "step": 9000
549
+ },
550
+ {
551
+ "epoch": 1.05,
552
+ "learning_rate": 2.09562109025916e-05,
553
+ "loss": 2.3678,
554
+ "step": 9100
555
+ },
556
+ {
557
+ "epoch": 1.06,
558
+ "learning_rate": 2.0637048385037665e-05,
559
+ "loss": 2.3711,
560
+ "step": 9200
561
+ },
562
+ {
563
+ "epoch": 1.07,
564
+ "learning_rate": 2.0317885867483724e-05,
565
+ "loss": 2.2767,
566
+ "step": 9300
567
+ },
568
+ {
569
+ "epoch": 1.08,
570
+ "learning_rate": 1.9998723349929783e-05,
571
+ "loss": 2.3761,
572
+ "step": 9400
573
+ },
574
+ {
575
+ "epoch": 1.09,
576
+ "learning_rate": 1.9679560832375846e-05,
577
+ "loss": 2.3392,
578
+ "step": 9500
579
+ },
580
+ {
581
+ "epoch": 1.1,
582
+ "learning_rate": 1.936039831482191e-05,
583
+ "loss": 2.3232,
584
+ "step": 9600
585
+ },
586
+ {
587
+ "epoch": 1.11,
588
+ "learning_rate": 1.904123579726797e-05,
589
+ "loss": 2.355,
590
+ "step": 9700
591
+ },
592
+ {
593
+ "epoch": 1.13,
594
+ "learning_rate": 1.872207327971403e-05,
595
+ "loss": 2.3613,
596
+ "step": 9800
597
+ },
598
+ {
599
+ "epoch": 1.14,
600
+ "learning_rate": 1.840291076216009e-05,
601
+ "loss": 2.3475,
602
+ "step": 9900
603
+ },
604
+ {
605
+ "epoch": 1.15,
606
+ "learning_rate": 1.8083748244606157e-05,
607
+ "loss": 2.3654,
608
+ "step": 10000
609
  }
610
  ],
611
  "max_steps": 15666,
612
  "num_train_epochs": 2,
613
+ "total_flos": 3.953048407805952e+16,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97db51ff697a7e7e17772ffa00db91d65d9b5000341e4cb4899d26c9fe90c8e2
3
  size 557969145
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a52c17999eaef626df679c5f03ad9c53b61713db7706f9c3432222c1b6c6a3
3
  size 557969145
runs/Jan01_04-28-26_6ea5fb75c867/events.out.tfevents.1672547319.6ea5fb75c867.24.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c102e25ecb6796a1e93092de6658a83da616eed41abb3c0f7da8764fe5c465e
3
- size 16610
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f511a742a4b5f180697e41094bff1017383d78f0e31e1610835895acb5b91dcc
3
+ size 20535