Training in progress, step 12500
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +153 -3
- pytorch_model.bin +1 -1
- runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1115513717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcf16c77fc4c696d5ecec66bb94ab1a72596ecc42da185a1509a0a37aa93d820
|
3 |
size 1115513717
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557969145
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001276adc811be2b4e3383d1bfd82644ee01b63c562240704981e80abf57ea78
|
3 |
size 557969145
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15523
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ea4cb9a85921bdde5a8523300b183bff3f70a1aeb0368e6a4c669b696d1922c
|
3 |
size 15523
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af29429f2a56160c0a3cb3b03c7b28cc9e5f472234bebefa86da284747a0d6d1
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -606,11 +606,161 @@
|
|
606 |
"learning_rate": 1.8098002934983733e-05,
|
607 |
"loss": 0.5978,
|
608 |
"step": 10000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
609 |
}
|
610 |
],
|
611 |
"max_steps": 15673,
|
612 |
"num_train_epochs": 2,
|
613 |
-
"total_flos":
|
614 |
"trial_name": null,
|
615 |
"trial_params": null
|
616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.4356265074078327,
|
5 |
+
"global_step": 12500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
606 |
"learning_rate": 1.8098002934983733e-05,
|
607 |
"loss": 0.5978,
|
608 |
"step": 10000
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 1.16,
|
612 |
+
"learning_rate": 1.7778982964333567e-05,
|
613 |
+
"loss": 0.6034,
|
614 |
+
"step": 10100
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 1.17,
|
618 |
+
"learning_rate": 1.7459962993683405e-05,
|
619 |
+
"loss": 0.6037,
|
620 |
+
"step": 10200
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"epoch": 1.18,
|
624 |
+
"learning_rate": 1.7140943023033243e-05,
|
625 |
+
"loss": 0.6251,
|
626 |
+
"step": 10300
|
627 |
+
},
|
628 |
+
{
|
629 |
+
"epoch": 1.19,
|
630 |
+
"learning_rate": 1.6821923052383077e-05,
|
631 |
+
"loss": 0.581,
|
632 |
+
"step": 10400
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 1.21,
|
636 |
+
"learning_rate": 1.650290308173292e-05,
|
637 |
+
"loss": 0.5919,
|
638 |
+
"step": 10500
|
639 |
+
},
|
640 |
+
{
|
641 |
+
"epoch": 1.22,
|
642 |
+
"learning_rate": 1.6183883111082753e-05,
|
643 |
+
"loss": 0.6073,
|
644 |
+
"step": 10600
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 1.23,
|
648 |
+
"learning_rate": 1.5864863140432594e-05,
|
649 |
+
"loss": 0.6273,
|
650 |
+
"step": 10700
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"epoch": 1.24,
|
654 |
+
"learning_rate": 1.554584316978243e-05,
|
655 |
+
"loss": 0.6198,
|
656 |
+
"step": 10800
|
657 |
+
},
|
658 |
+
{
|
659 |
+
"epoch": 1.25,
|
660 |
+
"learning_rate": 1.5226823199132268e-05,
|
661 |
+
"loss": 0.5993,
|
662 |
+
"step": 10900
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 1.26,
|
666 |
+
"learning_rate": 1.4907803228482104e-05,
|
667 |
+
"loss": 0.5519,
|
668 |
+
"step": 11000
|
669 |
+
},
|
670 |
+
{
|
671 |
+
"epoch": 1.27,
|
672 |
+
"learning_rate": 1.4588783257831942e-05,
|
673 |
+
"loss": 0.6152,
|
674 |
+
"step": 11100
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 1.29,
|
678 |
+
"learning_rate": 1.4269763287181778e-05,
|
679 |
+
"loss": 0.6251,
|
680 |
+
"step": 11200
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 1.3,
|
684 |
+
"learning_rate": 1.3950743316531614e-05,
|
685 |
+
"loss": 0.6747,
|
686 |
+
"step": 11300
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 1.31,
|
690 |
+
"learning_rate": 1.3631723345881454e-05,
|
691 |
+
"loss": 0.6253,
|
692 |
+
"step": 11400
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 1.32,
|
696 |
+
"learning_rate": 1.331270337523129e-05,
|
697 |
+
"loss": 0.6009,
|
698 |
+
"step": 11500
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"epoch": 1.33,
|
702 |
+
"learning_rate": 1.2993683404581127e-05,
|
703 |
+
"loss": 0.6019,
|
704 |
+
"step": 11600
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 1.34,
|
708 |
+
"learning_rate": 1.2674663433930964e-05,
|
709 |
+
"loss": 0.6286,
|
710 |
+
"step": 11700
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 1.36,
|
714 |
+
"learning_rate": 1.2355643463280801e-05,
|
715 |
+
"loss": 0.5796,
|
716 |
+
"step": 11800
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 1.37,
|
720 |
+
"learning_rate": 1.2036623492630639e-05,
|
721 |
+
"loss": 0.6077,
|
722 |
+
"step": 11900
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 1.38,
|
726 |
+
"learning_rate": 1.1717603521980477e-05,
|
727 |
+
"loss": 0.6186,
|
728 |
+
"step": 12000
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 1.39,
|
732 |
+
"learning_rate": 1.1398583551330313e-05,
|
733 |
+
"loss": 0.6632,
|
734 |
+
"step": 12100
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 1.4,
|
738 |
+
"learning_rate": 1.107956358068015e-05,
|
739 |
+
"loss": 0.5796,
|
740 |
+
"step": 12200
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 1.41,
|
744 |
+
"learning_rate": 1.0760543610029989e-05,
|
745 |
+
"loss": 0.5951,
|
746 |
+
"step": 12300
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"epoch": 1.42,
|
750 |
+
"learning_rate": 1.0441523639379826e-05,
|
751 |
+
"loss": 0.5743,
|
752 |
+
"step": 12400
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"epoch": 1.44,
|
756 |
+
"learning_rate": 1.0122503668729662e-05,
|
757 |
+
"loss": 0.6705,
|
758 |
+
"step": 12500
|
759 |
}
|
760 |
],
|
761 |
"max_steps": 15673,
|
762 |
"num_train_epochs": 2,
|
763 |
+
"total_flos": 4.939903709604864e+16,
|
764 |
"trial_name": null,
|
765 |
"trial_params": null
|
766 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557969145
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:001276adc811be2b4e3383d1bfd82644ee01b63c562240704981e80abf57ea78
|
3 |
size 557969145
|
runs/Jan26_03-13-45_b3489f7155a5/events.out.tfevents.1674703022.b3489f7155a5.24.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f77dd77e2e1de62f98d722747a220a5ac16ebff7589985b9d63fb7e5c44b6c35
|
3 |
+
size 24460
|