polejowska
committed on
Commit
·
1f6685a
1
Parent(s):
8e68891
End of training
Browse files
runs/Feb26_21-44-51_952abec28ae3/events.out.tfevents.1677447897.952abec28ae3.204.18
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:919ecfdf0abfd9fc3122e048fa8400a9b15c3d174c42bfc547962f364933aaff
|
3 |
+
size 6972
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -638,18 +638,88 @@
|
|
638 |
"step": 4230
|
639 |
},
|
640 |
{
|
641 |
-
"epoch":
|
642 |
-
"
|
643 |
-
"
|
644 |
-
"
|
645 |
-
|
646 |
-
|
647 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
648 |
}
|
649 |
],
|
650 |
-
"max_steps":
|
651 |
-
"num_train_epochs":
|
652 |
-
"total_flos": 1.
|
653 |
"trial_name": null,
|
654 |
"trial_params": null
|
655 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 50.0,
|
5 |
+
"global_step": 4700,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
638 |
"step": 4230
|
639 |
},
|
640 |
{
|
641 |
+
"epoch": 46.0,
|
642 |
+
"learning_rate": 8.000000000000001e-06,
|
643 |
+
"loss": 2.1464,
|
644 |
+
"step": 4324
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 46.0,
|
648 |
+
"eval_loss": 1.6795276403427124,
|
649 |
+
"eval_runtime": 37.4735,
|
650 |
+
"eval_samples_per_second": 5.337,
|
651 |
+
"eval_steps_per_second": 0.667,
|
652 |
+
"step": 4324
|
653 |
+
},
|
654 |
+
{
|
655 |
+
"epoch": 47.0,
|
656 |
+
"learning_rate": 6e-06,
|
657 |
+
"loss": 2.1809,
|
658 |
+
"step": 4418
|
659 |
+
},
|
660 |
+
{
|
661 |
+
"epoch": 47.0,
|
662 |
+
"eval_loss": 1.6775075197219849,
|
663 |
+
"eval_runtime": 37.2204,
|
664 |
+
"eval_samples_per_second": 5.373,
|
665 |
+
"eval_steps_per_second": 0.672,
|
666 |
+
"step": 4418
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 48.0,
|
670 |
+
"learning_rate": 4.000000000000001e-06,
|
671 |
+
"loss": 2.174,
|
672 |
+
"step": 4512
|
673 |
+
},
|
674 |
+
{
|
675 |
+
"epoch": 48.0,
|
676 |
+
"eval_loss": 1.666752576828003,
|
677 |
+
"eval_runtime": 36.9029,
|
678 |
+
"eval_samples_per_second": 5.42,
|
679 |
+
"eval_steps_per_second": 0.677,
|
680 |
+
"step": 4512
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 49.0,
|
684 |
+
"learning_rate": 2.0000000000000003e-06,
|
685 |
+
"loss": 2.1391,
|
686 |
+
"step": 4606
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 49.0,
|
690 |
+
"eval_loss": 1.6559373140335083,
|
691 |
+
"eval_runtime": 36.8614,
|
692 |
+
"eval_samples_per_second": 5.426,
|
693 |
+
"eval_steps_per_second": 0.678,
|
694 |
+
"step": 4606
|
695 |
+
},
|
696 |
+
{
|
697 |
+
"epoch": 50.0,
|
698 |
+
"learning_rate": 0.0,
|
699 |
+
"loss": 2.1466,
|
700 |
+
"step": 4700
|
701 |
+
},
|
702 |
+
{
|
703 |
+
"epoch": 50.0,
|
704 |
+
"eval_loss": 1.6657767295837402,
|
705 |
+
"eval_runtime": 37.3694,
|
706 |
+
"eval_samples_per_second": 5.352,
|
707 |
+
"eval_steps_per_second": 0.669,
|
708 |
+
"step": 4700
|
709 |
+
},
|
710 |
+
{
|
711 |
+
"epoch": 50.0,
|
712 |
+
"step": 4700,
|
713 |
+
"total_flos": 1.7920966176e+19,
|
714 |
+
"train_loss": 0.21573773972531582,
|
715 |
+
"train_runtime": 1237.9738,
|
716 |
+
"train_samples_per_second": 30.291,
|
717 |
+
"train_steps_per_second": 3.797
|
718 |
}
|
719 |
],
|
720 |
+
"max_steps": 4700,
|
721 |
+
"num_train_epochs": 50,
|
722 |
+
"total_flos": 1.7920966176e+19,
|
723 |
"trial_name": null,
|
724 |
"trial_params": null
|
725 |
}
|