willtensora
commited on
Training in progress, step 157890, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2923 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627606952
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1088d85bad76e35cb6c18e7734c2c2df37b0f09d0d513cd23193692801b8e919
|
3 |
size 627606952
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 318986436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74fea6dfe87ac00874c7a8077e9de8baf19330d55fbcf21369d50ffdd7952746
|
3 |
size 318986436
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:908173892e21f0448410767027e18a92997b1ed2af990e2d2c4310d10a259e8d
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f67794cf32b5607e3c7c900f9ec980e9ddeb5878f1ecf87202a26f9f7ec578df
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d73dca6b24dc149aa09b998461a4ad6f5693f3022488555c563abdd18b0bd07
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd682e7c95609633443dbf166e5225e8744766ccd1c3ac9a8a206552b0d19437
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69fb71396e38b5278906060269440d2471ddeaa97d12505747b0e67c0969d3bd
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ac676368096395c9abcdc2eb24514144adc29fa422f2ccb1a132f9486edc674
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51c1b684147f6e3b2a7f0206497e7491505db91cfdd5d1908cfea9ae634ec7ed
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:993e06522ed60c0b873ce20890d06629f64e5c63c610b8979cd6428c8ce358d5
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd46dcde3e0e95c0d471f441b283d1d9f5c9aaae4854865d35095a1f5c249f2e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 8310,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -107770,6 +107770,2926 @@
|
|
107770 |
"learning_rate": 0.0001844548534288357,
|
107771 |
"loss": 11.2043,
|
107772 |
"step": 153730
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107773 |
}
|
107774 |
],
|
107775 |
"logging_steps": 10,
|
@@ -107789,7 +110709,7 @@
|
|
107789 |
"attributes": {}
|
107790 |
}
|
107791 |
},
|
107792 |
-
"total_flos": 3.
|
107793 |
"train_batch_size": 1,
|
107794 |
"trial_name": null,
|
107795 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 19.0,
|
5 |
"eval_steps": 8310,
|
6 |
+
"global_step": 157890,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
107770 |
"learning_rate": 0.0001844548534288357,
|
107771 |
"loss": 11.2043,
|
107772 |
"step": 153730
|
107773 |
+
},
|
107774 |
+
{
|
107775 |
+
"epoch": 18.500601684717207,
|
107776 |
+
"grad_norm": Infinity,
|
107777 |
+
"learning_rate": 0.00018445281626107063,
|
107778 |
+
"loss": 11.0292,
|
107779 |
+
"step": 153740
|
107780 |
+
},
|
107781 |
+
{
|
107782 |
+
"epoch": 18.501805054151625,
|
107783 |
+
"grad_norm": Infinity,
|
107784 |
+
"learning_rate": 0.0001844507789710815,
|
107785 |
+
"loss": 11.0998,
|
107786 |
+
"step": 153750
|
107787 |
+
},
|
107788 |
+
{
|
107789 |
+
"epoch": 18.503008423586042,
|
107790 |
+
"grad_norm": Infinity,
|
107791 |
+
"learning_rate": 0.00018444874155887123,
|
107792 |
+
"loss": 11.1909,
|
107793 |
+
"step": 153760
|
107794 |
+
},
|
107795 |
+
{
|
107796 |
+
"epoch": 18.504211793020456,
|
107797 |
+
"grad_norm": Infinity,
|
107798 |
+
"learning_rate": 0.00018444670402444278,
|
107799 |
+
"loss": 11.1862,
|
107800 |
+
"step": 153770
|
107801 |
+
},
|
107802 |
+
{
|
107803 |
+
"epoch": 18.505415162454874,
|
107804 |
+
"grad_norm": Infinity,
|
107805 |
+
"learning_rate": 0.0001844446663677991,
|
107806 |
+
"loss": 11.2138,
|
107807 |
+
"step": 153780
|
107808 |
+
},
|
107809 |
+
{
|
107810 |
+
"epoch": 18.50661853188929,
|
107811 |
+
"grad_norm": Infinity,
|
107812 |
+
"learning_rate": 0.00018444262858894312,
|
107813 |
+
"loss": 11.1951,
|
107814 |
+
"step": 153790
|
107815 |
+
},
|
107816 |
+
{
|
107817 |
+
"epoch": 18.507821901323705,
|
107818 |
+
"grad_norm": Infinity,
|
107819 |
+
"learning_rate": 0.00018444059068787786,
|
107820 |
+
"loss": 11.0419,
|
107821 |
+
"step": 153800
|
107822 |
+
},
|
107823 |
+
{
|
107824 |
+
"epoch": 18.509025270758123,
|
107825 |
+
"grad_norm": Infinity,
|
107826 |
+
"learning_rate": 0.0001844385526646062,
|
107827 |
+
"loss": 11.0918,
|
107828 |
+
"step": 153810
|
107829 |
+
},
|
107830 |
+
{
|
107831 |
+
"epoch": 18.51022864019254,
|
107832 |
+
"grad_norm": Infinity,
|
107833 |
+
"learning_rate": 0.00018443651451913108,
|
107834 |
+
"loss": 11.1833,
|
107835 |
+
"step": 153820
|
107836 |
+
},
|
107837 |
+
{
|
107838 |
+
"epoch": 18.511432009626954,
|
107839 |
+
"grad_norm": Infinity,
|
107840 |
+
"learning_rate": 0.00018443447625145548,
|
107841 |
+
"loss": 11.2276,
|
107842 |
+
"step": 153830
|
107843 |
+
},
|
107844 |
+
{
|
107845 |
+
"epoch": 18.512635379061372,
|
107846 |
+
"grad_norm": Infinity,
|
107847 |
+
"learning_rate": 0.00018443243786158236,
|
107848 |
+
"loss": 11.2327,
|
107849 |
+
"step": 153840
|
107850 |
+
},
|
107851 |
+
{
|
107852 |
+
"epoch": 18.51383874849579,
|
107853 |
+
"grad_norm": Infinity,
|
107854 |
+
"learning_rate": 0.00018443039934951466,
|
107855 |
+
"loss": 11.0014,
|
107856 |
+
"step": 153850
|
107857 |
+
},
|
107858 |
+
{
|
107859 |
+
"epoch": 18.515042117930204,
|
107860 |
+
"grad_norm": Infinity,
|
107861 |
+
"learning_rate": 0.00018442836071525532,
|
107862 |
+
"loss": 11.2101,
|
107863 |
+
"step": 153860
|
107864 |
+
},
|
107865 |
+
{
|
107866 |
+
"epoch": 18.51624548736462,
|
107867 |
+
"grad_norm": Infinity,
|
107868 |
+
"learning_rate": 0.00018442632195880728,
|
107869 |
+
"loss": 11.1144,
|
107870 |
+
"step": 153870
|
107871 |
+
},
|
107872 |
+
{
|
107873 |
+
"epoch": 18.51744885679904,
|
107874 |
+
"grad_norm": Infinity,
|
107875 |
+
"learning_rate": 0.00018442428308017354,
|
107876 |
+
"loss": 11.1024,
|
107877 |
+
"step": 153880
|
107878 |
+
},
|
107879 |
+
{
|
107880 |
+
"epoch": 18.518652226233453,
|
107881 |
+
"grad_norm": Infinity,
|
107882 |
+
"learning_rate": 0.000184422244079357,
|
107883 |
+
"loss": 11.1752,
|
107884 |
+
"step": 153890
|
107885 |
+
},
|
107886 |
+
{
|
107887 |
+
"epoch": 18.51985559566787,
|
107888 |
+
"grad_norm": Infinity,
|
107889 |
+
"learning_rate": 0.00018442020495636067,
|
107890 |
+
"loss": 11.108,
|
107891 |
+
"step": 153900
|
107892 |
+
},
|
107893 |
+
{
|
107894 |
+
"epoch": 18.521058965102288,
|
107895 |
+
"grad_norm": Infinity,
|
107896 |
+
"learning_rate": 0.00018441816571118742,
|
107897 |
+
"loss": 11.1124,
|
107898 |
+
"step": 153910
|
107899 |
+
},
|
107900 |
+
{
|
107901 |
+
"epoch": 18.522262334536702,
|
107902 |
+
"grad_norm": Infinity,
|
107903 |
+
"learning_rate": 0.00018441612634384025,
|
107904 |
+
"loss": 11.1661,
|
107905 |
+
"step": 153920
|
107906 |
+
},
|
107907 |
+
{
|
107908 |
+
"epoch": 18.52346570397112,
|
107909 |
+
"grad_norm": Infinity,
|
107910 |
+
"learning_rate": 0.0001844140868543221,
|
107911 |
+
"loss": 11.1174,
|
107912 |
+
"step": 153930
|
107913 |
+
},
|
107914 |
+
{
|
107915 |
+
"epoch": 18.524669073405537,
|
107916 |
+
"grad_norm": Infinity,
|
107917 |
+
"learning_rate": 0.00018441204724263592,
|
107918 |
+
"loss": 11.0932,
|
107919 |
+
"step": 153940
|
107920 |
+
},
|
107921 |
+
{
|
107922 |
+
"epoch": 18.52587244283995,
|
107923 |
+
"grad_norm": Infinity,
|
107924 |
+
"learning_rate": 0.00018441000750878467,
|
107925 |
+
"loss": 11.1473,
|
107926 |
+
"step": 153950
|
107927 |
+
},
|
107928 |
+
{
|
107929 |
+
"epoch": 18.52707581227437,
|
107930 |
+
"grad_norm": Infinity,
|
107931 |
+
"learning_rate": 0.0001844079676527713,
|
107932 |
+
"loss": 11.1022,
|
107933 |
+
"step": 153960
|
107934 |
+
},
|
107935 |
+
{
|
107936 |
+
"epoch": 18.528279181708786,
|
107937 |
+
"grad_norm": Infinity,
|
107938 |
+
"learning_rate": 0.00018440592767459877,
|
107939 |
+
"loss": 11.0514,
|
107940 |
+
"step": 153970
|
107941 |
+
},
|
107942 |
+
{
|
107943 |
+
"epoch": 18.5294825511432,
|
107944 |
+
"grad_norm": Infinity,
|
107945 |
+
"learning_rate": 0.00018440388757427,
|
107946 |
+
"loss": 11.1552,
|
107947 |
+
"step": 153980
|
107948 |
+
},
|
107949 |
+
{
|
107950 |
+
"epoch": 18.530685920577618,
|
107951 |
+
"grad_norm": Infinity,
|
107952 |
+
"learning_rate": 0.000184401847351788,
|
107953 |
+
"loss": 11.0548,
|
107954 |
+
"step": 153990
|
107955 |
+
},
|
107956 |
+
{
|
107957 |
+
"epoch": 18.531889290012035,
|
107958 |
+
"grad_norm": Infinity,
|
107959 |
+
"learning_rate": 0.00018439980700715568,
|
107960 |
+
"loss": 11.1378,
|
107961 |
+
"step": 154000
|
107962 |
+
},
|
107963 |
+
{
|
107964 |
+
"epoch": 18.53309265944645,
|
107965 |
+
"grad_norm": Infinity,
|
107966 |
+
"learning_rate": 0.000184397766540376,
|
107967 |
+
"loss": 11.1906,
|
107968 |
+
"step": 154010
|
107969 |
+
},
|
107970 |
+
{
|
107971 |
+
"epoch": 18.534296028880867,
|
107972 |
+
"grad_norm": Infinity,
|
107973 |
+
"learning_rate": 0.0001843957259514519,
|
107974 |
+
"loss": 11.0453,
|
107975 |
+
"step": 154020
|
107976 |
+
},
|
107977 |
+
{
|
107978 |
+
"epoch": 18.535499398315284,
|
107979 |
+
"grad_norm": Infinity,
|
107980 |
+
"learning_rate": 0.00018439368524038633,
|
107981 |
+
"loss": 11.2196,
|
107982 |
+
"step": 154030
|
107983 |
+
},
|
107984 |
+
{
|
107985 |
+
"epoch": 18.5367027677497,
|
107986 |
+
"grad_norm": Infinity,
|
107987 |
+
"learning_rate": 0.0001843916444071823,
|
107988 |
+
"loss": 11.2259,
|
107989 |
+
"step": 154040
|
107990 |
+
},
|
107991 |
+
{
|
107992 |
+
"epoch": 18.537906137184116,
|
107993 |
+
"grad_norm": Infinity,
|
107994 |
+
"learning_rate": 0.00018438960345184272,
|
107995 |
+
"loss": 11.239,
|
107996 |
+
"step": 154050
|
107997 |
+
},
|
107998 |
+
{
|
107999 |
+
"epoch": 18.53910950661853,
|
108000 |
+
"grad_norm": Infinity,
|
108001 |
+
"learning_rate": 0.00018438756237437054,
|
108002 |
+
"loss": 11.0352,
|
108003 |
+
"step": 154060
|
108004 |
+
},
|
108005 |
+
{
|
108006 |
+
"epoch": 18.540312876052948,
|
108007 |
+
"grad_norm": Infinity,
|
108008 |
+
"learning_rate": 0.00018438552117476872,
|
108009 |
+
"loss": 11.2338,
|
108010 |
+
"step": 154070
|
108011 |
+
},
|
108012 |
+
{
|
108013 |
+
"epoch": 18.541516245487365,
|
108014 |
+
"grad_norm": Infinity,
|
108015 |
+
"learning_rate": 0.00018438347985304024,
|
108016 |
+
"loss": 11.1626,
|
108017 |
+
"step": 154080
|
108018 |
+
},
|
108019 |
+
{
|
108020 |
+
"epoch": 18.54271961492178,
|
108021 |
+
"grad_norm": Infinity,
|
108022 |
+
"learning_rate": 0.00018438143840918798,
|
108023 |
+
"loss": 11.1056,
|
108024 |
+
"step": 154090
|
108025 |
+
},
|
108026 |
+
{
|
108027 |
+
"epoch": 18.543922984356197,
|
108028 |
+
"grad_norm": Infinity,
|
108029 |
+
"learning_rate": 0.00018437939684321498,
|
108030 |
+
"loss": 11.297,
|
108031 |
+
"step": 154100
|
108032 |
+
},
|
108033 |
+
{
|
108034 |
+
"epoch": 18.545126353790614,
|
108035 |
+
"grad_norm": Infinity,
|
108036 |
+
"learning_rate": 0.00018437735515512417,
|
108037 |
+
"loss": 11.2027,
|
108038 |
+
"step": 154110
|
108039 |
+
},
|
108040 |
+
{
|
108041 |
+
"epoch": 18.54632972322503,
|
108042 |
+
"grad_norm": Infinity,
|
108043 |
+
"learning_rate": 0.00018437531334491848,
|
108044 |
+
"loss": 11.1862,
|
108045 |
+
"step": 154120
|
108046 |
+
},
|
108047 |
+
{
|
108048 |
+
"epoch": 18.547533092659446,
|
108049 |
+
"grad_norm": Infinity,
|
108050 |
+
"learning_rate": 0.00018437327141260088,
|
108051 |
+
"loss": 11.2691,
|
108052 |
+
"step": 154130
|
108053 |
+
},
|
108054 |
+
{
|
108055 |
+
"epoch": 18.548736462093864,
|
108056 |
+
"grad_norm": Infinity,
|
108057 |
+
"learning_rate": 0.00018437122935817434,
|
108058 |
+
"loss": 11.0923,
|
108059 |
+
"step": 154140
|
108060 |
+
},
|
108061 |
+
{
|
108062 |
+
"epoch": 18.549939831528278,
|
108063 |
+
"grad_norm": Infinity,
|
108064 |
+
"learning_rate": 0.0001843691871816418,
|
108065 |
+
"loss": 11.1205,
|
108066 |
+
"step": 154150
|
108067 |
+
},
|
108068 |
+
{
|
108069 |
+
"epoch": 18.551143200962695,
|
108070 |
+
"grad_norm": Infinity,
|
108071 |
+
"learning_rate": 0.0001843671448830062,
|
108072 |
+
"loss": 11.164,
|
108073 |
+
"step": 154160
|
108074 |
+
},
|
108075 |
+
{
|
108076 |
+
"epoch": 18.552346570397113,
|
108077 |
+
"grad_norm": Infinity,
|
108078 |
+
"learning_rate": 0.0001843651024622705,
|
108079 |
+
"loss": 11.1202,
|
108080 |
+
"step": 154170
|
108081 |
+
},
|
108082 |
+
{
|
108083 |
+
"epoch": 18.553549939831527,
|
108084 |
+
"grad_norm": Infinity,
|
108085 |
+
"learning_rate": 0.0001843630599194377,
|
108086 |
+
"loss": 11.1886,
|
108087 |
+
"step": 154180
|
108088 |
+
},
|
108089 |
+
{
|
108090 |
+
"epoch": 18.554753309265944,
|
108091 |
+
"grad_norm": Infinity,
|
108092 |
+
"learning_rate": 0.0001843610172545107,
|
108093 |
+
"loss": 11.1563,
|
108094 |
+
"step": 154190
|
108095 |
+
},
|
108096 |
+
{
|
108097 |
+
"epoch": 18.555956678700362,
|
108098 |
+
"grad_norm": Infinity,
|
108099 |
+
"learning_rate": 0.00018435897446749248,
|
108100 |
+
"loss": 11.1706,
|
108101 |
+
"step": 154200
|
108102 |
+
},
|
108103 |
+
{
|
108104 |
+
"epoch": 18.557160048134776,
|
108105 |
+
"grad_norm": Infinity,
|
108106 |
+
"learning_rate": 0.000184356931558386,
|
108107 |
+
"loss": 11.0745,
|
108108 |
+
"step": 154210
|
108109 |
+
},
|
108110 |
+
{
|
108111 |
+
"epoch": 18.558363417569193,
|
108112 |
+
"grad_norm": Infinity,
|
108113 |
+
"learning_rate": 0.00018435488852719422,
|
108114 |
+
"loss": 11.1546,
|
108115 |
+
"step": 154220
|
108116 |
+
},
|
108117 |
+
{
|
108118 |
+
"epoch": 18.55956678700361,
|
108119 |
+
"grad_norm": Infinity,
|
108120 |
+
"learning_rate": 0.0001843528453739201,
|
108121 |
+
"loss": 11.2963,
|
108122 |
+
"step": 154230
|
108123 |
+
},
|
108124 |
+
{
|
108125 |
+
"epoch": 18.560770156438025,
|
108126 |
+
"grad_norm": Infinity,
|
108127 |
+
"learning_rate": 0.00018435080209856656,
|
108128 |
+
"loss": 11.1517,
|
108129 |
+
"step": 154240
|
108130 |
+
},
|
108131 |
+
{
|
108132 |
+
"epoch": 18.561973525872443,
|
108133 |
+
"grad_norm": Infinity,
|
108134 |
+
"learning_rate": 0.0001843487587011366,
|
108135 |
+
"loss": 11.1994,
|
108136 |
+
"step": 154250
|
108137 |
+
},
|
108138 |
+
{
|
108139 |
+
"epoch": 18.56317689530686,
|
108140 |
+
"grad_norm": Infinity,
|
108141 |
+
"learning_rate": 0.00018434671518163315,
|
108142 |
+
"loss": 11.0373,
|
108143 |
+
"step": 154260
|
108144 |
+
},
|
108145 |
+
{
|
108146 |
+
"epoch": 18.564380264741274,
|
108147 |
+
"grad_norm": Infinity,
|
108148 |
+
"learning_rate": 0.00018434467154005918,
|
108149 |
+
"loss": 11.1424,
|
108150 |
+
"step": 154270
|
108151 |
+
},
|
108152 |
+
{
|
108153 |
+
"epoch": 18.56558363417569,
|
108154 |
+
"grad_norm": Infinity,
|
108155 |
+
"learning_rate": 0.00018434262777641767,
|
108156 |
+
"loss": 11.2854,
|
108157 |
+
"step": 154280
|
108158 |
+
},
|
108159 |
+
{
|
108160 |
+
"epoch": 18.56678700361011,
|
108161 |
+
"grad_norm": Infinity,
|
108162 |
+
"learning_rate": 0.00018434058389071154,
|
108163 |
+
"loss": 11.1538,
|
108164 |
+
"step": 154290
|
108165 |
+
},
|
108166 |
+
{
|
108167 |
+
"epoch": 18.567990373044523,
|
108168 |
+
"grad_norm": Infinity,
|
108169 |
+
"learning_rate": 0.00018433853988294375,
|
108170 |
+
"loss": 11.1253,
|
108171 |
+
"step": 154300
|
108172 |
+
},
|
108173 |
+
{
|
108174 |
+
"epoch": 18.56919374247894,
|
108175 |
+
"grad_norm": Infinity,
|
108176 |
+
"learning_rate": 0.0001843364957531173,
|
108177 |
+
"loss": 11.1735,
|
108178 |
+
"step": 154310
|
108179 |
+
},
|
108180 |
+
{
|
108181 |
+
"epoch": 18.57039711191336,
|
108182 |
+
"grad_norm": Infinity,
|
108183 |
+
"learning_rate": 0.00018433445150123508,
|
108184 |
+
"loss": 11.0829,
|
108185 |
+
"step": 154320
|
108186 |
+
},
|
108187 |
+
{
|
108188 |
+
"epoch": 18.571600481347772,
|
108189 |
+
"grad_norm": Infinity,
|
108190 |
+
"learning_rate": 0.0001843324071273001,
|
108191 |
+
"loss": 11.2586,
|
108192 |
+
"step": 154330
|
108193 |
+
},
|
108194 |
+
{
|
108195 |
+
"epoch": 18.57280385078219,
|
108196 |
+
"grad_norm": Infinity,
|
108197 |
+
"learning_rate": 0.00018433036263131532,
|
108198 |
+
"loss": 11.1248,
|
108199 |
+
"step": 154340
|
108200 |
+
},
|
108201 |
+
{
|
108202 |
+
"epoch": 18.574007220216608,
|
108203 |
+
"grad_norm": Infinity,
|
108204 |
+
"learning_rate": 0.00018432831801328366,
|
108205 |
+
"loss": 11.111,
|
108206 |
+
"step": 154350
|
108207 |
+
},
|
108208 |
+
{
|
108209 |
+
"epoch": 18.57521058965102,
|
108210 |
+
"grad_norm": Infinity,
|
108211 |
+
"learning_rate": 0.00018432627327320816,
|
108212 |
+
"loss": 11.0498,
|
108213 |
+
"step": 154360
|
108214 |
+
},
|
108215 |
+
{
|
108216 |
+
"epoch": 18.57641395908544,
|
108217 |
+
"grad_norm": Infinity,
|
108218 |
+
"learning_rate": 0.00018432422841109167,
|
108219 |
+
"loss": 11.2345,
|
108220 |
+
"step": 154370
|
108221 |
+
},
|
108222 |
+
{
|
108223 |
+
"epoch": 18.577617328519857,
|
108224 |
+
"grad_norm": Infinity,
|
108225 |
+
"learning_rate": 0.00018432218342693721,
|
108226 |
+
"loss": 11.1254,
|
108227 |
+
"step": 154380
|
108228 |
+
},
|
108229 |
+
{
|
108230 |
+
"epoch": 18.57882069795427,
|
108231 |
+
"grad_norm": Infinity,
|
108232 |
+
"learning_rate": 0.00018432013832074776,
|
108233 |
+
"loss": 11.2464,
|
108234 |
+
"step": 154390
|
108235 |
+
},
|
108236 |
+
{
|
108237 |
+
"epoch": 18.58002406738869,
|
108238 |
+
"grad_norm": Infinity,
|
108239 |
+
"learning_rate": 0.00018431809309252622,
|
108240 |
+
"loss": 11.1221,
|
108241 |
+
"step": 154400
|
108242 |
+
},
|
108243 |
+
{
|
108244 |
+
"epoch": 18.581227436823106,
|
108245 |
+
"grad_norm": Infinity,
|
108246 |
+
"learning_rate": 0.0001843160477422756,
|
108247 |
+
"loss": 11.194,
|
108248 |
+
"step": 154410
|
108249 |
+
},
|
108250 |
+
{
|
108251 |
+
"epoch": 18.58243080625752,
|
108252 |
+
"grad_norm": Infinity,
|
108253 |
+
"learning_rate": 0.00018431400226999882,
|
108254 |
+
"loss": 11.2833,
|
108255 |
+
"step": 154420
|
108256 |
+
},
|
108257 |
+
{
|
108258 |
+
"epoch": 18.583634175691937,
|
108259 |
+
"grad_norm": Infinity,
|
108260 |
+
"learning_rate": 0.00018431195667569888,
|
108261 |
+
"loss": 11.1216,
|
108262 |
+
"step": 154430
|
108263 |
+
},
|
108264 |
+
{
|
108265 |
+
"epoch": 18.584837545126355,
|
108266 |
+
"grad_norm": Infinity,
|
108267 |
+
"learning_rate": 0.00018430991095937872,
|
108268 |
+
"loss": 11.0644,
|
108269 |
+
"step": 154440
|
108270 |
+
},
|
108271 |
+
{
|
108272 |
+
"epoch": 18.58604091456077,
|
108273 |
+
"grad_norm": Infinity,
|
108274 |
+
"learning_rate": 0.00018430786512104127,
|
108275 |
+
"loss": 11.1347,
|
108276 |
+
"step": 154450
|
108277 |
+
},
|
108278 |
+
{
|
108279 |
+
"epoch": 18.587244283995187,
|
108280 |
+
"grad_norm": Infinity,
|
108281 |
+
"learning_rate": 0.0001843058191606896,
|
108282 |
+
"loss": 11.1396,
|
108283 |
+
"step": 154460
|
108284 |
+
},
|
108285 |
+
{
|
108286 |
+
"epoch": 18.588447653429604,
|
108287 |
+
"grad_norm": Infinity,
|
108288 |
+
"learning_rate": 0.00018430377307832653,
|
108289 |
+
"loss": 11.1106,
|
108290 |
+
"step": 154470
|
108291 |
+
},
|
108292 |
+
{
|
108293 |
+
"epoch": 18.589651022864018,
|
108294 |
+
"grad_norm": Infinity,
|
108295 |
+
"learning_rate": 0.0001843017268739551,
|
108296 |
+
"loss": 11.1095,
|
108297 |
+
"step": 154480
|
108298 |
+
},
|
108299 |
+
{
|
108300 |
+
"epoch": 18.590854392298436,
|
108301 |
+
"grad_norm": Infinity,
|
108302 |
+
"learning_rate": 0.00018429968054757827,
|
108303 |
+
"loss": 11.1176,
|
108304 |
+
"step": 154490
|
108305 |
+
},
|
108306 |
+
{
|
108307 |
+
"epoch": 18.592057761732853,
|
108308 |
+
"grad_norm": Infinity,
|
108309 |
+
"learning_rate": 0.00018429763409919897,
|
108310 |
+
"loss": 11.0243,
|
108311 |
+
"step": 154500
|
108312 |
+
},
|
108313 |
+
{
|
108314 |
+
"epoch": 18.593261131167267,
|
108315 |
+
"grad_norm": Infinity,
|
108316 |
+
"learning_rate": 0.0001842955875288202,
|
108317 |
+
"loss": 11.0968,
|
108318 |
+
"step": 154510
|
108319 |
+
},
|
108320 |
+
{
|
108321 |
+
"epoch": 18.594464500601685,
|
108322 |
+
"grad_norm": Infinity,
|
108323 |
+
"learning_rate": 0.0001842935408364449,
|
108324 |
+
"loss": 11.139,
|
108325 |
+
"step": 154520
|
108326 |
+
},
|
108327 |
+
{
|
108328 |
+
"epoch": 18.595667870036102,
|
108329 |
+
"grad_norm": Infinity,
|
108330 |
+
"learning_rate": 0.00018429149402207604,
|
108331 |
+
"loss": 11.2446,
|
108332 |
+
"step": 154530
|
108333 |
+
},
|
108334 |
+
{
|
108335 |
+
"epoch": 18.596871239470516,
|
108336 |
+
"grad_norm": Infinity,
|
108337 |
+
"learning_rate": 0.00018428944708571656,
|
108338 |
+
"loss": 11.1045,
|
108339 |
+
"step": 154540
|
108340 |
+
},
|
108341 |
+
{
|
108342 |
+
"epoch": 18.598074608904934,
|
108343 |
+
"grad_norm": Infinity,
|
108344 |
+
"learning_rate": 0.00018428740002736946,
|
108345 |
+
"loss": 11.2959,
|
108346 |
+
"step": 154550
|
108347 |
+
},
|
108348 |
+
{
|
108349 |
+
"epoch": 18.59927797833935,
|
108350 |
+
"grad_norm": Infinity,
|
108351 |
+
"learning_rate": 0.00018428535284703768,
|
108352 |
+
"loss": 11.1471,
|
108353 |
+
"step": 154560
|
108354 |
+
},
|
108355 |
+
{
|
108356 |
+
"epoch": 18.600481347773766,
|
108357 |
+
"grad_norm": Infinity,
|
108358 |
+
"learning_rate": 0.00018428330554472416,
|
108359 |
+
"loss": 11.1321,
|
108360 |
+
"step": 154570
|
108361 |
+
},
|
108362 |
+
{
|
108363 |
+
"epoch": 18.601684717208183,
|
108364 |
+
"grad_norm": Infinity,
|
108365 |
+
"learning_rate": 0.00018428125812043194,
|
108366 |
+
"loss": 11.1653,
|
108367 |
+
"step": 154580
|
108368 |
+
},
|
108369 |
+
{
|
108370 |
+
"epoch": 18.6028880866426,
|
108371 |
+
"grad_norm": Infinity,
|
108372 |
+
"learning_rate": 0.00018427921057416388,
|
108373 |
+
"loss": 11.0967,
|
108374 |
+
"step": 154590
|
108375 |
+
},
|
108376 |
+
{
|
108377 |
+
"epoch": 18.604091456077015,
|
108378 |
+
"grad_norm": Infinity,
|
108379 |
+
"learning_rate": 0.00018427716290592302,
|
108380 |
+
"loss": 11.0111,
|
108381 |
+
"step": 154600
|
108382 |
+
},
|
108383 |
+
{
|
108384 |
+
"epoch": 18.605294825511432,
|
108385 |
+
"grad_norm": Infinity,
|
108386 |
+
"learning_rate": 0.00018427511511571228,
|
108387 |
+
"loss": 11.1689,
|
108388 |
+
"step": 154610
|
108389 |
+
},
|
108390 |
+
{
|
108391 |
+
"epoch": 18.60649819494585,
|
108392 |
+
"grad_norm": Infinity,
|
108393 |
+
"learning_rate": 0.00018427306720353466,
|
108394 |
+
"loss": 11.1187,
|
108395 |
+
"step": 154620
|
108396 |
+
},
|
108397 |
+
{
|
108398 |
+
"epoch": 18.607701564380264,
|
108399 |
+
"grad_norm": Infinity,
|
108400 |
+
"learning_rate": 0.00018427101916939308,
|
108401 |
+
"loss": 11.2486,
|
108402 |
+
"step": 154630
|
108403 |
+
},
|
108404 |
+
{
|
108405 |
+
"epoch": 18.60890493381468,
|
108406 |
+
"grad_norm": Infinity,
|
108407 |
+
"learning_rate": 0.00018426897101329054,
|
108408 |
+
"loss": 11.0974,
|
108409 |
+
"step": 154640
|
108410 |
+
},
|
108411 |
+
{
|
108412 |
+
"epoch": 18.6101083032491,
|
108413 |
+
"grad_norm": Infinity,
|
108414 |
+
"learning_rate": 0.00018426692273523,
|
108415 |
+
"loss": 11.228,
|
108416 |
+
"step": 154650
|
108417 |
+
},
|
108418 |
+
{
|
108419 |
+
"epoch": 18.611311672683513,
|
108420 |
+
"grad_norm": Infinity,
|
108421 |
+
"learning_rate": 0.00018426487433521445,
|
108422 |
+
"loss": 11.1379,
|
108423 |
+
"step": 154660
|
108424 |
+
},
|
108425 |
+
{
|
108426 |
+
"epoch": 18.61251504211793,
|
108427 |
+
"grad_norm": Infinity,
|
108428 |
+
"learning_rate": 0.00018426282581324678,
|
108429 |
+
"loss": 11.1701,
|
108430 |
+
"step": 154670
|
108431 |
+
},
|
108432 |
+
{
|
108433 |
+
"epoch": 18.613718411552348,
|
108434 |
+
"grad_norm": Infinity,
|
108435 |
+
"learning_rate": 0.00018426077716932998,
|
108436 |
+
"loss": 11.1545,
|
108437 |
+
"step": 154680
|
108438 |
+
},
|
108439 |
+
{
|
108440 |
+
"epoch": 18.614921780986762,
|
108441 |
+
"grad_norm": Infinity,
|
108442 |
+
"learning_rate": 0.00018425872840346707,
|
108443 |
+
"loss": 11.1638,
|
108444 |
+
"step": 154690
|
108445 |
+
},
|
108446 |
+
{
|
108447 |
+
"epoch": 18.61612515042118,
|
108448 |
+
"grad_norm": Infinity,
|
108449 |
+
"learning_rate": 0.00018425667951566096,
|
108450 |
+
"loss": 11.1699,
|
108451 |
+
"step": 154700
|
108452 |
+
},
|
108453 |
+
{
|
108454 |
+
"epoch": 18.617328519855597,
|
108455 |
+
"grad_norm": Infinity,
|
108456 |
+
"learning_rate": 0.00018425463050591464,
|
108457 |
+
"loss": 11.2327,
|
108458 |
+
"step": 154710
|
108459 |
+
},
|
108460 |
+
{
|
108461 |
+
"epoch": 18.61853188929001,
|
108462 |
+
"grad_norm": Infinity,
|
108463 |
+
"learning_rate": 0.00018425258137423108,
|
108464 |
+
"loss": 11.1129,
|
108465 |
+
"step": 154720
|
108466 |
+
},
|
108467 |
+
{
|
108468 |
+
"epoch": 18.61973525872443,
|
108469 |
+
"grad_norm": Infinity,
|
108470 |
+
"learning_rate": 0.0001842505321206132,
|
108471 |
+
"loss": 11.1449,
|
108472 |
+
"step": 154730
|
108473 |
+
},
|
108474 |
+
{
|
108475 |
+
"epoch": 18.620938628158846,
|
108476 |
+
"grad_norm": Infinity,
|
108477 |
+
"learning_rate": 0.00018424848274506403,
|
108478 |
+
"loss": 11.019,
|
108479 |
+
"step": 154740
|
108480 |
+
},
|
108481 |
+
{
|
108482 |
+
"epoch": 18.62214199759326,
|
108483 |
+
"grad_norm": Infinity,
|
108484 |
+
"learning_rate": 0.0001842464332475865,
|
108485 |
+
"loss": 11.1304,
|
108486 |
+
"step": 154750
|
108487 |
+
},
|
108488 |
+
{
|
108489 |
+
"epoch": 18.623345367027678,
|
108490 |
+
"grad_norm": Infinity,
|
108491 |
+
"learning_rate": 0.00018424438362818358,
|
108492 |
+
"loss": 11.1931,
|
108493 |
+
"step": 154760
|
108494 |
+
},
|
108495 |
+
{
|
108496 |
+
"epoch": 18.624548736462096,
|
108497 |
+
"grad_norm": Infinity,
|
108498 |
+
"learning_rate": 0.00018424233388685823,
|
108499 |
+
"loss": 11.0813,
|
108500 |
+
"step": 154770
|
108501 |
+
},
|
108502 |
+
{
|
108503 |
+
"epoch": 18.62575210589651,
|
108504 |
+
"grad_norm": Infinity,
|
108505 |
+
"learning_rate": 0.00018424028402361345,
|
108506 |
+
"loss": 11.1179,
|
108507 |
+
"step": 154780
|
108508 |
+
},
|
108509 |
+
{
|
108510 |
+
"epoch": 18.626955475330927,
|
108511 |
+
"grad_norm": Infinity,
|
108512 |
+
"learning_rate": 0.00018423823403845212,
|
108513 |
+
"loss": 11.2252,
|
108514 |
+
"step": 154790
|
108515 |
+
},
|
108516 |
+
{
|
108517 |
+
"epoch": 18.628158844765345,
|
108518 |
+
"grad_norm": Infinity,
|
108519 |
+
"learning_rate": 0.00018423618393137732,
|
108520 |
+
"loss": 11.2128,
|
108521 |
+
"step": 154800
|
108522 |
+
},
|
108523 |
+
{
|
108524 |
+
"epoch": 18.62936221419976,
|
108525 |
+
"grad_norm": Infinity,
|
108526 |
+
"learning_rate": 0.00018423413370239198,
|
108527 |
+
"loss": 11.1383,
|
108528 |
+
"step": 154810
|
108529 |
+
},
|
108530 |
+
{
|
108531 |
+
"epoch": 18.630565583634176,
|
108532 |
+
"grad_norm": Infinity,
|
108533 |
+
"learning_rate": 0.000184232083351499,
|
108534 |
+
"loss": 11.1558,
|
108535 |
+
"step": 154820
|
108536 |
+
},
|
108537 |
+
{
|
108538 |
+
"epoch": 18.63176895306859,
|
108539 |
+
"grad_norm": Infinity,
|
108540 |
+
"learning_rate": 0.00018423003287870143,
|
108541 |
+
"loss": 11.1106,
|
108542 |
+
"step": 154830
|
108543 |
+
},
|
108544 |
+
{
|
108545 |
+
"epoch": 18.632972322503008,
|
108546 |
+
"grad_norm": Infinity,
|
108547 |
+
"learning_rate": 0.0001842279822840022,
|
108548 |
+
"loss": 11.2044,
|
108549 |
+
"step": 154840
|
108550 |
+
},
|
108551 |
+
{
|
108552 |
+
"epoch": 18.634175691937426,
|
108553 |
+
"grad_norm": Infinity,
|
108554 |
+
"learning_rate": 0.0001842259315674043,
|
108555 |
+
"loss": 11.0732,
|
108556 |
+
"step": 154850
|
108557 |
+
},
|
108558 |
+
{
|
108559 |
+
"epoch": 18.63537906137184,
|
108560 |
+
"grad_norm": Infinity,
|
108561 |
+
"learning_rate": 0.00018422388072891063,
|
108562 |
+
"loss": 11.2463,
|
108563 |
+
"step": 154860
|
108564 |
+
},
|
108565 |
+
{
|
108566 |
+
"epoch": 18.636582430806257,
|
108567 |
+
"grad_norm": Infinity,
|
108568 |
+
"learning_rate": 0.00018422182976852427,
|
108569 |
+
"loss": 11.208,
|
108570 |
+
"step": 154870
|
108571 |
+
},
|
108572 |
+
{
|
108573 |
+
"epoch": 18.637785800240675,
|
108574 |
+
"grad_norm": Infinity,
|
108575 |
+
"learning_rate": 0.00018421977868624808,
|
108576 |
+
"loss": 11.0892,
|
108577 |
+
"step": 154880
|
108578 |
+
},
|
108579 |
+
{
|
108580 |
+
"epoch": 18.63898916967509,
|
108581 |
+
"grad_norm": Infinity,
|
108582 |
+
"learning_rate": 0.0001842177274820851,
|
108583 |
+
"loss": 11.0983,
|
108584 |
+
"step": 154890
|
108585 |
+
},
|
108586 |
+
{
|
108587 |
+
"epoch": 18.640192539109506,
|
108588 |
+
"grad_norm": Infinity,
|
108589 |
+
"learning_rate": 0.0001842156761560383,
|
108590 |
+
"loss": 11.0362,
|
108591 |
+
"step": 154900
|
108592 |
+
},
|
108593 |
+
{
|
108594 |
+
"epoch": 18.641395908543924,
|
108595 |
+
"grad_norm": Infinity,
|
108596 |
+
"learning_rate": 0.0001842136247081106,
|
108597 |
+
"loss": 11.2536,
|
108598 |
+
"step": 154910
|
108599 |
+
},
|
108600 |
+
{
|
108601 |
+
"epoch": 18.642599277978338,
|
108602 |
+
"grad_norm": Infinity,
|
108603 |
+
"learning_rate": 0.000184211573138305,
|
108604 |
+
"loss": 11.1086,
|
108605 |
+
"step": 154920
|
108606 |
+
},
|
108607 |
+
{
|
108608 |
+
"epoch": 18.643802647412755,
|
108609 |
+
"grad_norm": Infinity,
|
108610 |
+
"learning_rate": 0.00018420952144662444,
|
108611 |
+
"loss": 11.1801,
|
108612 |
+
"step": 154930
|
108613 |
+
},
|
108614 |
+
{
|
108615 |
+
"epoch": 18.645006016847173,
|
108616 |
+
"grad_norm": Infinity,
|
108617 |
+
"learning_rate": 0.0001842074696330719,
|
108618 |
+
"loss": 11.1866,
|
108619 |
+
"step": 154940
|
108620 |
+
},
|
108621 |
+
{
|
108622 |
+
"epoch": 18.646209386281587,
|
108623 |
+
"grad_norm": Infinity,
|
108624 |
+
"learning_rate": 0.00018420541769765042,
|
108625 |
+
"loss": 11.1017,
|
108626 |
+
"step": 154950
|
108627 |
+
},
|
108628 |
+
{
|
108629 |
+
"epoch": 18.647412755716005,
|
108630 |
+
"grad_norm": Infinity,
|
108631 |
+
"learning_rate": 0.00018420336564036287,
|
108632 |
+
"loss": 11.2785,
|
108633 |
+
"step": 154960
|
108634 |
+
},
|
108635 |
+
{
|
108636 |
+
"epoch": 18.648616125150422,
|
108637 |
+
"grad_norm": Infinity,
|
108638 |
+
"learning_rate": 0.0001842013134612123,
|
108639 |
+
"loss": 11.0446,
|
108640 |
+
"step": 154970
|
108641 |
+
},
|
108642 |
+
{
|
108643 |
+
"epoch": 18.649819494584836,
|
108644 |
+
"grad_norm": Infinity,
|
108645 |
+
"learning_rate": 0.0001841992611602016,
|
108646 |
+
"loss": 11.1732,
|
108647 |
+
"step": 154980
|
108648 |
+
},
|
108649 |
+
{
|
108650 |
+
"epoch": 18.651022864019254,
|
108651 |
+
"grad_norm": Infinity,
|
108652 |
+
"learning_rate": 0.0001841972087373338,
|
108653 |
+
"loss": 11.136,
|
108654 |
+
"step": 154990
|
108655 |
+
},
|
108656 |
+
{
|
108657 |
+
"epoch": 18.65222623345367,
|
108658 |
+
"grad_norm": Infinity,
|
108659 |
+
"learning_rate": 0.00018419515619261186,
|
108660 |
+
"loss": 11.0919,
|
108661 |
+
"step": 155000
|
108662 |
+
},
|
108663 |
+
{
|
108664 |
+
"epoch": 18.653429602888085,
|
108665 |
+
"grad_norm": Infinity,
|
108666 |
+
"learning_rate": 0.00018419310352603868,
|
108667 |
+
"loss": 11.1219,
|
108668 |
+
"step": 155010
|
108669 |
+
},
|
108670 |
+
{
|
108671 |
+
"epoch": 18.654632972322503,
|
108672 |
+
"grad_norm": Infinity,
|
108673 |
+
"learning_rate": 0.00018419105073761737,
|
108674 |
+
"loss": 11.1603,
|
108675 |
+
"step": 155020
|
108676 |
+
},
|
108677 |
+
{
|
108678 |
+
"epoch": 18.65583634175692,
|
108679 |
+
"grad_norm": Infinity,
|
108680 |
+
"learning_rate": 0.00018418899782735078,
|
108681 |
+
"loss": 11.1119,
|
108682 |
+
"step": 155030
|
108683 |
+
},
|
108684 |
+
{
|
108685 |
+
"epoch": 18.657039711191334,
|
108686 |
+
"grad_norm": Infinity,
|
108687 |
+
"learning_rate": 0.00018418694479524193,
|
108688 |
+
"loss": 11.2415,
|
108689 |
+
"step": 155040
|
108690 |
+
},
|
108691 |
+
{
|
108692 |
+
"epoch": 18.658243080625752,
|
108693 |
+
"grad_norm": Infinity,
|
108694 |
+
"learning_rate": 0.0001841848916412938,
|
108695 |
+
"loss": 11.1529,
|
108696 |
+
"step": 155050
|
108697 |
+
},
|
108698 |
+
{
|
108699 |
+
"epoch": 18.65944645006017,
|
108700 |
+
"grad_norm": Infinity,
|
108701 |
+
"learning_rate": 0.00018418283836550936,
|
108702 |
+
"loss": 11.0478,
|
108703 |
+
"step": 155060
|
108704 |
+
},
|
108705 |
+
{
|
108706 |
+
"epoch": 18.660649819494584,
|
108707 |
+
"grad_norm": Infinity,
|
108708 |
+
"learning_rate": 0.00018418078496789154,
|
108709 |
+
"loss": 11.1712,
|
108710 |
+
"step": 155070
|
108711 |
+
},
|
108712 |
+
{
|
108713 |
+
"epoch": 18.661853188929,
|
108714 |
+
"grad_norm": Infinity,
|
108715 |
+
"learning_rate": 0.00018417873144844333,
|
108716 |
+
"loss": 11.0886,
|
108717 |
+
"step": 155080
|
108718 |
+
},
|
108719 |
+
{
|
108720 |
+
"epoch": 18.66305655836342,
|
108721 |
+
"grad_norm": Infinity,
|
108722 |
+
"learning_rate": 0.00018417667780716777,
|
108723 |
+
"loss": 11.0901,
|
108724 |
+
"step": 155090
|
108725 |
+
},
|
108726 |
+
{
|
108727 |
+
"epoch": 18.664259927797833,
|
108728 |
+
"grad_norm": Infinity,
|
108729 |
+
"learning_rate": 0.00018417462404406773,
|
108730 |
+
"loss": 11.1937,
|
108731 |
+
"step": 155100
|
108732 |
+
},
|
108733 |
+
{
|
108734 |
+
"epoch": 18.66546329723225,
|
108735 |
+
"grad_norm": Infinity,
|
108736 |
+
"learning_rate": 0.0001841725701591462,
|
108737 |
+
"loss": 11.0931,
|
108738 |
+
"step": 155110
|
108739 |
+
},
|
108740 |
+
{
|
108741 |
+
"epoch": 18.666666666666668,
|
108742 |
+
"grad_norm": Infinity,
|
108743 |
+
"learning_rate": 0.00018417051615240624,
|
108744 |
+
"loss": 11.0515,
|
108745 |
+
"step": 155120
|
108746 |
+
},
|
108747 |
+
{
|
108748 |
+
"epoch": 18.667870036101082,
|
108749 |
+
"grad_norm": Infinity,
|
108750 |
+
"learning_rate": 0.00018416846202385073,
|
108751 |
+
"loss": 11.2758,
|
108752 |
+
"step": 155130
|
108753 |
+
},
|
108754 |
+
{
|
108755 |
+
"epoch": 18.6690734055355,
|
108756 |
+
"grad_norm": Infinity,
|
108757 |
+
"learning_rate": 0.0001841664077734827,
|
108758 |
+
"loss": 11.1772,
|
108759 |
+
"step": 155140
|
108760 |
+
},
|
108761 |
+
{
|
108762 |
+
"epoch": 18.670276774969917,
|
108763 |
+
"grad_norm": Infinity,
|
108764 |
+
"learning_rate": 0.00018416435340130508,
|
108765 |
+
"loss": 11.0956,
|
108766 |
+
"step": 155150
|
108767 |
+
},
|
108768 |
+
{
|
108769 |
+
"epoch": 18.67148014440433,
|
108770 |
+
"grad_norm": Infinity,
|
108771 |
+
"learning_rate": 0.00018416229890732083,
|
108772 |
+
"loss": 11.0273,
|
108773 |
+
"step": 155160
|
108774 |
+
},
|
108775 |
+
{
|
108776 |
+
"epoch": 18.67268351383875,
|
108777 |
+
"grad_norm": Infinity,
|
108778 |
+
"learning_rate": 0.000184160244291533,
|
108779 |
+
"loss": 11.207,
|
108780 |
+
"step": 155170
|
108781 |
+
},
|
108782 |
+
{
|
108783 |
+
"epoch": 18.673886883273166,
|
108784 |
+
"grad_norm": Infinity,
|
108785 |
+
"learning_rate": 0.0001841581895539445,
|
108786 |
+
"loss": 11.1246,
|
108787 |
+
"step": 155180
|
108788 |
+
},
|
108789 |
+
{
|
108790 |
+
"epoch": 18.67509025270758,
|
108791 |
+
"grad_norm": Infinity,
|
108792 |
+
"learning_rate": 0.0001841561346945583,
|
108793 |
+
"loss": 11.2164,
|
108794 |
+
"step": 155190
|
108795 |
+
},
|
108796 |
+
{
|
108797 |
+
"epoch": 18.676293622141998,
|
108798 |
+
"grad_norm": Infinity,
|
108799 |
+
"learning_rate": 0.00018415407971337743,
|
108800 |
+
"loss": 11.1303,
|
108801 |
+
"step": 155200
|
108802 |
+
},
|
108803 |
+
{
|
108804 |
+
"epoch": 18.677496991576415,
|
108805 |
+
"grad_norm": Infinity,
|
108806 |
+
"learning_rate": 0.00018415202461040484,
|
108807 |
+
"loss": 11.0508,
|
108808 |
+
"step": 155210
|
108809 |
+
},
|
108810 |
+
{
|
108811 |
+
"epoch": 18.67870036101083,
|
108812 |
+
"grad_norm": Infinity,
|
108813 |
+
"learning_rate": 0.00018414996938564347,
|
108814 |
+
"loss": 11.1942,
|
108815 |
+
"step": 155220
|
108816 |
+
},
|
108817 |
+
{
|
108818 |
+
"epoch": 18.679903730445247,
|
108819 |
+
"grad_norm": Infinity,
|
108820 |
+
"learning_rate": 0.0001841479140390963,
|
108821 |
+
"loss": 11.1507,
|
108822 |
+
"step": 155230
|
108823 |
+
},
|
108824 |
+
{
|
108825 |
+
"epoch": 18.681107099879664,
|
108826 |
+
"grad_norm": Infinity,
|
108827 |
+
"learning_rate": 0.00018414585857076635,
|
108828 |
+
"loss": 11.094,
|
108829 |
+
"step": 155240
|
108830 |
+
},
|
108831 |
+
{
|
108832 |
+
"epoch": 18.68231046931408,
|
108833 |
+
"grad_norm": Infinity,
|
108834 |
+
"learning_rate": 0.00018414380298065658,
|
108835 |
+
"loss": 11.3475,
|
108836 |
+
"step": 155250
|
108837 |
+
},
|
108838 |
+
{
|
108839 |
+
"epoch": 18.683513838748496,
|
108840 |
+
"grad_norm": Infinity,
|
108841 |
+
"learning_rate": 0.00018414174726876992,
|
108842 |
+
"loss": 11.2109,
|
108843 |
+
"step": 155260
|
108844 |
+
},
|
108845 |
+
{
|
108846 |
+
"epoch": 18.684717208182914,
|
108847 |
+
"grad_norm": Infinity,
|
108848 |
+
"learning_rate": 0.0001841396914351094,
|
108849 |
+
"loss": 11.328,
|
108850 |
+
"step": 155270
|
108851 |
+
},
|
108852 |
+
{
|
108853 |
+
"epoch": 18.685920577617328,
|
108854 |
+
"grad_norm": Infinity,
|
108855 |
+
"learning_rate": 0.00018413763547967796,
|
108856 |
+
"loss": 11.2317,
|
108857 |
+
"step": 155280
|
108858 |
+
},
|
108859 |
+
{
|
108860 |
+
"epoch": 18.687123947051745,
|
108861 |
+
"grad_norm": Infinity,
|
108862 |
+
"learning_rate": 0.00018413557940247862,
|
108863 |
+
"loss": 11.0034,
|
108864 |
+
"step": 155290
|
108865 |
+
},
|
108866 |
+
{
|
108867 |
+
"epoch": 18.688327316486163,
|
108868 |
+
"grad_norm": Infinity,
|
108869 |
+
"learning_rate": 0.00018413352320351427,
|
108870 |
+
"loss": 11.2417,
|
108871 |
+
"step": 155300
|
108872 |
+
},
|
108873 |
+
{
|
108874 |
+
"epoch": 18.689530685920577,
|
108875 |
+
"grad_norm": Infinity,
|
108876 |
+
"learning_rate": 0.00018413146688278797,
|
108877 |
+
"loss": 10.9798,
|
108878 |
+
"step": 155310
|
108879 |
+
},
|
108880 |
+
{
|
108881 |
+
"epoch": 18.690734055354994,
|
108882 |
+
"grad_norm": Infinity,
|
108883 |
+
"learning_rate": 0.00018412941044030267,
|
108884 |
+
"loss": 11.2268,
|
108885 |
+
"step": 155320
|
108886 |
+
},
|
108887 |
+
{
|
108888 |
+
"epoch": 18.691937424789412,
|
108889 |
+
"grad_norm": Infinity,
|
108890 |
+
"learning_rate": 0.00018412735387606131,
|
108891 |
+
"loss": 11.1689,
|
108892 |
+
"step": 155330
|
108893 |
+
},
|
108894 |
+
{
|
108895 |
+
"epoch": 18.693140794223826,
|
108896 |
+
"grad_norm": Infinity,
|
108897 |
+
"learning_rate": 0.00018412529719006695,
|
108898 |
+
"loss": 11.1137,
|
108899 |
+
"step": 155340
|
108900 |
+
},
|
108901 |
+
{
|
108902 |
+
"epoch": 18.694344163658243,
|
108903 |
+
"grad_norm": Infinity,
|
108904 |
+
"learning_rate": 0.00018412324038232249,
|
108905 |
+
"loss": 11.1095,
|
108906 |
+
"step": 155350
|
108907 |
+
},
|
108908 |
+
{
|
108909 |
+
"epoch": 18.69554753309266,
|
108910 |
+
"grad_norm": Infinity,
|
108911 |
+
"learning_rate": 0.00018412118345283092,
|
108912 |
+
"loss": 10.9851,
|
108913 |
+
"step": 155360
|
108914 |
+
},
|
108915 |
+
{
|
108916 |
+
"epoch": 18.696750902527075,
|
108917 |
+
"grad_norm": Infinity,
|
108918 |
+
"learning_rate": 0.00018411912640159525,
|
108919 |
+
"loss": 11.0877,
|
108920 |
+
"step": 155370
|
108921 |
+
},
|
108922 |
+
{
|
108923 |
+
"epoch": 18.697954271961493,
|
108924 |
+
"grad_norm": Infinity,
|
108925 |
+
"learning_rate": 0.00018411706922861844,
|
108926 |
+
"loss": 11.1739,
|
108927 |
+
"step": 155380
|
108928 |
+
},
|
108929 |
+
{
|
108930 |
+
"epoch": 18.69915764139591,
|
108931 |
+
"grad_norm": Infinity,
|
108932 |
+
"learning_rate": 0.00018411501193390346,
|
108933 |
+
"loss": 11.1279,
|
108934 |
+
"step": 155390
|
108935 |
+
},
|
108936 |
+
{
|
108937 |
+
"epoch": 18.700361010830324,
|
108938 |
+
"grad_norm": Infinity,
|
108939 |
+
"learning_rate": 0.00018411295451745328,
|
108940 |
+
"loss": 11.1035,
|
108941 |
+
"step": 155400
|
108942 |
+
},
|
108943 |
+
{
|
108944 |
+
"epoch": 18.70156438026474,
|
108945 |
+
"grad_norm": Infinity,
|
108946 |
+
"learning_rate": 0.00018411089697927086,
|
108947 |
+
"loss": 11.1095,
|
108948 |
+
"step": 155410
|
108949 |
+
},
|
108950 |
+
{
|
108951 |
+
"epoch": 18.70276774969916,
|
108952 |
+
"grad_norm": Infinity,
|
108953 |
+
"learning_rate": 0.00018410883931935926,
|
108954 |
+
"loss": 11.1272,
|
108955 |
+
"step": 155420
|
108956 |
+
},
|
108957 |
+
{
|
108958 |
+
"epoch": 18.703971119133573,
|
108959 |
+
"grad_norm": Infinity,
|
108960 |
+
"learning_rate": 0.00018410678153772136,
|
108961 |
+
"loss": 11.1291,
|
108962 |
+
"step": 155430
|
108963 |
+
},
|
108964 |
+
{
|
108965 |
+
"epoch": 18.70517448856799,
|
108966 |
+
"grad_norm": Infinity,
|
108967 |
+
"learning_rate": 0.0001841047236343602,
|
108968 |
+
"loss": 11.1143,
|
108969 |
+
"step": 155440
|
108970 |
+
},
|
108971 |
+
{
|
108972 |
+
"epoch": 18.706377858002405,
|
108973 |
+
"grad_norm": Infinity,
|
108974 |
+
"learning_rate": 0.00018410266560927875,
|
108975 |
+
"loss": 11.081,
|
108976 |
+
"step": 155450
|
108977 |
+
},
|
108978 |
+
{
|
108979 |
+
"epoch": 18.707581227436823,
|
108980 |
+
"grad_norm": Infinity,
|
108981 |
+
"learning_rate": 0.00018410060746247995,
|
108982 |
+
"loss": 11.1918,
|
108983 |
+
"step": 155460
|
108984 |
+
},
|
108985 |
+
{
|
108986 |
+
"epoch": 18.70878459687124,
|
108987 |
+
"grad_norm": Infinity,
|
108988 |
+
"learning_rate": 0.00018409854919396686,
|
108989 |
+
"loss": 11.1112,
|
108990 |
+
"step": 155470
|
108991 |
+
},
|
108992 |
+
{
|
108993 |
+
"epoch": 18.709987966305654,
|
108994 |
+
"grad_norm": Infinity,
|
108995 |
+
"learning_rate": 0.00018409649080374237,
|
108996 |
+
"loss": 11.1154,
|
108997 |
+
"step": 155480
|
108998 |
+
},
|
108999 |
+
{
|
109000 |
+
"epoch": 18.71119133574007,
|
109001 |
+
"grad_norm": Infinity,
|
109002 |
+
"learning_rate": 0.00018409443229180946,
|
109003 |
+
"loss": 11.1244,
|
109004 |
+
"step": 155490
|
109005 |
+
},
|
109006 |
+
{
|
109007 |
+
"epoch": 18.71239470517449,
|
109008 |
+
"grad_norm": Infinity,
|
109009 |
+
"learning_rate": 0.00018409237365817118,
|
109010 |
+
"loss": 11.1785,
|
109011 |
+
"step": 155500
|
109012 |
+
},
|
109013 |
+
{
|
109014 |
+
"epoch": 18.713598074608903,
|
109015 |
+
"grad_norm": Infinity,
|
109016 |
+
"learning_rate": 0.0001840903149028305,
|
109017 |
+
"loss": 11.0254,
|
109018 |
+
"step": 155510
|
109019 |
+
},
|
109020 |
+
{
|
109021 |
+
"epoch": 18.71480144404332,
|
109022 |
+
"grad_norm": Infinity,
|
109023 |
+
"learning_rate": 0.00018408825602579033,
|
109024 |
+
"loss": 11.096,
|
109025 |
+
"step": 155520
|
109026 |
+
},
|
109027 |
+
{
|
109028 |
+
"epoch": 18.71600481347774,
|
109029 |
+
"grad_norm": Infinity,
|
109030 |
+
"learning_rate": 0.00018408619702705372,
|
109031 |
+
"loss": 11.1088,
|
109032 |
+
"step": 155530
|
109033 |
+
},
|
109034 |
+
{
|
109035 |
+
"epoch": 18.717208182912152,
|
109036 |
+
"grad_norm": Infinity,
|
109037 |
+
"learning_rate": 0.0001840841379066236,
|
109038 |
+
"loss": 11.1197,
|
109039 |
+
"step": 155540
|
109040 |
+
},
|
109041 |
+
{
|
109042 |
+
"epoch": 18.71841155234657,
|
109043 |
+
"grad_norm": Infinity,
|
109044 |
+
"learning_rate": 0.000184082078664503,
|
109045 |
+
"loss": 11.043,
|
109046 |
+
"step": 155550
|
109047 |
+
},
|
109048 |
+
{
|
109049 |
+
"epoch": 18.719614921780988,
|
109050 |
+
"grad_norm": Infinity,
|
109051 |
+
"learning_rate": 0.00018408001930069484,
|
109052 |
+
"loss": 11.0468,
|
109053 |
+
"step": 155560
|
109054 |
+
},
|
109055 |
+
{
|
109056 |
+
"epoch": 18.7208182912154,
|
109057 |
+
"grad_norm": Infinity,
|
109058 |
+
"learning_rate": 0.00018407795981520216,
|
109059 |
+
"loss": 11.0612,
|
109060 |
+
"step": 155570
|
109061 |
+
},
|
109062 |
+
{
|
109063 |
+
"epoch": 18.72202166064982,
|
109064 |
+
"grad_norm": Infinity,
|
109065 |
+
"learning_rate": 0.00018407590020802793,
|
109066 |
+
"loss": 11.2637,
|
109067 |
+
"step": 155580
|
109068 |
+
},
|
109069 |
+
{
|
109070 |
+
"epoch": 18.723225030084237,
|
109071 |
+
"grad_norm": Infinity,
|
109072 |
+
"learning_rate": 0.0001840738404791751,
|
109073 |
+
"loss": 11.1286,
|
109074 |
+
"step": 155590
|
109075 |
+
},
|
109076 |
+
{
|
109077 |
+
"epoch": 18.72442839951865,
|
109078 |
+
"grad_norm": Infinity,
|
109079 |
+
"learning_rate": 0.00018407178062864664,
|
109080 |
+
"loss": 10.9937,
|
109081 |
+
"step": 155600
|
109082 |
+
},
|
109083 |
+
{
|
109084 |
+
"epoch": 18.72563176895307,
|
109085 |
+
"grad_norm": Infinity,
|
109086 |
+
"learning_rate": 0.00018406972065644557,
|
109087 |
+
"loss": 11.0324,
|
109088 |
+
"step": 155610
|
109089 |
+
},
|
109090 |
+
{
|
109091 |
+
"epoch": 18.726835138387486,
|
109092 |
+
"grad_norm": Infinity,
|
109093 |
+
"learning_rate": 0.00018406766056257485,
|
109094 |
+
"loss": 11.0811,
|
109095 |
+
"step": 155620
|
109096 |
+
},
|
109097 |
+
{
|
109098 |
+
"epoch": 18.7280385078219,
|
109099 |
+
"grad_norm": Infinity,
|
109100 |
+
"learning_rate": 0.00018406560034703748,
|
109101 |
+
"loss": 11.1048,
|
109102 |
+
"step": 155630
|
109103 |
+
},
|
109104 |
+
{
|
109105 |
+
"epoch": 18.729241877256317,
|
109106 |
+
"grad_norm": Infinity,
|
109107 |
+
"learning_rate": 0.00018406354000983642,
|
109108 |
+
"loss": 10.9592,
|
109109 |
+
"step": 155640
|
109110 |
+
},
|
109111 |
+
{
|
109112 |
+
"epoch": 18.730445246690735,
|
109113 |
+
"grad_norm": Infinity,
|
109114 |
+
"learning_rate": 0.0001840614795509747,
|
109115 |
+
"loss": 11.0839,
|
109116 |
+
"step": 155650
|
109117 |
+
},
|
109118 |
+
{
|
109119 |
+
"epoch": 18.73164861612515,
|
109120 |
+
"grad_norm": Infinity,
|
109121 |
+
"learning_rate": 0.00018405941897045523,
|
109122 |
+
"loss": 11.1915,
|
109123 |
+
"step": 155660
|
109124 |
+
},
|
109125 |
+
{
|
109126 |
+
"epoch": 18.732851985559567,
|
109127 |
+
"grad_norm": Infinity,
|
109128 |
+
"learning_rate": 0.00018405735826828107,
|
109129 |
+
"loss": 11.0202,
|
109130 |
+
"step": 155670
|
109131 |
+
},
|
109132 |
+
{
|
109133 |
+
"epoch": 18.734055354993984,
|
109134 |
+
"grad_norm": Infinity,
|
109135 |
+
"learning_rate": 0.0001840552974444551,
|
109136 |
+
"loss": 11.1132,
|
109137 |
+
"step": 155680
|
109138 |
+
},
|
109139 |
+
{
|
109140 |
+
"epoch": 18.735258724428398,
|
109141 |
+
"grad_norm": Infinity,
|
109142 |
+
"learning_rate": 0.0001840532364989804,
|
109143 |
+
"loss": 11.2307,
|
109144 |
+
"step": 155690
|
109145 |
+
},
|
109146 |
+
{
|
109147 |
+
"epoch": 18.736462093862816,
|
109148 |
+
"grad_norm": Infinity,
|
109149 |
+
"learning_rate": 0.0001840511754318599,
|
109150 |
+
"loss": 11.1407,
|
109151 |
+
"step": 155700
|
109152 |
+
},
|
109153 |
+
{
|
109154 |
+
"epoch": 18.737665463297233,
|
109155 |
+
"grad_norm": Infinity,
|
109156 |
+
"learning_rate": 0.00018404911424309658,
|
109157 |
+
"loss": 11.1428,
|
109158 |
+
"step": 155710
|
109159 |
+
},
|
109160 |
+
{
|
109161 |
+
"epoch": 18.738868832731647,
|
109162 |
+
"grad_norm": Infinity,
|
109163 |
+
"learning_rate": 0.00018404705293269347,
|
109164 |
+
"loss": 11.1013,
|
109165 |
+
"step": 155720
|
109166 |
+
},
|
109167 |
+
{
|
109168 |
+
"epoch": 18.740072202166065,
|
109169 |
+
"grad_norm": Infinity,
|
109170 |
+
"learning_rate": 0.00018404499150065351,
|
109171 |
+
"loss": 11.1795,
|
109172 |
+
"step": 155730
|
109173 |
+
},
|
109174 |
+
{
|
109175 |
+
"epoch": 18.741275571600482,
|
109176 |
+
"grad_norm": Infinity,
|
109177 |
+
"learning_rate": 0.0001840429299469797,
|
109178 |
+
"loss": 11.1118,
|
109179 |
+
"step": 155740
|
109180 |
+
},
|
109181 |
+
{
|
109182 |
+
"epoch": 18.742478941034896,
|
109183 |
+
"grad_norm": Infinity,
|
109184 |
+
"learning_rate": 0.00018404086827167502,
|
109185 |
+
"loss": 11.0965,
|
109186 |
+
"step": 155750
|
109187 |
+
},
|
109188 |
+
{
|
109189 |
+
"epoch": 18.743682310469314,
|
109190 |
+
"grad_norm": Infinity,
|
109191 |
+
"learning_rate": 0.00018403880647474249,
|
109192 |
+
"loss": 11.1054,
|
109193 |
+
"step": 155760
|
109194 |
+
},
|
109195 |
+
{
|
109196 |
+
"epoch": 18.74488567990373,
|
109197 |
+
"grad_norm": Infinity,
|
109198 |
+
"learning_rate": 0.00018403674455618501,
|
109199 |
+
"loss": 11.0522,
|
109200 |
+
"step": 155770
|
109201 |
+
},
|
109202 |
+
{
|
109203 |
+
"epoch": 18.746089049338146,
|
109204 |
+
"grad_norm": Infinity,
|
109205 |
+
"learning_rate": 0.00018403468251600562,
|
109206 |
+
"loss": 10.9737,
|
109207 |
+
"step": 155780
|
109208 |
+
},
|
109209 |
+
{
|
109210 |
+
"epoch": 18.747292418772563,
|
109211 |
+
"grad_norm": Infinity,
|
109212 |
+
"learning_rate": 0.0001840326203542073,
|
109213 |
+
"loss": 11.2099,
|
109214 |
+
"step": 155790
|
109215 |
+
},
|
109216 |
+
{
|
109217 |
+
"epoch": 18.74849578820698,
|
109218 |
+
"grad_norm": Infinity,
|
109219 |
+
"learning_rate": 0.00018403055807079307,
|
109220 |
+
"loss": 11.174,
|
109221 |
+
"step": 155800
|
109222 |
+
},
|
109223 |
+
{
|
109224 |
+
"epoch": 18.749699157641395,
|
109225 |
+
"grad_norm": Infinity,
|
109226 |
+
"learning_rate": 0.00018402849566576582,
|
109227 |
+
"loss": 11.0626,
|
109228 |
+
"step": 155810
|
109229 |
+
},
|
109230 |
+
{
|
109231 |
+
"epoch": 18.750902527075812,
|
109232 |
+
"grad_norm": Infinity,
|
109233 |
+
"learning_rate": 0.00018402643313912862,
|
109234 |
+
"loss": 11.1572,
|
109235 |
+
"step": 155820
|
109236 |
+
},
|
109237 |
+
{
|
109238 |
+
"epoch": 18.75210589651023,
|
109239 |
+
"grad_norm": Infinity,
|
109240 |
+
"learning_rate": 0.00018402437049088444,
|
109241 |
+
"loss": 11.0243,
|
109242 |
+
"step": 155830
|
109243 |
+
},
|
109244 |
+
{
|
109245 |
+
"epoch": 18.753309265944644,
|
109246 |
+
"grad_norm": Infinity,
|
109247 |
+
"learning_rate": 0.0001840223077210362,
|
109248 |
+
"loss": 11.0665,
|
109249 |
+
"step": 155840
|
109250 |
+
},
|
109251 |
+
{
|
109252 |
+
"epoch": 18.75451263537906,
|
109253 |
+
"grad_norm": Infinity,
|
109254 |
+
"learning_rate": 0.000184020244829587,
|
109255 |
+
"loss": 11.1575,
|
109256 |
+
"step": 155850
|
109257 |
+
},
|
109258 |
+
{
|
109259 |
+
"epoch": 18.75571600481348,
|
109260 |
+
"grad_norm": Infinity,
|
109261 |
+
"learning_rate": 0.0001840181818165397,
|
109262 |
+
"loss": 11.2205,
|
109263 |
+
"step": 155860
|
109264 |
+
},
|
109265 |
+
{
|
109266 |
+
"epoch": 18.756919374247893,
|
109267 |
+
"grad_norm": Infinity,
|
109268 |
+
"learning_rate": 0.00018401611868189738,
|
109269 |
+
"loss": 11.2051,
|
109270 |
+
"step": 155870
|
109271 |
+
},
|
109272 |
+
{
|
109273 |
+
"epoch": 18.75812274368231,
|
109274 |
+
"grad_norm": Infinity,
|
109275 |
+
"learning_rate": 0.00018401405542566298,
|
109276 |
+
"loss": 11.1057,
|
109277 |
+
"step": 155880
|
109278 |
+
},
|
109279 |
+
{
|
109280 |
+
"epoch": 18.759326113116728,
|
109281 |
+
"grad_norm": Infinity,
|
109282 |
+
"learning_rate": 0.00018401199204783953,
|
109283 |
+
"loss": 11.094,
|
109284 |
+
"step": 155890
|
109285 |
+
},
|
109286 |
+
{
|
109287 |
+
"epoch": 18.760529482551142,
|
109288 |
+
"grad_norm": Infinity,
|
109289 |
+
"learning_rate": 0.00018400992854842997,
|
109290 |
+
"loss": 11.0583,
|
109291 |
+
"step": 155900
|
109292 |
+
},
|
109293 |
+
{
|
109294 |
+
"epoch": 18.76173285198556,
|
109295 |
+
"grad_norm": Infinity,
|
109296 |
+
"learning_rate": 0.0001840078649274373,
|
109297 |
+
"loss": 11.1038,
|
109298 |
+
"step": 155910
|
109299 |
+
},
|
109300 |
+
{
|
109301 |
+
"epoch": 18.762936221419977,
|
109302 |
+
"grad_norm": Infinity,
|
109303 |
+
"learning_rate": 0.0001840058011848645,
|
109304 |
+
"loss": 11.3338,
|
109305 |
+
"step": 155920
|
109306 |
+
},
|
109307 |
+
{
|
109308 |
+
"epoch": 18.76413959085439,
|
109309 |
+
"grad_norm": Infinity,
|
109310 |
+
"learning_rate": 0.00018400373732071457,
|
109311 |
+
"loss": 11.0459,
|
109312 |
+
"step": 155930
|
109313 |
+
},
|
109314 |
+
{
|
109315 |
+
"epoch": 18.76534296028881,
|
109316 |
+
"grad_norm": Infinity,
|
109317 |
+
"learning_rate": 0.00018400167333499048,
|
109318 |
+
"loss": 11.2301,
|
109319 |
+
"step": 155940
|
109320 |
+
},
|
109321 |
+
{
|
109322 |
+
"epoch": 18.766546329723226,
|
109323 |
+
"grad_norm": Infinity,
|
109324 |
+
"learning_rate": 0.00018399960922769527,
|
109325 |
+
"loss": 11.1998,
|
109326 |
+
"step": 155950
|
109327 |
+
},
|
109328 |
+
{
|
109329 |
+
"epoch": 18.76774969915764,
|
109330 |
+
"grad_norm": Infinity,
|
109331 |
+
"learning_rate": 0.00018399754499883185,
|
109332 |
+
"loss": 11.096,
|
109333 |
+
"step": 155960
|
109334 |
+
},
|
109335 |
+
{
|
109336 |
+
"epoch": 18.768953068592058,
|
109337 |
+
"grad_norm": Infinity,
|
109338 |
+
"learning_rate": 0.00018399548064840322,
|
109339 |
+
"loss": 11.1265,
|
109340 |
+
"step": 155970
|
109341 |
+
},
|
109342 |
+
{
|
109343 |
+
"epoch": 18.770156438026476,
|
109344 |
+
"grad_norm": Infinity,
|
109345 |
+
"learning_rate": 0.00018399341617641245,
|
109346 |
+
"loss": 11.1074,
|
109347 |
+
"step": 155980
|
109348 |
+
},
|
109349 |
+
{
|
109350 |
+
"epoch": 18.77135980746089,
|
109351 |
+
"grad_norm": Infinity,
|
109352 |
+
"learning_rate": 0.00018399135158286245,
|
109353 |
+
"loss": 11.1348,
|
109354 |
+
"step": 155990
|
109355 |
+
},
|
109356 |
+
{
|
109357 |
+
"epoch": 18.772563176895307,
|
109358 |
+
"grad_norm": Infinity,
|
109359 |
+
"learning_rate": 0.0001839892868677562,
|
109360 |
+
"loss": 11.0085,
|
109361 |
+
"step": 156000
|
109362 |
+
},
|
109363 |
+
{
|
109364 |
+
"epoch": 18.773766546329725,
|
109365 |
+
"grad_norm": Infinity,
|
109366 |
+
"learning_rate": 0.00018398722203109675,
|
109367 |
+
"loss": 11.0944,
|
109368 |
+
"step": 156010
|
109369 |
+
},
|
109370 |
+
{
|
109371 |
+
"epoch": 18.77496991576414,
|
109372 |
+
"grad_norm": Infinity,
|
109373 |
+
"learning_rate": 0.00018398515707288704,
|
109374 |
+
"loss": 11.1064,
|
109375 |
+
"step": 156020
|
109376 |
+
},
|
109377 |
+
{
|
109378 |
+
"epoch": 18.776173285198556,
|
109379 |
+
"grad_norm": Infinity,
|
109380 |
+
"learning_rate": 0.00018398309199313008,
|
109381 |
+
"loss": 11.202,
|
109382 |
+
"step": 156030
|
109383 |
+
},
|
109384 |
+
{
|
109385 |
+
"epoch": 18.777376654632974,
|
109386 |
+
"grad_norm": Infinity,
|
109387 |
+
"learning_rate": 0.00018398102679182885,
|
109388 |
+
"loss": 11.0866,
|
109389 |
+
"step": 156040
|
109390 |
+
},
|
109391 |
+
{
|
109392 |
+
"epoch": 18.778580024067388,
|
109393 |
+
"grad_norm": Infinity,
|
109394 |
+
"learning_rate": 0.00018397896146898633,
|
109395 |
+
"loss": 11.1692,
|
109396 |
+
"step": 156050
|
109397 |
+
},
|
109398 |
+
{
|
109399 |
+
"epoch": 18.779783393501805,
|
109400 |
+
"grad_norm": Infinity,
|
109401 |
+
"learning_rate": 0.0001839768960246055,
|
109402 |
+
"loss": 11.1189,
|
109403 |
+
"step": 156060
|
109404 |
+
},
|
109405 |
+
{
|
109406 |
+
"epoch": 18.780986762936223,
|
109407 |
+
"grad_norm": Infinity,
|
109408 |
+
"learning_rate": 0.00018397483045868942,
|
109409 |
+
"loss": 11.1767,
|
109410 |
+
"step": 156070
|
109411 |
+
},
|
109412 |
+
{
|
109413 |
+
"epoch": 18.782190132370637,
|
109414 |
+
"grad_norm": Infinity,
|
109415 |
+
"learning_rate": 0.00018397276477124098,
|
109416 |
+
"loss": 11.1687,
|
109417 |
+
"step": 156080
|
109418 |
+
},
|
109419 |
+
{
|
109420 |
+
"epoch": 18.783393501805055,
|
109421 |
+
"grad_norm": Infinity,
|
109422 |
+
"learning_rate": 0.00018397069896226325,
|
109423 |
+
"loss": 11.1573,
|
109424 |
+
"step": 156090
|
109425 |
+
},
|
109426 |
+
{
|
109427 |
+
"epoch": 18.784596871239472,
|
109428 |
+
"grad_norm": Infinity,
|
109429 |
+
"learning_rate": 0.00018396863303175918,
|
109430 |
+
"loss": 11.0969,
|
109431 |
+
"step": 156100
|
109432 |
+
},
|
109433 |
+
{
|
109434 |
+
"epoch": 18.785800240673886,
|
109435 |
+
"grad_norm": Infinity,
|
109436 |
+
"learning_rate": 0.00018396656697973175,
|
109437 |
+
"loss": 11.0836,
|
109438 |
+
"step": 156110
|
109439 |
+
},
|
109440 |
+
{
|
109441 |
+
"epoch": 18.787003610108304,
|
109442 |
+
"grad_norm": Infinity,
|
109443 |
+
"learning_rate": 0.00018396450080618396,
|
109444 |
+
"loss": 11.1231,
|
109445 |
+
"step": 156120
|
109446 |
+
},
|
109447 |
+
{
|
109448 |
+
"epoch": 18.78820697954272,
|
109449 |
+
"grad_norm": Infinity,
|
109450 |
+
"learning_rate": 0.00018396243451111884,
|
109451 |
+
"loss": 11.0268,
|
109452 |
+
"step": 156130
|
109453 |
+
},
|
109454 |
+
{
|
109455 |
+
"epoch": 18.789410348977135,
|
109456 |
+
"grad_norm": Infinity,
|
109457 |
+
"learning_rate": 0.00018396036809453933,
|
109458 |
+
"loss": 11.2009,
|
109459 |
+
"step": 156140
|
109460 |
+
},
|
109461 |
+
{
|
109462 |
+
"epoch": 18.790613718411553,
|
109463 |
+
"grad_norm": Infinity,
|
109464 |
+
"learning_rate": 0.00018395830155644845,
|
109465 |
+
"loss": 11.106,
|
109466 |
+
"step": 156150
|
109467 |
+
},
|
109468 |
+
{
|
109469 |
+
"epoch": 18.79181708784597,
|
109470 |
+
"grad_norm": Infinity,
|
109471 |
+
"learning_rate": 0.00018395623489684916,
|
109472 |
+
"loss": 11.1531,
|
109473 |
+
"step": 156160
|
109474 |
+
},
|
109475 |
+
{
|
109476 |
+
"epoch": 18.793020457280385,
|
109477 |
+
"grad_norm": Infinity,
|
109478 |
+
"learning_rate": 0.00018395416811574448,
|
109479 |
+
"loss": 11.1239,
|
109480 |
+
"step": 156170
|
109481 |
+
},
|
109482 |
+
{
|
109483 |
+
"epoch": 18.794223826714802,
|
109484 |
+
"grad_norm": Infinity,
|
109485 |
+
"learning_rate": 0.0001839521012131374,
|
109486 |
+
"loss": 11.0848,
|
109487 |
+
"step": 156180
|
109488 |
+
},
|
109489 |
+
{
|
109490 |
+
"epoch": 18.79542719614922,
|
109491 |
+
"grad_norm": Infinity,
|
109492 |
+
"learning_rate": 0.00018395003418903086,
|
109493 |
+
"loss": 11.0738,
|
109494 |
+
"step": 156190
|
109495 |
+
},
|
109496 |
+
{
|
109497 |
+
"epoch": 18.796630565583634,
|
109498 |
+
"grad_norm": Infinity,
|
109499 |
+
"learning_rate": 0.00018394796704342795,
|
109500 |
+
"loss": 11.1085,
|
109501 |
+
"step": 156200
|
109502 |
+
},
|
109503 |
+
{
|
109504 |
+
"epoch": 18.79783393501805,
|
109505 |
+
"grad_norm": Infinity,
|
109506 |
+
"learning_rate": 0.0001839458997763316,
|
109507 |
+
"loss": 11.1598,
|
109508 |
+
"step": 156210
|
109509 |
+
},
|
109510 |
+
{
|
109511 |
+
"epoch": 18.799037304452465,
|
109512 |
+
"grad_norm": Infinity,
|
109513 |
+
"learning_rate": 0.00018394383238774477,
|
109514 |
+
"loss": 11.1524,
|
109515 |
+
"step": 156220
|
109516 |
+
},
|
109517 |
+
{
|
109518 |
+
"epoch": 18.800240673886883,
|
109519 |
+
"grad_norm": Infinity,
|
109520 |
+
"learning_rate": 0.00018394176487767052,
|
109521 |
+
"loss": 11.0619,
|
109522 |
+
"step": 156230
|
109523 |
+
},
|
109524 |
+
{
|
109525 |
+
"epoch": 18.8014440433213,
|
109526 |
+
"grad_norm": Infinity,
|
109527 |
+
"learning_rate": 0.0001839396972461118,
|
109528 |
+
"loss": 11.0887,
|
109529 |
+
"step": 156240
|
109530 |
+
},
|
109531 |
+
{
|
109532 |
+
"epoch": 18.802647412755714,
|
109533 |
+
"grad_norm": Infinity,
|
109534 |
+
"learning_rate": 0.0001839376294930716,
|
109535 |
+
"loss": 11.1339,
|
109536 |
+
"step": 156250
|
109537 |
+
},
|
109538 |
+
{
|
109539 |
+
"epoch": 18.803850782190132,
|
109540 |
+
"grad_norm": Infinity,
|
109541 |
+
"learning_rate": 0.00018393556161855297,
|
109542 |
+
"loss": 11.2568,
|
109543 |
+
"step": 156260
|
109544 |
+
},
|
109545 |
+
{
|
109546 |
+
"epoch": 18.80505415162455,
|
109547 |
+
"grad_norm": Infinity,
|
109548 |
+
"learning_rate": 0.00018393349362255882,
|
109549 |
+
"loss": 11.2225,
|
109550 |
+
"step": 156270
|
109551 |
+
},
|
109552 |
+
{
|
109553 |
+
"epoch": 18.806257521058964,
|
109554 |
+
"grad_norm": Infinity,
|
109555 |
+
"learning_rate": 0.0001839314255050922,
|
109556 |
+
"loss": 11.1804,
|
109557 |
+
"step": 156280
|
109558 |
+
},
|
109559 |
+
{
|
109560 |
+
"epoch": 18.80746089049338,
|
109561 |
+
"grad_norm": Infinity,
|
109562 |
+
"learning_rate": 0.00018392935726615608,
|
109563 |
+
"loss": 11.0418,
|
109564 |
+
"step": 156290
|
109565 |
+
},
|
109566 |
+
{
|
109567 |
+
"epoch": 18.8086642599278,
|
109568 |
+
"grad_norm": Infinity,
|
109569 |
+
"learning_rate": 0.00018392728890575345,
|
109570 |
+
"loss": 11.169,
|
109571 |
+
"step": 156300
|
109572 |
+
},
|
109573 |
+
{
|
109574 |
+
"epoch": 18.809867629362213,
|
109575 |
+
"grad_norm": Infinity,
|
109576 |
+
"learning_rate": 0.00018392522042388733,
|
109577 |
+
"loss": 11.1228,
|
109578 |
+
"step": 156310
|
109579 |
+
},
|
109580 |
+
{
|
109581 |
+
"epoch": 18.81107099879663,
|
109582 |
+
"grad_norm": Infinity,
|
109583 |
+
"learning_rate": 0.0001839231518205607,
|
109584 |
+
"loss": 11.2595,
|
109585 |
+
"step": 156320
|
109586 |
+
},
|
109587 |
+
{
|
109588 |
+
"epoch": 18.812274368231048,
|
109589 |
+
"grad_norm": Infinity,
|
109590 |
+
"learning_rate": 0.00018392108309577652,
|
109591 |
+
"loss": 11.2344,
|
109592 |
+
"step": 156330
|
109593 |
+
},
|
109594 |
+
{
|
109595 |
+
"epoch": 18.813477737665462,
|
109596 |
+
"grad_norm": Infinity,
|
109597 |
+
"learning_rate": 0.0001839190142495378,
|
109598 |
+
"loss": 11.0632,
|
109599 |
+
"step": 156340
|
109600 |
+
},
|
109601 |
+
{
|
109602 |
+
"epoch": 18.81468110709988,
|
109603 |
+
"grad_norm": Infinity,
|
109604 |
+
"learning_rate": 0.00018391694528184757,
|
109605 |
+
"loss": 11.1457,
|
109606 |
+
"step": 156350
|
109607 |
+
},
|
109608 |
+
{
|
109609 |
+
"epoch": 18.815884476534297,
|
109610 |
+
"grad_norm": Infinity,
|
109611 |
+
"learning_rate": 0.00018391487619270883,
|
109612 |
+
"loss": 11.2568,
|
109613 |
+
"step": 156360
|
109614 |
+
},
|
109615 |
+
{
|
109616 |
+
"epoch": 18.81708784596871,
|
109617 |
+
"grad_norm": Infinity,
|
109618 |
+
"learning_rate": 0.0001839128069821245,
|
109619 |
+
"loss": 11.1566,
|
109620 |
+
"step": 156370
|
109621 |
+
},
|
109622 |
+
{
|
109623 |
+
"epoch": 18.81829121540313,
|
109624 |
+
"grad_norm": Infinity,
|
109625 |
+
"learning_rate": 0.00018391073765009765,
|
109626 |
+
"loss": 11.3208,
|
109627 |
+
"step": 156380
|
109628 |
+
},
|
109629 |
+
{
|
109630 |
+
"epoch": 18.819494584837546,
|
109631 |
+
"grad_norm": Infinity,
|
109632 |
+
"learning_rate": 0.00018390866819663123,
|
109633 |
+
"loss": 11.101,
|
109634 |
+
"step": 156390
|
109635 |
+
},
|
109636 |
+
{
|
109637 |
+
"epoch": 18.82069795427196,
|
109638 |
+
"grad_norm": Infinity,
|
109639 |
+
"learning_rate": 0.0001839065986217283,
|
109640 |
+
"loss": 11.1122,
|
109641 |
+
"step": 156400
|
109642 |
+
},
|
109643 |
+
{
|
109644 |
+
"epoch": 18.821901323706378,
|
109645 |
+
"grad_norm": Infinity,
|
109646 |
+
"learning_rate": 0.00018390452892539175,
|
109647 |
+
"loss": 11.1221,
|
109648 |
+
"step": 156410
|
109649 |
+
},
|
109650 |
+
{
|
109651 |
+
"epoch": 18.823104693140795,
|
109652 |
+
"grad_norm": Infinity,
|
109653 |
+
"learning_rate": 0.00018390245910762464,
|
109654 |
+
"loss": 11.084,
|
109655 |
+
"step": 156420
|
109656 |
+
},
|
109657 |
+
{
|
109658 |
+
"epoch": 18.82430806257521,
|
109659 |
+
"grad_norm": Infinity,
|
109660 |
+
"learning_rate": 0.00018390038916842998,
|
109661 |
+
"loss": 11.1787,
|
109662 |
+
"step": 156430
|
109663 |
+
},
|
109664 |
+
{
|
109665 |
+
"epoch": 18.825511432009627,
|
109666 |
+
"grad_norm": Infinity,
|
109667 |
+
"learning_rate": 0.00018389831910781073,
|
109668 |
+
"loss": 11.1843,
|
109669 |
+
"step": 156440
|
109670 |
+
},
|
109671 |
+
{
|
109672 |
+
"epoch": 18.826714801444044,
|
109673 |
+
"grad_norm": Infinity,
|
109674 |
+
"learning_rate": 0.00018389624892576988,
|
109675 |
+
"loss": 11.1098,
|
109676 |
+
"step": 156450
|
109677 |
+
},
|
109678 |
+
{
|
109679 |
+
"epoch": 18.82791817087846,
|
109680 |
+
"grad_norm": Infinity,
|
109681 |
+
"learning_rate": 0.00018389417862231048,
|
109682 |
+
"loss": 11.1299,
|
109683 |
+
"step": 156460
|
109684 |
+
},
|
109685 |
+
{
|
109686 |
+
"epoch": 18.829121540312876,
|
109687 |
+
"grad_norm": Infinity,
|
109688 |
+
"learning_rate": 0.00018389210819743545,
|
109689 |
+
"loss": 11.0784,
|
109690 |
+
"step": 156470
|
109691 |
+
},
|
109692 |
+
{
|
109693 |
+
"epoch": 18.830324909747294,
|
109694 |
+
"grad_norm": Infinity,
|
109695 |
+
"learning_rate": 0.00018389003765114785,
|
109696 |
+
"loss": 11.1469,
|
109697 |
+
"step": 156480
|
109698 |
+
},
|
109699 |
+
{
|
109700 |
+
"epoch": 18.831528279181708,
|
109701 |
+
"grad_norm": Infinity,
|
109702 |
+
"learning_rate": 0.00018388796698345065,
|
109703 |
+
"loss": 11.1581,
|
109704 |
+
"step": 156490
|
109705 |
+
},
|
109706 |
+
{
|
109707 |
+
"epoch": 18.832731648616125,
|
109708 |
+
"grad_norm": Infinity,
|
109709 |
+
"learning_rate": 0.00018388589619434687,
|
109710 |
+
"loss": 11.0997,
|
109711 |
+
"step": 156500
|
109712 |
+
},
|
109713 |
+
{
|
109714 |
+
"epoch": 18.833935018050543,
|
109715 |
+
"grad_norm": Infinity,
|
109716 |
+
"learning_rate": 0.00018388382528383948,
|
109717 |
+
"loss": 11.1977,
|
109718 |
+
"step": 156510
|
109719 |
+
},
|
109720 |
+
{
|
109721 |
+
"epoch": 18.835138387484957,
|
109722 |
+
"grad_norm": Infinity,
|
109723 |
+
"learning_rate": 0.00018388175425193147,
|
109724 |
+
"loss": 11.1397,
|
109725 |
+
"step": 156520
|
109726 |
+
},
|
109727 |
+
{
|
109728 |
+
"epoch": 18.836341756919374,
|
109729 |
+
"grad_norm": Infinity,
|
109730 |
+
"learning_rate": 0.00018387968309862583,
|
109731 |
+
"loss": 11.3134,
|
109732 |
+
"step": 156530
|
109733 |
+
},
|
109734 |
+
{
|
109735 |
+
"epoch": 18.837545126353792,
|
109736 |
+
"grad_norm": Infinity,
|
109737 |
+
"learning_rate": 0.0001838776118239256,
|
109738 |
+
"loss": 11.0607,
|
109739 |
+
"step": 156540
|
109740 |
+
},
|
109741 |
+
{
|
109742 |
+
"epoch": 18.838748495788206,
|
109743 |
+
"grad_norm": Infinity,
|
109744 |
+
"learning_rate": 0.00018387554042783376,
|
109745 |
+
"loss": 11.0607,
|
109746 |
+
"step": 156550
|
109747 |
+
},
|
109748 |
+
{
|
109749 |
+
"epoch": 18.839951865222623,
|
109750 |
+
"grad_norm": Infinity,
|
109751 |
+
"learning_rate": 0.00018387346891035332,
|
109752 |
+
"loss": 11.1197,
|
109753 |
+
"step": 156560
|
109754 |
+
},
|
109755 |
+
{
|
109756 |
+
"epoch": 18.84115523465704,
|
109757 |
+
"grad_norm": Infinity,
|
109758 |
+
"learning_rate": 0.00018387139727148725,
|
109759 |
+
"loss": 11.1981,
|
109760 |
+
"step": 156570
|
109761 |
+
},
|
109762 |
+
{
|
109763 |
+
"epoch": 18.842358604091455,
|
109764 |
+
"grad_norm": Infinity,
|
109765 |
+
"learning_rate": 0.00018386932551123857,
|
109766 |
+
"loss": 11.165,
|
109767 |
+
"step": 156580
|
109768 |
+
},
|
109769 |
+
{
|
109770 |
+
"epoch": 18.843561973525873,
|
109771 |
+
"grad_norm": Infinity,
|
109772 |
+
"learning_rate": 0.00018386725362961025,
|
109773 |
+
"loss": 11.1773,
|
109774 |
+
"step": 156590
|
109775 |
+
},
|
109776 |
+
{
|
109777 |
+
"epoch": 18.84476534296029,
|
109778 |
+
"grad_norm": Infinity,
|
109779 |
+
"learning_rate": 0.00018386518162660532,
|
109780 |
+
"loss": 11.1637,
|
109781 |
+
"step": 156600
|
109782 |
+
},
|
109783 |
+
{
|
109784 |
+
"epoch": 18.845968712394704,
|
109785 |
+
"grad_norm": Infinity,
|
109786 |
+
"learning_rate": 0.00018386310950222677,
|
109787 |
+
"loss": 11.1841,
|
109788 |
+
"step": 156610
|
109789 |
+
},
|
109790 |
+
{
|
109791 |
+
"epoch": 18.84717208182912,
|
109792 |
+
"grad_norm": Infinity,
|
109793 |
+
"learning_rate": 0.0001838610372564776,
|
109794 |
+
"loss": 11.2402,
|
109795 |
+
"step": 156620
|
109796 |
+
},
|
109797 |
+
{
|
109798 |
+
"epoch": 18.84837545126354,
|
109799 |
+
"grad_norm": Infinity,
|
109800 |
+
"learning_rate": 0.00018385896488936075,
|
109801 |
+
"loss": 11.1267,
|
109802 |
+
"step": 156630
|
109803 |
+
},
|
109804 |
+
{
|
109805 |
+
"epoch": 18.849578820697953,
|
109806 |
+
"grad_norm": Infinity,
|
109807 |
+
"learning_rate": 0.00018385689240087936,
|
109808 |
+
"loss": 11.2748,
|
109809 |
+
"step": 156640
|
109810 |
+
},
|
109811 |
+
{
|
109812 |
+
"epoch": 18.85078219013237,
|
109813 |
+
"grad_norm": Infinity,
|
109814 |
+
"learning_rate": 0.00018385481979103628,
|
109815 |
+
"loss": 11.2116,
|
109816 |
+
"step": 156650
|
109817 |
+
},
|
109818 |
+
{
|
109819 |
+
"epoch": 18.85198555956679,
|
109820 |
+
"grad_norm": Infinity,
|
109821 |
+
"learning_rate": 0.0001838527470598346,
|
109822 |
+
"loss": 11.1882,
|
109823 |
+
"step": 156660
|
109824 |
+
},
|
109825 |
+
{
|
109826 |
+
"epoch": 18.853188929001202,
|
109827 |
+
"grad_norm": Infinity,
|
109828 |
+
"learning_rate": 0.0001838506742072773,
|
109829 |
+
"loss": 11.1638,
|
109830 |
+
"step": 156670
|
109831 |
+
},
|
109832 |
+
{
|
109833 |
+
"epoch": 18.85439229843562,
|
109834 |
+
"grad_norm": Infinity,
|
109835 |
+
"learning_rate": 0.00018384860123336734,
|
109836 |
+
"loss": 11.208,
|
109837 |
+
"step": 156680
|
109838 |
+
},
|
109839 |
+
{
|
109840 |
+
"epoch": 18.855595667870038,
|
109841 |
+
"grad_norm": Infinity,
|
109842 |
+
"learning_rate": 0.00018384652813810778,
|
109843 |
+
"loss": 11.1221,
|
109844 |
+
"step": 156690
|
109845 |
+
},
|
109846 |
+
{
|
109847 |
+
"epoch": 18.85679903730445,
|
109848 |
+
"grad_norm": Infinity,
|
109849 |
+
"learning_rate": 0.00018384445492150158,
|
109850 |
+
"loss": 11.0525,
|
109851 |
+
"step": 156700
|
109852 |
+
},
|
109853 |
+
{
|
109854 |
+
"epoch": 18.85800240673887,
|
109855 |
+
"grad_norm": Infinity,
|
109856 |
+
"learning_rate": 0.00018384238158355176,
|
109857 |
+
"loss": 11.1971,
|
109858 |
+
"step": 156710
|
109859 |
+
},
|
109860 |
+
{
|
109861 |
+
"epoch": 18.859205776173287,
|
109862 |
+
"grad_norm": Infinity,
|
109863 |
+
"learning_rate": 0.00018384030812426132,
|
109864 |
+
"loss": 11.1792,
|
109865 |
+
"step": 156720
|
109866 |
+
},
|
109867 |
+
{
|
109868 |
+
"epoch": 18.8604091456077,
|
109869 |
+
"grad_norm": Infinity,
|
109870 |
+
"learning_rate": 0.00018383823454363324,
|
109871 |
+
"loss": 11.1946,
|
109872 |
+
"step": 156730
|
109873 |
+
},
|
109874 |
+
{
|
109875 |
+
"epoch": 18.86161251504212,
|
109876 |
+
"grad_norm": Infinity,
|
109877 |
+
"learning_rate": 0.00018383616084167054,
|
109878 |
+
"loss": 11.064,
|
109879 |
+
"step": 156740
|
109880 |
+
},
|
109881 |
+
{
|
109882 |
+
"epoch": 18.862815884476536,
|
109883 |
+
"grad_norm": Infinity,
|
109884 |
+
"learning_rate": 0.00018383408701837622,
|
109885 |
+
"loss": 11.0675,
|
109886 |
+
"step": 156750
|
109887 |
+
},
|
109888 |
+
{
|
109889 |
+
"epoch": 18.86401925391095,
|
109890 |
+
"grad_norm": Infinity,
|
109891 |
+
"learning_rate": 0.00018383201307375327,
|
109892 |
+
"loss": 11.1382,
|
109893 |
+
"step": 156760
|
109894 |
+
},
|
109895 |
+
{
|
109896 |
+
"epoch": 18.865222623345367,
|
109897 |
+
"grad_norm": Infinity,
|
109898 |
+
"learning_rate": 0.0001838299390078047,
|
109899 |
+
"loss": 11.054,
|
109900 |
+
"step": 156770
|
109901 |
+
},
|
109902 |
+
{
|
109903 |
+
"epoch": 18.866425992779785,
|
109904 |
+
"grad_norm": Infinity,
|
109905 |
+
"learning_rate": 0.0001838278648205335,
|
109906 |
+
"loss": 11.105,
|
109907 |
+
"step": 156780
|
109908 |
+
},
|
109909 |
+
{
|
109910 |
+
"epoch": 18.8676293622142,
|
109911 |
+
"grad_norm": Infinity,
|
109912 |
+
"learning_rate": 0.00018382579051194273,
|
109913 |
+
"loss": 11.1738,
|
109914 |
+
"step": 156790
|
109915 |
+
},
|
109916 |
+
{
|
109917 |
+
"epoch": 18.868832731648617,
|
109918 |
+
"grad_norm": Infinity,
|
109919 |
+
"learning_rate": 0.0001838237160820353,
|
109920 |
+
"loss": 11.0556,
|
109921 |
+
"step": 156800
|
109922 |
+
},
|
109923 |
+
{
|
109924 |
+
"epoch": 18.870036101083034,
|
109925 |
+
"grad_norm": Infinity,
|
109926 |
+
"learning_rate": 0.00018382164153081426,
|
109927 |
+
"loss": 11.1819,
|
109928 |
+
"step": 156810
|
109929 |
+
},
|
109930 |
+
{
|
109931 |
+
"epoch": 18.871239470517448,
|
109932 |
+
"grad_norm": Infinity,
|
109933 |
+
"learning_rate": 0.00018381956685828264,
|
109934 |
+
"loss": 11.1954,
|
109935 |
+
"step": 156820
|
109936 |
+
},
|
109937 |
+
{
|
109938 |
+
"epoch": 18.872442839951866,
|
109939 |
+
"grad_norm": Infinity,
|
109940 |
+
"learning_rate": 0.0001838174920644434,
|
109941 |
+
"loss": 11.17,
|
109942 |
+
"step": 156830
|
109943 |
+
},
|
109944 |
+
{
|
109945 |
+
"epoch": 18.87364620938628,
|
109946 |
+
"grad_norm": Infinity,
|
109947 |
+
"learning_rate": 0.00018381541714929955,
|
109948 |
+
"loss": 11.2456,
|
109949 |
+
"step": 156840
|
109950 |
+
},
|
109951 |
+
{
|
109952 |
+
"epoch": 18.874849578820697,
|
109953 |
+
"grad_norm": Infinity,
|
109954 |
+
"learning_rate": 0.00018381334211285412,
|
109955 |
+
"loss": 11.1573,
|
109956 |
+
"step": 156850
|
109957 |
+
},
|
109958 |
+
{
|
109959 |
+
"epoch": 18.876052948255115,
|
109960 |
+
"grad_norm": Infinity,
|
109961 |
+
"learning_rate": 0.00018381126695511004,
|
109962 |
+
"loss": 11.1414,
|
109963 |
+
"step": 156860
|
109964 |
+
},
|
109965 |
+
{
|
109966 |
+
"epoch": 18.87725631768953,
|
109967 |
+
"grad_norm": Infinity,
|
109968 |
+
"learning_rate": 0.0001838091916760704,
|
109969 |
+
"loss": 11.2085,
|
109970 |
+
"step": 156870
|
109971 |
+
},
|
109972 |
+
{
|
109973 |
+
"epoch": 18.878459687123947,
|
109974 |
+
"grad_norm": Infinity,
|
109975 |
+
"learning_rate": 0.00018380711627573817,
|
109976 |
+
"loss": 11.1137,
|
109977 |
+
"step": 156880
|
109978 |
+
},
|
109979 |
+
{
|
109980 |
+
"epoch": 18.879663056558364,
|
109981 |
+
"grad_norm": Infinity,
|
109982 |
+
"learning_rate": 0.00018380504075411633,
|
109983 |
+
"loss": 11.1565,
|
109984 |
+
"step": 156890
|
109985 |
+
},
|
109986 |
+
{
|
109987 |
+
"epoch": 18.880866425992778,
|
109988 |
+
"grad_norm": Infinity,
|
109989 |
+
"learning_rate": 0.0001838029651112079,
|
109990 |
+
"loss": 11.1592,
|
109991 |
+
"step": 156900
|
109992 |
+
},
|
109993 |
+
{
|
109994 |
+
"epoch": 18.882069795427196,
|
109995 |
+
"grad_norm": Infinity,
|
109996 |
+
"learning_rate": 0.00018380088934701592,
|
109997 |
+
"loss": 11.0634,
|
109998 |
+
"step": 156910
|
109999 |
+
},
|
110000 |
+
{
|
110001 |
+
"epoch": 18.883273164861613,
|
110002 |
+
"grad_norm": Infinity,
|
110003 |
+
"learning_rate": 0.00018379881346154337,
|
110004 |
+
"loss": 11.1236,
|
110005 |
+
"step": 156920
|
110006 |
+
},
|
110007 |
+
{
|
110008 |
+
"epoch": 18.884476534296027,
|
110009 |
+
"grad_norm": Infinity,
|
110010 |
+
"learning_rate": 0.0001837967374547932,
|
110011 |
+
"loss": 11.0946,
|
110012 |
+
"step": 156930
|
110013 |
+
},
|
110014 |
+
{
|
110015 |
+
"epoch": 18.885679903730445,
|
110016 |
+
"grad_norm": Infinity,
|
110017 |
+
"learning_rate": 0.00018379466132676853,
|
110018 |
+
"loss": 11.1062,
|
110019 |
+
"step": 156940
|
110020 |
+
},
|
110021 |
+
{
|
110022 |
+
"epoch": 18.886883273164862,
|
110023 |
+
"grad_norm": Infinity,
|
110024 |
+
"learning_rate": 0.00018379258507747223,
|
110025 |
+
"loss": 11.0293,
|
110026 |
+
"step": 156950
|
110027 |
+
},
|
110028 |
+
{
|
110029 |
+
"epoch": 18.888086642599276,
|
110030 |
+
"grad_norm": Infinity,
|
110031 |
+
"learning_rate": 0.00018379050870690738,
|
110032 |
+
"loss": 11.0878,
|
110033 |
+
"step": 156960
|
110034 |
+
},
|
110035 |
+
{
|
110036 |
+
"epoch": 18.889290012033694,
|
110037 |
+
"grad_norm": Infinity,
|
110038 |
+
"learning_rate": 0.000183788432215077,
|
110039 |
+
"loss": 10.9764,
|
110040 |
+
"step": 156970
|
110041 |
+
},
|
110042 |
+
{
|
110043 |
+
"epoch": 18.89049338146811,
|
110044 |
+
"grad_norm": Infinity,
|
110045 |
+
"learning_rate": 0.0001837863556019841,
|
110046 |
+
"loss": 11.0772,
|
110047 |
+
"step": 156980
|
110048 |
+
},
|
110049 |
+
{
|
110050 |
+
"epoch": 18.891696750902526,
|
110051 |
+
"grad_norm": Infinity,
|
110052 |
+
"learning_rate": 0.0001837842788676316,
|
110053 |
+
"loss": 11.1573,
|
110054 |
+
"step": 156990
|
110055 |
+
},
|
110056 |
+
{
|
110057 |
+
"epoch": 18.892900120336943,
|
110058 |
+
"grad_norm": Infinity,
|
110059 |
+
"learning_rate": 0.0001837822020120226,
|
110060 |
+
"loss": 11.0387,
|
110061 |
+
"step": 157000
|
110062 |
+
},
|
110063 |
+
{
|
110064 |
+
"epoch": 18.89410348977136,
|
110065 |
+
"grad_norm": Infinity,
|
110066 |
+
"learning_rate": 0.00018378012503516007,
|
110067 |
+
"loss": 11.1678,
|
110068 |
+
"step": 157010
|
110069 |
+
},
|
110070 |
+
{
|
110071 |
+
"epoch": 18.895306859205775,
|
110072 |
+
"grad_norm": Infinity,
|
110073 |
+
"learning_rate": 0.000183778047937047,
|
110074 |
+
"loss": 11.1658,
|
110075 |
+
"step": 157020
|
110076 |
+
},
|
110077 |
+
{
|
110078 |
+
"epoch": 18.896510228640192,
|
110079 |
+
"grad_norm": Infinity,
|
110080 |
+
"learning_rate": 0.0001837759707176864,
|
110081 |
+
"loss": 11.1512,
|
110082 |
+
"step": 157030
|
110083 |
+
},
|
110084 |
+
{
|
110085 |
+
"epoch": 18.89771359807461,
|
110086 |
+
"grad_norm": Infinity,
|
110087 |
+
"learning_rate": 0.0001837738933770813,
|
110088 |
+
"loss": 11.0356,
|
110089 |
+
"step": 157040
|
110090 |
+
},
|
110091 |
+
{
|
110092 |
+
"epoch": 18.898916967509024,
|
110093 |
+
"grad_norm": Infinity,
|
110094 |
+
"learning_rate": 0.00018377181591523468,
|
110095 |
+
"loss": 11.2211,
|
110096 |
+
"step": 157050
|
110097 |
+
},
|
110098 |
+
{
|
110099 |
+
"epoch": 18.90012033694344,
|
110100 |
+
"grad_norm": Infinity,
|
110101 |
+
"learning_rate": 0.0001837697383321496,
|
110102 |
+
"loss": 11.0747,
|
110103 |
+
"step": 157060
|
110104 |
+
},
|
110105 |
+
{
|
110106 |
+
"epoch": 18.90132370637786,
|
110107 |
+
"grad_norm": Infinity,
|
110108 |
+
"learning_rate": 0.00018376766062782896,
|
110109 |
+
"loss": 11.206,
|
110110 |
+
"step": 157070
|
110111 |
+
},
|
110112 |
+
{
|
110113 |
+
"epoch": 18.902527075812273,
|
110114 |
+
"grad_norm": Infinity,
|
110115 |
+
"learning_rate": 0.00018376558280227588,
|
110116 |
+
"loss": 11.1095,
|
110117 |
+
"step": 157080
|
110118 |
+
},
|
110119 |
+
{
|
110120 |
+
"epoch": 18.90373044524669,
|
110121 |
+
"grad_norm": Infinity,
|
110122 |
+
"learning_rate": 0.00018376350485549333,
|
110123 |
+
"loss": 11.1608,
|
110124 |
+
"step": 157090
|
110125 |
+
},
|
110126 |
+
{
|
110127 |
+
"epoch": 18.904933814681108,
|
110128 |
+
"grad_norm": Infinity,
|
110129 |
+
"learning_rate": 0.00018376142678748426,
|
110130 |
+
"loss": 11.0815,
|
110131 |
+
"step": 157100
|
110132 |
+
},
|
110133 |
+
{
|
110134 |
+
"epoch": 18.906137184115522,
|
110135 |
+
"grad_norm": Infinity,
|
110136 |
+
"learning_rate": 0.00018375934859825177,
|
110137 |
+
"loss": 11.0471,
|
110138 |
+
"step": 157110
|
110139 |
+
},
|
110140 |
+
{
|
110141 |
+
"epoch": 18.90734055354994,
|
110142 |
+
"grad_norm": Infinity,
|
110143 |
+
"learning_rate": 0.0001837572702877988,
|
110144 |
+
"loss": 11.2041,
|
110145 |
+
"step": 157120
|
110146 |
+
},
|
110147 |
+
{
|
110148 |
+
"epoch": 18.908543922984357,
|
110149 |
+
"grad_norm": Infinity,
|
110150 |
+
"learning_rate": 0.0001837551918561284,
|
110151 |
+
"loss": 11.1298,
|
110152 |
+
"step": 157130
|
110153 |
+
},
|
110154 |
+
{
|
110155 |
+
"epoch": 18.90974729241877,
|
110156 |
+
"grad_norm": Infinity,
|
110157 |
+
"learning_rate": 0.00018375311330324356,
|
110158 |
+
"loss": 11.1328,
|
110159 |
+
"step": 157140
|
110160 |
+
},
|
110161 |
+
{
|
110162 |
+
"epoch": 18.91095066185319,
|
110163 |
+
"grad_norm": Infinity,
|
110164 |
+
"learning_rate": 0.00018375103462914727,
|
110165 |
+
"loss": 11.069,
|
110166 |
+
"step": 157150
|
110167 |
+
},
|
110168 |
+
{
|
110169 |
+
"epoch": 18.912154031287606,
|
110170 |
+
"grad_norm": Infinity,
|
110171 |
+
"learning_rate": 0.00018374895583384254,
|
110172 |
+
"loss": 11.1397,
|
110173 |
+
"step": 157160
|
110174 |
+
},
|
110175 |
+
{
|
110176 |
+
"epoch": 18.91335740072202,
|
110177 |
+
"grad_norm": Infinity,
|
110178 |
+
"learning_rate": 0.00018374687691733244,
|
110179 |
+
"loss": 11.1763,
|
110180 |
+
"step": 157170
|
110181 |
+
},
|
110182 |
+
{
|
110183 |
+
"epoch": 18.914560770156438,
|
110184 |
+
"grad_norm": Infinity,
|
110185 |
+
"learning_rate": 0.00018374479787961989,
|
110186 |
+
"loss": 11.2508,
|
110187 |
+
"step": 157180
|
110188 |
+
},
|
110189 |
+
{
|
110190 |
+
"epoch": 18.915764139590856,
|
110191 |
+
"grad_norm": Infinity,
|
110192 |
+
"learning_rate": 0.00018374271872070795,
|
110193 |
+
"loss": 11.2083,
|
110194 |
+
"step": 157190
|
110195 |
+
},
|
110196 |
+
{
|
110197 |
+
"epoch": 18.91696750902527,
|
110198 |
+
"grad_norm": Infinity,
|
110199 |
+
"learning_rate": 0.00018374063944059964,
|
110200 |
+
"loss": 11.1163,
|
110201 |
+
"step": 157200
|
110202 |
+
},
|
110203 |
+
{
|
110204 |
+
"epoch": 18.918170878459687,
|
110205 |
+
"grad_norm": Infinity,
|
110206 |
+
"learning_rate": 0.00018373856003929794,
|
110207 |
+
"loss": 11.1841,
|
110208 |
+
"step": 157210
|
110209 |
+
},
|
110210 |
+
{
|
110211 |
+
"epoch": 18.919374247894105,
|
110212 |
+
"grad_norm": Infinity,
|
110213 |
+
"learning_rate": 0.0001837364805168059,
|
110214 |
+
"loss": 11.0735,
|
110215 |
+
"step": 157220
|
110216 |
+
},
|
110217 |
+
{
|
110218 |
+
"epoch": 18.92057761732852,
|
110219 |
+
"grad_norm": Infinity,
|
110220 |
+
"learning_rate": 0.00018373440087312643,
|
110221 |
+
"loss": 11.1616,
|
110222 |
+
"step": 157230
|
110223 |
+
},
|
110224 |
+
{
|
110225 |
+
"epoch": 18.921780986762936,
|
110226 |
+
"grad_norm": Infinity,
|
110227 |
+
"learning_rate": 0.00018373232110826266,
|
110228 |
+
"loss": 11.1764,
|
110229 |
+
"step": 157240
|
110230 |
+
},
|
110231 |
+
{
|
110232 |
+
"epoch": 18.922984356197354,
|
110233 |
+
"grad_norm": Infinity,
|
110234 |
+
"learning_rate": 0.00018373024122221753,
|
110235 |
+
"loss": 11.16,
|
110236 |
+
"step": 157250
|
110237 |
+
},
|
110238 |
+
{
|
110239 |
+
"epoch": 18.924187725631768,
|
110240 |
+
"grad_norm": Infinity,
|
110241 |
+
"learning_rate": 0.00018372816121499405,
|
110242 |
+
"loss": 11.0687,
|
110243 |
+
"step": 157260
|
110244 |
+
},
|
110245 |
+
{
|
110246 |
+
"epoch": 18.925391095066185,
|
110247 |
+
"grad_norm": Infinity,
|
110248 |
+
"learning_rate": 0.00018372608108659528,
|
110249 |
+
"loss": 11.1236,
|
110250 |
+
"step": 157270
|
110251 |
+
},
|
110252 |
+
{
|
110253 |
+
"epoch": 18.926594464500603,
|
110254 |
+
"grad_norm": Infinity,
|
110255 |
+
"learning_rate": 0.00018372400083702418,
|
110256 |
+
"loss": 11.1659,
|
110257 |
+
"step": 157280
|
110258 |
+
},
|
110259 |
+
{
|
110260 |
+
"epoch": 18.927797833935017,
|
110261 |
+
"grad_norm": Infinity,
|
110262 |
+
"learning_rate": 0.0001837219204662838,
|
110263 |
+
"loss": 11.0458,
|
110264 |
+
"step": 157290
|
110265 |
+
},
|
110266 |
+
{
|
110267 |
+
"epoch": 18.929001203369435,
|
110268 |
+
"grad_norm": Infinity,
|
110269 |
+
"learning_rate": 0.00018371983997437713,
|
110270 |
+
"loss": 11.0637,
|
110271 |
+
"step": 157300
|
110272 |
+
},
|
110273 |
+
{
|
110274 |
+
"epoch": 18.930204572803852,
|
110275 |
+
"grad_norm": Infinity,
|
110276 |
+
"learning_rate": 0.00018371775936130717,
|
110277 |
+
"loss": 11.1876,
|
110278 |
+
"step": 157310
|
110279 |
+
},
|
110280 |
+
{
|
110281 |
+
"epoch": 18.931407942238266,
|
110282 |
+
"grad_norm": Infinity,
|
110283 |
+
"learning_rate": 0.00018371567862707694,
|
110284 |
+
"loss": 11.2099,
|
110285 |
+
"step": 157320
|
110286 |
+
},
|
110287 |
+
{
|
110288 |
+
"epoch": 18.932611311672684,
|
110289 |
+
"grad_norm": Infinity,
|
110290 |
+
"learning_rate": 0.00018371359777168947,
|
110291 |
+
"loss": 11.1799,
|
110292 |
+
"step": 157330
|
110293 |
+
},
|
110294 |
+
{
|
110295 |
+
"epoch": 18.9338146811071,
|
110296 |
+
"grad_norm": Infinity,
|
110297 |
+
"learning_rate": 0.00018371151679514775,
|
110298 |
+
"loss": 11.0395,
|
110299 |
+
"step": 157340
|
110300 |
+
},
|
110301 |
+
{
|
110302 |
+
"epoch": 18.935018050541515,
|
110303 |
+
"grad_norm": Infinity,
|
110304 |
+
"learning_rate": 0.0001837094356974548,
|
110305 |
+
"loss": 11.1093,
|
110306 |
+
"step": 157350
|
110307 |
+
},
|
110308 |
+
{
|
110309 |
+
"epoch": 18.936221419975933,
|
110310 |
+
"grad_norm": Infinity,
|
110311 |
+
"learning_rate": 0.0001837073544786136,
|
110312 |
+
"loss": 11.1713,
|
110313 |
+
"step": 157360
|
110314 |
+
},
|
110315 |
+
{
|
110316 |
+
"epoch": 18.93742478941035,
|
110317 |
+
"grad_norm": Infinity,
|
110318 |
+
"learning_rate": 0.00018370527313862724,
|
110319 |
+
"loss": 11.2034,
|
110320 |
+
"step": 157370
|
110321 |
+
},
|
110322 |
+
{
|
110323 |
+
"epoch": 18.938628158844764,
|
110324 |
+
"grad_norm": Infinity,
|
110325 |
+
"learning_rate": 0.00018370319167749865,
|
110326 |
+
"loss": 11.1406,
|
110327 |
+
"step": 157380
|
110328 |
+
},
|
110329 |
+
{
|
110330 |
+
"epoch": 18.939831528279182,
|
110331 |
+
"grad_norm": Infinity,
|
110332 |
+
"learning_rate": 0.00018370111009523087,
|
110333 |
+
"loss": 11.2147,
|
110334 |
+
"step": 157390
|
110335 |
+
},
|
110336 |
+
{
|
110337 |
+
"epoch": 18.9410348977136,
|
110338 |
+
"grad_norm": Infinity,
|
110339 |
+
"learning_rate": 0.00018369902839182695,
|
110340 |
+
"loss": 11.2302,
|
110341 |
+
"step": 157400
|
110342 |
+
},
|
110343 |
+
{
|
110344 |
+
"epoch": 18.942238267148014,
|
110345 |
+
"grad_norm": Infinity,
|
110346 |
+
"learning_rate": 0.00018369694656728984,
|
110347 |
+
"loss": 11.1735,
|
110348 |
+
"step": 157410
|
110349 |
+
},
|
110350 |
+
{
|
110351 |
+
"epoch": 18.94344163658243,
|
110352 |
+
"grad_norm": Infinity,
|
110353 |
+
"learning_rate": 0.00018369486462162262,
|
110354 |
+
"loss": 11.1265,
|
110355 |
+
"step": 157420
|
110356 |
+
},
|
110357 |
+
{
|
110358 |
+
"epoch": 18.94464500601685,
|
110359 |
+
"grad_norm": Infinity,
|
110360 |
+
"learning_rate": 0.00018369278255482826,
|
110361 |
+
"loss": 11.1389,
|
110362 |
+
"step": 157430
|
110363 |
+
},
|
110364 |
+
{
|
110365 |
+
"epoch": 18.945848375451263,
|
110366 |
+
"grad_norm": Infinity,
|
110367 |
+
"learning_rate": 0.00018369070036690976,
|
110368 |
+
"loss": 11.1313,
|
110369 |
+
"step": 157440
|
110370 |
+
},
|
110371 |
+
{
|
110372 |
+
"epoch": 18.94705174488568,
|
110373 |
+
"grad_norm": Infinity,
|
110374 |
+
"learning_rate": 0.00018368861805787018,
|
110375 |
+
"loss": 11.151,
|
110376 |
+
"step": 157450
|
110377 |
+
},
|
110378 |
+
{
|
110379 |
+
"epoch": 18.948255114320098,
|
110380 |
+
"grad_norm": Infinity,
|
110381 |
+
"learning_rate": 0.00018368653562771245,
|
110382 |
+
"loss": 11.1124,
|
110383 |
+
"step": 157460
|
110384 |
+
},
|
110385 |
+
{
|
110386 |
+
"epoch": 18.949458483754512,
|
110387 |
+
"grad_norm": Infinity,
|
110388 |
+
"learning_rate": 0.00018368445307643969,
|
110389 |
+
"loss": 11.1773,
|
110390 |
+
"step": 157470
|
110391 |
+
},
|
110392 |
+
{
|
110393 |
+
"epoch": 18.95066185318893,
|
110394 |
+
"grad_norm": Infinity,
|
110395 |
+
"learning_rate": 0.00018368237040405485,
|
110396 |
+
"loss": 11.0857,
|
110397 |
+
"step": 157480
|
110398 |
+
},
|
110399 |
+
{
|
110400 |
+
"epoch": 18.951865222623347,
|
110401 |
+
"grad_norm": Infinity,
|
110402 |
+
"learning_rate": 0.00018368028761056094,
|
110403 |
+
"loss": 11.0975,
|
110404 |
+
"step": 157490
|
110405 |
+
},
|
110406 |
+
{
|
110407 |
+
"epoch": 18.95306859205776,
|
110408 |
+
"grad_norm": Infinity,
|
110409 |
+
"learning_rate": 0.00018367820469596105,
|
110410 |
+
"loss": 11.022,
|
110411 |
+
"step": 157500
|
110412 |
+
},
|
110413 |
+
{
|
110414 |
+
"epoch": 18.95427196149218,
|
110415 |
+
"grad_norm": Infinity,
|
110416 |
+
"learning_rate": 0.0001836761216602581,
|
110417 |
+
"loss": 11.2443,
|
110418 |
+
"step": 157510
|
110419 |
+
},
|
110420 |
+
{
|
110421 |
+
"epoch": 18.955475330926596,
|
110422 |
+
"grad_norm": Infinity,
|
110423 |
+
"learning_rate": 0.00018367403850345516,
|
110424 |
+
"loss": 11.1917,
|
110425 |
+
"step": 157520
|
110426 |
+
},
|
110427 |
+
{
|
110428 |
+
"epoch": 18.95667870036101,
|
110429 |
+
"grad_norm": Infinity,
|
110430 |
+
"learning_rate": 0.0001836719552255552,
|
110431 |
+
"loss": 11.022,
|
110432 |
+
"step": 157530
|
110433 |
+
},
|
110434 |
+
{
|
110435 |
+
"epoch": 18.957882069795428,
|
110436 |
+
"grad_norm": Infinity,
|
110437 |
+
"learning_rate": 0.00018366987182656127,
|
110438 |
+
"loss": 11.2335,
|
110439 |
+
"step": 157540
|
110440 |
+
},
|
110441 |
+
{
|
110442 |
+
"epoch": 18.959085439229845,
|
110443 |
+
"grad_norm": Infinity,
|
110444 |
+
"learning_rate": 0.0001836677883064764,
|
110445 |
+
"loss": 11.2388,
|
110446 |
+
"step": 157550
|
110447 |
+
},
|
110448 |
+
{
|
110449 |
+
"epoch": 18.96028880866426,
|
110450 |
+
"grad_norm": Infinity,
|
110451 |
+
"learning_rate": 0.00018366570466530358,
|
110452 |
+
"loss": 11.0706,
|
110453 |
+
"step": 157560
|
110454 |
+
},
|
110455 |
+
{
|
110456 |
+
"epoch": 18.961492178098677,
|
110457 |
+
"grad_norm": Infinity,
|
110458 |
+
"learning_rate": 0.0001836636209030458,
|
110459 |
+
"loss": 11.1347,
|
110460 |
+
"step": 157570
|
110461 |
+
},
|
110462 |
+
{
|
110463 |
+
"epoch": 18.96269554753309,
|
110464 |
+
"grad_norm": Infinity,
|
110465 |
+
"learning_rate": 0.00018366153701970613,
|
110466 |
+
"loss": 11.1475,
|
110467 |
+
"step": 157580
|
110468 |
+
},
|
110469 |
+
{
|
110470 |
+
"epoch": 18.96389891696751,
|
110471 |
+
"grad_norm": Infinity,
|
110472 |
+
"learning_rate": 0.00018365945301528758,
|
110473 |
+
"loss": 11.1984,
|
110474 |
+
"step": 157590
|
110475 |
+
},
|
110476 |
+
{
|
110477 |
+
"epoch": 18.965102286401926,
|
110478 |
+
"grad_norm": Infinity,
|
110479 |
+
"learning_rate": 0.00018365736888979314,
|
110480 |
+
"loss": 11.1857,
|
110481 |
+
"step": 157600
|
110482 |
+
},
|
110483 |
+
{
|
110484 |
+
"epoch": 18.96630565583634,
|
110485 |
+
"grad_norm": Infinity,
|
110486 |
+
"learning_rate": 0.00018365528464322578,
|
110487 |
+
"loss": 11.0506,
|
110488 |
+
"step": 157610
|
110489 |
+
},
|
110490 |
+
{
|
110491 |
+
"epoch": 18.967509025270758,
|
110492 |
+
"grad_norm": Infinity,
|
110493 |
+
"learning_rate": 0.0001836532002755886,
|
110494 |
+
"loss": 11.2283,
|
110495 |
+
"step": 157620
|
110496 |
+
},
|
110497 |
+
{
|
110498 |
+
"epoch": 18.968712394705175,
|
110499 |
+
"grad_norm": Infinity,
|
110500 |
+
"learning_rate": 0.0001836511157868846,
|
110501 |
+
"loss": 11.2945,
|
110502 |
+
"step": 157630
|
110503 |
+
},
|
110504 |
+
{
|
110505 |
+
"epoch": 18.96991576413959,
|
110506 |
+
"grad_norm": Infinity,
|
110507 |
+
"learning_rate": 0.00018364903117711677,
|
110508 |
+
"loss": 11.0815,
|
110509 |
+
"step": 157640
|
110510 |
+
},
|
110511 |
+
{
|
110512 |
+
"epoch": 18.971119133574007,
|
110513 |
+
"grad_norm": Infinity,
|
110514 |
+
"learning_rate": 0.00018364694644628816,
|
110515 |
+
"loss": 11.174,
|
110516 |
+
"step": 157650
|
110517 |
+
},
|
110518 |
+
{
|
110519 |
+
"epoch": 18.972322503008424,
|
110520 |
+
"grad_norm": Infinity,
|
110521 |
+
"learning_rate": 0.00018364486159440173,
|
110522 |
+
"loss": 11.0937,
|
110523 |
+
"step": 157660
|
110524 |
+
},
|
110525 |
+
{
|
110526 |
+
"epoch": 18.97352587244284,
|
110527 |
+
"grad_norm": Infinity,
|
110528 |
+
"learning_rate": 0.00018364277662146056,
|
110529 |
+
"loss": 11.1016,
|
110530 |
+
"step": 157670
|
110531 |
+
},
|
110532 |
+
{
|
110533 |
+
"epoch": 18.974729241877256,
|
110534 |
+
"grad_norm": Infinity,
|
110535 |
+
"learning_rate": 0.00018364069152746764,
|
110536 |
+
"loss": 11.2318,
|
110537 |
+
"step": 157680
|
110538 |
+
},
|
110539 |
+
{
|
110540 |
+
"epoch": 18.975932611311674,
|
110541 |
+
"grad_norm": Infinity,
|
110542 |
+
"learning_rate": 0.00018363860631242599,
|
110543 |
+
"loss": 11.0833,
|
110544 |
+
"step": 157690
|
110545 |
+
},
|
110546 |
+
{
|
110547 |
+
"epoch": 18.977135980746088,
|
110548 |
+
"grad_norm": Infinity,
|
110549 |
+
"learning_rate": 0.0001836365209763386,
|
110550 |
+
"loss": 11.0869,
|
110551 |
+
"step": 157700
|
110552 |
+
},
|
110553 |
+
{
|
110554 |
+
"epoch": 18.978339350180505,
|
110555 |
+
"grad_norm": Infinity,
|
110556 |
+
"learning_rate": 0.00018363443551920857,
|
110557 |
+
"loss": 11.23,
|
110558 |
+
"step": 157710
|
110559 |
+
},
|
110560 |
+
{
|
110561 |
+
"epoch": 18.979542719614923,
|
110562 |
+
"grad_norm": Infinity,
|
110563 |
+
"learning_rate": 0.0001836323499410388,
|
110564 |
+
"loss": 11.1504,
|
110565 |
+
"step": 157720
|
110566 |
+
},
|
110567 |
+
{
|
110568 |
+
"epoch": 18.980746089049337,
|
110569 |
+
"grad_norm": Infinity,
|
110570 |
+
"learning_rate": 0.0001836302642418324,
|
110571 |
+
"loss": 11.1502,
|
110572 |
+
"step": 157730
|
110573 |
+
},
|
110574 |
+
{
|
110575 |
+
"epoch": 18.981949458483754,
|
110576 |
+
"grad_norm": Infinity,
|
110577 |
+
"learning_rate": 0.00018362817842159236,
|
110578 |
+
"loss": 11.1727,
|
110579 |
+
"step": 157740
|
110580 |
+
},
|
110581 |
+
{
|
110582 |
+
"epoch": 18.983152827918172,
|
110583 |
+
"grad_norm": Infinity,
|
110584 |
+
"learning_rate": 0.0001836260924803217,
|
110585 |
+
"loss": 11.0221,
|
110586 |
+
"step": 157750
|
110587 |
+
},
|
110588 |
+
{
|
110589 |
+
"epoch": 18.984356197352586,
|
110590 |
+
"grad_norm": Infinity,
|
110591 |
+
"learning_rate": 0.0001836240064180234,
|
110592 |
+
"loss": 11.1395,
|
110593 |
+
"step": 157760
|
110594 |
+
},
|
110595 |
+
{
|
110596 |
+
"epoch": 18.985559566787003,
|
110597 |
+
"grad_norm": Infinity,
|
110598 |
+
"learning_rate": 0.00018362192023470058,
|
110599 |
+
"loss": 11.0751,
|
110600 |
+
"step": 157770
|
110601 |
+
},
|
110602 |
+
{
|
110603 |
+
"epoch": 18.98676293622142,
|
110604 |
+
"grad_norm": Infinity,
|
110605 |
+
"learning_rate": 0.00018361983393035618,
|
110606 |
+
"loss": 11.1112,
|
110607 |
+
"step": 157780
|
110608 |
+
},
|
110609 |
+
{
|
110610 |
+
"epoch": 18.987966305655835,
|
110611 |
+
"grad_norm": Infinity,
|
110612 |
+
"learning_rate": 0.00018361774750499322,
|
110613 |
+
"loss": 11.0373,
|
110614 |
+
"step": 157790
|
110615 |
+
},
|
110616 |
+
{
|
110617 |
+
"epoch": 18.989169675090253,
|
110618 |
+
"grad_norm": Infinity,
|
110619 |
+
"learning_rate": 0.00018361566095861473,
|
110620 |
+
"loss": 11.0098,
|
110621 |
+
"step": 157800
|
110622 |
+
},
|
110623 |
+
{
|
110624 |
+
"epoch": 18.99037304452467,
|
110625 |
+
"grad_norm": Infinity,
|
110626 |
+
"learning_rate": 0.00018361357429122377,
|
110627 |
+
"loss": 11.1946,
|
110628 |
+
"step": 157810
|
110629 |
+
},
|
110630 |
+
{
|
110631 |
+
"epoch": 18.991576413959084,
|
110632 |
+
"grad_norm": Infinity,
|
110633 |
+
"learning_rate": 0.0001836114875028233,
|
110634 |
+
"loss": 11.1975,
|
110635 |
+
"step": 157820
|
110636 |
+
},
|
110637 |
+
{
|
110638 |
+
"epoch": 18.9927797833935,
|
110639 |
+
"grad_norm": Infinity,
|
110640 |
+
"learning_rate": 0.00018360940059341635,
|
110641 |
+
"loss": 11.2993,
|
110642 |
+
"step": 157830
|
110643 |
+
},
|
110644 |
+
{
|
110645 |
+
"epoch": 18.99398315282792,
|
110646 |
+
"grad_norm": Infinity,
|
110647 |
+
"learning_rate": 0.00018360731356300596,
|
110648 |
+
"loss": 11.138,
|
110649 |
+
"step": 157840
|
110650 |
+
},
|
110651 |
+
{
|
110652 |
+
"epoch": 18.995186522262333,
|
110653 |
+
"grad_norm": Infinity,
|
110654 |
+
"learning_rate": 0.00018360522641159516,
|
110655 |
+
"loss": 11.2325,
|
110656 |
+
"step": 157850
|
110657 |
+
},
|
110658 |
+
{
|
110659 |
+
"epoch": 18.99638989169675,
|
110660 |
+
"grad_norm": Infinity,
|
110661 |
+
"learning_rate": 0.00018360313913918694,
|
110662 |
+
"loss": 11.1623,
|
110663 |
+
"step": 157860
|
110664 |
+
},
|
110665 |
+
{
|
110666 |
+
"epoch": 18.99759326113117,
|
110667 |
+
"grad_norm": Infinity,
|
110668 |
+
"learning_rate": 0.00018360105174578437,
|
110669 |
+
"loss": 11.0993,
|
110670 |
+
"step": 157870
|
110671 |
+
},
|
110672 |
+
{
|
110673 |
+
"epoch": 18.998796630565582,
|
110674 |
+
"grad_norm": Infinity,
|
110675 |
+
"learning_rate": 0.0001835989642313904,
|
110676 |
+
"loss": 11.1284,
|
110677 |
+
"step": 157880
|
110678 |
+
},
|
110679 |
+
{
|
110680 |
+
"epoch": 19.0,
|
110681 |
+
"grad_norm": Infinity,
|
110682 |
+
"learning_rate": 0.0001835968765960081,
|
110683 |
+
"loss": 11.1654,
|
110684 |
+
"step": 157890
|
110685 |
+
},
|
110686 |
+
{
|
110687 |
+
"epoch": 19.0,
|
110688 |
+
"eval_loss": 11.139603614807129,
|
110689 |
+
"eval_runtime": 118.4351,
|
110690 |
+
"eval_samples_per_second": 62.372,
|
110691 |
+
"eval_steps_per_second": 7.802,
|
110692 |
+
"step": 157890
|
110693 |
}
|
110694 |
],
|
110695 |
"logging_steps": 10,
|
|
|
110709 |
"attributes": {}
|
110710 |
}
|
110711 |
},
|
110712 |
+
"total_flos": 3.3196933925675467e+19,
|
110713 |
"train_batch_size": 1,
|
110714 |
"trial_name": null,
|
110715 |
"trial_params": null
|