willtensora committed
Training in progress, step 153735, checkpoint

Files changed:
- last-checkpoint/adapter_model.safetensors  +1 -1
- last-checkpoint/optimizer.pt  +1 -1
- last-checkpoint/rng_state_0.pth  +1 -1
- last-checkpoint/rng_state_1.pth  +1 -1
- last-checkpoint/rng_state_2.pth  +1 -1
- last-checkpoint/rng_state_3.pth  +1 -1
- last-checkpoint/rng_state_4.pth  +1 -1
- last-checkpoint/rng_state_5.pth  +1 -1
- last-checkpoint/rng_state_6.pth  +1 -1
- last-checkpoint/rng_state_7.pth  +1 -1
- last-checkpoint/scheduler.pt  +1 -1
- last-checkpoint/trainer_state.json  +2908 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:883791f7abc6e83c8e078106f39af0c6dbda2d78e338250d65b667eb4bb06dcc
 size 627606952
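This file and the ten below are stored with Git LFS, so each diff only rewrites the three-line pointer file: the spec version, the sha256 oid of the blob, and its byte size (the previous oid is truncated in this view). A minimal sketch of checking a downloaded blob against its pointer, with the local path assumed for illustration:

import hashlib
import os

# Hypothetical local path to the downloaded blob, plus the pointer's new fields.
BLOB_PATH = "last-checkpoint/adapter_model.safetensors"
EXPECTED_OID = "883791f7abc6e83c8e078106f39af0c6dbda2d78e338250d65b667eb4bb06dcc"
EXPECTED_SIZE = 627606952

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so a ~628 MB adapter never sits in memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

assert os.path.getsize(BLOB_PATH) == EXPECTED_SIZE, "size mismatch"
assert sha256_of(BLOB_PATH) == EXPECTED_OID, "oid mismatch"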
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9bbf029fe349e864ea24f1aa543363bd07868a06eaec1a92cdffa42a099472c3
 size 318986436
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b86ef01c6375796fdd1e8c22f1578cb8d4b3b030168886f5664505c5f70ec251
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:91172b4328e968365f3e7c73829064deda13714a27c947fc1311c6feaa062ba9
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a959582076a6ac61743f8b360375823c0a22d57c5299460727134171480c2c4a
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ec8b44284719a200b8836519e2b619f44ab7ee36a92040ba68d4511ee7490bd5
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ed98150df53faedb2f7ab733ab95a1eb627722350e287b316cd3f55225249d45
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1c56fb323d2dfc786780918b0a3871635690d33d2b53c6f50f058cdc915c9879
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:54642cb8b6edcb0379f079a97a73cd798989d2033a83d539e2f7aa6e46426951
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:89e4108dfcd1cb85bb52febcf2d1838b369309a5b7aec71b4c24c537d77475c2
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:566f22a287ae39f3b9654b0727421444cfffea626ed07bee5045c2aaca718e53
 size 1064
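The eight rng_state_{0..7}.pth files (one per process of what is evidently an 8-GPU run), together with optimizer.pt and scheduler.pt, are what make a resume reproducible. A rough sketch of how such pieces are typically reloaded with plain PyTorch; the paths and the python/numpy/cpu/cuda key layout follow the common Hugging Face Trainer convention and are assumptions here, not code from this repo:

import random
import numpy as np
import torch

CKPT = "last-checkpoint"
rank = 0  # in the 8-process run this would be torch.distributed.get_rank()

# optimizer.pt / scheduler.pt hold torch.save'd state dicts, later handed to
# optimizer.load_state_dict(...) and scheduler.load_state_dict(...).
optimizer_state = torch.load(f"{CKPT}/optimizer.pt", map_location="cpu")
scheduler_state = torch.load(f"{CKPT}/scheduler.pt", map_location="cpu")

# Each rank restores its own RNG snapshot so dropout masks and data order repeat.
rng = torch.load(f"{CKPT}/rng_state_{rank}.pth", map_location="cpu")
random.setstate(rng["python"])
np.random.set_state(rng["numpy"])
torch.random.set_rng_state(rng["cpu"])
if torch.cuda.is_available() and "cuda" in rng:
    torch.cuda.set_rng_state(rng["cuda"])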
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 18.
+  "epoch": 18.5,
   "eval_steps": 8310,
-  "global_step":
+  "global_step": 153735,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -104865,6 +104865,2911 @@
       "eval_samples_per_second": 62.098,
       "eval_steps_per_second": 7.768,
       "step": 149580
+    },
+    {
+      "epoch": 18.001203369434418,
+      "grad_norm": 2082.1474609375,
+      "learning_rate": 0.0001852877063218934,
+      "loss": 7.3862,
+      "step": 149590
+    },
+    {
+      "epoch": 18.00240673886883,
+      "grad_norm": 151.06271362304688,
+      "learning_rate": 0.000185285720006071,
+      "loss": 7.3733,
+      "step": 149600
+    },
+    {
+      "epoch": 18.00361010830325,
+      "grad_norm": 338.38525390625,
+      "learning_rate": 0.0001852837335668191,
+      "loss": 7.3697,
+      "step": 149610
+    },
+    {
+      "epoch": 18.004813477737667,
+      "grad_norm": 305.02740478515625,
+      "learning_rate": 0.0001852817470041406,
+      "loss": 7.3517,
+      "step": 149620
+    },
+    {
+      "epoch": 18.00601684717208,
+      "grad_norm": 64.78070068359375,
+      "learning_rate": 0.00018527976031803832,
+      "loss": 7.3814,
+      "step": 149630
+    },
+    {
+      "epoch": 18.0072202166065,
+      "grad_norm": 525.9762573242188,
+      "learning_rate": 0.00018527777350851516,
+      "loss": 7.3542,
+      "step": 149640
+    },
+    {
+      "epoch": 18.008423586040916,
+      "grad_norm": 1789.5118408203125,
+      "learning_rate": 0.00018527578657557406,
+      "loss": 7.3802,
+      "step": 149650
+    },
+    {
+      "epoch": 18.00962695547533,
+      "grad_norm": 172.57984924316406,
+      "learning_rate": 0.0001852737995192178,
+      "loss": 7.4317,
+      "step": 149660
+    },
+    {
+      "epoch": 18.010830324909747,
+      "grad_norm": 3822.205078125,
+      "learning_rate": 0.00018527181233944932,
+      "loss": 7.2932,
+      "step": 149670
+    },
+    {
+      "epoch": 18.012033694344165,
+      "grad_norm": 304.72161865234375,
+      "learning_rate": 0.00018526982503627148,
+      "loss": 7.334,
+      "step": 149680
+    },
+    {
+      "epoch": 18.01323706377858,
+      "grad_norm": 103.07905578613281,
+      "learning_rate": 0.00018526783760968712,
+      "loss": 7.3166,
+      "step": 149690
+    },
+    {
+      "epoch": 18.014440433212997,
+      "grad_norm": 111.76778411865234,
+      "learning_rate": 0.0001852658500596992,
+      "loss": 7.3012,
+      "step": 149700
+    },
+    {
+      "epoch": 18.015643802647414,
+      "grad_norm": 173.53530883789062,
+      "learning_rate": 0.0001852638623863105,
+      "loss": 7.2736,
+      "step": 149710
+    },
+    {
+      "epoch": 18.016847172081828,
+      "grad_norm": 1576.4041748046875,
+      "learning_rate": 0.00018526187458952394,
+      "loss": 7.398,
+      "step": 149720
+    },
+    {
+      "epoch": 18.018050541516246,
+      "grad_norm": 1148.1187744140625,
+      "learning_rate": 0.0001852598866693424,
+      "loss": 7.3654,
+      "step": 149730
+    },
+    {
+      "epoch": 18.019253910950663,
+      "grad_norm": 123.3941879272461,
+      "learning_rate": 0.00018525789862576875,
+      "loss": 7.4426,
+      "step": 149740
+    },
+    {
+      "epoch": 18.020457280385077,
+      "grad_norm": 771.9910278320312,
+      "learning_rate": 0.0001852559104588059,
+      "loss": 7.3355,
+      "step": 149750
+    },
+    {
+      "epoch": 18.021660649819495,
+      "grad_norm": 1503.333740234375,
+      "learning_rate": 0.0001852539221684567,
+      "loss": 7.3849,
+      "step": 149760
+    },
+    {
+      "epoch": 18.022864019253912,
+      "grad_norm": 1722.6297607421875,
+      "learning_rate": 0.00018525193375472398,
+      "loss": 7.371,
+      "step": 149770
+    },
+    {
+      "epoch": 18.024067388688326,
+      "grad_norm": 2357.4091796875,
+      "learning_rate": 0.00018524994521761069,
+      "loss": 7.3561,
+      "step": 149780
+    },
+    {
+      "epoch": 18.025270758122744,
+      "grad_norm": 460.76104736328125,
+      "learning_rate": 0.00018524795655711968,
+      "loss": 7.3813,
+      "step": 149790
+    },
+    {
+      "epoch": 18.02647412755716,
+      "grad_norm": 1920.828125,
+      "learning_rate": 0.00018524596777325382,
+      "loss": 7.4556,
+      "step": 149800
+    },
+    {
+      "epoch": 18.027677496991576,
+      "grad_norm": 9213.4033203125,
+      "learning_rate": 0.00018524397886601602,
+      "loss": 7.3546,
+      "step": 149810
+    },
+    {
+      "epoch": 18.028880866425993,
+      "grad_norm": 9927.3623046875,
+      "learning_rate": 0.00018524198983540914,
+      "loss": 7.3267,
+      "step": 149820
+    },
+    {
+      "epoch": 18.03008423586041,
+      "grad_norm": 18445.11328125,
+      "learning_rate": 0.00018524000068143604,
+      "loss": 7.4944,
+      "step": 149830
+    },
+    {
+      "epoch": 18.031287605294825,
+      "grad_norm": 8243.80078125,
+      "learning_rate": 0.0001852380114040996,
+      "loss": 7.5059,
+      "step": 149840
+    },
+    {
+      "epoch": 18.032490974729242,
+      "grad_norm": 1434.2047119140625,
+      "learning_rate": 0.00018523602200340276,
+      "loss": 7.4293,
+      "step": 149850
+    },
+    {
+      "epoch": 18.03369434416366,
+      "grad_norm": 983.7843017578125,
+      "learning_rate": 0.0001852340324793483,
+      "loss": 7.4002,
+      "step": 149860
+    },
+    {
+      "epoch": 18.034897713598074,
+      "grad_norm": 3585.263427734375,
+      "learning_rate": 0.00018523204283193919,
+      "loss": 7.3998,
+      "step": 149870
+    },
+    {
+      "epoch": 18.03610108303249,
+      "grad_norm": 3442.3017578125,
+      "learning_rate": 0.0001852300530611782,
+      "loss": 7.4419,
+      "step": 149880
+    },
+    {
+      "epoch": 18.03730445246691,
+      "grad_norm": 743.7374267578125,
+      "learning_rate": 0.00018522806316706837,
+      "loss": 7.4102,
+      "step": 149890
+    },
+    {
+      "epoch": 18.038507821901323,
+      "grad_norm": 1866.08251953125,
+      "learning_rate": 0.00018522607314961246,
+      "loss": 7.501,
+      "step": 149900
+    },
+    {
+      "epoch": 18.03971119133574,
+      "grad_norm": 405.5949401855469,
+      "learning_rate": 0.00018522408300881337,
+      "loss": 7.3552,
+      "step": 149910
+    },
+    {
+      "epoch": 18.040914560770158,
+      "grad_norm": 4595.52783203125,
+      "learning_rate": 0.000185222092744674,
+      "loss": 7.4292,
+      "step": 149920
+    },
+    {
+      "epoch": 18.042117930204572,
+      "grad_norm": 388.3169250488281,
+      "learning_rate": 0.0001852201023571972,
+      "loss": 7.3558,
+      "step": 149930
+    },
+    {
+      "epoch": 18.04332129963899,
+      "grad_norm": 1000.1346435546875,
+      "learning_rate": 0.0001852181118463859,
+      "loss": 7.3738,
+      "step": 149940
+    },
+    {
+      "epoch": 18.044524669073404,
+      "grad_norm": 7603.63720703125,
+      "learning_rate": 0.00018521612121224294,
+      "loss": 7.4176,
+      "step": 149950
+    },
+    {
+      "epoch": 18.04572803850782,
+      "grad_norm": 334.28271484375,
+      "learning_rate": 0.0001852141304547712,
+      "loss": 7.4004,
+      "step": 149960
+    },
+    {
+      "epoch": 18.04693140794224,
+      "grad_norm": 1391.88037109375,
+      "learning_rate": 0.0001852121395739736,
+      "loss": 7.3678,
+      "step": 149970
+    },
+    {
+      "epoch": 18.048134777376653,
+      "grad_norm": 27133.986328125,
+      "learning_rate": 0.000185210148569853,
+      "loss": 7.4173,
+      "step": 149980
+    },
+    {
+      "epoch": 18.04933814681107,
+      "grad_norm": 654.1278076171875,
+      "learning_rate": 0.00018520815744241226,
+      "loss": 7.4461,
+      "step": 149990
+    },
+    {
+      "epoch": 18.050541516245488,
+      "grad_norm": 653.0599365234375,
+      "learning_rate": 0.00018520616619165429,
+      "loss": 7.4052,
+      "step": 150000
+    },
+    {
+      "epoch": 18.051744885679902,
+      "grad_norm": 17197.240234375,
+      "learning_rate": 0.00018520417481758194,
+      "loss": 7.4436,
+      "step": 150010
+    },
+    {
+      "epoch": 18.05294825511432,
+      "grad_norm": 991.9915771484375,
+      "learning_rate": 0.0001852021833201981,
+      "loss": 7.4564,
+      "step": 150020
+    },
+    {
+      "epoch": 18.054151624548737,
+      "grad_norm": 2691.764892578125,
+      "learning_rate": 0.00018520019169950572,
+      "loss": 7.4842,
+      "step": 150030
+    },
+    {
+      "epoch": 18.05535499398315,
+      "grad_norm": 6338.83544921875,
+      "learning_rate": 0.0001851981999555076,
+      "loss": 7.3698,
+      "step": 150040
+    },
+    {
+      "epoch": 18.05655836341757,
+      "grad_norm": 185.8012237548828,
+      "learning_rate": 0.00018519620808820663,
+      "loss": 7.2721,
+      "step": 150050
+    },
+    {
+      "epoch": 18.057761732851986,
+      "grad_norm": 339.1905212402344,
+      "learning_rate": 0.00018519421609760573,
+      "loss": 7.3596,
+      "step": 150060
+    },
+    {
+      "epoch": 18.0589651022864,
+      "grad_norm": 10055.3291015625,
+      "learning_rate": 0.00018519222398370776,
+      "loss": 7.4733,
+      "step": 150070
+    },
+    {
+      "epoch": 18.060168471720818,
+      "grad_norm": 384.69122314453125,
+      "learning_rate": 0.00018519023174651562,
+      "loss": 7.4054,
+      "step": 150080
+    },
+    {
+      "epoch": 18.061371841155236,
+      "grad_norm": 1484.3526611328125,
+      "learning_rate": 0.00018518823938603219,
+      "loss": 7.4336,
+      "step": 150090
+    },
+    {
+      "epoch": 18.06257521058965,
+      "grad_norm": 655.484130859375,
+      "learning_rate": 0.00018518624690226033,
+      "loss": 7.409,
+      "step": 150100
+    },
+    {
+      "epoch": 18.063778580024067,
+      "grad_norm": 399.81085205078125,
+      "learning_rate": 0.00018518425429520292,
+      "loss": 7.4643,
+      "step": 150110
+    },
+    {
+      "epoch": 18.064981949458485,
+      "grad_norm": 777.1300659179688,
+      "learning_rate": 0.00018518226156486288,
+      "loss": 7.4887,
+      "step": 150120
+    },
+    {
+      "epoch": 18.0661853188929,
+      "grad_norm": 1444.622314453125,
+      "learning_rate": 0.00018518026871124307,
+      "loss": 7.357,
+      "step": 150130
+    },
+    {
+      "epoch": 18.067388688327316,
+      "grad_norm": 2199.203125,
+      "learning_rate": 0.00018517827573434642,
+      "loss": 7.3952,
+      "step": 150140
+    },
+    {
+      "epoch": 18.068592057761734,
+      "grad_norm": 921.2860107421875,
+      "learning_rate": 0.00018517628263417572,
+      "loss": 7.4424,
+      "step": 150150
+    },
+    {
+      "epoch": 18.069795427196148,
+      "grad_norm": 1011.6319580078125,
+      "learning_rate": 0.00018517428941073396,
+      "loss": 7.4433,
+      "step": 150160
+    },
+    {
+      "epoch": 18.070998796630565,
+      "grad_norm": 355.4660339355469,
+      "learning_rate": 0.00018517229606402393,
+      "loss": 7.4623,
+      "step": 150170
+    },
+    {
+      "epoch": 18.072202166064983,
+      "grad_norm": 336.6348876953125,
+      "learning_rate": 0.0001851703025940486,
+      "loss": 7.4286,
+      "step": 150180
+    },
+    {
+      "epoch": 18.073405535499397,
+      "grad_norm": 1935.4490966796875,
+      "learning_rate": 0.00018516830900081076,
+      "loss": 7.3342,
+      "step": 150190
+    },
+    {
+      "epoch": 18.074608904933815,
+      "grad_norm": 1970.3548583984375,
+      "learning_rate": 0.0001851663152843134,
+      "loss": 7.4098,
+      "step": 150200
+    },
+    {
+      "epoch": 18.075812274368232,
+      "grad_norm": 10640.7919921875,
+      "learning_rate": 0.00018516432144455934,
+      "loss": 7.4296,
+      "step": 150210
+    },
+    {
+      "epoch": 18.077015643802646,
+      "grad_norm": 5139.22802734375,
+      "learning_rate": 0.00018516232748155148,
+      "loss": 7.4374,
+      "step": 150220
+    },
+    {
+      "epoch": 18.078219013237064,
+      "grad_norm": 6197.271484375,
+      "learning_rate": 0.0001851603333952927,
+      "loss": 7.4404,
+      "step": 150230
+    },
+    {
+      "epoch": 18.07942238267148,
+      "grad_norm": 814.4199829101562,
+      "learning_rate": 0.0001851583391857859,
+      "loss": 7.4299,
+      "step": 150240
+    },
+    {
+      "epoch": 18.080625752105895,
+      "grad_norm": 7303.916015625,
+      "learning_rate": 0.00018515634485303395,
+      "loss": 7.3515,
+      "step": 150250
+    },
+    {
+      "epoch": 18.081829121540313,
+      "grad_norm": 37418.73046875,
+      "learning_rate": 0.00018515435039703974,
+      "loss": 7.4337,
+      "step": 150260
+    },
+    {
+      "epoch": 18.08303249097473,
+      "grad_norm": 2385.81640625,
+      "learning_rate": 0.0001851523558178062,
+      "loss": 7.393,
+      "step": 150270
+    },
+    {
+      "epoch": 18.084235860409144,
+      "grad_norm": 365.7031555175781,
+      "learning_rate": 0.00018515036111533612,
+      "loss": 7.4232,
+      "step": 150280
+    },
+    {
+      "epoch": 18.085439229843562,
+      "grad_norm": 4888.36572265625,
+      "learning_rate": 0.0001851483662896325,
+      "loss": 7.3289,
+      "step": 150290
+    },
+    {
+      "epoch": 18.08664259927798,
+      "grad_norm": 719.6381225585938,
+      "learning_rate": 0.00018514637134069812,
+      "loss": 7.4972,
+      "step": 150300
+    },
+    {
+      "epoch": 18.087845968712394,
+      "grad_norm": 4957.48974609375,
+      "learning_rate": 0.00018514437626853596,
+      "loss": 7.557,
+      "step": 150310
+    },
+    {
+      "epoch": 18.08904933814681,
+      "grad_norm": 5745.91552734375,
+      "learning_rate": 0.00018514238107314882,
+      "loss": 7.4551,
+      "step": 150320
+    },
+    {
+      "epoch": 18.09025270758123,
+      "grad_norm": 3058.720703125,
+      "learning_rate": 0.0001851403857545397,
+      "loss": 7.4735,
+      "step": 150330
+    },
+    {
+      "epoch": 18.091456077015643,
+      "grad_norm": 983.1807250976562,
+      "learning_rate": 0.00018513839031271135,
+      "loss": 7.4255,
+      "step": 150340
+    },
+    {
+      "epoch": 18.09265944645006,
+      "grad_norm": 190.457763671875,
+      "learning_rate": 0.00018513639474766676,
+      "loss": 7.4275,
+      "step": 150350
+    },
+    {
+      "epoch": 18.093862815884478,
+      "grad_norm": 136.7671661376953,
+      "learning_rate": 0.0001851343990594088,
+      "loss": 7.518,
+      "step": 150360
+    },
+    {
+      "epoch": 18.095066185318892,
+      "grad_norm": 62.439186096191406,
+      "learning_rate": 0.00018513240324794034,
+      "loss": 7.5152,
+      "step": 150370
+    },
+    {
+      "epoch": 18.09626955475331,
+      "grad_norm": 53.048553466796875,
+      "learning_rate": 0.00018513040731326426,
+      "loss": 7.5674,
+      "step": 150380
+    },
+    {
+      "epoch": 18.097472924187727,
+      "grad_norm": 30.955257415771484,
+      "learning_rate": 0.0001851284112553835,
+      "loss": 7.5671,
+      "step": 150390
+    },
+    {
+      "epoch": 18.09867629362214,
+      "grad_norm": 35.14524459838867,
+      "learning_rate": 0.00018512641507430086,
+      "loss": 7.5236,
+      "step": 150400
+    },
+    {
+      "epoch": 18.09987966305656,
+      "grad_norm": 105.49443817138672,
+      "learning_rate": 0.0001851244187700193,
+      "loss": 7.53,
+      "step": 150410
+    },
+    {
+      "epoch": 18.101083032490976,
+      "grad_norm": 26.84381103515625,
+      "learning_rate": 0.0001851224223425417,
+      "loss": 7.5037,
+      "step": 150420
+    },
+    {
+      "epoch": 18.10228640192539,
+      "grad_norm": 55.635597229003906,
+      "learning_rate": 0.00018512042579187094,
+      "loss": 7.4922,
+      "step": 150430
+    },
+    {
+      "epoch": 18.103489771359808,
+      "grad_norm": 66.30591583251953,
+      "learning_rate": 0.00018511842911800988,
+      "loss": 7.5733,
+      "step": 150440
+    },
+    {
+      "epoch": 18.104693140794225,
+      "grad_norm": 111.55799102783203,
+      "learning_rate": 0.00018511643232096148,
+      "loss": 7.5171,
+      "step": 150450
+    },
+    {
+      "epoch": 18.10589651022864,
+      "grad_norm": 203.03775024414062,
+      "learning_rate": 0.00018511443540072855,
+      "loss": 7.5406,
+      "step": 150460
+    },
+    {
+      "epoch": 18.107099879663057,
+      "grad_norm": 65.03297424316406,
+      "learning_rate": 0.00018511243835731404,
+      "loss": 7.3924,
+      "step": 150470
+    },
+    {
+      "epoch": 18.108303249097474,
+      "grad_norm": 247.0765380859375,
+      "learning_rate": 0.0001851104411907208,
+      "loss": 7.5306,
+      "step": 150480
+    },
+    {
+      "epoch": 18.10950661853189,
+      "grad_norm": 105.78290557861328,
+      "learning_rate": 0.00018510844390095177,
+      "loss": 7.4942,
+      "step": 150490
+    },
+    {
+      "epoch": 18.110709987966306,
+      "grad_norm": 82.8394775390625,
+      "learning_rate": 0.00018510644648800977,
+      "loss": 7.5812,
+      "step": 150500
+    },
+    {
+      "epoch": 18.111913357400724,
+      "grad_norm": 185.19444274902344,
+      "learning_rate": 0.00018510444895189777,
+      "loss": 7.3928,
+      "step": 150510
+    },
+    {
+      "epoch": 18.113116726835138,
+      "grad_norm": 318.8232421875,
+      "learning_rate": 0.0001851024512926186,
+      "loss": 7.3806,
+      "step": 150520
+    },
+    {
+      "epoch": 18.114320096269555,
+      "grad_norm": 199.4155731201172,
+      "learning_rate": 0.00018510045351017516,
+      "loss": 7.4369,
+      "step": 150530
+    },
+    {
+      "epoch": 18.115523465703973,
+      "grad_norm": 200.80133056640625,
+      "learning_rate": 0.00018509845560457037,
+      "loss": 7.4884,
+      "step": 150540
+    },
+    {
+      "epoch": 18.116726835138387,
+      "grad_norm": 68.57654571533203,
+      "learning_rate": 0.0001850964575758071,
+      "loss": 7.3612,
+      "step": 150550
+    },
+    {
+      "epoch": 18.117930204572804,
+      "grad_norm": 155.61082458496094,
+      "learning_rate": 0.00018509445942388824,
+      "loss": 7.3387,
+      "step": 150560
+    },
+    {
+      "epoch": 18.119133574007222,
+      "grad_norm": 89.49139404296875,
+      "learning_rate": 0.0001850924611488167,
+      "loss": 7.355,
+      "step": 150570
+    },
+    {
+      "epoch": 18.120336943441636,
+      "grad_norm": 200.8621826171875,
+      "learning_rate": 0.00018509046275059536,
+      "loss": 7.3875,
+      "step": 150580
+    },
+    {
+      "epoch": 18.121540312876053,
+      "grad_norm": 156.1785888671875,
+      "learning_rate": 0.0001850884642292271,
+      "loss": 7.2877,
+      "step": 150590
+    },
+    {
+      "epoch": 18.12274368231047,
+      "grad_norm": 99.13287353515625,
+      "learning_rate": 0.00018508646558471482,
+      "loss": 7.4628,
+      "step": 150600
+    },
+    {
+      "epoch": 18.123947051744885,
+      "grad_norm": 241.97088623046875,
+      "learning_rate": 0.00018508446681706145,
+      "loss": 7.3007,
+      "step": 150610
+    },
+    {
+      "epoch": 18.125150421179303,
+      "grad_norm": 103.4607162475586,
+      "learning_rate": 0.00018508246792626985,
+      "loss": 7.4595,
+      "step": 150620
+    },
+    {
+      "epoch": 18.126353790613717,
+      "grad_norm": 279.5429992675781,
+      "learning_rate": 0.00018508046891234288,
+      "loss": 7.3618,
+      "step": 150630
+    },
+    {
+      "epoch": 18.127557160048134,
+      "grad_norm": 137.11724853515625,
+      "learning_rate": 0.00018507846977528346,
+      "loss": 7.3999,
+      "step": 150640
+    },
+    {
+      "epoch": 18.128760529482552,
+      "grad_norm": 111.09395599365234,
+      "learning_rate": 0.00018507647051509453,
+      "loss": 7.4563,
+      "step": 150650
+    },
+    {
+      "epoch": 18.129963898916966,
+      "grad_norm": 143.97569274902344,
+      "learning_rate": 0.0001850744711317789,
+      "loss": 7.3328,
+      "step": 150660
+    },
+    {
+      "epoch": 18.131167268351383,
+      "grad_norm": 85.04871368408203,
+      "learning_rate": 0.00018507247162533955,
+      "loss": 7.4279,
+      "step": 150670
+    },
+    {
+      "epoch": 18.1323706377858,
+      "grad_norm": 270.6744689941406,
+      "learning_rate": 0.00018507047199577928,
+      "loss": 7.3959,
+      "step": 150680
+    },
+    {
+      "epoch": 18.133574007220215,
+      "grad_norm": 67.41458129882812,
+      "learning_rate": 0.00018506847224310105,
+      "loss": 7.3664,
+      "step": 150690
+    },
+    {
+      "epoch": 18.134777376654633,
+      "grad_norm": 134.37625122070312,
+      "learning_rate": 0.00018506647236730774,
+      "loss": 7.3415,
+      "step": 150700
+    },
+    {
+      "epoch": 18.13598074608905,
+      "grad_norm": 253.5409393310547,
+      "learning_rate": 0.00018506447236840226,
+      "loss": 7.3253,
+      "step": 150710
+    },
+    {
+      "epoch": 18.137184115523464,
+      "grad_norm": 107.16642761230469,
+      "learning_rate": 0.00018506247224638747,
+      "loss": 7.3727,
+      "step": 150720
+    },
+    {
+      "epoch": 18.13838748495788,
+      "grad_norm": 203.11355590820312,
+      "learning_rate": 0.0001850604720012663,
+      "loss": 7.4762,
+      "step": 150730
+    },
+    {
+      "epoch": 18.1395908543923,
+      "grad_norm": 211.16729736328125,
+      "learning_rate": 0.00018505847163304158,
+      "loss": 7.4892,
+      "step": 150740
+    },
+    {
+      "epoch": 18.140794223826713,
+      "grad_norm": 92.10486602783203,
+      "learning_rate": 0.0001850564711417163,
+      "loss": 7.3412,
+      "step": 150750
+    },
+    {
+      "epoch": 18.14199759326113,
+      "grad_norm": 149.5958709716797,
+      "learning_rate": 0.0001850544705272933,
+      "loss": 7.3792,
+      "step": 150760
+    },
+    {
+      "epoch": 18.14320096269555,
+      "grad_norm": 145.82102966308594,
+      "learning_rate": 0.0001850524697897755,
+      "loss": 7.406,
+      "step": 150770
+    },
+    {
+      "epoch": 18.144404332129962,
+      "grad_norm": 198.8045654296875,
+      "learning_rate": 0.0001850504689291657,
+      "loss": 7.3768,
+      "step": 150780
+    },
+    {
+      "epoch": 18.14560770156438,
+      "grad_norm": 115.61459350585938,
+      "learning_rate": 0.00018504846794546693,
+      "loss": 7.3904,
+      "step": 150790
+    },
+    {
+      "epoch": 18.146811070998798,
+      "grad_norm": 98.6722183227539,
+      "learning_rate": 0.00018504646683868204,
+      "loss": 7.3789,
+      "step": 150800
+    },
+    {
+      "epoch": 18.14801444043321,
+      "grad_norm": 156.54734802246094,
+      "learning_rate": 0.0001850444656088139,
+      "loss": 7.2916,
+      "step": 150810
+    },
+    {
+      "epoch": 18.14921780986763,
+      "grad_norm": 105.44517517089844,
+      "learning_rate": 0.00018504246425586542,
+      "loss": 7.3621,
+      "step": 150820
+    },
+    {
+      "epoch": 18.150421179302047,
+      "grad_norm": 177.7098388671875,
+      "learning_rate": 0.0001850404627798395,
+      "loss": 7.4214,
+      "step": 150830
+    },
+    {
+      "epoch": 18.15162454873646,
+      "grad_norm": 88.13648223876953,
+      "learning_rate": 0.00018503846118073902,
+      "loss": 7.4682,
+      "step": 150840
+    },
+    {
+      "epoch": 18.15282791817088,
+      "grad_norm": 93.81919860839844,
+      "learning_rate": 0.00018503645945856692,
+      "loss": 7.3496,
+      "step": 150850
+    },
+    {
+      "epoch": 18.154031287605296,
+      "grad_norm": 137.61151123046875,
+      "learning_rate": 0.00018503445761332605,
+      "loss": 7.4063,
+      "step": 150860
+    },
+    {
+      "epoch": 18.15523465703971,
+      "grad_norm": 57.4403076171875,
+      "learning_rate": 0.00018503245564501931,
+      "loss": 7.4349,
+      "step": 150870
+    },
+    {
+      "epoch": 18.156438026474127,
+      "grad_norm": 138.23391723632812,
+      "learning_rate": 0.00018503045355364964,
+      "loss": 7.3763,
+      "step": 150880
+    },
+    {
+      "epoch": 18.157641395908545,
+      "grad_norm": 151.458740234375,
+      "learning_rate": 0.00018502845133921988,
+      "loss": 7.35,
+      "step": 150890
+    },
+    {
+      "epoch": 18.15884476534296,
+      "grad_norm": 106.25843811035156,
+      "learning_rate": 0.000185026449001733,
+      "loss": 7.3819,
+      "step": 150900
+    },
+    {
+      "epoch": 18.160048134777377,
+      "grad_norm": 126.08541107177734,
+      "learning_rate": 0.00018502444654119182,
+      "loss": 7.467,
+      "step": 150910
+    },
+    {
+      "epoch": 18.161251504211794,
+      "grad_norm": 122.78411102294922,
+      "learning_rate": 0.0001850224439575993,
+      "loss": 7.3777,
+      "step": 150920
+    },
+    {
+      "epoch": 18.162454873646208,
+      "grad_norm": 255.74432373046875,
+      "learning_rate": 0.00018502044125095828,
+      "loss": 7.3988,
+      "step": 150930
+    },
+    {
+      "epoch": 18.163658243080626,
+      "grad_norm": 167.97303771972656,
+      "learning_rate": 0.00018501843842127174,
+      "loss": 7.523,
+      "step": 150940
+    },
+    {
+      "epoch": 18.164861612515043,
+      "grad_norm": 78.77965545654297,
+      "learning_rate": 0.00018501643546854247,
+      "loss": 7.369,
+      "step": 150950
+    },
+    {
+      "epoch": 18.166064981949457,
+      "grad_norm": 172.5324249267578,
+      "learning_rate": 0.00018501443239277347,
+      "loss": 7.457,
+      "step": 150960
+    },
+    {
+      "epoch": 18.167268351383875,
+      "grad_norm": 69.45236206054688,
+      "learning_rate": 0.00018501242919396758,
+      "loss": 7.3798,
+      "step": 150970
+    },
+    {
+      "epoch": 18.168471720818292,
+      "grad_norm": 66.4688491821289,
+      "learning_rate": 0.00018501042587212773,
+      "loss": 7.5096,
+      "step": 150980
+    },
+    {
+      "epoch": 18.169675090252706,
+      "grad_norm": 102.49665832519531,
+      "learning_rate": 0.0001850084224272568,
+      "loss": 7.4028,
+      "step": 150990
+    },
+    {
+      "epoch": 18.170878459687124,
+      "grad_norm": 169.3874053955078,
+      "learning_rate": 0.0001850064188593577,
+      "loss": 7.3443,
+      "step": 151000
+    },
+    {
+      "epoch": 18.17208182912154,
+      "grad_norm": 119.93611145019531,
+      "learning_rate": 0.0001850044151684333,
+      "loss": 7.3586,
+      "step": 151010
+    },
+    {
+      "epoch": 18.173285198555956,
+      "grad_norm": 111.78881072998047,
+      "learning_rate": 0.00018500241135448655,
+      "loss": 7.3727,
+      "step": 151020
+    },
+    {
+      "epoch": 18.174488567990373,
+      "grad_norm": 209.1993408203125,
+      "learning_rate": 0.0001850004074175203,
+      "loss": 7.3423,
+      "step": 151030
+    },
+    {
+      "epoch": 18.17569193742479,
+      "grad_norm": 129.2776641845703,
+      "learning_rate": 0.0001849984033575375,
+      "loss": 7.3181,
+      "step": 151040
+    },
+    {
+      "epoch": 18.176895306859205,
+      "grad_norm": 51.54475784301758,
+      "learning_rate": 0.000184996399174541,
+      "loss": 7.5765,
+      "step": 151050
+    },
+    {
+      "epoch": 18.178098676293622,
+      "grad_norm": 85.18266296386719,
+      "learning_rate": 0.00018499439486853374,
+      "loss": 7.4474,
+      "step": 151060
+    },
+    {
+      "epoch": 18.17930204572804,
+      "grad_norm": 49.51419448852539,
+      "learning_rate": 0.00018499239043951861,
+      "loss": 7.5092,
+      "step": 151070
+    },
+    {
+      "epoch": 18.180505415162454,
+      "grad_norm": 89.0058364868164,
+      "learning_rate": 0.0001849903858874985,
+      "loss": 7.5768,
+      "step": 151080
+    },
+    {
+      "epoch": 18.18170878459687,
+      "grad_norm": 558.0090942382812,
+      "learning_rate": 0.00018498838121247632,
+      "loss": 7.5288,
+      "step": 151090
+    },
+    {
+      "epoch": 18.18291215403129,
+      "grad_norm": 78.90757751464844,
+      "learning_rate": 0.00018498637641445497,
+      "loss": 7.4631,
+      "step": 151100
+    },
+    {
+      "epoch": 18.184115523465703,
+      "grad_norm": 510.8673095703125,
+      "learning_rate": 0.00018498437149343735,
+      "loss": 7.5264,
+      "step": 151110
+    },
+    {
+      "epoch": 18.18531889290012,
+      "grad_norm": 73.83006286621094,
+      "learning_rate": 0.00018498236644942634,
+      "loss": 7.4912,
+      "step": 151120
+    },
+    {
+      "epoch": 18.186522262334538,
+      "grad_norm": 104.17164611816406,
+      "learning_rate": 0.00018498036128242494,
+      "loss": 7.5573,
+      "step": 151130
+    },
+    {
+      "epoch": 18.187725631768952,
+      "grad_norm": 66.8059310913086,
+      "learning_rate": 0.0001849783559924359,
+      "loss": 7.4829,
+      "step": 151140
+    },
+    {
+      "epoch": 18.18892900120337,
+      "grad_norm": 45.53932571411133,
+      "learning_rate": 0.00018497635057946222,
+      "loss": 7.5827,
+      "step": 151150
+    },
+    {
+      "epoch": 18.190132370637787,
+      "grad_norm": 52.43602752685547,
+      "learning_rate": 0.00018497434504350678,
+      "loss": 7.5176,
+      "step": 151160
+    },
+    {
+      "epoch": 18.1913357400722,
+      "grad_norm": 1033.6990966796875,
+      "learning_rate": 0.0001849723393845725,
+      "loss": 7.492,
+      "step": 151170
+    },
+    {
+      "epoch": 18.19253910950662,
+      "grad_norm": 188.4258270263672,
+      "learning_rate": 0.00018497033360266224,
+      "loss": 7.5684,
+      "step": 151180
+    },
+    {
+      "epoch": 18.193742478941036,
+      "grad_norm": 136.79278564453125,
+      "learning_rate": 0.00018496832769777894,
+      "loss": 7.5707,
+      "step": 151190
+    },
+    {
+      "epoch": 18.19494584837545,
+      "grad_norm": 56.89272689819336,
+      "learning_rate": 0.00018496632166992548,
+      "loss": 7.5106,
+      "step": 151200
+    },
+    {
+      "epoch": 18.196149217809868,
+      "grad_norm": 208.19151306152344,
+      "learning_rate": 0.0001849643155191048,
+      "loss": 7.5228,
+      "step": 151210
+    },
+    {
+      "epoch": 18.197352587244286,
+      "grad_norm": 1972.562255859375,
+      "learning_rate": 0.00018496230924531978,
+      "loss": 7.537,
+      "step": 151220
+    },
+    {
+      "epoch": 18.1985559566787,
+      "grad_norm": 236.2059783935547,
+      "learning_rate": 0.0001849603028485733,
+      "loss": 7.5807,
+      "step": 151230
+    },
+    {
+      "epoch": 18.199759326113117,
+      "grad_norm": 155.88121032714844,
+      "learning_rate": 0.0001849582963288683,
+      "loss": 7.4712,
+      "step": 151240
+    },
+    {
+      "epoch": 18.200962695547535,
+      "grad_norm": 190.3043975830078,
+      "learning_rate": 0.00018495628968620768,
+      "loss": 7.4649,
+      "step": 151250
+    },
+    {
+      "epoch": 18.20216606498195,
+      "grad_norm": 122.1622543334961,
+      "learning_rate": 0.0001849542829205943,
+      "loss": 7.5207,
+      "step": 151260
+    },
+    {
+      "epoch": 18.203369434416366,
+      "grad_norm": 83.27071380615234,
+      "learning_rate": 0.0001849522760320311,
+      "loss": 7.5285,
+      "step": 151270
+    },
+    {
+      "epoch": 18.204572803850784,
+      "grad_norm": 162.8627166748047,
+      "learning_rate": 0.00018495026902052103,
+      "loss": 7.4729,
+      "step": 151280
+    },
+    {
+      "epoch": 18.205776173285198,
+      "grad_norm": 149.04122924804688,
+      "learning_rate": 0.00018494826188606691,
+      "loss": 7.4415,
+      "step": 151290
+    },
+    {
+      "epoch": 18.206979542719615,
+      "grad_norm": 49.33675765991211,
+      "learning_rate": 0.00018494625462867168,
+      "loss": 7.4724,
+      "step": 151300
+    },
+    {
+      "epoch": 18.20818291215403,
+      "grad_norm": 220.3988800048828,
+      "learning_rate": 0.00018494424724833824,
+      "loss": 7.412,
+      "step": 151310
+    },
+    {
+      "epoch": 18.209386281588447,
+      "grad_norm": 114.99543762207031,
+      "learning_rate": 0.00018494223974506953,
+      "loss": 7.3662,
+      "step": 151320
+    },
+    {
+      "epoch": 18.210589651022865,
+      "grad_norm": 143.4963836669922,
+      "learning_rate": 0.00018494023211886843,
+      "loss": 7.499,
+      "step": 151330
+    },
+    {
+      "epoch": 18.21179302045728,
+      "grad_norm": 90.40047454833984,
+      "learning_rate": 0.0001849382243697378,
+      "loss": 7.5558,
+      "step": 151340
+    },
+    {
+      "epoch": 18.212996389891696,
+      "grad_norm": 107.75560760498047,
+      "learning_rate": 0.00018493621649768062,
+      "loss": 7.4405,
+      "step": 151350
+    },
+    {
+      "epoch": 18.214199759326114,
+      "grad_norm": 184.58242797851562,
+      "learning_rate": 0.00018493420850269977,
+      "loss": 7.4225,
+      "step": 151360
+    },
+    {
+      "epoch": 18.215403128760528,
+      "grad_norm": 112.13935852050781,
+      "learning_rate": 0.00018493220038479812,
+      "loss": 7.348,
+      "step": 151370
+    },
+    {
+      "epoch": 18.216606498194945,
+      "grad_norm": 130.02120971679688,
+      "learning_rate": 0.00018493019214397863,
+      "loss": 7.5202,
+      "step": 151380
+    },
+    {
+      "epoch": 18.217809867629363,
+      "grad_norm": 308.7380676269531,
+      "learning_rate": 0.0001849281837802442,
+      "loss": 7.3169,
+      "step": 151390
+    },
+    {
+      "epoch": 18.219013237063777,
+      "grad_norm": 235.38868713378906,
+      "learning_rate": 0.0001849261752935977,
+      "loss": 7.3293,
+      "step": 151400
+    },
+    {
+      "epoch": 18.220216606498195,
+      "grad_norm": 723.6695556640625,
+      "learning_rate": 0.00018492416668404205,
+      "loss": 7.3796,
+      "step": 151410
+    },
+    {
+      "epoch": 18.221419975932612,
+      "grad_norm": 285.28717041015625,
+      "learning_rate": 0.00018492215795158016,
+      "loss": 7.3958,
+      "step": 151420
+    },
+    {
+      "epoch": 18.222623345367026,
+      "grad_norm": 628.6517944335938,
+      "learning_rate": 0.00018492014909621496,
+      "loss": 7.4207,
+      "step": 151430
+    },
+    {
+      "epoch": 18.223826714801444,
+      "grad_norm": 779.4058227539062,
+      "learning_rate": 0.00018491814011794934,
+      "loss": 7.3709,
+      "step": 151440
+    },
+    {
+      "epoch": 18.22503008423586,
+      "grad_norm": 401.85968017578125,
+      "learning_rate": 0.0001849161310167862,
+      "loss": 7.3898,
+      "step": 151450
+    },
+    {
+      "epoch": 18.226233453670275,
+      "grad_norm": 797.9706420898438,
+      "learning_rate": 0.00018491412179272843,
+      "loss": 7.3811,
+      "step": 151460
+    },
+    {
+      "epoch": 18.227436823104693,
+      "grad_norm": 615.8762817382812,
+      "learning_rate": 0.000184912112445779,
+      "loss": 7.455,
+      "step": 151470
+    },
+    {
+      "epoch": 18.22864019253911,
+      "grad_norm": 1646.37353515625,
+      "learning_rate": 0.00018491010297594075,
+      "loss": 7.389,
+      "step": 151480
+    },
+    {
+      "epoch": 18.229843561973524,
+      "grad_norm": 1196.743408203125,
+      "learning_rate": 0.00018490809338321662,
+      "loss": 7.4918,
+      "step": 151490
+    },
+    {
+      "epoch": 18.231046931407942,
+      "grad_norm": 1348.6378173828125,
+      "learning_rate": 0.00018490608366760952,
+      "loss": 7.4224,
+      "step": 151500
+    },
     }
   ],
   "logging_steps": 10,
@@ -104884,7 +107789,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
+
{
|
106214 |
+
"epoch": 18.23225030084236,
|
106215 |
+
"grad_norm": 1069.326904296875,
|
106216 |
+
"learning_rate": 0.00018490407382912237,
|
106217 |
+
"loss": 7.4181,
|
106218 |
+
"step": 151510
|
106219 |
+
},
|
106220 |
+
{
|
106221 |
+
"epoch": 18.233453670276774,
|
106222 |
+
"grad_norm": 920.8336181640625,
|
106223 |
+
"learning_rate": 0.00018490206386775806,
|
106224 |
+
"loss": 7.4824,
|
106225 |
+
"step": 151520
|
106226 |
+
},
|
106227 |
+
{
|
106228 |
+
"epoch": 18.23465703971119,
|
106229 |
+
"grad_norm": 1177.33984375,
|
106230 |
+
"learning_rate": 0.0001849000537835195,
|
106231 |
+
"loss": 7.4369,
|
106232 |
+
"step": 151530
|
106233 |
+
},
|
106234 |
+
{
|
106235 |
+
"epoch": 18.23586040914561,
|
106236 |
+
"grad_norm": 1107.93359375,
|
106237 |
+
"learning_rate": 0.0001848980435764096,
|
106238 |
+
"loss": 7.3992,
|
106239 |
+
"step": 151540
|
106240 |
+
},
|
106241 |
+
{
|
106242 |
+
"epoch": 18.237063778580023,
|
106243 |
+
"grad_norm": 671.354736328125,
|
106244 |
+
"learning_rate": 0.00018489603324643126,
|
106245 |
+
"loss": 7.4526,
|
106246 |
+
"step": 151550
|
106247 |
+
},
|
106248 |
+
{
|
106249 |
+
"epoch": 18.23826714801444,
|
106250 |
+
"grad_norm": 713.6513061523438,
|
106251 |
+
"learning_rate": 0.0001848940227935874,
|
106252 |
+
"loss": 7.4403,
|
106253 |
+
"step": 151560
|
106254 |
+
},
|
106255 |
+
{
|
106256 |
+
"epoch": 18.239470517448858,
|
106257 |
+
"grad_norm": 826.3712768554688,
|
106258 |
+
"learning_rate": 0.0001848920122178809,
|
106259 |
+
"loss": 7.3983,
|
106260 |
+
"step": 151570
|
106261 |
+
},
|
106262 |
+
{
|
106263 |
+
"epoch": 18.240673886883272,
|
106264 |
+
"grad_norm": 1284.66650390625,
|
106265 |
+
"learning_rate": 0.00018489000151931476,
|
106266 |
+
"loss": 7.4668,
|
106267 |
+
"step": 151580
|
106268 |
+
},
|
106269 |
+
{
|
106270 |
+
"epoch": 18.24187725631769,
|
106271 |
+
"grad_norm": 699.0711059570312,
|
106272 |
+
"learning_rate": 0.0001848879906978918,
|
106273 |
+
"loss": 7.4623,
|
106274 |
+
"step": 151590
|
106275 |
+
},
|
106276 |
+
{
|
106277 |
+
"epoch": 18.243080625752107,
|
106278 |
+
"grad_norm": 404.0067443847656,
|
106279 |
+
"learning_rate": 0.00018488597975361496,
|
106280 |
+
"loss": 7.4273,
|
106281 |
+
"step": 151600
|
106282 |
+
},
|
106283 |
+
{
|
106284 |
+
"epoch": 18.24428399518652,
|
106285 |
+
"grad_norm": 987.5184326171875,
|
106286 |
+
"learning_rate": 0.00018488396868648717,
|
106287 |
+
"loss": 7.4349,
|
106288 |
+
"step": 151610
|
106289 |
+
},
|
106290 |
+
{
|
106291 |
+
"epoch": 18.24548736462094,
|
106292 |
+
"grad_norm": 851.533447265625,
|
106293 |
+
"learning_rate": 0.00018488195749651127,
|
106294 |
+
"loss": 7.329,
|
106295 |
+
"step": 151620
|
106296 |
+
},
|
106297 |
+
{
|
106298 |
+
"epoch": 18.246690734055356,
|
106299 |
+
"grad_norm": 971.6378784179688,
|
106300 |
+
"learning_rate": 0.00018487994618369028,
|
106301 |
+
"loss": 7.3766,
|
106302 |
+
"step": 151630
|
106303 |
+
},
|
106304 |
+
{
|
106305 |
+
"epoch": 18.24789410348977,
|
106306 |
+
"grad_norm": 675.4795532226562,
|
106307 |
+
"learning_rate": 0.000184877934748027,
|
106308 |
+
"loss": 7.3764,
|
106309 |
+
"step": 151640
|
106310 |
+
},
|
106311 |
+
{
|
106312 |
+
"epoch": 18.249097472924188,
|
106313 |
+
"grad_norm": 861.827392578125,
|
106314 |
+
"learning_rate": 0.00018487592318952443,
|
106315 |
+
"loss": 7.3667,
|
106316 |
+
"step": 151650
|
106317 |
+
},
|
106318 |
+
{
|
106319 |
+
"epoch": 18.250300842358605,
|
106320 |
+
"grad_norm": 1099.5947265625,
|
106321 |
+
"learning_rate": 0.0001848739115081854,
|
106322 |
+
"loss": 7.3711,
|
106323 |
+
"step": 151660
|
106324 |
+
},
|
106325 |
+
{
|
106326 |
+
"epoch": 18.25150421179302,
|
106327 |
+
"grad_norm": 358.1723937988281,
|
106328 |
+
"learning_rate": 0.0001848718997040129,
|
106329 |
+
"loss": 7.4148,
|
106330 |
+
"step": 151670
|
106331 |
+
},
|
106332 |
+
{
|
106333 |
+
"epoch": 18.252707581227437,
|
106334 |
+
"grad_norm": 2679.742431640625,
|
106335 |
+
"learning_rate": 0.00018486988777700983,
|
106336 |
+
"loss": 7.3917,
|
106337 |
+
"step": 151680
|
106338 |
+
},
|
106339 |
+
{
|
106340 |
+
"epoch": 18.253910950661854,
|
106341 |
+
"grad_norm": 1006.3916015625,
|
106342 |
+
"learning_rate": 0.00018486787572717902,
|
106343 |
+
"loss": 7.444,
|
106344 |
+
"step": 151690
|
106345 |
+
},
|
106346 |
+
{
|
106347 |
+
"epoch": 18.25511432009627,
|
106348 |
+
"grad_norm": 767.1204223632812,
|
106349 |
+
"learning_rate": 0.00018486586355452348,
|
106350 |
+
"loss": 7.3879,
|
106351 |
+
"step": 151700
|
106352 |
+
},
|
106353 |
+
{
|
106354 |
+
"epoch": 18.256317689530686,
|
106355 |
+
"grad_norm": 916.1561889648438,
|
106356 |
+
"learning_rate": 0.0001848638512590461,
|
106357 |
+
"loss": 7.3784,
|
106358 |
+
"step": 151710
|
106359 |
+
},
|
106360 |
+
{
|
106361 |
+
"epoch": 18.257521058965104,
|
106362 |
+
"grad_norm": 365.0423889160156,
|
106363 |
+
"learning_rate": 0.00018486183884074971,
|
106364 |
+
"loss": 7.3472,
|
106365 |
+
"step": 151720
|
106366 |
+
},
|
106367 |
+
{
|
106368 |
+
"epoch": 18.258724428399518,
|
106369 |
+
"grad_norm": 466.4209289550781,
|
106370 |
+
"learning_rate": 0.00018485982629963736,
|
106371 |
+
"loss": 7.3599,
|
106372 |
+
"step": 151730
|
106373 |
+
},
|
106374 |
+
{
|
106375 |
+
"epoch": 18.259927797833935,
|
106376 |
+
"grad_norm": 301.1725769042969,
|
106377 |
+
"learning_rate": 0.00018485781363571183,
|
106378 |
+
"loss": 7.407,
|
106379 |
+
"step": 151740
|
106380 |
+
},
|
106381 |
+
{
|
106382 |
+
"epoch": 18.261131167268353,
|
106383 |
+
"grad_norm": 360.5256652832031,
|
106384 |
+
"learning_rate": 0.00018485580084897613,
|
106385 |
+
"loss": 7.321,
|
106386 |
+
"step": 151750
|
106387 |
+
},
|
106388 |
+
{
|
106389 |
+
"epoch": 18.262334536702767,
|
106390 |
+
"grad_norm": 867.6375732421875,
|
106391 |
+
"learning_rate": 0.00018485378793943311,
|
106392 |
+
"loss": 7.3021,
|
106393 |
+
"step": 151760
|
106394 |
+
},
|
106395 |
+
{
|
106396 |
+
"epoch": 18.263537906137184,
|
106397 |
+
"grad_norm": 422.22113037109375,
|
106398 |
+
"learning_rate": 0.00018485177490708575,
|
106399 |
+
"loss": 7.3276,
|
106400 |
+
"step": 151770
|
106401 |
+
},
|
106402 |
+
{
|
106403 |
+
"epoch": 18.264741275571602,
|
106404 |
+
"grad_norm": 544.160400390625,
|
106405 |
+
"learning_rate": 0.0001848497617519369,
|
106406 |
+
"loss": 7.3177,
|
106407 |
+
"step": 151780
|
106408 |
+
},
|
106409 |
+
{
|
106410 |
+
"epoch": 18.265944645006016,
|
106411 |
+
"grad_norm": 496.56494140625,
|
106412 |
+
"learning_rate": 0.0001848477484739895,
|
106413 |
+
"loss": 7.3398,
|
106414 |
+
"step": 151790
|
106415 |
+
},
|
106416 |
+
{
|
106417 |
+
"epoch": 18.267148014440433,
|
106418 |
+
"grad_norm": 417.65289306640625,
|
106419 |
+
"learning_rate": 0.00018484573507324644,
|
106420 |
+
"loss": 7.3748,
|
106421 |
+
"step": 151800
|
106422 |
+
},
|
106423 |
+
{
|
106424 |
+
"epoch": 18.26835138387485,
|
106425 |
+
"grad_norm": 883.1669921875,
|
106426 |
+
"learning_rate": 0.00018484372154971065,
|
106427 |
+
"loss": 7.3655,
|
106428 |
+
"step": 151810
|
106429 |
+
},
|
106430 |
+
{
|
106431 |
+
"epoch": 18.269554753309265,
|
106432 |
+
"grad_norm": 639.7716674804688,
|
106433 |
+
"learning_rate": 0.00018484170790338507,
|
106434 |
+
"loss": 7.4151,
|
106435 |
+
"step": 151820
|
106436 |
+
},
|
106437 |
+
{
|
106438 |
+
"epoch": 18.270758122743683,
|
106439 |
+
"grad_norm": 910.5934448242188,
|
106440 |
+
"learning_rate": 0.00018483969413427259,
|
106441 |
+
"loss": 7.3394,
|
106442 |
+
"step": 151830
|
106443 |
+
},
|
106444 |
+
{
|
106445 |
+
"epoch": 18.2719614921781,
|
106446 |
+
"grad_norm": 489.7258605957031,
|
106447 |
+
"learning_rate": 0.00018483768024237613,
|
106448 |
+
"loss": 7.3665,
|
106449 |
+
"step": 151840
|
106450 |
+
},
|
106451 |
+
{
|
106452 |
+
"epoch": 18.273164861612514,
|
106453 |
+
"grad_norm": 431.5764465332031,
|
106454 |
+
"learning_rate": 0.00018483566622769858,
|
106455 |
+
"loss": 7.344,
|
106456 |
+
"step": 151850
|
106457 |
+
},
|
106458 |
+
{
|
106459 |
+
"epoch": 18.27436823104693,
|
106460 |
+
"grad_norm": 542.9868774414062,
|
106461 |
+
"learning_rate": 0.0001848336520902429,
|
106462 |
+
"loss": 7.3436,
|
106463 |
+
"step": 151860
|
106464 |
+
},
|
106465 |
+
{
|
106466 |
+
"epoch": 18.27557160048135,
|
106467 |
+
"grad_norm": 371.8831481933594,
|
106468 |
+
"learning_rate": 0.00018483163783001197,
|
106469 |
+
"loss": 7.3787,
|
106470 |
+
"step": 151870
|
106471 |
+
},
|
106472 |
+
{
|
106473 |
+
"epoch": 18.276774969915763,
|
106474 |
+
"grad_norm": 570.6926879882812,
|
106475 |
+
"learning_rate": 0.00018482962344700871,
|
106476 |
+
"loss": 7.4664,
|
106477 |
+
"step": 151880
|
106478 |
+
},
|
106479 |
+
{
|
106480 |
+
"epoch": 18.27797833935018,
|
106481 |
+
"grad_norm": 425.23931884765625,
|
106482 |
+
"learning_rate": 0.00018482760894123604,
|
106483 |
+
"loss": 7.2684,
|
106484 |
+
"step": 151890
|
106485 |
+
},
|
106486 |
+
{
|
106487 |
+
"epoch": 18.2791817087846,
|
106488 |
+
"grad_norm": 390.46575927734375,
|
106489 |
+
"learning_rate": 0.0001848255943126969,
|
106490 |
+
"loss": 7.349,
|
106491 |
+
"step": 151900
|
106492 |
+
},
|
106493 |
+
{
|
106494 |
+
"epoch": 18.280385078219012,
|
106495 |
+
"grad_norm": 474.7912902832031,
|
106496 |
+
"learning_rate": 0.00018482357956139416,
|
106497 |
+
"loss": 7.4784,
|
106498 |
+
"step": 151910
|
106499 |
+
},
|
106500 |
+
{
|
106501 |
+
"epoch": 18.28158844765343,
|
106502 |
+
"grad_norm": 497.7284240722656,
|
106503 |
+
"learning_rate": 0.00018482156468733077,
|
106504 |
+
"loss": 7.42,
|
106505 |
+
"step": 151920
|
106506 |
+
},
|
106507 |
+
{
|
106508 |
+
"epoch": 18.282791817087848,
|
106509 |
+
"grad_norm": 377.16796875,
|
106510 |
+
"learning_rate": 0.00018481954969050963,
|
106511 |
+
"loss": 7.3588,
|
106512 |
+
"step": 151930
|
106513 |
+
},
|
106514 |
+
{
|
106515 |
+
"epoch": 18.28399518652226,
|
106516 |
+
"grad_norm": 509.6619873046875,
|
106517 |
+
"learning_rate": 0.00018481753457093365,
|
106518 |
+
"loss": 7.4345,
|
106519 |
+
"step": 151940
|
106520 |
+
},
|
106521 |
+
{
|
106522 |
+
"epoch": 18.28519855595668,
|
106523 |
+
"grad_norm": 505.9047546386719,
|
106524 |
+
"learning_rate": 0.00018481551932860577,
|
106525 |
+
"loss": 7.3872,
|
106526 |
+
"step": 151950
|
106527 |
+
},
|
106528 |
+
{
|
106529 |
+
"epoch": 18.286401925391097,
|
106530 |
+
"grad_norm": 504.4636535644531,
|
106531 |
+
"learning_rate": 0.0001848135039635289,
|
106532 |
+
"loss": 7.3197,
|
106533 |
+
"step": 151960
|
106534 |
+
},
|
106535 |
+
{
|
106536 |
+
"epoch": 18.28760529482551,
|
106537 |
+
"grad_norm": 331.6162109375,
|
106538 |
+
"learning_rate": 0.0001848114884757059,
|
106539 |
+
"loss": 7.4205,
|
106540 |
+
"step": 151970
|
106541 |
+
},
|
106542 |
+
{
|
106543 |
+
"epoch": 18.28880866425993,
|
106544 |
+
"grad_norm": 499.6607971191406,
|
106545 |
+
"learning_rate": 0.0001848094728651398,
|
106546 |
+
"loss": 7.4117,
|
106547 |
+
"step": 151980
|
106548 |
+
},
|
106549 |
+
{
|
106550 |
+
"epoch": 18.290012033694346,
|
106551 |
+
"grad_norm": 602.674072265625,
|
106552 |
+
"learning_rate": 0.00018480745713183344,
|
106553 |
+
"loss": 7.3608,
|
106554 |
+
"step": 151990
|
106555 |
+
},
|
106556 |
+
{
|
106557 |
+
"epoch": 18.29121540312876,
|
106558 |
+
"grad_norm": 75.54895782470703,
|
106559 |
+
"learning_rate": 0.00018480544127578972,
|
106560 |
+
"loss": 7.4137,
|
106561 |
+
"step": 152000
|
106562 |
+
},
|
106563 |
+
{
|
106564 |
+
"epoch": 18.292418772563177,
|
106565 |
+
"grad_norm": 104.66126251220703,
|
106566 |
+
"learning_rate": 0.00018480342529701162,
|
106567 |
+
"loss": 7.3288,
|
106568 |
+
"step": 152010
|
106569 |
+
},
|
106570 |
+
{
|
106571 |
+
"epoch": 18.29362214199759,
|
106572 |
+
"grad_norm": 343.62664794921875,
|
106573 |
+
"learning_rate": 0.000184801409195502,
|
106574 |
+
"loss": 7.3711,
|
106575 |
+
"step": 152020
|
106576 |
+
},
|
106577 |
+
{
|
106578 |
+
"epoch": 18.29482551143201,
|
106579 |
+
"grad_norm": 523.1060791015625,
|
106580 |
+
"learning_rate": 0.00018479939297126382,
|
106581 |
+
"loss": 7.4172,
|
106582 |
+
"step": 152030
|
106583 |
+
},
|
106584 |
+
{
|
106585 |
+
"epoch": 18.296028880866427,
|
106586 |
+
"grad_norm": 326.9176025390625,
|
106587 |
+
"learning_rate": 0.0001847973766243,
|
106588 |
+
"loss": 7.4349,
|
106589 |
+
"step": 152040
|
106590 |
+
},
|
106591 |
+
{
|
106592 |
+
"epoch": 18.29723225030084,
|
106593 |
+
"grad_norm": 264.3764343261719,
|
106594 |
+
"learning_rate": 0.0001847953601546134,
|
106595 |
+
"loss": 7.3919,
|
106596 |
+
"step": 152050
|
106597 |
+
},
|
106598 |
+
{
|
106599 |
+
"epoch": 18.29843561973526,
|
106600 |
+
"grad_norm": 459.1297607421875,
|
106601 |
+
"learning_rate": 0.00018479334356220702,
|
106602 |
+
"loss": 7.3893,
|
106603 |
+
"step": 152060
|
106604 |
+
},
|
106605 |
+
{
|
106606 |
+
"epoch": 18.299638989169676,
|
106607 |
+
"grad_norm": 290.79315185546875,
|
106608 |
+
"learning_rate": 0.0001847913268470837,
|
106609 |
+
"loss": 7.415,
|
106610 |
+
"step": 152070
|
106611 |
+
},
|
106612 |
+
{
|
106613 |
+
"epoch": 18.30084235860409,
|
106614 |
+
"grad_norm": 597.1858520507812,
|
106615 |
+
"learning_rate": 0.00018478931000924642,
|
106616 |
+
"loss": 7.3745,
|
106617 |
+
"step": 152080
|
106618 |
+
},
|
106619 |
+
{
|
106620 |
+
"epoch": 18.302045728038507,
|
106621 |
+
"grad_norm": 850.1022338867188,
|
106622 |
+
"learning_rate": 0.00018478729304869805,
|
106623 |
+
"loss": 7.3653,
|
106624 |
+
"step": 152090
|
106625 |
+
},
|
106626 |
+
{
|
106627 |
+
"epoch": 18.303249097472925,
|
106628 |
+
"grad_norm": 237.01907348632812,
|
106629 |
+
"learning_rate": 0.00018478527596544158,
|
106630 |
+
"loss": 7.3974,
|
106631 |
+
"step": 152100
|
106632 |
+
},
|
106633 |
+
{
|
106634 |
+
"epoch": 18.30445246690734,
|
106635 |
+
"grad_norm": 964.3274536132812,
|
106636 |
+
"learning_rate": 0.00018478325875947985,
|
106637 |
+
"loss": 7.3103,
|
106638 |
+
"step": 152110
|
106639 |
+
},
|
106640 |
+
{
|
106641 |
+
"epoch": 18.305655836341757,
|
106642 |
+
"grad_norm": 365.0823974609375,
|
106643 |
+
"learning_rate": 0.00018478124143081581,
|
106644 |
+
"loss": 7.3759,
|
106645 |
+
"step": 152120
|
106646 |
+
},
|
106647 |
+
{
|
106648 |
+
"epoch": 18.306859205776174,
|
106649 |
+
"grad_norm": 540.3898315429688,
|
106650 |
+
"learning_rate": 0.0001847792239794524,
|
106651 |
+
"loss": 7.3724,
|
106652 |
+
"step": 152130
|
106653 |
+
},
|
106654 |
+
{
|
106655 |
+
"epoch": 18.308062575210588,
|
106656 |
+
"grad_norm": 206.74462890625,
|
106657 |
+
"learning_rate": 0.0001847772064053925,
|
106658 |
+
"loss": 7.3672,
|
106659 |
+
"step": 152140
|
106660 |
+
},
|
106661 |
+
{
|
106662 |
+
"epoch": 18.309265944645006,
|
106663 |
+
"grad_norm": 756.8995361328125,
|
106664 |
+
"learning_rate": 0.00018477518870863905,
|
106665 |
+
"loss": 7.4172,
|
106666 |
+
"step": 152150
|
106667 |
+
},
|
106668 |
+
{
|
106669 |
+
"epoch": 18.310469314079423,
|
106670 |
+
"grad_norm": 619.6227416992188,
|
106671 |
+
"learning_rate": 0.00018477317088919502,
|
106672 |
+
"loss": 7.3557,
|
106673 |
+
"step": 152160
|
106674 |
+
},
|
106675 |
+
{
|
106676 |
+
"epoch": 18.311672683513837,
|
106677 |
+
"grad_norm": 206.07252502441406,
|
106678 |
+
"learning_rate": 0.00018477115294706321,
|
106679 |
+
"loss": 7.3845,
|
106680 |
+
"step": 152170
|
106681 |
+
},
|
106682 |
+
{
|
106683 |
+
"epoch": 18.312876052948255,
|
106684 |
+
"grad_norm": 206.29991149902344,
|
106685 |
+
"learning_rate": 0.00018476913488224667,
|
106686 |
+
"loss": 7.408,
|
106687 |
+
"step": 152180
|
106688 |
+
},
|
106689 |
+
{
|
106690 |
+
"epoch": 18.314079422382672,
|
106691 |
+
"grad_norm": 166.0495147705078,
|
106692 |
+
"learning_rate": 0.00018476711669474823,
|
106693 |
+
"loss": 7.3012,
|
106694 |
+
"step": 152190
|
106695 |
+
},
|
106696 |
+
{
|
106697 |
+
"epoch": 18.315282791817086,
|
106698 |
+
"grad_norm": 161.13961791992188,
|
106699 |
+
"learning_rate": 0.00018476509838457085,
|
106700 |
+
"loss": 7.3856,
|
106701 |
+
"step": 152200
|
106702 |
+
},
|
106703 |
+
{
|
106704 |
+
"epoch": 18.316486161251504,
|
106705 |
+
"grad_norm": 127.53560638427734,
|
106706 |
+
"learning_rate": 0.00018476307995171745,
|
106707 |
+
"loss": 7.2802,
|
106708 |
+
"step": 152210
|
106709 |
+
},
|
106710 |
+
{
|
106711 |
+
"epoch": 18.31768953068592,
|
106712 |
+
"grad_norm": 232.245361328125,
|
106713 |
+
"learning_rate": 0.00018476106139619095,
|
106714 |
+
"loss": 7.3252,
|
106715 |
+
"step": 152220
|
106716 |
+
},
|
106717 |
+
{
|
106718 |
+
"epoch": 18.318892900120336,
|
106719 |
+
"grad_norm": 189.64749145507812,
|
106720 |
+
"learning_rate": 0.00018475904271799423,
|
106721 |
+
"loss": 7.2922,
|
106722 |
+
"step": 152230
|
106723 |
+
},
|
106724 |
+
{
|
106725 |
+
"epoch": 18.320096269554753,
|
106726 |
+
"grad_norm": 135.0006103515625,
|
106727 |
+
"learning_rate": 0.00018475702391713031,
|
106728 |
+
"loss": 7.332,
|
106729 |
+
"step": 152240
|
106730 |
+
},
|
106731 |
+
{
|
106732 |
+
"epoch": 18.32129963898917,
|
106733 |
+
"grad_norm": 372.1553649902344,
|
106734 |
+
"learning_rate": 0.00018475500499360204,
|
106735 |
+
"loss": 7.35,
|
106736 |
+
"step": 152250
|
106737 |
+
},
|
106738 |
+
{
|
106739 |
+
"epoch": 18.322503008423585,
|
106740 |
+
"grad_norm": 514.6927490234375,
|
106741 |
+
"learning_rate": 0.00018475298594741233,
|
106742 |
+
"loss": 7.4239,
|
106743 |
+
"step": 152260
|
106744 |
+
},
|
106745 |
+
{
|
106746 |
+
"epoch": 18.323706377858002,
|
106747 |
+
"grad_norm": 163.652587890625,
|
106748 |
+
"learning_rate": 0.00018475096677856412,
|
106749 |
+
"loss": 7.4147,
|
106750 |
+
"step": 152270
|
106751 |
+
},
|
106752 |
+
{
|
106753 |
+
"epoch": 18.32490974729242,
|
106754 |
+
"grad_norm": 248.01438903808594,
|
106755 |
+
"learning_rate": 0.00018474894748706034,
|
106756 |
+
"loss": 7.3285,
|
106757 |
+
"step": 152280
|
106758 |
+
},
|
106759 |
+
{
|
106760 |
+
"epoch": 18.326113116726834,
|
106761 |
+
"grad_norm": 275.4211120605469,
|
106762 |
+
"learning_rate": 0.0001847469280729039,
|
106763 |
+
"loss": 7.3398,
|
106764 |
+
"step": 152290
|
106765 |
+
},
|
106766 |
+
{
|
106767 |
+
"epoch": 18.32731648616125,
|
106768 |
+
"grad_norm": 271.49530029296875,
|
106769 |
+
"learning_rate": 0.00018474490853609776,
|
106770 |
+
"loss": 7.4121,
|
106771 |
+
"step": 152300
|
106772 |
+
},
|
106773 |
+
{
|
106774 |
+
"epoch": 18.32851985559567,
|
106775 |
+
"grad_norm": 275.52764892578125,
|
106776 |
+
"learning_rate": 0.0001847428888766448,
|
106777 |
+
"loss": 7.3025,
|
106778 |
+
"step": 152310
|
106779 |
+
},
|
106780 |
+
{
|
106781 |
+
"epoch": 18.329723225030083,
|
106782 |
+
"grad_norm": 815.0440673828125,
|
106783 |
+
"learning_rate": 0.00018474086909454795,
|
106784 |
+
"loss": 7.3067,
|
106785 |
+
"step": 152320
|
106786 |
+
},
|
106787 |
+
{
|
106788 |
+
"epoch": 18.3309265944645,
|
106789 |
+
"grad_norm": 557.1624755859375,
|
106790 |
+
"learning_rate": 0.00018473884918981015,
|
106791 |
+
"loss": 7.4209,
|
106792 |
+
"step": 152330
|
106793 |
+
},
|
106794 |
+
{
|
106795 |
+
"epoch": 18.332129963898918,
|
106796 |
+
"grad_norm": 283.4029541015625,
|
106797 |
+
"learning_rate": 0.00018473682916243434,
|
106798 |
+
"loss": 7.3618,
|
106799 |
+
"step": 152340
|
106800 |
+
},
|
106801 |
+
{
|
106802 |
+
"epoch": 18.333333333333332,
|
106803 |
+
"grad_norm": 284.2005615234375,
|
106804 |
+
"learning_rate": 0.00018473480901242336,
|
106805 |
+
"loss": 7.4374,
|
106806 |
+
"step": 152350
|
106807 |
+
},
|
106808 |
+
{
|
106809 |
+
"epoch": 18.33453670276775,
|
106810 |
+
"grad_norm": 181.1649169921875,
|
106811 |
+
"learning_rate": 0.00018473278873978023,
|
106812 |
+
"loss": 7.4721,
|
106813 |
+
"step": 152360
|
106814 |
+
},
|
106815 |
+
{
|
106816 |
+
"epoch": 18.335740072202167,
|
106817 |
+
"grad_norm": 380.21990966796875,
|
106818 |
+
"learning_rate": 0.0001847307683445078,
|
106819 |
+
"loss": 7.3412,
|
106820 |
+
"step": 152370
|
106821 |
+
},
|
106822 |
+
{
|
106823 |
+
"epoch": 18.33694344163658,
|
106824 |
+
"grad_norm": 96.97758483886719,
|
106825 |
+
"learning_rate": 0.0001847287478266091,
|
106826 |
+
"loss": 7.3578,
|
106827 |
+
"step": 152380
|
106828 |
+
},
|
106829 |
+
{
|
106830 |
+
"epoch": 18.338146811071,
|
106831 |
+
"grad_norm": 197.025390625,
|
106832 |
+
"learning_rate": 0.00018472672718608692,
|
106833 |
+
"loss": 7.3366,
|
106834 |
+
"step": 152390
|
106835 |
+
},
|
106836 |
+
{
|
106837 |
+
"epoch": 18.339350180505416,
|
106838 |
+
"grad_norm": 153.88284301757812,
|
106839 |
+
"learning_rate": 0.00018472470642294427,
|
106840 |
+
"loss": 7.4359,
|
106841 |
+
"step": 152400
|
106842 |
+
},
|
106843 |
+
{
|
106844 |
+
"epoch": 18.34055354993983,
|
106845 |
+
"grad_norm": 74.3664321899414,
|
106846 |
+
"learning_rate": 0.00018472268553718405,
|
106847 |
+
"loss": 7.3939,
|
106848 |
+
"step": 152410
|
106849 |
+
},
|
106850 |
+
{
|
106851 |
+
"epoch": 18.341756919374248,
|
106852 |
+
"grad_norm": 118.60506439208984,
|
106853 |
+
"learning_rate": 0.00018472066452880918,
|
106854 |
+
"loss": 7.3732,
|
106855 |
+
"step": 152420
|
106856 |
+
},
|
106857 |
+
{
|
106858 |
+
"epoch": 18.342960288808666,
|
106859 |
+
"grad_norm": 351.7347412109375,
|
106860 |
+
"learning_rate": 0.0001847186433978226,
|
106861 |
+
"loss": 7.3547,
|
106862 |
+
"step": 152430
|
106863 |
+
},
|
106864 |
+
{
|
106865 |
+
"epoch": 18.34416365824308,
|
106866 |
+
"grad_norm": 98.69527435302734,
|
106867 |
+
"learning_rate": 0.00018471662214422722,
|
106868 |
+
"loss": 7.4385,
|
106869 |
+
"step": 152440
|
106870 |
+
},
|
106871 |
+
{
|
106872 |
+
"epoch": 18.345367027677497,
|
106873 |
+
"grad_norm": 116.80021667480469,
|
106874 |
+
"learning_rate": 0.00018471460076802595,
|
106875 |
+
"loss": 7.3886,
|
106876 |
+
"step": 152450
|
106877 |
+
},
|
106878 |
+
{
|
106879 |
+
"epoch": 18.346570397111915,
|
106880 |
+
"grad_norm": 311.6566162109375,
|
106881 |
+
"learning_rate": 0.0001847125792692218,
|
106882 |
+
"loss": 7.3342,
|
106883 |
+
"step": 152460
|
106884 |
+
},
|
106885 |
+
{
|
106886 |
+
"epoch": 18.34777376654633,
|
106887 |
+
"grad_norm": 245.01870727539062,
|
106888 |
+
"learning_rate": 0.00018471055764781758,
|
106889 |
+
"loss": 7.446,
|
106890 |
+
"step": 152470
|
106891 |
+
},
|
106892 |
+
{
|
106893 |
+
"epoch": 18.348977135980746,
|
106894 |
+
"grad_norm": 699.9287719726562,
|
106895 |
+
"learning_rate": 0.00018470853590381626,
|
106896 |
+
"loss": 7.3242,
|
106897 |
+
"step": 152480
|
106898 |
+
},
|
106899 |
+
{
|
106900 |
+
"epoch": 18.350180505415164,
|
106901 |
+
"grad_norm": 325.8674621582031,
|
106902 |
+
"learning_rate": 0.0001847065140372208,
|
106903 |
+
"loss": 7.262,
|
106904 |
+
"step": 152490
|
106905 |
+
},
|
106906 |
+
{
|
106907 |
+
"epoch": 18.351383874849578,
|
106908 |
+
"grad_norm": 309.1668701171875,
|
106909 |
+
"learning_rate": 0.0001847044920480341,
|
106910 |
+
"loss": 7.4257,
|
106911 |
+
"step": 152500
|
106912 |
+
},
|
106913 |
+
{
|
106914 |
+
"epoch": 18.352587244283995,
|
106915 |
+
"grad_norm": 352.16363525390625,
|
106916 |
+
"learning_rate": 0.0001847024699362591,
|
106917 |
+
"loss": 7.3365,
|
106918 |
+
"step": 152510
|
106919 |
+
},
|
106920 |
+
{
|
106921 |
+
"epoch": 18.353790613718413,
|
106922 |
+
"grad_norm": 669.510498046875,
|
106923 |
+
"learning_rate": 0.00018470044770189868,
|
106924 |
+
"loss": 7.2026,
|
106925 |
+
"step": 152520
|
106926 |
+
},
|
106927 |
+
{
|
106928 |
+
"epoch": 18.354993983152827,
|
106929 |
+
"grad_norm": 6747.38037109375,
|
106930 |
+
"learning_rate": 0.00018469842534495583,
|
106931 |
+
"loss": 7.2472,
|
106932 |
+
"step": 152530
|
106933 |
+
},
|
106934 |
+
{
|
106935 |
+
"epoch": 18.356197352587245,
|
106936 |
+
"grad_norm": 752.1255493164062,
|
106937 |
+
"learning_rate": 0.00018469640286543343,
|
106938 |
+
"loss": 7.3093,
|
106939 |
+
"step": 152540
|
106940 |
+
},
|
106941 |
+
{
|
106942 |
+
"epoch": 18.357400722021662,
|
106943 |
+
"grad_norm": 637.0225219726562,
|
106944 |
+
"learning_rate": 0.00018469438026333444,
|
106945 |
+
"loss": 7.3485,
|
106946 |
+
"step": 152550
|
106947 |
+
},
|
106948 |
+
{
|
106949 |
+
"epoch": 18.358604091456076,
|
106950 |
+
"grad_norm": 1597.137939453125,
|
106951 |
+
"learning_rate": 0.00018469235753866176,
|
106952 |
+
"loss": 7.3601,
|
106953 |
+
"step": 152560
|
106954 |
+
},
|
106955 |
+
{
|
106956 |
+
"epoch": 18.359807460890494,
|
106957 |
+
"grad_norm": 805.3880004882812,
|
106958 |
+
"learning_rate": 0.00018469033469141835,
|
106959 |
+
"loss": 7.4051,
|
106960 |
+
"step": 152570
|
106961 |
+
},
|
106962 |
+
{
|
106963 |
+
"epoch": 18.36101083032491,
|
106964 |
+
"grad_norm": 409.3124694824219,
|
106965 |
+
"learning_rate": 0.00018468831172160707,
|
106966 |
+
"loss": 7.2234,
|
106967 |
+
"step": 152580
|
106968 |
+
},
|
106969 |
+
{
|
106970 |
+
"epoch": 18.362214199759325,
|
106971 |
+
"grad_norm": 406.9848327636719,
|
106972 |
+
"learning_rate": 0.00018468628862923094,
|
106973 |
+
"loss": 7.3235,
|
106974 |
+
"step": 152590
|
106975 |
+
},
|
106976 |
+
{
|
106977 |
+
"epoch": 18.363417569193743,
|
106978 |
+
"grad_norm": 1156.8260498046875,
|
106979 |
+
"learning_rate": 0.00018468426541429282,
|
106980 |
+
"loss": 7.2933,
|
106981 |
+
"step": 152600
|
106982 |
+
},
|
106983 |
+
{
|
106984 |
+
"epoch": 18.36462093862816,
|
106985 |
+
"grad_norm": 831.1348266601562,
|
106986 |
+
"learning_rate": 0.00018468224207679565,
|
106987 |
+
"loss": 7.3392,
|
106988 |
+
"step": 152610
|
106989 |
+
},
|
106990 |
+
{
|
106991 |
+
"epoch": 18.365824308062574,
|
106992 |
+
"grad_norm": 961.0281372070312,
|
106993 |
+
"learning_rate": 0.0001846802186167424,
|
106994 |
+
"loss": 7.3274,
|
106995 |
+
"step": 152620
|
106996 |
+
},
|
106997 |
+
{
|
106998 |
+
"epoch": 18.367027677496992,
|
106999 |
+
"grad_norm": 15979.4677734375,
|
107000 |
+
"learning_rate": 0.00018467819503413595,
|
107001 |
+
"loss": 7.423,
|
107002 |
+
"step": 152630
|
107003 |
+
},
|
107004 |
+
{
|
107005 |
+
"epoch": 18.36823104693141,
|
107006 |
+
"grad_norm": 4876.44580078125,
|
107007 |
+
"learning_rate": 0.00018467617132897926,
|
107008 |
+
"loss": 7.399,
|
107009 |
+
"step": 152640
|
107010 |
+
},
|
107011 |
+
{
|
107012 |
+
"epoch": 18.369434416365824,
|
107013 |
+
"grad_norm": 1651.61083984375,
|
107014 |
+
"learning_rate": 0.0001846741475012752,
|
107015 |
+
"loss": 7.4468,
|
107016 |
+
"step": 152650
|
107017 |
+
},
|
107018 |
+
{
|
107019 |
+
"epoch": 18.37063778580024,
|
107020 |
+
"grad_norm": 1993.109375,
|
107021 |
+
"learning_rate": 0.0001846721235510268,
|
107022 |
+
"loss": 7.415,
|
107023 |
+
"step": 152660
|
107024 |
+
},
|
107025 |
+
{
|
107026 |
+
"epoch": 18.37184115523466,
|
107027 |
+
"grad_norm": 1994.6533203125,
|
107028 |
+
"learning_rate": 0.00018467009947823688,
|
107029 |
+
"loss": 7.4098,
|
107030 |
+
"step": 152670
|
107031 |
+
},
|
107032 |
+
{
|
107033 |
+
"epoch": 18.373044524669073,
|
107034 |
+
"grad_norm": 1205.272216796875,
|
107035 |
+
"learning_rate": 0.00018466807528290846,
|
107036 |
+
"loss": 7.3834,
|
107037 |
+
"step": 152680
|
107038 |
+
},
|
107039 |
+
{
|
107040 |
+
"epoch": 18.37424789410349,
|
107041 |
+
"grad_norm": 813.9103393554688,
|
107042 |
+
"learning_rate": 0.0001846660509650444,
|
107043 |
+
"loss": 7.3982,
|
107044 |
+
"step": 152690
|
107045 |
+
},
|
107046 |
+
{
|
107047 |
+
"epoch": 18.375451263537904,
|
107048 |
+
"grad_norm": 276.84539794921875,
|
107049 |
+
"learning_rate": 0.00018466402652464768,
|
107050 |
+
"loss": 7.2824,
|
107051 |
+
"step": 152700
|
107052 |
+
},
|
107053 |
+
{
|
107054 |
+
"epoch": 18.376654632972322,
|
107055 |
+
"grad_norm": 239.27444458007812,
|
107056 |
+
"learning_rate": 0.00018466200196172122,
|
107057 |
+
"loss": 7.3645,
|
107058 |
+
"step": 152710
|
107059 |
+
},
|
107060 |
+
{
|
107061 |
+
"epoch": 18.37785800240674,
|
107062 |
+
"grad_norm": 449.86865234375,
|
107063 |
+
"learning_rate": 0.0001846599772762679,
|
107064 |
+
"loss": 7.2669,
|
107065 |
+
"step": 152720
|
107066 |
+
},
|
107067 |
+
{
|
107068 |
+
"epoch": 18.379061371841154,
|
107069 |
+
"grad_norm": 473.189453125,
|
107070 |
+
"learning_rate": 0.00018465795246829075,
|
107071 |
+
"loss": 7.3682,
|
107072 |
+
"step": 152730
|
107073 |
+
},
|
107074 |
+
{
|
107075 |
+
"epoch": 18.38026474127557,
|
107076 |
+
"grad_norm": 981.4517211914062,
|
107077 |
+
"learning_rate": 0.0001846559275377926,
|
107078 |
+
"loss": 7.4094,
|
107079 |
+
"step": 152740
|
107080 |
+
},
|
107081 |
+
{
|
107082 |
+
"epoch": 18.38146811070999,
|
107083 |
+
"grad_norm": 671.7831420898438,
|
107084 |
+
"learning_rate": 0.00018465390248477643,
|
107085 |
+
"loss": 7.275,
|
107086 |
+
"step": 152750
|
107087 |
+
},
|
107088 |
+
{
|
107089 |
+
"epoch": 18.382671480144403,
|
107090 |
+
"grad_norm": 440.99591064453125,
|
107091 |
+
"learning_rate": 0.00018465187730924517,
|
107092 |
+
"loss": 7.3697,
|
107093 |
+
"step": 152760
|
107094 |
+
},
|
107095 |
+
{
|
107096 |
+
"epoch": 18.38387484957882,
|
107097 |
+
"grad_norm": 352.1147155761719,
|
107098 |
+
"learning_rate": 0.00018464985201120174,
|
107099 |
+
"loss": 7.2749,
|
107100 |
+
"step": 152770
|
107101 |
+
},
|
107102 |
+
{
|
107103 |
+
"epoch": 18.385078219013238,
|
107104 |
+
"grad_norm": 406.75775146484375,
|
107105 |
+
"learning_rate": 0.0001846478265906491,
|
107106 |
+
"loss": 7.4258,
|
107107 |
+
"step": 152780
|
107108 |
+
},
|
107109 |
+
{
|
107110 |
+
"epoch": 18.386281588447652,
|
107111 |
+
"grad_norm": 633.501220703125,
|
107112 |
+
"learning_rate": 0.0001846458010475901,
|
107113 |
+
"loss": 7.4049,
|
107114 |
+
"step": 152790
|
107115 |
+
},
|
107116 |
+
{
|
107117 |
+
"epoch": 18.38748495788207,
|
107118 |
+
"grad_norm": 458.1518249511719,
|
107119 |
+
"learning_rate": 0.00018464377538202778,
|
107120 |
+
"loss": 7.415,
|
107121 |
+
"step": 152800
|
107122 |
+
},
|
107123 |
+
{
|
107124 |
+
"epoch": 18.388688327316487,
|
107125 |
+
"grad_norm": 672.3319091796875,
|
107126 |
+
"learning_rate": 0.00018464174959396498,
|
107127 |
+
"loss": 7.4147,
|
107128 |
+
"step": 152810
|
107129 |
+
},
|
107130 |
+
{
|
107131 |
+
"epoch": 18.3898916967509,
|
107132 |
+
"grad_norm": 273.4700927734375,
|
107133 |
+
"learning_rate": 0.00018463972368340468,
|
107134 |
+
"loss": 7.273,
|
107135 |
+
"step": 152820
|
107136 |
+
},
|
107137 |
+
{
|
107138 |
+
"epoch": 18.39109506618532,
|
107139 |
+
"grad_norm": 239.07583618164062,
|
107140 |
+
"learning_rate": 0.00018463769765034982,
|
107141 |
+
"loss": 7.3782,
|
107142 |
+
"step": 152830
|
107143 |
+
},
|
107144 |
+
{
|
107145 |
+
"epoch": 18.392298435619736,
|
107146 |
+
"grad_norm": 638.8527221679688,
|
107147 |
+
"learning_rate": 0.0001846356714948033,
|
107148 |
+
"loss": 7.2381,
|
107149 |
+
"step": 152840
|
107150 |
+
},
|
107151 |
+
{
|
107152 |
+
"epoch": 18.39350180505415,
|
107153 |
+
"grad_norm": 776.33447265625,
|
107154 |
+
"learning_rate": 0.00018463364521676808,
|
107155 |
+
"loss": 7.4281,
|
107156 |
+
"step": 152850
|
107157 |
+
},
|
107158 |
+
{
|
107159 |
+
"epoch": 18.394705174488568,
|
107160 |
+
"grad_norm": 343.4711608886719,
|
107161 |
+
"learning_rate": 0.0001846316188162471,
|
107162 |
+
"loss": 7.3826,
|
107163 |
+
"step": 152860
|
107164 |
+
},
|
107165 |
+
{
|
107166 |
+
"epoch": 18.395908543922985,
|
107167 |
+
"grad_norm": 725.744873046875,
|
107168 |
+
"learning_rate": 0.00018462959229324324,
|
107169 |
+
"loss": 7.4091,
|
107170 |
+
"step": 152870
|
107171 |
+
},
|
107172 |
+
{
|
107173 |
+
"epoch": 18.3971119133574,
|
107174 |
+
"grad_norm": 1224.29443359375,
|
107175 |
+
"learning_rate": 0.00018462756564775946,
|
107176 |
+
"loss": 7.4163,
|
107177 |
+
"step": 152880
|
107178 |
+
},
|
107179 |
+
{
|
107180 |
+
"epoch": 18.398315282791817,
|
107181 |
+
"grad_norm": 503.4244384765625,
|
107182 |
+
"learning_rate": 0.0001846255388797987,
|
107183 |
+
"loss": 7.2828,
|
107184 |
+
"step": 152890
|
107185 |
+
},
|
107186 |
+
{
|
107187 |
+
"epoch": 18.399518652226234,
|
107188 |
+
"grad_norm": 343.1127014160156,
|
107189 |
+
"learning_rate": 0.0001846235119893639,
|
107190 |
+
"loss": 7.2975,
|
107191 |
+
"step": 152900
|
107192 |
+
},
|
107193 |
+
{
|
107194 |
+
"epoch": 18.40072202166065,
|
107195 |
+
"grad_norm": 399.8078308105469,
|
107196 |
+
"learning_rate": 0.000184621484976458,
|
107197 |
+
"loss": 7.336,
|
107198 |
+
"step": 152910
|
107199 |
+
},
|
107200 |
+
{
|
107201 |
+
"epoch": 18.401925391095066,
|
107202 |
+
"grad_norm": 283.1817321777344,
|
107203 |
+
"learning_rate": 0.0001846194578410839,
|
107204 |
+
"loss": 7.2593,
|
107205 |
+
"step": 152920
|
107206 |
+
},
|
107207 |
+
{
|
107208 |
+
"epoch": 18.403128760529484,
|
107209 |
+
"grad_norm": 149.43238830566406,
|
107210 |
+
"learning_rate": 0.00018461743058324455,
|
107211 |
+
"loss": 7.2923,
|
107212 |
+
"step": 152930
|
107213 |
+
},
|
107214 |
+
{
|
107215 |
+
"epoch": 18.404332129963898,
|
107216 |
+
"grad_norm": 882.1889038085938,
|
107217 |
+
"learning_rate": 0.0001846154032029429,
|
107218 |
+
"loss": 7.3169,
|
107219 |
+
"step": 152940
|
107220 |
+
},
|
107221 |
+
{
|
107222 |
+
"epoch": 18.405535499398315,
|
107223 |
+
"grad_norm": 232.15188598632812,
|
107224 |
+
"learning_rate": 0.00018461337570018188,
|
107225 |
+
"loss": 7.4182,
|
107226 |
+
"step": 152950
|
107227 |
+
},
|
107228 |
+
{
|
107229 |
+
"epoch": 18.406738868832733,
|
107230 |
+
"grad_norm": 810.0722045898438,
|
107231 |
+
"learning_rate": 0.0001846113480749644,
|
107232 |
+
"loss": 7.43,
|
107233 |
+
"step": 152960
|
107234 |
+
},
|
107235 |
+
{
|
107236 |
+
"epoch": 18.407942238267147,
|
107237 |
+
"grad_norm": 460.2532043457031,
|
107238 |
+
"learning_rate": 0.0001846093203272934,
|
107239 |
+
"loss": 7.4534,
|
107240 |
+
"step": 152970
|
107241 |
+
},
|
107242 |
+
{
|
107243 |
+
"epoch": 18.409145607701564,
|
107244 |
+
"grad_norm": 652.58154296875,
|
107245 |
+
"learning_rate": 0.00018460729245717183,
|
107246 |
+
"loss": 7.3855,
|
107247 |
+
"step": 152980
|
107248 |
+
},
|
107249 |
+
{
|
107250 |
+
"epoch": 18.410348977135982,
|
107251 |
+
"grad_norm": 340.3279113769531,
|
107252 |
+
"learning_rate": 0.00018460526446460263,
|
107253 |
+
"loss": 7.4736,
|
107254 |
+
"step": 152990
|
107255 |
+
},
|
107256 |
+
{
|
107257 |
+
"epoch": 18.411552346570396,
|
107258 |
+
"grad_norm": 878.8419189453125,
|
107259 |
+
"learning_rate": 0.00018460323634958873,
|
107260 |
+
"loss": 7.4742,
|
107261 |
+
"step": 153000
|
107262 |
+
},
|
107263 |
+
{
|
107264 |
+
"epoch": 18.412755716004813,
|
107265 |
+
"grad_norm": 810.9647216796875,
|
107266 |
+
"learning_rate": 0.00018460120811213305,
|
107267 |
+
"loss": 7.3896,
|
107268 |
+
"step": 153010
|
107269 |
+
},
|
107270 |
+
{
|
107271 |
+
"epoch": 18.41395908543923,
|
107272 |
+
"grad_norm": 1688.00244140625,
|
107273 |
+
"learning_rate": 0.00018459917975223854,
|
107274 |
+
"loss": 7.3993,
|
107275 |
+
"step": 153020
|
107276 |
+
},
|
107277 |
+
{
|
107278 |
+
"epoch": 18.415162454873645,
|
107279 |
+
"grad_norm": 430.4827575683594,
|
107280 |
+
"learning_rate": 0.0001845971512699081,
|
107281 |
+
"loss": 7.4924,
|
107282 |
+
"step": 153030
|
107283 |
+
},
|
107284 |
+
{
|
107285 |
+
"epoch": 18.416365824308063,
|
107286 |
+
"grad_norm": 85.9831771850586,
|
107287 |
+
"learning_rate": 0.00018459512266514474,
|
107288 |
+
"loss": 7.5139,
|
107289 |
+
"step": 153040
|
107290 |
+
},
|
107291 |
+
{
|
107292 |
+
"epoch": 18.41756919374248,
|
107293 |
+
"grad_norm": 51.568939208984375,
|
107294 |
+
"learning_rate": 0.00018459309393795132,
|
107295 |
+
"loss": 7.481,
|
107296 |
+
"step": 153050
|
107297 |
+
},
|
107298 |
+
{
|
107299 |
+
"epoch": 18.418772563176894,
|
107300 |
+
"grad_norm": 54.81188201904297,
|
107301 |
+
"learning_rate": 0.0001845910650883308,
|
107302 |
+
"loss": 7.5253,
|
107303 |
+
"step": 153060
|
107304 |
+
},
|
107305 |
+
{
|
107306 |
+
"epoch": 18.41997593261131,
|
107307 |
+
"grad_norm": 74.08306884765625,
|
107308 |
+
"learning_rate": 0.00018458903611628615,
|
107309 |
+
"loss": 7.5278,
|
107310 |
+
"step": 153070
|
107311 |
+
},
|
107312 |
+
{
|
107313 |
+
"epoch": 18.42117930204573,
|
107314 |
+
"grad_norm": 147.89413452148438,
|
107315 |
+
"learning_rate": 0.00018458700702182027,
|
107316 |
+
"loss": 7.5596,
|
107317 |
+
"step": 153080
|
107318 |
+
},
|
107319 |
+
{
|
107320 |
+
"epoch": 18.422382671480143,
|
107321 |
+
"grad_norm": 692.1793823242188,
|
107322 |
+
"learning_rate": 0.00018458497780493612,
|
107323 |
+
"loss": 7.5143,
|
107324 |
+
"step": 153090
|
107325 |
+
},
|
107326 |
+
{
|
107327 |
+
"epoch": 18.42358604091456,
|
107328 |
+
"grad_norm": 40.994956970214844,
|
107329 |
+
"learning_rate": 0.0001845829484656366,
|
107330 |
+
"loss": 7.5969,
|
107331 |
+
"step": 153100
|
107332 |
+
},
|
107333 |
+
{
|
107334 |
+
"epoch": 18.42478941034898,
|
107335 |
+
"grad_norm": 9.625137329101562,
|
107336 |
+
"learning_rate": 0.00018458091900392466,
|
107337 |
+
"loss": 7.7019,
|
107338 |
+
"step": 153110
|
107339 |
+
},
|
107340 |
+
{
|
107341 |
+
"epoch": 18.425992779783392,
|
107342 |
+
"grad_norm": 12.490511894226074,
|
107343 |
+
"learning_rate": 0.0001845788894198033,
|
107344 |
+
"loss": 7.6813,
|
107345 |
+
"step": 153120
|
107346 |
+
},
|
107347 |
+
{
|
107348 |
+
"epoch": 18.42719614921781,
|
107349 |
+
"grad_norm": 8.232507705688477,
|
107350 |
+
"learning_rate": 0.00018457685971327536,
|
107351 |
+
"loss": 7.5214,
|
107352 |
+
"step": 153130
|
107353 |
+
},
|
107354 |
+
{
|
107355 |
+
"epoch": 18.428399518652228,
|
107356 |
+
"grad_norm": 34.846214294433594,
|
107357 |
+
"learning_rate": 0.00018457482988434385,
|
107358 |
+
"loss": 7.4904,
|
107359 |
+
"step": 153140
|
107360 |
+
},
|
107361 |
+
{
|
107362 |
+
"epoch": 18.42960288808664,
|
107363 |
+
"grad_norm": 413.77508544921875,
|
107364 |
+
"learning_rate": 0.00018457279993301166,
|
107365 |
+
"loss": 7.5764,
|
107366 |
+
"step": 153150
|
107367 |
+
},
|
107368 |
+
{
|
107369 |
+
"epoch": 18.43080625752106,
|
107370 |
+
"grad_norm": 290.9382629394531,
|
107371 |
+
"learning_rate": 0.00018457076985928177,
|
107372 |
+
"loss": 7.7764,
|
107373 |
+
"step": 153160
|
107374 |
+
},
|
107375 |
+
{
|
107376 |
+
"epoch": 18.432009626955477,
|
107377 |
+
"grad_norm": 1067.5765380859375,
|
107378 |
+
"learning_rate": 0.00018456873966315708,
|
107379 |
+
"loss": 7.7062,
|
107380 |
+
"step": 153170
|
107381 |
+
},
|
107382 |
+
{
|
107383 |
+
"epoch": 18.43321299638989,
|
107384 |
+
"grad_norm": 1371.8919677734375,
|
107385 |
+
"learning_rate": 0.00018456670934464056,
|
107386 |
+
"loss": 7.6137,
|
107387 |
+
"step": 153180
|
107388 |
+
},
|
107389 |
+
{
|
107390 |
+
"epoch": 18.43441636582431,
|
107391 |
+
"grad_norm": 526.41357421875,
|
107392 |
+
"learning_rate": 0.0001845646789037351,
|
107393 |
+
"loss": 7.5463,
|
107394 |
+
"step": 153190
|
107395 |
+
},
|
107396 |
+
{
|
107397 |
+
"epoch": 18.435619735258726,
|
107398 |
+
"grad_norm": 35714.6171875,
|
107399 |
+
"learning_rate": 0.0001845626483404437,
|
107400 |
+
"loss": 7.6021,
|
107401 |
+
"step": 153200
|
107402 |
+
},
|
107403 |
+
{
|
107404 |
+
"epoch": 18.43682310469314,
|
107405 |
+
"grad_norm": 107209.7421875,
|
107406 |
+
"learning_rate": 0.00018456061765476927,
|
107407 |
+
"loss": 7.9262,
|
107408 |
+
"step": 153210
|
107409 |
+
},
|
107410 |
+
{
|
107411 |
+
"epoch": 18.438026474127557,
|
107412 |
+
"grad_norm": 290236160.0,
|
107413 |
+
"learning_rate": 0.00018455858684671475,
|
107414 |
+
"loss": 8.4073,
|
107415 |
+
"step": 153220
|
107416 |
+
},
|
107417 |
+
{
|
107418 |
+
"epoch": 18.439229843561975,
|
107419 |
+
"grad_norm": 49486820.0,
|
107420 |
+
"learning_rate": 0.00018455655591628306,
|
107421 |
+
"loss": 8.7149,
|
107422 |
+
"step": 153230
|
107423 |
+
},
|
107424 |
+
{
|
107425 |
+
"epoch": 18.44043321299639,
|
107426 |
+
"grad_norm": 354431008.0,
|
107427 |
+
"learning_rate": 0.00018455452486347714,
|
107428 |
+
"loss": 9.1406,
|
107429 |
+
"step": 153240
|
107430 |
+
},
|
107431 |
+
{
|
107432 |
+
"epoch": 18.441636582430807,
|
107433 |
+
"grad_norm": 952564928.0,
|
107434 |
+
"learning_rate": 0.00018455249368829998,
|
107435 |
+
"loss": 9.4997,
|
107436 |
+
"step": 153250
|
107437 |
+
},
|
107438 |
+
{
|
107439 |
+
"epoch": 18.442839951865224,
|
107440 |
+
"grad_norm": 343560960.0,
|
107441 |
+
"learning_rate": 0.0001845504623907545,
|
107442 |
+
"loss": 9.6089,
|
107443 |
+
"step": 153260
|
107444 |
+
},
|
107445 |
+
{
|
107446 |
+
"epoch": 18.444043321299638,
|
107447 |
+
"grad_norm": 907001152.0,
|
107448 |
+
"learning_rate": 0.0001845484309708436,
|
107449 |
+
"loss": 9.5405,
|
107450 |
+
"step": 153270
|
107451 |
+
},
|
107452 |
+
{
|
107453 |
+
"epoch": 18.445246690734056,
|
107454 |
+
"grad_norm": 1099533184.0,
|
107455 |
+
"learning_rate": 0.00018454639942857025,
|
107456 |
+
"loss": 9.7289,
|
107457 |
+
"step": 153280
|
107458 |
+
},
|
107459 |
+
{
|
107460 |
+
"epoch": 18.446450060168473,
|
107461 |
+
"grad_norm": 401878048.0,
|
107462 |
+
"learning_rate": 0.00018454436776393739,
|
107463 |
+
"loss": 9.5961,
|
107464 |
+
"step": 153290
|
107465 |
+
},
|
107466 |
+
{
|
107467 |
+
"epoch": 18.447653429602887,
|
107468 |
+
"grad_norm": 393140160.0,
|
107469 |
+
"learning_rate": 0.00018454233597694795,
|
107470 |
+
"loss": 9.6738,
|
107471 |
+
"step": 153300
|
107472 |
+
},
|
107473 |
+
{
|
107474 |
+
"epoch": 18.448856799037305,
|
107475 |
+
"grad_norm": 110440016.0,
|
107476 |
+
"learning_rate": 0.00018454030406760488,
|
107477 |
+
"loss": 9.7425,
|
107478 |
+
"step": 153310
|
107479 |
+
},
|
107480 |
+
{
|
107481 |
+
"epoch": 18.450060168471722,
|
107482 |
+
"grad_norm": 227057136.0,
|
107483 |
+
"learning_rate": 0.0001845382720359111,
|
107484 |
+
"loss": 9.9498,
|
107485 |
+
"step": 153320
|
107486 |
+
},
|
107487 |
+
{
|
107488 |
+
"epoch": 18.451263537906136,
|
107489 |
+
"grad_norm": 189533648.0,
|
107490 |
+
"learning_rate": 0.0001845362398818696,
|
107491 |
+
"loss": 10.2202,
|
107492 |
+
"step": 153330
|
107493 |
+
},
|
107494 |
+
{
|
107495 |
+
"epoch": 18.452466907340554,
|
107496 |
+
"grad_norm": 144564608.0,
|
107497 |
+
"learning_rate": 0.00018453420760548327,
|
107498 |
+
"loss": 10.5441,
|
107499 |
+
"step": 153340
|
107500 |
+
},
|
107501 |
+
{
|
107502 |
+
"epoch": 18.45367027677497,
|
107503 |
+
"grad_norm": 158687680.0,
|
107504 |
+
"learning_rate": 0.00018453217520675508,
|
107505 |
+
"loss": 10.4903,
|
107506 |
+
"step": 153350
|
107507 |
+
},
|
107508 |
+
{
|
107509 |
+
"epoch": 18.454873646209386,
|
107510 |
+
"grad_norm": 84126192.0,
|
107511 |
+
"learning_rate": 0.00018453014268568797,
|
107512 |
+
"loss": 10.4288,
|
107513 |
+
"step": 153360
|
107514 |
+
},
|
107515 |
+
{
|
107516 |
+
"epoch": 18.456077015643803,
|
107517 |
+
"grad_norm": 347420544.0,
|
107518 |
+
"learning_rate": 0.00018452811004228485,
|
107519 |
+
"loss": 10.4237,
|
107520 |
+
"step": 153370
|
107521 |
+
},
|
107522 |
+
{
|
107523 |
+
"epoch": 18.45728038507822,
|
107524 |
+
"grad_norm": 705729536.0,
|
107525 |
+
"learning_rate": 0.0001845260772765487,
|
107526 |
+
"loss": 10.4202,
|
107527 |
+
"step": 153380
|
107528 |
+
},
|
107529 |
+
{
|
107530 |
+
"epoch": 18.458483754512635,
|
107531 |
+
"grad_norm": 447884192.0,
|
107532 |
+
"learning_rate": 0.00018452404438848246,
|
107533 |
+
"loss": 10.3371,
|
107534 |
+
"step": 153390
|
107535 |
+
},
|
107536 |
+
{
|
107537 |
+
"epoch": 18.459687123947052,
|
107538 |
+
"grad_norm": 319394624.0,
|
107539 |
+
"learning_rate": 0.00018452201137808904,
|
107540 |
+
"loss": 10.5163,
|
107541 |
+
"step": 153400
|
107542 |
+
},
|
107543 |
+
{
|
107544 |
+
"epoch": 18.460890493381466,
|
107545 |
+
"grad_norm": 307767616.0,
|
107546 |
+
"learning_rate": 0.00018451997824537139,
|
107547 |
+
"loss": 10.3893,
|
107548 |
+
"step": 153410
|
107549 |
+
},
|
107550 |
+
{
|
107551 |
+
"epoch": 18.462093862815884,
|
107552 |
+
"grad_norm": 18004549632.0,
|
107553 |
+
"learning_rate": 0.00018451794499033247,
|
107554 |
+
"loss": 10.6187,
|
107555 |
+
"step": 153420
|
107556 |
+
},
|
107557 |
+
{
|
107558 |
+
"epoch": 18.4632972322503,
|
107559 |
+
"grad_norm": 2387283456.0,
|
107560 |
+
"learning_rate": 0.00018451591161297524,
|
107561 |
+
"loss": 10.5182,
|
107562 |
+
"step": 153430
|
107563 |
+
},
|
107564 |
+
{
|
107565 |
+
"epoch": 18.464500601684716,
|
107566 |
+
"grad_norm": 256399568.0,
|
107567 |
+
"learning_rate": 0.00018451387811330264,
|
107568 |
+
"loss": 10.6427,
|
107569 |
+
"step": 153440
|
107570 |
+
},
|
107571 |
+
{
|
107572 |
+
"epoch": 18.465703971119133,
|
107573 |
+
"grad_norm": 226842784.0,
|
107574 |
+
"learning_rate": 0.00018451184449131756,
|
107575 |
+
"loss": 10.4203,
|
107576 |
+
"step": 153450
|
107577 |
+
},
|
107578 |
+
{
|
107579 |
+
"epoch": 18.46690734055355,
|
107580 |
+
"grad_norm": 15965370.0,
|
107581 |
+
"learning_rate": 0.00018450981074702295,
|
107582 |
+
"loss": 10.6075,
|
107583 |
+
"step": 153460
|
107584 |
+
},
|
107585 |
+
{
|
107586 |
+
"epoch": 18.468110709987965,
|
107587 |
+
"grad_norm": 38252532.0,
|
107588 |
+
"learning_rate": 0.0001845077768804218,
|
107589 |
+
"loss": 10.7387,
|
107590 |
+
"step": 153470
|
107591 |
+
},
|
107592 |
+
{
|
107593 |
+
"epoch": 18.469314079422382,
|
107594 |
+
"grad_norm": 34838764.0,
|
107595 |
+
"learning_rate": 0.00018450574289151706,
|
107596 |
+
"loss": 10.9605,
|
107597 |
+
"step": 153480
|
107598 |
+
},
|
107599 |
+
{
|
107600 |
+
"epoch": 18.4705174488568,
|
107601 |
+
"grad_norm": 62735660.0,
|
107602 |
+
"learning_rate": 0.0001845037087803116,
|
107603 |
+
"loss": 10.692,
|
107604 |
+
"step": 153490
|
107605 |
+
},
|
107606 |
+
{
|
107607 |
+
"epoch": 18.471720818291214,
|
107608 |
+
"grad_norm": 30423376.0,
|
107609 |
+
"learning_rate": 0.00018450167454680844,
|
107610 |
+
"loss": 10.3002,
|
107611 |
+
"step": 153500
|
107612 |
+
},
|
107613 |
+
{
|
107614 |
+
"epoch": 18.47292418772563,
|
107615 |
+
"grad_norm": 1632257.75,
|
107616 |
+
"learning_rate": 0.0001844996401910105,
|
107617 |
+
"loss": 10.3527,
|
107618 |
+
"step": 153510
|
107619 |
+
},
|
107620 |
+
{
|
107621 |
+
"epoch": 18.47412755716005,
|
107622 |
+
"grad_norm": 277744.25,
|
107623 |
+
"learning_rate": 0.00018449760571292073,
|
107624 |
+
"loss": 10.359,
|
107625 |
+
"step": 153520
|
107626 |
+
},
|
107627 |
+
{
|
107628 |
+
"epoch": 18.475330926594463,
|
107629 |
+
"grad_norm": 1733779.25,
|
107630 |
+
"learning_rate": 0.000184495571112542,
|
107631 |
+
"loss": 10.1695,
|
107632 |
+
"step": 153530
|
107633 |
+
},
|
107634 |
+
{
|
107635 |
+
"epoch": 18.47653429602888,
|
107636 |
+
"grad_norm": 903284.5,
|
107637 |
+
"learning_rate": 0.00018449353638987738,
|
107638 |
+
"loss": 9.9837,
|
107639 |
+
"step": 153540
|
107640 |
+
},
|
107641 |
+
{
|
107642 |
+
"epoch": 18.477737665463298,
|
107643 |
+
"grad_norm": 1033326.5,
|
107644 |
+
"learning_rate": 0.00018449150154492973,
|
107645 |
+
"loss": 10.4487,
|
107646 |
+
"step": 153550
|
107647 |
+
},
|
107648 |
+
{
|
107649 |
+
"epoch": 18.478941034897712,
|
107650 |
+
"grad_norm": 686108.9375,
|
107651 |
+
"learning_rate": 0.00018448946657770199,
|
107652 |
+
"loss": 10.8624,
|
107653 |
+
"step": 153560
|
107654 |
+
},
|
107655 |
+
{
|
107656 |
+
"epoch": 18.48014440433213,
|
107657 |
+
"grad_norm": 272341.71875,
|
107658 |
+
"learning_rate": 0.00018448743148819717,
|
107659 |
+
"loss": 10.9326,
|
107660 |
+
"step": 153570
|
107661 |
+
},
|
107662 |
+
{
|
107663 |
+
"epoch": 18.481347773766547,
|
107664 |
+
"grad_norm": 7521502232576.0,
|
107665 |
+
"learning_rate": 0.00018448539627641817,
|
107666 |
+
"loss": 10.9476,
|
107667 |
+
"step": 153580
|
107668 |
+
},
|
107669 |
+
{
|
107670 |
+
"epoch": 18.48255114320096,
|
107671 |
+
"grad_norm": 5581615661056.0,
|
107672 |
+
"learning_rate": 0.00018448336094236792,
|
107673 |
+
"loss": 10.5312,
|
107674 |
+
"step": 153590
|
107675 |
+
},
|
107676 |
+
{
|
107677 |
+
"epoch": 18.48375451263538,
|
107678 |
+
"grad_norm": 35045565792256.0,
|
107679 |
+
"learning_rate": 0.00018448132548604942,
|
107680 |
+
"loss": 10.2854,
|
107681 |
+
"step": 153600
|
107682 |
+
},
|
107683 |
+
{
|
107684 |
+
"epoch": 18.484957882069796,
|
107685 |
+
"grad_norm": 4497267490816.0,
|
107686 |
+
"learning_rate": 0.00018447928990746558,
|
107687 |
+
"loss": 10.3303,
|
107688 |
+
"step": 153610
|
107689 |
+
},
|
107690 |
+
{
|
107691 |
+
"epoch": 18.48616125150421,
|
107692 |
+
"grad_norm": 19079746289664.0,
|
107693 |
+
"learning_rate": 0.00018447725420661933,
|
107694 |
+
"loss": 10.5231,
|
107695 |
+
"step": 153620
|
107696 |
+
},
|
107697 |
+
{
|
107698 |
+
"epoch": 18.487364620938628,
|
107699 |
+
"grad_norm": 16816830152704.0,
|
107700 |
+
"learning_rate": 0.00018447521838351364,
|
107701 |
+
"loss": 10.6547,
|
107702 |
+
"step": 153630
|
107703 |
+
},
|
107704 |
+
{
|
107705 |
+
"epoch": 18.488567990373046,
|
107706 |
+
"grad_norm": 34880473792512.0,
|
107707 |
+
"learning_rate": 0.00018447318243815147,
|
107708 |
+
"loss": 10.7887,
|
107709 |
+
"step": 153640
|
107710 |
+
},
|
107711 |
+
{
|
107712 |
+
"epoch": 18.48977135980746,
|
107713 |
+
"grad_norm": 25699289088.0,
|
107714 |
+
"learning_rate": 0.00018447114637053572,
|
107715 |
+
"loss": 10.8947,
|
107716 |
+
"step": 153650
|
107717 |
+
},
|
107718 |
+
{
|
107719 |
+
"epoch": 18.490974729241877,
|
107720 |
+
"grad_norm": 49615843328.0,
|
107721 |
+
"learning_rate": 0.0001844691101806694,
|
107722 |
+
"loss": 10.9466,
|
107723 |
+
"step": 153660
|
107724 |
+
},
|
107725 |
+
{
|
107726 |
+
"epoch": 18.492178098676295,
|
107727 |
+
"grad_norm": 63240937472.0,
|
107728 |
+
"learning_rate": 0.0001844670738685554,
|
107729 |
+
"loss": 11.0833,
|
107730 |
+
"step": 153670
|
107731 |
+
},
|
107732 |
+
{
|
107733 |
+
"epoch": 18.49338146811071,
|
107734 |
+
"grad_norm": Infinity,
|
107735 |
+
"learning_rate": 0.0001844650374341967,
|
107736 |
+
"loss": 11.1395,
|
107737 |
+
"step": 153680
|
107738 |
+
},
|
107739 |
+
{
|
107740 |
+
"epoch": 18.494584837545126,
|
107741 |
+
"grad_norm": Infinity,
|
107742 |
+
"learning_rate": 0.00018446300087759623,
|
107743 |
+
"loss": 11.1156,
|
107744 |
+
"step": 153690
|
107745 |
+
},
|
107746 |
+
{
|
107747 |
+
"epoch": 18.495788206979544,
|
107748 |
+
"grad_norm": Infinity,
|
107749 |
+
"learning_rate": 0.00018446096419875695,
|
107750 |
+
"loss": 11.1737,
|
107751 |
+
"step": 153700
|
107752 |
+
},
|
107753 |
+
{
|
107754 |
+
"epoch": 18.496991576413958,
|
107755 |
+
"grad_norm": Infinity,
|
107756 |
+
"learning_rate": 0.00018445892739768182,
|
107757 |
+
"loss": 11.1832,
|
107758 |
+
"step": 153710
|
107759 |
+
},
|
107760 |
+
{
|
107761 |
+
"epoch": 18.498194945848375,
|
107762 |
+
"grad_norm": Infinity,
|
107763 |
+
"learning_rate": 0.00018445689047437376,
|
107764 |
+
"loss": 11.184,
|
107765 |
+
"step": 153720
|
107766 |
+
},
|
107767 |
+
{
|
107768 |
+
"epoch": 18.499398315282793,
|
107769 |
+
"grad_norm": Infinity,
|
107770 |
+
"learning_rate": 0.0001844548534288357,
|
107771 |
+
"loss": 11.2043,
|
107772 |
+
"step": 153730
|
107773 |
}
|
107774 |
],
|
107775 |
"logging_steps": 10,
|
...
      "attributes": {}
    }
  },
+  "total_flos": 3.2323343541969355e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null