willtensora commited on
Commit
a0be085
·
verified ·
1 Parent(s): d3f59c2

Training in progress, step 157890, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:883791f7abc6e83c8e078106f39af0c6dbda2d78e338250d65b667eb4bb06dcc
3
  size 627606952
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1088d85bad76e35cb6c18e7734c2c2df37b0f09d0d513cd23193692801b8e919
3
  size 627606952
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbf029fe349e864ea24f1aa543363bd07868a06eaec1a92cdffa42a099472c3
3
  size 318986436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74fea6dfe87ac00874c7a8077e9de8baf19330d55fbcf21369d50ffdd7952746
3
  size 318986436
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b86ef01c6375796fdd1e8c22f1578cb8d4b3b030168886f5664505c5f70ec251
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:908173892e21f0448410767027e18a92997b1ed2af990e2d2c4310d10a259e8d
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91172b4328e968365f3e7c73829064deda13714a27c947fc1311c6feaa062ba9
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f67794cf32b5607e3c7c900f9ec980e9ddeb5878f1ecf87202a26f9f7ec578df
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a959582076a6ac61743f8b360375823c0a22d57c5299460727134171480c2c4a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d73dca6b24dc149aa09b998461a4ad6f5693f3022488555c563abdd18b0bd07
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec8b44284719a200b8836519e2b619f44ab7ee36a92040ba68d4511ee7490bd5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd682e7c95609633443dbf166e5225e8744766ccd1c3ac9a8a206552b0d19437
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed98150df53faedb2f7ab733ab95a1eb627722350e287b316cd3f55225249d45
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69fb71396e38b5278906060269440d2471ddeaa97d12505747b0e67c0969d3bd
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c56fb323d2dfc786780918b0a3871635690d33d2b53c6f50f058cdc915c9879
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac676368096395c9abcdc2eb24514144adc29fa422f2ccb1a132f9486edc674
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54642cb8b6edcb0379f079a97a73cd798989d2033a83d539e2f7aa6e46426951
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51c1b684147f6e3b2a7f0206497e7491505db91cfdd5d1908cfea9ae634ec7ed
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89e4108dfcd1cb85bb52febcf2d1838b369309a5b7aec71b4c24c537d77475c2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993e06522ed60c0b873ce20890d06629f64e5c63c610b8979cd6428c8ce358d5
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:566f22a287ae39f3b9654b0727421444cfffea626ed07bee5045c2aaca718e53
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd46dcde3e0e95c0d471f441b283d1d9f5c9aaae4854865d35095a1f5c249f2e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 18.5,
5
  "eval_steps": 8310,
6
- "global_step": 153735,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -107770,6 +107770,2926 @@
107770
  "learning_rate": 0.0001844548534288357,
107771
  "loss": 11.2043,
107772
  "step": 153730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107773
  }
107774
  ],
107775
  "logging_steps": 10,
@@ -107789,7 +110709,7 @@
107789
  "attributes": {}
107790
  }
107791
  },
107792
- "total_flos": 3.2323343541969355e+19,
107793
  "train_batch_size": 1,
107794
  "trial_name": null,
107795
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.0,
5
  "eval_steps": 8310,
6
+ "global_step": 157890,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
107770
  "learning_rate": 0.0001844548534288357,
107771
  "loss": 11.2043,
107772
  "step": 153730
107773
+ },
107774
+ {
107775
+ "epoch": 18.500601684717207,
107776
+ "grad_norm": Infinity,
107777
+ "learning_rate": 0.00018445281626107063,
107778
+ "loss": 11.0292,
107779
+ "step": 153740
107780
+ },
107781
+ {
107782
+ "epoch": 18.501805054151625,
107783
+ "grad_norm": Infinity,
107784
+ "learning_rate": 0.0001844507789710815,
107785
+ "loss": 11.0998,
107786
+ "step": 153750
107787
+ },
107788
+ {
107789
+ "epoch": 18.503008423586042,
107790
+ "grad_norm": Infinity,
107791
+ "learning_rate": 0.00018444874155887123,
107792
+ "loss": 11.1909,
107793
+ "step": 153760
107794
+ },
107795
+ {
107796
+ "epoch": 18.504211793020456,
107797
+ "grad_norm": Infinity,
107798
+ "learning_rate": 0.00018444670402444278,
107799
+ "loss": 11.1862,
107800
+ "step": 153770
107801
+ },
107802
+ {
107803
+ "epoch": 18.505415162454874,
107804
+ "grad_norm": Infinity,
107805
+ "learning_rate": 0.0001844446663677991,
107806
+ "loss": 11.2138,
107807
+ "step": 153780
107808
+ },
107809
+ {
107810
+ "epoch": 18.50661853188929,
107811
+ "grad_norm": Infinity,
107812
+ "learning_rate": 0.00018444262858894312,
107813
+ "loss": 11.1951,
107814
+ "step": 153790
107815
+ },
107816
+ {
107817
+ "epoch": 18.507821901323705,
107818
+ "grad_norm": Infinity,
107819
+ "learning_rate": 0.00018444059068787786,
107820
+ "loss": 11.0419,
107821
+ "step": 153800
107822
+ },
107823
+ {
107824
+ "epoch": 18.509025270758123,
107825
+ "grad_norm": Infinity,
107826
+ "learning_rate": 0.0001844385526646062,
107827
+ "loss": 11.0918,
107828
+ "step": 153810
107829
+ },
107830
+ {
107831
+ "epoch": 18.51022864019254,
107832
+ "grad_norm": Infinity,
107833
+ "learning_rate": 0.00018443651451913108,
107834
+ "loss": 11.1833,
107835
+ "step": 153820
107836
+ },
107837
+ {
107838
+ "epoch": 18.511432009626954,
107839
+ "grad_norm": Infinity,
107840
+ "learning_rate": 0.00018443447625145548,
107841
+ "loss": 11.2276,
107842
+ "step": 153830
107843
+ },
107844
+ {
107845
+ "epoch": 18.512635379061372,
107846
+ "grad_norm": Infinity,
107847
+ "learning_rate": 0.00018443243786158236,
107848
+ "loss": 11.2327,
107849
+ "step": 153840
107850
+ },
107851
+ {
107852
+ "epoch": 18.51383874849579,
107853
+ "grad_norm": Infinity,
107854
+ "learning_rate": 0.00018443039934951466,
107855
+ "loss": 11.0014,
107856
+ "step": 153850
107857
+ },
107858
+ {
107859
+ "epoch": 18.515042117930204,
107860
+ "grad_norm": Infinity,
107861
+ "learning_rate": 0.00018442836071525532,
107862
+ "loss": 11.2101,
107863
+ "step": 153860
107864
+ },
107865
+ {
107866
+ "epoch": 18.51624548736462,
107867
+ "grad_norm": Infinity,
107868
+ "learning_rate": 0.00018442632195880728,
107869
+ "loss": 11.1144,
107870
+ "step": 153870
107871
+ },
107872
+ {
107873
+ "epoch": 18.51744885679904,
107874
+ "grad_norm": Infinity,
107875
+ "learning_rate": 0.00018442428308017354,
107876
+ "loss": 11.1024,
107877
+ "step": 153880
107878
+ },
107879
+ {
107880
+ "epoch": 18.518652226233453,
107881
+ "grad_norm": Infinity,
107882
+ "learning_rate": 0.000184422244079357,
107883
+ "loss": 11.1752,
107884
+ "step": 153890
107885
+ },
107886
+ {
107887
+ "epoch": 18.51985559566787,
107888
+ "grad_norm": Infinity,
107889
+ "learning_rate": 0.00018442020495636067,
107890
+ "loss": 11.108,
107891
+ "step": 153900
107892
+ },
107893
+ {
107894
+ "epoch": 18.521058965102288,
107895
+ "grad_norm": Infinity,
107896
+ "learning_rate": 0.00018441816571118742,
107897
+ "loss": 11.1124,
107898
+ "step": 153910
107899
+ },
107900
+ {
107901
+ "epoch": 18.522262334536702,
107902
+ "grad_norm": Infinity,
107903
+ "learning_rate": 0.00018441612634384025,
107904
+ "loss": 11.1661,
107905
+ "step": 153920
107906
+ },
107907
+ {
107908
+ "epoch": 18.52346570397112,
107909
+ "grad_norm": Infinity,
107910
+ "learning_rate": 0.0001844140868543221,
107911
+ "loss": 11.1174,
107912
+ "step": 153930
107913
+ },
107914
+ {
107915
+ "epoch": 18.524669073405537,
107916
+ "grad_norm": Infinity,
107917
+ "learning_rate": 0.00018441204724263592,
107918
+ "loss": 11.0932,
107919
+ "step": 153940
107920
+ },
107921
+ {
107922
+ "epoch": 18.52587244283995,
107923
+ "grad_norm": Infinity,
107924
+ "learning_rate": 0.00018441000750878467,
107925
+ "loss": 11.1473,
107926
+ "step": 153950
107927
+ },
107928
+ {
107929
+ "epoch": 18.52707581227437,
107930
+ "grad_norm": Infinity,
107931
+ "learning_rate": 0.0001844079676527713,
107932
+ "loss": 11.1022,
107933
+ "step": 153960
107934
+ },
107935
+ {
107936
+ "epoch": 18.528279181708786,
107937
+ "grad_norm": Infinity,
107938
+ "learning_rate": 0.00018440592767459877,
107939
+ "loss": 11.0514,
107940
+ "step": 153970
107941
+ },
107942
+ {
107943
+ "epoch": 18.5294825511432,
107944
+ "grad_norm": Infinity,
107945
+ "learning_rate": 0.00018440388757427,
107946
+ "loss": 11.1552,
107947
+ "step": 153980
107948
+ },
107949
+ {
107950
+ "epoch": 18.530685920577618,
107951
+ "grad_norm": Infinity,
107952
+ "learning_rate": 0.000184401847351788,
107953
+ "loss": 11.0548,
107954
+ "step": 153990
107955
+ },
107956
+ {
107957
+ "epoch": 18.531889290012035,
107958
+ "grad_norm": Infinity,
107959
+ "learning_rate": 0.00018439980700715568,
107960
+ "loss": 11.1378,
107961
+ "step": 154000
107962
+ },
107963
+ {
107964
+ "epoch": 18.53309265944645,
107965
+ "grad_norm": Infinity,
107966
+ "learning_rate": 0.000184397766540376,
107967
+ "loss": 11.1906,
107968
+ "step": 154010
107969
+ },
107970
+ {
107971
+ "epoch": 18.534296028880867,
107972
+ "grad_norm": Infinity,
107973
+ "learning_rate": 0.0001843957259514519,
107974
+ "loss": 11.0453,
107975
+ "step": 154020
107976
+ },
107977
+ {
107978
+ "epoch": 18.535499398315284,
107979
+ "grad_norm": Infinity,
107980
+ "learning_rate": 0.00018439368524038633,
107981
+ "loss": 11.2196,
107982
+ "step": 154030
107983
+ },
107984
+ {
107985
+ "epoch": 18.5367027677497,
107986
+ "grad_norm": Infinity,
107987
+ "learning_rate": 0.0001843916444071823,
107988
+ "loss": 11.2259,
107989
+ "step": 154040
107990
+ },
107991
+ {
107992
+ "epoch": 18.537906137184116,
107993
+ "grad_norm": Infinity,
107994
+ "learning_rate": 0.00018438960345184272,
107995
+ "loss": 11.239,
107996
+ "step": 154050
107997
+ },
107998
+ {
107999
+ "epoch": 18.53910950661853,
108000
+ "grad_norm": Infinity,
108001
+ "learning_rate": 0.00018438756237437054,
108002
+ "loss": 11.0352,
108003
+ "step": 154060
108004
+ },
108005
+ {
108006
+ "epoch": 18.540312876052948,
108007
+ "grad_norm": Infinity,
108008
+ "learning_rate": 0.00018438552117476872,
108009
+ "loss": 11.2338,
108010
+ "step": 154070
108011
+ },
108012
+ {
108013
+ "epoch": 18.541516245487365,
108014
+ "grad_norm": Infinity,
108015
+ "learning_rate": 0.00018438347985304024,
108016
+ "loss": 11.1626,
108017
+ "step": 154080
108018
+ },
108019
+ {
108020
+ "epoch": 18.54271961492178,
108021
+ "grad_norm": Infinity,
108022
+ "learning_rate": 0.00018438143840918798,
108023
+ "loss": 11.1056,
108024
+ "step": 154090
108025
+ },
108026
+ {
108027
+ "epoch": 18.543922984356197,
108028
+ "grad_norm": Infinity,
108029
+ "learning_rate": 0.00018437939684321498,
108030
+ "loss": 11.297,
108031
+ "step": 154100
108032
+ },
108033
+ {
108034
+ "epoch": 18.545126353790614,
108035
+ "grad_norm": Infinity,
108036
+ "learning_rate": 0.00018437735515512417,
108037
+ "loss": 11.2027,
108038
+ "step": 154110
108039
+ },
108040
+ {
108041
+ "epoch": 18.54632972322503,
108042
+ "grad_norm": Infinity,
108043
+ "learning_rate": 0.00018437531334491848,
108044
+ "loss": 11.1862,
108045
+ "step": 154120
108046
+ },
108047
+ {
108048
+ "epoch": 18.547533092659446,
108049
+ "grad_norm": Infinity,
108050
+ "learning_rate": 0.00018437327141260088,
108051
+ "loss": 11.2691,
108052
+ "step": 154130
108053
+ },
108054
+ {
108055
+ "epoch": 18.548736462093864,
108056
+ "grad_norm": Infinity,
108057
+ "learning_rate": 0.00018437122935817434,
108058
+ "loss": 11.0923,
108059
+ "step": 154140
108060
+ },
108061
+ {
108062
+ "epoch": 18.549939831528278,
108063
+ "grad_norm": Infinity,
108064
+ "learning_rate": 0.0001843691871816418,
108065
+ "loss": 11.1205,
108066
+ "step": 154150
108067
+ },
108068
+ {
108069
+ "epoch": 18.551143200962695,
108070
+ "grad_norm": Infinity,
108071
+ "learning_rate": 0.0001843671448830062,
108072
+ "loss": 11.164,
108073
+ "step": 154160
108074
+ },
108075
+ {
108076
+ "epoch": 18.552346570397113,
108077
+ "grad_norm": Infinity,
108078
+ "learning_rate": 0.0001843651024622705,
108079
+ "loss": 11.1202,
108080
+ "step": 154170
108081
+ },
108082
+ {
108083
+ "epoch": 18.553549939831527,
108084
+ "grad_norm": Infinity,
108085
+ "learning_rate": 0.0001843630599194377,
108086
+ "loss": 11.1886,
108087
+ "step": 154180
108088
+ },
108089
+ {
108090
+ "epoch": 18.554753309265944,
108091
+ "grad_norm": Infinity,
108092
+ "learning_rate": 0.0001843610172545107,
108093
+ "loss": 11.1563,
108094
+ "step": 154190
108095
+ },
108096
+ {
108097
+ "epoch": 18.555956678700362,
108098
+ "grad_norm": Infinity,
108099
+ "learning_rate": 0.00018435897446749248,
108100
+ "loss": 11.1706,
108101
+ "step": 154200
108102
+ },
108103
+ {
108104
+ "epoch": 18.557160048134776,
108105
+ "grad_norm": Infinity,
108106
+ "learning_rate": 0.000184356931558386,
108107
+ "loss": 11.0745,
108108
+ "step": 154210
108109
+ },
108110
+ {
108111
+ "epoch": 18.558363417569193,
108112
+ "grad_norm": Infinity,
108113
+ "learning_rate": 0.00018435488852719422,
108114
+ "loss": 11.1546,
108115
+ "step": 154220
108116
+ },
108117
+ {
108118
+ "epoch": 18.55956678700361,
108119
+ "grad_norm": Infinity,
108120
+ "learning_rate": 0.0001843528453739201,
108121
+ "loss": 11.2963,
108122
+ "step": 154230
108123
+ },
108124
+ {
108125
+ "epoch": 18.560770156438025,
108126
+ "grad_norm": Infinity,
108127
+ "learning_rate": 0.00018435080209856656,
108128
+ "loss": 11.1517,
108129
+ "step": 154240
108130
+ },
108131
+ {
108132
+ "epoch": 18.561973525872443,
108133
+ "grad_norm": Infinity,
108134
+ "learning_rate": 0.0001843487587011366,
108135
+ "loss": 11.1994,
108136
+ "step": 154250
108137
+ },
108138
+ {
108139
+ "epoch": 18.56317689530686,
108140
+ "grad_norm": Infinity,
108141
+ "learning_rate": 0.00018434671518163315,
108142
+ "loss": 11.0373,
108143
+ "step": 154260
108144
+ },
108145
+ {
108146
+ "epoch": 18.564380264741274,
108147
+ "grad_norm": Infinity,
108148
+ "learning_rate": 0.00018434467154005918,
108149
+ "loss": 11.1424,
108150
+ "step": 154270
108151
+ },
108152
+ {
108153
+ "epoch": 18.56558363417569,
108154
+ "grad_norm": Infinity,
108155
+ "learning_rate": 0.00018434262777641767,
108156
+ "loss": 11.2854,
108157
+ "step": 154280
108158
+ },
108159
+ {
108160
+ "epoch": 18.56678700361011,
108161
+ "grad_norm": Infinity,
108162
+ "learning_rate": 0.00018434058389071154,
108163
+ "loss": 11.1538,
108164
+ "step": 154290
108165
+ },
108166
+ {
108167
+ "epoch": 18.567990373044523,
108168
+ "grad_norm": Infinity,
108169
+ "learning_rate": 0.00018433853988294375,
108170
+ "loss": 11.1253,
108171
+ "step": 154300
108172
+ },
108173
+ {
108174
+ "epoch": 18.56919374247894,
108175
+ "grad_norm": Infinity,
108176
+ "learning_rate": 0.0001843364957531173,
108177
+ "loss": 11.1735,
108178
+ "step": 154310
108179
+ },
108180
+ {
108181
+ "epoch": 18.57039711191336,
108182
+ "grad_norm": Infinity,
108183
+ "learning_rate": 0.00018433445150123508,
108184
+ "loss": 11.0829,
108185
+ "step": 154320
108186
+ },
108187
+ {
108188
+ "epoch": 18.571600481347772,
108189
+ "grad_norm": Infinity,
108190
+ "learning_rate": 0.0001843324071273001,
108191
+ "loss": 11.2586,
108192
+ "step": 154330
108193
+ },
108194
+ {
108195
+ "epoch": 18.57280385078219,
108196
+ "grad_norm": Infinity,
108197
+ "learning_rate": 0.00018433036263131532,
108198
+ "loss": 11.1248,
108199
+ "step": 154340
108200
+ },
108201
+ {
108202
+ "epoch": 18.574007220216608,
108203
+ "grad_norm": Infinity,
108204
+ "learning_rate": 0.00018432831801328366,
108205
+ "loss": 11.111,
108206
+ "step": 154350
108207
+ },
108208
+ {
108209
+ "epoch": 18.57521058965102,
108210
+ "grad_norm": Infinity,
108211
+ "learning_rate": 0.00018432627327320816,
108212
+ "loss": 11.0498,
108213
+ "step": 154360
108214
+ },
108215
+ {
108216
+ "epoch": 18.57641395908544,
108217
+ "grad_norm": Infinity,
108218
+ "learning_rate": 0.00018432422841109167,
108219
+ "loss": 11.2345,
108220
+ "step": 154370
108221
+ },
108222
+ {
108223
+ "epoch": 18.577617328519857,
108224
+ "grad_norm": Infinity,
108225
+ "learning_rate": 0.00018432218342693721,
108226
+ "loss": 11.1254,
108227
+ "step": 154380
108228
+ },
108229
+ {
108230
+ "epoch": 18.57882069795427,
108231
+ "grad_norm": Infinity,
108232
+ "learning_rate": 0.00018432013832074776,
108233
+ "loss": 11.2464,
108234
+ "step": 154390
108235
+ },
108236
+ {
108237
+ "epoch": 18.58002406738869,
108238
+ "grad_norm": Infinity,
108239
+ "learning_rate": 0.00018431809309252622,
108240
+ "loss": 11.1221,
108241
+ "step": 154400
108242
+ },
108243
+ {
108244
+ "epoch": 18.581227436823106,
108245
+ "grad_norm": Infinity,
108246
+ "learning_rate": 0.0001843160477422756,
108247
+ "loss": 11.194,
108248
+ "step": 154410
108249
+ },
108250
+ {
108251
+ "epoch": 18.58243080625752,
108252
+ "grad_norm": Infinity,
108253
+ "learning_rate": 0.00018431400226999882,
108254
+ "loss": 11.2833,
108255
+ "step": 154420
108256
+ },
108257
+ {
108258
+ "epoch": 18.583634175691937,
108259
+ "grad_norm": Infinity,
108260
+ "learning_rate": 0.00018431195667569888,
108261
+ "loss": 11.1216,
108262
+ "step": 154430
108263
+ },
108264
+ {
108265
+ "epoch": 18.584837545126355,
108266
+ "grad_norm": Infinity,
108267
+ "learning_rate": 0.00018430991095937872,
108268
+ "loss": 11.0644,
108269
+ "step": 154440
108270
+ },
108271
+ {
108272
+ "epoch": 18.58604091456077,
108273
+ "grad_norm": Infinity,
108274
+ "learning_rate": 0.00018430786512104127,
108275
+ "loss": 11.1347,
108276
+ "step": 154450
108277
+ },
108278
+ {
108279
+ "epoch": 18.587244283995187,
108280
+ "grad_norm": Infinity,
108281
+ "learning_rate": 0.0001843058191606896,
108282
+ "loss": 11.1396,
108283
+ "step": 154460
108284
+ },
108285
+ {
108286
+ "epoch": 18.588447653429604,
108287
+ "grad_norm": Infinity,
108288
+ "learning_rate": 0.00018430377307832653,
108289
+ "loss": 11.1106,
108290
+ "step": 154470
108291
+ },
108292
+ {
108293
+ "epoch": 18.589651022864018,
108294
+ "grad_norm": Infinity,
108295
+ "learning_rate": 0.0001843017268739551,
108296
+ "loss": 11.1095,
108297
+ "step": 154480
108298
+ },
108299
+ {
108300
+ "epoch": 18.590854392298436,
108301
+ "grad_norm": Infinity,
108302
+ "learning_rate": 0.00018429968054757827,
108303
+ "loss": 11.1176,
108304
+ "step": 154490
108305
+ },
108306
+ {
108307
+ "epoch": 18.592057761732853,
108308
+ "grad_norm": Infinity,
108309
+ "learning_rate": 0.00018429763409919897,
108310
+ "loss": 11.0243,
108311
+ "step": 154500
108312
+ },
108313
+ {
108314
+ "epoch": 18.593261131167267,
108315
+ "grad_norm": Infinity,
108316
+ "learning_rate": 0.0001842955875288202,
108317
+ "loss": 11.0968,
108318
+ "step": 154510
108319
+ },
108320
+ {
108321
+ "epoch": 18.594464500601685,
108322
+ "grad_norm": Infinity,
108323
+ "learning_rate": 0.0001842935408364449,
108324
+ "loss": 11.139,
108325
+ "step": 154520
108326
+ },
108327
+ {
108328
+ "epoch": 18.595667870036102,
108329
+ "grad_norm": Infinity,
108330
+ "learning_rate": 0.00018429149402207604,
108331
+ "loss": 11.2446,
108332
+ "step": 154530
108333
+ },
108334
+ {
108335
+ "epoch": 18.596871239470516,
108336
+ "grad_norm": Infinity,
108337
+ "learning_rate": 0.00018428944708571656,
108338
+ "loss": 11.1045,
108339
+ "step": 154540
108340
+ },
108341
+ {
108342
+ "epoch": 18.598074608904934,
108343
+ "grad_norm": Infinity,
108344
+ "learning_rate": 0.00018428740002736946,
108345
+ "loss": 11.2959,
108346
+ "step": 154550
108347
+ },
108348
+ {
108349
+ "epoch": 18.59927797833935,
108350
+ "grad_norm": Infinity,
108351
+ "learning_rate": 0.00018428535284703768,
108352
+ "loss": 11.1471,
108353
+ "step": 154560
108354
+ },
108355
+ {
108356
+ "epoch": 18.600481347773766,
108357
+ "grad_norm": Infinity,
108358
+ "learning_rate": 0.00018428330554472416,
108359
+ "loss": 11.1321,
108360
+ "step": 154570
108361
+ },
108362
+ {
108363
+ "epoch": 18.601684717208183,
108364
+ "grad_norm": Infinity,
108365
+ "learning_rate": 0.00018428125812043194,
108366
+ "loss": 11.1653,
108367
+ "step": 154580
108368
+ },
108369
+ {
108370
+ "epoch": 18.6028880866426,
108371
+ "grad_norm": Infinity,
108372
+ "learning_rate": 0.00018427921057416388,
108373
+ "loss": 11.0967,
108374
+ "step": 154590
108375
+ },
108376
+ {
108377
+ "epoch": 18.604091456077015,
108378
+ "grad_norm": Infinity,
108379
+ "learning_rate": 0.00018427716290592302,
108380
+ "loss": 11.0111,
108381
+ "step": 154600
108382
+ },
108383
+ {
108384
+ "epoch": 18.605294825511432,
108385
+ "grad_norm": Infinity,
108386
+ "learning_rate": 0.00018427511511571228,
108387
+ "loss": 11.1689,
108388
+ "step": 154610
108389
+ },
108390
+ {
108391
+ "epoch": 18.60649819494585,
108392
+ "grad_norm": Infinity,
108393
+ "learning_rate": 0.00018427306720353466,
108394
+ "loss": 11.1187,
108395
+ "step": 154620
108396
+ },
108397
+ {
108398
+ "epoch": 18.607701564380264,
108399
+ "grad_norm": Infinity,
108400
+ "learning_rate": 0.00018427101916939308,
108401
+ "loss": 11.2486,
108402
+ "step": 154630
108403
+ },
108404
+ {
108405
+ "epoch": 18.60890493381468,
108406
+ "grad_norm": Infinity,
108407
+ "learning_rate": 0.00018426897101329054,
108408
+ "loss": 11.0974,
108409
+ "step": 154640
108410
+ },
108411
+ {
108412
+ "epoch": 18.6101083032491,
108413
+ "grad_norm": Infinity,
108414
+ "learning_rate": 0.00018426692273523,
108415
+ "loss": 11.228,
108416
+ "step": 154650
108417
+ },
108418
+ {
108419
+ "epoch": 18.611311672683513,
108420
+ "grad_norm": Infinity,
108421
+ "learning_rate": 0.00018426487433521445,
108422
+ "loss": 11.1379,
108423
+ "step": 154660
108424
+ },
108425
+ {
108426
+ "epoch": 18.61251504211793,
108427
+ "grad_norm": Infinity,
108428
+ "learning_rate": 0.00018426282581324678,
108429
+ "loss": 11.1701,
108430
+ "step": 154670
108431
+ },
108432
+ {
108433
+ "epoch": 18.613718411552348,
108434
+ "grad_norm": Infinity,
108435
+ "learning_rate": 0.00018426077716932998,
108436
+ "loss": 11.1545,
108437
+ "step": 154680
108438
+ },
108439
+ {
108440
+ "epoch": 18.614921780986762,
108441
+ "grad_norm": Infinity,
108442
+ "learning_rate": 0.00018425872840346707,
108443
+ "loss": 11.1638,
108444
+ "step": 154690
108445
+ },
108446
+ {
108447
+ "epoch": 18.61612515042118,
108448
+ "grad_norm": Infinity,
108449
+ "learning_rate": 0.00018425667951566096,
108450
+ "loss": 11.1699,
108451
+ "step": 154700
108452
+ },
108453
+ {
108454
+ "epoch": 18.617328519855597,
108455
+ "grad_norm": Infinity,
108456
+ "learning_rate": 0.00018425463050591464,
108457
+ "loss": 11.2327,
108458
+ "step": 154710
108459
+ },
108460
+ {
108461
+ "epoch": 18.61853188929001,
108462
+ "grad_norm": Infinity,
108463
+ "learning_rate": 0.00018425258137423108,
108464
+ "loss": 11.1129,
108465
+ "step": 154720
108466
+ },
108467
+ {
108468
+ "epoch": 18.61973525872443,
108469
+ "grad_norm": Infinity,
108470
+ "learning_rate": 0.0001842505321206132,
108471
+ "loss": 11.1449,
108472
+ "step": 154730
108473
+ },
108474
+ {
108475
+ "epoch": 18.620938628158846,
108476
+ "grad_norm": Infinity,
108477
+ "learning_rate": 0.00018424848274506403,
108478
+ "loss": 11.019,
108479
+ "step": 154740
108480
+ },
108481
+ {
108482
+ "epoch": 18.62214199759326,
108483
+ "grad_norm": Infinity,
108484
+ "learning_rate": 0.0001842464332475865,
108485
+ "loss": 11.1304,
108486
+ "step": 154750
108487
+ },
108488
+ {
108489
+ "epoch": 18.623345367027678,
108490
+ "grad_norm": Infinity,
108491
+ "learning_rate": 0.00018424438362818358,
108492
+ "loss": 11.1931,
108493
+ "step": 154760
108494
+ },
108495
+ {
108496
+ "epoch": 18.624548736462096,
108497
+ "grad_norm": Infinity,
108498
+ "learning_rate": 0.00018424233388685823,
108499
+ "loss": 11.0813,
108500
+ "step": 154770
108501
+ },
108502
+ {
108503
+ "epoch": 18.62575210589651,
108504
+ "grad_norm": Infinity,
108505
+ "learning_rate": 0.00018424028402361345,
108506
+ "loss": 11.1179,
108507
+ "step": 154780
108508
+ },
108509
+ {
108510
+ "epoch": 18.626955475330927,
108511
+ "grad_norm": Infinity,
108512
+ "learning_rate": 0.00018423823403845212,
108513
+ "loss": 11.2252,
108514
+ "step": 154790
108515
+ },
108516
+ {
108517
+ "epoch": 18.628158844765345,
108518
+ "grad_norm": Infinity,
108519
+ "learning_rate": 0.00018423618393137732,
108520
+ "loss": 11.2128,
108521
+ "step": 154800
108522
+ },
108523
+ {
108524
+ "epoch": 18.62936221419976,
108525
+ "grad_norm": Infinity,
108526
+ "learning_rate": 0.00018423413370239198,
108527
+ "loss": 11.1383,
108528
+ "step": 154810
108529
+ },
108530
+ {
108531
+ "epoch": 18.630565583634176,
108532
+ "grad_norm": Infinity,
108533
+ "learning_rate": 0.000184232083351499,
108534
+ "loss": 11.1558,
108535
+ "step": 154820
108536
+ },
108537
+ {
108538
+ "epoch": 18.63176895306859,
108539
+ "grad_norm": Infinity,
108540
+ "learning_rate": 0.00018423003287870143,
108541
+ "loss": 11.1106,
108542
+ "step": 154830
108543
+ },
108544
+ {
108545
+ "epoch": 18.632972322503008,
108546
+ "grad_norm": Infinity,
108547
+ "learning_rate": 0.0001842279822840022,
108548
+ "loss": 11.2044,
108549
+ "step": 154840
108550
+ },
108551
+ {
108552
+ "epoch": 18.634175691937426,
108553
+ "grad_norm": Infinity,
108554
+ "learning_rate": 0.0001842259315674043,
108555
+ "loss": 11.0732,
108556
+ "step": 154850
108557
+ },
108558
+ {
108559
+ "epoch": 18.63537906137184,
108560
+ "grad_norm": Infinity,
108561
+ "learning_rate": 0.00018422388072891063,
108562
+ "loss": 11.2463,
108563
+ "step": 154860
108564
+ },
108565
+ {
108566
+ "epoch": 18.636582430806257,
108567
+ "grad_norm": Infinity,
108568
+ "learning_rate": 0.00018422182976852427,
108569
+ "loss": 11.208,
108570
+ "step": 154870
108571
+ },
108572
+ {
108573
+ "epoch": 18.637785800240675,
108574
+ "grad_norm": Infinity,
108575
+ "learning_rate": 0.00018421977868624808,
108576
+ "loss": 11.0892,
108577
+ "step": 154880
108578
+ },
108579
+ {
108580
+ "epoch": 18.63898916967509,
108581
+ "grad_norm": Infinity,
108582
+ "learning_rate": 0.0001842177274820851,
108583
+ "loss": 11.0983,
108584
+ "step": 154890
108585
+ },
108586
+ {
108587
+ "epoch": 18.640192539109506,
108588
+ "grad_norm": Infinity,
108589
+ "learning_rate": 0.0001842156761560383,
108590
+ "loss": 11.0362,
108591
+ "step": 154900
108592
+ },
108593
+ {
108594
+ "epoch": 18.641395908543924,
108595
+ "grad_norm": Infinity,
108596
+ "learning_rate": 0.0001842136247081106,
108597
+ "loss": 11.2536,
108598
+ "step": 154910
108599
+ },
108600
+ {
108601
+ "epoch": 18.642599277978338,
108602
+ "grad_norm": Infinity,
108603
+ "learning_rate": 0.000184211573138305,
108604
+ "loss": 11.1086,
108605
+ "step": 154920
108606
+ },
108607
+ {
108608
+ "epoch": 18.643802647412755,
108609
+ "grad_norm": Infinity,
108610
+ "learning_rate": 0.00018420952144662444,
108611
+ "loss": 11.1801,
108612
+ "step": 154930
108613
+ },
108614
+ {
108615
+ "epoch": 18.645006016847173,
108616
+ "grad_norm": Infinity,
108617
+ "learning_rate": 0.0001842074696330719,
108618
+ "loss": 11.1866,
108619
+ "step": 154940
108620
+ },
108621
+ {
108622
+ "epoch": 18.646209386281587,
108623
+ "grad_norm": Infinity,
108624
+ "learning_rate": 0.00018420541769765042,
108625
+ "loss": 11.1017,
108626
+ "step": 154950
108627
+ },
108628
+ {
108629
+ "epoch": 18.647412755716005,
108630
+ "grad_norm": Infinity,
108631
+ "learning_rate": 0.00018420336564036287,
108632
+ "loss": 11.2785,
108633
+ "step": 154960
108634
+ },
108635
+ {
108636
+ "epoch": 18.648616125150422,
108637
+ "grad_norm": Infinity,
108638
+ "learning_rate": 0.0001842013134612123,
108639
+ "loss": 11.0446,
108640
+ "step": 154970
108641
+ },
108642
+ {
108643
+ "epoch": 18.649819494584836,
108644
+ "grad_norm": Infinity,
108645
+ "learning_rate": 0.0001841992611602016,
108646
+ "loss": 11.1732,
108647
+ "step": 154980
108648
+ },
108649
+ {
108650
+ "epoch": 18.651022864019254,
108651
+ "grad_norm": Infinity,
108652
+ "learning_rate": 0.0001841972087373338,
108653
+ "loss": 11.136,
108654
+ "step": 154990
108655
+ },
108656
+ {
108657
+ "epoch": 18.65222623345367,
108658
+ "grad_norm": Infinity,
108659
+ "learning_rate": 0.00018419515619261186,
108660
+ "loss": 11.0919,
108661
+ "step": 155000
108662
+ },
108663
+ {
108664
+ "epoch": 18.653429602888085,
108665
+ "grad_norm": Infinity,
108666
+ "learning_rate": 0.00018419310352603868,
108667
+ "loss": 11.1219,
108668
+ "step": 155010
108669
+ },
108670
+ {
108671
+ "epoch": 18.654632972322503,
108672
+ "grad_norm": Infinity,
108673
+ "learning_rate": 0.00018419105073761737,
108674
+ "loss": 11.1603,
108675
+ "step": 155020
108676
+ },
108677
+ {
108678
+ "epoch": 18.65583634175692,
108679
+ "grad_norm": Infinity,
108680
+ "learning_rate": 0.00018418899782735078,
108681
+ "loss": 11.1119,
108682
+ "step": 155030
108683
+ },
108684
+ {
108685
+ "epoch": 18.657039711191334,
108686
+ "grad_norm": Infinity,
108687
+ "learning_rate": 0.00018418694479524193,
108688
+ "loss": 11.2415,
108689
+ "step": 155040
108690
+ },
108691
+ {
108692
+ "epoch": 18.658243080625752,
108693
+ "grad_norm": Infinity,
108694
+ "learning_rate": 0.0001841848916412938,
108695
+ "loss": 11.1529,
108696
+ "step": 155050
108697
+ },
108698
+ {
108699
+ "epoch": 18.65944645006017,
108700
+ "grad_norm": Infinity,
108701
+ "learning_rate": 0.00018418283836550936,
108702
+ "loss": 11.0478,
108703
+ "step": 155060
108704
+ },
108705
+ {
108706
+ "epoch": 18.660649819494584,
108707
+ "grad_norm": Infinity,
108708
+ "learning_rate": 0.00018418078496789154,
108709
+ "loss": 11.1712,
108710
+ "step": 155070
108711
+ },
108712
+ {
108713
+ "epoch": 18.661853188929,
108714
+ "grad_norm": Infinity,
108715
+ "learning_rate": 0.00018417873144844333,
108716
+ "loss": 11.0886,
108717
+ "step": 155080
108718
+ },
108719
+ {
108720
+ "epoch": 18.66305655836342,
108721
+ "grad_norm": Infinity,
108722
+ "learning_rate": 0.00018417667780716777,
108723
+ "loss": 11.0901,
108724
+ "step": 155090
108725
+ },
108726
+ {
108727
+ "epoch": 18.664259927797833,
108728
+ "grad_norm": Infinity,
108729
+ "learning_rate": 0.00018417462404406773,
108730
+ "loss": 11.1937,
108731
+ "step": 155100
108732
+ },
108733
+ {
108734
+ "epoch": 18.66546329723225,
108735
+ "grad_norm": Infinity,
108736
+ "learning_rate": 0.0001841725701591462,
108737
+ "loss": 11.0931,
108738
+ "step": 155110
108739
+ },
108740
+ {
108741
+ "epoch": 18.666666666666668,
108742
+ "grad_norm": Infinity,
108743
+ "learning_rate": 0.00018417051615240624,
108744
+ "loss": 11.0515,
108745
+ "step": 155120
108746
+ },
108747
+ {
108748
+ "epoch": 18.667870036101082,
108749
+ "grad_norm": Infinity,
108750
+ "learning_rate": 0.00018416846202385073,
108751
+ "loss": 11.2758,
108752
+ "step": 155130
108753
+ },
108754
+ {
108755
+ "epoch": 18.6690734055355,
108756
+ "grad_norm": Infinity,
108757
+ "learning_rate": 0.0001841664077734827,
108758
+ "loss": 11.1772,
108759
+ "step": 155140
108760
+ },
108761
+ {
108762
+ "epoch": 18.670276774969917,
108763
+ "grad_norm": Infinity,
108764
+ "learning_rate": 0.00018416435340130508,
108765
+ "loss": 11.0956,
108766
+ "step": 155150
108767
+ },
108768
+ {
108769
+ "epoch": 18.67148014440433,
108770
+ "grad_norm": Infinity,
108771
+ "learning_rate": 0.00018416229890732083,
108772
+ "loss": 11.0273,
108773
+ "step": 155160
108774
+ },
108775
+ {
108776
+ "epoch": 18.67268351383875,
108777
+ "grad_norm": Infinity,
108778
+ "learning_rate": 0.000184160244291533,
108779
+ "loss": 11.207,
108780
+ "step": 155170
108781
+ },
108782
+ {
108783
+ "epoch": 18.673886883273166,
108784
+ "grad_norm": Infinity,
108785
+ "learning_rate": 0.0001841581895539445,
108786
+ "loss": 11.1246,
108787
+ "step": 155180
108788
+ },
108789
+ {
108790
+ "epoch": 18.67509025270758,
108791
+ "grad_norm": Infinity,
108792
+ "learning_rate": 0.0001841561346945583,
108793
+ "loss": 11.2164,
108794
+ "step": 155190
108795
+ },
108796
+ {
108797
+ "epoch": 18.676293622141998,
108798
+ "grad_norm": Infinity,
108799
+ "learning_rate": 0.00018415407971337743,
108800
+ "loss": 11.1303,
108801
+ "step": 155200
108802
+ },
108803
+ {
108804
+ "epoch": 18.677496991576415,
108805
+ "grad_norm": Infinity,
108806
+ "learning_rate": 0.00018415202461040484,
108807
+ "loss": 11.0508,
108808
+ "step": 155210
108809
+ },
108810
+ {
108811
+ "epoch": 18.67870036101083,
108812
+ "grad_norm": Infinity,
108813
+ "learning_rate": 0.00018414996938564347,
108814
+ "loss": 11.1942,
108815
+ "step": 155220
108816
+ },
108817
+ {
108818
+ "epoch": 18.679903730445247,
108819
+ "grad_norm": Infinity,
108820
+ "learning_rate": 0.0001841479140390963,
108821
+ "loss": 11.1507,
108822
+ "step": 155230
108823
+ },
108824
+ {
108825
+ "epoch": 18.681107099879664,
108826
+ "grad_norm": Infinity,
108827
+ "learning_rate": 0.00018414585857076635,
108828
+ "loss": 11.094,
108829
+ "step": 155240
108830
+ },
108831
+ {
108832
+ "epoch": 18.68231046931408,
108833
+ "grad_norm": Infinity,
108834
+ "learning_rate": 0.00018414380298065658,
108835
+ "loss": 11.3475,
108836
+ "step": 155250
108837
+ },
108838
+ {
108839
+ "epoch": 18.683513838748496,
108840
+ "grad_norm": Infinity,
108841
+ "learning_rate": 0.00018414174726876992,
108842
+ "loss": 11.2109,
108843
+ "step": 155260
108844
+ },
108845
+ {
108846
+ "epoch": 18.684717208182914,
108847
+ "grad_norm": Infinity,
108848
+ "learning_rate": 0.0001841396914351094,
108849
+ "loss": 11.328,
108850
+ "step": 155270
108851
+ },
108852
+ {
108853
+ "epoch": 18.685920577617328,
108854
+ "grad_norm": Infinity,
108855
+ "learning_rate": 0.00018413763547967796,
108856
+ "loss": 11.2317,
108857
+ "step": 155280
108858
+ },
108859
+ {
108860
+ "epoch": 18.687123947051745,
108861
+ "grad_norm": Infinity,
108862
+ "learning_rate": 0.00018413557940247862,
108863
+ "loss": 11.0034,
108864
+ "step": 155290
108865
+ },
108866
+ {
108867
+ "epoch": 18.688327316486163,
108868
+ "grad_norm": Infinity,
108869
+ "learning_rate": 0.00018413352320351427,
108870
+ "loss": 11.2417,
108871
+ "step": 155300
108872
+ },
108873
+ {
108874
+ "epoch": 18.689530685920577,
108875
+ "grad_norm": Infinity,
108876
+ "learning_rate": 0.00018413146688278797,
108877
+ "loss": 10.9798,
108878
+ "step": 155310
108879
+ },
108880
+ {
108881
+ "epoch": 18.690734055354994,
108882
+ "grad_norm": Infinity,
108883
+ "learning_rate": 0.00018412941044030267,
108884
+ "loss": 11.2268,
108885
+ "step": 155320
108886
+ },
108887
+ {
108888
+ "epoch": 18.691937424789412,
108889
+ "grad_norm": Infinity,
108890
+ "learning_rate": 0.00018412735387606131,
108891
+ "loss": 11.1689,
108892
+ "step": 155330
108893
+ },
108894
+ {
108895
+ "epoch": 18.693140794223826,
108896
+ "grad_norm": Infinity,
108897
+ "learning_rate": 0.00018412529719006695,
108898
+ "loss": 11.1137,
108899
+ "step": 155340
108900
+ },
108901
+ {
108902
+ "epoch": 18.694344163658243,
108903
+ "grad_norm": Infinity,
108904
+ "learning_rate": 0.00018412324038232249,
108905
+ "loss": 11.1095,
108906
+ "step": 155350
108907
+ },
108908
+ {
108909
+ "epoch": 18.69554753309266,
108910
+ "grad_norm": Infinity,
108911
+ "learning_rate": 0.00018412118345283092,
108912
+ "loss": 10.9851,
108913
+ "step": 155360
108914
+ },
108915
+ {
108916
+ "epoch": 18.696750902527075,
108917
+ "grad_norm": Infinity,
108918
+ "learning_rate": 0.00018411912640159525,
108919
+ "loss": 11.0877,
108920
+ "step": 155370
108921
+ },
108922
+ {
108923
+ "epoch": 18.697954271961493,
108924
+ "grad_norm": Infinity,
108925
+ "learning_rate": 0.00018411706922861844,
108926
+ "loss": 11.1739,
108927
+ "step": 155380
108928
+ },
108929
+ {
108930
+ "epoch": 18.69915764139591,
108931
+ "grad_norm": Infinity,
108932
+ "learning_rate": 0.00018411501193390346,
108933
+ "loss": 11.1279,
108934
+ "step": 155390
108935
+ },
108936
+ {
108937
+ "epoch": 18.700361010830324,
108938
+ "grad_norm": Infinity,
108939
+ "learning_rate": 0.00018411295451745328,
108940
+ "loss": 11.1035,
108941
+ "step": 155400
108942
+ },
108943
+ {
108944
+ "epoch": 18.70156438026474,
108945
+ "grad_norm": Infinity,
108946
+ "learning_rate": 0.00018411089697927086,
108947
+ "loss": 11.1095,
108948
+ "step": 155410
108949
+ },
108950
+ {
108951
+ "epoch": 18.70276774969916,
108952
+ "grad_norm": Infinity,
108953
+ "learning_rate": 0.00018410883931935926,
108954
+ "loss": 11.1272,
108955
+ "step": 155420
108956
+ },
108957
+ {
108958
+ "epoch": 18.703971119133573,
108959
+ "grad_norm": Infinity,
108960
+ "learning_rate": 0.00018410678153772136,
108961
+ "loss": 11.1291,
108962
+ "step": 155430
108963
+ },
108964
+ {
108965
+ "epoch": 18.70517448856799,
108966
+ "grad_norm": Infinity,
108967
+ "learning_rate": 0.0001841047236343602,
108968
+ "loss": 11.1143,
108969
+ "step": 155440
108970
+ },
108971
+ {
108972
+ "epoch": 18.706377858002405,
108973
+ "grad_norm": Infinity,
108974
+ "learning_rate": 0.00018410266560927875,
108975
+ "loss": 11.081,
108976
+ "step": 155450
108977
+ },
108978
+ {
108979
+ "epoch": 18.707581227436823,
108980
+ "grad_norm": Infinity,
108981
+ "learning_rate": 0.00018410060746247995,
108982
+ "loss": 11.1918,
108983
+ "step": 155460
108984
+ },
108985
+ {
108986
+ "epoch": 18.70878459687124,
108987
+ "grad_norm": Infinity,
108988
+ "learning_rate": 0.00018409854919396686,
108989
+ "loss": 11.1112,
108990
+ "step": 155470
108991
+ },
108992
+ {
108993
+ "epoch": 18.709987966305654,
108994
+ "grad_norm": Infinity,
108995
+ "learning_rate": 0.00018409649080374237,
108996
+ "loss": 11.1154,
108997
+ "step": 155480
108998
+ },
108999
+ {
109000
+ "epoch": 18.71119133574007,
109001
+ "grad_norm": Infinity,
109002
+ "learning_rate": 0.00018409443229180946,
109003
+ "loss": 11.1244,
109004
+ "step": 155490
109005
+ },
109006
+ {
109007
+ "epoch": 18.71239470517449,
109008
+ "grad_norm": Infinity,
109009
+ "learning_rate": 0.00018409237365817118,
109010
+ "loss": 11.1785,
109011
+ "step": 155500
109012
+ },
109013
+ {
109014
+ "epoch": 18.713598074608903,
109015
+ "grad_norm": Infinity,
109016
+ "learning_rate": 0.0001840903149028305,
109017
+ "loss": 11.0254,
109018
+ "step": 155510
109019
+ },
109020
+ {
109021
+ "epoch": 18.71480144404332,
109022
+ "grad_norm": Infinity,
109023
+ "learning_rate": 0.00018408825602579033,
109024
+ "loss": 11.096,
109025
+ "step": 155520
109026
+ },
109027
+ {
109028
+ "epoch": 18.71600481347774,
109029
+ "grad_norm": Infinity,
109030
+ "learning_rate": 0.00018408619702705372,
109031
+ "loss": 11.1088,
109032
+ "step": 155530
109033
+ },
109034
+ {
109035
+ "epoch": 18.717208182912152,
109036
+ "grad_norm": Infinity,
109037
+ "learning_rate": 0.0001840841379066236,
109038
+ "loss": 11.1197,
109039
+ "step": 155540
109040
+ },
109041
+ {
109042
+ "epoch": 18.71841155234657,
109043
+ "grad_norm": Infinity,
109044
+ "learning_rate": 0.000184082078664503,
109045
+ "loss": 11.043,
109046
+ "step": 155550
109047
+ },
109048
+ {
109049
+ "epoch": 18.719614921780988,
109050
+ "grad_norm": Infinity,
109051
+ "learning_rate": 0.00018408001930069484,
109052
+ "loss": 11.0468,
109053
+ "step": 155560
109054
+ },
109055
+ {
109056
+ "epoch": 18.7208182912154,
109057
+ "grad_norm": Infinity,
109058
+ "learning_rate": 0.00018407795981520216,
109059
+ "loss": 11.0612,
109060
+ "step": 155570
109061
+ },
109062
+ {
109063
+ "epoch": 18.72202166064982,
109064
+ "grad_norm": Infinity,
109065
+ "learning_rate": 0.00018407590020802793,
109066
+ "loss": 11.2637,
109067
+ "step": 155580
109068
+ },
109069
+ {
109070
+ "epoch": 18.723225030084237,
109071
+ "grad_norm": Infinity,
109072
+ "learning_rate": 0.0001840738404791751,
109073
+ "loss": 11.1286,
109074
+ "step": 155590
109075
+ },
109076
+ {
109077
+ "epoch": 18.72442839951865,
109078
+ "grad_norm": Infinity,
109079
+ "learning_rate": 0.00018407178062864664,
109080
+ "loss": 10.9937,
109081
+ "step": 155600
109082
+ },
109083
+ {
109084
+ "epoch": 18.72563176895307,
109085
+ "grad_norm": Infinity,
109086
+ "learning_rate": 0.00018406972065644557,
109087
+ "loss": 11.0324,
109088
+ "step": 155610
109089
+ },
109090
+ {
109091
+ "epoch": 18.726835138387486,
109092
+ "grad_norm": Infinity,
109093
+ "learning_rate": 0.00018406766056257485,
109094
+ "loss": 11.0811,
109095
+ "step": 155620
109096
+ },
109097
+ {
109098
+ "epoch": 18.7280385078219,
109099
+ "grad_norm": Infinity,
109100
+ "learning_rate": 0.00018406560034703748,
109101
+ "loss": 11.1048,
109102
+ "step": 155630
109103
+ },
109104
+ {
109105
+ "epoch": 18.729241877256317,
109106
+ "grad_norm": Infinity,
109107
+ "learning_rate": 0.00018406354000983642,
109108
+ "loss": 10.9592,
109109
+ "step": 155640
109110
+ },
109111
+ {
109112
+ "epoch": 18.730445246690735,
109113
+ "grad_norm": Infinity,
109114
+ "learning_rate": 0.0001840614795509747,
109115
+ "loss": 11.0839,
109116
+ "step": 155650
109117
+ },
109118
+ {
109119
+ "epoch": 18.73164861612515,
109120
+ "grad_norm": Infinity,
109121
+ "learning_rate": 0.00018405941897045523,
109122
+ "loss": 11.1915,
109123
+ "step": 155660
109124
+ },
109125
+ {
109126
+ "epoch": 18.732851985559567,
109127
+ "grad_norm": Infinity,
109128
+ "learning_rate": 0.00018405735826828107,
109129
+ "loss": 11.0202,
109130
+ "step": 155670
109131
+ },
109132
+ {
109133
+ "epoch": 18.734055354993984,
109134
+ "grad_norm": Infinity,
109135
+ "learning_rate": 0.0001840552974444551,
109136
+ "loss": 11.1132,
109137
+ "step": 155680
109138
+ },
109139
+ {
109140
+ "epoch": 18.735258724428398,
109141
+ "grad_norm": Infinity,
109142
+ "learning_rate": 0.0001840532364989804,
109143
+ "loss": 11.2307,
109144
+ "step": 155690
109145
+ },
109146
+ {
109147
+ "epoch": 18.736462093862816,
109148
+ "grad_norm": Infinity,
109149
+ "learning_rate": 0.0001840511754318599,
109150
+ "loss": 11.1407,
109151
+ "step": 155700
109152
+ },
109153
+ {
109154
+ "epoch": 18.737665463297233,
109155
+ "grad_norm": Infinity,
109156
+ "learning_rate": 0.00018404911424309658,
109157
+ "loss": 11.1428,
109158
+ "step": 155710
109159
+ },
109160
+ {
109161
+ "epoch": 18.738868832731647,
109162
+ "grad_norm": Infinity,
109163
+ "learning_rate": 0.00018404705293269347,
109164
+ "loss": 11.1013,
109165
+ "step": 155720
109166
+ },
109167
+ {
109168
+ "epoch": 18.740072202166065,
109169
+ "grad_norm": Infinity,
109170
+ "learning_rate": 0.00018404499150065351,
109171
+ "loss": 11.1795,
109172
+ "step": 155730
109173
+ },
109174
+ {
109175
+ "epoch": 18.741275571600482,
109176
+ "grad_norm": Infinity,
109177
+ "learning_rate": 0.0001840429299469797,
109178
+ "loss": 11.1118,
109179
+ "step": 155740
109180
+ },
109181
+ {
109182
+ "epoch": 18.742478941034896,
109183
+ "grad_norm": Infinity,
109184
+ "learning_rate": 0.00018404086827167502,
109185
+ "loss": 11.0965,
109186
+ "step": 155750
109187
+ },
109188
+ {
109189
+ "epoch": 18.743682310469314,
109190
+ "grad_norm": Infinity,
109191
+ "learning_rate": 0.00018403880647474249,
109192
+ "loss": 11.1054,
109193
+ "step": 155760
109194
+ },
109195
+ {
109196
+ "epoch": 18.74488567990373,
109197
+ "grad_norm": Infinity,
109198
+ "learning_rate": 0.00018403674455618501,
109199
+ "loss": 11.0522,
109200
+ "step": 155770
109201
+ },
109202
+ {
109203
+ "epoch": 18.746089049338146,
109204
+ "grad_norm": Infinity,
109205
+ "learning_rate": 0.00018403468251600562,
109206
+ "loss": 10.9737,
109207
+ "step": 155780
109208
+ },
109209
+ {
109210
+ "epoch": 18.747292418772563,
109211
+ "grad_norm": Infinity,
109212
+ "learning_rate": 0.0001840326203542073,
109213
+ "loss": 11.2099,
109214
+ "step": 155790
109215
+ },
109216
+ {
109217
+ "epoch": 18.74849578820698,
109218
+ "grad_norm": Infinity,
109219
+ "learning_rate": 0.00018403055807079307,
109220
+ "loss": 11.174,
109221
+ "step": 155800
109222
+ },
109223
+ {
109224
+ "epoch": 18.749699157641395,
109225
+ "grad_norm": Infinity,
109226
+ "learning_rate": 0.00018402849566576582,
109227
+ "loss": 11.0626,
109228
+ "step": 155810
109229
+ },
109230
+ {
109231
+ "epoch": 18.750902527075812,
109232
+ "grad_norm": Infinity,
109233
+ "learning_rate": 0.00018402643313912862,
109234
+ "loss": 11.1572,
109235
+ "step": 155820
109236
+ },
109237
+ {
109238
+ "epoch": 18.75210589651023,
109239
+ "grad_norm": Infinity,
109240
+ "learning_rate": 0.00018402437049088444,
109241
+ "loss": 11.0243,
109242
+ "step": 155830
109243
+ },
109244
+ {
109245
+ "epoch": 18.753309265944644,
109246
+ "grad_norm": Infinity,
109247
+ "learning_rate": 0.0001840223077210362,
109248
+ "loss": 11.0665,
109249
+ "step": 155840
109250
+ },
109251
+ {
109252
+ "epoch": 18.75451263537906,
109253
+ "grad_norm": Infinity,
109254
+ "learning_rate": 0.000184020244829587,
109255
+ "loss": 11.1575,
109256
+ "step": 155850
109257
+ },
109258
+ {
109259
+ "epoch": 18.75571600481348,
109260
+ "grad_norm": Infinity,
109261
+ "learning_rate": 0.0001840181818165397,
109262
+ "loss": 11.2205,
109263
+ "step": 155860
109264
+ },
109265
+ {
109266
+ "epoch": 18.756919374247893,
109267
+ "grad_norm": Infinity,
109268
+ "learning_rate": 0.00018401611868189738,
109269
+ "loss": 11.2051,
109270
+ "step": 155870
109271
+ },
109272
+ {
109273
+ "epoch": 18.75812274368231,
109274
+ "grad_norm": Infinity,
109275
+ "learning_rate": 0.00018401405542566298,
109276
+ "loss": 11.1057,
109277
+ "step": 155880
109278
+ },
109279
+ {
109280
+ "epoch": 18.759326113116728,
109281
+ "grad_norm": Infinity,
109282
+ "learning_rate": 0.00018401199204783953,
109283
+ "loss": 11.094,
109284
+ "step": 155890
109285
+ },
109286
+ {
109287
+ "epoch": 18.760529482551142,
109288
+ "grad_norm": Infinity,
109289
+ "learning_rate": 0.00018400992854842997,
109290
+ "loss": 11.0583,
109291
+ "step": 155900
109292
+ },
109293
+ {
109294
+ "epoch": 18.76173285198556,
109295
+ "grad_norm": Infinity,
109296
+ "learning_rate": 0.0001840078649274373,
109297
+ "loss": 11.1038,
109298
+ "step": 155910
109299
+ },
109300
+ {
109301
+ "epoch": 18.762936221419977,
109302
+ "grad_norm": Infinity,
109303
+ "learning_rate": 0.0001840058011848645,
109304
+ "loss": 11.3338,
109305
+ "step": 155920
109306
+ },
109307
+ {
109308
+ "epoch": 18.76413959085439,
109309
+ "grad_norm": Infinity,
109310
+ "learning_rate": 0.00018400373732071457,
109311
+ "loss": 11.0459,
109312
+ "step": 155930
109313
+ },
109314
+ {
109315
+ "epoch": 18.76534296028881,
109316
+ "grad_norm": Infinity,
109317
+ "learning_rate": 0.00018400167333499048,
109318
+ "loss": 11.2301,
109319
+ "step": 155940
109320
+ },
109321
+ {
109322
+ "epoch": 18.766546329723226,
109323
+ "grad_norm": Infinity,
109324
+ "learning_rate": 0.00018399960922769527,
109325
+ "loss": 11.1998,
109326
+ "step": 155950
109327
+ },
109328
+ {
109329
+ "epoch": 18.76774969915764,
109330
+ "grad_norm": Infinity,
109331
+ "learning_rate": 0.00018399754499883185,
109332
+ "loss": 11.096,
109333
+ "step": 155960
109334
+ },
109335
+ {
109336
+ "epoch": 18.768953068592058,
109337
+ "grad_norm": Infinity,
109338
+ "learning_rate": 0.00018399548064840322,
109339
+ "loss": 11.1265,
109340
+ "step": 155970
109341
+ },
109342
+ {
109343
+ "epoch": 18.770156438026476,
109344
+ "grad_norm": Infinity,
109345
+ "learning_rate": 0.00018399341617641245,
109346
+ "loss": 11.1074,
109347
+ "step": 155980
109348
+ },
109349
+ {
109350
+ "epoch": 18.77135980746089,
109351
+ "grad_norm": Infinity,
109352
+ "learning_rate": 0.00018399135158286245,
109353
+ "loss": 11.1348,
109354
+ "step": 155990
109355
+ },
109356
+ {
109357
+ "epoch": 18.772563176895307,
109358
+ "grad_norm": Infinity,
109359
+ "learning_rate": 0.0001839892868677562,
109360
+ "loss": 11.0085,
109361
+ "step": 156000
109362
+ },
109363
+ {
109364
+ "epoch": 18.773766546329725,
109365
+ "grad_norm": Infinity,
109366
+ "learning_rate": 0.00018398722203109675,
109367
+ "loss": 11.0944,
109368
+ "step": 156010
109369
+ },
109370
+ {
109371
+ "epoch": 18.77496991576414,
109372
+ "grad_norm": Infinity,
109373
+ "learning_rate": 0.00018398515707288704,
109374
+ "loss": 11.1064,
109375
+ "step": 156020
109376
+ },
109377
+ {
109378
+ "epoch": 18.776173285198556,
109379
+ "grad_norm": Infinity,
109380
+ "learning_rate": 0.00018398309199313008,
109381
+ "loss": 11.202,
109382
+ "step": 156030
109383
+ },
109384
+ {
109385
+ "epoch": 18.777376654632974,
109386
+ "grad_norm": Infinity,
109387
+ "learning_rate": 0.00018398102679182885,
109388
+ "loss": 11.0866,
109389
+ "step": 156040
109390
+ },
109391
+ {
109392
+ "epoch": 18.778580024067388,
109393
+ "grad_norm": Infinity,
109394
+ "learning_rate": 0.00018397896146898633,
109395
+ "loss": 11.1692,
109396
+ "step": 156050
109397
+ },
109398
+ {
109399
+ "epoch": 18.779783393501805,
109400
+ "grad_norm": Infinity,
109401
+ "learning_rate": 0.0001839768960246055,
109402
+ "loss": 11.1189,
109403
+ "step": 156060
109404
+ },
109405
+ {
109406
+ "epoch": 18.780986762936223,
109407
+ "grad_norm": Infinity,
109408
+ "learning_rate": 0.00018397483045868942,
109409
+ "loss": 11.1767,
109410
+ "step": 156070
109411
+ },
109412
+ {
109413
+ "epoch": 18.782190132370637,
109414
+ "grad_norm": Infinity,
109415
+ "learning_rate": 0.00018397276477124098,
109416
+ "loss": 11.1687,
109417
+ "step": 156080
109418
+ },
109419
+ {
109420
+ "epoch": 18.783393501805055,
109421
+ "grad_norm": Infinity,
109422
+ "learning_rate": 0.00018397069896226325,
109423
+ "loss": 11.1573,
109424
+ "step": 156090
109425
+ },
109426
+ {
109427
+ "epoch": 18.784596871239472,
109428
+ "grad_norm": Infinity,
109429
+ "learning_rate": 0.00018396863303175918,
109430
+ "loss": 11.0969,
109431
+ "step": 156100
109432
+ },
109433
+ {
109434
+ "epoch": 18.785800240673886,
109435
+ "grad_norm": Infinity,
109436
+ "learning_rate": 0.00018396656697973175,
109437
+ "loss": 11.0836,
109438
+ "step": 156110
109439
+ },
109440
+ {
109441
+ "epoch": 18.787003610108304,
109442
+ "grad_norm": Infinity,
109443
+ "learning_rate": 0.00018396450080618396,
109444
+ "loss": 11.1231,
109445
+ "step": 156120
109446
+ },
109447
+ {
109448
+ "epoch": 18.78820697954272,
109449
+ "grad_norm": Infinity,
109450
+ "learning_rate": 0.00018396243451111884,
109451
+ "loss": 11.0268,
109452
+ "step": 156130
109453
+ },
109454
+ {
109455
+ "epoch": 18.789410348977135,
109456
+ "grad_norm": Infinity,
109457
+ "learning_rate": 0.00018396036809453933,
109458
+ "loss": 11.2009,
109459
+ "step": 156140
109460
+ },
109461
+ {
109462
+ "epoch": 18.790613718411553,
109463
+ "grad_norm": Infinity,
109464
+ "learning_rate": 0.00018395830155644845,
109465
+ "loss": 11.106,
109466
+ "step": 156150
109467
+ },
109468
+ {
109469
+ "epoch": 18.79181708784597,
109470
+ "grad_norm": Infinity,
109471
+ "learning_rate": 0.00018395623489684916,
109472
+ "loss": 11.1531,
109473
+ "step": 156160
109474
+ },
109475
+ {
109476
+ "epoch": 18.793020457280385,
109477
+ "grad_norm": Infinity,
109478
+ "learning_rate": 0.00018395416811574448,
109479
+ "loss": 11.1239,
109480
+ "step": 156170
109481
+ },
109482
+ {
109483
+ "epoch": 18.794223826714802,
109484
+ "grad_norm": Infinity,
109485
+ "learning_rate": 0.0001839521012131374,
109486
+ "loss": 11.0848,
109487
+ "step": 156180
109488
+ },
109489
+ {
109490
+ "epoch": 18.79542719614922,
109491
+ "grad_norm": Infinity,
109492
+ "learning_rate": 0.00018395003418903086,
109493
+ "loss": 11.0738,
109494
+ "step": 156190
109495
+ },
109496
+ {
109497
+ "epoch": 18.796630565583634,
109498
+ "grad_norm": Infinity,
109499
+ "learning_rate": 0.00018394796704342795,
109500
+ "loss": 11.1085,
109501
+ "step": 156200
109502
+ },
109503
+ {
109504
+ "epoch": 18.79783393501805,
109505
+ "grad_norm": Infinity,
109506
+ "learning_rate": 0.0001839458997763316,
109507
+ "loss": 11.1598,
109508
+ "step": 156210
109509
+ },
109510
+ {
109511
+ "epoch": 18.799037304452465,
109512
+ "grad_norm": Infinity,
109513
+ "learning_rate": 0.00018394383238774477,
109514
+ "loss": 11.1524,
109515
+ "step": 156220
109516
+ },
109517
+ {
109518
+ "epoch": 18.800240673886883,
109519
+ "grad_norm": Infinity,
109520
+ "learning_rate": 0.00018394176487767052,
109521
+ "loss": 11.0619,
109522
+ "step": 156230
109523
+ },
109524
+ {
109525
+ "epoch": 18.8014440433213,
109526
+ "grad_norm": Infinity,
109527
+ "learning_rate": 0.0001839396972461118,
109528
+ "loss": 11.0887,
109529
+ "step": 156240
109530
+ },
109531
+ {
109532
+ "epoch": 18.802647412755714,
109533
+ "grad_norm": Infinity,
109534
+ "learning_rate": 0.0001839376294930716,
109535
+ "loss": 11.1339,
109536
+ "step": 156250
109537
+ },
109538
+ {
109539
+ "epoch": 18.803850782190132,
109540
+ "grad_norm": Infinity,
109541
+ "learning_rate": 0.00018393556161855297,
109542
+ "loss": 11.2568,
109543
+ "step": 156260
109544
+ },
109545
+ {
109546
+ "epoch": 18.80505415162455,
109547
+ "grad_norm": Infinity,
109548
+ "learning_rate": 0.00018393349362255882,
109549
+ "loss": 11.2225,
109550
+ "step": 156270
109551
+ },
109552
+ {
109553
+ "epoch": 18.806257521058964,
109554
+ "grad_norm": Infinity,
109555
+ "learning_rate": 0.0001839314255050922,
109556
+ "loss": 11.1804,
109557
+ "step": 156280
109558
+ },
109559
+ {
109560
+ "epoch": 18.80746089049338,
109561
+ "grad_norm": Infinity,
109562
+ "learning_rate": 0.00018392935726615608,
109563
+ "loss": 11.0418,
109564
+ "step": 156290
109565
+ },
109566
+ {
109567
+ "epoch": 18.8086642599278,
109568
+ "grad_norm": Infinity,
109569
+ "learning_rate": 0.00018392728890575345,
109570
+ "loss": 11.169,
109571
+ "step": 156300
109572
+ },
109573
+ {
109574
+ "epoch": 18.809867629362213,
109575
+ "grad_norm": Infinity,
109576
+ "learning_rate": 0.00018392522042388733,
109577
+ "loss": 11.1228,
109578
+ "step": 156310
109579
+ },
109580
+ {
109581
+ "epoch": 18.81107099879663,
109582
+ "grad_norm": Infinity,
109583
+ "learning_rate": 0.0001839231518205607,
109584
+ "loss": 11.2595,
109585
+ "step": 156320
109586
+ },
109587
+ {
109588
+ "epoch": 18.812274368231048,
109589
+ "grad_norm": Infinity,
109590
+ "learning_rate": 0.00018392108309577652,
109591
+ "loss": 11.2344,
109592
+ "step": 156330
109593
+ },
109594
+ {
109595
+ "epoch": 18.813477737665462,
109596
+ "grad_norm": Infinity,
109597
+ "learning_rate": 0.0001839190142495378,
109598
+ "loss": 11.0632,
109599
+ "step": 156340
109600
+ },
109601
+ {
109602
+ "epoch": 18.81468110709988,
109603
+ "grad_norm": Infinity,
109604
+ "learning_rate": 0.00018391694528184757,
109605
+ "loss": 11.1457,
109606
+ "step": 156350
109607
+ },
109608
+ {
109609
+ "epoch": 18.815884476534297,
109610
+ "grad_norm": Infinity,
109611
+ "learning_rate": 0.00018391487619270883,
109612
+ "loss": 11.2568,
109613
+ "step": 156360
109614
+ },
109615
+ {
109616
+ "epoch": 18.81708784596871,
109617
+ "grad_norm": Infinity,
109618
+ "learning_rate": 0.0001839128069821245,
109619
+ "loss": 11.1566,
109620
+ "step": 156370
109621
+ },
109622
+ {
109623
+ "epoch": 18.81829121540313,
109624
+ "grad_norm": Infinity,
109625
+ "learning_rate": 0.00018391073765009765,
109626
+ "loss": 11.3208,
109627
+ "step": 156380
109628
+ },
109629
+ {
109630
+ "epoch": 18.819494584837546,
109631
+ "grad_norm": Infinity,
109632
+ "learning_rate": 0.00018390866819663123,
109633
+ "loss": 11.101,
109634
+ "step": 156390
109635
+ },
109636
+ {
109637
+ "epoch": 18.82069795427196,
109638
+ "grad_norm": Infinity,
109639
+ "learning_rate": 0.0001839065986217283,
109640
+ "loss": 11.1122,
109641
+ "step": 156400
109642
+ },
109643
+ {
109644
+ "epoch": 18.821901323706378,
109645
+ "grad_norm": Infinity,
109646
+ "learning_rate": 0.00018390452892539175,
109647
+ "loss": 11.1221,
109648
+ "step": 156410
109649
+ },
109650
+ {
109651
+ "epoch": 18.823104693140795,
109652
+ "grad_norm": Infinity,
109653
+ "learning_rate": 0.00018390245910762464,
109654
+ "loss": 11.084,
109655
+ "step": 156420
109656
+ },
109657
+ {
109658
+ "epoch": 18.82430806257521,
109659
+ "grad_norm": Infinity,
109660
+ "learning_rate": 0.00018390038916842998,
109661
+ "loss": 11.1787,
109662
+ "step": 156430
109663
+ },
109664
+ {
109665
+ "epoch": 18.825511432009627,
109666
+ "grad_norm": Infinity,
109667
+ "learning_rate": 0.00018389831910781073,
109668
+ "loss": 11.1843,
109669
+ "step": 156440
109670
+ },
109671
+ {
109672
+ "epoch": 18.826714801444044,
109673
+ "grad_norm": Infinity,
109674
+ "learning_rate": 0.00018389624892576988,
109675
+ "loss": 11.1098,
109676
+ "step": 156450
109677
+ },
109678
+ {
109679
+ "epoch": 18.82791817087846,
109680
+ "grad_norm": Infinity,
109681
+ "learning_rate": 0.00018389417862231048,
109682
+ "loss": 11.1299,
109683
+ "step": 156460
109684
+ },
109685
+ {
109686
+ "epoch": 18.829121540312876,
109687
+ "grad_norm": Infinity,
109688
+ "learning_rate": 0.00018389210819743545,
109689
+ "loss": 11.0784,
109690
+ "step": 156470
109691
+ },
109692
+ {
109693
+ "epoch": 18.830324909747294,
109694
+ "grad_norm": Infinity,
109695
+ "learning_rate": 0.00018389003765114785,
109696
+ "loss": 11.1469,
109697
+ "step": 156480
109698
+ },
109699
+ {
109700
+ "epoch": 18.831528279181708,
109701
+ "grad_norm": Infinity,
109702
+ "learning_rate": 0.00018388796698345065,
109703
+ "loss": 11.1581,
109704
+ "step": 156490
109705
+ },
109706
+ {
109707
+ "epoch": 18.832731648616125,
109708
+ "grad_norm": Infinity,
109709
+ "learning_rate": 0.00018388589619434687,
109710
+ "loss": 11.0997,
109711
+ "step": 156500
109712
+ },
109713
+ {
109714
+ "epoch": 18.833935018050543,
109715
+ "grad_norm": Infinity,
109716
+ "learning_rate": 0.00018388382528383948,
109717
+ "loss": 11.1977,
109718
+ "step": 156510
109719
+ },
109720
+ {
109721
+ "epoch": 18.835138387484957,
109722
+ "grad_norm": Infinity,
109723
+ "learning_rate": 0.00018388175425193147,
109724
+ "loss": 11.1397,
109725
+ "step": 156520
109726
+ },
109727
+ {
109728
+ "epoch": 18.836341756919374,
109729
+ "grad_norm": Infinity,
109730
+ "learning_rate": 0.00018387968309862583,
109731
+ "loss": 11.3134,
109732
+ "step": 156530
109733
+ },
109734
+ {
109735
+ "epoch": 18.837545126353792,
109736
+ "grad_norm": Infinity,
109737
+ "learning_rate": 0.0001838776118239256,
109738
+ "loss": 11.0607,
109739
+ "step": 156540
109740
+ },
109741
+ {
109742
+ "epoch": 18.838748495788206,
109743
+ "grad_norm": Infinity,
109744
+ "learning_rate": 0.00018387554042783376,
109745
+ "loss": 11.0607,
109746
+ "step": 156550
109747
+ },
109748
+ {
109749
+ "epoch": 18.839951865222623,
109750
+ "grad_norm": Infinity,
109751
+ "learning_rate": 0.00018387346891035332,
109752
+ "loss": 11.1197,
109753
+ "step": 156560
109754
+ },
109755
+ {
109756
+ "epoch": 18.84115523465704,
109757
+ "grad_norm": Infinity,
109758
+ "learning_rate": 0.00018387139727148725,
109759
+ "loss": 11.1981,
109760
+ "step": 156570
109761
+ },
109762
+ {
109763
+ "epoch": 18.842358604091455,
109764
+ "grad_norm": Infinity,
109765
+ "learning_rate": 0.00018386932551123857,
109766
+ "loss": 11.165,
109767
+ "step": 156580
109768
+ },
109769
+ {
109770
+ "epoch": 18.843561973525873,
109771
+ "grad_norm": Infinity,
109772
+ "learning_rate": 0.00018386725362961025,
109773
+ "loss": 11.1773,
109774
+ "step": 156590
109775
+ },
109776
+ {
109777
+ "epoch": 18.84476534296029,
109778
+ "grad_norm": Infinity,
109779
+ "learning_rate": 0.00018386518162660532,
109780
+ "loss": 11.1637,
109781
+ "step": 156600
109782
+ },
109783
+ {
109784
+ "epoch": 18.845968712394704,
109785
+ "grad_norm": Infinity,
109786
+ "learning_rate": 0.00018386310950222677,
109787
+ "loss": 11.1841,
109788
+ "step": 156610
109789
+ },
109790
+ {
109791
+ "epoch": 18.84717208182912,
109792
+ "grad_norm": Infinity,
109793
+ "learning_rate": 0.0001838610372564776,
109794
+ "loss": 11.2402,
109795
+ "step": 156620
109796
+ },
109797
+ {
109798
+ "epoch": 18.84837545126354,
109799
+ "grad_norm": Infinity,
109800
+ "learning_rate": 0.00018385896488936075,
109801
+ "loss": 11.1267,
109802
+ "step": 156630
109803
+ },
109804
+ {
109805
+ "epoch": 18.849578820697953,
109806
+ "grad_norm": Infinity,
109807
+ "learning_rate": 0.00018385689240087936,
109808
+ "loss": 11.2748,
109809
+ "step": 156640
109810
+ },
109811
+ {
109812
+ "epoch": 18.85078219013237,
109813
+ "grad_norm": Infinity,
109814
+ "learning_rate": 0.00018385481979103628,
109815
+ "loss": 11.2116,
109816
+ "step": 156650
109817
+ },
109818
+ {
109819
+ "epoch": 18.85198555956679,
109820
+ "grad_norm": Infinity,
109821
+ "learning_rate": 0.0001838527470598346,
109822
+ "loss": 11.1882,
109823
+ "step": 156660
109824
+ },
109825
+ {
109826
+ "epoch": 18.853188929001202,
109827
+ "grad_norm": Infinity,
109828
+ "learning_rate": 0.0001838506742072773,
109829
+ "loss": 11.1638,
109830
+ "step": 156670
109831
+ },
109832
+ {
109833
+ "epoch": 18.85439229843562,
109834
+ "grad_norm": Infinity,
109835
+ "learning_rate": 0.00018384860123336734,
109836
+ "loss": 11.208,
109837
+ "step": 156680
109838
+ },
109839
+ {
109840
+ "epoch": 18.855595667870038,
109841
+ "grad_norm": Infinity,
109842
+ "learning_rate": 0.00018384652813810778,
109843
+ "loss": 11.1221,
109844
+ "step": 156690
109845
+ },
109846
+ {
109847
+ "epoch": 18.85679903730445,
109848
+ "grad_norm": Infinity,
109849
+ "learning_rate": 0.00018384445492150158,
109850
+ "loss": 11.0525,
109851
+ "step": 156700
109852
+ },
109853
+ {
109854
+ "epoch": 18.85800240673887,
109855
+ "grad_norm": Infinity,
109856
+ "learning_rate": 0.00018384238158355176,
109857
+ "loss": 11.1971,
109858
+ "step": 156710
109859
+ },
109860
+ {
109861
+ "epoch": 18.859205776173287,
109862
+ "grad_norm": Infinity,
109863
+ "learning_rate": 0.00018384030812426132,
109864
+ "loss": 11.1792,
109865
+ "step": 156720
109866
+ },
109867
+ {
109868
+ "epoch": 18.8604091456077,
109869
+ "grad_norm": Infinity,
109870
+ "learning_rate": 0.00018383823454363324,
109871
+ "loss": 11.1946,
109872
+ "step": 156730
109873
+ },
109874
+ {
109875
+ "epoch": 18.86161251504212,
109876
+ "grad_norm": Infinity,
109877
+ "learning_rate": 0.00018383616084167054,
109878
+ "loss": 11.064,
109879
+ "step": 156740
109880
+ },
109881
+ {
109882
+ "epoch": 18.862815884476536,
109883
+ "grad_norm": Infinity,
109884
+ "learning_rate": 0.00018383408701837622,
109885
+ "loss": 11.0675,
109886
+ "step": 156750
109887
+ },
109888
+ {
109889
+ "epoch": 18.86401925391095,
109890
+ "grad_norm": Infinity,
109891
+ "learning_rate": 0.00018383201307375327,
109892
+ "loss": 11.1382,
109893
+ "step": 156760
109894
+ },
109895
+ {
109896
+ "epoch": 18.865222623345367,
109897
+ "grad_norm": Infinity,
109898
+ "learning_rate": 0.0001838299390078047,
109899
+ "loss": 11.054,
109900
+ "step": 156770
109901
+ },
109902
+ {
109903
+ "epoch": 18.866425992779785,
109904
+ "grad_norm": Infinity,
109905
+ "learning_rate": 0.0001838278648205335,
109906
+ "loss": 11.105,
109907
+ "step": 156780
109908
+ },
109909
+ {
109910
+ "epoch": 18.8676293622142,
109911
+ "grad_norm": Infinity,
109912
+ "learning_rate": 0.00018382579051194273,
109913
+ "loss": 11.1738,
109914
+ "step": 156790
109915
+ },
109916
+ {
109917
+ "epoch": 18.868832731648617,
109918
+ "grad_norm": Infinity,
109919
+ "learning_rate": 0.0001838237160820353,
109920
+ "loss": 11.0556,
109921
+ "step": 156800
109922
+ },
109923
+ {
109924
+ "epoch": 18.870036101083034,
109925
+ "grad_norm": Infinity,
109926
+ "learning_rate": 0.00018382164153081426,
109927
+ "loss": 11.1819,
109928
+ "step": 156810
109929
+ },
109930
+ {
109931
+ "epoch": 18.871239470517448,
109932
+ "grad_norm": Infinity,
109933
+ "learning_rate": 0.00018381956685828264,
109934
+ "loss": 11.1954,
109935
+ "step": 156820
109936
+ },
109937
+ {
109938
+ "epoch": 18.872442839951866,
109939
+ "grad_norm": Infinity,
109940
+ "learning_rate": 0.0001838174920644434,
109941
+ "loss": 11.17,
109942
+ "step": 156830
109943
+ },
109944
+ {
109945
+ "epoch": 18.87364620938628,
109946
+ "grad_norm": Infinity,
109947
+ "learning_rate": 0.00018381541714929955,
109948
+ "loss": 11.2456,
109949
+ "step": 156840
109950
+ },
109951
+ {
109952
+ "epoch": 18.874849578820697,
109953
+ "grad_norm": Infinity,
109954
+ "learning_rate": 0.00018381334211285412,
109955
+ "loss": 11.1573,
109956
+ "step": 156850
109957
+ },
109958
+ {
109959
+ "epoch": 18.876052948255115,
109960
+ "grad_norm": Infinity,
109961
+ "learning_rate": 0.00018381126695511004,
109962
+ "loss": 11.1414,
109963
+ "step": 156860
109964
+ },
109965
+ {
109966
+ "epoch": 18.87725631768953,
109967
+ "grad_norm": Infinity,
109968
+ "learning_rate": 0.0001838091916760704,
109969
+ "loss": 11.2085,
109970
+ "step": 156870
109971
+ },
109972
+ {
109973
+ "epoch": 18.878459687123947,
109974
+ "grad_norm": Infinity,
109975
+ "learning_rate": 0.00018380711627573817,
109976
+ "loss": 11.1137,
109977
+ "step": 156880
109978
+ },
109979
+ {
109980
+ "epoch": 18.879663056558364,
109981
+ "grad_norm": Infinity,
109982
+ "learning_rate": 0.00018380504075411633,
109983
+ "loss": 11.1565,
109984
+ "step": 156890
109985
+ },
109986
+ {
109987
+ "epoch": 18.880866425992778,
109988
+ "grad_norm": Infinity,
109989
+ "learning_rate": 0.0001838029651112079,
109990
+ "loss": 11.1592,
109991
+ "step": 156900
109992
+ },
109993
+ {
109994
+ "epoch": 18.882069795427196,
109995
+ "grad_norm": Infinity,
109996
+ "learning_rate": 0.00018380088934701592,
109997
+ "loss": 11.0634,
109998
+ "step": 156910
109999
+ },
110000
+ {
110001
+ "epoch": 18.883273164861613,
110002
+ "grad_norm": Infinity,
110003
+ "learning_rate": 0.00018379881346154337,
110004
+ "loss": 11.1236,
110005
+ "step": 156920
110006
+ },
110007
+ {
110008
+ "epoch": 18.884476534296027,
110009
+ "grad_norm": Infinity,
110010
+ "learning_rate": 0.0001837967374547932,
110011
+ "loss": 11.0946,
110012
+ "step": 156930
110013
+ },
110014
+ {
110015
+ "epoch": 18.885679903730445,
110016
+ "grad_norm": Infinity,
110017
+ "learning_rate": 0.00018379466132676853,
110018
+ "loss": 11.1062,
110019
+ "step": 156940
110020
+ },
110021
+ {
110022
+ "epoch": 18.886883273164862,
110023
+ "grad_norm": Infinity,
110024
+ "learning_rate": 0.00018379258507747223,
110025
+ "loss": 11.0293,
110026
+ "step": 156950
110027
+ },
110028
+ {
110029
+ "epoch": 18.888086642599276,
110030
+ "grad_norm": Infinity,
110031
+ "learning_rate": 0.00018379050870690738,
110032
+ "loss": 11.0878,
110033
+ "step": 156960
110034
+ },
110035
+ {
110036
+ "epoch": 18.889290012033694,
110037
+ "grad_norm": Infinity,
110038
+ "learning_rate": 0.000183788432215077,
110039
+ "loss": 10.9764,
110040
+ "step": 156970
110041
+ },
110042
+ {
110043
+ "epoch": 18.89049338146811,
110044
+ "grad_norm": Infinity,
110045
+ "learning_rate": 0.0001837863556019841,
110046
+ "loss": 11.0772,
110047
+ "step": 156980
110048
+ },
110049
+ {
110050
+ "epoch": 18.891696750902526,
110051
+ "grad_norm": Infinity,
110052
+ "learning_rate": 0.0001837842788676316,
110053
+ "loss": 11.1573,
110054
+ "step": 156990
110055
+ },
110056
+ {
110057
+ "epoch": 18.892900120336943,
110058
+ "grad_norm": Infinity,
110059
+ "learning_rate": 0.0001837822020120226,
110060
+ "loss": 11.0387,
110061
+ "step": 157000
110062
+ },
110063
+ {
110064
+ "epoch": 18.89410348977136,
110065
+ "grad_norm": Infinity,
110066
+ "learning_rate": 0.00018378012503516007,
110067
+ "loss": 11.1678,
110068
+ "step": 157010
110069
+ },
110070
+ {
110071
+ "epoch": 18.895306859205775,
110072
+ "grad_norm": Infinity,
110073
+ "learning_rate": 0.000183778047937047,
110074
+ "loss": 11.1658,
110075
+ "step": 157020
110076
+ },
110077
+ {
110078
+ "epoch": 18.896510228640192,
110079
+ "grad_norm": Infinity,
110080
+ "learning_rate": 0.0001837759707176864,
110081
+ "loss": 11.1512,
110082
+ "step": 157030
110083
+ },
110084
+ {
110085
+ "epoch": 18.89771359807461,
110086
+ "grad_norm": Infinity,
110087
+ "learning_rate": 0.0001837738933770813,
110088
+ "loss": 11.0356,
110089
+ "step": 157040
110090
+ },
110091
+ {
110092
+ "epoch": 18.898916967509024,
110093
+ "grad_norm": Infinity,
110094
+ "learning_rate": 0.00018377181591523468,
110095
+ "loss": 11.2211,
110096
+ "step": 157050
110097
+ },
110098
+ {
110099
+ "epoch": 18.90012033694344,
110100
+ "grad_norm": Infinity,
110101
+ "learning_rate": 0.0001837697383321496,
110102
+ "loss": 11.0747,
110103
+ "step": 157060
110104
+ },
110105
+ {
110106
+ "epoch": 18.90132370637786,
110107
+ "grad_norm": Infinity,
110108
+ "learning_rate": 0.00018376766062782896,
110109
+ "loss": 11.206,
110110
+ "step": 157070
110111
+ },
110112
+ {
110113
+ "epoch": 18.902527075812273,
110114
+ "grad_norm": Infinity,
110115
+ "learning_rate": 0.00018376558280227588,
110116
+ "loss": 11.1095,
110117
+ "step": 157080
110118
+ },
110119
+ {
110120
+ "epoch": 18.90373044524669,
110121
+ "grad_norm": Infinity,
110122
+ "learning_rate": 0.00018376350485549333,
110123
+ "loss": 11.1608,
110124
+ "step": 157090
110125
+ },
110126
+ {
110127
+ "epoch": 18.904933814681108,
110128
+ "grad_norm": Infinity,
110129
+ "learning_rate": 0.00018376142678748426,
110130
+ "loss": 11.0815,
110131
+ "step": 157100
110132
+ },
110133
+ {
110134
+ "epoch": 18.906137184115522,
110135
+ "grad_norm": Infinity,
110136
+ "learning_rate": 0.00018375934859825177,
110137
+ "loss": 11.0471,
110138
+ "step": 157110
110139
+ },
110140
+ {
110141
+ "epoch": 18.90734055354994,
110142
+ "grad_norm": Infinity,
110143
+ "learning_rate": 0.0001837572702877988,
110144
+ "loss": 11.2041,
110145
+ "step": 157120
110146
+ },
110147
+ {
110148
+ "epoch": 18.908543922984357,
110149
+ "grad_norm": Infinity,
110150
+ "learning_rate": 0.0001837551918561284,
110151
+ "loss": 11.1298,
110152
+ "step": 157130
110153
+ },
110154
+ {
110155
+ "epoch": 18.90974729241877,
110156
+ "grad_norm": Infinity,
110157
+ "learning_rate": 0.00018375311330324356,
110158
+ "loss": 11.1328,
110159
+ "step": 157140
110160
+ },
110161
+ {
110162
+ "epoch": 18.91095066185319,
110163
+ "grad_norm": Infinity,
110164
+ "learning_rate": 0.00018375103462914727,
110165
+ "loss": 11.069,
110166
+ "step": 157150
110167
+ },
110168
+ {
110169
+ "epoch": 18.912154031287606,
110170
+ "grad_norm": Infinity,
110171
+ "learning_rate": 0.00018374895583384254,
110172
+ "loss": 11.1397,
110173
+ "step": 157160
110174
+ },
110175
+ {
110176
+ "epoch": 18.91335740072202,
110177
+ "grad_norm": Infinity,
110178
+ "learning_rate": 0.00018374687691733244,
110179
+ "loss": 11.1763,
110180
+ "step": 157170
110181
+ },
110182
+ {
110183
+ "epoch": 18.914560770156438,
110184
+ "grad_norm": Infinity,
110185
+ "learning_rate": 0.00018374479787961989,
110186
+ "loss": 11.2508,
110187
+ "step": 157180
110188
+ },
110189
+ {
110190
+ "epoch": 18.915764139590856,
110191
+ "grad_norm": Infinity,
110192
+ "learning_rate": 0.00018374271872070795,
110193
+ "loss": 11.2083,
110194
+ "step": 157190
110195
+ },
110196
+ {
110197
+ "epoch": 18.91696750902527,
110198
+ "grad_norm": Infinity,
110199
+ "learning_rate": 0.00018374063944059964,
110200
+ "loss": 11.1163,
110201
+ "step": 157200
110202
+ },
110203
+ {
110204
+ "epoch": 18.918170878459687,
110205
+ "grad_norm": Infinity,
110206
+ "learning_rate": 0.00018373856003929794,
110207
+ "loss": 11.1841,
110208
+ "step": 157210
110209
+ },
110210
+ {
110211
+ "epoch": 18.919374247894105,
110212
+ "grad_norm": Infinity,
110213
+ "learning_rate": 0.0001837364805168059,
110214
+ "loss": 11.0735,
110215
+ "step": 157220
110216
+ },
110217
+ {
110218
+ "epoch": 18.92057761732852,
110219
+ "grad_norm": Infinity,
110220
+ "learning_rate": 0.00018373440087312643,
110221
+ "loss": 11.1616,
110222
+ "step": 157230
110223
+ },
110224
+ {
110225
+ "epoch": 18.921780986762936,
110226
+ "grad_norm": Infinity,
110227
+ "learning_rate": 0.00018373232110826266,
110228
+ "loss": 11.1764,
110229
+ "step": 157240
110230
+ },
110231
+ {
110232
+ "epoch": 18.922984356197354,
110233
+ "grad_norm": Infinity,
110234
+ "learning_rate": 0.00018373024122221753,
110235
+ "loss": 11.16,
110236
+ "step": 157250
110237
+ },
110238
+ {
110239
+ "epoch": 18.924187725631768,
110240
+ "grad_norm": Infinity,
110241
+ "learning_rate": 0.00018372816121499405,
110242
+ "loss": 11.0687,
110243
+ "step": 157260
110244
+ },
110245
+ {
110246
+ "epoch": 18.925391095066185,
110247
+ "grad_norm": Infinity,
110248
+ "learning_rate": 0.00018372608108659528,
110249
+ "loss": 11.1236,
110250
+ "step": 157270
110251
+ },
110252
+ {
110253
+ "epoch": 18.926594464500603,
110254
+ "grad_norm": Infinity,
110255
+ "learning_rate": 0.00018372400083702418,
110256
+ "loss": 11.1659,
110257
+ "step": 157280
110258
+ },
110259
+ {
110260
+ "epoch": 18.927797833935017,
110261
+ "grad_norm": Infinity,
110262
+ "learning_rate": 0.0001837219204662838,
110263
+ "loss": 11.0458,
110264
+ "step": 157290
110265
+ },
110266
+ {
110267
+ "epoch": 18.929001203369435,
110268
+ "grad_norm": Infinity,
110269
+ "learning_rate": 0.00018371983997437713,
110270
+ "loss": 11.0637,
110271
+ "step": 157300
110272
+ },
110273
+ {
110274
+ "epoch": 18.930204572803852,
110275
+ "grad_norm": Infinity,
110276
+ "learning_rate": 0.00018371775936130717,
110277
+ "loss": 11.1876,
110278
+ "step": 157310
110279
+ },
110280
+ {
110281
+ "epoch": 18.931407942238266,
110282
+ "grad_norm": Infinity,
110283
+ "learning_rate": 0.00018371567862707694,
110284
+ "loss": 11.2099,
110285
+ "step": 157320
110286
+ },
110287
+ {
110288
+ "epoch": 18.932611311672684,
110289
+ "grad_norm": Infinity,
110290
+ "learning_rate": 0.00018371359777168947,
110291
+ "loss": 11.1799,
110292
+ "step": 157330
110293
+ },
110294
+ {
110295
+ "epoch": 18.9338146811071,
110296
+ "grad_norm": Infinity,
110297
+ "learning_rate": 0.00018371151679514775,
110298
+ "loss": 11.0395,
110299
+ "step": 157340
110300
+ },
110301
+ {
110302
+ "epoch": 18.935018050541515,
110303
+ "grad_norm": Infinity,
110304
+ "learning_rate": 0.0001837094356974548,
110305
+ "loss": 11.1093,
110306
+ "step": 157350
110307
+ },
110308
+ {
110309
+ "epoch": 18.936221419975933,
110310
+ "grad_norm": Infinity,
110311
+ "learning_rate": 0.0001837073544786136,
110312
+ "loss": 11.1713,
110313
+ "step": 157360
110314
+ },
110315
+ {
110316
+ "epoch": 18.93742478941035,
110317
+ "grad_norm": Infinity,
110318
+ "learning_rate": 0.00018370527313862724,
110319
+ "loss": 11.2034,
110320
+ "step": 157370
110321
+ },
110322
+ {
110323
+ "epoch": 18.938628158844764,
110324
+ "grad_norm": Infinity,
110325
+ "learning_rate": 0.00018370319167749865,
110326
+ "loss": 11.1406,
110327
+ "step": 157380
110328
+ },
110329
+ {
110330
+ "epoch": 18.939831528279182,
110331
+ "grad_norm": Infinity,
110332
+ "learning_rate": 0.00018370111009523087,
110333
+ "loss": 11.2147,
110334
+ "step": 157390
110335
+ },
110336
+ {
110337
+ "epoch": 18.9410348977136,
110338
+ "grad_norm": Infinity,
110339
+ "learning_rate": 0.00018369902839182695,
110340
+ "loss": 11.2302,
110341
+ "step": 157400
110342
+ },
110343
+ {
110344
+ "epoch": 18.942238267148014,
110345
+ "grad_norm": Infinity,
110346
+ "learning_rate": 0.00018369694656728984,
110347
+ "loss": 11.1735,
110348
+ "step": 157410
110349
+ },
110350
+ {
110351
+ "epoch": 18.94344163658243,
110352
+ "grad_norm": Infinity,
110353
+ "learning_rate": 0.00018369486462162262,
110354
+ "loss": 11.1265,
110355
+ "step": 157420
110356
+ },
110357
+ {
110358
+ "epoch": 18.94464500601685,
110359
+ "grad_norm": Infinity,
110360
+ "learning_rate": 0.00018369278255482826,
110361
+ "loss": 11.1389,
110362
+ "step": 157430
110363
+ },
110364
+ {
110365
+ "epoch": 18.945848375451263,
110366
+ "grad_norm": Infinity,
110367
+ "learning_rate": 0.00018369070036690976,
110368
+ "loss": 11.1313,
110369
+ "step": 157440
110370
+ },
110371
+ {
110372
+ "epoch": 18.94705174488568,
110373
+ "grad_norm": Infinity,
110374
+ "learning_rate": 0.00018368861805787018,
110375
+ "loss": 11.151,
110376
+ "step": 157450
110377
+ },
110378
+ {
110379
+ "epoch": 18.948255114320098,
110380
+ "grad_norm": Infinity,
110381
+ "learning_rate": 0.00018368653562771245,
110382
+ "loss": 11.1124,
110383
+ "step": 157460
110384
+ },
110385
+ {
110386
+ "epoch": 18.949458483754512,
110387
+ "grad_norm": Infinity,
110388
+ "learning_rate": 0.00018368445307643969,
110389
+ "loss": 11.1773,
110390
+ "step": 157470
110391
+ },
110392
+ {
110393
+ "epoch": 18.95066185318893,
110394
+ "grad_norm": Infinity,
110395
+ "learning_rate": 0.00018368237040405485,
110396
+ "loss": 11.0857,
110397
+ "step": 157480
110398
+ },
110399
+ {
110400
+ "epoch": 18.951865222623347,
110401
+ "grad_norm": Infinity,
110402
+ "learning_rate": 0.00018368028761056094,
110403
+ "loss": 11.0975,
110404
+ "step": 157490
110405
+ },
110406
+ {
110407
+ "epoch": 18.95306859205776,
110408
+ "grad_norm": Infinity,
110409
+ "learning_rate": 0.00018367820469596105,
110410
+ "loss": 11.022,
110411
+ "step": 157500
110412
+ },
110413
+ {
110414
+ "epoch": 18.95427196149218,
110415
+ "grad_norm": Infinity,
110416
+ "learning_rate": 0.0001836761216602581,
110417
+ "loss": 11.2443,
110418
+ "step": 157510
110419
+ },
110420
+ {
110421
+ "epoch": 18.955475330926596,
110422
+ "grad_norm": Infinity,
110423
+ "learning_rate": 0.00018367403850345516,
110424
+ "loss": 11.1917,
110425
+ "step": 157520
110426
+ },
110427
+ {
110428
+ "epoch": 18.95667870036101,
110429
+ "grad_norm": Infinity,
110430
+ "learning_rate": 0.0001836719552255552,
110431
+ "loss": 11.022,
110432
+ "step": 157530
110433
+ },
110434
+ {
110435
+ "epoch": 18.957882069795428,
110436
+ "grad_norm": Infinity,
110437
+ "learning_rate": 0.00018366987182656127,
110438
+ "loss": 11.2335,
110439
+ "step": 157540
110440
+ },
110441
+ {
110442
+ "epoch": 18.959085439229845,
110443
+ "grad_norm": Infinity,
110444
+ "learning_rate": 0.0001836677883064764,
110445
+ "loss": 11.2388,
110446
+ "step": 157550
110447
+ },
110448
+ {
110449
+ "epoch": 18.96028880866426,
110450
+ "grad_norm": Infinity,
110451
+ "learning_rate": 0.00018366570466530358,
110452
+ "loss": 11.0706,
110453
+ "step": 157560
110454
+ },
110455
+ {
110456
+ "epoch": 18.961492178098677,
110457
+ "grad_norm": Infinity,
110458
+ "learning_rate": 0.0001836636209030458,
110459
+ "loss": 11.1347,
110460
+ "step": 157570
110461
+ },
110462
+ {
110463
+ "epoch": 18.96269554753309,
110464
+ "grad_norm": Infinity,
110465
+ "learning_rate": 0.00018366153701970613,
110466
+ "loss": 11.1475,
110467
+ "step": 157580
110468
+ },
110469
+ {
110470
+ "epoch": 18.96389891696751,
110471
+ "grad_norm": Infinity,
110472
+ "learning_rate": 0.00018365945301528758,
110473
+ "loss": 11.1984,
110474
+ "step": 157590
110475
+ },
110476
+ {
110477
+ "epoch": 18.965102286401926,
110478
+ "grad_norm": Infinity,
110479
+ "learning_rate": 0.00018365736888979314,
110480
+ "loss": 11.1857,
110481
+ "step": 157600
110482
+ },
110483
+ {
110484
+ "epoch": 18.96630565583634,
110485
+ "grad_norm": Infinity,
110486
+ "learning_rate": 0.00018365528464322578,
110487
+ "loss": 11.0506,
110488
+ "step": 157610
110489
+ },
110490
+ {
110491
+ "epoch": 18.967509025270758,
110492
+ "grad_norm": Infinity,
110493
+ "learning_rate": 0.0001836532002755886,
110494
+ "loss": 11.2283,
110495
+ "step": 157620
110496
+ },
110497
+ {
110498
+ "epoch": 18.968712394705175,
110499
+ "grad_norm": Infinity,
110500
+ "learning_rate": 0.0001836511157868846,
110501
+ "loss": 11.2945,
110502
+ "step": 157630
110503
+ },
110504
+ {
110505
+ "epoch": 18.96991576413959,
110506
+ "grad_norm": Infinity,
110507
+ "learning_rate": 0.00018364903117711677,
110508
+ "loss": 11.0815,
110509
+ "step": 157640
110510
+ },
110511
+ {
110512
+ "epoch": 18.971119133574007,
110513
+ "grad_norm": Infinity,
110514
+ "learning_rate": 0.00018364694644628816,
110515
+ "loss": 11.174,
110516
+ "step": 157650
110517
+ },
110518
+ {
110519
+ "epoch": 18.972322503008424,
110520
+ "grad_norm": Infinity,
110521
+ "learning_rate": 0.00018364486159440173,
110522
+ "loss": 11.0937,
110523
+ "step": 157660
110524
+ },
110525
+ {
110526
+ "epoch": 18.97352587244284,
110527
+ "grad_norm": Infinity,
110528
+ "learning_rate": 0.00018364277662146056,
110529
+ "loss": 11.1016,
110530
+ "step": 157670
110531
+ },
110532
+ {
110533
+ "epoch": 18.974729241877256,
110534
+ "grad_norm": Infinity,
110535
+ "learning_rate": 0.00018364069152746764,
110536
+ "loss": 11.2318,
110537
+ "step": 157680
110538
+ },
110539
+ {
110540
+ "epoch": 18.975932611311674,
110541
+ "grad_norm": Infinity,
110542
+ "learning_rate": 0.00018363860631242599,
110543
+ "loss": 11.0833,
110544
+ "step": 157690
110545
+ },
110546
+ {
110547
+ "epoch": 18.977135980746088,
110548
+ "grad_norm": Infinity,
110549
+ "learning_rate": 0.0001836365209763386,
110550
+ "loss": 11.0869,
110551
+ "step": 157700
110552
+ },
110553
+ {
110554
+ "epoch": 18.978339350180505,
110555
+ "grad_norm": Infinity,
110556
+ "learning_rate": 0.00018363443551920857,
110557
+ "loss": 11.23,
110558
+ "step": 157710
110559
+ },
110560
+ {
110561
+ "epoch": 18.979542719614923,
110562
+ "grad_norm": Infinity,
110563
+ "learning_rate": 0.0001836323499410388,
110564
+ "loss": 11.1504,
110565
+ "step": 157720
110566
+ },
110567
+ {
110568
+ "epoch": 18.980746089049337,
110569
+ "grad_norm": Infinity,
110570
+ "learning_rate": 0.0001836302642418324,
110571
+ "loss": 11.1502,
110572
+ "step": 157730
110573
+ },
110574
+ {
110575
+ "epoch": 18.981949458483754,
110576
+ "grad_norm": Infinity,
110577
+ "learning_rate": 0.00018362817842159236,
110578
+ "loss": 11.1727,
110579
+ "step": 157740
110580
+ },
110581
+ {
110582
+ "epoch": 18.983152827918172,
110583
+ "grad_norm": Infinity,
110584
+ "learning_rate": 0.0001836260924803217,
110585
+ "loss": 11.0221,
110586
+ "step": 157750
110587
+ },
110588
+ {
110589
+ "epoch": 18.984356197352586,
110590
+ "grad_norm": Infinity,
110591
+ "learning_rate": 0.0001836240064180234,
110592
+ "loss": 11.1395,
110593
+ "step": 157760
110594
+ },
110595
+ {
110596
+ "epoch": 18.985559566787003,
110597
+ "grad_norm": Infinity,
110598
+ "learning_rate": 0.00018362192023470058,
110599
+ "loss": 11.0751,
110600
+ "step": 157770
110601
+ },
110602
+ {
110603
+ "epoch": 18.98676293622142,
110604
+ "grad_norm": Infinity,
110605
+ "learning_rate": 0.00018361983393035618,
110606
+ "loss": 11.1112,
110607
+ "step": 157780
110608
+ },
110609
+ {
110610
+ "epoch": 18.987966305655835,
110611
+ "grad_norm": Infinity,
110612
+ "learning_rate": 0.00018361774750499322,
110613
+ "loss": 11.0373,
110614
+ "step": 157790
110615
+ },
110616
+ {
110617
+ "epoch": 18.989169675090253,
110618
+ "grad_norm": Infinity,
110619
+ "learning_rate": 0.00018361566095861473,
110620
+ "loss": 11.0098,
110621
+ "step": 157800
110622
+ },
110623
+ {
110624
+ "epoch": 18.99037304452467,
110625
+ "grad_norm": Infinity,
110626
+ "learning_rate": 0.00018361357429122377,
110627
+ "loss": 11.1946,
110628
+ "step": 157810
110629
+ },
110630
+ {
110631
+ "epoch": 18.991576413959084,
110632
+ "grad_norm": Infinity,
110633
+ "learning_rate": 0.0001836114875028233,
110634
+ "loss": 11.1975,
110635
+ "step": 157820
110636
+ },
110637
+ {
110638
+ "epoch": 18.9927797833935,
110639
+ "grad_norm": Infinity,
110640
+ "learning_rate": 0.00018360940059341635,
110641
+ "loss": 11.2993,
110642
+ "step": 157830
110643
+ },
110644
+ {
110645
+ "epoch": 18.99398315282792,
110646
+ "grad_norm": Infinity,
110647
+ "learning_rate": 0.00018360731356300596,
110648
+ "loss": 11.138,
110649
+ "step": 157840
110650
+ },
110651
+ {
110652
+ "epoch": 18.995186522262333,
110653
+ "grad_norm": Infinity,
110654
+ "learning_rate": 0.00018360522641159516,
110655
+ "loss": 11.2325,
110656
+ "step": 157850
110657
+ },
110658
+ {
110659
+ "epoch": 18.99638989169675,
110660
+ "grad_norm": Infinity,
110661
+ "learning_rate": 0.00018360313913918694,
110662
+ "loss": 11.1623,
110663
+ "step": 157860
110664
+ },
110665
+ {
110666
+ "epoch": 18.99759326113117,
110667
+ "grad_norm": Infinity,
110668
+ "learning_rate": 0.00018360105174578437,
110669
+ "loss": 11.0993,
110670
+ "step": 157870
110671
+ },
110672
+ {
110673
+ "epoch": 18.998796630565582,
110674
+ "grad_norm": Infinity,
110675
+ "learning_rate": 0.0001835989642313904,
110676
+ "loss": 11.1284,
110677
+ "step": 157880
110678
+ },
110679
+ {
110680
+ "epoch": 19.0,
110681
+ "grad_norm": Infinity,
110682
+ "learning_rate": 0.0001835968765960081,
110683
+ "loss": 11.1654,
110684
+ "step": 157890
110685
+ },
110686
+ {
110687
+ "epoch": 19.0,
110688
+ "eval_loss": 11.139603614807129,
110689
+ "eval_runtime": 118.4351,
110690
+ "eval_samples_per_second": 62.372,
110691
+ "eval_steps_per_second": 7.802,
110692
+ "step": 157890
110693
  }
110694
  ],
110695
  "logging_steps": 10,
 
110709
  "attributes": {}
110710
  }
110711
  },
110712
+ "total_flos": 3.3196933925675467e+19,
110713
  "train_batch_size": 1,
110714
  "trial_name": null,
110715
  "trial_params": null