ToastyPigeon committed on
Commit
5a42b6f
·
verified ·
1 Parent(s): 08f7db1

Training in progress, step 351, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step351/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step351/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step351/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step351/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step351/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step351/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step351/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step351/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step351/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step351/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step351/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step351/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step351/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step351/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step351/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step351/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +284 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0809bafbece4702b690076073d24e954afb4de759d2258a07d2ced1ca7d828ec
3
  size 550593856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:049ec0ada0813997e586979480e5c26282234a3e55448657344416c90be4b443
3
  size 550593856
last-checkpoint/global_step351/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47520d68a34fea7b59d08fb29119a1ee7cf60c9dcc38af2d461972a9048513d
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c9062ade3c410712aea545d4c8225dec40b61c91ba419a0e810bd5548dbcd5
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29fcb5aa1969ed584d0132e62648df263773a5deea7782a49b4f7d7d544d877
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89048b4aafab116843f6ac627f9f7e16d1dc2dcc70ffae5553e43839655c7227
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea0fb8acfc0d14afc7df300706b4e00cf47093b3e7ec83b3316107877ce4b51
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d73cea9cd303ef921a1cafa7f42287c41862b8166c07075aa94b708385730e98
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01386ea76c02f985434f5721dc8e0b68d1146e1b5dd080d0f92aef8cbdd7856
3
+ size 243591168
last-checkpoint/global_step351/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f777796c3242494e22af251b1bc8e1e2fc4a717e81e4b570bfdb251ec44e55
3
+ size 243591168
last-checkpoint/global_step351/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8995aabcd4b6e9742412a808c926eab9786a2d6f245f494aa6ea0dcbf2c093b8
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8df642eecd1ed86817a38a932e19f9c2058cce1a341153c2d0fbdb9ab73bc81a
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bf99ead9d6526b5da49ed12bf60aa817186b62c92a8e3914544ae9191c342c0
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76a8e21348e535d739f6d1ac0a4767d7d92a89a4cbec2d92a94410011b4ed6f
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0765706d29af686fb0fb59f87e74e78d9f7d68a11a741cc6ec9d2ab538e2eef2
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa336096e2796613b15b2357492cb9525a20c10f7dce9ae9522127817578506a
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6872a1e1763d577b225b7c9f547de78e60d18697cda8af4111b6bbe74d9b1354
3
+ size 211435686
last-checkpoint/global_step351/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca144d0c56353acfd2da1d43386849a4d9e20c88b087d3c19d8f31420f046557
3
+ size 211435686
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step312
 
1
+ global_step351
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9662f90ac871284e42fb85b7bc6f2a2f3759cac7bb678faaa94f777fedabb313
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fec0d859a8870bb3863562a0fbaa6ebb33536c9a365b6abc0e8f09aacd3377c
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2f3b2288ad04448267691f0a9a926d6ea4e1872e2d6a6114ff6a455551a1a14
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:212dc5192b40486002c7fe7e08f770847069213c90b44b8eeb5c8c552aa09d2d
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7054584e190f07a5be6417b3d068feba1c297bfef11d44338e024157eac1eea
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef0f0879c31278fe1422ca799f1e946f52627895e6b6b450451e9b838670c583
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:699ca77f5f867262084cafd79c14bfb3ebb441808dca86d851f008f9e856702e
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c283d16fff6ad5c32bb8063ebb0876ab2d3331a701287a5518f6611da12f69
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6107f40d55050f71acd0cf64cee3cb55f7a5f2f65beb01564a3368189c88b78a
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d45b771d912fd42fd57bf4de4aad3e9035242ea784f2ed87aba4c621e5cd51a
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34860f3d27e6296937a819f44272b9427aaea2bb9384f4f850d4d368c07caa62
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5016a6096b206da65160f901789323144aa46e697f4c7af7e972b16bb657ce2c
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0486011b840a4dd1cc3653fce36b9eeabdeaf4aaa824c495c5201d1b4398ad03
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b70b7d3a0a245937c94c1dfab4a674840cffae712e68cd2ec85b8111cd19b6ae
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8865eac23bdd917b7f787c6c0a4ad6441de4e3e64c1376b83d0e8ca0e795d614
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:894e3dc130b31d1ff09091d3fba8c4756c7829f76a26489876f6acf7ddf58730
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cedf4b34e868c4fb23dc007fcfc750ae8b8c1963085e5e7d64434ad8b0f1b971
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a901e68fe508c580838a0a83e25dcc491921acb23119eaf14194c3e5bc3346
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8125,
5
  "eval_steps": 39,
6
- "global_step": 312,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2263,6 +2263,287 @@
2263
  "eval_samples_per_second": 1.222,
2264
  "eval_steps_per_second": 0.153,
2265
  "step": 312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2266
  }
2267
  ],
2268
  "logging_steps": 1,
@@ -2282,7 +2563,7 @@
2282
  "attributes": {}
2283
  }
2284
  },
2285
- "total_flos": 103140418387968.0,
2286
  "train_batch_size": 1,
2287
  "trial_name": null,
2288
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9140625,
5
  "eval_steps": 39,
6
+ "global_step": 351,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2263
  "eval_samples_per_second": 1.222,
2264
  "eval_steps_per_second": 0.153,
2265
  "step": 312
2266
+ },
2267
+ {
2268
+ "epoch": 0.8151041666666666,
2269
+ "grad_norm": 0.13210051743324183,
2270
+ "learning_rate": 1.8187729717774925e-05,
2271
+ "loss": 2.2753,
2272
+ "step": 313
2273
+ },
2274
+ {
2275
+ "epoch": 0.8177083333333334,
2276
+ "grad_norm": 0.1474646492548143,
2277
+ "learning_rate": 1.7965726034785466e-05,
2278
+ "loss": 2.3933,
2279
+ "step": 314
2280
+ },
2281
+ {
2282
+ "epoch": 0.8203125,
2283
+ "grad_norm": 0.16871013478557104,
2284
+ "learning_rate": 1.7746481011411416e-05,
2285
+ "loss": 2.3614,
2286
+ "step": 315
2287
+ },
2288
+ {
2289
+ "epoch": 0.8229166666666666,
2290
+ "grad_norm": 0.15193041380102754,
2291
+ "learning_rate": 1.753001097907572e-05,
2292
+ "loss": 2.3525,
2293
+ "step": 316
2294
+ },
2295
+ {
2296
+ "epoch": 0.8255208333333334,
2297
+ "grad_norm": 0.14955167945389547,
2298
+ "learning_rate": 1.7316332062494016e-05,
2299
+ "loss": 2.4257,
2300
+ "step": 317
2301
+ },
2302
+ {
2303
+ "epoch": 0.828125,
2304
+ "grad_norm": 0.1572944020219996,
2305
+ "learning_rate": 1.710546017847347e-05,
2306
+ "loss": 2.5182,
2307
+ "step": 318
2308
+ },
2309
+ {
2310
+ "epoch": 0.8307291666666666,
2311
+ "grad_norm": 0.1493341360803509,
2312
+ "learning_rate": 1.6897411034727218e-05,
2313
+ "loss": 2.3794,
2314
+ "step": 319
2315
+ },
2316
+ {
2317
+ "epoch": 0.8333333333333334,
2318
+ "grad_norm": 0.13561727835199758,
2319
+ "learning_rate": 1.66922001287042e-05,
2320
+ "loss": 2.2277,
2321
+ "step": 320
2322
+ },
2323
+ {
2324
+ "epoch": 0.8359375,
2325
+ "grad_norm": 0.13052163343368234,
2326
+ "learning_rate": 1.648984274643487e-05,
2327
+ "loss": 2.309,
2328
+ "step": 321
2329
+ },
2330
+ {
2331
+ "epoch": 0.8385416666666666,
2332
+ "grad_norm": 0.1578566178286769,
2333
+ "learning_rate": 1.629035396139247e-05,
2334
+ "loss": 2.4832,
2335
+ "step": 322
2336
+ },
2337
+ {
2338
+ "epoch": 0.8411458333333334,
2339
+ "grad_norm": 0.16163117665249763,
2340
+ "learning_rate": 1.6093748633370295e-05,
2341
+ "loss": 2.3515,
2342
+ "step": 323
2343
+ },
2344
+ {
2345
+ "epoch": 0.84375,
2346
+ "grad_norm": 0.16589478114748782,
2347
+ "learning_rate": 1.5900041407374708e-05,
2348
+ "loss": 2.5438,
2349
+ "step": 324
2350
+ },
2351
+ {
2352
+ "epoch": 0.8463541666666666,
2353
+ "grad_norm": 0.13933316472296842,
2354
+ "learning_rate": 1.5709246712534315e-05,
2355
+ "loss": 2.4306,
2356
+ "step": 325
2357
+ },
2358
+ {
2359
+ "epoch": 0.8489583333333334,
2360
+ "grad_norm": 0.1606181863885322,
2361
+ "learning_rate": 1.5521378761025113e-05,
2362
+ "loss": 2.5248,
2363
+ "step": 326
2364
+ },
2365
+ {
2366
+ "epoch": 0.8515625,
2367
+ "grad_norm": 0.15583835916491617,
2368
+ "learning_rate": 1.5336451547011838e-05,
2369
+ "loss": 2.57,
2370
+ "step": 327
2371
+ },
2372
+ {
2373
+ "epoch": 0.8541666666666666,
2374
+ "grad_norm": 0.13760770777048018,
2375
+ "learning_rate": 1.515447884560556e-05,
2376
+ "loss": 2.4217,
2377
+ "step": 328
2378
+ },
2379
+ {
2380
+ "epoch": 0.8567708333333334,
2381
+ "grad_norm": 0.14254277680719865,
2382
+ "learning_rate": 1.4975474211837561e-05,
2383
+ "loss": 2.5538,
2384
+ "step": 329
2385
+ },
2386
+ {
2387
+ "epoch": 0.859375,
2388
+ "grad_norm": 0.15022329502854323,
2389
+ "learning_rate": 1.479945097964967e-05,
2390
+ "loss": 2.4875,
2391
+ "step": 330
2392
+ },
2393
+ {
2394
+ "epoch": 0.8619791666666666,
2395
+ "grad_norm": 0.14442101795365278,
2396
+ "learning_rate": 1.4626422260900962e-05,
2397
+ "loss": 2.4053,
2398
+ "step": 331
2399
+ },
2400
+ {
2401
+ "epoch": 0.8645833333333334,
2402
+ "grad_norm": 0.1339112894355035,
2403
+ "learning_rate": 1.4456400944391146e-05,
2404
+ "loss": 2.3938,
2405
+ "step": 332
2406
+ },
2407
+ {
2408
+ "epoch": 0.8671875,
2409
+ "grad_norm": 0.15065063782655086,
2410
+ "learning_rate": 1.4289399694900398e-05,
2411
+ "loss": 2.4288,
2412
+ "step": 333
2413
+ },
2414
+ {
2415
+ "epoch": 0.8697916666666666,
2416
+ "grad_norm": 0.15054714911892625,
2417
+ "learning_rate": 1.4125430952246071e-05,
2418
+ "loss": 2.4214,
2419
+ "step": 334
2420
+ },
2421
+ {
2422
+ "epoch": 0.8723958333333334,
2423
+ "grad_norm": 0.1560638225083581,
2424
+ "learning_rate": 1.3964506930355947e-05,
2425
+ "loss": 2.3693,
2426
+ "step": 335
2427
+ },
2428
+ {
2429
+ "epoch": 0.875,
2430
+ "grad_norm": 0.17170476718899333,
2431
+ "learning_rate": 1.380663961635852e-05,
2432
+ "loss": 2.3902,
2433
+ "step": 336
2434
+ },
2435
+ {
2436
+ "epoch": 0.8776041666666666,
2437
+ "grad_norm": 0.14853468036224993,
2438
+ "learning_rate": 1.3651840769690028e-05,
2439
+ "loss": 2.5229,
2440
+ "step": 337
2441
+ },
2442
+ {
2443
+ "epoch": 0.8802083333333334,
2444
+ "grad_norm": 0.13862935132651075,
2445
+ "learning_rate": 1.350012192121854e-05,
2446
+ "loss": 2.272,
2447
+ "step": 338
2448
+ },
2449
+ {
2450
+ "epoch": 0.8828125,
2451
+ "grad_norm": 0.16359619179880716,
2452
+ "learning_rate": 1.3351494372384995e-05,
2453
+ "loss": 2.5031,
2454
+ "step": 339
2455
+ },
2456
+ {
2457
+ "epoch": 0.8854166666666666,
2458
+ "grad_norm": 0.15198032362712752,
2459
+ "learning_rate": 1.3205969194361395e-05,
2460
+ "loss": 2.355,
2461
+ "step": 340
2462
+ },
2463
+ {
2464
+ "epoch": 0.8880208333333334,
2465
+ "grad_norm": 0.12892013869101834,
2466
+ "learning_rate": 1.3063557227226094e-05,
2467
+ "loss": 2.3602,
2468
+ "step": 341
2469
+ },
2470
+ {
2471
+ "epoch": 0.890625,
2472
+ "grad_norm": 0.1498170376596413,
2473
+ "learning_rate": 1.292426907915634e-05,
2474
+ "loss": 2.3912,
2475
+ "step": 342
2476
+ },
2477
+ {
2478
+ "epoch": 0.8932291666666666,
2479
+ "grad_norm": 0.15638364831022664,
2480
+ "learning_rate": 1.2788115125638068e-05,
2481
+ "loss": 2.4359,
2482
+ "step": 343
2483
+ },
2484
+ {
2485
+ "epoch": 0.8958333333333334,
2486
+ "grad_norm": 0.151291537071441,
2487
+ "learning_rate": 1.2655105508693065e-05,
2488
+ "loss": 2.3082,
2489
+ "step": 344
2490
+ },
2491
+ {
2492
+ "epoch": 0.8984375,
2493
+ "grad_norm": 0.14865959228520678,
2494
+ "learning_rate": 1.252525013612346e-05,
2495
+ "loss": 2.3877,
2496
+ "step": 345
2497
+ },
2498
+ {
2499
+ "epoch": 0.9010416666666666,
2500
+ "grad_norm": 0.1459248998632338,
2501
+ "learning_rate": 1.2398558680773736e-05,
2502
+ "loss": 2.3293,
2503
+ "step": 346
2504
+ },
2505
+ {
2506
+ "epoch": 0.9036458333333334,
2507
+ "grad_norm": 0.14220792400443616,
2508
+ "learning_rate": 1.227504057981016e-05,
2509
+ "loss": 2.4427,
2510
+ "step": 347
2511
+ },
2512
+ {
2513
+ "epoch": 0.90625,
2514
+ "grad_norm": 0.15415554255450412,
2515
+ "learning_rate": 1.2154705034017866e-05,
2516
+ "loss": 2.383,
2517
+ "step": 348
2518
+ },
2519
+ {
2520
+ "epoch": 0.9088541666666666,
2521
+ "grad_norm": 0.1457198339110242,
2522
+ "learning_rate": 1.203756100711545e-05,
2523
+ "loss": 2.3751,
2524
+ "step": 349
2525
+ },
2526
+ {
2527
+ "epoch": 0.9114583333333334,
2528
+ "grad_norm": 0.15986391066488098,
2529
+ "learning_rate": 1.1923617225087293e-05,
2530
+ "loss": 2.3891,
2531
+ "step": 350
2532
+ },
2533
+ {
2534
+ "epoch": 0.9140625,
2535
+ "grad_norm": 0.13681607679501942,
2536
+ "learning_rate": 1.1812882175533564e-05,
2537
+ "loss": 2.2878,
2538
+ "step": 351
2539
+ },
2540
+ {
2541
+ "epoch": 0.9140625,
2542
+ "eval_loss": 2.397103786468506,
2543
+ "eval_runtime": 65.1202,
2544
+ "eval_samples_per_second": 1.228,
2545
+ "eval_steps_per_second": 0.154,
2546
+ "step": 351
2547
  }
2548
  ],
2549
  "logging_steps": 1,
 
2563
  "attributes": {}
2564
  }
2565
  },
2566
+ "total_flos": 116032970686464.0,
2567
  "train_batch_size": 1,
2568
  "trial_name": null,
2569
  "trial_params": null