ToastyPigeon
commited on
Training in progress, step 351, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step351/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step351/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +284 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 550593856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:049ec0ada0813997e586979480e5c26282234a3e55448657344416c90be4b443
|
3 |
size 550593856
|
last-checkpoint/global_step351/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d47520d68a34fea7b59d08fb29119a1ee7cf60c9dcc38af2d461972a9048513d
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75c9062ade3c410712aea545d4c8225dec40b61c91ba419a0e810bd5548dbcd5
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c29fcb5aa1969ed584d0132e62648df263773a5deea7782a49b4f7d7d544d877
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89048b4aafab116843f6ac627f9f7e16d1dc2dcc70ffae5553e43839655c7227
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ea0fb8acfc0d14afc7df300706b4e00cf47093b3e7ec83b3316107877ce4b51
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d73cea9cd303ef921a1cafa7f42287c41862b8166c07075aa94b708385730e98
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c01386ea76c02f985434f5721dc8e0b68d1146e1b5dd080d0f92aef8cbdd7856
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f777796c3242494e22af251b1bc8e1e2fc4a717e81e4b570bfdb251ec44e55
|
3 |
+
size 243591168
|
last-checkpoint/global_step351/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8995aabcd4b6e9742412a808c926eab9786a2d6f245f494aa6ea0dcbf2c093b8
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8df642eecd1ed86817a38a932e19f9c2058cce1a341153c2d0fbdb9ab73bc81a
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bf99ead9d6526b5da49ed12bf60aa817186b62c92a8e3914544ae9191c342c0
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f76a8e21348e535d739f6d1ac0a4767d7d92a89a4cbec2d92a94410011b4ed6f
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0765706d29af686fb0fb59f87e74e78d9f7d68a11a741cc6ec9d2ab538e2eef2
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa336096e2796613b15b2357492cb9525a20c10f7dce9ae9522127817578506a
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6872a1e1763d577b225b7c9f547de78e60d18697cda8af4111b6bbe74d9b1354
|
3 |
+
size 211435686
|
last-checkpoint/global_step351/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca144d0c56353acfd2da1d43386849a4d9e20c88b087d3c19d8f31420f046557
|
3 |
+
size 211435686
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step351
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fec0d859a8870bb3863562a0fbaa6ebb33536c9a365b6abc0e8f09aacd3377c
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:212dc5192b40486002c7fe7e08f770847069213c90b44b8eeb5c8c552aa09d2d
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef0f0879c31278fe1422ca799f1e946f52627895e6b6b450451e9b838670c583
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66c283d16fff6ad5c32bb8063ebb0876ab2d3331a701287a5518f6611da12f69
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d45b771d912fd42fd57bf4de4aad3e9035242ea784f2ed87aba4c621e5cd51a
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5016a6096b206da65160f901789323144aa46e697f4c7af7e972b16bb657ce2c
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b70b7d3a0a245937c94c1dfab4a674840cffae712e68cd2ec85b8111cd19b6ae
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:894e3dc130b31d1ff09091d3fba8c4756c7829f76a26489876f6acf7ddf58730
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77a901e68fe508c580838a0a83e25dcc491921acb23119eaf14194c3e5bc3346
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 39,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2263,6 +2263,287 @@
|
|
2263 |
"eval_samples_per_second": 1.222,
|
2264 |
"eval_steps_per_second": 0.153,
|
2265 |
"step": 312
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2266 |
}
|
2267 |
],
|
2268 |
"logging_steps": 1,
|
@@ -2282,7 +2563,7 @@
|
|
2282 |
"attributes": {}
|
2283 |
}
|
2284 |
},
|
2285 |
-
"total_flos":
|
2286 |
"train_batch_size": 1,
|
2287 |
"trial_name": null,
|
2288 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9140625,
|
5 |
"eval_steps": 39,
|
6 |
+
"global_step": 351,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2263 |
"eval_samples_per_second": 1.222,
|
2264 |
"eval_steps_per_second": 0.153,
|
2265 |
"step": 312
|
2266 |
+
},
|
2267 |
+
{
|
2268 |
+
"epoch": 0.8151041666666666,
|
2269 |
+
"grad_norm": 0.13210051743324183,
|
2270 |
+
"learning_rate": 1.8187729717774925e-05,
|
2271 |
+
"loss": 2.2753,
|
2272 |
+
"step": 313
|
2273 |
+
},
|
2274 |
+
{
|
2275 |
+
"epoch": 0.8177083333333334,
|
2276 |
+
"grad_norm": 0.1474646492548143,
|
2277 |
+
"learning_rate": 1.7965726034785466e-05,
|
2278 |
+
"loss": 2.3933,
|
2279 |
+
"step": 314
|
2280 |
+
},
|
2281 |
+
{
|
2282 |
+
"epoch": 0.8203125,
|
2283 |
+
"grad_norm": 0.16871013478557104,
|
2284 |
+
"learning_rate": 1.7746481011411416e-05,
|
2285 |
+
"loss": 2.3614,
|
2286 |
+
"step": 315
|
2287 |
+
},
|
2288 |
+
{
|
2289 |
+
"epoch": 0.8229166666666666,
|
2290 |
+
"grad_norm": 0.15193041380102754,
|
2291 |
+
"learning_rate": 1.753001097907572e-05,
|
2292 |
+
"loss": 2.3525,
|
2293 |
+
"step": 316
|
2294 |
+
},
|
2295 |
+
{
|
2296 |
+
"epoch": 0.8255208333333334,
|
2297 |
+
"grad_norm": 0.14955167945389547,
|
2298 |
+
"learning_rate": 1.7316332062494016e-05,
|
2299 |
+
"loss": 2.4257,
|
2300 |
+
"step": 317
|
2301 |
+
},
|
2302 |
+
{
|
2303 |
+
"epoch": 0.828125,
|
2304 |
+
"grad_norm": 0.1572944020219996,
|
2305 |
+
"learning_rate": 1.710546017847347e-05,
|
2306 |
+
"loss": 2.5182,
|
2307 |
+
"step": 318
|
2308 |
+
},
|
2309 |
+
{
|
2310 |
+
"epoch": 0.8307291666666666,
|
2311 |
+
"grad_norm": 0.1493341360803509,
|
2312 |
+
"learning_rate": 1.6897411034727218e-05,
|
2313 |
+
"loss": 2.3794,
|
2314 |
+
"step": 319
|
2315 |
+
},
|
2316 |
+
{
|
2317 |
+
"epoch": 0.8333333333333334,
|
2318 |
+
"grad_norm": 0.13561727835199758,
|
2319 |
+
"learning_rate": 1.66922001287042e-05,
|
2320 |
+
"loss": 2.2277,
|
2321 |
+
"step": 320
|
2322 |
+
},
|
2323 |
+
{
|
2324 |
+
"epoch": 0.8359375,
|
2325 |
+
"grad_norm": 0.13052163343368234,
|
2326 |
+
"learning_rate": 1.648984274643487e-05,
|
2327 |
+
"loss": 2.309,
|
2328 |
+
"step": 321
|
2329 |
+
},
|
2330 |
+
{
|
2331 |
+
"epoch": 0.8385416666666666,
|
2332 |
+
"grad_norm": 0.1578566178286769,
|
2333 |
+
"learning_rate": 1.629035396139247e-05,
|
2334 |
+
"loss": 2.4832,
|
2335 |
+
"step": 322
|
2336 |
+
},
|
2337 |
+
{
|
2338 |
+
"epoch": 0.8411458333333334,
|
2339 |
+
"grad_norm": 0.16163117665249763,
|
2340 |
+
"learning_rate": 1.6093748633370295e-05,
|
2341 |
+
"loss": 2.3515,
|
2342 |
+
"step": 323
|
2343 |
+
},
|
2344 |
+
{
|
2345 |
+
"epoch": 0.84375,
|
2346 |
+
"grad_norm": 0.16589478114748782,
|
2347 |
+
"learning_rate": 1.5900041407374708e-05,
|
2348 |
+
"loss": 2.5438,
|
2349 |
+
"step": 324
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 0.8463541666666666,
|
2353 |
+
"grad_norm": 0.13933316472296842,
|
2354 |
+
"learning_rate": 1.5709246712534315e-05,
|
2355 |
+
"loss": 2.4306,
|
2356 |
+
"step": 325
|
2357 |
+
},
|
2358 |
+
{
|
2359 |
+
"epoch": 0.8489583333333334,
|
2360 |
+
"grad_norm": 0.1606181863885322,
|
2361 |
+
"learning_rate": 1.5521378761025113e-05,
|
2362 |
+
"loss": 2.5248,
|
2363 |
+
"step": 326
|
2364 |
+
},
|
2365 |
+
{
|
2366 |
+
"epoch": 0.8515625,
|
2367 |
+
"grad_norm": 0.15583835916491617,
|
2368 |
+
"learning_rate": 1.5336451547011838e-05,
|
2369 |
+
"loss": 2.57,
|
2370 |
+
"step": 327
|
2371 |
+
},
|
2372 |
+
{
|
2373 |
+
"epoch": 0.8541666666666666,
|
2374 |
+
"grad_norm": 0.13760770777048018,
|
2375 |
+
"learning_rate": 1.515447884560556e-05,
|
2376 |
+
"loss": 2.4217,
|
2377 |
+
"step": 328
|
2378 |
+
},
|
2379 |
+
{
|
2380 |
+
"epoch": 0.8567708333333334,
|
2381 |
+
"grad_norm": 0.14254277680719865,
|
2382 |
+
"learning_rate": 1.4975474211837561e-05,
|
2383 |
+
"loss": 2.5538,
|
2384 |
+
"step": 329
|
2385 |
+
},
|
2386 |
+
{
|
2387 |
+
"epoch": 0.859375,
|
2388 |
+
"grad_norm": 0.15022329502854323,
|
2389 |
+
"learning_rate": 1.479945097964967e-05,
|
2390 |
+
"loss": 2.4875,
|
2391 |
+
"step": 330
|
2392 |
+
},
|
2393 |
+
{
|
2394 |
+
"epoch": 0.8619791666666666,
|
2395 |
+
"grad_norm": 0.14442101795365278,
|
2396 |
+
"learning_rate": 1.4626422260900962e-05,
|
2397 |
+
"loss": 2.4053,
|
2398 |
+
"step": 331
|
2399 |
+
},
|
2400 |
+
{
|
2401 |
+
"epoch": 0.8645833333333334,
|
2402 |
+
"grad_norm": 0.1339112894355035,
|
2403 |
+
"learning_rate": 1.4456400944391146e-05,
|
2404 |
+
"loss": 2.3938,
|
2405 |
+
"step": 332
|
2406 |
+
},
|
2407 |
+
{
|
2408 |
+
"epoch": 0.8671875,
|
2409 |
+
"grad_norm": 0.15065063782655086,
|
2410 |
+
"learning_rate": 1.4289399694900398e-05,
|
2411 |
+
"loss": 2.4288,
|
2412 |
+
"step": 333
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"epoch": 0.8697916666666666,
|
2416 |
+
"grad_norm": 0.15054714911892625,
|
2417 |
+
"learning_rate": 1.4125430952246071e-05,
|
2418 |
+
"loss": 2.4214,
|
2419 |
+
"step": 334
|
2420 |
+
},
|
2421 |
+
{
|
2422 |
+
"epoch": 0.8723958333333334,
|
2423 |
+
"grad_norm": 0.1560638225083581,
|
2424 |
+
"learning_rate": 1.3964506930355947e-05,
|
2425 |
+
"loss": 2.3693,
|
2426 |
+
"step": 335
|
2427 |
+
},
|
2428 |
+
{
|
2429 |
+
"epoch": 0.875,
|
2430 |
+
"grad_norm": 0.17170476718899333,
|
2431 |
+
"learning_rate": 1.380663961635852e-05,
|
2432 |
+
"loss": 2.3902,
|
2433 |
+
"step": 336
|
2434 |
+
},
|
2435 |
+
{
|
2436 |
+
"epoch": 0.8776041666666666,
|
2437 |
+
"grad_norm": 0.14853468036224993,
|
2438 |
+
"learning_rate": 1.3651840769690028e-05,
|
2439 |
+
"loss": 2.5229,
|
2440 |
+
"step": 337
|
2441 |
+
},
|
2442 |
+
{
|
2443 |
+
"epoch": 0.8802083333333334,
|
2444 |
+
"grad_norm": 0.13862935132651075,
|
2445 |
+
"learning_rate": 1.350012192121854e-05,
|
2446 |
+
"loss": 2.272,
|
2447 |
+
"step": 338
|
2448 |
+
},
|
2449 |
+
{
|
2450 |
+
"epoch": 0.8828125,
|
2451 |
+
"grad_norm": 0.16359619179880716,
|
2452 |
+
"learning_rate": 1.3351494372384995e-05,
|
2453 |
+
"loss": 2.5031,
|
2454 |
+
"step": 339
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 0.8854166666666666,
|
2458 |
+
"grad_norm": 0.15198032362712752,
|
2459 |
+
"learning_rate": 1.3205969194361395e-05,
|
2460 |
+
"loss": 2.355,
|
2461 |
+
"step": 340
|
2462 |
+
},
|
2463 |
+
{
|
2464 |
+
"epoch": 0.8880208333333334,
|
2465 |
+
"grad_norm": 0.12892013869101834,
|
2466 |
+
"learning_rate": 1.3063557227226094e-05,
|
2467 |
+
"loss": 2.3602,
|
2468 |
+
"step": 341
|
2469 |
+
},
|
2470 |
+
{
|
2471 |
+
"epoch": 0.890625,
|
2472 |
+
"grad_norm": 0.1498170376596413,
|
2473 |
+
"learning_rate": 1.292426907915634e-05,
|
2474 |
+
"loss": 2.3912,
|
2475 |
+
"step": 342
|
2476 |
+
},
|
2477 |
+
{
|
2478 |
+
"epoch": 0.8932291666666666,
|
2479 |
+
"grad_norm": 0.15638364831022664,
|
2480 |
+
"learning_rate": 1.2788115125638068e-05,
|
2481 |
+
"loss": 2.4359,
|
2482 |
+
"step": 343
|
2483 |
+
},
|
2484 |
+
{
|
2485 |
+
"epoch": 0.8958333333333334,
|
2486 |
+
"grad_norm": 0.151291537071441,
|
2487 |
+
"learning_rate": 1.2655105508693065e-05,
|
2488 |
+
"loss": 2.3082,
|
2489 |
+
"step": 344
|
2490 |
+
},
|
2491 |
+
{
|
2492 |
+
"epoch": 0.8984375,
|
2493 |
+
"grad_norm": 0.14865959228520678,
|
2494 |
+
"learning_rate": 1.252525013612346e-05,
|
2495 |
+
"loss": 2.3877,
|
2496 |
+
"step": 345
|
2497 |
+
},
|
2498 |
+
{
|
2499 |
+
"epoch": 0.9010416666666666,
|
2500 |
+
"grad_norm": 0.1459248998632338,
|
2501 |
+
"learning_rate": 1.2398558680773736e-05,
|
2502 |
+
"loss": 2.3293,
|
2503 |
+
"step": 346
|
2504 |
+
},
|
2505 |
+
{
|
2506 |
+
"epoch": 0.9036458333333334,
|
2507 |
+
"grad_norm": 0.14220792400443616,
|
2508 |
+
"learning_rate": 1.227504057981016e-05,
|
2509 |
+
"loss": 2.4427,
|
2510 |
+
"step": 347
|
2511 |
+
},
|
2512 |
+
{
|
2513 |
+
"epoch": 0.90625,
|
2514 |
+
"grad_norm": 0.15415554255450412,
|
2515 |
+
"learning_rate": 1.2154705034017866e-05,
|
2516 |
+
"loss": 2.383,
|
2517 |
+
"step": 348
|
2518 |
+
},
|
2519 |
+
{
|
2520 |
+
"epoch": 0.9088541666666666,
|
2521 |
+
"grad_norm": 0.1457198339110242,
|
2522 |
+
"learning_rate": 1.203756100711545e-05,
|
2523 |
+
"loss": 2.3751,
|
2524 |
+
"step": 349
|
2525 |
+
},
|
2526 |
+
{
|
2527 |
+
"epoch": 0.9114583333333334,
|
2528 |
+
"grad_norm": 0.15986391066488098,
|
2529 |
+
"learning_rate": 1.1923617225087293e-05,
|
2530 |
+
"loss": 2.3891,
|
2531 |
+
"step": 350
|
2532 |
+
},
|
2533 |
+
{
|
2534 |
+
"epoch": 0.9140625,
|
2535 |
+
"grad_norm": 0.13681607679501942,
|
2536 |
+
"learning_rate": 1.1812882175533564e-05,
|
2537 |
+
"loss": 2.2878,
|
2538 |
+
"step": 351
|
2539 |
+
},
|
2540 |
+
{
|
2541 |
+
"epoch": 0.9140625,
|
2542 |
+
"eval_loss": 2.397103786468506,
|
2543 |
+
"eval_runtime": 65.1202,
|
2544 |
+
"eval_samples_per_second": 1.228,
|
2545 |
+
"eval_steps_per_second": 0.154,
|
2546 |
+
"step": 351
|
2547 |
}
|
2548 |
],
|
2549 |
"logging_steps": 1,
|
|
|
2563 |
"attributes": {}
|
2564 |
}
|
2565 |
},
|
2566 |
+
"total_flos": 116032970686464.0,
|
2567 |
"train_batch_size": 1,
|
2568 |
"trial_name": null,
|
2569 |
"trial_params": null
|