Training in progress, step 59808, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fbb44ad69600872ef0bb443d3b43f12a5f06e90608b73f03ff4028fa72c313ed
 size 1484196216
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:af01e6e7f36514043e3ce67e600b1d8d265d938af879be15564a49cf1e029d6f
 size 2968683840
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ae79343f53c51d705aaa2e789fb3a4d88d0a467b2f986398151d95d4faac2c55
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:023b85de6d1b039ecc813c0216bc60820d589f64ee137f692f288c5c4fc4729e
 size 1064
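The four binary files above change only in their Git LFS pointer: the version line stays fixed while the oid sha256 digest and size identify the new object. A minimal sketch, assuming the checkpoint has been pulled into a local clone with the paths shown above, of recomputing a downloaded file's digest and comparing it against the pointer's oid:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks so multi-GB objects fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the adapter_model.safetensors pointer in this commit.
expected = "fbb44ad69600872ef0bb443d3b43f12a5f06e90608b73f03ff4028fa72c313ed"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("ok" if actual == expected else f"digest mismatch: {actual}")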
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.
+  "epoch": 2.9999373017166793,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 59808,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -418273,6 +418273,412 @@
       "learning_rate": 2.3282539897695464e-10,
       "loss": 1.4932,
       "step": 59750
+    },
+    {
+      "epoch": 2.997078259997241,
+      "grad_norm": 2.9519903659820557,
+      "learning_rate": 2.2486615378625176e-10,
+      "loss": 1.1974,
+      "step": 59751
+    },
+    {
+      "epoch": 2.9971284186238982,
+      "grad_norm": 2.8692264556884766,
+      "learning_rate": 2.1704532984223592e-10,
+      "loss": 1.525,
+      "step": 59752
+    },
+    {
+      "epoch": 2.997178577250555,
+      "grad_norm": 2.799671173095703,
+      "learning_rate": 2.0936292716711158e-10,
+      "loss": 1.2972,
+      "step": 59753
+    },
+    {
+      "epoch": 2.997228735877212,
+      "grad_norm": 3.265291690826416,
+      "learning_rate": 2.0181894577198103e-10,
+      "loss": 1.5709,
+      "step": 59754
+    },
+    {
+      "epoch": 2.9972788945038684,
+      "grad_norm": 2.9593746662139893,
+      "learning_rate": 1.944133856901509e-10,
+      "loss": 1.4483,
+      "step": 59755
+    },
+    {
+      "epoch": 2.9973290531305254,
+      "grad_norm": 2.9063448905944824,
+      "learning_rate": 1.8714624693272342e-10,
+      "loss": 1.113,
+      "step": 59756
+    },
+    {
+      "epoch": 2.997379211757182,
+      "grad_norm": 2.510237693786621,
+      "learning_rate": 1.800175295274542e-10,
+      "loss": 0.9191,
+      "step": 59757
+    },
+    {
+      "epoch": 2.997429370383839,
+      "grad_norm": 3.348395824432373,
+      "learning_rate": 1.730272334909966e-10,
+      "loss": 1.4245,
+      "step": 59758
+    },
+    {
+      "epoch": 2.9974795290104956,
+      "grad_norm": 3.254422426223755,
+      "learning_rate": 1.661753588400039e-10,
+      "loss": 1.3145,
+      "step": 59759
+    },
+    {
+      "epoch": 2.9975296876371527,
+      "grad_norm": 3.8515803813934326,
+      "learning_rate": 1.5946190559668063e-10,
+      "loss": 1.3135,
+      "step": 59760
+    },
+    {
+      "epoch": 2.9975798462638092,
+      "grad_norm": 3.0799078941345215,
+      "learning_rate": 1.5288687377768008e-10,
+      "loss": 1.2047,
+      "step": 59761
+    },
+    {
+      "epoch": 2.997630004890466,
+      "grad_norm": 3.328263998031616,
+      "learning_rate": 1.4645026339965562e-10,
+      "loss": 1.4763,
+      "step": 59762
+    },
+    {
+      "epoch": 2.997680163517123,
+      "grad_norm": 3.2846860885620117,
+      "learning_rate": 1.4015207448481172e-10,
+      "loss": 1.349,
+      "step": 59763
+    },
+    {
+      "epoch": 2.99773032214378,
+      "grad_norm": 3.0045347213745117,
+      "learning_rate": 1.339923070498017e-10,
+      "loss": 1.3809,
+      "step": 59764
+    },
+    {
+      "epoch": 2.9977804807704365,
+      "grad_norm": 3.032623052597046,
+      "learning_rate": 1.2797096111127893e-10,
+      "loss": 1.155,
+      "step": 59765
+    },
+    {
+      "epoch": 2.997830639397093,
+      "grad_norm": 3.425898313522339,
+      "learning_rate": 1.2208803668034563e-10,
+      "loss": 1.4988,
+      "step": 59766
+    },
+    {
+      "epoch": 2.99788079802375,
+      "grad_norm": 3.9673779010772705,
+      "learning_rate": 1.1634353377920626e-10,
+      "loss": 1.5579,
+      "step": 59767
+    },
+    {
+      "epoch": 2.997930956650407,
+      "grad_norm": 3.9234225749969482,
+      "learning_rate": 1.1073745242451417e-10,
+      "loss": 1.2646,
+      "step": 59768
+    },
+    {
+      "epoch": 2.9979811152770637,
+      "grad_norm": 4.005583763122559,
+      "learning_rate": 1.052697926329227e-10,
+      "loss": 1.1601,
+      "step": 59769
+    },
+    {
+      "epoch": 2.9980312739037203,
+      "grad_norm": 4.037759304046631,
+      "learning_rate": 9.994055440998296e-11,
+      "loss": 1.0708,
+      "step": 59770
+    },
+    {
+      "epoch": 2.9980814325303773,
+      "grad_norm": 4.791607856750488,
+      "learning_rate": 9.474973778345053e-11,
+      "loss": 1.1979,
+      "step": 59771
+    },
+    {
+      "epoch": 2.9981315911570343,
+      "grad_norm": 5.135807514190674,
+      "learning_rate": 8.969734275332542e-11,
+      "loss": 1.0534,
+      "step": 59772
+    },
+    {
+      "epoch": 2.998181749783691,
+      "grad_norm": 3.4575726985931396,
+      "learning_rate": 8.478336934736319e-11,
+      "loss": 1.4946,
+      "step": 59773
+    },
+    {
+      "epoch": 2.9982319084103475,
+      "grad_norm": 1.5949138402938843,
+      "learning_rate": 8.000781757111498e-11,
+      "loss": 1.1375,
+      "step": 59774
+    },
+    {
+      "epoch": 2.9982820670370045,
+      "grad_norm": 2.864654779434204,
+      "learning_rate": 7.53706874412341e-11,
+      "loss": 1.275,
+      "step": 59775
+    },
+    {
+      "epoch": 2.9983322256636615,
+      "grad_norm": 2.6163508892059326,
+      "learning_rate": 7.087197896882281e-11,
+      "loss": 1.0021,
+      "step": 59776
+    },
+    {
+      "epoch": 2.998382384290318,
+      "grad_norm": 3.790781259536743,
+      "learning_rate": 6.651169217053443e-11,
+      "loss": 1.2672,
+      "step": 59777
+    },
+    {
+      "epoch": 2.9984325429169747,
+      "grad_norm": 2.571331262588501,
+      "learning_rate": 6.228982704636899e-11,
+      "loss": 0.5672,
+      "step": 59778
+    },
+    {
+      "epoch": 2.9984827015436317,
+      "grad_norm": 3.5116817951202393,
+      "learning_rate": 5.820638362408204e-11,
+      "loss": 2.0333,
+      "step": 59779
+    },
+    {
+      "epoch": 2.9985328601702887,
+      "grad_norm": 3.2379603385925293,
+      "learning_rate": 5.426136190367359e-11,
+      "loss": 1.7546,
+      "step": 59780
+    },
+    {
+      "epoch": 2.9985830187969453,
+      "grad_norm": 2.421940803527832,
+      "learning_rate": 5.045476190179699e-11,
+      "loss": 1.0303,
+      "step": 59781
+    },
+    {
+      "epoch": 2.998633177423602,
+      "grad_norm": 3.13572359085083,
+      "learning_rate": 4.6786583624003346e-11,
+      "loss": 1.247,
+      "step": 59782
+    },
+    {
+      "epoch": 2.998683336050259,
+      "grad_norm": 2.828307628631592,
+      "learning_rate": 4.3256827081394894e-11,
+      "loss": 1.2053,
+      "step": 59783
+    },
+    {
+      "epoch": 2.998733494676916,
+      "grad_norm": 3.0084455013275146,
+      "learning_rate": 3.9865492290624973e-11,
+      "loss": 1.4298,
+      "step": 59784
+    },
+    {
+      "epoch": 2.9987836533035725,
+      "grad_norm": 3.4063498973846436,
+      "learning_rate": 3.6612579257244704e-11,
+      "loss": 1.7157,
+      "step": 59785
+    },
+    {
+      "epoch": 2.998833811930229,
+      "grad_norm": 2.2077646255493164,
+      "learning_rate": 3.349808798125409e-11,
+      "loss": 1.1338,
+      "step": 59786
+    },
+    {
+      "epoch": 2.998883970556886,
+      "grad_norm": 2.179716110229492,
+      "learning_rate": 3.052201848485759e-11,
+      "loss": 1.272,
+      "step": 59787
+    },
+    {
+      "epoch": 2.998934129183543,
+      "grad_norm": 2.593482255935669,
+      "learning_rate": 2.768437076805519e-11,
+      "loss": 1.3971,
+      "step": 59788
+    },
+    {
+      "epoch": 2.9989842878101998,
+      "grad_norm": 2.6535258293151855,
+      "learning_rate": 2.4985144847500253e-11,
+      "loss": 1.2945,
+      "step": 59789
+    },
+    {
+      "epoch": 2.9990344464368563,
+      "grad_norm": 3.077737808227539,
+      "learning_rate": 2.2424340717641656e-11,
+      "loss": 2.0234,
+      "step": 59790
+    },
+    {
+      "epoch": 2.9990846050635134,
+      "grad_norm": 2.6477880477905273,
+      "learning_rate": 2.0001958395132748e-11,
+      "loss": 1.1044,
+      "step": 59791
+    },
+    {
+      "epoch": 2.9991347636901704,
+      "grad_norm": 3.315833568572998,
+      "learning_rate": 1.7717997879973523e-11,
+      "loss": 1.578,
+      "step": 59792
+    },
+    {
+      "epoch": 2.999184922316827,
+      "grad_norm": 2.8919079303741455,
+      "learning_rate": 1.5572459188817334e-11,
+      "loss": 1.1892,
+      "step": 59793
+    },
+    {
+      "epoch": 2.9992350809434836,
+      "grad_norm": 2.9915194511413574,
+      "learning_rate": 1.3565342316113061e-11,
+      "loss": 1.4405,
+      "step": 59794
+    },
+    {
+      "epoch": 2.9992852395701406,
+      "grad_norm": 2.5658187866210938,
+      "learning_rate": 1.1696647272962935e-11,
+      "loss": 1.0032,
+      "step": 59795
+    },
+    {
+      "epoch": 2.9993353981967976,
+      "grad_norm": 3.2243688106536865,
+      "learning_rate": 9.966374064918071e-12,
+      "loss": 1.459,
+      "step": 59796
+    },
+    {
+      "epoch": 2.999385556823454,
+      "grad_norm": 3.0176734924316406,
+      "learning_rate": 8.374522697529586e-12,
+      "loss": 1.1177,
+      "step": 59797
+    },
+    {
+      "epoch": 2.999435715450111,
+      "grad_norm": 3.0600199699401855,
+      "learning_rate": 6.921093170797477e-12,
+      "loss": 1.6016,
+      "step": 59798
+    },
+    {
+      "epoch": 2.999485874076768,
+      "grad_norm": 3.130082607269287,
+      "learning_rate": 5.6060854958239765e-12,
+      "loss": 1.7026,
+      "step": 59799
+    },
+    {
+      "epoch": 2.999536032703425,
+      "grad_norm": 2.7928144931793213,
+      "learning_rate": 4.429499667057968e-12,
+      "loss": 1.1228,
+      "step": 59800
+    },
+    {
+      "epoch": 2.9995861913300814,
+      "grad_norm": 3.355782985687256,
+      "learning_rate": 3.3913356956016827e-12,
+      "loss": 1.6128,
+      "step": 59801
+    },
+    {
+      "epoch": 2.999636349956738,
+      "grad_norm": 3.621020555496216,
+      "learning_rate": 2.4915935759040053e-12,
+      "loss": 1.3013,
+      "step": 59802
+    },
+    {
+      "epoch": 2.999686508583395,
+      "grad_norm": 3.6413798332214355,
+      "learning_rate": 1.7302733246182811e-12,
+      "loss": 0.9964,
+      "step": 59803
+    },
+    {
+      "epoch": 2.999736667210052,
+      "grad_norm": 5.63632345199585,
+      "learning_rate": 1.10737493064228e-12,
+      "loss": 1.7908,
+      "step": 59804
+    },
+    {
+      "epoch": 2.9997868258367086,
+      "grad_norm": 3.6524646282196045,
+      "learning_rate": 6.228983995271165e-13,
+      "loss": 1.058,
+      "step": 59805
+    },
+    {
+      "epoch": 2.999836984463365,
+      "grad_norm": 4.080533981323242,
+      "learning_rate": 2.768437312727912e-13,
+      "loss": 1.2011,
+      "step": 59806
+    },
+    {
+      "epoch": 2.9998871430900222,
+      "grad_norm": 5.047213554382324,
+      "learning_rate": 6.921093143041901e-14,
+      "loss": 1.139,
+      "step": 59807
+    },
+    {
+      "epoch": 2.9999373017166793,
+      "grad_norm": 4.863078594207764,
+      "learning_rate": 0.0,
+      "loss": 1.1164,
+      "step": 59808
     }
   ],
   "logging_steps": 1,
@@ -418287,12 +418693,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.404390115877323e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
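The trainer_state.json diff records the final 58 optimizer steps of the run: the schedule has decayed the learning rate to 0.0 at step 59808 (epoch ~3.0), and should_training_stop has flipped to true, so this is the last checkpoint the Trainer will write. A minimal sketch, assuming the checkpoint layout above, of loading this state and summarizing the logged losses:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Entries with a "loss" key are per-step training logs (logging_steps = 1).
logs = [e for e in state["log_history"] if "loss" in e]
print(f"epoch {state['epoch']:.4f}, global step {state['global_step']}")
print(f"final learning rate: {logs[-1]['learning_rate']}")
# Average the last 50 logged losses to smooth per-step noise.
tail = [e["loss"] for e in logs[-50:]]
print(f"mean loss over last {len(tail)} steps: {sum(tail) / len(tail):.4f}")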