willtensora
committed on
Training in progress, step 320, checkpoint
Browse files
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +47 -3
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988097824
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36f4d7f40c1dfe0aa7d2a4231f75acffd5b79eac451b993d900c03c6ce59780c
|
3 |
size 988097824
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1004201914
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d5e27fe47302e41aa4c52cfdfbfad74d9a01ea643642284793e240fbc32572f
|
3 |
size 1004201914
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e82378552851182019f2c32ac7056e72bc481ae56bec95a5072e9bd4898d2ad
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2c4d52716d0b15349a19b71270c292451324106dbb42d8a60b878dc962ff5d3
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a0b9ba4048c8e75b028f96a9fdcb748a3661202e3a2650b862287ba2fca6855
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bed3a979b774c8cacd490da31053292ae90bad1a76d793f3a7a6fbfd9ed50ddd
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e62a31dc4ab1a0f66e3aa2022c8aa7b7e528f30887ac8d363d9c8fe971f0aa17
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd67f44c1b9b11ae3b1ae36097f4ddb763ca3afb82bcf1c555f73f0f036a28c9
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9cde9132c6235ff6effcf02af494a1b7895d8c18f7fa29fe7f82368931ab98e
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b872f40b35a9deed3e8f1cefec7429d733e19916cb7fda73422a385ad32aac93
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b87d3d77c5edb705ca109c194bdd88542b644ea60309ca727331b4859a982237
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -323,6 +323,50 @@
|
|
323 |
"eval_samples_per_second": 208.094,
|
324 |
"eval_steps_per_second": 6.514,
|
325 |
"step": 280
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
}
|
327 |
],
|
328 |
"logging_steps": 10,
|
@@ -342,7 +386,7 @@
|
|
342 |
"attributes": {}
|
343 |
}
|
344 |
},
|
345 |
-
"total_flos":
|
346 |
"train_batch_size": 4,
|
347 |
"trial_name": null,
|
348 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.0622568093385214,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 320,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
323 |
"eval_samples_per_second": 208.094,
|
324 |
"eval_steps_per_second": 6.514,
|
325 |
"step": 280
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"epoch": 0.05642023346303502,
|
329 |
+
"grad_norm": 6.78125,
|
330 |
+
"learning_rate": 0.00012418110275386028,
|
331 |
+
"loss": 1.7687,
|
332 |
+
"step": 290
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"epoch": 0.058365758754863814,
|
336 |
+
"grad_norm": 9.875,
|
337 |
+
"learning_rate": 0.00011916220986604296,
|
338 |
+
"loss": 1.7693,
|
339 |
+
"step": 300
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 0.058365758754863814,
|
343 |
+
"eval_loss": 1.725274920463562,
|
344 |
+
"eval_runtime": 98.7543,
|
345 |
+
"eval_samples_per_second": 185.045,
|
346 |
+
"eval_steps_per_second": 5.792,
|
347 |
+
"step": 300
|
348 |
+
},
|
349 |
+
{
|
350 |
+
"epoch": 0.06031128404669261,
|
351 |
+
"grad_norm": 7.0625,
|
352 |
+
"learning_rate": 0.00011409266840272685,
|
353 |
+
"loss": 1.7644,
|
354 |
+
"step": 310
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 0.0622568093385214,
|
358 |
+
"grad_norm": 5.5,
|
359 |
+
"learning_rate": 0.00010898587791726955,
|
360 |
+
"loss": 1.6371,
|
361 |
+
"step": 320
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"epoch": 0.0622568093385214,
|
365 |
+
"eval_loss": 1.6797202825546265,
|
366 |
+
"eval_runtime": 89.056,
|
367 |
+
"eval_samples_per_second": 205.197,
|
368 |
+
"eval_steps_per_second": 6.423,
|
369 |
+
"step": 320
|
370 |
}
|
371 |
],
|
372 |
"logging_steps": 10,
|
|
|
386 |
"attributes": {}
|
387 |
}
|
388 |
},
|
389 |
+
"total_flos": 4.503400340887962e+16,
|
390 |
"train_batch_size": 4,
|
391 |
"trial_name": null,
|
392 |
"trial_params": null
|