Training in progress, step 2000, checkpoint
Browse files- last-checkpoint/model-00001-of-00002.safetensors +1 -1
- last-checkpoint/model-00002-of-00002.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +28 -5
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4992706480
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc398ff23230c89e87f53312c3810b8834a5415fffe40e6ef729219393f73c22
|
3 |
size 4992706480
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1180663192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72b287b59abaa725a86c71b140353928ec44f1cd87ea1e76e4dc750ebc139633
|
3 |
size 1180663192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3137197104
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cac754e5794fffa36628c4c731133d2ed01862775619d8c98510105de3861c53
|
3 |
size 3137197104
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3aa5344cc910cb75bd602d7ae0546f81de8bb0a34e38ddd9c75b43852cc2fb1
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebff8515314a4035d1544b55fdb7220ba3b47a3cdfb089d81aa7ad4521721bf1
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f578ea9cd0ec24d5b514079228f354d9a10658abb872dcacb65f11c70e45bbdb
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c176dce8f62927af169095f249d0f7e77d81886784fbc18e97614e26057c92f
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e76828fe04659325cb09120e5b627eb9da0a73daa64f8d5208c6840bd1126fa
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric":
|
3 |
-
"best_model_checkpoint": "./models/rootflo/fauna-v0.8/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -30,6 +30,29 @@
|
|
30 |
"eval_steps_per_second": 0.006,
|
31 |
"eval_wer": 62.958386403817265,
|
32 |
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
}
|
34 |
],
|
35 |
"logging_steps": 500,
|
@@ -58,7 +81,7 @@
|
|
58 |
"attributes": {}
|
59 |
}
|
60 |
},
|
61 |
-
"total_flos":
|
62 |
"train_batch_size": 72,
|
63 |
"trial_name": null,
|
64 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 61.90141246696956,
|
3 |
+
"best_model_checkpoint": "./models/rootflo/fauna-v0.8/checkpoint-2000",
|
4 |
+
"epoch": 8.556149732620321,
|
5 |
"eval_steps": 1000,
|
6 |
+
"global_step": 2000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
30 |
"eval_steps_per_second": 0.006,
|
31 |
"eval_wer": 62.958386403817265,
|
32 |
"step": 1000
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"epoch": 6.4171122994652405,
|
36 |
+
"grad_norm": 0.5753424167633057,
|
37 |
+
"learning_rate": 3.745e-06,
|
38 |
+
"loss": 0.0391,
|
39 |
+
"step": 1500
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 8.556149732620321,
|
43 |
+
"grad_norm": 0.6431854963302612,
|
44 |
+
"learning_rate": 4.9950000000000005e-06,
|
45 |
+
"loss": 0.0287,
|
46 |
+
"step": 2000
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"epoch": 8.556149732620321,
|
50 |
+
"eval_loss": 0.07155641168355942,
|
51 |
+
"eval_runtime": 1251.3824,
|
52 |
+
"eval_samples_per_second": 2.078,
|
53 |
+
"eval_steps_per_second": 0.006,
|
54 |
+
"eval_wer": 61.90141246696956,
|
55 |
+
"step": 2000
|
56 |
}
|
57 |
],
|
58 |
"logging_steps": 500,
|
|
|
81 |
"attributes": {}
|
82 |
}
|
83 |
},
|
84 |
+
"total_flos": 4.891786271351789e+21,
|
85 |
"train_batch_size": 72,
|
86 |
"trial_name": null,
|
87 |
"trial_params": null
|