Upload folder using huggingface_hub

- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +1053 -3
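This commit message is the default one used by huggingface_hub's folder-upload API. A minimal sketch of the call that produces a commit like this (the repo id and local checkpoint path below are placeholders, not taken from this repo):

    from huggingface_hub import HfApi

    api = HfApi()  # picks up the token from `huggingface-cli login`
    # "Upload folder using huggingface_hub" is upload_folder's default
    # commit message; repo_id and folder_path here are hypothetical.
    api.upload_folder(
        repo_id="user/lora-adapter",
        folder_path="./checkpoint-9000",
        commit_message="Upload folder using huggingface_hub",
    )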
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:afbb9fafdaa3ca57948b5758a6f7f91aff0d8bb88753df2cf01d7363ea7ed873
 size 3705288
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4ef832f29c75c0a4f145b612ac65d501410d3c9e4ad8c11582372091e773bf49
 size 2213690
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7fca791d6b819bad1dbc285bf8bd7345964c2aeaf16b7d702b5c0c5380f2a057
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6b04bc150d39b084b09acd98d05c3563c323fc35277059ff5584f9f2d3fde608
 size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 43.92922513727883,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2107,6 +2107,1056 @@
       "learning_rate": 0.00014187192118226603,
       "loss": 0.444,
       "step": 6000
+    },
+    {
+      "epoch": 29.383770591824284,
+      "grad_norm": 4.88236141204834,
+      "learning_rate": 0.00014167487684729063,
+      "loss": 0.4639,
+      "step": 6020
+    },
+    {
+      "epoch": 29.481391092129346,
+      "grad_norm": 3.7870137691497803,
+      "learning_rate": 0.00014147783251231528,
+      "loss": 0.4873,
+      "step": 6040
+    },
+    {
+      "epoch": 29.579011592434412,
+      "grad_norm": 3.091411590576172,
+      "learning_rate": 0.0001412807881773399,
+      "loss": 0.4834,
+      "step": 6060
+    },
+    {
+      "epoch": 29.676632092739474,
+      "grad_norm": 2.7498538494110107,
+      "learning_rate": 0.00014108374384236454,
+      "loss": 0.4846,
+      "step": 6080
+    },
+    {
+      "epoch": 29.77425259304454,
+      "grad_norm": 3.2043850421905518,
+      "learning_rate": 0.00014088669950738917,
+      "loss": 0.4983,
+      "step": 6100
+    },
+    {
+      "epoch": 29.871873093349603,
+      "grad_norm": 3.270357847213745,
+      "learning_rate": 0.0001406896551724138,
+      "loss": 0.4803,
+      "step": 6120
+    },
+    {
+      "epoch": 29.96949359365467,
+      "grad_norm": 3.031405210494995,
+      "learning_rate": 0.00014049261083743842,
+      "loss": 0.5287,
+      "step": 6140
+    },
+    {
+      "epoch": 30.06711409395973,
+      "grad_norm": 3.390765905380249,
+      "learning_rate": 0.00014029556650246307,
+      "loss": 0.4619,
+      "step": 6160
+    },
+    {
+      "epoch": 30.164734594264797,
+      "grad_norm": 3.2783963680267334,
+      "learning_rate": 0.0001400985221674877,
+      "loss": 0.4328,
+      "step": 6180
+    },
+    {
+      "epoch": 30.26235509456986,
+      "grad_norm": 3.6925759315490723,
+      "learning_rate": 0.00013990147783251233,
+      "loss": 0.487,
+      "step": 6200
+    },
+    {
+      "epoch": 30.359975594874925,
+      "grad_norm": 3.0115065574645996,
+      "learning_rate": 0.00013970443349753696,
+      "loss": 0.467,
+      "step": 6220
+    },
+    {
+      "epoch": 30.457596095179987,
+      "grad_norm": 4.561310291290283,
+      "learning_rate": 0.00013950738916256158,
+      "loss": 0.4801,
+      "step": 6240
+    },
+    {
+      "epoch": 30.555216595485053,
+      "grad_norm": 3.2879674434661865,
+      "learning_rate": 0.0001393103448275862,
+      "loss": 0.4638,
+      "step": 6260
+    },
+    {
+      "epoch": 30.652837095790115,
+      "grad_norm": 2.793945789337158,
+      "learning_rate": 0.00013911330049261084,
+      "loss": 0.463,
+      "step": 6280
+    },
+    {
+      "epoch": 30.75045759609518,
+      "grad_norm": 3.615793466567993,
+      "learning_rate": 0.0001389162561576355,
+      "loss": 0.4907,
+      "step": 6300
+    },
+    {
+      "epoch": 30.848078096400243,
+      "grad_norm": 3.160133123397827,
+      "learning_rate": 0.0001387192118226601,
+      "loss": 0.477,
+      "step": 6320
+    },
+    {
+      "epoch": 30.94569859670531,
+      "grad_norm": 3.62670636177063,
+      "learning_rate": 0.00013852216748768475,
+      "loss": 0.4945,
+      "step": 6340
+    },
+    {
+      "epoch": 31.04331909701037,
+      "grad_norm": 3.346158981323242,
+      "learning_rate": 0.00013832512315270935,
+      "loss": 0.4543,
+      "step": 6360
+    },
+    {
+      "epoch": 31.140939597315437,
+      "grad_norm": 2.8707423210144043,
+      "learning_rate": 0.000138128078817734,
+      "loss": 0.4352,
+      "step": 6380
+    },
+    {
+      "epoch": 31.2385600976205,
+      "grad_norm": 2.5617620944976807,
+      "learning_rate": 0.00013793103448275863,
+      "loss": 0.4611,
+      "step": 6400
+    },
+    {
+      "epoch": 31.336180597925566,
+      "grad_norm": 3.2273828983306885,
+      "learning_rate": 0.00013773399014778325,
+      "loss": 0.4593,
+      "step": 6420
+    },
+    {
+      "epoch": 31.433801098230628,
+      "grad_norm": 3.502797842025757,
+      "learning_rate": 0.00013753694581280788,
+      "loss": 0.4717,
+      "step": 6440
+    },
+    {
+      "epoch": 31.531421598535694,
+      "grad_norm": 3.9278218746185303,
+      "learning_rate": 0.0001373399014778325,
+      "loss": 0.4813,
+      "step": 6460
+    },
+    {
+      "epoch": 31.629042098840756,
+      "grad_norm": 3.013709545135498,
+      "learning_rate": 0.00013714285714285716,
+      "loss": 0.4305,
+      "step": 6480
+    },
+    {
+      "epoch": 31.726662599145822,
+      "grad_norm": 2.661198377609253,
+      "learning_rate": 0.0001369458128078818,
+      "loss": 0.4495,
+      "step": 6500
+    },
+    {
+      "epoch": 31.824283099450884,
+      "grad_norm": 2.6343297958374023,
+      "learning_rate": 0.00013674876847290642,
+      "loss": 0.4809,
+      "step": 6520
+    },
+    {
+      "epoch": 31.92190359975595,
+      "grad_norm": 6.334170818328857,
+      "learning_rate": 0.00013655172413793104,
+      "loss": 0.4576,
+      "step": 6540
+    },
+    {
+      "epoch": 32.01952410006101,
+      "grad_norm": 3.728727102279663,
+      "learning_rate": 0.00013635467980295567,
+      "loss": 0.5034,
+      "step": 6560
+    },
+    {
+      "epoch": 32.117144600366075,
+      "grad_norm": 2.0572702884674072,
+      "learning_rate": 0.0001361576354679803,
+      "loss": 0.4161,
+      "step": 6580
+    },
+    {
+      "epoch": 32.214765100671144,
+      "grad_norm": 2.7006356716156006,
+      "learning_rate": 0.00013596059113300492,
+      "loss": 0.4357,
+      "step": 6600
+    },
+    {
+      "epoch": 32.31238560097621,
+      "grad_norm": 3.526782989501953,
+      "learning_rate": 0.00013576354679802955,
+      "loss": 0.4367,
+      "step": 6620
+    },
+    {
+      "epoch": 32.41000610128127,
+      "grad_norm": 3.240647792816162,
+      "learning_rate": 0.0001355665024630542,
+      "loss": 0.4416,
+      "step": 6640
+    },
+    {
+      "epoch": 32.50762660158633,
+      "grad_norm": 2.965851306915283,
+      "learning_rate": 0.0001353694581280788,
+      "loss": 0.4649,
+      "step": 6660
+    },
+    {
+      "epoch": 32.6052471018914,
+      "grad_norm": 3.028812885284424,
+      "learning_rate": 0.00013517241379310346,
+      "loss": 0.4381,
+      "step": 6680
+    },
+    {
+      "epoch": 32.70286760219646,
+      "grad_norm": 4.041370391845703,
+      "learning_rate": 0.0001349753694581281,
+      "loss": 0.4671,
+      "step": 6700
+    },
+    {
+      "epoch": 32.800488102501525,
+      "grad_norm": 5.677656650543213,
+      "learning_rate": 0.00013477832512315271,
+      "loss": 0.4718,
+      "step": 6720
+    },
+    {
+      "epoch": 32.89810860280659,
+      "grad_norm": 3.1538727283477783,
+      "learning_rate": 0.00013458128078817737,
+      "loss": 0.4705,
+      "step": 6740
+    },
+    {
+      "epoch": 32.99572910311166,
+      "grad_norm": 3.8186867237091064,
+      "learning_rate": 0.00013438423645320197,
+      "loss": 0.4724,
+      "step": 6760
+    },
+    {
+      "epoch": 33.09334960341672,
+      "grad_norm": 2.8248584270477295,
+      "learning_rate": 0.00013418719211822662,
+      "loss": 0.4399,
+      "step": 6780
+    },
+    {
+      "epoch": 33.19097010372178,
+      "grad_norm": 2.2694895267486572,
+      "learning_rate": 0.00013399014778325122,
+      "loss": 0.4147,
+      "step": 6800
+    },
+    {
+      "epoch": 33.288590604026844,
+      "grad_norm": 3.305610418319702,
+      "learning_rate": 0.00013379310344827588,
+      "loss": 0.4028,
+      "step": 6820
+    },
+    {
+      "epoch": 33.38621110433191,
+      "grad_norm": 3.610136032104492,
+      "learning_rate": 0.0001335960591133005,
+      "loss": 0.4319,
+      "step": 6840
+    },
+    {
+      "epoch": 33.483831604636975,
+      "grad_norm": 3.4783689975738525,
+      "learning_rate": 0.00013339901477832513,
+      "loss": 0.4361,
+      "step": 6860
+    },
+    {
+      "epoch": 33.58145210494204,
+      "grad_norm": 3.0984203815460205,
+      "learning_rate": 0.00013320197044334976,
+      "loss": 0.4488,
+      "step": 6880
+    },
+    {
+      "epoch": 33.6790726052471,
+      "grad_norm": 3.1558122634887695,
+      "learning_rate": 0.00013300492610837438,
+      "loss": 0.4262,
+      "step": 6900
+    },
+    {
+      "epoch": 33.77669310555217,
+      "grad_norm": 4.813379764556885,
+      "learning_rate": 0.000132807881773399,
+      "loss": 0.452,
+      "step": 6920
+    },
+    {
+      "epoch": 33.87431360585723,
+      "grad_norm": 3.047551393508911,
+      "learning_rate": 0.00013261083743842364,
+      "loss": 0.4517,
+      "step": 6940
+    },
+    {
+      "epoch": 33.971934106162294,
+      "grad_norm": 3.0880701541900635,
+      "learning_rate": 0.0001324137931034483,
+      "loss": 0.5147,
+      "step": 6960
+    },
+    {
+      "epoch": 34.06955460646736,
+      "grad_norm": 2.824169874191284,
+      "learning_rate": 0.00013221674876847292,
+      "loss": 0.4017,
+      "step": 6980
+    },
+    {
+      "epoch": 34.16717510677242,
+      "grad_norm": 3.1136012077331543,
+      "learning_rate": 0.00013201970443349755,
+      "loss": 0.4291,
+      "step": 7000
+    },
+    {
+      "epoch": 34.26479560707749,
+      "grad_norm": 4.246958255767822,
+      "learning_rate": 0.00013182266009852217,
+      "loss": 0.4318,
+      "step": 7020
+    },
+    {
+      "epoch": 34.36241610738255,
+      "grad_norm": 2.4655661582946777,
+      "learning_rate": 0.0001316256157635468,
+      "loss": 0.4283,
+      "step": 7040
+    },
+    {
+      "epoch": 34.46003660768761,
+      "grad_norm": 4.322596549987793,
+      "learning_rate": 0.00013142857142857143,
+      "loss": 0.4323,
+      "step": 7060
+    },
+    {
+      "epoch": 34.557657107992675,
+      "grad_norm": 4.425800800323486,
+      "learning_rate": 0.00013123152709359608,
+      "loss": 0.4376,
+      "step": 7080
+    },
+    {
+      "epoch": 34.655277608297745,
+      "grad_norm": 3.796889305114746,
+      "learning_rate": 0.00013103448275862068,
+      "loss": 0.4276,
+      "step": 7100
+    },
+    {
+      "epoch": 34.75289810860281,
+      "grad_norm": 3.9222586154937744,
+      "learning_rate": 0.00013083743842364534,
+      "loss": 0.4658,
+      "step": 7120
+    },
+    {
+      "epoch": 34.85051860890787,
+      "grad_norm": 4.5007548332214355,
+      "learning_rate": 0.00013064039408866994,
+      "loss": 0.4293,
+      "step": 7140
+    },
+    {
+      "epoch": 34.94813910921293,
+      "grad_norm": 3.0858423709869385,
+      "learning_rate": 0.0001304433497536946,
+      "loss": 0.4214,
+      "step": 7160
+    },
+    {
+      "epoch": 35.045759609518,
+      "grad_norm": 3.586949586868286,
+      "learning_rate": 0.00013024630541871922,
+      "loss": 0.4199,
+      "step": 7180
+    },
+    {
+      "epoch": 35.14338010982306,
+      "grad_norm": 2.916937828063965,
+      "learning_rate": 0.00013004926108374385,
+      "loss": 0.4071,
+      "step": 7200
+    },
+    {
+      "epoch": 35.241000610128125,
+      "grad_norm": 3.1324169635772705,
+      "learning_rate": 0.00012985221674876847,
+      "loss": 0.4151,
+      "step": 7220
+    },
+    {
+      "epoch": 35.33862111043319,
+      "grad_norm": 2.8730344772338867,
+      "learning_rate": 0.0001296551724137931,
+      "loss": 0.3984,
+      "step": 7240
+    },
+    {
+      "epoch": 35.43624161073826,
+      "grad_norm": 3.0865273475646973,
+      "learning_rate": 0.00012945812807881775,
+      "loss": 0.4273,
+      "step": 7260
+    },
+    {
+      "epoch": 35.53386211104332,
+      "grad_norm": 4.397771835327148,
+      "learning_rate": 0.00012926108374384238,
+      "loss": 0.4232,
+      "step": 7280
+    },
+    {
+      "epoch": 35.63148261134838,
+      "grad_norm": 2.4203243255615234,
+      "learning_rate": 0.000129064039408867,
+      "loss": 0.4035,
+      "step": 7300
+    },
+    {
+      "epoch": 35.729103111653444,
+      "grad_norm": 2.94404673576355,
+      "learning_rate": 0.00012886699507389164,
+      "loss": 0.4332,
+      "step": 7320
+    },
+    {
+      "epoch": 35.82672361195851,
+      "grad_norm": 3.4141249656677246,
+      "learning_rate": 0.00012866995073891626,
+      "loss": 0.4484,
+      "step": 7340
+    },
+    {
+      "epoch": 35.924344112263576,
+      "grad_norm": 2.8227927684783936,
+      "learning_rate": 0.0001284729064039409,
+      "loss": 0.4509,
+      "step": 7360
+    },
+    {
+      "epoch": 36.02196461256864,
+      "grad_norm": 2.768937110900879,
+      "learning_rate": 0.00012827586206896552,
+      "loss": 0.4391,
+      "step": 7380
+    },
+    {
+      "epoch": 36.1195851128737,
+      "grad_norm": 4.155871391296387,
+      "learning_rate": 0.00012807881773399014,
+      "loss": 0.3954,
+      "step": 7400
+    },
+    {
+      "epoch": 36.21720561317877,
+      "grad_norm": 2.484731912612915,
+      "learning_rate": 0.0001278817733990148,
+      "loss": 0.4363,
+      "step": 7420
+    },
+    {
+      "epoch": 36.31482611348383,
+      "grad_norm": 2.7758595943450928,
+      "learning_rate": 0.0001276847290640394,
+      "loss": 0.4058,
+      "step": 7440
+    },
+    {
+      "epoch": 36.412446613788894,
+      "grad_norm": 3.9609923362731934,
+      "learning_rate": 0.00012748768472906405,
+      "loss": 0.3845,
+      "step": 7460
+    },
+    {
+      "epoch": 36.51006711409396,
+      "grad_norm": 3.963120222091675,
+      "learning_rate": 0.00012729064039408868,
+      "loss": 0.4301,
+      "step": 7480
+    },
+    {
+      "epoch": 36.607687614399026,
+      "grad_norm": 2.77718448638916,
+      "learning_rate": 0.0001270935960591133,
+      "loss": 0.4034,
+      "step": 7500
+    },
+    {
+      "epoch": 36.70530811470409,
+      "grad_norm": 3.6000113487243652,
+      "learning_rate": 0.00012689655172413793,
+      "loss": 0.4087,
+      "step": 7520
+    },
+    {
+      "epoch": 36.80292861500915,
+      "grad_norm": 3.4430975914001465,
+      "learning_rate": 0.00012669950738916256,
+      "loss": 0.4109,
+      "step": 7540
+    },
+    {
+      "epoch": 36.90054911531421,
+      "grad_norm": 3.3932645320892334,
+      "learning_rate": 0.00012650246305418721,
+      "loss": 0.4394,
+      "step": 7560
+    },
+    {
+      "epoch": 36.99816961561928,
+      "grad_norm": 4.054554462432861,
+      "learning_rate": 0.00012630541871921181,
+      "loss": 0.4203,
+      "step": 7580
+    },
+    {
+      "epoch": 37.095790115924345,
+      "grad_norm": 2.8766210079193115,
+      "learning_rate": 0.00012610837438423647,
+      "loss": 0.3861,
+      "step": 7600
+    },
+    {
+      "epoch": 37.19341061622941,
+      "grad_norm": 4.115131855010986,
+      "learning_rate": 0.0001259113300492611,
+      "loss": 0.4236,
+      "step": 7620
+    },
+    {
+      "epoch": 37.29103111653447,
+      "grad_norm": 2.776914358139038,
+      "learning_rate": 0.00012571428571428572,
+      "loss": 0.4244,
+      "step": 7640
+    },
+    {
+      "epoch": 37.38865161683954,
+      "grad_norm": 3.8428800106048584,
+      "learning_rate": 0.00012551724137931035,
+      "loss": 0.4028,
+      "step": 7660
+    },
+    {
+      "epoch": 37.4862721171446,
+      "grad_norm": 3.028683662414551,
+      "learning_rate": 0.00012532019704433498,
+      "loss": 0.4127,
+      "step": 7680
+    },
+    {
+      "epoch": 37.58389261744966,
+      "grad_norm": 2.678617477416992,
+      "learning_rate": 0.0001251231527093596,
+      "loss": 0.4251,
+      "step": 7700
+    },
+    {
+      "epoch": 37.681513117754726,
+      "grad_norm": 3.496917247772217,
+      "learning_rate": 0.00012492610837438423,
+      "loss": 0.404,
+      "step": 7720
+    },
+    {
+      "epoch": 37.779133618059795,
+      "grad_norm": 4.018653869628906,
+      "learning_rate": 0.00012472906403940889,
+      "loss": 0.4028,
+      "step": 7740
+    },
+    {
+      "epoch": 37.87675411836486,
+      "grad_norm": 3.317580223083496,
+      "learning_rate": 0.0001245320197044335,
+      "loss": 0.4032,
+      "step": 7760
+    },
+    {
+      "epoch": 37.97437461866992,
+      "grad_norm": 3.7693002223968506,
+      "learning_rate": 0.00012433497536945814,
+      "loss": 0.3935,
+      "step": 7780
+    },
+    {
+      "epoch": 38.07199511897498,
+      "grad_norm": 2.809558629989624,
+      "learning_rate": 0.00012413793103448277,
+      "loss": 0.4113,
+      "step": 7800
+    },
+    {
+      "epoch": 38.16961561928005,
+      "grad_norm": 3.2092092037200928,
+      "learning_rate": 0.0001239408866995074,
+      "loss": 0.4019,
+      "step": 7820
+    },
+    {
+      "epoch": 38.267236119585114,
+      "grad_norm": 3.3514404296875,
+      "learning_rate": 0.00012374384236453202,
+      "loss": 0.4013,
+      "step": 7840
+    },
+    {
+      "epoch": 38.364856619890176,
+      "grad_norm": 3.9514451026916504,
+      "learning_rate": 0.00012354679802955667,
+      "loss": 0.3889,
+      "step": 7860
+    },
+    {
+      "epoch": 38.46247712019524,
+      "grad_norm": 2.7896828651428223,
+      "learning_rate": 0.00012334975369458127,
+      "loss": 0.377,
+      "step": 7880
+    },
+    {
+      "epoch": 38.56009762050031,
+      "grad_norm": 3.522840738296509,
+      "learning_rate": 0.00012315270935960593,
+      "loss": 0.4158,
+      "step": 7900
+    },
+    {
+      "epoch": 38.65771812080537,
+      "grad_norm": 3.422250270843506,
+      "learning_rate": 0.00012295566502463053,
+      "loss": 0.3837,
+      "step": 7920
+    },
+    {
+      "epoch": 38.75533862111043,
+      "grad_norm": 3.0469913482666016,
+      "learning_rate": 0.00012275862068965518,
+      "loss": 0.4036,
+      "step": 7940
+    },
+    {
+      "epoch": 38.852959121415495,
+      "grad_norm": 2.904141664505005,
+      "learning_rate": 0.0001225615763546798,
+      "loss": 0.3928,
+      "step": 7960
+    },
+    {
+      "epoch": 38.950579621720564,
+      "grad_norm": 3.7538552284240723,
+      "learning_rate": 0.00012236453201970444,
+      "loss": 0.4092,
+      "step": 7980
+    },
+    {
+      "epoch": 39.04820012202563,
+      "grad_norm": 3.562114715576172,
+      "learning_rate": 0.00012216748768472906,
+      "loss": 0.3982,
+      "step": 8000
+    },
+    {
+      "epoch": 39.14582062233069,
+      "grad_norm": 2.4931962490081787,
+      "learning_rate": 0.00012197044334975369,
+      "loss": 0.3547,
+      "step": 8020
+    },
+    {
+      "epoch": 39.24344112263575,
+      "grad_norm": 2.461050271987915,
+      "learning_rate": 0.00012177339901477833,
+      "loss": 0.3762,
+      "step": 8040
+    },
+    {
+      "epoch": 39.34106162294082,
+      "grad_norm": 3.1320595741271973,
+      "learning_rate": 0.00012157635467980295,
+      "loss": 0.3907,
+      "step": 8060
+    },
+    {
+      "epoch": 39.43868212324588,
+      "grad_norm": 3.044754981994629,
+      "learning_rate": 0.00012137931034482759,
+      "loss": 0.4068,
+      "step": 8080
+    },
+    {
+      "epoch": 39.536302623550945,
+      "grad_norm": 2.9243273735046387,
+      "learning_rate": 0.00012118226600985223,
+      "loss": 0.3903,
+      "step": 8100
+    },
+    {
+      "epoch": 39.63392312385601,
+      "grad_norm": 4.234837055206299,
+      "learning_rate": 0.00012098522167487685,
+      "loss": 0.3841,
+      "step": 8120
+    },
+    {
+      "epoch": 39.73154362416108,
+      "grad_norm": 3.993495464324951,
+      "learning_rate": 0.00012078817733990148,
+      "loss": 0.4082,
+      "step": 8140
+    },
+    {
+      "epoch": 39.82916412446614,
+      "grad_norm": 3.8363142013549805,
+      "learning_rate": 0.00012059113300492611,
+      "loss": 0.3939,
+      "step": 8160
+    },
+    {
+      "epoch": 39.9267846247712,
+      "grad_norm": 4.398952007293701,
+      "learning_rate": 0.00012039408866995075,
+      "loss": 0.4145,
+      "step": 8180
+    },
+    {
+      "epoch": 40.024405125076264,
+      "grad_norm": 2.7002291679382324,
+      "learning_rate": 0.00012019704433497539,
+      "loss": 0.386,
+      "step": 8200
+    },
+    {
+      "epoch": 40.12202562538133,
+      "grad_norm": 3.1867945194244385,
+      "learning_rate": 0.00012,
+      "loss": 0.3924,
+      "step": 8220
+    },
+    {
+      "epoch": 40.219646125686396,
+      "grad_norm": 2.9179584980010986,
+      "learning_rate": 0.00011980295566502464,
+      "loss": 0.3741,
+      "step": 8240
+    },
+    {
+      "epoch": 40.31726662599146,
+      "grad_norm": 5.108730316162109,
+      "learning_rate": 0.00011960591133004926,
+      "loss": 0.371,
+      "step": 8260
+    },
+    {
+      "epoch": 40.41488712629652,
+      "grad_norm": 3.4418270587921143,
+      "learning_rate": 0.0001194088669950739,
+      "loss": 0.3845,
+      "step": 8280
+    },
+    {
+      "epoch": 40.51250762660159,
+      "grad_norm": 3.245562791824341,
+      "learning_rate": 0.00011921182266009854,
+      "loss": 0.375,
+      "step": 8300
+    },
+    {
+      "epoch": 40.61012812690665,
+      "grad_norm": 2.6644446849823,
+      "learning_rate": 0.00011901477832512315,
+      "loss": 0.3839,
+      "step": 8320
+    },
+    {
+      "epoch": 40.707748627211714,
+      "grad_norm": 4.975727558135986,
+      "learning_rate": 0.00011881773399014779,
+      "loss": 0.3889,
+      "step": 8340
+    },
+    {
+      "epoch": 40.80536912751678,
+      "grad_norm": 3.6427066326141357,
+      "learning_rate": 0.0001186206896551724,
+      "loss": 0.393,
+      "step": 8360
+    },
+    {
+      "epoch": 40.902989627821846,
+      "grad_norm": 3.7799060344696045,
+      "learning_rate": 0.00011842364532019705,
+      "loss": 0.3894,
+      "step": 8380
+    },
+    {
+      "epoch": 41.00061012812691,
+      "grad_norm": 4.170138835906982,
+      "learning_rate": 0.00011822660098522169,
+      "loss": 0.3965,
+      "step": 8400
+    },
+    {
+      "epoch": 41.09823062843197,
+      "grad_norm": 2.660006523132324,
+      "learning_rate": 0.00011802955665024631,
+      "loss": 0.3412,
+      "step": 8420
+    },
+    {
+      "epoch": 41.19585112873703,
+      "grad_norm": 3.9118030071258545,
+      "learning_rate": 0.00011783251231527096,
+      "loss": 0.3608,
+      "step": 8440
+    },
+    {
+      "epoch": 41.2934716290421,
+      "grad_norm": 4.68622350692749,
+      "learning_rate": 0.00011763546798029557,
+      "loss": 0.3742,
+      "step": 8460
+    },
+    {
+      "epoch": 41.391092129347165,
+      "grad_norm": 2.5423784255981445,
+      "learning_rate": 0.00011743842364532021,
+      "loss": 0.3901,
+      "step": 8480
+    },
+    {
+      "epoch": 41.48871262965223,
+      "grad_norm": 3.6446280479431152,
+      "learning_rate": 0.00011724137931034482,
+      "loss": 0.3518,
+      "step": 8500
+    },
+    {
+      "epoch": 41.58633312995729,
+      "grad_norm": 2.6701178550720215,
+      "learning_rate": 0.00011704433497536946,
+      "loss": 0.3809,
+      "step": 8520
+    },
+    {
+      "epoch": 41.68395363026236,
+      "grad_norm": 3.226100206375122,
+      "learning_rate": 0.0001168472906403941,
+      "loss": 0.3834,
+      "step": 8540
+    },
+    {
+      "epoch": 41.78157413056742,
+      "grad_norm": 3.4181952476501465,
+      "learning_rate": 0.00011665024630541872,
+      "loss": 0.4098,
+      "step": 8560
+    },
+    {
+      "epoch": 41.87919463087248,
+      "grad_norm": 2.9190330505371094,
+      "learning_rate": 0.00011645320197044336,
+      "loss": 0.3838,
+      "step": 8580
+    },
+    {
+      "epoch": 41.976815131177545,
+      "grad_norm": 4.082178115844727,
+      "learning_rate": 0.00011625615763546797,
+      "loss": 0.4109,
+      "step": 8600
+    },
+    {
+      "epoch": 42.074435631482615,
+      "grad_norm": 2.899162530899048,
+      "learning_rate": 0.00011605911330049261,
+      "loss": 0.3624,
+      "step": 8620
+    },
+    {
+      "epoch": 42.17205613178768,
+      "grad_norm": 2.4065990447998047,
+      "learning_rate": 0.00011586206896551725,
+      "loss": 0.3573,
+      "step": 8640
+    },
+    {
+      "epoch": 42.26967663209274,
+      "grad_norm": 2.818037509918213,
+      "learning_rate": 0.00011566502463054188,
+      "loss": 0.3699,
+      "step": 8660
+    },
+    {
+      "epoch": 42.3672971323978,
+      "grad_norm": 2.8875226974487305,
+      "learning_rate": 0.00011546798029556651,
+      "loss": 0.3489,
+      "step": 8680
+    },
+    {
+      "epoch": 42.464917632702864,
+      "grad_norm": 3.0840396881103516,
+      "learning_rate": 0.00011527093596059113,
+      "loss": 0.3733,
+      "step": 8700
+    },
+    {
+      "epoch": 42.56253813300793,
+      "grad_norm": 2.6554925441741943,
+      "learning_rate": 0.00011507389162561578,
+      "loss": 0.3541,
+      "step": 8720
+    },
+    {
+      "epoch": 42.660158633312996,
+      "grad_norm": 2.766045331954956,
+      "learning_rate": 0.00011487684729064042,
+      "loss": 0.3682,
+      "step": 8740
+    },
+    {
+      "epoch": 42.75777913361806,
+      "grad_norm": 3.0672762393951416,
+      "learning_rate": 0.00011467980295566503,
+      "loss": 0.3943,
+      "step": 8760
+    },
+    {
+      "epoch": 42.85539963392312,
+      "grad_norm": 2.898484468460083,
+      "learning_rate": 0.00011448275862068967,
+      "loss": 0.3702,
+      "step": 8780
+    },
+    {
+      "epoch": 42.95302013422819,
+      "grad_norm": 2.7023797035217285,
+      "learning_rate": 0.00011428571428571428,
+      "loss": 0.388,
+      "step": 8800
+    },
+    {
+      "epoch": 43.05064063453325,
+      "grad_norm": 2.4088499546051025,
+      "learning_rate": 0.00011408866995073892,
+      "loss": 0.3615,
+      "step": 8820
+    },
+    {
+      "epoch": 43.148261134838314,
+      "grad_norm": 2.3739655017852783,
+      "learning_rate": 0.00011389162561576354,
+      "loss": 0.3703,
+      "step": 8840
+    },
+    {
+      "epoch": 43.24588163514338,
+      "grad_norm": 3.2558271884918213,
+      "learning_rate": 0.00011369458128078818,
+      "loss": 0.3478,
+      "step": 8860
+    },
+    {
+      "epoch": 43.343502135448446,
+      "grad_norm": 2.931380271911621,
+      "learning_rate": 0.00011349753694581282,
+      "loss": 0.3553,
+      "step": 8880
+    },
+    {
+      "epoch": 43.44112263575351,
+      "grad_norm": 2.5165908336639404,
+      "learning_rate": 0.00011330049261083743,
+      "loss": 0.3495,
+      "step": 8900
+    },
+    {
+      "epoch": 43.53874313605857,
+      "grad_norm": 3.5619068145751953,
+      "learning_rate": 0.00011310344827586207,
+      "loss": 0.3692,
+      "step": 8920
+    },
+    {
+      "epoch": 43.63636363636363,
+      "grad_norm": 2.39534068107605,
+      "learning_rate": 0.0001129064039408867,
+      "loss": 0.3674,
+      "step": 8940
+    },
+    {
+      "epoch": 43.7339841366687,
+      "grad_norm": 3.495316505432129,
+      "learning_rate": 0.00011270935960591134,
+      "loss": 0.367,
+      "step": 8960
+    },
+    {
+      "epoch": 43.831604636973765,
+      "grad_norm": 2.8195016384124756,
+      "learning_rate": 0.00011251231527093598,
+      "loss": 0.411,
+      "step": 8980
+    },
+    {
+      "epoch": 43.92922513727883,
+      "grad_norm": 3.446014165878296,
+      "learning_rate": 0.0001123152709359606,
+      "loss": 0.3774,
+      "step": 9000
     }
   ],
   "logging_steps": 20,
@@ -2126,7 +3176,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.79330460401664e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
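The added entries above follow the Trainer log_history schema. A minimal sketch for inspecting a downloaded checkpoint's state (the local path is an assumption for illustration):

    import json

    # Hypothetical local path to the checkpoint this commit uploaded.
    with open("checkpoint-9000/trainer_state.json") as f:
        state = json.load(f)

    # Top-level resume info, matching the values in the diff above.
    print(state["epoch"], state["global_step"])

    # Each log_history entry records epoch, grad_norm, learning_rate,
    # loss, and step; print the last few logged losses.
    for entry in state["log_history"][-3:]:
        print(entry["step"], entry.get("loss"))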