finnstrom3693
committed on
Upload Minisun trained using model.fit on NeelNanda/pile-10k[0-5000], lr 1e-4, cw 128, 2 epochs, batch size 8, cosine with restarts
Browse files
- config.json +2 -2
- tf_model.h5 +1 -1
config.json
CHANGED
@@ -8,7 +8,7 @@
|
|
8 |
"dropout_rate": 0.1,
|
9 |
"weight_decay": 0.01,
|
10 |
"learning_rate": 0.0001,
|
11 |
-
"restart_period":
|
12 |
-
"total_steps":
|
13 |
"warmup_ratio": 0.2
|
14 |
}
|
|
|
8 |
"dropout_rate": 0.1,
|
9 |
"weight_decay": 0.01,
|
10 |
"learning_rate": 0.0001,
|
11 |
+
"restart_period": 208,
|
12 |
+
"total_steps": 625,
|
13 |
"warmup_ratio": 0.2
|
14 |
}
|
tf_model.h5
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1902620824
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9eb59b3535f418b15136a046197653a4c4e3f34458d88dc5e5ed4ca387f1b910
|
3 |
size 1902620824
|