Update README.md
Browse files
README.md
CHANGED
@@ -27,7 +27,7 @@ New checkpoint: RWKV-4-Pile-3B-20221110-ctx4096.pth : Fine-tuned to ctx_len = 40
|
|
27 |
* PIQA acc 74.16%
|
28 |
* SC2016 acc 70.71%
|
29 |
* Hellaswag acc_norm 59.89%
|
30 |
-
ctx_len = 4096 n_layer = 32 n_embd = 2560
|
31 |
|
32 |
Final checkpoint: RWKV-4-Pile-3B-20221008-8023.pth : Trained on the Pile for 331B tokens.
|
33 |
* Pile loss 1.9469
|
@@ -35,4 +35,4 @@ Final checkpoint: RWKV-4-Pile-3B-20221008-8023.pth : Trained on the Pile for 331
|
|
35 |
* PIQA acc 73.72%
|
36 |
* SC2016 acc 70.28%
|
37 |
* Hellaswag acc_norm 59.63%
|
38 |
-
ctx_len = 1024 n_layer = 32 n_embd = 2560
|
|
|
27 |
* PIQA acc 74.16%
|
28 |
* SC2016 acc 70.71%
|
29 |
* Hellaswag acc_norm 59.89%
|
30 |
+
* ctx_len = 4096 n_layer = 32 n_embd = 2560
|
31 |
|
32 |
Final checkpoint: RWKV-4-Pile-3B-20221008-8023.pth : Trained on the Pile for 331B tokens.
|
33 |
* Pile loss 1.9469
|
|
|
35 |
* PIQA acc 73.72%
|
36 |
* SC2016 acc 70.28%
|
37 |
* Hellaswag acc_norm 59.63%
|
38 |
+
* ctx_len = 1024 n_layer = 32 n_embd = 2560
|