Spaces:
Running
Running
liujch1998
committed on
Commit
·
7474206
1
Parent(s):
9f036ec
Add Pile-train
Browse files
- constants.py +1 -0
constants.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
# options
|
4 |
CORPUS_BY_DESC = {
|
5 |
'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
|
|
|
6 |
'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
|
7 |
'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval_gpt2',
|
8 |
'Dolma-sample (OLMo tokenizer), 8.0B tokens': 'v4_dolmasample_olmo',
|
|
|
3 |
# options
|
4 |
CORPUS_BY_DESC = {
|
5 |
'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
|
6 |
+
'Pile-train (LLaMA tokenizer), 380B tokens': 'v4_piletrain_llama',
|
7 |
'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
|
8 |
'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval_gpt2',
|
9 |
'Dolma-sample (OLMo tokenizer), 8.0B tokens': 'v4_dolmasample_olmo',
|