Spaces:
Running
Running
liujch1998
committed on
Commit
·
9f036ec
1
Parent(s):
5f2c7e6
Sync changes
Browse files- app.py +1 -0
- constants.py +2 -0
app.py
CHANGED
@@ -109,6 +109,7 @@ with gr.Blocks() as demo:
|
|
109 |
|
110 |
<p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
|
111 |
<p style='font-size: 16px;'>The engine is developed by <a href="https://liujch1998.github.io">Jiacheng (Gary) Liu</a> and documented in our paper: <a href="https://arxiv.org/abs/2401.17377">Infini-gram: Scaling Unbounded n-gram Language Models to a Trillion Tokens</a>. HF Paper Page: <a href="https://huggingface.co/papers/2401.17377">https://huggingface.co/papers/2401.17377</a></p>
|
|
|
112 |
<p style='font-size: 16px;'><b>Note: We kindly ask you not to programmatically submit queries to the API at the moment. We will release a more stable API soon. Thank you :)</b></p>
|
113 |
'''
|
114 |
)
|
|
|
109 |
|
110 |
<p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
|
111 |
<p style='font-size: 16px;'>The engine is developed by <a href="https://liujch1998.github.io">Jiacheng (Gary) Liu</a> and documented in our paper: <a href="https://arxiv.org/abs/2401.17377">Infini-gram: Scaling Unbounded n-gram Language Models to a Trillion Tokens</a>. HF Paper Page: <a href="https://huggingface.co/papers/2401.17377">https://huggingface.co/papers/2401.17377</a></p>
|
112 |
+
<p style='font-size: 16px;'>All inputs are <b>case-sensitive</b>.</p>
|
113 |
<p style='font-size: 16px;'><b>Note: We kindly ask you not to programmatically submit queries to the API at the moment. We will release a more stable API soon. Thank you :)</b></p>
|
114 |
'''
|
115 |
)
|
constants.py
CHANGED
@@ -5,6 +5,7 @@ CORPUS_BY_DESC = {
|
|
5 |
'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
|
6 |
'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
|
7 |
'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval_gpt2',
|
|
|
8 |
}
|
9 |
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
|
10 |
ENGINE_BY_DESC = {
|
@@ -37,4 +38,5 @@ DEBUG = (os.environ.get('DEBUG', 'False') != 'False')
|
|
37 |
|
38 |
# C++ engine
|
39 |
CPP_PORT = int(os.environ.get('CPP_PORT', 3786))
|
|
|
40 |
SOCKET_OUT_BUFFER_SIZE = 65536
|
|
|
5 |
'RedPajama (LLaMA tokenizer), 1.4T tokens': 'v3_rpj_llama_c4',
|
6 |
'Pile-val (LLaMA tokenizer), 390M tokens': 'v3_pileval_llama',
|
7 |
'Pile-val (GPT-2 tokenizer), 380M tokens': 'v3_pileval_gpt2',
|
8 |
+
'Dolma-sample (OLMo tokenizer), 8.0B tokens': 'v4_dolmasample_olmo',
|
9 |
}
|
10 |
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
|
11 |
ENGINE_BY_DESC = {
|
|
|
38 |
|
39 |
# C++ engine
|
40 |
CPP_PORT = int(os.environ.get('CPP_PORT', 3786))
|
41 |
+
SOCKET_IN_BUFFER_SIZE = 2048
|
42 |
SOCKET_OUT_BUFFER_SIZE = 65536
|