liujch1998 committed on
Commit
e59eb9e
·
1 Parent(s): ce324c1

Customizable consts

Browse files
Files changed (2) hide show
  1. app.py +1 -29
  2. constants.py +30 -0
app.py CHANGED
@@ -2,35 +2,7 @@ import gradio as gr
2
  import json
3
  import os
4
  import requests
5
-
6
- CORPUS_BY_DESC = {
7
- 'RedPajama (LLaMA tokenizer)': 'rpj_v3_c4_llama2',
8
- 'Pile-val (GPT-2 tokenizer)': 'pile_v3_val',
9
- }
10
- CORPUS_DESCS = list(CORPUS_BY_DESC.keys())
11
- QUERY_TYPE_BY_DESC = {
12
- '1. Count an n-gram': 'count',
13
- '2. Compute the probability of the last token in an n-gram': 'compute_prob',
14
- '3. Compute the next-token distribution of an (n-1)-gram': 'get_next_token_distribution_approx',
15
- '4. Compute the ∞-gram probability of the last token': 'compute_infgram_prob',
16
- '5. Compute the ∞-gram next-token distribution': 'get_infgram_next_token_distribution_approx',
17
- '6. Searching for document containing n-gram(s)': 'get_a_random_document_from_cnf_query_fast_approx',
18
- # '7. Analyze an (AI-generated) document using ∞-gram': 'analyze_document',
19
- }
20
- QUERY_DESC_BY_TYPE = {v: k for k, v in QUERY_TYPE_BY_DESC.items()}
21
- QUERY_DESCS = list(QUERY_TYPE_BY_DESC.keys())
22
-
23
- MAX_QUERY_CHARS = 1000
24
- MAX_INPUT_DOC_TOKENS = 1000
25
- MAX_OUTPUT_DOC_TOKENS = 5000 # must be an even number!
26
- MAX_CNT_FOR_NTD = 1000
27
- MAX_CLAUSE_FREQ = 10000
28
- MAX_CLAUSE_FREQ_FAST = 1000000
29
- MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD = 50000
30
- MAX_DIFF_TOKENS = 100
31
- MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
32
- MAX_CLAUSES_IN_CNF = 4
33
- MAX_TERMS_IN_DISJ_CLAUSE = 4
34
 
35
  API_IPADDR = os.environ.get('API_IPADDR', None)
36
  default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
 
2
  import json
3
  import os
4
  import requests
5
+ from .constants import *
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  API_IPADDR = os.environ.get('API_IPADDR', None)
8
  default_concurrency_limit = os.environ.get('default_concurrency_limit', 10)
constants.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Customizable constants for the demo app.

The numeric limits below can be overridden through environment variables of
the same name. Values read from the environment are always strings, so they
are converted to ``int`` here; otherwise an overridden limit would break
arithmetic (``2 * '100'`` is ``'100100'``) and numeric comparisons.
"""

import os


def _env_int(name, default):
    """Return the integer value of environment variable *name*.

    Args:
        name: Name of the environment variable to read.
        default: Integer returned when the variable is not set.

    Returns:
        The variable parsed as an ``int``, or *default* when it is unset.

    Raises:
        ValueError: If the variable is set but is not a valid integer.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError as err:
        # Fail loudly at import time instead of letting a string limit leak
        # into later comparisons and arithmetic.
        raise ValueError(
            f'Environment variable {name} must be an integer, got {raw!r}'
        ) from err


# Human-readable corpus description -> corpus identifier.
CORPUS_BY_DESC = {
    'RedPajama (LLaMA tokenizer)': 'rpj_v3_c4_llama2',
    'Pile-val (GPT-2 tokenizer)': 'pile_v3_val',
}
CORPUS_DESCS = list(CORPUS_BY_DESC.keys())

# Human-readable query description -> query type identifier.
QUERY_TYPE_BY_DESC = {
    '1. Count an n-gram': 'count',
    '2. Compute the probability of the last token in an n-gram': 'compute_prob',
    '3. Compute the next-token distribution of an (n-1)-gram': 'get_next_token_distribution_approx',
    '4. Compute the ∞-gram probability of the last token': 'compute_infgram_prob',
    '5. Compute the ∞-gram next-token distribution': 'get_infgram_next_token_distribution_approx',
    '6. Searching for document containing n-gram(s)': 'get_a_random_document_from_cnf_query_fast_approx',
    # '7. Analyze an (AI-generated) document using ∞-gram': 'analyze_document',
}
# Reverse lookup: query type identifier -> description.
QUERY_DESC_BY_TYPE = {v: k for k, v in QUERY_TYPE_BY_DESC.items()}
QUERY_DESCS = list(QUERY_TYPE_BY_DESC.keys())

MAX_QUERY_CHARS = _env_int('MAX_QUERY_CHARS', 1000)
MAX_INPUT_DOC_TOKENS = _env_int('MAX_INPUT_DOC_TOKENS', 1000)
MAX_OUTPUT_DOC_TOKENS = _env_int('MAX_OUTPUT_DOC_TOKENS', 5000)  # must be an even number!
MAX_CNT_FOR_NTD = _env_int('MAX_CNT_FOR_NTD', 1000)
MAX_CLAUSE_FREQ = _env_int('MAX_CLAUSE_FREQ', 10000)
MAX_CLAUSE_FREQ_FAST = _env_int('MAX_CLAUSE_FREQ_FAST', 1000000)
MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD = _env_int('MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD', 50000)
MAX_DIFF_TOKENS = _env_int('MAX_DIFF_TOKENS', 100)
# Derived from the (already integer) token limit, so it stays numeric even
# when MAX_DIFF_TOKENS is overridden from the environment.
MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
MAX_CLAUSES_IN_CNF = _env_int('MAX_CLAUSES_IN_CNF', 4)
MAX_TERMS_IN_DISJ_CLAUSE = _env_int('MAX_TERMS_IN_DISJ_CLAUSE', 4)