File size: 26,223 Bytes
d8d9fba
4641d03
d8d9fba
 
 
e418a75
d8d9fba
 
 
 
 
4641d03
d8d9fba
 
 
 
4641d03
d8d9fba
26b368d
d8d9fba
 
 
 
26b368d
d8d9fba
 
 
 
 
 
26b368d
4641d03
 
d8d9fba
 
26b368d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8d9fba
 
 
 
 
 
4641d03
d8d9fba
 
 
 
 
4641d03
d8d9fba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26b368d
d8d9fba
26b368d
 
 
d8d9fba
26b368d
d8d9fba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26b368d
d8d9fba
26b368d
 
 
 
d8d9fba
26b368d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8d9fba
26b368d
d8d9fba
26b368d
 
d8d9fba
 
 
 
 
 
26b368d
d8d9fba
 
 
 
 
26b368d
d8d9fba
 
86bc4ef
d8d9fba
 
 
26b368d
 
d8d9fba
26b368d
 
 
d8d9fba
26b368d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8d9fba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4641d03
 
 
 
 
 
 
 
d8d9fba
 
26b368d
d8d9fba
26b368d
 
 
d8d9fba
 
 
 
26b368d
d8d9fba
26b368d
 
 
d8d9fba
 
 
 
 
 
 
 
 
26b368d
 
d8d9fba
 
 
 
 
 
 
 
26b368d
 
d8d9fba
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
import gradio as gr
import datetime
import json
import os
import requests
from constants import *

# Runtime configuration, read once from environment variables at import time.

# Host/IP of the backend query API; None when unset, in which case the request
# handlers below raise instead of sending anything.
API_IPADDR = os.environ.get('API_IPADDR', None)
# Environment values are always strings, so the numeric settings are converted
# explicitly; without this an overridden value would be handed on as a str
# while the built-in default would be an int.
default_concurrency_limit = int(os.environ.get('default_concurrency_limit', 10))
max_size = int(os.environ.get('max_size', 100))
max_threads = int(os.environ.get('max_threads', 40))
# Debug mode is on for any value other than the literal string 'False'.
debug = (os.environ.get('debug', 'False') != 'False')

def process(corpus_desc, query_desc, query):
    """Send one query to the backend API and return its decoded JSON reply.

    Args:
        corpus_desc: Corpus description chosen in the UI; looked up in
            CORPUS_BY_DESC.
        query_desc: Query-type description chosen in the UI; looked up in
            QUERY_TYPE_BY_DESC.
        query: The query string entered by the user.

    Returns:
        The JSON-decoded body of the API response, passed through unchanged.

    Raises:
        KeyError: If either description is not a known key.
        ValueError: If API_IPADDR is not configured, or the API answers with
            a status code other than 200.
    """
    corpus = CORPUS_BY_DESC[corpus_desc]
    query_type = QUERY_TYPE_BY_DESC[query_desc]
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    data = {
        'timestamp': timestamp,
        'corpus': corpus,
        'query_type': query_type,
        'query': query,
    }
    # Every request is logged to stdout, even when it cannot be sent.
    print(json.dumps(data))
    if API_IPADDR is None:
        raise ValueError('API_IPADDR envvar is not set!')
    # NOTE(review): no timeout is passed, so a stalled backend would block
    # this call indefinitely; consider adding one suited to query latency.
    response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
    if response.status_code != 200:
        # The error body is not guaranteed to be JSON; fall back to the raw
        # text so the HTTP status is reported rather than a decoding failure.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise ValueError(f'HTTP error {response.status_code}: {detail}')
    result = response.json()
    if debug:
        print(result)
    return result

def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum):
    """Run a multi-document search query and spread the reply over the UI slots.

    Args:
        corpus_desc: Corpus description chosen in the UI; looked up in
            CORPUS_BY_DESC.
        query_desc: Query-type description chosen in the UI; looked up in
            QUERY_TYPE_BY_DESC.
        query: The query string entered by the user.
        maxnum: Maximum number of documents to request and display.

    Returns:
        A 12-tuple: the tokenized query, the status message, then ten
        document entries. Slots beyond the returned documents are filled
        with empty lists.

    Raises:
        KeyError: If either description is not a known key.
        ValueError: If API_IPADDR is not configured, the API answers with a
            status code other than 200, or the reply does not have exactly
            three items.
    """
    # Number of document outputs the caller wires to this handler.
    num_slots = 10

    corpus = CORPUS_BY_DESC[corpus_desc]
    query_type = QUERY_TYPE_BY_DESC[query_desc]
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    data = {
        'timestamp': timestamp,
        'corpus': corpus,
        'query_type': query_type,
        'query': query,
        'maxnum': maxnum,
    }
    # Every request is logged to stdout, even when it cannot be sent.
    print(json.dumps(data))
    if API_IPADDR is None:
        raise ValueError('API_IPADDR envvar is not set!')
    # NOTE(review): no timeout is passed, so a stalled backend would block
    # this call indefinitely; consider adding one suited to query latency.
    response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
    if response.status_code != 200:
        # The error body is not guaranteed to be JSON; fall back to the raw
        # text so the HTTP status is reported rather than a decoding failure.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        raise ValueError(f'HTTP error {response.status_code}: {detail}')
    result = response.json()
    if debug:
        print(result)
    if len(result) != 3:
        raise ValueError(f'Invalid result: {result}')
    outputs, output_tokens, message = result[0], result[1], result[2]
    # Honour the requested cap, then pad so every slot receives a value.
    outputs = outputs[:maxnum]
    outputs.extend([] for _ in range(num_slots - len(outputs)))
    return (output_tokens, message, *outputs[:num_slots])

with gr.Blocks() as demo:
    with gr.Column():
        # Page header: title plus a short usage blurb. Centering is expressed
        # through a style attribute, since `text-align` is a CSS property and
        # not an HTML attribute.
        # NOTE(review): the paper link below has an empty href; fill in the
        # URL once it is known.
        gr.HTML(
            '''<h1 style="text-align: center;">Infini-gram: An Engine for n-gram / ∞-gram Language Models with Trillion-Token Corpora</h1>

            <p style='font-size: 16px;'>This is an engine that processes n-gram / ∞-gram queries on a text corpus. Please first select the corpus and the type of query, then enter your query and submit.</p>
            <p style='font-size: 16px;'>The engine is documented in our paper: <a href="">Infini-gram: Scaling Unbounded n-gram Language Models to a Trillion Tokens</a></p>
            '''
        )
        # Selector strip shared by every query panel: the corpus on the left
        # (narrow column), the query type on the right (wide column). Both
        # default to the first available choice.
        with gr.Row():
            with gr.Column(scale=1):
                corpus_desc = gr.Radio(
                    label='Corpus',
                    choices=CORPUS_DESCS,
                    value=CORPUS_DESCS[0],
                )
            with gr.Column(scale=3):
                query_desc = gr.Radio(label='Query Type', choices=QUERY_DESCS, value=QUERY_DESCS[0])

        # Query panels 1-5. Each panel is a Row holding explanatory HTML, a
        # left column (query box, Clear/Submit buttons, tokenized query) and a
        # right column (result). Only row_1 is created visible; the others
        # start hidden and are toggled by update_query_desc further down.

        # Panel 1: n-gram count.
        with gr.Row(visible=True) as row_1:
            with gr.Column():
                gr.HTML('<h2>1. Count an n-gram</h2>')
                gr.HTML('<p style="font-size: 16px;">This counts the number of times an n-gram appears in the corpus. If you submit an empty input, it will return the total number of tokens in the corpus.</p>')
                gr.HTML('<p style="font-size: 16px;">Example query: <b>natural language processing</b> (the output is Cnt(natural language processing))</p>')
                with gr.Row():
                    with gr.Column(scale=1):
                        count_input = gr.Textbox(placeholder='Enter a string (an n-gram) here', label='Query', interactive=True)
                        with gr.Row():
                            count_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            count_submit = gr.Button(value='Submit', variant='primary', visible=True)
                        count_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
                    with gr.Column(scale=1):
                        count_output = gr.Label(label='Count', num_top_classes=0)

        # Panel 2: n-gram probability of the last token.
        with gr.Row(visible=False) as row_2:
            with gr.Column():
                gr.HTML('<h2>2. Compute the probability of the last token in an n-gram</h2>')
                gr.HTML('<p style="font-size: 16px;">This computes the n-gram probability of the last token conditioned on the previous tokens (i.e. (n-1)-gram)).</p>')
                gr.HTML('<p style="font-size: 16px;">Example query: <b>natural language processing</b> (the output is P(processing | natural language), by counting the appearance of the 3-gram "natural language processing" and the 2-gram "natural language", and take the division between the two)</p>')
                gr.HTML('<p style="font-size: 16px;">Note: The (n-1)-gram needs to exist in the corpus. If the (n-1)-gram is not found in the corpus, an error message will appear.</p>')
                with gr.Row():
                    with gr.Column(scale=1):
                        ngram_input = gr.Textbox(placeholder='Enter a string (an n-gram) here', label='Query', interactive=True)
                        with gr.Row():
                            ngram_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            ngram_submit = gr.Button(value='Submit', variant='primary', visible=True)
                        ngram_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
                    with gr.Column(scale=1):
                        ngram_output = gr.Label(label='Probability', num_top_classes=0)

        # Panel 3: next-token distribution of an (n-1)-gram. The explanatory
        # note is an f-string because it embeds MAX_CNT_FOR_NTD.
        with gr.Row(visible=False) as row_3:
            with gr.Column():
                gr.HTML('<h2>3. Compute the next-token distribution of an (n-1)-gram</h2>')
                gr.HTML('<p style="font-size: 16px;">This is an extension of the Query 2: It interprets your input as the (n-1)-gram and gives you the full next-token distribution.</p>')
                gr.HTML('<p style="font-size: 16px;">Example query: <b>natural language</b> (the output is P(* | natural language), for the top-10 tokens *)</p>')
                gr.HTML(f'<p style="font-size: 16px;">Note: The (n-1)-gram needs to exist in the corpus. If the (n-1)-gram is not found in the corpus, an error message will appear. If the (n-1)-gram appears more than {MAX_CNT_FOR_NTD} times in the corpus, the result will be approximate.</p>')
                with gr.Row():
                    with gr.Column(scale=1):
                        ntd_input = gr.Textbox(placeholder='Enter a string (an (n-1)-gram) here', label='Query', interactive=True)
                        with gr.Row():
                            ntd_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            ntd_submit = gr.Button(value='Submit', variant='primary', visible=True)
                        ntd_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
                    with gr.Column(scale=1):
                        ntd_output = gr.Label(label='Distribution', num_top_classes=10)

        # Panel 4: ∞-gram probability of the last token. Besides the tokenized
        # query it has an extra read-only box for the longest found suffix.
        with gr.Row(visible=False) as row_4:
            with gr.Column():
                gr.HTML('<h2>4. Compute the ∞-gram probability of the last token</h2>')
                gr.HTML('<p style="font-size: 16px;">This computes the ∞-gram probability of the last token conditioned on the previous tokens. Compared to Query 2 (which uses your entire input for n-gram modeling), here we take the longest suffix that we can find in the corpus.</p>')
                gr.HTML('<p style="font-size: 16px;">Example query: <b>I love natural language processing</b> (the output is P(processing | natural language), because "natural language" appears in the corpus but "love natural language" doesn\'t; in this case the effective n = 3)</p>')
                gr.HTML('<p style="font-size: 16px;">Note: It may be possible that the effective n = 1, in which case it reduces to the uni-gram probability of the last token.</p>')
                with gr.Row():
                    with gr.Column(scale=1):
                        infgram_input = gr.Textbox(placeholder='Enter a string here', label='Query', interactive=True)
                        with gr.Row():
                            infgram_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            infgram_submit = gr.Button(value='Submit', variant='primary', visible=True)
                        infgram_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
                        infgram_longest_suffix = gr.Textbox(label='Longest Found Suffix', interactive=False)
                    with gr.Column(scale=1):
                        infgram_output = gr.Label(label='Probability', num_top_classes=0)

        # Panel 5: ∞-gram next-token distribution; same layout as panel 4.
        with gr.Row(visible=False) as row_5:
            with gr.Column():
                gr.HTML('<h2>5. Compute the ∞-gram next-token distribution</h2>')
                gr.HTML('<p style="font-size: 16px;">This is similar to Query 3, but with ∞-gram instead of n-gram.</p>')
                gr.HTML('<p style="font-size: 16px;">Example query: <b>I love natural language</b> (the output is P(* | natural language), for the top-10 tokens *)</p>')
                with gr.Row():
                    with gr.Column(scale=1):
                        infntd_input = gr.Textbox(placeholder='Enter a string here', label='Query', interactive=True)
                        with gr.Row():
                            infntd_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            infntd_submit = gr.Button(value='Submit', variant='primary', visible=True)
                        infntd_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
                        infntd_longest_suffix = gr.Textbox(label='Longest Found Suffix', interactive=False)
                    with gr.Column(scale=1):
                        infntd_output = gr.Label(label='Distribution', num_top_classes=10)

        # with gr.Row(visible=False) as row_6:
        #     with gr.Column():
        #         gr.HTML(f'''<h2>6. Searching for document containing n-gram(s)</h2>
        #                     <p style="font-size: 16px;">This displays a random document in the corpus that satisfies your query. You can simply enter an n-gram, in which case the document displayed would contain your n-gram. You can also connect multiple n-gram terms with the AND/OR operators, in the <a href="https://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF format</a>, in which case the displayed document contains n-grams such that it satisfies this logical constraint.</p>
        #                     <p style="font-size: 16px;">Example queries:</p>
        #                     <ul style="font-size: 16px;">
        #                         <li><b>natural language processing</b> (the displayed document would contain "natural language processing")</li>
        #                         <li><b>natural language processing AND deep learning</b> (the displayed document would contain both "natural language processing" and "deep learning")</li>
        #                         <li><b>natural language processing OR artificial intelligence AND deep learning OR machine learning</b> (the displayed document would contain at least one of "natural language processing" / "artificial intelligence", and also at least one of "deep learning" / "machine learning")</li>
        #                     </ul>
        #                     <p style="font-size: 16px;">If you want another random document, simply hit the Submit button again :)</p>
        #                     <p style="font-size: 16px;">A few notes:</p>
        #                     <ul style="font-size: 16px;">
        #                         <li>When you write a query in CNF, note that <b>OR has higher precedence than AND</b> (which is contrary to conventions in boolean algebra).</li>
        #                         <li>If the document is too long, it will be truncated to {MAX_OUTPUT_DOC_TOKENS} tokens.</li>
        #                         <li>We can only include documents where all terms (or clauses) are separated by no more than {MAX_DIFF_TOKENS} tokens.</li>
        #                         <li>If you query for two or more clauses, and a clause has more than {MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD} matches (per shard), we will estimate the count from a random subset of all documents containing that clause. This might cause a zero count on conjuction of some simple n-grams (e.g., <b>birds AND oil</b>).</li>
        #                         <li>The number of found documents may contain duplicates (e.g., if a document contains your query term twice, it may be counted twice).</li>
        #                     </ul>
        #                     <p style="font-size: 16px;">❗️WARNING: Corpus may contain problematic contents such as PII, toxicity, hate speech, and NSFW text. This tool is merely presenting selected text from the corpus, without any post-hoc safety filtering. It is NOT creating new text. This is a research prototype through which we can expose and examine existing problems with massive text corpora. Please use with caution. Don't be evil :)</p>
        #                 ''')
        #         with gr.Row():
        #             with gr.Column(scale=1):
        #                 ard_cnf_input = gr.Textbox(placeholder='Enter a query here', label='Query', interactive=True)
        #                 with gr.Row():
        #                     ard_cnf_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
        #                     ard_cnf_submit = gr.Button(value='Submit', variant='primary', visible=True)
        #                 ard_cnf_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
        #             with gr.Column(scale=1):
        #                 ard_cnf_output_message = gr.Label(label='Message', num_top_classes=0)
        #                 ard_cnf_output = gr.HighlightedText(label='Document', show_legend=False, color_map={"-": "red", "0": "green", "1": "cyan", "2": "blue", "3": "magenta"})

        # Panel 6: multi-document search. Starts hidden. The left column holds
        # the query box, a slider for how many documents to show, the
        # Clear/Submit buttons and the tokenized query; the right column holds
        # a status message and ten tabs, one highlighted document per tab.
        with gr.Row(visible=False) as row_6a:
            with gr.Column():
                gr.HTML(f'''<h2>6. Searching for documents containing n-gram(s)</h2>
                            <p style="font-size: 16px;">This displays a few random documents in the corpus that satisfies your query. You can simply enter an n-gram, in which case the document displayed would contain your n-gram. You can also connect multiple n-gram terms with the AND/OR operators, in the <a href="https://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF format</a>, in which case the displayed document contains n-grams such that it satisfies this logical constraint.</p>
                            <p style="font-size: 16px;">Example queries:</p>
                            <ul style="font-size: 16px;">
                                <li><b>natural language processing</b> (the displayed document would contain "natural language processing")</li>
                                <li><b>natural language processing AND deep learning</b> (the displayed document would contain both "natural language processing" and "deep learning")</li>
                                <li><b>natural language processing OR artificial intelligence AND deep learning OR machine learning</b> (the displayed document would contain at least one of "natural language processing" / "artificial intelligence", and also at least one of "deep learning" / "machine learning")</li>
                            </ul>
                            <p style="font-size: 16px;">If you want another batch of random documents, simply hit the Submit button again :)</p>
                            <p style="font-size: 16px;">A few notes:</p>
                            <ul style="font-size: 16px;">
                                <li>When you write a query in CNF, note that <b>OR has higher precedence than AND</b> (which is contrary to conventions in boolean algebra).</li>
                                <li>If the document is too long, it will be truncated to {MAX_OUTPUT_DOC_TOKENS} tokens.</li>
                                <li>We can only include documents where all terms (or clauses) are separated by no more than {MAX_DIFF_TOKENS} tokens.</li>
                                <li>If you query for two or more clauses, and a clause has more than {MAX_CLAUSE_FREQ_FAST_APPROX_PER_SHARD} matches (per shard), we will estimate the count from a random subset of all documents containing that clause. This might cause a zero count on conjuction of some simple n-grams (e.g., <b>birds AND oil</b>).</li>
                                <li>The number of found documents may contain duplicates (e.g., if a document contains your query term twice, it may be counted twice).</li>
                            </ul>
                            <p style="font-size: 16px;">❗️WARNING: Corpus may contain problematic contents such as PII, toxicity, hate speech, and NSFW text. This tool is merely presenting selected text from the corpus, without any post-hoc safety filtering. It is NOT creating new text. This is a research prototype through which we can expose and examine existing problems with massive text corpora. Please use with caution. Don't be evil :)</p>
                        ''')
                with gr.Row():
                    with gr.Column(scale=1):
                        ard_cnf_multi_input = gr.Textbox(placeholder='Enter a query here', label='Query', interactive=True)
                        ard_cnf_multi_maxnum = gr.Slider(minimum=1, maximum=10, value=1, step=1, label='Number of documents to Display')
                        with gr.Row():
                            ard_cnf_multi_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            ard_cnf_multi_submit = gr.Button(value='Submit', variant='primary', visible=True)
                        ard_cnf_multi_output_tokens = gr.Textbox(label='Tokenized', lines=2, interactive=False)
                    with gr.Column(scale=1):
                        ard_cnf_multi_output_message = gr.Label(label='Message', num_top_classes=0)
                        # Build the ten identically configured document tabs
                        # in order, labelled '1' through '10'.
                        _ard_cnf_multi_docs = []
                        for _tab_number in range(1, 11):
                            with gr.Tab(label=str(_tab_number)):
                                _ard_cnf_multi_docs.append(
                                    gr.HighlightedText(
                                        label='Document',
                                        show_legend=False,
                                        color_map={"-": "red", "0": "green", "1": "cyan", "2": "blue", "3": "magenta"},
                                    )
                                )
                        # Bind the individual names the event wiring refers to.
                        (
                            ard_cnf_multi_output_0,
                            ard_cnf_multi_output_1,
                            ard_cnf_multi_output_2,
                            ard_cnf_multi_output_3,
                            ard_cnf_multi_output_4,
                            ard_cnf_multi_output_5,
                            ard_cnf_multi_output_6,
                            ard_cnf_multi_output_7,
                            ard_cnf_multi_output_8,
                            ard_cnf_multi_output_9,
                        ) = _ard_cnf_multi_docs

        # Panel 7: document analysis. It is created hidden, and
        # update_query_desc does not list it among the rows it toggles (its
        # entry there is commented out), so as written it is never shown.
        with gr.Row(visible=False) as row_7:
            with gr.Column():
                gr.HTML('<h2>7. Analyze an (AI-generated) document using ∞-gram</h2>')
                gr.HTML('<p style="font-size: 16px;">This analyzes the document you entered using the ∞-gram. Each token is highlighted where (1) the color represents its ∞-gram probability (red is 0.0, blue is 1.0), and (2) the alpha represents the effective n (higher alpha means higher n).</p>')
                gr.HTML('<p style="font-size: 16px;">If you hover over a token, the tokens preceding it are each highlighted where (1) the color represents the n-gram probability of your selected token, with the n-gram starting from that highlighted token (red is 0.0, blue is 1.0), and (2) the alpha represents the count of the (n-1)-gram starting from that highlighted token (and up to but excluding your selected token) (higher alpha means higher count).</p>')
                with gr.Row():
                    with gr.Column(scale=1):
                        # Multi-line input box (10 lines) for the document.
                        doc_analysis_input = gr.Textbox(placeholder='Enter a document here', label='Query', interactive=True, lines=10)
                        with gr.Row():
                            doc_analysis_clear = gr.ClearButton(value='Clear', variant='secondary', visible=True)
                            doc_analysis_submit = gr.Button(value='Submit', variant='primary', visible=True)
                    with gr.Column(scale=1):
                        # The analysis is rendered as raw HTML.
                        doc_analysis_output = gr.HTML(value='', label='Analysis')

        # Footer with the citation notice. The Markdown text is written flush
        # left because the triple-quoted string keeps its indentation.
        with gr.Row():
            gr.Markdown('''
If you find this tool useful, please kindly cite our paper:
```
(coming soon)
```
''')

    # Register, for each Clear button, the components it resets: the panel's
    # query box plus every output the matching Submit handler fills.
    count_clear.add([count_input, count_output, count_output_tokens])
    ngram_clear.add([ngram_input, ngram_output, ngram_output_tokens])
    ntd_clear.add([ntd_input, ntd_output, ntd_output_tokens])
    # The ∞-gram probability panel also fills a "Longest Found Suffix" box, so
    # it is reset as well, as the ∞-gram distribution panel already does.
    infgram_clear.add([infgram_input, infgram_output, infgram_output_tokens, infgram_longest_suffix])
    infntd_clear.add([infntd_input, infntd_output, infntd_output_tokens, infntd_longest_suffix])
    # ard_cnf_clear.add([ard_cnf_input, ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message])
    ard_cnf_multi_clear.add([
        ard_cnf_multi_input,
        ard_cnf_multi_output_tokens,
        ard_cnf_multi_output_message,
        ard_cnf_multi_output_0,
        ard_cnf_multi_output_1,
        ard_cnf_multi_output_2,
        ard_cnf_multi_output_3,
        ard_cnf_multi_output_4,
        ard_cnf_multi_output_5,
        ard_cnf_multi_output_6,
        ard_cnf_multi_output_7,
        ard_cnf_multi_output_8,
        ard_cnf_multi_output_9,
    ])
    doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])

    # Hook every Submit button to its handler. Each handler receives the
    # selected corpus and query type followed by the panel's own inputs, and
    # its return values are spread over `outputs` in order.
    _selectors = [corpus_desc, query_desc]

    count_submit.click(
        process,
        inputs=[*_selectors, count_input],
        outputs=[count_output, count_output_tokens],
    )
    ngram_submit.click(
        process,
        inputs=[*_selectors, ngram_input],
        outputs=[ngram_output, ngram_output_tokens],
    )
    ntd_submit.click(
        process,
        inputs=[*_selectors, ntd_input],
        outputs=[ntd_output, ntd_output_tokens],
    )
    infgram_submit.click(
        process,
        inputs=[*_selectors, infgram_input],
        outputs=[infgram_output, infgram_output_tokens, infgram_longest_suffix],
    )
    infntd_submit.click(
        process,
        inputs=[*_selectors, infntd_input],
        outputs=[infntd_output, infntd_output_tokens, infntd_longest_suffix],
    )
    # ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message])
    ard_cnf_multi_submit.click(
        process_ard_cnf_multi,
        inputs=[*_selectors, ard_cnf_multi_input, ard_cnf_multi_maxnum],
        outputs=[
            ard_cnf_multi_output_tokens,
            ard_cnf_multi_output_message,
            ard_cnf_multi_output_0,
            ard_cnf_multi_output_1,
            ard_cnf_multi_output_2,
            ard_cnf_multi_output_3,
            ard_cnf_multi_output_4,
            ard_cnf_multi_output_5,
            ard_cnf_multi_output_6,
            ard_cnf_multi_output_7,
            ard_cnf_multi_output_8,
            ard_cnf_multi_output_9,
        ],
    )
    doc_analysis_submit.click(
        process,
        inputs=[*_selectors, doc_analysis_input],
        outputs=[doc_analysis_output],
    )

    def update_query_desc(selection):
        """Show the panel matching the chosen query type and hide the rest.

        `selection` is the current value of the query-type radio. The result
        maps each toggled row to a gr.Row update that is visible only when
        `selection` equals that row's description in QUERY_DESC_BY_TYPE.
        """
        # (row, query-type key) pairs. The document-analysis row_7 is not
        # toggled here, so it keeps the visibility it was created with.
        toggled_rows = (
            (row_1, 'count'),
            (row_2, 'compute_prob'),
            (row_3, 'get_next_token_distribution_approx'),
            (row_4, 'compute_infgram_prob'),
            (row_5, 'get_infgram_next_token_distribution_approx'),
            (row_6a, 'get_random_documents_from_cnf_query_fast_approx'),
        )
        updates = {}
        for row, query_type in toggled_rows:
            is_selected = selection == QUERY_DESC_BY_TYPE[query_type]
            updates[row] = gr.Row(visible=is_selected)
        return updates

    # Re-evaluate panel visibility whenever the query-type radio changes.
    query_desc.change(
        fn=update_query_desc,
        inputs=query_desc,
        outputs=[row_1, row_2, row_3, row_4, row_5, row_6a],
    )

# Configure the request queue, then start the server on whatever object
# queue() hands back. All settings come from the environment-driven
# configuration at the top of the file.
queued_demo = demo.queue(
    default_concurrency_limit=default_concurrency_limit,
    max_size=max_size,
)
queued_demo.launch(max_threads=max_threads, debug=debug)