liujch1998 committed on
Commit
0d3c7d8
·
1 Parent(s): e979a07

Add IP-based throttle

Browse files
Files changed (2) hide show
  1. app.py +46 -11
  2. constants.py +2 -0
app.py CHANGED
@@ -3,6 +3,7 @@ import datetime
3
  import json
4
  import os
5
  import requests
 
6
  from constants import *
7
 
8
  API_IPADDR = os.environ.get('API_IPADDR', None)
@@ -11,17 +12,31 @@ max_size = os.environ.get('max_size', 100)
11
  max_threads = os.environ.get('max_threads', 40)
12
  debug = (os.environ.get('debug', 'False') != 'False')
13
 
14
- def process(corpus_desc, query_desc, query):
 
 
 
 
 
 
 
 
 
15
  corpus = CORPUS_BY_DESC[corpus_desc]
16
  query_type = QUERY_TYPE_BY_DESC[query_desc]
17
- timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
18
  data = {
19
  'timestamp': timestamp,
 
 
20
  'corpus': corpus,
21
  'query_type': query_type,
22
  'query': query,
23
  }
24
  print(json.dumps(data))
 
 
 
 
25
  if API_IPADDR is None:
26
  raise ValueError(f'API_IPADDR envvar is not set!')
27
  response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
@@ -33,18 +48,38 @@ def process(corpus_desc, query_desc, query):
33
  print(result)
34
  return result
35
 
36
- def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  corpus = CORPUS_BY_DESC[corpus_desc]
38
  query_type = QUERY_TYPE_BY_DESC[query_desc]
39
  timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
40
  data = {
41
  'timestamp': timestamp,
 
 
42
  'corpus': corpus,
43
  'query_type': query_type,
44
  'query': query,
45
  'maxnum': maxnum,
46
  }
47
  print(json.dumps(data))
 
 
 
 
48
  if API_IPADDR is None:
49
  raise ValueError(f'API_IPADDR envvar is not set!')
50
  response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
@@ -60,7 +95,7 @@ def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum):
60
  outputs = outputs[:maxnum]
61
  while len(outputs) < 10:
62
  outputs.append([])
63
- return output_tokens, message, outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6], outputs[7], outputs[8], outputs[9]
64
 
65
  with gr.Blocks() as demo:
66
  with gr.Column():
@@ -281,14 +316,14 @@ If you find this tool useful, please kindly cite our paper:
281
  ard_cnf_multi_clear.add([ard_cnf_multi_input, ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9])
282
  doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])
283
 
284
- count_submit.click(process, inputs=[corpus_desc, query_desc, count_input], outputs=[count_output, count_output_tokens], api_name=False)
285
- ngram_submit.click(process, inputs=[corpus_desc, query_desc, ngram_input], outputs=[ngram_output, ngram_output_tokens], api_name=False)
286
- ntd_submit.click(process, inputs=[corpus_desc, query_desc, ntd_input], outputs=[ntd_output, ntd_output_tokens], api_name=False)
287
- infgram_submit.click(process, inputs=[corpus_desc, query_desc, infgram_input], outputs=[infgram_output, infgram_output_tokens, infgram_longest_suffix], api_name=False)
288
- infntd_submit.click(process, inputs=[corpus_desc, query_desc, infntd_input], outputs=[infntd_output, infntd_output_tokens, infntd_longest_suffix], api_name=False)
289
  # ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message], api_name=False)
290
- ard_cnf_multi_submit.click(process_ard_cnf_multi, inputs=[corpus_desc, query_desc, ard_cnf_multi_input, ard_cnf_multi_maxnum], outputs=[ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9], api_name=False)
291
- doc_analysis_submit.click(process, inputs=[corpus_desc, query_desc, doc_analysis_input], outputs=[doc_analysis_output], api_name=False)
292
 
293
  def update_query_desc(selection):
294
  return {
 
3
  import json
4
  import os
5
  import requests
6
+ import time
7
  from constants import *
8
 
9
  API_IPADDR = os.environ.get('API_IPADDR', None)
 
12
  max_threads = os.environ.get('max_threads', 40)
13
  debug = (os.environ.get('debug', 'False') != 'False')
14
 
15
+ last_query_time_by_ip = {}
16
+
17
+ def process(corpus_desc, query_desc, query, ret_num, request: gr.Request):
18
+ global last_query_time_by_ip
19
+ ip = request.client.host if request else ''
20
+ timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
21
+ t = time.time()
22
+ last_query_time = 0 if ip == '' else last_query_time_by_ip.get(ip, 0)
23
+ blocked = (t - last_query_time < MIN_QUERY_INTERVAL_SECONDS)
24
+
25
  corpus = CORPUS_BY_DESC[corpus_desc]
26
  query_type = QUERY_TYPE_BY_DESC[query_desc]
 
27
  data = {
28
  'timestamp': timestamp,
29
+ 'ip': ip,
30
+ 'blocked': blocked,
31
  'corpus': corpus,
32
  'query_type': query_type,
33
  'query': query,
34
  }
35
  print(json.dumps(data))
36
+ if blocked:
37
+ return tuple([f'You queried too frequently. Please try again in {MIN_QUERY_INTERVAL_SECONDS} seconds.'] + [''] * (ret_num - 1))
38
+ if ip != '':
39
+ last_query_time_by_ip[ip] = t
40
  if API_IPADDR is None:
41
  raise ValueError(f'API_IPADDR envvar is not set!')
42
  response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
 
48
  print(result)
49
  return result
50
 
51
+ def process_1(corpus_desc, query_desc, query, request: gr.Request):
52
+ return process(corpus_desc, query_desc, query, 1, request)
53
+ def process_2(corpus_desc, query_desc, query, request: gr.Request):
54
+ return process(corpus_desc, query_desc, query, 2, request)
55
+ def process_3(corpus_desc, query_desc, query, request: gr.Request):
56
+ return process(corpus_desc, query_desc, query, 3, request)
57
+
58
+ def process_ard_cnf_multi(corpus_desc, query_desc, query, maxnum, request: gr.Request):
59
+ global last_query_time_by_ip
60
+ ip = request.client.host if request else ''
61
+ timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
62
+ t = time.time()
63
+ last_query_time = 0 if ip == '' else last_query_time_by_ip.get(ip, 0)
64
+ blocked = (t - last_query_time < MIN_QUERY_INTERVAL_SECONDS)
65
+
66
  corpus = CORPUS_BY_DESC[corpus_desc]
67
  query_type = QUERY_TYPE_BY_DESC[query_desc]
68
  timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
69
  data = {
70
  'timestamp': timestamp,
71
+ 'ip': ip,
72
+ 'blocked': blocked,
73
  'corpus': corpus,
74
  'query_type': query_type,
75
  'query': query,
76
  'maxnum': maxnum,
77
  }
78
  print(json.dumps(data))
79
+ if blocked:
80
+ return tuple([f'You queried too frequently. Please try again in {MIN_QUERY_INTERVAL_SECONDS} seconds.'] + [''] * 11)
81
+ if ip != '':
82
+ last_query_time_by_ip[ip] = t
83
  if API_IPADDR is None:
84
  raise ValueError(f'API_IPADDR envvar is not set!')
85
  response = requests.post(f'http://{API_IPADDR}:5000/', json=data)
 
95
  outputs = outputs[:maxnum]
96
  while len(outputs) < 10:
97
  outputs.append([])
98
+ return message, output_tokens, outputs[0], outputs[1], outputs[2], outputs[3], outputs[4], outputs[5], outputs[6], outputs[7], outputs[8], outputs[9]
99
 
100
  with gr.Blocks() as demo:
101
  with gr.Column():
 
316
  ard_cnf_multi_clear.add([ard_cnf_multi_input, ard_cnf_multi_output_tokens, ard_cnf_multi_output_message, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9])
317
  doc_analysis_clear.add([doc_analysis_input, doc_analysis_output])
318
 
319
+ count_submit.click(process_2, inputs=[corpus_desc, query_desc, count_input], outputs=[count_output, count_output_tokens], api_name=False)
320
+ ngram_submit.click(process_2, inputs=[corpus_desc, query_desc, ngram_input], outputs=[ngram_output, ngram_output_tokens], api_name=False)
321
+ ntd_submit.click(process_2, inputs=[corpus_desc, query_desc, ntd_input], outputs=[ntd_output, ntd_output_tokens], api_name=False)
322
+ infgram_submit.click(process_3, inputs=[corpus_desc, query_desc, infgram_input], outputs=[infgram_output, infgram_output_tokens, infgram_longest_suffix], api_name=False)
323
+ infntd_submit.click(process_3, inputs=[corpus_desc, query_desc, infntd_input], outputs=[infntd_output, infntd_output_tokens, infntd_longest_suffix], api_name=False)
324
  # ard_cnf_submit.click(process, inputs=[corpus_desc, query_desc, ard_cnf_input], outputs=[ard_cnf_output, ard_cnf_output_tokens, ard_cnf_output_message], api_name=False)
325
+ ard_cnf_multi_submit.click(process_ard_cnf_multi, inputs=[corpus_desc, query_desc, ard_cnf_multi_input, ard_cnf_multi_maxnum], outputs=[ard_cnf_multi_output_message, ard_cnf_multi_output_tokens, ard_cnf_multi_output_0, ard_cnf_multi_output_1, ard_cnf_multi_output_2, ard_cnf_multi_output_3, ard_cnf_multi_output_4, ard_cnf_multi_output_5, ard_cnf_multi_output_6, ard_cnf_multi_output_7, ard_cnf_multi_output_8, ard_cnf_multi_output_9], api_name=False)
326
+ doc_analysis_submit.click(process_1, inputs=[corpus_desc, query_desc, doc_analysis_input], outputs=[doc_analysis_output], api_name=False)
327
 
328
  def update_query_desc(selection):
329
  return {
constants.py CHANGED
@@ -30,3 +30,5 @@ MAX_DIFF_TOKENS = int(os.environ.get('MAX_DIFF_TOKENS', 100))
30
  MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
31
  MAX_CLAUSES_IN_CNF = int(os.environ.get('MAX_CLAUSES_IN_CNF', 4))
32
  MAX_TERMS_IN_DISJ_CLAUSE = int(os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4))
 
 
 
30
  MAX_DIFF_BYTES = 2 * MAX_DIFF_TOKENS
31
  MAX_CLAUSES_IN_CNF = int(os.environ.get('MAX_CLAUSES_IN_CNF', 4))
32
  MAX_TERMS_IN_DISJ_CLAUSE = int(os.environ.get('MAX_TERMS_IN_DISJ_CLAUSE', 4))
33
+
34
+ MIN_QUERY_INTERVAL_SECONDS = int(os.environ.get('MIN_QUERY_INTERVAL_SECONDS', 5))