terryyz committed on
Commit
f01b54b
·
verified ·
1 Parent(s): 9fd6b97

Delete demo.py

Browse files
Files changed (1) hide show
  1. demo.py +0 -196
demo.py DELETED
@@ -1,196 +0,0 @@
1
- import gradio as gr
2
- import subprocess
3
- import sys
4
- import os
5
- import threading
6
- import time
7
- import uuid
8
- import glob
9
- import shutil
10
- from pathlib import Path
11
- from huggingface_hub import HfApi
12
- from apscheduler.schedulers.background import BackgroundScheduler
13
-
14
# Guard flag: set to True by run_bigcodebench while a command is executing
# and reset to False when it finishes, so only one evaluation runs at a time.
is_running = False

# Executable name that every generated command line starts with; also the
# initial value shown in the read-only "Command" textbox.
default_command = "bigcodebench.evaluate"
16
-
17
def generate_command(
    jsonl_file, split, subset, parallel,
    min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
    check_gt_only, no_gt
):
    """Build the evaluation command line from the current UI inputs.

    Side effect: when a samples file is supplied, it is copied into the
    current working directory so the command can reference it by basename.

    Args:
        jsonl_file: Uploaded samples file. Either an object exposing the
            source path as ``.name`` or a plain path (``str`` /
            ``os.PathLike``); ``None`` when nothing has been uploaded.
        split: Value for ``--split``.
        subset: Value for ``--subset``.
        parallel: Value for ``--parallel``; omitted when ``None`` or ``0``.
        min_time_limit: Value for ``--min-time-limit``.
        max_as_limit: Value for ``--max-as-limit`` (truncated to int).
        max_data_limit: Value for ``--max-data-limit`` (truncated to int).
        max_stack_limit: Value for ``--max-stack-limit`` (truncated to int).
        check_gt_only: Append ``--check-gt-only`` when truthy.
        no_gt: Append ``--no-gt`` when truthy.

    Returns:
        The command as a single shell-quoted string.
    """
    import shlex  # local import: only this function needs shell quoting

    command = [default_command]

    if jsonl_file is not None:
        # Plain paths are used as-is; upload objects expose the path as .name.
        if isinstance(jsonl_file, (str, os.PathLike)):
            source_path = os.fspath(jsonl_file)
        else:
            source_path = jsonl_file.name
        local_filename = os.path.basename(source_path)
        try:
            # Copy the uploaded file to the current directory
            shutil.copy(source_path, local_filename)
        except shutil.SameFileError:
            # The upload already is the local file; nothing to copy.
            pass
        command.extend(["--samples", local_filename])

    command.extend(["--split", split, "--subset", subset])

    if parallel is not None and parallel != 0:
        command.extend(["--parallel", str(int(parallel))])

    # A cleared numeric field arrives as None; omit its flag instead of
    # crashing on int(None) or emitting the literal string "None".
    if min_time_limit is not None:
        command.extend(["--min-time-limit", str(min_time_limit)])

    integer_limits = (
        ("--max-as-limit", max_as_limit),
        ("--max-data-limit", max_data_limit),
        ("--max-stack-limit", max_stack_limit),
    )
    for flag, limit in integer_limits:
        if limit is not None:
            command.extend([flag, str(int(limit))])

    if check_gt_only:
        command.append("--check-gt-only")

    if no_gt:
        command.append("--no-gt")

    # Quote each argument so file names containing spaces or shell
    # metacharacters remain a single token. For plain tokens the result is
    # identical to a space-join.
    return shlex.join(command)
49
-
50
-
51
def cleanup_previous_files(jsonl_file):
    """Delete leftover entries from the current working directory.

    Everything matched by ``glob("*")`` is removed with ``os.remove`` except
    a fixed keep-list (``Dockerfile``, ``app.py``, ``README.md``,
    ``__pycache__``) and, when ``jsonl_file`` is given, the basename of
    ``jsonl_file.name``. A failed removal is reported with ``print`` and
    does not stop the sweep.
    """
    keep = {"Dockerfile", "app.py", "README.md", "__pycache__"}
    if jsonl_file is not None:
        keep.add(os.path.basename(jsonl_file.name))

    for entry in glob.glob("*"):
        if entry in keep:
            continue
        try:
            os.remove(entry)
        except Exception as exc:
            print(f"Error during cleanup of {entry}: {exc}")
62
-
63
def find_result_file():
    """Return the most recently modified ``*.json`` file in the working directory.

    Returns ``None`` when no ``.json`` file is present.
    """
    return max(glob.glob("*.json"), key=os.path.getmtime, default=None)
68
-
69
def run_bigcodebench(command):
    """Run ``command`` as a subprocess and stream its output.

    This is a generator. It yields status messages and every line the
    subprocess writes to stdout/stderr (stderr is merged into stdout). Only
    one command may run at a time; the module-level ``is_running`` flag
    enforces this and is always reset when the generator finishes.

    The subprocess is terminated if it is still running after 12 minutes.

    Args:
        command: The full command line as a single shell-quoted string.

    Yields:
        str: Log lines and status messages, each ending in a newline.
    """
    import shlex  # local import: used only to tokenize the command string

    global is_running
    if is_running:
        yield "A command is already running. Please wait for it to finish.\n"
        return
    is_running = True

    timeout_seconds = 720  # 12 minutes
    timed_out = threading.Event()
    timer = None
    process = None

    try:
        yield f"Executing command: {command}\n"

        # shlex.split honours quoting, so arguments containing spaces stay
        # intact; for unquoted commands it tokenizes like str.split().
        process = subprocess.Popen(
            shlex.split(command),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )

        def kill_process():
            # Must be a plain function: a ``yield`` in here would turn it
            # into a generator whose body never runs when the timer fires.
            # The timeout is recorded on an Event and reported by the outer
            # generator once the output stream closes.
            if process.poll() is None:  # If the process is still running
                timed_out.set()
                process.terminate()

        # Start a timer to kill the process after 12 minutes
        timer = threading.Timer(timeout_seconds, kill_process)
        timer.start()

        for line in process.stdout:
            yield line

        # Reap the process so ``returncode`` is populated; without this it
        # can still be None after stdout reaches EOF.
        process.wait()
        timer.cancel()

        if timed_out.is_set():
            yield "Process terminated after 12 minutes timeout.\n"
        elif process.returncode != 0:
            yield f"Error: Command exited with status {process.returncode}\n"

        yield "Evaluation completed.\n"

        result_file = find_result_file()
        if result_file:
            yield f"Result file found: {result_file}\n"
        else:
            yield "No result file found.\n"
    finally:
        # Runs on normal completion, on errors, and when the consumer stops
        # iterating early: release the timer, the child, and the pipe.
        if timer is not None:
            timer.cancel()
        if process is not None:
            if process.poll() is None:
                process.terminate()
            if process.stdout is not None:
                process.stdout.close()
        is_running = False
110
-
111
def stream_logs(command, jsonl_file=None):
    """Clean the working directory, run ``command``, and stream the log.

    This is a generator. If a command is already running it yields a single
    notice and stops. Otherwise it yields a cleanup notice, then after each
    new chunk from ``run_bigcodebench`` it yields the whole log accumulated
    so far (not just the new chunk).
    """
    if is_running:
        yield "A command is already running. Please wait for it to finish.\n"
        return

    cleanup_previous_files(jsonl_file)
    yield "Cleaned up previous files.\n"

    transcript = ""
    for chunk in run_bigcodebench(command):
        transcript += chunk
        yield transcript
125
-
126
# Gradio UI: input widgets feed generate_command, whose output is shown in a
# read-only textbox; "Run Evaluation" executes that command and streams logs.
with gr.Blocks() as demo:
    gr.Markdown("# BigCodeBench Evaluator")

    with gr.Row():
        jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
        split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
        subset = gr.Dropdown(choices=["hard", "full"], label="Subset", value="hard")

    with gr.Row():
        parallel = gr.Number(label="Parallel (optional)", precision=0)
        min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
        max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)

    with gr.Row():
        max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
        max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
        check_gt_only = gr.Checkbox(label="Check GT Only")
        no_gt = gr.Checkbox(label="No GT")

    command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
    with gr.Row():
        submit_btn = gr.Button("Run Evaluation")
        download_btn = gr.DownloadButton(label="Download Result")
    log_output = gr.Textbox(label="Execution Logs", lines=20)

    input_components = [
        jsonl_file, split, subset, parallel,
        min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
        check_gt_only, no_gt
    ]

    # Regenerate the displayed command whenever any input changes.
    for component in input_components:
        component.change(generate_command, inputs=input_components, outputs=command_output)

    def start_evaluation(command, jsonl_file, subset, split):
        """Run the evaluation and stream updates for the log box and download button.

        This is a generator wired to two outputs, ``log_output`` and
        ``download_btn``, so every yield is a 2-tuple in that order.
        ``split`` is accepted because it is wired as an input but is not
        used here.
        """
        # Expected result name: "<samples>_<subset>_eval_results.json", with
        # the subset part omitted for the "full" subset.
        extra = subset + "_" if subset != "full" else ""
        if jsonl_file is not None:
            result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
        else:
            result_path = None

        for log in stream_logs(command, jsonl_file):
            if jsonl_file is not None:
                yield log, gr.update(value=result_path, label=result_path)
            else:
                yield log, gr.update()

        # The final update must be yielded: the value of a ``return``
        # statement in a generator handler never reaches the UI.
        result_file = find_result_file()
        if result_file:
            yield gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
        else:
            yield gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)

    submit_btn.click(start_evaluation,
                     inputs=[command_output, jsonl_file, subset, split],
                     outputs=[log_output, download_btn])
186
# Repository id passed to API.restart_space by the periodic restart job.
REPO_ID = "bigcode/bigcodebench-evaluator"
# Hub access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Shared Hub client, authenticated with the token above.
API = HfApi(token=HF_TOKEN)
189
-
190
def restart_space():
    """Ask the Hub API to restart the repository identified by ``REPO_ID``."""
    API.restart_space(
        repo_id=REPO_ID,
        token=HF_TOKEN,
    )
192
-
193
# Start the restart scheduler BEFORE launching the app: launch() blocks the
# main thread while the server is running, so anything placed after it would
# not execute until the server shuts down.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", hours=3)  # restarted every 3h as backup in case automatic updates are not working
scheduler.start()

demo.queue(max_size=300).launch(share=True, server_name="0.0.0.0", server_port=7860)