terryyz committed on
Commit
b54d74f
·
verified ·
1 Parent(s): 385c874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -106,6 +106,7 @@ def evaluate(
106
  max_as_limit: int = 30 * 1024,
107
  max_data_limit: int = 30 * 1024,
108
  max_stack_limit: int = 10,
 
109
  check_gt_only: bool = False,
110
  no_gt: bool = False,
111
  ):
@@ -156,7 +157,7 @@ def evaluate(
156
  if "solution" in sample
157
  else problems[task_id]["complete_prompt"] + sample["completion"]
158
  )
159
- if "sanitized_calibrated" in samples:
160
  solution = problems[task_id]["code_prompt"] + "\n pass\n" + solution
161
  remainings.add(sample["_identifier"])
162
  args = (
@@ -223,7 +224,7 @@ def evaluate(
223
  pass_at_k["model"] = os.path.basename(samples).split("--bigcodebench-")[0]
224
  pass_at_k["split"] = split
225
  pass_at_k["subset"] = subset
226
- pass_at_k["calibrated"] = "sanitized_calibrated" in samples
227
  pass_at_k["gt_pass_rate"] = gt_pass_rate
228
  pass_at_k["failed_tasks"] = failed_tasks
229
 
 
106
  max_as_limit: int = 30 * 1024,
107
  max_data_limit: int = 30 * 1024,
108
  max_stack_limit: int = 10,
109
+ calibrated: bool = True,
110
  check_gt_only: bool = False,
111
  no_gt: bool = False,
112
  ):
 
157
  if "solution" in sample
158
  else problems[task_id]["complete_prompt"] + sample["completion"]
159
  )
160
+ if calibrated:
161
  solution = problems[task_id]["code_prompt"] + "\n pass\n" + solution
162
  remainings.add(sample["_identifier"])
163
  args = (
 
224
  pass_at_k["model"] = os.path.basename(samples).split("--bigcodebench-")[0]
225
  pass_at_k["split"] = split
226
  pass_at_k["subset"] = subset
227
+ pass_at_k["calibrated"] = calibrated
228
  pass_at_k["gt_pass_rate"] = gt_pass_rate
229
  pass_at_k["failed_tasks"] = failed_tasks
230