jjyang77 committed on
Commit
da384b4
·
1 Parent(s): 0f87dc1

Update samples input from a file path to a list of sample data

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. Dockerfile +1 -1
  3. api/app.py +14 -3
  4. api/bigcodebench_data.py +7 -23
.gitignore CHANGED
@@ -2,6 +2,8 @@
2
  **.pyc
3
  **/__pycache__
4
 
 
 
5
  # Testing data
6
  /data
7
 
 
2
  **.pyc
3
  **/__pycache__
4
 
5
+ .hypothesis/
6
+
7
  # Testing data
8
  /data
9
 
Dockerfile CHANGED
@@ -21,7 +21,7 @@ RUN pip install --upgrade pip
21
  # Pre-install the dataset
22
  #RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
23
 
24
- RUN pip install fastapi gunicorn uvicorn[standard] httpx #pydantic==2.*
25
 
26
  RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
27
 
 
21
  # Pre-install the dataset
22
  #RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
23
 
24
+ RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.*
25
 
26
  RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
27
 
api/app.py CHANGED
@@ -7,6 +7,8 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
7
  from typing import Dict, List, Tuple
8
  import gc
9
 
 
 
10
  from fastapi import FastAPI
11
  from fastapi.responses import RedirectResponse
12
 
@@ -15,6 +17,14 @@ from api.code_execution import untrusted_check
15
 
16
  Result = Tuple[str, List[bool]]
17
 
 
 
 
 
 
 
 
 
18
  def create_app() -> FastAPI:
19
 
20
  level = os.environ.get("LOG_LEVEL", default=logging.INFO)
@@ -33,7 +43,8 @@ def create_app() -> FastAPI:
33
 
34
  @app.post("/evaluate/")
35
  async def evaluate(
36
- samples: str,
 
37
  parallel: int = -1,
38
  min_time_limit: float = 1,
39
  max_as_limit: int = 30 * 1024,
@@ -42,7 +53,7 @@ def create_app() -> FastAPI:
42
  no_gt: bool = True,
43
  ) -> dict:
44
  """
45
- Evaluate the correctness of the solutions in the given samples file.
46
  """
47
  if parallel < 1:
48
  n_workers = max(1, multiprocessing.cpu_count() // 2)
@@ -71,7 +82,7 @@ def create_app() -> FastAPI:
71
 
72
  solution = sample["solution"]
73
 
74
- if "sanitized-calibrated" in samples:
75
  solution = sample["code_prompt"] + "\n pass\n" + solution
76
  remainings.add(sample["_identifier"])
77
  args = (
 
7
  from typing import Dict, List, Tuple
8
  import gc
9
 
10
+ from pydantic import BaseModel
11
+
12
  from fastapi import FastAPI
13
  from fastapi.responses import RedirectResponse
14
 
 
17
 
18
  Result = Tuple[str, List[bool]]
19
 
20
+ class SampleDate(BaseModel):
21
+ task_id: str
22
+ solution: str
23
+ code_prompt: str
24
+ test: str
25
+ entry_point: str
26
+ res_id: int
27
+
28
  def create_app() -> FastAPI:
29
 
30
  level = os.environ.get("LOG_LEVEL", default=logging.INFO)
 
43
 
44
  @app.post("/evaluate/")
45
  async def evaluate(
46
+ samples: List[SampleDate],
47
+ calibrate: bool = True,
48
  parallel: int = -1,
49
  min_time_limit: float = 1,
50
  max_as_limit: int = 30 * 1024,
 
53
  no_gt: bool = True,
54
  ) -> dict:
55
  """
56
+ Evaluate the correctness of the solutions in the given samples data.
57
  """
58
  if parallel < 1:
59
  n_workers = max(1, multiprocessing.cpu_count() // 2)
 
82
 
83
  solution = sample["solution"]
84
 
85
+ if calibrate:
86
  solution = sample["code_prompt"] + "\n pass\n" + solution
87
  remainings.add(sample["_identifier"])
88
  args = (
api/bigcodebench_data.py CHANGED
@@ -20,27 +20,11 @@ def stream_jsonl(filename: str) -> Iterable[Dict]:
20
  yield json.loads(line)
21
 
22
 
23
- def load_solutions(sample_path: os.PathLike) -> Iterable[Dict]:
24
- """We accept two formats of inputs.
25
- + `sample.jsonl` which is the format from BigCodeBench, i.e., {task_id, completion or solution}.
26
- + A folder which contains sub-folders named after the task_id. Each sub-folder
27
- contains samples named in `[?].py` where `?` is the solution id starting with 0.
28
- Different from `sample.jsonl`, the solutions must be complete (with prompt prefix).
29
  """
30
- # if it is a file
31
- if os.path.isfile(sample_path):
32
- for i, sample in enumerate(stream_jsonl(sample_path)):
33
- assert "task_id" in sample, "No task_id found in sample!"
34
- assert "res_id" in sample, "No res_id found in sample!"
35
- assert "test" in sample, "No test found in sample!"
36
- assert "solution" in sample, "No solution found in sample!"
37
- assert isinstance(
38
- sample["solution"], str
39
- ), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
40
-
41
- sample["_identifier"] = (
42
- sample["task_id"] + f" (line {i+1} in {sample_path})"
43
- )
44
- yield sample
45
- else:
46
- raise NotImplementedError("Only jsonl solution output file is supported for now.")
 
20
  yield json.loads(line)
21
 
22
 
23
+ def load_solutions(samples) -> Iterable[Dict]:
 
 
 
 
 
24
  """
25
+ """
26
+ for i, sample in enumerate(samples):
27
+ sample["_identifier"] = (
28
+ sample["task_id"] + f" (line {i+1} )"
29
+ )
30
+ yield sample