Jae-Won Chung committed on
Commit
31b5924
·
1 Parent(s): 9fd9223

Update benchmark.py

Browse files
Files changed (1) hide show
  1. scripts/benchmark.py +38 -38
scripts/benchmark.py CHANGED
@@ -40,12 +40,36 @@ SYSTEM_PROMPTS = {
40
  ),
41
  }
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  @dataclass
44
  class Output:
45
  response_length: int
46
  input: str
47
  output: str
48
 
 
49
  @torch.inference_mode()
50
  def run_inference(
51
  model,
@@ -218,9 +242,6 @@ def run_inference(
218
 
219
  return result
220
 
221
- def write_error_to_file(filename, error_message):
222
- with open(filename, 'a') as file:
223
- file.write(error_message + '\n')
224
 
225
  def main(
226
  model_path: str,
@@ -232,7 +253,7 @@ def main(
232
  temperature: float = 0.7,
233
  repitition_penalty: float = 1.0,
234
  max_new_tokens: int = 512,
235
- batch: int = 1,
236
  ) -> None:
237
  """Run benchmarking for one model on the entire input file.
238
 
@@ -262,8 +283,8 @@ def main(
262
  model_path = model_path[:-1]
263
  model_name_cleaned = "--".join(model_path.split("/")[-2:])
264
  output_dir = f"{output_dir}/{task}/{model_name_cleaned}"
265
- output_csv_path = f"{output_dir}/benchmark_batch_{batch}.json"
266
- config_json_path = f"{output_dir}/config.json"
267
  table = Table(title="Benchmark")
268
  table.add_column("Configuration")
269
  table.add_column("Value")
@@ -341,45 +362,23 @@ def main(
341
  "temperature": temperature,
342
  "repitition_penalty": repitition_penalty,
343
  "max_new_tokens": max_new_tokens,
344
- "batch_size": batch,
345
  },
346
  config_json,
347
  indent=4,
348
  )
349
  config_json.write("\n")
350
 
351
- class CustomDataset(Dataset):
352
- def __init__(self, data):
353
- self.data = data
354
-
355
- def __len__(self):
356
- return len(self.data)
357
-
358
- def __getitem__(self, index):
359
- sample = self.data[index]
360
- return sample["conversations"][0]["value"]
361
-
362
-
363
- def dataloader(input_file: str, batch_size: batch) -> Generator[tuple[bool, str], None, None]:
364
- """Yields a tuple of whether this is a warmup run and the input prompt."""
365
- for _ in range(3):
366
- yield True, ["Say something long and random. I don't care about the content." for _ in range (batch)]
367
- data = json.load(open(input_file, "r"))
368
- custom_dataset = CustomDataset(data)
369
- data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)
370
- for prompt in data_loader:
371
- yield False, prompt
372
-
373
  # Warm up the GPU with some random prompts.
374
  # Forward through all the prompts.
375
  is_first = True
376
  convs = []
377
  prompts = []
378
- data_iter = iter(dataloader(input_file, batch))
379
 
380
  for is_warmup, input_prompts in data_iter:
381
  # Construct the input prompt.
382
- for i in range(batch):
383
  conv = copy.deepcopy(conv_base)
384
  conv.append_message(conv.roles[0], input_prompts[i])
385
  conv.append_message(conv.roles[1], "")
@@ -404,18 +403,19 @@ def main(
404
  if results:
405
  # Record numbers.
406
  if not is_warmup:
407
- response_length = sum([result.response_length for result in results]) # number of valid tokens
408
- latency = measurements.time
409
- throughput = response_length / latency
410
- energy = measurements.total_energy
 
411
  output = {
412
  "model": model_name_cleaned,
413
  "throughput": throughput,
414
- "response_length": response_length,
415
  "latency": latency,
416
- "energy": energy,
417
  "input": [prompt.strip() for prompt in prompts],
418
- "output": [(result.output).strip() for result in results],
419
  }
420
  output_str = json.dumps(output, indent=4)
421
  if not is_warmup:
 
40
  ),
41
  }
42
 
43
+ class CustomDataset(Dataset):
44
+ def __init__(self, data):
45
+ self.data = data
46
+
47
+ def __len__(self):
48
+ return len(self.data)
49
+
50
+ def __getitem__(self, index):
51
+ sample = self.data[index]
52
+ return sample["conversations"][0]["value"]
53
+
54
+
55
+ def dataloader(input_file: str, batch_size: int) -> Generator[tuple[bool, list[str]], None, None]:
56
+ """Yields a tuple of whether this is a warmup run and the input prompt."""
57
+ for _ in range(3):
58
+ yield True, ["Say something long and random. I don't care about the content." for _ in range (batch_size)]
59
+ data = json.load(open(input_file, "r"))
60
+ custom_dataset = CustomDataset(data)
61
+ data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)
62
+ for prompt in data_loader:
63
+ yield False, prompt
64
+
65
+
66
  @dataclass
67
  class Output:
68
  response_length: int
69
  input: str
70
  output: str
71
 
72
+
73
  @torch.inference_mode()
74
  def run_inference(
75
  model,
 
242
 
243
  return result
244
 
 
 
 
245
 
246
  def main(
247
  model_path: str,
 
253
  temperature: float = 0.7,
254
  repitition_penalty: float = 1.0,
255
  max_new_tokens: int = 512,
256
+ batch_size: int = 1,
257
  ) -> None:
258
  """Run benchmarking for one model on the entire input file.
259
 
 
283
  model_path = model_path[:-1]
284
  model_name_cleaned = "--".join(model_path.split("/")[-2:])
285
  output_dir = f"{output_dir}/{task}/{model_name_cleaned}"
286
+ output_csv_path = f"{output_dir}/benchmark_batch_{batch_size}.json"
287
+ config_json_path = f"{output_dir}/config_batch_{batch_size}.json"
288
  table = Table(title="Benchmark")
289
  table.add_column("Configuration")
290
  table.add_column("Value")
 
362
  "temperature": temperature,
363
  "repitition_penalty": repitition_penalty,
364
  "max_new_tokens": max_new_tokens,
365
+ "batch_size": batch_size,
366
  },
367
  config_json,
368
  indent=4,
369
  )
370
  config_json.write("\n")
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  # Warm up the GPU with some random prompts.
373
  # Forward through all the prompts.
374
  is_first = True
375
  convs = []
376
  prompts = []
377
+ data_iter = iter(dataloader(input_file, batch_size))
378
 
379
  for is_warmup, input_prompts in data_iter:
380
  # Construct the input prompt.
381
+ for i in range(batch_size):
382
  conv = copy.deepcopy(conv_base)
383
  conv.append_message(conv.roles[0], input_prompts[i])
384
  conv.append_message(conv.roles[1], "")
 
403
  if results:
404
  # Record numbers.
405
  if not is_warmup:
406
+ total_num_tokens = sum([result.response_length for result in results]) # total number of tokens
407
+ latency = measurements.time # seconds, identical for all requests
408
+ throughput = total_num_tokens / latency # tokens per second
409
+ energy = measurements.total_energy # Joules, total across all requests
410
+ # Fields should be interpreted as per-request
411
  output = {
412
  "model": model_name_cleaned,
413
  "throughput": throughput,
414
+ "response_length": total_num_tokens / batch_size,
415
  "latency": latency,
416
+ "energy": energy / batch_size,
417
  "input": [prompt.strip() for prompt in prompts],
418
+ "output": [result.output.strip() for result in results],
419
  }
420
  output_str = json.dumps(output, indent=4)
421
  if not is_warmup: