Spaces:
Running
Running
import json | |
from glob import glob | |
from pathlib import Path | |
import tyro | |
SATURATION_THRESHOLD = 0.95 | |
def main(result_dir: Path, output_dir: Path) -> None: | |
print(f"{result_dir} -> {output_dir}") | |
for model_dir in sorted(glob(f"{result_dir}/*/*")): | |
model_name = "/".join(model_dir.split("/")[-2:]) | |
print(f" {model_name}") | |
(output_dir / model_name).mkdir(parents=True, exist_ok=True) | |
# Gather all results files. | |
results = sorted(glob(f"{model_dir}/vllm+*+results.json")) | |
# Gather all stats files. Skip if stats file is missing. | |
files: list[tuple[str, str]] = [] | |
for result_file in results: | |
stats_file = result_file.replace("+results.json", "+stats.json") | |
if Path(stats_file).exists(): | |
files.append((result_file, stats_file)) | |
# Produce one JSON file per (results, stats) pair. | |
for result_path, stats_path in files: | |
with open(result_path) as f: | |
result_data = json.load(f) | |
with open(stats_path) as f: | |
stats_data = json.load(f) | |
# Final output data. | |
data = {} | |
# Derive metrics. | |
pp = len(stats_data["steady_state"]) | |
ss_total_time = max(node["time"] for node in stats_data["steady_state"]) | |
ss_total_energy = sum(sum(node["energy"].values()) for node in stats_data["steady_state"]) | |
ss_end_iter = next(filter(lambda iq: iq[1] == 0, enumerate(stats_data["num_waiting_sys"][2:])))[0] + 2 | |
ss_total_output_tokens = sum(stats_data["num_generation_tokens_iter"][:ss_end_iter]) | |
average_output_length = result_data["total_completion_tokens"] / result_data["num_requests"] | |
tpot = [] | |
for iter_tpot in stats_data["time_per_output_tokens_iter"]: | |
if iter_tpot: | |
tpot.append(sum(iter_tpot) / len(iter_tpot)) | |
# Actual fields. | |
data["Model"] = result_data["model"] | |
data["GPU"] = result_data["gpu_model"] | |
data["TP"] = result_data["num_gpus"] | |
data["PP"] = pp | |
data["Energy/req (J)"] = ss_total_energy / ss_total_output_tokens * average_output_length | |
data["Avg TPOT (s)"] = sum(tpot) / len(tpot) | |
data["Token tput (tok/s)"] = ss_total_output_tokens / ss_total_time | |
data["Avg Output Tokens"] = average_output_length | |
data["Avg BS (reqs)"] = sum(stats_data["num_running_sys"][:ss_end_iter]) / ss_end_iter | |
data["Max BS (reqs)"] = result_data["max_num_seqs"] * pp | |
# Skip saturated runs. | |
if data["Max BS (reqs)"] * SATURATION_THRESHOLD >= data["Avg BS (reqs)"]: | |
continue | |
# Dump output data. | |
filename = f"bs{result_data['max_num_seqs']}+tp{data['TP']}+pp{data['PP']}.json" | |
output_path = output_dir / model_name / filename | |
json.dump(data, open(output_path, "w"), indent=2) | |
if __name__ == "__main__": | |
tyro.cli(main) | |