regisss
Add try/catch in bash entrypoint to manage failed experiments
ca2da1c
raw
history blame
1.19 kB
import argparse
import os
from datasets import load_dataset, Dataset
from huggingface_hub import HfApi
# Hub access token, read from the "DEBUG" environment variable (None if unset).
TOKEN = os.getenv("DEBUG")
# NOTE(review): `api` is not referenced in the visible part of this script.
api = HfApi(token=TOKEN)

# Command-line interface: both options are mandatory strings, so they are
# declared from a single (flag, help text) table.
parser = argparse.ArgumentParser()
for flag, help_text in (
    ("--run_dir", "Path to the run directory."),
    ("--model_name", "Model to benchmark."),
):
    parser.add_argument(
        flag,
        default=None,
        type=str,
        required=True,
        help=help_text,
    )
args = parser.parse_args()
# Mark the request(s) for `--model_name` as FAILED in the
# "EnergyStarAI/requests_debug" dataset and attach the content of
# `<run_dir>/error.log` as the error message, then push the result back.

# Load the current requests table as a pandas DataFrame.
dataset = load_dataset("EnergyStarAI/requests_debug", split="test", token=TOKEN).to_pandas()

# Rows belonging to the benchmarked model. `args.model_name` is a single
# string, so compare for equality: `Series.isin` only accepts list-like
# values and raises TypeError when given a plain str.
model_rows = dataset["model"] == args.model_name

# Set benchmark to failed
dataset.loc[model_rows, "status"] = "FAILED"

# Read error message. Undecodable bytes are replaced rather than raising, so
# an odd byte in the crash log cannot prevent the status update.
with open(f"{args.run_dir}/error.log", "r", encoding="utf-8", errors="replace") as file:
    error_message = file.read()

# Add a new column for the error message if necessary
if "error_message" not in dataset.columns:
    dataset["error_message"] = ""
dataset.loc[model_rows, "error_message"] = error_message

# Convert back to a `datasets.Dataset` and overwrite the "test" split on the Hub.
updated_dataset = Dataset.from_pandas(dataset)
updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)

print("Status set to FAILED")