#!/bin/bash
# Exit immediately if any command exits with a non-zero status.
set -e
export SPACE="EnergyStarAI/launch-computation-example"
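# Presumably read by the helper scripts below (e.g. /pause_space.py) to
# identify the target Space.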
echo "Not checking h100 -- already know it's not there."
#python /check_h100.py
echo "Attempting to run."
#if [[ $? = 0 ]]; then
# For each "model,experiment" line in the requests dataset...
# (the loop reads from a process substitution, closed after `done`, so that
# run_dir and backend_model stay visible to the error handler below)
while read -r line; do
  # Read the model name and the experiment name from the CSV line.
  IFS="," read -r backend_model experiment_name <<< "${line}"
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
# Initialize the directory for output.
now=$(date +%Y-%m-%d-%H-%M-%S)
run_dir="./runs/${experiment_name}/${backend_model}/${now}"
mkdir -p "$run_dir"
# Let the benchmarking begin!
optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
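  # NB: everything the benchmark writes to stderr (warnings included) lands in
  # error.log, so the check below treats any stderr output as a failure.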
  # Either mark that the benchmark FAILED, or upload the results.
  if [ -s "${run_dir}/error.log" ]; then
    # error.log is non-empty: an error was raised.
    echo "An error was raised while benchmarking the model..."
    python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
  else
    # The error log is empty; no error was caught.
    echo "Finished; uploading dataset results"
    python /create_results.py ./runs
  fi
done < <(python /parse_requests.py) || {
  # Catch any error thrown by the loop itself; update the requests dataset to
  # FAILED based on the last-read run_dir and backend_model.
  echo "Error in the benchmarking loop; marking the last run as FAILED."
  python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
}
echo "Uploading all output from the /runs folder."
python /upload_run_folder.py --run_dir "/runs"
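# NB: run output above is written under ./runs, so uploading from /runs assumes
# the script's working directory is / (as the absolute script paths suggest).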
# Pausing space
echo "Pausing space."
python /pause_space.py
echo "Done."