Spaces:
Runtime error
Runtime error
regisss
committed on
Commit
·
ca2da1c
1
Parent(s):
3f77013
Add try/catch in bash entrypoint to manage failed experiments
Browse files
- entrypoint.sh +15 -2
- failed_run.py +46 -0
entrypoint.sh
CHANGED
@@ -8,12 +8,25 @@ echo "Attempting to run."
|
|
8 |
python /parse_requests.py | while read line; do
|
9 |
IFS="," read backend_model experiment_name <<< $(echo ${line})
|
10 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
done
|
|
|
|
|
13 |
echo "Finished; uploading dataset results"
|
14 |
python /create_results.py ./runs
|
|
|
15 |
# Pausing space
|
16 |
echo "Pausing space."
|
17 |
python /pause_space.py
|
18 |
echo "Done."
|
19 |
-
#fi
|
|
|
8 |
python /parse_requests.py | while read line; do
|
9 |
IFS="," read backend_model experiment_name <<< $(echo ${line})
|
10 |
echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
|
11 |
+
# Per-run output directory: ./runs/<experiment>/<model>/<timestamp>.
# The timestamp is produced by `date` because `${now:...}` is Hydra resolver
# syntax, which bash cannot expand. Assignment and export are kept separate so
# a failing command substitution is not masked by `export`.
run_dir="./runs/${experiment_name}/${backend_model}/$(date +%Y-%m-%d-%H-%M-%S)"
export run_dir
# The directory must exist before stderr is redirected into ${run_dir}/error.log,
# otherwise the redirection itself fails and the benchmark never starts.
mkdir -p "${run_dir}"
|
12 |
+
|
13 |
+
optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error.log
|
14 |
+
|
15 |
+
if [ -s $run_dir/error.log ]; then
|
16 |
+
# error.log is non-empty, i.e. something was written to stderr: treat the run as failed (note: warnings on stderr also trigger this)
|
17 |
+
echo "An error was raised while benchmarking the model..."
|
18 |
+
python /failed_run.py --run_dir $run_dir --model_name $backend_model
|
19 |
+
|
20 |
+
# Delete the current run directory so that it is not pushed by create_results.py later
|
21 |
+
rm -rf $run_dir
|
22 |
+
fi
|
23 |
done
|
24 |
+
|
25 |
+
# All requests have been processed; directories of failed runs were already removed inside the loop
|
26 |
echo "Finished; uploading dataset results"
|
27 |
python /create_results.py ./runs
|
28 |
+
|
29 |
# Pausing space
|
30 |
echo "Pausing space."
|
31 |
python /pause_space.py
|
32 |
echo "Done."
|
|
failed_run.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
|
4 |
+
from datasets import load_dataset, Dataset
|
5 |
+
from huggingface_hub import HfApi
|
6 |
+
|
7 |
+
|
8 |
+
TOKEN = os.environ.get("DEBUG")
|
9 |
+
api = HfApi(token=TOKEN)
|
10 |
+
|
11 |
+
parser = argparse.ArgumentParser()
|
12 |
+
parser.add_argument(
|
13 |
+
"--run_dir",
|
14 |
+
default=None,
|
15 |
+
type=str,
|
16 |
+
required=True,
|
17 |
+
help="Path to the run directory.",
|
18 |
+
)
|
19 |
+
parser.add_argument(
|
20 |
+
"--model_name",
|
21 |
+
default=None,
|
22 |
+
type=str,
|
23 |
+
required=True,
|
24 |
+
help="Model to benchmark.",
|
25 |
+
)
|
26 |
+
args = parser.parse_args()
|
27 |
+
|
28 |
+
# Updating request
|
29 |
+
dataset = load_dataset("EnergyStarAI/requests_debug", split="test", token=TOKEN).to_pandas()
|
30 |
+
|
31 |
+
# Set benchmark to failed
|
32 |
+
# Select the request rows by equality: Series.isin() only accepts list-like
# values and raises TypeError when handed a bare string such as args.model_name.
dataset.loc[dataset["model"] == args.model_name, ['status']] = "FAILED"
|
33 |
+
|
34 |
+
# Read error message
|
35 |
+
with open(f"{args.run_dir}/error.log", 'r') as file:
|
36 |
+
error_message = file.read()
|
37 |
+
|
38 |
+
# Add a new column for the error message if necessary
|
39 |
+
if "error_message" not in dataset.columns:
|
40 |
+
dataset["error_message"] = ""
|
41 |
+
# Store the captured stderr on the rows of the failed model. Equality is used
# instead of Series.isin(), which raises TypeError for a bare string argument.
dataset.loc[dataset["model"] == args.model_name, ['error_message']] = error_message
|
42 |
+
|
43 |
+
updated_dataset = Dataset.from_pandas(dataset)
|
44 |
+
updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
|
45 |
+
|
46 |
+
print("Status set to FAILED")
|