meg-huggingface committed
Commit 216eab8 · 1 Parent(s): 51ccf18
Some clean-up

Browse files:
- entrypoint.sh +24 -23
- failed_run.py +4 -26
entrypoint.sh CHANGED
@@ -4,45 +4,46 @@
 set -e
 
 export SPACE="EnergyStarAI/launch-computation-example"
-failed=0
 
 echo "Not checking h100 -- already know it's not there."
 #python /check_h100.py
 echo "Attempting to run."
 #if [[ $? = 0 ]]; then
 
-
-
+# For each line in the requests dataset....
+python /parse_requests.py | while read -r line; do
+    # Read the name of the model and the experiment.
+    IFS="," read backend_model experiment_name <<< "${line}"
     echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
+
+    # Initialize the directory for output.
     now=$(date +%Y-%m-%d-%H-%M-%S)
-
-    mkdir -p $run_dir
+    run_dir="./runs/${experiment_name}/${backend_model}/${now}"
+    mkdir -p "$run_dir"
 
     # Let the benchmarking begin!
-    optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model
+    optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
+
+    # Either mark that the benchmark FAILED, or upload the results.
+    if [ -s "${run_dir}/error.log" ]; then
+        # error.log is not-empty, an error was raised
+        echo "An error was raised while benchmarking the model..."
+        python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
+    else
+        # The error log file is empty, and we didn't catch an error.
+        echo "Finished; uploading dataset results"
+        python /create_results.py ./runs
+    fi
 done || {
-
-
+    # Catch any errors that get thrown; update the requests dataset to FAILED
+    # based on the last-read run_dir and backend_model.
+    echo "Error."
+    python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
 }
 
 echo "Uploading all output from the /runs folder."
 python /upload_run_folder.py --run_dir "/runs"
 
-if [ -s $run_dir/error.log ]; then
-    # error.log is not-empty, an error was raised
-    echo "An error was raised while benchmarking the model..."
-    python /failed_run.py --run_dir $run_dir --model_name $backend_model
-    # TODO: Is this necessary?
-    # Delete the current run directory so that it is not pushed by create_results.py later
-    rm -rf $run_dir
-elif [ "$failed" -eq 1 ]; then
-    echo "Failed, but was not able to retrieve error log."
-else
-    # The error log file is empty, and we didn't catch an error.
-    echo "Finished; uploading dataset results"
-    python /create_results.py ./runs
-fi
-
 # Pausing space
 echo "Pausing space."
 python /pause_space.py
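The new loop assumes /parse_requests.py prints one comma-separated "model,experiment" pair per line, which the shell then splits with IFS="," read. That script is not part of this diff, so the sketch below is only a hypothetical illustration of that output contract, with placeholder model and task names.

# Hypothetical sketch (not part of this commit) of the output /parse_requests.py
# is assumed to produce for the new while-loop in entrypoint.sh.
pending_requests = [
    ("org/example-model", "text_classification"),  # placeholder values
    ("org/another-model", "summarization"),
]

for model, experiment in pending_requests:
    # One comma-separated pair per line, consumed by `while read -r line` and
    # split by `IFS="," read backend_model experiment_name`.
    print(f"{model},{experiment}")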
failed_run.py CHANGED
@@ -23,13 +23,7 @@ parser.add_argument(
     required=True,
     help="Model to benchmark.",
 )
-parser.add_argument(
-    "--logs_name",
-    default=None,
-    type=str,
-    required=False,
-    help="Location of space runtime error log -- note this is distinct from an optimum-benchmark log.",
-)
+
 args = parser.parse_args()
 
 # Updating request
@@ -49,22 +43,6 @@ try:
 except FileNotFoundError as e:
     print(f"Could not find {args.run_dir}/error.log")
 
-
-
-print("Status set to FAILED")
-
-if args.logs_name:
-    print("Attempting to save space runtime error log at EnergyStarAI/error_logs")
-    try:
-        api.upload_file(
-            path_or_fileobj=args.error_log,
-            path_in_repo=args.error_log,
-            repo_id="EnergyStarAI/error_logs",
-            repo_type="dataset",
-        )
-        dataset.loc[dataset["model"].isin(args.model_name), ['status']] = "FAILED"
-        updated_dataset = Dataset.from_pandas(dataset)
-        updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
-    except Exception as e:
-        print("That didn't work. Error:")
-        print(e)
+updated_dataset = Dataset.from_pandas(dataset)
+updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
+print("Status set to FAILED")
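For context, the FAILED-status update that the slimmed-down failed_run.py performs can be pieced together from the lines visible in this diff. The sketch below is only that: the dataset names, the pandas filter, and the final push/print lines come from the diff, while loading the requests split into a DataFrame, reading the token from the environment, and the literal model name are assumptions standing in for code that is not shown here.

import os

from datasets import Dataset, load_dataset

# Assumptions: the token normally comes from the Space secrets and the model
# name from the --model_name CLI argument.
TOKEN = os.environ.get("HF_TOKEN")
model_name = "org/example-model"

# Assumption: elsewhere in the script the requests dataset is loaded into a
# pandas DataFrame before the lines shown in the diff run.
dataset = load_dataset("EnergyStarAI/requests_debug", split="test").to_pandas()

# Filter taken from a line visible in the diff (it expects a list-like of names).
dataset.loc[dataset["model"].isin([model_name]), ["status"]] = "FAILED"

# The lines this commit keeps at the end of the failure path: convert back to a
# Dataset and push the updated split.
updated_dataset = Dataset.from_pandas(dataset)
updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
print("Status set to FAILED")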