meg-huggingface committed on
Commit
216eab8
·
1 Parent(s): 51ccf18

Some clean-up

Browse files
Files changed (2) hide show
  1. entrypoint.sh +24 -23
  2. failed_run.py +4 -26
entrypoint.sh CHANGED
@@ -4,45 +4,46 @@
4
  set -e
5
 
6
  export SPACE="EnergyStarAI/launch-computation-example"
7
- failed=0
8
 
9
  echo "Not checking h100 -- already know it's not there."
10
  #python /check_h100.py
11
  echo "Attempting to run."
12
  #if [[ $? = 0 ]]; then
13
 
14
- python /parse_requests.py | while read line; do
15
- IFS="," read backend_model experiment_name <<< $(echo ${line})
 
 
16
  echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
 
 
17
  now=$(date +%Y-%m-%d-%H-%M-%S)
18
- export run_dir="./runs/${experiment_name}/${backend_model}/${now}"
19
- mkdir -p $run_dir
20
 
21
  # Let the benchmarking begin!
22
- optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error.log
 
 
 
 
 
 
 
 
 
 
 
23
  done || {
24
- echo "Error."
25
- failed=1
 
 
26
  }
27
 
28
  echo "Uploading all output from the /runs folder."
29
  python /upload_run_folder.py --run_dir "/runs"
30
 
31
- if [ -s $run_dir/error.log ]; then
32
- # error.log is not-empty, an error was raised
33
- echo "An error was raised while benchmarking the model..."
34
- python /failed_run.py --run_dir $run_dir --model_name $backend_model
35
- # TODO: Is this necessary?
36
- # Delete the current run directory so that it is not pushed by create_results.py later
37
- rm -rf $run_dir
38
- elif [ "$failed" -eq 1 ]; then
39
- echo "Failed, but was not able to retrieve error log."
40
- else
41
- # The error log file is empty, and we didn't catch an error.
42
- echo "Finished; uploading dataset results"
43
- python /create_results.py ./runs
44
- fi
45
-
46
  # Pausing space
47
  echo "Pausing space."
48
  python /pause_space.py
 
4
  set -e
5
 
6
  export SPACE="EnergyStarAI/launch-computation-example"
 
7
 
8
  echo "Not checking h100 -- already know it's not there."
9
  #python /check_h100.py
10
  echo "Attempting to run."
11
  #if [[ $? = 0 ]]; then
12
 
13
+ # For each line in the requests dataset....
14
+ python /parse_requests.py | while read -r line; do
15
+ # Read the name of the model and the experiment.
16
+ IFS="," read backend_model experiment_name <<< "${line}"
17
  echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
18
+
19
+ # Initialize the directory for output.
20
  now=$(date +%Y-%m-%d-%H-%M-%S)
21
+ run_dir="./runs/${experiment_name}/${backend_model}/${now}"
22
+ mkdir -p "$run_dir"
23
 
24
  # Let the benchmarking begin!
25
+ optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log"
26
+
27
+ # Either mark that the benchmark FAILED, or upload the results.
28
+ if [ -s "${run_dir}/error.log" ]; then
29
+ # error.log is not-empty, an error was raised
30
+ echo "An error was raised while benchmarking the model..."
31
+ python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
32
+ else
33
+ # The error log file is empty, and we didn't catch an error.
34
+ echo "Finished; uploading dataset results"
35
+ python /create_results.py ./runs
36
+ fi
37
  done || {
38
+ # Catch any errors that get thrown; update the requests dataset to FAILED
39
+ # based on the last-read run_dir and backend_model.
40
+ echo "Error."
41
+ python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}"
42
  }
43
 
44
  echo "Uploading all output from the /runs folder."
45
  python /upload_run_folder.py --run_dir "/runs"
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # Pausing space
48
  echo "Pausing space."
49
  python /pause_space.py
failed_run.py CHANGED
@@ -23,13 +23,7 @@ parser.add_argument(
23
  required=True,
24
  help="Model to benchmark.",
25
  )
26
- parser.add_argument(
27
- "--logs_name",
28
- default=None,
29
- type=str,
30
- required=False,
31
- help="Location of space runtime error log -- note this is distinct from an optimum-benchmark log.",
32
- )
33
  args = parser.parse_args()
34
 
35
  # Updating request
@@ -49,22 +43,6 @@ try:
49
  except FileNotFoundError as e:
50
  print(f"Could not find {args.run_dir}/error.log")
51
 
52
-
53
-
54
- print("Status set to FAILED")
55
-
56
- if args.logs_name:
57
- print("Attempting to save space runtime error log at EnergyStarAI/error_logs")
58
- try:
59
- api.upload_file(
60
- path_or_fileobj=args.error_log,
61
- path_in_repo=args.error_log,
62
- repo_id="EnergyStarAI/error_logs",
63
- repo_type="dataset",
64
- )
65
- dataset.loc[dataset["model"].isin(args.model_name), ['status']] = "FAILED"
66
- updated_dataset = Dataset.from_pandas(dataset)
67
- updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
68
- except Exception as e:
69
- print("That didn't work. Error:")
70
- print(e)
 
23
  required=True,
24
  help="Model to benchmark.",
25
  )
26
+
 
 
 
 
 
 
27
  args = parser.parse_args()
28
 
29
  # Updating request
 
43
  except FileNotFoundError as e:
44
  print(f"Could not find {args.run_dir}/error.log")
45
 
46
+ updated_dataset = Dataset.from_pandas(dataset)
47
+ updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test", token=TOKEN)
48
+ print("Status set to FAILED")