meg HF staff committed on
Commit
0351a9f
·
verified ·
1 Parent(s): 07f5bdd

Experimenting.

Browse files
Files changed (1) hide show
  1. entrypoint.sh +14 -6
entrypoint.sh CHANGED
@@ -4,6 +4,7 @@
4
  set -e
5
 
6
  export SPACE="EnergyStarAI/launch-computation-example"
 
7
 
8
  echo "Not checking h100 -- already know it's not there."
9
  #python /check_h100.py
@@ -14,11 +15,15 @@ python /parse_requests.py | while read line; do
14
  IFS="," read backend_model experiment_name <<< $(echo ${line})
15
  echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
16
  now=$(date +%Y-%m-%d-%H-%M-%S)
17
- export run_dir="./runs/" #${experiment_name}/${backend_model}/${now}"
 
 
 
 
18
 
19
- optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error-${now}.log
20
  done || {
21
  echo "Error."
 
22
  # TODO: Although this works, `curl` appears to run indefinitely because it is recording itself (the logs are recording the curl operation.)
23
  #echo "Using curl to retrieve the space run log."
24
  #logs_name=./runs/logs-${now}.txt
@@ -26,14 +31,17 @@ done || {
26
  #python /failed_run.py --run_dir $run_dir --model_name $backend_model --logs_name $logs_name
27
  }
28
 
29
- if [ -s $run_dir/error-${now}.log ]; then
30
  # error.log is not-empty, an error was raised
31
  echo "An error was raised while benchmarking the model..."
32
  python /failed_run.py --run_dir $run_dir --model_name $backend_model
33
- # # Delete the current run directory so that it is not pushed by create_results.py later
34
- # rm -rf $run_dir
 
 
 
35
  else
36
- # The file is empty, so no error
37
  echo "Finished; uploading dataset results"
38
  python /create_results.py ./runs
39
  fi
 
4
  set -e
5
 
6
  export SPACE="EnergyStarAI/launch-computation-example"
7
+ failed=0
8
 
9
  echo "Not checking h100 -- already know it's not there."
10
  #python /check_h100.py
 
15
  IFS="," read backend_model experiment_name <<< $(echo ${line})
16
  echo "Benchmarking Model: ${backend_model}, Task: ${experiment_name}"
17
  now=$(date +%Y-%m-%d-%H-%M-%S)
18
+ export run_dir="./runs/${experiment_name}/${backend_model}/${now}"
19
+ mkdir -p $run_dir
20
+
21
+ # Let the benchmarking begin!
22
+ optimum-benchmark --config-name ${experiment_name} --config-dir /optimum-benchmark/examples/energy_star/ backend.model=${backend_model} backend.processor=${backend_model} hydra.run.dir=${run_dir} 2> $run_dir/error.log
23
 
 
24
  done || {
25
  echo "Error."
26
+ failed=1
27
  # TODO: Although this works, `curl` appears to run indefinitely because it is recording itself (the logs are recording the curl operation.)
28
  #echo "Using curl to retrieve the space run log."
29
  #logs_name=./runs/logs-${now}.txt
 
31
  #python /failed_run.py --run_dir $run_dir --model_name $backend_model --logs_name $logs_name
32
  }
33
 
34
+ if [ -s $run_dir/error.log ]; then
35
  # error.log is not-empty, an error was raised
36
  echo "An error was raised while benchmarking the model..."
37
  python /failed_run.py --run_dir $run_dir --model_name $backend_model
38
+ # TODO: Is this necessary?
39
+ # Delete the current run directory so that it is not pushed by create_results.py later
40
+ rm -rf $run_dir
41
+ elif [ "$failed" -eq 1 ]; then
42
+ echo "Failed, but was not able to retrieve error log."
43
  else
44
+ # The error log file is empty, and we didn't catch an error.
45
  echo "Finished; uploading dataset results"
46
  python /create_results.py ./runs
47
  fi