meg-huggingface committed on
Commit
258cdcb
·
1 Parent(s): 9cfc9cd

Rolling back to 8 hours ago

Browse files
Files changed (2) hide show
  1. entrypoint.sh +3 -3
  2. failed_run.py +22 -31
entrypoint.sh CHANGED
@@ -1,6 +1,6 @@
1
  #!/bin/bash
2
 
3
- export SPACE="AIEnergyScore/launch-computation-example"
4
 
5
  echo "Not checking h100 -- already know it's not there."
6
  #python /check_h100.py
@@ -19,7 +19,7 @@ python /parse_requests.py | while read -r line; do
19
  mkdir -p "$run_dir"
20
 
21
  # Let the benchmarking begin!
22
- optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" || (python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir) # If the benchmark fails, update accordingly. Remove the run directory for that specific model.
23
  done
24
 
25
  echo "Finished; uploading dataset results"
@@ -31,4 +31,4 @@ python /upload_run_folder.py --run_dir "/runs"
31
  # Pausing space
32
  echo "Pausing space."
33
  python /pause_space.py
34
- echo "Done."
 
1
  #!/bin/bash
2
 
3
+ export SPACE="EnergyStarAI/launch-computation-example"
4
 
5
  echo "Not checking h100 -- already know it's not there."
6
  #python /check_h100.py
 
19
  mkdir -p "$run_dir"
20
 
21
  # Let the benchmarking begin!
22
+ optimum-benchmark --config-name "${experiment_name}" --config-dir /optimum-benchmark/examples/energy_star/ backend.model="${backend_model}" backend.processor="${backend_model}" hydra.run.dir="${run_dir}" 2> "${run_dir}/error.log" || (python /failed_run.py --run_dir "${run_dir}" --model_name "${backend_model}" && rm -rf $run_dir)
23
  done
24
 
25
  echo "Finished; uploading dataset results"
 
31
  # Pausing space
32
  echo "Pausing space."
33
  python /pause_space.py
34
+ echo "Done."
failed_run.py CHANGED
@@ -4,7 +4,6 @@ import os
4
  from datasets import load_dataset, Dataset
5
  from huggingface_hub import HfApi
6
 
7
-
8
  TOKEN = os.environ.get("DEBUG")
9
  api = HfApi(token=TOKEN)
10
 
@@ -23,39 +22,31 @@ parser.add_argument(
23
  required=True,
24
  help="Model to benchmark.",
25
  )
26
- parser.add_argument(
27
- "--reason",
28
- default=None,
29
- type=str,
30
- required=False,
31
- help="Reason for failure -- to update in the requests file",
32
- )
33
 
34
  args = parser.parse_args()
35
 
36
  # Updating request
37
- dataset = load_dataset("AIEnergyScore/requests_debug", split="test", token=TOKEN).to_pandas()
38
-
39
- ## Set benchmark to failed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # If we have a custom reason for failure, add that instead of generic FAILED.
42
- if args.reason:
43
- dataset.loc[dataset["model"].isin([args.model_name]), ['status']] = args.reason
44
- else:
45
- # TODO: This doesn't have to be try-except, we could actually check if the file is there...
46
- try:
47
- # Read error message
48
- with open(f"{args.run_dir}/error.log", 'r') as file:
49
- for f in file.readlines():
50
- if 'Traceback (most recent call last):' in f:
51
- error_message = f
52
- dataset.loc[dataset["model"].isin([args.model_name]), ['status']] = "FAILED"
53
- print("Status set to FAILED")
54
- else:
55
- dataset.loc[dataset["model"].isin([args.model_name]), ['status']] = "COMPLETED"
56
- # Add a new column for the error message if necessary
57
- except FileNotFoundError as e:
58
- print(f"Could not find {args.run_dir}/error.log")
59
-
60
  updated_dataset = Dataset.from_pandas(dataset)
61
- updated_dataset.push_to_hub("AIEnergyScore/requests_debug", split="test", token=TOKEN)
 
 
4
  from datasets import load_dataset, Dataset
5
  from huggingface_hub import HfApi
6
 
 
7
  TOKEN = os.environ.get("DEBUG")
8
  api = HfApi(token=TOKEN)
9
 
 
22
  required=True,
23
  help="Model to benchmark.",
24
  )
 
 
 
 
 
 
 
25
 
26
  args = parser.parse_args()
27
 
28
  # Updating request
29
+ dataset = load_dataset("EnergyStarAI/requests_debug", split="test",
30
+ token=TOKEN).to_pandas()
31
+
32
+ # Set benchmark to failed
33
+ # TODO: This doesn't have to be try-except, we could actually check if the file is there.
34
+ try:
35
+ # Read error message
36
+ with open(f"{args.run_dir}/error.log", 'r') as file:
37
+ for f in file.readlines():
38
+ if 'Traceback (most recent call last):' in f:
39
+ error_message = f
40
+ dataset.loc[dataset["model"].isin([args.model_name]), [
41
+ 'status']] = "FAILED"
42
+ print("Status set to FAILED")
43
+ else:
44
+ dataset.loc[dataset["model"].isin([args.model_name]), [
45
+ 'status']] = "COMPLETED"
46
+ # Add a new column for the error message if necessary
47
+ except FileNotFoundError as e:
48
+ print(f"Could not find {args.run_dir}/error.log")
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  updated_dataset = Dataset.from_pandas(dataset)
51
+ updated_dataset.push_to_hub("EnergyStarAI/requests_debug", split="test",
52
+ token=TOKEN)