meg-huggingface committed on
Commit
d023e59
·
1 Parent(s): 41f48cc

Adding AIEnergyStar docker deets

Browse files
Files changed (3) hide show
  1. Dockerfile +46 -11
  2. check_h100.py +15 -0
  3. entrypoint.sh +7 -0
Dockerfile CHANGED
@@ -1,16 +1,51 @@
1
- # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
 
4
- FROM python:3.9
 
 
 
 
 
 
 
5
 
6
- RUN useradd -m -u 1000 user
7
- USER user
8
- ENV PATH="/home/user/.local/bin:$PATH"
9
 
10
- WORKDIR /app
 
 
 
 
 
 
11
 
12
- COPY --chown=user ./requirements.txt requirements.txt
13
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
 
 
 
 
 
 
 
14
 
15
- COPY --chown=user . /app
16
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# GPU benchmarking image: CUDA base image, conda-managed Python/PyTorch,
# project requirements, and an editable install of optimum-benchmark.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

# Build-time knobs; override with --build-arg.
ARG PYTORCH_VERSION=2.4.0
ARG PYTHON_VERSION=3.9
# Only used to select the conda "pytorch-cuda" package (major.minor) below.
ARG CUDA_VERSION=12.1
ARG MAMBA_VERSION=24.3.0-0
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM

# Put conda's bin directory first on PATH so tools installed under
# /opt/conda are found before system ones.
ENV PATH=/opt/conda/bin:$PATH

# OS-level build tooling; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git && \
    rm -rf /var/lib/apt/lists/*

# Install conda
# translating Docker's TARGETPLATFORM into mamba arches.
# Download, install and delete the installer in ONE layer so the installer
# script is not persisted in the image.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") MAMBA_ARCH=aarch64 ;; \
        *)             MAMBA_ARCH=x86_64 ;; \
    esac && \
    curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh

# Install pytorch
# On arm64 we exit with an error code
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") exit 1 ;; \
        *) /opt/conda/bin/conda update -y conda && \
           /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y "python=${PYTHON_VERSION}" "pytorch=${PYTORCH_VERSION}" "pytorch-cuda=$(echo "${CUDA_VERSION}" | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

# Python dependencies of the project; --no-cache-dir keeps pip's download
# cache out of the image layer.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Editable install of optimum-benchmark from the energy_star_dev branch.
# The checkout must stay at /optimum-benchmark because the install is editable.
RUN git clone -b energy_star_dev https://github.com/huggingface/optimum-benchmark.git /optimum-benchmark && \
    pip install --no-cache-dir -e /optimum-benchmark

# NOTE(review): entrypoint.sh runs `python /launch_backend.py`, but no
# instruction in this Dockerfile copies that file into the image. Confirm it
# is provided some other way, or add a COPY for it.
COPY ./check_h100.py /check_h100.py
COPY ./entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]
check_h100.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Guard script: fail unless GPU device 0 is an NVIDIA H100."""


def first_gpu_name(gpus):
    """Return the name of device 0 from a GPU listing, or None if there is none.

    Args:
        gpus: Either a single device name, or a list/tuple of device names
            (the two shapes this script handles for ``get_gpus()`` output).

    Returns:
        The first device name, or ``None`` when ``gpus`` is empty or ``None``.
    """
    if isinstance(gpus, (list, tuple)):
        # If there are several devices, keep the name of device 0.
        # An empty listing means no GPU was reported at all.
        return gpus[0] if gpus else None
    return gpus or None


if __name__ == "__main__":
    # Imported here so the helper above can be imported without
    # optimum-benchmark being installed.
    from optimum_benchmark.system_utils import get_gpus

    # Get the names of all GPU devices and keep device 0
    gpu_name = first_gpu_name(get_gpus())

    # Raise an error if the device is not H100 (or if no device was found)
    if gpu_name is not None and "NVIDIA H100" in gpu_name:
        print("At least one NVIDIA H100 GPU has been detected, launching the benchmark...")
    else:
        raise RuntimeError(f"This Docker container should be executed on NVIDIA H100 GPUs only, detected {gpu_name}.")
entrypoint.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Container entrypoint: start the backend launcher.

# exec replaces this shell with the Python process, so the launcher receives
# signals (e.g. SIGTERM on container stop) directly. The container's exit
# status is still the launcher's exit status.
exec python /launch_backend.py

# Disabled alternative flow, kept for reference: run the benchmark only after
# check_h100.py succeeds, forwarding the container arguments to it.
#python /check_h100.py
#if [[ $? = 0 ]]; then
#    optimum-benchmark --config-dir /optimum-benchmark/examples/energy_star/ "$@"
#fi