File size: 2,673 Bytes
d023e59
41f48cc
d023e59
 
 
 
 
 
 
 
41f48cc
b9e8649
1590827
 
 
 
1cb68d0
28aee03
 
8f16bc5
28aee03
 
497b887
b9e8649
f3f1538
 
 
 
 
 
 
 
 
5d17777
f3f1538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1bc807f
82c03af
d023e59
da355f5
d023e59
1bc807f
 
 
 
e81427a
c12bfdb
1bc807f
5fade25
960800f
 
 
 
d023e59
 
42a7f14
 
 
 
 
c5729e2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Base image: NVIDIA CUDA 12.2.0 "devel" variant on Ubuntu 22.04.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04

# Build-time knobs (override with --build-arg); none of them persist as runtime env.
# PYTORCH_VERSION / PYTHON_VERSION pin the conda packages installed in the
# "Install pytorch" step below.
ARG PYTORCH_VERSION=2.4.0
ARG PYTHON_VERSION=3.9
# Reduced to major.minor and used as the pytorch-cuda pin in the conda install.
# NOTE(review): 12.1 differs from the 12.2.0 base image tag - presumably
# intentional; confirm.
ARG CUDA_VERSION=12.1
# Release tag of the Mambaforge installer fetched from conda-forge/miniforge.
ARG MAMBA_VERSION=24.3.0-0
# Conda channels passed via -c when installing pytorch.
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# Automatically set by buildx
ARG TARGETPLATFORM


#ENV HOME=/home/user \
#	PATH=/home/user/.local/bin:/opt/conda/bin:$PATH

# Put the conda installation (created later under /opt/conda) first on PATH so
# bare `pip`/`conda` calls in subsequent steps resolve to it.
ENV PATH=/opt/conda/bin:$PATH

#RUN mkdir -p .cache
#RUN mkdir -p data
# I'm not sure how to allow later python files used here to write to .cache without making it world-writeable.
#RUN chmod 777 -R .cache
#RUN chmod 777 -R data

    
# OS-level toolchain and fetch utilities. The apt package lists are removed in
# the same layer so they never end up in the image.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git \
        python3 \
        python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Install conda (Mambaforge) into /opt/conda.
# Docker's TARGETPLATFORM is translated into the architecture suffix used in the
# installer file name; anything other than linux/arm64 falls back to x86_64.
# Download, install and clean-up run in ONE layer: removing the installer in a
# later RUN would not shrink the image, because the file would already be
# committed to the layer that downloaded it.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
        *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -o /tmp/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    bash /tmp/mambaforge.sh -b -p /opt/conda && \
    rm /tmp/mambaforge.sh

# Install pytorch
# linux/arm64 is not supported: the build aborts there with an explicit message
# on stderr instead of a bare non-zero exit.
# On every other platform conda is updated first, then python, pytorch and the
# pytorch-cuda build matching CUDA_VERSION (reduced to major.minor) are
# installed from INSTALL_CHANNEL and CUDA_CHANNEL.
# conda's package caches are cleaned in the same layer.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64") \
            echo "ERROR: the pytorch install step does not support TARGETPLATFORM=linux/arm64" >&2; \
            exit 1 ;; \
        *) \
            /opt/conda/bin/conda update -y conda && \
            /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -y \
                "python=${PYTHON_VERSION}" \
                "pytorch=${PYTORCH_VERSION}" \
                "pytorch-cuda=$(echo "${CUDA_VERSION}" | cut -d'.' -f 1-2)" ;; \
    esac && \
    /opt/conda/bin/conda clean -ya

# Python dependencies of the application.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
COPY ./requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# optimum-benchmark from the energy_star_dev branch, installed in editable mode.
# The install targets the absolute checkout path, so the step no longer depends
# on the build's working directory happening to be /.
RUN git clone -b energy_star_dev https://github.com/huggingface/optimum-benchmark.git /optimum-benchmark && \
    pip install --no-cache-dir -e /optimum-benchmark

# Application scripts and state files; each one lands directly under / with its
# original file name.
COPY ./check_h100.py \
     ./entrypoint.sh \
     ./pause_space.py \
     ./parse_requests.py \
     ./create_results.py \
     ./failed_run.py \
     ./upload_run_folder.py \
     ./attempts.txt \
     ./failed_attempts.txt \
     /
# A directory source is copied by content, so ./runs keeps its own instruction
# with an explicit /runs destination.
COPY ./runs /runs

# Open up permissions only where the application lives instead of running
# `chmod 777 -R /`, which made every system file world-writable and rewrote the
# whole filesystem into one additional layer.
#   * `/` itself (non-recursive) stays 777 so the runtime user can still create
#     new top-level entries such as /.cache or /data.
#   * The copied scripts, state files, /runs and the editable /optimum-benchmark
#     checkout keep the 777 mode they had before. That mode includes the execute
#     bit, so /entrypoint.sh needs no separate `chmod +x`.
# NOTE(review): if entrypoint.sh or the Python scripts write to any other
# pre-existing path (e.g. under /opt/conda), add that path here - verify.
RUN chmod 777 / && \
    chmod -R 777 \
        /attempts.txt \
        /check_h100.py \
        /create_results.py \
        /entrypoint.sh \
        /failed_attempts.txt \
        /failed_run.py \
        /optimum-benchmark \
        /parse_requests.py \
        /pause_space.py \
        /runs \
        /upload_run_folder.py

# Expose the secret DEBUG at buildtime and use its value as git remote URL.
# The secret file is only mounted for the duration of this RUN (required=true
# makes the build fail when it is not supplied). The command substitution is
# quoted so a value containing whitespace or glob characters is passed to git
# as a single argument.
# NOTE(review): `git remote add` stores the URL in the config of the repository
# created by `git init` in the build's working directory, so the secret value
# itself ends up in an image layer - confirm this is acceptable.
RUN --mount=type=secret,id=DEBUG,mode=0444,required=true \
    git init && \
    git remote add origin "$(cat /run/secrets/DEBUG)"

# Exec-form entrypoint: /entrypoint.sh is started directly, without a wrapping
# `/bin/sh -c`, so it runs as the container's main process.
ENTRYPOINT ["/entrypoint.sh"]