forked from philschmid/deep-learning-pytorch-huggingface
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
72 lines (61 loc) · 1.85 KB
/
Copy pathDockerfile
File metadata and controls
72 lines (61 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Stage 1: Builder
# Builds a self-contained Python virtual environment in /opt/venv on top of
# the CUDA "devel" base image. Only /opt/venv is meant to leave this stage.
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 AS builder

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so it is not persisted as image configuration.
ARG DEBIAN_FRONTEND=noninteractive

# Set environment variables
# PYTHON_VERSION selects the apt python packages and the interpreter used to
# create the virtual environment below.
ENV PYTHON_VERSION=3.10
ENV PDSH_RCMD_TYPE=ssh

# Install basic build dependencies (sorted alphabetically for easier diffs)
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        pdsh \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Create virtual environment
RUN python${PYTHON_VERSION} -m venv /opt/venv
# Put the venv first on PATH so the bare `python` / `pip` commands below
# resolve to /opt/venv/bin.
ENV PATH=/opt/venv/bin:$PATH

# Install Python dependencies
# Order of operations in the single layer below:
#   1. upgrade pip and install wheel
#   2. install the pinned setuptools and torch from the PyTorch cu121 index
#      (each requirement is its own argument; note the space before every
#      line-continuation backslash)
#   3. install the training stack from the default index
#   4. remove pynvml and any flash-attn pulled in by step 3, then install
#      nvidia-ml-py and flash-attn; --no-build-isolation lets the build use the
#      packages already installed in the venv (see pip docs)
# NOTE(review): tensorboard, wandb, nvidia-ml-py and flash-attn are unpinned,
# so rebuilds may pick up different versions - consider pinning.
RUN python -m pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir wheel && \
    pip install --no-cache-dir \
        "setuptools<71.0.0" \
        torch==2.5.1 \
        --index-url https://download.pytorch.org/whl/cu121 && \
    pip install --no-cache-dir \
        tensorboard \
        transformers==4.48.1 \
        datasets==3.1.0 \
        accelerate==1.3.0 \
        hf-transfer==0.1.9 \
        deepspeed==0.15.4 \
        trl==0.14.0 \
        vllm==0.7.0 \
        wandb && \
    pip uninstall -y pynvml flash-attn && \
    pip install --no-cache-dir nvidia-ml-py flash-attn --no-build-isolation
# Stage 2: Runtime
# Final image: CUDA "runtime" base plus the virtual environment built in the
# `builder` stage.
FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu22.04

# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so it does not leak into the environment of
# containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Set environment variables
# PATH puts the copied virtual environment first so `python3` in CMD resolves
# to /opt/venv/bin/python3.
ENV PYTHON_VERSION=3.10 \
    PATH=/opt/venv/bin:$PATH \
    CUDA_HOME="/usr/local/cuda" \
    PDSH_RCMD_TYPE=ssh

# Install runtime dependencies (sorted alphabetically for easier diffs)
# The system python${PYTHON_VERSION} must match the interpreter the venv was
# created with in the builder stage.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        pdsh \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-venv \
    && rm -rf /var/lib/apt/lists/*

# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv

# Configure pdsh
# Make ssh the default rcmd module; mkdir -p guards against the config
# directory not having been created by the package.
RUN mkdir -p /etc/pdsh && echo "ssh" > /etc/pdsh/rcmd_default

# Set working directory
WORKDIR /workspace

# NOTE(review): no USER is set, so the container runs as root. Switching to a
# non-root user may affect volume mounts and SSH setup - evaluate before changing.

# Command
CMD ["python3"]