-
Notifications
You must be signed in to change notification settings - Fork 14
/
Dockerfile.rhel75gpu
325 lines (281 loc) · 14.7 KB
/
Dockerfile.rhel75gpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# Base image: the s2i-core RHEL 7 image from the Red Hat registry.
# NOTE(review): the reference carries no tag or digest, so the build is not
# reproducible — pin one once the intended release is confirmed.
FROM registry.access.redhat.com/rhscl/s2i-core-rhel7

# MAINTAINER is deprecated; the same contact is recorded as a label instead.
LABEL maintainer="Subin Modeel <[email protected]>"

# The package installation steps below run as root; the image switches to
# UID 1001 near the end of the file.
USER root

ENV BUILDER_VERSION=1.0

# Kubernetes / OpenShift S2I metadata.
LABEL io.k8s.description="S2I builder for Tensorflow binaries." \
      io.k8s.display-name="Tensorflow BUILD" \
      io.openshift.expose-services="8080:http" \
      io.openshift.tags="builder,python,tf-build" \
      io.openshift.s2i.scripts-url="image:///usr/libexec/s2i"
############################################
## taken from base
## https://hub.docker.com/r/nvidia/cuda/
############################################
# Download the NVIDIA RPM signing key, drop any line starting with "Version",
# store it under /etc/pki/rpm-gpg, and fail the build unless the stored file
# matches the pinned SHA-256 digest.
RUN NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 \
    && curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/7fa2af80.pub \
        | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA \
    && echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
# Yum repository definition for the CUDA packages, taken from the build context.
COPY cuda.repo /etc/yum.repos.d/cuda.repo

# CUDA release and the RPM version suffix derived from it. These stay two
# separate instructions because the second value references the first.
ENV CUDA_VERSION=9.2.148
ENV CUDA_PKG_VERSION=9-2-$CUDA_VERSION-1

RUN yum-config-manager --disable qci-1.1-for-rhel-7-beta-rpms

# Install the CUDA runtime library, create the /usr/local/cuda -> cuda-9.2
# symlink, and remove the yum cache within the same layer.
RUN yum install -y cuda-cudart-$CUDA_PKG_VERSION \
    && ln -s cuda-9.2 /usr/local/cuda \
    && rm -rf /var/cache/yum/*
##------------------------
## taken from devel
##------------------------
ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs
##========================

##------------------------
## taken from runtime
##------------------------
ENV CUDNN_VERSION=7.1.4.18
LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"
##========================

# nvidia-docker 1.0
LABEL com.nvidia.volumes.needed="nvidia_driver" \
      com.nvidia.cuda.version="${CUDA_VERSION}"

# Append the two NVIDIA driver library directories to the dynamic linker
# configuration, one path per line.
RUN printf '%s\n' "/usr/local/nvidia/lib" "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64

# nvidia-container-runtime
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NVIDIA_REQUIRE_CUDA="cuda>=9.2"
##------------------------
## taken from tensorflow
##------------------------
# Put the CUPTI library directory in front of the existing search path.
ENV LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
# CUDA compute capabilities and CUDA / cuDNN / NCCL versions for the
# TensorFlow build (TF_NCCL_VERSION=2 means NCCL 2.x).
ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 \
    TF_CUDA_VERSION=9.2 \
    TF_CUDNN_VERSION=7 \
    TF_NCCL_VERSION=2
##========================
############################################
# Do NOT move the following ENV instructions: PATH and LD_LIBRARY_PATH below
# extend the values set by the instructions earlier in this file.
ENV CUDA_HOME="/usr/local/cuda"
ENV CUDA_PATH="/usr/local/cuda"
ENV PATH="/usr/local/cuda/bin${PATH:+:${PATH}}"
# No trailing ';' after the closing quote: Dockerfile ENV has no statement
# terminator, so a ';' would be stored as part of the value and corrupt the
# last entry of the search path.
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# The stubs directory goes first in the search path.
# NOTE(review): presumably so the build can link against libcuda without a
# driver present — confirm this is also wanted at run time.
ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64/stubs/:$LD_LIBRARY_PATH"
# Do NOT uncomment; uncomment only for development.
# (presumably refers to the disabled '###ENV' lines in this section — confirm)

# Build arguments with their defaults; override with --build-arg.
ARG BAZEL_VERSION=0.15.0
ARG NB_PYTHON_VER=3.6
# Persist the build arguments into the image environment.
ENV BAZEL_VERSION=$BAZEL_VERSION
ENV PYTHON_VER=$NB_PYTHON_VER

# Not essential, but wise to set the lang
# Note: Users with other languages should set this in their derivative image
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LC_ALL="" \
    PYTHONIOENCODING=UTF-8

# Unprivileged account name and UID used by the later setup steps.
ENV NB_USER=default \
    NB_UID=1001
###ENV PYTHON_VER=2.7
ENV PYTHON_BIN_PATH=/usr/bin/python
ENV TINI_VERSION=v0.16.1

## Bazel
# Separate instruction: the value references PYTHON_VER set above.
ENV PYTHON_LIB_PATH=/usr/lib64/python$PYTHON_VER/site-packages
# No trailing ';' after the closing quote: Dockerfile ENV would store it as
# part of the value and corrupt the last entry of the search path.
ENV LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV BAZELRC=/root/.bazelrc
###ENV BAZEL_VERSION 0.15.0
################################################
## Tensorflow ./configure options for Bazel
################################################
ENV PYTHON_BIN_PATH=/usr/bin/python
ENV CC_OPT_FLAGS="-march=native"

# Feature switches: 1 = enabled, 0 = disabled.
ENV TF_NEED_JEMALLOC=1 \
    TF_NEED_GCP=0 \
    TF_NEED_VERBS=0 \
    TF_NEED_HDFS=0 \
    TF_ENABLE_XLA=0 \
    TF_NEED_OPENCL=0 \
    TF_NEED_CUDA=1 \
    TF_NEED_MPI=0 \
    TF_NEED_GDR=0 \
    TF_NEED_S3=0 \
    TF_NEED_KAFKA=0 \
    TF_NEED_OPENCL_SYCL=0 \
    TF_DOWNLOAD_CLANG=0 \
    TF_SET_ANDROID_WORKSPACE=0

ENV PATH=/usr/local/bin:$PATH:/home/default/.local/bin

# ENV values are never glob-expanded, so the former
# ".../java-1.8.0-openjdk-1.8.0*" was stored with a literal '*'. Point at the
# unversioned JDK path instead.
# NOTE(review): assumes java-1.8.0-openjdk-devel (installed by the build RUN
# below) provides the /usr/lib/jvm/java-1.8.0-openjdk symlink — confirm.
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk

ENV LD_LIBRARY_PATH="/usr/local/nccl-2.2:/usr/local/cuda-9.2${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# CUDA / NCCL settings; NCCL_INSTALL_PATH is the directory the NCCL archive
# is unpacked into by the build RUN below.
ENV TF_CUDA_VERSION=9.2 \
    TF_NEED_CUDA=1 \
    TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 \
    TF_NEED_TENSORRT=0 \
    NCCL_INSTALL_PATH=/usr/local/nccl-2.2
#bazel build -c opt --config=cuda --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" tensorflow/tools/pip_package:build_pip_package
# Main build layer. In order it:
#   1. installs the SCL Python selected by PYTHON_VER and the compiler/tooling RPMs,
#   2. installs the CUDA runtime and development RPMs, cuDNN 7.1
#      (checksum-verified) and NCCL 2.2,
#   3. repoints /usr/bin/python and /usr/bin/pip at the SCL interpreter and
#      bootstraps pip with get-pip.py,
#   4. prepares the NB_USER home, installs tini (signature-verified), fixes
#      ownership of /opt/app-root and writes /etc/.bazelrc.
# The "-----IMAGE_TEST--------" sections only print diagnostics to the build log.
# All yum calls come before step 3; the note above the next RUN says no yum
# commands may be used there (presumably because yum needs the original
# system Python — confirm).
RUN echo 'PS1="\u@\h:\w\\$ \[$(tput sgr0)\]"' >> /root/.bashrc \
    && yum install -y yum-utils \
    && prepare-yum-repositories rhel-server-rhscl-7-rpms \
    && if [ "$PYTHON_VER" = "2.7" ] ; then yum install -y scl-utils python27 python-devel python27-python-devel && scl enable python27 bash ; fi \
    && if [ "$PYTHON_VER" = "3.5" ] ; then yum install -y scl-utils rh-python35 rh-python35-python-pip rh-python35-scldevel && scl enable rh-python35 bash ; fi \
    && if [ "$PYTHON_VER" = "3.6" ] ; then yum install -y scl-utils rh-python36 rh-python36-python-pip rh-python36-scldevel && scl enable rh-python36 bash ; fi \
    && echo "-----IMAGE_TEST--------" \
    && if [ "$PYTHON_VER" = "3.5" ] ; then ls -l /opt/rh/rh-python35/root/usr/bin ; fi \
    && if [ "$PYTHON_VER" = "3.6" ] ; then ls -l /opt/rh/rh-python36/root/usr/bin ; fi \
    && echo "which_python="`which python` \
    && echo "link_which_python=`ls -l $(which python) | awk '{print $9 $10 $11}'`" \
    && echo "link_bin_python=`ls -l /usr/bin/python |awk '{print $9 $10 $11}'`" \
    && echo "which_pip="`which pip` \
    && echo "link_which_pip=`ls -l $(which pip) | awk '{print $9 $10 $11}'`" \
    && echo "PATH=$PATH" \
    && echo "PYTHON_VER=$PYTHON_VER" \
    && echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" \
    && echo "PYTHON_LIB_PATH=$PYTHON_LIB_PATH" \
    && echo "-----IMAGE_TEST--------" \
    && yum install -y gcc gcc-c++ glibc-devel openssl-devel gpg \
    && yum install -y which findutils x86info cpuid dmidecode procps \
    && yum install -y kernel-devel make automake autoconf swig zip unzip libtool binutils \
    && yum install -y freetype-devel libpng12-devel zlib-devel giflib-devel zeromq-devel \
    && yum install -y libxml2 libxml2-devel libxslt libxslt-devel gzip \
    && yum install -y java-1.8.0-openjdk java-1.8.0-openjdk-devel patch gdb file pciutils cmake \
    && yum install -y tar tree which git curl wget java-headless bzip2 gnupg2 sqlite protobuf-compiler \
    && echo "-----IMAGE_TEST--------" \
    && echo "which_python="`which python` \
    && echo "link_which_python=`ls -l $(which python) | awk '{print $9 $10 $11}'`" \
    && echo "link_bin_python=`ls -l /usr/bin/python |awk '{print $9 $10 $11}'`" \
    && echo "which_pip="`which pip` \
    && echo "link_which_pip=`ls -l $(which pip) | awk '{print $9 $10 $11}'`" \
    && echo "PATH=$PATH" \
    && echo "PYTHON_VER=$PYTHON_VER" \
    && echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" \
    && echo "PYTHON_LIB_PATH=$PYTHON_LIB_PATH" \
    && echo "-----IMAGE_TEST--------" \
    #########################
    ## taken from runtime
    #########################
    && yum install -y cuda-libraries-$CUDA_PKG_VERSION cuda-nvtx-$CUDA_PKG_VERSION \
    #########################
    ## taken from devel
    #########################
    && yum install -y cuda-libraries-dev-$CUDA_PKG_VERSION cuda-nvml-dev-$CUDA_PKG_VERSION cuda-minimal-build-$CUDA_PKG_VERSION cuda-command-line-tools-$CUDA_PKG_VERSION \
    && rm -rf /var/cache/yum/* \
    #########################
    ## taken from runtime
    #########################
    # cuDNN: download, verify against the pinned SHA-256, unpack to
    # /usr/local/cudnn, then expose the header and libraries under /usr/local/cuda.
    && CUDNN_DOWNLOAD_SUM=f875340f812b942408098e4c9807cb4f8bdaea0db7c48613acece10c7c827101 \
    && curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.1.4/cudnn-9.2-linux-x64-v7.1.tgz -O \
    && echo "$CUDNN_DOWNLOAD_SUM cudnn-9.2-linux-x64-v7.1.tgz" | sha256sum -c - \
    && mkdir -p /usr/local/cudnn \
    && tar --no-same-owner -xzf cudnn-9.2-linux-x64-v7.1.tgz -C /usr/local/cudnn --strip-components 1 \
    && ln -s /usr/local/cudnn/include/cudnn.h /usr/local/cuda/include/cudnn.h \
    && cp /usr/local/cudnn/lib64/* /usr/local/cuda/lib64 \
    && rm cudnn-9.2-linux-x64-v7.1.tgz \
    # Give the libcuda stub a .so.1 name and refresh the linker cache.
    && ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 \
    && ldconfig \
    #########################
    ## taken from tf
    #########################
    ## https://tech.amikelive.com/node-735/how-to-install-nvidia-collective-communications-library-nccl-2-for-tensorflow-on-ubuntu-16-04/
    # NOTE(review): the NCCL archive is fetched over plain HTTP with no
    # checksum — add one once the expected digest is known.
    && mkdir -p /usr/local/nccl-2.2 \
    && wget http://file.rdu.redhat.com/~smodeel/nccl_2.2.13-1+cuda9.2_x86_64.txz -P /usr/local/nccl-2.2/ \
    && tar -Jxvf /usr/local/nccl-2.2/nccl_2.2.13*.txz -C /usr/local/nccl-2.2/ --strip-components 1 \
    && export LD_LIBRARY_PATH="/usr/local/nccl-2.2:/usr/local/cuda-9.2${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
    && ln -s /usr/local/nccl-2.2/include/nccl.h /usr/include/nccl.h \
    && ln -s /usr/local/nccl-2.2/include/nccl.h /usr/local/cuda/include/nccl.h \
    ## Link NCCL library and header where the build script expects them.
    # The link source is the directory NCCL was unpacked into above; the
    # previous source, /usr/lib/nccl-2.2/..., is never created in this file,
    # so the link was dangling.
    && mkdir -p /usr/local/cuda-9.2/lib \
    && ln -s /usr/local/nccl-2.2/lib/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 \
    #########################
    && rpm -qf /bin/scl \
    && scl -l \
    && ls -l /usr/bin/python* \
    && chgrp -R root /opt \
    && curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py" \
    && echo $PATH $LD_LIBRARY_PATH \
    && echo $PYTHON_VER \
    && if [ "$PYTHON_VER" = "2.7" ] ; then python$PYTHON_VER get-pip.py ; fi \
    # For an SCL Python 3.x: add its lib64 to the search path, replace
    # /usr/bin/python with a link to the SCL interpreter, bootstrap pip, and
    # link /usr/bin/pip to the SCL pip.
    && if [ "$PYTHON_VER" = "3.5" ] ; then \
           ls -l /opt/rh/rh-python35/root/usr/lib64/ \
           && export LD_LIBRARY_PATH="/opt/rh/rh-python35/root/usr/lib64/:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
           && echo $LD_LIBRARY_PATH \
           && export PYTHON_LIB_PATH=/opt/rh/rh-python35/root/usr/lib/python3.5/site-packages \
           && echo $PYTHON_LIB_PATH \
           && rm -fr /usr/bin/python \
           && ln -s /opt/rh/rh-python35/root/usr/bin/python3.5 /usr/bin/python \
           && ls -l /usr/bin/python* \
           && python get-pip.py \
           && rm -fr /usr/bin/pip \
           && ln -s /opt/rh/rh-python35/root/usr/bin/pip /usr/bin/pip; \
       fi \
    && if [ "$PYTHON_VER" = "3.6" ] ; then \
           ls -l /opt/rh/rh-python36/root/usr/lib64/ \
           && export LD_LIBRARY_PATH="/opt/rh/rh-python36/root/usr/lib64/:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
           && echo $LD_LIBRARY_PATH \
           && export PYTHON_LIB_PATH=/opt/rh/rh-python36/root/usr/lib/python3.6/site-packages \
           && echo $PYTHON_LIB_PATH \
           && rm -fr /usr/bin/python \
           && ln -s /opt/rh/rh-python36/root/usr/bin/python3.6 /usr/bin/python \
           && ls -l /usr/bin/python* \
           && python get-pip.py \
           && rm -fr /usr/bin/pip \
           && ln -s /opt/rh/rh-python36/root/usr/bin/pip /usr/bin/pip; \
       fi \
    && echo "-----IMAGE_TEST--------" \
    && if [ "$PYTHON_VER" = "3.5" ] ; then echo "PYTHON_H="`rpm -ql rh-python35-python-devel | grep Python.h` ; fi \
    && if [ "$PYTHON_VER" = "3.5" ] ; then ls -l /opt/rh/rh-python35/root/usr/include/ && ls -l /opt/rh/rh-python35/root/usr/lib64/ ; fi \
    && if [ "$PYTHON_VER" = "3.6" ] ; then echo "PYTHON_H="`rpm -ql rh-python36-python-devel | grep Python.h` ; fi \
    && if [ "$PYTHON_VER" = "3.6" ] ; then ls -l /opt/rh/rh-python36/root/usr/include/ && ls -l /opt/rh/rh-python36/root/usr/lib64/ ; fi \
    && echo "which_python="`which python` \
    && echo "link_which_python=`ls -l $(which python) | awk '{print $9 $10 $11}'`" \
    && echo "link_bin_python=`ls -l /usr/bin/python |awk '{print $9 $10 $11}'`" \
    && echo "which_pip="`which pip` \
    && echo "link_which_pip=`ls -l $(which pip) | awk '{print $9 $10 $11}'`" \
    && echo "PATH=$PATH" \
    && echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" \
    && echo "PYTHON_LIB_PATH=$PYTHON_LIB_PATH" \
    && echo "PYTHON_VER=$PYTHON_VER" \
    && ls -l /usr/bin/python* \
    && echo "-----IMAGE_TEST--------" \
    && mkdir -p /home/$NB_USER/.ssh \
    && echo 'alias ll="ls -l"' >> /home/$NB_USER/.bashrc \
    && ssh-keyscan pagure.io >> /home/$NB_USER/.ssh/known_hosts \
    && chmod -R a+rwx /opt \
    # NOTE(review): /etc/passwd is made readable and writable by every user —
    # confirm this is intended.
    && chmod a+rw /etc/passwd \
    && wget -q https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini -P /tmp \
    && wget -q https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini.asc -P /tmp \
    && cd /tmp \
    # Import the tini signing key by its full fingerprint (a 32-bit short ID
    # can collide), using the keyserver named in tini's README, then verify
    # the detached signature against the downloaded binary explicitly.
    && gpg --batch --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 595E85A6B1B4779EA4DAAEC70B588DFF0527A9B7 \
    && gpg --batch --verify /tmp/tini.asc /tmp/tini \
    && mv /tmp/tini /usr/local/bin/tini \
    && chmod +x /usr/local/bin/tini \
    && chown -R 1001:1001 /opt/app-root \
    && chgrp -R root /opt/app-root \
    && chmod -R ug+rwx /opt/app-root \
    # Bazel settings appended to /etc/.bazelrc; the comment lines after this
    # RUN explain the choice of options.
    && echo "startup --batch" >>/etc/.bazelrc \
    && echo "build --spawn_strategy=standalone --genrule_strategy=standalone" >>/etc/.bazelrc
#https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/install/.bazelrc
# Running bazel inside a `docker build` command causes trouble, cf:
# https://github.com/bazelbuild/bazel/issues/134
# The easiest solution is to set up a bazelrc file forcing --batch.
# Similarly, we need to workaround sandboxing issues:
# https://github.com/bazelbuild/bazel/issues/418
#
# A cloned repo is found under serving folder
#Size of bazel-$BAZEL_VERSION-installer-linux-x86_64.sh is 200MB+
# downloaded file is available under bazel folder
# No yum commands here
# removed python to fix numpy issue
# Second build layer (no yum calls): installs Python packages with pip,
# downloads and runs the Bazel installer under /tf/tools, then prepares
# /workspace and the NB_USER home directory with root-group ownership.
RUN mkdir -p /tf \
    # Put the selected SCL Python's lib64 ahead of the existing search path.
    && case "$PYTHON_VER" in \
           3.5) export LD_LIBRARY_PATH="/opt/rh/rh-python35/root/usr/lib64/:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" ;; \
           3.6) export LD_LIBRARY_PATH="/opt/rh/rh-python36/root/usr/lib64/:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" ;; \
       esac \
    && echo $LD_LIBRARY_PATH \
    && pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -U setuptools \
    && pip install --no-cache-dir enum34 futures mock numpy six pixiedust pillow pyyaml \
    && mkdir -p /tf/tools \
    && cd /tf/tools \
    && wget -q https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
    && ls -l /tf/tools \
    && chmod +x bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
    && ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh \
    # Invoke bazel once with no arguments right after installation.
    && bazel \
    # Make root the primary group of NB_USER.
    && usermod -g root $NB_USER \
    && mkdir -p /workspace \
    && chown $NB_UID:root /workspace \
    && chmod 1777 /workspace \
    && mkdir -p /home/$NB_USER \
    && chown -R $NB_UID:root /home/$NB_USER \
    && chmod g+rwX,o+rX -R /home/$NB_USER
# S2I scripts, placed at the location named by the
# io.openshift.s2i.scripts-url label.
COPY ./s2i/bin/ /usr/libexec/s2i
# COPY rather than ADD: these are local paths copied as-is, so ADD's archive
# extraction and URL handling are not wanted.
# NOTE(review): assumes none of these sources is a tar archive that was meant
# to be unpacked — confirm.
COPY test/ /tmp/test/
COPY entrypoint /entrypoint
RUN chmod +x /entrypoint
COPY build_tools /build_tools
# NO CLEANUP: the build toolchain stays in the image.
# Do not add commands such as the following:
# && yum erase -y gcc gcc-c++ glibc-devel \
# && yum clean all -y \

# Documents the port advertised by the io.openshift.expose-services label.
EXPOSE 8080

ENV HOME=/home/$NB_USER

# Run as the unprivileged UID 1001 (the same value as NB_UID).
# The original note says this default user is created in the
# openshift/base-centos7 image — presumably it comes from the base image;
# confirm.
USER 1001

# Start in /workspace, which the build made world-writable with the sticky
# bit set, so tools that create files in the current directory do not fail.
WORKDIR /workspace

ENTRYPOINT ["/entrypoint"]
# TODO: Set the default CMD for the image
CMD ["/usr/libexec/s2i/usage"]