# Dockerfile_base
# This base image ships with Java 8 (needed for Scala)
FROM openjdk:8-jdk-alpine
# Copy in Anaconda and Python from the official Anaconda image (needed for PySpark and the Zeppelin Python interpreter)
COPY --from=continuumio/anaconda3:2019.07-alpine / /
# Set env variables
ENV DAEMON_RUN=true
ENV SPARK_VERSION=2.4.5
ENV HADOOP_VERSION=2.7
ENV SCALA_VERSION=2.12.3
ENV SCALA_HOME=/usr/share/scala
ENV SPARK_HOME=/spark
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
ENV ZEPPELIN_HOME=/zeppelin
ENV ZEPPELIN_VERSION=0.8.2
ENV ZEPPELIN_ADDR="0.0.0.0"
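# These are build-time defaults; like any ENV value they can be overridden per container at run time,
# e.g. `docker run -e ZEPPELIN_ADDR=127.0.0.1 <image>` (a usage sketch, not part of this build).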
# Add additional repositories for apk to use
RUN echo http://mirror.yandex.ru/mirrors/alpine/v3.3/main > /etc/apk/repositories; \
echo http://mirror.yandex.ru/mirrors/alpine/v3.3/community >> /etc/apk/repositories
# Update the package index and install base utilities
RUN apk --update add wget tar bash coreutils procps openssl
# Install Scala
RUN apk add --no-cache --virtual=.build-dependencies wget ca-certificates && \
apk add --no-cache bash && \
cd "/tmp" && \
wget "https://downloads.typesafe.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz" && \
tar xzf "scala-${SCALA_VERSION}.tgz" && \
mkdir "${SCALA_HOME}" && \
rm "/tmp/scala-${SCALA_VERSION}/bin/"*.bat && \
mv "/tmp/scala-${SCALA_VERSION}/bin" "/tmp/scala-${SCALA_VERSION}/lib" "${SCALA_HOME}" && \
ln -s "${SCALA_HOME}/bin/"* "/usr/bin/" && \
apk del .build-dependencies && \
rm -rf "/tmp/"*
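# Optional sanity check for the Scala install (hypothetical, not part of the original build; left commented out):
# RUN scalac -version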
# Prepare a directory for sbt (note: the PATH export below only applies within this single RUN layer, and sbt itself is not downloaded here)
RUN export PATH="/usr/local/sbt/bin:$PATH" && apk update && apk add ca-certificates wget tar && mkdir -p "/usr/local/sbt"
# Get Apache Spark
RUN wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
# Install Spark into the folder "/spark" and add this location to the PATH env variable
# (an `export` inside a RUN step would not persist into later layers, so ENV is used instead)
RUN tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /spark && \
rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
ENV PATH=$SPARK_HOME/bin:$PATH
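# Once built, the Spark install can be smoke-tested with e.g. `spark-submit --version`
# (a hypothetical check, not part of the original build).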
# Install Zeppelin
RUN wget -O zeppelin-${ZEPPELIN_VERSION}-bin-all.tgz http://archive.apache.org/dist/zeppelin/zeppelin-${ZEPPELIN_VERSION}/zeppelin-${ZEPPELIN_VERSION}-bin-all.tgz && \
tar -zxvf zeppelin-${ZEPPELIN_VERSION}-bin-all.tgz && \
rm -rf zeppelin-${ZEPPELIN_VERSION}-bin-all.tgz && \
mkdir -p ${ZEPPELIN_HOME} && \
mv zeppelin-${ZEPPELIN_VERSION}-bin-all/* ${ZEPPELIN_HOME}/ && \
chown -R root:root ${ZEPPELIN_HOME} && \
mkdir -p ${ZEPPELIN_HOME}/logs ${ZEPPELIN_HOME}/run ${ZEPPELIN_HOME}/webapps && \
# Allow process to edit /etc/passwd, to create a user entry for zeppelin
chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
# Give access to some specific folders
chmod -R 755 "${ZEPPELIN_HOME}/logs" "${ZEPPELIN_HOME}/run" "${ZEPPELIN_HOME}/notebook" "${ZEPPELIN_HOME}/conf" && \
# Allow process to create new folders (e.g. webapps)
chmod 755 ${ZEPPELIN_HOME}
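# No CMD/ENTRYPOINT is defined in this base Dockerfile; a derived image would typically launch Zeppelin
# with `${ZEPPELIN_HOME}/bin/zeppelin.sh` (foreground) or `${ZEPPELIN_HOME}/bin/zeppelin-daemon.sh start`
# (a usage sketch based on the standard Zeppelin distribution layout).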
# Install additional packages not included in the Anaconda base install
RUN /opt/conda/bin/conda install -c anaconda -y \
pymongo \
r-base \
r-caret \
r-crayon \
r-devtools \
r-forecast \
r-knitr \
r-ggplot2 \
r-googleVis \
r-hexbin \
r-htmltools \
r-htmlwidgets \
r-irkernel \
r-nycflights13 \
r-randomforest \
r-rcurl \
r-rmarkdown \
r-rodbc \
r-rsqlite \
r-shiny \
r-tidyverse \
unixodbc \
&& /opt/conda/bin/conda clean --all -f -y
ENV PATH="/opt/conda/bin:${PATH}"
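# A typical build invocation for this base image might be (the image tag is an arbitrary example, not taken from this repository):
#   docker build -f Dockerfile_base -t spark-zeppelin-base .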