Skip to content

Commit

Permalink
Merge pull request #88 from uc-cdis/feat/al2
Browse files Browse the repository at this point in the history
making al base image updates to pelican export image
  • Loading branch information
MichaelLukowski authored Dec 5, 2024
2 parents 03118e5 + 244c90f commit d72f681
Show file tree
Hide file tree
Showing 6 changed files with 1,408 additions and 1,367 deletions.
88 changes: 54 additions & 34 deletions export.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,54 @@
FROM quay.io/cdis/python:python3.9-buster-2.0.0
ARG AZLINUX_BASE_VERSION=master

# Base stage with python-build-base
FROM quay.io/cdis/python-build-base:${AZLINUX_BASE_VERSION} AS base

ENV appname=pelican

ENV DEBIAN_FRONTEND=noninteractive
# create gen3 user
# Create a group 'gen3' with GID 1000 and a user 'gen3' with UID 1000
RUN groupadd -g 1000 gen3 && \
useradd -m -s /bin/bash -u 1000 -g gen3 gen3

# Install pipx
RUN python3 -m pip install pipx && \
python3 -m pipx ensurepath

USER gen3
# Install Poetry via pipx
RUN pipx install poetry
ENV PATH="/home/gen3/.local/bin:${PATH}"
USER root

WORKDIR /${appname}

# Builder stage
FROM base AS builder

RUN dnf update && dnf install -y \
python3-devel \
gcc \
postgresql-devel

COPY . /${appname}

# cache so that poetry install will run if these files change
COPY poetry.lock pyproject.toml /${appname}/

RUN poetry install -vv --no-interaction --without dev

# Final stage
FROM base

#RUN mkdir -p /usr/share/man/man1
#RUN mkdir -p /usr/share/man/man7
COPY --from=builder /venv /venv
COPY --from=builder /${appname} /${appname}

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libgnutls30 \
openjdk-11-jre-headless \
# dependency for psycopg2
libpq-dev \
postgresql-client \
RUN dnf update && dnf install -y \
wget \
unzip \
g++ \
&& rm -rf /var/lib/apt/lists/*
tar \
java-11-amazon-corretto \
gnutls \
&& rm -rf /var/cache/yum

ENV HADOOP_VERSION="3.2.1"
ENV HADOOP_HOME="/hadoop" \
Expand All @@ -27,7 +58,8 @@ RUN wget ${HADOOP_INSTALLATION_URL} \
&& mkdir -p $HADOOP_HOME \
&& tar -xvf hadoop-${HADOOP_VERSION}.tar.gz -C ${HADOOP_HOME} --strip-components 1 \
&& rm hadoop-${HADOOP_VERSION}.tar.gz \
&& rm -rf $HADOOP_HOME/share/doc
&& rm -rf $HADOOP_HOME/share/doc \
&& chown -R gen3:gen3 $HADOOP_HOME

ENV SQOOP_VERSION="1.4.7"
ENV SQOOP_HOME="/sqoop" \
Expand All @@ -39,12 +71,13 @@ RUN wget -q ${SQOOP_INSTALLATION_URL} \
&& mkdir -p $SQOOP_HOME \
&& tar -xvf sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz -C ${SQOOP_HOME} --strip-components 1 \
&& rm sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz \
&& rm -rf $SQOOP_HOME/docs
&& rm -rf $SQOOP_HOME/docs \
&& chown -R gen3:gen3 $SQOOP_HOME

ENV POSTGRES_JAR_VERSION="42.2.9"
ENV POSTGRES_JAR_URL="https://jdbc.postgresql.org/download/postgresql-${POSTGRES_JAR_VERSION}.jar" \
POSTGRES_JAR_PATH=$SQOOP_HOME/lib/postgresql-${POSTGRES_JAR_VERSION}.jar \
JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
JAVA_HOME="/usr/lib/jvm/java-11-amazon-corretto"

RUN wget ${POSTGRES_JAR_URL} -O ${POSTGRES_JAR_PATH}

Expand All @@ -63,25 +96,12 @@ ENV HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop" \

RUN mkdir -p $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME

ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}

WORKDIR /pelican

RUN pip install --upgrade pip
RUN chown -R gen3:gen3 $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME $JAVA_HOME $POSTGRES_JAR_PATH

# install poetry
RUN pip install --upgrade "poetry<1.2"

COPY . /$appname
WORKDIR /$appname

# cache so that poetry install will run if these files change
COPY poetry.lock pyproject.toml /$appname/
ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}

# install package and dependencies via poetry
RUN poetry config virtualenvs.create false \
&& poetry install -vv --no-dev --no-interaction \
&& poetry show -v
# Switch to non-root user 'gen3' for the serving process
USER gen3

ENV PYTHONUNBUFFERED=1

Expand Down
89 changes: 54 additions & 35 deletions import.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,54 @@
FROM quay.io/cdis/python:python3.9-buster-2.0.0
ARG AZLINUX_BASE_VERSION=master

# Base stage with python-build-base
FROM quay.io/cdis/python-build-base:${AZLINUX_BASE_VERSION} AS base

ENV appname=pelican

ENV DEBIAN_FRONTEND=noninteractive
# create gen3 user
# Create a group 'gen3' with GID 1000 and a user 'gen3' with UID 1000
RUN groupadd -g 1000 gen3 && \
useradd -m -s /bin/bash -u 1000 -g gen3 gen3

# Install pipx
RUN python3 -m pip install pipx && \
python3 -m pipx ensurepath

USER gen3
# Install Poetry via pipx
RUN pipx install poetry
ENV PATH="/home/gen3/.local/bin:${PATH}"
USER root

WORKDIR /${appname}

# Builder stage
FROM base AS builder

RUN dnf update && dnf install -y \
python3-devel \
gcc \
postgresql-devel

COPY . /${appname}

# cache so that poetry install will run if these files change
COPY poetry.lock pyproject.toml /${appname}/

#RUN mkdir -p /usr/share/man/man1
#RUN mkdir -p /usr/share/man/man7
RUN poetry install -vv --no-interaction --without dev

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libgnutls30 \
openjdk-11-jre-headless \
# dependency for psycopg2
libpq-dev \
postgresql-client \
# Final stage
FROM base

COPY --from=builder /venv /venv
COPY --from=builder /${appname} /${appname}

RUN dnf update && dnf install -y \
wget \
unzip \
g++ \
&& rm -rf /var/lib/apt/lists/*
tar \
java-11-amazon-corretto \
gnutls \
&& rm -rf /var/cache/yum

ENV HADOOP_VERSION="3.2.1"
ENV HADOOP_HOME="/hadoop" \
Expand All @@ -27,7 +58,8 @@ RUN wget ${HADOOP_INSTALLATION_URL} \
&& mkdir -p $HADOOP_HOME \
&& tar -xvf hadoop-${HADOOP_VERSION}.tar.gz -C ${HADOOP_HOME} --strip-components 1 \
&& rm hadoop-${HADOOP_VERSION}.tar.gz \
&& rm -rf $HADOOP_HOME/share/doc
&& rm -rf $HADOOP_HOME/share/doc \
&& chown -R gen3:gen3 $HADOOP_HOME

ENV SQOOP_VERSION="1.4.7"
ENV SQOOP_HOME="/sqoop" \
Expand All @@ -39,12 +71,13 @@ RUN wget -q ${SQOOP_INSTALLATION_URL} \
&& mkdir -p $SQOOP_HOME \
&& tar -xvf sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz -C ${SQOOP_HOME} --strip-components 1 \
&& rm sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz \
&& rm -rf $SQOOP_HOME/docs
&& rm -rf $SQOOP_HOME/docs \
&& chown -R gen3:gen3 $SQOOP_HOME

ENV POSTGRES_JAR_VERSION="42.2.9"
ENV POSTGRES_JAR_URL="https://jdbc.postgresql.org/download/postgresql-${POSTGRES_JAR_VERSION}.jar" \
POSTGRES_JAR_PATH=$SQOOP_HOME/lib/postgresql-${POSTGRES_JAR_VERSION}.jar \
JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
JAVA_HOME="/usr/lib/jvm/java-11-amazon-corretto"

RUN wget ${POSTGRES_JAR_URL} -O ${POSTGRES_JAR_PATH}

Expand All @@ -63,26 +96,12 @@ ENV HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop" \

RUN mkdir -p $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME

ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}

WORKDIR /pelican
RUN chown -R gen3:gen3 $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME $JAVA_HOME $POSTGRES_JAR_PATH

RUN pip install --upgrade pip

# install poetry
RUN pip install --upgrade "poetry<1.2"

COPY . /$appname
WORKDIR /$appname

# copy ONLY poetry artifact, install the dependencies but not fence
# this will make sure that the dependencies are cached
COPY poetry.lock pyproject.toml /$appname/
ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}

# install package and dependencies via poetry
RUN poetry config virtualenvs.create false \
&& poetry install -vv --no-dev --no-interaction \
&& poetry show -v
# Switch to non-root user 'gen3' for the serving process
USER gen3

ENV PYTHONUNBUFFERED=1

Expand Down
6 changes: 5 additions & 1 deletion job_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,11 @@

if access_format == "guid":
# calculate md5 sum
md5_sum = hashlib.md5()
md5 = (
hashlib.md5()
if sys.version_info < (3, 9)
else hashlib.md5(usedforsecurity=False)
) # nosec
chunk_size = 8192
with open(fname, "rb") as f:
while True:
Expand Down
4 changes: 2 additions & 2 deletions pelican/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
def init_dictionary(url):
d = DataDictionary(url=url)
dictionary.init(d)
# the gdcdatamodel expects dictionary initiated on load, so this can't be
# gen3datamodel expects the dictionary to be initialized on load, so this
# can't be imported at module level
from gdcdatamodel import models as md
from gen3datamodel import models as md

return d, md

Expand Down
Loading

0 comments on commit d72f681

Please sign in to comment.