Skip to content

Commit

Permalink
Switch base image to Debian Stable
Browse files Browse the repository at this point in the history
Switch base image from Alpine Linux to Debian Stable, in order to reduce
our image footprint, improve our security posture, and build our
container image reproducibly.

Fixes #1046
Refs #1047
  • Loading branch information
apyrgio committed Jan 23, 2025
1 parent 9353965 commit 033ce09
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 65 deletions.
123 changes: 58 additions & 65 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,78 +1,71 @@
###########################################
# Build PyMuPDF

FROM alpine:latest as pymupdf-build
ARG ARCH
ARG REQUIREMENTS_TXT

# Install PyMuPDF via hash-checked requirements file
COPY ${REQUIREMENTS_TXT} /tmp/requirements.txt

# PyMuPDF provides non-arm musl wheels only.
# Only install build-dependencies if we are actually building the wheel
RUN case "$ARCH" in \
"arm64") \
# This is required for copying later, but is created only in the pre-built wheels
mkdir -p /usr/lib/python3.12/site-packages/PyMuPDF.libs/ \
&& apk --no-cache add linux-headers g++ linux-headers gcc make python3-dev py3-pip clang-dev ;; \
*) \
apk --no-cache add py3-pip ;; \
esac
RUN pip install -vv --break-system-packages --require-hashes -r /tmp/requirements.txt


###########################################
# Download H2ORestart
FROM alpine:latest as h2orestart-dl
ARG H2ORESTART_CHECKSUM=d09bc5c93fe2483a7e4a57985d2a8d0e4efae2efb04375fe4b59a68afd7241e2
# NOTE: Updating the packages to their latest versions requires bumping the
# Dockerfile args below. For more info about this file, read
# docs/developer/reproducibility.md.

ARG DEBIAN_IMAGE_DATE=20250113

FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim

ARG GVISOR_ARCHIVE_DATE=20250113
ARG DEBIAN_ARCHIVE_DATE=20250120
ARG H2ORESTART_CHECKSUM=7760dc2963332c50d15eee285933ec4b48d6a1de9e0c0f6082946f93090bd132
ARG H2ORESTART_VERSION=v0.7.0

ENV DEBIAN_FRONTEND=noninteractive

# The following way of installing packages is taken from
# https://github.com/reproducible-containers/repro-sources-list.sh/blob/master/Dockerfile.debian-12,
# and adapted to allow installing gVisor from each own repo as well.
RUN \
--mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
--mount=type=bind,source=./container_helpers/repro-sources-list.sh,target=/usr/local/bin/repro-sources-list.sh \
--mount=type=bind,source=./container_helpers/gvisor.key,target=/tmp/gvisor.key \
: "Hacky way to set a date for the Debian snapshot repos" && \
touch -d ${DEBIAN_ARCHIVE_DATE} /etc/apt/sources.list.d/debian.sources && \
touch -d ${DEBIAN_ARCHIVE_DATE} /etc/apt/sources.list && \
repro-sources-list.sh && \
: "Setup APT to install gVisor from its separate APT repo" && \
apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg && \
gpg -o /usr/share/keyrings/gvisor-archive-keyring.gpg --dearmor /tmp/gvisor.key && \
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/gvisor-archive-keyring.gpg] https://storage.googleapis.com/gvisor/releases ${GVISOR_ARCHIVE_DATE} main" > /etc/apt/sources.list.d/gvisor.list && \
: "Install the necessary gVisor and Dangerzone dependencies" && \
apt-get update && \
apt-get install -y --no-install-recommends \
python3 python3-fitz libreoffice-nogui libreoffice-java-common \
python3 python3-magic default-jre-headless fonts-noto-cjk fonts-dejavu \
runsc unzip wget && \
: "Clean up for improving reproducibility (optional)" && \
rm -rf /var/cache/fontconfig/ && \
rm -rf /etc/ssl/certs/java/cacerts && \
rm -rf /var/log/* /var/cache/ldconfig/aux-cache

# Download H2ORestart from GitHub using a pinned version and hash. Note that
# it's available in Debian repos, but not in Bookworm yet.
RUN mkdir /libreoffice_ext && cd libreoffice_ext \
&& H2ORESTART_FILENAME=h2orestart.oxt \
&& H2ORESTART_VERSION="v0.6.6" \
&& wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \
&& echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \
&& install -dm777 "/usr/lib/libreoffice/share/extensions/"

&& install -dm777 "/usr/lib/libreoffice/share/extensions/" \
&& rm /root/.wget-hsts

###########################################
# Dangerzone image

FROM alpine:latest

# Install dependencies
RUN apk --no-cache -U upgrade && \
apk --no-cache add \
libreoffice \
openjdk8 \
python3 \
py3-magic \
font-noto-cjk

COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/fitz/ /usr/lib/python3.12/site-packages/fitz
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/pymupdf/ /usr/lib/python3.12/site-packages/pymupdf
COPY --from=pymupdf-build /usr/lib/python3.12/site-packages/PyMuPDF.libs/ /usr/lib/python3.12/site-packages/PyMuPDF.libs
COPY --from=h2orestart-dl /libreoffice_ext/ /libreoffice_ext

RUN install -dm777 "/usr/lib/libreoffice/share/extensions/"
# Create an unprivileged user both for gVisor and for running Dangerzone.
RUN addgroup --gid 1000 dangerzone
RUN adduser --uid 1000 --ingroup dangerzone --shell /bin/true \
--disabled-password --home /home/dangerzone dangerzone

# Copy Dangerzone's conversion logic under /opt/dangerzone, and allow Python to
# import it.
RUN mkdir -p /opt/dangerzone/dangerzone
RUN touch /opt/dangerzone/dangerzone/__init__.py
COPY conversion /opt/dangerzone/dangerzone/conversion

# Add the unprivileged user. Set the UID/GID of the dangerzone user/group to
# 1000, since we will point to it from the OCI config.
#
# NOTE: A tmpfs will be mounted over /home/dangerzone directory,
# so nothing within it from the image will be persisted.
RUN addgroup -g 1000 dangerzone && \
adduser -u 1000 -s /bin/true -G dangerzone -h /home/dangerzone -D dangerzone

RUN GVISOR_URL="https://storage.googleapis.com/gvisor/releases/release/latest/$(uname -m)"; \
wget "${GVISOR_URL}/runsc" "${GVISOR_URL}/runsc.sha512" && \
sha512sum -c runsc.sha512 && \
rm -f runsc.sha512 && \
chmod 555 runsc && \
mv runsc /usr/bin/
# Copy only the Python code, and not any produced .pyc files.
COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/

# Let the entrypoint script write the OCI config for the inner container under
# /config.json.
RUN touch /config.json
RUN chown dangerzone:dangerzone /config.json

Expand Down
4 changes: 4 additions & 0 deletions dangerzone/conversion/doc_to_pixels.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ async def convert(self) -> None:
# At least .odt, .docx, .odg, .odp, .ods, and .pptx
"application/zip": {
"type": "libreoffice",
# NOTE: `file` command < 5.45 cannot detect hwpx files properly, so we
# enable the extension in any case. See also:
# https://github.com/freedomofpress/dangerzone/pull/460#issuecomment-1654166465
"libreoffice_ext": "h2orestart.oxt",
},
# At least .doc, .docx, .odg, .odp, .odt, .pdf, .ppt, .pptx, .xls, and .xlsx
"application/octet-stream": {
Expand Down

0 comments on commit 033ce09

Please sign in to comment.