diff --git a/Dockerfile b/Dockerfile index 02906685d..62f56f8e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,7 +44,7 @@ RUN \ # Download H2ORestart from GitHub using a pinned version and hash. Note that # it's available in Debian repos, but not in Bookworm yet. -RUN mkdir /libreoffice_ext && cd libreoffice_ext \ +RUN mkdir /opt/libreoffice_ext && cd /opt/libreoffice_ext \ && H2ORESTART_FILENAME=h2orestart.oxt \ && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \ && echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \ @@ -64,18 +64,148 @@ RUN touch /opt/dangerzone/dangerzone/__init__.py # Copy only the Python code, and not any produced .pyc files. COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/ -# Let the entrypoint script write the OCI config for the inner container under -# /config.json. -RUN touch /config.json -RUN chown dangerzone:dangerzone /config.json - -# Switch to the dangerzone user for the rest of the script. -USER dangerzone - # Create a directory that will be used by gVisor as the place where it will # store the state of its containers. RUN mkdir /home/dangerzone/.containers +############################################################################### +# +# REUSING CONTAINER IMAGES: +# Anatomy of a hack +# ======================== +# +# The rest of the Dockerfile aims to do one thing: allow the final container +# image to actually contain two container images; one for the outer container +# (spawned by Podman/Docker Desktop), and one for the inner container (spawned +# by gVisor). +# +# This has already been done in the past, and we explain why and how in the +# design document for gVisor integration (should be in +# `docs/developer/gvisor.md`). In this iteration, we want to also +# achieve the following: +# +# 1. Have a small final image, by sharing some system paths between the inner +# and outer container image using symlinks. +# 2. 
Allow our security scanning tool to see the contents of the inner +# container image. +# 3. Make the outer container image operational, in the sense that you can use +# `apt` commands and perform a conversion with Dangerzone, outside the +# gVisor sandbox. This is helpful for debugging purposes. +# +# Below we'll explain how our design choices are informed by the above +# sub-goals. +# +# First, to achieve a small container image, we basically need to copy `/etc`, +# `/usr` and `/opt` from the original Dangerzone image to the **inner** +# container image (under `/home/dangerzone/dangerzone-image/rootfs/`) +# +# That's all we need. The rest of the files play no role, and we can actually +# mask them in gVisor's OCI config. +# +# Second, in order to let our security scanner find the installed packages, +# we need to copy the following dirs to the root of the **outer** container +# image: +# * `/etc`, so that the security scanner can detect the image type and its +# sources +# * `/var`, so that the security scanner can have access to the APT database. +# +# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to +# the **inner** one, in order to avoid leaking files like +# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running +# the **outer** container image. +# +# Third, in order to have an operational Debian image, we are _mostly_ covered +# by the dirs we have copied. There's a _rare_ case where during debugging, we +# may want to install a system package that has components in `/etc` and +# `/var`, which will not be available in the **inner** container image. In that +# case, the developer can do the necessary symlinks in the live container. 
+# +# FILESYSTEM HIERARCHY +# ==================== +# +# The above plan leads to the following filesystem hierarchy: +# +# Outer container image: +# +# # ls -l / +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 bin -> usr/bin +# -rwxr-xr-x 1 root root 7764 Jan 24 08:14 entrypoint.py +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 etc +# drwxr-xr-x 1 root root 4096 Jan 27 10:46 home +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:46 lib64 -> usr/lib64 +# drwxr-xr-x 2 root root 4096 Jan 27 10:46 root +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 run +# lrwxrwxrwx 1 root root 8 Jan 27 10:46 sbin -> usr/sbin +# drwxrwxrwx 2 root root 4096 Jan 27 10:46 tmp +# lrwxrwxrwx 1 root root 44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr +# drwxr-xr-x 11 root root 4096 Jan 27 10:47 var +# +# Inner container image: +# +# # ls -l /home/dangerzone/dangerzone-image/rootfs/ +# total 12 +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 bin -> usr/bin +# drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:47 lib64 -> usr/lib64 +# drwxr-xr-x 4 root root 4096 Jan 27 10:47 opt +# drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr +# +# SYMLINKING /USR +# =============== +# +# It's surprisingly difficult (maybe even borderline impossible), to symlink +# `/usr` to a different path during image build. The problem is that /usr +# is very sensitive, and you can't manipulate it in a live system. That is, I +# haven't found a way to do the following, or something equivalent: +# +# rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr +# +# The `ln` binary, even if you specify it by its full path, cannot run +# (probably because `ld-linux.so` can't be found). For this reason, we have +# to create the symlinks beforehand, in a previous build stage. 
Then, in an +# empty container image (scratch images), we can copy these symlinks and the +# /usr, and stitch everything together. +############################################################################### + +# Create the filesystem hierarchy that will be used to symlink /usr. + +RUN mkdir /new_root +RUN mkdir /new_root/root /new_root/run /new_root/tmp +RUN chmod 777 /new_root/tmp +RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr +RUN ln -s usr/bin /new_root/bin +RUN ln -s usr/lib /new_root/lib +RUN ln -s usr/lib64 /new_root/lib64 +RUN ln -s usr/sbin /new_root/sbin + +## Final image + +FROM scratch + +# Copy the filesystem hierarchy that we created in the previous stage, so that +# /usr can be a symlink. +COPY --from=dangerzone-image /new_root/ / + +# Copy the bare minimum to run Dangerzone in the inner container image. +COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/ +COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/ +COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ +RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin +RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib +RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64 + +# Copy the bare minimum to let the security scanner find vulnerabilities. +COPY --from=dangerzone-image /etc/ /etc/ +COPY --from=dangerzone-image /var/ /var/ + +# Allow our entrypoint script to make changes in the following folders. +RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/ + +# Switch to the dangerzone user for the rest of the script. 
+USER dangerzone + COPY container_helpers/entrypoint.py / ENTRYPOINT ["/entrypoint.py"] diff --git a/Dockerfile.in b/Dockerfile.in index 2824cf1c1..af03c8924 100644 --- a/Dockerfile.in +++ b/Dockerfile.in @@ -4,7 +4,7 @@ ARG DEBIAN_IMAGE_DATE={{DEBIAN_IMAGE_DATE}} -FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim +FROM debian:bookworm-${DEBIAN_IMAGE_DATE}-slim as dangerzone-image ARG GVISOR_ARCHIVE_DATE={{GVISOR_ARCHIVE_DATE}} ARG DEBIAN_ARCHIVE_DATE={{DEBIAN_ARCHIVE_DATE}} @@ -44,7 +44,7 @@ RUN \ # Download H2ORestart from GitHub using a pinned version and hash. Note that # it's available in Debian repos, but not in Bookworm yet. -RUN mkdir /libreoffice_ext && cd libreoffice_ext \ +RUN mkdir /opt/libreoffice_ext && cd /opt/libreoffice_ext \ && H2ORESTART_FILENAME=h2orestart.oxt \ && wget https://github.com/ebandal/H2Orestart/releases/download/$H2ORESTART_VERSION/$H2ORESTART_FILENAME \ && echo "$H2ORESTART_CHECKSUM $H2ORESTART_FILENAME" | sha256sum -c \ @@ -64,18 +64,148 @@ RUN touch /opt/dangerzone/dangerzone/__init__.py # Copy only the Python code, and not any produced .pyc files. COPY conversion/*.py /opt/dangerzone/dangerzone/conversion/ -# Let the entrypoint script write the OCI config for the inner container under -# /config.json. -RUN touch /config.json -RUN chown dangerzone:dangerzone /config.json - -# Switch to the dangerzone user for the rest of the script. -USER dangerzone - # Create a directory that will be used by gVisor as the place where it will # store the state of its containers. RUN mkdir /home/dangerzone/.containers +############################################################################### +# +# REUSING CONTAINER IMAGES: +# Anatomy of a hack +# ======================== +# +# The rest of the Dockerfile aims to do one thing: allow the final container +# image to actually contain two container images; one for the outer container +# (spawned by Podman/Docker Desktop), and one for the inner container (spawned +# by gVisor). 
+# +# This has already been done in the past, and we explain why and how in the +# design document for gVisor integration (should be in +# `docs/developer/gvisor.md`). In this iteration, we want to also +# achieve the following: +# +# 1. Have a small final image, by sharing some system paths between the inner +# and outer container image using symlinks. +# 2. Allow our security scanning tool to see the contents of the inner +# container image. +# 3. Make the outer container image operational, in the sense that you can use +# `apt` commands and perform a conversion with Dangerzone, outside the +# gVisor sandbox. This is helpful for debugging purposes. +# +# Below we'll explain how our design choices are informed by the above +# sub-goals. +# +# First, to achieve a small container image, we basically need to copy `/etc`, +# `/usr` and `/opt` from the original Dangerzone image to the **inner** +# container image (under `/home/dangerzone/dangerzone-image/rootfs/`) +# +# That's all we need. The rest of the files play no role, and we can actually +# mask them in gVisor's OCI config. +# +# Second, in order to let our security scanner find the installed packages, +# we need to copy the following dirs to the root of the **outer** container +# image: +# * `/etc`, so that the security scanner can detect the image type and its +# sources +# * `/var`, so that the security scanner can have access to the APT database. +# +# IMPORTANT: We don't symlink the `/etc` of the **outer** container image to +# the **inner** one, in order to avoid leaking files like +# `/etc/{hostname,hosts,resolv.conf}` that Podman/Docker mounts when running +# the **outer** container image. +# +# Third, in order to have an operational Debian image, we are _mostly_ covered +# by the dirs we have copied. There's a _rare_ case where during debugging, we +# may want to install a system package that has components in `/etc` and +# `/var`, which will not be available in the **inner** container image. 
In that +# case, the developer can do the necessary symlinks in the live container. +# +# FILESYSTEM HIERARCHY +# ==================== +# +# The above plan leads to the following filesystem hierarchy: +# +# Outer container image: +# +# # ls -l / +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 bin -> usr/bin +# -rwxr-xr-x 1 root root 7764 Jan 24 08:14 entrypoint.py +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 etc +# drwxr-xr-x 1 root root 4096 Jan 27 10:46 home +# lrwxrwxrwx 1 root root 7 Jan 27 10:46 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:46 lib64 -> usr/lib64 +# drwxr-xr-x 2 root root 4096 Jan 27 10:46 root +# drwxr-xr-x 1 root root 4096 Jan 27 10:47 run +# lrwxrwxrwx 1 root root 8 Jan 27 10:46 sbin -> usr/sbin +# drwxrwxrwx 2 root root 4096 Jan 27 10:46 tmp +# lrwxrwxrwx 1 root root 44 Jan 27 10:46 usr -> /home/dangerzone/dangerzone-image/rootfs/usr +# drwxr-xr-x 11 root root 4096 Jan 27 10:47 var +# +# Inner container image: +# +# # ls -l /home/dangerzone/dangerzone-image/rootfs/ +# total 12 +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 bin -> usr/bin +# drwxr-xr-x 43 root root 4096 Jan 27 10:46 etc +# lrwxrwxrwx 1 root root 7 Jan 27 10:47 lib -> usr/lib +# lrwxrwxrwx 1 root root 9 Jan 27 10:47 lib64 -> usr/lib64 +# drwxr-xr-x 4 root root 4096 Jan 27 10:47 opt +# drwxr-xr-x 12 root root 4096 Jan 27 10:47 usr +# +# SYMLINKING /USR +# =============== +# +# It's surprisingly difficult (maybe even borderline impossible), to symlink +# `/usr` to a different path during image build. The problem is that /usr +# is very sensitive, and you can't manipulate it in a live system. That is, I +# haven't found a way to do the following, or something equivalent: +# +# rm -r /usr && ln -s /home/dangerzone/dangerzone-image/rootfs/usr/ /usr +# +# The `ln` binary, even if you specify it by its full path, cannot run +# (probably because `ld-linux.so` can't be found). For this reason, we have +# to create the symlinks beforehand, in a previous build stage. 
Then, in an +# empty container image (scratch images), we can copy these symlinks and the +# /usr, and stitch everything together. +############################################################################### + +# Create the filesystem hierarchy that will be used to symlink /usr. + +RUN mkdir /new_root +RUN mkdir /new_root/root /new_root/run /new_root/tmp +RUN chmod 777 /new_root/tmp +RUN ln -s /home/dangerzone/dangerzone-image/rootfs/usr /new_root/usr +RUN ln -s usr/bin /new_root/bin +RUN ln -s usr/lib /new_root/lib +RUN ln -s usr/lib64 /new_root/lib64 +RUN ln -s usr/sbin /new_root/sbin + +## Final image + +FROM scratch + +# Copy the filesystem hierarchy that we created in the previous stage, so that +# /usr can be a symlink. +COPY --from=dangerzone-image /new_root/ / + +# Copy the bare minimum to run Dangerzone in the inner container image. +COPY --from=dangerzone-image /etc/ /home/dangerzone/dangerzone-image/rootfs/etc/ +COPY --from=dangerzone-image /opt/ /home/dangerzone/dangerzone-image/rootfs/opt/ +COPY --from=dangerzone-image /usr/ /home/dangerzone/dangerzone-image/rootfs/usr/ +RUN ln -s usr/bin /home/dangerzone/dangerzone-image/rootfs/bin +RUN ln -s usr/lib /home/dangerzone/dangerzone-image/rootfs/lib +RUN ln -s usr/lib64 /home/dangerzone/dangerzone-image/rootfs/lib64 + +# Copy the bare minimum to let the security scanner find vulnerabilities. +COPY --from=dangerzone-image /etc/ /etc/ +COPY --from=dangerzone-image /var/ /var/ + +# Allow our entrypoint script to make changes in the following folders. +RUN chown dangerzone:dangerzone /home/dangerzone /home/dangerzone/dangerzone-image/ + +# Switch to the dangerzone user for the rest of the script. 
+USER dangerzone + COPY container_helpers/entrypoint.py / ENTRYPOINT ["/entrypoint.py"] diff --git a/dangerzone/container_helpers/entrypoint.py b/dangerzone/container_helpers/entrypoint.py index 35248b36b..479b268e3 100755 --- a/dangerzone/container_helpers/entrypoint.py +++ b/dangerzone/container_helpers/entrypoint.py @@ -56,14 +56,14 @@ def log(message: str, *values: typing.Any) -> None: {"type": "RLIMIT_NOFILE", "hard": 4096, "soft": 4096}, ], }, - "root": {"path": "/", "readonly": True}, + "root": {"path": "rootfs", "readonly": True}, "hostname": "dangerzone", "mounts": [ # Mask almost every system directory of the outer container, by mounting tmpfs # on top of them. This is done to avoid leaking any sensitive information, # either mounted by Podman/Docker, or when gVisor runs, since we reuse the same # rootfs. We basically mask everything except for `/usr`, `/bin`, `/lib`, - # and `/etc`. + # `/etc`, and `/opt`. # # Note that we set `--root /home/dangerzone/.containers` for the directory where # gVisor will create files at runtime, which means that in principle, we are @@ -153,21 +153,6 @@ def log(message: str, *values: typing.Any) -> None: "source": "tmpfs", "options": ["nosuid", "noexec", "nodev"], }, - # Also mask some files that are usually mounted by Docker / Podman. These files - # should not contain any sensitive information, since we use the `--network - # none` flag, but we want to make sure in any case. - { - "destination": "/etc/hostname", - "type": "bind", - "source": "/dev/null", - "options": ["rbind", "ro"], - }, - { - "destination": "/etc/hosts", - "type": "bind", - "source": "/dev/null", - "options": ["rbind", "ro"], - }, # LibreOffice needs a writable home directory, so just mount a tmpfs # over it. 
{ @@ -219,7 +204,7 @@ def log(message: str, *values: typing.Any) -> None: json.dump(oci_config, sys.stderr, indent=2, sort_keys=True) # json.dump doesn't print a trailing newline, so print one here: log("") -with open("/config.json", "w") as oci_config_out: +with open("/home/dangerzone/dangerzone-image/config.json", "w") as oci_config_out: json.dump(oci_config, oci_config_out, indent=2, sort_keys=True) # Run gVisor. @@ -236,7 +221,7 @@ def log(message: str, *values: typing.Any) -> None: runsc_argv += ["--debug=true", "--alsologtostderr=true"] if os.environ.get("RUNSC_FLAGS"): runsc_argv += [x for x in shlex.split(os.environ.get("RUNSC_FLAGS", "")) if x] -runsc_argv += ["run", "--bundle=/", "dangerzone"] +runsc_argv += ["run", "--bundle=/home/dangerzone/dangerzone-image", "dangerzone"] log( "Running gVisor with command line: {}", " ".join(shlex.quote(s) for s in runsc_argv) ) diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index 3a073778a..b59e8ca08 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -253,7 +253,7 @@ async def install_libreoffice_ext(self, libreoffice_ext: str) -> None: "unzip", "-d", f"/usr/lib/libreoffice/share/extensions/{libreoffice_ext}/", - f"/libreoffice_ext/{libreoffice_ext}", + f"/opt/libreoffice_ext/{libreoffice_ext}", ] await self.run_command( unzip_args,