diff --git a/examples/unified/.gitignore b/examples/unified/.gitignore new file mode 100644 index 0000000..acef31e --- /dev/null +++ b/examples/unified/.gitignore @@ -0,0 +1,5 @@ +/cfsctl +/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot +/fix-verity.efi +/image.qcow2 +/tmp/ diff --git a/examples/unified/Containerfile b/examples/unified/Containerfile new file mode 100644 index 0000000..3073ed5 --- /dev/null +++ b/examples/unified/Containerfile @@ -0,0 +1,48 @@ +# Need 6.12 kernel from rawhide +FROM fedora:rawhide AS base +COPY extra / +COPY cfsctl /usr/bin +RUN --mount=type=cache,target=/var/cache/libdnf5 < /etc/kernel/cmdline +EOF +RUN --mount=type=cache,target=/var/cache/libdnf5 < tmp/efi/loader/loader.conf +mkdir -p tmp/efi/EFI/BOOT tmp/efi/EFI/systemd +cp /usr/lib/systemd/boot/efi/systemd-bootx64.efi tmp/efi/EFI/systemd +cp /usr/lib/systemd/boot/efi/systemd-bootx64.efi tmp/efi/EFI/BOOT/BOOTX64.EFI +${CFSCTL} oci prepare-boot "${IMAGE_ID}" tmp/efi + +fakeroot ./make-image +qemu-img convert -f raw tmp/image.raw -O qcow2 image.qcow2 +./fix-verity image.qcow2 # https://github.com/tytso/e2fsprogs/issues/201 diff --git a/examples/unified/empty b/examples/unified/empty new file mode 100644 index 0000000..e69de29 diff --git a/examples/unified/extra/etc/resolv.conf b/examples/unified/extra/etc/resolv.conf new file mode 120000 index 0000000..697ba64 --- /dev/null +++ b/examples/unified/extra/etc/resolv.conf @@ -0,0 +1 @@ +../run/systemd/resolve/stub-resolv.conf \ No newline at end of file diff --git a/examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf b/examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf new file mode 100644 index 0000000..1defe5d --- /dev/null +++ b/examples/unified/extra/usr/lib/dracut/dracut.conf.d/37composefs.conf @@ -0,0 +1,6 @@ +# we want to make sure the virtio disk drivers get included +hostonly=no + +# we need to force these in via the initramfs because we don't have modules in +# the base 
image +force_drivers+=" virtio_net vfat " diff --git a/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service new file mode 100644 index 0000000..3ba0562 --- /dev/null +++ b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/composefs-pivot-sysroot.service @@ -0,0 +1,34 @@ +# Copyright (C) 2013 Colin Walters +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library. If not, see <https://www.gnu.org/licenses/>.
+
+[Unit]
+DefaultDependencies=no
+ConditionKernelCommandLine=composefs
+ConditionPathExists=/etc/initrd-release
+After=sysroot.mount
+Requires=sysroot.mount
+Before=initrd-root-fs.target
+Before=initrd-switch-root.target
+
+OnFailure=emergency.target
+OnFailureJobMode=isolate
+
+[Service]
+Type=oneshot
+ExecStart=/usr/bin/composefs-pivot-sysroot
+StandardInput=null
+StandardOutput=journal
+StandardError=journal+console
+RemainAfterExit=yes
diff --git a/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh
new file mode 100755
index 0000000..c4186c6
--- /dev/null
+++ b/examples/unified/extra/usr/lib/dracut/modules.d/37composefs/module-setup.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/bash
+
+# dracut module that adds composefs-pivot-sysroot and its systemd unit to the
+# initramfs.
+
+# Always succeeds.
+# NOTE(review): dracut is expected to treat a 0 return as "include this
+# module" -- confirm against dracut.modules(7).
+check() {
+    return 0
+}
+
+# Always succeeds and prints nothing, i.e. no other modules are requested.
+depends() {
+    return 0
+}
+
+# Installs composefs-pivot-sysroot and its unit file and hooks the unit into
+# initrd-root-fs.target.
+install() {
+    # Install to the exact path named by ExecStart= in the unit file, rather
+    # than relying on /bin being an alias for /usr/bin inside the initramfs.
+    inst \
+        "${moddir}/composefs-pivot-sysroot" /usr/bin/composefs-pivot-sysroot
+    inst \
+        "${moddir}/composefs-pivot-sysroot.service" \
+        "${systemdsystemunitdir}/composefs-pivot-sysroot.service"
+
+    $SYSTEMCTL -q --root "${initdir}" add-wants \
+        'initrd-root-fs.target' 'composefs-pivot-sysroot.service'
+}
diff --git a/examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf b/examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf
new file mode 100644
index 0000000..4d12c4e
--- /dev/null
+++ b/examples/unified/extra/usr/lib/kernel/install.conf.d/37composefs.conf
@@ -0,0 +1,2 @@
+layout = uki
+uki_generator = ukify
diff --git a/examples/unified/extra/usr/lib/systemd/network/37-wired.network b/examples/unified/extra/usr/lib/systemd/network/37-wired.network
new file mode 100644
index 0000000..e4e05fd
--- /dev/null
+++ b/examples/unified/extra/usr/lib/systemd/network/37-wired.network
@@ -0,0 +1,9 @@
+[Match]
+Type=ether
+
+[Link]
+RequiredForOnline=routable
+
+[Network]
+DHCP=yes
+
diff --git a/examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf
b/examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf
new file mode 100644
index 0000000..c387c18
--- /dev/null
+++ b/examples/unified/extra/usr/lib/systemd/system/systemd-growfs-root.service.d/37-composefs.conf
@@ -0,0 +1,6 @@
+# Make sure we grow the right root filesystem
+
+[Service]
+ExecStart=
+ExecStart=/usr/lib/systemd/systemd-growfs /sysroot
+
diff --git a/examples/unified/fix-verity b/examples/unified/fix-verity
new file mode 100755
index 0000000..783a49a
--- /dev/null
+++ b/examples/unified/fix-verity
@@ -0,0 +1,63 @@
+#!/bin/sh
+
+# workaround for https://github.com/tytso/e2fsprogs/issues/201
+
+set -eux
+
+# We use a custom UKI with an initramfs containing a script that remounts
+# /sysroot read-write and enables fs-verity on all of the objects in
+# /composefs/objects.
+#
+# The first time we're run (or if we are modified) we (re-)generate the UKI.
+# This is done inside of a container (for independence from the host OS).
+#
+# Usage: fix-verity IMAGE
+
+image_file="$1"
+
+# Shells disagree about `-nt` when the second file does not exist (bash says
+# true, dash says false), so check for a missing UKI explicitly.
+if [ ! -e fix-verity.efi ] || [ "$0" -nt fix-verity.efi ]; then
+    podman run --rm -i fedora > tmp/fix-verity.efi <<'EOF'
+    set -eux
+
+    cat > /tmp/fix-verity.sh <<'EOS'
+    mount -o remount,rw /sysroot
+    (
+        cd /sysroot/composefs/objects
+        echo >&2 'Enabling fsverity on composefs objects'
+        for i in */*; do
+            fsverity enable "$i";
+        done
+        echo >&2 'done!'
+    )
+    umount /sysroot
+    sync
+    poweroff -ff
+EOS
+
+    (
+        dnf --setopt keepcache=1 install -y \
+            kernel binutils systemd-boot-unsigned btrfs-progs fsverity-utils
+        dracut \
+            --uefi \
+            --no-hostonly \
+            --install 'sync fsverity' \
+            --include /tmp/fix-verity.sh /lib/dracut/hooks/pre-pivot/fix-verity.sh \
+            --kver "$(rpm -q kernel-core --qf '%{VERSION}-%{RELEASE}.%{ARCH}')" \
+            --kernel-cmdline="root=PARTLABEL=root-x86-64 console=ttyS0" \
+            /tmp/fix-verity.efi
+    ) >&2
+
+    cat /tmp/fix-verity.efi
+EOF
+    mv tmp/fix-verity.efi fix-verity.efi
+fi
+
+qemu-system-x86_64 \
+    -nographic \
+    -m 4096 \
+    -enable-kvm \
+    -bios /usr/share/edk2/ovmf/OVMF_CODE.fd \
+    -drive file="${image_file}",if=virtio,media=disk \
+    -kernel fix-verity.efi
diff --git a/examples/unified/make-image b/examples/unified/make-image
new file mode 100755
index 0000000..ff05a0f
--- /dev/null
+++ b/examples/unified/make-image
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+set -eux
+
+chown -R 0:0 tmp/sysroot
+chcon -R system_u:object_r:usr_t:s0 tmp/sysroot/composefs
+chcon system_u:object_r:var_t:s0 tmp/sysroot/var
+
+> tmp/image.raw
+SYSTEMD_REPART_MKFS_OPTIONS_EXT4='-O verity' \
+    systemd-repart \
+        --empty=require \
+        --size=auto \
+        --dry-run=no \
+        --no-pager \
+        --offline=yes \
+        --root=tmp \
+        --definitions=repart.d \
+        tmp/image.raw
diff --git a/examples/unified/repart.d/01-esp.conf b/examples/unified/repart.d/01-esp.conf
new file mode 100644
index 0000000..67f93e1
--- /dev/null
+++ b/examples/unified/repart.d/01-esp.conf
@@ -0,0 +1,6 @@
+[Partition]
+Type=esp
+Format=vfat
+CopyFiles=/efi:/
+SizeMinBytes=512M
+SizeMaxBytes=512M
diff --git a/examples/unified/repart.d/02-sysroot.conf b/examples/unified/repart.d/02-sysroot.conf
new file mode 100644
index 0000000..65f289e
--- /dev/null
+++ b/examples/unified/repart.d/02-sysroot.conf
@@ -0,0 +1,6 @@
+[Partition]
+Type=root
+Format=ext4
+SizeMinBytes=10G
+SizeMaxBytes=10G
+CopyFiles=/sysroot:/
diff --git a/examples/unified/run b/examples/unified/run
new file mode 100755
index
0000000..5742835
--- /dev/null
+++ b/examples/unified/run
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# Boot image.qcow2 in QEMU, using OVMF firmware and user-mode networking.
+
+set -eux
+
+# image.qcow2 is looked up next to this script.  Use dirname rather than
+# "${0%/*}": the latter leaves a $0 without any slash untouched (e.g. when
+# started as `sh run`), which would make the cd fail.
+cd "$(dirname "$0")"
+
+qemu-system-x86_64 \
+    -m 4096 \
+    -enable-kvm \
+    -bios /usr/share/edk2/ovmf/OVMF_CODE.fd \
+    -drive file=image.qcow2,if=virtio,cache=unsafe \
+    -nic user,model=virtio-net-pci
diff --git a/src/bin/cfsctl.rs b/src/bin/cfsctl.rs
index 2437b2b..d62a03e 100644
--- a/src/bin/cfsctl.rs
+++ b/src/bin/cfsctl.rs
@@ -73,7 +73,9 @@ enum Command {
     /// Perform garbage collection
     GC,
     /// Imports a composefs image (unsafe!)
-    ImportImage { reference: String },
+    ImportImage {
+        reference: String,
+    },
     /// Commands for dealing with OCI layers
     Oci {
         #[clap(subcommand)]
@@ -86,6 +88,14 @@ enum Command {
         /// the mountpoint
         mountpoint: String,
     },
+    /// Creates a composefs image from a directory tree and prints its ID
+    CreateImage {
+        path: PathBuf,
+    },
+    /// Writes a dumpfile describing a directory tree to stdout
+    CreateDumpfile {
+        path: PathBuf,
+    },
 }
 
 fn main() -> Result<()> {
@@ -165,6 +175,13 @@ fn main() -> Result<()> {
                 oci::prepare_boot(&repo, name, None, &output)?;
             }
         },
+        Command::CreateImage { ref path } => {
+            let image_id = composefs::fs::create_image(path, Some(&repo))?;
+            println!("{}", hex::encode(image_id));
+        }
+        Command::CreateDumpfile { ref path } => {
+            composefs::fs::create_dumpfile(path)?;
+        }
         Command::Mount { name, mountpoint } => {
             repo.mount(&name, &mountpoint)?;
         }
diff --git a/src/fs.rs b/src/fs.rs
index cab30ff..5e7b08c 100644
--- a/src/fs.rs
+++ b/src/fs.rs
@@ -1,16 +1,31 @@
-use std::{ffi::OsStr, mem::MaybeUninit, path::Path};
+use std::{
+    cell::RefCell,
+    collections::{BTreeMap, HashMap},
+    ffi::OsString,
+    ffi::{CStr, OsStr},
+    mem::MaybeUninit,
+    os::unix::ffi::{OsStrExt, OsStringExt},
+    path::Path,
+    rc::Rc,
+};
 
-use anyhow::Result;
+use anyhow::{bail, ensure, Result};
 use rustix::{
-    fd::OwnedFd,
-    fs::{fdatasync, linkat, mkdirat, mknodat, openat, symlinkat, AtFlags, FileType, OFlags, CWD},
+    fd::{AsFd, OwnedFd},
+    fs::{
+        fdatasync, fstat, getxattr, linkat, listxattr, mkdirat, mknodat, openat, readlinkat,
+        symlinkat, AtFlags, Dir, FileType, Mode, OFlags, CWD,
+
},
     io::{read_uninit, write, Errno},
 };
 
 use crate::{
-    image::{DirEnt, Directory, Inode, Leaf, LeafContent, Stat},
+    fsverity::{digest::FsVerityHasher, Sha256HashValue},
+    image::{DirEnt, Directory, FileSystem, Inode, Leaf, LeafContent, Stat},
     repository::Repository,
+    selabel::selabel,
     util::proc_self_fd,
+    INLINE_CONTENT_MAX,
 };
 
 fn set_file_contents(dirfd: &OwnedFd, name: &OsStr, stat: &Stat, data: &[u8]) -> Result<()> {
@@ -97,7 +112,269 @@ fn write_directory_contents(dir: &Directory, fd: &OwnedFd, repo: &Repository) ->
     Ok(())
 }
 
+// NB: hardlinks not supported
 pub fn write_to_path(repo: &Repository, dir: &Directory, output_dir: &Path) -> Result<()> {
     let fd = openat(CWD, output_dir, OFlags::PATH | OFlags::DIRECTORY, 0.into())?;
     write_directory_contents(dir, &fd, repo)
 }
+
+/// Reads a directory tree from disk into the in-memory image representation.
+///
+/// The reader refuses to cross filesystem boundaries and remembers inodes that have more than one
+/// link, so that every hardlink to the same file ends up sharing a single [`Leaf`].
+pub struct FilesystemReader<'repo> {
+    /// Device number of the root directory; `None` until the first inode has been examined.
+    st_dev: Option<u64>,
+    /// Repository that receives the content of external files, if any.
+    repo: Option<&'repo Repository>,
+    /// Leaves with `st_nlink > 1` that were already read, keyed by inode number.
+    inodes: HashMap<u64, Rc<Leaf>>,
+    /// Most recent mtime of any inode other than the root directory.
+    root_mtime: i64,
+}
+
+impl<'repo> FilesystemReader<'repo> {
+    /// Reads all extended attributes of the file referred to by `fd`.
+    fn read_xattrs(&mut self, fd: &OwnedFd) -> Result<BTreeMap<Box<OsStr>, Box<[u8]>>> {
+        // flistxattr() and fgetxattr() don't work with O_PATH fds, so go via /proc/self/fd.  Note:
+        // we want the symlink-following version of this call, which produces the correct behaviour
+        // even when trying to read xattrs from symlinks themselves.  See
+        // https://gist.github.com/allisonkarlitskaya/7a80f2ebb3314d80f45c653a1ba0e398
+        let filename = proc_self_fd(fd);
+
+        let mut xattrs = BTreeMap::new();
+
+        // The first call only reports the required buffer size, the second one fills the buffer.
+        let names_size = listxattr(&filename, &mut [])?;
+        let mut names = vec![0; names_size];
+        let actual_names_size = listxattr(&filename, &mut names)?;
+        ensure!(
+            actual_names_size == names.len(),
+            "xattrs changed during read"
+        );
+
+        // The names come back as C chars; reinterpret them as bytes.
+        let names: Vec<u8> = names.into_iter().map(|c| c as u8).collect();
+
+        let mut buffer = [0; 65536];
+        for name in names.split_inclusive(|c| *c == 0) {
+            let name = CStr::from_bytes_with_nul(name)?;
+            let value_size = getxattr(&filename, name, &mut buffer)?;
+            let key = Box::from(OsStr::from_bytes(name.to_bytes()));
+            let value = Box::from(&buffer[..value_size]);
+            xattrs.insert(key, value);
+        }
+
+        Ok(xattrs)
+    }
+
+    /// Stats `fd` and converts the result into the image-level [`Stat`].
+    ///
+    /// Fails if the inode is not of type `ifmt` or if it is on a different device than the first
+    /// inode that was examined.  Also keeps `root_mtime` up to date.
+    fn stat(&mut self, fd: &OwnedFd, ifmt: FileType) -> Result<(rustix::fs::Stat, Stat)> {
+        let buf = fstat(fd)?;
+
+        ensure!(
+            FileType::from_raw_mode(buf.st_mode) == ifmt,
+            "File type changed between readdir() and fstat()"
+        );
+
+        let mtime = buf.st_mtime as i64;
+
+        match self.st_dev {
+            // The first inode we process is the root directory: remember which device it is on.
+            None => self.st_dev = Some(buf.st_dev),
+            Some(st_dev) if st_dev != buf.st_dev => {
+                bail!("Attempting to cross devices while importing filesystem");
+            }
+            // The root mtime is equal to the most recent mtime of any inode *except* the root
+            // directory.  The root itself is handled by the `None` arm, so we only get here for
+            // the second (or later) inode we process.
+            Some(_) => self.root_mtime = self.root_mtime.max(mtime),
+        }
+
+        Ok((
+            buf,
+            Stat {
+                st_mode: buf.st_mode & 0o7777,
+                st_uid: buf.st_uid,
+                st_gid: buf.st_gid,
+                st_mtim_sec: mtime,
+                xattrs: RefCell::new(self.read_xattrs(fd)?),
+            },
+        ))
+    }
+
+    /// Reads exactly `size` bytes from `fd`.
+    ///
+    /// A single read() may return fewer bytes than requested, so keep reading until the buffer is
+    /// full.  Reaching EOF early means the file shrank after it was stat()ed, which is an error.
+    fn read_regular_file(fd: &OwnedFd, size: usize) -> Result<Vec<u8>> {
+        let mut data = vec![0; size];
+        let mut filled = 0;
+        while filled < size {
+            match rustix::io::read(fd, &mut data[filled..]) {
+                Ok(0) => bail!("File changed size during read"),
+                Ok(n) => filled += n,
+                Err(Errno::INTR) => {} // interrupted by a signal: just try again
+                Err(err) => return Err(err.into()),
+            }
+        }
+        Ok(data)
+    }
+
+    /// Reads the content of the non-directory inode `fd`, which was stat()ed as `buf`.
+    fn read_leaf_content(&mut self, fd: OwnedFd, buf: rustix::fs::Stat) -> Result<LeafContent> {
+        let content = match FileType::from_raw_mode(buf.st_mode) {
+            FileType::Directory | FileType::Unknown => bail!("Unexpected file type for leaf"),
+            FileType::RegularFile => {
+                let data = Self::read_regular_file(&fd, usize::try_from(buf.st_size)?)?;
+
+                if buf.st_size > INLINE_CONTENT_MAX as i64 {
+                    let id = if let Some(repo) = self.repo {
+                        repo.ensure_object(&data)?
+                    } else {
+                        FsVerityHasher::hash(&data)
+                    };
+                    LeafContent::ExternalFile(id, buf.st_size as u64)
+                } else {
+                    LeafContent::InlineFile(data)
+                }
+            }
+            FileType::Symlink => {
+                let target = readlinkat(fd, "", [])?;
+                LeafContent::Symlink(OsString::from_vec(target.into_bytes()))
+            }
+            FileType::CharacterDevice => LeafContent::CharacterDevice(buf.st_rdev),
+            FileType::BlockDevice => LeafContent::BlockDevice(buf.st_rdev),
+            FileType::Fifo => LeafContent::Fifo,
+            FileType::Socket => LeafContent::Socket,
+        };
+        Ok(content)
+    }
+
+    /// Opens `name` relative to `dirfd` and reads it as a leaf (non-directory) inode.
+    ///
+    /// Inodes with more than one link are cached so that all of their names share one [`Leaf`].
+    fn read_leaf(&mut self, dirfd: &OwnedFd, name: &OsStr, ifmt: FileType) -> Result<Rc<Leaf>> {
+        // Only regular files need to be opened for reading, everything else gets an O_PATH fd
+        let oflags = match ifmt {
+            FileType::RegularFile => OFlags::RDONLY,
+            _ => OFlags::PATH,
+        };
+
+        let fd = openat(
+            dirfd,
+            name,
+            oflags | OFlags::NOFOLLOW | OFlags::CLOEXEC,
+            Mode::empty(),
+        )?;
+
+        let (buf, stat) = self.stat(&fd, ifmt)?;
+
+        if let Some(leafref) = self.inodes.get(&buf.st_ino) {
+            Ok(Rc::clone(leafref))
+        } else {
+            let content = self.read_leaf_content(fd, buf)?;
+            let leaf = Rc::new(Leaf { stat, content });
+            if buf.st_nlink > 1 {
+                self.inodes.insert(buf.st_ino, Rc::clone(&leaf));
+            }
+            Ok(leaf)
+        }
+    }
+
+    /// Recursively reads the directory `name` (relative to `dirfd`) and everything below it.
+    pub fn read_directory(&mut self, dirfd: impl AsFd, name: &OsStr) -> Result<Directory> {
+        let fd = openat(
+            dirfd,
+            name,
+            OFlags::RDONLY | OFlags::DIRECTORY | OFlags::NOFOLLOW | OFlags::CLOEXEC,
+            Mode::empty(),
+        )?;
+
+        let (_, stat) = self.stat(&fd, FileType::Directory)?;
+        let mut directory = Directory {
+            stat,
+            entries: vec![],
+        };
+
+        for item in Dir::read_from(&fd)? {
+            let entry = item?;
+            let name = OsStr::from_bytes(entry.file_name().to_bytes());
+
+            if name == "." || name == ".." {
+                continue;
+            }
+
+            let inode = self.read_inode(&fd, name, entry.file_type())?;
+            directory.insert(name, inode);
+        }
+
+        Ok(directory)
+    }
+
+    /// Reads the directory entry `name` of `dirfd`, which readdir() reported to be of type `ifmt`.
+    fn read_inode(&mut self, dirfd: &OwnedFd, name: &OsStr, ifmt: FileType) -> Result<Inode> {
+        // readdir() is allowed to report an unknown type (DT_UNKNOWN), in which case we have to
+        // find out for ourselves.
+        let ifmt = if ifmt == FileType::Unknown {
+            let buf = rustix::fs::statat(dirfd, name, AtFlags::SYMLINK_NOFOLLOW)?;
+            FileType::from_raw_mode(buf.st_mode)
+        } else {
+            ifmt
+        };
+
+        if ifmt == FileType::Directory {
+            Ok(Inode::Directory(Box::new(
+                self.read_directory(dirfd, name)?,
+            )))
+        } else {
+            Ok(Inode::Leaf(self.read_leaf(dirfd, name, ifmt)?))
+        }
+    }
+}
+
+/// Reads the directory tree rooted at `path` into a [`FileSystem`].
+///
+/// If `repo` is given then the content of external files is stored in it and the result is
+/// relabelled via `selabel()`; otherwise external files are only hashed.
+pub fn read_from_path(path: &Path, repo: Option<&Repository>) -> Result<FileSystem> {
+    let mut reader = FilesystemReader {
+        repo,
+        inodes: HashMap::new(),
+        st_dev: None,
+        root_mtime: 0,
+    };
+    let mut fs = FileSystem {
+        root: reader.read_directory(CWD, path.as_os_str())?,
+    };
+    fs.root.stat.st_mtim_sec = reader.root_mtime;
+
+    // We can only relabel if we have the repo because we need to read the config and policy files
+    if let Some(repo) = repo {
+        selabel(&mut fs, repo)?;
+    }
+
+    Ok(fs)
+}
+
+/// Builds a composefs image from the directory tree at `path` and returns its ID.
+///
+/// With a repository the image is written to it; without one the image is only hashed.
+pub fn create_image(path: &Path, repo: Option<&Repository>) -> Result<Sha256HashValue> {
+    let fs = read_from_path(path, repo)?;
+    let image = super::image::mkcomposefs(fs)?;
+    if let Some(repo) = repo {
+        Ok(repo.write_image(None, &image)?)
+    } else {
+        Ok(FsVerityHasher::hash(&image))
+    }
+}
+
+/// Writes a dumpfile describing the directory tree at `path` to stdout.
+pub fn create_dumpfile(path: &Path) -> Result<()> {
+    let fs = read_from_path(path, None)?;
+    super::dumpfile::write_dumpfile(&mut std::io::stdout(), &fs)
+}