Skip to content

Commit

Permalink
make multiple uid work
Browse files Browse the repository at this point in the history
  • Loading branch information
NilsIrl committed Aug 16, 2024
1 parent 8462f5c commit dd7277e
Showing 1 changed file with 54 additions and 17 deletions.
71 changes: 54 additions & 17 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -346,17 +346,29 @@ fn check_unprivileged_userns_permissions() void {
}
}

/// Unmounts the filesystem mounted at `path`.
///
/// Panics with the decoded errno if the umount syscall reports a failure.
fn umount(path: [*:0]const u8) void {
    // The raw syscall wrapper returns a usize; reinterpret it as the signed
    // integer of the same width. Using isize (rather than a fixed i64) keeps
    // the @bitCast valid on 32-bit targets, where usize is only 32 bits wide.
    const rc: isize = @bitCast(std.os.linux.umount(path));
    if (rc == 0) return;

    // Linux reports syscall failures as -errno, i.e. a value in (-4096, 0).
    assert(rc < 0 and rc > -4096);
    const errno: std.posix.E = @enumFromInt(-rc);
    std.debug.panic("Failed to unmount {s}. Errno: {}\n", .{ path, errno });
}

pub fn main() !u8 {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();

// TODO: consider the case where a user can mount the filesystem but isn't root
// We might only need to check for CAP_SYS_ADMIN
// Also in the case where fusermount3 is present this is unnecessary
const euid = std.os.linux.geteuid();
const egid = std.os.linux.getegid();

if (euid != 0) {
// So that fuse filesystems can be mounted without needing fusermount3

const egid = std.os.linux.getegid();

const username = try allocator.dupeZ(u8, std.mem.span((std.c.getpwuid(euid) orelse @panic("couldn't get username")).pw_name orelse @panic("couldn't get username")));
defer allocator.free(username);

Expand Down Expand Up @@ -420,10 +432,6 @@ pub fn main() !u8 {
const set_groups_file = try std.fmt.allocPrint(allocator, "/proc/{}/setgroups", .{pid});
defer allocator.free(set_groups_file);

// NOTE(nilsirl): I don't know what this does. If set to "allow"
// then sometimes /proc/{pid}/gid_map fails to write
try std.fs.cwd().writeFile(.{ .sub_path = set_groups_file, .data = "deny" });

newuidmap(allocator, pid, uid_mappings.items) catch {
std.debug.print("newuidmap failed, falling back to single user mapping\n", .{});
const uid_map_path = try std.fmt.allocPrint(allocator, "/proc/{}/uid_map", .{pid});
Expand All @@ -433,6 +441,8 @@ pub fn main() !u8 {
defer allocator.free(uid_map_content);
std.fs.cwd().writeFile(.{ .sub_path = uid_map_path, .data = uid_map_content }) catch |err| {
if (err == std.posix.WriteError.AccessDenied) {
// TODO: when using newuidmap this may not get hit until
// trying to mount file system
check_unprivileged_userns_permissions();
}
std.debug.panic("error: {}\n", .{err});
Expand All @@ -441,6 +451,11 @@ pub fn main() !u8 {

newgidmap(allocator, pid, gid_mappings.items) catch {
std.debug.print("newgidmap failed, falling back to single group mapping\n", .{});

// setgroups must be set to "deny" before an unprivileged write to gid_map can succeed (see user_namespaces(7));
// when newgidmap succeeds we leave it untouched so that setgroups can still be used in the container
try std.fs.cwd().writeFile(.{ .sub_path = set_groups_file, .data = "deny" });

const gid_map_path = try std.fmt.allocPrint(allocator, "/proc/{}/gid_map", .{pid});
defer allocator.free(gid_map_path);

Expand All @@ -459,7 +474,7 @@ pub fn main() !u8 {
if (std.os.linux.W.IFEXITED(wait_result.status)) {
return std.os.linux.W.EXITSTATUS(wait_result.status);
}
std.debug.print("did not exit normally status: {}\n", .{wait_result.status});
std.debug.panic("did not exit normally status: {}\n", .{wait_result.status});
}

std.posix.close(write_fd);
Expand Down Expand Up @@ -520,12 +535,18 @@ pub fn main() !u8 {

const overlayfs_args = [_:null]?[*:0]const u8{ "fuse-overlayfs", "-o", overlayfs_options, mount_dir_path };

// reap the child of fuse-overlayfs so that we can be sure fuse-overlayfs
// has exited before unmounting squashfuse
assert(try std.posix.prctl(std.posix.PR.SET_CHILD_SUBREAPER, .{1}) == 0);
const pid = try std.posix.fork();
if (pid == 0) {
std.process.exit(@intCast(overlayfs_main(overlayfs_args.len, &overlayfs_args)));
_ = overlayfs_main(overlayfs_args.len, &overlayfs_args);
std.debug.panic("unreachable", .{});
}

const wait_pid_result = std.posix.waitpid(pid, 0);
assert(try std.posix.prctl(std.posix.PR.SET_CHILD_SUBREAPER, .{0}) == 0);

if (wait_pid_result.status != 0) {
std.debug.panic("failed to run overlayfs", .{});
}
Expand Down Expand Up @@ -562,20 +583,36 @@ pub fn main() !u8 {
// fails because most users do not have write permission there
assert(c.setenv("XDG_RUNTIME_DIR", "/tmp", 0) == 0);

const ret = c.libcrun_container_run(&crun_context, container, 0, &err);
const pid = try std.posix.fork();
assert(pid >= 0);
if (pid == 0) {
// Run container in a separate process because crun will try to reap
// every child including the fuse-overlayfs process still running
const ret = c.libcrun_container_run(&crun_context, container, 0, &err);

if (err != null) {
std.debug.panic("failed to run container (status/errno: {}) ({d}): {s}\n", .{ err.*.status, ret, err.*.msg });
}

if (err != null) {
std.debug.panic("failed to run container (status/errno: {}) ({d}): {s}\n", .{ err.*.status, ret, err.*.msg });
return @intCast(ret);
}

if (std.os.linux.umount(mount_dir_path) != 0) {
std.debug.print("Failed to unmount {s}\n", .{mount_dir_path});
const retStatus = std.posix.waitpid(pid, 0);
if (!std.posix.W.IFEXITED(retStatus.status)) {
std.debug.panic("container didn't exist normally : {}\n", .{retStatus.status});
}
if (std.os.linux.umount(filesystem_bundle_dir_null) != 0) {
std.debug.print("Failed to unmount {s}\n", .{filesystem_bundle_dir_null});

umount(mount_dir_path);

// wait for the overlayfs process to exit so that the squashfuse mount is no longer busy when we unmount it
const overlayfs_status = std.posix.waitpid(-1, 0);
if (!std.posix.W.IFEXITED(overlayfs_status.status) or std.posix.W.EXITSTATUS(overlayfs_status.status) != 0) {
std.debug.panic("overlayfs failed to exit successfully, status: {}\n", .{overlayfs_status.status});
}

// TODO: clean up /tmp
umount(filesystem_bundle_dir_null);

try std.fs.deleteTreeAbsolute(&temp_dir_path);

return @intCast(ret);
return std.posix.W.EXITSTATUS(retStatus.status);
}

0 comments on commit dd7277e

Please sign in to comment.