-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Launch cockpit-session via socket activation on /run/cockpit/session #16808
Changes from all commits
89be87b
4e32648
cb05ee1
110b45f
72bfb6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,9 @@ | |
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <sys/socket.h> | ||
#include <sys/stat.h> | ||
#include <sys/un.h> | ||
#include <syslog.h> | ||
#include <unistd.h> | ||
|
||
|
@@ -41,6 +43,39 @@ | |
|
||
#define CLIENT_CERTIFICATE_DIRECTORY "/run/cockpit/tls/clients" | ||
|
||
static int | ||
open_proc_pid (pid_t pid) | ||
{ | ||
char path[100]; | ||
int r = snprintf (path, sizeof path, "/proc/%lu", (unsigned long) pid); | ||
if (r < 0 || r >= sizeof path) | ||
errx (EX, "memory error"); | ||
int fd = open (path, O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC); | ||
if (fd < 0) | ||
err (EX, "failed to open %s", path); | ||
return fd; | ||
} | ||
|
||
static size_t | ||
read_proc_file (int dirfd, const char *name, char *buffer, size_t bufsize) | ||
{ | ||
int fd = openat (dirfd, name, O_RDONLY | O_CLOEXEC); | ||
if (fd < 0) | ||
err (EX, "Failed to open %s proc file", name); | ||
|
||
/* we don't accept/expect EINTR or short reads here: this is /proc, and we don't have | ||
* signal handlers which survive the login */ | ||
ssize_t len = read (fd, buffer, bufsize); | ||
if (len < 0) | ||
err (EX, "Failed to read /proc file %s", name); | ||
|
||
close (fd); | ||
if (len >= bufsize) | ||
errx (EX, "proc file %s exceeds buffer size %zu", name, bufsize); | ||
buffer[len] = '\0'; | ||
Comment on lines
+73
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This reads very nicely now. |
||
return len; | ||
} | ||
|
||
/* This is a bit lame, but having a hard limit on peer certificates is | ||
* desirable: Let's not get DoSed by huge certs */ | ||
#define MAX_PEER_CERT_SIZE 100000 | ||
|
@@ -49,31 +84,49 @@ | |
* including "0::" prefix and newline. | ||
* NB: the kernel doesn't allow newlines in cgroup names. */ | ||
static char * | ||
read_proc_self_cgroup (size_t *out_length) | ||
read_proc_pid_cgroup (int dirfd, size_t *out_length) | ||
{ | ||
FILE *fp = fopen ("/proc/self/cgroup", "r"); | ||
char buffer[1024]; | ||
size_t len = read_proc_file (dirfd, "cgroup", buffer, sizeof buffer); | ||
|
||
if (fp == NULL) | ||
if (strncmp (buffer, "0::/", 4) == 0 && /* must be a cgroupsv2 */ | ||
buffer[len - 1] == '\n') /* must end with a newline */ | ||
{ | ||
warn ("Failed to open /proc/self/cgroup"); | ||
exit_init_problem ("internal-error", "Failed to open /proc/self/cgroup"); | ||
*out_length = len; | ||
return strdupx (buffer); | ||
} | ||
|
||
char buffer[1024]; | ||
*out_length = fread (buffer, 1, sizeof (buffer) - 1, fp); | ||
buffer[*out_length] = '\0'; | ||
fclose (fp); | ||
|
||
if (*out_length > 5 && /* at least "0::/\n" */ | ||
*out_length <= sizeof (buffer) - 1 && /* not too big */ | ||
strncmp (buffer, "0::", 3) == 0 && /* must be a cgroupsv2 */ | ||
buffer[*out_length - 1] == '\n') /* must end with a newline */ | ||
return strdupx (buffer); | ||
|
||
warnx ("unexpected cgroups content, certificate matching only supports cgroup v2: '%s'", buffer); | ||
martinpitt marked this conversation as resolved.
Show resolved
Hide resolved
|
||
exit_init_problem ("authentication-unavailable", "certificate matching only supports cgroup v2"); | ||
} | ||
|
||
static | ||
unsigned long long get_proc_pid_start_time (int dirfd) | ||
{ | ||
char buffer[4096]; | ||
read_proc_file (dirfd, "stat", buffer, sizeof buffer); | ||
|
||
/* start time is the token at index 19 after the '(process name)' entry - since only this | ||
* field can contain the ')' character, search backwards for this to avoid malicious | ||
* processes trying to fool us; See proc_pid_stat(5) */ | ||
const char *p = strrchr (buffer, ')'); | ||
if (p == NULL) | ||
errx (EX, "Failed to find process name in /proc/pid/stat: %s", buffer); | ||
for (int i = 0; i <= 19; i++) /* NB: ')' is the first token */ | ||
{ | ||
p = strchr (p, ' '); | ||
if (p == NULL) | ||
errx (EX, "Failed to find start time in /proc/pid/stat"); | ||
++p; /* skip over the space */ | ||
} | ||
|
||
char *endptr; | ||
unsigned long long start_time = strtoull (p, &endptr, 10); | ||
if (*endptr != ' ') | ||
errx (EX, "Failed to parse start time in /proc/pid/stat from %s", p); | ||
return start_time; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess this is the kernel so we don't have to worry about bounds-checking the returned value... I think we can't have other error cases here. It's worth noting that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It actually doesn't -- this was failing initially, before I added the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we can impose a meaningful bound here. https://man7.org/linux/man-pages/man5/proc_pid_stat.5.html makes no restriction other than monotonicity. |
||
} | ||
|
||
/* valid_256_bit_hex_string: | ||
* @str: a string | ||
* | ||
|
@@ -301,11 +354,54 @@ cockpit_session_client_certificate_map_user (const char *client_certificate_file | |
exit_init_problem ("authentication-unavailable", "No https instance certificate present"); | ||
} | ||
|
||
/* We only support being called via cockpit-session.socket (i.e. Unix socket). | ||
* We need to check the cgroup of cockpit-ws (our peer) */ | ||
struct ucred ucred; | ||
socklen_t ucred_len = sizeof ucred; | ||
if (getsockopt (STDIN_FILENO, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) != 0 || | ||
/* this is an inout parameter, be extra suspicious */ | ||
ucred_len < sizeof ucred) | ||
{ | ||
debug ("failed to read stdin peer credentials: %m; not in socket mode?"); | ||
warnx ("Certificate authentication only supported with cockpit-session.socket"); | ||
exit_init_problem ("authentication-unavailable", "Certificate authentication only supported with cockpit-session.socket"); | ||
} | ||
|
||
/* this is an inout parameter, be extra suspicious */ | ||
assert (ucred_len >= sizeof ucred); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd handle this in a Conceptually, this feels like part of the return value of that call, so it makes sense to check it in the same place (to me). Feel free to ignore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No strong opinion. Changed. |
||
debug ("unix socket mode, reading cgroup from peer pid %d", ucred.pid); | ||
int ws_pid_dirfd = open_proc_pid (ucred.pid); | ||
unsigned long long ws_start_time = get_proc_pid_start_time (ws_pid_dirfd); | ||
|
||
int my_pid_dirfd = open_proc_pid (getpid ()); | ||
unsigned long long my_start_time = get_proc_pid_start_time (my_pid_dirfd); | ||
close (my_pid_dirfd); | ||
|
||
debug ("peer start time: %llu, my start time: %llu", ws_start_time, my_start_time); | ||
|
||
/* Guard against pid recycling: If a malicious user captures ws, keeps the socket in a forked child and exits | ||
* the original pid, they can trap a different user to login, get the old pid (pointing ot their cgroup), and | ||
* capture their session. To prevent that, require that ws must have started earlier than ourselves. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are probably already aware, but just in case, in polkit there were CVEs due to comparing processes in this way for authentication purposes, as there are ways for an attacker to control the start timestamp as recorded by the kernel, by holding the forking process at a particular time. See comment at: https://github.com/polkit-org/polkit/blob/main/src/polkit/polkitunixprocess.c#L59 These days we use pid fds which cannot be recycled, and only fallback on heuristics on old kernels. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for this comment. We discussed this an awful lot. Please see this thread: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be helpful to know a bit more about what the pidfd peercred thing actually solves: does it mean that for as long as the kernel is willing to return that pidfd to us that it then won't reissue that pid? That would be a very big help indeed, but it would also mean that we don't actually need to use the pidfd to gain that benefit... Or (in the case that the process that called I admittedly stopped looking into the polkit code after @martinpitt determined that it wasn't available on many of our target platforms and expressed reservations about implementing the fallback, but it would be useful to know how this ought to work, and I do believe that it will be a worthwhile thing to pursue in the future. In the meantime, though, please understand that this code is only relevant on systems that support logging in with TLS client certificates, and this is only one part of a defence-in-depth strategy:
We also limit the time that the login process is allowed to linger to 60 seconds in an attempt to make it more difficult to wait for a recycled PID. One of the attack scenarios that we consider possible in Cockpit is that one legitimate user could exploit a weakness in the large C code base of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
A pidfd can never be recycled, when the process exits, it will simply become invalid and can no longer be resolved to a pid, even if a process with the same pid has since appeared. With recent kernels you can use statx() and compare directly two pidfds for equality, we do this in systemd now for example. On slightly older kernels you get two pidfds, you resolve each pidfd -> pid, compare the pid, and then afterwards check again that both pidfds are still valid - more laborious than statx(), but equally safe against all races. In summary, it is a feature designed exactly to avoid the kind of race conditions and vulnerabilities that have plagued polkit and other system software that need to authenticate processes.
Of course, it's entirely up to you, and it is dependent on use cases, threat model, etc. I only mentioned because I found the link to this PR on Mastodon, and having had to deal with all of this recently in polkit, I thought of giving a heads-up, as it's a fairly recent kernel feature so it's not very well known yet. But if it's not necessary for the use case here, that's entirely fine, this was just intended as an FYI, not an RFE. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @bluca Thanks! My main woe is that just getting a pidfd doesn't help us much. We need to know the peer's startup time (currently parsed from /proc/pid/stat) and cgroup (/proc/pid/cgroup), and pidfds have neither these APIs nor "give me the corresponding /proc/pid". If SO_PEERPIDFD is guaranteed to give the pid fd at the time of connect(), and resolving it fails once it's gone, that is a good hardening. We'd still have to resolve it to a pid and open /proc/pid, but it would replace the time comparison on newer kernels -- and we'd keep the time comparison as a fallback on older ones. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @allisonkarlitskaya yes, that's what I meant with "it would replace the time comparison on newer kernels" @mvollmer that's how I understand it, yes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Is that needed for reasons other than comparing two processes for equality?
Yes that is the case - the kernel will give you the pidfd of the process that started the connect, and of nothing else, and it can be relied on. The normal flow is that you carry the pidfd around, and you resolve it to a pid (there's a glibc API to do that in 2.39, and there's an IOCTL in kernel 6.13, to avoid manual string parsing) at the time when you actually use the pid: if you can get a pid then the process is still alive and it's guaranteed to be the same, if you get an error then the process is gone and you can't resolve it to a pid anymore. If you want the cgroup from that, then systemd has APIs to get the unit via a pidfd, so that it can be done again race-free. There is ongoing work to add another SO_PEER option to get the cgroup id directly from a socket, but I don't think that has landed in the kernel yet. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We won't need /stat any more, but we very much need the cgroup. That's the reason for doing all this dance 😁 (we use that to identify the correct service instance for the TLS certificate that we receive, and encode the cert's SHA into the unit's instance name) Thanks for confirming the pidfd mechanics! That's how I understood it, and that will actually be helpful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, in the future we'll hopefully have a SO_PEERCGROUP or so, work was in progress. For now one can ask systemd to translate from pidfd to cgroup in various ways, like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact if you want the unit, there's APIs to get that directly, via D-Bus |
||
if (my_start_time < ws_start_time) | ||
{ | ||
warnx ("start time of this process (%llu) is older than cockpit-ws (%llu), pid recycling attack?", | ||
my_start_time, ws_start_time); | ||
close (ws_pid_dirfd); | ||
exit_init_problem ("access-denied", "implausible cockpit-ws start time"); | ||
} | ||
|
||
size_t ws_cgroup_length; | ||
char *ws_cgroup = read_proc_self_cgroup (&ws_cgroup_length); | ||
char *ws_cgroup = read_proc_pid_cgroup (ws_pid_dirfd, &ws_cgroup_length); | ||
assert (ws_cgroup); | ||
close (ws_pid_dirfd); | ||
|
||
/* read_proc_pid_cgroup() already ensures that, but just in case we refactor this: this is *essential* for the | ||
* subsequent comparison */ | ||
if (ws_cgroup[ws_cgroup_length - 1] != '\n') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for allowing me to be pedantic about this <3 |
||
errx (EX, "cgroup does not end in newline"); | ||
|
||
/* A simple prefix comparison is appropriate here because ws_cgroup | ||
* will contain exactly one newline (at the end), and the expected | ||
* contains exactly one newline (at the end), and the expected | ||
* value of ws_cgroup is on the first line in cert_pem. | ||
*/ | ||
if (strncmp (cert_pem, ws_cgroup, ws_cgroup_length) != 0) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,9 @@ | |
const gchar *cockpit_ws_ssh_program = "/usr/bin/env python3 -m cockpit.beiboot --remote-bridge=supported"; | ||
|
||
/* Some tunables that can be set from tests */ | ||
const gchar *cockpit_ws_session_program = LIBEXECDIR "/cockpit-session"; | ||
const gchar *cockpit_ws_session_program = NULL; | ||
|
||
#define WS_SESSION_SOCKET "/run/cockpit/session" | ||
|
||
/* Timeout of authenticated session when no connections */ | ||
guint cockpit_ws_service_idle = 15; | ||
|
@@ -1116,7 +1118,12 @@ cockpit_session_launch (CockpitAuth *self, | |
g_str_equal (type, "tls-cert")) | ||
{ | ||
if (command == NULL && unix_path == NULL) | ||
command = cockpit_ws_session_program; | ||
{ | ||
martinpitt marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (cockpit_ws_session_program) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You removed the fallback code path but the commit message still claims "Fall back to calling cockpit-session directly for custom setups." There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed the message, thanks! |
||
command = cockpit_ws_session_program; | ||
else | ||
unix_path = WS_SESSION_SOCKET; | ||
} | ||
} | ||
|
||
g_autoptr(CockpitPipe) pipe = NULL; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -378,6 +378,8 @@ authentication via sssd/FreeIPA. | |
%{_unitdir}/cockpit-motd.service | ||
%{_unitdir}/cockpit.socket | ||
%{_unitdir}/cockpit-ws-user.service | ||
%{_unitdir}/cockpit-session.socket | ||
%{_unitdir}/[email protected] | ||
%{_unitdir}/cockpit-wsinstance-http.socket | ||
%{_unitdir}/cockpit-wsinstance-http.service | ||
%{_unitdir}/cockpit-wsinstance-https-factory.socket | ||
|
@@ -397,7 +399,7 @@ authentication via sssd/FreeIPA. | |
%{_libexecdir}/cockpit-desktop | ||
%{_libexecdir}/cockpit-certificate-ensure | ||
%{_libexecdir}/cockpit-certificate-helper | ||
%attr(4750, root, cockpit-wsinstance) %{_libexecdir}/cockpit-session | ||
%{_libexecdir}/cockpit-session | ||
%{_datadir}/cockpit/branding | ||
%{_datadir}/selinux/packages/%{selinuxtype}/%{name}.pp.bz2 | ||
%{_mandir}/man8/%{name}_session_selinux.8cockpit.* | ||
|
@@ -560,8 +562,6 @@ These files are not required for running Cockpit. | |
|
||
%files -n cockpit-tests -f tests.list | ||
%{pamdir}/mock-pam-conv-mod.so | ||
%{_unitdir}/cockpit-session.socket | ||
martinpitt marked this conversation as resolved.
Show resolved
Hide resolved
|
||
%{_unitdir}/[email protected] | ||
|
||
%package -n cockpit-packagekit | ||
Summary: Cockpit user interface for packages | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1 @@ | ||
usr/share/cockpit/playground | ||
${env:deb_systemdsystemunitdir}/cockpit-session.socket | ||
${env:deb_systemdsystemunitdir}/[email protected] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ etc/pam.d/cockpit | |
${env:deb_systemdsystemunitdir}/cockpit.service | ||
${env:deb_systemdsystemunitdir}/cockpit-motd.service | ||
${env:deb_systemdsystemunitdir}/cockpit.socket | ||
${env:deb_systemdsystemunitdir}/cockpit-session.socket | ||
${env:deb_systemdsystemunitdir}/[email protected] | ||
${env:deb_systemdsystemunitdir}/cockpit-ws-user.service | ||
${env:deb_systemdsystemunitdir}/cockpit-wsinstance-http.service | ||
${env:deb_systemdsystemunitdir}/cockpit-wsinstance-http.socket | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"non-euclidean memory error" ;)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's the 10% extra that you get on special days!