Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cgroup-v2 considerations #109

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 57 additions & 9 deletions src/sbd-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -859,9 +859,15 @@ static int get_realtime_budget(void)
}

/* stolen from corosync */

#define LEGACY_CGROUP_PROC_PIDS "/sys/fs/cgroup/cpu/tasks"
#define UNIFIED_CGROUP_PROC_PIDS "/sys/fs/cgroup/cgroup.procs"

static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
FILE *f;
int res = -1;
int res = -1, num;
char *rt_rq_name = NULL;
const char *root_pids = LEGACY_CGROUP_PROC_PIDS;

/*
* /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
Expand All @@ -870,13 +876,53 @@ static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
* This feature is expected to be removed as soon as systemd gets support
* for managing RT configuration.
*/
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
if (f == NULL) {
cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
do {
f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
if (f) {
break;
}
/* CONFIG_RT_GROUP_SCHED might still be enabled with cgroup-v2
cgroup.procs on cgroup-toplevel tells us we have cgroup-v2
(handy as we already need that to be in selinux-policy)
and name of rt_rq(s) in /proc/sched_debug tells us that
CONFIG_RT_GROUP_SCHED is enabled
cgroup-v2 has been around for a while in the kernel and it
is no mutual exclusive compile-time-configuration - so
checking what is actually mounted to go with what is there
*/
f = fopen(UNIFIED_CGROUP_PROC_PIDS, "rt");
if (f) {
fclose(f);
f = fopen("/proc/sched_debug", "rt");
if (f) {
while (((num = fscanf(f, "rt_rq[%*[^]]]:%m[^\n]\n",
&rt_rq_name)) != EOF) &&
(rt_rq_name == NULL)) {
/* consume a line */
if ((num > 0) || (fscanf(f, "%*[^\n]") == EOF) ||
(fscanf(f, "\n") == EOF)) {
break;
}
}
/* no hierarchical rt-budget distribution with
cgroup-v2 so far - thus checking for budget is
useless
*/
if (rt_rq_name) {
free(rt_rq_name);
enforce_root_cgroup = true;
root_pids = UNIFIED_CGROUP_PROC_PIDS;
break;
}
fclose(f);
}
}
cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist & "
"/proc/sched_debug doesn't contain rt_rq[...]:/ -> "
"system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
res = 0;
goto exit_res;
}
} while (0);
fclose(f);

if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
Expand All @@ -886,21 +932,23 @@ static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
goto exit_res;
}

f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
f = fopen(root_pids, "w");
if (f == NULL) {
cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
cl_log(LOG_WARNING, "Can't open %s for writing", root_pids);

goto exit_res;
}

if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
cl_log(LOG_WARNING, "Can't write sbd pid into %s", root_pids);
goto close_and_exit_res;
}

res = 0;

close_and_exit_res:
if (fclose(f) != 0) {
cl_log(LOG_WARNING, "Can't close cgroups tasks file");
cl_log(LOG_WARNING, "Can't close %s", root_pids);
goto exit_res;
}

Expand Down
14 changes: 14 additions & 0 deletions src/sbd.sysconfig.in
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,20 @@ SBD_TIMEOUT_ACTION=flush,reboot
# If that is the case sbd will stay in that slice while it will
# be moved to root-slice otherwise.
#
# With cgroup-v2 behavior is very much different.
# With CONFIG_RT_GROUP_SCHED enabled and cpu-controller enabled
# there currently is no way to configure RT-budget in any slice
# but the root-slice. Otherway round if there is RT-budget used
# in any but the root-slice enabling the cpu-controller is
# inhibited.
# Thus - unless strictly disabled by setting 'no' - with cgroup-v2
# and CONFIG_RT_GROUP_SCHED enabled sbd is always moved
# to the root-slice regardless if the cpu-controller is at the
# moment enabled or not.
# Reason is that subsequent services might enable the cpu-controller
# or fail doing so if sbd was already using RT-budget in e.g. the
# system-slice.
#
SBD_MOVE_TO_ROOT_CGROUP=auto

## Type: yesno
Expand Down