diff --git a/criu/seize.c b/criu/seize.c index 7ade0b1fe8..936702c39f 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -542,6 +542,7 @@ static int freeze_processes(void) enum freezer_state state = THAWED; static const unsigned long step_ms = 100; + /* Since opts.timeout is in seconds, multiply it by 1000 to convert to milliseconds. */ unsigned long nr_attempts = (opts.timeout * 1000) / step_ms; unsigned long i = 0; @@ -599,6 +600,35 @@ static int freeze_processes(void) goto err; } nanosleep(&req, NULL); + + if (cgroup_v2) + continue; + + /* As per older kernel docs (freezer-subsystem.txt before + * the kernel commit ef9fe980c6fcc1821), if FREEZING is seen, + * userspace should either retry or thaw. While current + * kernel cgroup v1 docs no longer mention a need to retry, + * even recent kernels can't reliably freeze a cgroup v1. + * + * Let's keep asking the kernel to freeze from time to time. + * In addition, do occasional thaw/sleep/freeze. + * + * This is still a game of chances (the real fix belongs to the kernel) + * but these kludges might improve the probability of success. + * + * Cgroup v2 does not have this problem. + */ + switch (i%32) { + case 9: + case 20: + freezer_write_state(fd, FROZEN); + break; + case 31: + freezer_write_state(fd, THAWED); + nanosleep(&req, NULL); + freezer_write_state(fd, FROZEN); + break; + } } if (i > nr_attempts) {