_cleanup_set_free_ Set *allocated_set = NULL;
bool done = false;
- int r, ret = 0;
+ int r, ret = 0, ret_log_kill = 0;
pid_t my_pid;
assert(sig >= 0);
continue;
if (log_kill)
- log_kill(pid, sig, userdata);
+ ret_log_kill = log_kill(pid, sig, userdata);
/* If we haven't killed this process yet, kill
* it */
if (flags & CGROUP_SIGCONT)
(void) kill(pid, SIGCONT);
- if (ret == 0)
- ret = 1;
+ if (ret == 0) {
+ if (log_kill)
+ ret = ret_log_kill;
+ else
+ ret = 1;
+ }
}
done = false;
bool fatal;
};
- /* cgroupsv1, aka legacy/non-unified */
+ /* cgroup v1, aka legacy/non-unified */
static const struct Attribute legacy_attributes[] = {
{ "cgroup.procs", true },
{ "tasks", false },
{},
};
- /* cgroupsv2, aka unified */
+ /* cgroup v2, aka unified */
static const struct Attribute unified_attributes[] = {
{ "cgroup.procs", true },
{ "cgroup.subtree_control", true },
}
fs = procfs_file_alloca(pid, "cgroup");
- f = fopen(fs, "re");
- if (!f)
- return errno == ENOENT ? -ESRCH : -errno;
-
- (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ r = fopen_unlocked(fs, "re", &f);
+ if (r == -ENOENT)
+ return -ESRCH;
+ if (r < 0)
+ return r;
for (;;) {
_cleanup_free_ char *line = NULL;
* needs free()! */
if (IN_SET(p[0], 0, '_', '.') ||
- streq(p, "notify_on_release") ||
- streq(p, "release_agent") ||
- streq(p, "tasks") ||
+ STR_IN_SET(p, "notify_on_release", "release_agent", "tasks") ||
startswith(p, "cgroup."))
need_prefix = true;
else {
char **v;
int r;
- /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
+ /* Reads one or more fields of a cgroup v2 keyed attribute file. The 'keys' parameter should be an strv with
* all keys to retrieve. The 'ret_values' parameter should be passed as string size with the same number of
* entries as 'keys'. On success each entry will be set to the value of the matching key.
*
CGroupMask mask;
int r;
- /* Determines the mask of supported cgroup controllers. Only
- * includes controllers we can make sense of and that are
- * actually accessible. */
+ /* Determines the mask of supported cgroup controllers. Only includes controllers we can make sense of and that
+ * are actually accessible. Only covers real controllers, i.e. not the CGROUP_CONTROLLER_BPF_xyz
+ * pseudo-controllers. */
r = cg_all_unified();
if (r < 0)
if (!controllers)
return -ENOMEM;
- f = fopen("/proc/cgroups", "re");
- if (!f) {
- if (errno == ENOENT) {
- *ret = NULL;
- return 0;
- }
-
- return -errno;
+ r = fopen_unlocked("/proc/cgroups", "re", &f);
+ if (r == -ENOENT) {
+ *ret = NULL;
+ return 0;
}
-
- (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
+ if (r < 0)
+ return r;
/* Ignore the header line */
(void) read_line(f, (size_t) -1, NULL);
static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN;
-/* The hybrid mode was initially implemented in v232 and simply mounted cgroup v2 on /sys/fs/cgroup/systemd. This
+/* The hybrid mode was initially implemented in v232 and simply mounted cgroup2 on /sys/fs/cgroup/systemd. This
* unfortunately broke other tools (such as docker) which expected the v1 "name=systemd" hierarchy on
* /sys/fs/cgroup/systemd. From v233 and on, the hybrid mode mountnbs v2 on /sys/fs/cgroup/unified and maintains
* "name=systemd" hierarchy on /sys/fs/cgroup/systemd for compatibility with other tools.
unified_cache = CGROUP_UNIFIED_NONE;
}
}
- } else {
- log_debug("Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
- (unsigned long long) fs.f_type);
- return -ENOMEDIUM;
- }
+ } else
+ return log_debug_errno(SYNTHETIC_ERRNO(ENOMEDIUM),
+ "Unknown filesystem type %llx mounted on /sys/fs/cgroup.",
+ (unsigned long long)fs.f_type);
return 0;
}
return cg_unified_update();
}
-int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
+int cg_enable_everywhere(
+ CGroupMask supported,
+ CGroupMask mask,
+ const char *p,
+ CGroupMask *ret_result_mask) {
+
_cleanup_fclose_ FILE *f = NULL;
_cleanup_free_ char *fs = NULL;
CGroupController c;
+ CGroupMask ret = 0;
int r;
assert(p);
- if (supported == 0)
+ if (supported == 0) {
+ if (ret_result_mask)
+ *ret_result_mask = 0;
return 0;
+ }
r = cg_all_unified();
if (r < 0)
return r;
- if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */
+ if (r == 0) {
+ /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim
+ * complete success right away. (If you wonder why we return the full mask here, rather than zero: the
+ * caller tends to use the returned mask later on to compare if all controllers where properly joined,
+ * and if not requeues realization. This use is the primary purpose of the return value, hence let's
+ * minimize surprises here and reduce triggers for re-realization by always saying we fully
+ * succeeded.) */
+ if (ret_result_mask)
+ *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with
+ * CGROUP_MASK_V2: The 'supported' mask
+ * might contain pure-V1 or BPF
+ * controllers, and we never want to
+ * claim that we could enable those with
+ * cgroup.subtree_control */
return 0;
+ }
r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
if (r < 0)
if (!f) {
f = fopen(fs, "we");
- if (!f) {
- log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
- break;
- }
+ if (!f)
+ return log_debug_errno(errno, "Failed to open cgroup.subtree_control file of %s: %m", p);
}
r = write_string_stream(f, s, WRITE_STRING_FILE_DISABLE_BUFFER);
if (r < 0) {
- log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
+ log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m",
+ FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs);
clearerr(f);
+
+ /* If we can't turn off a controller, leave it on in the reported resulting mask. This
+ * happens for example when we attempt to turn off a controller up in the tree that is
+ * used down in the tree. */
+ if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY
+ * only here, and not follow the same logic
+ * for other errors such as EINVAL or
+ * EOPNOTSUPP or anything else. That's
+ * because EBUSY indicates that the
+ * controllers is currently enabled and
+ * cannot be disabled because something down
+ * the hierarchy is still using it. Any other
+ * error most likely means something like "I
+ * never heard of this controller" or
+ * similar. In the former case it's hence
+ * safe to assume the controller is still on
+ * after the failed operation, while in the
+ * latter case it's safer to assume the
+ * controller is unknown and hence certainly
+ * not enabled. */
+ ret |= bit;
+ } else {
+ /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */
+ if (FLAGS_SET(mask, bit))
+ ret |= bit;
}
}
}
+ /* Let's return the precise set of controllers now enabled for the cgroup. */
+ if (ret_result_mask)
+ *ret_result_mask = ret;
+
return 0;
}
int r;
bool b;
const bool is_default = DEFAULT_HIERARCHY == CGROUP_UNIFIED_ALL;
+ _cleanup_free_ char *c = NULL;
/* If we have a cached value, return that. */
if (wanted >= 0)
if (cg_unified_flush() >= 0)
return (wanted = unified_cache >= CGROUP_UNIFIED_ALL);
- /* Otherwise, let's see what the kernel command line has to say.
- * Since checking is expensive, cache a non-error result. */
+ /* If we were explicitly passed systemd.unified_cgroup_hierarchy,
+ * respect that. */
r = proc_cmdline_get_bool("systemd.unified_cgroup_hierarchy", &b);
+ if (r > 0)
+ return (wanted = b);
+
+ /* If we passed cgroup_no_v1=all with no other instructions, it seems
+ * highly unlikely that we want to use hybrid or legacy hierarchy. */
+ r = proc_cmdline_get_key("cgroup_no_v1", 0, &c);
+ if (r > 0 && streq_ptr(c, "all"))
+ return (wanted = true);
- return (wanted = r > 0 ? b : is_default);
+ return (wanted = is_default);
}
bool cg_is_legacy_wanted(void) {
if (wanted >= 0)
return wanted;
- /* Check if we have cgroups2 already mounted. */
+ /* Check if we have cgroup v2 already mounted. */
if (cg_unified_flush() >= 0 &&
unified_cache == CGROUP_UNIFIED_ALL)
return (wanted = false);
/* Otherwise, assume that at least partial legacy is wanted,
- * since cgroups2 should already be mounted at this point. */
+ * since cgroup v2 should already be mounted at this point. */
return (wanted = true);
}
return is_cgroup_fs(&s);
}
-static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
+static const char *const cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_CPU] = "cpu",
[CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
[CGROUP_CONTROLLER_IO] = "io",