</listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>DisableControllers=</varname></term>
+
+ <listitem>
+ <para>Disables controllers from being enabled for a unit's children. If a controller listed is already in use
+ in its subtree, the controller will be removed from the subtree. This can be used to avoid child units being
+ able to implicitly or explicitly enable a controller. Defaults to not disabling any controllers.</para>
+
+ <para>It may not be possible to successfully disable a controller if the unit or any child of the unit in
+ question delegates controllers to its children, as any delegated subtree of the cgroup hierarchy is unmanaged
+ by systemd.</para>
+
+ <para>Multiple controllers may be specified, separated by spaces. You may also pass
+ <varname>DisableControllers=</varname> multiple times, in which case each new instance adds another controller
+ to disable. Passing <varname>DisableControllers=</varname> by itself with no controller name present resets
+ the disabled controller list.</para>
+
+ <para>Valid controllers are <option>cpu</option>, <option>cpuacct</option>, <option>io</option>,
+ <option>blkio</option>, <option>memory</option>, <option>devices</option>, and <option>pids</option>.</para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
return 0;
}
+static void cgroup_xattr_apply(Unit *u) {
+ char ids[SD_ID128_STRING_MAX];
+ int r;
+
+ assert(u);
+
+ if (!MANAGER_IS_SYSTEM(u->manager))
+ return;
+
+ if (sd_id128_is_null(u->invocation_id))
+ return;
+
+ r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
+ "trusted.invocation_id",
+ sd_id128_to_string(u->invocation_id, ids), 32,
+ 0);
+ if (r < 0)
+ log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
+}
+
static int lookup_block_device(const char *p, dev_t *ret) {
struct stat st = {};
int r;
if (!c)
return 0;
- return cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u);
+ return (cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u)) & ~unit_get_ancestor_disable_mask(u);
}
CGroupMask unit_get_delegate_mask(Unit *u) {
return unit_get_subtree_mask(u); /* we are the top-level slice */
}
+CGroupMask unit_get_disable_mask(Unit *u) {
+ CGroupContext *c;
+
+ c = unit_get_cgroup_context(u);
+ if (!c)
+ return 0;
+
+ return c->disable_controllers;
+}
+
+CGroupMask unit_get_ancestor_disable_mask(Unit *u) {
+ CGroupMask mask;
+
+ assert(u);
+ mask = unit_get_disable_mask(u);
+
+ /* Returns the mask of controllers which are marked as forcibly
+ * disabled in any ancestor unit or the unit in question. */
+
+ if (UNIT_ISSET(u->slice))
+ mask |= unit_get_ancestor_disable_mask(UNIT_DEREF(u->slice));
+
+ return mask;
+}
+
CGroupMask unit_get_subtree_mask(Unit *u) {
/* Returns the mask of this subtree, meaning of the group
mask = unit_get_own_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
return mask;
}
mask = unit_get_members_mask(u);
mask &= u->manager->cgroup_supported;
+ mask &= ~unit_get_ancestor_disable_mask(u);
return mask;
}
static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
- CGroupMask enable_mask) {
+ CGroupMask enable_mask,
+ ManagerState state) {
bool created;
int r;
log_unit_warning_errno(u, r, "Failed to migrate cgroup from to %s, ignoring: %m", u->cgroup_path);
}
+ /* Set attributes */
+ cgroup_context_apply(u, target_mask, state);
+ cgroup_xattr_apply(u);
+
return 0;
}
return r;
}
-static void cgroup_xattr_apply(Unit *u) {
- char ids[SD_ID128_STRING_MAX];
- int r;
-
- assert(u);
-
- if (!MANAGER_IS_SYSTEM(u->manager))
- return;
-
- if (sd_id128_is_null(u->invocation_id))
- return;
-
- r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path,
- "trusted.invocation_id",
- sd_id128_to_string(u->invocation_id, ids), 32,
- 0);
- if (r < 0)
- log_unit_debug_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
-}
-
static bool unit_has_mask_realized(
Unit *u,
CGroupMask target_mask,
u->cgroup_invalidated_mask == 0;
}
+static bool unit_has_mask_disables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be disabled are indeed disabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
+ * already removed. */
+
+ return !u->cgroup_realized ||
+ (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
+ FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
+}
+
+static bool unit_has_mask_enables_realized(
+ Unit *u,
+ CGroupMask target_mask,
+ CGroupMask enable_mask) {
+
+ assert(u);
+
+ /* Returns true if all controllers which should be enabled are indeed enabled.
+ *
+ * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
+ * we want to add is already added. */
+
+ return u->cgroup_realized &&
+ ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
+ ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
+}
+
void unit_add_to_cgroup_realize_queue(Unit *u) {
assert(u);
u->in_cgroup_realize_queue = false;
}
+/* Controllers can only be enabled breadth-first, from the root of the
+ * hierarchy downwards to the unit in question. */
+static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ assert(u);
+
+ /* First go deal with this unit's parent, or we won't be able to enable
+ * any new controllers at this layer. */
+ if (UNIT_ISSET(u->slice)) {
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
+ if (r < 0)
+ return r;
+ }
+
+ target_mask = unit_get_target_mask(u);
+ enable_mask = unit_get_enable_mask(u);
+
+ /* We can only enable in this direction, don't try to disable anything.
+ */
+ if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
+ return 0;
+
+ new_target_mask = u->cgroup_realized_mask | target_mask;
+ new_enable_mask = u->cgroup_enabled_mask | enable_mask;
+
+ return unit_create_cgroup(u, new_target_mask, new_enable_mask, state);
+}
+
+/* Controllers can only be disabled depth-first, from the leaves of the
+ * hierarchy upwards to the unit in question. */
+static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
+ Iterator i;
+ Unit *m;
+ void *v;
+
+ assert(u);
+
+ if (u->type != UNIT_SLICE)
+ return 0;
+
+ HASHMAP_FOREACH_KEY(v, m, u->dependencies[UNIT_BEFORE], i) {
+ CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
+ int r;
+
+ if (UNIT_DEREF(m->slice) != u)
+ continue;
+
+ /* The cgroup for this unit might not actually be fully
+ * realised yet, in which case it isn't holding any controllers
+ * open anyway. */
+ if (!m->cgroup_path)
+ continue;
+
+ /* We must disable those below us first in order to release the
+ * controller. */
+ if (m->type == UNIT_SLICE)
+ (void) unit_realize_cgroup_now_disable(m, state);
+
+ target_mask = unit_get_target_mask(m);
+ enable_mask = unit_get_enable_mask(m);
+
+ /* We can only disable in this direction, don't try to enable
+ * anything. */
+ if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
+ continue;
+
+ new_target_mask = m->cgroup_realized_mask & target_mask;
+ new_enable_mask = m->cgroup_enabled_mask & enable_mask;
+
+ r = unit_create_cgroup(m, new_target_mask, new_enable_mask, state);
+ if (r < 0)
+ return r;
+ }
+
+ return 0;
+}
+
/* Check if necessary controllers and attributes for a unit are in place.
*
- * If so, do nothing.
- * If not, create paths, move processes over, and set attributes.
+ * - If so, do nothing.
+ * - If not, create paths, move processes over, and set attributes.
+ *
+ * Controllers can only be *enabled* in a breadth-first way, and *disabled* in
+ * a depth-first way. As such the process looks like this:
+ *
+ * Suppose we have a cgroup hierarchy which looks like this:
+ *
+ * root
+ * / \
+ * / \
+ * / \
+ * a b
+ * / \ / \
+ * / \ / \
+ * c d e f
+ * / \ / \ / \ / \
+ * h i j k l m n o
+ *
+ * 1. We want to realise cgroup "d" now.
+ * 2. cgroup "a" has DisableControllers=cpu in the associated unit.
+ * 3. cgroup "k" just started requesting the memory controller.
+ *
+ * To make this work we must do the following in order:
+ *
+ * 1. Disable CPU controller in k, j
+ * 2. Disable CPU controller in d
+ * 3. Enable memory controller in root
+ * 4. Enable memory controller in a
+ * 5. Enable memory controller in d
+ * 6. Enable memory controller in k
+ *
+ * Notice that we need to touch j in one direction, but not the other. We also
+ * don't go beyond d when disabling -- it's up to "a" to get realized if it
+ * wants to disable further. The basic rules are therefore:
+ *
+ * - If you're disabling something, you need to realise all of the cgroups from
+ * your recursive descendants to the root. This starts from the leaves.
+ * - If you're enabling something, you need to realise from the root cgroup
+ * downwards, but you don't need to iterate your recursive descendants.
*
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
if (unit_has_mask_realized(u, target_mask, enable_mask))
return 0;
- /* First, realize parents */
+ /* Disable controllers below us, if there are any */
+ r = unit_realize_cgroup_now_disable(u, state);
+ if (r < 0)
+ return r;
+
+ /* Enable controllers above us, if there are any */
if (UNIT_ISSET(u->slice)) {
- r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
+ r = unit_realize_cgroup_now_enable(UNIT_DEREF(u->slice), state);
if (r < 0)
return r;
}
- /* And then do the real work */
- r = unit_create_cgroup(u, target_mask, enable_mask);
+ /* Now actually deal with the cgroup we were trying to realise and set attributes */
+ r = unit_create_cgroup(u, target_mask, enable_mask, state);
if (r < 0)
return r;
- /* Finally, apply the necessary attributes. */
- cgroup_context_apply(u, target_mask, state);
- cgroup_xattr_apply(u);
-
/* Now, reset the invalidation mask */
u->cgroup_invalidated_mask = 0;
return 0;
bool delegate;
CGroupMask delegate_controllers;
+
+ CGroupMask disable_controllers;
};
/* Used when querying IP accounting data */
CGroupMask unit_get_members_mask(Unit *u);
CGroupMask unit_get_siblings_mask(Unit *u);
CGroupMask unit_get_subtree_mask(Unit *u);
+CGroupMask unit_get_disable_mask(Unit *u);
+CGroupMask unit_get_ancestor_disable_mask(Unit *u);
+
CGroupMask unit_get_target_mask(Unit *u);
CGroupMask unit_get_enable_mask(Unit *u);
static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
-static int property_get_delegate_controllers(
+static int property_get_cgroup_mask(
sd_bus *bus,
const char *path,
const char *interface,
void *userdata,
sd_bus_error *error) {
- CGroupContext *c = userdata;
- CGroupController cc;
+ CGroupMask *mask = userdata;
+ CGroupController ctrl;
int r;
assert(bus);
assert(reply);
- assert(c);
-
- if (!c->delegate)
- return sd_bus_message_append(reply, "as", 0);
r = sd_bus_message_open_container(reply, 'a', "s");
if (r < 0)
return r;
- for (cc = 0; cc < _CGROUP_CONTROLLER_MAX; cc++) {
- if ((c->delegate_controllers & CGROUP_CONTROLLER_TO_MASK(cc)) == 0)
+ for (ctrl = 0; ctrl < _CGROUP_CONTROLLER_MAX; ctrl++) {
+ if ((*mask & CGROUP_CONTROLLER_TO_MASK(ctrl)) == 0)
continue;
- r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(cc));
+ r = sd_bus_message_append(reply, "s", cgroup_controller_to_string(ctrl));
if (r < 0)
return r;
}
return sd_bus_message_close_container(reply);
}
+static int property_get_delegate_controllers(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ CGroupContext *c = userdata;
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ if (!c->delegate)
+ return sd_bus_message_append(reply, "as", 0);
+
+ return property_get_cgroup_mask(bus, path, interface, property, reply, &c->delegate_controllers, error);
+}
+
static int property_get_io_device_weight(
sd_bus *bus,
const char *path,
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
+ SD_BUS_PROPERTY("DisableControllers", "as", property_get_cgroup_mask, offsetof(CGroupContext, disable_controllers), 0),
SD_BUS_VTABLE_END
};
$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
$1.Delegate, config_parse_delegate, 0, offsetof($1, cgroup_context)
+$1.DisableControllers, config_parse_disable_controllers, 0, offsetof($1, cgroup_context)
$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
return 0;
}
+int config_parse_disable_controllers(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ int r;
+ CGroupContext *c = data;
+ CGroupMask disabled_mask;
+
+ /* 1. If empty, make all controllers eligible for use again.
+ * 2. If non-empty, merge all listed controllers, space separated. */
+
+ if (isempty(rvalue)) {
+ c->disable_controllers = 0;
+ return 0;
+ }
+
+ r = cg_mask_from_string(rvalue, &disabled_mask);
+ if (r < 0 || disabled_mask <= 0) {
+ log_syntax(unit, LOG_ERR, filename, line, r, "Invalid cgroup string: %s, ignoring", rvalue);
+ return 0;
+ }
+
+ c->disable_controllers |= disabled_mask;
+
+ return 0;
+}
+
#define FOLLOW_MAX 8
static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode);
CONFIG_PARSER_PROTOTYPE(config_parse_pid_file);
CONFIG_PARSER_PROTOTYPE(config_parse_exit_status);
+CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers);
/* gperf prototypes */
const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length);
static int test_cgroup_mask(void) {
_cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
_cleanup_(manager_freep) Manager *m = NULL;
- Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep;
+ Unit *son, *daughter, *parent, *root, *grandchild, *parent_deep, *nomem_parent, *nomem_leaf;
int r;
CGroupMask cpu_accounting_mask = get_cpu_accounting_mask();
assert_se(manager_load_startable_unit_or_warn(m, "daughter.service", NULL, &daughter) >= 0);
assert_se(manager_load_startable_unit_or_warn(m, "grandchild.service", NULL, &grandchild) >= 0);
assert_se(manager_load_startable_unit_or_warn(m, "parent-deep.slice", NULL, &parent_deep) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "nomem.slice", NULL, &nomem_parent) >= 0);
+ assert_se(manager_load_startable_unit_or_warn(m, "nomemleaf.service", NULL, &nomem_leaf) >= 0);
assert_se(UNIT_DEREF(son->slice) == parent);
assert_se(UNIT_DEREF(daughter->slice) == parent);
assert_se(UNIT_DEREF(parent_deep->slice) == parent);
assert_se(UNIT_DEREF(grandchild->slice) == parent_deep);
+ assert_se(UNIT_DEREF(nomem_leaf->slice) == nomem_parent);
root = UNIT_DEREF(parent->slice);
+ assert_se(UNIT_DEREF(nomem_parent->slice) == root);
/* Verify per-unit cgroups settings. */
ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(son), CGROUP_MASK_CPU);
ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(grandchild), 0);
ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent_deep), CGROUP_MASK_MEMORY);
ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_parent), 0);
+ ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
ASSERT_CGROUP_MASK_JOINED(unit_get_own_mask(root), 0);
/* Verify aggregation of member masks */
ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(grandchild), 0);
ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent_deep), 0);
ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_parent), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(nomem_leaf), 0);
ASSERT_CGROUP_MASK_JOINED(unit_get_members_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
/* Verify aggregation of sibling masks. */
ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(grandchild), 0);
ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent_deep), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY));
ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(parent), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_parent), (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
+ ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(nomem_leaf), (CGROUP_MASK_IO | CGROUP_MASK_BLKIO));
ASSERT_CGROUP_MASK_JOINED(unit_get_siblings_mask(root), (CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY));
/* Verify aggregation of target masks. */
ASSERT_CGROUP_MASK(unit_get_target_mask(grandchild), 0);
ASSERT_CGROUP_MASK(unit_get_target_mask(parent_deep), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_MEMORY) & m->cgroup_supported));
ASSERT_CGROUP_MASK(unit_get_target_mask(parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_parent), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT | CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
+ ASSERT_CGROUP_MASK(unit_get_target_mask(nomem_leaf), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_IO | CGROUP_MASK_BLKIO) & m->cgroup_supported));
ASSERT_CGROUP_MASK(unit_get_target_mask(root), (CGROUP_MASK_EXTEND_JOINED(CGROUP_MASK_CPU | cpu_accounting_mask | CGROUP_MASK_IO | CGROUP_MASK_BLKIO | CGROUP_MASK_MEMORY) & m->cgroup_supported));
return 0;
hwdb/10-bad.hwdb
journal-data/journal-1.txt
journal-data/journal-2.txt
+ nomem.slice
+ nomemleaf.service
parent-deep.slice
parent.slice
sched_idle_bad.service
--- /dev/null
+[Unit]
+Description=Nomem Parent Slice
+
+[Slice]
+DisableControllers=memory
--- /dev/null
+[Unit]
+Description=Nomem Leaf Service
+
+[Service]
+Slice=nomem.slice
+Type=oneshot
+ExecStart=/bin/true
+IOWeight=200
+MemoryAccounting=true