git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: split out cgroup specific state fields from Unit → CGroupRuntime
authorLennart Poettering <lennart@poettering.net>
Sat, 27 Jan 2024 09:59:15 +0000 (10:59 +0100)
committerLennart Poettering <lennart@poettering.net>
Fri, 16 Feb 2024 09:17:40 +0000 (10:17 +0100)
This refactors the Unit structure a bit: all cgroup-related state fields
are moved to a new structure CGroupRuntime, which is only allocated as
we realize a cgroup.

This is both a nice cleanup and should make unit structures that have no
cgroup associated considerably smaller, because they were never realized or
because they belong to a unit type that doesn't have cgroups anyway.

This makes things nicely symmetric:

        ExecContext → static user configuration about execution
        ExecRuntime → dynamic user state of execution
        CGroupContext → static user configuration about cgroups
        CGroupRuntime → dynamic user state of cgroups

And each time the XyzContext is part of the unit type structures such as
Service or Slice that need it, but the runtime object is only allocated
when a unit is started.

28 files changed:
src/core/bpf-firewall.c
src/core/bpf-foreign.c
src/core/bpf-restrict-fs.c
src/core/bpf-restrict-fs.h
src/core/bpf-restrict-ifaces.c
src/core/bpf-socket-bind.c
src/core/cgroup.c
src/core/cgroup.h
src/core/core-varlink.c
src/core/dbus-cgroup.c
src/core/dbus-unit.c
src/core/mount.c
src/core/mount.h
src/core/scope.c
src/core/scope.h
src/core/service.c
src/core/service.h
src/core/slice.c
src/core/slice.h
src/core/socket.c
src/core/socket.h
src/core/swap.c
src/core/swap.h
src/core/unit-printf.c
src/core/unit-serialize.c
src/core/unit.c
src/core/unit.h
src/test/test-bpf-firewall.c

index 66773e182783be9e64e38b2c685f4554cee16f1b..3bac231c97502424246253b933c1e75f6f3e67cd 100644 (file)
@@ -196,19 +196,26 @@ static int bpf_firewall_compile_bpf(
         _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
         int accounting_map_fd, r;
         bool access_enabled;
+        CGroupRuntime *crt;
 
         assert(u);
         assert(ret);
 
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt) {
+                *ret = NULL;
+                return 0;
+        }
+
         accounting_map_fd = is_ingress ?
-                u->ip_accounting_ingress_map_fd :
-                u->ip_accounting_egress_map_fd;
+                crt->ip_accounting_ingress_map_fd :
+                crt->ip_accounting_egress_map_fd;
 
         access_enabled =
-                u->ipv4_allow_map_fd >= 0 ||
-                u->ipv6_allow_map_fd >= 0 ||
-                u->ipv4_deny_map_fd >= 0 ||
-                u->ipv6_deny_map_fd >= 0 ||
+                crt->ipv4_allow_map_fd >= 0 ||
+                crt->ipv6_allow_map_fd >= 0 ||
+                crt->ipv4_deny_map_fd >= 0 ||
+                crt->ipv6_deny_map_fd >= 0 ||
                 ip_allow_any ||
                 ip_deny_any;
 
@@ -234,26 +241,26 @@ static int bpf_firewall_compile_bpf(
                  * - Otherwise, access will be granted
                  */
 
-                if (u->ipv4_deny_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+                if (crt->ipv4_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
                         if (r < 0)
                                 return r;
                 }
 
-                if (u->ipv6_deny_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+                if (crt->ipv6_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
                         if (r < 0)
                                 return r;
                 }
 
-                if (u->ipv4_allow_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+                if (crt->ipv4_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
                         if (r < 0)
                                 return r;
                 }
 
-                if (u->ipv6_allow_map_fd >= 0) {
-                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+                if (crt->ipv6_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, crt->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
                         if (r < 0)
                                 return r;
                 }
@@ -495,37 +502,36 @@ static int bpf_firewall_prepare_access_maps(
         return 0;
 }
 
-static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, int *fd_ingress, int *fd_egress) {
+static int bpf_firewall_prepare_accounting_maps(Unit *u, bool enabled, CGroupRuntime *crt) {
         int r;
 
         assert(u);
-        assert(fd_ingress);
-        assert(fd_egress);
+        assert(crt);
 
         if (enabled) {
-                if (*fd_ingress < 0) {
+                if (crt->ip_accounting_ingress_map_fd < 0) {
                         char *name = strjoina("I_", u->id);
                         r = bpf_map_new(name, BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                         if (r < 0)
                                 return r;
 
-                        *fd_ingress = r;
+                        crt->ip_accounting_ingress_map_fd = r;
                 }
 
-                if (*fd_egress < 0) {
+                if (crt->ip_accounting_egress_map_fd < 0) {
                         char *name = strjoina("E_", u->id);
                         r = bpf_map_new(name, BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                         if (r < 0)
                                 return r;
 
-                        *fd_egress = r;
+                        crt->ip_accounting_egress_map_fd = r;
                 }
 
         } else {
-                *fd_ingress = safe_close(*fd_ingress);
-                *fd_egress = safe_close(*fd_egress);
+                crt->ip_accounting_ingress_map_fd = safe_close(crt->ip_accounting_ingress_map_fd);
+                crt->ip_accounting_egress_map_fd = safe_close(crt->ip_accounting_egress_map_fd);
 
-                zero(u->ip_accounting_extra);
+                zero(crt->ip_accounting_extra);
         }
 
         return 0;
@@ -535,6 +541,7 @@ int bpf_firewall_compile(Unit *u) {
         const char *ingress_name = NULL, *egress_name = NULL;
         bool ip_allow_any = false, ip_deny_any = false;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, supported;
 
         assert(u);
@@ -543,6 +550,10 @@ int bpf_firewall_compile(Unit *u) {
         if (!cc)
                 return -EINVAL;
 
+        crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+
         supported = bpf_firewall_supported();
         if (supported < 0)
                 return supported;
@@ -569,14 +580,14 @@ int bpf_firewall_compile(Unit *u) {
          * but we reuse the accounting maps. That way the firewall in effect always maps to the actual
          * configuration, but we don't flush out the accounting unnecessarily */
 
-        u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
-        u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
+        crt->ip_bpf_ingress = bpf_program_free(crt->ip_bpf_ingress);
+        crt->ip_bpf_egress = bpf_program_free(crt->ip_bpf_egress);
 
-        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
-        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+        crt->ipv4_allow_map_fd = safe_close(crt->ipv4_allow_map_fd);
+        crt->ipv4_deny_map_fd = safe_close(crt->ipv4_deny_map_fd);
 
-        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
-        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+        crt->ipv6_allow_map_fd = safe_close(crt->ipv6_allow_map_fd);
+        crt->ipv6_deny_map_fd = safe_close(crt->ipv6_deny_map_fd);
 
         if (u->type != UNIT_SLICE) {
                 /* In inner nodes we only do accounting, we do not actually bother with access control. However, leaf
@@ -585,24 +596,24 @@ int bpf_firewall_compile(Unit *u) {
                  * means that all configure IP access rules *will* take effect on processes, even though we never
                  * compile them for inner nodes. */
 
-                r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd, &ip_allow_any);
+                r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &crt->ipv4_allow_map_fd, &crt->ipv6_allow_map_fd, &ip_allow_any);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF allow maps failed: %m");
 
-                r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd, &ip_deny_any);
+                r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &crt->ipv4_deny_map_fd, &crt->ipv6_deny_map_fd, &ip_deny_any);
                 if (r < 0)
                         return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF deny maps failed: %m");
         }
 
-        r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
+        r = bpf_firewall_prepare_accounting_maps(u, cc->ip_accounting, crt);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Preparation of BPF accounting maps failed: %m");
 
-        r = bpf_firewall_compile_bpf(u, ingress_name, true, &u->ip_bpf_ingress, ip_allow_any, ip_deny_any);
+        r = bpf_firewall_compile_bpf(u, ingress_name, true, &crt->ip_bpf_ingress, ip_allow_any, ip_deny_any);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Compilation of ingress BPF program failed: %m");
 
-        r = bpf_firewall_compile_bpf(u, egress_name, false, &u->ip_bpf_egress, ip_allow_any, ip_deny_any);
+        r = bpf_firewall_compile_bpf(u, egress_name, false, &crt->ip_bpf_egress, ip_allow_any, ip_deny_any);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Compilation of egress BPF program failed: %m");
 
@@ -634,6 +645,7 @@ static int load_bpf_progs_from_fs_to_set(Unit *u, char **filter_paths, Set **set
 
 int bpf_firewall_load_custom(Unit *u) {
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, supported;
 
         assert(u);
@@ -641,6 +653,9 @@ int bpf_firewall_load_custom(Unit *u) {
         cc = unit_get_cgroup_context(u);
         if (!cc)
                 return 0;
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
 
         if (!(cc->ip_filters_ingress || cc->ip_filters_egress))
                 return 0;
@@ -653,10 +668,10 @@ int bpf_firewall_load_custom(Unit *u) {
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                             "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
 
-        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &u->ip_bpf_custom_ingress);
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_ingress, &crt->ip_bpf_custom_ingress);
         if (r < 0)
                 return r;
-        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &u->ip_bpf_custom_egress);
+        r = load_bpf_progs_from_fs_to_set(u, cc->ip_filters_egress, &crt->ip_bpf_custom_egress);
         if (r < 0)
                 return r;
 
@@ -686,6 +701,7 @@ int bpf_firewall_install(Unit *u) {
         _cleanup_(bpf_program_freep) BPFProgram *ip_bpf_ingress_uninstall = NULL, *ip_bpf_egress_uninstall = NULL;
         _cleanup_free_ char *path = NULL;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, supported;
         uint32_t flags;
 
@@ -694,9 +710,12 @@ int bpf_firewall_install(Unit *u) {
         cc = unit_get_cgroup_context(u);
         if (!cc)
                 return -EINVAL;
-        if (!u->cgroup_path)
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -EINVAL;
+        if (!crt->cgroup_path)
                 return -EINVAL;
-        if (!u->cgroup_realized)
+        if (!crt->cgroup_realized)
                 return -EINVAL;
 
         supported = bpf_firewall_supported();
@@ -709,11 +728,11 @@ int bpf_firewall_install(Unit *u) {
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                             "bpf-firewall: BPF_F_ALLOW_MULTI not supported, not doing BPF firewall on slice units.");
         if (supported != BPF_FIREWALL_SUPPORTED_WITH_MULTI &&
-            (!set_isempty(u->ip_bpf_custom_ingress) || !set_isempty(u->ip_bpf_custom_egress)))
+            (!set_isempty(crt->ip_bpf_custom_ingress) || !set_isempty(crt->ip_bpf_custom_egress)))
                 return log_unit_debug_errno(u, SYNTHETIC_ERRNO(EOPNOTSUPP),
                                             "bpf-firewall: BPF_F_ALLOW_MULTI not supported, cannot attach custom BPF programs.");
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-firewall: Failed to determine cgroup path: %m");
 
@@ -724,44 +743,44 @@ int bpf_firewall_install(Unit *u) {
                  * after attaching the new programs, so that there's no time window where neither program is
                  * attached. (There will be a program where both are attached, but that's OK, since this is a
                  * security feature where we rather want to lock down too much than too little */
-                ip_bpf_egress_uninstall = TAKE_PTR(u->ip_bpf_egress_installed);
-                ip_bpf_ingress_uninstall = TAKE_PTR(u->ip_bpf_ingress_installed);
+                ip_bpf_egress_uninstall = TAKE_PTR(crt->ip_bpf_egress_installed);
+                ip_bpf_ingress_uninstall = TAKE_PTR(crt->ip_bpf_ingress_installed);
         } else {
                 /* If we don't have BPF_F_ALLOW_MULTI then unref the old BPF programs (which will implicitly
                  * detach them) right before attaching the new program, to minimize the time window when we
                  * don't account for IP traffic. */
-                u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
-                u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
+                crt->ip_bpf_egress_installed = bpf_program_free(crt->ip_bpf_egress_installed);
+                crt->ip_bpf_ingress_installed = bpf_program_free(crt->ip_bpf_ingress_installed);
         }
 
-        if (u->ip_bpf_egress) {
-                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
+        if (crt->ip_bpf_egress) {
+                r = bpf_program_cgroup_attach(crt->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, flags);
                 if (r < 0)
                         return log_unit_error_errno(u, r,
                                 "bpf-firewall: Attaching egress BPF program to cgroup %s failed: %m", path);
 
                 /* Remember that this BPF program is installed now. */
-                u->ip_bpf_egress_installed = TAKE_PTR(u->ip_bpf_egress);
+                crt->ip_bpf_egress_installed = TAKE_PTR(crt->ip_bpf_egress);
         }
 
-        if (u->ip_bpf_ingress) {
-                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
+        if (crt->ip_bpf_ingress) {
+                r = bpf_program_cgroup_attach(crt->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, flags);
                 if (r < 0)
                         return log_unit_error_errno(u, r,
                                 "bpf-firewall: Attaching ingress BPF program to cgroup %s failed: %m", path);
 
-                u->ip_bpf_ingress_installed = TAKE_PTR(u->ip_bpf_ingress);
+                crt->ip_bpf_ingress_installed = TAKE_PTR(crt->ip_bpf_ingress);
         }
 
         /* And now, definitely get rid of the old programs, and detach them */
         ip_bpf_egress_uninstall = bpf_program_free(ip_bpf_egress_uninstall);
         ip_bpf_ingress_uninstall = bpf_program_free(ip_bpf_ingress_uninstall);
 
-        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &u->ip_bpf_custom_egress, &u->ip_bpf_custom_egress_installed);
+        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_EGRESS, &crt->ip_bpf_custom_egress, &crt->ip_bpf_custom_egress_installed);
         if (r < 0)
                 return r;
 
-        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &u->ip_bpf_custom_ingress, &u->ip_bpf_custom_ingress_installed);
+        r = attach_custom_bpf_progs(u, path, BPF_CGROUP_INET_INGRESS, &crt->ip_bpf_custom_ingress, &crt->ip_bpf_custom_ingress_installed);
         if (r < 0)
                 return r;
 
@@ -954,21 +973,25 @@ void emit_bpf_firewall_warning(Unit *u) {
 void bpf_firewall_close(Unit *u) {
         assert(u);
 
-        u->ip_accounting_ingress_map_fd = safe_close(u->ip_accounting_ingress_map_fd);
-        u->ip_accounting_egress_map_fd = safe_close(u->ip_accounting_egress_map_fd);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
+        crt->ip_accounting_ingress_map_fd = safe_close(crt->ip_accounting_ingress_map_fd);
+        crt->ip_accounting_egress_map_fd = safe_close(crt->ip_accounting_egress_map_fd);
 
-        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
-        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
-        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
-        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+        crt->ipv4_allow_map_fd = safe_close(crt->ipv4_allow_map_fd);
+        crt->ipv6_allow_map_fd = safe_close(crt->ipv6_allow_map_fd);
+        crt->ipv4_deny_map_fd = safe_close(crt->ipv4_deny_map_fd);
+        crt->ipv6_deny_map_fd = safe_close(crt->ipv6_deny_map_fd);
 
-        u->ip_bpf_ingress = bpf_program_free(u->ip_bpf_ingress);
-        u->ip_bpf_ingress_installed = bpf_program_free(u->ip_bpf_ingress_installed);
-        u->ip_bpf_egress = bpf_program_free(u->ip_bpf_egress);
-        u->ip_bpf_egress_installed = bpf_program_free(u->ip_bpf_egress_installed);
+        crt->ip_bpf_ingress = bpf_program_free(crt->ip_bpf_ingress);
+        crt->ip_bpf_ingress_installed = bpf_program_free(crt->ip_bpf_ingress_installed);
+        crt->ip_bpf_egress = bpf_program_free(crt->ip_bpf_egress);
+        crt->ip_bpf_egress_installed = bpf_program_free(crt->ip_bpf_egress_installed);
 
-        u->ip_bpf_custom_ingress = set_free(u->ip_bpf_custom_ingress);
-        u->ip_bpf_custom_egress = set_free(u->ip_bpf_custom_egress);
-        u->ip_bpf_custom_ingress_installed = set_free(u->ip_bpf_custom_ingress_installed);
-        u->ip_bpf_custom_egress_installed = set_free(u->ip_bpf_custom_egress_installed);
+        crt->ip_bpf_custom_ingress = set_free(crt->ip_bpf_custom_ingress);
+        crt->ip_bpf_custom_egress = set_free(crt->ip_bpf_custom_egress);
+        crt->ip_bpf_custom_ingress_installed = set_free(crt->ip_bpf_custom_ingress_installed);
+        crt->ip_bpf_custom_egress_installed = set_free(crt->ip_bpf_custom_egress_installed);
 }
index 909d34b5dff034f8002f16f6a76c5b7a9eb1e39a..851cc424a2db3f63538831e17e00fbd7990cd2b5 100644 (file)
@@ -81,6 +81,7 @@ static int bpf_foreign_prepare(
                 Unit *u,
                 enum bpf_attach_type attach_type,
                 const char *bpffs_path) {
+
         _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
         _cleanup_free_ BPFForeignKey *key = NULL;
         uint32_t prog_id;
@@ -101,6 +102,11 @@ static int bpf_foreign_prepare(
                 return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
                                 "bpf-foreign: Path in BPF filesystem is expected.");
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+                                            "Failed to get control group runtime object.");
+
         r = bpf_program_new_from_bpffs_path(bpffs_path, &prog);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-foreign: Failed to create foreign BPF program: %m");
@@ -114,7 +120,7 @@ static int bpf_foreign_prepare(
                 return log_unit_error_errno(u, r,
                                 "bpf-foreign: Failed to create foreign BPF program key from path '%s': %m", bpffs_path);
 
-        r = hashmap_ensure_put(&u->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, key, prog);
+        r = hashmap_ensure_put(&crt->bpf_foreign_by_key, &bpf_foreign_by_key_hash_ops, key, prog);
         if (r == -EEXIST) {
                 log_unit_warning_errno(u, r, "bpf-foreign: Foreign BPF program already exists, ignoring: %m");
                 return 0;
@@ -131,6 +137,7 @@ static int bpf_foreign_prepare(
 int bpf_foreign_install(Unit *u) {
         _cleanup_free_ char *cgroup_path = NULL;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r, ret = 0;
 
         assert(u);
@@ -139,7 +146,11 @@ int bpf_foreign_install(Unit *u) {
         if (!cc)
                 return 0;
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-foreign: Failed to get cgroup path: %m");
 
@@ -149,6 +160,6 @@ int bpf_foreign_install(Unit *u) {
                         ret = r;
         }
 
-        r = attach_programs(u, cgroup_path, u->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
+        r = attach_programs(u, cgroup_path, crt->bpf_foreign_by_key, BPF_F_ALLOW_MULTI);
         return ret < 0 ? ret : r;
 }
index 15ef86d50fc9805a00af4f8b67a2115791cfeb01..0f698e1c25ed74ea3c4f674c3722c0338676e63d 100644 (file)
@@ -209,7 +209,9 @@ int bpf_restrict_fs_update(const Set *filesystems, uint64_t cgroup_id, int outer
         return 0;
 }
 
-int bpf_restrict_fs_cleanup(const Unit *u) {
+int bpf_restrict_fs_cleanup(Unit *u) {
+        CGroupRuntime *crt;
+
         assert(u);
         assert(u->manager);
 
@@ -220,14 +222,18 @@ int bpf_restrict_fs_cleanup(const Unit *u) {
         if (!u->manager->restrict_fs)
                 return 0;
 
-        if (u->cgroup_id == 0)
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        if (crt->cgroup_id == 0)
                 return 0;
 
         int fd = sym_bpf_map__fd(u->manager->restrict_fs->maps.cgroup_hash);
         if (fd < 0)
                 return log_unit_error_errno(u, errno, "bpf-restrict-fs: Failed to get BPF map fd: %m");
 
-        if (sym_bpf_map_delete_elem(fd, &u->cgroup_id) != 0 && errno != ENOENT)
+        if (sym_bpf_map_delete_elem(fd, &crt->cgroup_id) != 0 && errno != ENOENT)
                 return log_unit_debug_errno(u, errno, "bpf-restrict-fs: Failed to delete cgroup entry from LSM BPF map: %m");
 
         return 0;
@@ -259,7 +265,7 @@ int bpf_restrict_fs_update(const Set *filesystems, uint64_t cgroup_id, int outer
         return log_debug_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "bpf-restrict-fs: Failed to restrict filesystems using LSM BPF: %m");
 }
 
-int bpf_restrict_fs_cleanup(const Unit *u) {
+int bpf_restrict_fs_cleanup(Unit *u) {
         return 0;
 }
 
index ffb360b117820de45a7ad594c6932b16ab1a9592..8da12de7463b52f4dceb686417649b60fc7dac2e 100644 (file)
@@ -17,7 +17,7 @@ typedef struct restrict_fs_bpf restrict_fs_bpf;
 bool bpf_restrict_fs_supported(bool initialize);
 int bpf_restrict_fs_setup(Manager *m);
 int bpf_restrict_fs_update(const Set *filesystems, uint64_t cgroup_id, int outer_map_fd, bool allow_list);
-int bpf_restrict_fs_cleanup(const Unit *u);
+int bpf_restrict_fs_cleanup(Unit *u);
 int bpf_restrict_fs_map_fd(Unit *u);
 void bpf_restrict_fs_destroy(struct restrict_fs_bpf *prog);
 int bpf_restrict_fs_parse_filesystem(const char *name, Set **filesystems, FilesystemParseFlags flags, const char *unit, const char *filename, unsigned line);
index 9cfe49a31802c392c2cb519298061127839cd9a0..1156e6af79e45aa06aae6de284e56e52166ea8aa 100644 (file)
@@ -103,13 +103,18 @@ static int restrict_ifaces_install_impl(Unit *u) {
         _cleanup_free_ char *cgroup_path = NULL;
         _cleanup_close_ int cgroup_fd = -EBADF;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r;
 
         cc = unit_get_cgroup_context(u);
         if (!cc)
                 return 0;
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "restrict-interfaces: Failed to get cgroup path: %m");
 
@@ -137,30 +142,42 @@ static int restrict_ifaces_install_impl(Unit *u) {
         if (r != 0)
                 return log_unit_error_errno(u, r, "restrict-interfaces: Failed to create egress cgroup link: %m");
 
-        u->restrict_ifaces_ingress_bpf_link = TAKE_PTR(ingress_link);
-        u->restrict_ifaces_egress_bpf_link = TAKE_PTR(egress_link);
+        crt->restrict_ifaces_ingress_bpf_link = TAKE_PTR(ingress_link);
+        crt->restrict_ifaces_egress_bpf_link = TAKE_PTR(egress_link);
 
         return 0;
 }
 
 int bpf_restrict_ifaces_install(Unit *u) {
+        CGroupRuntime *crt;
         int r;
 
+        assert(u);
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
         r = restrict_ifaces_install_impl(u);
-        fdset_close(u->initial_restric_ifaces_link_fds);
+        fdset_close(crt->initial_restric_ifaces_link_fds);
         return r;
 }
 
 int bpf_restrict_ifaces_serialize(Unit *u, FILE *f, FDSet *fds) {
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        r = bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", u->restrict_ifaces_ingress_bpf_link);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", crt->restrict_ifaces_ingress_bpf_link);
         if (r < 0)
                 return r;
 
-        return bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", u->restrict_ifaces_egress_bpf_link);
+        return bpf_serialize_link(f, fds, "restrict-ifaces-bpf-fd", crt->restrict_ifaces_egress_bpf_link);
 }
 
 int bpf_restrict_ifaces_add_initial_link_fd(Unit *u, int fd) {
@@ -168,13 +185,17 @@ int bpf_restrict_ifaces_add_initial_link_fd(Unit *u, int fd) {
 
         assert(u);
 
-        if (!u->initial_restric_ifaces_link_fds) {
-                u->initial_restric_ifaces_link_fds = fdset_new();
-                if (!u->initial_restric_ifaces_link_fds)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -EINVAL;
+
+        if (!crt->initial_restric_ifaces_link_fds) {
+                crt->initial_restric_ifaces_link_fds = fdset_new();
+                if (!crt->initial_restric_ifaces_link_fds)
                         return log_oom();
         }
 
-        r = fdset_put(u->initial_restric_ifaces_link_fds, fd);
+        r = fdset_put(crt->initial_restric_ifaces_link_fds, fd);
         if (r < 0)
                 return log_unit_error_errno(u, r,
                         "restrict-interfaces: Failed to put restrict-ifaces-bpf-fd %d to restored fdset: %m", fd);
index b1b1a6770ef437489a9371e66f525757ce697076..465216a7d0dfd29708ffe42b5d4d1f481fa6ac15 100644 (file)
@@ -139,13 +139,18 @@ int bpf_socket_bind_add_initial_link_fd(Unit *u, int fd) {
 
         assert(u);
 
-        if (!u->initial_socket_bind_link_fds) {
-                u->initial_socket_bind_link_fds = fdset_new();
-                if (!u->initial_socket_bind_link_fds)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return log_unit_error_errno(u, SYNTHETIC_ERRNO(EINVAL),
+                                            "Failed to get control group runtime object.");
+
+        if (!crt->initial_socket_bind_link_fds) {
+                crt->initial_socket_bind_link_fds = fdset_new();
+                if (!crt->initial_socket_bind_link_fds)
                         return log_oom();
         }
 
-        r = fdset_put(u->initial_socket_bind_link_fds, fd);
+        r = fdset_put(crt->initial_socket_bind_link_fds, fd);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to put BPF fd %d to initial fdset", fd);
 
@@ -158,6 +163,7 @@ static int socket_bind_install_impl(Unit *u) {
         _cleanup_free_ char *cgroup_path = NULL;
         _cleanup_close_ int cgroup_fd = -EBADF;
         CGroupContext *cc;
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
@@ -166,7 +172,11 @@ static int socket_bind_install_impl(Unit *u) {
         if (!cc)
                 return 0;
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_path);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to get cgroup path: %m");
 
@@ -193,35 +203,42 @@ static int socket_bind_install_impl(Unit *u) {
                 return log_unit_error_errno(u, r, "bpf-socket-bind: Failed to link '%s' cgroup-bpf program: %m",
                                             sym_bpf_program__name(obj->progs.sd_bind6));
 
-        u->ipv4_socket_bind_link = TAKE_PTR(ipv4);
-        u->ipv6_socket_bind_link = TAKE_PTR(ipv6);
+        crt->ipv4_socket_bind_link = TAKE_PTR(ipv4);
+        crt->ipv6_socket_bind_link = TAKE_PTR(ipv6);
 
         return 0;
 }
 
 int bpf_socket_bind_install(Unit *u) {
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        r = socket_bind_install_impl(u);
-        if (r == -ENOMEM)
-                return r;
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
 
-        fdset_close(u->initial_socket_bind_link_fds);
+        r = socket_bind_install_impl(u);
+        fdset_close(crt->initial_socket_bind_link_fds);
         return r;
 }
 
 int bpf_socket_bind_serialize(Unit *u, FILE *f, FDSet *fds) {
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", u->ipv4_socket_bind_link);
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        r = bpf_serialize_link(f, fds, "ipv4-socket-bind-bpf-link", crt->ipv4_socket_bind_link);
         if (r < 0)
                 return r;
 
-        return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", u->ipv6_socket_bind_link);
+        return bpf_serialize_link(f, fds, "ipv6-socket-bind-bpf-link", crt->ipv6_socket_bind_link);
 }
 
 #else /* ! BPF_FRAMEWORK */
index f79047a37900090dc7bc6d8e5f0443dbfb11cd22..1a079a37b3368fbda447f6648df5f40d27df8756 100644 (file)
@@ -34,6 +34,7 @@
 #include "process-util.h"
 #include "procfs-util.h"
 #include "set.h"
+#include "serialize.h"
 #include "special.h"
 #include "stdio-util.h"
 #include "string-table.h"
@@ -116,10 +117,16 @@ bool unit_has_host_root_cgroup(Unit *u) {
 static int set_attribute_and_warn(Unit *u, const char *controller, const char *attribute, const char *value) {
         int r;
 
-        r = cg_set_attribute(controller, u->cgroup_path, attribute, value);
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
+
+        r = cg_set_attribute(controller, crt->cgroup_path, attribute, value);
         if (r < 0)
                 log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%.*s': %m",
-                                    strna(attribute), empty_to_root(u->cgroup_path), (int) strcspn(value, NEWLINE), value);
+                                    strna(attribute), empty_to_root(crt->cgroup_path), (int) strcspn(value, NEWLINE), value);
 
         return r;
 }
@@ -619,10 +626,11 @@ void cgroup_context_done(CGroupContext *c) {
 static int unit_get_kernel_memory_limit(Unit *u, const char *file, uint64_t *ret) {
         assert(u);
 
-        if (!u->cgroup_realized)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -EOWNERDEAD;
 
-        return cg_get_attribute_as_uint64("memory", u->cgroup_path, file, ret);
+        return cg_get_attribute_as_uint64("memory", crt->cgroup_path, file, ret);
 }
 
 static int unit_compare_memory_limit(Unit *u, const char *property_name, uint64_t *ret_unit_value, uint64_t *ret_kernel_value) {
@@ -1172,12 +1180,13 @@ static void unit_set_xattr_graceful(Unit *u, const char *name, const void *data,
         assert(u);
         assert(name);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
-        r = cg_set_xattr(u->cgroup_path, name, data, size, 0);
+        r = cg_set_xattr(crt->cgroup_path, name, data, size, 0);
         if (r < 0)
-                log_unit_debug_errno(u, r, "Failed to set '%s' xattr on control group %s, ignoring: %m", name, empty_to_root(u->cgroup_path));
+                log_unit_debug_errno(u, r, "Failed to set '%s' xattr on control group %s, ignoring: %m", name, empty_to_root(crt->cgroup_path));
 }
 
 static void unit_remove_xattr_graceful(Unit *u, const char *name) {
@@ -1186,12 +1195,13 @@ static void unit_remove_xattr_graceful(Unit *u, const char *name) {
         assert(u);
         assert(name);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
-        r = cg_remove_xattr(u->cgroup_path, name);
+        r = cg_remove_xattr(crt->cgroup_path, name);
         if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r))
-                log_unit_debug_errno(u, r, "Failed to remove '%s' xattr flag on control group %s, ignoring: %m", name, empty_to_root(u->cgroup_path));
+                log_unit_debug_errno(u, r, "Failed to remove '%s' xattr flag on control group %s, ignoring: %m", name, empty_to_root(crt->cgroup_path));
 }
 
 static void cgroup_oomd_xattr_apply(Unit *u) {
@@ -1318,9 +1328,13 @@ static void cgroup_survive_xattr_apply(Unit *u) {
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
         if (u->survive_final_kill_signal) {
                 r = cg_set_xattr(
-                                u->cgroup_path,
+                                crt->cgroup_path,
                                 "user.survive_final_kill_signal",
                                 "1",
                                 1,
@@ -1328,7 +1342,7 @@ static void cgroup_survive_xattr_apply(Unit *u) {
                 /* user xattr support was added in kernel v5.7 */
                 if (ERRNO_IS_NEG_NOT_SUPPORTED(r))
                         r = cg_set_xattr(
-                                        u->cgroup_path,
+                                        crt->cgroup_path,
                                         "trusted.survive_final_kill_signal",
                                         "1",
                                         1,
@@ -1338,7 +1352,7 @@ static void cgroup_survive_xattr_apply(Unit *u) {
                                              r,
                                              "Failed to set 'survive_final_kill_signal' xattr on control "
                                              "group %s, ignoring: %m",
-                                             empty_to_root(u->cgroup_path));
+                                             empty_to_root(crt->cgroup_path));
         } else {
                 unit_remove_xattr_graceful(u, "user.survive_final_kill_signal");
                 unit_remove_xattr_graceful(u, "trusted.survive_final_kill_signal");
@@ -1475,6 +1489,12 @@ usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution,
 static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t quota) {
         usec_t new_period;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return USEC_INFINITY;
+
         if (quota == USEC_INFINITY)
                 /* Always use default period for infinity quota. */
                 return CGROUP_CPU_QUOTA_DEFAULT_PERIOD_USEC;
@@ -1487,10 +1507,10 @@ static usec_t cgroup_cpu_adjust_period_and_log(Unit *u, usec_t period, usec_t qu
         new_period = cgroup_cpu_adjust_period(period, quota, USEC_PER_MSEC, USEC_PER_SEC);
 
         if (new_period != period) {
-                log_unit_full(u, u->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING,
+                log_unit_full(u, crt->warned_clamping_cpu_quota_period ? LOG_DEBUG : LOG_WARNING,
                               "Clamping CPU interval for cpu.max: period is now %s",
                               FORMAT_TIMESPAN(new_period, 1));
-                u->warned_clamping_cpu_quota_period = true;
+                crt->warned_clamping_cpu_quota_period = true;
         }
 
         return new_period;
@@ -1510,17 +1530,25 @@ static void cgroup_apply_unified_cpu_idle(Unit *u, uint64_t weight) {
         bool is_idle;
         const char *idle_val;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
         is_idle = weight == CGROUP_WEIGHT_IDLE;
         idle_val = one_zero(is_idle);
-        r = cg_set_attribute("cpu", u->cgroup_path, "cpu.idle", idle_val);
+        r = cg_set_attribute("cpu", crt->cgroup_path, "cpu.idle", idle_val);
         if (r < 0 && (r != -ENOENT || is_idle))
                 log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r), r, "Failed to set '%s' attribute on '%s' to '%s': %m",
-                                    "cpu.idle", empty_to_root(u->cgroup_path), idle_val);
+                                    "cpu.idle", empty_to_root(crt->cgroup_path), idle_val);
 }
 
 static void cgroup_apply_unified_cpu_quota(Unit *u, usec_t quota, usec_t period) {
         char buf[(DECIMAL_STR_MAX(usec_t) + 1) * 2 + 1];
 
+        assert(u);
+
         period = cgroup_cpu_adjust_period_and_log(u, period, quota);
         if (quota != USEC_INFINITY)
                 xsprintf(buf, USEC_FMT " " USEC_FMT "\n",
@@ -1636,6 +1664,12 @@ static int set_bfq_weight(Unit *u, const char *controller, dev_t dev, uint64_t i
         uint64_t bfq_weight;
         int r;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
+
         /* FIXME: drop this function when distro kernels properly support BFQ through "io.weight"
          * See also: https://github.com/systemd/systemd/pull/13335 and
          * https://github.com/torvalds/linux/commit/65752aef0a407e1ef17ec78a7fc31ba4e0b360f9. */
@@ -1648,7 +1682,7 @@ static int set_bfq_weight(Unit *u, const char *controller, dev_t dev, uint64_t i
         else
                 xsprintf(buf, "%" PRIu64 "\n", bfq_weight);
 
-        r = cg_set_attribute(controller, u->cgroup_path, p, buf);
+        r = cg_set_attribute(controller, crt->cgroup_path, p, buf);
 
         /* FIXME: drop this when kernels prior
          * 795fe54c2a82 ("bfq: Add per-device weight") v5.4
@@ -1672,13 +1706,19 @@ static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_
         dev_t dev;
         int r, r1, r2;
 
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
         if (lookup_block_device(dev_path, &dev) < 0)
                 return;
 
         r1 = set_bfq_weight(u, "io", dev, io_weight);
 
         xsprintf(buf, DEVNUM_FORMAT_STR " %" PRIu64 "\n", DEVNUM_FORMAT_VAL(dev), io_weight);
-        r2 = cg_set_attribute("io", u->cgroup_path, "io.weight", buf);
+        r2 = cg_set_attribute("io", crt->cgroup_path, "io.weight", buf);
 
         /* Look at the configured device, when both fail, prefer io.weight errno. */
         r = r2 == -EOPNOTSUPP ? r1 : r2;
@@ -1686,7 +1726,7 @@ static void cgroup_apply_io_device_weight(Unit *u, const char *dev_path, uint64_
         if (r < 0)
                 log_unit_full_errno(u, LOG_LEVEL_CGROUP_WRITE(r),
                                     r, "Failed to set 'io[.bfq].weight' attribute on '%s' to '%.*s': %m",
-                                    empty_to_root(u->cgroup_path), (int) strcspn(buf, NEWLINE), buf);
+                                    empty_to_root(crt->cgroup_path), (int) strcspn(buf, NEWLINE), buf);
 }
 
 static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint64_t blkio_weight) {
@@ -1803,7 +1843,8 @@ void unit_modify_nft_set(Unit *u, bool add) {
         if (cg_all_unified() <= 0)
                 return;
 
-        if (u->cgroup_id == 0)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || crt->cgroup_id == 0)
                 return;
 
         if (!u->manager->fw_ctx) {
@@ -1820,15 +1861,15 @@ void unit_modify_nft_set(Unit *u, bool add) {
                 if (nft_set->source != NFT_SET_SOURCE_CGROUP)
                         continue;
 
-                uint64_t element = u->cgroup_id;
+                uint64_t element = crt->cgroup_id;
 
                 r = nft_set_element_modify_any(u->manager->fw_ctx, add, nft_set->nfproto, nft_set->table, nft_set->set, &element, sizeof(element));
                 if (r < 0)
                         log_warning_errno(r, "Failed to %s NFT set: family %s, table %s, set %s, cgroup %" PRIu64 ", ignoring: %m",
-                                          add? "add" : "delete", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
+                                          add? "add" : "delete", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, crt->cgroup_id);
                 else
                         log_debug("%s NFT set: family %s, table %s, set %s, cgroup %" PRIu64,
-                                  add? "Added" : "Deleted", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, u->cgroup_id);
+                                  add? "Added" : "Deleted", nfproto_to_string(nft_set->nfproto), nft_set->table, nft_set->set, crt->cgroup_id);
         }
 }
 
@@ -1846,13 +1887,15 @@ static void cgroup_apply_restrict_network_interfaces(Unit *u) {
 
 static int cgroup_apply_devices(Unit *u) {
         _cleanup_(bpf_program_freep) BPFProgram *prog = NULL;
-        const char *path;
         CGroupContext *c;
         CGroupDevicePolicy policy;
         int r;
 
         assert_se(c = unit_get_cgroup_context(u));
-        assert_se(path = u->cgroup_path);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
 
         policy = c->device_policy;
 
@@ -1866,9 +1909,9 @@ static int cgroup_apply_devices(Unit *u) {
                  * EINVAL here. */
 
                 if (c->device_allow || policy != CGROUP_DEVICE_POLICY_AUTO)
-                        r = cg_set_attribute("devices", path, "devices.deny", "a");
+                        r = cg_set_attribute("devices", crt->cgroup_path, "devices.deny", "a");
                 else
-                        r = cg_set_attribute("devices", path, "devices.allow", "a");
+                        r = cg_set_attribute("devices", crt->cgroup_path, "devices.allow", "a");
                 if (r < 0)
                         log_unit_full_errno(u, IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING, r,
                                             "Failed to reset devices.allow/devices.deny: %m");
@@ -1879,7 +1922,7 @@ static int cgroup_apply_devices(Unit *u) {
 
         bool any = false;
         if (allow_list_static) {
-                r = bpf_devices_allow_list_static(prog, path);
+                r = bpf_devices_allow_list_static(prog, crt->cgroup_path);
                 if (r > 0)
                         any = true;
         }
@@ -1891,11 +1934,11 @@ static int cgroup_apply_devices(Unit *u) {
                         continue;
 
                 if (path_startswith(a->path, "/dev/"))
-                        r = bpf_devices_allow_list_device(prog, path, a->path, a->permissions);
+                        r = bpf_devices_allow_list_device(prog, crt->cgroup_path, a->path, a->permissions);
                 else if ((val = startswith(a->path, "block-")))
-                        r = bpf_devices_allow_list_major(prog, path, val, 'b', a->permissions);
+                        r = bpf_devices_allow_list_major(prog, crt->cgroup_path, val, 'b', a->permissions);
                 else if ((val = startswith(a->path, "char-")))
-                        r = bpf_devices_allow_list_major(prog, path, val, 'c', a->permissions);
+                        r = bpf_devices_allow_list_major(prog, crt->cgroup_path, val, 'c', a->permissions);
                 else {
                         log_unit_debug(u, "Ignoring device '%s' while writing cgroup attribute.", a->path);
                         continue;
@@ -1914,7 +1957,7 @@ static int cgroup_apply_devices(Unit *u) {
                 policy = CGROUP_DEVICE_POLICY_STRICT;
         }
 
-        r = bpf_devices_apply_policy(&prog, policy, any, path, &u->bpf_device_control_installed);
+        r = bpf_devices_apply_policy(&prog, policy, any, crt->cgroup_path, &crt->bpf_device_control_installed);
         if (r < 0) {
                 static bool warned = false;
 
@@ -1961,9 +2004,9 @@ static void cgroup_context_apply(
                 CGroupMask apply_mask,
                 ManagerState state) {
 
+        bool is_host_root, is_local_root;
         const char *path;
         CGroupContext *c;
-        bool is_host_root, is_local_root;
         int r;
 
         assert(u);
@@ -1978,7 +2021,12 @@ static void cgroup_context_apply(
         is_host_root = unit_has_host_root_cgroup(u);
 
         assert_se(c = unit_get_cgroup_context(u));
-        assert_se(path = u->cgroup_path);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
+        path = crt->cgroup_path;
 
         if (is_local_root) /* Make sure we don't try to display messages with an empty path. */
                 path = "/";
@@ -2446,20 +2494,24 @@ CGroupMask unit_get_members_mask(Unit *u) {
 
         /* Returns the mask of controllers all of the unit's children require, merged */
 
-        if (u->cgroup_members_mask_valid)
-                return u->cgroup_members_mask; /* Use cached value if possible */
-
-        u->cgroup_members_mask = 0;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_members_mask_valid)
+                return crt->cgroup_members_mask; /* Use cached value if possible */
 
+        CGroupMask m = 0;
         if (u->type == UNIT_SLICE) {
                 Unit *member;
 
                 UNIT_FOREACH_DEPENDENCY(member, u, UNIT_ATOM_SLICE_OF)
-                        u->cgroup_members_mask |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
+                        m |= unit_get_subtree_mask(member); /* note that this calls ourselves again, for the children */
         }
 
-        u->cgroup_members_mask_valid = true;
-        return u->cgroup_members_mask;
+        if (crt) {
+                crt->cgroup_members_mask = m;
+                crt->cgroup_members_mask_valid = true;
+        }
+
+        return m;
 }
 
 CGroupMask unit_get_siblings_mask(Unit *u) {
@@ -2545,8 +2597,12 @@ void unit_invalidate_cgroup_members_masks(Unit *u) {
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
         /* Recurse invalidate the member masks cache all the way up the tree */
-        u->cgroup_members_mask_valid = false;
+        crt->cgroup_members_mask_valid = false;
 
         slice = UNIT_GET_SLICE(u);
         if (slice)
@@ -2558,11 +2614,12 @@ const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
         /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */
 
         while (u) {
-
-                if (u->cgroup_path &&
-                    u->cgroup_realized &&
-                    FLAGS_SET(u->cgroup_realized_mask, mask))
-                        return u->cgroup_path;
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                if (crt &&
+                    crt->cgroup_path &&
+                    crt->cgroup_realized &&
+                    FLAGS_SET(crt->cgroup_realized_mask, mask))
+                        return crt->cgroup_path;
 
                 u = UNIT_GET_SLICE(u);
         }
@@ -2612,27 +2669,34 @@ int unit_default_cgroup_path(const Unit *u, char **ret) {
 
 int unit_set_cgroup_path(Unit *u, const char *path) {
         _cleanup_free_ char *p = NULL;
+        CGroupRuntime *crt;
         int r;
 
         assert(u);
 
-        if (streq_ptr(u->cgroup_path, path))
+        crt = unit_get_cgroup_runtime(u);
+
+        if (crt && streq_ptr(crt->cgroup_path, path))
                 return 0;
 
+        unit_release_cgroup(u);
+
+        crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+
         if (path) {
                 p = strdup(path);
                 if (!p)
                         return -ENOMEM;
-        }
 
-        if (p) {
                 r = hashmap_put(u->manager->cgroup_unit, p, u);
                 if (r < 0)
                         return r;
         }
 
-        unit_release_cgroup(u);
-        u->cgroup_path = TAKE_PTR(p);
+        assert(!crt->cgroup_path);
+        crt->cgroup_path = TAKE_PTR(p);
 
         return 1;
 }
@@ -2646,10 +2710,11 @@ int unit_watch_cgroup(Unit *u) {
         /* Watches the "cgroups.events" attribute of this unit's cgroup for "empty" events, but only if
          * cgroupv2 is available. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        if (u->cgroup_control_inotify_wd >= 0)
+        if (crt->cgroup_control_inotify_wd >= 0)
                 return 0;
 
         /* Only applies to the unified hierarchy */
@@ -2667,30 +2732,29 @@ int unit_watch_cgroup(Unit *u) {
         if (r < 0)
                 return log_oom();
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events", &events);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, "cgroup.events", &events);
         if (r < 0)
                 return log_oom();
 
-        u->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
-        if (u->cgroup_control_inotify_wd < 0) {
+        crt->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+        if (crt->cgroup_control_inotify_wd < 0) {
 
                 if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
                                       * is not an error */
                         return 0;
 
-                return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", empty_to_root(crt->cgroup_path));
         }
 
-        r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd), u);
+        r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(crt->cgroup_control_inotify_wd), u);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor for control group %s to hash map: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor for control group %s to hash map: %m", empty_to_root(crt->cgroup_path));
 
         return 0;
 }
 
 int unit_watch_cgroup_memory(Unit *u) {
         _cleanup_free_ char *events = NULL;
-        CGroupContext *c;
         int r;
 
         assert(u);
@@ -2698,10 +2762,11 @@ int unit_watch_cgroup_memory(Unit *u) {
         /* Watches the "memory.events" attribute of this unit's cgroup for "oom_kill" events, but only if
          * cgroupv2 is available. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        c = unit_get_cgroup_context(u);
+        CGroupContext *c = unit_get_cgroup_context(u);
         if (!c)
                 return 0;
 
@@ -2716,7 +2781,7 @@ int unit_watch_cgroup_memory(Unit *u) {
         if (u->type == UNIT_SLICE)
                 return 0;
 
-        if (u->cgroup_memory_inotify_wd >= 0)
+        if (crt->cgroup_memory_inotify_wd >= 0)
                 return 0;
 
         /* Only applies to the unified hierarchy */
@@ -2730,23 +2795,23 @@ int unit_watch_cgroup_memory(Unit *u) {
         if (r < 0)
                 return log_oom();
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "memory.events", &events);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, "memory.events", &events);
         if (r < 0)
                 return log_oom();
 
-        u->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
-        if (u->cgroup_memory_inotify_wd < 0) {
+        crt->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY);
+        if (crt->cgroup_memory_inotify_wd < 0) {
 
                 if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this
                                       * is not an error */
                         return 0;
 
-                return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", empty_to_root(crt->cgroup_path));
         }
 
-        r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd), u);
+        r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(crt->cgroup_memory_inotify_wd), u);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor for control group %s to hash map: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor for control group %s to hash map: %m", empty_to_root(crt->cgroup_path));
 
         return 0;
 }
@@ -2757,12 +2822,15 @@ int unit_pick_cgroup_path(Unit *u) {
 
         assert(u);
 
-        if (u->cgroup_path)
-                return 0;
-
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return -EINVAL;
 
+        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+        if (crt->cgroup_path)
+                return 0;
+
         r = unit_default_cgroup_path(u, &path);
         if (r < 0)
                 return log_unit_error_errno(u, r, "Failed to generate default cgroup path: %m");
@@ -2797,25 +2865,27 @@ static int unit_update_cgroup(
         if (r < 0)
                 return r;
 
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+
         /* First, create our own group */
-        r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, u->cgroup_path);
+        r = cg_create_everywhere(u->manager->cgroup_supported, target_mask, crt->cgroup_path);
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", empty_to_root(u->cgroup_path));
+                return log_unit_error_errno(u, r, "Failed to create cgroup %s: %m", empty_to_root(crt->cgroup_path));
         created = r;
 
         if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
                 uint64_t cgroup_id = 0;
 
-                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_full_path);
+                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, NULL, &cgroup_full_path);
                 if (r == 0) {
                         r = cg_path_get_cgroupid(cgroup_full_path, &cgroup_id);
                         if (r < 0)
                                 log_unit_full_errno(u, ERRNO_IS_NOT_SUPPORTED(r) ? LOG_DEBUG : LOG_WARNING, r,
                                                     "Failed to get cgroup ID of cgroup %s, ignoring: %m", cgroup_full_path);
                 } else
-                        log_unit_warning_errno(u, r, "Failed to get full cgroup path on cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to get full cgroup path on cgroup %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
-                u->cgroup_id = cgroup_id;
+                crt->cgroup_id = cgroup_id;
         }
 
         /* Start watching it */
@@ -2824,23 +2894,23 @@ static int unit_update_cgroup(
 
         /* For v2 we preserve enabled controllers in delegated units, adjust others,
          * for v1 we figure out which controller hierarchies need migration. */
-        if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) {
+        if (created || !crt->cgroup_realized || !unit_cgroup_delegate(u)) {
                 CGroupMask result_mask = 0;
 
                 /* Enable all controllers we need */
-                r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask);
+                r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, crt->cgroup_path, &result_mask);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
                 /* Remember what's actually enabled now */
-                u->cgroup_enabled_mask = result_mask;
+                crt->cgroup_enabled_mask = result_mask;
 
-                migrate_mask = u->cgroup_realized_mask ^ target_mask;
+                migrate_mask = crt->cgroup_realized_mask ^ target_mask;
         }
 
         /* Keep track that this is now realized */
-        u->cgroup_realized = true;
-        u->cgroup_realized_mask = target_mask;
+        crt->cgroup_realized = true;
+        crt->cgroup_realized_mask = target_mask;
 
         /* Migrate processes in controller hierarchies both downwards (enabling) and upwards (disabling).
          *
@@ -2850,14 +2920,14 @@ static int unit_update_cgroup(
          * delegated units.
          */
         if (cg_all_unified() == 0) {
-                r = cg_migrate_v1_controllers(u->manager->cgroup_supported, migrate_mask, u->cgroup_path, migrate_callback, u);
+                r = cg_migrate_v1_controllers(u->manager->cgroup_supported, migrate_mask, crt->cgroup_path, migrate_callback, u);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to migrate controller cgroups from %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to migrate controller cgroups from %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
                 is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
-                r = cg_trim_v1_controllers(u->manager->cgroup_supported, ~target_mask, u->cgroup_path, !is_root_slice);
+                r = cg_trim_v1_controllers(u->manager->cgroup_supported, ~target_mask, crt->cgroup_path, !is_root_slice);
                 if (r < 0)
-                        log_unit_warning_errno(u, r, "Failed to delete controller cgroups %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                        log_unit_warning_errno(u, r, "Failed to delete controller cgroups %s, ignoring: %m", empty_to_root(crt->cgroup_path));
         }
 
         /* Set attributes */
@@ -2887,11 +2957,12 @@ static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suf
         if (!u->manager->system_bus)
                 return -EIO;
 
-        if (!u->cgroup_path)
-                return -EINVAL;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
 
         /* Determine this unit's cgroup path relative to our cgroup root */
-        pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
+        pp = path_startswith(crt->cgroup_path, u->manager->cgroup_root);
         if (!pp)
                 return -EINVAL;
 
@@ -2935,10 +3006,12 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
         if (r < 0)
                 return r;
 
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+
         if (isempty(suffix_path))
-                p = u->cgroup_path;
+                p = crt->cgroup_path;
         else {
-                joined = path_join(u->cgroup_path, suffix_path);
+                joined = path_join(crt->cgroup_path, suffix_path);
                 if (!joined)
                         return -ENOMEM;
 
@@ -3010,7 +3083,7 @@ int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
                                 continue;
 
                         /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
-                        if (delegated_mask & u->cgroup_realized_mask & bit) {
+                        if (delegated_mask & crt->cgroup_realized_mask & bit) {
                                 r = cg_attach(cgroup_controller_to_string(c), p, pid->pid);
                                 if (r >= 0)
                                         continue; /* Success! */
@@ -3043,6 +3116,10 @@ static bool unit_has_mask_realized(
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return false;
+
         /* Returns true if this unit is fully realized. We check four things:
          *
          * 1. Whether the cgroup was created at all
@@ -3058,10 +3135,10 @@ static bool unit_has_mask_realized(
          * enabled through cgroup.subtree_control, and since the BPF pseudo-controllers don't show up there, they
          * simply don't matter. */
 
-        return u->cgroup_realized &&
-                ((u->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
-                ((u->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
-                u->cgroup_invalidated_mask == 0;
+        return crt->cgroup_realized &&
+                ((crt->cgroup_realized_mask ^ target_mask) & CGROUP_MASK_V1) == 0 &&
+                ((crt->cgroup_enabled_mask ^ enable_mask) & CGROUP_MASK_V2) == 0 &&
+                crt->cgroup_invalidated_mask == 0;
 }
 
 static bool unit_has_mask_disables_realized(
@@ -3071,14 +3148,18 @@ static bool unit_has_mask_disables_realized(
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return true;
+
         /* Returns true if all controllers which should be disabled are indeed disabled.
          *
          * Unlike unit_has_mask_realized, we don't care what was enabled, only that anything we want to remove is
          * already removed. */
 
-        return !u->cgroup_realized ||
-                (FLAGS_SET(u->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
-                 FLAGS_SET(u->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
+        return !crt->cgroup_realized ||
+                (FLAGS_SET(crt->cgroup_realized_mask, target_mask & CGROUP_MASK_V1) &&
+                 FLAGS_SET(crt->cgroup_enabled_mask, enable_mask & CGROUP_MASK_V2));
 }
 
 static bool unit_has_mask_enables_realized(
@@ -3088,14 +3169,18 @@ static bool unit_has_mask_enables_realized(
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return false;
+
         /* Returns true if all controllers which should be enabled are indeed enabled.
          *
          * Unlike unit_has_mask_realized, we don't care about the controllers that are not present, only that anything
          * we want to add is already added. */
 
-        return u->cgroup_realized &&
-                ((u->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (u->cgroup_realized_mask & CGROUP_MASK_V1) &&
-                ((u->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (u->cgroup_enabled_mask & CGROUP_MASK_V2);
+        return crt->cgroup_realized &&
+                ((crt->cgroup_realized_mask | target_mask) & CGROUP_MASK_V1) == (crt->cgroup_realized_mask & CGROUP_MASK_V1) &&
+                ((crt->cgroup_enabled_mask | enable_mask) & CGROUP_MASK_V2) == (crt->cgroup_enabled_mask & CGROUP_MASK_V2);
 }
 
 void unit_add_to_cgroup_realize_queue(Unit *u) {
@@ -3144,8 +3229,10 @@ static int unit_realize_cgroup_now_enable(Unit *u, ManagerState state) {
         if (unit_has_mask_enables_realized(u, target_mask, enable_mask))
                 return 0;
 
-        new_target_mask = u->cgroup_realized_mask | target_mask;
-        new_enable_mask = u->cgroup_enabled_mask | enable_mask;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
+        new_target_mask = (crt ? crt->cgroup_realized_mask : 0) | target_mask;
+        new_enable_mask = (crt ? crt->cgroup_enabled_mask : 0) | enable_mask;
 
         return unit_update_cgroup(u, new_target_mask, new_enable_mask, state);
 }
@@ -3164,9 +3251,13 @@ static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
                 CGroupMask target_mask, enable_mask, new_target_mask, new_enable_mask;
                 int r;
 
+                CGroupRuntime *rt = unit_get_cgroup_runtime(m);
+                if (!rt)
+                        continue;
+
                 /* The cgroup for this unit might not actually be fully realised yet, in which case it isn't
                  * holding any controllers open anyway. */
-                if (!m->cgroup_realized)
+                if (!rt->cgroup_realized)
                         continue;
 
                 /* We must disable those below us first in order to release the controller. */
@@ -3180,8 +3271,8 @@ static int unit_realize_cgroup_now_disable(Unit *u, ManagerState state) {
                 if (unit_has_mask_disables_realized(m, target_mask, enable_mask))
                         continue;
 
-                new_target_mask = m->cgroup_realized_mask & target_mask;
-                new_enable_mask = m->cgroup_enabled_mask & enable_mask;
+                new_target_mask = rt->cgroup_realized_mask & target_mask;
+                new_enable_mask = rt->cgroup_enabled_mask & enable_mask;
 
                 r = unit_update_cgroup(m, new_target_mask, new_enable_mask, state);
                 if (r < 0)
@@ -3268,8 +3359,10 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
         if (r < 0)
                 return r;
 
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+
         /* Now, reset the invalidation mask */
-        u->cgroup_invalidated_mask = 0;
+        crt->cgroup_invalidated_mask = 0;
         return 0;
 }
 
@@ -3320,11 +3413,13 @@ void unit_add_family_to_cgroup_realize_queue(Unit *u) {
          * masks. */
 
         do {
-                Unit *m;
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
 
                 /* Children of u likely changed when we're called */
-                u->cgroup_members_mask_valid = false;
+                if (crt)
+                        crt->cgroup_members_mask_valid = false;
 
+                Unit *m;
                 UNIT_FOREACH_DEPENDENCY(m, u, UNIT_ATOM_SLICE_OF) {
 
                         /* No point in doing cgroup application for units without active processes. */
@@ -3333,7 +3428,8 @@ void unit_add_family_to_cgroup_realize_queue(Unit *u) {
 
                         /* We only enqueue siblings if they were realized once at least, in the main
                          * hierarchy. */
-                        if (!m->cgroup_realized)
+                        crt = unit_get_cgroup_runtime(m);
+                        if (!crt || !crt->cgroup_realized)
                                 continue;
 
                         /* If the unit doesn't need any new controllers and has current ones
@@ -3384,26 +3480,50 @@ void unit_release_cgroup(Unit *u) {
         /* Forgets all cgroup details for this cgroup — but does *not* destroy the cgroup. This is hence OK to call
          * when we close down everything for reexecution, where we really want to leave the cgroup in place. */
 
-        if (u->cgroup_path) {
-                (void) hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
-                u->cgroup_path = mfree(u->cgroup_path);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
+        if (crt->cgroup_path) {
+                (void) hashmap_remove(u->manager->cgroup_unit, crt->cgroup_path);
+                crt->cgroup_path = mfree(crt->cgroup_path);
         }
 
-        if (u->cgroup_control_inotify_wd >= 0) {
-                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_control_inotify_wd) < 0)
-                        log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", u->cgroup_control_inotify_wd, u->id);
+        if (crt->cgroup_control_inotify_wd >= 0) {
+                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, crt->cgroup_control_inotify_wd) < 0)
+                        log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", crt->cgroup_control_inotify_wd, u->id);
 
-                (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd));
-                u->cgroup_control_inotify_wd = -1;
+                (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(crt->cgroup_control_inotify_wd));
+                crt->cgroup_control_inotify_wd = -1;
         }
 
-        if (u->cgroup_memory_inotify_wd >= 0) {
-                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_memory_inotify_wd) < 0)
-                        log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", u->cgroup_memory_inotify_wd, u->id);
+        if (crt->cgroup_memory_inotify_wd >= 0) {
+                if (inotify_rm_watch(u->manager->cgroup_inotify_fd, crt->cgroup_memory_inotify_wd) < 0)
+                        log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", crt->cgroup_memory_inotify_wd, u->id);
 
-                (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd));
-                u->cgroup_memory_inotify_wd = -1;
+                (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(crt->cgroup_memory_inotify_wd));
+                crt->cgroup_memory_inotify_wd = -1;
         }
+
+        *(CGroupRuntime**) ((uint8_t*) u + UNIT_VTABLE(u)->cgroup_runtime_offset) = cgroup_runtime_free(crt);
+}
+
+int unit_cgroup_is_empty(Unit *u) {
+        int r;
+
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -ENXIO;
+        if (!crt->cgroup_path)
+                return -EOWNERDEAD;
+
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path);
+        if (r < 0)
+                return log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty, ignoring: %m", empty_to_root(crt->cgroup_path));
+
+        return r;
 }
 
 bool unit_maybe_release_cgroup(Unit *u) {
@@ -3411,17 +3531,16 @@ bool unit_maybe_release_cgroup(Unit *u) {
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return true;
 
-        /* Don't release the cgroup if there are still processes under it. If we get notified later when all the
-         * processes exit (e.g. the processes were in D-state and exited after the unit was marked as failed)
-         * we need the cgroup paths to continue to be tracked by the manager so they can be looked up and cleaned
-         * up later. */
-        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
-        if (r < 0)
-                log_unit_debug_errno(u, r, "Error checking if the cgroup is recursively empty, ignoring: %m");
-        else if (r == 1) {
+        /* Don't release the cgroup if there are still processes under it. If we get notified later when all
+         * the processes exit (e.g. the processes were in D-state and exited after the unit was marked as
+         * failed) we need the cgroup paths to continue to be tracked by the manager so they can be looked up
+         * and cleaned up later. */
+        r = unit_cgroup_is_empty(u);
+        if (r == 1) {
                 unit_release_cgroup(u);
                 return true;
         }
@@ -3436,8 +3555,8 @@ void unit_prune_cgroup(Unit *u) {
         assert(u);
 
         /* Removes the cgroup, if empty and possible, and stops watching it. */
-
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
         /* Cache the last CPU and memory usage values before we destroy the cgroup */
@@ -3454,14 +3573,14 @@ void unit_prune_cgroup(Unit *u) {
 
         is_root_slice = unit_has_name(u, SPECIAL_ROOT_SLICE);
 
-        r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !is_root_slice);
+        r = cg_trim_everywhere(u->manager->cgroup_supported, crt->cgroup_path, !is_root_slice);
         if (r < 0)
                 /* One reason we could have failed here is, that the cgroup still contains a process.
                  * However, if the cgroup becomes removable at a later time, it might be removed when
                  * the containing slice is stopped. So even if we failed now, this unit shouldn't assume
                  * that the cgroup is still realized the next time it is started. Do not return early
                  * on error, continue cleanup. */
-                log_unit_full_errno(u, r == -EBUSY ? LOG_DEBUG : LOG_WARNING, r, "Failed to destroy cgroup %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                log_unit_full_errno(u, r == -EBUSY ? LOG_DEBUG : LOG_WARNING, r, "Failed to destroy cgroup %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
         if (is_root_slice)
                 return;
@@ -3469,11 +3588,15 @@ void unit_prune_cgroup(Unit *u) {
         if (!unit_maybe_release_cgroup(u)) /* Returns true if the cgroup was released */
                 return;
 
-        u->cgroup_realized = false;
-        u->cgroup_realized_mask = 0;
-        u->cgroup_enabled_mask = 0;
+        crt = unit_get_cgroup_runtime(u); /* The above might have destroyed the runtime object, let's see if it's still there */
+        if (!crt)
+                return;
+
+        crt->cgroup_realized = false;
+        crt->cgroup_realized_mask = 0;
+        crt->cgroup_enabled_mask = 0;
 
-        u->bpf_device_control_installed = bpf_program_free(u->bpf_device_control_installed);
+        crt->bpf_device_control_installed = bpf_program_free(crt->bpf_device_control_installed);
 }
 
 int unit_search_main_pid(Unit *u, PidRef *ret) {
@@ -3484,10 +3607,11 @@ int unit_search_main_pid(Unit *u, PidRef *ret) {
         assert(u);
         assert(ret);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENXIO;
 
-        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f);
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, &f);
         if (r < 0)
                 return r;
 
@@ -3583,7 +3707,8 @@ int unit_synthesize_cgroup_empty_event(Unit *u) {
          * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
          * get as notification source as soon as we stopped having any useful PIDs to watch for. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENOENT;
 
         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
@@ -3609,7 +3734,8 @@ int unit_watch_all_pids(Unit *u) {
          * get reliable cgroup empty notifications: we try to use
          * SIGCHLD as replacement. */
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENOENT;
 
         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
@@ -3618,7 +3744,7 @@ int unit_watch_all_pids(Unit *u) {
         if (r > 0) /* On unified we can use proper notifications */
                 return 0;
 
-        return unit_watch_pids_in_path(u, u->cgroup_path);
+        return unit_watch_pids_in_path(u, crt->cgroup_path);
 }
 
 static int on_cgroup_empty_event(sd_event_source *s, void *userdata) {
@@ -3683,15 +3809,8 @@ void unit_add_to_cgroup_empty_queue(Unit *u) {
                 return;
 
         /* Let's verify that the cgroup is really empty */
-        if (!u->cgroup_path)
-                return;
-
-        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
-        if (r < 0) {
-                log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", empty_to_root(u->cgroup_path));
-                return;
-        }
-        if (r == 0)
+        r = unit_cgroup_is_empty(u);
+        if (r <= 0)
                 return;
 
         LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u);
@@ -3719,7 +3838,10 @@ int unit_check_oomd_kill(Unit *u) {
         uint64_t n = 0;
         int r;
 
-        if (!u->cgroup_path)
+        assert(u);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
         r = cg_all_unified();
@@ -3728,7 +3850,7 @@ int unit_check_oomd_kill(Unit *u) {
         else if (r == 0)
                 return 0;
 
-        r = cg_get_xattr_malloc(u->cgroup_path, "user.oomd_ooms", &value);
+        r = cg_get_xattr_malloc(crt->cgroup_path, "user.oomd_ooms", &value);
         if (r < 0 && !ERRNO_IS_XATTR_ABSENT(r))
                 return r;
 
@@ -3738,15 +3860,15 @@ int unit_check_oomd_kill(Unit *u) {
                          return r;
         }
 
-        increased = n > u->managed_oom_kill_last;
-        u->managed_oom_kill_last = n;
+        increased = n > crt->managed_oom_kill_last;
+        crt->managed_oom_kill_last = n;
 
         if (!increased)
                 return 0;
 
         n = 0;
         value = mfree(value);
-        r = cg_get_xattr_malloc(u->cgroup_path, "user.oomd_kill", &value);
+        r = cg_get_xattr_malloc(crt->cgroup_path, "user.oomd_kill", &value);
         if (r >= 0 && !isempty(value))
                 (void) safe_atou64(value, &n);
 
@@ -3773,10 +3895,16 @@ int unit_check_oom(Unit *u) {
         uint64_t c;
         int r;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        r = cg_get_keyed_attribute("memory", u->cgroup_path, "memory.events", STRV_MAKE("oom_kill"), &oom_kill);
+        r = cg_get_keyed_attribute(
+                        "memory",
+                        crt->cgroup_path,
+                        "memory.events",
+                        STRV_MAKE("oom_kill"),
+                        &oom_kill);
         if (IN_SET(r, -ENOENT, -ENXIO)) /* Handle gracefully if cgroup or oom_kill attribute don't exist */
                 c = 0;
         else if (r < 0)
@@ -3787,8 +3915,8 @@ int unit_check_oom(Unit *u) {
                         return log_unit_debug_errno(u, r, "Failed to parse oom_kill field: %m");
         }
 
-        increased = c > u->oom_kill_last;
-        u->oom_kill_last = c;
+        increased = c > crt->oom_kill_last;
+        crt->oom_kill_last = c;
 
         if (!increased)
                 return 0;
@@ -3838,7 +3966,9 @@ static void unit_add_to_cgroup_oom_queue(Unit *u) {
 
         if (u->in_cgroup_oom_queue)
                 return;
-        if (!u->cgroup_path)
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return;
 
         LIST_PREPEND(cgroup_oom_queue, u->manager->cgroup_oom_queue, u);
@@ -3875,11 +4005,16 @@ static int unit_check_cgroup_events(Unit *u) {
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
-        r = cg_get_keyed_attribute_graceful(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events",
-                                            STRV_MAKE("populated", "frozen"), values);
+        r = cg_get_keyed_attribute_graceful(
+                        SYSTEMD_CGROUP_CONTROLLER,
+                        crt->cgroup_path,
+                        "cgroup.events",
+                        STRV_MAKE("populated", "frozen"),
+                        values);
         if (r < 0)
                 return r;
 
@@ -4309,7 +4444,8 @@ int unit_get_memory_available(Unit *u, uint64_t *ret) {
                 if (!unit_context)
                         return -ENODATA;
 
-                if (!u->cgroup_path)
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                if (!crt || !crt->cgroup_path)
                         continue;
 
                 (void) unit_get_memory_current(u, &current);
@@ -4341,21 +4477,22 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
         if (!UNIT_CGROUP_BOOL(u, memory_accounting))
                 return -ENODATA;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
         if (unit_has_host_root_cgroup(u))
                 return procfs_memory_get_used(ret);
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
+        if ((crt->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
                 return -ENODATA;
 
         r = cg_all_unified();
         if (r < 0)
                 return r;
 
-        return cg_get_attribute_as_uint64("memory", u->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret);
+        return cg_get_attribute_as_uint64("memory", crt->cgroup_path, r > 0 ? "memory.current" : "memory.usage_in_bytes", ret);
 }
 
 int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uint64_t *ret) {
@@ -4378,7 +4515,10 @@ int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uin
         if (!UNIT_CGROUP_BOOL(u, memory_accounting))
                 return -ENODATA;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -ENODATA;
+        if (!crt->cgroup_path)
                 /* If the cgroup is already gone, we try to find the last cached value. */
                 goto finish;
 
@@ -4386,7 +4526,7 @@ int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uin
         if (unit_has_host_root_cgroup(u))
                 return -ENODATA;
 
-        if (!FLAGS_SET(u->cgroup_realized_mask, CGROUP_MASK_MEMORY))
+        if (!FLAGS_SET(crt->cgroup_realized_mask, CGROUP_MASK_MEMORY))
                 return -ENODATA;
 
         r = cg_all_unified();
@@ -4395,14 +4535,14 @@ int unit_get_memory_accounting(Unit *u, CGroupMemoryAccountingMetric metric, uin
         if (r == 0)
                 return -ENODATA;
 
-        r = cg_get_attribute_as_uint64("memory", u->cgroup_path, attributes_table[metric], &bytes);
+        r = cg_get_attribute_as_uint64("memory", crt->cgroup_path, attributes_table[metric], &bytes);
         if (r < 0 && r != -ENODATA)
                 return r;
         updated = r >= 0;
 
 finish:
         if (metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST) {
-                uint64_t *last = &u->memory_accounting_last[metric];
+                uint64_t *last = &crt->memory_accounting_last[metric];
 
                 if (updated)
                         *last = bytes;
@@ -4427,17 +4567,18 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret) {
         if (!UNIT_CGROUP_BOOL(u, tasks_accounting))
                 return -ENODATA;
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
         if (unit_has_host_root_cgroup(u))
                 return procfs_tasks_get_current(ret);
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
+        if ((crt->cgroup_realized_mask & CGROUP_MASK_PIDS) == 0)
                 return -ENODATA;
 
-        return cg_get_attribute_as_uint64("pids", u->cgroup_path, "pids.current", ret);
+        return cg_get_attribute_as_uint64("pids", crt->cgroup_path, "pids.current", ret);
 }
 
 static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
@@ -4447,7 +4588,8 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
         assert(u);
         assert(ret);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
@@ -4455,7 +4597,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
                 return procfs_cpu_get_usage(ret);
 
         /* Requisite controllers for CPU accounting are not enabled */
-        if ((get_cpu_accounting_mask() & ~u->cgroup_realized_mask) != 0)
+        if ((get_cpu_accounting_mask() & ~crt->cgroup_realized_mask) != 0)
                 return -ENODATA;
 
         r = cg_all_unified();
@@ -4465,7 +4607,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
                 _cleanup_free_ char *val = NULL;
                 uint64_t us;
 
-                r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
+                r = cg_get_keyed_attribute("cpu", crt->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
                 if (IN_SET(r, -ENOENT, -ENXIO))
                         return -ENODATA;
                 if (r < 0)
@@ -4477,7 +4619,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
 
                 ns = us * NSEC_PER_USEC;
         } else
-                return cg_get_attribute_as_uint64("cpuacct", u->cgroup_path, "cpuacct.usage", ret);
+                return cg_get_attribute_as_uint64("cpuacct", crt->cgroup_path, "cpuacct.usage", ret);
 
         *ret = ns;
         return 0;
@@ -4493,27 +4635,31 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
          * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
          * call this function with a NULL return value. */
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -ENODATA;
+
         if (!UNIT_CGROUP_BOOL(u, cpu_accounting))
                 return -ENODATA;
 
         r = unit_get_cpu_usage_raw(u, &ns);
-        if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
+        if (r == -ENODATA && crt->cpu_usage_last != NSEC_INFINITY) {
                 /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
                  * cached value. */
 
                 if (ret)
-                        *ret = u->cpu_usage_last;
+                        *ret = crt->cpu_usage_last;
                 return 0;
         }
         if (r < 0)
                 return r;
 
-        if (ns > u->cpu_usage_base)
-                ns -= u->cpu_usage_base;
+        if (ns > crt->cpu_usage_base)
+                ns -= crt->cpu_usage_base;
         else
                 ns = 0;
 
-        u->cpu_usage_last = ns;
+        crt->cpu_usage_last = ns;
         if (ret)
                 *ret = ns;
 
@@ -4536,9 +4682,13 @@ int unit_get_ip_accounting(
         if (!UNIT_CGROUP_BOOL(u, ip_accounting))
                 return -ENODATA;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -ENODATA;
+
         fd = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS) ?
-                u->ip_accounting_ingress_map_fd :
-                u->ip_accounting_egress_map_fd;
+                crt->ip_accounting_ingress_map_fd :
+                crt->ip_accounting_egress_map_fd;
         if (fd < 0)
                 return -ENODATA;
 
@@ -4553,7 +4703,7 @@ int unit_get_ip_accounting(
          * all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
          * ip_accounting_extra[] field, and add them in here transparently. */
 
-        *ret = value + u->ip_accounting_extra[metric];
+        *ret = value + crt->ip_accounting_extra[metric];
 
         return r;
 }
@@ -4623,7 +4773,8 @@ static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_AC
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
         if (unit_has_host_root_cgroup(u))
@@ -4635,10 +4786,10 @@ static int unit_get_io_accounting_raw(Unit *u, uint64_t ret[static _CGROUP_IO_AC
         if (r == 0) /* TODO: support cgroupv1 */
                 return -ENODATA;
 
-        if (!FLAGS_SET(u->cgroup_realized_mask, CGROUP_MASK_IO))
+        if (!FLAGS_SET(crt->cgroup_realized_mask, CGROUP_MASK_IO))
                 return -ENODATA;
 
-        r = cg_get_path("io", u->cgroup_path, "io.stat", &path);
+        r = cg_get_path("io", crt->cgroup_path, "io.stat", &path);
         if (r < 0)
                 return r;
 
@@ -4706,26 +4857,30 @@ int unit_get_io_accounting(
         if (!UNIT_CGROUP_BOOL(u, io_accounting))
                 return -ENODATA;
 
-        if (allow_cache && u->io_accounting_last[metric] != UINT64_MAX)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -ENODATA;
+
+        if (allow_cache && crt->io_accounting_last[metric] != UINT64_MAX)
                 goto done;
 
         r = unit_get_io_accounting_raw(u, raw);
-        if (r == -ENODATA && u->io_accounting_last[metric] != UINT64_MAX)
+        if (r == -ENODATA && crt->io_accounting_last[metric] != UINT64_MAX)
                 goto done;
         if (r < 0)
                 return r;
 
         for (CGroupIOAccountingMetric i = 0; i < _CGROUP_IO_ACCOUNTING_METRIC_MAX; i++) {
                 /* Saturated subtraction */
-                if (raw[i] > u->io_accounting_base[i])
-                        u->io_accounting_last[i] = raw[i] - u->io_accounting_base[i];
+                if (raw[i] > crt->io_accounting_base[i])
+                        crt->io_accounting_last[i] = raw[i] - crt->io_accounting_base[i];
                 else
-                        u->io_accounting_last[i] = 0;
+                        crt->io_accounting_last[i] = 0;
         }
 
 done:
         if (ret)
-                *ret = u->io_accounting_last[metric];
+                *ret = crt->io_accounting_last[metric];
 
         return 0;
 }
@@ -4735,11 +4890,15 @@ int unit_reset_cpu_accounting(Unit *u) {
 
         assert(u);
 
-        u->cpu_usage_last = NSEC_INFINITY;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return 0;
+
+        crt->cpu_usage_last = NSEC_INFINITY;
 
-        r = unit_get_cpu_usage_raw(u, &u->cpu_usage_base);
+        r = unit_get_cpu_usage_raw(u, &crt->cpu_usage_base);
         if (r < 0) {
-                u->cpu_usage_base = 0;
+                crt->cpu_usage_base = 0;
                 return r;
         }
 
@@ -4749,7 +4908,11 @@ int unit_reset_cpu_accounting(Unit *u) {
 void unit_reset_memory_accounting_last(Unit *u) {
         assert(u);
 
-        FOREACH_ARRAY(i, u->memory_accounting_last, ELEMENTSOF(u->memory_accounting_last))
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
+        FOREACH_ARRAY(i, crt->memory_accounting_last, ELEMENTSOF(crt->memory_accounting_last))
                 *i = UINT64_MAX;
 }
 
@@ -4758,13 +4921,17 @@ int unit_reset_ip_accounting(Unit *u) {
 
         assert(u);
 
-        if (u->ip_accounting_ingress_map_fd >= 0)
-                RET_GATHER(r, bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd));
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return 0;
+
+        if (crt->ip_accounting_ingress_map_fd >= 0)
+                RET_GATHER(r, bpf_firewall_reset_accounting(crt->ip_accounting_ingress_map_fd));
 
-        if (u->ip_accounting_egress_map_fd >= 0)
-                RET_GATHER(r, bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd));
+        if (crt->ip_accounting_egress_map_fd >= 0)
+                RET_GATHER(r, bpf_firewall_reset_accounting(crt->ip_accounting_egress_map_fd));
 
-        zero(u->ip_accounting_extra);
+        zero(crt->ip_accounting_extra);
 
         return r;
 }
@@ -4772,7 +4939,11 @@ int unit_reset_ip_accounting(Unit *u) {
 void unit_reset_io_accounting_last(Unit *u) {
         assert(u);
 
-        FOREACH_ARRAY(i, u->io_accounting_last, _CGROUP_IO_ACCOUNTING_METRIC_MAX)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return;
+
+        FOREACH_ARRAY(i, crt->io_accounting_last, _CGROUP_IO_ACCOUNTING_METRIC_MAX)
                 *i = UINT64_MAX;
 }
 
@@ -4781,11 +4952,15 @@ int unit_reset_io_accounting(Unit *u) {
 
         assert(u);
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return 0;
+
         unit_reset_io_accounting_last(u);
 
-        r = unit_get_io_accounting_raw(u, u->io_accounting_base);
+        r = unit_get_io_accounting_raw(u, crt->io_accounting_base);
         if (r < 0) {
-                zero(u->io_accounting_base);
+                zero(crt->io_accounting_base);
                 return r;
         }
 
@@ -4811,6 +4986,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return;
+
         if (m == 0)
                 return;
 
@@ -4821,10 +5000,10 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
         if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
                 m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
 
-        if (FLAGS_SET(u->cgroup_invalidated_mask, m)) /* NOP? */
+        if (FLAGS_SET(crt->cgroup_invalidated_mask, m)) /* NOP? */
                 return;
 
-        u->cgroup_invalidated_mask |= m;
+        crt->cgroup_invalidated_mask |= m;
         unit_add_to_cgroup_realize_queue(u);
 }
 
@@ -4834,10 +5013,14 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return;
 
-        if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
                 return;
 
-        u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
+        if (crt->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
+                return;
+
+        crt->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
         unit_add_to_cgroup_realize_queue(u);
 
         /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
@@ -4897,8 +5080,16 @@ static int unit_cgroup_freezer_kernel_state(Unit *u, FreezerState *ret) {
         assert(u);
         assert(ret);
 
-        r = cg_get_keyed_attribute(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.events",
-                                   STRV_MAKE("frozen"), &val);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
+                return -EOWNERDEAD;
+
+        r = cg_get_keyed_attribute(
+                        SYSTEMD_CGROUP_CONTROLLER,
+                        crt->cgroup_path,
+                        "cgroup.events",
+                        STRV_MAKE("frozen"),
+                        &val);
         if (IN_SET(r, -ENOENT, -ENXIO))
                 return -ENODATA;
         if (r < 0)
@@ -4926,9 +5117,13 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) {
         assert(IN_SET(action, FREEZER_FREEZE, FREEZER_PARENT_FREEZE,
                               FREEZER_THAW, FREEZER_PARENT_THAW));
 
-        if (!cg_freezer_supported() || !u->cgroup_realized)
+        if (!cg_freezer_supported())
                 return 0;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_realized)
+                return -EBUSY;
+
         unit_next_freezer_state(u, action, &next, &target);
 
         r = unit_cgroup_freezer_kernel_state(u, &current);
@@ -4956,7 +5151,7 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) {
                         next = FREEZER_THAWING;
         }
 
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "cgroup.freeze", &path);
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, crt->cgroup_path, "cgroup.freeze", &path);
         if (r < 0)
                 return r;
 
@@ -4979,10 +5174,11 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         assert(u);
         assert(cpus);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENODATA;
 
-        if ((u->cgroup_realized_mask & CGROUP_MASK_CPUSET) == 0)
+        if ((crt->cgroup_realized_mask & CGROUP_MASK_CPUSET) == 0)
                 return -ENODATA;
 
         r = cg_all_unified();
@@ -4991,7 +5187,7 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         if (r == 0)
                 return -ENODATA;
 
-        r = cg_get_attribute("cpuset", u->cgroup_path, name, &v);
+        r = cg_get_attribute("cpuset", crt->cgroup_path, name, &v);
         if (r == -ENOENT)
                 return -ENODATA;
         if (r < 0)
@@ -5000,6 +5196,425 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL);
 }
 
+CGroupRuntime *cgroup_runtime_new(void) {
+        _cleanup_(cgroup_runtime_freep) CGroupRuntime *crt = NULL;
+
+        crt = new(CGroupRuntime, 1);
+        if (!crt)
+                return NULL;
+
+        *crt = (CGroupRuntime) {
+                .cpu_usage_last = NSEC_INFINITY,
+
+                .cgroup_control_inotify_wd = -1,
+                .cgroup_memory_inotify_wd = -1,
+
+                .ip_accounting_ingress_map_fd = -EBADF,
+                .ip_accounting_egress_map_fd = -EBADF,
+
+                .ipv4_allow_map_fd = -EBADF,
+                .ipv6_allow_map_fd = -EBADF,
+                .ipv4_deny_map_fd = -EBADF,
+                .ipv6_deny_map_fd = -EBADF,
+
+                .cgroup_invalidated_mask = _CGROUP_MASK_ALL,
+        };
+
+        FOREACH_ARRAY(i, crt->memory_accounting_last, ELEMENTSOF(crt->memory_accounting_last))
+                *i = UINT64_MAX;
+        FOREACH_ARRAY(i, crt->io_accounting_base, ELEMENTSOF(crt->io_accounting_base))
+                *i = UINT64_MAX;
+        FOREACH_ARRAY(i, crt->io_accounting_last, ELEMENTSOF(crt->io_accounting_last))
+                *i = UINT64_MAX;
+        FOREACH_ARRAY(i, crt->ip_accounting_extra, ELEMENTSOF(crt->ip_accounting_extra))
+                *i = UINT64_MAX;
+
+        return TAKE_PTR(crt);
+}
+
+CGroupRuntime *cgroup_runtime_free(CGroupRuntime *crt) {
+        if (!crt)
+                return NULL;
+
+        fdset_free(crt->initial_socket_bind_link_fds);
+#if BPF_FRAMEWORK
+        bpf_link_free(crt->ipv4_socket_bind_link);
+        bpf_link_free(crt->ipv6_socket_bind_link);
+#endif
+        hashmap_free(crt->bpf_foreign_by_key);
+
+        bpf_program_free(crt->bpf_device_control_installed);
+
+#if BPF_FRAMEWORK
+        bpf_link_free(crt->restrict_ifaces_ingress_bpf_link);
+        bpf_link_free(crt->restrict_ifaces_egress_bpf_link);
+#endif
+        fdset_free(crt->initial_restric_ifaces_link_fds);
+
+        safe_close(crt->ipv4_allow_map_fd);
+        safe_close(crt->ipv6_allow_map_fd);
+        safe_close(crt->ipv4_deny_map_fd);
+        safe_close(crt->ipv6_deny_map_fd);
+
+        bpf_program_free(crt->ip_bpf_ingress);
+        bpf_program_free(crt->ip_bpf_ingress_installed);
+        bpf_program_free(crt->ip_bpf_egress);
+        bpf_program_free(crt->ip_bpf_egress_installed);
+
+        set_free(crt->ip_bpf_custom_ingress);
+        set_free(crt->ip_bpf_custom_ingress_installed);
+        set_free(crt->ip_bpf_custom_egress);
+        set_free(crt->ip_bpf_custom_egress_installed);
+
+        /* NOTE(review): dropped duplicate fdset_free() calls for initial_socket_bind_link_fds and
+         * initial_restric_ifaces_link_fds — both FDSets are already freed above (double free). */
+
+        free(crt->cgroup_path);
+
+        return mfree(crt);
+}
+
+static const char* const ip_accounting_metric_field_table[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IP_INGRESS_BYTES]   = "ip-accounting-ingress-bytes",
+        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+        [CGROUP_IP_EGRESS_BYTES]    = "ip-accounting-egress-bytes",
+        [CGROUP_IP_EGRESS_PACKETS]  = "ip-accounting-egress-packets",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(ip_accounting_metric_field, CGroupIPAccountingMetric);
+
+static const char* const io_accounting_metric_field_base_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-base",
+        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-base",
+        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-base",
+        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-base",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_base, CGroupIOAccountingMetric);
+
+static const char* const io_accounting_metric_field_last_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-last",
+        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-last",
+        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-last",
+        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_last, CGroupIOAccountingMetric);
+
+static const char* const memory_accounting_metric_field_last_table[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1] = {
+        [CGROUP_MEMORY_PEAK]      = "memory-accounting-peak",
+        [CGROUP_MEMORY_SWAP_PEAK] = "memory-accounting-swap-peak",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP(memory_accounting_metric_field_last, CGroupMemoryAccountingMetric);
+
+static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
+        _cleanup_free_ char *s = NULL;
+        int r;
+
+        assert(f);
+        assert(key);
+
+        if (mask == 0)
+                return 0;
+
+        r = cg_mask_to_string(mask, &s);
+        if (r < 0)
+                return log_error_errno(r, "Failed to format cgroup mask: %m");
+
+        return serialize_item(f, key, s);
+}
+
+int cgroup_runtime_serialize(Unit *u, FILE *f, FDSet *fds) {
+        int r;
+
+        assert(u);
+        assert(f);
+        assert(fds);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return 0;
+
+        (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, crt->cpu_usage_base);
+        if (crt->cpu_usage_last != NSEC_INFINITY)
+                (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, crt->cpu_usage_last);
+
+        if (crt->managed_oom_kill_last > 0)
+                (void) serialize_item_format(f, "managed-oom-kill-last", "%" PRIu64, crt->managed_oom_kill_last);
+
+        if (crt->oom_kill_last > 0)
+                (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, crt->oom_kill_last);
+
+        for (CGroupMemoryAccountingMetric metric = 0; metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST; metric++) {
+                uint64_t v;
+
+                r = unit_get_memory_accounting(u, metric, &v);
+                if (r >= 0)
+                        (void) serialize_item_format(f, memory_accounting_metric_field_last_to_string(metric), "%" PRIu64, v);
+        }
+
+        for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+                uint64_t v;
+
+                r = unit_get_ip_accounting(u, m, &v);
+                if (r >= 0)
+                        (void) serialize_item_format(f, ip_accounting_metric_field_to_string(m), "%" PRIu64, v);
+        }
+
+        for (CGroupIOAccountingMetric im = 0; im < _CGROUP_IO_ACCOUNTING_METRIC_MAX; im++) {
+                (void) serialize_item_format(f, io_accounting_metric_field_base_to_string(im), "%" PRIu64, crt->io_accounting_base[im]);
+
+                if (crt->io_accounting_last[im] != UINT64_MAX)
+                        (void) serialize_item_format(f, io_accounting_metric_field_last_to_string(im), "%" PRIu64, crt->io_accounting_last[im]);
+        }
+
+        if (crt->cgroup_path)
+                (void) serialize_item(f, "cgroup", crt->cgroup_path);
+        if (crt->cgroup_id != 0)
+                (void) serialize_item_format(f, "cgroup-id", "%" PRIu64, crt->cgroup_id);
+
+        (void) serialize_bool(f, "cgroup-realized", crt->cgroup_realized);
+        (void) serialize_cgroup_mask(f, "cgroup-realized-mask", crt->cgroup_realized_mask);
+        (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", crt->cgroup_enabled_mask);
+        (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", crt->cgroup_invalidated_mask);
+
+        (void) bpf_socket_bind_serialize(u, f, fds);
+
+        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", crt->ip_bpf_ingress_installed);
+        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", crt->ip_bpf_egress_installed);
+        (void) bpf_program_serialize_attachment(f, fds, "bpf-device-control-installed", crt->bpf_device_control_installed);
+        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", crt->ip_bpf_custom_ingress_installed);
+        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", crt->ip_bpf_custom_egress_installed);
+
+        (void) bpf_restrict_ifaces_serialize(u, f, fds);
+
+        return 0;
+}
+
+#define MATCH_DESERIALIZE(u, key, l, v, parse_func, target)             \
+        ({                                                              \
+                bool _deserialize_matched = streq(l, key);              \
+                if (_deserialize_matched) {                             \
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u); \
+                        if (!crt)                                       \
+                                log_oom_debug();                        \
+                        else {                                          \
+                                int _deserialize_r = parse_func(v);     \
+                                if (_deserialize_r < 0)                 \
+                                        log_unit_debug_errno(u, _deserialize_r, \
+                                                             "Failed to parse \"%s=%s\", ignoring.", l, v); \
+                                else                                    \
+                                        crt->target = _deserialize_r; \
+                        }                                               \
+                }                                                       \
+                _deserialize_matched;                                   \
+        })
+
+#define MATCH_DESERIALIZE_IMMEDIATE(u, key, l, v, parse_func, target)   \
+        ({                                                              \
+                 bool _deserialize_matched = streq(l, key);             \
+                 if (_deserialize_matched) {                            \
+                         CGroupRuntime *crt = unit_setup_cgroup_runtime(u); \
+                         if (!crt)                                      \
+                                 log_oom_debug();                       \
+                         else {                                         \
+                                 int _deserialize_r = parse_func(v, &crt->target); \
+                                 if (_deserialize_r < 0)                \
+                                         log_unit_debug_errno(u, _deserialize_r, \
+                                                              "Failed to parse \"%s=%s\", ignoring", l, v); \
+                         }                                              \
+                 }                                                      \
+                _deserialize_matched;                                   \
+        })
+
+#define MATCH_DESERIALIZE_METRIC(u, key, l, v, parse_func, target)             \
+        ({                                                              \
+                bool _deserialize_matched = streq(l, key);              \
+                if (_deserialize_matched) {                             \
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u); \
+                        if (!crt)                                       \
+                                log_oom_debug();                        \
+                        else {                                          \
+                                int _deserialize_r = parse_func(v);     \
+                                if (_deserialize_r < 0)                 \
+                                        log_unit_debug_errno(u, _deserialize_r, \
+                                                             "Failed to parse \"%s=%s\", ignoring.", l, v); \
+                                else                                    \
+                                        crt->target = _deserialize_r; \
+                        }                                               \
+                }                                                       \
+                _deserialize_matched;                                   \
+        })
+
+int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value, FDSet *fds) {
+        int r;
+
+        assert(u);
+        assert(value);
+
+        if (!UNIT_HAS_CGROUP_CONTEXT(u))
+                return 0;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cpu-usage-base", key, value, safe_atou64, cpu_usage_base) ||
+            MATCH_DESERIALIZE_IMMEDIATE(u, "cpuacct-usage-base", key, value, safe_atou64, cpu_usage_base))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cpu-usage-last", key, value, safe_atou64, cpu_usage_last))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "managed-oom-kill-last", key, value, safe_atou64, managed_oom_kill_last))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "oom-kill-last", key, value, safe_atou64, oom_kill_last))
+                return 1;
+
+        if (streq(key, "cgroup")) {
+                r = unit_set_cgroup_path(u, value);
+                if (r < 0)
+                        log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", value);
+
+                (void) unit_watch_cgroup(u);
+                (void) unit_watch_cgroup_memory(u);
+                return 1;
+        }
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-id", key, value, safe_atou64, cgroup_id))
+                return 1;
+
+        if (MATCH_DESERIALIZE(u, "cgroup-realized", key, value, parse_boolean, cgroup_realized))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-realized-mask", key, value, cg_mask_from_string, cgroup_realized_mask))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-enabled-mask", key, value, cg_mask_from_string, cgroup_enabled_mask))
+                return 1;
+
+        if (MATCH_DESERIALIZE_IMMEDIATE(u, "cgroup-invalidated-mask", key, value, cg_mask_from_string, cgroup_invalidated_mask))
+                return 1;
+
+        if (STR_IN_SET(key, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
+                int fd;
+
+                fd = deserialize_fd(fds, value);
+                if (fd >= 0)
+                        (void) bpf_socket_bind_add_initial_link_fd(u, fd);
+
+                return 1;
+        }
+
+        if (STR_IN_SET(key,
+                       "ip-bpf-ingress-installed", "ip-bpf-egress-installed",
+                       "bpf-device-control-installed",
+                       "ip-bpf-custom-ingress-installed", "ip-bpf-custom-egress-installed")) {
+
+                CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                if (!crt)
+                        log_oom_debug();
+                else {
+                        if (streq(key, "ip-bpf-ingress-installed"))
+                                (void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_ingress_installed);
+
+                        if (streq(key, "ip-bpf-egress-installed"))
+                                (void) bpf_program_deserialize_attachment(value, fds, &crt->ip_bpf_egress_installed);
+
+                        if (streq(key, "bpf-device-control-installed"))
+                                (void) bpf_program_deserialize_attachment(value, fds, &crt->bpf_device_control_installed);
+
+                        if (streq(key, "ip-bpf-custom-ingress-installed"))
+                                (void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_ingress_installed);
+
+                        if (streq(key, "ip-bpf-custom-egress-installed"))
+                                (void) bpf_program_deserialize_attachment_set(value, fds, &crt->ip_bpf_custom_egress_installed);
+                }
+
+                return 1;
+        }
+
+        if (streq(key, "restrict-ifaces-bpf-fd")) {
+                int fd;
+
+                fd = deserialize_fd(fds, value);
+                if (fd >= 0)
+                        (void) bpf_restrict_ifaces_add_initial_link_fd(u, fd);
+                return 1;
+        }
+
+        CGroupMemoryAccountingMetric mm = memory_accounting_metric_field_last_from_string(key);
+        if (mm >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse memory accounting last value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->memory_accounting_last[mm] = c;
+                }
+
+                return 1;
+        }
+
+        CGroupIPAccountingMetric ipm = ip_accounting_metric_field_from_string(key);
+        if (ipm >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->ip_accounting_extra[ipm] = c;
+                }
+
+                return 1;
+        }
+
+        CGroupIOAccountingMetric iom = io_accounting_metric_field_base_from_string(key);
+        if (iom >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse IO accounting base value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->io_accounting_base[iom] = c;
+                }
+
+                return 1;
+        }
+
+        iom = io_accounting_metric_field_last_from_string(key);
+        if (iom >= 0) {
+                uint64_t c;
+
+                r = safe_atou64(value, &c);
+                if (r < 0)
+                        log_unit_debug(u, "Failed to parse IO accounting last value %s, ignoring.", value);
+                else {
+                        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+                        if (!crt)
+                                log_oom_debug();
+                        else
+                                crt->io_accounting_last[iom] = c;
+                }
+                return 1;
+        }
+
+        return 0;
+}
+
 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
         [CGROUP_DEVICE_POLICY_AUTO]   = "auto",
         [CGROUP_DEVICE_POLICY_CLOSED] = "closed",
index 3ea3879bbcf35fb3f87c26708cc2a272de1e2846..54fb96981e1d4eb7797ac50b23200129cfa05e7f 100644 (file)
@@ -3,6 +3,9 @@
 
 #include <stdbool.h>
 
+#include "sd-event.h"
+
+#include "bpf-program.h"
 #include "bpf-restrict-fs.h"
 #include "cgroup-util.h"
 #include "cpu-set-util.h"
@@ -35,6 +38,7 @@ typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
 typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
 typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
 typedef struct CGroupSocketBindItem CGroupSocketBindItem;
+typedef struct CGroupRuntime CGroupRuntime;
 
 typedef enum CGroupDevicePolicy {
         /* When devices listed, will allow those, plus built-in ones, if none are listed will allow
@@ -131,7 +135,9 @@ typedef enum CGroupPressureWatch {
         _CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
 } CGroupPressureWatch;
 
-/* When adding members make sure to update cgroup_context_copy() accordingly */
+/* The user-supplied cgroup-related configuration options. This remains mostly immutable while the service
+ * manager is running (except for an occasional SetProperty() configuration change), outside of reload
+ * cycles. When adding members make sure to update cgroup_context_copy() accordingly. */
 struct CGroupContext {
         bool cpu_accounting;
         bool io_accounting;
@@ -288,6 +294,86 @@ typedef enum CGroupLimitType {
         _CGROUP_LIMIT_INVALID = -EINVAL,
 } CGroupLimitType;
 
+/* The dynamic, regularly updated runtime state of a unit that has a realized cgroup. This is only allocated when a unit is first realized. */
+typedef struct CGroupRuntime {
+        /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+        nsec_t cpu_usage_base;
+        nsec_t cpu_usage_last; /* the most recently read value */
+
+        /* Most recently read value of memory accounting metrics */
+        uint64_t memory_accounting_last[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1];
+
+        /* The current counter of OOM kills initiated by systemd-oomd */
+        uint64_t managed_oom_kill_last;
+
+        /* The current counter of the oom_kill field in the memory.events cgroup attribute */
+        uint64_t oom_kill_last;
+
+        /* Where the io.stat data was at the time the unit was started */
+        uint64_t io_accounting_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
+        uint64_t io_accounting_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX]; /* the most recently read value */
+
+        /* Counterparts in the cgroup filesystem */
+        char *cgroup_path;
+        uint64_t cgroup_id;
+        CGroupMask cgroup_realized_mask;           /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
+        CGroupMask cgroup_enabled_mask;            /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
+        CGroupMask cgroup_invalidated_mask;        /* A mask specifying controllers which shall be considered invalidated, and require re-realization */
+        CGroupMask cgroup_members_mask;            /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
+
+        /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */
+        int cgroup_control_inotify_wd;
+        int cgroup_memory_inotify_wd;
+
+        /* Device Controller BPF program */
+        BPFProgram *bpf_device_control_installed;
+
+        /* IP BPF Firewalling/accounting */
+        int ip_accounting_ingress_map_fd;
+        int ip_accounting_egress_map_fd;
+        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
+        int ipv4_allow_map_fd;
+        int ipv6_allow_map_fd;
+        int ipv4_deny_map_fd;
+        int ipv6_deny_map_fd;
+        BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
+        BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+
+        Set *ip_bpf_custom_ingress;
+        Set *ip_bpf_custom_ingress_installed;
+        Set *ip_bpf_custom_egress;
+        Set *ip_bpf_custom_egress_installed;
+
+        /* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
+         * attached to unit cgroup by provided program fd and attach type. */
+        Hashmap *bpf_foreign_by_key;
+
+        FDSet *initial_socket_bind_link_fds;
+#if BPF_FRAMEWORK
+        /* BPF links to BPF programs attached to cgroup/bind{4|6} hooks and
+         * responsible for allowing or denying a unit to bind(2) to a socket
+         * address. */
+        struct bpf_link *ipv4_socket_bind_link;
+        struct bpf_link *ipv6_socket_bind_link;
+#endif
+
+        FDSet *initial_restric_ifaces_link_fds;
+#if BPF_FRAMEWORK
+        struct bpf_link *restrict_ifaces_ingress_bpf_link;
+        struct bpf_link *restrict_ifaces_egress_bpf_link;
+#endif
+
+        bool cgroup_realized:1;
+        bool cgroup_members_mask_valid:1;
+
+        /* Reset cgroup accounting next time we fork something off */
+        bool reset_accounting:1;
+
+        /* Whether we warned about clamping the CPU quota period */
+        bool warned_clamping_cpu_quota_period:1;
+} CGroupRuntime;
+
 typedef struct Unit Unit;
 typedef struct Manager Manager;
 typedef enum ManagerState ManagerState;
@@ -360,6 +446,7 @@ int unit_watch_cgroup(Unit *u);
 int unit_watch_cgroup_memory(Unit *u);
 void unit_add_to_cgroup_realize_queue(Unit *u);
 
+int unit_cgroup_is_empty(Unit *u);
 void unit_release_cgroup(Unit *u);
 /* Releases the cgroup only if it is recursively empty.
  * Returns true if the cgroup was released, false otherwise. */
@@ -435,6 +522,16 @@ bool unit_cgroup_delegate(Unit *u);
 int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name);
 int unit_cgroup_freezer_action(Unit *u, FreezerAction action);
 
+const char* freezer_action_to_string(FreezerAction a) _const_;
+FreezerAction freezer_action_from_string(const char *s) _pure_;
+
+CGroupRuntime *cgroup_runtime_new(void);
+CGroupRuntime *cgroup_runtime_free(CGroupRuntime *crt);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CGroupRuntime*, cgroup_runtime_free);
+
+int cgroup_runtime_serialize(Unit *u, FILE *f, FDSet *fds);
+int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value, FDSet *fds);
+
 const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a) _const_;
 CGroupPressureWatch cgroup_pressure_watch_from_string(const char *s) _pure_;
 
index 10650421062a500412678b62f6004ae648132ffd..59aae43671d4a9a3cb13224e1053e0b7c3ca4fda 100644 (file)
@@ -69,6 +69,10 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, J
         if (!c)
                 return -EINVAL;
 
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt)
+                return -EINVAL;
+
         if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(u)))
                 /* systemd-oomd should always treat inactive units as though they didn't enable any action since they
                  * should not have a valid cgroup */
@@ -83,19 +87,24 @@ static int build_managed_oom_json_array_element(Unit *u, const char *property, J
 
         return json_build(ret_v, JSON_BUILD_OBJECT(
                                  JSON_BUILD_PAIR("mode", JSON_BUILD_STRING(mode)),
-                                 JSON_BUILD_PAIR("path", JSON_BUILD_STRING(u->cgroup_path)),
+                                 JSON_BUILD_PAIR("path", JSON_BUILD_STRING(crt->cgroup_path)),
                                  JSON_BUILD_PAIR("property", JSON_BUILD_STRING(property)),
                                  JSON_BUILD_PAIR_CONDITION(use_limit, "limit", JSON_BUILD_UNSIGNED(c->moom_mem_pressure_limit))));
 }
 
 int manager_varlink_send_managed_oom_update(Unit *u) {
         _cleanup_(json_variant_unrefp) JsonVariant *arr = NULL, *v = NULL;
+        CGroupRuntime *crt;
         CGroupContext *c;
         int r;
 
         assert(u);
 
-        if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager || !u->cgroup_path)
+        if (!UNIT_VTABLE(u)->can_set_managed_oom || !u->manager)
+                return 0;
+
+        crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
         if (MANAGER_IS_SYSTEM(u->manager)) {
index bba38bddc9b3929faed485669c12a1d057b613d6..9afc0827e667b8616c27948bd857b45daf9cfbe5 100644 (file)
@@ -1300,7 +1300,9 @@ int bus_cgroup_set_property(
 
                 if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
                         c->cpu_quota_per_sec_usec = u64;
-                        u->warned_clamping_cpu_quota_period = false;
+                        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                        if (crt)
+                                crt->warned_clamping_cpu_quota_period = false;
                         unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
 
                         if (c->cpu_quota_per_sec_usec == USEC_INFINITY)
@@ -1324,7 +1326,9 @@ int bus_cgroup_set_property(
 
                 if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
                         c->cpu_quota_period_usec = u64;
-                        u->warned_clamping_cpu_quota_period = false;
+                        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                        if (crt)
+                                crt->warned_clamping_cpu_quota_period = false;
                         unit_invalidate_cgroup(u, CGROUP_MASK_CPU);
                         if (c->cpu_quota_period_usec == USEC_INFINITY)
                                 unit_write_setting(u, flags, "CPUQuotaPeriodSec", "CPUQuotaPeriodSec=");
index 11a52a25353ad06338b3c73971d6edde3c4fdb4b..eda9f6c74e18530be2b56d3728394789809811d6 100644 (file)
@@ -1216,12 +1216,32 @@ static int property_get_cgroup(
          * indicates the root cgroup, which we report as "/". c) all
          * other cases we report as-is. */
 
-        if (u->cgroup_path)
-                t = empty_to_root(u->cgroup_path);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
+        if (crt && crt->cgroup_path)
+                t = empty_to_root(crt->cgroup_path);
 
         return sd_bus_message_append(reply, "s", t);
 }
 
+static int property_get_cgroup_id(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        Unit *u = ASSERT_PTR(userdata);
+
+        assert(bus);
+        assert(reply);
+
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        return sd_bus_message_append(reply, "t", crt ? crt->cgroup_id : 0);
+}
+
 static int append_process(sd_bus_message *reply, const char *p, PidRef *pid, Set *pids) {
         _cleanup_free_ char *buf = NULL, *cmdline = NULL;
         int r;
@@ -1350,8 +1370,10 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
         if (r < 0)
                 return r;
 
-        if (u->cgroup_path) {
-                r = append_cgroup(reply, u->cgroup_path, pids);
+        CGroupRuntime *crt;
+        crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_path) {
+                r = append_cgroup(reply, crt->cgroup_path, pids);
                 if (r < 0)
                         return r;
         }
@@ -1558,7 +1580,7 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
         SD_BUS_PROPERTY("ControlGroup", "s", property_get_cgroup, 0, 0),
-        SD_BUS_PROPERTY("ControlGroupId", "t", NULL, offsetof(Unit, cgroup_id), 0),
+        SD_BUS_PROPERTY("ControlGroupId", "t", property_get_cgroup_id, 0, 0),
         SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
         SD_BUS_PROPERTY("MemoryPeak", "t", property_get_memory_accounting, 0, 0),
         SD_BUS_PROPERTY("MemorySwapCurrent", "t", property_get_memory_accounting, 0, 0),
index 1fa2c2f9e2e5aac4f068c5fb63eb4bb396a487f6..ecd8172fb457d9f67ef54d56f1cd381cec17872d 100644 (file)
@@ -240,6 +240,7 @@ static void mount_done(Unit *u) {
         mount_parameters_done(&m->parameters_fragment);
 
         m->exec_runtime = exec_runtime_free(m->exec_runtime);
+
         exec_command_done_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
         m->control_command = NULL;
 
@@ -815,8 +816,10 @@ static int mount_coldplug(Unit *u) {
                         return r;
         }
 
-        if (!IN_SET(m->deserialized_state, MOUNT_DEAD, MOUNT_FAILED))
+        if (!IN_SET(m->deserialized_state, MOUNT_DEAD, MOUNT_FAILED)) {
                 (void) unit_setup_exec_runtime(u);
+                (void) unit_setup_cgroup_runtime(u);
+        }
 
         mount_set_state(m, m->deserialized_state);
         return 0;
@@ -1332,7 +1335,9 @@ static void mount_cycle_clear(Mount *m) {
         m->result = MOUNT_SUCCESS;
         m->reload_result = MOUNT_SUCCESS;
         exec_command_reset_status_array(m->exec_command, _MOUNT_EXEC_COMMAND_MAX);
-        UNIT(m)->reset_accounting = true;
+
+        if (m->cgroup_runtime)
+                m->cgroup_runtime->reset_accounting = true;
 }
 
 static int mount_start(Unit *u) {
@@ -2448,6 +2453,7 @@ const UnitVTable mount_vtable = {
         .cgroup_context_offset = offsetof(Mount, cgroup_context),
         .kill_context_offset = offsetof(Mount, kill_context),
         .exec_runtime_offset = offsetof(Mount, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Mount, cgroup_runtime),
 
         .sections =
                 "Unit\0"
index 6712c16811f226d7ca030fc6cb7ba489ff8e2558..a029dc87aca36473aa84787aebfd79c9994c33da 100644 (file)
@@ -79,6 +79,7 @@ struct Mount {
         CGroupContext cgroup_context;
 
         ExecRuntime *exec_runtime;
+        CGroupRuntime *cgroup_runtime;
 
         MountState state, deserialized_state;
 
index 3f950e0c9efd761e56b9adba796518b0ccfeaf0e..419370590956f7cd7367f8c3b4f5109d10d3bea3 100644 (file)
@@ -353,6 +353,9 @@ static int scope_enter_start_chown(Scope *s) {
         assert(s);
         assert(s->user);
 
+        if (!s->cgroup_runtime)
+                return -EINVAL;
+
         r = scope_arm_timer(s, /* relative= */ true, u->manager->defaults.timeout_start_usec);
         if (r < 0)
                 return r;
@@ -385,7 +388,7 @@ static int scope_enter_start_chown(Scope *s) {
                         }
                 }
 
-                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, uid, gid);
+                r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, s->cgroup_runtime->cgroup_path, uid, gid);
                 if (r < 0) {
                         log_unit_error_errno(UNIT(s), r, "Failed to adjust control group access: %m");
                         _exit(EXIT_CGROUP);
@@ -776,6 +779,7 @@ const UnitVTable scope_vtable = {
         .object_size = sizeof(Scope),
         .cgroup_context_offset = offsetof(Scope, cgroup_context),
         .kill_context_offset = offsetof(Scope, kill_context),
+        .cgroup_runtime_offset = offsetof(Scope, cgroup_runtime),
 
         .sections =
                 "Unit\0"
index c9574a32c2abc9e98724baf9e1ac109e91e5f346..1090431c13e117eef1a85214d7bf4840246ebb74 100644 (file)
@@ -21,6 +21,7 @@ struct Scope {
 
         CGroupContext cgroup_context;
         KillContext kill_context;
+        CGroupRuntime *cgroup_runtime;
 
         ScopeState state, deserialized_state;
         ScopeResult result;
index 7809c6a5119f5271767c2335a658dc8d32494dae..d017515c9299dca0a0e1d094761a8382420fb97c 100644 (file)
@@ -460,6 +460,7 @@ static void service_done(Unit *u) {
         s->status_text = mfree(s->status_text);
 
         s->exec_runtime = exec_runtime_free(s->exec_runtime);
+
         exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
         s->control_command = NULL;
         s->main_command = NULL;
@@ -1345,6 +1346,7 @@ static int service_coldplug(Unit *u) {
                     SERVICE_DEAD_RESOURCES_PINNED)) {
                 (void) unit_enqueue_rewatch_pids(u);
                 (void) unit_setup_exec_runtime(u);
+                (void) unit_setup_cgroup_runtime(u);
         }
 
         if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY))
@@ -1885,10 +1887,10 @@ static int cgroup_good(Service *s) {
         /* Returns 0 if the cgroup is empty or doesn't exist, > 0 if it is exists and is populated, < 0 if we can't
          * figure it out */
 
-        if (!UNIT(s)->cgroup_path)
+        if (!s->cgroup_runtime || !s->cgroup_runtime->cgroup_path)
                 return 0;
 
-        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path);
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, s->cgroup_runtime->cgroup_path);
         if (r < 0)
                 return r;
 
@@ -2773,7 +2775,9 @@ static int service_start(Unit *u) {
                 s->flush_n_restarts = false;
         }
 
-        u->reset_accounting = true;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt)
+                crt->reset_accounting = true;
 
         service_enter_condition(s);
         return 1;
@@ -5149,6 +5153,7 @@ const UnitVTable service_vtable = {
         .cgroup_context_offset = offsetof(Service, cgroup_context),
         .kill_context_offset = offsetof(Service, kill_context),
         .exec_runtime_offset = offsetof(Service, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Service, cgroup_runtime),
 
         .sections =
                 "Unit\0"
index 364cd6885b8759b9f6d2fc0553fd5985ffdf2f33..59598f77de66f9e32a694d5eb2000b5120eec9b9 100644 (file)
@@ -168,6 +168,8 @@ struct Service {
         /* Runtime data of the execution context */
         ExecRuntime *exec_runtime;
 
+        CGroupRuntime *cgroup_runtime;
+
         PidRef main_pid, control_pid;
 
         /* if we are a socket activated service instance, store information of the connection/peer/socket */
index e9b2683912c61cf011e8de13e27a0430f32622a6..c38f326cba419f7f8dee4ac8c6b67586115ccf73 100644 (file)
@@ -399,6 +399,7 @@ static int slice_freezer_action(Unit *s, FreezerAction action) {
 const UnitVTable slice_vtable = {
         .object_size = sizeof(Slice),
         .cgroup_context_offset = offsetof(Slice, cgroup_context),
+        .cgroup_runtime_offset = offsetof(Slice, cgroup_runtime),
 
         .sections =
                 "Unit\0"
index e2f92746556c8da3cb8c5af8f45801cd9fd3a10a..004349dc4fb5cb8c761a80222bf1af9935ff653d 100644 (file)
@@ -11,6 +11,8 @@ struct Slice {
         SliceState state, deserialized_state;
 
         CGroupContext cgroup_context;
+
+        CGroupRuntime *cgroup_runtime;
 };
 
 extern const UnitVTable slice_vtable;
index 0ccbfa0090002755c0d86665a4943caaa2c1c7f3..9f9de2918791f79edee70b0b8e47f3677cebeb65 100644 (file)
@@ -167,6 +167,7 @@ static void socket_done(Unit *u) {
         s->peers_by_address = set_free(s->peers_by_address);
 
         s->exec_runtime = exec_runtime_free(s->exec_runtime);
+
         exec_command_free_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
         s->control_command = NULL;
 
@@ -2473,7 +2474,8 @@ static int socket_start(Unit *u) {
         s->result = SOCKET_SUCCESS;
         exec_command_reset_status_list_array(s->exec_command, _SOCKET_EXEC_COMMAND_MAX);
 
-        u->reset_accounting = true;
+        if (s->cgroup_runtime)
+                s->cgroup_runtime->reset_accounting = true;
 
         socket_enter_start_pre(s);
         return 1;
@@ -3528,6 +3530,7 @@ const UnitVTable socket_vtable = {
         .cgroup_context_offset = offsetof(Socket, cgroup_context),
         .kill_context_offset = offsetof(Socket, kill_context),
         .exec_runtime_offset = offsetof(Socket, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Socket, cgroup_runtime),
 
         .sections =
                 "Unit\0"
index 0983e8c9d0f869a5a63fa0a099e022a140f01591..5efe01d2bf07ad2797f3b203786b6c277a1dafbe 100644 (file)
@@ -92,6 +92,7 @@ struct Socket {
         CGroupContext cgroup_context;
 
         ExecRuntime *exec_runtime;
+        CGroupRuntime *cgroup_runtime;
 
         /* For Accept=no sockets refers to the one service we'll
          * activate. For Accept=yes sockets is either NULL, or filled
index 4faee7abec2291ea0855e0decd5640221ef5dc75..709671561b757c1b3704fc160ca036d8b2b69936 100644 (file)
@@ -168,6 +168,7 @@ static void swap_done(Unit *u) {
         s->parameters_fragment.options = mfree(s->parameters_fragment.options);
 
         s->exec_runtime = exec_runtime_free(s->exec_runtime);
+
         exec_command_done_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
         s->control_command = NULL;
 
@@ -564,8 +565,10 @@ static int swap_coldplug(Unit *u) {
                         return r;
         }
 
-        if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED))
+        if (!IN_SET(new_state, SWAP_DEAD, SWAP_FAILED)) {
                 (void) unit_setup_exec_runtime(u);
+                (void) unit_setup_cgroup_runtime(u);
+        }
 
         swap_set_state(s, new_state);
         return 0;
@@ -854,7 +857,9 @@ static void swap_cycle_clear(Swap *s) {
 
         s->result = SWAP_SUCCESS;
         exec_command_reset_status_array(s->exec_command, _SWAP_EXEC_COMMAND_MAX);
-        UNIT(s)->reset_accounting = true;
+
+        if (s->cgroup_runtime)
+                s->cgroup_runtime->reset_accounting = true;
 }
 
 static int swap_start(Unit *u) {
@@ -1589,6 +1594,7 @@ const UnitVTable swap_vtable = {
         .cgroup_context_offset = offsetof(Swap, cgroup_context),
         .kill_context_offset = offsetof(Swap, kill_context),
         .exec_runtime_offset = offsetof(Swap, exec_runtime),
+        .cgroup_runtime_offset = offsetof(Swap, cgroup_runtime),
 
         .sections =
                 "Unit\0"
index ef20f0f764796f6b9b34a7de6866c08bce4dc680..d9bbd377f0e947208510af7b78e8c06d0e043b15 100644 (file)
@@ -70,6 +70,7 @@ struct Swap {
         CGroupContext cgroup_context;
 
         ExecRuntime *exec_runtime;
+        CGroupRuntime *cgroup_runtime;
 
         SwapState state, deserialized_state;
 
index be4cb139e9b8e6f6bf1500e9bb625a99b460c9bb..40267531d12b9fb9bcdf939d980693b387dc86c2 100644 (file)
@@ -87,13 +87,14 @@ static void bad_specifier(const Unit *u, char specifier) {
 
 static int specifier_cgroup(char specifier, const void *data, const char *root, const void *userdata, char **ret) {
         const Unit *u = ASSERT_PTR(userdata);
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
 
         bad_specifier(u, specifier);
 
-        if (u->cgroup_path) {
+        if (crt && crt->cgroup_path) {
                 char *n;
 
-                n = strdup(u->cgroup_path);
+                n = strdup(crt->cgroup_path);
                 if (!n)
                         return -ENOMEM;
 
@@ -126,8 +127,10 @@ static int specifier_cgroup_slice(char specifier, const void *data, const char *
 
         slice = UNIT_GET_SLICE(u);
         if (slice) {
-                if (slice->cgroup_path)
-                        n = strdup(slice->cgroup_path);
+                CGroupRuntime *crt = unit_get_cgroup_runtime(slice);
+
+                if (crt && crt->cgroup_path)
+                        n = strdup(crt->cgroup_path);
                 else
                         return unit_default_cgroup_path(slice, ret);
         } else
index 297bf771b799bc358274cacff5eda8e6289e1b38..9a4e71713551b3de63f063eb9ed4554a03f6904c 100644 (file)
 #include "unit-serialize.h"
 #include "user-util.h"
 
-static int serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask) {
-        _cleanup_free_ char *s = NULL;
-        int r;
-
-        assert(f);
-        assert(key);
-
-        if (mask == 0)
-                return 0;
-
-        r = cg_mask_to_string(mask, &s);
-        if (r < 0)
-                return log_error_errno(r, "Failed to format cgroup mask: %m");
-
-        return serialize_item(f, key, s);
-}
-
 /* Make sure out values fit in the bitfield. */
 assert_cc(_UNIT_MARKER_MAX <= sizeof(((Unit){}).markers) * 8);
 
@@ -69,40 +52,6 @@ static int deserialize_markers(Unit *u, const char *value) {
         }
 }
 
-static const char* const ip_accounting_metric_field_table[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
-        [CGROUP_IP_INGRESS_BYTES]   = "ip-accounting-ingress-bytes",
-        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
-        [CGROUP_IP_EGRESS_BYTES]    = "ip-accounting-egress-bytes",
-        [CGROUP_IP_EGRESS_PACKETS]  = "ip-accounting-egress-packets",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(ip_accounting_metric_field, CGroupIPAccountingMetric);
-
-static const char* const io_accounting_metric_field_base_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
-        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-base",
-        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-base",
-        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-base",
-        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-base",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_base, CGroupIOAccountingMetric);
-
-static const char* const io_accounting_metric_field_last_table[_CGROUP_IO_ACCOUNTING_METRIC_MAX] = {
-        [CGROUP_IO_READ_BYTES]       = "io-accounting-read-bytes-last",
-        [CGROUP_IO_WRITE_BYTES]      = "io-accounting-write-bytes-last",
-        [CGROUP_IO_READ_OPERATIONS]  = "io-accounting-read-operations-last",
-        [CGROUP_IO_WRITE_OPERATIONS] = "io-accounting-write-operations-last",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(io_accounting_metric_field_last, CGroupIOAccountingMetric);
-
-static const char* const memory_accounting_metric_field_last_table[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1] = {
-        [CGROUP_MEMORY_PEAK]      = "memory-accounting-peak",
-        [CGROUP_MEMORY_SWAP_PEAK] = "memory-accounting-swap-peak",
-};
-
-DEFINE_PRIVATE_STRING_TABLE_LOOKUP(memory_accounting_metric_field_last, CGroupMemoryAccountingMetric);
-
 int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
         int r;
 
@@ -158,48 +107,7 @@ int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
         (void) serialize_bool(f, "exported-log-rate-limit-interval", u->exported_log_ratelimit_interval);
         (void) serialize_bool(f, "exported-log-rate-limit-burst", u->exported_log_ratelimit_burst);
 
-        (void) serialize_item_format(f, "cpu-usage-base", "%" PRIu64, u->cpu_usage_base);
-        if (u->cpu_usage_last != NSEC_INFINITY)
-                (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
-
-        if (u->managed_oom_kill_last > 0)
-                (void) serialize_item_format(f, "managed-oom-kill-last", "%" PRIu64, u->managed_oom_kill_last);
-
-        if (u->oom_kill_last > 0)
-                (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, u->oom_kill_last);
-
-        for (CGroupIOAccountingMetric im = 0; im < _CGROUP_IO_ACCOUNTING_METRIC_MAX; im++) {
-                (void) serialize_item_format(f, io_accounting_metric_field_base_to_string(im), "%" PRIu64, u->io_accounting_base[im]);
-
-                if (u->io_accounting_last[im] != UINT64_MAX)
-                        (void) serialize_item_format(f, io_accounting_metric_field_last_to_string(im), "%" PRIu64, u->io_accounting_last[im]);
-        }
-
-        for (CGroupMemoryAccountingMetric metric = 0; metric <= _CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST; metric++) {
-                uint64_t v;
-
-                r = unit_get_memory_accounting(u, metric, &v);
-                if (r >= 0)
-                        (void) serialize_item_format(f, memory_accounting_metric_field_last_to_string(metric), "%" PRIu64, v);
-        }
-
-        if (u->cgroup_path)
-                (void) serialize_item(f, "cgroup", u->cgroup_path);
-
-        (void) serialize_bool(f, "cgroup-realized", u->cgroup_realized);
-        (void) serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
-        (void) serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
-        (void) serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
-
-        (void) bpf_socket_bind_serialize(u, f, fds);
-
-        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-ingress-installed", u->ip_bpf_ingress_installed);
-        (void) bpf_program_serialize_attachment(f, fds, "ip-bpf-egress-installed", u->ip_bpf_egress_installed);
-        (void) bpf_program_serialize_attachment(f, fds, "bpf-device-control-installed", u->bpf_device_control_installed);
-        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-ingress-installed", u->ip_bpf_custom_ingress_installed);
-        (void) bpf_program_serialize_attachment_set(f, fds, "ip-bpf-custom-egress-installed", u->ip_bpf_custom_egress_installed);
-
-        (void) bpf_restrict_ifaces_serialize(u, f, fds);
+        (void) cgroup_runtime_serialize(u, f, fds);
 
         if (uid_is_valid(u->ref_uid))
                 (void) serialize_item_format(f, "ref-uid", UID_FMT, u->ref_uid);
@@ -214,14 +122,6 @@ int unit_serialize_state(Unit *u, FILE *f, FDSet *fds, bool switching_root) {
 
         bus_track_serialize(u->bus_track, f, "ref");
 
-        for (CGroupIPAccountingMetric m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
-                uint64_t v;
-
-                r = unit_get_ip_accounting(u, m, &v);
-                if (r >= 0)
-                        (void) serialize_item_format(f, ip_accounting_metric_field_to_string(m), "%" PRIu64, v);
-        }
-
         if (!switching_root) {
                 if (u->job) {
                         fputs("job\n", f);
@@ -297,7 +197,6 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
 
         for (;;) {
                 _cleanup_free_ char *l  = NULL;
-                ssize_t m;
                 size_t k;
                 char *v;
 
@@ -380,76 +279,7 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
                 else if (MATCH_DESERIALIZE("exported-log-rate-limit-burst", l, v, parse_boolean, u->exported_log_ratelimit_burst))
                         continue;
 
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cpu-usage-base", l, v, safe_atou64, u->cpu_usage_base) ||
-                         MATCH_DESERIALIZE_IMMEDIATE("cpuacct-usage-base", l, v, safe_atou64, u->cpu_usage_base))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cpu-usage-last", l, v, safe_atou64, u->cpu_usage_last))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("managed-oom-kill-last", l, v, safe_atou64, u->managed_oom_kill_last))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("oom-kill-last", l, v, safe_atou64, u->oom_kill_last))
-                        continue;
-
-                else if (streq(l, "cgroup")) {
-                        r = unit_set_cgroup_path(u, v);
-                        if (r < 0)
-                                log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", v);
-
-                        (void) unit_watch_cgroup(u);
-                        (void) unit_watch_cgroup_memory(u);
-
-                        continue;
-
-                } else if (MATCH_DESERIALIZE("cgroup-realized", l, v, parse_boolean, u->cgroup_realized))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cgroup-realized-mask", l, v, cg_mask_from_string, u->cgroup_realized_mask))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cgroup-enabled-mask", l, v, cg_mask_from_string, u->cgroup_enabled_mask))
-                        continue;
-
-                else if (MATCH_DESERIALIZE_IMMEDIATE("cgroup-invalidated-mask", l, v, cg_mask_from_string, u->cgroup_invalidated_mask))
-                        continue;
-
-                else if (STR_IN_SET(l, "ipv4-socket-bind-bpf-link-fd", "ipv6-socket-bind-bpf-link-fd")) {
-                        int fd;
-
-                        fd = deserialize_fd(fds, v);
-                        if (fd >= 0)
-                                (void) bpf_socket_bind_add_initial_link_fd(u, fd);
-                        continue;
-
-                } else if (streq(l, "ip-bpf-ingress-installed")) {
-                         (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_ingress_installed);
-                         continue;
-                } else if (streq(l, "ip-bpf-egress-installed")) {
-                         (void) bpf_program_deserialize_attachment(v, fds, &u->ip_bpf_egress_installed);
-                         continue;
-                } else if (streq(l, "bpf-device-control-installed")) {
-                         (void) bpf_program_deserialize_attachment(v, fds, &u->bpf_device_control_installed);
-                         continue;
-
-                } else if (streq(l, "ip-bpf-custom-ingress-installed")) {
-                         (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_ingress_installed);
-                         continue;
-                } else if (streq(l, "ip-bpf-custom-egress-installed")) {
-                         (void) bpf_program_deserialize_attachment_set(v, fds, &u->ip_bpf_custom_egress_installed);
-                         continue;
-
-                } else if (streq(l, "restrict-ifaces-bpf-fd")) {
-                        int fd;
-
-                        fd = deserialize_fd(fds, v);
-                        if (fd >= 0)
-                                (void) bpf_restrict_ifaces_add_initial_link_fd(u, fd);
-
-                        continue;
-
-                } else if (streq(l, "ref-uid")) {
+                else if (streq(l, "ref-uid")) {
                         uid_t uid;
 
                         r = parse_uid(v, &uid);
@@ -499,55 +329,6 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
                         continue;
                 }
 
-                m = memory_accounting_metric_field_last_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse memory accounting last value %s, ignoring.", v);
-                        else
-                                u->memory_accounting_last[m] = c;
-                        continue;
-                }
-
-                /* Check if this is an IP accounting metric serialization field */
-                m = ip_accounting_metric_field_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
-                        else
-                                u->ip_accounting_extra[m] = c;
-                        continue;
-                }
-
-                m = io_accounting_metric_field_base_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse IO accounting base value %s, ignoring.", v);
-                        else
-                                u->io_accounting_base[m] = c;
-                        continue;
-                }
-
-                m = io_accounting_metric_field_last_from_string(l);
-                if (m >= 0) {
-                        uint64_t c;
-
-                        r = safe_atou64(v, &c);
-                        if (r < 0)
-                                log_unit_debug(u, "Failed to parse IO accounting last value %s, ignoring.", v);
-                        else
-                                u->io_accounting_last[m] = c;
-                        continue;
-                }
-
                 r = exec_shared_runtime_deserialize_compat(u, l, v, fds);
                 if (r < 0) {
                         log_unit_warning(u, "Failed to deserialize runtime parameter '%s', ignoring.", l);
@@ -556,6 +337,13 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
                         /* Returns positive if key was handled by the call */
                         continue;
 
+                r = cgroup_runtime_deserialize_one(u, l, v, fds);
+                if (r < 0) {
                        log_unit_warning(u, "Failed to deserialize cgroup runtime parameter '%s', ignoring.", l);
+                        continue;
+                } else if (r > 0)
+                        continue; /* was handled */
+
                 if (UNIT_VTABLE(u)->deserialize_item) {
                         r = UNIT_VTABLE(u)->deserialize_item(u, l, v, fds);
                         if (r < 0)
@@ -574,7 +362,9 @@ int unit_deserialize_state(Unit *u, FILE *f, FDSet *fds) {
         /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings
          * applied after we are done. For that we invalidate anything already realized, so that we can
          * realize it again. */
-        if (u->cgroup_realized) {
+        CGroupRuntime *crt;
+        crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_realized) {
                 unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
                 unit_invalidate_cgroup_bpf(u);
         }
@@ -707,23 +497,25 @@ void unit_dump(Unit *u, FILE *f, const char *prefix) {
         }
 
         if (UNIT_HAS_CGROUP_CONTEXT(u)) {
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
                 fprintf(f,
                         "%s\tSlice: %s\n"
                         "%s\tCGroup: %s\n"
                         "%s\tCGroup realized: %s\n",
                         prefix, strna(unit_slice_name(u)),
-                        prefix, strna(u->cgroup_path),
-                        prefix, yes_no(u->cgroup_realized));
+                        prefix, strna(crt ? crt->cgroup_path : NULL),
+                        prefix, yes_no(crt ? crt->cgroup_realized : false));
 
-                if (u->cgroup_realized_mask != 0) {
+                if (crt && crt->cgroup_realized_mask != 0) {
                         _cleanup_free_ char *s = NULL;
-                        (void) cg_mask_to_string(u->cgroup_realized_mask, &s);
+                        (void) cg_mask_to_string(crt->cgroup_realized_mask, &s);
                         fprintf(f, "%s\tCGroup realized mask: %s\n", prefix, strnull(s));
                 }
 
-                if (u->cgroup_enabled_mask != 0) {
+                if (crt && crt->cgroup_enabled_mask != 0) {
                         _cleanup_free_ char *s = NULL;
-                        (void) cg_mask_to_string(u->cgroup_enabled_mask, &s);
+                        (void) cg_mask_to_string(crt->cgroup_enabled_mask, &s);
                         fprintf(f, "%s\tCGroup enabled mask: %s\n", prefix, strnull(s));
                 }
 
index 6dc13bcc9ad3138cca53be73338a8b77819b9fc7..df99fff9d271d0c9cd9c09faa71e1f38cf626a9e 100644 (file)
@@ -109,29 +109,13 @@ Unit* unit_new(Manager *m, size_t size) {
         u->unit_file_preset = -1;
         u->on_failure_job_mode = JOB_REPLACE;
         u->on_success_job_mode = JOB_FAIL;
-        u->cgroup_control_inotify_wd = -1;
-        u->cgroup_memory_inotify_wd = -1;
         u->job_timeout = USEC_INFINITY;
         u->job_running_timeout = USEC_INFINITY;
         u->ref_uid = UID_INVALID;
         u->ref_gid = GID_INVALID;
-        u->cpu_usage_last = NSEC_INFINITY;
 
-        unit_reset_memory_accounting_last(u);
-
-        unit_reset_io_accounting_last(u);
-
-        u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
         u->failure_action_exit_status = u->success_action_exit_status = -1;
 
-        u->ip_accounting_ingress_map_fd = -EBADF;
-        u->ip_accounting_egress_map_fd = -EBADF;
-
-        u->ipv4_allow_map_fd = -EBADF;
-        u->ipv6_allow_map_fd = -EBADF;
-        u->ipv4_deny_map_fd = -EBADF;
-        u->ipv6_deny_map_fd = -EBADF;
-
         u->last_section_private = -1;
 
         u->start_ratelimit = (const RateLimit) {
@@ -139,7 +123,13 @@ Unit* unit_new(Manager *m, size_t size) {
                 m->defaults.start_limit_burst,
         };
 
-        u->auto_start_stop_ratelimit = (const RateLimit) { .interval = 10 * USEC_PER_SEC, .burst = 16 };
+        u->auto_start_stop_ratelimit = (const RateLimit) {
+                .interval = 10 * USEC_PER_SEC,
+                .burst = 16
+        };
+
+        unit_reset_memory_accounting_last(u);
+        unit_reset_io_accounting_last(u);
 
         return u;
 }
@@ -490,16 +480,11 @@ bool unit_may_gc(Unit *u) {
         if (unit_success_failure_handler_has_jobs(u))
                 return false;
 
-        if (u->cgroup_path) {
-                /* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
-                 * around. Units with active processes should never be collected. */
-
-                r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path);
-                if (r < 0)
-                        log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", empty_to_root(u->cgroup_path));
-                if (r <= 0)
-                        return false;
-        }
+        /* If the unit has a cgroup, then check whether there's anything in it. If so, we should stay
+         * around. Units with active processes should never be collected. */
+        r = unit_cgroup_is_empty(u);
+        if (r <= 0 && r != -ENXIO)
+                return false; /* ENXIO means: currently not realized */
 
         if (!UNIT_VTABLE(u)->may_gc)
                 return true;
@@ -804,12 +789,6 @@ Unit* unit_free(Unit *u) {
         if (u->on_console)
                 manager_unref_console(u->manager);
 
-        fdset_free(u->initial_socket_bind_link_fds);
-#if BPF_FRAMEWORK
-        bpf_link_free(u->ipv4_socket_bind_link);
-        bpf_link_free(u->ipv6_socket_bind_link);
-#endif
-
         unit_release_cgroup(u);
 
         if (!MANAGER_IS_RELOADING(u->manager))
@@ -866,16 +845,6 @@ Unit* unit_free(Unit *u) {
 
         bpf_firewall_close(u);
 
-        hashmap_free(u->bpf_foreign_by_key);
-
-        bpf_program_free(u->bpf_device_control_installed);
-
-#if BPF_FRAMEWORK
-        bpf_link_free(u->restrict_ifaces_ingress_bpf_link);
-        bpf_link_free(u->restrict_ifaces_egress_bpf_link);
-#endif
-        fdset_free(u->initial_restric_ifaces_link_fds);
-
         condition_free_list(u->conditions);
         condition_free_list(u->asserts);
 
@@ -2966,7 +2935,8 @@ int unit_enqueue_rewatch_pids(Unit *u) {
 
         assert(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (!crt || !crt->cgroup_path)
                 return -ENOENT;
 
         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
@@ -3452,8 +3422,11 @@ int unit_set_slice(Unit *u, Unit *slice) {
                 return 0;
 
         /* Disallow slice changes if @u is already bound to cgroups */
-        if (UNIT_GET_SLICE(u) && u->cgroup_realized)
-                return -EBUSY;
+        if (UNIT_GET_SLICE(u)) {
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+                if (crt && crt->cgroup_realized)
+                        return -EBUSY;
+        }
 
         /* Remove any slices assigned prior; we should only have one UNIT_IN_SLICE dependency */
         if (UNIT_GET_SLICE(u))
@@ -4093,31 +4066,35 @@ int unit_kill(
         /* Note: if we shall enqueue rather than kill we won't do this via the cgroup mechanism, since it
          * doesn't really make much sense (and given that enqueued values are a relatively expensive
          * resource, and we shouldn't allow us to be subjects for such allocation sprees) */
-        if (IN_SET(who, KILL_ALL, KILL_ALL_FAIL) && u->cgroup_path && code == SI_USER) {
-                _cleanup_set_free_ Set *pid_set = NULL;
+        if (IN_SET(who, KILL_ALL, KILL_ALL_FAIL) && code == SI_USER) {
+                CGroupRuntime *crt = unit_get_cgroup_runtime(u);
 
-                /* Exclude the main/control pids from being killed via the cgroup */
-                r = unit_pid_set(u, &pid_set);
-                if (r < 0)
-                        return log_oom();
+                if (crt && crt->cgroup_path) {
+                        _cleanup_set_free_ Set *pid_set = NULL;
 
-                r = cg_kill_recursive(u->cgroup_path, signo, 0, pid_set, kill_common_log, u);
-                if (r < 0 && !IN_SET(r, -ESRCH, -ENOENT)) {
-                        if (ret >= 0)
-                                sd_bus_error_set_errnof(
-                                                ret_error, r,
-                                                "Failed to send signal SIG%s to auxiliary processes: %m",
+                        /* Exclude the main/control pids from being killed via the cgroup */
+                        r = unit_pid_set(u, &pid_set);
+                        if (r < 0)
+                                return log_oom();
+
+                        r = cg_kill_recursive(crt->cgroup_path, signo, 0, pid_set, kill_common_log, u);
+                        if (r < 0 && !IN_SET(r, -ESRCH, -ENOENT)) {
+                                if (ret >= 0)
+                                        sd_bus_error_set_errnof(
+                                                        ret_error, r,
+                                                        "Failed to send signal SIG%s to auxiliary processes: %m",
+                                                        signal_to_string(signo));
+
+                                log_unit_warning_errno(
+                                                u, r,
+                                                "Failed to send signal SIG%s to auxiliary processes on client request: %m",
                                                 signal_to_string(signo));
 
-                        log_unit_warning_errno(
-                                        u, r,
-                                        "Failed to send signal SIG%s to auxiliary processes on client request: %m",
-                                        signal_to_string(signo));
+                                RET_GATHER(ret, r);
+                        }
 
-                        RET_GATHER(ret, r);
+                        killed = killed || r >= 0;
                 }
-
-                killed = killed || r >= 0;
         }
 
         /* If the "fail" versions of the operation are requested, then complain if the set of processes we killed is empty */
@@ -4377,7 +4354,7 @@ ExecContext *unit_get_exec_context(const Unit *u) {
         return (ExecContext*) ((uint8_t*) u + offset);
 }
 
-KillContext *unit_get_kill_context(Unit *u) {
+KillContext *unit_get_kill_context(const Unit *u) {
         size_t offset;
         assert(u);
 
@@ -4391,7 +4368,7 @@ KillContext *unit_get_kill_context(Unit *u) {
         return (KillContext*) ((uint8_t*) u + offset);
 }
 
-CGroupContext *unit_get_cgroup_context(Unit *u) {
+CGroupContext *unit_get_cgroup_context(const Unit *u) {
         size_t offset;
 
         if (u->type < 0)
@@ -4404,7 +4381,7 @@ CGroupContext *unit_get_cgroup_context(Unit *u) {
         return (CGroupContext*) ((uint8_t*) u + offset);
 }
 
-ExecRuntime *unit_get_exec_runtime(Unit *u) {
+ExecRuntime *unit_get_exec_runtime(const Unit *u) {
         size_t offset;
 
         if (u->type < 0)
@@ -4417,6 +4394,19 @@ ExecRuntime *unit_get_exec_runtime(Unit *u) {
         return *(ExecRuntime**) ((uint8_t*) u + offset);
 }
 
+CGroupRuntime *unit_get_cgroup_runtime(const Unit *u) {
+        size_t offset;
+
+        if (u->type < 0)
+                return NULL;
+
+        offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
+        if (offset <= 0)
+                return NULL;
+
+        return *(CGroupRuntime**) ((uint8_t*) u + offset);
+}
+
 static const char* unit_drop_in_dir(Unit *u, UnitWriteFlags flags) {
         assert(u);
 
@@ -4810,7 +4800,8 @@ int unit_kill_context(Unit *u, KillOperation k) {
         r = unit_kill_context_one(u, unit_control_pid(u), "control", /* is_alien = */ false, sig, send_sighup, log_func);
         wait_for_exit = wait_for_exit || r > 0;
 
-        if (u->cgroup_path &&
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt && crt->cgroup_path &&
             (c->kill_mode == KILL_CONTROL_GROUP || (c->kill_mode == KILL_MIXED && k == KILL_KILL))) {
                 _cleanup_set_free_ Set *pid_set = NULL;
 
@@ -4820,14 +4811,14 @@ int unit_kill_context(Unit *u, KillOperation k) {
                         return r;
 
                 r = cg_kill_recursive(
-                                u->cgroup_path,
+                                crt->cgroup_path,
                                 sig,
                                 CGROUP_SIGCONT|CGROUP_IGNORE_SELF,
                                 pid_set,
                                 log_func, u);
                 if (r < 0) {
                         if (!IN_SET(r, -EAGAIN, -ESRCH, -ENOENT))
-                                log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", empty_to_root(u->cgroup_path));
+                                log_unit_warning_errno(u, r, "Failed to kill control group %s, ignoring: %m", empty_to_root(crt->cgroup_path));
 
                 } else if (r > 0) {
 
@@ -4848,7 +4839,7 @@ int unit_kill_context(Unit *u, KillOperation k) {
                                         return r;
 
                                 (void) cg_kill_recursive(
-                                                u->cgroup_path,
+                                                crt->cgroup_path,
                                                 SIGHUP,
                                                 CGROUP_IGNORE_SELF,
                                                 pid_set,
@@ -4996,6 +4987,21 @@ int unit_setup_exec_runtime(Unit *u) {
         return r;
 }
 
+CGroupRuntime *unit_setup_cgroup_runtime(Unit *u) {
+        size_t offset;
+
+        assert(u);
+
+        offset = UNIT_VTABLE(u)->cgroup_runtime_offset;
+        assert(offset > 0);
+
+        CGroupRuntime **rt = (CGroupRuntime**) ((uint8_t*) u + offset);
+        if (*rt)
+                return *rt;
+
+        return (*rt = cgroup_runtime_new());
+}
+
 bool unit_type_supported(UnitType t) {
         static int8_t cache[_UNIT_TYPE_MAX] = {}; /* -1: disabled, 1: enabled: 0: don't know */
         int r;
@@ -5339,7 +5345,8 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
         SET_FLAG(p->flags, EXEC_PASS_LOG_UNIT|EXEC_CHOWN_DIRECTORIES, MANAGER_IS_SYSTEM(u->manager));
 
         /* Copy parameters from unit */
-        p->cgroup_path = u->cgroup_path;
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        p->cgroup_path = crt ? crt->cgroup_path : NULL;
         SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
 
         p->received_credentials_directory = u->manager->received_credentials_directory;
@@ -5359,7 +5366,7 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) {
 
         p->user_lookup_fd = u->manager->user_lookup_fds[1];
 
-        p->cgroup_id = u->cgroup_id;
+        p->cgroup_id = crt ? crt->cgroup_id : 0;
         p->invocation_id = u->invocation_id;
         sd_id128_to_string(p->invocation_id, p->invocation_id_string);
         p->unit_id = strdup(u->id);
@@ -5381,6 +5388,10 @@ int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret) {
 
         (void) unit_realize_cgroup(u);
 
+        CGroupRuntime *crt = unit_setup_cgroup_runtime(u);
+        if (!crt)
+                return -ENOMEM;
+
         r = safe_fork(name, FORK_REOPEN_LOG|FORK_DEATHSIG_SIGTERM, &pid);
         if (r < 0)
                 return r;
@@ -5403,10 +5414,10 @@ int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret) {
         (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE);
         (void) ignore_signals(SIGPIPE);
 
-        if (u->cgroup_path) {
-                r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+        if (crt->cgroup_path) {
+                r = cg_attach_everywhere(u->manager->cgroup_supported, crt->cgroup_path, 0, NULL, NULL);
                 if (r < 0) {
-                        log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", empty_to_root(u->cgroup_path));
+                        log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", empty_to_root(crt->cgroup_path));
                         _exit(EXIT_CGROUP);
                 }
         }
@@ -5801,9 +5812,10 @@ int unit_prepare_exec(Unit *u) {
 
         (void) unit_realize_cgroup(u);
 
-        if (u->reset_accounting) {
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+        if (crt->reset_accounting) {
                 (void) unit_reset_accounting(u);
-                u->reset_accounting = false;
+                crt->reset_accounting = false;
         }
 
         unit_export_state_files(u);
@@ -5863,11 +5875,13 @@ int unit_warn_leftover_processes(Unit *u, cg_kill_log_func_t log_func) {
 
         (void) unit_pick_cgroup_path(u);
 
-        if (!u->cgroup_path)
+        CGroupRuntime *crt = unit_get_cgroup_runtime(u);
+
+        if (!crt || !crt->cgroup_path)
                 return 0;
 
         return cg_kill_recursive(
-                        u->cgroup_path,
+                        crt->cgroup_path,
                         /* sig= */ 0,
                         /* flags= */ 0,
                         /* set= */ NULL,
index 15103efb64c1a2f0b74e86d8b208dbc5c818ab3c..b1cc2c2d01311f12f91964751ac658e86788263e 100644 (file)
@@ -208,6 +208,7 @@ struct UnitRef {
         LIST_FIELDS(UnitRef, refs_by_target);
 };
 
+/* The generic, dynamic definition of the unit */
 typedef struct Unit {
         Manager *manager;
 
@@ -370,74 +371,6 @@ typedef struct Unit {
         UnitFileState unit_file_state;
         PresetAction unit_file_preset;
 
-        /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
-        nsec_t cpu_usage_base;
-        nsec_t cpu_usage_last; /* the most recently read value */
-
-        /* Most recently read value of memory accounting metrics */
-        uint64_t memory_accounting_last[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1];
-
-        /* The current counter of OOM kills initiated by systemd-oomd */
-        uint64_t managed_oom_kill_last;
-
-        /* The current counter of the oom_kill field in the memory.events cgroup attribute */
-        uint64_t oom_kill_last;
-
-        /* Where the io.stat data was at the time the unit was started */
-        uint64_t io_accounting_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
-        uint64_t io_accounting_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX]; /* the most recently read value */
-
-        /* Counterparts in the cgroup filesystem */
-        char *cgroup_path;
-        uint64_t cgroup_id;
-        CGroupMask cgroup_realized_mask;           /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
-        CGroupMask cgroup_enabled_mask;            /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
-        CGroupMask cgroup_invalidated_mask;        /* A mask specifying controllers which shall be considered invalidated, and require re-realization */
-        CGroupMask cgroup_members_mask;            /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
-
-        /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */
-        int cgroup_control_inotify_wd;
-        int cgroup_memory_inotify_wd;
-
-        /* Device Controller BPF program */
-        BPFProgram *bpf_device_control_installed;
-
-        /* IP BPF Firewalling/accounting */
-        int ip_accounting_ingress_map_fd;
-        int ip_accounting_egress_map_fd;
-        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
-
-        int ipv4_allow_map_fd;
-        int ipv6_allow_map_fd;
-        int ipv4_deny_map_fd;
-        int ipv6_deny_map_fd;
-        BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
-        BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
-
-        Set *ip_bpf_custom_ingress;
-        Set *ip_bpf_custom_ingress_installed;
-        Set *ip_bpf_custom_egress;
-        Set *ip_bpf_custom_egress_installed;
-
-        /* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
-         * attached to unit cgroup by provided program fd and attach type. */
-        Hashmap *bpf_foreign_by_key;
-
-        FDSet *initial_socket_bind_link_fds;
-#if BPF_FRAMEWORK
-        /* BPF links to BPF programs attached to cgroup/bind{4|6} hooks and
-         * responsible for allowing or denying a unit to bind(2) to a socket
-         * address. */
-        struct bpf_link *ipv4_socket_bind_link;
-        struct bpf_link *ipv6_socket_bind_link;
-#endif
-
-        FDSet *initial_restric_ifaces_link_fds;
-#if BPF_FRAMEWORK
-        struct bpf_link *restrict_ifaces_ingress_bpf_link;
-        struct bpf_link *restrict_ifaces_egress_bpf_link;
-#endif
-
         /* Low-priority event source which is used to remove watched PIDs that have gone away, and subscribe to any new
          * ones which might have appeared. */
         sd_event_source *rewatch_pids_event_source;
@@ -508,12 +441,6 @@ typedef struct Unit {
         bool in_audit:1;
         bool on_console:1;
 
-        bool cgroup_realized:1;
-        bool cgroup_members_mask_valid:1;
-
-        /* Reset cgroup accounting next time we fork something off */
-        bool reset_accounting:1;
-
         bool start_limit_hit:1;
 
         /* Did we already invoke unit_coldplug() for this unit? */
@@ -529,9 +456,6 @@ typedef struct Unit {
         bool exported_log_ratelimit_interval:1;
         bool exported_log_ratelimit_burst:1;
 
-        /* Whether we warned about clamping the CPU quota period */
-        bool warned_clamping_cpu_quota_period:1;
-
         /* When writing transient unit files, stores which section we stored last. If < 0, we didn't write any yet. If
          * == 0 we are in the [Unit] section, if > 0 we are in the unit type-specific section. */
         signed int last_section_private:2;
@@ -577,6 +501,7 @@ static inline bool UNIT_WRITE_FLAGS_NOOP(UnitWriteFlags flags) {
 
 #include "kill.h"
 
+/* The static const, immutable data about a specific unit type */
 typedef struct UnitVTable {
         /* How much memory does an object of this unit type need */
         size_t object_size;
@@ -593,11 +518,14 @@ typedef struct UnitVTable {
          * KillContext is found, if the unit type has that */
         size_t kill_context_offset;
 
-        /* If greater than 0, the offset into the object where the
-         * pointer to ExecSharedRuntime is found, if the unit type has
-         * that */
+        /* If greater than 0, the offset into the object where the pointer to ExecRuntime is found, if
+         * the unit type has that */
         size_t exec_runtime_offset;
 
+        /* If greater than 0, the offset into the object where the pointer to CGroupRuntime is found, if the
+         * unit type has that */
+        size_t cgroup_runtime_offset;
+
         /* The name of the configuration file section with the private settings of this unit */
         const char *private_section;
 
@@ -993,12 +921,14 @@ void unit_ref_unset(UnitRef *ref);
 int unit_patch_contexts(Unit *u);
 
 ExecContext *unit_get_exec_context(const Unit *u) _pure_;
-KillContext *unit_get_kill_context(Unit *u) _pure_;
-CGroupContext *unit_get_cgroup_context(Unit *u) _pure_;
+KillContext *unit_get_kill_context(const Unit *u) _pure_;
+CGroupContext *unit_get_cgroup_context(const Unit *u) _pure_;
 
-ExecRuntime *unit_get_exec_runtime(Unit *u) _pure_;
+ExecRuntime *unit_get_exec_runtime(const Unit *u) _pure_;
+CGroupRuntime *unit_get_cgroup_runtime(const Unit *u) _pure_;
 
 int unit_setup_exec_runtime(Unit *u);
+CGroupRuntime *unit_setup_cgroup_runtime(Unit *u);
 
 const char* unit_escape_setting(const char *s, UnitWriteFlags flags, char **buf);
 char* unit_concat_strv(char **l, UnitWriteFlags flags);
index c4175bcb0eed39576d1f1825d3747c35efaf9bbf..30193effa6d821763770a7e637872eb673537fcb 100644 (file)
@@ -155,10 +155,11 @@ int main(int argc, char *argv[]) {
                 return log_tests_skipped("Kernel doesn't support the necessary bpf bits (masked out via seccomp?)");
         assert_se(r >= 0);
 
-        assert_se(u->ip_bpf_ingress);
-        assert_se(u->ip_bpf_egress);
+        CGroupRuntime *crt = ASSERT_PTR(unit_get_cgroup_runtime(u));
+        assert_se(crt->ip_bpf_ingress);
+        assert_se(crt->ip_bpf_egress);
 
-        r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
+        r = bpf_program_load_kernel(crt->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
 
         log_notice("log:");
         log_notice("-------");
@@ -167,7 +168,7 @@ int main(int argc, char *argv[]) {
 
         assert_se(r >= 0);
 
-        r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
+        r = bpf_program_load_kernel(crt->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
 
         log_notice("log:");
         log_notice("-------");