]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
oom: add unit file settings for oomd avoid/omit xattrs
authorAnita Zhang <the.anitazha@gmail.com>
Thu, 28 Jan 2021 10:31:44 +0000 (02:31 -0800)
committerAnita Zhang <the.anitazha@gmail.com>
Fri, 12 Feb 2021 20:45:36 +0000 (12:45 -0800)
docs/TRANSIENT-SETTINGS.md
src/core/cgroup.c
src/core/cgroup.h
src/core/dbus-cgroup.c
src/core/execute.c
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h
src/shared/bus-unit-util.c
test/fuzz/fuzz-unit-file/directives.service
test/units/testsuite-56.sh

index 50370602543d46a05a8529d629d2ebd557d83e95..9f69a3162a0d8e7cb1c08a51eba8f26dee2479af 100644 (file)
@@ -273,6 +273,7 @@ All cgroup/resource control settings are available for transient units
 ✓ ManagedOOMSwap=
 ✓ ManagedOOMMemoryPressure=
 ✓ ManagedOOMMemoryPressureLimit=
+✓ ManagedOOMPreference=
 ```
 
 ## Process Killing Settings
index 70282a7abdac4df50b3d2e8e75096899d7a21122..b0fc67a1255736623b37d633fe116ab0cfecc4f8 100644 (file)
@@ -131,6 +131,7 @@ void cgroup_context_init(CGroupContext *c) {
 
                 .moom_swap = MANAGED_OOM_AUTO,
                 .moom_mem_pressure = MANAGED_OOM_AUTO,
+                .moom_preference = MANAGED_OOM_PREFERENCE_NONE,
         };
 }
 
@@ -417,7 +418,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
                 "%sDelegate: %s\n"
                 "%sManagedOOMSwap: %s\n"
                 "%sManagedOOMMemoryPressure: %s\n"
-                "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n",
+                "%sManagedOOMMemoryPressureLimit: %" PRIu32 ".%02" PRIu32 "%%\n"
+                "%sManagedOOMPreference: %s%%\n",
                 prefix, yes_no(c->cpu_accounting),
                 prefix, yes_no(c->io_accounting),
                 prefix, yes_no(c->blockio_accounting),
@@ -450,7 +452,8 @@ void cgroup_context_dump(Unit *u, FILE* f, const char *prefix) {
                 prefix, yes_no(c->delegate),
                 prefix, managed_oom_mode_to_string(c->moom_swap),
                 prefix, managed_oom_mode_to_string(c->moom_mem_pressure),
-                prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100);
+                prefix, c->moom_mem_pressure_limit_permyriad / 100, c->moom_mem_pressure_limit_permyriad % 100,
+                prefix, managed_oom_preference_to_string(c->moom_preference));
 
         if (c->delegate) {
                 _cleanup_free_ char *t = NULL;
@@ -600,6 +603,41 @@ int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode)
 UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_low);
 UNIT_DEFINE_ANCESTOR_MEMORY_LOOKUP(memory_min);
 
+void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path) {
+        CGroupContext *c;
+        int r;
+
+        assert(u);
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return;
+
+        if (c->moom_preference == MANAGED_OOM_PREFERENCE_OMIT) {
+                r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_omit", "1", 1, 0);
+                if (r < 0)
+                        log_unit_debug_errno(u, r, "Failed to set oomd_omit flag on control group %s, ignoring: %m", cgroup_path);
+        }
+
+        if (c->moom_preference == MANAGED_OOM_PREFERENCE_AVOID) {
+                r = cg_set_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_avoid", "1", 1, 0);
+                if (r < 0)
+                        log_unit_debug_errno(u, r, "Failed to set oomd_avoid flag on control group %s, ignoring: %m", cgroup_path);
+        }
+
+        if (c->moom_preference != MANAGED_OOM_PREFERENCE_AVOID) {
+                r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_avoid");
+                if (r != -ENODATA)
+                        log_unit_debug_errno(u, r, "Failed to remove oomd_avoid flag on control group %s, ignoring: %m", cgroup_path);
+        }
+
+        if (c->moom_preference != MANAGED_OOM_PREFERENCE_OMIT) {
+                r = cg_remove_xattr(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, "user.oomd_omit");
+                if (r != -ENODATA)
+                        log_unit_debug_errno(u, r, "Failed to remove oomd_omit flag on control group %s, ignoring: %m", cgroup_path);
+        }
+}
+
 static void cgroup_xattr_apply(Unit *u) {
         char ids[SD_ID128_STRING_MAX];
         int r;
@@ -630,6 +668,8 @@ static void cgroup_xattr_apply(Unit *u) {
                 if (r != -ENODATA)
                         log_unit_debug_errno(u, r, "Failed to remove delegate flag on control group %s, ignoring: %m", u->cgroup_path);
         }
+
+        cgroup_oomd_xattr_apply(u, u->cgroup_path);
 }
 
 static int lookup_block_device(const char *p, dev_t *ret) {
@@ -3737,12 +3777,6 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action) {
         return 1;
 }
 
-static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
-        [CGROUP_DEVICE_POLICY_AUTO]   = "auto",
-        [CGROUP_DEVICE_POLICY_CLOSED] = "closed",
-        [CGROUP_DEVICE_POLICY_STRICT] = "strict",
-};
-
 int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         _cleanup_free_ char *v = NULL;
         int r;
@@ -3771,6 +3805,12 @@ int unit_get_cpuset(Unit *u, CPUSet *cpus, const char *name) {
         return parse_cpu_set_full(v, cpus, false, NULL, NULL, 0, NULL);
 }
 
+static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
+        [CGROUP_DEVICE_POLICY_AUTO]   = "auto",
+        [CGROUP_DEVICE_POLICY_CLOSED] = "closed",
+        [CGROUP_DEVICE_POLICY_STRICT] = "strict",
+};
+
 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);
 
 static const char* const freezer_action_table[_FREEZER_ACTION_MAX] = {
index 9fbfabbb7e3da28a685b72d5b1c9a01639a576a2..92f71a4282a96aad8130801fce0e0eb150e87489 100644 (file)
@@ -164,6 +164,7 @@ struct CGroupContext {
         ManagedOOMMode moom_swap;
         ManagedOOMMode moom_mem_pressure;
         uint32_t moom_mem_pressure_limit_permyriad;
+        ManagedOOMPreference moom_preference;
 };
 
 /* Used when querying IP accounting data */
@@ -204,6 +205,8 @@ void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockI
 
 int cgroup_add_device_allow(CGroupContext *c, const char *dev, const char *mode);
 
+void cgroup_oomd_xattr_apply(Unit *u, const char *cgroup_path);
+
 CGroupMask unit_get_own_mask(Unit *u);
 CGroupMask unit_get_delegate_mask(Unit *u);
 CGroupMask unit_get_members_mask(Unit *u);
index 6f309feb236c42750bbca74ac3116908c39bfe7f..0b2d945283e36a668568183b1fb33477217fcd9f 100644 (file)
@@ -21,6 +21,7 @@ BUS_DEFINE_PROPERTY_GET(bus_property_get_tasks_max, "t", TasksMax, tasks_max_res
 
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_cgroup_device_policy, cgroup_device_policy, CGroupDevicePolicy);
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_mode, managed_oom_mode, ManagedOOMMode);
+static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_managed_oom_preference, managed_oom_preference, ManagedOOMPreference);
 
 static int property_get_cgroup_mask(
                 sd_bus *bus,
@@ -395,6 +396,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
         SD_BUS_PROPERTY("ManagedOOMSwap", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_swap), 0),
         SD_BUS_PROPERTY("ManagedOOMMemoryPressure", "s", property_get_managed_oom_mode, offsetof(CGroupContext, moom_mem_pressure), 0),
         SD_BUS_PROPERTY("ManagedOOMMemoryPressureLimitPermyriad", "u", NULL, offsetof(CGroupContext, moom_mem_pressure_limit_permyriad), 0),
+        SD_BUS_PROPERTY("ManagedOOMPreference", "s", property_get_managed_oom_preference, offsetof(CGroupContext, moom_preference), 0),
         SD_BUS_VTABLE_END
 };
 
@@ -1720,6 +1722,26 @@ int bus_cgroup_set_property(
                 return 1;
         }
 
+        if (streq(name, "ManagedOOMPreference")) {
+                ManagedOOMPreference p;
+                const char *pref;
+
+                r = sd_bus_message_read(message, "s", &pref);
+                if (r < 0)
+                        return r;
+
+                p = managed_oom_preference_from_string(pref);
+                if (p < 0)
+                        return -EINVAL;
+
+                if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+                        c->moom_preference = p;
+                        unit_write_settingf(u, flags, name, "ManagedOOMPreference=%s", pref);
+                }
+
+                return 1;
+        }
+
         if (streq(name, "DisableControllers") || (u->transient && u->load_state == UNIT_STUB))
                 return bus_cgroup_set_transient_property(u, c, name, message, flags, error);
 
index 29fe9f05b12ae59ac8aec200f43d28799ba6d4ea..2c35a917f88d0c79fc198c19105588dc60f29e07 100644 (file)
@@ -4706,6 +4706,10 @@ int exec_spawn(Unit *unit,
                         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
                         if (r < 0)
                                 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
+
+                        /* Normally we would not propagate the oomd xattrs to children but since we created this
+                         * sub-cgroup internally we should do it. */
+                        cgroup_oomd_xattr_apply(unit, subcgroup_path);
                 }
         }
 
index be07c7604cc7d2d2e53cbc04f4d687615679ff1b..b0460e27645ced213e33dc65b2a8c7814d346b46 100644 (file)
@@ -230,6 +230,7 @@ $1.IPEgressFilterPath,                   config_parse_ip_filter_bpf_progs,
 $1.ManagedOOMSwap,                       config_parse_managed_oom_mode,               0,                                  offsetof($1, cgroup_context.moom_swap)
 $1.ManagedOOMMemoryPressure,             config_parse_managed_oom_mode,               0,                                  offsetof($1, cgroup_context.moom_mem_pressure)
 $1.ManagedOOMMemoryPressureLimit,        config_parse_managed_oom_mem_pressure_limit, 0,                                  offsetof($1, cgroup_context.moom_mem_pressure_limit_permyriad)
+$1.ManagedOOMPreference,                 config_parse_managed_oom_preference,         0,                                  offsetof($1, cgroup_context.moom_preference)
 $1.NetClass,                             config_parse_warn_compat,                    DISABLED_LEGACY,                    0'
 )m4_dnl
 Unit.Description,                        config_parse_unit_string_printf,             0,                                  offsetof(Unit, description)
index 06b71aaf1575a10b5132eeefc4fd9660f031b76f..c6b017556f9247a9425e699460debbad0f40198e 100644 (file)
@@ -133,6 +133,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceR
 DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode");
 DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value");
 DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy");
+DEFINE_CONFIG_PARSE_ENUM(config_parse_managed_oom_preference, managed_oom_preference, ManagedOOMPreference, "Failed to parse ManagedOOMPreference=");
 DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value");
 DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight");
 DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight");
index 6b2175cd2afe45d010430b2bed643cbef368e215..e4a5cb7986931f37689af612ea780d16e2474e0c 100644 (file)
@@ -78,6 +78,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max);
 CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
 CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mode);
 CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_mem_pressure_limit);
+CONFIG_PARSER_PROTOTYPE(config_parse_managed_oom_preference);
 CONFIG_PARSER_PROTOTYPE(config_parse_device_policy);
 CONFIG_PARSER_PROTOTYPE(config_parse_device_allow);
 CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency);
index 84f57d94d2358e14458f8d512ee607fe4a356328..5bbaa07dd1c75632007bfb2a2cdd1110652e73da 100644 (file)
@@ -435,7 +435,8 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
         if (STR_IN_SET(field, "DevicePolicy",
                               "Slice",
                               "ManagedOOMSwap",
-                              "ManagedOOMMemoryPressure"))
+                              "ManagedOOMMemoryPressure",
+                              "ManagedOOMPreference"))
                 return bus_append_string(m, field, eq);
 
         if (STR_IN_SET(field, "ManagedOOMMemoryPressureLimit")) {
index 15fa556dd64b6f1e9464d6af815ae0abd3625793..0c7ded6786a2dc457e3d6c031242c5006bd44f9a 100644 (file)
@@ -138,6 +138,10 @@ MakeDirectory=
 Mark=
 MaxConnections=
 MaxConnectionsPerSource=
+ManagedOOMSwap=
+ManagedOOMMemoryPressure=
+ManagedOOMMemoryPressureLimitPercent=
+ManagedOOMPreference=
 MemoryAccounting=
 MemoryHigh=
 MemoryLimit=
index 88c185b88699db65b1efb5401acd8fe2f06ebc0d..1884f814689ab9358b3e4ac0c6fa9dea22c6bc9c 100755 (executable)
@@ -13,6 +13,8 @@ if [[ "$cgroup_type" != *"cgroup2"* ]] && [[ "$cgroup_type" != *"0x63677270"* ]]
 fi
 [[ -e /skipped ]] && exit 0 || true
 
+rm -rf /etc/systemd/system/testsuite-56-testbloat.service.d
+
 echo "DefaultMemoryPressureDurationSec=5s" >> /etc/systemd/oomd.conf
 
 systemctl start testsuite-56-testchill.service
@@ -41,10 +43,14 @@ if ! systemctl status testsuite-56-testchill.service; then exit 24; fi
 if setfattr -n user.xattr_test -v 1 /sys/fs/cgroup/; then
     sleep 120 # wait for systemd-oomd kill cool down and elevated memory pressure to come down
 
+    mkdir -p /etc/systemd/system/testsuite-56-testbloat.service.d/
+    echo "[Service]" > /etc/systemd/system/testsuite-56-testbloat.service.d/override.conf
+    echo "ManagedOOMPreference=avoid" >> /etc/systemd/system/testsuite-56-testbloat.service.d/override.conf
+
+    systemctl daemon-reload
     systemctl start testsuite-56-testchill.service
     systemctl start testsuite-56-testmunch.service
     systemctl start testsuite-56-testbloat.service
-    setfattr -n user.oomd_avoid -v 1 /sys/fs/cgroup/testsuite.slice/testsuite-56.slice/testsuite-56-workload.slice/testsuite-56-testbloat.service
 
     timeout=$(date -ud "2 minutes" +%s)
     while [[ $(date -u +%s) -le $timeout ]]; do