]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
test: try to detect SIGILL in stress-ng and skip TEST-55-OOMD gracefully 42290/head
authorLuca Boccassi <luca.boccassi@gmail.com>
Mon, 25 May 2026 11:11:23 +0000 (12:11 +0100)
committerLuca Boccassi <luca.boccassi@gmail.com>
Mon, 25 May 2026 11:12:19 +0000 (12:12 +0100)
The attempt to fix SIGILL in TEST-55-OOMD actually made the test more
flaky, as the chosen stress-ng method generates less pressure, so it
often fails.

Try a different approach: try to detect that SIGILL caused the unit
calling stress-ng to fail, and skip gracefully in that case.

test/units/TEST-55-OOMD.sh

index 6689bbdd733c9abd9732aafa80acf4f5eda3a91c..9993e9893776fcce84a94829f0ae7b29a381a64f 100755 (executable)
@@ -94,6 +94,19 @@ else
     systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 true
 fi
 
+# stress-ng can fail with SIGILL due to trying to use AVX-512 on older CPUs, try to detect and avoid failing
+stress_ng_sigilled() {
+    local result status sigill
+    local unit="${1:?}"
+    shift
+
+    result=$(systemctl "$@" show "$unit" -P Result)
+    status=$(systemctl "$@" show "$unit" -P ExecMainStatus)
+    sigill=$(kill -l ILL)
+
+    [[ "$status" == "$sigill" && ( "$result" == "signal" || "$result" == "core-dump" ) ]]
+}
+
 test_basic() {
     local cgroup_path="${1:?}"
     shift
@@ -123,7 +136,11 @@ test_basic() {
     if systemctl "$@" status TEST-55-OOMD-testbloat.service; then exit 42; fi
     if ! systemctl "$@" status TEST-55-OOMD-testchill.service; then exit 24; fi
 
-    assert_eq "$(systemctl "$@" show TEST-55-OOMD-testbloat.service -P ManagedOOMKills)" "1"
+    if stress_ng_sigilled TEST-55-OOMD-testbloat.service "$@"; then
+        echo "stress-ng died with SIGILL, skipping ManagedOOMKills assertion"
+    else
+        assert_eq "$(systemctl "$@" show TEST-55-OOMD-testbloat.service -P ManagedOOMKills)" "1"
+    fi
 
     systemctl "$@" kill --signal=KILL TEST-55-OOMD-testbloat.service || :
     systemctl "$@" stop TEST-55-OOMD-testbloat.service
@@ -171,10 +188,14 @@ EOF
         sleep 2
     done
 
-    # testmunch should be killed since testbloat had the avoid xattr on it
-    if ! systemctl status TEST-55-OOMD-testbloat.service; then exit 25; fi
-    if systemctl status TEST-55-OOMD-testmunch.service; then exit 43; fi
-    if ! systemctl status TEST-55-OOMD-testchill.service; then exit 24; fi
+    if stress_ng_sigilled TEST-55-OOMD-testbloat.service || stress_ng_sigilled TEST-55-OOMD-testmunch.service; then
+        echo "stress-ng died with SIGILL, skipping testcase_preference_avoid assertions"
+    else
+        # testmunch should be killed since testbloat had the avoid xattr on it
+        if ! systemctl status TEST-55-OOMD-testbloat.service; then exit 25; fi
+        if systemctl status TEST-55-OOMD-testmunch.service; then exit 43; fi
+        if ! systemctl status TEST-55-OOMD-testchill.service; then exit 24; fi
+    fi
 
     systemctl kill --signal=KILL TEST-55-OOMD-testbloat.service || :
     systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
@@ -249,8 +270,12 @@ EOF
         sleep 2
     done
 
-    if systemctl status TEST-55-OOMD-testmunch.service; then exit 44; fi
-    if ! systemctl status TEST-55-OOMD-testchill.service; then exit 23; fi
+    if stress_ng_sigilled TEST-55-OOMD-testmunch.service; then
+        echo "stress-ng died with SIGILL, skipping testcase_duration_override assertions"
+    else
+        if systemctl status TEST-55-OOMD-testmunch.service; then exit 44; fi
+        if ! systemctl status TEST-55-OOMD-testchill.service; then exit 23; fi
+    fi
 
     systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
     systemctl stop TEST-55-OOMD-testmunch.service
@@ -463,9 +488,13 @@ EOF
     # many times. With LastingSec=1h the kill must not fire.
     sleep 6
 
-    # Unit must still be active — if it were killed, Result= would be oom-kill.
-    assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P ActiveState)" "active"
-    assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P Result)" "success"
+    if stress_ng_sigilled TEST-55-OOMD-slowrule.service; then
+        echo "stress-ng died with SIGILL, skipping testcase_oom_rulesets_lasting_sec assertions"
+    else
+        # Unit must still be active. If it were killed, Result= would be oom-kill.
+        assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P ActiveState)" "active"
+        assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P Result)" "success"
+    fi
 
     systemctl stop TEST-55-OOMD-slowrule.service 2>/dev/null || true
 
@@ -484,6 +513,11 @@ EOF
     systemctl reload systemd-oomd.service
     ! systemctl start --wait TEST-55-OOMD-testbloat.service || exit 1
 
+    if stress_ng_sigilled TEST-55-OOMD-testbloat.service; then
+        echo "stress-ng died with SIGILL, skipping testcase_prekill_hook"
+        return 0
+    fi
+
     # one hook
     mkdir -p /run/systemd/oomd.prekill.hook/
     ncat --recv-only -kUl /run/systemd/oomd.prekill.hook/althook >/tmp/oomd_event.json &