From: Luca Boccassi Date: Mon, 25 May 2026 11:11:23 +0000 (+0100) Subject: test: try to detect SIGILL in stress-ng and skip TEST-55-OOMD gracefully X-Git-Tag: v261-rc2~32^2 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=a17efef1375c07df89e7f02e87fb881dfced53fb;p=thirdparty%2Fsystemd.git test: try to detect SIGILL in stress-ng and skip TEST-55-OOMD gracefully The attempt to fix SIGILL in TEST-55-OOMD actually made the test more flaky, as the chosen stress-ng method generates less pressure, so it often fails. Try a different approach: try to detect that SIGILL caused the unit calling stress-ng to fail, and skip gracefully in that case. --- diff --git a/test/units/TEST-55-OOMD.sh b/test/units/TEST-55-OOMD.sh index 6689bbdd733..9993e989377 100755 --- a/test/units/TEST-55-OOMD.sh +++ b/test/units/TEST-55-OOMD.sh @@ -94,6 +94,19 @@ else systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 true fi +# stress-ng can fail with SIGILL due to trying to use AVX-512 on older CPUs, try to detect and avoid failing +stress_ng_sigilled() { + local result status sigill + local unit="${1:?}" + shift + + result=$(systemctl "$@" show "$unit" -P Result) + status=$(systemctl "$@" show "$unit" -P ExecMainStatus) + sigill=$(kill -l ILL) + + [[ "$status" == "$sigill" && ( "$result" == "signal" || "$result" == "core-dump" ) ]] +} + test_basic() { local cgroup_path="${1:?}" shift @@ -123,7 +136,11 @@ test_basic() { if systemctl "$@" status TEST-55-OOMD-testbloat.service; then exit 42; fi if ! systemctl "$@" status TEST-55-OOMD-testchill.service; then exit 24; fi - assert_eq "$(systemctl "$@" show TEST-55-OOMD-testbloat.service -P ManagedOOMKills)" "1" + if stress_ng_sigilled TEST-55-OOMD-testbloat.service "$@"; then + echo "stress-ng died with SIGILL, skipping ManagedOOMKills assertion" + else + assert_eq "$(systemctl "$@" show TEST-55-OOMD-testbloat.service -P ManagedOOMKills)" "1" + fi systemctl "$@" kill --signal=KILL TEST-55-OOMD-testbloat.service || : systemctl "$@" stop TEST-55-OOMD-testbloat.service @@ -171,10 +188,14 @@ EOF sleep 2 done - # testmunch should be killed since testbloat had the avoid xattr on it - if ! systemctl status TEST-55-OOMD-testbloat.service; then exit 25; fi - if systemctl status TEST-55-OOMD-testmunch.service; then exit 43; fi - if ! systemctl status TEST-55-OOMD-testchill.service; then exit 24; fi + if stress_ng_sigilled TEST-55-OOMD-testbloat.service || stress_ng_sigilled TEST-55-OOMD-testmunch.service; then + echo "stress-ng died with SIGILL, skipping testcase_preference_avoid assertions" + else + # testmunch should be killed since testbloat had the avoid xattr on it + if ! systemctl status TEST-55-OOMD-testbloat.service; then exit 25; fi + if systemctl status TEST-55-OOMD-testmunch.service; then exit 43; fi + if ! systemctl status TEST-55-OOMD-testchill.service; then exit 24; fi + fi systemctl kill --signal=KILL TEST-55-OOMD-testbloat.service || : systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || : @@ -249,8 +270,12 @@ EOF sleep 2 done - if systemctl status TEST-55-OOMD-testmunch.service; then exit 44; fi - if ! systemctl status TEST-55-OOMD-testchill.service; then exit 23; fi + if stress_ng_sigilled TEST-55-OOMD-testmunch.service; then + echo "stress-ng died with SIGILL, skipping testcase_duration_override assertions" + else + if systemctl status TEST-55-OOMD-testmunch.service; then exit 44; fi + if ! systemctl status TEST-55-OOMD-testchill.service; then exit 23; fi + fi systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || : systemctl stop TEST-55-OOMD-testmunch.service @@ -463,9 +488,13 @@ EOF # many times. With LastingSec=1h the kill must not fire. sleep 6 - # Unit must still be active — if it were killed, Result= would be oom-kill. - assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P ActiveState)" "active" - assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P Result)" "success" + if stress_ng_sigilled TEST-55-OOMD-slowrule.service; then + echo "stress-ng died with SIGILL, skipping testcase_oom_rulesets_lasting_sec assertions" + else + # Unit must still be active. If it were killed, Result= would be oom-kill. + assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P ActiveState)" "active" + assert_eq "$(systemctl show TEST-55-OOMD-slowrule.service -P Result)" "success" + fi systemctl stop TEST-55-OOMD-slowrule.service 2>/dev/null || true @@ -484,6 +513,11 @@ EOF systemctl reload systemd-oomd.service ! systemctl start --wait TEST-55-OOMD-testbloat.service || exit 1 + if stress_ng_sigilled TEST-55-OOMD-testbloat.service; then + echo "stress-ng died with SIGILL, skipping testcase_prekill_hook" + return 0 + fi + # one hook mkdir -p /run/systemd/oomd.prekill.hook/ ncat --recv-only -kUl /run/systemd/oomd.prekill.hook/althook >/tmp/oomd_event.json &