]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
test-oomd: drop racy last_had_mem_reclaim assertion
authorLuca Boccassi <luca.boccassi@gmail.com>
Wed, 27 May 2026 22:32:02 +0000 (23:32 +0100)
committerYu Watanabe <watanabe.yu+github@gmail.com>
Thu, 28 May 2026 06:01:43 +0000 (15:01 +0900)
test_oomd_cgroup_context_acquire_and_insert sets c1->pgscan = UINT64_MAX
and c1->last_had_mem_reclaim = 888 before re-inserting via
oomd_insert_cgroup_context(h1, h2, cgroup), then asserts that c2 carries
over last_had_mem_reclaim == 888.

The assumption was that, because c1->pgscan (which becomes c2->last_pgscan)
is UINT64_MAX and is therefore strictly greater than the freshly-acquired
c2->pgscan, oomd_pgscan_rate() in oomd_insert_cgroup_context() would
return 0 and leave last_had_mem_reclaim untouched. But the function
treats last_pgscan > pgscan as "cgroup was recreated", resets last_pgscan
to 0, and returns pgscan - 0 == pgscan. So when the kernel records any
non-zero pgscan on the live cgroup of the test unit, the rate is > 0 and
last_had_mem_reclaim is overwritten with now(CLOCK_MONOTONIC).

From src/oom/oomd-util.c:

    uint64_t oomd_pgscan_rate(const OomdCGroupContext *c) {
            [...]
            last_pgscan = c->last_pgscan;
            if (c->last_pgscan > c->pgscan) {
                    log_debug("Last pgscan %"PRIu64" greater than current pgscan %"PRIu64" for %s. Using last pgscan of zero.",
                                    c->last_pgscan, c->pgscan, c->path);
                    last_pgscan = 0;
            }
            return c->pgscan - last_pgscan;
    }

    int oomd_insert_cgroup_context(Hashmap *old_h, Hashmap *new_h, const char *path) {
            [...]
            if (oomd_pgscan_rate(curr_ctx) > 0)
                    curr_ctx->last_had_mem_reclaim = now(CLOCK_MONOTONIC);

Under sanitizers the test can be slow enough that the kernel scans the
test service cgroup at least once between the two acquisitions, which
triggers the failure:

    [ 1898.159455] test-oomd-util[2606]: /* test_oomd_cgroup_context_acquire_and_insert */
    [ 1898.161247] test-oomd-util[2606]: Last pgscan 18446744073709551615 greater than current pgscan 2 for /system.slice/test-oomd-util.service. Using last pgscan of zero.
    [ 1898.161330] test-oomd-util[2606]: src/oom/test-oomd-util.c:322: Assertion failed: Expected "c2->last_had_mem_reclaim == 888u", but 1898161250 != 888

(1898161250 matches the monotonic timestamp of the log line, confirming
that last_had_mem_reclaim was overwritten with now(CLOCK_MONOTONIC).)

The propagation of last_had_mem_reclaim is already covered
deterministically by TEST(oomd_update_cgroup_contexts_between_hashmaps),
which builds OomdCGroupContext structs directly with matching pgscan
values so that oomd_pgscan_rate() returns exactly 0. So just drop the
racy setup and assertion here.

Follow-up for df637ede7b4e1d0faf8d620d626a0af230712a9e

Fixes https://github.com/systemd/systemd/issues/38543

Co-developed-by: Claude Opus 4.7 <noreply@anthropic.com>
src/oom/test-oomd-util.c

index c78315c56b6b0da11a3bb249f572d9ed051cac1c..14a821ebebecbbeae7b7661575b970a047c8086c 100644 (file)
@@ -309,7 +309,6 @@ TEST(oomd_cgroup_context_acquire_and_insert) {
         c1->mem_pressure_limit = 6789;
         c1->mem_pressure_limit_hit_start = 42;
         c1->mem_pressure_duration_usec = 1234;
-        c1->last_had_mem_reclaim = 888;
         ASSERT_NOT_NULL(h2 = hashmap_new(&oomd_cgroup_ctx_hash_ops));
         ASSERT_OK(oomd_insert_cgroup_context(h1, h2, cgroup));
         ASSERT_NOT_NULL(c1 = hashmap_get(h1, cgroup));
@@ -319,7 +318,6 @@ TEST(oomd_cgroup_context_acquire_and_insert) {
         ASSERT_EQ(c2->mem_pressure_limit, 6789u);
         ASSERT_EQ(c2->mem_pressure_limit_hit_start, 42u);
         ASSERT_EQ(c2->mem_pressure_duration_usec, 1234u);
-        ASSERT_EQ(c2->last_had_mem_reclaim, 888u); /* assumes the live pgscan is less than UINT64_MAX */
 }
 
 TEST(oomd_update_cgroup_contexts_between_hashmaps) {