git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
memcg: convert memcg->socket_pressure to u64
author: Kuniyuki Iwashima <kuniyu@google.com>
Thu, 17 Jul 2025 19:46:43 +0000 (19:46 +0000)
committer: Andrew Morton <akpm@linux-foundation.org>
Fri, 25 Jul 2025 02:12:32 +0000 (19:12 -0700)
memcg->socket_pressure is initialised with jiffies when the memcg is
created.

Once vmpressure detects that the cgroup is under memory pressure, the
field is updated with jiffies + HZ to signal the fact to the socket layer
and suppress memory allocation for one second.

Otherwise, the field is not updated.

mem_cgroup_under_socket_pressure() uses time_before() to check if jiffies
is less than memcg->socket_pressure, and this has a bug on 32-bit kernel.

  if (time_before(jiffies, memcg->socket_pressure))
          return true;

As time_before() casts the final result to long, the acceptable delta
between two timestamps is 2 ^ (BITS_PER_LONG - 1).

On 32-bit kernel with CONFIG_HZ=1000, this is about 24 days.

  >>> (2 ** 31) / 1000 / 60 / 60 / 24
  24.855134814814818

Once 24 days have passed since the last update of socket_pressure,
mem_cgroup_under_socket_pressure() starts to lie until the next 24 days
pass.

We don't need to worry about this on 64-bit machines unless they serve for
300 million years.

  >>> (2 ** 63) / 1000 / 60 / 60 / 24 / 365
  292471208.6775361

Let's convert memcg->socket_pressure to u64.

Performance testing:

I don't have a real 32-bit machine so this is a result on QEMU, but
with/without the u64 jiffies patch, the time spent in
mem_cgroup_under_socket_pressure() was 1~5us and I didn't see any
measurable delta.

no patch applied:
iperf3   273 [000]   137.296248:
probe:mem_cgroup_under_socket_pressure: (c13660d0)
                c13660d1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   273 [000]   137.296249:
probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f)
iperf3   273 [000]   137.296251:
probe:mem_cgroup_under_socket_pressure: (c13660d0)
                c13660d1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   273 [000]   137.296253:
probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f)

u64 jiffies patch applied:
iperf3   308 [001]   330.669370:
probe:mem_cgroup_under_socket_pressure: (c12ddba0)
                c12ddba1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   308 [001]   330.669371:
probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf)
iperf3   308 [001]   330.669382:
probe:mem_cgroup_under_socket_pressure: (c12ddba0)
                c12ddba1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3   308 [001]   330.669384:
probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf)

So the u64 approach is good enough.

Link: https://lkml.kernel.org/r/20250717194645.1096500-1-kuniyu@google.com
Fixes: 8e8ae645249b ("mm: memcontrol: hook up vmpressure to socket pressure")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/memcontrol.h
mm/memcontrol.c
mm/vmpressure.c

index 87b6688f124a7d78135d1d9dd4c641d825446ac6..785173aa0739cc5579da30e61b108d8bc9903f0d 100644 (file)
@@ -251,8 +251,10 @@ struct mem_cgroup {
         * that this indicator should NOT be used in legacy cgroup mode
         * where socket memory is accounted/charged separately.
         */
-       unsigned long           socket_pressure;
-
+       u64                     socket_pressure;
+#if BITS_PER_LONG < 64
+       seqlock_t               socket_pressure_seqlock;
+#endif
        int kmemcg_id;
        /*
         * memcg->objcg is wiped out as a part of the objcg repaprenting
@@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key;
 #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
 void mem_cgroup_sk_alloc(struct sock *sk);
 void mem_cgroup_sk_free(struct sock *sk);
+
+#if BITS_PER_LONG < 64
+static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
+{
+       u64 val = get_jiffies_64() + HZ;
+       unsigned long flags;
+
+       write_seqlock_irqsave(&memcg->socket_pressure_seqlock, flags);
+       memcg->socket_pressure = val;
+       write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, flags);
+}
+
+static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
+{
+       unsigned int seq;
+       u64 val;
+
+       do {
+               seq = read_seqbegin(&memcg->socket_pressure_seqlock);
+               val = memcg->socket_pressure;
+       } while (read_seqretry(&memcg->socket_pressure_seqlock, seq));
+
+       return val;
+}
+#else
+static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
+{
+       WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
+}
+
+static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
+{
+       return READ_ONCE(memcg->socket_pressure);
+}
+#endif
+
 static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 #ifdef CONFIG_MEMCG_V1
@@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
                return !!memcg->tcpmem_pressure;
 #endif /* CONFIG_MEMCG_V1 */
        do {
-               if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
+               if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
                        return true;
        } while ((memcg = parent_mem_cgroup(memcg)));
        return false;
index 235c66d2161b2633135e42a45315723dd7511ca0..de7d737fe01162fa7cad3715e0ae9326c107fd2f 100644 (file)
@@ -3754,7 +3754,10 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
        INIT_LIST_HEAD(&memcg->memory_peaks);
        INIT_LIST_HEAD(&memcg->swap_peaks);
        spin_lock_init(&memcg->peaks_lock);
-       memcg->socket_pressure = jiffies;
+       memcg->socket_pressure = get_jiffies_64();
+#if BITS_PER_LONG < 64
+       seqlock_init(&memcg->socket_pressure_seqlock);
+#endif
        memcg1_memcg_init(memcg);
        memcg->kmemcg_id = -1;
        INIT_LIST_HEAD(&memcg->objcg_list);
index bd5183dfd8791fef6cbd8ae35dddf59ee88516fe..c197ed47bcc45a663d1b3d09df9a52e1a16ccba6 100644 (file)
@@ -316,7 +316,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
                         * asserted for a second in which subsequent
                         * pressure events can occur.
                         */
-                       WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
+                       mem_cgroup_set_socket_pressure(memcg);
                }
        }
 }