From 378bdb97405a00bf03d8d993435c83add1688e36 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Jul 2025 19:46:43 +0000 Subject: [PATCH] memcg: convert memcg->socket_pressure to u64 memcg->socket_pressure is initialised with jiffies when the memcg is created. Once vmpressure detects that the cgroup is under memory pressure, the field is updated with jiffies + HZ to signal the fact to the socket layer and suppress memory allocation for one second. Otherwise, the field is not updated. mem_cgroup_under_socket_pressure() uses time_before() to check if jiffies is less than memcg->socket_pressure, and this has a bug on 32-bit kernel. if (time_before(jiffies, memcg->socket_pressure)) return true; As time_before() casts the final result to long, the acceptable delta between two timestamps is 2 ^ (BITS_PER_LONG - 1). On 32-bit kernel with CONFIG_HZ=1000, this is about 24 days. >>> (2 ** 31) / 1000 / 60 / 60 / 24 24.855134814814818 Once 24 days have passed since the last update of socket_pressure, mem_cgroup_under_socket_pressure() starts to lie until the next 24 days pass. We don't need to worry about this on 64-bit machines unless they serve for 300 million years. >>> (2 ** 63) / 1000 / 60 / 60 / 24 / 365 292471208.6775361 Let's convert memcg->socket_pressure to u64. Performance teting: I don't have a real 32-bit machine so this is a result on QEMU, but with/without the u64 jiffie patch, the time spent in mem_cgroup_under_socket_pressure() was 1~5us and I didn't see any measurable delta. no patch applied: iperf3 273 [000] 137.296248: probe:mem_cgroup_under_socket_pressure: (c13660d0) c13660d1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 273 [000] 137.296249: probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f) iperf3 273 [000] 137.296251: probe:mem_cgroup_under_socket_pressure: (c13660d0) c13660d1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 273 [000] 137.296253: probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f) u64 jiffies patch applied: iperf3 308 [001] 330.669370: probe:mem_cgroup_under_socket_pressure: (c12ddba0) c12ddba1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 308 [001] 330.669371: probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf) iperf3 308 [001] 330.669382: probe:mem_cgroup_under_socket_pressure: (c12ddba0) c12ddba1 mem_cgroup_under_socket_pressure+0x1 ([kernel.kallsyms]) iperf3 308 [001] 330.669384: probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf) So the u64 approach is good enough. Link: https://lkml.kernel.org/r/20250717194645.1096500-1-kuniyu@google.com Fixes: 8e8ae645249b ("mm: memcontrol: hook up vmpressure to socket pressure") Signed-off-by: Kuniyuki Iwashima Reported-by: Neal Cardwell Suggested-by: Andrew Morton Acked-by: Shakeel Butt Acked-by: Johannes Weiner Cc: David S. Miller Cc: Eric Dumazet Cc: Johannes Weiner Cc: Vladimir Davydov Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 44 +++++++++++++++++++++++++++++++++++--- mm/memcontrol.c | 5 ++++- mm/vmpressure.c | 2 +- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 87b6688f124a7..785173aa0739c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -251,8 +251,10 @@ struct mem_cgroup { * that this indicator should NOT be used in legacy cgroup mode * where socket memory is accounted/charged separately. */ - unsigned long socket_pressure; - + u64 socket_pressure; +#if BITS_PER_LONG < 64 + seqlock_t socket_pressure_seqlock; +#endif int kmemcg_id; /* * memcg->objcg is wiped out as a part of the objcg repaprenting @@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); + +#if BITS_PER_LONG < 64 +static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) +{ + u64 val = get_jiffies_64() + HZ; + unsigned long flags; + + write_seqlock_irqsave(&memcg->socket_pressure_seqlock, flags); + memcg->socket_pressure = val; + write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, flags); +} + +static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) +{ + unsigned int seq; + u64 val; + + do { + seq = read_seqbegin(&memcg->socket_pressure_seqlock); + val = memcg->socket_pressure; + } while (read_seqretry(&memcg->socket_pressure_seqlock, seq)); + + return val; +} +#else +static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg) +{ + WRITE_ONCE(memcg->socket_pressure, jiffies + HZ); +} + +static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg) +{ + return READ_ONCE(memcg->socket_pressure); +} +#endif + static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { #ifdef CONFIG_MEMCG_V1 @@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) return !!memcg->tcpmem_pressure; #endif /* CONFIG_MEMCG_V1 */ do { - if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) + if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg))) return true; } while ((memcg = parent_mem_cgroup(memcg))); return false; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 235c66d2161b2..de7d737fe0116 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3754,7 +3754,10 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) INIT_LIST_HEAD(&memcg->memory_peaks); INIT_LIST_HEAD(&memcg->swap_peaks); spin_lock_init(&memcg->peaks_lock); - memcg->socket_pressure = jiffies; + memcg->socket_pressure = get_jiffies_64(); +#if BITS_PER_LONG < 64 + seqlock_init(&memcg->socket_pressure_seqlock); +#endif memcg1_memcg_init(memcg); memcg->kmemcg_id = -1; INIT_LIST_HEAD(&memcg->objcg_list); diff --git a/mm/vmpressure.c b/mm/vmpressure.c index bd5183dfd8791..c197ed47bcc45 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -316,7 +316,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, * asserted for a second in which subsequent * pressure events can occur. */ - WRITE_ONCE(memcg->socket_pressure, jiffies + HZ); + mem_cgroup_set_socket_pressure(memcg); } } } -- 2.47.2