From: Greg Kroah-Hartman
Date: Wed, 2 Oct 2024 10:01:35 +0000 (+0200)
Subject: 6.1-stable patches
X-Git-Tag: v6.6.54~35
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=674dc05a7a59832d60082b9f222bd5942de533f4;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
	icmp-add-counters-for-rate-limits.patch
	icmp-change-the-order-of-rate-limits.patch
---

diff --git a/queue-6.1/icmp-add-counters-for-rate-limits.patch b/queue-6.1/icmp-add-counters-for-rate-limits.patch
new file mode 100644
index 00000000000..486878128ba
--- /dev/null
+++ b/queue-6.1/icmp-add-counters-for-rate-limits.patch
@@ -0,0 +1,139 @@
+From stable+bounces-78550-greg=kroah.com@vger.kernel.org Tue Oct  1 17:09:18 2024
+From: Eric Dumazet
+Date: Tue, 1 Oct 2024 15:04:03 +0000
+Subject: icmp: Add counters for rate limits
+To: stable@vger.kernel.org
+Cc: Jamie Bainbridge, Abhishek Rawal, Paolo Abeni, Eric Dumazet
+Message-ID: <20241001150404.2176005-1-edumazet@google.com>
+
+From: Jamie Bainbridge
+
+commit d0941130c93515411c8d66fc22bdae407b509a6d upstream.
+
+There are multiple ICMP rate limiting mechanisms:
+
+* Global limits: net.ipv4.icmp_msgs_burst/icmp_msgs_per_sec
+* v4 per-host limits: net.ipv4.icmp_ratelimit/ratemask
+* v6 per-host limits: net.ipv6.icmp_ratelimit/ratemask
+
+However, when ICMP output is limited, there is no way to tell
+which limit has been hit or even if the limits are responsible
+for the lack of ICMP output.
+
+Add counters for each of the cases above. As we are within
+local_bh_disable(), use the __INC stats variant.
+
+Example output:
+
+ # nstat -sz "*RateLimit*"
+ IcmpOutRateLimitGlobal          134                0.0
+ IcmpOutRateLimitHost            770                0.0
+ Icmp6OutRateLimitHost           84                 0.0
+
+Signed-off-by: Jamie Bainbridge
+Suggested-by: Abhishek Rawal
+Link: https://lore.kernel.org/r/273b32241e6b7fdc5c609e6f5ebc68caf3994342.1674605770.git.jamie.bainbridge@gmail.com
+Signed-off-by: Paolo Abeni
+Signed-off-by: Eric Dumazet
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/uapi/linux/snmp.h |    3 +++
+ net/ipv4/icmp.c           |    3 +++
+ net/ipv4/proc.c           |    8 +++++---
+ net/ipv6/icmp.c           |    4 ++++
+ net/ipv6/proc.c           |    1 +
+ 5 files changed, 16 insertions(+), 3 deletions(-)
+
+--- a/include/uapi/linux/snmp.h
++++ b/include/uapi/linux/snmp.h
+@@ -95,6 +95,8 @@ enum
+ 	ICMP_MIB_OUTADDRMASKS,			/* OutAddrMasks */
+ 	ICMP_MIB_OUTADDRMASKREPS,		/* OutAddrMaskReps */
+ 	ICMP_MIB_CSUMERRORS,			/* InCsumErrors */
++	ICMP_MIB_RATELIMITGLOBAL,		/* OutRateLimitGlobal */
++	ICMP_MIB_RATELIMITHOST,			/* OutRateLimitHost */
+ 	__ICMP_MIB_MAX
+ };
+
+@@ -112,6 +114,7 @@ enum
+ 	ICMP6_MIB_OUTMSGS,			/* OutMsgs */
+ 	ICMP6_MIB_OUTERRORS,			/* OutErrors */
+ 	ICMP6_MIB_CSUMERRORS,			/* InCsumErrors */
++	ICMP6_MIB_RATELIMITHOST,		/* OutRateLimitHost */
+ 	__ICMP6_MIB_MAX
+ };
+
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -297,6 +297,7 @@ static bool icmpv4_global_allow(struct n
+ 	if (icmp_global_allow())
+ 		return true;
+
++	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
+ 	return false;
+ }
+
+@@ -326,6 +327,8 @@ static bool icmpv4_xrlim_allow(struct ne
+ 	if (peer)
+ 		inet_putpeer(peer);
+ out:
++	if (!rc)
++		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
+ 	return rc;
+ }
+
+--- a/net/ipv4/proc.c
++++ b/net/ipv4/proc.c
+@@ -352,7 +352,7 @@ static void icmp_put(struct seq_file *se
+ 	seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
+ 	for (i = 0; icmpmibmap[i].name; i++)
+ 		seq_printf(seq, " In%s", icmpmibmap[i].name);
+-	seq_puts(seq, " OutMsgs OutErrors");
++	seq_puts(seq, " OutMsgs OutErrors OutRateLimitGlobal OutRateLimitHost");
+ 	for (i = 0; icmpmibmap[i].name; i++)
+ 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
+ 	seq_printf(seq, "\nIcmp: %lu %lu %lu",
+@@ -362,9 +362,11 @@ static void icmp_put(struct seq_file *se
+ 	for (i = 0; icmpmibmap[i].name; i++)
+ 		seq_printf(seq, " %lu",
+ 			   atomic_long_read(ptr + icmpmibmap[i].index));
+-	seq_printf(seq, " %lu %lu",
++	seq_printf(seq, " %lu %lu %lu %lu",
+ 		   snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+-		   snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
++		   snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS),
++		   snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITGLOBAL),
++		   snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITHOST));
+ 	for (i = 0; icmpmibmap[i].name; i++)
+ 		seq_printf(seq, " %lu",
+ 			   atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -183,6 +183,7 @@ static bool icmpv6_global_allow(struct n
+ 	if (icmp_global_allow())
+ 		return true;
+
++	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
+ 	return false;
+ }
+
+@@ -224,6 +225,9 @@ static bool icmpv6_xrlim_allow(struct so
+ 		if (peer)
+ 			inet_putpeer(peer);
+ 	}
++	if (!res)
++		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
++				  ICMP6_MIB_RATELIMITHOST);
+ 	dst_release(dst);
+ 	return res;
+ }
+--- a/net/ipv6/proc.c
++++ b/net/ipv6/proc.c
+@@ -94,6 +94,7 @@ static const struct snmp_mib snmp6_icmp6
+ 	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
+ 	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
+ 	SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
++	SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST),
+ 	SNMP_MIB_SENTINEL
+ };
+
diff --git a/queue-6.1/icmp-change-the-order-of-rate-limits.patch b/queue-6.1/icmp-change-the-order-of-rate-limits.patch
new file mode 100644
index 00000000000..79af7df9f84
--- /dev/null
+++ b/queue-6.1/icmp-change-the-order-of-rate-limits.patch
@@ -0,0 +1,378 @@
+From stable+bounces-78551-greg=kroah.com@vger.kernel.org Tue Oct  1 17:09:19 2024
+From: Eric Dumazet
+Date: Tue, 1 Oct 2024 15:04:04 +0000
+Subject: icmp: change the order of rate limits
+To: stable@vger.kernel.org
+Cc: Eric Dumazet, Keyu Man, David Ahern, Jesper Dangaard Brouer, Jakub Kicinski
+Message-ID: <20241001150404.2176005-2-edumazet@google.com>
+
+From: Eric Dumazet
+
+commit 8c2bd38b95f75f3d2a08c93e35303e26d480d24e upstream.
+
+ICMP messages are ratelimited:
+
+After the blamed commits, the two rate limiters are applied in this order:
+
+1) host wide ratelimit (icmp_global_allow())
+
+2) Per destination ratelimit (inetpeer based)
+
+In order to avoid side-channel attacks, we need to apply
+the per destination check first.
+
+This patch makes the following change:
+
+1) icmp_global_allow() checks if the host wide limit is reached.
+   But credits are not yet consumed. This is deferred to 3)
+
+2) The per destination limit is checked/updated.
+   This might add a new node in inetpeer tree.
+
+3) icmp_global_consume() consumes tokens if prior operations succeeded.
+
+This means that host wide ratelimit is still effective
+in keeping inetpeer tree small even under DDOS.
+
+As a bonus, I removed icmp_global.lock as the fast path
+can use a lock-free operation.
+
+Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited")
+Fixes: 4cdf507d5452 ("icmp: add a global rate limitation")
+Reported-by: Keyu Man
+Signed-off-by: Eric Dumazet
+Reviewed-by: David Ahern
+Cc: Jesper Dangaard Brouer
+Cc: stable@vger.kernel.org
+Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Eric Dumazet
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/net/ip.h |    2 +
+ net/ipv4/icmp.c  |  103 +++++++++++++++++++++++++++++--------------------------
+ net/ipv6/icmp.c  |   28 +++++++++-----
+ 3 files changed, 76 insertions(+), 57 deletions(-)
+
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -781,6 +781,8 @@ static inline void ip_cmsg_recv(struct m
+ }
+
+ bool icmp_global_allow(void);
++void icmp_global_consume(void);
++
+ extern int sysctl_icmp_msgs_per_sec;
+ extern int sysctl_icmp_msgs_burst;
+
+--- a/net/ipv4/icmp.c
++++ b/net/ipv4/icmp.c
+@@ -222,57 +222,59 @@ int sysctl_icmp_msgs_per_sec __read_most
+ int sysctl_icmp_msgs_burst __read_mostly = 50;
+
+ static struct {
+-	spinlock_t	lock;
+-	u32		credit;
++	atomic_t	credit;
+ 	u32		stamp;
+-} icmp_global = {
+-	.lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
+-};
++} icmp_global;
+
+ /**
+  * icmp_global_allow - Are we allowed to send one more ICMP message ?
+  *
+  * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
+  * Returns false if we reached the limit and can not send another packet.
+- * Note: called with BH disabled
++ * Works in tandem with icmp_global_consume().
+  */
+ bool icmp_global_allow(void)
+ {
+-	u32 credit, delta, incr = 0, now = (u32)jiffies;
+-	bool rc = false;
++	u32 delta, now, oldstamp;
++	int incr, new, old;
+
+-	/* Check if token bucket is empty and cannot be refilled
+-	 * without taking the spinlock. The READ_ONCE() are paired
+-	 * with the following WRITE_ONCE() in this same function.
++	/* Note: many cpus could find this condition true.
++	 * Then later icmp_global_consume() could consume more credits,
++	 * this is an acceptable race.
+ 	 */
+-	if (!READ_ONCE(icmp_global.credit)) {
+-		delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
+-		if (delta < HZ / 50)
+-			return false;
+-	}
++	if (atomic_read(&icmp_global.credit) > 0)
++		return true;
+
+-	spin_lock(&icmp_global.lock);
+-	delta = min_t(u32, now - icmp_global.stamp, HZ);
+-	if (delta >= HZ / 50) {
+-		incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
+-		if (incr)
+-			WRITE_ONCE(icmp_global.stamp, now);
+-	}
+-	credit = min_t(u32, icmp_global.credit + incr,
+-		       READ_ONCE(sysctl_icmp_msgs_burst));
+-	if (credit) {
+-		/* We want to use a credit of one in average, but need to randomize
+-		 * it for security reasons.
+-		 */
+-		credit = max_t(int, credit - prandom_u32_max(3), 0);
+-		rc = true;
++	now = jiffies;
++	oldstamp = READ_ONCE(icmp_global.stamp);
++	delta = min_t(u32, now - oldstamp, HZ);
++	if (delta < HZ / 50)
++		return false;
++
++	incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
++	if (!incr)
++		return false;
++
++	if (cmpxchg(&icmp_global.stamp, oldstamp, now) == oldstamp) {
++		old = atomic_read(&icmp_global.credit);
++		do {
++			new = min(old + incr, READ_ONCE(sysctl_icmp_msgs_burst));
++		} while (!atomic_try_cmpxchg(&icmp_global.credit, &old, new));
+ 	}
+-	WRITE_ONCE(icmp_global.credit, credit);
+-	spin_unlock(&icmp_global.lock);
+-	return rc;
++	return true;
+ }
+ EXPORT_SYMBOL(icmp_global_allow);
+
++void icmp_global_consume(void)
++{
++	int credits = get_random_u32_below(3);
++
++	/* Note: this might make icmp_global.credit negative. */
++	if (credits)
++		atomic_sub(credits, &icmp_global.credit);
++}
++EXPORT_SYMBOL(icmp_global_consume);
++
+ static bool icmpv4_mask_allow(struct net *net, int type, int code)
+ {
+ 	if (type > NR_ICMP_TYPES)
+@@ -289,14 +291,16 @@ static bool icmpv4_mask_allow(struct net
+ 	return false;
+ }
+
+-static bool icmpv4_global_allow(struct net *net, int type, int code)
++static bool icmpv4_global_allow(struct net *net, int type, int code,
++				bool *apply_ratelimit)
+ {
+ 	if (icmpv4_mask_allow(net, type, code))
+ 		return true;
+
+-	if (icmp_global_allow())
++	if (icmp_global_allow()) {
++		*apply_ratelimit = true;
+ 		return true;
+-
++	}
+ 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
+ 	return false;
+ }
+@@ -306,15 +310,16 @@ static bool icmpv4_global_allow(struct n
+  */
+
+ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
+-			       struct flowi4 *fl4, int type, int code)
++			       struct flowi4 *fl4, int type, int code,
++			       bool apply_ratelimit)
+ {
+ 	struct dst_entry *dst = &rt->dst;
+ 	struct inet_peer *peer;
+ 	bool rc = true;
+ 	int vif;
+
+-	if (icmpv4_mask_allow(net, type, code))
+-		goto out;
++	if (!apply_ratelimit)
++		return true;
+
+ 	/* No rate limit on loopback */
+ 	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
+@@ -329,6 +334,8 @@ static bool icmpv4_xrlim_allow(struct ne
+ out:
+ 	if (!rc)
+ 		__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
++	else
++		icmp_global_consume();
+ 	return rc;
+ }
+
+@@ -400,6 +407,7 @@ static void icmp_reply(struct icmp_bxm *
+ 	struct ipcm_cookie ipc;
+ 	struct rtable *rt = skb_rtable(skb);
+ 	struct net *net = dev_net(rt->dst.dev);
++	bool apply_ratelimit = false;
+ 	struct flowi4 fl4;
+ 	struct sock *sk;
+ 	struct inet_sock *inet;
+@@ -411,11 +419,11 @@ static void icmp_reply(struct icmp_bxm *
+ 	if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
+ 		return;
+
+-	/* Needed by both icmp_global_allow and icmp_xmit_lock */
++	/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
+ 	local_bh_disable();
+
+-	/* global icmp_msgs_per_sec */
+-	if (!icmpv4_global_allow(net, type, code))
++	/* is global icmp_msgs_per_sec exhausted ? */
++	if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
+ 		goto out_bh_enable;
+
+ 	sk = icmp_xmit_lock(net);
+@@ -448,7 +456,7 @@ static void icmp_reply(struct icmp_bxm *
+ 		rt = ip_route_output_key(net, &fl4);
+ 		if (IS_ERR(rt))
+ 			goto out_unlock;
+-		if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
++		if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
+ 			icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
+ 		ip_rt_put(rt);
+ out_unlock:
+@@ -592,6 +600,7 @@ void __icmp_send(struct sk_buff *skb_in,
+ 	int room;
+ 	struct icmp_bxm icmp_param;
+ 	struct rtable *rt = skb_rtable(skb_in);
++	bool apply_ratelimit = false;
+ 	struct ipcm_cookie ipc;
+ 	struct flowi4 fl4;
+ 	__be32 saddr;
+@@ -673,7 +682,7 @@ void __icmp_send(struct sk_buff *skb_in,
+ 		}
+ 	}
+
+-	/* Needed by both icmp_global_allow and icmp_xmit_lock */
++	/* Needed by both icmpv4_global_allow and icmp_xmit_lock */
+ 	local_bh_disable();
+
+ 	/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
+@@ -681,7 +690,7 @@ void __icmp_send(struct sk_buff *skb_in,
+ 	 * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
+ 	 */
+ 	if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
+-	    !icmpv4_global_allow(net, type, code))
++	    !icmpv4_global_allow(net, type, code, &apply_ratelimit))
+ 		goto out_bh_enable;
+
+ 	sk = icmp_xmit_lock(net);
+@@ -740,7 +749,7 @@ void __icmp_send(struct sk_buff *skb_in,
+ 		goto out_unlock;
+
+ 	/* peer icmp_ratelimit */
+-	if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
++	if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
+ 		goto ende;
+
+ 	/* RFC says return as much as we can without exceeding 576 bytes. */
+--- a/net/ipv6/icmp.c
++++ b/net/ipv6/icmp.c
+@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net
+ 	return false;
+ }
+
+-static bool icmpv6_global_allow(struct net *net, int type)
++static bool icmpv6_global_allow(struct net *net, int type,
++				bool *apply_ratelimit)
+ {
+ 	if (icmpv6_mask_allow(net, type))
+ 		return true;
+
+-	if (icmp_global_allow())
++	if (icmp_global_allow()) {
++		*apply_ratelimit = true;
+ 		return true;
+-
++	}
+ 	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
+ 	return false;
+ }
+@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct n
+  * Check the ICMP output rate limit
+  */
+ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
+-			       struct flowi6 *fl6)
++			       struct flowi6 *fl6, bool apply_ratelimit)
+ {
+ 	struct net *net = sock_net(sk);
+ 	struct dst_entry *dst;
+ 	bool res = false;
+
+-	if (icmpv6_mask_allow(net, type))
++	if (!apply_ratelimit)
+ 		return true;
+
+ 	/*
+@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct so
+ 	if (!res)
+ 		__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
+ 				  ICMP6_MIB_RATELIMITHOST);
++	else
++		icmp_global_consume();
+ 	dst_release(dst);
+ 	return res;
+ }
+@@ -454,6 +458,7 @@ void icmp6_send(struct sk_buff *skb, u8
+ 	struct net *net;
+ 	struct ipv6_pinfo *np;
+ 	const struct in6_addr *saddr = NULL;
++	bool apply_ratelimit = false;
+ 	struct dst_entry *dst;
+ 	struct icmp6hdr tmp_hdr;
+ 	struct flowi6 fl6;
+@@ -535,11 +540,12 @@ void icmp6_send(struct sk_buff *skb, u8
+ 		return;
+ 	}
+
+-	/* Needed by both icmp_global_allow and icmpv6_xmit_lock */
++	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
+ 	local_bh_disable();
+
+ 	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
+-	if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
++	if (!(skb->dev->flags & IFF_LOOPBACK) &&
++	    !icmpv6_global_allow(net, type, &apply_ratelimit))
+ 		goto out_bh_enable;
+
+ 	mip6_addr_swap(skb, parm);
+@@ -577,7 +583,7 @@ void icmp6_send(struct sk_buff *skb, u8
+
+ 	np = inet6_sk(sk);
+
+-	if (!icmpv6_xrlim_allow(sk, type, &fl6))
++	if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
+ 		goto out;
+
+ 	tmp_hdr.icmp6_type = type;
+@@ -719,6 +725,7 @@ static void icmpv6_echo_reply(struct sk_
+ 	struct ipv6_pinfo *np;
+ 	const struct in6_addr *saddr = NULL;
+ 	struct icmp6hdr *icmph = icmp6_hdr(skb);
++	bool apply_ratelimit = false;
+ 	struct icmp6hdr tmp_hdr;
+ 	struct flowi6 fl6;
+ 	struct icmpv6_msg msg;
+@@ -782,8 +789,9 @@ static void icmpv6_echo_reply(struct sk_
+ 		goto out;
+
+ 	/* Check the ratelimit */
+-	if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
+-	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
++	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
++	     !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
++	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
+ 		goto out_dst_release;
+
+ 	idev = __in6_dev_get(skb->dev);
diff --git a/queue-6.1/series b/queue-6.1/series
index 04e2e8f73b0..69519532fff 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -371,3 +371,5 @@ cpuidle-riscv-sbi-use-scoped-device-node-handling-to.patch
 mm-filemap-return-early-if-failed-to-allocate-memory-for-split.patch
 lib-xarray-introduce-a-new-helper-xas_get_order.patch
 mm-filemap-optimize-filemap-folio-adding.patch
+icmp-add-counters-for-rate-limits.patch
+icmp-change-the-order-of-rate-limits.patch
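
For readers following the second patch, below is a minimal user-space
sketch of the lockless token bucket it introduces. This is illustrative
only, not the kernel code: C11 <stdatomic.h> stands in for atomic_t and
cmpxchg(), and HZ, jiffies_now(), MSGS_PER_SEC and MSGS_BURST are
stand-ins for the kernel's jiffies and the icmp_msgs_per_sec /
icmp_msgs_burst sysctls. It shows the check-then-consume split:
global_allow() only tests and refills the budget, and credits are
burned by global_consume() once the per-destination check has passed.

#define _POSIX_C_SOURCE 199309L
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>

#define HZ           100
#define MSGS_PER_SEC 1000	/* stand-in for sysctl_icmp_msgs_per_sec */
#define MSGS_BURST   50		/* stand-in for sysctl_icmp_msgs_burst */

static _Atomic int32_t  credit;	/* may go negative, like icmp_global.credit */
static _Atomic uint32_t stamp;	/* last refill time, in fake jiffies */

static uint32_t jiffies_now(void)	/* stand-in for the kernel's jiffies */
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint32_t)(ts.tv_sec * HZ + ts.tv_nsec / (1000000000L / HZ));
}

/* Step 1: check the global budget without consuming it. */
static bool global_allow(void)
{
	uint32_t now, oldstamp, delta;
	int32_t incr, old, new;

	/* Several threads may see credit > 0 and all pass; the patch
	 * calls this an acceptable race. */
	if (atomic_load(&credit) > 0)
		return true;

	now = jiffies_now();
	oldstamp = atomic_load(&stamp);
	delta = now - oldstamp;
	if (delta > HZ)
		delta = HZ;
	if (delta < HZ / 50)
		return false;

	incr = MSGS_PER_SEC * delta / HZ;
	if (!incr)
		return false;

	/* Whoever swaps the stamp first performs the refill. */
	if (atomic_compare_exchange_strong(&stamp, &oldstamp, now)) {
		old = atomic_load(&credit);
		do {
			new = old + incr;
			if (new > MSGS_BURST)
				new = MSGS_BURST;
		} while (!atomic_compare_exchange_weak(&credit, &old, new));
	}
	return true;
}

/* Step 3: consume 0-2 credits, only after step 2 (the per-destination
 * check, omitted here) also succeeded. */
static void global_consume(void)
{
	int32_t credits = rand() % 3;	/* kernel: get_random_u32_below(3) */

	if (credits)
		atomic_fetch_sub(&credit, credits);
}

int main(void)
{
	if (global_allow()) {
		/* step 2: per-destination (inetpeer) ratelimit goes here */
		global_consume();
	}
	return 0;
}

Deferring the consume step is what makes the new order safe: the cheap
host-wide check still runs first and keeps the inetpeer tree small
under flood, but shared credits are only spent once the
per-destination decision has been made.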