git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
author Sasha Levin <sashal@kernel.org>
Tue, 18 Feb 2025 12:30:06 +0000 (07:30 -0500)
committer Sasha Levin <sashal@kernel.org>
Tue, 18 Feb 2025 12:31:22 +0000 (07:31 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
22 files changed:
queue-5.15/arp-use-rcu-protection-in-arp_xmit.patch [new file with mode: 0644]
queue-5.15/clocksource-replace-cpumask_weight-with-cpumask_empt.patch [new file with mode: 0644]
queue-5.15/clocksource-use-migrate_disable-to-avoid-calling-get.patch [new file with mode: 0644]
queue-5.15/clocksource-use-pr_info-for-checking-clocksource-syn.patch [new file with mode: 0644]
queue-5.15/ipv4-add-rcu-protection-to-ip4_dst_hoplimit.patch [new file with mode: 0644]
queue-5.15/ipv4-use-rcu-protection-in-__ip_rt_update_pmtu.patch [new file with mode: 0644]
queue-5.15/ipv4-use-rcu-protection-in-inet_select_addr.patch [new file with mode: 0644]
queue-5.15/ipv4-use-rcu-protection-in-rt_is_expired.patch [new file with mode: 0644]
queue-5.15/ipv6-mcast-add-rcu-protection-to-mld_newpack.patch [new file with mode: 0644]
queue-5.15/ipv6-use-rcu-protection-in-ip6_default_advmss.patch [new file with mode: 0644]
queue-5.15/namespaceify-min_pmtu-sysctl.patch [new file with mode: 0644]
queue-5.15/namespaceify-mtu_expires-sysctl.patch [new file with mode: 0644]
queue-5.15/ndisc-extend-rcu-protection-in-ndisc_send_skb.patch [new file with mode: 0644]
queue-5.15/ndisc-use-rcu-protection-in-ndisc_alloc_skb.patch [new file with mode: 0644]
queue-5.15/neighbour-delete-redundant-judgment-statements.patch [new file with mode: 0644]
queue-5.15/neighbour-use-rcu-protection-in-__neigh_notify.patch [new file with mode: 0644]
queue-5.15/net-add-dev_net_rcu-helper.patch [new file with mode: 0644]
queue-5.15/net-ipv4-cache-pmtu-for-all-packet-paths-if-multipat.patch [new file with mode: 0644]
queue-5.15/net-treat-possible_net_t-net-pointer-as-an-rcu-one-a.patch [new file with mode: 0644]
queue-5.15/openvswitch-use-rcu-protection-in-ovs_vport_cmd_fill.patch [new file with mode: 0644]
queue-5.15/selftest-net-test-ipv4-pmtu-exceptions-with-dscp-and.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/arp-use-rcu-protection-in-arp_xmit.patch b/queue-5.15/arp-use-rcu-protection-in-arp_xmit.patch
new file mode 100644 (file)
index 0000000..9d1d6e9
--- /dev/null
@@ -0,0 +1,45 @@
+From 902fa55813207ab583e8d7e15f0069b1f2c5cfe2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Feb 2025 13:58:36 +0000
+Subject: arp: use RCU protection in arp_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a42b69f692165ec39db42d595f4f65a4c8f42e44 ]
+
+arp_xmit() can be called without RTNL or RCU protection.
+
+Use RCU protection to avoid potential UAF.
+
+Fixes: 29a26a568038 ("netfilter: Pass struct net into the netfilter hooks")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250207135841.1948589-5-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/arp.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
+index 8ae9bd6f91c19..6879e0b70c769 100644
+--- a/net/ipv4/arp.c
++++ b/net/ipv4/arp.c
+@@ -637,10 +637,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb
+  */
+ void arp_xmit(struct sk_buff *skb)
+ {
++      rcu_read_lock();
+       /* Send it off, maybe filter it using firewalling first.  */
+       NF_HOOK(NFPROTO_ARP, NF_ARP_OUT,
+-              dev_net(skb->dev), NULL, skb, NULL, skb->dev,
++              dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev,
+               arp_xmit_finish);
++      rcu_read_unlock();
+ }
+ EXPORT_SYMBOL(arp_xmit);
+-- 
+2.39.5
+
diff --git a/queue-5.15/clocksource-replace-cpumask_weight-with-cpumask_empt.patch b/queue-5.15/clocksource-replace-cpumask_weight-with-cpumask_empt.patch
new file mode 100644 (file)
index 0000000..fb4ae74
--- /dev/null
@@ -0,0 +1,41 @@
+From 2101ff45f34fa447a21bf9d0da8226b68008017c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Feb 2022 14:49:07 -0800
+Subject: clocksource: Replace cpumask_weight() with cpumask_empty()
+
+From: Yury Norov <yury.norov@gmail.com>
+
+[ Upstream commit 8afbcaf8690dac19ebf570a4e4fef9c59c75bf8e ]
+
+clocksource_verify_percpu() calls cpumask_weight() to check if any bit of a
+given cpumask is set.
+
+This can be done more efficiently with cpumask_empty() because
+cpumask_empty() stops traversing the cpumask as soon as it finds the first set
+bit, while cpumask_weight() counts all bits unconditionally.
+
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20220210224933.379149-24-yury.norov@gmail.com
+Stable-dep-of: 6bb05a33337b ("clocksource: Use migrate_disable() to avoid calling get_random_u32() in atomic context")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/clocksource.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 5aa8eec89e781..ee7e8d0dc182f 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -344,7 +344,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
+       cpus_read_lock();
+       preempt_disable();
+       clocksource_verify_choose_cpus();
+-      if (cpumask_weight(&cpus_chosen) == 0) {
++      if (cpumask_empty(&cpus_chosen)) {
+               preempt_enable();
+               cpus_read_unlock();
+               pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
+-- 
+2.39.5
+
diff --git a/queue-5.15/clocksource-use-migrate_disable-to-avoid-calling-get.patch b/queue-5.15/clocksource-use-migrate_disable-to-avoid-calling-get.patch
new file mode 100644 (file)
index 0000000..31999bc
--- /dev/null
@@ -0,0 +1,82 @@
+From db03669347973a66724bdbdb90357f70b0c66213 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 31 Jan 2025 12:33:23 -0500
+Subject: clocksource: Use migrate_disable() to avoid calling get_random_u32()
+ in atomic context
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 6bb05a33337b2c842373857b63de5c9bf1ae2a09 ]
+
+The following bug report happened with a PREEMPT_RT kernel:
+
+  BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
+  in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2012, name: kwatchdog
+  preempt_count: 1, expected: 0
+  RCU nest depth: 0, expected: 0
+  get_random_u32+0x4f/0x110
+  clocksource_verify_choose_cpus+0xab/0x1a0
+  clocksource_verify_percpu.part.0+0x6b/0x330
+  clocksource_watchdog_kthread+0x193/0x1a0
+
+It is due to the fact that clocksource_verify_choose_cpus() is invoked with
+preemption disabled.  This function invokes get_random_u32() to obtain
+random numbers for choosing CPUs.  The batched_entropy_32 local lock and/or
+the base_crng.lock spinlock in drivers/char/random.c will be acquired during
+the call. In a PREEMPT_RT kernel, they are both sleeping locks and so cannot
+be acquired in atomic context.
+
+Fix this problem by using migrate_disable() to allow smp_processor_id() to
+be reliably used without introducing atomic context. preempt_disable() is
+then called after clocksource_verify_choose_cpus() but before the
+clocksource measurement is being run to avoid introducing unexpected
+latency.
+
+Fixes: 7560c02bdffb ("clocksource: Check per-CPU clock synchronization when marked unstable")
+Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
+Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/all/20250131173323.891943-2-longman@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/clocksource.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 8d9b11555f7ef..32efc87c41f20 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -342,10 +342,10 @@ void clocksource_verify_percpu(struct clocksource *cs)
+       cpumask_clear(&cpus_ahead);
+       cpumask_clear(&cpus_behind);
+       cpus_read_lock();
+-      preempt_disable();
++      migrate_disable();
+       clocksource_verify_choose_cpus();
+       if (cpumask_empty(&cpus_chosen)) {
+-              preempt_enable();
++              migrate_enable();
+               cpus_read_unlock();
+               pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
+               return;
+@@ -353,6 +353,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
+       testcpu = smp_processor_id();
+       pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
+               cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
++      preempt_disable();
+       for_each_cpu(cpu, &cpus_chosen) {
+               if (cpu == testcpu)
+                       continue;
+@@ -372,6 +373,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
+                       cs_nsec_min = cs_nsec;
+       }
+       preempt_enable();
++      migrate_enable();
+       cpus_read_unlock();
+       if (!cpumask_empty(&cpus_ahead))
+               pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+-- 
+2.39.5
+
diff --git a/queue-5.15/clocksource-use-pr_info-for-checking-clocksource-syn.patch b/queue-5.15/clocksource-use-pr_info-for-checking-clocksource-syn.patch
new file mode 100644 (file)
index 0000000..033ae23
--- /dev/null
@@ -0,0 +1,45 @@
+From 15afb00ecb1cca0aa44bd46d983692d14de39591 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Jan 2025 20:54:41 -0500
+Subject: clocksource: Use pr_info() for "Checking clocksource synchronization"
+ message
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 1f566840a82982141f94086061927a90e79440e5 ]
+
+The "Checking clocksource synchronization" message is normally printed
+when clocksource_verify_percpu() is called for a given clocksource if
+both the CLOCK_SOURCE_UNSTABLE and CLOCK_SOURCE_VERIFY_PERCPU flags
+are set.
+
+It is an informational message and so pr_info() is the correct choice.
+
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
+Acked-by: John Stultz <jstultz@google.com>
+Link: https://lore.kernel.org/all/20250125015442.3740588-1-longman@redhat.com
+Stable-dep-of: 6bb05a33337b ("clocksource: Use migrate_disable() to avoid calling get_random_u32() in atomic context")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/clocksource.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index ee7e8d0dc182f..8d9b11555f7ef 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -351,7 +351,8 @@ void clocksource_verify_percpu(struct clocksource *cs)
+               return;
+       }
+       testcpu = smp_processor_id();
+-      pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
++      pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
++              cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+       for_each_cpu(cpu, &cpus_chosen) {
+               if (cpu == testcpu)
+                       continue;
+-- 
+2.39.5
+
diff --git a/queue-5.15/ipv4-add-rcu-protection-to-ip4_dst_hoplimit.patch b/queue-5.15/ipv4-add-rcu-protection-to-ip4_dst_hoplimit.patch
new file mode 100644 (file)
index 0000000..aa23f5e
--- /dev/null
@@ -0,0 +1,47 @@
+From d09b054acd19d10f22c938657607112195fec7a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 15:51:10 +0000
+Subject: ipv4: add RCU protection to ip4_dst_hoplimit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 469308552ca4560176cfc100e7ca84add1bebd7c ]
+
+ip4_dst_hoplimit() must use RCU protection to make
+sure the net structure it reads does not disappear.
+
+Fixes: fa50d974d104 ("ipv4: Namespaceify ip_default_ttl sysctl knob")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250205155120.1676781-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/route.h | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/route.h b/include/net/route.h
+index 30610101ea14f..036e3ee3b856b 100644
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -357,10 +357,15 @@ static inline int inet_iif(const struct sk_buff *skb)
+ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
+ {
+       int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+-      struct net *net = dev_net(dst->dev);
+-      if (hoplimit == 0)
++      if (hoplimit == 0) {
++              const struct net *net;
++
++              rcu_read_lock();
++              net = dev_net_rcu(dst->dev);
+               hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
++              rcu_read_unlock();
++      }
+       return hoplimit;
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.15/ipv4-use-rcu-protection-in-__ip_rt_update_pmtu.patch b/queue-5.15/ipv4-use-rcu-protection-in-__ip_rt_update_pmtu.patch
new file mode 100644 (file)
index 0000000..0c4a63c
--- /dev/null
@@ -0,0 +1,77 @@
+From 6da58ac499930e5b2c82f1ceab71fc1345efad1b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 15:51:15 +0000
+Subject: ipv4: use RCU protection in __ip_rt_update_pmtu()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 139512191bd06f1b496117c76372b2ce372c9a41 ]
+
+__ip_rt_update_pmtu() must use RCU protection to make
+sure the net structure it reads does not disappear.
+
+Fixes: 2fbc6e89b2f1 ("ipv4: Update exception handling for multipath routes via same device")
+Fixes: 1de6b15a434c ("Namespaceify min_pmtu sysctl")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250205155120.1676781-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/route.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 2ae9d2855efab..a4884d434038e 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1023,9 +1023,9 @@ out:     kfree_skb(skb);
+ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+ {
+       struct dst_entry *dst = &rt->dst;
+-      struct net *net = dev_net(dst->dev);
+       struct fib_result res;
+       bool lock = false;
++      struct net *net;
+       u32 old_mtu;
+       if (ip_mtu_locked(dst))
+@@ -1035,6 +1035,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+       if (old_mtu < mtu)
+               return;
++      rcu_read_lock();
++      net = dev_net_rcu(dst->dev);
+       if (mtu < net->ipv4.ip_rt_min_pmtu) {
+               lock = true;
+               mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
+@@ -1042,9 +1044,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+       if (rt->rt_pmtu == mtu && !lock &&
+           time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
+-              return;
++              goto out;
+-      rcu_read_lock();
+       if (fib_lookup(net, fl4, &res, 0) == 0) {
+               struct fib_nh_common *nhc;
+@@ -1058,14 +1059,14 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+                               update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+                                                     jiffies + net->ipv4.ip_rt_mtu_expires);
+                       }
+-                      rcu_read_unlock();
+-                      return;
++                      goto out;
+               }
+ #endif /* CONFIG_IP_ROUTE_MULTIPATH */
+               nhc = FIB_RES_NHC(res);
+               update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+                                     jiffies + net->ipv4.ip_rt_mtu_expires);
+       }
++out:
+       rcu_read_unlock();
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.15/ipv4-use-rcu-protection-in-inet_select_addr.patch b/queue-5.15/ipv4-use-rcu-protection-in-inet_select_addr.patch
new file mode 100644 (file)
index 0000000..207828c
--- /dev/null
@@ -0,0 +1,41 @@
+From cbf9ee413880e59c4dfef62141d63e9139a261b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 15:51:14 +0000
+Subject: ipv4: use RCU protection in inet_select_addr()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 719817cd293e4fa389e1f69c396f3f816ed5aa41 ]
+
+inet_select_addr() must use RCU protection to make
+sure the net structure it reads does not disappear.
+
+Fixes: c4544c724322 ("[NETNS]: Process inet_select_addr inside a namespace.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20250205155120.1676781-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/devinet.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index dcbc087fff179..33e87b442b475 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1316,10 +1316,11 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
+       __be32 addr = 0;
+       unsigned char localnet_scope = RT_SCOPE_HOST;
+       struct in_device *in_dev;
+-      struct net *net = dev_net(dev);
++      struct net *net;
+       int master_idx;
+       rcu_read_lock();
++      net = dev_net_rcu(dev);
+       in_dev = __in_dev_get_rcu(dev);
+       if (!in_dev)
+               goto no_in_dev;
+-- 
+2.39.5
+
diff --git a/queue-5.15/ipv4-use-rcu-protection-in-rt_is_expired.patch b/queue-5.15/ipv4-use-rcu-protection-in-rt_is_expired.patch
new file mode 100644 (file)
index 0000000..2ea7bf3
--- /dev/null
@@ -0,0 +1,44 @@
+From 5374e2891497377d7647fdd20ca025cec0db5ea9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 15:51:13 +0000
+Subject: ipv4: use RCU protection in rt_is_expired()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dd205fcc33d92d54eee4d7f21bb073af9bd5ce2b ]
+
+rt_is_expired() must use RCU protection to make
+sure the net structure it reads does not disappear.
+
+Fixes: e84f84f27647 ("netns: place rt_genid into struct net")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250205155120.1676781-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/route.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 3522801885787..3ad78bbd6261b 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -400,7 +400,13 @@ static inline int ip_rt_proc_init(void)
+ static inline bool rt_is_expired(const struct rtable *rth)
+ {
+-      return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
++      bool res;
++
++      rcu_read_lock();
++      res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev));
++      rcu_read_unlock();
++
++      return res;
+ }
+ void rt_cache_flush(struct net *net)
+-- 
+2.39.5
+
diff --git a/queue-5.15/ipv6-mcast-add-rcu-protection-to-mld_newpack.patch b/queue-5.15/ipv6-mcast-add-rcu-protection-to-mld_newpack.patch
new file mode 100644 (file)
index 0000000..97a726e
--- /dev/null
@@ -0,0 +1,80 @@
+From 229bb6df37287bb7607c4c8d8a77468874f3a962 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Feb 2025 14:10:21 +0000
+Subject: ipv6: mcast: add RCU protection to mld_newpack()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit a527750d877fd334de87eef81f1cb5f0f0ca3373 ]
+
+mld_newpack() can be called without RTNL or RCU being held.
+
+Note that we no longer can use sock_alloc_send_skb() because
+ipv6.igmp_sk uses GFP_KERNEL allocations which can sleep.
+
+Instead use alloc_skb() and charge the net->ipv6.igmp_sk
+socket under RCU protection.
+
+Fixes: b8ad0cbc58f7 ("[NETNS][IPV6] mcast - handle several network namespace")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://patch.msgid.link/20250212141021.1663666-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/mcast.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
+index 6e5d1ade48a89..1d038a0840994 100644
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -1731,21 +1731,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
+       struct net_device *dev = idev->dev;
+       int hlen = LL_RESERVED_SPACE(dev);
+       int tlen = dev->needed_tailroom;
+-      struct net *net = dev_net(dev);
+       const struct in6_addr *saddr;
+       struct in6_addr addr_buf;
+       struct mld2_report *pmr;
+       struct sk_buff *skb;
+       unsigned int size;
+       struct sock *sk;
+-      int err;
++      struct net *net;
+-      sk = net->ipv6.igmp_sk;
+       /* we assume size > sizeof(ra) here
+        * Also try to not allocate high-order pages for big MTU
+        */
+       size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
+-      skb = sock_alloc_send_skb(sk, size, 1, &err);
++      skb = alloc_skb(size, GFP_KERNEL);
+       if (!skb)
+               return NULL;
+@@ -1753,6 +1751,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
+       skb_reserve(skb, hlen);
+       skb_tailroom_reserve(skb, mtu, tlen);
++      rcu_read_lock();
++
++      net = dev_net_rcu(dev);
++      sk = net->ipv6.igmp_sk;
++      skb_set_owner_w(skb, sk);
++
+       if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+               /* <draft-ietf-magma-mld-source-05.txt>:
+                * use unspecified address as the source address
+@@ -1764,6 +1768,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
+       ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
++      rcu_read_unlock();
++
+       skb_put_data(skb, ra, sizeof(ra));
+       skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
+-- 
+2.39.5
+
diff --git a/queue-5.15/ipv6-use-rcu-protection-in-ip6_default_advmss.patch b/queue-5.15/ipv6-use-rcu-protection-in-ip6_default_advmss.patch
new file mode 100644 (file)
index 0000000..ff22d94
--- /dev/null
@@ -0,0 +1,49 @@
+From 15bb8fffed081e8f0bae7e5bfc10813b28c9fafc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 15:51:18 +0000
+Subject: ipv6: use RCU protection in ip6_default_advmss()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3c8ffcd248da34fc41e52a46e51505900115fc2a ]
+
+ip6_default_advmss() needs rcu protection to make
+sure the net structure it reads does not disappear.
+
+Fixes: 5578689a4e3c ("[NETNS][IPV6] route6 - make route6 per namespace")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250205155120.1676781-11-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/route.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index b7f494cca3e5c..94526436b91e8 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -3184,13 +3184,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
+ {
+       struct net_device *dev = dst->dev;
+       unsigned int mtu = dst_mtu(dst);
+-      struct net *net = dev_net(dev);
++      struct net *net;
+       mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
++      rcu_read_lock();
++
++      net = dev_net_rcu(dev);
+       if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
+               mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
++      rcu_read_unlock();
++
+       /*
+        * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
+        * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
+-- 
+2.39.5
+
diff --git a/queue-5.15/namespaceify-min_pmtu-sysctl.patch b/queue-5.15/namespaceify-min_pmtu-sysctl.patch
new file mode 100644 (file)
index 0000000..6219c84
--- /dev/null
@@ -0,0 +1,179 @@
+From aad943d05c2b69b1cef568185aa1787eeec0d8d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jan 2022 10:59:34 +0000
+Subject: Namespaceify min_pmtu sysctl
+
+From: xu xin <xu.xin16@zte.com.cn>
+
+[ Upstream commit 1de6b15a434c0068253fea5d719f71143e7e3a79 ]
+
+This patch enables the sysctl min_pmtu to be configured per net
+namespace.
+
+Signed-off-by: xu xin <xu.xin16@zte.com.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 139512191bd0 ("ipv4: use RCU protection in __ip_rt_update_pmtu()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netns/ipv4.h |  2 ++
+ net/ipv4/route.c         | 53 ++++++++++++++++++++++++++++------------
+ 2 files changed, 39 insertions(+), 16 deletions(-)
+
+diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
+index d60a10cfc3823..16515c04a46a7 100644
+--- a/include/net/netns/ipv4.h
++++ b/include/net/netns/ipv4.h
+@@ -84,6 +84,8 @@ struct netns_ipv4 {
+       int sysctl_icmp_ratelimit;
+       int sysctl_icmp_ratemask;
++      u32 ip_rt_min_pmtu;
++
+       struct local_ports ip_local_ports;
+       u8 sysctl_tcp_ecn;
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 3ad78bbd6261b..9a837cd2b925a 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -118,6 +118,8 @@
+ #define RT_GC_TIMEOUT (300*HZ)
++#define DEFAULT_MIN_PMTU (512 + 20 + 20)
++
+ static int ip_rt_max_size;
+ static int ip_rt_redirect_number __read_mostly        = 9;
+ static int ip_rt_redirect_load __read_mostly  = HZ / 50;
+@@ -125,7 +127,6 @@ static int ip_rt_redirect_silence __read_mostly    = ((HZ / 50) << (9 + 1));
+ static int ip_rt_error_cost __read_mostly     = HZ;
+ static int ip_rt_error_burst __read_mostly    = 5 * HZ;
+ static int ip_rt_mtu_expires __read_mostly    = 10 * 60 * HZ;
+-static u32 ip_rt_min_pmtu __read_mostly               = 512 + 20 + 20;
+ static int ip_rt_min_advmss __read_mostly     = 256;
+ static int ip_rt_gc_timeout __read_mostly     = RT_GC_TIMEOUT;
+@@ -1034,9 +1035,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+       if (old_mtu < mtu)
+               return;
+-      if (mtu < ip_rt_min_pmtu) {
++      if (mtu < net->ipv4.ip_rt_min_pmtu) {
+               lock = true;
+-              mtu = min(old_mtu, ip_rt_min_pmtu);
++              mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
+       }
+       if (rt->rt_pmtu == mtu && !lock &&
+@@ -3578,14 +3579,6 @@ static struct ctl_table ipv4_route_table[] = {
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+-      {
+-              .procname       = "min_pmtu",
+-              .data           = &ip_rt_min_pmtu,
+-              .maxlen         = sizeof(int),
+-              .mode           = 0644,
+-              .proc_handler   = proc_dointvec_minmax,
+-              .extra1         = &ip_min_valid_pmtu,
+-      },
+       {
+               .procname       = "min_adv_mss",
+               .data           = &ip_rt_min_advmss,
+@@ -3598,13 +3591,21 @@ static struct ctl_table ipv4_route_table[] = {
+ static const char ipv4_route_flush_procname[] = "flush";
+-static struct ctl_table ipv4_route_flush_table[] = {
++static struct ctl_table ipv4_route_netns_table[] = {
+       {
+               .procname       = ipv4_route_flush_procname,
+               .maxlen         = sizeof(int),
+               .mode           = 0200,
+               .proc_handler   = ipv4_sysctl_rtcache_flush,
+       },
++      {
++              .procname       = "min_pmtu",
++              .data           = &init_net.ipv4.ip_rt_min_pmtu,
++              .maxlen         = sizeof(int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_minmax,
++              .extra1         = &ip_min_valid_pmtu,
++      },
+       { },
+ };
+@@ -3612,9 +3613,11 @@ static __net_init int sysctl_route_net_init(struct net *net)
+ {
+       struct ctl_table *tbl;
+-      tbl = ipv4_route_flush_table;
++      tbl = ipv4_route_netns_table;
+       if (!net_eq(net, &init_net)) {
+-              tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
++              int i;
++
++              tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL);
+               if (!tbl)
+                       goto err_dup;
+@@ -3623,6 +3626,12 @@ static __net_init int sysctl_route_net_init(struct net *net)
+                       if (tbl[0].procname != ipv4_route_flush_procname)
+                               tbl[0].procname = NULL;
+               }
++
++              /* Update the variables to point into the current struct net
++               * except for the first element flush
++               */
++              for (i = 1; i < ARRAY_SIZE(ipv4_route_netns_table) - 1; i++)
++                      tbl[i].data += (void *)net - (void *)&init_net;
+       }
+       tbl[0].extra1 = net;
+@@ -3632,7 +3641,7 @@ static __net_init int sysctl_route_net_init(struct net *net)
+       return 0;
+ err_reg:
+-      if (tbl != ipv4_route_flush_table)
++      if (tbl != ipv4_route_netns_table)
+               kfree(tbl);
+ err_dup:
+       return -ENOMEM;
+@@ -3644,7 +3653,7 @@ static __net_exit void sysctl_route_net_exit(struct net *net)
+       tbl = net->ipv4.route_hdr->ctl_table_arg;
+       unregister_net_sysctl_table(net->ipv4.route_hdr);
+-      BUG_ON(tbl == ipv4_route_flush_table);
++      BUG_ON(tbl == ipv4_route_netns_table);
+       kfree(tbl);
+ }
+@@ -3654,6 +3663,17 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
+ };
+ #endif
++static __net_init int netns_ip_rt_init(struct net *net)
++{
++      /* Set default value for namespaceified sysctls */
++      net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
++      return 0;
++}
++
++static struct pernet_operations __net_initdata ip_rt_ops = {
++      .init = netns_ip_rt_init,
++};
++
+ static __net_init int rt_genid_init(struct net *net)
+ {
+       atomic_set(&net->ipv4.rt_genid, 0);
+@@ -3759,6 +3779,7 @@ int __init ip_rt_init(void)
+ #ifdef CONFIG_SYSCTL
+       register_pernet_subsys(&sysctl_route_ops);
+ #endif
++      register_pernet_subsys(&ip_rt_ops);
+       register_pernet_subsys(&rt_genid_ops);
+       register_pernet_subsys(&ipv4_inetpeer_ops);
+       return 0;
+-- 
+2.39.5
+
diff --git a/queue-5.15/namespaceify-mtu_expires-sysctl.patch b/queue-5.15/namespaceify-mtu_expires-sysctl.patch
new file mode 100644 (file)
index 0000000..cf8c1df
--- /dev/null
@@ -0,0 +1,110 @@
+From 263bf32bee3c8ef0e309b7b0ef3285ddeec1b7dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jan 2022 10:59:47 +0000
+Subject: Namespaceify mtu_expires sysctl
+
+From: xu xin <xu.xin16@zte.com.cn>
+
+[ Upstream commit 1135fad204805518462c1f0caaca6bcd52ba78cf ]
+
+This patch enables the sysctl mtu_expires to be configured per net
+namespace.
+
+Signed-off-by: xu xin <xu.xin16@zte.com.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: 139512191bd0 ("ipv4: use RCU protection in __ip_rt_update_pmtu()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netns/ipv4.h |  1 +
+ net/ipv4/route.c         | 21 +++++++++++----------
+ 2 files changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
+index 16515c04a46a7..8bc0d865338e4 100644
+--- a/include/net/netns/ipv4.h
++++ b/include/net/netns/ipv4.h
+@@ -85,6 +85,7 @@ struct netns_ipv4 {
+       int sysctl_icmp_ratemask;
+       u32 ip_rt_min_pmtu;
++      int ip_rt_mtu_expires;
+       struct local_ports ip_local_ports;
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 9a837cd2b925a..75c379315ef37 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -119,6 +119,7 @@
+ #define RT_GC_TIMEOUT (300*HZ)
+ #define DEFAULT_MIN_PMTU (512 + 20 + 20)
++#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
+ static int ip_rt_max_size;
+ static int ip_rt_redirect_number __read_mostly        = 9;
+@@ -126,7 +127,6 @@ static int ip_rt_redirect_load __read_mostly       = HZ / 50;
+ static int ip_rt_redirect_silence __read_mostly       = ((HZ / 50) << (9 + 1));
+ static int ip_rt_error_cost __read_mostly     = HZ;
+ static int ip_rt_error_burst __read_mostly    = 5 * HZ;
+-static int ip_rt_mtu_expires __read_mostly    = 10 * 60 * HZ;
+ static int ip_rt_min_advmss __read_mostly     = 256;
+ static int ip_rt_gc_timeout __read_mostly     = RT_GC_TIMEOUT;
+@@ -1041,7 +1041,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+       }
+       if (rt->rt_pmtu == mtu && !lock &&
+-          time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
++          time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
+               return;
+       rcu_read_lock();
+@@ -1051,7 +1051,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+               fib_select_path(net, &res, fl4, NULL);
+               nhc = FIB_RES_NHC(res);
+               update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+-                                    jiffies + ip_rt_mtu_expires);
++                                    jiffies + net->ipv4.ip_rt_mtu_expires);
+       }
+       rcu_read_unlock();
+ }
+@@ -3572,13 +3572,6 @@ static struct ctl_table ipv4_route_table[] = {
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+-      {
+-              .procname       = "mtu_expires",
+-              .data           = &ip_rt_mtu_expires,
+-              .maxlen         = sizeof(int),
+-              .mode           = 0644,
+-              .proc_handler   = proc_dointvec_jiffies,
+-      },
+       {
+               .procname       = "min_adv_mss",
+               .data           = &ip_rt_min_advmss,
+@@ -3606,6 +3599,13 @@ static struct ctl_table ipv4_route_netns_table[] = {
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &ip_min_valid_pmtu,
+       },
++      {
++              .procname       = "mtu_expires",
++              .data           = &init_net.ipv4.ip_rt_mtu_expires,
++              .maxlen         = sizeof(int),
++              .mode           = 0644,
++              .proc_handler   = proc_dointvec_jiffies,
++      },
+       { },
+ };
+@@ -3667,6 +3667,7 @@ static __net_init int netns_ip_rt_init(struct net *net)
+ {
+       /* Set default value for namespaceified sysctls */
+       net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
++      net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
+       return 0;
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.15/ndisc-extend-rcu-protection-in-ndisc_send_skb.patch b/queue-5.15/ndisc-extend-rcu-protection-in-ndisc_send_skb.patch
new file mode 100644 (file)
index 0000000..e0bc44f
--- /dev/null
@@ -0,0 +1,72 @@
+From 8a44c27dc8c014a4f3a2fc0db7e7ad5a5a1128e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Feb 2025 13:58:39 +0000
+Subject: ndisc: extend RCU protection in ndisc_send_skb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ed6ae1f325d3c43966ec1b62ac1459e2b8e45640 ]
+
+ndisc_send_skb() can be called without RTNL or RCU held.
+
+Acquire rcu_read_lock() earlier, so that we can use dev_net_rcu()
+and avoid a potential UAF.
+
+Fixes: 1762f7e88eb3 ("[NETNS][IPV6] ndisc - make socket control per namespace")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250207135841.1948589-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ndisc.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
+index 3972189c09b14..af584e879467e 100644
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -471,16 +471,20 @@ static void ndisc_send_skb(struct sk_buff *skb,
+                          const struct in6_addr *daddr,
+                          const struct in6_addr *saddr)
+ {
++      struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+       struct dst_entry *dst = skb_dst(skb);
+-      struct net *net = dev_net(skb->dev);
+-      struct sock *sk = net->ipv6.ndisc_sk;
+       struct inet6_dev *idev;
++      struct net *net;
++      struct sock *sk;
+       int err;
+-      struct icmp6hdr *icmp6h = icmp6_hdr(skb);
+       u8 type;
+       type = icmp6h->icmp6_type;
++      rcu_read_lock();
++
++      net = dev_net_rcu(skb->dev);
++      sk = net->ipv6.ndisc_sk;
+       if (!dst) {
+               struct flowi6 fl6;
+               int oif = skb->dev->ifindex;
+@@ -488,6 +492,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
+               icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
+               dst = icmp6_dst_alloc(skb->dev, &fl6);
+               if (IS_ERR(dst)) {
++                      rcu_read_unlock();
+                       kfree_skb(skb);
+                       return;
+               }
+@@ -502,7 +507,6 @@ static void ndisc_send_skb(struct sk_buff *skb,
+       ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+-      rcu_read_lock();
+       idev = __in6_dev_get(dst->dev);
+       IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+-- 
+2.39.5
+
diff --git a/queue-5.15/ndisc-use-rcu-protection-in-ndisc_alloc_skb.patch b/queue-5.15/ndisc-use-rcu-protection-in-ndisc_alloc_skb.patch
new file mode 100644 (file)
index 0000000..192d6b1
--- /dev/null
@@ -0,0 +1,59 @@
+From ac4f3804a967edf91db2bca556b68c2e3c9e98c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Feb 2025 13:58:34 +0000
+Subject: ndisc: use RCU protection in ndisc_alloc_skb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 628e6d18930bbd21f2d4562228afe27694f66da9 ]
+
+ndisc_alloc_skb() can be called without RTNL or RCU being held.
+
+Add RCU protection to avoid possible UAF.
+
+Fixes: de09334b9326 ("ndisc: Introduce ndisc_alloc_skb() helper.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250207135841.1948589-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ndisc.c | 10 ++++------
+ 1 file changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
+index 63c1420c58249..3972189c09b14 100644
+--- a/net/ipv6/ndisc.c
++++ b/net/ipv6/ndisc.c
+@@ -417,15 +417,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
+ {
+       int hlen = LL_RESERVED_SPACE(dev);
+       int tlen = dev->needed_tailroom;
+-      struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
+       struct sk_buff *skb;
+       skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
+-      if (!skb) {
+-              ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
+-                        __func__);
++      if (!skb)
+               return NULL;
+-      }
+       skb->protocol = htons(ETH_P_IPV6);
+       skb->dev = dev;
+@@ -436,7 +432,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
+       /* Manually assign socket ownership as we avoid calling
+        * sock_alloc_send_pskb() to bypass wmem buffer limits
+        */
+-      skb_set_owner_w(skb, sk);
++      rcu_read_lock();
++      skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
++      rcu_read_unlock();
+       return skb;
+ }
+-- 
+2.39.5
+
diff --git a/queue-5.15/neighbour-delete-redundant-judgment-statements.patch b/queue-5.15/neighbour-delete-redundant-judgment-statements.patch
new file mode 100644 (file)
index 0000000..d8c22ea
--- /dev/null
@@ -0,0 +1,40 @@
+From aa8d7d2415da6a2fc8f8f325a237852dbe452ab5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Aug 2024 12:32:45 +0800
+Subject: neighbour: delete redundant judgment statements
+
+From: Li Zetao <lizetao1@huawei.com>
+
+[ Upstream commit c25bdd2ac8cf7da70a226f1a66cdce7af15ff86f ]
+
+The initial value of err is -ENOBUFS, and err is guaranteed to be
+less than 0 before every goto errout. Therefore, on the errout error
+path, there is no need to check again that err is less than 0;
+delete the redundant check to make the code more concise.
+
+Signed-off-by: Li Zetao <lizetao1@huawei.com>
+Reviewed-by: Petr Machata <petrm@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: becbd5850c03 ("neighbour: use RCU protection in __neigh_notify()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/neighbour.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 6f3bd1a4ec8ca..7fffbe0424342 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -3387,8 +3387,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
+       rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+       return;
+ errout:
+-      if (err < 0)
+-              rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
++      rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ }
+ void neigh_app_ns(struct neighbour *n)
+-- 
+2.39.5
+
diff --git a/queue-5.15/neighbour-use-rcu-protection-in-__neigh_notify.patch b/queue-5.15/neighbour-use-rcu-protection-in-__neigh_notify.patch
new file mode 100644 (file)
index 0000000..435740b
--- /dev/null
@@ -0,0 +1,58 @@
+From e227247e68d9cf6ebcb96be1999f2051b1127a31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Feb 2025 13:58:35 +0000
+Subject: neighbour: use RCU protection in __neigh_notify()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit becbd5850c03ed33b232083dd66c6e38c0c0e569 ]
+
+__neigh_notify() can be called without RTNL or RCU protection.
+
+Use RCU protection to avoid potential UAF.
+
+Fixes: 426b5303eb43 ("[NETNS]: Modify the neighbour table code so it handles multiple network namespaces")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250207135841.1948589-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/neighbour.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 7fffbe0424342..9549738b81842 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -3369,10 +3369,12 @@ static const struct seq_operations neigh_stat_seq_ops = {
+ static void __neigh_notify(struct neighbour *n, int type, int flags,
+                          u32 pid)
+ {
+-      struct net *net = dev_net(n->dev);
+       struct sk_buff *skb;
+       int err = -ENOBUFS;
++      struct net *net;
++      rcu_read_lock();
++      net = dev_net_rcu(n->dev);
+       skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
+       if (skb == NULL)
+               goto errout;
+@@ -3385,9 +3387,11 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
+               goto errout;
+       }
+       rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+-      return;
++      goto out;
+ errout:
+       rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
++out:
++      rcu_read_unlock();
+ }
+ void neigh_app_ns(struct neighbour *n)
+-- 
+2.39.5
+
diff --git a/queue-5.15/net-add-dev_net_rcu-helper.patch b/queue-5.15/net-add-dev_net_rcu-helper.patch
new file mode 100644 (file)
index 0000000..363d4d8
--- /dev/null
@@ -0,0 +1,62 @@
+From 33e40695198caa179ab1454878b6d2be38fbaa96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Feb 2025 15:51:09 +0000
+Subject: net: add dev_net_rcu() helper
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 482ad2a4ace2740ca0ff1cbc8f3c7f862f3ab507 ]
+
+dev->nd_net can change, readers should either
+use rcu_read_lock() or RTNL.
+
+We currently use a generic helper, dev_net() with
+no debugging support. We probably have many hidden bugs.
+
+Add dev_net_rcu() helper for callers using rcu_read_lock()
+protection.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250205155120.1676781-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: dd205fcc33d9 ("ipv4: use RCU protection in rt_is_expired()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h   | 6 ++++++
+ include/net/net_namespace.h | 2 +-
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 829ebde5d50d5..79b528c128c14 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2454,6 +2454,12 @@ struct net *dev_net(const struct net_device *dev)
+       return read_pnet(&dev->nd_net);
+ }
++static inline
++struct net *dev_net_rcu(const struct net_device *dev)
++{
++      return read_pnet_rcu(&dev->nd_net);
++}
++
+ static inline
+ void dev_net_set(struct net_device *dev, struct net *net)
+ {
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index 0b6bea456fce6..ff9ecc76d622b 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -336,7 +336,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
+ #endif
+ }
+-static inline struct net *read_pnet_rcu(possible_net_t *pnet)
++static inline struct net *read_pnet_rcu(const possible_net_t *pnet)
+ {
+ #ifdef CONFIG_NET_NS
+       return rcu_dereference(pnet->net);
+-- 
+2.39.5
+
diff --git a/queue-5.15/net-ipv4-cache-pmtu-for-all-packet-paths-if-multipat.patch b/queue-5.15/net-ipv4-cache-pmtu-for-all-packet-paths-if-multipat.patch
new file mode 100644 (file)
index 0000000..cb827ca
--- /dev/null
@@ -0,0 +1,292 @@
+From d3df20325474b0793d11f7a7e98a38fdc3988a06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 8 Nov 2024 09:34:24 +0000
+Subject: net: ipv4: Cache pmtu for all packet paths if multipath enabled
+
+From: Vladimir Vdovin <deliran@verdict.gg>
+
+[ Upstream commit 7d3f3b4367f315a61fc615e3138f3d320da8c466 ]
+
+Check number of paths by fib_info_num_path(),
+and update_or_create_fnhe() for every path.
+Problem is that pmtu is cached only for the oif
+that has received icmp message "need to frag",
+other oifs will still try to use "default" iface mtu.
+
+An example topology showing the problem:
+
+                    |  host1
+                +---------+
+                |  dummy0 | 10.179.20.18/32  mtu9000
+                +---------+
+        +-----------+----------------+
+    +---------+                     +---------+
+    | ens17f0 |  10.179.2.141/31    | ens17f1 |  10.179.2.13/31
+    +---------+                     +---------+
+        |    (all here have mtu 9000)    |
+    +------+                         +------+
+    | ro1  |  10.179.2.140/31        | ro2  |  10.179.2.12/31
+    +------+                         +------+
+        |                                |
+---------+------------+-------------------+------
+                        |
+                    +-----+
+                    | ro3 | 10.10.10.10  mtu1500
+                    +-----+
+                        |
+    ========================================
+                some networks
+    ========================================
+                        |
+                    +-----+
+                    | eth0| 10.10.30.30  mtu9000
+                    +-----+
+                        |  host2
+
+host1 has multipath enabled and
+sysctl net.ipv4.fib_multipath_hash_policy = 1:
+
+default proto static src 10.179.20.18
+        nexthop via 10.179.2.12 dev ens17f1 weight 1
+        nexthop via 10.179.2.140 dev ens17f0 weight 1
+
+When host1 tries to do pmtud from 10.179.20.18/32 to host2,
+host1 receives on the ens17f1 iface an icmp packet from ro3 reporting mtu=1500.
+Host1 then caches it in the nexthop exceptions cache.
+
+Problem is that it is cached only for the iface that has received icmp,
+and there is no way that ro3 will send icmp msg to host1 via another path.
+
+Host1 now has these routes to host2:
+
+ip r g 10.10.30.30 sport 30000 dport 443
+10.10.30.30 via 10.179.2.12 dev ens17f1 src 10.179.20.18 uid 0
+    cache expires 521sec mtu 1500
+
+ip r g 10.10.30.30 sport 30033 dport 443
+10.10.30.30 via 10.179.2.140 dev ens17f0 src 10.179.20.18 uid 0
+    cache
+
+So when host1 tries again to reach host2 with mtu>1500,
+if the packet flow is lucky enough to be hashed with oif=ens17f1 it's ok;
+if oif=ens17f0 it blackholes and still gets icmp msgs from ro3 to ens17f1,
+until the lucky day when ro3 sends one through another flow to ens17f0.
+
+Signed-off-by: Vladimir Vdovin <deliran@verdict.gg>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://patch.msgid.link/20241108093427.317942-1-deliran@verdict.gg
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 139512191bd0 ("ipv4: use RCU protection in __ip_rt_update_pmtu()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/route.c                    |  13 ++++
+ tools/testing/selftests/net/pmtu.sh | 112 +++++++++++++++++++++++-----
+ 2 files changed, 108 insertions(+), 17 deletions(-)
+
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 75c379315ef37..2ae9d2855efab 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1049,6 +1049,19 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+               struct fib_nh_common *nhc;
+               fib_select_path(net, &res, fl4, NULL);
++#ifdef CONFIG_IP_ROUTE_MULTIPATH
++              if (fib_info_num_path(res.fi) > 1) {
++                      int nhsel;
++
++                      for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
++                              nhc = fib_info_nhc(res.fi, nhsel);
++                              update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
++                                                    jiffies + net->ipv4.ip_rt_mtu_expires);
++                      }
++                      rcu_read_unlock();
++                      return;
++              }
++#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+               nhc = FIB_RES_NHC(res);
+               update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+                                     jiffies + net->ipv4.ip_rt_mtu_expires);
+diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
+index dbfa56173d291..33f4fb34ac9b2 100755
+--- a/tools/testing/selftests/net/pmtu.sh
++++ b/tools/testing/selftests/net/pmtu.sh
+@@ -197,6 +197,12 @@
+ #
+ # - pmtu_ipv6_route_change
+ #     Same as above but with IPv6
++#
++# - pmtu_ipv4_mp_exceptions
++#     Use the same topology as in pmtu_ipv4, but add routeable addresses
++#     on host A and B on lo reachable via both routers. Host A and B
++#     addresses have multipath routes to each other, b_r1 mtu = 1500.
++#     Check that PMTU exceptions are created for both paths.
+ # Kselftest framework requirement - SKIP code is 4.
+ ksft_skip=4
+@@ -266,7 +272,8 @@ tests="
+       list_flush_ipv4_exception       ipv4: list and flush cached exceptions  1
+       list_flush_ipv6_exception       ipv6: list and flush cached exceptions  1
+       pmtu_ipv4_route_change          ipv4: PMTU exception w/route replace    1
+-      pmtu_ipv6_route_change          ipv6: PMTU exception w/route replace    1"
++      pmtu_ipv6_route_change          ipv6: PMTU exception w/route replace    1
++      pmtu_ipv4_mp_exceptions         ipv4: PMTU multipath nh exceptions      1"
+ NS_A="ns-A"
+ NS_B="ns-B"
+@@ -353,6 +360,9 @@ tunnel6_a_addr="fd00:2::a"
+ tunnel6_b_addr="fd00:2::b"
+ tunnel6_mask="64"
++host4_a_addr="192.168.99.99"
++host4_b_addr="192.168.88.88"
++
+ dummy6_0_prefix="fc00:1000::"
+ dummy6_1_prefix="fc00:1001::"
+ dummy6_mask="64"
+@@ -907,6 +917,52 @@ setup_ovs_bridge() {
+       run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
+ }
++setup_multipath_new() {
++      # Set up host A with multipath routes to host B host4_b_addr
++      run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo
++      run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1
++      run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2
++      run_cmd ${ns_a} ip nexthop add id 403 group 401/402
++      run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403
++
++      # Set up host B with multipath routes to host A host4_a_addr
++      run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo
++      run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1
++      run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2
++      run_cmd ${ns_b} ip nexthop add id 403 group 401/402
++      run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403
++}
++
++setup_multipath_old() {
++      # Set up host A with multipath routes to host B host4_b_addr
++      run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo
++      run_cmd ${ns_a} ip route add ${host4_b_addr} \
++                      src ${host4_a_addr} \
++                      nexthop via ${prefix4}.${a_r1}.2 weight 1 \
++                      nexthop via ${prefix4}.${a_r2}.2 weight 1
++
++      # Set up host B with multipath routes to host A host4_a_addr
++      run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo
++      run_cmd ${ns_b} ip route add ${host4_a_addr} \
++                      src ${host4_b_addr} \
++                      nexthop via ${prefix4}.${b_r1}.2 weight 1 \
++                      nexthop via ${prefix4}.${b_r2}.2 weight 1
++}
++
++setup_multipath() {
++      if [ "$USE_NH" = "yes" ]; then
++              setup_multipath_new
++      else
++              setup_multipath_old
++      fi
++
++      # Set up routers with routes to dummies
++      run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1
++      run_cmd ${ns_r2} ip route add ${host4_a_addr} via ${prefix4}.${a_r2}.1
++      run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1
++      run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1
++}
++
+ setup() {
+       [ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
+@@ -988,23 +1044,15 @@ link_get_mtu() {
+ }
+ route_get_dst_exception() {
+-      ns_cmd="${1}"
+-      dst="${2}"
+-      dsfield="${3}"
++      ns_cmd="${1}"; shift
+-      if [ -z "${dsfield}" ]; then
+-              dsfield=0
+-      fi
+-
+-      ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
++      ${ns_cmd} ip route get "$@"
+ }
+ route_get_dst_pmtu_from_exception() {
+-      ns_cmd="${1}"
+-      dst="${2}"
+-      dsfield="${3}"
++      ns_cmd="${1}"; shift
+-      mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
++      mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")"
+ }
+ check_pmtu_value() {
+@@ -1147,10 +1195,10 @@ test_pmtu_ipv4_dscp_icmp_exception() {
+       run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
+       # Check that exceptions have been created with the correct PMTU
+-      pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
++      pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")"
+       check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+-      pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
++      pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")"
+       check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+ }
+@@ -1197,9 +1245,9 @@ test_pmtu_ipv4_dscp_udp_exception() {
+               UDP:"${dst2}":50000,tos="${dsfield}"
+       # Check that exceptions have been created with the correct PMTU
+-      pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
++      pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")"
+       check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+-      pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
++      pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")"
+       check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+ }
+@@ -2205,6 +2253,36 @@ test_pmtu_ipv6_route_change() {
+       test_pmtu_ipvX_route_change 6
+ }
++test_pmtu_ipv4_mp_exceptions() {
++      setup namespaces routing multipath || return $ksft_skip
++
++      trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
++            "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
++            "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
++            "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
++
++      # Set up initial MTU values
++      mtu "${ns_a}"  veth_A-R1 2000
++      mtu "${ns_r1}" veth_R1-A 2000
++      mtu "${ns_r1}" veth_R1-B 1500
++      mtu "${ns_b}"  veth_B-R1 1500
++
++      mtu "${ns_a}"  veth_A-R2 2000
++      mtu "${ns_r2}" veth_R2-A 2000
++      mtu "${ns_r2}" veth_R2-B 1500
++      mtu "${ns_b}"  veth_B-R2 1500
++
++      # Ping and expect two nexthop exceptions for two routes
++      run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${host4_b_addr}"
++
++      # Check that exceptions have been created with the correct PMTU
++      pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)"
++      pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)"
++
++      check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1
++      check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1
++}
++
+ usage() {
+       echo
+       echo "$0 [OPTIONS] [TEST]..."
+-- 
+2.39.5
+
diff --git a/queue-5.15/net-treat-possible_net_t-net-pointer-as-an-rcu-one-a.patch b/queue-5.15/net-treat-possible_net_t-net-pointer-as-an-rcu-one-a.patch
new file mode 100644 (file)
index 0000000..d909b2f
--- /dev/null
@@ -0,0 +1,65 @@
+From 8a95e86353f62029da9cd4e760bfc558b5979cd1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Oct 2023 14:10:23 +0200
+Subject: net: treat possible_net_t net pointer as an RCU one and add
+ read_pnet_rcu()
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 2034d90ae41ae93e30d492ebcf1f06f97a9cfba6 ]
+
+Make the net pointer stored in possible_net_t structure annotated as
+an RCU pointer. Change the access helpers to treat it as such.
+Introduce read_pnet_rcu() helper to allow caller to dereference
+the net pointer under RCU read lock.
+
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: dd205fcc33d9 ("ipv4: use RCU protection in rt_is_expired()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/net_namespace.h | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
+index d184b832166b6..0b6bea456fce6 100644
+--- a/include/net/net_namespace.h
++++ b/include/net/net_namespace.h
+@@ -316,21 +316,30 @@ static inline int check_net(const struct net *net)
+ typedef struct {
+ #ifdef CONFIG_NET_NS
+-      struct net *net;
++      struct net __rcu *net;
+ #endif
+ } possible_net_t;
+ static inline void write_pnet(possible_net_t *pnet, struct net *net)
+ {
+ #ifdef CONFIG_NET_NS
+-      pnet->net = net;
++      rcu_assign_pointer(pnet->net, net);
+ #endif
+ }
+ static inline struct net *read_pnet(const possible_net_t *pnet)
+ {
+ #ifdef CONFIG_NET_NS
+-      return pnet->net;
++      return rcu_dereference_protected(pnet->net, true);
++#else
++      return &init_net;
++#endif
++}
++
++static inline struct net *read_pnet_rcu(possible_net_t *pnet)
++{
++#ifdef CONFIG_NET_NS
++      return rcu_dereference(pnet->net);
+ #else
+       return &init_net;
+ #endif
+-- 
+2.39.5
+
diff --git a/queue-5.15/openvswitch-use-rcu-protection-in-ovs_vport_cmd_fill.patch b/queue-5.15/openvswitch-use-rcu-protection-in-ovs_vport_cmd_fill.patch
new file mode 100644 (file)
index 0000000..53ac7ae
--- /dev/null
@@ -0,0 +1,66 @@
+From 0d74272a163a95a6bdb5c307a5bdfde008268843 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Feb 2025 13:58:37 +0000
+Subject: openvswitch: use RCU protection in ovs_vport_cmd_fill_info()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 90b2f49a502fa71090d9f4fe29a2f51fe5dff76d ]
+
+ovs_vport_cmd_fill_info() can be called without RTNL or RCU.
+
+Use RCU protection and dev_net_rcu() to avoid potential UAF.
+
+Fixes: 9354d4520342 ("openvswitch: reliable interface indentification in port dumps")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250207135841.1948589-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/openvswitch/datapath.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
+index 0fc98e89a1149..c28b56c309169 100644
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -2058,6 +2058,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+ {
+       struct ovs_header *ovs_header;
+       struct ovs_vport_stats vport_stats;
++      struct net *net_vport;
+       int err;
+       ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
+@@ -2074,12 +2075,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+           nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
+               goto nla_put_failure;
+-      if (!net_eq(net, dev_net(vport->dev))) {
+-              int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
++      rcu_read_lock();
++      net_vport = dev_net_rcu(vport->dev);
++      if (!net_eq(net, net_vport)) {
++              int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
+               if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+-                      goto nla_put_failure;
++                      goto nla_put_failure_unlock;
+       }
++      rcu_read_unlock();
+       ovs_vport_get_stats(vport, &vport_stats);
+       if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
+@@ -2097,6 +2101,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+       genlmsg_end(skb, ovs_header);
+       return 0;
++nla_put_failure_unlock:
++      rcu_read_unlock();
+ nla_put_failure:
+       err = -EMSGSIZE;
+ error:
+-- 
+2.39.5
+
diff --git a/queue-5.15/selftest-net-test-ipv4-pmtu-exceptions-with-dscp-and.patch b/queue-5.15/selftest-net-test-ipv4-pmtu-exceptions-with-dscp-and.patch
new file mode 100644 (file)
index 0000000..da74eb7
--- /dev/null
@@ -0,0 +1,273 @@
+From 890ab1a64aa1bfe3679db9f4fc6234fd41248d8b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Mar 2022 13:45:11 +0100
+Subject: selftest: net: Test IPv4 PMTU exceptions with DSCP and ECN
+
+From: Guillaume Nault <gnault@redhat.com>
+
+[ Upstream commit ec730c3e1f0e3a80612a9be2beb00e2b4f93fe70 ]
+
+Add two tests to pmtu.sh, for verifying that PMTU exceptions get
+properly created for routes that don't belong to the main table.
+
+A fib-rule based on the packet's DSCP field is used to jump to the
+correct table. ECN shouldn't interfere with this process, so each test
+has two components: one that only sets DSCP and one that sets both DSCP
+and ECN.
+
+One of the tests triggers PMTU exceptions using ICMP Echo Requests, the
+other using UDP packets (to test different handlers in the kernel).
+
+A few adjustments are necessary in the rest of the script to allow
+policy routing scenarios:
+
+  * Add global variable rt_table that allows setup_routing_*() to
+    add routes to a specific routing table. By default rt_table is set
+    to "main", so existing tests don't need to be modified.
+
+  * Another global variable, policy_mark, is used to define which
+    dsfield value is used for policy routing. This variable has no
+    effect on tests that don't use policy routing.
+
+  * The UDP version of the test uses socat. So cleanup() now also needs
+    to kill socat PIDs.
+
+  * route_get_dst_pmtu_from_exception() and route_get_dst_exception()
+    now take an optional third argument specifying the dsfield. If
+    not specified, 0 is used, so existing users don't need to be
+    modified.
+
+Signed-off-by: Guillaume Nault <gnault@redhat.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 139512191bd0 ("ipv4: use RCU protection in __ip_rt_update_pmtu()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/pmtu.sh | 141 +++++++++++++++++++++++++++-
+ 1 file changed, 137 insertions(+), 4 deletions(-)
+
+diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
+index 84c05e533056d..dbfa56173d291 100755
+--- a/tools/testing/selftests/net/pmtu.sh
++++ b/tools/testing/selftests/net/pmtu.sh
+@@ -26,6 +26,15 @@
+ # - pmtu_ipv6
+ #     Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
+ #
++# - pmtu_ipv4_dscp_icmp_exception
++#     Set up the same network topology as pmtu_ipv4, but use non-default
++#     routing table in A. A fib-rule is used to jump to this routing table
++#     based on DSCP. Send ICMPv4 packets with the expected DSCP value and
++#     verify that ECN doesn't interfere with the creation of PMTU exceptions.
++#
++# - pmtu_ipv4_dscp_udp_exception
++#     Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
++#
+ # - pmtu_ipv4_vxlan4_exception
+ #     Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
+ #     over IPv4 between A and B, routed via R1. On the link between R1 and B,
+@@ -203,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+ tests="
+       pmtu_ipv4_exception             ipv4: PMTU exceptions                   1
+       pmtu_ipv6_exception             ipv6: PMTU exceptions                   1
++      pmtu_ipv4_dscp_icmp_exception   ICMPv4 with DSCP and ECN: PMTU exceptions       1
++      pmtu_ipv4_dscp_udp_exception    UDPv4 with DSCP and ECN: PMTU exceptions        1
+       pmtu_ipv4_vxlan4_exception      IPv4 over vxlan4: PMTU exceptions       1
+       pmtu_ipv6_vxlan4_exception      IPv6 over vxlan4: PMTU exceptions       1
+       pmtu_ipv4_vxlan6_exception      IPv4 over vxlan6: PMTU exceptions       1
+@@ -323,6 +334,9 @@ routes_nh="
+       B       6       default                 61
+ "
++policy_mark=0x04
++rt_table=main
++
+ veth4_a_addr="192.168.1.1"
+ veth4_b_addr="192.168.1.2"
+ veth4_c_addr="192.168.2.10"
+@@ -346,6 +360,7 @@ dummy6_mask="64"
+ err_buf=
+ tcpdump_pids=
+ nettest_pids=
++socat_pids=
+ err() {
+       err_buf="${err_buf}${1}
+@@ -725,7 +740,7 @@ setup_routing_old() {
+               ns_name="$(nsname ${ns})"
+-              ip -n ${ns_name} route add ${addr} via ${gw}
++              ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
+               ns=""; addr=""; gw=""
+       done
+@@ -755,7 +770,7 @@ setup_routing_new() {
+               ns_name="$(nsname ${ns})"
+-              ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
++              ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
+               ns=""; fam=""; addr=""; nhid=""
+       done
+@@ -800,6 +815,24 @@ setup_routing() {
+       return 0
+ }
++setup_policy_routing() {
++      setup_routing
++
++      ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
++              table "${rt_table}"
++
++      # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
++      # have an option do to it.
++      tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
++      tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
++      tc -netns "${NS_A}" filter add dev veth_A-R1                      \
++              protocol ipv4 flower ip_proto udp                         \
++              action pedit ex munge ip df set 0x40 pipe csum ip and udp
++      tc -netns "${NS_A}" filter add dev veth_A-R2                      \
++              protocol ipv4 flower ip_proto udp                         \
++              action pedit ex munge ip df set 0x40 pipe csum ip and udp
++}
++
+ setup_bridge() {
+       run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
+       run_cmd ${ns_a} ip link set br0 up
+@@ -905,6 +938,11 @@ cleanup() {
+       done
+       nettest_pids=
++      for pid in ${socat_pids}; do
++              kill "${pid}"
++      done
++      socat_pids=
++
+       for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
+               ip netns del ${n} 2> /dev/null
+       done
+@@ -952,15 +990,21 @@ link_get_mtu() {
+ route_get_dst_exception() {
+       ns_cmd="${1}"
+       dst="${2}"
++      dsfield="${3}"
+-      ${ns_cmd} ip route get "${dst}"
++      if [ -z "${dsfield}" ]; then
++              dsfield=0
++      fi
++
++      ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
+ }
+ route_get_dst_pmtu_from_exception() {
+       ns_cmd="${1}"
+       dst="${2}"
++      dsfield="${3}"
+-      mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
++      mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
+ }
+ check_pmtu_value() {
+@@ -1070,6 +1114,95 @@ test_pmtu_ipv6_exception() {
+       test_pmtu_ipvX 6
+ }
++test_pmtu_ipv4_dscp_icmp_exception() {
++      rt_table=100
++
++      setup namespaces policy_routing || return $ksft_skip
++      trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
++            "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
++            "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
++            "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
++
++      # Set up initial MTU values
++      mtu "${ns_a}"  veth_A-R1 2000
++      mtu "${ns_r1}" veth_R1-A 2000
++      mtu "${ns_r1}" veth_R1-B 1400
++      mtu "${ns_b}"  veth_B-R1 1400
++
++      mtu "${ns_a}"  veth_A-R2 2000
++      mtu "${ns_r2}" veth_R2-A 2000
++      mtu "${ns_r2}" veth_R2-B 1500
++      mtu "${ns_b}"  veth_B-R2 1500
++
++      len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
++
++      dst1="${prefix4}.${b_r1}.1"
++      dst2="${prefix4}.${b_r2}.1"
++
++      # Create route exceptions
++      dsfield=${policy_mark} # No ECN bit set (Not-ECT)
++      run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
++
++      dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
++      run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
++
++      # Check that exceptions have been created with the correct PMTU
++      pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
++      check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
++
++      pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
++      check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
++}
++
++test_pmtu_ipv4_dscp_udp_exception() {
++      rt_table=100
++
++      if ! which socat > /dev/null 2>&1; then
++              echo "'socat' command not found; skipping tests"
++              return $ksft_skip
++      fi
++
++      setup namespaces policy_routing || return $ksft_skip
++      trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
++            "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
++            "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
++            "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
++
++      # Set up initial MTU values
++      mtu "${ns_a}"  veth_A-R1 2000
++      mtu "${ns_r1}" veth_R1-A 2000
++      mtu "${ns_r1}" veth_R1-B 1400
++      mtu "${ns_b}"  veth_B-R1 1400
++
++      mtu "${ns_a}"  veth_A-R2 2000
++      mtu "${ns_r2}" veth_R2-A 2000
++      mtu "${ns_r2}" veth_R2-B 1500
++      mtu "${ns_b}"  veth_B-R2 1500
++
++      len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
++
++      dst1="${prefix4}.${b_r1}.1"
++      dst2="${prefix4}.${b_r2}.1"
++
++      # Create route exceptions
++      run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
++      socat_pids="${socat_pids} $!"
++
++      dsfield=${policy_mark} # No ECN bit set (Not-ECT)
++      run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
++              UDP:"${dst1}":50000,tos="${dsfield}"
++
++      dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
++      run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
++              UDP:"${dst2}":50000,tos="${dsfield}"
++
++      # Check that exceptions have been created with the correct PMTU
++      pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
++      check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
++      pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
++      check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
++}
++
+ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
+       type=${1}
+       family=${2}
+-- 
+2.39.5
+
index b19800f48facb2e65e251c07fed8abc0bc2375a3..2ad3284b4240801d47775eb9e519cfc76c4e36dd 100644 (file)
@@ -386,3 +386,24 @@ serial-8250-fix-fifo-underflow-on-flush.patch
 alpha-align-stack-for-page-fault-and-user-unaligned-trap-handlers.patch
 gpio-stmpe-check-return-value-of-stmpe_reg_read-in-stmpe_gpio_irq_sync_unlock.patch
 partitions-mac-fix-handling-of-bogus-partition-table.patch
+clocksource-replace-cpumask_weight-with-cpumask_empt.patch
+clocksource-use-pr_info-for-checking-clocksource-syn.patch
+clocksource-use-migrate_disable-to-avoid-calling-get.patch
+ipv4-add-rcu-protection-to-ip4_dst_hoplimit.patch
+net-treat-possible_net_t-net-pointer-as-an-rcu-one-a.patch
+net-add-dev_net_rcu-helper.patch
+ipv4-use-rcu-protection-in-rt_is_expired.patch
+ipv4-use-rcu-protection-in-inet_select_addr.patch
+namespaceify-min_pmtu-sysctl.patch
+namespaceify-mtu_expires-sysctl.patch
+selftest-net-test-ipv4-pmtu-exceptions-with-dscp-and.patch
+net-ipv4-cache-pmtu-for-all-packet-paths-if-multipat.patch
+ipv4-use-rcu-protection-in-__ip_rt_update_pmtu.patch
+ipv6-use-rcu-protection-in-ip6_default_advmss.patch
+ndisc-use-rcu-protection-in-ndisc_alloc_skb.patch
+neighbour-delete-redundant-judgment-statements.patch
+neighbour-use-rcu-protection-in-__neigh_notify.patch
+arp-use-rcu-protection-in-arp_xmit.patch
+openvswitch-use-rcu-protection-in-ovs_vport_cmd_fill.patch
+ndisc-extend-rcu-protection-in-ndisc_send_skb.patch
+ipv6-mcast-add-rcu-protection-to-mld_newpack.patch