--- /dev/null
+From 92f1655aa2b2294d0b49925f3b875a634bd3b59e Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 28 May 2024 11:43:53 +0000
+Subject: net: fix __dst_negative_advice() race
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 92f1655aa2b2294d0b49925f3b875a634bd3b59e upstream.
+
+__dst_negative_advice() does not enforce proper RCU rules when
+sk->dst_cache must be cleared, leading to possible UAF.
+
+RCU rules are that we must first clear sk->sk_dst_cache,
+then call dst_release(old_dst).
+
+Note that sk_dst_reset(sk) is implementing this protocol correctly,
+while __dst_negative_advice() uses the wrong order.
+
+Given that ip6_negative_advice() has special logic
+against RTF_CACHE, this means each of the three ->negative_advice()
+existing methods must perform the sk_dst_reset() themselves.
+
+Note the check against NULL dst is centralized in
+__dst_negative_advice(), there is no need to duplicate
+it in various callbacks.
+
+Many thanks to Clement Lecigne for tracking this issue.
+
+This old bug became visible after the blamed commit, using UDP sockets.
+
+Fixes: a87cb3e48ee8 ("net: Facility to report route quality of connected sockets")
+Reported-by: Clement Lecigne <clecigne@google.com>
+Diagnosed-by: Clement Lecigne <clecigne@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Tom Herbert <tom@herbertland.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20240528114353.1794151-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Lee: Stable backport]
+Signed-off-by: Lee Jones <lee@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/dst_ops.h | 2 +-
+ include/net/sock.h | 13 +++----------
+ net/ipv4/route.c | 22 ++++++++--------------
+ net/ipv6/route.c | 29 +++++++++++++++--------------
+ net/xfrm/xfrm_policy.c | 11 +++--------
+ 5 files changed, 30 insertions(+), 47 deletions(-)
+
+diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
+index 632086b2f644..3ae2fda29507 100644
+--- a/include/net/dst_ops.h
++++ b/include/net/dst_ops.h
+@@ -24,7 +24,7 @@ struct dst_ops {
+ void (*destroy)(struct dst_entry *);
+ void (*ifdown)(struct dst_entry *,
+ struct net_device *dev, int how);
+- struct dst_entry * (*negative_advice)(struct dst_entry *);
++ void (*negative_advice)(struct sock *sk, struct dst_entry *);
+ void (*link_failure)(struct sk_buff *);
+ void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu,
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 8d592df7251f..250d5a6c508c 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1938,19 +1938,12 @@ sk_dst_get(struct sock *sk)
+
+ static inline void dst_negative_advice(struct sock *sk)
+ {
+- struct dst_entry *ndst, *dst = __sk_dst_get(sk);
++ struct dst_entry *dst = __sk_dst_get(sk);
+
+ sk_rethink_txhash(sk);
+
+- if (dst && dst->ops->negative_advice) {
+- ndst = dst->ops->negative_advice(dst);
+-
+- if (ndst != dst) {
+- rcu_assign_pointer(sk->sk_dst_cache, ndst);
+- sk_tx_queue_clear(sk);
+- WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
+- }
+- }
++ if (dst && dst->ops->negative_advice)
++ dst->ops->negative_advice(sk, dst);
+ }
+
+ static inline void
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 5b008d838e2b..2672b71e662d 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -137,7 +137,8 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+ static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
+ static unsigned int ipv4_mtu(const struct dst_entry *dst);
+-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
++static void ipv4_negative_advice(struct sock *sk,
++ struct dst_entry *dst);
+ static void ipv4_link_failure(struct sk_buff *skb);
+ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu,
+@@ -856,22 +857,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
+ __ip_do_redirect(rt, skb, &fl4, true);
+ }
+
+-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
++static void ipv4_negative_advice(struct sock *sk,
++ struct dst_entry *dst)
+ {
+ struct rtable *rt = (struct rtable *)dst;
+- struct dst_entry *ret = dst;
+
+- if (rt) {
+- if (dst->obsolete > 0) {
+- ip_rt_put(rt);
+- ret = NULL;
+- } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
+- rt->dst.expires) {
+- ip_rt_put(rt);
+- ret = NULL;
+- }
+- }
+- return ret;
++ if ((dst->obsolete > 0) ||
++ (rt->rt_flags & RTCF_REDIRECTED) ||
++ rt->dst.expires)
++ sk_dst_reset(sk);
+ }
+
+ /*
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 8eac2c890449..5f481c7e3039 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -84,7 +84,8 @@ enum rt6_nud_state {
+ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+ static unsigned int ip6_default_advmss(const struct dst_entry *dst);
+ static unsigned int ip6_mtu(const struct dst_entry *dst);
+-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
++static void ip6_negative_advice(struct sock *sk,
++ struct dst_entry *dst);
+ static void ip6_dst_destroy(struct dst_entry *);
+ static void ip6_dst_ifdown(struct dst_entry *,
+ struct net_device *dev, int how);
+@@ -2658,24 +2659,24 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
+ return dst_ret;
+ }
+
+-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
++static void ip6_negative_advice(struct sock *sk,
++ struct dst_entry *dst)
+ {
+ struct rt6_info *rt = (struct rt6_info *) dst;
+
+- if (rt) {
+- if (rt->rt6i_flags & RTF_CACHE) {
+- rcu_read_lock();
+- if (rt6_check_expired(rt)) {
+- rt6_remove_exception_rt(rt);
+- dst = NULL;
+- }
+- rcu_read_unlock();
+- } else {
+- dst_release(dst);
+- dst = NULL;
++ if (rt->rt6i_flags & RTF_CACHE) {
++ rcu_read_lock();
++ if (rt6_check_expired(rt)) {
++ /* counteract the dst_release() in sk_dst_reset() */
++ dst_hold(dst);
++ sk_dst_reset(sk);
++
++ rt6_remove_exception_rt(rt);
+ }
++ rcu_read_unlock();
++ return;
+ }
+- return dst;
++ sk_dst_reset(sk);
+ }
+
+ static void ip6_link_failure(struct sk_buff *skb)
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 9484f27e905a..bffac2f4b581 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -3772,15 +3772,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
+ /* Impossible. Such dst must be popped before reaches point of failure. */
+ }
+
+-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
++static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
+ {
+- if (dst) {
+- if (dst->obsolete) {
+- dst_release(dst);
+- dst = NULL;
+- }
+- }
+- return dst;
++ if (dst->obsolete)
++ sk_dst_reset(sk);
+ }
+
+ static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
+--
+2.45.2.505.gda0bf45e8d-goog
+
+