From: Eric Dumazet Date: Mon, 30 Jun 2025 12:19:31 +0000 (+0000) Subject: ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu] X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a74fc62eec155ca5a6da8ff3856f3dc87fe24558;p=thirdparty%2Flinux.git ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu] Use the new helpers as a first step to deal with potential dst->dev races. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250630121934.3399505-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ae09e91398a5d..19dbd9081d5a5 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -481,7 +481,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const int sdif, bool *refcounted) { - struct net *net = dev_net_rcu(skb_dst(skb)->dev); + struct net *net = skb_dst_dev_net_rcu(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *sk; diff --git a/include/net/ip.h b/include/net/ip.h index 391af454422eb..befcba575129a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -472,7 +472,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -486,7 +486,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, if (mtu) goto out; - mtu = READ_ONCE(dst->dev->mtu); + mtu = READ_ONCE(dst_dev(dst)->mtu); if (unlikely(ip_mtu_locked(dst))) { if (rt->rt_uses_gateway && mtu > 576) @@ -506,16 +506,17 @@ out: static inline unsigned int ip_skb_dst_mtu(struct sock *sk, const struct sk_buff *skb) { + const struct dst_entry *dst = skb_dst(skb); unsigned int mtu; if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; - return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding); + return ip_dst_mtu_maybe_forward(dst, forwarding); } - mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); - return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); + mtu = min(READ_ONCE(dst_dev(dst)->mtu), IP_MAX_MTU); + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, diff --git a/include/net/route.h b/include/net/route.h index 3d3d6048ffca2..7ea840daa775b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -390,7 +390,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) const struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 717cb7d3607a1..2ffe73ea644ff 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -311,18 +311,20 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; + struct net_device *dev; bool rc = true; if (!apply_ratelimit) return true; /* No rate limit on loopback */ - if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) + dev = dst_dev(dst); + if (dev && (dev->flags & IFF_LOOPBACK)) goto out; rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, - l3mdev_master_ifindex_rcu(dst->dev)); + l3mdev_master_ifindex_rcu(dev)); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); rcu_read_unlock(); @@ -466,13 +468,13 @@ out_bh_enable: */ static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb) { - struct net_device *route_lookup_dev = NULL; + struct net_device *dev = skb->dev; + const struct dst_entry *dst; - if (skb->dev) - route_lookup_dev = skb->dev; - else if (skb_dst(skb)) - route_lookup_dev = skb_dst(skb)->dev; - return route_lookup_dev; + if (dev) + return dev; + dst = skb_dst(skb); + return dst ? dst_dev(dst) : NULL; } static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, @@ -869,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) struct net *net; u32 info = 0; - net = dev_net_rcu(skb_dst(skb)->dev); + net = skb_dst_dev_net_rcu(skb); /* * Incomplete header ? @@ -1012,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) struct icmp_bxm icmp_param; struct net *net; - net = dev_net_rcu(skb_dst(skb)->dev); + net = skb_dst_dev_net_rcu(skb); /* should there be an ICMP stat for ignored echos? */ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; @@ -1182,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; out_err: - __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(skb_dst_dev_net_rcu(skb), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d1769034b6438..7182f1419c2a4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -427,7 +427,7 @@ static int igmpv3_sendpack(struct sk_buff *skb) pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); - return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + return ip_local_out(skb_dst_dev_net(skb), skb->sk, skb); } static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 64b3fb3208af6..b2584cce90ae1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -476,7 +476,7 @@ out_fail: /* Process an incoming IP datagram fragment. */ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { - struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; + struct net_device *dev = skb->dev ? : skb_dst_dev(skb); int vif = l3mdev_master_ifindex_rcu(dev); struct ipq *qp; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2705d454fd64..414b47a0d513f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -116,7 +116,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst(skb)->dev, + net, sk, skb, NULL, skb_dst_dev(skb), dst_output); } @@ -199,7 +199,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = dst_rtable(dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; @@ -425,7 +425,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; + struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev; skb->dev = dev; skb->protocol = htons(ETH_P_IP); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 686e4f3d83aa7..95b6bb78fcd27 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -229,7 +229,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error_icmp; } - tdev = dst->dev; + tdev = dst_dev(dst); if (tdev == dev) { dst_release(dst); @@ -259,7 +259,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, xmit: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; + skb->dev = skb_dst_dev(skb); err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 08bc3f2c0078b..0565f001120dc 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -20,12 +20,12 @@ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { + struct net_device *dev = skb_dst_dev(skb); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; __u8 flags; - struct net_device *dev = skb_dst(skb)->dev; struct flow_keys flkeys; unsigned int hh_len; @@ -74,7 +74,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un #endif /* Change in oif may mean change in hh_len. */ - hh_len = skb_dst(skb)->dev->hard_header_len; + hh_len = skb_dst_dev(skb)->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce6aba4f01ff2..64ba377cd6cc6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -413,7 +413,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct neighbour *n; rcu_read_lock(); @@ -440,7 +440,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); const __be32 *pkey = daddr; if (rt->rt_gw_family == AF_INET) { @@ -1026,7 +1026,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1326,7 +1326,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) struct net *net; rcu_read_lock(); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); rcu_read_unlock(); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 5107121c5e37f..f1884f0c9e523 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -559,6 +559,7 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) void tcp_fastopen_active_disable_ofo_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct net_device *dev; struct dst_entry *dst; struct sk_buff *skb; @@ -576,7 +577,8 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } else if (tp->syn_fastopen_ch && atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); - if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) + dev = dst ? dst_dev(dst) : NULL; + if (!(dev && (dev->flags & IFF_LOOPBACK))) atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b406fd012b2e6..a847d894ace38 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -788,7 +788,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 4251670e328c8..03c068ea27b6a 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -166,11 +166,11 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, unsigned int hash) { struct tcp_metrics_block *tm; - struct net *net; bool reclaim = false; + struct net *net; spin_lock_bh(&tcp_metrics_lock); - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); /* While waiting for the spin-lock the cache might have been populated * with this entry and so we have to check again. @@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return NULL; } - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); @@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, else return NULL; - net = dev_net_rcu(dst->dev); + net = dev_net_rcu(dst_dev(dst)); hash ^= net_hash_mix(net); hash = hash_32(hash, tcp_metrics_hash_log); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 3cff51ba72bb0..0ae67d537499a 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -31,7 +31,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, skb->dev, skb_dst(skb)->dev, + net, sk, skb, skb->dev, skb_dst_dev(skb), __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); }