]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
ipv6: Get rid of RTNL for SIOCDELRT and RTM_DELROUTE.
authorKuniyuki Iwashima <kuniyu@amazon.com>
Fri, 18 Apr 2025 00:03:43 +0000 (17:03 -0700)
committerPaolo Abeni <pabeni@redhat.com>
Thu, 24 Apr 2025 07:29:55 +0000 (09:29 +0200)
Basically, removing an IPv6 route does not require RTNL because
the IPv6 routing tables are protected by per table lock.

inet6_rtm_delroute() calls nexthop_find_by_id() to check if the
nexthop specified by RTA_NH_ID exists.  nexthop uses rbtree and
the top-down walk can be safely performed under RCU.

ip6_route_del() already relies on RCU and the table lock, but we
need to extend the RCU critical section a bit more to cover
__ip6_del_rt().  For example, nexthop_for_each_fib6_nh() and
inet6_rt_notify() needs RCU.

Let's call nexthop_find_by_id() and __ip6_del_rt() under RCU and
get rid of RTNL from inet6_rtm_delroute() and SIOCDELRT.

Even if the nexthop is removed after rcu_read_unlock() in
inet6_rtm_delroute(), __remove_nexthop_fib() cleans up the routes
tied to the nexthop, and ip6_route_del() returns -ESRCH.  So the
request was at least valid as of nexthop_find_by_id(), and it's just
a matter of timing.

Note that we need to pass false to lwtunnel_valid_encap_type_attr().
The following patches also use the newroute bool.

Note also that fib6_get_table() does not require RCU because once
allocated fib6_table is not freed until netns dismantle.  I will
post a follow-up series to convert such callers to RCU-lockless
version.  [0]

Link: https://lore.kernel.org/netdev/20250417174557.65721-1-kuniyu@amazon.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250418000443.43734-3-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
net/ipv6/route.c

index 369914a3746137a5ddc95145067236e9c3109692..1c304f259d9b41158a765b06d22277dce637fecf 100644 (file)
@@ -4125,9 +4125,9 @@ static int ip6_route_del(struct fib6_config *cfg,
                        if (rt->nh) {
                                if (!fib6_info_hold_safe(rt))
                                        continue;
-                               rcu_read_unlock();
 
-                               return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+                               err =  __ip6_del_rt(rt, &cfg->fc_nlinfo);
+                               break;
                        }
                        if (cfg->fc_nh_id)
                                continue;
@@ -4142,13 +4142,13 @@ static int ip6_route_del(struct fib6_config *cfg,
                                continue;
                        if (!fib6_info_hold_safe(rt))
                                continue;
-                       rcu_read_unlock();
 
                        /* if gateway was specified only delete the one hop */
                        if (cfg->fc_flags & RTF_GATEWAY)
-                               return __ip6_del_rt(rt, &cfg->fc_nlinfo);
-
-                       return __ip6_del_rt_siblings(rt, cfg);
+                               err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
+                       else
+                               err = __ip6_del_rt_siblings(rt, cfg);
+                       break;
                }
        }
        rcu_read_unlock();
@@ -4517,19 +4517,20 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
 
        rtmsg_to_fib6_config(net, rtmsg, &cfg);
 
-       rtnl_lock();
        switch (cmd) {
        case SIOCADDRT:
+               rtnl_lock();
                /* Only do the default setting of fc_metric in route adding */
                if (cfg.fc_metric == 0)
                        cfg.fc_metric = IP6_RT_PRIO_USER;
                err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
+               rtnl_unlock();
                break;
        case SIOCDELRT:
                err = ip6_route_del(&cfg, NULL);
                break;
        }
-       rtnl_unlock();
+
        return err;
 }
 
@@ -5052,7 +5053,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 };
 
 static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
-                                       struct netlink_ext_ack *extack)
+                                       struct netlink_ext_ack *extack,
+                                       bool newroute)
 {
        struct rtnexthop *rtnh;
        int remaining;
@@ -5086,15 +5088,16 @@ static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
        } while (rtnh_ok(rtnh, remaining));
 
        return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len,
-                                             extack, true);
+                                             extack, newroute);
 }
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
                              struct fib6_config *cfg,
                              struct netlink_ext_ack *extack)
 {
-       struct rtmsg *rtm;
+       bool newroute = nlh->nlmsg_type == RTM_NEWROUTE;
        struct nlattr *tb[RTA_MAX+1];
+       struct rtmsg *rtm;
        unsigned int pref;
        int err;
 
@@ -5203,7 +5206,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
                cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
                cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
 
-               err = rtm_to_fib6_multipath_config(cfg, extack);
+               err = rtm_to_fib6_multipath_config(cfg, extack, newroute);
                if (err < 0)
                        goto errout;
        }
@@ -5223,7 +5226,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
                cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
 
                err = lwtunnel_valid_encap_type(cfg->fc_encap_type,
-                                               extack, true);
+                                               extack, newroute);
                if (err < 0)
                        goto errout;
        }
@@ -5546,15 +5549,20 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (err < 0)
                return err;
 
-       if (cfg.fc_nh_id &&
-           !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
-               NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
-               return -EINVAL;
+       if (cfg.fc_nh_id) {
+               rcu_read_lock();
+               err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id);
+               rcu_read_unlock();
+
+               if (err) {
+                       NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+                       return -EINVAL;
+               }
        }
 
-       if (cfg.fc_mp)
+       if (cfg.fc_mp) {
                return ip6_route_multipath_del(&cfg, extack);
-       else {
+       else {
                cfg.fc_delete_all_nh = 1;
                return ip6_route_del(&cfg, extack);
        }
@@ -6766,7 +6774,7 @@ static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_o
        {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
         .doit = inet6_rtm_newroute},
        {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
-        .doit = inet6_rtm_delroute},
+        .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
        {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
         .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
 };