git.ipfire.org Git - thirdparty/linux.git/commitdiff
ipv6: Get rid of RTNL for SIOCADDRT and RTM_NEWROUTE.
author: Kuniyuki Iwashima <kuniyu@amazon.com>
Fri, 18 Apr 2025 00:03:56 +0000 (17:03 -0700)
committer: Paolo Abeni <pabeni@redhat.com>
Thu, 24 Apr 2025 07:29:56 +0000 (09:29 +0200)
Now we are ready to remove RTNL from SIOCADDRT and RTM_NEWROUTE.

The remaining things to do are:

  1. pass false to lwtunnel_valid_encap_type_attr() and
     lwtunnel_valid_encap_type()
  2. use rcu_dereference_rtnl() in fib6_check_nexthop()
  3. place rcu_read_lock() before ip6_route_info_create_nh() and
     ip6_route_mpath_info_create_nh()

Let's complete the RTNL-free conversion.

When each CPU-X adds 100000 routes on table-X in a batch
concurrently on a c7a.metal-48xl EC2 instance with 192 CPUs,

without this series:

  $ sudo ./route_test.sh
  ...
  added 19200000 routes (100000 routes * 192 tables).
  time elapsed: 191577 milliseconds.

with this series:

  $ sudo ./route_test.sh
  ...
  added 19200000 routes (100000 routes * 192 tables).
  time elapsed: 62854 milliseconds.

I changed the number of routes in each table (1000 ~ 100000)
and consistently saw it finish 3x faster with this series.

Note that now every caller of lwtunnel_valid_encap_type() and
lwtunnel_valid_encap_type_attr() passes false as the last argument,
so that argument can be removed later.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250418000443.43734-16-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
net/ipv4/nexthop.c
net/ipv6/route.c

index 6ba6cb1340c12d4710c81a226d4f9483213aa15e..823e4a783d2b1dde5891d603a8cde91b7ce3e94f 100644 (file)
@@ -1556,12 +1556,12 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
        if (nh->is_group) {
                struct nh_group *nhg;
 
-               nhg = rtnl_dereference(nh->nh_grp);
+               nhg = rcu_dereference_rtnl(nh->nh_grp);
                if (nhg->has_v4)
                        goto no_v4_nh;
                is_fdb_nh = nhg->fdb_nh;
        } else {
-               nhi = rtnl_dereference(nh->nh_info);
+               nhi = rcu_dereference_rtnl(nh->nh_info);
                if (nhi->family == AF_INET)
                        goto no_v4_nh;
                is_fdb_nh = nhi->fdb_nh;
index 85724aee12707b083a7252208dd997fab8f27698..d0351e95d91618ad9d955605d3cbeda43280bba9 100644 (file)
@@ -3903,12 +3903,16 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
        if (IS_ERR(rt))
                return PTR_ERR(rt);
 
+       rcu_read_lock();
+
        err = ip6_route_info_create_nh(rt, cfg, extack);
        if (err)
-               return err;
+               goto unlock;
 
        err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
        fib6_info_release(rt);
+unlock:
+       rcu_read_unlock();
 
        return err;
 }
@@ -4529,12 +4533,10 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
 
        switch (cmd) {
        case SIOCADDRT:
-               rtnl_lock();
                /* Only do the default setting of fc_metric in route adding */
                if (cfg.fc_metric == 0)
                        cfg.fc_metric = IP6_RT_PRIO_USER;
                err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
-               rtnl_unlock();
                break;
        case SIOCDELRT:
                err = ip6_route_del(&cfg, NULL);
@@ -5113,7 +5115,7 @@ static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
        } while (rtnh_ok(rtnh, remaining));
 
        return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len,
-                                             extack, newroute);
+                                             extack, false);
 }
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5251,7 +5253,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
                cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
 
                err = lwtunnel_valid_encap_type(cfg->fc_encap_type,
-                                               extack, newroute);
+                                               extack, false);
                if (err < 0)
                        goto errout;
        }
@@ -5518,6 +5520,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
        if (err)
                return err;
 
+       rcu_read_lock();
+
        err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack);
        if (err)
                goto cleanup;
@@ -5609,6 +5613,8 @@ add_errout:
        }
 
 cleanup:
+       rcu_read_unlock();
+
        list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) {
                fib6_info_release(nh->fib6_info);
                list_del(&nh->list);
@@ -6891,7 +6897,7 @@ static void bpf_iter_unregister(void)
 
 static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
        {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
-        .doit = inet6_rtm_newroute},
+        .doit = inet6_rtm_newroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
        {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
         .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
        {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,