From 081efd18326e353c6fbfdeff903a83edde953f72 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 17 Apr 2025 17:03:55 -0700 Subject: [PATCH] ipv6: Protect nh->f6i_list with spinlock and flag. We will get rid of RTNL from RTM_NEWROUTE and SIOCADDRT. Then, we may be going to add a route tied to a dying nexthop. The nexthop itself is not freed during the RCU grace period, but if we link a route after __remove_nexthop_fib() is called for the nexthop, the route will be leaked. To avoid the race between IPv6 route addition under RCU vs nexthop deletion under RTNL, let's add a dead flag and protect it and nh->f6i_list with a spinlock. __remove_nexthop_fib() acquires the nexthop's spinlock and sets false to nh->dead, then calls ip6_del_rt() for the linked route one by one without the spinlock because fib6_purge_rt() acquires it later. While adding an IPv6 route, fib6_add() acquires the nexthop lock and checks the dead flag just before inserting the route. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250418000443.43734-15-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- include/net/nexthop.h | 2 ++ net/ipv4/nexthop.c | 18 +++++++++++++++--- net/ipv6/ip6_fib.c | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d9fb44e8b3219..572e69cda4766 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -152,6 +152,8 @@ struct nexthop { u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; + bool dead; + spinlock_t lock; /* protect dead and f6i_list */ refcount_t refcnt; struct rcu_head rcu; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index d9cf06b297d11..6ba6cb1340c12 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -541,6 +541,7 @@ static struct nexthop *nexthop_alloc(void) INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); INIT_LIST_HEAD(&nh->fdb_list); + spin_lock_init(&nh->lock); } return nh; } @@ -2118,7 +2119,7 @@ static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) /* not called for nexthop replace */ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) { - struct fib6_info *f6i, *tmp; + struct fib6_info *f6i; bool do_flush = false; struct fib_info *fi; @@ -2129,13 +2130,24 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) if (do_flush) fib_flush(net); - /* ip6_del_rt removes the entry from this list hence the _safe */ - list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { + spin_lock_bh(&nh->lock); + + nh->dead = true; + + while (!list_empty(&nh->f6i_list)) { + f6i = list_first_entry(&nh->f6i_list, typeof(*f6i), nh_list); + /* __ip6_del_rt does a release, so do a hold here */ fib6_info_hold(f6i); + + spin_unlock_bh(&nh->lock); ipv6_stub->ip6_del_rt(net, f6i, !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); + + spin_lock_bh(&nh->lock); } + + spin_unlock_bh(&nh->lock); } static void __remove_nexthop(struct net *net, struct nexthop *nh, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 9e9db5470bbf5..1f860340690ca 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1048,8 +1048,14 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, rt6_flush_exceptions(rt); fib6_drop_pcpu_from(rt, table); - if (rt->nh && !list_empty(&rt->nh_list)) - list_del_init(&rt->nh_list); + if (rt->nh) { + spin_lock(&rt->nh->lock); + + if (!list_empty(&rt->nh_list)) + list_del_init(&rt->nh_list); + + spin_unlock(&rt->nh->lock); + } if (refcount_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split @@ -1341,6 +1347,28 @@ add: return 0; } +static int fib6_add_rt2node_nh(struct fib6_node *fn, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack, + struct list_head *purge_list) +{ + int err; + + spin_lock(&rt->nh->lock); + + if (rt->nh->dead) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + err = -EINVAL; + } else { + err = fib6_add_rt2node(fn, rt, info, extack, purge_list); + if (!err) + list_add(&rt->nh_list, &rt->nh->f6i_list); + } + + spin_unlock(&rt->nh->lock); + + return err; +} + static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && @@ -1498,7 +1526,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, info, extack, &purge_list); + if (rt->nh) + err = fib6_add_rt2node_nh(fn, rt, info, extack, &purge_list); + else + err = fib6_add_rt2node(fn, rt, info, extack, &purge_list); if (!err) { struct fib6_info *iter, *next; @@ -1508,8 +1539,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, fib6_info_release(iter); } - if (rt->nh) - list_add(&rt->nh_list, &rt->nh->f6i_list); __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); if (rt->fib6_flags & RTF_EXPIRES) -- 2.39.5