git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
neighbour: Free pneigh_entry after RCU grace period.
author Kuniyuki Iwashima <kuniyu@google.com>
Wed, 16 Jul 2025 22:08:12 +0000 (22:08 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 17 Jul 2025 23:25:20 +0000 (16:25 -0700)
We will convert RTM_GETNEIGH to RCU.

neigh_get() looks up pneigh_entry by pneigh_lookup() and passes
it to pneigh_fill_info().

Then, we must ensure that the entry is alive till pneigh_fill_info()
completes, but read_lock_bh(&tbl->lock) in pneigh_lookup() does not
guarantee that.

Also, we will convert all readers of tbl->phash_buckets[] to RCU.

Let's use call_rcu() to free pneigh_entry and update phash_buckets[]
and ->next by rcu_assign_pointer().

pneigh_ifdown_and_unlock() uses list_head to avoid overwriting
->next and moving RCU iterators to another list.

pndisc_destructor() (only IPv6 ndisc uses this) uses a mutex, so it
is not delayed to call_rcu(), where we cannot sleep.  This is fine
because the mcast code works with RCU and ipv6_dev_mc_dec() frees
mcast objects after RCU grace period.

While at it, we change the return type of pneigh_ifdown_and_unlock()
to void.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250716221221.442239-8-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/neighbour.h
net/core/neighbour.c

index 1ddc44a04200061143a82820dbcee04ea68f2896..6d7f9aa53a7a90e19e67f452168d9c8a9f749ea1 100644 (file)
@@ -180,6 +180,10 @@ struct pneigh_entry {
        possible_net_t          net;
        struct net_device       *dev;
        netdevice_tracker       dev_tracker;
+       union {
+               struct list_head        free_node;
+               struct rcu_head         rcu;
+       };
        u32                     flags;
        u8                      protocol;
        bool                    permanent;
index 7fcb0a8d655f56f7d906ebe141410992194fd8b1..fa2e60a479ef3c97e1d1afd5d112d6f08daa9c6d 100644 (file)
@@ -54,9 +54,9 @@ static void neigh_timer_handler(struct timer_list *t);
 static void __neigh_notify(struct neighbour *n, int type, int flags,
                           u32 pid);
 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
-                                   struct net_device *dev,
-                                   bool skip_perm);
+static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+                                    struct net_device *dev,
+                                    bool skip_perm);
 
 #ifdef CONFIG_PROC_FS
 static const struct seq_operations neigh_stat_seq_ops;
@@ -810,6 +810,14 @@ out:
        return n;
 }
 
+static void pneigh_destroy(struct rcu_head *rcu)
+{
+       struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
+
+       netdev_put(n->dev, &n->dev_tracker);
+       kfree(n);
+}
+
 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                  struct net_device *dev)
 {
@@ -828,10 +836,11 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                    net_eq(pneigh_net(n), net)) {
                        rcu_assign_pointer(*np, n->next);
                        write_unlock_bh(&tbl->lock);
+
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
-                       netdev_put(n->dev, &n->dev_tracker);
-                       kfree(n);
+
+                       call_rcu(&n->rcu, pneigh_destroy);
                        return 0;
                }
        }
@@ -839,11 +848,12 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
        return -ENOENT;
 }
 
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
-                                   struct net_device *dev,
-                                   bool skip_perm)
+static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+                                    struct net_device *dev,
+                                    bool skip_perm)
 {
-       struct pneigh_entry *n, __rcu **np, *freelist = NULL;
+       struct pneigh_entry *n, __rcu **np;
+       LIST_HEAD(head);
        u32 h;
 
        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
@@ -853,24 +863,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                                goto skip;
                        if (!dev || n->dev == dev) {
                                rcu_assign_pointer(*np, n->next);
-                               rcu_assign_pointer(n->next, freelist);
-                               freelist = n;
+                               list_add(&n->free_node, &head);
                                continue;
                        }
 skip:
                        np = &n->next;
                }
        }
+
        write_unlock_bh(&tbl->lock);
-       while ((n = freelist)) {
-               freelist = rcu_dereference_protected(n->next, 1);
-               n->next = NULL;
+
+       while (!list_empty(&head)) {
+               n = list_first_entry(&head, typeof(*n), free_node);
+               list_del(&n->free_node);
+
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
-               netdev_put(n->dev, &n->dev_tracker);
-               kfree(n);
+
+               call_rcu(&n->rcu, pneigh_destroy);
        }
-       return -ENOENT;
 }
 
 static inline void neigh_parms_put(struct neigh_parms *parms)