neighbour: Protect tbl->phash_buckets[] with a dedicated mutex.
author Kuniyuki Iwashima <kuniyu@google.com>
Wed, 16 Jul 2025 22:08:19 +0000 (22:08 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 17 Jul 2025 23:25:21 +0000 (16:25 -0700)
tbl->phash_buckets[] is only modified in the slow path by pneigh_create()
and pneigh_delete() under the table lock.

Both of them are called under RTNL, so no extra lock is needed, but we
will remove RTNL from the paths.

pneigh_create() looks up a pneigh_entry, and this part could be lockless,
but that would complicate the logic to something like:

  1. lookup
  2. allocate a pneigh_entry with GFP_KERNEL
  3. lookup again but under lock
  4. if found, return it after freeing the allocated memory
  5. else, return the new one

Instead, let's add a per-table mutex and run the lookup and allocation
under it.
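
For illustration, the resulting pneigh_create() flow looks roughly like the
sketch below, assembled from the patch that follows (a simplified sketch, not
the verbatim new code; error handling, refcounting, and some field
initialization are trimmed):

  struct pneigh_entry *pneigh_create(struct neigh_table *tbl, struct net *net,
                                     const void *pkey, struct net_device *dev)
  {
          struct pneigh_entry *n;
          u32 hash_val;

          mutex_lock(&tbl->phash_lock);

          /* The lookup and the GFP_KERNEL allocation both run under the
           * mutex, so no second lookup is needed after the allocation.
           */
          n = pneigh_lookup(tbl, net, pkey, dev);
          if (n)
                  goto out;

          n = kzalloc(sizeof(*n) + tbl->key_len, GFP_KERNEL);
          if (!n)
                  goto out;

          memcpy(n->key, pkey, tbl->key_len);
          n->dev = dev;

          /* Publish the new entry; readers walk the bucket under RCU. */
          hash_val = pneigh_hash(pkey, tbl->key_len);
          n->next = tbl->phash_buckets[hash_val];
          rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
  out:
          mutex_unlock(&tbl->phash_lock);
          return n;
  }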

Note that the pneigh_entry update in neigh_add() is still protected by
RTNL; it will be moved into pneigh_create() in the next patch.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250716221221.442239-15-kuniyu@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/net/neighbour.h
net/core/neighbour.c

index f8c7261cd4ebb0ab5ea5884b176c928f85fe28ca..f333f9ebc42594c8292bf036f5cb31e3de7f17bc 100644
@@ -240,6 +240,7 @@ struct neigh_table {
        unsigned long           last_rand;
        struct neigh_statistics __percpu *stats;
        struct neigh_hash_table __rcu *nht;
+       struct mutex            phash_lock;
        struct pneigh_entry     __rcu **phash_buckets;
 };
 
index 38f0067068c5652448cc7e607168b873aa5ebc73..d312b6323ff207943dc5f2257ea9919e5bfa53e1 100644
@@ -54,9 +54,8 @@ static void neigh_timer_handler(struct timer_list *t);
 static void __neigh_notify(struct neighbour *n, int type, int flags,
                           u32 pid);
 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
-                                    struct net_device *dev,
-                                    bool skip_perm);
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+                         bool skip_perm);
 
 #ifdef CONFIG_PROC_FS
 static const struct seq_operations neigh_stat_seq_ops;
@@ -437,7 +436,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
 {
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev, skip_perm);
-       pneigh_ifdown_and_unlock(tbl, dev, skip_perm);
+       write_unlock_bh(&tbl->lock);
+
+       pneigh_ifdown(tbl, dev, skip_perm);
        pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
                           tbl->family);
        if (skb_queue_empty_lockless(&tbl->proxy_queue))
@@ -731,7 +732,7 @@ struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
        key_len = tbl->key_len;
        hash_val = pneigh_hash(pkey, key_len);
        n = rcu_dereference_check(tbl->phash_buckets[hash_val],
-                                 lockdep_is_held(&tbl->lock));
+                                 lockdep_is_held(&tbl->phash_lock));
 
        while (n) {
                if (!memcmp(n->key, pkey, key_len) &&
@@ -739,7 +740,7 @@ struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
                    (n->dev == dev || !n->dev))
                        return n;
 
-               n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->lock));
+               n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
        }
 
        return NULL;
@@ -754,11 +755,9 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl,
        unsigned int key_len;
        u32 hash_val;
 
-       ASSERT_RTNL();
+       mutex_lock(&tbl->phash_lock);
 
-       read_lock_bh(&tbl->lock);
        n = pneigh_lookup(tbl, net, pkey, dev);
-       read_unlock_bh(&tbl->lock);
        if (n)
                goto out;
 
@@ -780,11 +779,10 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl,
        }
 
        hash_val = pneigh_hash(pkey, key_len);
-       write_lock_bh(&tbl->lock);
        n->next = tbl->phash_buckets[hash_val];
        rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
-       write_unlock_bh(&tbl->lock);
 out:
+       mutex_unlock(&tbl->phash_lock);
        return n;
 }
 
@@ -806,14 +804,16 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
        key_len = tbl->key_len;
        hash_val = pneigh_hash(pkey, key_len);
 
-       write_lock_bh(&tbl->lock);
+       mutex_lock(&tbl->phash_lock);
+
        for (np = &tbl->phash_buckets[hash_val];
             (n = rcu_dereference_protected(*np, 1)) != NULL;
             np = &n->next) {
                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
                    net_eq(pneigh_net(n), net)) {
                        rcu_assign_pointer(*np, n->next);
-                       write_unlock_bh(&tbl->lock);
+
+                       mutex_unlock(&tbl->phash_lock);
 
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
@@ -822,18 +822,20 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                        return 0;
                }
        }
-       write_unlock_bh(&tbl->lock);
+
+       mutex_unlock(&tbl->phash_lock);
        return -ENOENT;
 }
 
-static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
-                                    struct net_device *dev,
-                                    bool skip_perm)
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+                         bool skip_perm)
 {
        struct pneigh_entry *n, __rcu **np;
        LIST_HEAD(head);
        u32 h;
 
+       mutex_lock(&tbl->phash_lock);
+
        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
                np = &tbl->phash_buckets[h];
                while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
@@ -849,7 +851,7 @@ skip:
                }
        }
 
-       write_unlock_bh(&tbl->lock);
+       mutex_unlock(&tbl->phash_lock);
 
        while (!list_empty(&head)) {
                n = list_first_entry(&head, typeof(*n), free_node);
@@ -1796,6 +1798,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
                WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
 
        rwlock_init(&tbl->lock);
+       mutex_init(&tbl->phash_lock);
 
        INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,