]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
neighbour: Create netdev->neighbour association
author: Gilad Naaman <gnaaman@drivenets.com>
Thu, 7 Nov 2024 16:04:43 +0000 (16:04 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Sat, 9 Nov 2024 21:22:57 +0000 (13:22 -0800)
Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Documentation/networking/net_cachelines/net_device.rst
include/linux/netdevice.h
include/net/neighbour.h
include/net/neighbour_tables.h [new file with mode: 0644]
net/core/neighbour.c

index ade50d4e67cf1314bc84c089e3ff57db45ee2702..15e31ece675fc4a75fe4a9dc2a2d9d3698c85045 100644 (file)
@@ -188,4 +188,5 @@ u64                                 max_pacing_offload_horizon
 struct_napi_config*                 napi_config
 unsigned_long                       gro_flush_timeout
 u32                                 napi_defer_hard_irqs
+struct hlist_head                   neighbours[2]
 =================================== =========================== =================== =================== ===================================================================================
index 3c552b648b27be347ff734131479dfd0c5ee9e13..df4483598628882bb105909f927ab5eee4337ae7 100644 (file)
@@ -52,6 +52,7 @@
 #include <net/net_trackers.h>
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
 
 struct netpoll_info;
 struct device;
@@ -2032,6 +2033,9 @@ enum netdev_reg_state {
  *     @napi_defer_hard_irqs:  If not zero, provides a counter that would
  *                             allow to avoid NIC hard IRQ, on busy queues.
  *
+ *     @neighbours:    List heads pointing to this device's neighbours'
+ *                     dev_list, one per address-family.
+ *
  *     FIXME: cleanup struct net_device such that network protocol info
  *     moves out.
  */
@@ -2440,6 +2444,9 @@ struct net_device {
         */
        struct net_shaper_hierarchy *net_shaper_hierarchy;
 #endif
+
+       struct hlist_head neighbours[NEIGH_NR_TABLES];
+
        u8                      priv[] ____cacheline_aligned
                                       __counted_by(priv_len);
 } ____cacheline_aligned;
index 40aac1e24c68b6c5765e36fc71b8d2a618cae64c..9a832cab5b1d997adb4c214b5c6e2113f43365db 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
 
 /*
  * NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@ struct neigh_statistics {
 
 struct neighbour {
        struct hlist_node       hash;
+       struct hlist_node       dev_list;
        struct neigh_table      *tbl;
        struct neigh_parms      *parms;
        unsigned long           confirmed;
@@ -236,13 +238,6 @@ struct neigh_table {
        struct pneigh_entry     **phash_buckets;
 };
 
-enum {
-       NEIGH_ARP_TABLE = 0,
-       NEIGH_ND_TABLE = 1,
-       NEIGH_NR_TABLES,
-       NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
 static inline int neigh_parms_family(struct neigh_parms *p)
 {
        return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644 (file)
index 0000000..bcffbe8
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+       NEIGH_ARP_TABLE = 0,
+       NEIGH_ND_TABLE = 1,
+       NEIGH_NR_TABLES,
+       NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
index 59f359c7b5e3f1b0edf7add4829c8079c8ac5159..5e572f6eaf2ce0009704008215c6a978f481a753 100644 (file)
@@ -60,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
 static const struct seq_operations neigh_stat_seq_ops;
 #endif
 
+static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
+{
+       int i;
+
+       switch (family) {
+       default:
+               DEBUG_NET_WARN_ON_ONCE(1);
+               fallthrough; /* to avoid panic by null-ptr-deref */
+       case AF_INET:
+               i = NEIGH_ARP_TABLE;
+               break;
+       case AF_INET6:
+               i = NEIGH_ND_TABLE;
+               break;
+       }
+
+       return &dev->neighbours[i];
+}
+
 /*
    Neighbour hash table buckets are protected with rwlock tbl->lock.
 
@@ -211,6 +230,7 @@ bool neigh_remove_one(struct neighbour *n)
        write_lock(&n->lock);
        if (refcount_read(&n->refcnt) == 1) {
                hlist_del_rcu(&n->hash);
+               hlist_del_rcu(&n->dev_list);
                neigh_mark_dead(n);
                retval = true;
        }
@@ -351,48 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
                            bool skip_perm)
 {
-       int i;
-       struct neigh_hash_table *nht;
-
-       nht = rcu_dereference_protected(tbl->nht,
-                                       lockdep_is_held(&tbl->lock));
+       struct hlist_head *dev_head;
+       struct hlist_node *tmp;
+       struct neighbour *n;
 
-       for (i = 0; i < (1 << nht->hash_shift); i++) {
-               struct hlist_node *tmp;
-               struct neighbour *n;
+       dev_head = neigh_get_dev_table(dev, tbl->family);
 
-               neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
-                       if (dev && n->dev != dev)
-                               continue;
-                       if (skip_perm && n->nud_state & NUD_PERMANENT)
-                               continue;
+       hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
+               if (skip_perm && n->nud_state & NUD_PERMANENT)
+                       continue;
 
-                       hlist_del_rcu(&n->hash);
-                       write_lock(&n->lock);
-                       neigh_del_timer(n);
-                       neigh_mark_dead(n);
-                       if (refcount_read(&n->refcnt) != 1) {
-                               /* The most unpleasant situation.
-                                  We must destroy neighbour entry,
-                                  but someone still uses it.
-
-                                  The destroy will be delayed until
-                                  the last user releases us, but
-                                  we must kill timers etc. and move
-                                  it to safe state.
-                                */
-                               __skb_queue_purge(&n->arp_queue);
-                               n->arp_queue_len_bytes = 0;
-                               WRITE_ONCE(n->output, neigh_blackhole);
-                               if (n->nud_state & NUD_VALID)
-                                       n->nud_state = NUD_NOARP;
-                               else
-                                       n->nud_state = NUD_NONE;
-                               neigh_dbg(2, "neigh %p is stray\n", n);
-                       }
-                       write_unlock(&n->lock);
-                       neigh_cleanup_and_release(n);
+               hlist_del_rcu(&n->hash);
+               hlist_del_rcu(&n->dev_list);
+               write_lock(&n->lock);
+               neigh_del_timer(n);
+               neigh_mark_dead(n);
+               if (refcount_read(&n->refcnt) != 1) {
+                       /* The most unpleasant situation.
+                        * We must destroy neighbour entry,
+                        * but someone still uses it.
+                        *
+                        * The destroy will be delayed until
+                        * the last user releases us, but
+                        * we must kill timers etc. and move
+                        * it to safe state.
+                        */
+                       __skb_queue_purge(&n->arp_queue);
+                       n->arp_queue_len_bytes = 0;
+                       WRITE_ONCE(n->output, neigh_blackhole);
+                       if (n->nud_state & NUD_VALID)
+                               n->nud_state = NUD_NOARP;
+                       else
+                               n->nud_state = NUD_NONE;
+                       neigh_dbg(2, "neigh %p is stray\n", n);
                }
+               write_unlock(&n->lock);
+               neigh_cleanup_and_release(n);
        }
 }
 
@@ -655,6 +669,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
        if (want_ref)
                neigh_hold(n);
        hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+       hlist_add_head_rcu(&n->dev_list,
+                          neigh_get_dev_table(dev, tbl->family));
+
        write_unlock_bh(&tbl->lock);
        neigh_dbg(2, "neigh %p is created\n", n);
        rc = n;
@@ -935,6 +953,7 @@ static void neigh_periodic_work(struct work_struct *work)
                             !time_in_range_open(jiffies, n->used,
                                                 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                hlist_del_rcu(&n->hash);
+                               hlist_del_rcu(&n->dev_list);
                                neigh_mark_dead(n);
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
@@ -3054,6 +3073,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
                        release = cb(n);
                        if (release) {
                                hlist_del_rcu(&n->hash);
+                               hlist_del_rcu(&n->dev_list);
                                neigh_mark_dead(n);
                        }
                        write_unlock(&n->lock);