]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: core: neighbour: Call __neigh_notify() under a lock
authorPetr Machata <petrm@nvidia.com>
Wed, 21 Jan 2026 16:43:36 +0000 (17:43 +0100)
committerJakub Kicinski <kuba@kernel.org>
Sun, 25 Jan 2026 22:57:36 +0000 (14:57 -0800)
Andy Roulin has described an issue with the current neighbor notification
scheme as follows. This was also presented publicly at the link below.

    neigh_update sends a rtnl notification if an update, e.g.,
    nud_state change, was done but there is no guarantee of
    ordering of the rtnl notifications. Consider the following
    scenario:

    userspace thread                   kernel thread
    ================                   =============
    neigh_update
       write_lock_bh(n->lock)
       n->nud_state = STALE
       write_unlock_bh(n->lock)
       neigh_notify
         neigh_fill_info
           read_lock_bh(n->lock)
           ndm->nud_state = STALE
           read_unlock_bh(n->lock)
         -------------------------->
                                      neigh:update
                                      write_lock_bh(n->lock)
                                      n->nud_state = REACHABLE
                                      write_unlock_bh(n->lock)
                                      neigh_notify
                                        neigh_fill_info
                                           read_lock_bh(n->lock)
                                           ndm->nud_state = REACHABLE
                                           read_unlock_bh(n->lock)
                                        rtnl_nofify
                                      RTNL REACHABLE sent
                            <--------
        rtnl_notify
        RTNL STALE sent

    In this scenario, the kernel neigh is updated first to STALE and
    then REACHABLE but the netlink notifications are sent out of order,
    first REACHABLE and then STALE.

The solution is to send the netlink message inside the same critical
section that formats the message. That way both the contents and ordering
of the message reflect the same state, and we cannot see the abovementioned
out-of-order delivery.

Even with this patch, a remaining issue that the contents of the message
may not reflect the changes made to the neighbor. A kernel thread might
still interrupt a userspace thread after the change is done, but before
formatting and sending the message. Then what we would see is two messages
with the same contents. The following patches will attempt to address that
issue.

To support those future patches, convert __neigh_notify() to a helper that
assumes that the neighbor lock is already taken by having it call
__neigh_fill_info() instead of neigh_fill_info(). Add a new helper,
neigh_notify(), which takes the lock before calling __neigh_notify().
Migrate all callers to use the latter.

Link: https://lore.kernel.org/netdev/ed6768c1-80b8-aee2-e545-b51661d49336@nvidia.com/
Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/4b4368dcc5f5a7e407009cb6c36b69cfb5282864.1769012464.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/neighbour.c

index 6cdd93dfa3ea9dd8b2336245580a9faaec07da3f..66f4a652510649a57654c3818c7c6eab89fff217 100644 (file)
@@ -51,8 +51,7 @@ do {                                          \
 #define PNEIGH_HASHMASK                0xF
 
 static void neigh_timer_handler(struct timer_list *t);
-static void __neigh_notify(struct neighbour *n, int type, int flags,
-                          u32 pid);
+static void neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
                          bool skip_perm);
@@ -117,7 +116,7 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
 static void neigh_cleanup_and_release(struct neighbour *neigh)
 {
        trace_neigh_cleanup_and_release(neigh, 0);
-       __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
+       neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        neigh_release(neigh);
 }
@@ -2740,7 +2739,7 @@ nla_put_failure:
 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
 {
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
-       __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
+       neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
 }
 
 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -3555,7 +3554,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
        if (skb == NULL)
                goto errout;
 
-       err = neigh_fill_info(skb, n, pid, 0, type, flags);
+       err = __neigh_fill_info(skb, n, pid, 0, type, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
@@ -3570,9 +3569,16 @@ out:
        rcu_read_unlock();
 }
 
+static void neigh_notify(struct neighbour *neigh, int type, int flags, u32 pid)
+{
+       read_lock_bh(&neigh->lock);
+       __neigh_notify(neigh, type, flags, pid);
+       read_unlock_bh(&neigh->lock);
+}
+
 void neigh_app_ns(struct neighbour *n)
 {
-       __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
+       neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
 }
 EXPORT_SYMBOL(neigh_app_ns);