net: bonding: use workqueue to make sure peer notify updated in lacp mode
author Tonghao Zhang <tonghao@bamaicloud.com>
Sun, 18 Jan 2026 04:21:11 +0000 (12:21 +0800)
committer Paolo Abeni <pabeni@redhat.com>
Thu, 22 Jan 2026 10:20:33 +0000 (11:20 +0100)
The rtnl lock may already be held elsewhere, in which case
ad_cond_set_peer_notif() fails to acquire it and send_peer_notif is never
updated. Address this by deferring the update to a workqueue. Updating
send_peer_notif is not time critical, so a slightly delayed update is
entirely acceptable.

In fact, every place that checks or uses this value already does so under
the rtnl lock, for example:
- reading send_peer_notif
- decrementing send_peer_notif (send_peer_notif--)
- bond_should_notify_peers()

Note that the rtnl lock is still required when accessing bond->params.* to
compute the new send_peer_notif value. In LACP mode, resetting
send_peer_notif from a workqueue is a safe, simple and effective approach.
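
For illustration only (not part of the patch), a minimal userspace sketch of
the trylock-or-requeue pattern that the new handler uses, with a pthread
mutex standing in for the rtnl lock and a plain retry loop standing in for
queue_delayed_work(); every name below is invented for the example:

  /* build with: gcc -pthread sketch.c */
  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER; /* rtnl stand-in */
  static int send_peer_notif;           /* must only be written under the lock */

  /* Try to update; report failure so the caller can "rearm" and retry later. */
  static bool peer_notify_handler(int num_peer_notif, int peer_notif_delay)
  {
          if (pthread_mutex_trylock(&cfg_lock) != 0)
                  return false;         /* lock busy: requeue instead of blocking */

          send_peer_notif = num_peer_notif *
                  (peer_notif_delay > 1 ? peer_notif_delay : 1);
          pthread_mutex_unlock(&cfg_lock);
          return true;
  }

  int main(void)
  {
          while (!peer_notify_handler(1, 0))
                  ;                     /* the kernel would queue_delayed_work() here */
          printf("send_peer_notif = %d\n", send_peer_notif);
          return 0;
  }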

Additionally, this patch introduces bond_peer_notify_may_events(), which
decides whether a peer notification event should be sent. This function is
used by both patch 1 and patch 2 of the series.
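
Purely as a sketch of the semantics described above (the real helper is in
the diff below), the consolidation boils down to "notify if appropriate, and
decrement the counter when a notification went out or the caller forces it";
the userspace stand-ins here are again invented for the example:

  #include <stdbool.h>
  #include <stdio.h>

  /* Simplified shape of the helper; the netdev notifier plumbing is omitted. */
  static void peer_notify_may_events(bool should_notify, bool force,
                                     int *send_peer_notif)
  {
          bool notified = false;

          if (should_notify) {
                  notified = true;
                  /* call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev) here */
          }

          if (notified || force)
                  (*send_peer_notif)--;
  }

  int main(void)
  {
          int send_peer_notif = 2;

          peer_notify_may_events(true, false, &send_peer_notif);  /* notified: 2 -> 1 */
          peer_notify_may_events(false, false, &send_peer_notif); /* no event: stays 1 */
          printf("send_peer_notif = %d\n", send_peer_notif);
          return 0;
  }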

Cc: Jay Vosburgh <jv@jvosburgh.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Lunn <andrew+netdev@lunn.ch>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Hangbin Liu <liuhangbin@gmail.com>
Cc: Jason Xing <kerneljasonxing@gmail.com>
Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Tonghao Zhang <tonghao@bamaicloud.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://patch.msgid.link/f95accb5db0b10ce3ed2f834fc70f716c9abbb9c.1768709239.git.tonghao@bamaicloud.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_main.c
include/net/bonding.h

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index be10d31abe0c24dc6b0a7b6c1bc310628e0f7bf8..af7f74cfdc086c3e1f1565d5e930e356902b1b99 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1017,11 +1017,8 @@ static void ad_cond_set_peer_notif(struct port *port)
 {
        struct bonding *bond = port->slave->bond;
 
-       if (bond->params.broadcast_neighbor && rtnl_trylock()) {
-               bond->send_peer_notif = bond->params.num_peer_notif *
-                       max(1, bond->params.peer_notif_delay);
-               rtnl_unlock();
-       }
+       if (bond->params.broadcast_neighbor)
+               bond_peer_notify_work_rearm(bond, 0);
 }
 
 /**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3d56339a8a10d97664e6d8cb8b41a681e6e9efc5..353b0c8a0ca720905b9c4bccc4883765cdbe4544 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1195,6 +1195,48 @@ static bool bond_should_notify_peers(struct bonding *bond)
        return true;
 }
 
+/* Use this to update send_peer_notif when RTNL may be held in other places. */
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay)
+{
+       queue_delayed_work(bond->wq, &bond->peer_notify_work, delay);
+}
+
+/* Peer notify update handler. Holds only RTNL */
+static void bond_peer_notify_reset(struct bonding *bond)
+{
+       bond->send_peer_notif = bond->params.num_peer_notif *
+               max(1, bond->params.peer_notif_delay);
+}
+
+static void bond_peer_notify_handler(struct work_struct *work)
+{
+       struct bonding *bond = container_of(work, struct bonding,
+                                           peer_notify_work.work);
+
+       if (!rtnl_trylock()) {
+               bond_peer_notify_work_rearm(bond, 1);
+               return;
+       }
+
+       bond_peer_notify_reset(bond);
+
+       rtnl_unlock();
+}
+
+/* Peer notify events post. Holds only RTNL */
+static void bond_peer_notify_may_events(struct bonding *bond, bool force)
+{
+       bool notified = false;
+
+       if (bond_should_notify_peers(bond)) {
+               notified = true;
+               call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
+       }
+
+       if (notified || force)
+               bond->send_peer_notif--;
+}
+
 /**
  * bond_change_active_slave - change the active slave into the specified one
  * @bond: our bonding struct
@@ -1270,8 +1312,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                                                      BOND_SLAVE_NOTIFY_NOW);
 
                if (new_active) {
-                       bool should_notify_peers = false;
-
                        bond_set_slave_active_flags(new_active,
                                                    BOND_SLAVE_NOTIFY_NOW);
 
@@ -1279,19 +1319,11 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                                bond_do_fail_over_mac(bond, new_active,
                                                      old_active);
 
-                       if (netif_running(bond->dev)) {
-                               bond->send_peer_notif =
-                                       bond->params.num_peer_notif *
-                                       max(1, bond->params.peer_notif_delay);
-                               should_notify_peers =
-                                       bond_should_notify_peers(bond);
-                       }
-
                        call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
-                       if (should_notify_peers) {
-                               bond->send_peer_notif--;
-                               call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
-                                                        bond->dev);
+
+                       if (netif_running(bond->dev)) {
+                               bond_peer_notify_reset(bond);
+                               bond_peer_notify_may_events(bond, false);
                        }
                }
        }
@@ -4213,6 +4245,10 @@ static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
 
 void bond_work_init_all(struct bonding *bond)
 {
+       /* ndo_stop, bond_close() will try to flush the work under
+        * the rtnl lock. The workqueue must not block on rtnl lock
+        * to avoid deadlock.
+        */
        INIT_DELAYED_WORK(&bond->mcast_work,
                          bond_resend_igmp_join_requests_delayed);
        INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
@@ -4220,6 +4256,7 @@ void bond_work_init_all(struct bonding *bond)
        INIT_DELAYED_WORK(&bond->arp_work, bond_arp_monitor);
        INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
        INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
+       INIT_DELAYED_WORK(&bond->peer_notify_work, bond_peer_notify_handler);
 }
 
 void bond_work_cancel_all(struct bonding *bond)
@@ -4230,6 +4267,7 @@ void bond_work_cancel_all(struct bonding *bond)
        cancel_delayed_work_sync(&bond->ad_work);
        cancel_delayed_work_sync(&bond->mcast_work);
        cancel_delayed_work_sync(&bond->slave_arr_work);
+       cancel_delayed_work_sync(&bond->peer_notify_work);
 }
 
 static int bond_open(struct net_device *bond_dev)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 49edc7da05867f355f880329817bdc2a371f226b..63d08056a4a4bddd59c7a525e13f3daaba5203ed 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -254,6 +254,7 @@ struct bonding {
        struct   delayed_work ad_work;
        struct   delayed_work mcast_work;
        struct   delayed_work slave_arr_work;
+       struct   delayed_work peer_notify_work;
 #ifdef CONFIG_DEBUG_FS
        /* debugging support via debugfs */
        struct   dentry *debug_dir;
@@ -709,6 +710,7 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
                                              int level);
 int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
 void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
+void bond_peer_notify_work_rearm(struct bonding *bond, unsigned long delay);
 void bond_work_init_all(struct bonding *bond);
 void bond_work_cancel_all(struct bonding *bond);