git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5: Handle port and vport speed change events in MPESW
author Or Har-Toov <ohartoov@nvidia.com>
Thu, 18 Dec 2025 15:58:13 +0000 (17:58 +0200)
committer Leon Romanovsky <leon@kernel.org>
Mon, 5 Jan 2026 07:38:25 +0000 (02:38 -0500)
Add port change event handling logic for MPESW LAG mode, ensuring
VFs are updated when the speed of LAG physical ports changes.
This triggers a speed update workflow when relevant port state changes
occur, enabling consistent and accurate reporting of VF bandwidth.

Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
drivers/net/ethernet/mellanox/mlx5/core/vport.c
include/linux/mlx5/driver.h
include/linux/mlx5/vport.h

index c9d943a230b590ff97aae7a943e49941f4e2c410..2d6024ebe346a164e3945c1aa185dc8f7817064b 100644 (file)
@@ -233,14 +233,25 @@ static void mlx5_ldev_free(struct kref *ref)
 {
        struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
        struct net *net;
+       int i;
 
        if (ldev->nb.notifier_call) {
                net = read_pnet(&ldev->net);
                unregister_netdevice_notifier_net(net, &ldev->nb);
        }
 
+       mlx5_ldev_for_each(i, 0, ldev) {
+               if (ldev->pf[i].dev &&
+                   ldev->pf[i].port_change_nb.nb.notifier_call) {
+                       struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;
+
+                       mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
+               }
+       }
+
        mlx5_lag_mp_cleanup(ldev);
        cancel_delayed_work_sync(&ldev->bond_work);
+       cancel_work_sync(&ldev->speed_update_work);
        destroy_workqueue(ldev->wq);
        mutex_destroy(&ldev->lock);
        kfree(ldev);
@@ -274,6 +285,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
        kref_init(&ldev->ref);
        mutex_init(&ldev->lock);
        INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+       INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
 
        ldev->nb.notifier_call = mlx5_lag_netdev_event;
        write_pnet(&ldev->net, mlx5_core_net(dev));
@@ -1033,6 +1045,13 @@ static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
                                          mlx5_port_max_linkspeed);
 }
 
+static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
+                                          u32 *oper_speed)
+{
+       return mlx5_lag_sum_devices_speed(ldev, oper_speed,
+                                         mlx5_port_oper_linkspeed);
+}
+
 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
                                                u32 speed)
 {
@@ -1070,10 +1089,14 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
        u32 speed;
        int pf_idx;
 
-       speed = ldev->tracker.bond_speed_mbps;
-
-       if (speed == SPEED_UNKNOWN)
-               return;
+       if (ldev->mode == MLX5_LAG_MODE_MPESW) {
+               if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
+                       return;
+       } else {
+               speed = ldev->tracker.bond_speed_mbps;
+               if (speed == SPEED_UNKNOWN)
+                       return;
+       }
 
        /* If speed is not set, use the sum of max speeds of all PFs */
        if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
@@ -1520,6 +1543,10 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
 
        ldev->pf[fn].dev = dev;
        dev->priv.lag = ldev;
+
+       MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
+                    mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
+       mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
 }
 
 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
@@ -1531,6 +1558,9 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
        if (ldev->pf[fn].dev != dev)
                return;
 
+       if (ldev->pf[fn].port_change_nb.nb.notifier_call)
+               mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);
+
        ldev->pf[fn].dev = NULL;
        dev->priv.lag = NULL;
 }
index 8de5640a01617993f7cf890010b9d1b177e8be53..be1afece5fdc194c7e65256a615f7107e581ca2c 100644 (file)
@@ -39,6 +39,7 @@ struct lag_func {
        struct mlx5_core_dev *dev;
        struct net_device    *netdev;
        bool has_drop;
+       struct mlx5_nb port_change_nb;
 };
 
 /* Used for collection of netdev event info. */
@@ -67,6 +68,7 @@ struct mlx5_lag {
        struct lag_tracker        tracker;
        struct workqueue_struct   *wq;
        struct delayed_work       bond_work;
+       struct work_struct        speed_update_work;
        struct notifier_block     nb;
        possible_net_t net;
        struct lag_mp             lag_mp;
index 2d86af8f0d9b818d4f6bbc6a009c6dea657d31be..04762562d7d9b3a369bf15ca4f001b5c638aa988 100644 (file)
@@ -110,6 +110,8 @@ static int enable_mpesw(struct mlx5_lag *ldev)
                        goto err_rescan_drivers;
        }
 
+       mlx5_lag_set_vports_agg_speed(ldev);
+
        return 0;
 
 err_rescan_drivers:
@@ -223,3 +225,40 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
        return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
 }
 EXPORT_SYMBOL(mlx5_lag_is_mpesw);
+
+void mlx5_mpesw_speed_update_work(struct work_struct *work)
+{
+       struct mlx5_lag *ldev = container_of(work, struct mlx5_lag,
+                                            speed_update_work);
+
+       mutex_lock(&ldev->lock);
+       if (ldev->mode == MLX5_LAG_MODE_MPESW) {
+               if (ldev->mode_changes_in_progress)
+                       queue_work(ldev->wq, &ldev->speed_update_work);
+               else
+                       mlx5_lag_set_vports_agg_speed(ldev);
+       }
+
+       mutex_unlock(&ldev->lock);
+}
+
+int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
+                                    unsigned long event, void *data)
+{
+       struct mlx5_nb *mlx5_nb = container_of(nb, struct mlx5_nb, nb);
+       struct lag_func *lag_func = container_of(mlx5_nb,
+                                                struct lag_func,
+                                                port_change_nb);
+       struct mlx5_core_dev *dev = lag_func->dev;
+       struct mlx5_lag *ldev = dev->priv.lag;
+       struct mlx5_eqe *eqe = data;
+
+       if (!ldev)
+               return NOTIFY_DONE;
+
+       if (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_DOWN ||
+           eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE)
+               queue_work(ldev->wq, &ldev->speed_update_work);
+
+       return NOTIFY_OK;
+}
index 02520f27a033c77442c6937ccc04d7d22fdb876a..f5d9b5c97b0da83702be56f7a0886e6eef496259 100644 (file)
@@ -32,4 +32,18 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
 void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev);
 int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev);
 
+#ifdef CONFIG_MLX5_ESWITCH
+void mlx5_mpesw_speed_update_work(struct work_struct *work);
+int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
+                                    unsigned long event, void *data);
+#else
+static inline void mlx5_mpesw_speed_update_work(struct work_struct *work) {}
+static inline int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
+                                                  unsigned long event,
+                                                  void *data)
+{
+       return NOTIFY_DONE;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
+
 #endif /* __MLX5_LAG_MPESW_H__ */
index 78b1b291cfa44eb7892ae534f62622e2c1ec2338..cb098d3eb2faf25c91a594d28bc2c594a58601f5 100644 (file)
@@ -122,6 +122,35 @@ int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
        return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
 }
 
+int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod,
+                                 u16 vport, u8 other_vport, u32 *max_tx_speed)
+{
+       u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+       u32 state;
+       int err;
+
+       MLX5_SET(query_vport_state_in, in, opcode,
+                MLX5_CMD_OP_QUERY_VPORT_STATE);
+       MLX5_SET(query_vport_state_in, in, op_mod, op_mod);
+       MLX5_SET(query_vport_state_in, in, vport_number, vport);
+       MLX5_SET(query_vport_state_in, in, other_vport, other_vport);
+
+       err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+       if (err)
+               return err;
+
+       state = MLX5_GET(query_vport_state_out, out, state);
+       if (state == VPORT_STATE_DOWN) {
+               *max_tx_speed = 0;
+               return 0;
+       }
+
+       *max_tx_speed = MLX5_GET(query_vport_state_out, out, max_tx_speed);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_vport_max_tx_speed);
+
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
                                        bool other_vport, u32 *out)
 {
index 1c54aa6f74fbc656cf3d65dce5a4f4e41e0a6a4e..9e0ab3cfab7384153a61aa303a482a7ff6b1732e 100644 (file)
@@ -1149,6 +1149,7 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+int mlx5_lag_query_bond_speed(struct net_device *bond_dev, u32 *speed);
 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
index 2acf10e9f60a2f9077984b7593666dd62ba39103..dfa2fe32217af0860826539a36b796a0f84f1e03 100644 (file)
@@ -60,6 +60,8 @@ enum {
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                  u16 vport, u8 other_vport, u8 state);
+int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod,
+                                 u16 vport, u8 other_vport, u32 *max_tx_speed);
 int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
                                   u16 vport, u8 other_vport, u16 max_tx_speed);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,