From 28ea6036dad268a055b693d9c06a6f3d126096d3 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Thu, 18 Dec 2025 17:58:13 +0200 Subject: [PATCH] net/mlx5: Handle port and vport speed change events in MPESW Add port change event handling logic for MPESW LAG mode, ensuring VFs are updated when the speed of LAG physical ports changes. This triggers a speed update workflow when relevant port state changes occur, enabling consistent and accurate reporting of VF bandwidth. Signed-off-by: Or Har-Toov Reviewed-by: Maher Sanalla Reviewed-by: Mark Bloch Signed-off-by: Edward Srouji Signed-off-by: Leon Romanovsky --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 38 ++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 2 + .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 39 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/lag/mpesw.h | 14 +++++++ .../net/ethernet/mellanox/mlx5/core/vport.c | 29 ++++++++++++++ include/linux/mlx5/driver.h | 1 + include/linux/mlx5/vport.h | 2 + 7 files changed, 121 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index c9d943a230b59..2d6024ebe346a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -233,14 +233,25 @@ static void mlx5_ldev_free(struct kref *ref) { struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); struct net *net; + int i; if (ldev->nb.notifier_call) { net = read_pnet(&ldev->net); unregister_netdevice_notifier_net(net, &ldev->nb); } + mlx5_ldev_for_each(i, 0, ldev) { + if (ldev->pf[i].dev && + ldev->pf[i].port_change_nb.nb.notifier_call) { + struct mlx5_nb *nb = &ldev->pf[i].port_change_nb; + + mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb); + } + } + mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); + cancel_work_sync(&ldev->speed_update_work); destroy_workqueue(ldev->wq); mutex_destroy(&ldev->lock); kfree(ldev); @@ -274,6 +285,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) kref_init(&ldev->ref); mutex_init(&ldev->lock); INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work); ldev->nb.notifier_call = mlx5_lag_netdev_event; write_pnet(&ldev->net, mlx5_core_net(dev)); @@ -1033,6 +1045,13 @@ static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed) mlx5_port_max_linkspeed); } +static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev, + u32 *oper_speed) +{ + return mlx5_lag_sum_devices_speed(ldev, oper_speed, + mlx5_port_oper_linkspeed); +} + static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev, u32 speed) { @@ -1070,10 +1089,14 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) u32 speed; int pf_idx; - speed = ldev->tracker.bond_speed_mbps; - - if (speed == SPEED_UNKNOWN) - return; + if (ldev->mode == MLX5_LAG_MODE_MPESW) { + if (mlx5_lag_sum_devices_oper_speed(ldev, &speed)) + return; + } else { + speed = ldev->tracker.bond_speed_mbps; + if (speed == SPEED_UNKNOWN) + return; + } /* If speed is not set, use the sum of max speeds of all PFs */ if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed)) @@ -1520,6 +1543,10 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, ldev->pf[fn].dev = dev; dev->priv.lag = ldev; + + MLX5_NB_INIT(&ldev->pf[fn].port_change_nb, + mlx5_lag_mpesw_port_change_event, PORT_CHANGE); + mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb); } static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, @@ -1531,6 +1558,9 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, if (ldev->pf[fn].dev != dev) return; + if (ldev->pf[fn].port_change_nb.nb.notifier_call) + mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb); + ldev->pf[fn].dev = NULL; dev->priv.lag = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 8de5640a01617..be1afece5fdc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -39,6 +39,7 @@ struct lag_func { struct mlx5_core_dev *dev; struct net_device *netdev; bool has_drop; + struct mlx5_nb port_change_nb; }; /* Used for collection of netdev event info. */ @@ -67,6 +68,7 @@ struct mlx5_lag { struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; + struct work_struct speed_update_work; struct notifier_block nb; possible_net_t net; struct lag_mp lag_mp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 2d86af8f0d9b8..04762562d7d9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -110,6 +110,8 @@ static int enable_mpesw(struct mlx5_lag *ldev) goto err_rescan_drivers; } + mlx5_lag_set_vports_agg_speed(ldev); + return 0; err_rescan_drivers: @@ -223,3 +225,40 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev) return ldev && ldev->mode == MLX5_LAG_MODE_MPESW; } EXPORT_SYMBOL(mlx5_lag_is_mpesw); + +void mlx5_mpesw_speed_update_work(struct work_struct *work) +{ + struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, + speed_update_work); + + mutex_lock(&ldev->lock); + if (ldev->mode == MLX5_LAG_MODE_MPESW) { + if (ldev->mode_changes_in_progress) + queue_work(ldev->wq, &ldev->speed_update_work); + else + mlx5_lag_set_vports_agg_speed(ldev); + } + + mutex_unlock(&ldev->lock); +} + +int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct mlx5_nb *mlx5_nb = container_of(nb, struct mlx5_nb, nb); + struct lag_func *lag_func = container_of(mlx5_nb, + struct lag_func, + port_change_nb); + struct mlx5_core_dev *dev = lag_func->dev; + struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_eqe *eqe = data; + + if (!ldev) + return NOTIFY_DONE; + + if (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_DOWN || + eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) + queue_work(ldev->wq, &ldev->speed_update_work); + + return NOTIFY_OK; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index 02520f27a033c..f5d9b5c97b0da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -32,4 +32,18 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); +#ifdef CONFIG_MLX5_ESWITCH +void mlx5_mpesw_speed_update_work(struct work_struct *work); +int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb, + unsigned long event, void *data); +#else +static inline void mlx5_mpesw_speed_update_work(struct work_struct *work) {} +static inline int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb, + unsigned long event, + void *data) +{ + return NOTIFY_DONE; +} +#endif /* CONFIG_MLX5_ESWITCH */ + #endif /* __MLX5_LAG_MPESW_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 78b1b291cfa44..cb098d3eb2faf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -122,6 +122,35 @@ int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod, return mlx5_cmd_exec_in(mdev, modify_vport_state, in); } +int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod, + u16 vport, u8 other_vport, u32 *max_tx_speed) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; + u32 state; + int err; + + MLX5_SET(query_vport_state_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_STATE); + MLX5_SET(query_vport_state_in, in, op_mod, op_mod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + MLX5_SET(query_vport_state_in, in, other_vport, other_vport); + + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); + if (err) + return err; + + state = MLX5_GET(query_vport_state_out, out, state); + if (state == VPORT_STATE_DOWN) { + *max_tx_speed = 0; + return 0; + } + + *max_tx_speed = MLX5_GET(query_vport_state_out, out, max_tx_speed); + return 0; +} +EXPORT_SYMBOL_GPL(mlx5_query_vport_max_tx_speed); + static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, bool other_vport, u32 *out) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1c54aa6f74fbc..9e0ab3cfab738 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1149,6 +1149,7 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +int mlx5_lag_query_bond_speed(struct net_device *bond_dev, u32 *speed); bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev); bool mlx5_lag_is_master(struct mlx5_core_dev *dev); bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 2acf10e9f60a2..dfa2fe32217af 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -60,6 +60,8 @@ enum { u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 other_vport, u8 state); +int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod, + u16 vport, u8 other_vport, u32 *max_tx_speed); int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 other_vport, u16 max_tx_speed); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, -- 2.47.3