From ef5a35b7818acf9f0a047a12dcb7760cf2006d6e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 17 Nov 2020 19:33:52 +0200 Subject: [PATCH] mlxsw: core: Use variable timeout for EMAD retries [ Upstream commit 1f492eab67bced119a0ac7db75ef2047e29a30c6 ] The driver sends Ethernet Management Datagram (EMAD) packets to the device for configuration purposes and waits for up to 200ms for a reply. A request is retried up to 5 times. When the system is under heavy load, replies are not always processed in time and EMAD transactions fail. Make the process more robust to such delays by using exponential backoff. First wait for up to 200ms, then retransmit and wait for up to 400ms and so on. Fixes: caf7297e7ab5 ("mlxsw: core: Introduce support for asynchronous EMAD register access") Reported-by: Denis Yulevich Tested-by: Denis Yulevich Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 05e90ef15871c..5dab77ebd73e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -558,7 +558,8 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) if (trans->core->fw_flash_in_progress) timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); - queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, + timeout << trans->retries); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, -- 2.47.2