git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5: HWS, set timeout on polling for completion
author Yevgeny Kliteynik <kliteyn@nvidia.com>
Thu, 2 Jan 2025 18:14:14 +0000 (20:14 +0200)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 7 Jan 2025 00:33:41 +0000 (16:33 -0800)
Consolidate BWC polling for completion into one function
and set a time limit on the loop that polls for completion.
The timeout can be hit only if there is some issue with FW/PCI/HW,
such as FW being stuck, a PCI issue, etc.

Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Itamar Gozlan <igozlan@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20250102181415.1477316-16-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h

index 40d688ed6153a376ba0b2b4307608a018fcce734..a8d886e9214489aa6154c7e079ce50e28b70793f 100644 (file)
@@ -219,6 +219,8 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx,
                              u32 *pending_rules,
                              bool drain)
 {
+       unsigned long timeout = jiffies +
+                               msecs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT * MSEC_PER_SEC);
        struct mlx5hws_flow_op_result comp[MLX5HWS_BWC_MATCHER_REHASH_BURST_TH];
        u16 burst_th = hws_bwc_get_burst_th(ctx, queue_id);
        bool got_comp = *pending_rules >= burst_th;
@@ -254,6 +256,11 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx,
                }
 
                got_comp = !!ret;
+
+               if (unlikely(!got_comp && time_after(jiffies, timeout))) {
+                       mlx5hws_err(ctx, "BWC poll error: polling queue %d - TIMEOUT\n", queue_id);
+                       return -ETIMEDOUT;
+               }
        }
 
        return err;
@@ -338,22 +345,21 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule,
                              struct mlx5hws_rule_attr *rule_attr)
 {
        struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx;
-       struct mlx5hws_flow_op_result completion;
+       u32 expected_completions = 1;
        int ret;
 
        ret = hws_bwc_rule_destroy_hws_async(bwc_rule, rule_attr);
        if (unlikely(ret))
                return ret;
 
-       do {
-               ret = mlx5hws_send_queue_poll(ctx, rule_attr->queue_id, &completion, 1);
-       } while (ret != 1);
-
-       if (unlikely(completion.status != MLX5HWS_FLOW_OP_SUCCESS ||
-                    (bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED &&
-                     bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING))) {
-               mlx5hws_err(ctx, "Failed destroying BWC rule: completion %d, rule status %d\n",
-                           completion.status, bwc_rule->rule->status);
+       ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true);
+       if (unlikely(ret))
+               return ret;
+
+       if (unlikely(bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED &&
+                    bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING)) {
+               mlx5hws_err(ctx, "Failed destroying BWC rule: rule status %d\n",
+                           bwc_rule->rule->status);
                return -EINVAL;
        }
 
index 06c2a30c0d4e7804adf599da21a2dde2285252d8..f9f569131ddebef5f4987f9eb6c8d443944a83a9 100644 (file)
@@ -18,6 +18,8 @@
 
 #define MLX5HWS_BWC_MAX_ACTS 16
 
+#define MLX5HWS_BWC_POLLING_TIMEOUT 60
+
 struct mlx5hws_bwc_matcher {
        struct mlx5hws_matcher *matcher;
        struct mlx5hws_match_template *mt;