]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Nov 2020 08:47:03 +0000 (09:47 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 20 Nov 2020 08:47:03 +0000 (09:47 +0100)
added patches:
mips-pci-fix-mips-build.patch
net-mlx5-add-retry-mechanism-to-the-command-entry-index-allocation.patch
net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch
net-mlx5-poll-cmd-eq-in-case-of-command-timeout.patch
net-mlx5-use-async-eq-setup-cleanup-helpers-for-multiple-eqs.patch

queue-5.4/mips-pci-fix-mips-build.patch [new file with mode: 0644]
queue-5.4/net-mlx5-add-retry-mechanism-to-the-command-entry-index-allocation.patch [new file with mode: 0644]
queue-5.4/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch [new file with mode: 0644]
queue-5.4/net-mlx5-poll-cmd-eq-in-case-of-command-timeout.patch [new file with mode: 0644]
queue-5.4/net-mlx5-use-async-eq-setup-cleanup-helpers-for-multiple-eqs.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/mips-pci-fix-mips-build.patch b/queue-5.4/mips-pci-fix-mips-build.patch
new file mode 100644 (file)
index 0000000..219221b
--- /dev/null
@@ -0,0 +1,31 @@
+From 2711bea84e15a5a16d5ac694c9025890158a36dd Mon Sep 17 00:00:00 2001
+From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Date: Thu, 19 Nov 2020 10:26:33 +0000
+Subject: MIPS: PCI: Fix MIPS build
+
+From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+
+While backporting 37640adbefd6 ("MIPS: PCI: remember nasid changed by
+set interrupt affinity") something went wrong and an extra 'n' was added.
+So 'data->nasid' became 'data->nnasid' and the MIPS builds started failing.
+
+This is only needed for 5.4-stable tree.
+
+Fixes: 957978aa56f1 ("MIPS: PCI: remember nasid changed by set interrupt affinity")
+Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/mips/pci/pci-xtalk-bridge.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/mips/pci/pci-xtalk-bridge.c
++++ b/arch/mips/pci/pci-xtalk-bridge.c
+@@ -284,7 +284,7 @@ static int bridge_set_affinity(struct ir
+       ret = irq_chip_set_affinity_parent(d, mask, force);
+       if (ret >= 0) {
+               cpu = cpumask_first_and(mask, cpu_online_mask);
+-              data->nnasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
++              data->nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu));
+               bridge_write(data->bc, b_int_addr[pin].addr,
+                            (((data->bc->intr_addr >> 30) & 0x30000) |
+                             bit | (data->nasid << 8)));
diff --git a/queue-5.4/net-mlx5-add-retry-mechanism-to-the-command-entry-index-allocation.patch b/queue-5.4/net-mlx5-add-retry-mechanism-to-the-command-entry-index-allocation.patch
new file mode 100644 (file)
index 0000000..ebb4522
--- /dev/null
@@ -0,0 +1,66 @@
+From 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf Mon Sep 17 00:00:00 2001
+From: Eran Ben Elisha <eranbe@nvidia.com>
+Date: Mon, 31 Aug 2020 15:04:35 +0300
+Subject: net/mlx5: Add retry mechanism to the command entry index allocation
+
+From: Eran Ben Elisha <eranbe@nvidia.com>
+
+commit 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf upstream.
+
+It is possible that new command entry index allocation will temporarily
+fail. The new command holds the semaphore, so it means that a free entry
+should be ready soon. Add one second retry mechanism before returning an
+error.
+
+Patch "net/mlx5: Avoid possible free of command entry while timeout comp
+handler" increases the likelihood of hitting this temporary failure,
+as it delays the entry index release for non-callback commands.
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Eran Ben Elisha <eranbe@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Cc: Timo Rothenpieler <timo@rothenpieler.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   21 ++++++++++++++++++++-
+ 1 file changed, 20 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -883,6 +883,25 @@ static bool opcode_allowed(struct mlx5_c
+       return cmd->allowed_opcode == opcode;
+ }
++static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
++{
++      unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
++      int idx;
++
++retry:
++      idx = cmd_alloc_index(cmd);
++      if (idx < 0 && time_before(jiffies, alloc_end)) {
++              /* Index allocation can fail on heavy load of commands. This is a temporary
++               * situation as the current command already holds the semaphore, meaning that
++               * another command completion is being handled and it is expected to release
++               * the entry index soon.
++               */
++              cpu_relax();
++              goto retry;
++      }
++      return idx;
++}
++
+ static void cmd_work_handler(struct work_struct *work)
+ {
+       struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+@@ -900,7 +919,7 @@ static void cmd_work_handler(struct work
+       sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
+       down(sem);
+       if (!ent->page_queue) {
+-              alloc_ret = cmd_alloc_index(cmd);
++              alloc_ret = cmd_alloc_index_retry(cmd);
+               if (alloc_ret < 0) {
+                       mlx5_core_err(dev, "failed to allocate command entry\n");
+                       if (ent->callback) {
diff --git a/queue-5.4/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch b/queue-5.4/net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch
new file mode 100644 (file)
index 0000000..a5a6ee3
--- /dev/null
@@ -0,0 +1,166 @@
+From d43b7007dbd1195a5b6b83213e49b1516aaf6f5e Mon Sep 17 00:00:00 2001
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Wed, 18 Mar 2020 21:44:32 +0200
+Subject: net/mlx5: Fix a race when moving command interface to events mode
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+commit d43b7007dbd1195a5b6b83213e49b1516aaf6f5e upstream.
+
+After the driver creates (via FW command) an EQ for commands, the driver
+will be informed of new command completions by EQE. However, due to a race
+in the driver's internal command mode metadata update, some new commands
+will still be mishandled by the driver as if it were in polling mode. Such
+commands can get two non-forced completions, leading to access of an
+already freed command entry.
+
+The CREATE_EQ command, which maps the EQ to the command queue, must be posted
+to the command queue while it is empty, and no other command should be posted.
+
+Add a SW mechanism so that, once the CREATE_EQ command is about to be
+executed, all other commands return an error without being sent to the FW.
+Allow sending other commands only after successfully changing the driver's
+internal command mode metadata.
+We can safely return an error to all other commands while creating the
+command EQ, as all other commands might be sent from the user/application
+during driver load. The application can rerun them later, after the driver's
+load has finished.
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Cc: Timo Rothenpieler <timo@rothenpieler.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   35 +++++++++++++++++++++++---
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c  |    3 ++
+ include/linux/mlx5/driver.h                   |    6 ++++
+ 3 files changed, 40 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -875,6 +875,14 @@ static void free_msg(struct mlx5_core_de
+ static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
+                             struct mlx5_cmd_msg *msg);
++static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
++{
++      if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
++              return true;
++
++      return cmd->allowed_opcode == opcode;
++}
++
+ static void cmd_work_handler(struct work_struct *work)
+ {
+       struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
+@@ -941,7 +949,8 @@ static void cmd_work_handler(struct work
+       /* Skip sending command to fw if internal error */
+       if (pci_channel_offline(dev->pdev) ||
+-          dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
++          dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
++          !opcode_allowed(&dev->cmd, ent->op)) {
+               u8 status = 0;
+               u32 drv_synd;
+@@ -1459,6 +1468,22 @@ static void create_debugfs_files(struct
+       mlx5_cmdif_debugfs_init(dev);
+ }
++void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
++{
++      struct mlx5_cmd *cmd = &dev->cmd;
++      int i;
++
++      for (i = 0; i < cmd->max_reg_cmds; i++)
++              down(&cmd->sem);
++      down(&cmd->pages_sem);
++
++      cmd->allowed_opcode = opcode;
++
++      up(&cmd->pages_sem);
++      for (i = 0; i < cmd->max_reg_cmds; i++)
++              up(&cmd->sem);
++}
++
+ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
+ {
+       struct mlx5_cmd *cmd = &dev->cmd;
+@@ -1751,12 +1776,13 @@ static int cmd_exec(struct mlx5_core_dev
+       int err;
+       u8 status = 0;
+       u32 drv_synd;
++      u16 opcode;
+       u8 token;
++      opcode = MLX5_GET(mbox_in, in, opcode);
+       if (pci_channel_offline(dev->pdev) ||
+-          dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+-              u16 opcode = MLX5_GET(mbox_in, in, opcode);
+-
++          dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
++          !opcode_allowed(&dev->cmd, opcode)) {
+               err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
+               MLX5_SET(mbox_out, out, status, status);
+               MLX5_SET(mbox_out, out, syndrome, drv_synd);
+@@ -2058,6 +2084,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *
+       mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
+       cmd->mode = CMD_MODE_POLLING;
++      cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;
+       create_msg_cache(dev);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -648,11 +648,13 @@ static int create_async_eqs(struct mlx5_
+               .nent = MLX5_NUM_CMD_EQE,
+               .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
+       };
++      mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
+       err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
+       if (err)
+               goto err1;
+       mlx5_cmd_use_events(dev);
++      mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+       param = (struct mlx5_eq_param) {
+               .irq_index = 0,
+@@ -682,6 +684,7 @@ err2:
+       mlx5_cmd_use_polling(dev);
+       cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+ err1:
++      mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+       return err;
+ }
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -299,6 +299,7 @@ struct mlx5_cmd {
+       struct semaphore sem;
+       struct semaphore pages_sem;
+       int     mode;
++      u16     allowed_opcode;
+       struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
+       struct dma_pool *pool;
+       struct mlx5_cmd_debug dbg;
+@@ -890,10 +891,15 @@ mlx5_frag_buf_get_idx_last_contig_stride
+       return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
+ }
++enum {
++      CMD_ALLOWED_OPCODE_ALL,
++};
++
+ int mlx5_cmd_init(struct mlx5_core_dev *dev);
+ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
+ void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
+ void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
++void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);
+ struct mlx5_async_ctx {
+       struct mlx5_core_dev *dev;
diff --git a/queue-5.4/net-mlx5-poll-cmd-eq-in-case-of-command-timeout.patch b/queue-5.4/net-mlx5-poll-cmd-eq-in-case-of-command-timeout.patch
new file mode 100644 (file)
index 0000000..79e2492
--- /dev/null
@@ -0,0 +1,215 @@
+From 1d5558b1f0de81f54ddee05f3793acc5260d107f Mon Sep 17 00:00:00 2001
+From: Eran Ben Elisha <eranbe@mellanox.com>
+Date: Tue, 21 Jul 2020 10:25:52 +0300
+Subject: net/mlx5: poll cmd EQ in case of command timeout
+
+From: Eran Ben Elisha <eranbe@mellanox.com>
+
+commit 1d5558b1f0de81f54ddee05f3793acc5260d107f upstream.
+
+Once the driver detects a command interface command timeout, it warns the
+user and returns a timeout error to the caller. In such a case, the entry of
+the command is not evacuated (because only a real event interrupt is allowed
+to clear a command interface entry). If the HW event interrupt
+of this entry never arrives, this entry will be left unused forever.
+Command interface entries are limited and eventually we can end up without
+the ability to post a new command.
+
+In addition, if driver will not consume the EQE of the lost interrupt and
+rearm the EQ, no new interrupts will arrive for other commands.
+
+Add a resiliency mechanism for manually polling the command EQ in case of
+a command timeout. If the resiliency mechanism finds an unhandled EQE,
+it will consume it, and the command interface will be fully functional
+again. Once the resiliency flow has finished, wait another 5 seconds for
+the command interface to complete this command entry.
+
+Define mlx5_cmd_eq_recover() to manage the cmd EQ polling resiliency flow.
+Add an async EQ spinlock to avoid races between resiliency flows and real
+interrupts that might run simultaneously.
+
+Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
+Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Cc: Timo Rothenpieler <timo@rothenpieler.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c    |   53 +++++++++++++++++++----
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c     |   40 ++++++++++++++++-
+ drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h |    2 
+ 3 files changed, 86 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -853,11 +853,21 @@ static void cb_timeout_handler(struct wo
+       struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
+                                                cmd);
++      mlx5_cmd_eq_recover(dev);
++
++      /* Maybe got handled by eq recover ? */
++      if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
++              mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
++                             mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
++              goto out; /* phew, already handled */
++      }
++
+       ent->ret = -ETIMEDOUT;
+-      mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
+-                     mlx5_command_str(msg_to_opcode(ent->in)),
+-                     msg_to_opcode(ent->in));
++      mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
++                     ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+       mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
++
++out:
+       cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
+ }
+@@ -987,6 +997,35 @@ static const char *deliv_status_to_str(u
+       }
+ }
++enum {
++      MLX5_CMD_TIMEOUT_RECOVER_MSEC   = 5 * 1000,
++};
++
++static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
++                                        struct mlx5_cmd_work_ent *ent)
++{
++      unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
++
++      mlx5_cmd_eq_recover(dev);
++
++      /* Re-wait on the ent->done after executing the recovery flow. If the
++       * recovery flow (or any other recovery flow running simultaneously)
++       * has recovered an EQE, it should cause the entry to be completed by
++       * the command interface.
++       */
++      if (wait_for_completion_timeout(&ent->done, timeout)) {
++              mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
++                             mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
++              return;
++      }
++
++      mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
++                     mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
++
++      ent->ret = -ETIMEDOUT;
++      mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
++}
++
+ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
+ {
+       unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+@@ -998,12 +1037,10 @@ static int wait_func(struct mlx5_core_de
+               ent->ret = -ECANCELED;
+               goto out_err;
+       }
+-      if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
++      if (cmd->mode == CMD_MODE_POLLING || ent->polling)
+               wait_for_completion(&ent->done);
+-      } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
+-              ent->ret = -ETIMEDOUT;
+-              mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+-      }
++      else if (!wait_for_completion_timeout(&ent->done, timeout))
++              wait_func_handle_exec_timeout(dev, ent);
+ out_err:
+       err = ent->ret;
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -190,6 +190,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx
+       return count_eqe;
+ }
++static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags)
++      __acquires(&eq->lock)
++{
++      if (in_irq())
++              spin_lock(&eq->lock);
++      else
++              spin_lock_irqsave(&eq->lock, *flags);
++}
++
++static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags)
++      __releases(&eq->lock)
++{
++      if (in_irq())
++              spin_unlock(&eq->lock);
++      else
++              spin_unlock_irqrestore(&eq->lock, *flags);
++}
++
++enum async_eq_nb_action {
++      ASYNC_EQ_IRQ_HANDLER = 0,
++      ASYNC_EQ_RECOVER = 1,
++};
++
+ static int mlx5_eq_async_int(struct notifier_block *nb,
+                            unsigned long action, void *data)
+ {
+@@ -199,11 +222,14 @@ static int mlx5_eq_async_int(struct noti
+       struct mlx5_eq_table *eqt;
+       struct mlx5_core_dev *dev;
+       struct mlx5_eqe *eqe;
++      unsigned long flags;
+       int num_eqes = 0;
+       dev = eq->dev;
+       eqt = dev->priv.eq_table;
++      mlx5_eq_async_int_lock(eq_async, &flags);
++
+       eqe = next_eqe_sw(eq);
+       if (!eqe)
+               goto out;
+@@ -224,8 +250,19 @@ static int mlx5_eq_async_int(struct noti
+ out:
+       eq_update_ci(eq, 1);
++      mlx5_eq_async_int_unlock(eq_async, &flags);
+-      return 0;
++      return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0;
++}
++
++void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
++{
++      struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
++      int eqes;
++
++      eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
++      if (eqes)
++              mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
+ }
+ static void init_eq_buf(struct mlx5_eq *eq)
+@@ -570,6 +607,7 @@ setup_async_eq(struct mlx5_core_dev *dev
+       int err;
+       eq->irq_nb.notifier_call = mlx5_eq_async_int;
++      spin_lock_init(&eq->lock);
+       err = create_async_eq(dev, &eq->core, param);
+       if (err) {
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+@@ -38,6 +38,7 @@ struct mlx5_eq {
+ struct mlx5_eq_async {
+       struct mlx5_eq          core;
+       struct notifier_block   irq_nb;
++      spinlock_t              lock; /* To avoid irq EQ handle races with resiliency flows */
+ };
+ struct mlx5_eq_comp {
+@@ -82,6 +83,7 @@ void mlx5_cq_tasklet_cb(unsigned long da
+ struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
+ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
++void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
+ void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
+ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
diff --git a/queue-5.4/net-mlx5-use-async-eq-setup-cleanup-helpers-for-multiple-eqs.patch b/queue-5.4/net-mlx5-use-async-eq-setup-cleanup-helpers-for-multiple-eqs.patch
new file mode 100644 (file)
index 0000000..f61455a
--- /dev/null
@@ -0,0 +1,186 @@
+From 3ed879965cc4ea13fe0908468b653c4ff2cb1309 Mon Sep 17 00:00:00 2001
+From: Parav Pandit <parav@mellanox.com>
+Date: Fri, 6 Dec 2019 15:13:41 -0600
+Subject: net/mlx5: Use async EQ setup cleanup helpers for multiple EQs
+
+From: Parav Pandit <parav@mellanox.com>
+
+commit 3ed879965cc4ea13fe0908468b653c4ff2cb1309 upstream.
+
+Use helper routines to setup and teardown multiple EQs and reuse the
+code in setup, cleanup and error unwinding flows.
+
+Signed-off-by: Parav Pandit <parav@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Cc: Timo Rothenpieler <timo@rothenpieler.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eq.c |  114 +++++++++++----------------
+ 1 file changed, 49 insertions(+), 65 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+@@ -563,6 +563,39 @@ static void gather_async_events_mask(str
+               gather_user_async_events(dev, mask);
+ }
++static int
++setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
++             struct mlx5_eq_param *param, const char *name)
++{
++      int err;
++
++      eq->irq_nb.notifier_call = mlx5_eq_async_int;
++
++      err = create_async_eq(dev, &eq->core, param);
++      if (err) {
++              mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
++              return err;
++      }
++      err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
++      if (err) {
++              mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
++              destroy_async_eq(dev, &eq->core);
++      }
++      return err;
++}
++
++static void cleanup_async_eq(struct mlx5_core_dev *dev,
++                           struct mlx5_eq_async *eq, const char *name)
++{
++      int err;
++
++      mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
++      err = destroy_async_eq(dev, &eq->core);
++      if (err)
++              mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
++                            name, err);
++}
++
+ static int create_async_eqs(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+@@ -572,77 +605,45 @@ static int create_async_eqs(struct mlx5_
+       MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
+       mlx5_eq_notifier_register(dev, &table->cq_err_nb);
+-      table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+       param = (struct mlx5_eq_param) {
+               .irq_index = 0,
+               .nent = MLX5_NUM_CMD_EQE,
++              .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
+       };
+-
+-      param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD;
+-      err = create_async_eq(dev, &table->cmd_eq.core, &param);
+-      if (err) {
+-              mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
+-              goto err0;
+-      }
+-      err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+-      if (err) {
+-              mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
++      err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
++      if (err)
+               goto err1;
+-      }
++
+       mlx5_cmd_use_events(dev);
+-      table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+       param = (struct mlx5_eq_param) {
+               .irq_index = 0,
+               .nent = MLX5_NUM_ASYNC_EQE,
+       };
+       gather_async_events_mask(dev, param.mask);
+-      err = create_async_eq(dev, &table->async_eq.core, &param);
+-      if (err) {
+-              mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
++      err = setup_async_eq(dev, &table->async_eq, &param, "async");
++      if (err)
+               goto err2;
+-      }
+-      err = mlx5_eq_enable(dev, &table->async_eq.core,
+-                           &table->async_eq.irq_nb);
+-      if (err) {
+-              mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
+-              goto err3;
+-      }
+-      table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
+       param = (struct mlx5_eq_param) {
+               .irq_index = 0,
+               .nent = /* TODO: sriov max_vf + */ 1,
++              .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
+       };
+-      param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST;
+-      err = create_async_eq(dev, &table->pages_eq.core, &param);
+-      if (err) {
+-              mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
+-              goto err4;
+-      }
+-      err = mlx5_eq_enable(dev, &table->pages_eq.core,
+-                           &table->pages_eq.irq_nb);
+-      if (err) {
+-              mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
+-              goto err5;
+-      }
++      err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
++      if (err)
++              goto err3;
+-      return err;
++      return 0;
+-err5:
+-      destroy_async_eq(dev, &table->pages_eq.core);
+-err4:
+-      mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+ err3:
+-      destroy_async_eq(dev, &table->async_eq.core);
++      cleanup_async_eq(dev, &table->async_eq, "async");
+ err2:
+       mlx5_cmd_use_polling(dev);
+-      mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
++      cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+ err1:
+-      destroy_async_eq(dev, &table->cmd_eq.core);
+-err0:
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+       return err;
+ }
+@@ -650,28 +651,11 @@ err0:
+ static void destroy_async_eqs(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_eq_table *table = dev->priv.eq_table;
+-      int err;
+-
+-      mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
+-      err = destroy_async_eq(dev, &table->pages_eq.core);
+-      if (err)
+-              mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+-                            err);
+-
+-      mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
+-      err = destroy_async_eq(dev, &table->async_eq.core);
+-      if (err)
+-              mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+-                            err);
++      cleanup_async_eq(dev, &table->pages_eq, "pages");
++      cleanup_async_eq(dev, &table->async_eq, "async");
+       mlx5_cmd_use_polling(dev);
+-
+-      mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+-      err = destroy_async_eq(dev, &table->cmd_eq.core);
+-      if (err)
+-              mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+-                            err);
+-
++      cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+       mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
+ }
index 66fc4adb43e30a95719a86732674c3c5478937f7..c822900ff3d8d144febb1e58d5bdcc699fa448d2 100644 (file)
@@ -3,3 +3,8 @@ powerpc-64s-flush-l1d-on-kernel-entry.patch
 powerpc-64s-flush-l1d-after-user-accesses.patch
 powerpc-only-include-kup-radix.h-for-64-bit-book3s.patch
 selftests-powerpc-entry-flush-test.patch
+mips-pci-fix-mips-build.patch
+net-mlx5-use-async-eq-setup-cleanup-helpers-for-multiple-eqs.patch
+net-mlx5-poll-cmd-eq-in-case-of-command-timeout.patch
+net-mlx5-fix-a-race-when-moving-command-interface-to-events-mode.patch
+net-mlx5-add-retry-mechanism-to-the-command-entry-index-allocation.patch