--- /dev/null
+From be46e14e1c3651f3c002b34e47ddb0cf0a1d35c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 14:15:54 +0800
+Subject: iommu/vt-d: Fix kdump kernels boot failure with scalable mode
+
+From: Lu Baolu <baolu.lu@linux.intel.com>
+
+[ Upstream commit 0c5f6c0d8201a809a6585b07b6263e9db2c874a3 ]
+
+The translation table copying code for kdump kernels is currently based
+on the extended root/context entry formats of ECS mode defined in older
+VT-d v2.5, and doesn't handle the scalable mode formats. This causes
+the kexec capture kernel to fail to boot with DMAR faults if the IOMMU
+was enabled in scalable mode by the previous kernel.
+
+ECS mode has been deprecated since VT-d spec v3.0, and the Intel IOMMU
+driver doesn't support it as there is no real hardware implementation.
+Hence, convert the ECS check in the table copying code into a scalable
+mode check.
+
+The existing copying code consumes a bit in the context entry as a mark
+of a copied entry, and that bit needs to work for the old format as
+well as for the extended context entries. Since it's hard to find such
+a common bit for both legacy and scalable mode context entries, replace
+it with a per-IOMMU bitmap.
+
+Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support")
+Cc: stable@vger.kernel.org
+Reported-by: Jerry Snitselaar <jsnitsel@redhat.com>
+Tested-by: Wen Jin <wen.jin@intel.com>
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Link: https://lore.kernel.org/r/20220817011035.3250131-1-baolu.lu@linux.intel.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/iommu.c | 100 ++++++++++++++++--------------------
+ include/linux/intel-iommu.h | 9 ++--
+ 2 files changed, 50 insertions(+), 59 deletions(-)
+
+diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
+index 40ac3a78d90ef..c0464959cbcdb 100644
+--- a/drivers/iommu/intel/iommu.c
++++ b/drivers/iommu/intel/iommu.c
+@@ -168,38 +168,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
+ return re->hi & VTD_PAGE_MASK;
+ }
+
+-static inline void context_clear_pasid_enable(struct context_entry *context)
+-{
+- context->lo &= ~(1ULL << 11);
+-}
+-
+-static inline bool context_pasid_enabled(struct context_entry *context)
+-{
+- return !!(context->lo & (1ULL << 11));
+-}
+-
+-static inline void context_set_copied(struct context_entry *context)
+-{
+- context->hi |= (1ull << 3);
+-}
+-
+-static inline bool context_copied(struct context_entry *context)
+-{
+- return !!(context->hi & (1ULL << 3));
+-}
+-
+-static inline bool __context_present(struct context_entry *context)
+-{
+- return (context->lo & 1);
+-}
+-
+-bool context_present(struct context_entry *context)
+-{
+- return context_pasid_enabled(context) ?
+- __context_present(context) :
+- __context_present(context) && !context_copied(context);
+-}
+-
+ static inline void context_set_present(struct context_entry *context)
+ {
+ context->lo |= 1;
+@@ -247,6 +215,26 @@ static inline void context_clear_entry(struct context_entry *context)
+ context->hi = 0;
+ }
+
++static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
++{
++ if (!iommu->copied_tables)
++ return false;
++
++ return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
++}
++
++static inline void
++set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
++{
++ set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
++}
++
++static inline void
++clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
++{
++ clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
++}
++
+ /*
+ * This domain is a statically identity mapping domain.
+ * 1. This domain creats a static 1:1 mapping to all usable memory.
+@@ -644,6 +632,13 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ struct context_entry *context;
+ u64 *entry;
+
++ /*
++ * Except that the caller requested to allocate a new entry,
++ * returning a copied context entry makes no sense.
++ */
++ if (!alloc && context_copied(iommu, bus, devfn))
++ return NULL;
++
+ entry = &root->lo;
+ if (sm_supported(iommu)) {
+ if (devfn >= 0x80) {
+@@ -1770,6 +1765,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
+ iommu->domain_ids = NULL;
+ }
+
++ if (iommu->copied_tables) {
++ bitmap_free(iommu->copied_tables);
++ iommu->copied_tables = NULL;
++ }
++
+ g_iommus[iommu->seq_id] = NULL;
+
+ /* free context mapping */
+@@ -1978,7 +1978,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
+ goto out_unlock;
+
+ ret = 0;
+- if (context_present(context))
++ if (context_present(context) && !context_copied(iommu, bus, devfn))
+ goto out_unlock;
+
+ /*
+@@ -1990,7 +1990,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
+ * in-flight DMA will exist, and we don't need to worry anymore
+ * hereafter.
+ */
+- if (context_copied(context)) {
++ if (context_copied(iommu, bus, devfn)) {
+ u16 did_old = context_domain_id(context);
+
+ if (did_old < cap_ndoms(iommu->cap)) {
+@@ -2001,6 +2001,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
+ iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ }
++
++ clear_context_copied(iommu, bus, devfn);
+ }
+
+ context_clear_entry(context);
+@@ -2783,32 +2785,14 @@ static int copy_context_table(struct intel_iommu *iommu,
+ /* Now copy the context entry */
+ memcpy(&ce, old_ce + idx, sizeof(ce));
+
+- if (!__context_present(&ce))
++ if (!context_present(&ce))
+ continue;
+
+ did = context_domain_id(&ce);
+ if (did >= 0 && did < cap_ndoms(iommu->cap))
+ set_bit(did, iommu->domain_ids);
+
+- /*
+- * We need a marker for copied context entries. This
+- * marker needs to work for the old format as well as
+- * for extended context entries.
+- *
+- * Bit 67 of the context entry is used. In the old
+- * format this bit is available to software, in the
+- * extended format it is the PGE bit, but PGE is ignored
+- * by HW if PASIDs are disabled (and thus still
+- * available).
+- *
+- * So disable PASIDs first and then mark the entry
+- * copied. This means that we don't copy PASID
+- * translations from the old kernel, but this is fine as
+- * faults there are not fatal.
+- */
+- context_clear_pasid_enable(&ce);
+- context_set_copied(&ce);
+-
++ set_context_copied(iommu, bus, devfn);
+ new_ce[idx] = ce;
+ }
+
+@@ -2835,8 +2819,8 @@ static int copy_translation_tables(struct intel_iommu *iommu)
+ bool new_ext, ext;
+
+ rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
+- ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
+- new_ext = !!ecap_ecs(iommu->ecap);
++ ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
++ new_ext = !!sm_supported(iommu);
+
+ /*
+ * The RTT bit can only be changed when translation is disabled,
+@@ -2847,6 +2831,10 @@ static int copy_translation_tables(struct intel_iommu *iommu)
+ if (new_ext != ext)
+ return -EINVAL;
+
++ iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
++ if (!iommu->copied_tables)
++ return -ENOMEM;
++
+ old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
+ if (!old_rt_phys)
+ return -EINVAL;
+diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
+index 5fcf89faa31ab..d72626d71258f 100644
+--- a/include/linux/intel-iommu.h
++++ b/include/linux/intel-iommu.h
+@@ -196,7 +196,6 @@
+ #define ecap_dis(e) (((e) >> 27) & 0x1)
+ #define ecap_nest(e) (((e) >> 26) & 0x1)
+ #define ecap_mts(e) (((e) >> 25) & 0x1)
+-#define ecap_ecs(e) (((e) >> 24) & 0x1)
+ #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
+ #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
+ #define ecap_coherent(e) ((e) & 0x1)
+@@ -264,7 +263,6 @@
+ #define DMA_GSTS_CFIS (((u32)1) << 23)
+
+ /* DMA_RTADDR_REG */
+-#define DMA_RTADDR_RTT (((u64)1) << 11)
+ #define DMA_RTADDR_SMT (((u64)1) << 10)
+
+ /* CCMD_REG */
+@@ -579,6 +577,7 @@ struct intel_iommu {
+
+ #ifdef CONFIG_INTEL_IOMMU
+ unsigned long *domain_ids; /* bitmap of domains */
++ unsigned long *copied_tables; /* bitmap of copied tables */
+ spinlock_t lock; /* protect context, domain ids */
+ struct root_entry *root_entry; /* virtual address */
+
+@@ -692,6 +691,11 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte)
+ (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
+ }
+
++static inline bool context_present(struct context_entry *context)
++{
++ return (context->lo & 1);
++}
++
+ extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+ extern int dmar_enable_qi(struct intel_iommu *iommu);
+@@ -776,7 +780,6 @@ static inline void intel_iommu_debugfs_init(void) {}
+ #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
+ extern const struct attribute_group *intel_iommu_groups[];
+-bool context_present(struct context_entry *context);
+ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc);
+
+--
+2.35.1
+
--- /dev/null
+From dfd456f054e8df529005b7ee9b68fa3ac0d9a5cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jun 2022 12:41:00 +0300
+Subject: net/mlx5: Introduce ifc bits for using software vhca id
+
+From: Yishai Hadas <yishaih@nvidia.com>
+
+[ Upstream commit 0372c546eca575445331c0ad8902210b70be6d61 ]
+
+Introduce the ifc bits needed to enable the software vhca id
+functionality.
+
+Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 9ca05b0f27de ("RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mlx5/mlx5_ifc.h | 25 +++++++++++++++++++++----
+ 1 file changed, 21 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
+index fd7d083a34d33..6d57e5ec9718d 100644
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -1804,7 +1804,14 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
+ u8 max_reformat_remove_size[0x8];
+ u8 max_reformat_remove_offset[0x8];
+
+- u8 reserved_at_c0[0x740];
++ u8 reserved_at_c0[0x160];
++
++ u8 reserved_at_220[0x1];
++ u8 sw_vhca_id_valid[0x1];
++ u8 sw_vhca_id[0xe];
++ u8 reserved_at_230[0x10];
++
++ u8 reserved_at_240[0x5c0];
+ };
+
+ enum mlx5_ifc_flow_destination_type {
+@@ -3715,6 +3722,11 @@ struct mlx5_ifc_rmpc_bits {
+ struct mlx5_ifc_wq_bits wq;
+ };
+
++enum {
++ VHCA_ID_TYPE_HW = 0,
++ VHCA_ID_TYPE_SW = 1,
++};
++
+ struct mlx5_ifc_nic_vport_context_bits {
+ u8 reserved_at_0[0x5];
+ u8 min_wqe_inline_mode[0x3];
+@@ -3731,8 +3743,8 @@ struct mlx5_ifc_nic_vport_context_bits {
+ u8 event_on_mc_address_change[0x1];
+ u8 event_on_uc_address_change[0x1];
+
+- u8 reserved_at_40[0xc];
+-
++ u8 vhca_id_type[0x1];
++ u8 reserved_at_41[0xb];
+ u8 affiliation_criteria[0x4];
+ u8 affiliated_vhca_id[0x10];
+
+@@ -7189,7 +7201,12 @@ struct mlx5_ifc_init_hca_in_bits {
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+- u8 reserved_at_40[0x40];
++ u8 reserved_at_40[0x20];
++
++ u8 reserved_at_60[0x2];
++ u8 sw_vhca_id[0xe];
++ u8 reserved_at_70[0x10];
++
+ u8 sw_owner_id[4][0x20];
+ };
+
+--
+2.35.1
+
--- /dev/null
+From 0389847c9b08301f63601d5d90d814f4faa5df52 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jun 2022 12:47:34 +0300
+Subject: net/mlx5: Use software VHCA id when it's supported
+
+From: Yishai Hadas <yishaih@nvidia.com>
+
+[ Upstream commit dc402ccc0d7b55922a79505df3000da7deb77a2b ]
+
+Use software VHCA id when it's supported by the firmware.
+
+A unique id is allocated upon mlx5_mdev_init() and freed upon
+mlx5_mdev_uninit(), so it stays the same during the full life cycle of
+the device, including across health recovery if that occurs.
+
+The conjunction of sw_vhca_id with sw_owner_id will be a global unique
+id per function which uses mlx5_core.
+
+The sw_vhca_id is set upon init_hca command and is used to specify the
+VHCA that the NIC vport is affiliated with.
+
+This functionality is needed for migration of VMs which are MPV based
+(i.e. multi port device).
+
+Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 9ca05b0f27de ("RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fw.c | 4 ++
+ .../net/ethernet/mellanox/mlx5/core/main.c | 49 +++++++++++++++++++
+ .../net/ethernet/mellanox/mlx5/core/vport.c | 14 ++++--
+ include/linux/mlx5/driver.h | 1 +
+ 4 files changed, 65 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+index cfb8bedba5124..079fa44ada71e 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+@@ -289,6 +289,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
+ sw_owner_id[i]);
+ }
+
++ if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) &&
++ dev->priv.sw_vhca_id > 0)
++ MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id);
++
+ return mlx5_cmd_exec_in(dev, init_hca, in);
+ }
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 616207c3b187a..64d54bba91f69 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -90,6 +90,8 @@ module_param_named(prof_sel, prof_sel, uint, 0444);
+ MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
+
+ static u32 sw_owner_id[4];
++#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1)
++static DEFINE_IDA(sw_vhca_ida);
+
+ enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+@@ -499,6 +501,31 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
+ return err;
+ }
+
++static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
++{
++ void *set_hca_cap;
++ int err;
++
++ if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2))
++ return 0;
++
++ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
++ if (err)
++ return err;
++
++ if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) ||
++ !(dev->priv.sw_vhca_id > 0))
++ return 0;
++
++ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
++ capability);
++ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur,
++ MLX5_ST_SZ_BYTES(cmd_hca_cap_2));
++ MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1);
++
++ return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2);
++}
++
+ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+ {
+ struct mlx5_profile *prof = &dev->profile;
+@@ -669,6 +696,13 @@ static int set_hca_cap(struct mlx5_core_dev *dev)
+ goto out;
+ }
+
++ memset(set_ctx, 0, set_sz);
++ err = handle_hca_cap_2(dev, set_ctx);
++ if (err) {
++ mlx5_core_err(dev, "handle_hca_cap_2 failed\n");
++ goto out;
++ }
++
+ out:
+ kfree(set_ctx);
+ return err;
+@@ -1512,6 +1546,18 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
+ if (err)
+ goto err_hca_caps;
+
++ /* The conjunction of sw_vhca_id with sw_owner_id will be a global
++ * unique id per function which uses mlx5_core.
++ * Those values are supplied to FW as part of the init HCA command to
++ * be used by both driver and FW when it's applicable.
++ */
++ dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1,
++ MAX_SW_VHCA_ID,
++ GFP_KERNEL);
++ if (dev->priv.sw_vhca_id < 0)
++ mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n",
++ dev->priv.sw_vhca_id);
++
+ return 0;
+
+ err_hca_caps:
+@@ -1537,6 +1583,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
+ {
+ struct mlx5_priv *priv = &dev->priv;
+
++ if (priv->sw_vhca_id > 0)
++ ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id);
++
+ mlx5_hca_caps_free(dev);
+ mlx5_adev_cleanup(dev);
+ mlx5_pagealloc_cleanup(dev);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+index ac020cb780727..d5c3173250309 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+@@ -1086,9 +1086,17 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+ goto free;
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
+- MLX5_SET(modify_nic_vport_context_in, in,
+- nic_vport_context.affiliated_vhca_id,
+- MLX5_CAP_GEN(master_mdev, vhca_id));
++ if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) {
++ MLX5_SET(modify_nic_vport_context_in, in,
++ nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW);
++ MLX5_SET(modify_nic_vport_context_in, in,
++ nic_vport_context.affiliated_vhca_id,
++ MLX5_CAP_GEN_2(master_mdev, sw_vhca_id));
++ } else {
++ MLX5_SET(modify_nic_vport_context_in, in,
++ nic_vport_context.affiliated_vhca_id,
++ MLX5_CAP_GEN(master_mdev, vhca_id));
++ }
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.affiliation_criteria,
+ MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index b0b4ac92354a2..0015a08ddbd24 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -606,6 +606,7 @@ struct mlx5_priv {
+ spinlock_t ctx_lock;
+ struct mlx5_adev **adev;
+ int adev_idx;
++ int sw_vhca_id;
+ struct mlx5_events *events;
+
+ struct mlx5_flow_steering *steering;
+--
+2.35.1
+
--- /dev/null
+From f72f1848bfdd0f07c7a648c8be04c43eb8a38d5b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 15 May 2022 07:19:53 +0300
+Subject: RDMA/mlx5: Add a umr recovery flow
+
+From: Aharon Landau <aharonl@nvidia.com>
+
+[ Upstream commit 158e71bb69e368b8b33e8b7c4ac8c111da0c1ae2 ]
+
+When a UMR fails, the UMR QP state changes to an error state. Therefore,
+all further UMR operations will fail too.
+
+Add a recovery flow to the UMR QP, and repost the flushed WQEs.
+
+Link: https://lore.kernel.org/r/6cc24816cca049bd8541317f5e41d3ac659445d3.1652588303.git.leonro@nvidia.com
+Signed-off-by: Aharon Landau <aharonl@nvidia.com>
+Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Stable-dep-of: 9b7d4be967f1 ("RDMA/mlx5: Fix UMR cleanup on error flow of driver init")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/cq.c | 4 ++
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 ++++-
+ drivers/infiniband/hw/mlx5/umr.c | 78 ++++++++++++++++++++++++----
+ 3 files changed, 83 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
+index 08371a80fdc26..be189e0525de6 100644
+--- a/drivers/infiniband/hw/mlx5/cq.c
++++ b/drivers/infiniband/hw/mlx5/cq.c
+@@ -523,6 +523,10 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
+ "Requestor" : "Responder", cq->mcq.cqn);
+ mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
+ err_cqe->syndrome, err_cqe->vendor_err_synd);
++ if (wc->status != IB_WC_WR_FLUSH_ERR &&
++ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
++ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
++
+ if (opcode == MLX5_CQE_REQ_ERR) {
+ wq = &(*cur_qp)->sq;
+ wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index 998b67509a533..7460e0dfe6db4 100644
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -717,13 +717,23 @@ struct mlx5_ib_umr_context {
+ struct completion done;
+ };
+
++enum {
++ MLX5_UMR_STATE_ACTIVE,
++ MLX5_UMR_STATE_RECOVER,
++ MLX5_UMR_STATE_ERR,
++};
++
+ struct umr_common {
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+- /* control access to UMR QP
++ /* Protects from UMR QP overflow
+ */
+ struct semaphore sem;
++ /* Protects from using UMR while the UMR is not active
++ */
++ struct mutex lock;
++ unsigned int state;
+ };
+
+ struct mlx5_cache_ent {
+diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
+index 3a48364c09181..e00b94d1b1ea1 100644
+--- a/drivers/infiniband/hw/mlx5/umr.c
++++ b/drivers/infiniband/hw/mlx5/umr.c
+@@ -176,6 +176,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
+ dev->umrc.pd = pd;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
++ mutex_init(&dev->umrc.lock);
+
+ return 0;
+
+@@ -195,6 +196,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
+ ib_dealloc_pd(dev->umrc.pd);
+ }
+
++static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
++{
++ struct umr_common *umrc = &dev->umrc;
++ struct ib_qp_attr attr;
++ int err;
++
++ attr.qp_state = IB_QPS_RESET;
++ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
++ if (err) {
++ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
++ goto err;
++ }
++
++ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
++ if (err)
++ goto err;
++
++ umrc->state = MLX5_UMR_STATE_ACTIVE;
++ return 0;
++
++err:
++ umrc->state = MLX5_UMR_STATE_ERR;
++ return err;
++}
++
+ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+ {
+@@ -231,7 +257,7 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+
+ id.ib_cqe = cqe;
+ mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
+- MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
++ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
+
+ mlx5r_ring_db(qp, 1, ctrl);
+
+@@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
+ mlx5r_umr_init_context(&umr_context);
+
+ down(&umrc->sem);
+- err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+- with_data);
+- if (err)
+- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
+- else {
+- wait_for_completion(&umr_context.done);
+- if (umr_context.status != IB_WC_SUCCESS) {
+- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+- umr_context.status);
++ while (true) {
++ mutex_lock(&umrc->lock);
++ if (umrc->state == MLX5_UMR_STATE_ERR) {
++ mutex_unlock(&umrc->lock);
+ err = -EFAULT;
++ break;
++ }
++
++ if (umrc->state == MLX5_UMR_STATE_RECOVER) {
++ mutex_unlock(&umrc->lock);
++ usleep_range(3000, 5000);
++ continue;
++ }
++
++ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
++ with_data);
++ mutex_unlock(&umrc->lock);
++ if (err) {
++ mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
++ err);
++ break;
+ }
++
++ wait_for_completion(&umr_context.done);
++
++ if (umr_context.status == IB_WC_SUCCESS)
++ break;
++
++ if (umr_context.status == IB_WC_WR_FLUSH_ERR)
++ continue;
++
++ WARN_ON_ONCE(1);
++ mlx5_ib_warn(dev,
++ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
++ umr_context.status);
++ mutex_lock(&umrc->lock);
++ err = mlx5r_umr_recover(dev);
++ mutex_unlock(&umrc->lock);
++ if (err)
++ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
++ err);
++ err = -EFAULT;
++ break;
+ }
+ up(&umrc->sem);
+ return err;
+--
+2.35.1
+
--- /dev/null
+From 064b2940c803d63bf31e9693a6e4c4b45c3c8501 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Aug 2022 12:02:29 +0300
+Subject: RDMA/mlx5: Fix UMR cleanup on error flow of driver init
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 9b7d4be967f16f79a2283b2338709fcc750313ee ]
+
+The cited commit removed from the UMR cleanup flow the checks of
+whether the resources were created. This could lead to a null-ptr-deref
+in case of a failure in the mlx5_ib_stage_ib_reg_init stage.
+
+Fix it by adding a new UMR state that indicates whether the resources
+were created, and checking it in the UMR cleanup flow before destroying
+the resources.
+
+Fixes: 04876c12c19e ("RDMA/mlx5: Move init and cleanup of UMR to umr.c")
+Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Link: https://lore.kernel.org/r/4cfa61386cf202e9ce330e8d228ce3b25a36326e.1661763459.git.leonro@nvidia.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
+ drivers/infiniband/hw/mlx5/umr.c | 3 +++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+index 7460e0dfe6db4..c2cca032a6ed4 100644
+--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
+@@ -718,6 +718,7 @@ struct mlx5_ib_umr_context {
+ };
+
+ enum {
++ MLX5_UMR_STATE_UNINIT,
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
+index e00b94d1b1ea1..d5105b5c9979b 100644
+--- a/drivers/infiniband/hw/mlx5/umr.c
++++ b/drivers/infiniband/hw/mlx5/umr.c
+@@ -177,6 +177,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ mutex_init(&dev->umrc.lock);
++ dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
+
+ return 0;
+
+@@ -191,6 +192,8 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
+
+ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
+ {
++ if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
++ return;
+ ib_destroy_qp(dev->umrc.qp);
+ ib_free_cq(dev->umrc.cq);
+ ib_dealloc_pd(dev->umrc.pd);
+--
+2.35.1
+
--- /dev/null
+From 9954f3b09f233f60ce5ad618a101ab2f046b87a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 29 Aug 2022 12:02:27 +0300
+Subject: RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting
+ profile
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 9ca05b0f27de928be121cccf07735819dc9e1ed3 ]
+
+When the RDMA auxiliary driver probes, it sets its profile based on the
+devlink driverinit value. The latter might not be in sync with FW yet
+(in case devlink reload is not performed), thus causing a mismatch
+between the RDMA driver and FW. This results in the following FW
+syndrome when the RDMA driver tries to adjust the RoCE state, which
+fails the probe:
+
+"0xC1F678 | modify_nic_vport_context: roce_en set on a vport that
+doesn't support roce"
+
+To prevent this, select the PF profile based on FW RoCE capability
+instead of relying on devlink driverinit value.
+To provide backward compatibility for the RoCE disable feature on older
+FWs where roce_rw is not set (the FW RoCE capability is read-only), keep
+the current behavior, i.e. rely on the devlink driverinit value.
+
+Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level")
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
+Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/main.c | 2 +-
+ .../net/ethernet/mellanox/mlx5/core/main.c | 23 +++++++++++++++++--
+ include/linux/mlx5/driver.h | 19 +++++++--------
+ 3 files changed, 32 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
+index 63c89a72cc352..bb13164124fdb 100644
+--- a/drivers/infiniband/hw/mlx5/main.c
++++ b/drivers/infiniband/hw/mlx5/main.c
+@@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
+ dev->mdev = mdev;
+ dev->num_ports = num_ports;
+
+- if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev))
++ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
+ profile = &raw_eth_profile;
+ else
+ profile = &pf_profile;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 64d54bba91f69..6c8bb74bd8fc6 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -501,6 +501,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
+ return err;
+ }
+
++bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
++{
++ struct devlink *devlink = priv_to_devlink(dev);
++ union devlink_param_value val;
++ int err;
++
++ err = devlink_param_driverinit_value_get(devlink,
++ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
++ &val);
++
++ if (!err)
++ return val.vbool;
++
++ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
++ return MLX5_CAP_GEN(dev, roce);
++}
++EXPORT_SYMBOL(mlx5_is_roce_on);
++
+ static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
+ {
+ void *set_hca_cap;
+@@ -604,7 +622,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+ MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
+
+ if (MLX5_CAP_GEN(dev, roce_rw_supported))
+- MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
++ MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
++ mlx5_is_roce_on(dev));
+
+ max_uc_list = max_uc_list_get_devlink_param(dev);
+ if (max_uc_list > 0)
+@@ -630,7 +649,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
+ */
+ static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
+ {
+- return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
++ return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
+ (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
+ }
+
+diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
+index 0015a08ddbd24..b3ea245faa515 100644
+--- a/include/linux/mlx5/driver.h
++++ b/include/linux/mlx5/driver.h
+@@ -1275,16 +1275,17 @@ enum {
+ MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
+ };
+
+-static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev)
++bool mlx5_is_roce_on(struct mlx5_core_dev *dev);
++
++static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev)
+ {
+- struct devlink *devlink = priv_to_devlink(dev);
+- union devlink_param_value val;
+- int err;
+-
+- err = devlink_param_driverinit_value_get(devlink,
+- DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+- &val);
+- return err ? MLX5_CAP_GEN(dev, roce) : val.vbool;
++ if (MLX5_CAP_GEN(dev, roce_rw_supported))
++ return MLX5_CAP_GEN(dev, roce);
++
++ /* If RoCE cap is read-only in FW, get RoCE state from devlink
++ * in order to support RoCE enable/disable feature
++ */
++ return mlx5_is_roce_on(dev);
+ }
+
+ #endif /* MLX5_DRIVER_H */
+--
+2.35.1
+
--- /dev/null
+iommu-vt-d-fix-kdump-kernels-boot-failure-with-scala.patch
+net-mlx5-introduce-ifc-bits-for-using-software-vhca-.patch
+net-mlx5-use-software-vhca-id-when-it-s-supported.patch
+rdma-mlx5-rely-on-roce-fw-cap-instead-of-devlink-whe.patch
+rdma-mlx5-add-a-umr-recovery-flow.patch
+rdma-mlx5-fix-umr-cleanup-on-error-flow-of-driver-in.patch