From: Sasha Levin Date: Thu, 15 Sep 2022 10:59:20 +0000 (-0400) Subject: Fixes for 5.19 X-Git-Tag: v4.9.329~42 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=02a7c169662e56f8e85c5bfcdff32ea61cd44b23;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.19 Signed-off-by: Sasha Levin --- diff --git a/queue-5.19/iommu-vt-d-fix-kdump-kernels-boot-failure-with-scala.patch b/queue-5.19/iommu-vt-d-fix-kdump-kernels-boot-failure-with-scala.patch new file mode 100644 index 00000000000..4e27c2f6158 --- /dev/null +++ b/queue-5.19/iommu-vt-d-fix-kdump-kernels-boot-failure-with-scala.patch @@ -0,0 +1,270 @@ +From be46e14e1c3651f3c002b34e47ddb0cf0a1d35c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Aug 2022 14:15:54 +0800 +Subject: iommu/vt-d: Fix kdump kernels boot failure with scalable mode + +From: Lu Baolu + +[ Upstream commit 0c5f6c0d8201a809a6585b07b6263e9db2c874a3 ] + +The translation table copying code for kdump kernels is currently based +on the extended root/context entry formats of ECS mode defined in older +VT-d v2.5, and doesn't handle the scalable mode formats. This causes +the kexec capture kernel boot failure with DMAR faults if the IOMMU was +enabled in scalable mode by the previous kernel. + +The ECS mode has already been deprecated by the VT-d spec since v3.0 and +Intel IOMMU driver doesn't support this mode as there's no real hardware +implementation. Hence this converts ECS checking in copying table code +into scalable mode. + +The existing copying code consumes a bit in the context entry as a mark +of copied entry. It needs to work for the old format as well as for the +extended context entries. As it's hard to find such a common bit for both +legacy and scalable mode context entries. This replaces it with a per- +IOMMU bitmap. + +Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support") +Cc: stable@vger.kernel.org +Reported-by: Jerry Snitselaar +Tested-by: Wen Jin +Signed-off-by: Lu Baolu +Link: https://lore.kernel.org/r/20220817011035.3250131-1-baolu.lu@linux.intel.com +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/iommu.c | 100 ++++++++++++++++-------------------- + include/linux/intel-iommu.h | 9 ++-- + 2 files changed, 50 insertions(+), 59 deletions(-) + +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index 40ac3a78d90ef..c0464959cbcdb 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -168,38 +168,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) + return re->hi & VTD_PAGE_MASK; + } + +-static inline void context_clear_pasid_enable(struct context_entry *context) +-{ +- context->lo &= ~(1ULL << 11); +-} +- +-static inline bool context_pasid_enabled(struct context_entry *context) +-{ +- return !!(context->lo & (1ULL << 11)); +-} +- +-static inline void context_set_copied(struct context_entry *context) +-{ +- context->hi |= (1ull << 3); +-} +- +-static inline bool context_copied(struct context_entry *context) +-{ +- return !!(context->hi & (1ULL << 3)); +-} +- +-static inline bool __context_present(struct context_entry *context) +-{ +- return (context->lo & 1); +-} +- +-bool context_present(struct context_entry *context) +-{ +- return context_pasid_enabled(context) ? +- __context_present(context) : +- __context_present(context) && !context_copied(context); +-} +- + static inline void context_set_present(struct context_entry *context) + { + context->lo |= 1; +@@ -247,6 +215,26 @@ static inline void context_clear_entry(struct context_entry *context) + context->hi = 0; + } + ++static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) ++{ ++ if (!iommu->copied_tables) ++ return false; ++ ++ return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); ++} ++ ++static inline void ++set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) ++{ ++ set_bit(((long)bus << 8) | devfn, iommu->copied_tables); ++} ++ ++static inline void ++clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) ++{ ++ clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); ++} ++ + /* + * This domain is a statically identity mapping domain. + * 1. This domain creats a static 1:1 mapping to all usable memory. +@@ -644,6 +632,13 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + struct context_entry *context; + u64 *entry; + ++ /* ++ * Except that the caller requested to allocate a new entry, ++ * returning a copied context entry makes no sense. ++ */ ++ if (!alloc && context_copied(iommu, bus, devfn)) ++ return NULL; ++ + entry = &root->lo; + if (sm_supported(iommu)) { + if (devfn >= 0x80) { +@@ -1770,6 +1765,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) + iommu->domain_ids = NULL; + } + ++ if (iommu->copied_tables) { ++ bitmap_free(iommu->copied_tables); ++ iommu->copied_tables = NULL; ++ } ++ + g_iommus[iommu->seq_id] = NULL; + + /* free context mapping */ +@@ -1978,7 +1978,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, + goto out_unlock; + + ret = 0; +- if (context_present(context)) ++ if (context_present(context) && !context_copied(iommu, bus, devfn)) + goto out_unlock; + + /* +@@ -1990,7 +1990,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. + */ +- if (context_copied(context)) { ++ if (context_copied(iommu, bus, devfn)) { + u16 did_old = context_domain_id(context); + + if (did_old < cap_ndoms(iommu->cap)) { +@@ -2001,6 +2001,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } ++ ++ clear_context_copied(iommu, bus, devfn); + } + + context_clear_entry(context); +@@ -2783,32 +2785,14 @@ static int copy_context_table(struct intel_iommu *iommu, + /* Now copy the context entry */ + memcpy(&ce, old_ce + idx, sizeof(ce)); + +- if (!__context_present(&ce)) ++ if (!context_present(&ce)) + continue; + + did = context_domain_id(&ce); + if (did >= 0 && did < cap_ndoms(iommu->cap)) + set_bit(did, iommu->domain_ids); + +- /* +- * We need a marker for copied context entries. This +- * marker needs to work for the old format as well as +- * for extended context entries. +- * +- * Bit 67 of the context entry is used. In the old +- * format this bit is available to software, in the +- * extended format it is the PGE bit, but PGE is ignored +- * by HW if PASIDs are disabled (and thus still +- * available). +- * +- * So disable PASIDs first and then mark the entry +- * copied. This means that we don't copy PASID +- * translations from the old kernel, but this is fine as +- * faults there are not fatal. +- */ +- context_clear_pasid_enable(&ce); +- context_set_copied(&ce); +- ++ set_context_copied(iommu, bus, devfn); + new_ce[idx] = ce; + } + +@@ -2835,8 +2819,8 @@ static int copy_translation_tables(struct intel_iommu *iommu) + bool new_ext, ext; + + rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); +- ext = !!(rtaddr_reg & DMA_RTADDR_RTT); +- new_ext = !!ecap_ecs(iommu->ecap); ++ ext = !!(rtaddr_reg & DMA_RTADDR_SMT); ++ new_ext = !!sm_supported(iommu); + + /* + * The RTT bit can only be changed when translation is disabled, +@@ -2847,6 +2831,10 @@ static int copy_translation_tables(struct intel_iommu *iommu) + if (new_ext != ext) + return -EINVAL; + ++ iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL); ++ if (!iommu->copied_tables) ++ return -ENOMEM; ++ + old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; + if (!old_rt_phys) + return -EINVAL; +diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h +index 5fcf89faa31ab..d72626d71258f 100644 +--- a/include/linux/intel-iommu.h ++++ b/include/linux/intel-iommu.h +@@ -196,7 +196,6 @@ + #define ecap_dis(e) (((e) >> 27) & 0x1) + #define ecap_nest(e) (((e) >> 26) & 0x1) + #define ecap_mts(e) (((e) >> 25) & 0x1) +-#define ecap_ecs(e) (((e) >> 24) & 0x1) + #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) + #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) + #define ecap_coherent(e) ((e) & 0x1) +@@ -264,7 +263,6 @@ + #define DMA_GSTS_CFIS (((u32)1) << 23) + + /* DMA_RTADDR_REG */ +-#define DMA_RTADDR_RTT (((u64)1) << 11) + #define DMA_RTADDR_SMT (((u64)1) << 10) + + /* CCMD_REG */ +@@ -579,6 +577,7 @@ struct intel_iommu { + + #ifdef CONFIG_INTEL_IOMMU + unsigned long *domain_ids; /* bitmap of domains */ ++ unsigned long *copied_tables; /* bitmap of copied tables */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + +@@ -692,6 +691,11 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte) + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte; + } + ++static inline bool context_present(struct context_entry *context) ++{ ++ return (context->lo & 1); ++} ++ + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + + extern int dmar_enable_qi(struct intel_iommu *iommu); +@@ -776,7 +780,6 @@ static inline void intel_iommu_debugfs_init(void) {} + #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + + extern const struct attribute_group *intel_iommu_groups[]; +-bool context_present(struct context_entry *context); + struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc); + +-- +2.35.1 + diff --git a/queue-5.19/net-mlx5-introduce-ifc-bits-for-using-software-vhca-.patch b/queue-5.19/net-mlx5-introduce-ifc-bits-for-using-software-vhca-.patch new file mode 100644 index 00000000000..887411fc762 --- /dev/null +++ b/queue-5.19/net-mlx5-introduce-ifc-bits-for-using-software-vhca-.patch @@ -0,0 +1,81 @@ +From dfd456f054e8df529005b7ee9b68fa3ac0d9a5cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Jun 2022 12:41:00 +0300 +Subject: net/mlx5: Introduce ifc bits for using software vhca id + +From: Yishai Hadas + +[ Upstream commit 0372c546eca575445331c0ad8902210b70be6d61 ] + +Introduce ifc related stuff to enable using software vhca id +functionality. + +Signed-off-by: Yishai Hadas +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Stable-dep-of: 9ca05b0f27de ("RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile") +Signed-off-by: Sasha Levin +--- + include/linux/mlx5/mlx5_ifc.h | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index fd7d083a34d33..6d57e5ec9718d 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -1804,7 +1804,14 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { + u8 max_reformat_remove_size[0x8]; + u8 max_reformat_remove_offset[0x8]; + +- u8 reserved_at_c0[0x740]; ++ u8 reserved_at_c0[0x160]; ++ ++ u8 reserved_at_220[0x1]; ++ u8 sw_vhca_id_valid[0x1]; ++ u8 sw_vhca_id[0xe]; ++ u8 reserved_at_230[0x10]; ++ ++ u8 reserved_at_240[0x5c0]; + }; + + enum mlx5_ifc_flow_destination_type { +@@ -3715,6 +3722,11 @@ struct mlx5_ifc_rmpc_bits { + struct mlx5_ifc_wq_bits wq; + }; + ++enum { ++ VHCA_ID_TYPE_HW = 0, ++ VHCA_ID_TYPE_SW = 1, ++}; ++ + struct mlx5_ifc_nic_vport_context_bits { + u8 reserved_at_0[0x5]; + u8 min_wqe_inline_mode[0x3]; +@@ -3731,8 +3743,8 @@ struct mlx5_ifc_nic_vport_context_bits { + u8 event_on_mc_address_change[0x1]; + u8 event_on_uc_address_change[0x1]; + +- u8 reserved_at_40[0xc]; +- ++ u8 vhca_id_type[0x1]; ++ u8 reserved_at_41[0xb]; + u8 affiliation_criteria[0x4]; + u8 affiliated_vhca_id[0x10]; + +@@ -7189,7 +7201,12 @@ struct mlx5_ifc_init_hca_in_bits { + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + +- u8 reserved_at_40[0x40]; ++ u8 reserved_at_40[0x20]; ++ ++ u8 reserved_at_60[0x2]; ++ u8 sw_vhca_id[0xe]; ++ u8 reserved_at_70[0x10]; ++ + u8 sw_owner_id[4][0x20]; + }; + +-- +2.35.1 + diff --git a/queue-5.19/net-mlx5-use-software-vhca-id-when-it-s-supported.patch b/queue-5.19/net-mlx5-use-software-vhca-id-when-it-s-supported.patch new file mode 100644 index 00000000000..8212180a8c0 --- /dev/null +++ b/queue-5.19/net-mlx5-use-software-vhca-id-when-it-s-supported.patch @@ -0,0 +1,179 @@ +From 0389847c9b08301f63601d5d90d814f4faa5df52 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Jun 2022 12:47:34 +0300 +Subject: net/mlx5: Use software VHCA id when it's supported + +From: Yishai Hadas + +[ Upstream commit dc402ccc0d7b55922a79505df3000da7deb77a2b ] + +Use software VHCA id when it's supported by the firmware. + +A unique id is allocated upon mlx5_mdev_init() and freed upon +mlx5_mdev_uninit(), as such it stays the same during the full life cycle +of the device including upon health recovery if occurred. + +The conjunction of sw_vhca_id with sw_owner_id will be a global unique +id per function which uses mlx5_core. + +The sw_vhca_id is set upon init_hca command and is used to specify the +VHCA that the NIC vport is affiliated with. + +This functionality is needed upon migration of VM which is MPV based. +(i.e. multi port device). + +Signed-off-by: Yishai Hadas +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Stable-dep-of: 9ca05b0f27de ("RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/fw.c | 4 ++ + .../net/ethernet/mellanox/mlx5/core/main.c | 49 +++++++++++++++++++ + .../net/ethernet/mellanox/mlx5/core/vport.c | 14 ++++-- + include/linux/mlx5/driver.h | 1 + + 4 files changed, 65 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +index cfb8bedba5124..079fa44ada71e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +@@ -289,6 +289,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) + sw_owner_id[i]); + } + ++ if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && ++ dev->priv.sw_vhca_id > 0) ++ MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id); ++ + return mlx5_cmd_exec_in(dev, init_hca, in); + } + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 616207c3b187a..64d54bba91f69 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -90,6 +90,8 @@ module_param_named(prof_sel, prof_sel, uint, 0444); + MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + + static u32 sw_owner_id[4]; ++#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) ++static DEFINE_IDA(sw_vhca_ida); + + enum { + MLX5_ATOMIC_REQ_MODE_BE = 0x0, +@@ -499,6 +501,31 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) + return err; + } + ++static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) ++{ ++ void *set_hca_cap; ++ int err; ++ ++ if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) ++ return 0; ++ ++ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); ++ if (err) ++ return err; ++ ++ if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || ++ !(dev->priv.sw_vhca_id > 0)) ++ return 0; ++ ++ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, ++ capability); ++ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, ++ MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); ++ MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); ++ ++ return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); ++} ++ + static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + { + struct mlx5_profile *prof = &dev->profile; +@@ -669,6 +696,13 @@ static int set_hca_cap(struct mlx5_core_dev *dev) + goto out; + } + ++ memset(set_ctx, 0, set_sz); ++ err = handle_hca_cap_2(dev, set_ctx); ++ if (err) { ++ mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); ++ goto out; ++ } ++ + out: + kfree(set_ctx); + return err; +@@ -1512,6 +1546,18 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) + if (err) + goto err_hca_caps; + ++ /* The conjunction of sw_vhca_id with sw_owner_id will be a global ++ * unique id per function which uses mlx5_core. ++ * Those values are supplied to FW as part of the init HCA command to ++ * be used by both driver and FW when it's applicable. ++ */ ++ dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, ++ MAX_SW_VHCA_ID, ++ GFP_KERNEL); ++ if (dev->priv.sw_vhca_id < 0) ++ mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", ++ dev->priv.sw_vhca_id); ++ + return 0; + + err_hca_caps: +@@ -1537,6 +1583,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) + { + struct mlx5_priv *priv = &dev->priv; + ++ if (priv->sw_vhca_id > 0) ++ ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); ++ + mlx5_hca_caps_free(dev); + mlx5_adev_cleanup(dev); + mlx5_pagealloc_cleanup(dev); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c +index ac020cb780727..d5c3173250309 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c +@@ -1086,9 +1086,17 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); +- MLX5_SET(modify_nic_vport_context_in, in, +- nic_vport_context.affiliated_vhca_id, +- MLX5_CAP_GEN(master_mdev, vhca_id)); ++ if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) { ++ MLX5_SET(modify_nic_vport_context_in, in, ++ nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW); ++ MLX5_SET(modify_nic_vport_context_in, in, ++ nic_vport_context.affiliated_vhca_id, ++ MLX5_CAP_GEN_2(master_mdev, sw_vhca_id)); ++ } else { ++ MLX5_SET(modify_nic_vport_context_in, in, ++ nic_vport_context.affiliated_vhca_id, ++ MLX5_CAP_GEN(master_mdev, vhca_id)); ++ } + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index b0b4ac92354a2..0015a08ddbd24 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -606,6 +606,7 @@ struct mlx5_priv { + spinlock_t ctx_lock; + struct mlx5_adev **adev; + int adev_idx; ++ int sw_vhca_id; + struct mlx5_events *events; + + struct mlx5_flow_steering *steering; +-- +2.35.1 + diff --git a/queue-5.19/rdma-mlx5-add-a-umr-recovery-flow.patch b/queue-5.19/rdma-mlx5-add-a-umr-recovery-flow.patch new file mode 100644 index 00000000000..9c2d8ce52a3 --- /dev/null +++ b/queue-5.19/rdma-mlx5-add-a-umr-recovery-flow.patch @@ -0,0 +1,185 @@ +From f72f1848bfdd0f07c7a648c8be04c43eb8a38d5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 15 May 2022 07:19:53 +0300 +Subject: RDMA/mlx5: Add a umr recovery flow + +From: Aharon Landau + +[ Upstream commit 158e71bb69e368b8b33e8b7c4ac8c111da0c1ae2 ] + +When a UMR fails, the UMR QP state changes to an error state. Therefore, +all the further UMR operations will fail too. + +Add a recovery flow to the UMR QP, and repost the flushed WQEs. + +Link: https://lore.kernel.org/r/6cc24816cca049bd8541317f5e41d3ac659445d3.1652588303.git.leonro@nvidia.com +Signed-off-by: Aharon Landau +Reviewed-by: Michael Guralnik +Signed-off-by: Leon Romanovsky +Stable-dep-of: 9b7d4be967f1 ("RDMA/mlx5: Fix UMR cleanup on error flow of driver init") +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/cq.c | 4 ++ + drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 ++++- + drivers/infiniband/hw/mlx5/umr.c | 78 ++++++++++++++++++++++++---- + 3 files changed, 83 insertions(+), 11 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c +index 08371a80fdc26..be189e0525de6 100644 +--- a/drivers/infiniband/hw/mlx5/cq.c ++++ b/drivers/infiniband/hw/mlx5/cq.c +@@ -523,6 +523,10 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, + "Requestor" : "Responder", cq->mcq.cqn); + mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", + err_cqe->syndrome, err_cqe->vendor_err_synd); ++ if (wc->status != IB_WC_WR_FLUSH_ERR && ++ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR) ++ dev->umrc.state = MLX5_UMR_STATE_RECOVER; ++ + if (opcode == MLX5_CQE_REQ_ERR) { + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); +diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h +index 998b67509a533..7460e0dfe6db4 100644 +--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h ++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h +@@ -717,13 +717,23 @@ struct mlx5_ib_umr_context { + struct completion done; + }; + ++enum { ++ MLX5_UMR_STATE_ACTIVE, ++ MLX5_UMR_STATE_RECOVER, ++ MLX5_UMR_STATE_ERR, ++}; ++ + struct umr_common { + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; +- /* control access to UMR QP ++ /* Protects from UMR QP overflow + */ + struct semaphore sem; ++ /* Protects from using UMR while the UMR is not active ++ */ ++ struct mutex lock; ++ unsigned int state; + }; + + struct mlx5_cache_ent { +diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c +index 3a48364c09181..e00b94d1b1ea1 100644 +--- a/drivers/infiniband/hw/mlx5/umr.c ++++ b/drivers/infiniband/hw/mlx5/umr.c +@@ -176,6 +176,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); ++ mutex_init(&dev->umrc.lock); + + return 0; + +@@ -195,6 +196,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) + ib_dealloc_pd(dev->umrc.pd); + } + ++static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) ++{ ++ struct umr_common *umrc = &dev->umrc; ++ struct ib_qp_attr attr; ++ int err; ++ ++ attr.qp_state = IB_QPS_RESET; ++ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); ++ if (err) { ++ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); ++ goto err; ++ } ++ ++ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); ++ if (err) ++ goto err; ++ ++ umrc->state = MLX5_UMR_STATE_ACTIVE; ++ return 0; ++ ++err: ++ umrc->state = MLX5_UMR_STATE_ERR; ++ return err; ++} ++ + static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + struct mlx5r_umr_wqe *wqe, bool with_data) + { +@@ -231,7 +257,7 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + + id.ib_cqe = cqe; + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, +- MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR); ++ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR); + + mlx5r_ring_db(qp, 1, ctrl); + +@@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, + mlx5r_umr_init_context(&umr_context); + + down(&umrc->sem); +- err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, +- with_data); +- if (err) +- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); +- else { +- wait_for_completion(&umr_context.done); +- if (umr_context.status != IB_WC_SUCCESS) { +- mlx5_ib_warn(dev, "reg umr failed (%u)\n", +- umr_context.status); ++ while (true) { ++ mutex_lock(&umrc->lock); ++ if (umrc->state == MLX5_UMR_STATE_ERR) { ++ mutex_unlock(&umrc->lock); + err = -EFAULT; ++ break; ++ } ++ ++ if (umrc->state == MLX5_UMR_STATE_RECOVER) { ++ mutex_unlock(&umrc->lock); ++ usleep_range(3000, 5000); ++ continue; ++ } ++ ++ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, ++ with_data); ++ mutex_unlock(&umrc->lock); ++ if (err) { ++ mlx5_ib_warn(dev, "UMR post send failed, err %d\n", ++ err); ++ break; + } ++ ++ wait_for_completion(&umr_context.done); ++ ++ if (umr_context.status == IB_WC_SUCCESS) ++ break; ++ ++ if (umr_context.status == IB_WC_WR_FLUSH_ERR) ++ continue; ++ ++ WARN_ON_ONCE(1); ++ mlx5_ib_warn(dev, ++ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n", ++ umr_context.status); ++ mutex_lock(&umrc->lock); ++ err = mlx5r_umr_recover(dev); ++ mutex_unlock(&umrc->lock); ++ if (err) ++ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", ++ err); ++ err = -EFAULT; ++ break; + } + up(&umrc->sem); + return err; +-- +2.35.1 + diff --git a/queue-5.19/rdma-mlx5-fix-umr-cleanup-on-error-flow-of-driver-in.patch b/queue-5.19/rdma-mlx5-fix-umr-cleanup-on-error-flow-of-driver-in.patch new file mode 100644 index 00000000000..69e0438d22c --- /dev/null +++ b/queue-5.19/rdma-mlx5-fix-umr-cleanup-on-error-flow-of-driver-in.patch @@ -0,0 +1,64 @@ +From 064b2940c803d63bf31e9693a6e4c4b45c3c8501 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Aug 2022 12:02:29 +0300 +Subject: RDMA/mlx5: Fix UMR cleanup on error flow of driver init + +From: Maor Gottlieb + +[ Upstream commit 9b7d4be967f16f79a2283b2338709fcc750313ee ] + +The cited commit removed from the cleanup flow of umr the checks +if the resources were created. This could lead to null-ptr-deref +in case that we had failure in mlx5_ib_stage_ib_reg_init stage. + +Fix it by adding new state to the umr that can say if the resources +were created or not and check it in the umr cleanup flow before +destroying the resources. + +Fixes: 04876c12c19e ("RDMA/mlx5: Move init and cleanup of UMR to umr.c") +Reviewed-by: Michael Guralnik +Signed-off-by: Maor Gottlieb +Link: https://lore.kernel.org/r/4cfa61386cf202e9ce330e8d228ce3b25a36326e.1661763459.git.leonro@nvidia.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + + drivers/infiniband/hw/mlx5/umr.c | 3 +++ + 2 files changed, 4 insertions(+) + +diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h +index 7460e0dfe6db4..c2cca032a6ed4 100644 +--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h ++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h +@@ -718,6 +718,7 @@ struct mlx5_ib_umr_context { + }; + + enum { ++ MLX5_UMR_STATE_UNINIT, + MLX5_UMR_STATE_ACTIVE, + MLX5_UMR_STATE_RECOVER, + MLX5_UMR_STATE_ERR, +diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c +index e00b94d1b1ea1..d5105b5c9979b 100644 +--- a/drivers/infiniband/hw/mlx5/umr.c ++++ b/drivers/infiniband/hw/mlx5/umr.c +@@ -177,6 +177,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + mutex_init(&dev->umrc.lock); ++ dev->umrc.state = MLX5_UMR_STATE_ACTIVE; + + return 0; + +@@ -191,6 +192,8 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) + + void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) + { ++ if (dev->umrc.state == MLX5_UMR_STATE_UNINIT) ++ return; + ib_destroy_qp(dev->umrc.qp); + ib_free_cq(dev->umrc.cq); + ib_dealloc_pd(dev->umrc.pd); +-- +2.35.1 + diff --git a/queue-5.19/rdma-mlx5-rely-on-roce-fw-cap-instead-of-devlink-whe.patch b/queue-5.19/rdma-mlx5-rely-on-roce-fw-cap-instead-of-devlink-whe.patch new file mode 100644 index 00000000000..e06036ceeb2 --- /dev/null +++ b/queue-5.19/rdma-mlx5-rely-on-roce-fw-cap-instead-of-devlink-whe.patch @@ -0,0 +1,134 @@ +From 9954f3b09f233f60ce5ad618a101ab2f046b87a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 29 Aug 2022 12:02:27 +0300 +Subject: RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting + profile + +From: Maher Sanalla + +[ Upstream commit 9ca05b0f27de928be121cccf07735819dc9e1ed3 ] + +When the RDMA auxiliary driver probes, it sets its profile based on +devlink driverinit value. The latter might not be in sync with FW yet +(In case devlink reload is not performed), thus causing a mismatch +between RDMA driver and FW. This results in the following FW syndrome +when the RDMA driver tries to adjust RoCE state, which fails the probe: + +"0xC1F678 | modify_nic_vport_context: roce_en set on a vport that +doesn't support roce" + +To prevent this, select the PF profile based on FW RoCE capability +instead of relying on devlink driverinit value. +To provide backward compatibility of the RoCE disable feature, on older +FW's where roce_rw is not set (FW RoCE capability is read-only), keep +the current behavior e.g., rely on devlink driverinit value. + +Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") +Reviewed-by: Shay Drory +Reviewed-by: Michael Guralnik +Reviewed-by: Saeed Mahameed +Signed-off-by: Maher Sanalla +Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/main.c | 2 +- + .../net/ethernet/mellanox/mlx5/core/main.c | 23 +++++++++++++++++-- + include/linux/mlx5/driver.h | 19 +++++++-------- + 3 files changed, 32 insertions(+), 12 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c +index 63c89a72cc352..bb13164124fdb 100644 +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, + dev->mdev = mdev; + dev->num_ports = num_ports; + +- if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev)) ++ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) + profile = &raw_eth_profile; + else + profile = &pf_profile; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 64d54bba91f69..6c8bb74bd8fc6 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -501,6 +501,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) + return err; + } + ++bool mlx5_is_roce_on(struct mlx5_core_dev *dev) ++{ ++ struct devlink *devlink = priv_to_devlink(dev); ++ union devlink_param_value val; ++ int err; ++ ++ err = devlink_param_driverinit_value_get(devlink, ++ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, ++ &val); ++ ++ if (!err) ++ return val.vbool; ++ ++ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); ++ return MLX5_CAP_GEN(dev, roce); ++} ++EXPORT_SYMBOL(mlx5_is_roce_on); ++ + static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) + { + void *set_hca_cap; +@@ -604,7 +622,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + + if (MLX5_CAP_GEN(dev, roce_rw_supported)) +- MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); ++ MLX5_SET(cmd_hca_cap, set_hca_cap, roce, ++ mlx5_is_roce_on(dev)); + + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) +@@ -630,7 +649,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + */ + static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) + { +- return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || ++ return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || + (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); + } + +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index 0015a08ddbd24..b3ea245faa515 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -1275,16 +1275,17 @@ enum { + MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, + }; + +-static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) ++bool mlx5_is_roce_on(struct mlx5_core_dev *dev); ++ ++static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) + { +- struct devlink *devlink = priv_to_devlink(dev); +- union devlink_param_value val; +- int err; +- +- err = devlink_param_driverinit_value_get(devlink, +- DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, +- &val); +- return err ? MLX5_CAP_GEN(dev, roce) : val.vbool; ++ if (MLX5_CAP_GEN(dev, roce_rw_supported)) ++ return MLX5_CAP_GEN(dev, roce); ++ ++ /* If RoCE cap is read-only in FW, get RoCE state from devlink ++ * in order to support RoCE enable/disable feature ++ */ ++ return mlx5_is_roce_on(dev); + } + + #endif /* MLX5_DRIVER_H */ +-- +2.35.1 + diff --git a/queue-5.19/series b/queue-5.19/series new file mode 100644 index 00000000000..b14326b0ca1 --- /dev/null +++ b/queue-5.19/series @@ -0,0 +1,6 @@ +iommu-vt-d-fix-kdump-kernels-boot-failure-with-scala.patch +net-mlx5-introduce-ifc-bits-for-using-software-vhca-.patch +net-mlx5-use-software-vhca-id-when-it-s-supported.patch +rdma-mlx5-rely-on-roce-fw-cap-instead-of-devlink-whe.patch +rdma-mlx5-add-a-umr-recovery-flow.patch +rdma-mlx5-fix-umr-cleanup-on-error-flow-of-driver-in.patch