From: Greg Kroah-Hartman Date: Sun, 28 May 2023 16:53:28 +0000 (+0100) Subject: 6.3-stable patches X-Git-Tag: review~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a3042ca13aaa6060ec587d7d198532bd716bda15;p=thirdparty%2Fkernel%2Fstable-queue.git 6.3-stable patches added patches: arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch asoc-intel-avs-access-path-components-under-lock.patch asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch cxl-wait-memory_info_valid-before-access-memory-related-info.patch firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch forcedeth-fix-an-error-handling-path-in-nv_probe.patch net-mlx5-collect-command-failures-data-only-for-known-commands.patch net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch net-mlx5-devcom-serialize-devcom-registration.patch net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch net-mlx5e-fix-deadlock-in-tc-route-query-code.patch net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch net-mlx5e-use-correct-encap-attribute-during-invalidation.patch platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch --- diff --git a/queue-6.3/arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch b/queue-6.3/arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch new file mode 100644 index 00000000000..c6c605c4a13 --- /dev/null 
+++ b/queue-6.3/arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch @@ -0,0 +1,58 @@ +From f161cea5a20f3aeeb637a88ad1705fc2720b4d58 Mon Sep 17 00:00:00 2001 +From: Hugo Villeneuve +Date: Mon, 1 May 2023 13:05:32 -0400 +Subject: arm64: dts: imx8mn-var-som: fix PHY detection bug by adding deassert delay + +From: Hugo Villeneuve + +commit f161cea5a20f3aeeb637a88ad1705fc2720b4d58 upstream. + +While testing the ethernet interface on a Variscite symphony carrier +board using an imx8mn SOM with an onboard ADIN1300 PHY (EC hardware +configuration), the ethernet PHY is not detected. + +The ADIN1300 datasheet indicate that the "Management interface +active (t4)" state is reached at most 5ms after the reset signal is +deasserted. + +The device tree in Variscite custom git repository uses the following +property: + + phy-reset-post-delay = <20>; + +Add a new MDIO property 'reset-deassert-us' of 20ms to have the same +delay inside the ethphy node. Adding this property fixes the problem +with the PHY detection. + +Note that this SOM can also have an Atheros AR8033 PHY. In this case, +a 1ms deassert delay is sufficient. Add a comment to that effect. + +Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module") +Signed-off-by: Hugo Villeneuve +Signed-off-by: Shawn Guo +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +@@ -98,11 +98,17 @@ + #address-cells = <1>; + #size-cells = <0>; + +- ethphy: ethernet-phy@4 { ++ ethphy: ethernet-phy@4 { /* AR8033 or ADIN1300 */ + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <4>; + reset-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>; + reset-assert-us = <10000>; ++ /* ++ * Deassert delay: ++ * ADIN1300 requires 5ms. ++ * AR8033 requires 1ms. 
++ */ ++ reset-deassert-us = <20000>; + }; + }; + }; diff --git a/queue-6.3/asoc-intel-avs-access-path-components-under-lock.patch b/queue-6.3/asoc-intel-avs-access-path-components-under-lock.patch new file mode 100644 index 00000000000..e8a3f0807fd --- /dev/null +++ b/queue-6.3/asoc-intel-avs-access-path-components-under-lock.patch @@ -0,0 +1,52 @@ +From d849996f7458042af803b7d15a181922834c5249 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= + +Date: Fri, 19 May 2023 22:17:06 +0200 +Subject: ASoC: Intel: avs: Access path components under lock +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Amadeusz Sławiński + +commit d849996f7458042af803b7d15a181922834c5249 upstream. + +Path and its components should be accessed under lock to prevent +problems with one thread modifying them while other tries to read. + +Fixes: c8c960c10971 ("ASoC: Intel: avs: APL-based platforms support") +Reviewed-by: Cezary Rojewski +Signed-off-by: Amadeusz Sławiński +Link: https://lore.kernel.org/r/20230519201711.4073845-3-amadeuszx.slawinski@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/intel/avs/apl.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/sound/soc/intel/avs/apl.c ++++ b/sound/soc/intel/avs/apl.c +@@ -169,6 +169,7 @@ static bool apl_lp_streaming(struct avs_ + { + struct avs_path *path; + ++ spin_lock(&adev->path_list_lock); + /* Any gateway without buffer allocated in LP area disqualifies D0IX. 
*/ + list_for_each_entry(path, &adev->path_list, node) { + struct avs_path_pipeline *ppl; +@@ -188,11 +189,14 @@ static bool apl_lp_streaming(struct avs_ + if (cfg->copier.dma_type == INVALID_OBJECT_ID) + continue; + +- if (!mod->gtw_attrs.lp_buffer_alloc) ++ if (!mod->gtw_attrs.lp_buffer_alloc) { ++ spin_unlock(&adev->path_list_lock); + return false; ++ } + } + } + } ++ spin_unlock(&adev->path_list_lock); + + return true; + } diff --git a/queue-6.3/asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch b/queue-6.3/asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch new file mode 100644 index 00000000000..1faf43c8a03 --- /dev/null +++ b/queue-6.3/asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch @@ -0,0 +1,36 @@ +From 1cf036deebcdec46d6348842bd2f8931202fd4cd Mon Sep 17 00:00:00 2001 +From: Cezary Rojewski +Date: Fri, 19 May 2023 22:17:08 +0200 +Subject: ASoC: Intel: avs: Fix declaration of enum avs_channel_config +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Cezary Rojewski + +commit 1cf036deebcdec46d6348842bd2f8931202fd4cd upstream. + +Constant 'C4_CHANNEL' does not exist on the firmware side. Value 0xC is +reserved for 'C7_1' instead. 
+ +Fixes: 580a5912d1fe ("ASoC: Intel: avs: Declare module configuration types") +Signed-off-by: Cezary Rojewski +Signed-off-by: Amadeusz Sławiński +Link: https://lore.kernel.org/r/20230519201711.4073845-5-amadeuszx.slawinski@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + sound/soc/intel/avs/messages.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/sound/soc/intel/avs/messages.h ++++ b/sound/soc/intel/avs/messages.h +@@ -619,7 +619,7 @@ enum avs_channel_config { + AVS_CHANNEL_CONFIG_DUAL_MONO = 9, + AVS_CHANNEL_CONFIG_I2S_DUAL_STEREO_0 = 10, + AVS_CHANNEL_CONFIG_I2S_DUAL_STEREO_1 = 11, +- AVS_CHANNEL_CONFIG_4_CHANNEL = 12, ++ AVS_CHANNEL_CONFIG_7_1 = 12, + AVS_CHANNEL_CONFIG_INVALID + }; + diff --git a/queue-6.3/asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch b/queue-6.3/asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch new file mode 100644 index 00000000000..15d2d3c6218 --- /dev/null +++ b/queue-6.3/asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch @@ -0,0 +1,37 @@ +From 95109657471311601b98e71f03d0244f48dc61bb Mon Sep 17 00:00:00 2001 +From: Cezary Rojewski +Date: Fri, 19 May 2023 22:17:07 +0200 +Subject: ASoC: Intel: Skylake: Fix declaration of enum skl_ch_cfg +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Cezary Rojewski + +commit 95109657471311601b98e71f03d0244f48dc61bb upstream. + +Constant 'C4_CHANNEL' does not exist on the firmware side. Value 0xC is +reserved for 'C7_1' instead. 
+ +Fixes: 04afbbbb1cba ("ASoC: Intel: Skylake: Update the topology interface structure") +Signed-off-by: Cezary Rojewski +Signed-off-by: Amadeusz Sławiński +Link: https://lore.kernel.org/r/20230519201711.4073845-4-amadeuszx.slawinski@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/sound/skl-tplg-interface.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/uapi/sound/skl-tplg-interface.h ++++ b/include/uapi/sound/skl-tplg-interface.h +@@ -66,7 +66,8 @@ enum skl_ch_cfg { + SKL_CH_CFG_DUAL_MONO = 9, + SKL_CH_CFG_I2S_DUAL_STEREO_0 = 10, + SKL_CH_CFG_I2S_DUAL_STEREO_1 = 11, +- SKL_CH_CFG_4_CHANNEL = 12, ++ SKL_CH_CFG_7_1 = 12, ++ SKL_CH_CFG_4_CHANNEL = SKL_CH_CFG_7_1, + SKL_CH_CFG_INVALID + }; + diff --git a/queue-6.3/cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch b/queue-6.3/cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch new file mode 100644 index 00000000000..b623015cbea --- /dev/null +++ b/queue-6.3/cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch @@ -0,0 +1,145 @@ +From e764f12208b99ac7892c4e3f6bf88d71ca71036f Mon Sep 17 00:00:00 2001 +From: Dave Jiang +Date: Thu, 18 May 2023 16:38:20 -0700 +Subject: cxl: Move cxl_await_media_ready() to before capacity info retrieval + +From: Dave Jiang + +commit e764f12208b99ac7892c4e3f6bf88d71ca71036f upstream. + +Move cxl_await_media_ready() to cxl_pci probe before driver starts issuing +IDENTIFY and retrieving memory device information to ensure that the +device is ready to provide the information. Allow cxl_pci_probe() to succeed +even if media is not ready. Cache the media failure in cxlds and don't ask +the device for any media information. + +The rationale for proceeding in the !media_ready case is to allow for +mailbox operations to interrogate and/or remediate the device. After +media is repaired then rebinding the cxl_pci driver is expected to +restart the capacity scan. 
+ +Suggested-by: Dan Williams +Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices") +Reviewed-by: Ira Weiny +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/168445310026.3251520.8124296540679268206.stgit@djiang5-mobl3 +[djbw: fixup cxl_test] +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/mbox.c | 15 ++++++++++----- + drivers/cxl/cxlmem.h | 2 ++ + drivers/cxl/mem.c | 3 +++ + drivers/cxl/pci.c | 6 ++++++ + drivers/cxl/port.c | 6 ------ + tools/testing/cxl/test/mem.c | 1 + + 6 files changed, 22 insertions(+), 11 deletions(-) + +--- a/drivers/cxl/core/mbox.c ++++ b/drivers/cxl/core/mbox.c +@@ -984,7 +984,7 @@ static int cxl_mem_get_partition_info(st + * cxl_dev_state_identify() - Send the IDENTIFY command to the device. + * @cxlds: The device data for the operation + * +- * Return: 0 if identify was executed successfully. ++ * Return: 0 if identify was executed successfully or media not ready. + * + * This will dispatch the identify command to the device and on success populate + * structures to be exported to sysfs. 
+@@ -996,6 +996,9 @@ int cxl_dev_state_identify(struct cxl_de + struct cxl_mbox_cmd mbox_cmd; + int rc; + ++ if (!cxlds->media_ready) ++ return 0; ++ + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_IDENTIFY, + .size_out = sizeof(id), +@@ -1065,10 +1068,12 @@ int cxl_mem_create_range_info(struct cxl + cxlds->persistent_only_bytes, "pmem"); + } + +- rc = cxl_mem_get_partition_info(cxlds); +- if (rc) { +- dev_err(dev, "Failed to query partition information\n"); +- return rc; ++ if (cxlds->media_ready) { ++ rc = cxl_mem_get_partition_info(cxlds); ++ if (rc) { ++ dev_err(dev, "Failed to query partition information\n"); ++ return rc; ++ } + } + + rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, +--- a/drivers/cxl/cxlmem.h ++++ b/drivers/cxl/cxlmem.h +@@ -227,6 +227,7 @@ struct cxl_event_state { + * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) ++ * @media_ready: Indicate whether the device media is usable + * @payload_size: Size of space for payload + * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) + * @lsa_size: Size of Label Storage Area +@@ -264,6 +265,7 @@ struct cxl_dev_state { + int cxl_dvsec; + + bool rcd; ++ bool media_ready; + size_t payload_size; + size_t lsa_size; + struct mutex mbox_mutex; /* Protects device mailbox and firmware */ +--- a/drivers/cxl/mem.c ++++ b/drivers/cxl/mem.c +@@ -104,6 +104,9 @@ static int cxl_mem_probe(struct device * + struct dentry *dentry; + int rc; + ++ if (!cxlds->media_ready) ++ return -EBUSY; ++ + /* + * Someone is trying to reattach this device after it lost its port + * connection (an endpoint port previously registered by this memdev was +--- a/drivers/cxl/pci.c ++++ b/drivers/cxl/pci.c +@@ -757,6 +757,12 @@ static int cxl_pci_probe(struct pci_dev + if (rc) + dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); + ++ rc = cxl_await_media_ready(cxlds); ++ if (rc == 0) ++ 
cxlds->media_ready = true; ++ else ++ dev_warn(&pdev->dev, "Media not active (%d)\n", rc); ++ + rc = cxl_pci_setup_mailbox(cxlds); + if (rc) + return rc; +--- a/drivers/cxl/port.c ++++ b/drivers/cxl/port.c +@@ -117,12 +117,6 @@ static int cxl_endpoint_port_probe(struc + if (rc) + return rc; + +- rc = cxl_await_media_ready(cxlds); +- if (rc) { +- dev_err(&port->dev, "Media not active (%d)\n", rc); +- return rc; +- } +- + rc = devm_cxl_enumerate_decoders(cxlhdm, &info); + if (rc) + return rc; +--- a/tools/testing/cxl/test/mem.c ++++ b/tools/testing/cxl/test/mem.c +@@ -1010,6 +1010,7 @@ static int cxl_mock_mem_probe(struct pla + if (rc) + return rc; + ++ cxlds->media_ready = true; + rc = cxl_dev_state_identify(cxlds); + if (rc) + return rc; diff --git a/queue-6.3/cxl-wait-memory_info_valid-before-access-memory-related-info.patch b/queue-6.3/cxl-wait-memory_info_valid-before-access-memory-related-info.patch new file mode 100644 index 00000000000..b776a4fd648 --- /dev/null +++ b/queue-6.3/cxl-wait-memory_info_valid-before-access-memory-related-info.patch @@ -0,0 +1,148 @@ +From ce17ad0d54985e2595a3e615fda31df61808a08c Mon Sep 17 00:00:00 2001 +From: Dave Jiang +Date: Thu, 18 May 2023 14:54:34 -0700 +Subject: cxl: Wait Memory_Info_Valid before access memory related info + +From: Dave Jiang + +commit ce17ad0d54985e2595a3e615fda31df61808a08c upstream. + +The Memory_Info_Valid bit (CXL 3.0 8.1.3.8.2) indicates that the CXL +Range Size High and Size Low registers are valid. The bit must be set +within 1 second of reset deassertion to the device. Check valid bit +before we check the Memory_Active bit when waiting for +cxl_await_media_ready() to ensure that the memory info is valid for +consumption. Also ensures both DVSEC ranges 1 and 2 are ready if DVSEC +Capability indicates they are both supported. 
+ +Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active") +Reviewed-by: Jonathan Cameron +Signed-off-by: Dave Jiang +Link: https://lore.kernel.org/r/168444687469.3134781.11033518965387297327.stgit@djiang5-mobl3 +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cxl/core/pci.c | 85 +++++++++++++++++++++++++++++++++++++++++++------ + drivers/cxl/cxlpci.h | 2 + + 2 files changed, 78 insertions(+), 9 deletions(-) + +--- a/drivers/cxl/core/pci.c ++++ b/drivers/cxl/core/pci.c +@@ -101,23 +101,57 @@ int devm_cxl_port_enumerate_dports(struc + } + EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); + +-/* +- * Wait up to @media_ready_timeout for the device to report memory +- * active. +- */ +-int cxl_await_media_ready(struct cxl_dev_state *cxlds) ++static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) ++{ ++ struct pci_dev *pdev = to_pci_dev(cxlds->dev); ++ int d = cxlds->cxl_dvsec; ++ bool valid = false; ++ int rc, i; ++ u32 temp; ++ ++ if (id > CXL_DVSEC_RANGE_MAX) ++ return -EINVAL; ++ ++ /* Check MEM INFO VALID bit first, give up after 1s */ ++ i = 1; ++ do { ++ rc = pci_read_config_dword(pdev, ++ d + CXL_DVSEC_RANGE_SIZE_LOW(id), ++ &temp); ++ if (rc) ++ return rc; ++ ++ valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); ++ if (valid) ++ break; ++ msleep(1000); ++ } while (i--); ++ ++ if (!valid) { ++ dev_err(&pdev->dev, ++ "Timeout awaiting memory range %d valid after 1s.\n", ++ id); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) + { + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; +- u64 md_status; + int rc, i; ++ u32 temp; + +- for (i = media_ready_timeout; i; i--) { +- u32 temp; ++ if (id > CXL_DVSEC_RANGE_MAX) ++ return -EINVAL; + ++ /* Check MEM ACTIVE bit, up to 60s timeout by default */ ++ for (i = media_ready_timeout; i; i--) { + rc = pci_read_config_dword( +- pdev, 
d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); ++ pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp); + if (rc) + return rc; + +@@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev + return -ETIMEDOUT; + } + ++ return 0; ++} ++ ++/* ++ * Wait up to @media_ready_timeout for the device to report memory ++ * active. ++ */ ++int cxl_await_media_ready(struct cxl_dev_state *cxlds) ++{ ++ struct pci_dev *pdev = to_pci_dev(cxlds->dev); ++ int d = cxlds->cxl_dvsec; ++ int rc, i, hdm_count; ++ u64 md_status; ++ u16 cap; ++ ++ rc = pci_read_config_word(pdev, ++ d + CXL_DVSEC_CAP_OFFSET, &cap); ++ if (rc) ++ return rc; ++ ++ hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); ++ for (i = 0; i < hdm_count; i++) { ++ rc = cxl_dvsec_mem_range_valid(cxlds, i); ++ if (rc) ++ return rc; ++ } ++ ++ for (i = 0; i < hdm_count; i++) { ++ rc = cxl_dvsec_mem_range_active(cxlds, i); ++ if (rc) ++ return rc; ++ } ++ + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; +--- a/drivers/cxl/cxlpci.h ++++ b/drivers/cxl/cxlpci.h +@@ -31,6 +31,8 @@ + #define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) + #define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) + ++#define CXL_DVSEC_RANGE_MAX 2 ++ + /* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ + #define CXL_DVSEC_FUNCTION_MAP 2 + diff --git a/queue-6.3/firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch b/queue-6.3/firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch new file mode 100644 index 00000000000..38a9b196390 --- /dev/null +++ b/queue-6.3/firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch @@ -0,0 +1,59 @@ +From 111a833dc5cbef3d05b2a796a7e23cb7f6ff2192 Mon Sep 17 00:00:00 2001 +From: Sudeep Holla +Date: Wed, 3 May 2023 14:12:52 +0100 +Subject: firmware: arm_ffa: Set reserved/MBZ fields to zero in the memory descriptors + +From: Sudeep Holla + +commit 111a833dc5cbef3d05b2a796a7e23cb7f6ff2192 
upstream. + +The transmit buffers allocated by the driver can be used to transmit data +by any messages/commands needing the buffer. However, it is not guaranteed +to have been zero-ed before every new transmission and hence it will just +contain residual value from the previous transmission. There are several +reserved fields in the memory descriptors that must be zero(MBZ). The +receiver can reject the transmission if any such MBZ fields are non-zero. + +While we can set the whole page to zero, it is not optimal as most of the +fields get initialised to the value required for the current transmission. + +So, just set the reserved/MBZ fields to zero in the memory descriptors +explicitly to honour the requirement and keep the receiver happy. + +Fixes: cc2195fe536c ("firmware: arm_ffa: Add support for MEM_* interfaces") +Reported-by: Marc Bonnici +Link: https://lore.kernel.org/r/20230503131252.12585-1-sudeep.holla@arm.com +Signed-off-by: Sudeep Holla +Signed-off-by: Greg Kroah-Hartman +--- + drivers/firmware/arm_ffa/driver.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/firmware/arm_ffa/driver.c ++++ b/drivers/firmware/arm_ffa/driver.c +@@ -420,12 +420,17 @@ ffa_setup_and_transmit(u32 func_id, void + ep_mem_access->receiver = args->attrs[idx].receiver; + ep_mem_access->attrs = args->attrs[idx].attrs; + ep_mem_access->composite_off = COMPOSITE_OFFSET(args->nattrs); ++ ep_mem_access->flag = 0; ++ ep_mem_access->reserved = 0; + } ++ mem_region->reserved_0 = 0; ++ mem_region->reserved_1 = 0; + mem_region->ep_count = args->nattrs; + + composite = buffer + COMPOSITE_OFFSET(args->nattrs); + composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); + composite->addr_range_cnt = num_entries; ++ composite->reserved = 0; + + length = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, num_entries); + frag_len = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, 0); +@@ -460,6 +465,7 @@ ffa_setup_and_transmit(u32 func_id, void + + constituents->address = sg_phys(args->sg); + 
constituents->pg_cnt = args->sg->length / FFA_PAGE_SIZE; ++ constituents->reserved = 0; + constituents++; + frag_len += sizeof(struct ffa_mem_region_addr_range); + } while ((args->sg = sg_next(args->sg))); diff --git a/queue-6.3/forcedeth-fix-an-error-handling-path-in-nv_probe.patch b/queue-6.3/forcedeth-fix-an-error-handling-path-in-nv_probe.patch new file mode 100644 index 00000000000..d829e158d68 --- /dev/null +++ b/queue-6.3/forcedeth-fix-an-error-handling-path-in-nv_probe.patch @@ -0,0 +1,35 @@ +From 5b17a4971d3b2a073f4078dd65331efbe35baa2d Mon Sep 17 00:00:00 2001 +From: Christophe JAILLET +Date: Sat, 20 May 2023 10:30:17 +0200 +Subject: forcedeth: Fix an error handling path in nv_probe() + +From: Christophe JAILLET + +commit 5b17a4971d3b2a073f4078dd65331efbe35baa2d upstream. + +If an error occurs after calling nv_mgmt_acquire_sema(), it should be +undone with a corresponding nv_mgmt_release_sema() call. + +Add it in the error handling path of the probe as already done in the +remove function. 
+ +Fixes: cac1c52c3621 ("forcedeth: mgmt unit interface") +Signed-off-by: Christophe JAILLET +Acked-by: Zhu Yanjun +Link: https://lore.kernel.org/r/355e9a7d351b32ad897251b6f81b5886fcdc6766.1684571393.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/nvidia/forcedeth.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/nvidia/forcedeth.c ++++ b/drivers/net/ethernet/nvidia/forcedeth.c +@@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_ + return 0; + + out_error: ++ nv_mgmt_release_sema(dev); + if (phystate_orig) + writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl); + out_freering: diff --git a/queue-6.3/net-mlx5-collect-command-failures-data-only-for-known-commands.patch b/queue-6.3/net-mlx5-collect-command-failures-data-only-for-known-commands.patch new file mode 100644 index 00000000000..b8834946437 --- /dev/null +++ b/queue-6.3/net-mlx5-collect-command-failures-data-only-for-known-commands.patch @@ -0,0 +1,39 @@ +From 2a0a935fb64ee8af253b9c6133bb6702fb152ac2 Mon Sep 17 00:00:00 2001 +From: Shay Drory +Date: Tue, 2 May 2023 11:03:53 +0300 +Subject: net/mlx5: Collect command failures data only for known commands + +From: Shay Drory + +commit 2a0a935fb64ee8af253b9c6133bb6702fb152ac2 upstream. + +DEVX can issue a general command, which is not used by mlx5 driver. +In case such command is failed, mlx5 is trying to collect the failure +data, However, mlx5 doesn't create a storage for this command, since +mlx5 doesn't use it. This lead to array-index-out-of-bounds error. + +Fix it by checking whether the command is known before collecting the +failure data. 
+ +Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") +Signed-off-by: Shay Drory +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +@@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct ml + static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, + u32 syndrome, int err) + { ++ const char *namep = mlx5_command_str(opcode); + struct mlx5_cmd_stats *stats; + +- if (!err) ++ if (!err || !(strcmp(namep, "unknown command opcode"))) + return; + + stats = &dev->cmd.stats[opcode]; diff --git a/queue-6.3/net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch b/queue-6.3/net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch new file mode 100644 index 00000000000..e723e9de728 --- /dev/null +++ b/queue-6.3/net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch @@ -0,0 +1,35 @@ +From af87194352cad882d787d06fb7efa714acd95427 Mon Sep 17 00:00:00 2001 +From: Shay Drory +Date: Tue, 2 May 2023 13:35:11 +0300 +Subject: net/mlx5: Devcom, fix error flow in mlx5_devcom_register_device + +From: Shay Drory + +commit af87194352cad882d787d06fb7efa714acd95427 upstream. + +In case devcom allocation is failed, mlx5 is always freeing the priv. +However, this priv might have been allocated by a different thread, +and freeing it might lead to use-after-free bugs. +Fix it by freeing the priv only in case it was allocated by the +running thread. 
+ +Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism") +Signed-off-by: Shay Drory +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +@@ -112,7 +112,8 @@ struct mlx5_devcom *mlx5_devcom_register + priv->devs[idx] = dev; + devcom = mlx5_devcom_alloc(priv, idx); + if (!devcom) { +- kfree(priv); ++ if (new_priv) ++ kfree(priv); + return ERR_PTR(-ENOMEM); + } + diff --git a/queue-6.3/net-mlx5-devcom-serialize-devcom-registration.patch b/queue-6.3/net-mlx5-devcom-serialize-devcom-registration.patch new file mode 100644 index 00000000000..2b011cc3c60 --- /dev/null +++ b/queue-6.3/net-mlx5-devcom-serialize-devcom-registration.patch @@ -0,0 +1,95 @@ +From 1f893f57a3bf9fe1f4bcb25b55aea7f7f9712fe7 Mon Sep 17 00:00:00 2001 +From: Shay Drory +Date: Tue, 2 May 2023 13:36:42 +0300 +Subject: net/mlx5: Devcom, serialize devcom registration + +From: Shay Drory + +commit 1f893f57a3bf9fe1f4bcb25b55aea7f7f9712fe7 upstream. + +From one hand, mlx5 driver is allowing to probe PFs in parallel. +From the other hand, devcom, which is a shared resource between PFs, is +registered without any lock. This might result in memory problems. + +Hence, use the global mlx5_dev_list_lock in order to serialize devcom +registration. 
+ +Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism") +Signed-off-by: Shay Drory +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 19 ++++++++++++++----- + 1 file changed, 14 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +@@ -3,6 +3,7 @@ + + #include + #include "lib/devcom.h" ++#include "mlx5_core.h" + + static LIST_HEAD(devcom_list); + +@@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register + if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; + ++ mlx5_dev_list_lock(); + sguid0 = mlx5_query_nic_system_image_guid(dev); + list_for_each_entry(iter, &devcom_list, list) { + struct mlx5_core_dev *tmp_dev = NULL; +@@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register + + if (!priv) { + priv = mlx5_devcom_list_alloc(); +- if (!priv) +- return ERR_PTR(-ENOMEM); ++ if (!priv) { ++ devcom = ERR_PTR(-ENOMEM); ++ goto out; ++ } + + idx = 0; + new_priv = true; +@@ -114,12 +118,14 @@ struct mlx5_devcom *mlx5_devcom_register + if (!devcom) { + if (new_priv) + kfree(priv); +- return ERR_PTR(-ENOMEM); ++ devcom = ERR_PTR(-ENOMEM); ++ goto out; + } + + if (new_priv) + list_add(&priv->list, &devcom_list); +- ++out: ++ mlx5_dev_list_unlock(); + return devcom; + } + +@@ -132,6 +138,7 @@ void mlx5_devcom_unregister_device(struc + if (IS_ERR_OR_NULL(devcom)) + return; + ++ mlx5_dev_list_lock(); + priv = devcom->priv; + priv->devs[devcom->idx] = NULL; + +@@ -142,10 +149,12 @@ void mlx5_devcom_unregister_device(struc + break; + + if (i != MLX5_DEVCOM_PORTS_SUPPORTED) +- return; ++ goto out; + + list_del(&priv->list); + kfree(priv); ++out: ++ mlx5_dev_list_unlock(); + } + + void mlx5_devcom_register_component(struct mlx5_devcom *devcom, diff --git 
a/queue-6.3/net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch b/queue-6.3/net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch new file mode 100644 index 00000000000..2809a5b2f4b --- /dev/null +++ b/queue-6.3/net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch @@ -0,0 +1,59 @@ +From c7dd225bc224726c22db08e680bf787f60ebdee3 Mon Sep 17 00:00:00 2001 +From: Yevgeny Kliteynik +Date: Sun, 2 Apr 2023 17:14:10 +0300 +Subject: net/mlx5: DR, Check force-loopback RC QP capability independently from RoCE + +From: Yevgeny Kliteynik + +commit c7dd225bc224726c22db08e680bf787f60ebdee3 upstream. + +SW Steering uses RC QP for writing STEs to ICM. This writing is done in LB +(loopback), and FL (force-loopback) QP is preferred for performance. FL is +available when RoCE is enabled or disabled based on RoCE caps. +This patch adds reading of FL capability from HCA caps in addition to the +existing reading from RoCE caps, thus fixing the case where we didn't +have loopback enabled when RoCE was disabled. 
+ +Fixes: 7304d603a57a ("net/mlx5: DR, Add support for force-loopback QP") +Signed-off-by: Itamar Gozlan +Signed-off-by: Yevgeny Kliteynik +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 4 +++- + include/linux/mlx5/mlx5_ifc.h | 4 +++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +@@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_ + caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); + caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); + caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); ++ caps->roce_caps.fl_rc_qp_when_roce_disabled = ++ MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled); + + if (MLX5_CAP_GEN(mdev, roce)) { + err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en); +@@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_ + return err; + + caps->roce_caps.roce_en = roce_en; +- caps->roce_caps.fl_rc_qp_when_roce_disabled = ++ caps->roce_caps.fl_rc_qp_when_roce_disabled |= + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled); + caps->roce_caps.fl_rc_qp_when_roce_enabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled); +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -1679,7 +1679,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { + u8 rc[0x1]; + + u8 uar_4k[0x1]; +- u8 reserved_at_241[0x9]; ++ u8 reserved_at_241[0x7]; ++ u8 fl_rc_qp_when_roce_disabled[0x1]; ++ u8 regexp_params[0x1]; + u8 uar_sz[0x6]; + u8 port_selection_cap[0x1]; + u8 reserved_at_248[0x1]; diff --git a/queue-6.3/net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch b/queue-6.3/net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch new file mode 100644 index 00000000000..90793f49974 --- /dev/null +++ b/queue-6.3/net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch @@ 
-0,0 +1,38 @@ +From 1e5daf5565b61a96e570865091589afc9156e3d3 Mon Sep 17 00:00:00 2001 +From: Erez Shitrit +Date: Thu, 9 Mar 2023 16:43:15 +0200 +Subject: net/mlx5: DR, Fix crc32 calculation to work on big-endian (BE) CPUs + +From: Erez Shitrit + +commit 1e5daf5565b61a96e570865091589afc9156e3d3 upstream. + +When calculating crc for hash index we use the function crc32 that +calculates for little-endian (LE) arch. +Then we convert it to network endianness using htonl(), but it's wrong +to do the conversion in BE archs since the crc32 value is already LE. + +The solution is to switch the bytes from the crc result for all types +of arch. + +Fixes: 40416d8ede65 ("net/mlx5: DR, Replace CRC32 implementation to use kernel lib") +Signed-off-by: Erez Shitrit +Reviewed-by: Alex Vesker +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +@@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void + { + u32 crc = crc32(0, input_data, length); + +- return (__force u32)htonl(crc); ++ return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) | ++ ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000); + } + + bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) diff --git a/queue-6.3/net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch b/queue-6.3/net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch new file mode 100644 index 00000000000..78c3ad253b4 --- /dev/null +++ b/queue-6.3/net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch @@ -0,0 +1,30 @@ +From a65735148e0328f80c0f72f9f8d2f609bfcf4aff Mon Sep 17 00:00:00 2001 +From: Roi Dayan +Date: Mon, 1 May 2023 14:37:56 +0300 +Subject: net/mlx5: Fix error message when failing to allocate device memory
+ +From: Roi Dayan + +commit a65735148e0328f80c0f72f9f8d2f609bfcf4aff upstream. + +Fix the spacing in the error message and use the correct error code pointer. + +Fixes: c9b9dcb430b3 ("net/mlx5: Move device memory management to mlx5_core") +Signed-off-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1041,7 +1041,7 @@ static int mlx5_init_once(struct mlx5_co + + dev->dm = mlx5_dm_create(dev); + if (IS_ERR(dev->dm)) +- mlx5_core_warn(dev, "Failed to init device memory%d\n", err); ++ mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); + + dev->tracer = mlx5_fw_tracer_create(dev); + dev->hv_vhca = mlx5_hv_vhca_create(dev); diff --git a/queue-6.3/net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch b/queue-6.3/net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch new file mode 100644 index 00000000000..7216228359f --- /dev/null +++ b/queue-6.3/net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch @@ -0,0 +1,154 @@ +From 2be5bd42a5bba1a05daedc86cf0e248210009669 Mon Sep 17 00:00:00 2001 +From: Shay Drory +Date: Mon, 20 Mar 2023 13:07:53 +0200 +Subject: net/mlx5: Handle pairing of E-switch via uplink un/load APIs + +From: Shay Drory + +commit 2be5bd42a5bba1a05daedc86cf0e248210009669 upstream. + +When the user switches a device from switchdev mode to legacy mode, mlx5 +first unpairs the E-switch and afterwards unloads the uplink vport. +On the other hand, when the user removes or reloads a device, mlx5 +first unloads the uplink vport and afterwards unpairs the E-switch. + +The latter is causing a bug[1], hence, handle pairing of E-switch as +part of uplink un/load APIs. + +[1] +In case VF_LAG is used, every tc fdb flow is duplicated to the peer +esw.
However, the original esw keeps a pointer to this duplicated +flow, not the peer esw. +e.g.: if the user creates a tc fdb flow over esw0, the flow is duplicated +over esw1, in FW/HW, but in SW, esw0 keeps a pointer to the duplicated +flow. +During module unload while a peer tc fdb flow is still offloaded, in +case the first device to be removed is the peer device (esw1 in the +example above), the peer net-dev is destroyed, and so the mlx5e_priv +is memset to 0. +Afterwards, the peer device is trying to unpair itself from the +original device (esw0 in the example above). The unpair API invokes the +original device to clear peer flow from its eswitch (esw0), but the +peer flow, which is stored over the original eswitch (esw0), is +trying to use the peer mlx5e_priv, which is memset to 0 and results in +the kernel oops below. + +[ 157.964081 ] BUG: unable to handle page fault for address: 000000000002ce60 +[ 157.964662 ] #PF: supervisor read access in kernel mode +[ 157.965123 ] #PF: error_code(0x0000) - not-present page +[ 157.965582 ] PGD 0 P4D 0 +[ 157.965866 ] Oops: 0000 [#1] SMP +[ 157.967670 ] RIP: 0010:mlx5e_tc_del_fdb_flow+0x48/0x460 [mlx5_core] +[ 157.976164 ] Call Trace: +[ 157.976437 ] +[ 157.976690 ] __mlx5e_tc_del_fdb_peer_flow+0xe6/0x100 [mlx5_core] +[ 157.977230 ] mlx5e_tc_clean_fdb_peer_flows+0x67/0x90 [mlx5_core] +[ 157.977767 ] mlx5_esw_offloads_unpair+0x2d/0x1e0 [mlx5_core] +[ 157.984653 ] mlx5_esw_offloads_devcom_event+0xbf/0x130 [mlx5_core] +[ 157.985212 ] mlx5_devcom_send_event+0xa3/0xb0 [mlx5_core] +[ 157.985714 ] esw_offloads_disable+0x5a/0x110 [mlx5_core] +[ 157.986209 ] mlx5_eswitch_disable_locked+0x152/0x170 [mlx5_core] +[ 157.986757 ] mlx5_eswitch_disable+0x51/0x80 [mlx5_core] +[ 157.987248 ] mlx5_unload+0x2a/0xb0 [mlx5_core] +[ 157.987678 ] mlx5_uninit_one+0x5f/0xd0 [mlx5_core] +[ 157.988127 ] remove_one+0x64/0xe0 [mlx5_core] +[ 157.988549 ] pci_device_remove+0x31/0xa0 +[ 157.988933 ] device_release_driver_internal+0x18f/0x1f0 +[ 157.989402 ]
driver_detach+0x3f/0x80 +[ 157.989754 ] bus_remove_driver+0x70/0xf0 +[ 157.990129 ] pci_unregister_driver+0x34/0x90 +[ 157.990537 ] mlx5_cleanup+0xc/0x1c [mlx5_core] +[ 157.990972 ] __x64_sys_delete_module+0x15a/0x250 +[ 157.991398 ] ? exit_to_user_mode_prepare+0xea/0x110 +[ 157.991840 ] do_syscall_64+0x3d/0x90 +[ 157.992198 ] entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows") +Fixes: 1418ddd96afd ("net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG") +Signed-off-by: Shay Drory +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 +++- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 ++++ + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++----- + 3 files changed, 9 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -5449,6 +5449,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_up + goto err_action_counter; + } + ++ mlx5_esw_offloads_devcom_init(esw); ++ + return 0; + + err_action_counter: +@@ -5477,7 +5479,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_re + priv = netdev_priv(rpriv->netdev); + esw = priv->mdev->priv.eswitch; + +- mlx5e_tc_clean_fdb_peer_flows(esw); ++ mlx5_esw_offloads_devcom_cleanup(esw); + + mlx5e_tc_tun_cleanup(uplink_priv->encap); + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +@@ -371,6 +371,8 @@ int mlx5_eswitch_enable(struct mlx5_eswi + void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); + void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); + void mlx5_eswitch_disable(struct mlx5_eswitch *esw); ++void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw); ++void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw); + int 
mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + u16 vport, const u8 *mac); + int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, +@@ -768,6 +770,8 @@ static inline void mlx5_eswitch_cleanup( + static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } + static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} + static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} ++static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {} ++static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {} + static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } + static inline + int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -2781,7 +2781,7 @@ err_out: + return err; + } + +-static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) ++void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) + { + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + +@@ -2804,7 +2804,7 @@ static void esw_offloads_devcom_init(str + ESW_OFFLOADS_DEVCOM_PAIR, esw); + } + +-static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) ++void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) + { + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + +@@ -3274,8 +3274,6 @@ int esw_offloads_enable(struct mlx5_eswi + if (err) + goto err_vports; + +- esw_offloads_devcom_init(esw); +- + return 0; + + err_vports: +@@ -3316,7 +3314,6 @@ static int esw_offloads_stop(struct mlx5 + + void esw_offloads_disable(struct mlx5_eswitch *esw) + { +- esw_offloads_devcom_cleanup(esw); + mlx5_eswitch_disable_pf_vf_vports(esw); + esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); + esw_set_passing_vport_metadata(esw, false); diff --git 
a/queue-6.3/net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch b/queue-6.3/net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch new file mode 100644 index 00000000000..12dd5a1dc62 --- /dev/null +++ b/queue-6.3/net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch @@ -0,0 +1,78 @@ +From afbed3f74830163f9559579dee382cac3cff82da Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Tue, 16 May 2023 18:59:35 -0700 +Subject: net/mlx5e: do as little as possible in napi poll when budget is 0 + +From: Jakub Kicinski + +commit afbed3f74830163f9559579dee382cac3cff82da upstream. + +NAPI gets called with budget of 0 from netpoll, which has interrupts +disabled. We should try to free some space on Tx rings and nothing +else. + +Specifically do not try to handle XDP TX or try to refill Rx buffers - +we can't use the page pool from IRQ context. Don't check if IRQs moved, +either, that makes no sense in netpoll. Netpoll calls _all_ the rings +from whatever CPU it happens to be invoked on. + +In general do as little as possible, the work quickly adds up when +there's tens of rings to poll. + +The immediate stack trace I was seeing is: + + __do_softirq+0xd1/0x2c0 + __local_bh_enable_ip+0xc7/0x120 + + + page_pool_put_defragged_page+0x267/0x320 + mlx5e_free_xdpsq_desc+0x99/0xd0 + mlx5e_poll_xdpsq_cq+0x138/0x3b0 + mlx5e_napi_poll+0xc3/0x8b0 + netpoll_poll_dev+0xce/0x150 + +AFAIU page pool takes a BH lock, releases it and since BH is now +enabled tries to run softirqs. + +Reviewed-by: Tariq Toukan +Fixes: 60bbf7eeef10 ("mlx5: use page_pool for xdp_return_frame call") +Signed-off-by: Jakub Kicinski +Reviewed-by: Simon Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +@@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct * + } + } + ++ /* budget=0 means we may be in IRQ context, do as little as possible */ ++ if (unlikely(!budget)) ++ goto out; ++ + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); + +- if (likely(budget)) { /* budget=0 means: don't poll rx rings */ +- if (xsk_open) +- work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); ++ if (xsk_open) ++ work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + +- if (likely(budget - work_done)) +- work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); ++ if (likely(budget - work_done)) ++ work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + +- busy |= work_done == budget; +- } ++ busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + if (mlx5e_poll_ico_cq(&c->async_icosq.cq)) diff --git a/queue-6.3/net-mlx5e-fix-deadlock-in-tc-route-query-code.patch b/queue-6.3/net-mlx5e-fix-deadlock-in-tc-route-query-code.patch new file mode 100644 index 00000000000..8f98fa91fcf --- /dev/null +++ b/queue-6.3/net-mlx5e-fix-deadlock-in-tc-route-query-code.patch @@ -0,0 +1,354 @@ +From 691c041bf20899fc13c793f92ba61ab660fa3a30 Mon Sep 17 00:00:00 2001 +From: Vlad Buslov +Date: Fri, 31 Mar 2023 14:20:51 +0200 +Subject: net/mlx5e: Fix deadlock in tc route query code + +From: Vlad Buslov + +commit 691c041bf20899fc13c793f92ba61ab660fa3a30 upstream. + +Cited commit causes ABBA deadlock[0] when peer flows are created while +holding the devcom rw semaphore. Due to peer flows offload implementation +the lock is taken much higher up the call chain and there is no obvious way +to easily fix the deadlock. 
Instead, since tc route query code needs the +peer eswitch structure only to perform a lookup in xarray and doesn't +perform any sleeping operations with it, refactor the code for lockless +execution in following ways: + +- RCUify the devcom 'data' pointer. When resetting the pointer +synchronously wait for RCU grace period before returning. This is fine +since devcom is currently only used for synchronization of +pairing/unpairing of eswitches which is rare and already expensive as-is. + +- Wrap all usages of 'paired' boolean in {READ|WRITE}_ONCE(). The flag has +already been used in some unlocked contexts without proper +annotations (e.g. users of mlx5_devcom_is_paired() function), but it wasn't +an issue since all relevant code paths checked it again after obtaining the +devcom semaphore. Now it is also used by mlx5_devcom_get_peer_data_rcu() as +"best effort" check to return NULL when devcom is being unpaired. Note that +while RCU read lock doesn't prevent the unpaired flag from being changed +concurrently it still guarantees that reader can continue to use 'data'. + +- Refactor mlx5e_tc_query_route_vport() function to use new +mlx5_devcom_get_peer_data_rcu() API which fixes the deadlock. + +[0]: + +[ 164.599612] ====================================================== +[ 164.600142] WARNING: possible circular locking dependency detected +[ 164.600667] 6.3.0-rc3+ #1 Not tainted +[ 164.601021] ------------------------------------------------------ +[ 164.601557] handler1/3456 is trying to acquire lock: +[ 164.601998] ffff88811f1714b0 (&esw->offloads.encap_tbl_lock){+.+.}-{3:3}, at: mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.603078] + but task is already holding lock: +[ 164.603617] ffff88810137fc98 (&comp->sem){++++}-{3:3}, at: mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core] +[ 164.604459] + which lock already depends on the new lock. 
+ +[ 164.605190] + the existing dependency chain (in reverse order) is: +[ 164.605848] + -> #1 (&comp->sem){++++}-{3:3}: +[ 164.606380] down_read+0x39/0x50 +[ 164.606772] mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core] +[ 164.607336] mlx5e_tc_query_route_vport+0x86/0xc0 [mlx5_core] +[ 164.607914] mlx5e_tc_tun_route_lookup+0x1a4/0x1d0 [mlx5_core] +[ 164.608495] mlx5e_attach_decap_route+0xc6/0x1e0 [mlx5_core] +[ 164.609063] mlx5e_tc_add_fdb_flow+0x1ea/0x360 [mlx5_core] +[ 164.609627] __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core] +[ 164.610175] mlx5e_configure_flower+0x952/0x1a20 [mlx5_core] +[ 164.610741] tc_setup_cb_add+0xd4/0x200 +[ 164.611146] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] +[ 164.611661] fl_change+0xc95/0x18a0 [cls_flower] +[ 164.612116] tc_new_tfilter+0x3fc/0xd20 +[ 164.612516] rtnetlink_rcv_msg+0x418/0x5b0 +[ 164.612936] netlink_rcv_skb+0x54/0x100 +[ 164.613339] netlink_unicast+0x190/0x250 +[ 164.613746] netlink_sendmsg+0x245/0x4a0 +[ 164.614150] sock_sendmsg+0x38/0x60 +[ 164.614522] ____sys_sendmsg+0x1d0/0x1e0 +[ 164.614934] ___sys_sendmsg+0x80/0xc0 +[ 164.615320] __sys_sendmsg+0x51/0x90 +[ 164.615701] do_syscall_64+0x3d/0x90 +[ 164.616083] entry_SYSCALL_64_after_hwframe+0x46/0xb0 +[ 164.616568] + -> #0 (&esw->offloads.encap_tbl_lock){+.+.}-{3:3}: +[ 164.617210] __lock_acquire+0x159e/0x26e0 +[ 164.617638] lock_acquire+0xc2/0x2a0 +[ 164.618018] __mutex_lock+0x92/0xcd0 +[ 164.618401] mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.618943] post_process_attr+0x153/0x2d0 [mlx5_core] +[ 164.619471] mlx5e_tc_add_fdb_flow+0x164/0x360 [mlx5_core] +[ 164.620021] __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core] +[ 164.620564] mlx5e_configure_flower+0xe33/0x1a20 [mlx5_core] +[ 164.621125] tc_setup_cb_add+0xd4/0x200 +[ 164.621531] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] +[ 164.622047] fl_change+0xc95/0x18a0 [cls_flower] +[ 164.622500] tc_new_tfilter+0x3fc/0xd20 +[ 164.622906] rtnetlink_rcv_msg+0x418/0x5b0 +[ 164.623324] netlink_rcv_skb+0x54/0x100 +[ 
164.623727] netlink_unicast+0x190/0x250 +[ 164.624138] netlink_sendmsg+0x245/0x4a0 +[ 164.624544] sock_sendmsg+0x38/0x60 +[ 164.624919] ____sys_sendmsg+0x1d0/0x1e0 +[ 164.625340] ___sys_sendmsg+0x80/0xc0 +[ 164.625731] __sys_sendmsg+0x51/0x90 +[ 164.626117] do_syscall_64+0x3d/0x90 +[ 164.626502] entry_SYSCALL_64_after_hwframe+0x46/0xb0 +[ 164.626995] + other info that might help us debug this: + +[ 164.627725] Possible unsafe locking scenario: + +[ 164.628268] CPU0 CPU1 +[ 164.628683] ---- ---- +[ 164.629098] lock(&comp->sem); +[ 164.629421] lock(&esw->offloads.encap_tbl_lock); +[ 164.630066] lock(&comp->sem); +[ 164.630555] lock(&esw->offloads.encap_tbl_lock); +[ 164.630993] + *** DEADLOCK *** + +[ 164.631575] 3 locks held by handler1/3456: +[ 164.631962] #0: ffff888124b75130 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200 +[ 164.632703] #1: ffff888116e512b8 (&esw->mode_lock){++++}-{3:3}, at: mlx5_esw_hold+0x39/0x50 [mlx5_core] +[ 164.633552] #2: ffff88810137fc98 (&comp->sem){++++}-{3:3}, at: mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core] +[ 164.634435] + stack backtrace: +[ 164.634883] CPU: 17 PID: 3456 Comm: handler1 Not tainted 6.3.0-rc3+ #1 +[ 164.635431] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[ 164.636340] Call Trace: +[ 164.636616] +[ 164.636863] dump_stack_lvl+0x47/0x70 +[ 164.637217] check_noncircular+0xfe/0x110 +[ 164.637601] __lock_acquire+0x159e/0x26e0 +[ 164.637977] ? mlx5_cmd_set_fte+0x5b0/0x830 [mlx5_core] +[ 164.638472] lock_acquire+0xc2/0x2a0 +[ 164.638828] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.639339] ? lock_is_held_type+0x98/0x110 +[ 164.639728] __mutex_lock+0x92/0xcd0 +[ 164.640074] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.640576] ? __lock_acquire+0x382/0x26e0 +[ 164.640958] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.641468] ? 
mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.641965] mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] +[ 164.642454] ? lock_release+0xbf/0x240 +[ 164.642819] post_process_attr+0x153/0x2d0 [mlx5_core] +[ 164.643318] mlx5e_tc_add_fdb_flow+0x164/0x360 [mlx5_core] +[ 164.643835] __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core] +[ 164.644340] mlx5e_configure_flower+0xe33/0x1a20 [mlx5_core] +[ 164.644862] ? lock_acquire+0xc2/0x2a0 +[ 164.645219] tc_setup_cb_add+0xd4/0x200 +[ 164.645588] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] +[ 164.646067] fl_change+0xc95/0x18a0 [cls_flower] +[ 164.646488] tc_new_tfilter+0x3fc/0xd20 +[ 164.646861] ? tc_del_tfilter+0x810/0x810 +[ 164.647236] rtnetlink_rcv_msg+0x418/0x5b0 +[ 164.647621] ? rtnl_setlink+0x160/0x160 +[ 164.647982] netlink_rcv_skb+0x54/0x100 +[ 164.648348] netlink_unicast+0x190/0x250 +[ 164.648722] netlink_sendmsg+0x245/0x4a0 +[ 164.649090] sock_sendmsg+0x38/0x60 +[ 164.649434] ____sys_sendmsg+0x1d0/0x1e0 +[ 164.649804] ? copy_msghdr_from_user+0x6d/0xa0 +[ 164.650213] ___sys_sendmsg+0x80/0xc0 +[ 164.650563] ? lock_acquire+0xc2/0x2a0 +[ 164.650926] ? lock_acquire+0xc2/0x2a0 +[ 164.651286] ? __fget_files+0x5/0x190 +[ 164.651644] ? find_held_lock+0x2b/0x80 +[ 164.652006] ? __fget_files+0xb9/0x190 +[ 164.652365] ? lock_release+0xbf/0x240 +[ 164.652723] ? 
__fget_files+0xd3/0x190 +[ 164.653079] __sys_sendmsg+0x51/0x90 +[ 164.653435] do_syscall_64+0x3d/0x90 +[ 164.653784] entry_SYSCALL_64_after_hwframe+0x46/0xb0 +[ 164.654229] RIP: 0033:0x7f378054f8bd +[ 164.654577] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 6a c3 f4 ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 be c3 f4 ff 48 +[ 164.656041] RSP: 002b:00007f377fa114b0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e +[ 164.656701] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f378054f8bd +[ 164.657297] RDX: 0000000000000000 RSI: 00007f377fa11540 RDI: 0000000000000014 +[ 164.657885] RBP: 00007f377fa12278 R08: 0000000000000000 R09: 000000000000015c +[ 164.658472] R10: 00007f377fa123d0 R11: 0000000000000293 R12: 0000560962d99bd0 +[ 164.665317] R13: 0000000000000000 R14: 0000560962d99bd0 R15: 00007f377fa11540 + +Fixes: f9d196bd632b ("net/mlx5e: Use correct eswitch for stack devices with lag") +Signed-off-by: Vlad Buslov +Reviewed-by: Roi Dayan +Reviewed-by: Shay Drory +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++---- + drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 48 ++++++++++++++----- + drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 1 + 3 files changed, 48 insertions(+), 20 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1692,11 +1692,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_de + int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) + { + struct mlx5e_priv *out_priv, *route_priv; +- struct mlx5_devcom *devcom = NULL; + struct mlx5_core_dev *route_mdev; + struct mlx5_eswitch *esw; + u16 vhca_id; +- int err; + + out_priv = netdev_priv(out_dev); + esw = out_priv->mdev->priv.eswitch; +@@ -1705,6 +1703,9 @@ int 
mlx5e_tc_query_route_vport(struct ne + + vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); + if (mlx5_lag_is_active(out_priv->mdev)) { ++ struct mlx5_devcom *devcom; ++ int err; ++ + /* In lag case we may get devices from different eswitch instances. + * If we failed to get vport num, it means, mostly, that we on the wrong + * eswitch. +@@ -1713,16 +1714,16 @@ int mlx5e_tc_query_route_vport(struct ne + if (err != -ENOENT) + return err; + ++ rcu_read_lock(); + devcom = out_priv->mdev->priv.devcom; +- esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +- if (!esw) +- return -ENODEV; ++ esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); ++ err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; ++ rcu_read_unlock(); ++ ++ return err; + } + +- err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); +- if (devcom) +- mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +- return err; ++ return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + } + + static int +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +@@ -13,7 +13,7 @@ static LIST_HEAD(devcom_list); + + struct mlx5_devcom_component { + struct { +- void *data; ++ void __rcu *data; + } device[MLX5_DEVCOM_PORTS_SUPPORTED]; + + mlx5_devcom_event_handler_t handler; +@@ -162,7 +162,7 @@ void mlx5_devcom_register_component(stru + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->handler = handler; +- comp->device[devcom->idx].data = data; ++ rcu_assign_pointer(comp->device[devcom->idx].data, data); + up_write(&comp->sem); + } + +@@ -176,8 +176,9 @@ void mlx5_devcom_unregister_component(st + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); +- comp->device[devcom->idx].data = NULL; ++ RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL); + up_write(&comp->sem); ++ synchronize_rcu(); + } + + int mlx5_devcom_send_event(struct mlx5_devcom *devcom, 
+@@ -193,12 +194,15 @@ int mlx5_devcom_send_event(struct mlx5_d + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); +- for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) +- if (i != devcom->idx && comp->device[i].data) { +- err = comp->handler(event, comp->device[i].data, +- event_data); ++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { ++ void *data = rcu_dereference_protected(comp->device[i].data, ++ lockdep_is_held(&comp->sem)); ++ ++ if (i != devcom->idx && data) { ++ err = comp->handler(event, data, event_data); + break; + } ++ } + + up_write(&comp->sem); + return err; +@@ -213,7 +217,7 @@ void mlx5_devcom_set_paired(struct mlx5_ + comp = &devcom->priv->components[id]; + WARN_ON(!rwsem_is_locked(&comp->sem)); + +- comp->paired = paired; ++ WRITE_ONCE(comp->paired, paired); + } + + bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, +@@ -222,7 +226,7 @@ bool mlx5_devcom_is_paired(struct mlx5_d + if (IS_ERR_OR_NULL(devcom)) + return false; + +- return devcom->priv->components[id].paired; ++ return READ_ONCE(devcom->priv->components[id].paired); + } + + void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, +@@ -236,7 +240,7 @@ void *mlx5_devcom_get_peer_data(struct m + + comp = &devcom->priv->components[id]; + down_read(&comp->sem); +- if (!comp->paired) { ++ if (!READ_ONCE(comp->paired)) { + up_read(&comp->sem); + return NULL; + } +@@ -245,7 +249,29 @@ void *mlx5_devcom_get_peer_data(struct m + if (i != devcom->idx) + break; + +- return comp->device[i].data; ++ return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem)); ++} ++ ++void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id) ++{ ++ struct mlx5_devcom_component *comp; ++ int i; ++ ++ if (IS_ERR_OR_NULL(devcom)) ++ return NULL; ++ ++ for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) ++ if (i != devcom->idx) ++ break; ++ ++ comp = &devcom->priv->components[id]; ++ /* This can change concurrently, however 'data' 
pointer will remain ++ * valid for the duration of RCU read section. ++ */ ++ if (!READ_ONCE(comp->paired)) ++ return NULL; ++ ++ return rcu_dereference(comp->device[i].data); + } + + void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, +--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +@@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_d + + void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); ++void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); + void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + diff --git a/queue-6.3/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch b/queue-6.3/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch new file mode 100644 index 00000000000..d0a6fa2e99d --- /dev/null +++ b/queue-6.3/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch @@ -0,0 +1,81 @@ +From 7aa50380191635e5897a773f272829cc961a2be5 Mon Sep 17 00:00:00 2001 +From: Rahul Rameshbabu +Date: Tue, 21 Feb 2023 16:18:48 -0800 +Subject: net/mlx5e: Fix SQ wake logic in ptp napi_poll context + +From: Rahul Rameshbabu + +commit 7aa50380191635e5897a773f272829cc961a2be5 upstream. + +Check in the mlx5e_ptp_poll_ts_cq context if the ptp tx sq should be woken +up. Before change, the ptp tx sq may never wake up if the ptp tx ts skb +fifo is full when mlx5e_poll_tx_cq checks if the queue should be woken up. 
+ +Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support") +Signed-off-by: Rahul Rameshbabu +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 ++ + drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 ++ + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 19 ++++++++++++------- + 3 files changed, 16 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +@@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct + /* ensure cq space is freed before enabling more cqes */ + wmb(); + ++ mlx5e_txqsq_wake(&ptpsq->txqsq); ++ + return work_done == budget; + } + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +@@ -182,6 +182,8 @@ static inline u16 mlx5e_txqsq_get_next_p + return pi; + } + ++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq); ++ + static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) + { + return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skb + } + } + ++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) ++{ ++ if (netif_tx_queue_stopped(sq->txq) && ++ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && ++ mlx5e_ptpsq_fifo_has_room(sq) && ++ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { ++ netif_tx_wake_queue(sq->txq); ++ sq->stats->wake++; ++ } ++} ++ + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) + { + struct mlx5e_sq_stats *stats; +@@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *c + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); + +- if (netif_tx_queue_stopped(sq->txq) && +- 
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && +- mlx5e_ptpsq_fifo_has_room(sq) && +- !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { +- netif_tx_wake_queue(sq->txq); +- stats->wake++; +- } ++ mlx5e_txqsq_wake(sq); + + return (i == MLX5E_TX_CQ_POLL_BUDGET); + } diff --git a/queue-6.3/net-mlx5e-use-correct-encap-attribute-during-invalidation.patch b/queue-6.3/net-mlx5e-use-correct-encap-attribute-during-invalidation.patch new file mode 100644 index 00000000000..438164d0b44 --- /dev/null +++ b/queue-6.3/net-mlx5e-use-correct-encap-attribute-during-invalidation.patch @@ -0,0 +1,62 @@ +From be071cdb167fc3e25fe81922166b3d499d23e8ac Mon Sep 17 00:00:00 2001 +From: Vlad Buslov +Date: Mon, 3 Apr 2023 22:26:00 +0200 +Subject: net/mlx5e: Use correct encap attribute during invalidation + +From: Vlad Buslov + +commit be071cdb167fc3e25fe81922166b3d499d23e8ac upstream. + +With introduction of post action infrastructure most of the users of encap +attribute had been modified in order to obtain the correct attribute by +calling mlx5e_tc_get_encap_attr() helper instead of assuming encap action +is always on default attribute. However, the cited commit didn't modify +mlx5e_invalidate_encap() which prevents it from destroying correct modify +header action which leads to a warning [0]. Fix the issue by using correct +attribute. + +[0]: + +Feb 21 09:47:35 c-237-177-40-045 kernel: WARNING: CPU: 17 PID: 654 at drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:684 mlx5e_tc_attach_mod_hdr+0x1cc/0x230 [mlx5_core] +Feb 21 09:47:35 c-237-177-40-045 kernel: RIP: 0010:mlx5e_tc_attach_mod_hdr+0x1cc/0x230 [mlx5_core] +Feb 21 09:47:35 c-237-177-40-045 kernel: Call Trace: +Feb 21 09:47:35 c-237-177-40-045 kernel: +Feb 21 09:47:35 c-237-177-40-045 kernel: mlx5e_tc_fib_event_work+0x8e3/0x1f60 [mlx5_core] +Feb 21 09:47:35 c-237-177-40-045 kernel: ? mlx5e_take_all_encap_flows+0xe0/0xe0 [mlx5_core] +Feb 21 09:47:35 c-237-177-40-045 kernel: ? 
lock_downgrade+0x6d0/0x6d0 +Feb 21 09:47:35 c-237-177-40-045 kernel: ? lockdep_hardirqs_on_prepare+0x273/0x3f0 +Feb 21 09:47:35 c-237-177-40-045 kernel: ? lockdep_hardirqs_on_prepare+0x273/0x3f0 +Feb 21 09:47:35 c-237-177-40-045 kernel: process_one_work+0x7c2/0x1310 +Feb 21 09:47:35 c-237-177-40-045 kernel: ? lockdep_hardirqs_on_prepare+0x3f0/0x3f0 +Feb 21 09:47:35 c-237-177-40-045 kernel: ? pwq_dec_nr_in_flight+0x230/0x230 +Feb 21 09:47:35 c-237-177-40-045 kernel: ? rwlock_bug.part.0+0x90/0x90 +Feb 21 09:47:35 c-237-177-40-045 kernel: worker_thread+0x59d/0xec0 +Feb 21 09:47:35 c-237-177-40-045 kernel: ? __kthread_parkme+0xd9/0x1d0 + +Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") +Signed-off-by: Vlad Buslov +Reviewed-by: Roi Dayan +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +@@ -1338,11 +1338,13 @@ static void mlx5e_invalidate_encap(struc + struct mlx5e_tc_flow *flow; + + list_for_each_entry(flow, encap_flows, tmp_list) { +- struct mlx5_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *esw_attr; ++ struct mlx5_flow_attr *attr; + + if (!mlx5e_is_offloaded_flow(flow)) + continue; ++ ++ attr = mlx5e_tc_get_encap_attr(flow); + esw_attr = attr->esw_attr; + + if (flow_flag_test(flow, SLOW)) diff --git a/queue-6.3/platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch b/queue-6.3/platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch new file mode 100644 index 00000000000..956f9ed490e --- /dev/null +++ b/queue-6.3/platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch @@ -0,0 +1,40 @@ +From 95e4b25192e9238fd2dbe85d96dd2f8fd1ce9d14 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 15 May 2023 13:32:37 +0300 
+Subject: platform/mellanox: mlxbf-pmc: fix sscanf() error checking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dan Carpenter + +commit 95e4b25192e9238fd2dbe85d96dd2f8fd1ce9d14 upstream. + +The sscanf() function never returns negatives. It returns the number of +items successfully read. + +Fixes: 1a218d312e65 ("platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver") +Signed-off-by: Dan Carpenter +Reviewed-by: Ilpo Järvinen +Link: https://lore.kernel.org/r/4ccdfd28-099b-40bf-8d77-ad4ea2e76b93@kili.mountain +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Greg Kroah-Hartman +--- + drivers/platform/mellanox/mlxbf-pmc.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/platform/mellanox/mlxbf-pmc.c ++++ b/drivers/platform/mellanox/mlxbf-pmc.c +@@ -1348,9 +1348,8 @@ static int mlxbf_pmc_map_counters(struct + + for (i = 0; i < pmc->total_blocks; ++i) { + if (strstr(pmc->block_name[i], "tile")) { +- ret = sscanf(pmc->block_name[i], "tile%d", &tile_num); +- if (ret < 0) +- return ret; ++ if (sscanf(pmc->block_name[i], "tile%d", &tile_num) != 1) ++ return -EINVAL; + + if (tile_num >= pmc->tile_count) + continue; diff --git a/queue-6.3/sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch b/queue-6.3/sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch new file mode 100644 index 00000000000..c2399fb22a7 --- /dev/null +++ b/queue-6.3/sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch @@ -0,0 +1,58 @@ +From 6ca328e985cd995dfd1d5de44046e6074f853fbb Mon Sep 17 00:00:00 2001 +From: Xin Long +Date: Thu, 18 May 2023 16:03:00 -0400 +Subject: sctp: fix an issue that plpmtu can never go to complete state + +From: Xin Long + +commit 6ca328e985cd995dfd1d5de44046e6074f853fbb upstream. + +When doing plpmtu probe, the probe size is growing every time when it +receives the ACK during the Search state until the probe fails. 
When +the failure occurs, pl.probe_high is set and it goes to the Complete +state. + +However, if the link pmtu is huge, like 65535 in loopback_dev, the probe +eventually keeps using SCTP_MAX_PLPMTU as the probe size and never fails. +Because of that, pl.probe_high can not be set, and the plpmtu probe can +never go to the Complete state. + +Fix it by setting pl.probe_high to SCTP_MAX_PLPMTU when the probe size +grows to SCTP_MAX_PLPMTU in sctp_transport_pl_recv(). Also, not allow +the probe size greater than SCTP_MAX_PLPMTU in the Complete state. + +Fixes: b87641aff9e7 ("sctp: do state transition when a probe succeeds on HB ACK recv path") +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/transport.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/sctp/transport.c ++++ b/net/sctp/transport.c +@@ -324,9 +324,12 @@ bool sctp_transport_pl_recv(struct sctp_ + t->pl.probe_size += SCTP_PL_BIG_STEP; + } else if (t->pl.state == SCTP_PL_SEARCH) { + if (!t->pl.probe_high) { +- t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, +- SCTP_MAX_PLPMTU); +- return false; ++ if (t->pl.probe_size < SCTP_MAX_PLPMTU) { ++ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, ++ SCTP_MAX_PLPMTU); ++ return false; ++ } ++ t->pl.probe_high = SCTP_MAX_PLPMTU; + } + t->pl.probe_size += SCTP_PL_MIN_STEP; + if (t->pl.probe_size >= t->pl.probe_high) { +@@ -341,7 +344,7 @@ bool sctp_transport_pl_recv(struct sctp_ + } else if (t->pl.state == SCTP_PL_COMPLETE) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ +- t->pl.probe_size += SCTP_PL_MIN_STEP; ++ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU); + } + + return t->pl.state == SCTP_PL_COMPLETE; diff --git a/queue-6.3/series b/queue-6.3/series index bd39890db2c..dfec036ec04 100644 --- a/queue-6.3/series +++ b/queue-6.3/series @@ 
-96,3 +96,25 @@ coresight-fix-signedness-bug-in-tmc_etr_buf_insert_barrier_packet.patch arm-dts-imx6qdl-mba6-add-missing-pvcie-supply-regulator.patch x86-pci-xen-populate-msi-sysfs-entries.patch xen-pvcalls-back-fix-double-frees-with-pvcalls_new_active_socket.patch +x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch +asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch +asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch +asoc-intel-avs-access-path-components-under-lock.patch +cxl-wait-memory_info_valid-before-access-memory-related-info.patch +cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch +sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch +forcedeth-fix-an-error-handling-path-in-nv_probe.patch +platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch +net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch +net-mlx5e-fix-deadlock-in-tc-route-query-code.patch +net-mlx5e-use-correct-encap-attribute-during-invalidation.patch +net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch +net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch +net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch +net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch +net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch +net-mlx5-collect-command-failures-data-only-for-known-commands.patch +net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch +net-mlx5-devcom-serialize-devcom-registration.patch +arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch +firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch diff --git a/queue-6.3/x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch b/queue-6.3/x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch new file mode 100644 index 00000000000..a01ec356d67 --- /dev/null +++ 
b/queue-6.3/x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch @@ -0,0 +1,69 @@ +From 2e4be0d011f21593c6b316806779ba1eba2cd7e0 Mon Sep 17 00:00:00 2001 +From: Vernon Lovejoy +Date: Fri, 12 May 2023 12:42:32 +0200 +Subject: x86/show_trace_log_lvl: Ensure stack pointer is aligned, again + +From: Vernon Lovejoy + +commit 2e4be0d011f21593c6b316806779ba1eba2cd7e0 upstream. + +The commit e335bb51cc15 ("x86/unwind: Ensure stack pointer is aligned") +tried to align the stack pointer in show_trace_log_lvl(), otherwise the +"stack < stack_info.end" check can't guarantee that the last read does +not go past the end of the stack. + +However, we have the same problem with the initial value of the stack +pointer, it can also be unaligned. So without this patch this trivial +kernel module + + #include <linux/module.h> + + static int init(void) + { + asm volatile("sub $0x4,%rsp"); + dump_stack(); + asm volatile("add $0x4,%rsp"); + + return -EAGAIN; + } + + module_init(init); + MODULE_LICENSE("GPL"); + +crashes the kernel. + +Fixes: e335bb51cc15 ("x86/unwind: Ensure stack pointer is aligned") +Signed-off-by: Vernon Lovejoy +Signed-off-by: Oleg Nesterov +Link: https://lore.kernel.org/r/20230512104232.GA10227@redhat.com +Signed-off-by: Josh Poimboeuf +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/dumpstack.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/dumpstack.c ++++ b/arch/x86/kernel/dumpstack.c +@@ -195,7 +195,6 @@ static void show_trace_log_lvl(struct ta + printk("%sCall Trace:\n", log_lvl); + + unwind_start(&state, task, regs, stack); +- stack = stack ? 
: get_stack_pointer(task, regs); + regs = unwind_get_entry_regs(&state, &partial); + + /* +@@ -214,9 +213,13 @@ static void show_trace_log_lvl(struct ta + * - hardirq stack + * - entry stack + */ +- for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { ++ for (stack = stack ?: get_stack_pointer(task, regs); ++ stack; ++ stack = stack_info.next_sp) { + const char *stack_name; + ++ stack = PTR_ALIGN(stack, sizeof(long)); ++ + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { + /* + * We weren't on a valid stack. It's possible that