git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.3-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 28 May 2023 16:53:28 +0000 (17:53 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 28 May 2023 16:53:28 +0000 (17:53 +0100)
added patches:
arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch
asoc-intel-avs-access-path-components-under-lock.patch
asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch
asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch
cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch
cxl-wait-memory_info_valid-before-access-memory-related-info.patch
firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch
forcedeth-fix-an-error-handling-path-in-nv_probe.patch
net-mlx5-collect-command-failures-data-only-for-known-commands.patch
net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch
net-mlx5-devcom-serialize-devcom-registration.patch
net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch
net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch
net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch
net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch
net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch
net-mlx5e-fix-deadlock-in-tc-route-query-code.patch
net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch
net-mlx5e-use-correct-encap-attribute-during-invalidation.patch
platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch
sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch
x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch

23 files changed:
queue-6.3/arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch [new file with mode: 0644]
queue-6.3/asoc-intel-avs-access-path-components-under-lock.patch [new file with mode: 0644]
queue-6.3/asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch [new file with mode: 0644]
queue-6.3/asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch [new file with mode: 0644]
queue-6.3/cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch [new file with mode: 0644]
queue-6.3/cxl-wait-memory_info_valid-before-access-memory-related-info.patch [new file with mode: 0644]
queue-6.3/firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch [new file with mode: 0644]
queue-6.3/forcedeth-fix-an-error-handling-path-in-nv_probe.patch [new file with mode: 0644]
queue-6.3/net-mlx5-collect-command-failures-data-only-for-known-commands.patch [new file with mode: 0644]
queue-6.3/net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch [new file with mode: 0644]
queue-6.3/net-mlx5-devcom-serialize-devcom-registration.patch [new file with mode: 0644]
queue-6.3/net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch [new file with mode: 0644]
queue-6.3/net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch [new file with mode: 0644]
queue-6.3/net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch [new file with mode: 0644]
queue-6.3/net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch [new file with mode: 0644]
queue-6.3/net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch [new file with mode: 0644]
queue-6.3/net-mlx5e-fix-deadlock-in-tc-route-query-code.patch [new file with mode: 0644]
queue-6.3/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch [new file with mode: 0644]
queue-6.3/net-mlx5e-use-correct-encap-attribute-during-invalidation.patch [new file with mode: 0644]
queue-6.3/platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch [new file with mode: 0644]
queue-6.3/sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch [new file with mode: 0644]
queue-6.3/series
queue-6.3/x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch [new file with mode: 0644]

diff --git a/queue-6.3/arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch b/queue-6.3/arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch
new file mode 100644 (file)
index 0000000..c6c605c
--- /dev/null
@@ -0,0 +1,58 @@
+From f161cea5a20f3aeeb637a88ad1705fc2720b4d58 Mon Sep 17 00:00:00 2001
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Date: Mon, 1 May 2023 13:05:32 -0400
+Subject: arm64: dts: imx8mn-var-som: fix PHY detection bug by adding deassert delay
+
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+
+commit f161cea5a20f3aeeb637a88ad1705fc2720b4d58 upstream.
+
+While testing the ethernet interface on a Variscite symphony carrier
+board using an imx8mn SOM with an onboard ADIN1300 PHY (EC hardware
+configuration), the ethernet PHY is not detected.
+
+The ADIN1300 datasheet indicates that the "Management interface
+active (t4)" state is reached at most 5ms after the reset signal is
+deasserted.
+
+The device tree in Variscite custom git repository uses the following
+property:
+
+    phy-reset-post-delay = <20>;
+
+Add a new MDIO property 'reset-deassert-us' of 20ms to have the same
+delay inside the ethphy node. Adding this property fixes the problem
+with the PHY detection.
+
+Note that this SOM can also have an Atheros AR8033 PHY. In this case,
+a 1ms deassert delay is sufficient. Add a comment to that effect.
+
+Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module")
+Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+@@ -98,11 +98,17 @@
+               #address-cells = <1>;
+               #size-cells = <0>;
+-              ethphy: ethernet-phy@4 {
++              ethphy: ethernet-phy@4 { /* AR8033 or ADIN1300 */
+                       compatible = "ethernet-phy-ieee802.3-c22";
+                       reg = <4>;
+                       reset-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
+                       reset-assert-us = <10000>;
++                      /*
++                       * Deassert delay:
++                       * ADIN1300 requires 5ms.
++                       * AR8033   requires 1ms.
++                       */
++                      reset-deassert-us = <20000>;
+               };
+       };
+ };
diff --git a/queue-6.3/asoc-intel-avs-access-path-components-under-lock.patch b/queue-6.3/asoc-intel-avs-access-path-components-under-lock.patch
new file mode 100644 (file)
index 0000000..e8a3f08
--- /dev/null
@@ -0,0 +1,52 @@
+From d849996f7458042af803b7d15a181922834c5249 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
+ <amadeuszx.slawinski@linux.intel.com>
+Date: Fri, 19 May 2023 22:17:06 +0200
+Subject: ASoC: Intel: avs: Access path components under lock
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+
+commit d849996f7458042af803b7d15a181922834c5249 upstream.
+
+Path and its components should be accessed under lock to prevent
+problems with one thread modifying them while another tries to read.
+
+Fixes: c8c960c10971 ("ASoC: Intel: avs: APL-based platforms support")
+Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+Link: https://lore.kernel.org/r/20230519201711.4073845-3-amadeuszx.slawinski@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/intel/avs/apl.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/sound/soc/intel/avs/apl.c
++++ b/sound/soc/intel/avs/apl.c
+@@ -169,6 +169,7 @@ static bool apl_lp_streaming(struct avs_
+ {
+       struct avs_path *path;
++      spin_lock(&adev->path_list_lock);
+       /* Any gateway without buffer allocated in LP area disqualifies D0IX. */
+       list_for_each_entry(path, &adev->path_list, node) {
+               struct avs_path_pipeline *ppl;
+@@ -188,11 +189,14 @@ static bool apl_lp_streaming(struct avs_
+                               if (cfg->copier.dma_type == INVALID_OBJECT_ID)
+                                       continue;
+-                              if (!mod->gtw_attrs.lp_buffer_alloc)
++                              if (!mod->gtw_attrs.lp_buffer_alloc) {
++                                      spin_unlock(&adev->path_list_lock);
+                                       return false;
++                              }
+                       }
+               }
+       }
++      spin_unlock(&adev->path_list_lock);
+       return true;
+ }
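
A minimal sketch of the locking pattern the avs patch above applies, using hypothetical example_* types rather than the driver's structures: hold the path-list spinlock for the entire walk and make sure every exit path drops it (the sketch uses a single unlock after a break instead of unlocking before each return).

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct example_path {
	struct list_head node;
	bool lp_buffer_alloc;
};

struct example_dev {
	spinlock_t path_list_lock;
	struct list_head path_list;
};

static bool example_lp_streaming(struct example_dev *adev)
{
	struct example_path *path;
	bool ret = true;

	spin_lock(&adev->path_list_lock);
	list_for_each_entry(path, &adev->path_list, node) {
		/* any entry without an LP buffer disqualifies low-power streaming */
		if (!path->lp_buffer_alloc) {
			ret = false;
			break;
		}
	}
	spin_unlock(&adev->path_list_lock);
	return ret;
}
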
diff --git a/queue-6.3/asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch b/queue-6.3/asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch
new file mode 100644 (file)
index 0000000..1faf43c
--- /dev/null
@@ -0,0 +1,36 @@
+From 1cf036deebcdec46d6348842bd2f8931202fd4cd Mon Sep 17 00:00:00 2001
+From: Cezary Rojewski <cezary.rojewski@intel.com>
+Date: Fri, 19 May 2023 22:17:08 +0200
+Subject: ASoC: Intel: avs: Fix declaration of enum avs_channel_config
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cezary Rojewski <cezary.rojewski@intel.com>
+
+commit 1cf036deebcdec46d6348842bd2f8931202fd4cd upstream.
+
+Constant 'C4_CHANNEL' does not exist on the firmware side. Value 0xC is
+reserved for 'C7_1' instead.
+
+Fixes: 580a5912d1fe ("ASoC: Intel: avs: Declare module configuration types")
+Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+Link: https://lore.kernel.org/r/20230519201711.4073845-5-amadeuszx.slawinski@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ sound/soc/intel/avs/messages.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/sound/soc/intel/avs/messages.h
++++ b/sound/soc/intel/avs/messages.h
+@@ -619,7 +619,7 @@ enum avs_channel_config {
+       AVS_CHANNEL_CONFIG_DUAL_MONO = 9,
+       AVS_CHANNEL_CONFIG_I2S_DUAL_STEREO_0 = 10,
+       AVS_CHANNEL_CONFIG_I2S_DUAL_STEREO_1 = 11,
+-      AVS_CHANNEL_CONFIG_4_CHANNEL = 12,
++      AVS_CHANNEL_CONFIG_7_1 = 12,
+       AVS_CHANNEL_CONFIG_INVALID
+ };
diff --git a/queue-6.3/asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch b/queue-6.3/asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch
new file mode 100644 (file)
index 0000000..15d2d3c
--- /dev/null
@@ -0,0 +1,37 @@
+From 95109657471311601b98e71f03d0244f48dc61bb Mon Sep 17 00:00:00 2001
+From: Cezary Rojewski <cezary.rojewski@intel.com>
+Date: Fri, 19 May 2023 22:17:07 +0200
+Subject: ASoC: Intel: Skylake: Fix declaration of enum skl_ch_cfg
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Cezary Rojewski <cezary.rojewski@intel.com>
+
+commit 95109657471311601b98e71f03d0244f48dc61bb upstream.
+
+Constant 'C4_CHANNEL' does not exist on the firmware side. Value 0xC is
+reserved for 'C7_1' instead.
+
+Fixes: 04afbbbb1cba ("ASoC: Intel: Skylake: Update the topology interface structure")
+Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+Link: https://lore.kernel.org/r/20230519201711.4073845-4-amadeuszx.slawinski@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/sound/skl-tplg-interface.h |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/uapi/sound/skl-tplg-interface.h
++++ b/include/uapi/sound/skl-tplg-interface.h
+@@ -66,7 +66,8 @@ enum skl_ch_cfg {
+       SKL_CH_CFG_DUAL_MONO = 9,
+       SKL_CH_CFG_I2S_DUAL_STEREO_0 = 10,
+       SKL_CH_CFG_I2S_DUAL_STEREO_1 = 11,
+-      SKL_CH_CFG_4_CHANNEL = 12,
++      SKL_CH_CFG_7_1 = 12,
++      SKL_CH_CFG_4_CHANNEL = SKL_CH_CFG_7_1,
+       SKL_CH_CFG_INVALID
+ };
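
Unlike the avs change above, this Skylake header is UAPI, so the stale name is kept as an alias rather than removed. A hypothetical userspace fragment, assuming the updated header is installed, showing that old topology code still compiles and now resolves to the firmware's 7.1 value:

#include <sound/skl-tplg-interface.h>

/* Old source keeps building; the constant now carries the 7.1 value (12). */
static const enum skl_ch_cfg legacy_cfg = SKL_CH_CFG_4_CHANNEL; /* == SKL_CH_CFG_7_1 */
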
diff --git a/queue-6.3/cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch b/queue-6.3/cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch
new file mode 100644 (file)
index 0000000..b623015
--- /dev/null
@@ -0,0 +1,145 @@
+From e764f12208b99ac7892c4e3f6bf88d71ca71036f Mon Sep 17 00:00:00 2001
+From: Dave Jiang <dave.jiang@intel.com>
+Date: Thu, 18 May 2023 16:38:20 -0700
+Subject: cxl: Move cxl_await_media_ready() to before capacity info retrieval
+
+From: Dave Jiang <dave.jiang@intel.com>
+
+commit e764f12208b99ac7892c4e3f6bf88d71ca71036f upstream.
+
+Move cxl_await_media_ready() to cxl_pci probe before the driver starts issuing
+IDENTIFY and retrieving memory device information to ensure that the
+device is ready to provide the information. Allow cxl_pci_probe() to succeed
+even if media is not ready. Cache the media failure in cxlds and don't ask
+the device for any media information.
+
+The rationale for proceeding in the !media_ready case is to allow for
+mailbox operations to interrogate and/or remediate the device. After
+media is repaired, rebinding the cxl_pci driver is expected to
+restart the capacity scan.
+
+Suggested-by: Dan Williams <dan.j.williams@intel.com>
+Fixes: b39cb1052a5c ("cxl/mem: Register CXL memX devices")
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/168445310026.3251520.8124296540679268206.stgit@djiang5-mobl3
+[djbw: fixup cxl_test]
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/core/mbox.c      |   15 ++++++++++-----
+ drivers/cxl/cxlmem.h         |    2 ++
+ drivers/cxl/mem.c            |    3 +++
+ drivers/cxl/pci.c            |    6 ++++++
+ drivers/cxl/port.c           |    6 ------
+ tools/testing/cxl/test/mem.c |    1 +
+ 6 files changed, 22 insertions(+), 11 deletions(-)
+
+--- a/drivers/cxl/core/mbox.c
++++ b/drivers/cxl/core/mbox.c
+@@ -984,7 +984,7 @@ static int cxl_mem_get_partition_info(st
+  * cxl_dev_state_identify() - Send the IDENTIFY command to the device.
+  * @cxlds: The device data for the operation
+  *
+- * Return: 0 if identify was executed successfully.
++ * Return: 0 if identify was executed successfully or media not ready.
+  *
+  * This will dispatch the identify command to the device and on success populate
+  * structures to be exported to sysfs.
+@@ -996,6 +996,9 @@ int cxl_dev_state_identify(struct cxl_de
+       struct cxl_mbox_cmd mbox_cmd;
+       int rc;
++      if (!cxlds->media_ready)
++              return 0;
++
+       mbox_cmd = (struct cxl_mbox_cmd) {
+               .opcode = CXL_MBOX_OP_IDENTIFY,
+               .size_out = sizeof(id),
+@@ -1065,10 +1068,12 @@ int cxl_mem_create_range_info(struct cxl
+                                  cxlds->persistent_only_bytes, "pmem");
+       }
+-      rc = cxl_mem_get_partition_info(cxlds);
+-      if (rc) {
+-              dev_err(dev, "Failed to query partition information\n");
+-              return rc;
++      if (cxlds->media_ready) {
++              rc = cxl_mem_get_partition_info(cxlds);
++              if (rc) {
++                      dev_err(dev, "Failed to query partition information\n");
++                      return rc;
++              }
+       }
+       rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
+--- a/drivers/cxl/cxlmem.h
++++ b/drivers/cxl/cxlmem.h
+@@ -227,6 +227,7 @@ struct cxl_event_state {
+  * @regs: Parsed register blocks
+  * @cxl_dvsec: Offset to the PCIe device DVSEC
+  * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
++ * @media_ready: Indicate whether the device media is usable
+  * @payload_size: Size of space for payload
+  *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
+  * @lsa_size: Size of Label Storage Area
+@@ -264,6 +265,7 @@ struct cxl_dev_state {
+       int cxl_dvsec;
+       bool rcd;
++      bool media_ready;
+       size_t payload_size;
+       size_t lsa_size;
+       struct mutex mbox_mutex; /* Protects device mailbox and firmware */
+--- a/drivers/cxl/mem.c
++++ b/drivers/cxl/mem.c
+@@ -104,6 +104,9 @@ static int cxl_mem_probe(struct device *
+       struct dentry *dentry;
+       int rc;
++      if (!cxlds->media_ready)
++              return -EBUSY;
++
+       /*
+        * Someone is trying to reattach this device after it lost its port
+        * connection (an endpoint port previously registered by this memdev was
+--- a/drivers/cxl/pci.c
++++ b/drivers/cxl/pci.c
+@@ -757,6 +757,12 @@ static int cxl_pci_probe(struct pci_dev
+       if (rc)
+               dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
++      rc = cxl_await_media_ready(cxlds);
++      if (rc == 0)
++              cxlds->media_ready = true;
++      else
++              dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
++
+       rc = cxl_pci_setup_mailbox(cxlds);
+       if (rc)
+               return rc;
+--- a/drivers/cxl/port.c
++++ b/drivers/cxl/port.c
+@@ -117,12 +117,6 @@ static int cxl_endpoint_port_probe(struc
+       if (rc)
+               return rc;
+-      rc = cxl_await_media_ready(cxlds);
+-      if (rc) {
+-              dev_err(&port->dev, "Media not active (%d)\n", rc);
+-              return rc;
+-      }
+-
+       rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
+       if (rc)
+               return rc;
+--- a/tools/testing/cxl/test/mem.c
++++ b/tools/testing/cxl/test/mem.c
+@@ -1010,6 +1010,7 @@ static int cxl_mock_mem_probe(struct pla
+       if (rc)
+               return rc;
++      cxlds->media_ready = true;
+       rc = cxl_dev_state_identify(cxlds);
+       if (rc)
+               return rc;
diff --git a/queue-6.3/cxl-wait-memory_info_valid-before-access-memory-related-info.patch b/queue-6.3/cxl-wait-memory_info_valid-before-access-memory-related-info.patch
new file mode 100644 (file)
index 0000000..b776a4f
--- /dev/null
@@ -0,0 +1,148 @@
+From ce17ad0d54985e2595a3e615fda31df61808a08c Mon Sep 17 00:00:00 2001
+From: Dave Jiang <dave.jiang@intel.com>
+Date: Thu, 18 May 2023 14:54:34 -0700
+Subject: cxl: Wait Memory_Info_Valid before access memory related info
+
+From: Dave Jiang <dave.jiang@intel.com>
+
+commit ce17ad0d54985e2595a3e615fda31df61808a08c upstream.
+
+The Memory_Info_Valid bit (CXL 3.0 8.1.3.8.2) indicates that the CXL
+Range Size High and Size Low registers are valid. The bit must be set
+within 1 second of reset deassertion to the device. Check valid bit
+before we check the Memory_Active bit when waiting for
+cxl_await_media_ready() to ensure that the memory info is valid for
+consumption. Also ensures both DVSEC ranges 1 and 2 are ready if DVSEC
+Capability indicates they are both supported.
+
+Fixes: 523e594d9cc0 ("cxl/pci: Implement wait for media active")
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/168444687469.3134781.11033518965387297327.stgit@djiang5-mobl3
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/cxl/core/pci.c |   85 +++++++++++++++++++++++++++++++++++++++++++------
+ drivers/cxl/cxlpci.h   |    2 +
+ 2 files changed, 78 insertions(+), 9 deletions(-)
+
+--- a/drivers/cxl/core/pci.c
++++ b/drivers/cxl/core/pci.c
+@@ -101,23 +101,57 @@ int devm_cxl_port_enumerate_dports(struc
+ }
+ EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
+-/*
+- * Wait up to @media_ready_timeout for the device to report memory
+- * active.
+- */
+-int cxl_await_media_ready(struct cxl_dev_state *cxlds)
++static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
++{
++      struct pci_dev *pdev = to_pci_dev(cxlds->dev);
++      int d = cxlds->cxl_dvsec;
++      bool valid = false;
++      int rc, i;
++      u32 temp;
++
++      if (id > CXL_DVSEC_RANGE_MAX)
++              return -EINVAL;
++
++      /* Check MEM INFO VALID bit first, give up after 1s */
++      i = 1;
++      do {
++              rc = pci_read_config_dword(pdev,
++                                         d + CXL_DVSEC_RANGE_SIZE_LOW(id),
++                                         &temp);
++              if (rc)
++                      return rc;
++
++              valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
++              if (valid)
++                      break;
++              msleep(1000);
++      } while (i--);
++
++      if (!valid) {
++              dev_err(&pdev->dev,
++                      "Timeout awaiting memory range %d valid after 1s.\n",
++                      id);
++              return -ETIMEDOUT;
++      }
++
++      return 0;
++}
++
++static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
+ {
+       struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+       int d = cxlds->cxl_dvsec;
+       bool active = false;
+-      u64 md_status;
+       int rc, i;
++      u32 temp;
+-      for (i = media_ready_timeout; i; i--) {
+-              u32 temp;
++      if (id > CXL_DVSEC_RANGE_MAX)
++              return -EINVAL;
++      /* Check MEM ACTIVE bit, up to 60s timeout by default */
++      for (i = media_ready_timeout; i; i--) {
+               rc = pci_read_config_dword(
+-                      pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
++                      pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
+               if (rc)
+                       return rc;
+@@ -134,6 +168,39 @@ int cxl_await_media_ready(struct cxl_dev
+               return -ETIMEDOUT;
+       }
++      return 0;
++}
++
++/*
++ * Wait up to @media_ready_timeout for the device to report memory
++ * active.
++ */
++int cxl_await_media_ready(struct cxl_dev_state *cxlds)
++{
++      struct pci_dev *pdev = to_pci_dev(cxlds->dev);
++      int d = cxlds->cxl_dvsec;
++      int rc, i, hdm_count;
++      u64 md_status;
++      u16 cap;
++
++      rc = pci_read_config_word(pdev,
++                                d + CXL_DVSEC_CAP_OFFSET, &cap);
++      if (rc)
++              return rc;
++
++      hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
++      for (i = 0; i < hdm_count; i++) {
++              rc = cxl_dvsec_mem_range_valid(cxlds, i);
++              if (rc)
++                      return rc;
++      }
++
++      for (i = 0; i < hdm_count; i++) {
++              rc = cxl_dvsec_mem_range_active(cxlds, i);
++              if (rc)
++                      return rc;
++      }
++
+       md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+       if (!CXLMDEV_READY(md_status))
+               return -EIO;
+--- a/drivers/cxl/cxlpci.h
++++ b/drivers/cxl/cxlpci.h
+@@ -31,6 +31,8 @@
+ #define   CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10))
+ #define     CXL_DVSEC_MEM_BASE_LOW_MASK       GENMASK(31, 28)
++#define CXL_DVSEC_RANGE_MAX           2
++
+ /* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */
+ #define CXL_DVSEC_FUNCTION_MAP                                        2
diff --git a/queue-6.3/firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch b/queue-6.3/firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch
new file mode 100644 (file)
index 0000000..38a9b19
--- /dev/null
@@ -0,0 +1,59 @@
+From 111a833dc5cbef3d05b2a796a7e23cb7f6ff2192 Mon Sep 17 00:00:00 2001
+From: Sudeep Holla <sudeep.holla@arm.com>
+Date: Wed, 3 May 2023 14:12:52 +0100
+Subject: firmware: arm_ffa: Set reserved/MBZ fields to zero in the memory descriptors
+
+From: Sudeep Holla <sudeep.holla@arm.com>
+
+commit 111a833dc5cbef3d05b2a796a7e23cb7f6ff2192 upstream.
+
+The transmit buffers allocated by the driver can be used to transmit data
+by any message/command needing the buffer. However, a buffer is not
+guaranteed to have been zeroed before every new transmission and hence it
+will just contain residual values from the previous transmission. There are
+several reserved fields in the memory descriptors that must be zero (MBZ). The
+receiver can reject the transmission if any such MBZ fields are non-zero.
+
+While we can set the whole page to zero, it is not optimal as most of the
+fields get initialised to the value required for the current transmission.
+
+So, just set the reserved/MBZ fields to zero in the memory descriptors
+explicitly to honour the requirement and keep the receiver happy.
+
+Fixes: cc2195fe536c ("firmware: arm_ffa: Add support for MEM_* interfaces")
+Reported-by: Marc Bonnici <marc.bonnici@arm.com>
+Link: https://lore.kernel.org/r/20230503131252.12585-1-sudeep.holla@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/arm_ffa/driver.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/firmware/arm_ffa/driver.c
++++ b/drivers/firmware/arm_ffa/driver.c
+@@ -420,12 +420,17 @@ ffa_setup_and_transmit(u32 func_id, void
+               ep_mem_access->receiver = args->attrs[idx].receiver;
+               ep_mem_access->attrs = args->attrs[idx].attrs;
+               ep_mem_access->composite_off = COMPOSITE_OFFSET(args->nattrs);
++              ep_mem_access->flag = 0;
++              ep_mem_access->reserved = 0;
+       }
++      mem_region->reserved_0 = 0;
++      mem_region->reserved_1 = 0;
+       mem_region->ep_count = args->nattrs;
+       composite = buffer + COMPOSITE_OFFSET(args->nattrs);
+       composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg);
+       composite->addr_range_cnt = num_entries;
++      composite->reserved = 0;
+       length = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, num_entries);
+       frag_len = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, 0);
+@@ -460,6 +465,7 @@ ffa_setup_and_transmit(u32 func_id, void
+               constituents->address = sg_phys(args->sg);
+               constituents->pg_cnt = args->sg->length / FFA_PAGE_SIZE;
++              constituents->reserved = 0;
+               constituents++;
+               frag_len += sizeof(struct ffa_mem_region_addr_range);
+       } while ((args->sg = sg_next(args->sg)));
diff --git a/queue-6.3/forcedeth-fix-an-error-handling-path-in-nv_probe.patch b/queue-6.3/forcedeth-fix-an-error-handling-path-in-nv_probe.patch
new file mode 100644 (file)
index 0000000..d829e15
--- /dev/null
@@ -0,0 +1,35 @@
+From 5b17a4971d3b2a073f4078dd65331efbe35baa2d Mon Sep 17 00:00:00 2001
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Date: Sat, 20 May 2023 10:30:17 +0200
+Subject: forcedeth: Fix an error handling path in nv_probe()
+
+From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+
+commit 5b17a4971d3b2a073f4078dd65331efbe35baa2d upstream.
+
+If an error occurs after calling nv_mgmt_acquire_sema(), it should be
+undone with a corresponding nv_mgmt_release_sema() call.
+
+Add it in the error handling path of the probe as already done in the
+remove function.
+
+Fixes: cac1c52c3621 ("forcedeth: mgmt unit interface")
+Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
+Acked-by: Zhu Yanjun <zyjzyj2000@gmail.com>
+Link: https://lore.kernel.org/r/355e9a7d351b32ad897251b6f81b5886fcdc6766.1684571393.git.christophe.jaillet@wanadoo.fr
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/nvidia/forcedeth.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/nvidia/forcedeth.c
++++ b/drivers/net/ethernet/nvidia/forcedeth.c
+@@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_
+       return 0;
+ out_error:
++      nv_mgmt_release_sema(dev);
+       if (phystate_orig)
+               writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
+ out_freering:
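
A generic sketch of the probe unwind pattern the forcedeth fix restores, with hypothetical example_* helpers (not the driver's code): whatever a probe routine acquires must be released on every later failure path, in reverse order of acquisition.

#include <linux/errno.h>
#include <linux/pci.h>

static int example_acquire_sema(struct pci_dev *pdev) { return 0; }
static void example_release_sema(struct pci_dev *pdev) { }
static int example_setup_rings(struct pci_dev *pdev) { return -ENOMEM; }

static int example_probe(struct pci_dev *pdev)
{
	int err;

	err = example_acquire_sema(pdev);
	if (err)
		return err;

	err = example_setup_rings(pdev);
	if (err)
		goto out_release_sema;	/* undo the earlier acquisition on failure */

	return 0;

out_release_sema:
	example_release_sema(pdev);
	return err;
}
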
diff --git a/queue-6.3/net-mlx5-collect-command-failures-data-only-for-known-commands.patch b/queue-6.3/net-mlx5-collect-command-failures-data-only-for-known-commands.patch
new file mode 100644 (file)
index 0000000..b883494
--- /dev/null
@@ -0,0 +1,39 @@
+From 2a0a935fb64ee8af253b9c6133bb6702fb152ac2 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Tue, 2 May 2023 11:03:53 +0300
+Subject: net/mlx5: Collect command failures data only for known commands
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 2a0a935fb64ee8af253b9c6133bb6702fb152ac2 upstream.
+
+DEVX can issue a general command that is not used by the mlx5 driver.
+In case such a command fails, mlx5 tries to collect the failure
+data. However, mlx5 doesn't create storage for this command, since
+mlx5 doesn't use it. This leads to an array-index-out-of-bounds error.
+
+Fix it by checking whether the command is known before collecting the
+failure data.
+
+Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/cmd.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+@@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct ml
+ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
+                          u32 syndrome, int err)
+ {
++      const char *namep = mlx5_command_str(opcode);
+       struct mlx5_cmd_stats *stats;
+-      if (!err)
++      if (!err || !(strcmp(namep, "unknown command opcode")))
+               return;
+       stats = &dev->cmd.stats[opcode];
diff --git a/queue-6.3/net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch b/queue-6.3/net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch
new file mode 100644 (file)
index 0000000..e723e9d
--- /dev/null
@@ -0,0 +1,35 @@
+From af87194352cad882d787d06fb7efa714acd95427 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Tue, 2 May 2023 13:35:11 +0300
+Subject: net/mlx5: Devcom, fix error flow in mlx5_devcom_register_device
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit af87194352cad882d787d06fb7efa714acd95427 upstream.
+
+In case devcom allocation fails, mlx5 always frees the priv.
+However, this priv might have been allocated by a different thread,
+and freeing it might lead to use-after-free bugs.
+Fix it by freeing the priv only in case it was allocated by the
+running thread.
+
+Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+@@ -112,7 +112,8 @@ struct mlx5_devcom *mlx5_devcom_register
+       priv->devs[idx] = dev;
+       devcom = mlx5_devcom_alloc(priv, idx);
+       if (!devcom) {
+-              kfree(priv);
++              if (new_priv)
++                      kfree(priv);
+               return ERR_PTR(-ENOMEM);
+       }
diff --git a/queue-6.3/net-mlx5-devcom-serialize-devcom-registration.patch b/queue-6.3/net-mlx5-devcom-serialize-devcom-registration.patch
new file mode 100644 (file)
index 0000000..2b011cc
--- /dev/null
@@ -0,0 +1,95 @@
+From 1f893f57a3bf9fe1f4bcb25b55aea7f7f9712fe7 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Tue, 2 May 2023 13:36:42 +0300
+Subject: net/mlx5: Devcom, serialize devcom registration
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 1f893f57a3bf9fe1f4bcb25b55aea7f7f9712fe7 upstream.
+
+On one hand, the mlx5 driver allows probing PFs in parallel.
+On the other hand, devcom, which is a shared resource between PFs, is
+registered without any lock. This might result in memory problems.
+
+Hence, use the global mlx5_dev_list_lock in order to serialize devcom
+registration.
+
+Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c |   19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+@@ -3,6 +3,7 @@
+ #include <linux/mlx5/vport.h>
+ #include "lib/devcom.h"
++#include "mlx5_core.h"
+ static LIST_HEAD(devcom_list);
+@@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register
+       if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
+               return NULL;
++      mlx5_dev_list_lock();
+       sguid0 = mlx5_query_nic_system_image_guid(dev);
+       list_for_each_entry(iter, &devcom_list, list) {
+               struct mlx5_core_dev *tmp_dev = NULL;
+@@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register
+       if (!priv) {
+               priv = mlx5_devcom_list_alloc();
+-              if (!priv)
+-                      return ERR_PTR(-ENOMEM);
++              if (!priv) {
++                      devcom = ERR_PTR(-ENOMEM);
++                      goto out;
++              }
+               idx = 0;
+               new_priv = true;
+@@ -114,12 +118,14 @@ struct mlx5_devcom *mlx5_devcom_register
+       if (!devcom) {
+               if (new_priv)
+                       kfree(priv);
+-              return ERR_PTR(-ENOMEM);
++              devcom = ERR_PTR(-ENOMEM);
++              goto out;
+       }
+       if (new_priv)
+               list_add(&priv->list, &devcom_list);
+-
++out:
++      mlx5_dev_list_unlock();
+       return devcom;
+ }
+@@ -132,6 +138,7 @@ void mlx5_devcom_unregister_device(struc
+       if (IS_ERR_OR_NULL(devcom))
+               return;
++      mlx5_dev_list_lock();
+       priv = devcom->priv;
+       priv->devs[devcom->idx] = NULL;
+@@ -142,10 +149,12 @@ void mlx5_devcom_unregister_device(struc
+                       break;
+       if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
+-              return;
++              goto out;
+       list_del(&priv->list);
+       kfree(priv);
++out:
++      mlx5_dev_list_unlock();
+ }
+ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
diff --git a/queue-6.3/net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch b/queue-6.3/net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch
new file mode 100644 (file)
index 0000000..2809a5b
--- /dev/null
@@ -0,0 +1,59 @@
+From c7dd225bc224726c22db08e680bf787f60ebdee3 Mon Sep 17 00:00:00 2001
+From: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Date: Sun, 2 Apr 2023 17:14:10 +0300
+Subject: net/mlx5: DR, Check force-loopback RC QP capability independently from RoCE
+
+From: Yevgeny Kliteynik <kliteyn@nvidia.com>
+
+commit c7dd225bc224726c22db08e680bf787f60ebdee3 upstream.
+
+SW Steering uses an RC QP for writing STEs to ICM. This writing is done in LB
+(loopback), and FL (force-loopback) QP is preferred for performance. FL is
+available when RoCE is enabled or disabled based on RoCE caps.
+This patch adds reading of FL capability from HCA caps in addition to the
+existing reading from RoCE caps, thus fixing the case where we didn't
+have loopback enabled when RoCE was disabled.
+
+Fixes: 7304d603a57a ("net/mlx5: DR, Add support for force-loopback QP")
+Signed-off-by: Itamar Gozlan <igozlan@nvidia.com>
+Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c |    4 +++-
+ include/linux/mlx5/mlx5_ifc.h                             |    4 +++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+@@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_
+       caps->gvmi              = MLX5_CAP_GEN(mdev, vhca_id);
+       caps->flex_protocols    = MLX5_CAP_GEN(mdev, flex_parser_protocols);
+       caps->sw_format_ver     = MLX5_CAP_GEN(mdev, steering_format_version);
++      caps->roce_caps.fl_rc_qp_when_roce_disabled =
++              MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
+       if (MLX5_CAP_GEN(mdev, roce)) {
+               err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
+@@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_
+                       return err;
+               caps->roce_caps.roce_en = roce_en;
+-              caps->roce_caps.fl_rc_qp_when_roce_disabled =
++              caps->roce_caps.fl_rc_qp_when_roce_disabled |=
+                       MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
+               caps->roce_caps.fl_rc_qp_when_roce_enabled =
+                       MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);
+--- a/include/linux/mlx5/mlx5_ifc.h
++++ b/include/linux/mlx5/mlx5_ifc.h
+@@ -1679,7 +1679,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
+       u8         rc[0x1];
+       u8         uar_4k[0x1];
+-      u8         reserved_at_241[0x9];
++      u8         reserved_at_241[0x7];
++      u8         fl_rc_qp_when_roce_disabled[0x1];
++      u8         regexp_params[0x1];
+       u8         uar_sz[0x6];
+       u8         port_selection_cap[0x1];
+       u8         reserved_at_248[0x1];
diff --git a/queue-6.3/net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch b/queue-6.3/net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch
new file mode 100644 (file)
index 0000000..90793f4
--- /dev/null
@@ -0,0 +1,38 @@
+From 1e5daf5565b61a96e570865091589afc9156e3d3 Mon Sep 17 00:00:00 2001
+From: Erez Shitrit <erezsh@nvidia.com>
+Date: Thu, 9 Mar 2023 16:43:15 +0200
+Subject: net/mlx5: DR, Fix crc32 calculation to work on big-endian (BE) CPUs
+
+From: Erez Shitrit <erezsh@nvidia.com>
+
+commit 1e5daf5565b61a96e570865091589afc9156e3d3 upstream.
+
+When calculating the crc for the hash index we use the function crc32,
+which calculates for little-endian (LE) archs.
+Then we convert it to network endianness using htonl(), but it's wrong
+to do the conversion in BE archs since the crc32 value is already LE.
+
+The solution is to swap the bytes of the crc result for all types
+of arch.
+
+Fixes: 40416d8ede65 ("net/mlx5: DR, Replace CRC32 implementation to use kernel lib")
+Signed-off-by: Erez Shitrit <erezsh@nvidia.com>
+Reviewed-by: Alex Vesker <valex@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+@@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void
+ {
+       u32 crc = crc32(0, input_data, length);
+-      return (__force u32)htonl(crc);
++      return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
++                          ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
+ }
+ bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
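
For the crc32 patch above, a hedged note on what the open-coded shuffle amounts to: it is an unconditional byte swap of the 32-bit CRC (the same result swab32() gives), whereas the previous htonl() swapped on little-endian hosts but was a no-op on big-endian ones. The helper below is hypothetical, not part of the patch.

#include <linux/swab.h>
#include <linux/types.h>

/* Hypothetical helper equivalent to the open-coded swap in dr_ste_crc32_calc():
 * reverse the CRC's byte order regardless of CPU endianness, so BE and LE hosts
 * derive the same hash index. 0xAABBCCDD -> 0xDDCCBBAA.
 */
static inline u32 dr_crc32_byteswap(u32 crc)
{
	return swab32(crc);
}
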
diff --git a/queue-6.3/net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch b/queue-6.3/net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch
new file mode 100644 (file)
index 0000000..78c3ad2
--- /dev/null
@@ -0,0 +1,30 @@
+From a65735148e0328f80c0f72f9f8d2f609bfcf4aff Mon Sep 17 00:00:00 2001
+From: Roi Dayan <roid@nvidia.com>
+Date: Mon, 1 May 2023 14:37:56 +0300
+Subject: net/mlx5: Fix error message when failing to allocate device memory
+
+From: Roi Dayan <roid@nvidia.com>
+
+commit a65735148e0328f80c0f72f9f8d2f609bfcf4aff upstream.
+
+Fix the spacing in the error message and also print the correct error code from the returned pointer.
+
+Fixes: c9b9dcb430b3 ("net/mlx5: Move device memory management to mlx5_core")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1041,7 +1041,7 @@ static int mlx5_init_once(struct mlx5_co
+       dev->dm = mlx5_dm_create(dev);
+       if (IS_ERR(dev->dm))
+-              mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
++              mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
+       dev->tracer = mlx5_fw_tracer_create(dev);
+       dev->hv_vhca = mlx5_hv_vhca_create(dev);
diff --git a/queue-6.3/net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch b/queue-6.3/net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch
new file mode 100644 (file)
index 0000000..7216228
--- /dev/null
@@ -0,0 +1,154 @@
+From 2be5bd42a5bba1a05daedc86cf0e248210009669 Mon Sep 17 00:00:00 2001
+From: Shay Drory <shayd@nvidia.com>
+Date: Mon, 20 Mar 2023 13:07:53 +0200
+Subject: net/mlx5: Handle pairing of E-switch via uplink un/load APIs
+
+From: Shay Drory <shayd@nvidia.com>
+
+commit 2be5bd42a5bba1a05daedc86cf0e248210009669 upstream.
+
+In case the user switches a device from switchdev mode to legacy mode, mlx5
+first unpairs the E-switch and afterwards unloads the uplink vport.
+On the other hand, in case the user removes or reloads a device, mlx5
+first unloads the uplink vport and afterwards unpairs the E-switch.
+
+The latter causes a bug[1]; hence, handle pairing of the E-switch as
+part of the uplink un/load APIs.
+
+[1]
+In case VF_LAG is used, every tc fdb flow is duplicated to the peer
+esw. However, the original esw keeps a pointer to this duplicated
+flow, not the peer esw.
+e.g.: if the user creates a tc fdb flow over esw0, the flow is duplicated
+over esw1 in FW/HW, but in SW, esw0 keeps a pointer to the duplicated
+flow.
+During module unload while a peer tc fdb flow is still offloaded, in
+case the first device to be removed is the peer device (esw1 in the
+example above), the peer net-dev is destroyed, and so the mlx5e_priv
+is memset to 0.
+Afterwards, the peer device tries to unpair itself from the
+original device (esw0 in the example above). The unpair API invokes the
+original device to clear the peer flow from its eswitch (esw0), but the
+peer flow, which is stored over the original eswitch (esw0), tries to
+use the peer mlx5e_priv, which is memset to 0, resulting in the
+kernel oops below.
+
+[  157.964081 ] BUG: unable to handle page fault for address: 000000000002ce60
+[  157.964662 ] #PF: supervisor read access in kernel mode
+[  157.965123 ] #PF: error_code(0x0000) - not-present page
+[  157.965582 ] PGD 0 P4D 0
+[  157.965866 ] Oops: 0000 [#1] SMP
+[  157.967670 ] RIP: 0010:mlx5e_tc_del_fdb_flow+0x48/0x460 [mlx5_core]
+[  157.976164 ] Call Trace:
+[  157.976437 ]  <TASK>
+[  157.976690 ]  __mlx5e_tc_del_fdb_peer_flow+0xe6/0x100 [mlx5_core]
+[  157.977230 ]  mlx5e_tc_clean_fdb_peer_flows+0x67/0x90 [mlx5_core]
+[  157.977767 ]  mlx5_esw_offloads_unpair+0x2d/0x1e0 [mlx5_core]
+[  157.984653 ]  mlx5_esw_offloads_devcom_event+0xbf/0x130 [mlx5_core]
+[  157.985212 ]  mlx5_devcom_send_event+0xa3/0xb0 [mlx5_core]
+[  157.985714 ]  esw_offloads_disable+0x5a/0x110 [mlx5_core]
+[  157.986209 ]  mlx5_eswitch_disable_locked+0x152/0x170 [mlx5_core]
+[  157.986757 ]  mlx5_eswitch_disable+0x51/0x80 [mlx5_core]
+[  157.987248 ]  mlx5_unload+0x2a/0xb0 [mlx5_core]
+[  157.987678 ]  mlx5_uninit_one+0x5f/0xd0 [mlx5_core]
+[  157.988127 ]  remove_one+0x64/0xe0 [mlx5_core]
+[  157.988549 ]  pci_device_remove+0x31/0xa0
+[  157.988933 ]  device_release_driver_internal+0x18f/0x1f0
+[  157.989402 ]  driver_detach+0x3f/0x80
+[  157.989754 ]  bus_remove_driver+0x70/0xf0
+[  157.990129 ]  pci_unregister_driver+0x34/0x90
+[  157.990537 ]  mlx5_cleanup+0xc/0x1c [mlx5_core]
+[  157.990972 ]  __x64_sys_delete_module+0x15a/0x250
+[  157.991398 ]  ? exit_to_user_mode_prepare+0xea/0x110
+[  157.991840 ]  do_syscall_64+0x3d/0x90
+[  157.992198 ]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows")
+Fixes: 1418ddd96afd ("net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c            |    4 +++-
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h          |    4 ++++
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |    7 ++-----
+ 3 files changed, 9 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -5449,6 +5449,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_up
+               goto err_action_counter;
+       }
++      mlx5_esw_offloads_devcom_init(esw);
++
+       return 0;
+ err_action_counter:
+@@ -5477,7 +5479,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_re
+       priv = netdev_priv(rpriv->netdev);
+       esw = priv->mdev->priv.eswitch;
+-      mlx5e_tc_clean_fdb_peer_flows(esw);
++      mlx5_esw_offloads_devcom_cleanup(esw);
+       mlx5e_tc_tun_cleanup(uplink_priv->encap);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -371,6 +371,8 @@ int mlx5_eswitch_enable(struct mlx5_eswi
+ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
+ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
+ void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
++void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw);
++void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
+ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
+                              u16 vport, const u8 *mac);
+ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
+@@ -768,6 +770,8 @@ static inline void mlx5_eswitch_cleanup(
+ static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
+ static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
+ static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
++static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {}
++static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
+ static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
+ static inline
+ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -2781,7 +2781,7 @@ err_out:
+       return err;
+ }
+-static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
++void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+ {
+       struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+@@ -2804,7 +2804,7 @@ static void esw_offloads_devcom_init(str
+                              ESW_OFFLOADS_DEVCOM_PAIR, esw);
+ }
+-static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
++void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+ {
+       struct mlx5_devcom *devcom = esw->dev->priv.devcom;
+@@ -3274,8 +3274,6 @@ int esw_offloads_enable(struct mlx5_eswi
+       if (err)
+               goto err_vports;
+-      esw_offloads_devcom_init(esw);
+-
+       return 0;
+ err_vports:
+@@ -3316,7 +3314,6 @@ static int esw_offloads_stop(struct mlx5
+ void esw_offloads_disable(struct mlx5_eswitch *esw)
+ {
+-      esw_offloads_devcom_cleanup(esw);
+       mlx5_eswitch_disable_pf_vf_vports(esw);
+       esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
+       esw_set_passing_vport_metadata(esw, false);
diff --git a/queue-6.3/net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch b/queue-6.3/net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch
new file mode 100644 (file)
index 0000000..12dd5a1
--- /dev/null
@@ -0,0 +1,78 @@
+From afbed3f74830163f9559579dee382cac3cff82da Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 16 May 2023 18:59:35 -0700
+Subject: net/mlx5e: do as little as possible in napi poll when budget is 0
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit afbed3f74830163f9559579dee382cac3cff82da upstream.
+
+NAPI gets called with budget of 0 from netpoll, which has interrupts
+disabled. We should try to free some space on Tx rings and nothing
+else.
+
+Specifically do not try to handle XDP TX or try to refill Rx buffers -
+we can't use the page pool from IRQ context. Don't check if IRQs moved,
+either, that makes no sense in netpoll. Netpoll calls _all_ the rings
+from whatever CPU it happens to be invoked on.
+
+In general do as little as possible, the work quickly adds up when
+there's tens of rings to poll.
+
+The immediate stack trace I was seeing is:
+
+    __do_softirq+0xd1/0x2c0
+    __local_bh_enable_ip+0xc7/0x120
+    </IRQ>
+    <TASK>
+    page_pool_put_defragged_page+0x267/0x320
+    mlx5e_free_xdpsq_desc+0x99/0xd0
+    mlx5e_poll_xdpsq_cq+0x138/0x3b0
+    mlx5e_napi_poll+0xc3/0x8b0
+    netpoll_poll_dev+0xce/0x150
+
+AFAIU the page pool takes a BH lock, releases it, and since BH is now
+enabled, tries to run softirqs.
+
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Fixes: 60bbf7eeef10 ("mlx5: use page_pool for xdp_return_frame call")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c |   16 +++++++++-------
+ 1 file changed, 9 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+@@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct *
+               }
+       }
++      /* budget=0 means we may be in IRQ context, do as little as possible */
++      if (unlikely(!budget))
++              goto out;
++
+       busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
+       if (c->xdp)
+               busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
+-      if (likely(budget)) { /* budget=0 means: don't poll rx rings */
+-              if (xsk_open)
+-                      work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
++      if (xsk_open)
++              work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+-              if (likely(budget - work_done))
+-                      work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
++      if (likely(budget - work_done))
++              work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+-              busy |= work_done == budget;
+-      }
++      busy |= work_done == budget;
+       mlx5e_poll_ico_cq(&c->icosq.cq);
+       if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
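
A minimal sketch of the general NAPI shape the mlx5e patch above moves toward, with hypothetical example_* types and helpers (not the driver's code): when budget is 0 the poll routine may be running from netpoll with interrupts disabled, so it only reclaims Tx completions and returns before any Rx, XDP or page-pool work.

#include <linux/netdevice.h>

struct example_channel {
	struct napi_struct napi;
	/* Tx/Rx queue state would live here */
};

static void example_clean_tx(struct example_channel *c) { /* reclaim Tx descriptors */ }
static int example_clean_rx(struct example_channel *c, int budget) { return 0; }

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct example_channel *c = container_of(napi, struct example_channel, napi);
	int work_done;

	example_clean_tx(c);		/* always safe, even from netpoll */

	if (unlikely(!budget))		/* budget == 0: possibly IRQ context, skip Rx/XDP */
		return 0;

	work_done = example_clean_rx(c, budget);
	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}
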
diff --git a/queue-6.3/net-mlx5e-fix-deadlock-in-tc-route-query-code.patch b/queue-6.3/net-mlx5e-fix-deadlock-in-tc-route-query-code.patch
new file mode 100644 (file)
index 0000000..8f98fa9
--- /dev/null
@@ -0,0 +1,354 @@
+From 691c041bf20899fc13c793f92ba61ab660fa3a30 Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Fri, 31 Mar 2023 14:20:51 +0200
+Subject: net/mlx5e: Fix deadlock in tc route query code
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit 691c041bf20899fc13c793f92ba61ab660fa3a30 upstream.
+
+The cited commit causes an ABBA deadlock[0] when peer flows are created while
+holding the devcom rw semaphore. Due to the peer flow offload implementation,
+the lock is taken much higher up the call chain and there is no obvious way
+to easily fix the deadlock. Instead, since tc route query code needs the
+peer eswitch structure only to perform a lookup in xarray and doesn't
+perform any sleeping operations with it, refactor the code for lockless
+execution in following ways:
+
+- RCUify the devcom 'data' pointer. When resetting the pointer
+synchronously wait for RCU grace period before returning. This is fine
+since devcom is currently only used for synchronization of
+pairing/unpairing of eswitches which is rare and already expensive as-is.
+
+- Wrap all usages of 'paired' boolean in {READ|WRITE}_ONCE(). The flag has
+already been used in some unlocked contexts without proper
+annotations (e.g. users of mlx5_devcom_is_paired() function), but it wasn't
+an issue since all relevant code paths checked it again after obtaining the
+devcom semaphore. Now it is also used by mlx5_devcom_get_peer_data_rcu() as
+"best effort" check to return NULL when devcom is being unpaired. Note that
+while RCU read lock doesn't prevent the unpaired flag from being changed
+concurrently it still guarantees that reader can continue to use 'data'.
+
+- Refactor mlx5e_tc_query_route_vport() function to use new
+mlx5_devcom_get_peer_data_rcu() API which fixes the deadlock.
+
+[0]:
+
+[  164.599612] ======================================================
+[  164.600142] WARNING: possible circular locking dependency detected
+[  164.600667] 6.3.0-rc3+ #1 Not tainted
+[  164.601021] ------------------------------------------------------
+[  164.601557] handler1/3456 is trying to acquire lock:
+[  164.601998] ffff88811f1714b0 (&esw->offloads.encap_tbl_lock){+.+.}-{3:3}, at: mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.603078]
+               but task is already holding lock:
+[  164.603617] ffff88810137fc98 (&comp->sem){++++}-{3:3}, at: mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core]
+[  164.604459]
+               which lock already depends on the new lock.
+
+[  164.605190]
+               the existing dependency chain (in reverse order) is:
+[  164.605848]
+               -> #1 (&comp->sem){++++}-{3:3}:
+[  164.606380]        down_read+0x39/0x50
+[  164.606772]        mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core]
+[  164.607336]        mlx5e_tc_query_route_vport+0x86/0xc0 [mlx5_core]
+[  164.607914]        mlx5e_tc_tun_route_lookup+0x1a4/0x1d0 [mlx5_core]
+[  164.608495]        mlx5e_attach_decap_route+0xc6/0x1e0 [mlx5_core]
+[  164.609063]        mlx5e_tc_add_fdb_flow+0x1ea/0x360 [mlx5_core]
+[  164.609627]        __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core]
+[  164.610175]        mlx5e_configure_flower+0x952/0x1a20 [mlx5_core]
+[  164.610741]        tc_setup_cb_add+0xd4/0x200
+[  164.611146]        fl_hw_replace_filter+0x14c/0x1f0 [cls_flower]
+[  164.611661]        fl_change+0xc95/0x18a0 [cls_flower]
+[  164.612116]        tc_new_tfilter+0x3fc/0xd20
+[  164.612516]        rtnetlink_rcv_msg+0x418/0x5b0
+[  164.612936]        netlink_rcv_skb+0x54/0x100
+[  164.613339]        netlink_unicast+0x190/0x250
+[  164.613746]        netlink_sendmsg+0x245/0x4a0
+[  164.614150]        sock_sendmsg+0x38/0x60
+[  164.614522]        ____sys_sendmsg+0x1d0/0x1e0
+[  164.614934]        ___sys_sendmsg+0x80/0xc0
+[  164.615320]        __sys_sendmsg+0x51/0x90
+[  164.615701]        do_syscall_64+0x3d/0x90
+[  164.616083]        entry_SYSCALL_64_after_hwframe+0x46/0xb0
+[  164.616568]
+               -> #0 (&esw->offloads.encap_tbl_lock){+.+.}-{3:3}:
+[  164.617210]        __lock_acquire+0x159e/0x26e0
+[  164.617638]        lock_acquire+0xc2/0x2a0
+[  164.618018]        __mutex_lock+0x92/0xcd0
+[  164.618401]        mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.618943]        post_process_attr+0x153/0x2d0 [mlx5_core]
+[  164.619471]        mlx5e_tc_add_fdb_flow+0x164/0x360 [mlx5_core]
+[  164.620021]        __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core]
+[  164.620564]        mlx5e_configure_flower+0xe33/0x1a20 [mlx5_core]
+[  164.621125]        tc_setup_cb_add+0xd4/0x200
+[  164.621531]        fl_hw_replace_filter+0x14c/0x1f0 [cls_flower]
+[  164.622047]        fl_change+0xc95/0x18a0 [cls_flower]
+[  164.622500]        tc_new_tfilter+0x3fc/0xd20
+[  164.622906]        rtnetlink_rcv_msg+0x418/0x5b0
+[  164.623324]        netlink_rcv_skb+0x54/0x100
+[  164.623727]        netlink_unicast+0x190/0x250
+[  164.624138]        netlink_sendmsg+0x245/0x4a0
+[  164.624544]        sock_sendmsg+0x38/0x60
+[  164.624919]        ____sys_sendmsg+0x1d0/0x1e0
+[  164.625340]        ___sys_sendmsg+0x80/0xc0
+[  164.625731]        __sys_sendmsg+0x51/0x90
+[  164.626117]        do_syscall_64+0x3d/0x90
+[  164.626502]        entry_SYSCALL_64_after_hwframe+0x46/0xb0
+[  164.626995]
+               other info that might help us debug this:
+
+[  164.627725]  Possible unsafe locking scenario:
+
+[  164.628268]        CPU0                    CPU1
+[  164.628683]        ----                    ----
+[  164.629098]   lock(&comp->sem);
+[  164.629421]                                lock(&esw->offloads.encap_tbl_lock);
+[  164.630066]                                lock(&comp->sem);
+[  164.630555]   lock(&esw->offloads.encap_tbl_lock);
+[  164.630993]
+                *** DEADLOCK ***
+
+[  164.631575] 3 locks held by handler1/3456:
+[  164.631962]  #0: ffff888124b75130 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200
+[  164.632703]  #1: ffff888116e512b8 (&esw->mode_lock){++++}-{3:3}, at: mlx5_esw_hold+0x39/0x50 [mlx5_core]
+[  164.633552]  #2: ffff88810137fc98 (&comp->sem){++++}-{3:3}, at: mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core]
+[  164.634435]
+               stack backtrace:
+[  164.634883] CPU: 17 PID: 3456 Comm: handler1 Not tainted 6.3.0-rc3+ #1
+[  164.635431] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[  164.636340] Call Trace:
+[  164.636616]  <TASK>
+[  164.636863]  dump_stack_lvl+0x47/0x70
+[  164.637217]  check_noncircular+0xfe/0x110
+[  164.637601]  __lock_acquire+0x159e/0x26e0
+[  164.637977]  ? mlx5_cmd_set_fte+0x5b0/0x830 [mlx5_core]
+[  164.638472]  lock_acquire+0xc2/0x2a0
+[  164.638828]  ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.639339]  ? lock_is_held_type+0x98/0x110
+[  164.639728]  __mutex_lock+0x92/0xcd0
+[  164.640074]  ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.640576]  ? __lock_acquire+0x382/0x26e0
+[  164.640958]  ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.641468]  ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.641965]  mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core]
+[  164.642454]  ? lock_release+0xbf/0x240
+[  164.642819]  post_process_attr+0x153/0x2d0 [mlx5_core]
+[  164.643318]  mlx5e_tc_add_fdb_flow+0x164/0x360 [mlx5_core]
+[  164.643835]  __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core]
+[  164.644340]  mlx5e_configure_flower+0xe33/0x1a20 [mlx5_core]
+[  164.644862]  ? lock_acquire+0xc2/0x2a0
+[  164.645219]  tc_setup_cb_add+0xd4/0x200
+[  164.645588]  fl_hw_replace_filter+0x14c/0x1f0 [cls_flower]
+[  164.646067]  fl_change+0xc95/0x18a0 [cls_flower]
+[  164.646488]  tc_new_tfilter+0x3fc/0xd20
+[  164.646861]  ? tc_del_tfilter+0x810/0x810
+[  164.647236]  rtnetlink_rcv_msg+0x418/0x5b0
+[  164.647621]  ? rtnl_setlink+0x160/0x160
+[  164.647982]  netlink_rcv_skb+0x54/0x100
+[  164.648348]  netlink_unicast+0x190/0x250
+[  164.648722]  netlink_sendmsg+0x245/0x4a0
+[  164.649090]  sock_sendmsg+0x38/0x60
+[  164.649434]  ____sys_sendmsg+0x1d0/0x1e0
+[  164.649804]  ? copy_msghdr_from_user+0x6d/0xa0
+[  164.650213]  ___sys_sendmsg+0x80/0xc0
+[  164.650563]  ? lock_acquire+0xc2/0x2a0
+[  164.650926]  ? lock_acquire+0xc2/0x2a0
+[  164.651286]  ? __fget_files+0x5/0x190
+[  164.651644]  ? find_held_lock+0x2b/0x80
+[  164.652006]  ? __fget_files+0xb9/0x190
+[  164.652365]  ? lock_release+0xbf/0x240
+[  164.652723]  ? __fget_files+0xd3/0x190
+[  164.653079]  __sys_sendmsg+0x51/0x90
+[  164.653435]  do_syscall_64+0x3d/0x90
+[  164.653784]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+[  164.654229] RIP: 0033:0x7f378054f8bd
+[  164.654577] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 6a c3 f4 ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 be c3 f4 ff 48
+[  164.656041] RSP: 002b:00007f377fa114b0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e
+[  164.656701] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f378054f8bd
+[  164.657297] RDX: 0000000000000000 RSI: 00007f377fa11540 RDI: 0000000000000014
+[  164.657885] RBP: 00007f377fa12278 R08: 0000000000000000 R09: 000000000000015c
+[  164.658472] R10: 00007f377fa123d0 R11: 0000000000000293 R12: 0000560962d99bd0
+[  164.665317] R13: 0000000000000000 R14: 0000560962d99bd0 R15: 00007f377fa11540
+
+Fixes: f9d196bd632b ("net/mlx5e: Use correct eswitch for stack devices with lag")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c      |   19 +++----
+ drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c |   48 ++++++++++++++-----
+ drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h |    1 
+ 3 files changed, 48 insertions(+), 20 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1692,11 +1692,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_de
+ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
+ {
+       struct mlx5e_priv *out_priv, *route_priv;
+-      struct mlx5_devcom *devcom = NULL;
+       struct mlx5_core_dev *route_mdev;
+       struct mlx5_eswitch *esw;
+       u16 vhca_id;
+-      int err;
+       out_priv = netdev_priv(out_dev);
+       esw = out_priv->mdev->priv.eswitch;
+@@ -1705,6 +1703,9 @@ int mlx5e_tc_query_route_vport(struct ne
+       vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
+       if (mlx5_lag_is_active(out_priv->mdev)) {
++              struct mlx5_devcom *devcom;
++              int err;
++
+               /* In lag case we may get devices from different eswitch instances.
+                * If we failed to get vport num, it means, mostly, that we on the wrong
+                * eswitch.
+@@ -1713,16 +1714,16 @@ int mlx5e_tc_query_route_vport(struct ne
+               if (err != -ENOENT)
+                       return err;
++              rcu_read_lock();
+               devcom = out_priv->mdev->priv.devcom;
+-              esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+-              if (!esw)
+-                      return -ENODEV;
++              esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
++              err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
++              rcu_read_unlock();
++
++              return err;
+       }
+-      err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+-      if (devcom)
+-              mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+-      return err;
++      return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
+ }
+ static int
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
+@@ -13,7 +13,7 @@ static LIST_HEAD(devcom_list);
+ struct mlx5_devcom_component {
+       struct {
+-              void *data;
++              void __rcu *data;
+       } device[MLX5_DEVCOM_PORTS_SUPPORTED];
+       mlx5_devcom_event_handler_t handler;
+@@ -162,7 +162,7 @@ void mlx5_devcom_register_component(stru
+       comp = &devcom->priv->components[id];
+       down_write(&comp->sem);
+       comp->handler = handler;
+-      comp->device[devcom->idx].data = data;
++      rcu_assign_pointer(comp->device[devcom->idx].data, data);
+       up_write(&comp->sem);
+ }
+@@ -176,8 +176,9 @@ void mlx5_devcom_unregister_component(st
+       comp = &devcom->priv->components[id];
+       down_write(&comp->sem);
+-      comp->device[devcom->idx].data = NULL;
++      RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
+       up_write(&comp->sem);
++      synchronize_rcu();
+ }
+ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
+@@ -193,12 +194,15 @@ int mlx5_devcom_send_event(struct mlx5_d
+       comp = &devcom->priv->components[id];
+       down_write(&comp->sem);
+-      for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+-              if (i != devcom->idx && comp->device[i].data) {
+-                      err = comp->handler(event, comp->device[i].data,
+-                                          event_data);
++      for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
++              void *data = rcu_dereference_protected(comp->device[i].data,
++                                                     lockdep_is_held(&comp->sem));
++
++              if (i != devcom->idx && data) {
++                      err = comp->handler(event, data, event_data);
+                       break;
+               }
++      }
+       up_write(&comp->sem);
+       return err;
+@@ -213,7 +217,7 @@ void mlx5_devcom_set_paired(struct mlx5_
+       comp = &devcom->priv->components[id];
+       WARN_ON(!rwsem_is_locked(&comp->sem));
+-      comp->paired = paired;
++      WRITE_ONCE(comp->paired, paired);
+ }
+ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
+@@ -222,7 +226,7 @@ bool mlx5_devcom_is_paired(struct mlx5_d
+       if (IS_ERR_OR_NULL(devcom))
+               return false;
+-      return devcom->priv->components[id].paired;
++      return READ_ONCE(devcom->priv->components[id].paired);
+ }
+ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+@@ -236,7 +240,7 @@ void *mlx5_devcom_get_peer_data(struct m
+       comp = &devcom->priv->components[id];
+       down_read(&comp->sem);
+-      if (!comp->paired) {
++      if (!READ_ONCE(comp->paired)) {
+               up_read(&comp->sem);
+               return NULL;
+       }
+@@ -245,7 +249,29 @@ void *mlx5_devcom_get_peer_data(struct m
+               if (i != devcom->idx)
+                       break;
+-      return comp->device[i].data;
++      return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
++}
++
++void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
++{
++      struct mlx5_devcom_component *comp;
++      int i;
++
++      if (IS_ERR_OR_NULL(devcom))
++              return NULL;
++
++      for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
++              if (i != devcom->idx)
++                      break;
++
++      comp = &devcom->priv->components[id];
++      /* This can change concurrently, however 'data' pointer will remain
++       * valid for the duration of RCU read section.
++       */
++      if (!READ_ONCE(comp->paired))
++              return NULL;
++
++      return rcu_dereference(comp->device[i].data);
+ }
+ void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
+@@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_d
+ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
+                               enum mlx5_devcom_components id);
++void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
+ void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
+                                  enum mlx5_devcom_components id);
diff --git a/queue-6.3/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch b/queue-6.3/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch
new file mode 100644 (file)
index 0000000..d0a6fa2
--- /dev/null
@@ -0,0 +1,81 @@
+From 7aa50380191635e5897a773f272829cc961a2be5 Mon Sep 17 00:00:00 2001
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Date: Tue, 21 Feb 2023 16:18:48 -0800
+Subject: net/mlx5e: Fix SQ wake logic in ptp napi_poll context
+
+From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+
+commit 7aa50380191635e5897a773f272829cc961a2be5 upstream.
+
+Check in the mlx5e_ptp_poll_ts_cq context whether the ptp tx sq should be
+woken up. Before this change, the ptp tx sq might never wake up if the ptp
+tx ts skb fifo was full at the moment mlx5e_poll_tx_cq checked whether the
+queue should be woken up.
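+
+In rough terms (condition and helper names are taken from the diff below,
+slightly simplified; the real helper also bumps sq->stats->wake), the wake
+check that both CQ poll paths now share is:
+
+	/* wake the stopped txq once there is WQ room and ts fifo room */
+	if (netif_tx_queue_stopped(sq->txq) &&
+	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+	    mlx5e_ptpsq_fifo_has_room(sq) &&
+	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+		netif_tx_wake_queue(sq->txq);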
+
+Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support")
+Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c  |    2 ++
+ drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h |    2 ++
+ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   |   19 ++++++++++++-------
+ 3 files changed, 16 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+@@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct
+       /* ensure cq space is freed before enabling more cqes */
+       wmb();
++      mlx5e_txqsq_wake(&ptpsq->txqsq);
++
+       return work_done == budget;
+ }
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+@@ -182,6 +182,8 @@ static inline u16 mlx5e_txqsq_get_next_p
+       return pi;
+ }
++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
++
+ static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+ {
+       return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+@@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skb
+       }
+ }
++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
++{
++      if (netif_tx_queue_stopped(sq->txq) &&
++          mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
++          mlx5e_ptpsq_fifo_has_room(sq) &&
++          !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
++              netif_tx_wake_queue(sq->txq);
++              sq->stats->wake++;
++      }
++}
++
+ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
+ {
+       struct mlx5e_sq_stats *stats;
+@@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *c
+       netdev_tx_completed_queue(sq->txq, npkts, nbytes);
+-      if (netif_tx_queue_stopped(sq->txq) &&
+-          mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+-          mlx5e_ptpsq_fifo_has_room(sq) &&
+-          !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+-              netif_tx_wake_queue(sq->txq);
+-              stats->wake++;
+-      }
++      mlx5e_txqsq_wake(sq);
+       return (i == MLX5E_TX_CQ_POLL_BUDGET);
+ }
diff --git a/queue-6.3/net-mlx5e-use-correct-encap-attribute-during-invalidation.patch b/queue-6.3/net-mlx5e-use-correct-encap-attribute-during-invalidation.patch
new file mode 100644 (file)
index 0000000..438164d
--- /dev/null
@@ -0,0 +1,62 @@
+From be071cdb167fc3e25fe81922166b3d499d23e8ac Mon Sep 17 00:00:00 2001
+From: Vlad Buslov <vladbu@nvidia.com>
+Date: Mon, 3 Apr 2023 22:26:00 +0200
+Subject: net/mlx5e: Use correct encap attribute during invalidation
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+commit be071cdb167fc3e25fe81922166b3d499d23e8ac upstream.
+
+With the introduction of the post action infrastructure, most users of the
+encap attribute were modified to obtain the correct attribute by calling the
+mlx5e_tc_get_encap_attr() helper instead of assuming the encap action always
+lives on the default attribute. However, the cited commit didn't modify
+mlx5e_invalidate_encap(), which prevents it from destroying the correct
+modify header action and leads to the warning [0]. Fix the issue by using the
+correct attribute.
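+
+A minimal sketch of the change described above (names are taken from the
+diff below, the surrounding flow loop is elided):
+
+	/* before: always used the default attribute */
+	attr = flow->attr;
+
+	/* after: ask for the attribute that actually carries the encap
+	 * action, which may be a post-action attribute
+	 */
+	attr = mlx5e_tc_get_encap_attr(flow);
+	esw_attr = attr->esw_attr;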
+
+[0]:
+
+Feb 21 09:47:35 c-237-177-40-045 kernel: WARNING: CPU: 17 PID: 654 at drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:684 mlx5e_tc_attach_mod_hdr+0x1cc/0x230 [mlx5_core]
+Feb 21 09:47:35 c-237-177-40-045 kernel: RIP: 0010:mlx5e_tc_attach_mod_hdr+0x1cc/0x230 [mlx5_core]
+Feb 21 09:47:35 c-237-177-40-045 kernel: Call Trace:
+Feb 21 09:47:35 c-237-177-40-045 kernel:  <TASK>
+Feb 21 09:47:35 c-237-177-40-045 kernel:  mlx5e_tc_fib_event_work+0x8e3/0x1f60 [mlx5_core]
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? mlx5e_take_all_encap_flows+0xe0/0xe0 [mlx5_core]
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? lock_downgrade+0x6d0/0x6d0
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? lockdep_hardirqs_on_prepare+0x273/0x3f0
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? lockdep_hardirqs_on_prepare+0x273/0x3f0
+Feb 21 09:47:35 c-237-177-40-045 kernel:  process_one_work+0x7c2/0x1310
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? lockdep_hardirqs_on_prepare+0x3f0/0x3f0
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? pwq_dec_nr_in_flight+0x230/0x230
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? rwlock_bug.part.0+0x90/0x90
+Feb 21 09:47:35 c-237-177-40-045 kernel:  worker_thread+0x59d/0xec0
+Feb 21 09:47:35 c-237-177-40-045 kernel:  ? __kthread_parkme+0xd9/0x1d0
+
+Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -1338,11 +1338,13 @@ static void mlx5e_invalidate_encap(struc
+       struct mlx5e_tc_flow *flow;
+       list_for_each_entry(flow, encap_flows, tmp_list) {
+-              struct mlx5_flow_attr *attr = flow->attr;
+               struct mlx5_esw_flow_attr *esw_attr;
++              struct mlx5_flow_attr *attr;
+               if (!mlx5e_is_offloaded_flow(flow))
+                       continue;
++
++              attr = mlx5e_tc_get_encap_attr(flow);
+               esw_attr = attr->esw_attr;
+               if (flow_flag_test(flow, SLOW))
diff --git a/queue-6.3/platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch b/queue-6.3/platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch
new file mode 100644 (file)
index 0000000..956f9ed
--- /dev/null
@@ -0,0 +1,40 @@
+From 95e4b25192e9238fd2dbe85d96dd2f8fd1ce9d14 Mon Sep 17 00:00:00 2001
+From: Dan Carpenter <dan.carpenter@linaro.org>
+Date: Mon, 15 May 2023 13:32:37 +0300
+Subject: platform/mellanox: mlxbf-pmc: fix sscanf() error checking
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+commit 95e4b25192e9238fd2dbe85d96dd2f8fd1ce9d14 upstream.
+
+The sscanf() function never returns negatives.  It returns the number of
+items successfully read.
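+
+A minimal sketch of the corrected check (mirroring the hunk below;
+pmc->block_name[i] and tile_num come from the driver):
+
+	/* sscanf() reports how many items matched, so compare against the
+	 * expected count instead of testing for a negative return value
+	 */
+	if (sscanf(pmc->block_name[i], "tile%d", &tile_num) != 1)
+		return -EINVAL;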
+
+Fixes: 1a218d312e65 ("platform/mellanox: mlxbf-pmc: Add Mellanox BlueField PMC driver")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Link: https://lore.kernel.org/r/4ccdfd28-099b-40bf-8d77-ad4ea2e76b93@kili.mountain
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/platform/mellanox/mlxbf-pmc.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/drivers/platform/mellanox/mlxbf-pmc.c
++++ b/drivers/platform/mellanox/mlxbf-pmc.c
+@@ -1348,9 +1348,8 @@ static int mlxbf_pmc_map_counters(struct
+       for (i = 0; i < pmc->total_blocks; ++i) {
+               if (strstr(pmc->block_name[i], "tile")) {
+-                      ret = sscanf(pmc->block_name[i], "tile%d", &tile_num);
+-                      if (ret < 0)
+-                              return ret;
++                      if (sscanf(pmc->block_name[i], "tile%d", &tile_num) != 1)
++                              return -EINVAL;
+                       if (tile_num >= pmc->tile_count)
+                               continue;
diff --git a/queue-6.3/sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch b/queue-6.3/sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch
new file mode 100644 (file)
index 0000000..c2399fb
--- /dev/null
@@ -0,0 +1,58 @@
+From 6ca328e985cd995dfd1d5de44046e6074f853fbb Mon Sep 17 00:00:00 2001
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 18 May 2023 16:03:00 -0400
+Subject: sctp: fix an issue that plpmtu can never go to complete state
+
+From: Xin Long <lucien.xin@gmail.com>
+
+commit 6ca328e985cd995dfd1d5de44046e6074f853fbb upstream.
+
+When doing a plpmtu probe, the probe size grows every time an ACK is
+received during the Search state, until the probe fails. When the failure
+occurs, pl.probe_high is set and the probe goes to the Complete state.
+
+However, if the link pmtu is huge, like 65535 in loopback_dev, the probe
+eventually keeps using SCTP_MAX_PLPMTU as the probe size and never fails.
+Because of that, pl.probe_high can not be set, and the plpmtu probe can
+never go to the Complete state.
+
+Fix it by setting pl.probe_high to SCTP_MAX_PLPMTU when the probe size
+grows to SCTP_MAX_PLPMTU in sctp_transport_pl_recv(). Also, do not allow
+the probe size to exceed SCTP_MAX_PLPMTU in the Complete state.
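+
+A condensed sketch of the Search-state handling after the fix (taken from
+the hunk below, comments added here):
+
+	if (!t->pl.probe_high) {
+		if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
+			/* keep growing, but never beyond SCTP_MAX_PLPMTU */
+			t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+					       SCTP_MAX_PLPMTU);
+			return false;
+		}
+		/* already at the ceiling: record it as the upper bound so
+		 * the probe can converge and reach the Complete state
+		 */
+		t->pl.probe_high = SCTP_MAX_PLPMTU;
+	}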
+
+Fixes: b87641aff9e7 ("sctp: do state transition when a probe succeeds on HB ACK recv path")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/transport.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/sctp/transport.c
++++ b/net/sctp/transport.c
+@@ -324,9 +324,12 @@ bool sctp_transport_pl_recv(struct sctp_
+               t->pl.probe_size += SCTP_PL_BIG_STEP;
+       } else if (t->pl.state == SCTP_PL_SEARCH) {
+               if (!t->pl.probe_high) {
+-                      t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+-                                             SCTP_MAX_PLPMTU);
+-                      return false;
++                      if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
++                              t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
++                                                     SCTP_MAX_PLPMTU);
++                              return false;
++                      }
++                      t->pl.probe_high = SCTP_MAX_PLPMTU;
+               }
+               t->pl.probe_size += SCTP_PL_MIN_STEP;
+               if (t->pl.probe_size >= t->pl.probe_high) {
+@@ -341,7 +344,7 @@ bool sctp_transport_pl_recv(struct sctp_
+       } else if (t->pl.state == SCTP_PL_COMPLETE) {
+               /* Raise probe_size again after 30 * interval in Search Complete */
+               t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+-              t->pl.probe_size += SCTP_PL_MIN_STEP;
++              t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
+       }
+       return t->pl.state == SCTP_PL_COMPLETE;
index bd39890db2c9cbb4ce4112d612269e290349e970..dfec036ec047e92ea47a867cd0d5ed1830e29dd6 100644 (file)
@@ -96,3 +96,25 @@ coresight-fix-signedness-bug-in-tmc_etr_buf_insert_barrier_packet.patch
 arm-dts-imx6qdl-mba6-add-missing-pvcie-supply-regulator.patch
 x86-pci-xen-populate-msi-sysfs-entries.patch
 xen-pvcalls-back-fix-double-frees-with-pvcalls_new_active_socket.patch
+x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch
+asoc-intel-skylake-fix-declaration-of-enum-skl_ch_cfg.patch
+asoc-intel-avs-fix-declaration-of-enum-avs_channel_config.patch
+asoc-intel-avs-access-path-components-under-lock.patch
+cxl-wait-memory_info_valid-before-access-memory-related-info.patch
+cxl-move-cxl_await_media_ready-to-before-capacity-info-retrieval.patch
+sctp-fix-an-issue-that-plpmtu-can-never-go-to-complete-state.patch
+forcedeth-fix-an-error-handling-path-in-nv_probe.patch
+platform-mellanox-mlxbf-pmc-fix-sscanf-error-checking.patch
+net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch
+net-mlx5e-fix-deadlock-in-tc-route-query-code.patch
+net-mlx5e-use-correct-encap-attribute-during-invalidation.patch
+net-mlx5e-do-as-little-as-possible-in-napi-poll-when-budget-is-0.patch
+net-mlx5-dr-fix-crc32-calculation-to-work-on-big-endian-be-cpus.patch
+net-mlx5-handle-pairing-of-e-switch-via-uplink-un-load-apis.patch
+net-mlx5-dr-check-force-loopback-rc-qp-capability-independently-from-roce.patch
+net-mlx5-fix-error-message-when-failing-to-allocate-device-memory.patch
+net-mlx5-collect-command-failures-data-only-for-known-commands.patch
+net-mlx5-devcom-fix-error-flow-in-mlx5_devcom_register_device.patch
+net-mlx5-devcom-serialize-devcom-registration.patch
+arm64-dts-imx8mn-var-som-fix-phy-detection-bug-by-adding-deassert-delay.patch
+firmware-arm_ffa-set-reserved-mbz-fields-to-zero-in-the-memory-descriptors.patch
diff --git a/queue-6.3/x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch b/queue-6.3/x86-show_trace_log_lvl-ensure-stack-pointer-is-aligned-again.patch
new file mode 100644 (file)
index 0000000..a01ec35
--- /dev/null
@@ -0,0 +1,69 @@
+From 2e4be0d011f21593c6b316806779ba1eba2cd7e0 Mon Sep 17 00:00:00 2001
+From: Vernon Lovejoy <vlovejoy@redhat.com>
+Date: Fri, 12 May 2023 12:42:32 +0200
+Subject: x86/show_trace_log_lvl: Ensure stack pointer is aligned, again
+
+From: Vernon Lovejoy <vlovejoy@redhat.com>
+
+commit 2e4be0d011f21593c6b316806779ba1eba2cd7e0 upstream.
+
+The commit e335bb51cc15 ("x86/unwind: Ensure stack pointer is aligned")
+tried to align the stack pointer in show_trace_log_lvl(), otherwise the
+"stack < stack_info.end" check can't guarantee that the last read does
+not go past the end of the stack.
+
+However, we have the same problem with the initial value of the stack
+pointer: it can also be unaligned. So without this patch, this trivial
+kernel module
+
+       #include <linux/module.h>
+
+       static int init(void)
+       {
+               asm volatile("sub    $0x4,%rsp");
+               dump_stack();
+               asm volatile("add    $0x4,%rsp");
+
+               return -EAGAIN;
+       }
+
+       module_init(init);
+       MODULE_LICENSE("GPL");
+
+crashes the kernel.
+
+Fixes: e335bb51cc15 ("x86/unwind: Ensure stack pointer is aligned")
+Signed-off-by: Vernon Lovejoy <vlovejoy@redhat.com>
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Link: https://lore.kernel.org/r/20230512104232.GA10227@redhat.com
+Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/dumpstack.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/dumpstack.c
++++ b/arch/x86/kernel/dumpstack.c
+@@ -195,7 +195,6 @@ static void show_trace_log_lvl(struct ta
+       printk("%sCall Trace:\n", log_lvl);
+       unwind_start(&state, task, regs, stack);
+-      stack = stack ? : get_stack_pointer(task, regs);
+       regs = unwind_get_entry_regs(&state, &partial);
+       /*
+@@ -214,9 +213,13 @@ static void show_trace_log_lvl(struct ta
+        * - hardirq stack
+        * - entry stack
+        */
+-      for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
++      for (stack = stack ?: get_stack_pointer(task, regs);
++           stack;
++           stack = stack_info.next_sp) {
+               const char *stack_name;
++              stack = PTR_ALIGN(stack, sizeof(long));
++
+               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+                       /*
+                        * We weren't on a valid stack.  It's possible that