git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.1
author Sasha Levin <sashal@kernel.org>
Sat, 4 Jan 2025 02:42:49 +0000 (21:42 -0500)
committer Sasha Levin <sashal@kernel.org>
Sat, 4 Jan 2025 02:42:49 +0000 (21:42 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-6.1/net-mctp-handle-skb-cleanup-on-sock_queue-failures.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/thunderbolt-add-support-for-intel-lunar-lake.patch [new file with mode: 0644]
queue-6.1/thunderbolt-add-support-for-intel-panther-lake-m-p.patch [new file with mode: 0644]
queue-6.1/thunderbolt-don-t-display-nvm_version-unless-upgrade.patch [new file with mode: 0644]
queue-6.1/usb-xhci-limit-stop-endpoint-retries.patch [new file with mode: 0644]
queue-6.1/xhci-retry-stop-endpoint-on-buggy-nec-controllers.patch [new file with mode: 0644]
queue-6.1/xhci-turn-nec-specific-quirk-for-handling-stop-endpo.patch [new file with mode: 0644]

diff --git a/queue-6.1/net-mctp-handle-skb-cleanup-on-sock_queue-failures.patch b/queue-6.1/net-mctp-handle-skb-cleanup-on-sock_queue-failures.patch
new file mode 100644 (file)
index 0000000..232bd96
--- /dev/null
@@ -0,0 +1,128 @@
+From 1ea337523abcae33664628727d3a752608c2893f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 Dec 2024 11:53:01 +0800
+Subject: net: mctp: handle skb cleanup on sock_queue failures
+
+From: Jeremy Kerr <jk@codeconstruct.com.au>
+
+[ Upstream commit ce1219c3f76bb131d095e90521506d3c6ccfa086 ]
+
+Currently, we don't use the return value from sock_queue_rcv_skb, which
+means we may leak skbs if a message is not successfully queued to a
+socket.
+
+Instead, ensure that we're freeing the skb where the sock hasn't
+otherwise taken ownership of the skb by adding checks on the
+sock_queue_rcv_skb() to invoke a kfree on failure.
+
+In doing so, rather than using the 'rc' value to trigger the
+kfree_skb(), use the skb pointer itself, which is more explicit.
+
+Also, add a kunit test for the sock delivery failure cases.
+
+Fixes: 4a992bbd3650 ("mctp: Implement message fragmentation & reassembly")
+Cc: stable@vger.kernel.org
+Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
+Link: https://patch.msgid.link/20241218-mctp-next-v2-1-1c1729645eaa@codeconstruct.com.au
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mctp/route.c | 36 ++++++++++++++++++++++++++----------
+ 1 file changed, 26 insertions(+), 10 deletions(-)
+
+diff --git a/net/mctp/route.c b/net/mctp/route.c
+index ea7cb9973128..e72cdd4ce588 100644
+--- a/net/mctp/route.c
++++ b/net/mctp/route.c
+@@ -334,8 +334,13 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+       msk = NULL;
+       rc = -EINVAL;
+-      /* we may be receiving a locally-routed packet; drop source sk
+-       * accounting
++      /* We may be receiving a locally-routed packet; drop source sk
++       * accounting.
++       *
++       * From here, we will either queue the skb - either to a frag_queue, or
++       * to a receiving socket. When that succeeds, we clear the skb pointer;
++       * a non-NULL skb on exit will be otherwise unowned, and hence
++       * kfree_skb()-ed.
+        */
+       skb_orphan(skb);
+@@ -389,7 +394,9 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+                * pending key.
+                */
+               if (flags & MCTP_HDR_FLAG_EOM) {
+-                      sock_queue_rcv_skb(&msk->sk, skb);
++                      rc = sock_queue_rcv_skb(&msk->sk, skb);
++                      if (!rc)
++                              skb = NULL;
+                       if (key) {
+                               /* we've hit a pending reassembly; not much we
+                                * can do but drop it
+@@ -398,7 +405,6 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+                                                  MCTP_TRACE_KEY_REPLIED);
+                               key = NULL;
+                       }
+-                      rc = 0;
+                       goto out_unlock;
+               }
+@@ -425,8 +431,10 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+                        * this function.
+                        */
+                       rc = mctp_key_add(key, msk);
+-                      if (!rc)
++                      if (!rc) {
+                               trace_mctp_key_acquire(key);
++                              skb = NULL;
++                      }
+                       /* we don't need to release key->lock on exit, so
+                        * clean up here and suppress the unlock via
+@@ -444,6 +452,8 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+                               key = NULL;
+                       } else {
+                               rc = mctp_frag_queue(key, skb);
++                              if (!rc)
++                                      skb = NULL;
+                       }
+               }
+@@ -458,12 +468,19 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+               else
+                       rc = mctp_frag_queue(key, skb);
++              if (rc)
++                      goto out_unlock;
++
++              /* we've queued; the queue owns the skb now */
++              skb = NULL;
++
+               /* end of message? deliver to socket, and we're done with
+                * the reassembly/response key
+                */
+-              if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+-                      sock_queue_rcv_skb(key->sk, key->reasm_head);
+-                      key->reasm_head = NULL;
++              if (flags & MCTP_HDR_FLAG_EOM) {
++                      rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
++                      if (!rc)
++                              key->reasm_head = NULL;
+                       __mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
+                       key = NULL;
+               }
+@@ -482,8 +499,7 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+       if (any_key)
+               mctp_key_unref(any_key);
+ out:
+-      if (rc)
+-              kfree_skb(skb);
++      kfree_skb(skb);
+       return rc;
+ }
+-- 
+2.39.5
+
diff --git a/queue-6.1/series b/queue-6.1/series
index 7b4f4be4c4f3384bd76edf4ac4da20d391bb663d..81fe0498742d8a91eb915c68e29aaa183b684355 100644 (file)
@@ -2,3 +2,10 @@ x86-hyperv-fix-hv-tsc-page-based-sched_clock-for-hibernation.patch
 selinux-ignore-unknown-extended-permissions.patch
 btrfs-fix-use-after-free-in-btrfs_encoded_read_endio.patch
 tracing-have-process_string-also-allow-arrays.patch
+thunderbolt-add-support-for-intel-lunar-lake.patch
+thunderbolt-add-support-for-intel-panther-lake-m-p.patch
+thunderbolt-don-t-display-nvm_version-unless-upgrade.patch
+xhci-retry-stop-endpoint-on-buggy-nec-controllers.patch
+usb-xhci-limit-stop-endpoint-retries.patch
+xhci-turn-nec-specific-quirk-for-handling-stop-endpo.patch
+net-mctp-handle-skb-cleanup-on-sock_queue-failures.patch
diff --git a/queue-6.1/thunderbolt-add-support-for-intel-lunar-lake.patch b/queue-6.1/thunderbolt-add-support-for-intel-lunar-lake.patch
new file mode 100644 (file)
index 0000000..48e31dc
--- /dev/null
@@ -0,0 +1,54 @@
+From 640d84afd203a4e2318b55bca9fc585152943c10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 May 2022 13:47:11 +0300
+Subject: thunderbolt: Add support for Intel Lunar Lake
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+[ Upstream commit 2cd3da4e37453019e21a486d9de3144f46b4fdf7 ]
+
+Intel Lunar Lake has similar integrated Thunderbolt/USB4 controller as
+Intel Meteor Lake with some small differences in the host router (it has
+3 DP IN adapters for instance). Add the Intel Lunar Lake PCI IDs to the
+driver list of supported devices.
+
+Tested-by: Pengfei Xu <pengfei.xu@intel.com>
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Stable-dep-of: 8644b48714dc ("thunderbolt: Add support for Intel Panther Lake-M/P")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/thunderbolt/nhi.c | 4 ++++
+ drivers/thunderbolt/nhi.h | 2 ++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
+index 288aaa05d007..5301effa6ab0 100644
+--- a/drivers/thunderbolt/nhi.c
++++ b/drivers/thunderbolt/nhi.c
+@@ -1479,6 +1479,10 @@ static struct pci_device_id nhi_ids[] = {
+         .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL_P_NHI1),
+         .driver_data = (kernel_ulong_t)&icl_nhi_ops },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI0),
++        .driver_data = (kernel_ulong_t)&icl_nhi_ops },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI1),
++        .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI) },
+diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
+index 0f029ce75882..7a07c7c1a9c2 100644
+--- a/drivers/thunderbolt/nhi.h
++++ b/drivers/thunderbolt/nhi.h
+@@ -90,6 +90,8 @@ extern const struct tb_nhi_ops icl_nhi_ops;
+ #define PCI_DEVICE_ID_INTEL_TGL_H_NHI1                        0x9a21
+ #define PCI_DEVICE_ID_INTEL_RPL_NHI0                  0xa73e
+ #define PCI_DEVICE_ID_INTEL_RPL_NHI1                  0xa76d
++#define PCI_DEVICE_ID_INTEL_LNL_NHI0                  0xa833
++#define PCI_DEVICE_ID_INTEL_LNL_NHI1                  0xa834
+ #define PCI_CLASS_SERIAL_USB_USB4                     0x0c0340
+-- 
+2.39.5
+
diff --git a/queue-6.1/thunderbolt-add-support-for-intel-panther-lake-m-p.patch b/queue-6.1/thunderbolt-add-support-for-intel-panther-lake-m-p.patch
new file mode 100644 (file)
index 0000000..b2a21f6
--- /dev/null
@@ -0,0 +1,58 @@
+From bd754a80589488a46490f5ac74d7a7c4834c7e46 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 May 2024 10:15:14 +0300
+Subject: thunderbolt: Add support for Intel Panther Lake-M/P
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+[ Upstream commit 8644b48714dca8bf2f42a4ff8311de8efc9bd8c3 ]
+
+Intel Panther Lake-M/P has the same integrated Thunderbolt/USB4
+controller as Lunar Lake. Add these PCI IDs to the driver list of
+supported devices.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/thunderbolt/nhi.c | 8 ++++++++
+ drivers/thunderbolt/nhi.h | 4 ++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
+index 5301effa6ab0..56a9222b439a 100644
+--- a/drivers/thunderbolt/nhi.c
++++ b/drivers/thunderbolt/nhi.c
+@@ -1483,6 +1483,14 @@ static struct pci_device_id nhi_ids[] = {
+         .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_LNL_NHI1),
+         .driver_data = (kernel_ulong_t)&icl_nhi_ops },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_M_NHI0),
++        .driver_data = (kernel_ulong_t)&icl_nhi_ops },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_M_NHI1),
++        .driver_data = (kernel_ulong_t)&icl_nhi_ops },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_P_NHI0),
++        .driver_data = (kernel_ulong_t)&icl_nhi_ops },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_PTL_P_NHI1),
++        .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_80G_NHI) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BARLOW_RIDGE_HOST_40G_NHI) },
+diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
+index 7a07c7c1a9c2..16744f25a9a0 100644
+--- a/drivers/thunderbolt/nhi.h
++++ b/drivers/thunderbolt/nhi.h
+@@ -92,6 +92,10 @@ extern const struct tb_nhi_ops icl_nhi_ops;
+ #define PCI_DEVICE_ID_INTEL_RPL_NHI1                  0xa76d
+ #define PCI_DEVICE_ID_INTEL_LNL_NHI0                  0xa833
+ #define PCI_DEVICE_ID_INTEL_LNL_NHI1                  0xa834
++#define PCI_DEVICE_ID_INTEL_PTL_M_NHI0                        0xe333
++#define PCI_DEVICE_ID_INTEL_PTL_M_NHI1                        0xe334
++#define PCI_DEVICE_ID_INTEL_PTL_P_NHI0                        0xe433
++#define PCI_DEVICE_ID_INTEL_PTL_P_NHI1                        0xe434
+ #define PCI_CLASS_SERIAL_USB_USB4                     0x0c0340
+-- 
+2.39.5
+
diff --git a/queue-6.1/thunderbolt-don-t-display-nvm_version-unless-upgrade.patch b/queue-6.1/thunderbolt-don-t-display-nvm_version-unless-upgrade.patch
new file mode 100644 (file)
index 0000000..7344a47
--- /dev/null
@@ -0,0 +1,77 @@
+From 3e5960dbcc95f08c6e945c90c54eadf13c29d13d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 10:25:51 -0600
+Subject: thunderbolt: Don't display nvm_version unless upgrade supported
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+[ Upstream commit e34f1717ef0632fcec5cb827e5e0e9f223d70c9b ]
+
+The read will never succeed if NVM wasn't initialized due to an unknown
+format.
+
+Add a new callback for visibility to only show when supported.
+
+Cc: stable@vger.kernel.org
+Fixes: aef9c693e7e5 ("thunderbolt: Move vendor specific NVM handling into nvm.c")
+Reported-by: Richard Hughes <hughsient@gmail.com>
+Closes: https://github.com/fwupd/fwupd/issues/8200
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/thunderbolt/retimer.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c
+index edbd92435b41..5bd5c22a5085 100644
+--- a/drivers/thunderbolt/retimer.c
++++ b/drivers/thunderbolt/retimer.c
+@@ -98,6 +98,7 @@ static int tb_retimer_nvm_add(struct tb_retimer *rt)
+ err_nvm:
+       dev_dbg(&rt->dev, "NVM upgrade disabled\n");
++      rt->no_nvm_upgrade = true;
+       if (!IS_ERR(nvm))
+               tb_nvm_free(nvm);
+@@ -177,8 +178,6 @@ static ssize_t nvm_authenticate_show(struct device *dev,
+       if (!rt->nvm)
+               ret = -EAGAIN;
+-      else if (rt->no_nvm_upgrade)
+-              ret = -EOPNOTSUPP;
+       else
+               ret = sysfs_emit(buf, "%#x\n", rt->auth_status);
+@@ -304,6 +303,19 @@ static ssize_t vendor_show(struct device *dev, struct device_attribute *attr,
+ }
+ static DEVICE_ATTR_RO(vendor);
++static umode_t retimer_is_visible(struct kobject *kobj, struct attribute *attr,
++                                int n)
++{
++      struct device *dev = kobj_to_dev(kobj);
++      struct tb_retimer *rt = tb_to_retimer(dev);
++
++      if (attr == &dev_attr_nvm_authenticate.attr ||
++          attr == &dev_attr_nvm_version.attr)
++              return rt->no_nvm_upgrade ? 0 : attr->mode;
++
++      return attr->mode;
++}
++
+ static struct attribute *retimer_attrs[] = {
+       &dev_attr_device.attr,
+       &dev_attr_nvm_authenticate.attr,
+@@ -313,6 +325,7 @@ static struct attribute *retimer_attrs[] = {
+ };
+ static const struct attribute_group retimer_group = {
++      .is_visible = retimer_is_visible,
+       .attrs = retimer_attrs,
+ };
+-- 
+2.39.5
+
diff --git a/queue-6.1/usb-xhci-limit-stop-endpoint-retries.patch b/queue-6.1/usb-xhci-limit-stop-endpoint-retries.patch
new file mode 100644 (file)
index 0000000..5cea88e
--- /dev/null
@@ -0,0 +1,149 @@
+From 0e468029f37d5fa77e3eb7177993865d71efe3cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Nov 2024 12:14:57 +0200
+Subject: usb: xhci: Limit Stop Endpoint retries
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+[ Upstream commit 42b7581376015c1bbcbe5831f043cd0ac119d028 ]
+
+Some host controllers fail to atomically transition an endpoint to the
+Running state on a doorbell ring and enter a hidden "Restarting" state,
+which looks very much like Stopped, with the important difference that
+it will spontaneously transition to Running anytime soon.
+
+A Stop Endpoint command queued in the Restarting state typically fails
+with Context State Error and the completion handler sees the Endpoint
+Context State as either still Stopped or already Running. Even a case
+of Halted was observed, when an error occurred right after the restart.
+
+The Halted state is already recovered from by resetting the endpoint.
+The Running state is handled by retrying Stop Endpoint.
+
+The Stopped state was recognized as a problem on NEC controllers and
+worked around also by retrying, because the endpoint soon restarts and
+then stops for good. But there is a risk: the command may fail if the
+endpoint is "stopped for good" already, and retries will fail forever.
+
+The possibility of this was not realized at the time, but a number of
+cases were discovered later and reproduced. Some proved difficult to
+deal with, and it is outright impossible to predict if an endpoint may
+fail to ever start at all due to a hardware bug. One such bug (albeit
+on ASM3142, not on NEC) was found to be reliably triggered simply by
+toggling an AX88179 NIC up/down in a tight loop for a few seconds.
+
+An endless retries storm is quite nasty. Besides putting needless load
+on the xHC and CPU, it causes URBs never to be given back, paralyzing
+the device and connection/disconnection logic for the whole bus if the
+device is unplugged. User processes waiting for URBs become unkillable,
+drivers and kworker threads lock up and xhci_hcd cannot be reloaded.
+
+For peace of mind, impose a timeout on Stop Endpoint retries in this
+case. If they don't succeed in 100ms, consider the endpoint stopped
+permanently for some reason and just give back the unlinked URBs. This
+failure case is rare already and work is under way to make it rarer.
+
+Start this work today by also handling one simple case of race with
+Reset Endpoint, because it costs just two lines to implement.
+
+Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers")
+CC: stable@vger.kernel.org
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20241106101459.775897-32-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: e21ebe51af68 ("xhci: Turn NEC specific quirk for handling Stop Endpoint errors generic")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 28 ++++++++++++++++++++++++----
+ drivers/usb/host/xhci.c      |  2 ++
+ drivers/usb/host/xhci.h      |  1 +
+ 3 files changed, 27 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index d193d5ad8789..4a3a8a3fa69d 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -52,6 +52,7 @@
+  *   endpoint rings; it generates events on the event ring for these.
+  */
++#include <linux/jiffies.h>
+ #include <linux/scatterlist.h>
+ #include <linux/slab.h>
+ #include <linux/dma-mapping.h>
+@@ -1143,16 +1144,35 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
+                       return;
+               case EP_STATE_STOPPED:
+                       /*
+-                       * NEC uPD720200 sometimes sets this state and fails with
+-                       * Context Error while continuing to process TRBs.
+-                       * Be conservative and trust EP_CTX_STATE on other chips.
++                       * Per xHCI 4.6.9, Stop Endpoint command on a Stopped
++                       * EP is a Context State Error, and EP stays Stopped.
++                       *
++                       * But maybe it failed on Halted, and somebody ran Reset
++                       * Endpoint later. EP state is now Stopped and EP_HALTED
++                       * still set because Reset EP handler will run after us.
++                       */
++                      if (ep->ep_state & EP_HALTED)
++                              break;
++                      /*
++                       * On some HCs EP state remains Stopped for some tens of
++                       * us to a few ms or more after a doorbell ring, and any
++                       * new Stop Endpoint fails without aborting the restart.
++                       * This handler may run quickly enough to still see this
++                       * Stopped state, but it will soon change to Running.
++                       *
++                       * Assume this bug on unexpected Stop Endpoint failures.
++                       * Keep retrying until the EP starts and stops again, on
++                       * chips where this is known to help. Wait for 100ms.
+                        */
+                       if (!(xhci->quirks & XHCI_NEC_HOST))
+                               break;
++                      if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100)))
++                              break;
+                       fallthrough;
+               case EP_STATE_RUNNING:
+                       /* Race, HW handled stop ep cmd before ep was running */
+-                      xhci_dbg(xhci, "Stop ep completion ctx error, ep is running\n");
++                      xhci_dbg(xhci, "Stop ep completion ctx error, ctx_state %d\n",
++                                      GET_EP_CTX_STATE(ep_ctx));
+                       command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
+                       if (!command) {
+diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
+index b072154badf3..ae14c7ade9bc 100644
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -8,6 +8,7 @@
+  * Some code borrowed from the Linux EHCI driver.
+  */
++#include <linux/jiffies.h>
+ #include <linux/pci.h>
+ #include <linux/iommu.h>
+ #include <linux/iopoll.h>
+@@ -1911,6 +1912,7 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
+                       ret = -ENOMEM;
+                       goto done;
+               }
++              ep->stop_time = jiffies;
+               ep->ep_state |= EP_STOP_CMD_PENDING;
+               xhci_queue_stop_endpoint(xhci, command, urb->dev->slot_id,
+                                        ep_index, 0);
+diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
+index 0b526edf636f..a75b8122538d 100644
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -717,6 +717,7 @@ struct xhci_virt_ep {
+       /* Bandwidth checking storage */
+       struct xhci_bw_info     bw_info;
+       struct list_head        bw_endpoint_list;
++      unsigned long           stop_time;
+       /* Isoch Frame ID checking storage */
+       int                     next_frame_id;
+       /* Use new Isoch TRB layout needed for extended TBC support */
+-- 
+2.39.5
+
diff --git a/queue-6.1/xhci-retry-stop-endpoint-on-buggy-nec-controllers.patch b/queue-6.1/xhci-retry-stop-endpoint-on-buggy-nec-controllers.patch
new file mode 100644 (file)
index 0000000..888f1d5
--- /dev/null
@@ -0,0 +1,57 @@
+From 59cca97285494e3f1a6dbae7da27ea9ffac59244 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Feb 2024 16:14:36 +0200
+Subject: xhci: retry Stop Endpoint on buggy NEC controllers
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+[ Upstream commit fd9d55d190c0e5fefd3a9165ea361809427885a1 ]
+
+Two NEC uPD720200 adapters have been observed to randomly misbehave:
+a Stop Endpoint command fails with Context Error, the Output Context
+indicates Stopped state, and the endpoint keeps running. Very often,
+Set TR Dequeue Pointer is seen to fail next with Context Error too,
+in addition to problems from unexpectedly completed cancelled work.
+
+The pathology is common on fast running isoc endpoints like uvcvideo,
+but has also been reproduced on a full-speed bulk endpoint of pl2303.
+It seems all EPs are affected, with risk proportional to their load.
+
+Reproduction involves receiving any kind of stream and closing it to
+make the device driver cancel URBs already queued in advance.
+
+Deal with it by retrying the command like in the Running state.
+
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20240229141438.619372-8-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: e21ebe51af68 ("xhci: Turn NEC specific quirk for handling Stop Endpoint errors generic")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 975d825091cb..d193d5ad8789 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -1141,6 +1141,15 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
+                               break;
+                       ep->ep_state &= ~EP_STOP_CMD_PENDING;
+                       return;
++              case EP_STATE_STOPPED:
++                      /*
++                       * NEC uPD720200 sometimes sets this state and fails with
++                       * Context Error while continuing to process TRBs.
++                       * Be conservative and trust EP_CTX_STATE on other chips.
++                       */
++                      if (!(xhci->quirks & XHCI_NEC_HOST))
++                              break;
++                      fallthrough;
+               case EP_STATE_RUNNING:
+                       /* Race, HW handled stop ep cmd before ep was running */
+                       xhci_dbg(xhci, "Stop ep completion ctx error, ep is running\n");
+-- 
+2.39.5
+
diff --git a/queue-6.1/xhci-turn-nec-specific-quirk-for-handling-stop-endpo.patch b/queue-6.1/xhci-turn-nec-specific-quirk-for-handling-stop-endpo.patch
new file mode 100644 (file)
index 0000000..c646533
--- /dev/null
@@ -0,0 +1,45 @@
+From 66adfedd9af771d1b229bc869b12fe8b62b43ea1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Dec 2024 12:21:21 +0200
+Subject: xhci: Turn NEC specific quirk for handling Stop Endpoint errors
+ generic
+
+From: Mathias Nyman <mathias.nyman@linux.intel.com>
+
+[ Upstream commit e21ebe51af688eb98fd6269240212a3c7300deea ]
+
+xHC hosts from several vendors have the same issue where endpoints start
+so slowly that a later queued 'Stop Endpoint' command may complete before
+endpoint is up and running.
+
+The 'Stop Endpoint' command fails with context state error as the endpoint
+still appears as stopped.
+
+See commit 42b758137601 ("usb: xhci: Limit Stop Endpoint retries") for
+details.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20241217102122.2316814-2-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/host/xhci-ring.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
+index 4a3a8a3fa69d..e5b2a3b551e3 100644
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -1164,8 +1164,6 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
+                        * Keep retrying until the EP starts and stops again, on
+                        * chips where this is known to help. Wait for 100ms.
+                        */
+-                      if (!(xhci->quirks & XHCI_NEC_HOST))
+-                              break;
+                       if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100)))
+                               break;
+                       fallthrough;
+-- 
+2.39.5
+