xhci-don-t-perform-soft-retry-for-etron-xhci-host.patch
xhci-don-t-issue-reset-device-command-to-etron-xhci-host.patch
bluetooth-fix-type-of-len-in-rfcomm_sock_getsockopt-_old.patch
+usb-xhci-limit-stop-endpoint-retries.patch
+usb-xhci-fix-td-invalidation-under-pending-set-tr-dequeue.patch
+usb-xhci-avoid-queuing-redundant-stop-endpoint-commands.patch
--- /dev/null
+From 474538b8dd1cd9c666e56cfe8ef60fbb0fb513f4 Mon Sep 17 00:00:00 2001
+From: Michal Pecio <michal.pecio@gmail.com>
+Date: Wed, 6 Nov 2024 12:14:59 +0200
+Subject: usb: xhci: Avoid queuing redundant Stop Endpoint commands
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+commit 474538b8dd1cd9c666e56cfe8ef60fbb0fb513f4 upstream.
+
+Stop Endpoint command on an already stopped endpoint fails and may be
+misinterpreted as a known hardware bug by the completion handler. This
+results in an unnecessary delay with repeated retries of the command.
+
+Avoid queuing this command when endpoint state flags indicate that it's
+stopped or halted and the command will fail. If commands are pending on
+the endpoint, their completion handlers will process cancelled TDs so
+it's done. In case of waiting for external operations like clearing TT
+buffer, the endpoint is stopped and cancelled TDs can be processed now.
+
+This eliminates practically all unnecessary retries because an endpoint
+with pending URBs is maintained in Running state by the driver, unless
+aforementioned commands or other operations are pending on it. This is
+guaranteed by xhci_ring_ep_doorbell() and by the fact that it is called
+every time any of those operations completes.
+
+The only known exceptions are hardware bugs (the endpoint never starts
+at all) and Stream Protocol errors not associated with any TRB, which
+cause an endpoint reset not followed by restart. Sounds like a bug.
+
+Generally, these retries are only expected to happen when the endpoint
+fails to start for unknown/no reason, which is a worse problem itself,
+and fixing the bug eliminates the retries too.
+
+All cases were tested and found to work as expected. SET_DEQ_PENDING
+was produced by patching uvcvideo to unlink URBs in 100us intervals,
+which then runs into this case very often. EP_HALTED was produced by
+restarting 'cat /dev/ttyUSB0' on a serial dongle with broken cable.
+EP_CLEARING_TT by the same, with the dongle on an external hub.
+
+Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers")
+CC: stable@vger.kernel.org
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20241106101459.775897-34-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci-ring.c | 13 +++++++++++++
+ drivers/usb/host/xhci.c | 19 +++++++++++++++----
+ drivers/usb/host/xhci.h | 1 +
+ 3 files changed, 29 insertions(+), 4 deletions(-)
+
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -1070,6 +1070,19 @@ static int xhci_invalidate_cancelled_tds
+ }
+
+ /*
++ * Erase queued TDs from transfer ring(s) and give back those the xHC didn't
++ * stop on. If necessary, queue commands to move the xHC off cancelled TDs it
++ * stopped on. Those will be given back later when the commands complete.
++ *
++ * Call under xhci->lock on a stopped endpoint.
++ */
++void xhci_process_cancelled_tds(struct xhci_virt_ep *ep)
++{
++ xhci_invalidate_cancelled_tds(ep);
++ xhci_giveback_invalidated_tds(ep);
++}
++
++/*
+ * Returns the TD the endpoint ring halted on.
+ * Only call for non-running rings without streams.
+ */
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -1769,10 +1769,21 @@ static int xhci_urb_dequeue(struct usb_h
+ }
+ }
+
+- /* Queue a stop endpoint command, but only if this is
+- * the first cancellation to be handled.
+- */
+- if (!(ep->ep_state & EP_STOP_CMD_PENDING)) {
++ /* These completion handlers will sort out cancelled TDs for us */
++ if (ep->ep_state & (EP_STOP_CMD_PENDING | EP_HALTED | SET_DEQ_PENDING)) {
++ xhci_dbg(xhci, "Not queuing Stop Endpoint on slot %d ep %d in state 0x%x\n",
++ urb->dev->slot_id, ep_index, ep->ep_state);
++ goto done;
++ }
++
++ /* In this case no commands are pending but the endpoint is stopped */
++ if (ep->ep_state & EP_CLEARING_TT) {
++ /* and cancelled TDs can be given back right away */
++ xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n",
++ urb->dev->slot_id, ep_index, ep->ep_state);
++ xhci_process_cancelled_tds(ep);
++ } else {
++ /* Otherwise, queue a new Stop Endpoint command */
+ command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
+ if (!command) {
+ ret = -ENOMEM;
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -1921,6 +1921,7 @@ void xhci_ring_doorbell_for_active_rings
+ void xhci_cleanup_command_queue(struct xhci_hcd *xhci);
+ void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring);
+ unsigned int count_trbs(u64 addr, u64 len);
++void xhci_process_cancelled_tds(struct xhci_virt_ep *ep);
+
+ /* xHCI roothub code */
+ void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port,
--- /dev/null
+From 484c3bab2d5dfa13ff659a51a06e9a393141eefc Mon Sep 17 00:00:00 2001
+From: Michal Pecio <michal.pecio@gmail.com>
+Date: Wed, 6 Nov 2024 12:14:58 +0200
+Subject: usb: xhci: Fix TD invalidation under pending Set TR Dequeue
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+commit 484c3bab2d5dfa13ff659a51a06e9a393141eefc upstream.
+
+xhci_invalidate_cancelled_tds() may not work correctly if the hardware
+is modifying endpoint or stream contexts at the same time by executing
+a Set TR Dequeue command. And even if it worked, it would be unable to
+queue Set TR Dequeue for the next stream, failing to clear xHC cache.
+
+On stream endpoints, a chain of Set TR Dequeue commands may take some
+time to execute and we may want to cancel more TDs during this time.
+Currently this leads to Stop Endpoint completion handler calling this
+function without testing for SET_DEQ_PENDING, which will trigger the
+aforementioned problems when it happens.
+
+On all endpoints, a halt condition causes Reset Endpoint to be queued
+and an error status given to the class driver, which may unlink more
+URBs in response. Stop Endpoint is queued and its handler may execute
+concurrently with Set TR Dequeue queued by Reset Endpoint handler.
+
+(Reset Endpoint handler calls this function too, but there seems to
+be no possibility of it running concurrently with Set TR Dequeue).
+
+Fix xhci_invalidate_cancelled_tds() to work correctly under a pending
+Set TR Dequeue. Bail out of the function when SET_DEQ_PENDING is set,
+then make the completion handler call the function again and also call
+xhci_giveback_invalidated_tds(), which needs to be called next.
+
+This seems to fix another potential bug, where the handler would call
+xhci_invalidate_cancelled_tds(), which may clear some deferred TDs if
+a sanity check fails, and the TDs wouldn't be given back promptly.
+
+Said sanity check seems to be wrong and prone to false positives when
+the endpoint halts, but fixing it is beyond the scope of this change,
+besides ensuring that cleared TDs are given back properly.
+
+Fixes: 5ceac4402f5d ("xhci: Handle TD clearing for multiple streams case")
+CC: stable@vger.kernel.org
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20241106101459.775897-33-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci-ring.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -973,6 +973,13 @@ static int xhci_invalidate_cancelled_tds
+ unsigned int slot_id = ep->vdev->slot_id;
+ int err;
+
++ /*
++ * This is not going to work if the hardware is changing its dequeue
++ * pointers as we look at them. Completion handler will call us later.
++ */
++ if (ep->ep_state & SET_DEQ_PENDING)
++ return 0;
++
+ xhci = ep->xhci;
+
+ list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) {
+@@ -1359,7 +1366,6 @@ static void xhci_handle_cmd_set_deq(stru
+ struct xhci_ep_ctx *ep_ctx;
+ struct xhci_slot_ctx *slot_ctx;
+ struct xhci_td *td, *tmp_td;
+- bool deferred = false;
+
+ ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
+ stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2]));
+@@ -1460,8 +1466,6 @@ static void xhci_handle_cmd_set_deq(stru
+ xhci_dbg(ep->xhci, "%s: Giveback cancelled URB %p TD\n",
+ __func__, td->urb);
+ xhci_td_cleanup(ep->xhci, td, ep_ring, td->status);
+- } else if (td->cancel_status == TD_CLEARING_CACHE_DEFERRED) {
+- deferred = true;
+ } else {
+ xhci_dbg(ep->xhci, "%s: Keep cancelled URB %p TD as cancel_status is %d\n",
+ __func__, td->urb, td->cancel_status);
+@@ -1472,11 +1476,15 @@ cleanup:
+ ep->queued_deq_seg = NULL;
+ ep->queued_deq_ptr = NULL;
+
+- if (deferred) {
+- /* We have more streams to clear */
++ /* Check for deferred or newly cancelled TDs */
++ if (!list_empty(&ep->cancelled_td_list)) {
+ xhci_dbg(ep->xhci, "%s: Pending TDs to clear, continuing with invalidation\n",
+ __func__);
+ xhci_invalidate_cancelled_tds(ep);
++ /* Try to restart the endpoint if all is done */
++ ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
++ /* Start giving back any TDs invalidated above */
++ xhci_giveback_invalidated_tds(ep);
+ } else {
+ /* Restart any rings with pending URBs */
+ xhci_dbg(ep->xhci, "%s: All TDs cleared, ring doorbell\n", __func__);
--- /dev/null
+From 42b7581376015c1bbcbe5831f043cd0ac119d028 Mon Sep 17 00:00:00 2001
+From: Michal Pecio <michal.pecio@gmail.com>
+Date: Wed, 6 Nov 2024 12:14:57 +0200
+Subject: usb: xhci: Limit Stop Endpoint retries
+
+From: Michal Pecio <michal.pecio@gmail.com>
+
+commit 42b7581376015c1bbcbe5831f043cd0ac119d028 upstream.
+
+Some host controllers fail to atomically transition an endpoint to the
+Running state on a doorbell ring and enter a hidden "Restarting" state,
+which looks very much like Stopped, with the important difference that
+it will spontaneously transition to Running anytime soon.
+
+A Stop Endpoint command queued in the Restarting state typically fails
+with Context State Error and the completion handler sees the Endpoint
+Context State as either still Stopped or already Running. Even a case
+of Halted was observed, when an error occurred right after the restart.
+
+The Halted state is already recovered from by resetting the endpoint.
+The Running state is handled by retrying Stop Endpoint.
+
+The Stopped state was recognized as a problem on NEC controllers and
+worked around also by retrying, because the endpoint soon restarts and
+then stops for good. But there is a risk: the command may fail if the
+endpoint is "stopped for good" already, and retries will fail forever.
+
+The possibility of this was not realized at the time, but a number of
+cases were discovered later and reproduced. Some proved difficult to
+deal with, and it is outright impossible to predict if an endpoint may
+fail to ever start at all due to a hardware bug. One such bug (albeit
+on ASM3142, not on NEC) was found to be reliably triggered simply by
+toggling an AX88179 NIC up/down in a tight loop for a few seconds.
+
+An endless retries storm is quite nasty. Besides putting needless load
+on the xHC and CPU, it causes URBs never to be given back, paralyzing
+the device and connection/disconnection logic for the whole bus if the
+device is unplugged. User processes waiting for URBs become unkillable,
+drivers and kworker threads lock up and xhci_hcd cannot be reloaded.
+
+For peace of mind, impose a timeout on Stop Endpoint retries in this
+case. If they don't succeed in 100ms, consider the endpoint stopped
+permanently for some reason and just give back the unlinked URBs. This
+failure case is rare already and work is under way to make it rarer.
+
+Start this work today by also handling one simple case of race with
+Reset Endpoint, because it costs just two lines to implement.
+
+Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers")
+CC: stable@vger.kernel.org
+Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
+Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
+Link: https://lore.kernel.org/r/20241106101459.775897-32-mathias.nyman@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/usb/host/xhci-ring.c | 28 ++++++++++++++++++++++++----
+ drivers/usb/host/xhci.c | 2 ++
+ drivers/usb/host/xhci.h | 1 +
+ 3 files changed, 27 insertions(+), 4 deletions(-)
+
+--- a/drivers/usb/host/xhci-ring.c
++++ b/drivers/usb/host/xhci-ring.c
+@@ -52,6 +52,7 @@
+ * endpoint rings; it generates events on the event ring for these.
+ */
+
++#include <linux/jiffies.h>
+ #include <linux/scatterlist.h>
+ #include <linux/slab.h>
+ #include <linux/dma-mapping.h>
+@@ -1151,16 +1152,35 @@ static void xhci_handle_cmd_stop_ep(stru
+ return;
+ case EP_STATE_STOPPED:
+ /*
+- * NEC uPD720200 sometimes sets this state and fails with
+- * Context Error while continuing to process TRBs.
+- * Be conservative and trust EP_CTX_STATE on other chips.
++ * Per xHCI 4.6.9, Stop Endpoint command on a Stopped
++ * EP is a Context State Error, and EP stays Stopped.
++ *
++ * But maybe it failed on Halted, and somebody ran Reset
++ * Endpoint later. EP state is now Stopped and EP_HALTED
++ * still set because Reset EP handler will run after us.
++ */
++ if (ep->ep_state & EP_HALTED)
++ break;
++ /*
++ * On some HCs EP state remains Stopped for some tens of
++ * us to a few ms or more after a doorbell ring, and any
++ * new Stop Endpoint fails without aborting the restart.
++ * This handler may run quickly enough to still see this
++ * Stopped state, but it will soon change to Running.
++ *
++ * Assume this bug on unexpected Stop Endpoint failures.
++ * Keep retrying until the EP starts and stops again, on
++ * chips where this is known to help. Wait for 100ms.
+ */
+ if (!(xhci->quirks & XHCI_NEC_HOST))
+ break;
++ if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100)))
++ break;
+ fallthrough;
+ case EP_STATE_RUNNING:
+ /* Race, HW handled stop ep cmd before ep was running */
+- xhci_dbg(xhci, "Stop ep completion ctx error, ep is running\n");
++ xhci_dbg(xhci, "Stop ep completion ctx error, ctx_state %d\n",
++ GET_EP_CTX_STATE(ep_ctx));
+
+ command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
+ if (!command) {
+--- a/drivers/usb/host/xhci.c
++++ b/drivers/usb/host/xhci.c
+@@ -8,6 +8,7 @@
+ * Some code borrowed from the Linux EHCI driver.
+ */
+
++#include <linux/jiffies.h>
+ #include <linux/pci.h>
+ #include <linux/iommu.h>
+ #include <linux/iopoll.h>
+@@ -1777,6 +1778,7 @@ static int xhci_urb_dequeue(struct usb_h
+ ret = -ENOMEM;
+ goto done;
+ }
++ ep->stop_time = jiffies;
+ ep->ep_state |= EP_STOP_CMD_PENDING;
+ xhci_queue_stop_endpoint(xhci, command, urb->dev->slot_id,
+ ep_index, 0);
+--- a/drivers/usb/host/xhci.h
++++ b/drivers/usb/host/xhci.h
+@@ -690,6 +690,7 @@ struct xhci_virt_ep {
+ /* Bandwidth checking storage */
+ struct xhci_bw_info bw_info;
+ struct list_head bw_endpoint_list;
++ unsigned long stop_time;
+ /* Isoch Frame ID checking storage */
+ int next_frame_id;
+ /* Use new Isoch TRB layout needed for extended TBC support */