]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
i3c: mipi-i3c-hci: Add DMA-mode recovery for internal controller errors
authorAdrian Hunter <adrian.hunter@intel.com>
Wed, 3 Jun 2026 09:07:50 +0000 (12:07 +0300)
committerAlexandre Belloni <alexandre.belloni@bootlin.com>
Sun, 14 Jun 2026 15:21:34 +0000 (17:21 +0200)
Handle internal I3C HCI errors when operating in DMA mode by adding a
simple recovery mechanism.

On detection of an internal controller error, mark recovery as needed and
attempt to restore operation by performing a software reset followed by
state restore.  To keep recovery straightforward on this unlikely error
path, all currently queued transfers are terminated and completed with an
error.

This allows the controller to resume operation after internal failures
rather than remaining permanently stuck.

Note, internal errors indicated by INTR_HC_INTERNAL_ERR, cause the
controller to stop.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Link: https://patch.msgid.link/20260603090754.16252-14-adrian.hunter@intel.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
drivers/i3c/master/mipi-i3c-hci/cmd.h
drivers/i3c/master/mipi-i3c-hci/core.c
drivers/i3c/master/mipi-i3c-hci/dma.c
drivers/i3c/master/mipi-i3c-hci/hci.h

index b1bf87daa65166a8115abcfaebcd61540e29a025..7bada7b4b2de5b0ed7e93ad76ac03ae8f2c7ed23 100644 (file)
@@ -65,4 +65,10 @@ struct hci_cmd_ops {
 extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v1;
 extern const struct hci_cmd_ops mipi_i3c_hci_cmd_v2;
 
+static inline void hci_cmd_set_resp_err(u32 *response, int resp_err)
+{
+       *response &= ~RESP_ERR_FIELD;
+       *response |= FIELD_PREP(RESP_ERR_FIELD, resp_err);
+}
+
 #endif
index 12a0122fb7094da5e53f601b16daddbfb0f8eaf7..69dcf5dad3a522b4b4a12bc7a96ca62e137cc548 100644 (file)
@@ -668,6 +668,7 @@ static irqreturn_t i3c_hci_irq_handler(int irq, void *dev_id)
        if (val & INTR_HC_INTERNAL_ERR) {
                dev_err(&hci->master.dev, "Host Controller Internal Error\n");
                val &= ~INTR_HC_INTERNAL_ERR;
+               hci->recovery_needed = true;
        }
 
        if (val)
index f9023cb3c5a2c859ac197d043946ec262338d035..f39a6ce2aad55aa5c0995360680c09ce54b0eb2a 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <linux/bitfield.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
@@ -258,6 +259,10 @@ ring_ready:
        rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE);
        rh_reg_write(RING_CONTROL, RING_CTRL_ENABLE | RING_CTRL_RUN_STOP);
 
+       /*
+        * Do not clear the entries of rh->src_xfers because the recovery uses
+        * them.  In other cases they should be NULL anyway.
+        */
        rh->done_ptr = 0;
        rh->ibi_chunk_ptr = 0;
        rh->xfer_space = rh->xfer_entries;
@@ -362,7 +367,7 @@ static int hci_dma_init(struct i3c_hci *hci)
                rh->resp = dma_alloc_coherent(rings->sysdev, resps_sz,
                                              &rh->resp_dma, GFP_KERNEL);
                rh->src_xfers =
-                       kmalloc_objs(*rh->src_xfers, rh->xfer_entries);
+                       kzalloc_objs(*rh->src_xfers, rh->xfer_entries);
                ret = -ENOMEM;
                if (!rh->xfer || !rh->resp || !rh->src_xfers)
                        goto err_out;
@@ -572,13 +577,15 @@ static void hci_dma_xfer_done(struct i3c_hci *hci, struct hci_rh_data *rh)
                        hci_dma_unmap_xfer(hci, xfer, 1);
                        rh->src_xfers[done_ptr] = NULL;
                        xfer->ring_entry = -1;
-                       xfer->response = resp;
                        if (tid != xfer->cmd_tid) {
                                dev_err(&hci->master.dev,
                                        "response tid=%d when expecting %d\n",
                                        tid, xfer->cmd_tid);
-                               /* TODO: do something about it? */
+                               hci->recovery_needed = true;
+                               if (!RESP_STATUS(resp))
+                                       hci_cmd_set_resp_err(&resp, RESP_ERR_HC_TERMINATED);
                        }
+                       xfer->response = resp;
                        if (xfer == xfer->final_xfer || RESP_STATUS(resp))
                                complete(xfer->final_xfer->completion);
                        if (RESP_STATUS(resp))
@@ -625,6 +632,60 @@ static void hci_dma_unblock_enqueue(struct i3c_hci *hci)
        }
 }
 
+static void hci_dma_error_out_rh(struct i3c_hci *hci, struct hci_rh_data *rh)
+{
+       /*
+        * The entries of rh->src_xfers are not cleared by
+        * i3c_hci_reset_and_restore(), so can be used here.  Do 2 passes so
+        * that the final_xfer of an xfer list is always processed last.
+        */
+       for (int pass = 0; pass < 2; pass++)
+               for (int i = 0; i < rh->xfer_entries; i++) {
+                       struct hci_xfer *xfer = rh->src_xfers[i];
+
+                       if (!xfer || (!pass && xfer == xfer->final_xfer))
+                               continue;
+                       hci_dma_unmap_xfer(hci, xfer, 1);
+                       rh->src_xfers[i] = NULL;
+                       xfer->ring_entry = -1;
+                       hci_cmd_set_resp_err(&xfer->response, RESP_ERR_HC_TERMINATED);
+                       if (xfer == xfer->final_xfer)
+                               complete(xfer->final_xfer->completion);
+               }
+}
+
+static void hci_dma_error_out_all(struct i3c_hci *hci)
+{
+       struct hci_rings_data *rings = hci->io_data;
+
+       for (int i = 0; i < rings->total; i++)
+               hci_dma_error_out_rh(hci, &rings->headers[i]);
+}
+
+static void hci_dma_recovery(struct i3c_hci *hci)
+{
+       int ret;
+
+       dev_err(&hci->master.dev, "Attempting to recover from internal errors\n");
+
+       for (int i = 0; i < 3; i++) {
+               ret = i3c_hci_reset_and_restore(hci);
+               if (!ret)
+                       break;
+               dev_err(&hci->master.dev, "Reset and restore failed, error %d\n", ret);
+               /* Just in case the controller is busy, give it some time */
+               msleep(1000);
+       }
+
+       spin_lock_irq(&hci->lock);
+       hci_dma_error_out_all(hci);
+       hci_dma_unblock_enqueue(hci);
+       hci->recovery_needed = false;
+       spin_unlock_irq(&hci->lock);
+
+       dev_err(&hci->master.dev, "Recovery %s\n", ret ? "failed!" : "done");
+}
+
 static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
                                 struct hci_xfer *xfer_list, int n)
 {
@@ -640,6 +701,17 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
 
        ring_status = rh_reg_read(RING_STATUS);
        if (ring_status & RING_STATUS_RUNNING) {
+               /*
+                * The transfer may have already completed, especially
+                * if recovery has just run.  Do nothing in that case.
+                */
+               hci_dma_xfer_done(hci, rh);
+               if (xfer_list->final_xfer->ring_entry < 0 &&
+                   !hci->recovery_needed && !hci->enqueue_blocked &&
+                   ring_status == (RING_STATUS_ENABLED | RING_STATUS_RUNNING)) {
+                       spin_unlock_irq(&hci->lock);
+                       return false;
+               }
                hci->enqueue_blocked = true;
                spin_unlock_irq(&hci->lock);
                /* stop the ring */
@@ -647,12 +719,8 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
                spin_lock_irq(&hci->lock);
                ring_status = rh_reg_read(RING_STATUS);
                if (ring_status & RING_STATUS_RUNNING) {
-                       /*
-                        * We're deep in it if ever this condition is ever met.
-                        * Hardware might still be writing to memory, etc.
-                        */
-                       dev_crit(&hci->master.dev, "unable to abort the ring\n");
-                       WARN_ON(1);
+                       dev_err(&hci->master.dev, "Unable to abort the DMA ring\n");
+                       hci->recovery_needed = true;
                }
        }
 
@@ -662,6 +730,13 @@ static bool hci_dma_dequeue_xfer(struct i3c_hci *hci,
 
        hci_dma_xfer_done(hci, rh);
 
+       if (hci->recovery_needed) {
+               hci->enqueue_blocked = true;
+               spin_unlock_irq(&hci->lock);
+               hci_dma_recovery(hci);
+               return true;
+       }
+
        for (i = 0; i < n; i++) {
                struct hci_xfer *xfer = xfer_list + i;
                int idx = xfer->ring_entry;
index a3151c26827e4ccc6c6b1b93bbe03b886761b2fc..4bf2c66c97b47b3adec4b35792dcc5a9c1391094 100644 (file)
@@ -55,6 +55,7 @@ struct i3c_hci {
        atomic_t next_cmd_tid;
        bool irq_inactive;
        bool enqueue_blocked;
+       bool recovery_needed;
        wait_queue_head_t enqueue_wait_queue;
        u32 caps;
        unsigned int quirks;