]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
accel/habanalabs: add more info upon cpu pkt timeout
author Farah Kassabri <fkassabri@habana.ai>
Tue, 9 Apr 2024 11:46:19 +0000 (14:46 +0300)
committer Ofir Bitton <obitton@habana.ai>
Sun, 23 Jun 2024 06:53:32 +0000 (09:53 +0300)
In order to have better debuggability upon encountering FW issues,
we are adding additional info once the CPU packet timeout expires.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
drivers/accel/habanalabs/common/firmware_if.c

index 6f0c40b12072f96a881fa4dc31cfb33835ca9c14..3cd8a1f699805e10234c6e1ad5bbf6dc0fba0c1a 100644 (file)
@@ -460,11 +460,19 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
                /* If FW performed reset just before sending it a packet, we will get a timeout.
                 * This is expected behavior, hence no need for error message.
                 */
-               if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset)
+               if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset) {
                        dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
                                        tmp);
-               else
-                       dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n", tmp);
+               } else {
+                       struct hl_bd *bd = queue->kernel_address;
+
+                       bd += hl_pi_2_offset(queue->pi);
+
+                       dev_err(hdev->dev, "Device CPU packet timeout (status = 0x%x)\n"
+                                       "Pkt info: dma_addr: 0x%llx, kernel_addr: %p, len:0x%x, ctl: 0x%x, ptr:0x%llx, dram_bd:%u\n",
+                                       tmp, pkt_dma_addr, (void *)pkt, bd->len, bd->ctl, bd->ptr,
+                                       queue->dram_bd);
+               }
                hdev->device_cpu_disabled = true;
                goto out;
        }