]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
accel/habanalabs: revise print on EQ heartbeat failure
author Tomer Tayar <ttayar@habana.ai>
Tue, 16 Apr 2024 14:01:12 +0000 (17:01 +0300)
committer Ofir Bitton <obitton@habana.ai>
Sun, 23 Jun 2024 06:53:32 +0000 (09:53 +0300)
Don't print the "previous EQ index" value in case of an EQ heartbeat
failure, because it is incremented along with the EQ CI and is therefore
redundant.

In addition, because the CPU-CP PI is zeroed when it reaches a value that
is twice the queue size, also print the CI with the same wrap-around
applied, to make it easier to compare the two values.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
drivers/accel/habanalabs/common/device.c

index 2fa6bf4c97afc39e596380afb2d54fc005fd8860..3efc26dd9497127fd51339534d679fdbd4bc905d 100644 (file)
@@ -1064,23 +1064,24 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
 
 static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
 {
+       struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info;
+       u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
-       u32 cpu_q_id;
 
        if (!prop->cpucp_info.eq_health_check_supported)
                return true;
 
        if (!hdev->eq_heartbeat_received) {
-               cpu_q_id = hdev->heartbeat_debug_info.cpu_queue_id;
-
                dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
 
-               dev_err(hdev->dev, "Heartbeat events counter: %u, Q_PI: %u, Q_CI: %u, EQ CI: %u, EQ prev: %u\n",
-                               hdev->heartbeat_debug_info.heartbeat_event_counter,
-                               hdev->kernel_queues[cpu_q_id].pi,
-                               atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
-                               hdev->event_queue.ci,
-                               hdev->event_queue.prev_eqe_index);
+               dev_err(hdev->dev,
+                       "Heartbeat events counter: %u, EQ CI: %u, PQ PI: %u, PQ CI: %u (%u)\n",
+                       heartbeat_debug_info->heartbeat_event_counter,
+                       hdev->event_queue.ci,
+                       hdev->kernel_queues[cpu_q_id].pi,
+                       atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
+                       atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask);
+
                return false;
        }