git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
accel/habanalabs: change the heartbeat scheduling point
author Farah Kassabri <fkassabri@habana.ai>
Thu, 4 Apr 2024 08:06:03 +0000 (11:06 +0300)
committer Ofir Bitton <obitton@habana.ai>
Sun, 23 Jun 2024 06:53:04 +0000 (09:53 +0300)
Currently we schedule the heartbeat thread at late init, and only after
that do we set the INTS_REGISTER packet, which enables events to be
received from firmware.

Init may take some time, and we want to give the firmware 2 full cycles of
the heartbeat thread after it has received INTS_REGISTER.

This patch moves the heartbeat thread scheduling to after the driver
is done with all initializations.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
drivers/accel/habanalabs/common/device.c

index 31daa918453740de6f9d9e571b3584b3ce65ae10..5ca7014def0098243c91a8c4983e731a2e7ebb73 100644 (file)
@@ -1150,23 +1150,6 @@ static int device_late_init(struct hl_device *hdev)
        }
 
        hdev->high_pll = hdev->asic_prop.high_pll;
-
-       if (hdev->heartbeat) {
-               hdev->heartbeat_debug_info.heartbeat_event_counter = 0;
-
-               /*
-                * Before scheduling the heartbeat driver will check if eq event has received.
-                * for the first schedule we need to set the indication as true then for the next
-                * one this indication will be true only if eq event was sent by FW.
-                */
-               hdev->eq_heartbeat_received = true;
-
-               INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
-
-               schedule_delayed_work(&hdev->work_heartbeat,
-                               usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
-       }
-
        hdev->late_init_done = true;
 
        return 0;
@@ -1183,9 +1166,6 @@ static void device_late_fini(struct hl_device *hdev)
        if (!hdev->late_init_done)
                return;
 
-       if (hdev->heartbeat)
-               cancel_delayed_work_sync(&hdev->work_heartbeat);
-
        if (hdev->asic_funcs->late_fini)
                hdev->asic_funcs->late_fini(hdev);
 
@@ -1286,8 +1266,12 @@ static void hl_abort_waiting_for_completions(struct hl_device *hdev)
 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
                                bool skip_wq_flush)
 {
-       if (hard_reset)
+       if (hard_reset) {
+               if (hdev->heartbeat)
+                       cancel_delayed_work_sync(&hdev->work_heartbeat);
+
                device_late_fini(hdev);
+       }
 
        /*
         * Halt the engines and disable interrupts so we won't get any more
@@ -1565,6 +1549,26 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
        }
 }
 
+static inline void device_heartbeat_schedule(struct hl_device *hdev)
+{
+       if (!hdev->heartbeat)
+               return;
+
+       hdev->heartbeat_debug_info.heartbeat_event_counter = 0;
+
+       /*
+        * Before scheduling the heartbeat, the driver checks whether an EQ event was received.
+        * For the first schedule we must set the indication to true; for the following ones
+        * it will be true only if an EQ event was sent by the FW.
+        */
+       hdev->eq_heartbeat_received = true;
+
+       INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+
+       schedule_delayed_work(&hdev->work_heartbeat,
+                       usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+}
+
 /*
  * hl_device_reset - reset the device
  *
@@ -1934,6 +1938,8 @@ kill_processes:
        if (hard_reset) {
                hdev->reset_info.hard_reset_cnt++;
 
+               device_heartbeat_schedule(hdev);
+
                /* After reset is done, we are ready to receive events from
                 * the F/W. We can't do it before because we will ignore events
                 * and if those events are fatal, we won't know about it and
@@ -2368,6 +2374,12 @@ int hl_device_init(struct hl_device *hdev)
                goto out_disabled;
        }
 
+       /* Scheduling the EQ heartbeat thread must come after driver is done with all
+        * initializations, as we want to make sure the FW gets enough time to be prepared
+        * to respond to heartbeat packets.
+        */
+       device_heartbeat_schedule(hdev);
+
        dev_notice(hdev->dev,
                "Successfully added device %s to habanalabs driver\n",
                dev_name(&(hdev)->pdev->dev));