git.ipfire.org Git - thirdparty/linux.git/commitdiff
accel/habanalabs: check for errors after preboot is ready
author Farah Kassabri <fkassabri@habana.ai>
Sun, 3 Mar 2024 16:06:11 +0000 (18:06 +0200)
committer Ofir Bitton <obitton@habana.ai>
Sun, 23 Jun 2024 06:53:03 +0000 (09:53 +0300)
Driver should check and report any fatal errors detected by preboot,
before it attempts to load the boot fit.
Some errors may cause the driver to stop the boot process and mark
the device as unusable.
This check allows the driver to fail early, print the error reported
by preboot, and skip the time-wasting attempt to load the boot fit,
which would fail anyway because of that error.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
drivers/accel/habanalabs/common/firmware_if.c

index 886b3c07503d9c7a0c26180a8254c5d92a74a1f5..6f0c40b12072f96a881fa4dc31cfb33835ca9c14 100644 (file)
@@ -1482,7 +1482,7 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
 {
        struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
        u32 status = 0, timeout;
-       int rc, tries = 1;
+       int rc, tries = 1, fw_err = 0;
        bool preboot_still_runs;
 
        /* Need to check two possible scenarios:
@@ -1522,18 +1522,18 @@ retry:
                }
        }
 
-       if (rc) {
+       /* If we read all FF, then something is totally wrong, no point
+        * of reading specific errors
+        */
+       if (status != -1)
+               fw_err = fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
+                                       pre_fw_load->boot_err1_reg,
+                                       pre_fw_load->sts_boot_dev_sts0_reg,
+                                       pre_fw_load->sts_boot_dev_sts1_reg);
+       if (rc || fw_err) {
                detect_cpu_boot_status(hdev, status);
-               dev_err(hdev->dev, "CPU boot ready timeout (status = %d)\n", status);
-
-               /* If we read all FF, then something is totally wrong, no point
-                * of reading specific errors
-                */
-               if (status != -1)
-                       fw_read_errors(hdev, pre_fw_load->boot_err0_reg,
-                                               pre_fw_load->boot_err1_reg,
-                                               pre_fw_load->sts_boot_dev_sts0_reg,
-                                               pre_fw_load->sts_boot_dev_sts1_reg);
+               dev_err(hdev->dev, "CPU boot %s (status = %d)\n",
+                               fw_err ? "failed due to an error" : "ready timeout", status);
                return -EIO;
        }