git.ipfire.org Git - thirdparty/linux.git/commitdiff
accel/habanalabs/gaudi2: assume hard-reset by FW upon MC SEI severe error
author Tomer Tayar <ttayar@habana.ai>
Sun, 10 Mar 2024 10:18:35 +0000 (12:18 +0200)
committer Ofir Bitton <obitton@habana.ai>
Sun, 23 Jun 2024 06:53:03 +0000 (09:53 +0300)
FW initiates a hard reset upon an MC SEI severe error.
Align the driver to expect this reset and avoid accessing the device
until the reset is done.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
drivers/accel/habanalabs/gaudi2/gaudi2.c

index 18cc7b773650957872d1304c28c369c650d17d7d..4791582d157ce9d515cbf0c161b026afa0b55972 100644 (file)
@@ -10004,6 +10004,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
                if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
                        reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
                        reset_required = true;
+                       is_critical = eq_entry->sei_data.hdr.is_critical;
                }
                error_count++;
                break;
@@ -10235,8 +10236,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
                gaudi2_print_event(hdev, event_type, true,
                                "No error cause for H/W event %u", event_type);
 
-       if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
-                               reset_required) {
+       if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
                if (reset_required ||
                                (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
                        reset_flags |= HL_DRV_RESET_HARD;