From c8c10dcacafce3b6fb0238479b5e28e92b0366da Mon Sep 17 00:00:00 2001
From: Tomer Tayar
Date: Sun, 10 Mar 2024 12:18:35 +0200
Subject: [PATCH] accel/habanalabs/gaudi2: assume hard-reset by FW upon MC SEI severe error

FW initiates a hard reset upon an MC SEI severe error.
Align the driver to expect this reset and avoid accessing the device
until the reset is done.

Signed-off-by: Tomer Tayar
Reviewed-by: Ofir Bitton
Signed-off-by: Ofir Bitton
---
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 18cc7b7736509..4791582d157ce 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -10004,6 +10004,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
 			reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 			reset_required = true;
+			is_critical = eq_entry->sei_data.hdr.is_critical;
 		}
 		error_count++;
 		break;
@@ -10235,8 +10236,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		gaudi2_print_event(hdev, event_type, true,
 				"No error cause for H/W event %u", event_type);
 
-	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
-				reset_required) {
+	if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
 		if (reset_required ||
 				(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
 			reset_flags |= HL_DRV_RESET_HARD;
-- 
2.39.5