git.ipfire.org Git - thirdparty/linux.git/commitdiff
accel/habanalabs: add a common handler for clock change events
author: Tomer Tayar <ttayar@habana.ai>
Tue, 12 Mar 2024 14:48:42 +0000 (16:48 +0200)
committer: Ofir Bitton <obitton@habana.ai>
Sun, 23 Jun 2024 06:53:04 +0000 (09:53 +0300)
As the new dynamic EQ includes clock change events which are common and
not ASIC-specific, add a common handler for these events.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
drivers/accel/habanalabs/common/device.c
drivers/accel/habanalabs/common/habanalabs.h

index fd117489a05ae9c598c59f52a7efe17c6977c723..31daa918453740de6f9d9e571b3584b3ce65ae10 100644 (file)
@@ -2860,3 +2860,49 @@ void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
        hdev->heartbeat_debug_info.heartbeat_event_counter++;
        hdev->eq_heartbeat_received = true;
 }
+
+/**
+ * hl_handle_clk_change_event() - common handling of clock throttling events.
+ * @hdev: pointer to the habanalabs device structure.
+ * @event_type: the clock change event that was received from the event queue.
+ * @event_mask: notifier event mask. HL_NOTIFIER_EVENT_USER_ENGINE_ERR is OR-ed
+ *              into it for thermal start/end events; power events and unknown
+ *              events leave it untouched.
+ *
+ * Updates the device clock throttling bookkeeping (current/aggregated reason
+ * and the per-type start/end timestamps) under the clock throttling mutex.
+ * A START event sets the matching reason bit in both the current and the
+ * aggregated reason; an END event clears it from the current reason only.
+ * An unrecognized event type is reported with dev_err() and changes nothing.
+ */
+void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask)
+{
+       struct hl_clk_throttle *throttle = &hdev->clk_throttling;
+
+       mutex_lock(&throttle->lock);
+
+       switch (event_type) {
+       /* Throttling began: record the reason and open a new time window */
+       case EQ_EVENT_POWER_EVT_START:
+               throttle->current_reason |= HL_CLK_THROTTLE_POWER;
+               throttle->aggregated_reason |= HL_CLK_THROTTLE_POWER;
+               throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
+               /* End time is reset here and filled in by the matching END event */
+               throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_set(0, 0);
+               dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
+               break;
+
+       case EQ_EVENT_THERMAL_EVT_START:
+               throttle->current_reason |= HL_CLK_THROTTLE_THERMAL;
+               throttle->aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
+               throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
+               /* End time is reset here and filled in by the matching END event */
+               throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_set(0, 0);
+               *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+               dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
+               break;
+
+       /* Throttling ended: drop the current reason and close the time window */
+       case EQ_EVENT_POWER_EVT_END:
+               throttle->current_reason &= ~HL_CLK_THROTTLE_POWER;
+               throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
+               dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
+               break;
+
+       case EQ_EVENT_THERMAL_EVT_END:
+               throttle->current_reason &= ~HL_CLK_THROTTLE_THERMAL;
+               throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
+               *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+               dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
+               break;
+
+       default:
+               dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
+               break;
+       }
+
+       mutex_unlock(&throttle->lock);
+}
index 0d16b5310adde9c8a47a7fe9c260a43c003f9810..3ea1b131cd428ec29697193d7cee4e4b9022678a 100644 (file)
@@ -4063,6 +4063,7 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
 void hl_init_cpu_for_irq(struct hl_device *hdev);
 void hl_set_irq_affinity(struct hl_device *hdev, int irq);
 void hl_eq_heartbeat_event_handle(struct hl_device *hdev);
+void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask);
 
 #ifdef CONFIG_DEBUG_FS