vdev->timeout.boot = -1;
vdev->timeout.jsm = -1;
vdev->timeout.tdr = -1;
+ vdev->timeout.inference = -1;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = -1;
} else if (ivpu_is_fpga(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 15000;
vdev->timeout.tdr = 30000;
+ vdev->timeout.inference = 900000;
vdev->timeout.autosuspend = -1;
vdev->timeout.d0i3_entry_msg = 500;
vdev->timeout.state_dump_msg = 10000;
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 10000;
+ vdev->timeout.inference = 300000;
vdev->timeout.autosuspend = 100;
vdev->timeout.d0i3_entry_msg = 100;
vdev->timeout.state_dump_msg = 10;
vdev->timeout.boot = 1000;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 2000;
+ vdev->timeout.inference = 60000;
if (ivpu_hw_ip_gen(vdev) == IVPU_HW_IP_37XX)
vdev->timeout.autosuspend = 10;
else
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+static unsigned long ivpu_inference_timeout_ms;
+module_param_named(inference_timeout_ms, ivpu_inference_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(inference_timeout_ms, "Inference maximum duration, in milliseconds, 0 - default");
+
#define PM_RESCHEDULE_LIMIT 5
-#define PM_TDR_HEARTBEAT_LIMIT 30
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
{
{
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
struct ivpu_device *vdev = pm->vdev;
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+ unsigned long inference_timeout_ms = ivpu_inference_timeout_ms ? ivpu_inference_timeout_ms :
+ vdev->timeout.inference;
+ u64 inference_max_retries;
u64 heartbeat;
if (ivpu_jsm_get_heartbeat(vdev, 0, &heartbeat) || heartbeat <= vdev->fw->last_heartbeat) {
goto recovery;
}
- if (atomic_fetch_inc(&vdev->job_timeout_counter) > PM_TDR_HEARTBEAT_LIMIT) {
- ivpu_err(vdev, "Job timeout detected, heartbeat limit exceeded\n");
+ inference_max_retries = DIV_ROUND_UP(inference_timeout_ms, timeout_ms);
+ if (atomic_fetch_inc(&vdev->job_timeout_counter) >= inference_max_retries) {
+ ivpu_err(vdev, "Job timeout detected, heartbeat limit (%lld) exceeded\n",
+ inference_max_retries);
goto recovery;
}