From: Matthew Brost Date: Thu, 26 Mar 2026 21:01:15 +0000 (-0700) Subject: drm/xe: Avoid memory allocations in xe_device_declare_wedged() X-Git-Tag: v7.0-rc7~22^2~2^2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=56b7432b7e8e6ae1b289cb405d16db4150ef193b;p=thirdparty%2Flinux.git drm/xe: Avoid memory allocations in xe_device_declare_wedged() xe_device_declare_wedged() runs in the DMA-fence signaling path, where GFP_KERNEL memory allocations are not allowed. However, registering xe_device_wedged_fini via drmm_add_action_or_reset() triggers a GFP_KERNEL allocation. Fix this by deferring the registration of xe_device_wedged_fini until late in the driver load sequence. Additionally, drop the wedged PM reference only if the device is actually wedged in xe_device_wedged_fini. Fixes: 452bca0edbd0 ("drm/xe: Don't suspend device upon wedge") Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://patch.msgid.link/20260326210116.202585-2-matthew.brost@intel.com (cherry picked from commit b08ceb443866808b881b12d4183008d214d816c1) Signed-off-by: Rodrigo Vivi --- diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 52ee24e9cd3a..3eb06b27db7e 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -837,6 +837,14 @@ static void detect_preproduction_hw(struct xe_device *xe) } } +static void xe_device_wedged_fini(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + if (atomic_read(&xe->wedged.flag)) + xe_pm_runtime_put(xe); +} + int xe_device_probe(struct xe_device *xe) { struct xe_tile *tile; @@ -1013,6 +1021,10 @@ int xe_device_probe(struct xe_device *xe) detect_preproduction_hw(xe); + err = drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe); + if (err) + goto err_unregister_display; + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_unregister_display: @@ -1216,13 +1228,6 @@ u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) return address & GENMASK_ULL(xe->info.va_bits - 1, 0); } -static void xe_device_wedged_fini(struct drm_device *drm, void *arg) -{ - struct xe_device *xe = arg; - - xe_pm_runtime_put(xe); -} - /** * DOC: Xe Device Wedging * @@ -1300,15 +1305,9 @@ void xe_device_declare_wedged(struct xe_device *xe) return; } - xe_pm_runtime_get_noresume(xe); - - if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { - drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); - return; - } - if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; + xe_pm_runtime_get_noresume(xe); drm_err(&xe->drm, "CRITICAL: Xe has declared device %s as wedged.\n" "IOCTLs and executions are blocked. Only a rebind may clear the failure\n"