From: Thomas Hellström
Date: Fri, 19 Dec 2025 11:33:20 +0000 (+0100)
Subject: drm/xe/svm: Serialize migration to device if racing
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0620837490e0401cb4e9965a1e0c462dbff1c97b;p=thirdparty%2Fkernel%2Flinux.git

drm/xe/svm: Serialize migration to device if racing

Introduce an rw-semaphore to serialize migration to device if it is
likely that the migration races with another device migration of the
same CPU address space range. This is a temporary fix that attempts to
mitigate a livelock that might happen if many devices try to migrate a
range at the same time, and it affects only devices using the xe
driver. A longer-term fix probably lies in improvements to the core mm
migration layer.

Suggested-by: Matthew Brost
Signed-off-by: Thomas Hellström
Reviewed-by: Matthew Brost
Link: https://patch.msgid.link/20251219113320.183860-25-thomas.hellstrom@linux.intel.com
---

diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 84ff99aa3e499..fa2ee2c08f318 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -1593,10 +1593,12 @@ struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *t
 int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx,
 		      struct drm_pagemap *dpagemap)
 {
+	static DECLARE_RWSEM(driver_migrate_lock);
 	struct xe_vm *vm = range_to_vm(&range->base);
 	enum drm_gpusvm_scan_result migration_state;
 	struct xe_device *xe = vm->xe;
 	int err, retries = 1;
+	bool write_locked = false;
 
 	xe_assert(range_to_vm(&range->base)->xe, range->base.pages.flags.migrate_devmem);
 	range_debug(range, "ALLOCATE VRAM");
@@ -1615,16 +1617,32 @@ int xe_svm_alloc_vram(struct xe_svm_range *range, const struct drm_gpusvm_ctx *c
 	drm_dbg(&xe->drm, "Request migration to device memory on \"%s\".\n",
 		dpagemap->drm->unique);
 
+	err = down_read_interruptible(&driver_migrate_lock);
+	if (err)
+		return err;
 	do {
 		err = drm_pagemap_populate_mm(dpagemap,
 					      xe_svm_range_start(range),
 					      xe_svm_range_end(range),
 					      range->base.gpusvm->mm,
 					      ctx->timeslice_ms);
-		if (err == -EBUSY && retries)
-			drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
+		if (err == -EBUSY && retries) {
+			if (!write_locked) {
+				int lock_err;
+				up_read(&driver_migrate_lock);
+				lock_err = down_write_killable(&driver_migrate_lock);
+				if (lock_err)
+					return lock_err;
+				write_locked = true;
+			}
+			drm_gpusvm_range_evict(range->base.gpusvm, &range->base);
+		}
 	} while (err == -EBUSY && retries--);
 
+	if (write_locked)
+		up_write(&driver_migrate_lock);
+	else
+		up_read(&driver_migrate_lock);
 	return err;
 }
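
For readers skimming the diff, the locking scheme above is a generic
"read lock on the optimistic path, fall back to the exclusive lock on
contention" pattern. The sketch below illustrates only that pattern and
is not the xe implementation: migrate_once() and evict_range() are
hypothetical callbacks standing in for drm_pagemap_populate_mm() and
drm_gpusvm_range_evict(), and the single-retry policy mirrors the
patch.

/*
 * Illustrative only: a minimal version of the locking pattern used in
 * the patch above. migrate_once() and evict_range() are hypothetical
 * stand-ins, not part of any real API.
 */
#include <linux/errno.h>
#include <linux/rwsem.h>
#include <linux/types.h>

static DECLARE_RWSEM(migrate_lock);

static int serialized_migrate(int (*migrate_once)(void *),
			      void (*evict_range)(void *),
			      void *range)
{
	bool write_locked = false;
	int err, retries = 1;

	/* Optimistic path: unrelated migrations may run concurrently. */
	err = down_read_interruptible(&migrate_lock);
	if (err)
		return err;

	do {
		err = migrate_once(range);
		if (err == -EBUSY && retries) {
			/*
			 * Likely racing with another device migrating the
			 * same range. Drop the read lock and take the write
			 * lock so only one migration makes forward progress,
			 * then evict the range and retry once.
			 */
			if (!write_locked) {
				int lock_err;

				up_read(&migrate_lock);
				lock_err = down_write_killable(&migrate_lock);
				if (lock_err)
					return lock_err;
				write_locked = true;
			}
			evict_range(range);
		}
	} while (err == -EBUSY && retries--);

	if (write_locked)
		up_write(&migrate_lock);
	else
		up_read(&migrate_lock);

	return err;
}

The point of switching to the write side is that once -EBUSY signals a
racing migration, at most one thread evicts and retries at a time, so
competing devices stop bouncing the same pages back and forth, which is
the livelock the commit message describes.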