git.ipfire.org Git - thirdparty/linux.git/commitdiff
drm/xe/uapi: Introduce a flag to disallow vm overcommit in fault mode
author Thomas Hellström <thomas.hellstrom@linux.intel.com>
Wed, 4 Feb 2026 15:33:20 +0000 (16:33 +0100)
committer Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tue, 24 Feb 2026 14:27:03 +0000 (15:27 +0100)
Some compute applications may try to allocate device memory to probe
how much device memory is actually available, assuming that the
application will be the only one running on the particular GPU.

That strategy fails in fault mode since it allows VM overcommit.

While this could be resolved in user-space it's further complicated
by cgroups potentially restricting the amount of memory available
to the application.

Introduce a vm create flag, DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT, that
allows fault mode to mimic the behaviour of !fault mode WRT this. It
blocks evicting same-vm bos during VM_BIND processing. However,
it does *not* block evicting same-vm bos during pagefault
processing, preferring eviction rather than VM banning in
OOM situations.

Cc: John Falkowski <john.falkowski@intel.com>
Cc: Michal Mrozek <michal.mrozek@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260204153320.17989-1-thomas.hellstrom@linux.intel.com
drivers/gpu/drm/xe/xe_vm.c
drivers/gpu/drm/xe/xe_vm.h
drivers/gpu/drm/xe/xe_vm_types.h
include/uapi/drm/xe_drm.h

index a46f11a71c3724f1a9c9d8d818bd2e0f3c3683e5..550208ef63f8ac7d0e117bebeb148ea6041f6360 100644 (file)
@@ -1941,7 +1941,8 @@ find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs)
 
 #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
                                    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
-                                   DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
+                                   DRM_XE_VM_CREATE_FLAG_FAULT_MODE | \
+                                   DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
 
 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file)
@@ -1980,12 +1981,18 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
                         args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE))
                return -EINVAL;
 
+       if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) &&
+                        args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT))
+               return -EINVAL;
+
        if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)
                flags |= XE_VM_FLAG_SCRATCH_PAGE;
        if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
                flags |= XE_VM_FLAG_LR_MODE;
        if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
                flags |= XE_VM_FLAG_FAULT_MODE;
+       if (args->flags & DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT)
+               flags |= XE_VM_FLAG_NO_VM_OVERCOMMIT;
 
        vm = xe_vm_create(xe, flags, xef);
        if (IS_ERR(vm))
@@ -2906,7 +2913,7 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
                        err = drm_exec_lock_obj(exec, &bo->ttm.base);
                if (!err && validate)
                        err = xe_bo_validate(bo, vm,
-                                            !xe_vm_in_preempt_fence_mode(vm) &&
+                                            xe_vm_allow_vm_eviction(vm) &&
                                             res_evict, exec);
        }
 
index 288115c7844aa399f59b2fbb4c29007fa1b29eab..f849e369432b0d62f2c6b7fcbfb3346cbf4bcb74 100644 (file)
@@ -220,6 +220,13 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
        return xe_vm_in_lr_mode(vm) && !xe_vm_in_fault_mode(vm);
 }
 
+static inline bool xe_vm_allow_vm_eviction(struct xe_vm *vm)
+{
+       return !xe_vm_in_lr_mode(vm) ||
+               (xe_vm_in_fault_mode(vm) &&
+                !(vm->flags & XE_VM_FLAG_NO_VM_OVERCOMMIT));
+}
+
 int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
 void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
 
index 43203e90ee3e4053965e00e7845e0b96ada24a03..1f6f7e30e751e6bb59f39729e229616b25620eae 100644 (file)
@@ -232,6 +232,7 @@ struct xe_vm {
 #define XE_VM_FLAG_TILE_ID(flags)      FIELD_GET(GENMASK(7, 6), flags)
 #define XE_VM_FLAG_SET_TILE_ID(tile)   FIELD_PREP(GENMASK(7, 6), (tile)->id)
 #define XE_VM_FLAG_GSC                 BIT(8)
+#define XE_VM_FLAG_NO_VM_OVERCOMMIT     BIT(9)
        unsigned long flags;
 
        /**
index c9e70f78e7238b910b3b73d043a1d16a2cbe8613..ef2565048bdf1f953bdf76bd3c0c79af178a7b5d 100644 (file)
@@ -969,6 +969,11 @@ struct drm_xe_gem_mmap_offset {
  *    demand when accessed, and also allows per-VM overcommit of memory.
  *    The xe driver internally uses recoverable pagefaults to implement
  *    this.
+ *  - %DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT - Requires also
+ *    DRM_XE_VM_CREATE_FLAG_FAULT_MODE. This disallows per-VM overcommit
+ *    but only during a &DRM_IOCTL_XE_VM_BIND operation with the
+ *    %DRM_XE_VM_BIND_FLAG_IMMEDIATE flag set. This may be useful for
+ *    user-space naively probing the amount of available memory.
  */
 struct drm_xe_vm_create {
        /** @extensions: Pointer to the first extension struct, if any */
@@ -977,6 +982,7 @@ struct drm_xe_vm_create {
 #define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE     (1 << 0)
 #define DRM_XE_VM_CREATE_FLAG_LR_MODE          (1 << 1)
 #define DRM_XE_VM_CREATE_FLAG_FAULT_MODE       (1 << 2)
+#define DRM_XE_VM_CREATE_FLAG_NO_VM_OVERCOMMIT  (1 << 3)
        /** @flags: Flags */
        __u32 flags;