git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe: Split TLB invalidation into submit and wait steps
author: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Thu, 5 Mar 2026 09:39:08 +0000 (10:39 +0100)
committer: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Wed, 11 Mar 2026 08:32:59 +0000 (09:32 +0100)
xe_vm_range_tilemask_tlb_inval() submits TLB invalidation requests to
all GTs in a tile mask and then immediately waits for them to complete
before returning. This is fine for the existing callers, but a
subsequent patch will need to defer the wait in order to overlap TLB
invalidations across multiple VMAs.

Introduce xe_tlb_inval_range_tilemask_submit() and
xe_tlb_inval_batch_wait() in xe_tlb_inval.c as the submit and wait
halves respectively. The batch of fences is carried in the new
xe_tlb_inval_batch structure. Remove xe_vm_range_tilemask_tlb_inval()
and convert all three call sites to the new API.

v3:
- Don't wait on TLB invalidation batches if the corresponding batch
  submit returns an error. (Matt Brost)
- s/_batch/batch/ (Matt Brost)

Assisted-by: GitHub Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/20260305093909.43623-4-thomas.hellstrom@linux.intel.com
drivers/gpu/drm/xe/xe_svm.c
drivers/gpu/drm/xe/xe_tlb_inval.c
drivers/gpu/drm/xe/xe_tlb_inval.h
drivers/gpu/drm/xe/xe_tlb_inval_types.h
drivers/gpu/drm/xe/xe_vm.c
drivers/gpu/drm/xe/xe_vm.h
drivers/gpu/drm/xe/xe_vm_madvise.c
drivers/gpu/drm/xe/xe_vm_types.h

index 78f4b2c60670a5bdfe009daff336a5115717cd79..7e75d03456ac43132e602923dab9b21372699584 100644 (file)
@@ -19,6 +19,7 @@
 #include "xe_pt.h"
 #include "xe_svm.h"
 #include "xe_tile.h"
+#include "xe_tlb_inval.h"
 #include "xe_ttm_vram_mgr.h"
 #include "xe_vm.h"
 #include "xe_vm_types.h"
@@ -225,6 +226,7 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
                              const struct mmu_notifier_range *mmu_range)
 {
        struct xe_vm *vm = gpusvm_to_vm(gpusvm);
+       struct xe_tlb_inval_batch batch;
        struct xe_device *xe = vm->xe;
        struct drm_gpusvm_range *r, *first;
        struct xe_tile *tile;
@@ -276,8 +278,10 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
 
        xe_device_wmb(xe);
 
-       err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask);
-       WARN_ON_ONCE(err);
+       err = xe_tlb_inval_range_tilemask_submit(xe, vm->usm.asid, adj_start, adj_end,
+                                                tile_mask, &batch);
+       if (!WARN_ON_ONCE(err))
+               xe_tlb_inval_batch_wait(&batch);
 
 range_notifier_event_end:
        r = first;
index 933f30fb617db8154b0b70d2959b6b82d0e103db..10dcd4abb00f09392cf6aecce6bcb1372be6a5c9 100644 (file)
@@ -486,3 +486,87 @@ bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval)
        guard(spinlock_irq)(&tlb_inval->pending_lock);
        return list_is_singular(&tlb_inval->pending_fences);
 }
+
+/**
+ * xe_tlb_inval_batch_wait() - Wait for all fences in a TLB invalidation batch
+ * @batch: Batch of TLB invalidation fences to wait on
+ *
+ * Waits for every fence in @batch to signal, then resets @batch so it can be
+ * reused for a subsequent invalidation.
+ */
+void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch)
+{
+       struct xe_tlb_inval_fence *fence = &batch->fence[0];
+       unsigned int i;
+
+       for (i = 0; i < batch->num_fences; ++i)
+               xe_tlb_inval_fence_wait(fence++);
+
+       batch->num_fences = 0;
+}
+
+/**
+ * xe_tlb_inval_range_tilemask_submit() - Submit TLB invalidations for an
+ * address range on a tile mask
+ * @xe: The xe device
+ * @asid: Address space ID
+ * @start: start address
+ * @end: end address
+ * @tile_mask: mask for which gt's issue tlb invalidation
+ * @batch: Batch of tlb invalidate fences
+ *
+ * Issue a range based TLB invalidation for gt's in tilemask
+ * If the function returns an error, there is no need to call
+ * xe_tlb_inval_batch_wait() on @batch.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid,
+                                      u64 start, u64 end, u8 tile_mask,
+                                      struct xe_tlb_inval_batch *batch)
+{
+       struct xe_tlb_inval_fence *fence = &batch->fence[0];
+       struct xe_tile *tile;
+       u32 fence_id = 0;
+       u8 id;
+       int err;
+
+       batch->num_fences = 0;
+       if (!tile_mask)
+               return 0;
+
+       for_each_tile(tile, xe, id) {
+               if (!(tile_mask & BIT(id)))
+                       continue;
+
+               xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
+                                       &fence[fence_id], true);
+
+               err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
+                                        &fence[fence_id], start, end,
+                                        asid, NULL);
+               if (err)
+                       goto wait;
+               ++fence_id;
+
+               if (!tile->media_gt)
+                       continue;
+
+               xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
+                                       &fence[fence_id], true);
+
+               err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
+                                        &fence[fence_id], start, end,
+                                        asid, NULL);
+               if (err)
+                       goto wait;
+               ++fence_id;
+       }
+
+wait:
+       batch->num_fences = fence_id;
+       if (err)
+               xe_tlb_inval_batch_wait(batch);
+
+       return err;
+}
index 62089254fa23995f90a043f678b84a491556c590..a76b7823a5f2e272eb95e470cf577fd1d2f95cbf 100644 (file)
@@ -45,4 +45,10 @@ void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno);
 
 bool xe_tlb_inval_idle(struct xe_tlb_inval *tlb_inval);
 
+int xe_tlb_inval_range_tilemask_submit(struct xe_device *xe, u32 asid,
+                                      u64 start, u64 end, u8 tile_mask,
+                                      struct xe_tlb_inval_batch *batch);
+
+void xe_tlb_inval_batch_wait(struct xe_tlb_inval_batch *batch);
+
 #endif /* _XE_TLB_INVAL_ */
index 3b089f90f0021c697ce400f63c380b5e01129b97..3d1797d186fd79a1392e8772f183ac408e5334dc 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/workqueue.h>
 #include <linux/dma-fence.h>
 
+#include "xe_device_types.h"
+
 struct drm_suballoc;
 struct xe_tlb_inval;
 
@@ -132,4 +134,16 @@ struct xe_tlb_inval_fence {
        ktime_t inval_time;
 };
 
+/**
+ * struct xe_tlb_inval_batch - Batch of TLB invalidation fences
+ *
+ * Holds one fence per GT covered by a TLB invalidation request.
+ */
+struct xe_tlb_inval_batch {
+       /** @fence: per-GT TLB invalidation fences */
+       struct xe_tlb_inval_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+       /** @num_fences: number of valid entries in @fence */
+       unsigned int num_fences;
+};
+
 #endif
index 550208ef63f8ac7d0e117bebeb148ea6041f6360..a4053fb75ebbbae4a2a898df9ca0c9d8640504dd 100644 (file)
@@ -3969,66 +3969,6 @@ void xe_vm_unlock(struct xe_vm *vm)
        dma_resv_unlock(xe_vm_resv(vm));
 }
 
-/**
- * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
- * address range
- * @vm: The VM
- * @start: start address
- * @end: end address
- * @tile_mask: mask for which gt's issue tlb invalidation
- *
- * Issue a range based TLB invalidation for gt's in tilemask
- *
- * Returns 0 for success, negative error code otherwise.
- */
-int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
-                                  u64 end, u8 tile_mask)
-{
-       struct xe_tlb_inval_fence
-               fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
-       struct xe_tile *tile;
-       u32 fence_id = 0;
-       u8 id;
-       int err;
-
-       if (!tile_mask)
-               return 0;
-
-       for_each_tile(tile, vm->xe, id) {
-               if (!(tile_mask & BIT(id)))
-                       continue;
-
-               xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
-                                       &fence[fence_id], true);
-
-               err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
-                                        &fence[fence_id], start, end,
-                                        vm->usm.asid, NULL);
-               if (err)
-                       goto wait;
-               ++fence_id;
-
-               if (!tile->media_gt)
-                       continue;
-
-               xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
-                                       &fence[fence_id], true);
-
-               err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
-                                        &fence[fence_id], start, end,
-                                        vm->usm.asid, NULL);
-               if (err)
-                       goto wait;
-               ++fence_id;
-       }
-
-wait:
-       for (id = 0; id < fence_id; ++id)
-               xe_tlb_inval_fence_wait(&fence[id]);
-
-       return err;
-}
-
 /**
  * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
  * @vma: VMA to invalidate
@@ -4043,6 +3983,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 {
        struct xe_device *xe = xe_vma_vm(vma)->xe;
        struct xe_vm *vm = xe_vma_vm(vma);
+       struct xe_tlb_inval_batch batch;
        struct xe_tile *tile;
        u8 tile_mask = 0;
        int ret = 0;
@@ -4083,12 +4024,16 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 
        xe_device_wmb(xe);
 
-       ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
-                                            xe_vma_end(vma), tile_mask);
+       ret = xe_tlb_inval_range_tilemask_submit(xe, xe_vma_vm(vma)->usm.asid,
+                                                xe_vma_start(vma), xe_vma_end(vma),
+                                                tile_mask, &batch);
 
        /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
        WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
 
+       if (!ret)
+               xe_tlb_inval_batch_wait(&batch);
+
        return ret;
 }
 
index f849e369432b0d62f2c6b7fcbfb3346cbf4bcb74..62f4b6fec0bc171576857f3ddfc8248b9b98396f 100644 (file)
@@ -240,9 +240,6 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
 struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
                                     struct xe_svm_range *range);
 
-int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
-                                  u64 end, u8 tile_mask);
-
 int xe_vm_invalidate_vma(struct xe_vma *vma);
 
 int xe_vm_validate_protected(struct xe_vm *vm);
index 0c92fed6c6a62e6daf2fc1447f5bde05be7f1d5f..6e0d95782cb536052020c86ade0f48c04d50bcd5 100644 (file)
@@ -12,6 +12,7 @@
 #include "xe_pat.h"
 #include "xe_pt.h"
 #include "xe_svm.h"
+#include "xe_tlb_inval.h"
 
 struct xe_vmas_in_madvise_range {
        u64 addr;
@@ -235,13 +236,20 @@ static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
 static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
 {
        u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
+       struct xe_tlb_inval_batch batch;
+       int err;
 
        if (!tile_mask)
                return 0;
 
        xe_device_wmb(vm->xe);
 
-       return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
+       err = xe_tlb_inval_range_tilemask_submit(vm->xe, vm->usm.asid, start, end,
+                                                tile_mask, &batch);
+       if (!err)
+               xe_tlb_inval_batch_wait(&batch);
+
+       return err;
 }
 
 static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
index 1f6f7e30e751e6bb59f39729e229616b25620eae..de6544165cfada0f7dd90b736952acc87e32f955 100644 (file)
@@ -18,6 +18,7 @@
 #include "xe_device_types.h"
 #include "xe_pt_types.h"
 #include "xe_range_fence.h"
+#include "xe_tlb_inval_types.h"
 #include "xe_userptr.h"
 
 struct drm_pagemap;