drm/xe: Add SVM range invalidation and page fault

author Matthew Brost <matthew.brost@intel.com>

Thu, 6 Mar 2025 01:26:37 +0000 (17:26 -0800)

committer Matthew Brost <matthew.brost@intel.com>

Thu, 6 Mar 2025 19:35:40 +0000 (11:35 -0800)
author Matthew Brost <matthew.brost@intel.com>
Thu, 6 Mar 2025 01:26:37 +0000 (17:26 -0800)
committer Matthew Brost <matthew.brost@intel.com>
Thu, 6 Mar 2025 19:35:40 +0000 (11:35 -0800)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c

index 17d69039b8666c677534003cd73889d424459b68..c5ad9a0a89c2b322df9f5625df7f405ab87cc74d 100644 (file)
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -19,6 +19,7 @@
  #include "xe_guc.h"
  #include "xe_guc_ct.h"
  #include "xe_migrate.h"
+#include "xe_svm.h"
  #include "xe_trace_bo.h"
  #include "xe_vm.h"
  
@@ -125,8 +126,8 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
         return 0;
  }
  
-static int handle_vma_pagefault(struct xe_gt *gt, struct pagefault *pf,
-                               struct xe_vma *vma)
+static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
+                               bool atomic)
  {
         struct xe_vm *vm = xe_vma_vm(vma);
         struct xe_tile *tile = gt_to_tile(gt);
@@ -134,13 +135,13 @@ static int handle_vma_pagefault(struct xe_gt *gt, struct pagefault *pf,
         struct dma_fence *fence;
         ktime_t end = 0;
         int err;
-       bool atomic;
+
+       lockdep_assert_held_write(&vm->lock);
  
         xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
         xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024);
  
         trace_xe_vma_pagefault(vma);
-       atomic = access_is_atomic(pf->access_type);
  
         /* Check if VMA is valid */
         if (vma_is_valid(tile, vma) && !atomic)
@@ -210,6 +211,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
         struct xe_vm *vm;
         struct xe_vma *vma = NULL;
         int err;
+       bool atomic;
  
         /* SW isn't expected to handle TRTT faults */
         if (pf->trva_fault)
@@ -235,7 +237,13 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
                 goto unlock_vm;
         }
  
-       err = handle_vma_pagefault(gt, pf, vma);
+       atomic = access_is_atomic(pf->access_type);
+
+       if (xe_vma_is_cpu_addr_mirror(vma))
+               err = xe_svm_handle_pagefault(vm, vma, gt_to_tile(gt),
+                                             pf->page_addr, atomic);
+       else
+               err = handle_vma_pagefault(gt, vma, atomic);
  
  unlock_vm:
         if (!err)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c

index fb7f26353b8ff443977ff4f99706cab0974edf1c..9c3c70de71d0fac080cbb12529c89f8cfa26439e 100644 (file)
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -20,6 +20,7 @@
  #include "xe_res_cursor.h"
  #include "xe_sched_job.h"
  #include "xe_sync.h"
+#include "xe_svm.h"
  #include "xe_trace.h"
  #include "xe_ttm_stolen_mgr.h"
  #include "xe_vm.h"
@@ -851,6 +852,46 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
         return xe_walk.needs_invalidate;
  }
  
+/**
+ * xe_pt_zap_ptes_range() - Zap (zero) gpu ptes of a SVM range
+ * @tile: The tile we're zapping for.
+ * @vm: The VM we're zapping for.
+ * @range: The SVM range we're zapping for.
+ *
+ * SVM invalidation needs to be able to zap the gpu ptes of a given address
+ * range. In order to be able to do that, this function needs access to the
+ * shared page-table entries so it can either clear the leaf PTEs or
+ * clear the pointers to lower-level page-tables. The caller is required
+ * to hold the SVM notifier lock.
+ *
+ * The walk starts at the VM's page-table root for @tile and covers the
+ * range's interval-tree span, from itree.start up to and including
+ * itree.last. Nothing is walked if @tile is not set in the range's
+ * tile_present mask or is already set in its tile_invalidated mask.
+ *
+ * Return: Whether ptes were actually updated and a TLB invalidation is
+ * required. Always false when the walk is skipped.
+ */
+bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
+                         struct xe_svm_range *range)
+{
+       struct xe_pt_zap_ptes_walk xe_walk = {
+               .base = {
+                       .ops = &xe_pt_zap_ptes_ops,
+                       .shifts = xe_normal_pt_shifts,
+                       .max_level = XE_PT_HIGHEST_LEVEL,
+               },
+               .tile = tile,
+       };
+       struct xe_pt *pt = vm->pt_root[tile->id];
+       u8 pt_mask = (range->tile_present & ~range->tile_invalidated);
+
+       xe_svm_assert_in_notifier(vm);
+
+       if (!(pt_mask & BIT(tile->id)))
+               return false;
+
+       (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start,
+                               range->base.itree.last + 1, &xe_walk.base);
+
+       return xe_walk.needs_invalidate;
+}
+
  static void
  xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile,
                        struct iosys_map *map, void *data,
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h

index 8e43912ae8e94c2aacc683a0f882787da8886bb0..5ecf003d513c04a170e7e2649d5609f0dad37b4c 100644 (file)
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -45,5 +45,7 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops);
  void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops);
  
  bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
+bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
+                         struct xe_svm_range *range);
  
  #endif
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c

index 79da859f02b1dcb11a25960091fe44ff0d0f723e..866872f75d5e02a4c5f8c89dbce4dd71a560ee22 100644 (file)
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -3,18 +3,204 @@
   * Copyright © 2024 Intel Corporation
   */
  
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_pt.h"
  #include "xe_svm.h"
  #include "xe_vm.h"
  #include "xe_vm_types.h"
  
+static struct xe_vm *gpusvm_to_vm(struct drm_gpusvm *gpusvm)
+{
+       return container_of(gpusvm, struct xe_vm, svm.gpusvm);
+}
+
+static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
+{
+       return gpusvm_to_vm(r->gpusvm);
+}
+
+static unsigned long xe_svm_range_start(struct xe_svm_range *range)
+{
+       return drm_gpusvm_range_start(&range->base);
+}
+
+static unsigned long xe_svm_range_end(struct xe_svm_range *range)
+{
+       return drm_gpusvm_range_end(&range->base);
+}
+
+static struct drm_gpusvm_range *
+xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
+{
+       struct xe_svm_range *range;
+
+       range = kzalloc(sizeof(*range), GFP_KERNEL);
+       if (!range)
+               return ERR_PTR(-ENOMEM);
+
+       xe_vm_get(gpusvm_to_vm(gpusvm));
+
+       return &range->base;
+}
+
+static void xe_svm_range_free(struct drm_gpusvm_range *range)
+{
+       xe_vm_put(range_to_vm(range));
+       kfree(range);
+}
+
+static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
+{
+       return container_of(r, struct xe_svm_range, base);
+}
+
+static u8
+xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
+                                 const struct mmu_notifier_range *mmu_range,
+                                 u64 *adj_start, u64 *adj_end)
+{
+       struct xe_svm_range *range = to_xe_range(r);
+       struct xe_device *xe = vm->xe;
+       struct xe_tile *tile;
+       u8 tile_mask = 0;
+       u8 id;
+
+       xe_svm_assert_in_notifier(vm);
+
+       /*
+        * Skip if already unmapped or if no binding exists. Returning 0 here
+        * reports that no tile needs a TLB invalidation for this range and
+        * leaves the adjusted bounds untouched.
+        */
+       if (range->base.flags.unmapped || !range->tile_present)
+               return 0;
+
+       /*
+        * Adjust invalidation to range boundaries: widen the span so that it
+        * covers both this range and the MMU notifier range.
+        *
+        * NOTE(review): both bounds are recomputed from @mmu_range on every
+        * call rather than from the values passed in, so widening done by an
+        * earlier call for another range is not carried over - confirm this
+        * is intended when several ranges are processed in one invalidation.
+        */
+       *adj_start = min(xe_svm_range_start(range), mmu_range->start);
+       *adj_end = max(xe_svm_range_end(range), mmu_range->end);
+
+       /*
+        * XXX: Ideally would zap PTEs in one shot in xe_svm_invalidate but the
+        * invalidation code can't correctly cope with sparse ranges or
+        * invalidations spanning multiple ranges.
+        *
+        * Each tile whose PTEs were actually zapped is recorded both in the
+        * returned mask and in the range's tile_invalidated mask.
+        */
+       for_each_tile(tile, xe, id)
+               if (xe_pt_zap_ptes_range(tile, vm, range)) {
+                       tile_mask |= BIT(id);
+                       range->tile_invalidated |= BIT(id);
+               }
+
+       return tile_mask;
+}
+
+static void
+xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
+                               const struct mmu_notifier_range *mmu_range)
+{
+       struct drm_gpusvm_ctx ctx = { .in_notifier = true, };
+
+       xe_svm_assert_in_notifier(vm);
+
+       drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
+       /*
+        * The call above unmaps the range's pages with the in_notifier
+        * context flag set; @mmu_range is not used here yet.
+        *
+        * TODO: Add range to garbage collector if VM is not closed
+        */
+}
+
  static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
                               struct drm_gpusvm_notifier *notifier,
                               const struct mmu_notifier_range *mmu_range)
  {
-       /* TODO: Implement */
+       struct xe_vm *vm = gpusvm_to_vm(gpusvm);
+       struct xe_device *xe = vm->xe;
+       struct xe_tile *tile;
+       struct drm_gpusvm_range *r, *first;
+       struct xe_gt_tlb_invalidation_fence
+               fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+       u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
+       u8 tile_mask = 0;
+       u8 id;
+       u32 fence_id = 0;
+       long err;
+
+       xe_svm_assert_in_notifier(vm);
+
+       /* Clamp the invalidation to the notifier boundaries */
+       adj_start = max(drm_gpusvm_notifier_start(notifier), adj_start);
+       adj_end = min(drm_gpusvm_notifier_end(notifier), adj_end);
+
+       first = drm_gpusvm_range_find(notifier, adj_start, adj_end);
+       if (!first)
+               return;
+
+       /*
+        * PTs may be getting destroyed, so it is not safe to touch them, but
+        * the PTs should already be invalidated at this point in time.
+        * Regardless, we still need to ensure any dma mappings are unmapped
+        * here.
+        */
+       if (xe_vm_is_closed(vm))
+               goto range_notifier_event_end;
+
+       /*
+        * XXX: Less than ideal to always wait on the VM's resv slots if an
+        * invalidation is not required. Could walk the range list twice to
+        * figure out if an invalidation is needed, but that is also not ideal.
+        */
+       err = dma_resv_wait_timeout(xe_vm_resv(vm),
+                                   DMA_RESV_USAGE_BOOKKEEP,
+                                   false, MAX_SCHEDULE_TIMEOUT);
+       XE_WARN_ON(err <= 0);
+
+       r = first;
+       drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
+               tile_mask |= xe_svm_range_notifier_event_begin(vm, r, mmu_range,
+                                                              &adj_start,
+                                                              &adj_end);
+       if (!tile_mask)
+               goto range_notifier_event_end;
+
+       xe_device_wmb(xe);
+
+       for_each_tile(tile, xe, id) {
+               if (tile_mask & BIT(id)) {
+                       int err;
+
+                       xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+                                                         &fence[fence_id], true);
+
+                       err = xe_gt_tlb_invalidation_range(tile->primary_gt,
+                                                          &fence[fence_id],
+                                                          adj_start,
+                                                          adj_end,
+                                                          vm->usm.asid);
+                       if (WARN_ON_ONCE(err < 0))
+                               goto wait;
+                       ++fence_id;
+
+                       if (!tile->media_gt)
+                               continue;
+
+                       xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+                                                         &fence[fence_id], true);
+
+                       err = xe_gt_tlb_invalidation_range(tile->media_gt,
+                                                          &fence[fence_id],
+                                                          adj_start,
+                                                          adj_end,
+                                                          vm->usm.asid);
+                       if (WARN_ON_ONCE(err < 0))
+                               goto wait;
+                       ++fence_id;
+               }
+       }
+
+wait:
+       for (id = 0; id < fence_id; ++id)
+               xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
+range_notifier_event_end:
+       r = first;
+       drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
+               xe_svm_range_notifier_event_end(vm, r, mmu_range);
  }
  
  static const struct drm_gpusvm_ops gpusvm_ops = {
+       .range_alloc = xe_svm_range_alloc,
+       .range_free = xe_svm_range_free,
         .invalidate = xe_svm_invalidate,
  };
  
@@ -71,3 +257,44 @@ void xe_svm_fini(struct xe_vm *vm)
  
         drm_gpusvm_fini(&vm->svm.gpusvm);
  }
+
+/**
+ * xe_svm_handle_pagefault() - SVM handle page fault
+ * @vm: The VM.
+ * @vma: The CPU address mirror VMA.
+ * @tile: The tile upon which the fault occurred. Not yet used by this
+ *        function.
+ * @fault_addr: The GPU fault address.
+ * @atomic: The fault atomic access bit. Not yet used by this function.
+ *
+ * Create GPU bindings for a SVM page fault. At present this calls
+ * drm_gpusvm_range_find_or_insert() for @fault_addr, bounded by the start
+ * and end of @vma, and then drm_gpusvm_range_get_pages() on the result;
+ * issuing the bind itself is still a TODO. If getting the pages fails with
+ * -EFAULT or -EPERM the whole sequence is retried from the range lookup.
+ *
+ * The caller must hold the VM lock in write mode, and @vma must be a CPU
+ * address mirror VMA.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
+                           struct xe_tile *tile, u64 fault_addr,
+                           bool atomic)
+{
+       struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), };
+       struct drm_gpusvm_range *r;
+       int err;
+
+       lockdep_assert_held_write(&vm->lock);
+       xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
+
+retry:
+       /* TODO: Run garbage collector */
+
+       r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
+                                           xe_vma_start(vma), xe_vma_end(vma),
+                                           &ctx);
+       if (IS_ERR(r))
+               return PTR_ERR(r);
+
+       err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
+       if (err == -EFAULT || err == -EPERM)    /* Corner case where CPU mappings have changed */
+               goto retry;
+
+       /* TODO: Issue bind */
+
+       return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h

index d361a78a6839fd9e6b93f01b646582963e5a7944..31090967b83ceac60e985e6cfcdd012cd9f59100 100644 (file)
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -7,10 +7,29 @@
  #define _XE_SVM_H_
  
  #include <drm/drm_pagemap.h>
+#include <drm/drm_gpusvm.h>
  
  #define XE_INTERCONNECT_VRAM DRM_INTERCONNECT_DRIVER
  
+struct xe_tile;
  struct xe_vm;
+struct xe_vma;
+
+/** struct xe_svm_range - SVM range */
+struct xe_svm_range {
+       /** @base: base drm_gpusvm_range */
+       struct drm_gpusvm_range base;
+       /**
+        * @tile_present: Mask of tiles on which a binding is present for
+        * this range, one bit per tile id. Protected by GPU SVM notifier
+        * lock.
+        */
+       u8 tile_present;
+       /**
+        * @tile_invalidated: Mask of tiles whose binding for this range has
+        * been invalidated, one bit per tile id. Protected by GPU SVM
+        * notifier lock.
+        */
+       u8 tile_invalidated;
+};
  
  #if IS_ENABLED(CONFIG_DRM_GPUSVM)
  int xe_svm_init(struct xe_vm *vm);
@@ -18,6 +37,10 @@ int xe_svm_init(struct xe_vm *vm);
  void xe_svm_fini(struct xe_vm *vm);
  
  void xe_svm_close(struct xe_vm *vm);
+
+int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
+                           struct xe_tile *tile, u64 fault_addr,
+                           bool atomic);
  #else
  static inline
  int xe_svm_init(struct xe_vm *vm)
@@ -34,6 +57,23 @@ static inline
  void xe_svm_close(struct xe_vm *vm)
  {
  }
+
+static inline
+int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
+                           struct xe_tile *tile, u64 fault_addr,
+                           bool atomic)
+{
+       return 0;
+}
  #endif
  
+#define xe_svm_assert_in_notifier(vm__) \
+       lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_lock(vm__)     \
+       drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
+
+#define xe_svm_notifier_unlock(vm__)   \
+       drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
+
  #endif
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c

index 8a6416deffa2f047b75ce94b83913534666eb9e6..eb233a2b2f8835a6e7eb66b57b6b7f4e7414b133 100644 (file)
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1623,6 +1623,8 @@ static void xe_vm_close(struct xe_vm *vm)
         bound = drm_dev_enter(&xe->drm, &idx);
  
         down_write(&vm->lock);
+       if (xe_vm_in_fault_mode(vm))
+               xe_svm_notifier_lock(vm);
  
         vm->size = 0;
  
@@ -1646,6 +1648,8 @@ static void xe_vm_close(struct xe_vm *vm)
                 }
         }
  
+       if (xe_vm_in_fault_mode(vm))
+               xe_svm_notifier_unlock(vm);
         up_write(&vm->lock);
  
         if (bound)
author	Matthew Brost <matthew.brost@intel.com>
	Thu, 6 Mar 2025 01:26:37 +0000 (17:26 -0800)
committer	Matthew Brost <matthew.brost@intel.com>
	Thu, 6 Mar 2025 19:35:40 +0000 (11:35 -0800)
drivers/gpu/drm/xe/xe_gt_pagefault.c		patch \| blob \| blame \| history
drivers/gpu/drm/xe/xe_pt.c		patch \| blob \| blame \| history
drivers/gpu/drm/xe/xe_pt.h		patch \| blob \| blame \| history
drivers/gpu/drm/xe/xe_svm.c		patch \| blob \| blame \| history
drivers/gpu/drm/xe/xe_svm.h		patch \| blob \| blame \| history
drivers/gpu/drm/xe/xe_vm.c		patch \| blob \| blame \| history