]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe/vf: Avoid indefinite blocking in preempt rebind worker for VFs supporting...
authorMatthew Brost <matthew.brost@intel.com>
Wed, 8 Oct 2025 21:45:16 +0000 (14:45 -0700)
committerMatthew Brost <matthew.brost@intel.com>
Thu, 9 Oct 2025 10:22:41 +0000 (03:22 -0700)
Blocking in work queues on a hardware action that may never occur —
especially when it depends on a software fixup also scheduled on
a work queue — is a recipe for deadlock. This situation arises with
the preempt rebind worker and VF post-migration recovery. To prevent
potential deadlocks, avoid indefinite blocking in the preempt rebind
worker for VFs that support migration.

v4:
 - Use dma_fence_wait_timeout (CI)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Tomasz Lis <tomasz.lis@intel.com>
Link: https://lore.kernel.org/r/20251008214532.3442967-19-matthew.brost@intel.com
drivers/gpu/drm/xe/xe_vm.c

index 4e914928e0a976237931ef11f159c00e305b69b9..faca626702b86adbd92d896bd81a16c2aa978c3e 100644 (file)
@@ -35,6 +35,7 @@
 #include "xe_pt.h"
 #include "xe_pxp.h"
 #include "xe_res_cursor.h"
+#include "xe_sriov_vf.h"
 #include "xe_svm.h"
 #include "xe_sync.h"
 #include "xe_tile.h"
@@ -111,12 +112,22 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
 static int wait_for_existing_preempt_fences(struct xe_vm *vm)
 {
        struct xe_exec_queue *q;
+       bool vf_migration = IS_SRIOV_VF(vm->xe) &&
+               xe_sriov_vf_migration_supported(vm->xe);
+       signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
 
        xe_vm_assert_held(vm);
 
        list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
                if (q->lr.pfence) {
-                       long timeout = dma_fence_wait(q->lr.pfence, false);
+                       long timeout;
+
+                       timeout = dma_fence_wait_timeout(q->lr.pfence, false,
+                                                        wait_time);
+                       if (!timeout) {
+                               xe_assert(vm->xe, vf_migration);
+                               return -EAGAIN;
+                       }
 
                        /* Only -ETIME on fence indicates VM needs to be killed */
                        if (timeout < 0 || q->lr.pfence->error == -ETIME)
@@ -541,6 +552,19 @@ out_unlock:
 out_unlock_outer:
        if (err == -EAGAIN) {
                trace_xe_vm_rebind_worker_retry(vm);
+
+               /*
+                * We can't block in workers on a VF which supports migration
+                * given this can block the VF post-migration workers from
+                * getting scheduled.
+                */
+               if (IS_SRIOV_VF(vm->xe) &&
+                   xe_sriov_vf_migration_supported(vm->xe)) {
+                       up_write(&vm->lock);
+                       xe_vm_queue_rebind_worker(vm);
+                       return;
+               }
+
                goto retry;
        }