git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/amdkfd: Output migrate end event if migrate failed
author: Philip Yang <Philip.Yang@amd.com>
Fri, 16 Feb 2024 16:41:16 +0000 (11:41 -0500)
committer: Alex Deucher <alexander.deucher@amd.com>
Mon, 7 Oct 2024 18:09:02 +0000 (14:09 -0400)
If page migration fails, also output the migrate end event to match the
migrate start event, with the failure error_code appended to the end of
the migrate message macro. This does not break the uAPI: applications
that sscanf with the old message macro drop and ignore the error_code.

Output GPU page fault restore end event if migration failed.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
include/uapi/linux/kfd_ioctl.h

index 8ee3d07ffbdfa243623a93fa4f7aeb56ba02334f..eacfeb32f35d6612e1afb5919f1cdc43c1b77d44 100644 (file)
@@ -445,14 +445,13 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
        pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
                         mpages, cpages, migrate.npages);
 
-       kfd_smi_event_migration_end(node, p->lead_thread->pid,
-                                   start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-                                   0, node->id, trigger);
-
        svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
 
 out_free:
        kvfree(buf);
+       kfd_smi_event_migration_end(node, p->lead_thread->pid,
+                                   start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+                                   0, node->id, trigger, r);
 out:
        if (!r && mpages) {
                pdd = svm_range_get_pdd_by_node(prange, node);
@@ -751,14 +750,13 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
        svm_migrate_copy_done(adev, mfence);
        migrate_vma_finalize(&migrate);
 
-       kfd_smi_event_migration_end(node, p->lead_thread->pid,
-                                   start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-                                   node->id, 0, trigger);
-
        svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
 
 out_free:
        kvfree(buf);
+       kfd_smi_event_migration_end(node, p->lead_thread->pid,
+                                   start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+                                   node->id, 0, trigger, r);
 out:
        if (!r && cpages) {
                mpages = cpages - upages;
index de8b9abf7afcf35f227c4ddd5c8bc5b933a3155a..1d94b445a060dda85ff9c7955c97e69d4d428bd8 100644 (file)
@@ -292,12 +292,13 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
 
 void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
                                 unsigned long start, unsigned long end,
-                                uint32_t from, uint32_t to, uint32_t trigger)
+                                uint32_t from, uint32_t to, uint32_t trigger,
+                                int error_code)
 {
        kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
                          KFD_EVENT_FMT_MIGRATE_END(
                          ktime_get_boottime_ns(), pid, start, end - start,
-                         from, to, trigger));
+                         from, to, trigger, error_code));
 }
 
 void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
index 85010b8307f885cb17e4a2f0fb735bb368fd83af..503bff13d815336416689f8f0c6d8d59848df9cb 100644 (file)
@@ -44,7 +44,8 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
                             uint32_t trigger);
 void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
                             unsigned long start, unsigned long end,
-                            uint32_t from, uint32_t to, uint32_t trigger);
+                            uint32_t from, uint32_t to, uint32_t trigger,
+                            int error_code);
 void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
                                  uint32_t trigger);
 void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
index 04e746923697459956af019b2d1bbd0ab9fdf4a8..857ec6f23bba1b9c03856c97d31f70bbb68f6a73 100644 (file)
@@ -3085,8 +3085,6 @@ retry_write_locked:
        start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
        last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
        if (prange->actual_loc != 0 || best_loc != 0) {
-               migration = true;
-
                if (best_loc) {
                        r = svm_migrate_to_vram(prange, best_loc, start, last,
                                        mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
@@ -3109,7 +3107,9 @@ retry_write_locked:
                if (r) {
                        pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
                                 r, svms, start, last);
-                       goto out_unlock_range;
+                       goto out_migrate_fail;
+               } else {
+                       migration = true;
                }
        }
 
@@ -3119,6 +3119,7 @@ retry_write_locked:
                pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
                         r, svms, start, last);
 
+out_migrate_fail:
        kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
                                     migration);
 
index 717307d6b5b74ce26e169eec07ff7590001303db..fa9f9846b88e4da137138e43aee854057fae93a7 100644 (file)
@@ -609,6 +609,7 @@ struct kfd_ioctl_smi_events_args {
  *    migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update
  *    rw: 'W' for write page fault, 'R' for read page fault
  *    rescheduled: 'R' if the queue restore failed and rescheduled to try again
+ *    error_code: migrate failure error code, 0 if no error
  */
 #define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
                "%x %s\n", (reset_seq_num), (reset_cause)
@@ -630,9 +631,9 @@ struct kfd_ioctl_smi_events_args {
                "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
                (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
 
-#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\
-               "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\
-               (from), (to), (migrate_trigger)
+#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger, error_code) \
+               "%lld -%d @%lx(%lx) %x->%x %d %d\n", (ns), (pid), (start), (size),\
+               (from), (to), (migrate_trigger), (error_code)
 
 #define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
                "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)