drm/panthor: Make panthor_vm_[un]map_pages() more robust
author		Boris Brezillon <boris.brezillon@collabora.com>
		Fri, 28 Nov 2025 08:48:39 +0000 (09:48 +0100)
committer	Boris Brezillon <boris.brezillon@collabora.com>
		Fri, 28 Nov 2025 09:17:44 +0000 (10:17 +0100)
There's no reason for panthor_vm_[un]map_pages() to fail unless the
drm_gpuvm state and the page table are out of sync, so let's reflect that
by making panthor_vm_unmap_pages() a void function and adding
WARN_ON()s in various places. We also try to recover from such
unexpected mismatches by checking for already unmapped ranges and
skipping them. But there's only so much we can do to cope with such
SW bugs, so when we see a mismatch, we flag the VM unusable and disable
the AS to avoid further GPU accesses to the memory.

It could be that the as_disable() call fails because the MMU unit is
stuck, in which case the whole GPU is frozen, and only a GPU reset can
unblock things. After the reset, the VM will be seen as unusable and
any attempt to re-use it will fail, so we should be covered for any
use-after-unmap issues.
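
For illustration only (not part of this patch): a minimal sketch of how a
caller can reject re-use of a VM that was flagged unusable before starting a
new operation. The helper name is made up; only the vm->unusable flag and the
vm->op_lock mutex come from the driver.

  /* Hypothetical helper: refuse to operate on a VM that was flagged
   * unusable by a previous mapping failure.
   */
  static int panthor_vm_check_usable(struct panthor_vm *vm)
  {
          int ret = 0;

          mutex_lock(&vm->op_lock);
          if (vm->unusable)
                  ret = -EINVAL;
          mutex_unlock(&vm->op_lock);

          return ret;
  }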

v2:
- Fix double unlock

v3:
- Collect R-b

v4:
- No changes

Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://patch.msgid.link/20251128084841.3804658-6-boris.brezillon@collabora.com
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
drivers/gpu/drm/panthor/panthor_mmu.c

index f39e6e799c74d7dd103fb43e9e54c6b587b459b5..8ba5259e3d288bd85a6e6e659c1375cff3a73652 100644
@@ -842,13 +842,33 @@ static size_t get_pgsize(u64 addr, size_t size, size_t *count)
        return SZ_2M;
 }
 
-static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
+static void panthor_vm_declare_unusable(struct panthor_vm *vm)
+{
+       struct panthor_device *ptdev = vm->ptdev;
+       int cookie;
+
+       if (vm->unusable)
+               return;
+
+       vm->unusable = true;
+       mutex_lock(&ptdev->mmu->as.slots_lock);
+       if (vm->as.id >= 0 && drm_dev_enter(&ptdev->base, &cookie)) {
+               panthor_mmu_as_disable(ptdev, vm->as.id);
+               drm_dev_exit(cookie);
+       }
+       mutex_unlock(&ptdev->mmu->as.slots_lock);
+}
+
+static void panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 {
        struct panthor_device *ptdev = vm->ptdev;
        struct io_pgtable_ops *ops = vm->pgtbl_ops;
        u64 start_iova = iova;
        u64 offset = 0;
 
+       if (!size)
+               return;
+
        drm_WARN_ON(&ptdev->base,
                    (iova < vm->locked_region.start) ||
                    (iova + size > vm->locked_region.start + vm->locked_region.size));
@@ -858,13 +878,28 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
                size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);
 
                unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL);
+               if (drm_WARN_ON_ONCE(&ptdev->base, unmapped_sz != pgsize * pgcount)) {
+                       /* Gracefully handle sparsely unmapped regions to avoid leaving
+                        * page table pages behind when the drm_gpuvm and VM page table
+                        * are out-of-sync. This is not supposed to happen, hence the
+                        * above WARN_ON().
+                        */
+                       while (!ops->iova_to_phys(ops, iova + unmapped_sz) &&
+                              unmapped_sz < pgsize * pgcount)
+                               unmapped_sz += SZ_4K;
+
+                       /* We're past the point where we can try to fix things,
+                        * so flag the VM unusable to make sure it's not going
+                        * to be used anymore.
+                        */
+                       panthor_vm_declare_unusable(vm);
 
-               if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) {
-                       drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n",
-                               iova + offset + unmapped_sz,
-                               iova + offset + pgsize * pgcount,
-                               iova, iova + size);
-                       return  -EINVAL;
+                       /* If we don't make progress, we're screwed. That also means
+                        * something else prevents us from unmapping the region, but
+                        * there's not much we can do here: time for debugging.
+                        */
+                       if (drm_WARN_ON_ONCE(&ptdev->base, !unmapped_sz))
+                               return;
                }
 
                drm_dbg(&ptdev->base,
@@ -874,8 +909,6 @@ static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
 
                offset += unmapped_sz;
        }
-
-       return 0;
 }
 
 static int
@@ -927,16 +960,17 @@ panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
                        paddr += mapped;
                        len -= mapped;
 
-                       if (drm_WARN_ON(&ptdev->base, !ret && !mapped))
+                       /* If nothing was mapped, consider it an ENOMEM. */
+                       if (!ret && !mapped)
                                ret = -ENOMEM;
 
-                       if (ret) {
-                               /* If something failed, unmap what we've already mapped before
-                                * returning. The unmap call is not supposed to fail.
+                       /* If something fails, we stop there, and flag the VM unusable. */
+                       if (drm_WARN_ON_ONCE(&ptdev->base, ret)) {
+                               /* Unmap what we've already mapped to avoid leaving page
+                                * table pages behind.
                                 */
-                               drm_WARN_ON(&ptdev->base,
-                                           panthor_vm_unmap_pages(vm, start_iova,
-                                                                  iova - start_iova));
+                               panthor_vm_unmap_pages(vm, start_iova, iova - start_iova);
+                               panthor_vm_declare_unusable(vm);
                                return ret;
                        }
                }
@@ -2120,12 +2154,9 @@ static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op,
        struct panthor_vm_op_ctx *op_ctx = vm->op_ctx;
        struct panthor_vma *prev_vma = NULL, *next_vma = NULL;
        u64 unmap_start, unmap_range;
-       int ret;
 
        drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range);
-       ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range);
-       if (ret)
-               return ret;
+       panthor_vm_unmap_pages(vm, unmap_start, unmap_range);
 
        if (op->remap.prev) {
                prev_vma = panthor_vm_op_ctx_get_vma(op_ctx);
@@ -2165,13 +2196,9 @@ static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op,
 {
        struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base);
        struct panthor_vm *vm = priv;
-       int ret;
-
-       ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr,
-                                    unmap_vma->base.va.range);
-       if (drm_WARN_ON(&vm->ptdev->base, ret))
-               return ret;
 
+       panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr,
+                              unmap_vma->base.va.range);
        drm_gpuva_unmap(&op->unmap);
        panthor_vma_unlink(vm, unmap_vma);
        return 0;
@@ -2251,7 +2278,7 @@ panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
 
 out:
        if (ret && flag_vm_unusable_on_failure)
-               vm->unusable = true;
+               panthor_vm_declare_unusable(vm);
 
        vm->op_ctx = NULL;
        mutex_unlock(&vm->op_lock);