From: Alice Ryhl Date: Thu, 8 Jan 2026 16:07:31 +0000 (+0000) Subject: drm/gpuvm: take GEM lock inside drm_gpuvm_bo_obtain_prealloc() X-Git-Tag: v6.19-rc6~20^2^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9ce4aef9a5b1b76207152ba019f838f62dff97b8;p=thirdparty%2Flinux.git drm/gpuvm: take GEM lock inside drm_gpuvm_bo_obtain_prealloc() When calling drm_gpuvm_bo_obtain_prealloc() and using immediate mode, this may result in a call to ops->vm_bo_free(vm_bo) while holding the GEMs gpuva mutex. This is a problem if ops->vm_bo_free(vm_bo) performs any operations that are not safe in the fence signalling critical path, and it turns out that Panthor (the only current user of the method) calls drm_gem_shmem_unpin() which takes a resv lock internally. This constitutes both a violation of signalling safety and lock inversion. To fix this, we modify the method to internally take the GEMs gpuva mutex so that the mutex can be unlocked before freeing the preallocated vm_bo. Note that this modification introduces a requirement that the driver uses immediate mode to call drm_gpuvm_bo_obtain_prealloc() as it would otherwise take the wrong lock. Fixes: 63e919a31625 ("panthor: use drm_gpuva_unlink_defer()") Reviewed-by: Boris Brezillon Signed-off-by: Alice Ryhl Link: https://patch.msgid.link/20260108-gpuvm-rust-v2-1-dbd014005a0b@google.com Signed-off-by: Danilo Krummrich --- diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c index 8a06d296561d..0de47e83d84d 100644 --- a/drivers/gpu/drm/drm_gpuvm.c +++ b/drivers/gpu/drm/drm_gpuvm.c @@ -1602,14 +1602,48 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, } EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); +/* + * drm_gpuvm_bo_destroy_not_in_lists() - final part of drm_gpuvm_bo cleanup + * @vm_bo: the &drm_gpuvm_bo to destroy + * + * It is illegal to call this method if the @vm_bo is present in the GEMs gpuva + * list, the extobj list, or the evicted list. + * + * Note that this puts a refcount on the GEM object, which may destroy the GEM + * object if the refcount reaches zero. It's illegal for this to happen if the + * caller holds the GEMs gpuva mutex because it would free the mutex. + */ +static void +drm_gpuvm_bo_destroy_not_in_lists(struct drm_gpuvm_bo *vm_bo) +{ + struct drm_gpuvm *gpuvm = vm_bo->vm; + const struct drm_gpuvm_ops *ops = gpuvm->ops; + struct drm_gem_object *obj = vm_bo->obj; + + if (ops && ops->vm_bo_free) + ops->vm_bo_free(vm_bo); + else + kfree(vm_bo); + + drm_gpuvm_put(gpuvm); + drm_gem_object_put(obj); +} + +static void +drm_gpuvm_bo_destroy_not_in_lists_kref(struct kref *kref) +{ + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, + kref); + + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); +} + static void drm_gpuvm_bo_destroy(struct kref *kref) { struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, kref); struct drm_gpuvm *gpuvm = vm_bo->vm; - const struct drm_gpuvm_ops *ops = gpuvm->ops; - struct drm_gem_object *obj = vm_bo->obj; bool lock = !drm_gpuvm_resv_protected(gpuvm); if (!lock) @@ -1618,16 +1652,10 @@ drm_gpuvm_bo_destroy(struct kref *kref) drm_gpuvm_bo_list_del(vm_bo, extobj, lock); drm_gpuvm_bo_list_del(vm_bo, evict, lock); - drm_gem_gpuva_assert_lock_held(gpuvm, obj); + drm_gem_gpuva_assert_lock_held(gpuvm, vm_bo->obj); list_del(&vm_bo->list.entry.gem); - if (ops && ops->vm_bo_free) - ops->vm_bo_free(vm_bo); - else - kfree(vm_bo); - - drm_gpuvm_put(gpuvm); - drm_gem_object_put(obj); + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); } /** @@ -1745,9 +1773,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put_deferred); void drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm) { - const struct drm_gpuvm_ops *ops = gpuvm->ops; struct drm_gpuvm_bo *vm_bo; - struct drm_gem_object *obj; struct llist_node *bo_defer; bo_defer = llist_del_all(&gpuvm->bo_defer); @@ -1766,14 +1792,7 @@ drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm) while (bo_defer) { vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, list.entry.bo_defer); bo_defer = bo_defer->next; - obj = vm_bo->obj; - if (ops && ops->vm_bo_free) - ops->vm_bo_free(vm_bo); - else - kfree(vm_bo); - - drm_gpuvm_put(gpuvm); - drm_gem_object_put(obj); + drm_gpuvm_bo_destroy_not_in_lists(vm_bo); } } EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup); @@ -1861,6 +1880,9 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); * count is decreased. If not found @__vm_bo is returned without further * increase of the reference count. * + * The provided @__vm_bo must not already be in the gpuva, evict, or extobj + * lists prior to calling this method. + * * A new &drm_gpuvm_bo is added to the GEMs gpuva list. * * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing @@ -1873,14 +1895,19 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) struct drm_gem_object *obj = __vm_bo->obj; struct drm_gpuvm_bo *vm_bo; + drm_WARN_ON(gpuvm->drm, !drm_gpuvm_immediate_mode(gpuvm)); + + mutex_lock(&obj->gpuva.lock); vm_bo = drm_gpuvm_bo_find(gpuvm, obj); if (vm_bo) { - drm_gpuvm_bo_put(__vm_bo); + mutex_unlock(&obj->gpuva.lock); + kref_put(&__vm_bo->kref, drm_gpuvm_bo_destroy_not_in_lists_kref); return vm_bo; } drm_gem_gpuva_assert_lock_held(gpuvm, obj); list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); + mutex_unlock(&obj->gpuva.lock); return __vm_bo; } diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index d4839d282689..f6339963e496 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1252,17 +1252,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, goto err_cleanup; } - /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our - * pre-allocated BO if the association exists. Given we - * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will - * be called immediately, and we have to hold the VM resv lock when - * calling this function. - */ - dma_resv_lock(panthor_vm_resv(vm), NULL); - mutex_lock(&bo->base.base.gpuva.lock); op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); - mutex_unlock(&bo->base.base.gpuva.lock); - dma_resv_unlock(panthor_vm_resv(vm)); op_ctx->map.bo_offset = offset;