From 3bebfd53af0f7c8ea55094ba7b8b8b907024bb7b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Jun 2025 13:13:24 -0700 Subject: [PATCH] drm/msm: Defer VMA unmap for fb unpins With the conversion to drm_gpuvm, we lost the lazy VMA cleanup, which means that fb cleanup/unpin when pageflipping to new scanout buffers immediately unmaps the scanout buffer. This is costly (with tlbinv, it can be 4-6ms for a 1080p scanout buffer, and more for higher resolutions)! To avoid this, introduce a vma_ref, which is incremented whenever userspace has a GEM handle or dma-buf fd. When unpinning if the vm is the kms->vm we defer tearing down the VMA until the vma_ref drops to zero. If the buffer is still part of a flip-chain then userspace will be holding some sort of reference to the BO, either via a GEM handle and/or dma-buf fd. So this avoids unmapping the VMA when there is a strong possibility that it will be needed again. Signed-off-by: Rob Clark Tested-by: Antonino Maniscalco Reviewed-by: Antonino Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/661538/ --- drivers/gpu/drm/msm/msm_drv.c | 1 + drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_fb.c | 5 ++- drivers/gpu/drm/msm/msm_gem.c | 60 ++++++++++++++++++----------- drivers/gpu/drm/msm/msm_gem.h | 28 ++++++++++++++ drivers/gpu/drm/msm/msm_gem_prime.c | 54 +++++++++++++++++++++++++- 6 files changed, 123 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index d0f80267f4ba7..2283a377cda14 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -837,6 +837,7 @@ static const struct drm_driver msm_driver = { .postclose = msm_postclose, .dumb_create = msm_gem_dumb_create, .dumb_map_offset = msm_gem_dumb_map_offset, + .gem_prime_import = msm_gem_prime_import, .gem_prime_import_sg_table = msm_gem_prime_import_sg_table, #ifdef CONFIG_DEBUG_FS .debugfs_init = msm_debugfs_init, diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 200c3135bbf9c..2b49c4b800eef 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -269,6 +269,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map); void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map); +struct drm_gem_object *msm_gem_prime_import(struct drm_device *dev, struct dma_buf *buf); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); struct dma_buf *msm_gem_prime_export(struct drm_gem_object *obj, int flags); diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index 8ae2f326ec54f..bc7c2bb8f01e6 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -89,6 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb, bool needs_dirtyfb) return 0; for (i = 0; i < n; i++) { + msm_gem_vma_get(fb->obj[i]); ret = msm_gem_get_and_pin_iova(fb->obj[i], vm, &msm_fb->iova[i]); drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n", fb->base.id, i, msm_fb->iova[i], ret); @@ -114,8 +115,10 @@ void msm_framebuffer_cleanup(struct drm_framebuffer *fb, bool needed_dirtyfb) memset(msm_fb->iova, 0, sizeof(msm_fb->iova)); - for (i = 0; i < n; i++) + for (i = 0; i < n; i++) { msm_gem_unpin_iova(fb->obj[i], vm); + msm_gem_vma_put(fb->obj[i]); + } } uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int plane) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 3e87d27dfcb60..33d3354c61020 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -19,6 +19,7 @@ #include "msm_drv.h" #include "msm_gem.h" #include "msm_gpu.h" +#include "msm_kms.h" static void update_device_mem(struct msm_drm_private *priv, ssize_t size) { @@ -39,6 +40,7 @@ static void update_ctx_mem(struct drm_file *file, ssize_t size) static int msm_gem_open(struct drm_gem_object *obj, struct drm_file *file) { + msm_gem_vma_get(obj); update_ctx_mem(file, obj->size); return 0; } @@ -46,33 +48,13 @@ static int msm_gem_open(struct drm_gem_object *obj, struct drm_file *file) static void put_iova_spaces(struct drm_gem_object *obj, struct drm_gpuvm *vm, bool close, const char *reason); -static void detach_vm(struct drm_gem_object *obj, struct drm_gpuvm *vm) -{ - msm_gem_assert_locked(obj); - drm_gpuvm_resv_assert_held(vm); - - struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_find(vm, obj); - if (vm_bo) { - struct drm_gpuva *vma; - - drm_gpuvm_bo_for_each_va (vma, vm_bo) { - if (vma->vm != vm) - continue; - msm_gem_vma_unmap(vma, "detach"); - msm_gem_vma_close(vma); - break; - } - - drm_gpuvm_bo_put(vm_bo); - } -} - static void msm_gem_close(struct drm_gem_object *obj, struct drm_file *file) { struct msm_context *ctx = file->driver_priv; struct drm_exec exec; update_ctx_mem(file, -obj->size); + msm_gem_vma_put(obj); /* * If VM isn't created yet, nothing to cleanup. And in fact calling @@ -99,7 +81,31 @@ static void msm_gem_close(struct drm_gem_object *obj, struct drm_file *file) msm_gem_lock_vm_and_obj(&exec, obj, ctx->vm); put_iova_spaces(obj, ctx->vm, true, "close"); - detach_vm(obj, ctx->vm); + drm_exec_fini(&exec); /* drop locks */ +} + +/* + * Get/put for kms->vm VMA + */ + +void msm_gem_vma_get(struct drm_gem_object *obj) +{ + atomic_inc(&to_msm_bo(obj)->vma_ref); +} + +void msm_gem_vma_put(struct drm_gem_object *obj) +{ + struct msm_drm_private *priv = obj->dev->dev_private; + struct drm_exec exec; + + if (atomic_dec_return(&to_msm_bo(obj)->vma_ref)) + return; + + if (!priv->kms) + return; + + msm_gem_lock_vm_and_obj(&exec, obj, priv->kms->vm); + put_iova_spaces(obj, priv->kms->vm, true, "vma_put"); drm_exec_fini(&exec); /* drop locks */ } @@ -656,6 +662,13 @@ int msm_gem_set_iova(struct drm_gem_object *obj, return ret; } +static bool is_kms_vm(struct drm_gpuvm *vm) +{ + struct msm_drm_private *priv = vm->drm->dev_private; + + return priv->kms && (priv->kms->vm == vm); +} + /* * Unpin a iova by updating the reference counts. The memory isn't actually * purged until something else (shrinker, mm_notifier, destroy, etc) decides @@ -671,7 +684,8 @@ void msm_gem_unpin_iova(struct drm_gem_object *obj, struct drm_gpuvm *vm) if (vma) { msm_gem_unpin_locked(obj); } - detach_vm(obj, vm); + if (!is_kms_vm(vm)) + put_iova_spaces(obj, vm, true, "close"); drm_exec_fini(&exec); /* drop locks */ } diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 1ce97f8a30bbb..5c0c59e4835cf 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -211,9 +211,37 @@ struct msm_gem_object { * Protected by LRU lock. */ int pin_count; + + /** + * @vma_ref: Reference count of VMA users. + * + * With the vm_bo/vma holding a reference to the GEM object, we'd + * otherwise have to actively tear down a VMA when, for example, + * a buffer is unpinned for scanout, vs. the pre-drm_gpuvm approach + * where a VMA did not hold a reference to the BO, but instead was + * implicitly torn down when the BO was freed. + * + * To regain the lazy VMA teardown, we use the @vma_ref. It is + * incremented for any of the following: + * + * 1) the BO is exported as a dma_buf + * 2) the BO has open userspace handle + * + * All of those conditions will hold an reference to the BO, + * preventing it from being freed. So lazily keeping around the + * VMA will not prevent the BO from being freed. (Or rather, the + * reference loop is harmless in this case.) + * + * When the @vma_ref drops to zero, then kms->vm VMA will be + * torn down. + */ + atomic_t vma_ref; }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base) +void msm_gem_vma_get(struct drm_gem_object *obj); +void msm_gem_vma_put(struct drm_gem_object *obj); + uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); int msm_gem_prot(struct drm_gem_object *obj); int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct drm_gpuva *vma); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 4d93f2daeeaa0..c0a33ac839cb6 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -6,6 +6,7 @@ #include +#include #include #include "msm_drv.h" @@ -42,19 +43,68 @@ void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map) msm_gem_put_vaddr_locked(obj); } +static void msm_gem_dmabuf_release(struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj = dma_buf->priv; + + msm_gem_vma_put(obj); + drm_gem_dmabuf_release(dma_buf); +} + +static const struct dma_buf_ops msm_gem_prime_dmabuf_ops = { + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = msm_gem_dmabuf_release, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + +struct drm_gem_object *msm_gem_prime_import(struct drm_device *dev, + struct dma_buf *buf) +{ + if (buf->ops == &msm_gem_prime_dmabuf_ops) { + struct drm_gem_object *obj = buf->priv; + if (obj->dev == dev) { + /* + * Importing dmabuf exported from our own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + return drm_gem_prime_import(dev, buf); +} + struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg) { return msm_gem_import(dev, attach->dmabuf, sg); } - struct dma_buf *msm_gem_prime_export(struct drm_gem_object *obj, int flags) { if (to_msm_bo(obj)->flags & MSM_BO_NO_SHARE) return ERR_PTR(-EPERM); - return drm_gem_prime_export(obj, flags); + msm_gem_vma_get(obj); + + struct drm_device *dev = obj->dev; + struct dma_buf_export_info exp_info = { + .exp_name = KBUILD_MODNAME, /* white lie for debug */ + .owner = dev->driver->fops->owner, + .ops = &msm_gem_prime_dmabuf_ops, + .size = obj->size, + .flags = flags, + .priv = obj, + .resv = obj->resv, + }; + + return drm_gem_dmabuf_export(dev, &exp_info); } int msm_gem_prime_pin(struct drm_gem_object *obj) -- 2.47.2