From: Max Zhen Date: Tue, 24 Mar 2026 16:31:59 +0000 (-0700) Subject: accel/amdxdna: Add per-process BO memory usage query support X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1f513a3ec3a945efb30c963681ac32ac7b99928b;p=thirdparty%2Flinux.git accel/amdxdna: Add per-process BO memory usage query support Add support for querying per-process buffer object (BO) memory usage through the amdxdna GET_ARRAY UAPI. Introduce a new query type, DRM_AMDXDNA_BO_USAGE, along with struct amdxdna_drm_bo_usage to report BO memory usage statistics, including heap, total, and internal usage. Track BO memory usage on a per-client basis by maintaining counters in GEM open/close and heap allocation/free paths. This ensures the reported statistics reflect the current memory footprint of each process. Wire the new query into the GET_ARRAY implementation to expose the usage information to userspace. Link: https://github.com/amd/xdna-driver/commit/0546f2aaadbdacf1c3556410ecd71622044cd916 Signed-off-by: Max Zhen Reviewed-by: Lizhi Hou Reviewed-by: Mario Limonciello (AMD) Signed-off-by: Lizhi Hou Link: https://patch.msgid.link/20260324163159.2425461-1-lizhi.hou@amd.com --- diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c index 9e39bfe75971b..f1ac4e00bd9f4 100644 --- a/drivers/accel/amdxdna/aie2_pci.c +++ b/drivers/accel/amdxdna/aie2_pci.c @@ -865,6 +865,7 @@ static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg) tmp->command_submissions = hwctx->priv->seq; tmp->command_completions = hwctx->priv->completed; tmp->pasid = hwctx->client->pasid; + tmp->heap_usage = hwctx->client->heap_usage; tmp->priority = hwctx->qos.priority; tmp->gops = hwctx->qos.gops; tmp->fps = hwctx->qos.fps; @@ -1148,6 +1149,9 @@ static int aie2_get_array(struct amdxdna_client *client, case DRM_AMDXDNA_HW_LAST_ASYNC_ERR: ret = aie2_get_array_async_error(xdna->dev_handle, args); break; + case DRM_AMDXDNA_BO_USAGE: + ret = amdxdna_drm_get_bo_usage(&xdna->ddev, args); + break; default: XDNA_ERR(xdna, "Not supported request parameter %u", args->param); ret = -EOPNOTSUPP; diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c index 27712704e42d9..238ee244d4a69 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.c +++ b/drivers/accel/amdxdna/amdxdna_gem.c @@ -63,6 +63,8 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo) goto unlock_out; } + client->heap_usage += mem->size; + drm_gem_object_get(to_gobj(heap)); unlock_out: @@ -74,16 +76,17 @@ unlock_out: static void amdxdna_gem_heap_free(struct amdxdna_gem_obj *abo) { + struct amdxdna_client *client = abo->client; struct amdxdna_gem_obj *heap; - mutex_lock(&abo->client->mm_lock); + mutex_lock(&client->mm_lock); drm_mm_remove_node(&abo->mm_node); - - heap = abo->client->dev_heap; + client->heap_usage -= abo->mem.size; + heap = client->dev_heap; drm_gem_object_put(to_gobj(heap)); - mutex_unlock(&abo->client->mm_lock); + mutex_unlock(&client->mm_lock); } static struct amdxdna_gem_obj * @@ -102,6 +105,8 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size) abo->mem.dma_addr = AMDXDNA_INVALID_ADDR; abo->mem.uva = AMDXDNA_INVALID_ADDR; abo->mem.size = size; + abo->open_ref = 0; + abo->internal = false; INIT_LIST_HEAD(&abo->mem.umap_list); return abo; @@ -508,13 +513,55 @@ static void amdxdna_imported_obj_free(struct amdxdna_gem_obj *abo) kfree(abo); } +static inline bool +amdxdna_gem_skip_bo_usage(struct amdxdna_gem_obj *abo) +{ + /* Do not count imported BOs since the buffer is not allocated by us. 
*/ + if (is_import_bo(abo)) + return true; + + /* Already counted as part of HEAP BO */ + if (abo->type == AMDXDNA_BO_DEV) + return true; + + return false; +} + +static void +amdxdna_gem_add_bo_usage(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_client *client = abo->client; + + if (amdxdna_gem_skip_bo_usage(abo)) + return; + + guard(mutex)(&client->mm_lock); + + client->total_bo_usage += abo->mem.size; + if (abo->internal) + client->total_int_bo_usage += abo->mem.size; +} + +static void +amdxdna_gem_del_bo_usage(struct amdxdna_gem_obj *abo) +{ + struct amdxdna_client *client = abo->client; + + if (amdxdna_gem_skip_bo_usage(abo)) + return; + + guard(mutex)(&client->mm_lock); + + client->total_bo_usage -= abo->mem.size; + if (abo->internal) + client->total_int_bo_usage -= abo->mem.size; +} + static void amdxdna_gem_obj_free(struct drm_gem_object *gobj) { struct amdxdna_dev *xdna = to_xdna_dev(gobj->dev); struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); - XDNA_DBG(xdna, "BO type %d xdna_addr 0x%llx", abo->type, amdxdna_gem_dev_addr(abo)); - amdxdna_hmm_unregister(abo, NULL); flush_workqueue(xdna->notifier_wq); @@ -543,9 +590,13 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi int ret; guard(mutex)(&abo->lock); + abo->open_ref++; - if (!abo->client) + if (abo->open_ref == 1) { + /* Attached to the client when first opened by it. */ abo->client = filp->driver_priv; + amdxdna_gem_add_bo_usage(abo); + } if (amdxdna_iova_on(xdna)) { ret = amdxdna_iommu_map_bo(xdna, abo); if (ret) @@ -555,6 +606,20 @@ static int amdxdna_gem_obj_open(struct drm_gem_object *gobj, struct drm_file *fi return 0; } +static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *filp) +{ + struct amdxdna_gem_obj *abo = to_xdna_obj(gobj); + + guard(mutex)(&abo->lock); + abo->open_ref--; + + if (abo->open_ref == 0) { + amdxdna_gem_del_bo_usage(abo); + /* Detach from the client when last closed by it. */ + abo->client = NULL; + } +} + static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map) { struct amdxdna_gem_obj *abo = to_xdna_obj(obj); @@ -575,6 +640,7 @@ static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = { static const struct drm_gem_object_funcs amdxdna_gem_shmem_funcs = { .free = amdxdna_gem_obj_free, .open = amdxdna_gem_obj_open, + .close = amdxdna_gem_obj_close, .print_info = drm_gem_shmem_object_print_info, .pin = drm_gem_shmem_object_pin, .unpin = drm_gem_shmem_object_unpin, @@ -708,10 +774,13 @@ amdxdna_drm_create_share_bo(struct drm_device *dev, if (IS_ERR(abo)) return ERR_CAST(abo); - if (args->type == AMDXDNA_BO_DEV_HEAP) + if (args->type == AMDXDNA_BO_DEV_HEAP) { abo->type = AMDXDNA_BO_DEV_HEAP; - else + abo->internal = true; + } else { abo->type = AMDXDNA_BO_SHARE; + abo->internal = args->type == AMDXDNA_BO_CMD; + } return abo; } @@ -783,6 +852,11 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev, gobj = to_gobj(abo); gobj->funcs = &amdxdna_gem_dev_obj_funcs; abo->type = AMDXDNA_BO_DEV; + abo->internal = true; + /* + * DEV BOs cannot be alive when client is gone, it's OK to + * always establish the connection. + */ abo->client = client; ret = amdxdna_gem_heap_alloc(abo); @@ -826,7 +900,7 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f if (IS_ERR(abo)) return PTR_ERR(abo); - /* ready to publish object to userspace */ + /* Ready to publish object to userspace and count for BO usage. 
*/ ret = drm_gem_handle_create(filp, to_gobj(abo), &args->handle); if (ret) { XDNA_ERR(xdna, "Create handle failed"); @@ -986,3 +1060,43 @@ put_obj: drm_gem_object_put(gobj); return ret; } + +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args) +{ + size_t min_sz = min(args->element_size, sizeof(struct amdxdna_drm_bo_usage)); + char __user *buf = u64_to_user_ptr(args->buffer); + struct amdxdna_dev *xdna = to_xdna_dev(dev); + struct amdxdna_client *tmp_client; + struct amdxdna_drm_bo_usage tmp; + + drm_WARN_ON(dev, !mutex_is_locked(&xdna->dev_lock)); + + if (args->num_element != 1) + return -EINVAL; + + if (copy_from_user(&tmp, buf, min_sz)) + return -EFAULT; + + if (!tmp.pid) + return -EINVAL; + + tmp.total_usage = 0; + tmp.internal_usage = 0; + tmp.heap_usage = 0; + + list_for_each_entry(tmp_client, &xdna->client_list, node) { + if (tmp.pid != tmp_client->pid) + continue; + + mutex_lock(&tmp_client->mm_lock); + tmp.total_usage += tmp_client->total_bo_usage; + tmp.internal_usage += tmp_client->total_int_bo_usage; + tmp.heap_usage += tmp_client->heap_usage; + mutex_unlock(&tmp_client->mm_lock); + } + + if (copy_to_user(buf, &tmp, min_sz)) + return -EFAULT; + + return 0; +} diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h index a77d9344f8a46..4fc48a1189d27 100644 --- a/drivers/accel/amdxdna/amdxdna_gem.h +++ b/drivers/accel/amdxdna/amdxdna_gem.h @@ -41,8 +41,9 @@ struct amdxdna_gem_obj { struct amdxdna_client *client; u8 type; bool pinned; - struct mutex lock; /* Protects: pinned, mem.kva */ + struct mutex lock; /* Protects: pinned, mem.kva, open_ref */ struct amdxdna_mem mem; + int open_ref; /* Below members are initialized when needed */ struct drm_mm mm; /* For AMDXDNA_BO_DEV_HEAP */ @@ -50,6 +51,9 @@ struct amdxdna_gem_obj { u32 assigned_hwctx; struct dma_buf *dma_buf; struct dma_buf_attachment *attach; + + /* True, if BO is managed by XRT, not application */ + bool internal; }; #define to_gobj(obj) (&(obj)->base.base) @@ -98,5 +102,6 @@ void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo); int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdxdna_drm_get_bo_usage(struct drm_device *dev, struct amdxdna_drm_get_array *args); #endif /* _AMDXDNA_GEM_H_ */ diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index d83be00daf2bf..b50a7d1f8a118 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -36,9 +36,10 @@ MODULE_FIRMWARE("amdnpu/17f0_11/npu_7.sbin"); * 0.5: Support getting telemetry data * 0.6: Support preemption * 0.7: Support getting power and utilization data + * 0.8: Support BO usage query */ #define AMDXDNA_DRIVER_MAJOR 0 -#define AMDXDNA_DRIVER_MINOR 7 +#define AMDXDNA_DRIVER_MINOR 8 /* * Bind the driver base on (vendor_id, device_id) pair and later use the @@ -120,11 +121,12 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client) amdxdna_hwctx_remove_all(client); xa_destroy(&client->hwctx_xa); cleanup_srcu_struct(&client->hwctx_srcu); - mutex_destroy(&client->mm_lock); if (client->dev_heap) drm_gem_object_put(to_gobj(client->dev_heap)); + mutex_destroy(&client->mm_lock); + if (!IS_ERR_OR_NULL(client->sva)) iommu_sva_unbind_device(client->sva); mmdrop(client->mm); diff --git 
a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h index e91d14ae5190e..0661749917d68 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h @@ -138,6 +138,10 @@ struct amdxdna_client { struct iommu_sva *sva; int pasid; struct mm_struct *mm; + + size_t heap_usage; + size_t total_bo_usage; + size_t total_int_bo_usage; }; #define amdxdna_for_each_hwctx(client, hwctx_id, entry) \ diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h index bddaaaf945cf7..61d3686fa3b1f 100644 --- a/include/uapi/drm/amdxdna_accel.h +++ b/include/uapi/drm/amdxdna_accel.h @@ -591,8 +591,37 @@ struct amdxdna_async_error { __u64 ex_err_code; }; +/** + * struct amdxdna_drm_bo_usage - all types of BO usage + * BOs managed by XRT/SHIM/driver is counted as internal. + * Others are counted as external which are managed by applications. + * + * Among all types of BOs: + * AMDXDNA_BO_DEV_HEAP - is counted for internal. + * AMDXDNA_BO_SHARE - is counted for external. + * AMDXDNA_BO_CMD - is counted for internal. + * AMDXDNA_BO_DEV - is counted by heap_usage only, not internal + * or external. It does not add to the total memory + * footprint since its mem comes from heap which is + * already counted as internal. + */ +struct amdxdna_drm_bo_usage { + /** @pid: The ID of the process to query from. */ + __s64 pid; + /** @total_usage: Total BO size used by process. */ + __u64 total_usage; + /** @internal_usage: Total internal BO size used by process. */ + __u64 internal_usage; + /** @heap_usage: Total device BO size used by process. */ + __u64 heap_usage; +}; + +/* + * Supported params in struct amdxdna_drm_get_array + */ #define DRM_AMDXDNA_HW_CONTEXT_ALL 0 #define DRM_AMDXDNA_HW_LAST_ASYNC_ERR 2 +#define DRM_AMDXDNA_BO_USAGE 6 /** * struct amdxdna_drm_get_array - Get information array. @@ -605,6 +634,12 @@ struct amdxdna_drm_get_array { * * %DRM_AMDXDNA_HW_CONTEXT_ALL: * Returns all created hardware contexts. + * + * %DRM_AMDXDNA_HW_LAST_ASYNC_ERR: + * Returns last async error. + * + * %DRM_AMDXDNA_BO_USAGE: + * Returns usage of heap/internal/external BOs. */ __u32 param; /**
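
As a rough illustration (not part of the patch), the new query could be driven from
userspace through the existing GET_ARRAY interface. The sketch below assumes the
DRM_IOCTL_AMDXDNA_GET_ARRAY macro and the "drm/amdxdna_accel.h" header path as
installed by libdrm; neither is defined in this patch, so treat both as assumptions.
The protocol mirrors amdxdna_drm_get_bo_usage() above: pass exactly one element whose
pid field selects the process to query.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/types.h>
	#include <drm/amdxdna_accel.h>

	/* Query BO memory usage of one process; returns 0 on success. */
	static int query_bo_usage(int fd, pid_t pid, struct amdxdna_drm_bo_usage *usage)
	{
		struct amdxdna_drm_get_array args;

		memset(usage, 0, sizeof(*usage));
		usage->pid = pid;			/* process to query */

		memset(&args, 0, sizeof(args));
		args.param = DRM_AMDXDNA_BO_USAGE;	/* new query type added by this patch */
		args.element_size = sizeof(*usage);
		args.num_element = 1;			/* kernel accepts exactly one element */
		args.buffer = (uintptr_t)usage;

		return ioctl(fd, DRM_IOCTL_AMDXDNA_GET_ARRAY, &args);
	}

On success, total_usage, internal_usage and heap_usage reflect the per-client counters
this patch maintains under client->mm_lock, summed over all open clients that match the
queried pid.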