From: Jiri Pirko Date: Fri, 29 May 2026 13:42:59 +0000 (+0200) Subject: RDMA/core: Introduce generic buffer descriptor infrastructure for umem X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3cfdff484d84f04716ae56063a8dac3773bd3f29;p=thirdparty%2Fkernel%2Fstable.git RDMA/core: Introduce generic buffer descriptor infrastructure for umem Introduce a per-attribute UVERBS_ATTR_UMEM model so each uverbs command's umem set is explicit in its UAPI definition. Add driver-facing wrapper helpers that pin a umem on demand from an attribute or a VA addr; the driver owns the returned umem and releases it from its destroy/error paths. Link: https://patch.msgid.link/r/20260529134312.2836341-4-jiri@resnulli.us Signed-off-by: Jiri Pirko Signed-off-by: Jason Gunthorpe --- diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index b4fc693a3bd8..6e063e05f796 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -708,6 +708,31 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, } EXPORT_SYMBOL(uverbs_get_flags32); +/** + * uverbs_get_buffer_desc - Read a buffer descriptor from a uverbs attr. + * @attrs_bundle: uverbs attribute bundle. + * @attr_id: id of an UVERBS_ATTR_UMEM-typed attribute. + * @desc: descriptor to fill. + * + * Return: 0 on success, -ENOENT if @attr_id is not set, -EINVAL on a + * malformed descriptor, or any other negative errno propagated from + * uverbs_copy_from() (notably -EFAULT on copy_from_user() failure). 
+ */ +int uverbs_get_buffer_desc(const struct uverbs_attr_bundle *attrs_bundle, + u16 attr_id, struct ib_uverbs_buffer_desc *desc) +{ + int ret; + + ret = uverbs_copy_from(desc, attrs_bundle, attr_id); + if (ret) + return ret; + if (desc->flags & ~IB_UVERBS_BUFFER_DESC_FLAGS_KNOWN_MASK) + return -EINVAL; + desc->optional_flags &= IB_UVERBS_BUFFER_DESC_OPTIONAL_FLAGS_KNOWN_MASK; + return 0; +} +EXPORT_SYMBOL(uverbs_get_buffer_desc); + /* Once called an abort will call through to the type's destroy_hw() */ void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle, u16 idx) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 0056f23af57b..7d2256583bc7 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -269,6 +269,175 @@ umem_kfree: return ret ? ERR_PTR(ret) : umem; } +/** + * ib_umem_get_desc - Pin a umem from a buffer descriptor. + * @device: IB device. + * @desc: buffer descriptor (VA or DMABUF). + * @access: IB access flags. + * + * Return: caller-owned umem on success, ERR_PTR(...) on error. + */ +struct ib_umem *ib_umem_get_desc(struct ib_device *device, + const struct ib_uverbs_buffer_desc *desc, + int access) +{ + struct ib_umem_dmabuf *umem_dmabuf; + + if (desc->flags & ~IB_UVERBS_BUFFER_DESC_FLAGS_KNOWN_MASK) + return ERR_PTR(-EINVAL); + + if (overflows_type(desc->addr, unsigned long) || + overflows_type(desc->length, size_t)) + return ERR_PTR(-EOVERFLOW); + + switch (desc->type) { + case IB_UVERBS_BUFFER_TYPE_DMABUF: + umem_dmabuf = ib_umem_dmabuf_get_pinned(device, desc->addr, + desc->length, desc->fd, + access); + if (IS_ERR(umem_dmabuf)) + return ERR_CAST(umem_dmabuf); + return &umem_dmabuf->umem; + case IB_UVERBS_BUFFER_TYPE_VA: + return __ib_umem_get_va(device, desc->addr, desc->length, + access); + default: + return ERR_PTR(-EINVAL); + } +} +EXPORT_SYMBOL(ib_umem_get_desc); + +/* + * Per-command legacy buffer-desc filler. 
+ * Returns 0 on success (desc filled), -ENODATA if no legacy attrs apply, + * negative errno on validation failure. + */ +typedef int (*ib_umem_buf_desc_filler_t)(const struct uverbs_attr_bundle *attrs, + struct ib_uverbs_buffer_desc *desc); + +/* + * ib_umem_resolve_desc - Resolve a buffer descriptor from a per-command UMEM + * attribute and/or a legacy attr filler. + * + * Return: + * 0 @desc filled. + * -ENOENT no source produced a buffer. + * -EINVAL both the UMEM attribute and the legacy filler produced a buffer. + * -errno propagated from attr read / filler validation. + */ +static int ib_umem_resolve_desc(const struct uverbs_attr_bundle *attrs, + u16 attr_id, + ib_umem_buf_desc_filler_t legacy_filler, + struct ib_uverbs_buffer_desc *desc) +{ + bool have_desc = false; + int ret; + + if (!attrs) + return -ENOENT; + + ret = uverbs_get_buffer_desc(attrs, attr_id, desc); + if (!ret) + have_desc = true; + else if (ret != -ENOENT) + return ret; + + if (legacy_filler) { + struct ib_uverbs_buffer_desc legacy_desc = {}; + + ret = legacy_filler(attrs, &legacy_desc); + if (!ret) { + if (have_desc) + return -EINVAL; + *desc = legacy_desc; + have_desc = true; + } else if (ret != -ENODATA) { + return ret; + } + } + + return have_desc ? 0 : -ENOENT; +} + +/* + * ib_umem_get_desc_check - Pin a umem from @desc and verify it meets + * @min_size. + */ +static struct ib_umem * +ib_umem_get_desc_check(struct ib_device *device, + const struct ib_uverbs_buffer_desc *desc, + size_t min_size, int access) +{ + struct ib_umem *umem; + + umem = ib_umem_get_desc(device, desc, access); + if (IS_ERR(umem)) + return umem; + if (umem->length < min_size) { + ib_umem_release(umem); + return ERR_PTR(-EINVAL); + } + return umem; +} + +/* + * ib_umem_get_from_attrs - Pin a umem from a per-command UMEM attribute + * and/or a legacy attr filler. + * + * Return: caller-owned umem on success; NULL when no source supplied a + * buffer; ERR_PTR(...) on error. 
+ */ +static struct ib_umem * +ib_umem_get_from_attrs(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, + u16 attr_id, ib_umem_buf_desc_filler_t legacy_filler, + size_t size, int access) +{ + struct ib_uverbs_buffer_desc desc = {}; + int ret; + + ret = ib_umem_resolve_desc(attrs, attr_id, legacy_filler, &desc); + if (ret == -ENOENT) + return NULL; + if (ret) + return ERR_PTR(ret); + return ib_umem_get_desc_check(device, &desc, size, access); +} + +/* + * ib_umem_get_from_attrs_or_va - Pin a umem from a per-command UMEM + * attribute and/or a legacy attr filler, + * falling back to a UHW VA when no source + * matched. + * + * @size is always consumed: it is the length to pin on the VA fallback + * path AND the post-pin minimum-length check on the attr / legacy paths. + * Callers must always pass a meaningful, validated value. + * + * Return: caller-owned umem on success, ERR_PTR(...) on error. + */ +static struct ib_umem * +ib_umem_get_from_attrs_or_va(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, + u16 attr_id, + ib_umem_buf_desc_filler_t legacy_filler, + u64 addr, size_t size, int access) +{ + struct ib_uverbs_buffer_desc desc = {}; + int ret; + + ret = ib_umem_resolve_desc(attrs, attr_id, legacy_filler, &desc); + if (ret == -ENOENT) + desc = (struct ib_uverbs_buffer_desc){ + .type = IB_UVERBS_BUFFER_TYPE_VA, + .addr = addr, + .length = size, + }; + else if (ret) + return ERR_PTR(ret); + return ib_umem_get_desc_check(device, &desc, size, access); +} + /** * ib_umem_get_va - Pin and DMA map userspace memory. * @@ -284,6 +453,65 @@ struct ib_umem *ib_umem_get_va(struct ib_device *device, unsigned long addr, } EXPORT_SYMBOL(ib_umem_get_va); +/** + * ib_umem_get_attr - Pin a umem from a per-command UMEM attribute. + * @device: IB device. + * @attrs: uverbs attribute bundle (may be NULL). + * @attr_id: per-command UMEM attribute id. + * @size: minimum required umem length. + * @access: IB access flags. 
+ * + * Return: caller-owned umem on success; NULL when no source supplied + * a buffer; ERR_PTR(...) on error. + */ +struct ib_umem *ib_umem_get_attr(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, + u16 attr_id, size_t size, int access) +{ + return ib_umem_get_from_attrs(device, attrs, attr_id, NULL, size, + access); +} +EXPORT_SYMBOL(ib_umem_get_attr); + +/** + * ib_umem_get_attr_or_va - Pin a umem from a per-command UMEM attribute, + * falling back to a UHW VA. + * @device: IB device. + * @attrs: uverbs attribute bundle (may be NULL). + * @attr_id: per-command UMEM attribute id. + * @addr: UHW user VA used when no per-command attribute matched. + * @size: on the attr / legacy paths, the minimum required umem length + * validated post-pin; on the VA fallback path, the length to pin. + * @access: IB access flags. + * + * Like ib_umem_get_attr(), but pins @addr/@size when no per-command + * UMEM attribute is supplied. + * + * IMPORTANT: @size is always consumed. On the attr / legacy paths it is + * used as the post-pin minimum-length check; on the VA fallback path it + * is the length to pin. Callers MUST pass a meaningful, validated value + * even when they expect an attribute-supplied buffer to be used. + * + * Every in-tree caller passes the same value for the two roles of @size + * because no driver today distinguishes a user-passed buffer length from + * a driver-computed minimum. Drivers that currently accept a user-supplied + * length without cross-checking it against a driver minimum (vmw_pvrdma + * CQ/QP/SRQ, qedr CQ/QP/SRQ, mana WQ/QP, ionic CQ/QP), once tightened to + * compute and check a real minimum, will want to introduce a separate + * helper that passes these as distinct values. + * + * Return: caller-owned umem on success, ERR_PTR(...) on error. 
+ */ +struct ib_umem *ib_umem_get_attr_or_va(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, + u16 attr_id, u64 addr, size_t size, + int access) +{ + return ib_umem_get_from_attrs_or_va(device, attrs, attr_id, NULL, addr, + size, access); +} +EXPORT_SYMBOL(ib_umem_get_attr_or_va); + /** * ib_umem_release - release pinned memory * @umem: umem struct to release diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 25e90766892e..0f373679ea81 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -73,10 +73,26 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) { return ib_umem_num_dma_blocks(umem, PAGE_SIZE); } + +struct ib_udata; +struct ib_uverbs_buffer_desc; +struct uverbs_attr_bundle; + #ifdef CONFIG_INFINIBAND_USER_MEM +struct ib_umem *ib_umem_get_desc(struct ib_device *device, + const struct ib_uverbs_buffer_desc *desc, + int access); struct ib_umem *ib_umem_get_va(struct ib_device *device, unsigned long addr, size_t size, int access); +struct ib_umem *ib_umem_get_attr(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, + u16 attr_id, size_t size, int access); +struct ib_umem *ib_umem_get_attr_or_va(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, + u16 attr_id, u64 addr, size_t size, + int access); + void ib_umem_release(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); @@ -160,12 +176,32 @@ void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf); #include <linux/err.h> +static inline struct ib_umem * +ib_umem_get_desc(struct ib_device *device, + const struct ib_uverbs_buffer_desc *desc, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline struct ib_umem *ib_umem_get_va(struct ib_device *device, unsigned long addr, size_t size, int access) { return ERR_PTR(-EOPNOTSUPP); } +static inline struct ib_umem * +ib_umem_get_attr(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, u16 attr_id, + size_t 
size, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} +static inline struct ib_umem * +ib_umem_get_attr_or_va(struct ib_device *device, + const struct uverbs_attr_bundle *attrs, u16 attr_id, + u64 addr, size_t size, int access) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 9d7575d999e1..24fd36213023 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -590,6 +590,28 @@ struct uapi_definition { UA_OPTIONAL, \ .is_udata = 1) +/* + * Per-attribute UMEM descriptor. The payload is a single + * struct ib_uverbs_buffer_desc identifying a memory region backed by + * dma-buf or user virtual address. _access selects UA_OPTIONAL or + * UA_MANDATORY. Drivers obtain a umem from the attribute via the + * ib_umem_get_*() wrapper helpers. + */ +#define UVERBS_ATTR_UMEM(_attr_id, _access) \ + UVERBS_ATTR_PTR_IN(_attr_id, \ + UVERBS_ATTR_TYPE(struct ib_uverbs_buffer_desc), \ + _access) + +/* + * Bit masks of the @flags / @optional_flags fields of struct + * ib_uverbs_buffer_desc that the kernel understands. @flags is strict: + * any bit outside the known mask makes the call fail with -EINVAL. + * @optional_flags is advisory: bits outside the known mask are silently + * dropped. Both masks are extended as new bits are introduced. 
+ */ +#define IB_UVERBS_BUFFER_DESC_FLAGS_KNOWN_MASK 0U +#define IB_UVERBS_BUFFER_DESC_OPTIONAL_FLAGS_KNOWN_MASK 0U + /* ================================================= * Parsing infrastructure * ================================================= @@ -862,6 +884,8 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size); +int uverbs_get_buffer_desc(const struct uverbs_attr_bundle *attrs_bundle, + u16 attr_id, struct ib_uverbs_buffer_desc *desc); __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, gfp_t flags); @@ -920,6 +944,12 @@ static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, { return -EINVAL; } +static inline int +uverbs_get_buffer_desc(const struct uverbs_attr_bundle *attrs_bundle, + u16 attr_id, struct ib_uverbs_buffer_desc *desc) +{ + return -EINVAL; +} static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size) { diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 90c5cd8e7753..51030c27d479 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -273,4 +273,31 @@ struct ib_uverbs_gid_entry { __u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */ }; +enum ib_uverbs_buffer_type { + IB_UVERBS_BUFFER_TYPE_DMABUF, + IB_UVERBS_BUFFER_TYPE_VA, +}; + +/* + * Describes a single buffer backed by dma-buf or user virtual address. + * Used as the payload of a per-attribute UVERBS_ATTR_UMEM-typed attribute. + * + * @type: buffer type from enum ib_uverbs_buffer_type + * @fd: dma-buf file descriptor (valid for IB_UVERBS_BUFFER_TYPE_DMABUF) + * @flags: required flags; the kernel rejects the call with -EINVAL if any + * bit is not understood. No bits are defined yet. 
+ * @optional_flags: advisory flags; bits the kernel does not understand are + * silently ignored. No bits are defined yet. + * @addr: offset within dma-buf, or user virtual address for VA + * @length: buffer length in bytes + */ +struct ib_uverbs_buffer_desc { + __u32 type; + __s32 fd; + __u32 flags; + __u32 optional_flags; + __aligned_u64 addr; + __aligned_u64 length; +}; + #endif