From: Daisuke Matsuda
Date: Thu, 22 May 2025 11:19:54 +0000 (+0000)
Subject: RDMA/rxe: Implement synchronous prefetch for ODP MRs
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3576b0df1588a0fd0249c29975d9dc92ffd6f3c0;p=thirdparty%2Flinux.git

RDMA/rxe: Implement synchronous prefetch for ODP MRs

A minimal implementation of ibv_advise_mr(3) requires that synchronous
calls, i.e. those issued with the IBV_ADVISE_MR_FLAG_FLUSH flag,
succeed. Asynchronous requests, which are best-effort, will be
supported subsequently.

Signed-off-by: Daisuke Matsuda
Link: https://patch.msgid.link/20250522111955.3227-2-dskmtsd@gmail.com
Reviewed-by: Zhu Yanjun
Signed-off-by: Leon Romanovsky
---

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 3a77d6db17202..e891199cbdefa 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -34,6 +34,10 @@ void rxe_dealloc(struct ib_device *ib_dev)
 	mutex_destroy(&rxe->usdev_lock);
 }
 
+static const struct ib_device_ops rxe_ib_dev_odp_ops = {
+	.advise_mr = rxe_ib_advise_mr,
+};
+
 /* initialize rxe device parameters */
 static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
 {
@@ -103,6 +107,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_FLUSH;
 		rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC_WRITE;
+
+		/* set handler for ODP prefetching API - ibv_advise_mr(3) */
+		ib_set_device_ops(&rxe->ib_dev, &rxe_ib_dev_odp_ops);
 	}
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 876702058c842..7992290886e12 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -203,6 +203,9 @@ enum resp_states rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
 int rxe_odp_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length);
 enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova,
 					 u64 value);
+int rxe_ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+		     u32 flags, struct ib_sge *sg_list, u32 num_sge,
+		     struct uverbs_attr_bundle *attrs);
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start,
 				       u64 length, u64 iova,
@@ -231,6 +234,15 @@ static inline enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr,
 {
 	return RESPST_ERR_UNSUPPORTED_OPCODE;
 }
+static inline int rxe_ib_advise_mr(struct ib_pd *pd,
+				   enum ib_uverbs_advise_mr_advice advice,
+				   u32 flags, struct ib_sge *sg_list,
+				   u32 num_sge,
+				   struct uverbs_attr_bundle *attrs)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
 #endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
index dbc5a5600eb78..c0413181acc2f 100644
--- a/drivers/infiniband/sw/rxe/rxe_odp.c
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -418,3 +418,87 @@ enum resp_states rxe_odp_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
 
 	return RESPST_NONE;
 }
+
+static int rxe_ib_prefetch_sg_list(struct ib_pd *ibpd,
+				   enum ib_uverbs_advise_mr_advice advice,
+				   u32 pf_flags, struct ib_sge *sg_list,
+				   u32 num_sge)
+{
+	struct rxe_pd *pd = container_of(ibpd, struct rxe_pd, ibpd);
+	int ret = 0;
+	u32 i;
+
+	for (i = 0; i < num_sge; ++i) {
+		struct rxe_mr *mr;
+		struct ib_umem_odp *umem_odp;
+
+		mr = lookup_mr(pd, IB_ACCESS_LOCAL_WRITE,
+			       sg_list[i].lkey, RXE_LOOKUP_LOCAL);
+
+		if (IS_ERR(mr)) {
+			rxe_dbg_pd(pd, "mr with lkey %x not found\n",
+				   sg_list[i].lkey);
+			return PTR_ERR(mr);
+		}
+
+		if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+		    !mr->umem->writable) {
+			rxe_dbg_mr(mr, "missing write permission\n");
+			rxe_put(mr);
+			return -EPERM;
+		}
+
+		ret = rxe_odp_do_pagefault_and_lock(
+			mr, sg_list[i].addr, sg_list[i].length, pf_flags);
+		if (ret < 0) {
+			rxe_dbg_mr(mr, "failed to prefetch the mr\n");
+			rxe_put(mr);
+			return ret;
+		}
+
+		umem_odp = to_ib_umem_odp(mr->umem);
+		mutex_unlock(&umem_odp->umem_mutex);
+
+		rxe_put(mr);
+	}
+
+	return 0;
+}
+
+static int rxe_ib_advise_mr_prefetch(struct ib_pd *ibpd,
+				     enum ib_uverbs_advise_mr_advice advice,
+				     u32 flags, struct ib_sge *sg_list,
+				     u32 num_sge)
+{
+	u32 pf_flags = RXE_PAGEFAULT_DEFAULT;
+
+	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
+		pf_flags |= RXE_PAGEFAULT_RDONLY;
+
+	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+		pf_flags |= RXE_PAGEFAULT_SNAPSHOT;
+
+	/* Synchronous call */
+	if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
+		return rxe_ib_prefetch_sg_list(ibpd, advice, pf_flags, sg_list,
+					       num_sge);
+
+	/* Asynchronous call is to be added in the next patch */
+	return -EOPNOTSUPP;
+}
+
+int rxe_ib_advise_mr(struct ib_pd *ibpd,
+		     enum ib_uverbs_advise_mr_advice advice,
+		     u32 flags,
+		     struct ib_sge *sg_list,
+		     u32 num_sge,
+		     struct uverbs_attr_bundle *attrs)
+{
+	if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
+	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+		return -EOPNOTSUPP;
+
+	return rxe_ib_advise_mr_prefetch(ibpd, advice, flags,
+					 sg_list, num_sge);
+}
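
For context, the synchronous path added above is the one userspace reaches through
libibverbs. Below is a minimal sketch, not part of the patch: it assumes an already-opened
ODP-capable rxe device, an existing protection domain pd, and a buffer buf of len bytes;
the helper name prefetch_odp_mr() is invented for this illustration.

#include <errno.h>
#include <stdint.h>
#include <stddef.h>
#include <infiniband/verbs.h>

/* Prefetch an ODP-registered buffer synchronously via ibv_advise_mr(3). */
static int prefetch_odp_mr(struct ibv_pd *pd, void *buf, size_t len)
{
	struct ibv_mr *mr;
	struct ibv_sge sge;
	int ret;

	/* ODP registration: pages are faulted in on demand, not pinned up front. */
	mr = ibv_reg_mr(pd, buf, len,
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_ON_DEMAND);
	if (!mr)
		return errno;

	sge.addr   = (uintptr_t)buf;
	sge.length = (uint32_t)len;
	sge.lkey   = mr->lkey;

	/* IBV_ADVISE_MR_FLAG_FLUSH requests a synchronous prefetch: the verb
	 * does not return until the request has been processed, which on rxe
	 * is the rxe_ib_prefetch_sg_list() path introduced in this patch.
	 */
	ret = ibv_advise_mr(pd, IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE,
			    IBV_ADVISE_MR_FLAG_FLUSH, &sge, 1);

	ibv_dereg_mr(mr);
	return ret;
}

Without IBV_ADVISE_MR_FLAG_FLUSH the request is asynchronous and best-effort; with this
patch alone rxe returns EOPNOTSUPP for that case until the follow-up patch adds the
asynchronous path.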