return xdr->len;
}
-/*
- * Cap contiguous RDMA Read sink allocations at order-4.
- * Higher orders risk allocation failure under
- * __GFP_NORETRY, which would negate the benefit of the
- * contiguous fast path.
- */
-#define SVC_RDMA_CONTIG_MAX_ORDER 4
-
-/**
- * svc_rdma_alloc_read_pages - Allocate physically contiguous pages
- * @nr_pages: number of pages needed
- * @order: on success, set to the allocation order
- *
- * Attempts a higher-order allocation, falling back to smaller orders.
- * The returned pages are split immediately so each sub-page has its
- * own refcount and can be freed independently.
- *
- * Returns a pointer to the first page on success, or NULL if even
- * order-1 allocation fails.
- */
-static struct page *
-svc_rdma_alloc_read_pages(unsigned int nr_pages, unsigned int *order)
-{
- unsigned int o;
- struct page *page;
-
- o = min(get_order(nr_pages << PAGE_SHIFT),
- SVC_RDMA_CONTIG_MAX_ORDER);
-
- while (o >= 1) {
- page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN,
- o);
- if (page) {
- split_page(page, o);
- *order = o;
- return page;
- }
- o--;
- }
- return NULL;
-}
-
-/*
- * svc_rdma_fill_contig_bvec - Replace rq_pages with a contiguous allocation
- * @rqstp: RPC transaction context
- * @head: context for ongoing I/O
- * @bv: bvec entry to fill
- * @pages_left: number of data pages remaining in the segment
- * @len_left: bytes remaining in the segment
- *
- * On success, fills @bv with a bvec spanning the contiguous range and
- * advances rc_curpage/rc_page_count. Returns the byte length covered,
- * or zero if the allocation failed or would overrun rq_maxpages.
- */
-static unsigned int
-svc_rdma_fill_contig_bvec(struct svc_rqst *rqstp,
- struct svc_rdma_recv_ctxt *head,
- struct bio_vec *bv, unsigned int pages_left,
- unsigned int len_left)
-{
- unsigned int order, npages, chunk_pages, chunk_len, i;
- struct page *page;
-
- page = svc_rdma_alloc_read_pages(pages_left, &order);
- if (!page)
- return 0;
- npages = 1 << order;
-
- if (head->rc_curpage + npages > rqstp->rq_maxpages) {
- for (i = 0; i < npages; i++)
- __free_page(page + i);
- return 0;
- }
-
- /*
- * Replace rq_pages[] entries with pages from the contiguous
- * allocation. If npages exceeds chunk_pages, the extra pages
- * stay in rq_pages[] for later reuse or normal rqst teardown.
- */
- for (i = 0; i < npages; i++) {
- svc_rqst_page_release(rqstp,
- rqstp->rq_pages[head->rc_curpage + i]);
- rqstp->rq_pages[head->rc_curpage + i] = page + i;
- }
-
- chunk_pages = min(npages, pages_left);
- chunk_len = min_t(unsigned int, chunk_pages << PAGE_SHIFT, len_left);
- bvec_set_page(bv, page, chunk_len, 0);
- head->rc_page_count += chunk_pages;
- head->rc_curpage += chunk_pages;
- return chunk_len;
-}
-
-/*
- * svc_rdma_fill_page_bvec - Add a single rq_page to the bvec array
- * @head: context for ongoing I/O
- * @ctxt: R/W context whose bvec array is being filled
- * @cur: page to add
- * @bvec_idx: pointer to current bvec index, not advanced on merge
- * @len_left: bytes remaining in the segment
- *
- * If @cur is physically contiguous with the preceding bvec, it is
- * merged by extending that bvec's length. Otherwise a new bvec
- * entry is created. Returns the byte length covered.
- */
-static unsigned int
-svc_rdma_fill_page_bvec(struct svc_rdma_recv_ctxt *head,
- struct svc_rdma_rw_ctxt *ctxt, struct page *cur,
- unsigned int *bvec_idx, unsigned int len_left)
-{
- unsigned int chunk_len = min_t(unsigned int, PAGE_SIZE, len_left);
-
- head->rc_page_count++;
- head->rc_curpage++;
-
- if (*bvec_idx > 0) {
- struct bio_vec *prev = &ctxt->rw_bvec[*bvec_idx - 1];
-
- if (page_to_phys(prev->bv_page) + prev->bv_offset +
- prev->bv_len == page_to_phys(cur)) {
- prev->bv_len += chunk_len;
- return chunk_len;
- }
- }
-
- bvec_set_page(&ctxt->rw_bvec[*bvec_idx], cur, chunk_len, 0);
- (*bvec_idx)++;
- return chunk_len;
-}
-
-/**
- * svc_rdma_build_read_segment_contig - Build RDMA Read WR with contiguous pages
- * @rqstp: RPC transaction context
- * @head: context for ongoing I/O
- * @segment: co-ordinates of remote memory to be read
- *
- * Greedily allocates higher-order pages to cover the segment,
- * building one bvec per contiguous chunk. Each allocation is
- * split so sub-pages have independent refcounts. When a
- * higher-order allocation fails, remaining pages are covered
- * individually, merging adjacent pages into the preceding bvec
- * when they are physically contiguous. The split sub-pages
- * replace entries in rq_pages[] so downstream cleanup is
- * unchanged.
- *
- * Returns:
- * %0: the Read WR was constructed successfully
- * %-ENOMEM: allocation failed
- * %-EIO: a DMA mapping error occurred
- */
-static int svc_rdma_build_read_segment_contig(struct svc_rqst *rqstp,
- struct svc_rdma_recv_ctxt *head,
- const struct svc_rdma_segment *segment)
-{
- struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp);
- struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
- unsigned int nr_data_pages, bvec_idx;
- struct svc_rdma_rw_ctxt *ctxt;
- unsigned int len_left;
- int ret;
-
- nr_data_pages = PAGE_ALIGN(segment->rs_length) >> PAGE_SHIFT;
- if (head->rc_curpage + nr_data_pages > rqstp->rq_maxpages)
- return -ENOMEM;
-
- ctxt = svc_rdma_get_rw_ctxt(rdma, nr_data_pages);
- if (!ctxt)
- return -ENOMEM;
-
- bvec_idx = 0;
- len_left = segment->rs_length;
- while (len_left) {
- unsigned int pages_left = PAGE_ALIGN(len_left) >> PAGE_SHIFT;
- unsigned int chunk_len = 0;
-
- if (pages_left >= 2)
- chunk_len = svc_rdma_fill_contig_bvec(rqstp, head,
- &ctxt->rw_bvec[bvec_idx],
- pages_left, len_left);
- if (chunk_len) {
- bvec_idx++;
- } else {
- struct page *cur =
- rqstp->rq_pages[head->rc_curpage];
- chunk_len = svc_rdma_fill_page_bvec(head, ctxt, cur,
- &bvec_idx,
- len_left);
- }
-
- len_left -= chunk_len;
- }
-
- ctxt->rw_nents = bvec_idx;
-
- head->rc_pageoff = offset_in_page(segment->rs_length);
- if (head->rc_pageoff)
- head->rc_curpage--;
-
- ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
- segment->rs_handle, segment->rs_length,
- DMA_FROM_DEVICE);
- if (ret < 0)
- return -EIO;
- percpu_counter_inc(&svcrdma_stat_read);
-
- list_add(&ctxt->rw_list, &cc->cc_rwctxts);
- cc->cc_sqecount += ret;
- return 0;
-}
-
/**
* svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment
* @rqstp: RPC transaction context
if (check_add_overflow(head->rc_pageoff, len, &total))
return -EINVAL;
nr_bvec = PAGE_ALIGN(total) >> PAGE_SHIFT;
-
- if (head->rc_pageoff == 0 && nr_bvec >= 2) {
- ret = svc_rdma_build_read_segment_contig(rqstp, head,
- segment);
- if (ret != -ENOMEM)
- return ret;
- }
-
ctxt = svc_rdma_get_rw_ctxt(rdma, nr_bvec);
if (!ctxt)
return -ENOMEM;
{
unsigned int i;
- /*
- * Move only pages containing RPC data into rc_pages[]. Pages
- * from a contiguous allocation that were not used for the
- * payload remain in rq_pages[] for subsequent reuse.
- */
for (i = 0; i < head->rc_page_count; i++) {
head->rc_pages[i] = rqstp->rq_pages[i];
rqstp->rq_pages[i] = NULL;