From: Ming Lei Date: Tue, 31 Mar 2026 15:31:55 +0000 (+0800) Subject: ublk: eliminate permanent pages[] array from struct ublk_buf X-Git-Tag: v7.1-rc1~233^2~22 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8a34e88769f617dc980edb5a0079e347bd1b9a89;p=thirdparty%2Fkernel%2Flinux.git ublk: eliminate permanent pages[] array from struct ublk_buf The pages[] array (kvmalloc'd, 8 bytes per page = 2MB for a 1GB buffer) was stored permanently in struct ublk_buf but only needed during pin_user_pages_fast() and maple tree construction. Since the maple tree already stores PFN ranges via ublk_buf_range, struct page pointers can be recovered via pfn_to_page() during unregistration. Make pages[] a temporary allocation in ublk_ctrl_reg_buf(), freed immediately after the maple tree is built. Rewrite __ublk_ctrl_unreg_buf() to iterate the maple tree for matching buf_index entries, recovering struct page pointers via pfn_to_page() and unpinning in batches of 32. Simplify ublk_buf_erase_ranges() to iterate the maple tree by buf_index instead of walking the now-removed pages[] array. Signed-off-by: Ming Lei Link: https://patch.msgid.link/20260331153207.3635125-5-ming.lei@redhat.com Signed-off-by: Jens Axboe --- diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index bdb1de41d526a..1af42850f5b10 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -296,7 +296,6 @@ struct ublk_queue { /* Per-registered shared memory buffer */ struct ublk_buf { - struct page **pages; unsigned int nr_pages; }; @@ -5261,27 +5260,25 @@ static void ublk_unquiesce_and_resume(struct gendisk *disk) blk_mq_unquiesce_queue(disk->queue); } -/* Erase coalesced PFN ranges from the maple tree for pages [0, nr_pages) */ -static void ublk_buf_erase_ranges(struct ublk_device *ub, - struct ublk_buf *ubuf, - unsigned long nr_pages) +/* Erase coalesced PFN ranges from the maple tree matching buf_index */ +static void ublk_buf_erase_ranges(struct ublk_device *ub, int buf_index) { - unsigned long i; - - for (i = 0; i < nr_pages; ) { - unsigned long pfn = page_to_pfn(ubuf->pages[i]); - unsigned long start = i; + MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); + struct ublk_buf_range *range; - while (i + 1 < nr_pages && - page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1) - i++; - i++; - kfree(mtree_erase(&ub->buf_tree, pfn)); + mas_lock(&mas); + mas_for_each(&mas, range, ULONG_MAX) { + if (range->buf_index == buf_index) { + mas_erase(&mas); + kfree(range); + } } + mas_unlock(&mas); } static int __ublk_ctrl_reg_buf(struct ublk_device *ub, - struct ublk_buf *ubuf, int index, + struct ublk_buf *ubuf, + struct page **pages, int index, unsigned short flags) { unsigned long nr_pages = ubuf->nr_pages; @@ -5289,13 +5286,13 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub, int ret; for (i = 0; i < nr_pages; ) { - unsigned long pfn = page_to_pfn(ubuf->pages[i]); + unsigned long pfn = page_to_pfn(pages[i]); unsigned long start = i; struct ublk_buf_range *range; /* Find run of consecutive PFNs */ while (i + 1 < nr_pages && - page_to_pfn(ubuf->pages[i + 1]) == pfn + (i - start) + 1) + page_to_pfn(pages[i + 1]) == pfn + (i - start) + 1) i++; i++; /* past the last page in this run */ @@ -5320,7 +5317,7 @@ static int __ublk_ctrl_reg_buf(struct ublk_device *ub, return 0; unwind: - ublk_buf_erase_ranges(ub, ubuf, i); + ublk_buf_erase_ranges(ub, index); return ret; } @@ -5335,6 +5332,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, void __user *argp = (void __user *)(unsigned long)header->addr; struct ublk_shmem_buf_reg buf_reg; unsigned long addr, size, nr_pages; + struct page **pages = NULL; unsigned int gup_flags; struct gendisk *disk; struct ublk_buf *ubuf; @@ -5371,9 +5369,8 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, goto put_disk; } - ubuf->pages = kvmalloc_array(nr_pages, sizeof(*ubuf->pages), - GFP_KERNEL); - if (!ubuf->pages) { + pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) { ret = -ENOMEM; goto err_free; } @@ -5382,7 +5379,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, if (!(buf_reg.flags & UBLK_SHMEM_BUF_READ_ONLY)) gup_flags |= FOLL_WRITE; - pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, ubuf->pages); + pinned = pin_user_pages_fast(addr, nr_pages, gup_flags, pages); if (pinned < 0) { ret = pinned; goto err_free_pages; @@ -5406,7 +5403,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, if (ret) goto err_unlock; - ret = __ublk_ctrl_reg_buf(ub, ubuf, index, buf_reg.flags); + ret = __ublk_ctrl_reg_buf(ub, ubuf, pages, index, buf_reg.flags); if (ret) { xa_erase(&ub->bufs_xa, index); goto err_unlock; @@ -5414,6 +5411,7 @@ static int ublk_ctrl_reg_buf(struct ublk_device *ub, mutex_unlock(&ub->mutex); + kvfree(pages); ublk_unquiesce_and_resume(disk); ublk_put_disk(disk); return index; @@ -5422,9 +5420,9 @@ err_unlock: mutex_unlock(&ub->mutex); ublk_unquiesce_and_resume(disk); err_unpin: - unpin_user_pages(ubuf->pages, pinned); + unpin_user_pages(pages, pinned); err_free_pages: - kvfree(ubuf->pages); + kvfree(pages); err_free: kfree(ubuf); put_disk: @@ -5433,11 +5431,36 @@ put_disk: } static void __ublk_ctrl_unreg_buf(struct ublk_device *ub, - struct ublk_buf *ubuf) + struct ublk_buf *ubuf, int buf_index) { - ublk_buf_erase_ranges(ub, ubuf, ubuf->nr_pages); - unpin_user_pages(ubuf->pages, ubuf->nr_pages); - kvfree(ubuf->pages); + MA_STATE(mas, &ub->buf_tree, 0, ULONG_MAX); + struct ublk_buf_range *range; + struct page *pages[32]; + + mas_lock(&mas); + mas_for_each(&mas, range, ULONG_MAX) { + unsigned long base, nr, off; + + if (range->buf_index != buf_index) + continue; + + base = range->base_pfn; + nr = mas.last - mas.index + 1; + mas_erase(&mas); + + for (off = 0; off < nr; ) { + unsigned int batch = min_t(unsigned long, + nr - off, 32); + unsigned int j; + + for (j = 0; j < batch; j++) + pages[j] = pfn_to_page(base + off + j); + unpin_user_pages(pages, batch); + off += batch; + } + kfree(range); + } + mas_unlock(&mas); kfree(ubuf); } @@ -5468,7 +5491,7 @@ static int ublk_ctrl_unreg_buf(struct ublk_device *ub, return -ENOENT; } - __ublk_ctrl_unreg_buf(ub, ubuf); + __ublk_ctrl_unreg_buf(ub, ubuf, index); mutex_unlock(&ub->mutex); @@ -5483,7 +5506,7 @@ static void ublk_buf_cleanup(struct ublk_device *ub) unsigned long index; xa_for_each(&ub->bufs_xa, index, ubuf) - __ublk_ctrl_unreg_buf(ub, ubuf); + __ublk_ctrl_unreg_buf(ub, ubuf, index); xa_destroy(&ub->bufs_xa); mtree_destroy(&ub->buf_tree); }