--- /dev/null
+From 8c273186074a591cfdcd4370849676bc3eeb6ecb Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 5 Nov 2021 17:15:46 -0600
+Subject: io_uring: add ring freeing helper
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 9c189eee73af1825ea9c895fafad469de5f82641 upstream.
+
+We free the rings and sqes separately; move this into a helper that does
+both the freeing and the clearing of the memory.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index ebcb0680f1cc..b211feb0d2b1 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2525,6 +2525,14 @@ static void io_mem_free(void *ptr)
+ free_compound_page(page);
+ }
+
++static void io_rings_free(struct io_ring_ctx *ctx)
++{
++ io_mem_free(ctx->rings);
++ io_mem_free(ctx->sq_sqes);
++ ctx->rings = NULL;
++ ctx->sq_sqes = NULL;
++}
++
+ static void *io_mem_alloc(size_t size)
+ {
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+@@ -2684,8 +2692,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
+ mmdrop(ctx->mm_account);
+ ctx->mm_account = NULL;
+ }
+- io_mem_free(ctx->rings);
+- io_mem_free(ctx->sq_sqes);
++ io_rings_free(ctx);
+
+ percpu_ref_exit(&ctx->refs);
+ free_uid(ctx->user);
+@@ -3452,15 +3459,13 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
+ else
+ size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+ if (size == SIZE_MAX) {
+- io_mem_free(ctx->rings);
+- ctx->rings = NULL;
++ io_rings_free(ctx);
+ return -EOVERFLOW;
+ }
+
+ ptr = io_mem_alloc(size);
+ if (IS_ERR(ptr)) {
+- io_mem_free(ctx->rings);
+- ctx->rings = NULL;
++ io_rings_free(ctx);
+ return PTR_ERR(ptr);
+ }
+
+--
+2.47.2
+
--- /dev/null
+From 71318baa99b6fbb65edf76dd0afaad3afd7007cc Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 29 May 2024 09:38:38 -0600
+Subject: io_uring: don't attempt to mmap larger than what the user asks for
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 06fe9b1df1086b42718d632aa57e8f7cd1a66a21 upstream.
+
+If IORING_FEAT_SINGLE_MMAP is ignored, as can happen if an application
+uses an ancient liburing or does the setup manually, then three mmap()
+calls are required to map the ring into userspace. The kernel will still
+have collapsed the mappings, but userspace may ask to map them
+individually. If so, then we should not use the full number of ring
+pages, as it may exceed the partial mapping. Doing so will yield an
+-EFAULT from vm_insert_pages(), as we pass in more pages than what the
+application asked for.
+
+Cap the number of pages to match what the application asked for, for
+the particular mapping operation.
+
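+For reference, an application that ignores IORING_FEAT_SINGLE_MMAP maps
+the three regions separately, sizing each one from the offsets returned
+by io_uring_setup(). A rough userspace sketch (the names 'p' for the
+struct io_uring_params and 'ring_fd' for the ring descriptor are just
+illustrative):
+
+  #include <linux/io_uring.h>
+  #include <sys/mman.h>
+
+  /* illustrative: ring_fd = io_uring_setup(entries, &p); */
+  size_t sq_sz  = p.sq_off.array + p.sq_entries * sizeof(unsigned);
+  size_t cq_sz  = p.cq_off.cqes  + p.cq_entries * sizeof(struct io_uring_cqe);
+  size_t sqe_sz = p.sq_entries * sizeof(struct io_uring_sqe);
+
+  void *sq  = mmap(NULL, sq_sz, PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQ_RING);
+  void *cq  = mmap(NULL, cq_sz, PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_CQ_RING);
+  void *sqe = mmap(NULL, sqe_sz, PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQES);
+
+Each of these lengths can be smaller than the full set of ring pages the
+kernel allocated for the collapsed SQ/CQ mapping, hence the need to cap
+the page count per mapping operation.
+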
+Reported-by: Lucas Mülling <lmulling@proton.me>
+Link: https://github.com/axboe/liburing/issues/1157
+Fixes: 3ab1db3c6039 ("io_uring: get rid of remap_pfn_range() for mapping rings/sqes")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3612,6 +3612,7 @@ static __cold int io_uring_mmap(struct f
+ struct io_ring_ctx *ctx = file->private_data;
+ size_t sz = vma->vm_end - vma->vm_start;
+ long offset = vma->vm_pgoff << PAGE_SHIFT;
++ unsigned int npages;
+ unsigned long pfn;
+ void *ptr;
+
+@@ -3622,8 +3623,8 @@ static __cold int io_uring_mmap(struct f
+ switch (offset & IORING_OFF_MMAP_MASK) {
+ case IORING_OFF_SQ_RING:
+ case IORING_OFF_CQ_RING:
+- return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
+- ctx->n_ring_pages);
++ npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
++ return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
+ case IORING_OFF_SQES:
+ return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
+ ctx->n_sqe_pages);
--- /dev/null
+From fb318430c8de3dcee5727f050dfe3f3dd8c4549c Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 25 Nov 2024 23:10:31 +0000
+Subject: io_uring: fix corner case forgetting to vunmap
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit 43eef70e7e2ac74e7767731dd806720c7fb5e010 upstream.
+
+io_pages_unmap() is a bit tricky in trying to figure out whether the
+pages were previously vmap'ed or not. In particular, if there is just
+one page it believes there is no need to vunmap. The paired
+io_pages_map(), however, could have failed io_mem_alloc_compound() and
+fallen back to io_mem_alloc_single(), which does vmap, and that leads to
+an unpaired vmap.
+
+The solution is to fail if io_mem_alloc_compound() can't allocate a
+single page. That's the easiest way to deal with it, and those two
+functions are getting removed soon, so no need to overcomplicate it.
+
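+For reference, this is (condensed) the check io_pages_unmap() relies on,
+and where the single-page case slips through:
+
+  if (PageCompound(to_free[0]))   /* compound alloc: never vmap'ed */
+          *npages = 1;
+  else if (*npages > 1)           /* multi-page fallback: was vmap'ed */
+          do_vunmap = true;
+
+A single order-0 page coming out of the io_mem_alloc_single() fallback
+is vmap'ed as well, yet matches neither branch, so its mapping was never
+vunmap'ed.
+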
+Cc: stable@vger.kernel.org
+Fixes: 3ab1db3c6039e ("io_uring: get rid of remap_pfn_range() for mapping rings/sqes")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/477e75a3907a2fe83249e49c0a92cd480b2c60e0.1732569842.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2889,6 +2889,8 @@ static void *io_pages_map(struct page **
+ ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
+ if (!IS_ERR(ret))
+ goto done;
++ if (nr_pages == 1)
++ goto fail;
+
+ ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
+ if (!IS_ERR(ret)) {
+@@ -2897,7 +2899,7 @@ done:
+ *npages = nr_pages;
+ return ret;
+ }
+-
++fail:
+ kvfree(pages);
+ *out_pages = NULL;
+ *npages = 0;
--- /dev/null
+From 55b2d61e07a887351cf2996e96f89ade5ab7f1b7 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Thu, 18 Jul 2024 20:00:53 +0100
+Subject: io_uring: fix error pbuf checking
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+Commit bcc87d978b834c298bbdd9c52454c5d0a946e97e upstream.
+
+Syzbot reports a problem that boils down to inconsistent NULL vs IS_ERR
+error handling in io_alloc_pbuf_ring().
+
+KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
+RIP: 0010:__io_remove_buffers+0xac/0x700 io_uring/kbuf.c:341
+Call Trace:
+ <TASK>
+ io_put_bl io_uring/kbuf.c:378 [inline]
+ io_destroy_buffers+0x14e/0x490 io_uring/kbuf.c:392
+ io_ring_ctx_free+0xa00/0x1070 io_uring/io_uring.c:2613
+ io_ring_exit_work+0x80f/0x8a0 io_uring/io_uring.c:2844
+ process_one_work kernel/workqueue.c:3231 [inline]
+ process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312
+ worker_thread+0x86d/0xd40 kernel/workqueue.c:3390
+ kthread+0x2f0/0x390 kernel/kthread.c:389
+ ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147
+ ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
+
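+The root cause: io_pages_map() reports failure with an error pointer,
+while io_alloc_pbuf_ring() only checked for NULL, so the ERR_PTR value
+was stored in bl->buf_ring, treated as a valid ring, and dereferenced
+later during buffer teardown:
+
+  bl->buf_ring = io_pages_map(&bl->buf_pages, &bl->buf_nr_pages, ring_size);
+  if (!bl->buf_ring)      /* never true on failure: it's ERR_PTR(-ENOMEM) */
+          return -ENOMEM;
+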
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+2074b1a3d447915c6f1c@syzkaller.appspotmail.com
+Fixes: 87585b05757dc ("io_uring/kbuf: use vm_insert_pages() for mmap'ed pbuf ring")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/c5f9df20560bd9830401e8e48abc029e7cfd9f5e.1721329239.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -510,8 +510,10 @@ static int io_alloc_pbuf_ring(struct io_
+ ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
+
+ bl->buf_ring = io_pages_map(&bl->buf_pages, &bl->buf_nr_pages, ring_size);
+- if (!bl->buf_ring)
++ if (IS_ERR(bl->buf_ring)) {
++ bl->buf_ring = NULL;
+ return -ENOMEM;
++ }
+ bl->is_mapped = 1;
+ bl->is_mmap = 1;
+ return 0;
--- /dev/null
+From bfaf932689d7e59e3558977854b74bde1b137fae Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 13 Mar 2024 09:56:14 -0600
+Subject: io_uring: get rid of remap_pfn_range() for mapping rings/sqes
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 3ab1db3c6039e02a9deb9d5091d28d559917a645 upstream.
+
+Rather than use remap_pfn_range() for this and manually free later,
+switch to using vm_insert_pages() and have it Just Work.
+
+If possible, allocate a single compound page that covers the range that
+is needed. If that works, then we can just use page_address() on that
+page. If we fail to get a compound page, allocate single pages and use
+vmap() to map them into the kernel virtual address space.
+
+This just covers the rings/sqes; the other remaining remap_pfn_range()
+user of the mmap path will be converted separately. Once that is done,
+we can kill the old alloc/free code.
+
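+In condensed form, the allocation strategy described above looks like
+this (an illustrative sketch only; the real code below is split into
+io_mem_alloc_compound() and io_mem_alloc_single()):
+
+  /* illustrative helper, not the patch's exact code */
+  static void *ring_pages_alloc(struct page **pages, int nr_pages,
+                                size_t size, gfp_t gfp)
+  {
+          int i, order = get_order(size);
+          struct page *page;
+          void *ptr;
+
+          /* 1) try one physically contiguous (compound) allocation */
+          page = alloc_pages(order ? gfp | __GFP_COMP : gfp, order);
+          if (page) {
+                  for (i = 0; i < nr_pages; i++)
+                          pages[i] = page + i;
+                  return page_address(page);
+          }
+
+          /* 2) fall back to order-0 pages stitched together with vmap() */
+          for (i = 0; i < nr_pages; i++) {
+                  pages[i] = alloc_page(gfp);
+                  if (!pages[i])
+                          goto err;
+          }
+          ptr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+          if (ptr)
+                  return ptr;
+  err:
+          while (i--)
+                  put_page(pages[i]);
+          return NULL;
+  }
+
+Either way the caller ends up with a kernel-virtually-contiguous mapping
+plus the page array needed for vm_insert_pages() at mmap time.
+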
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++---
+ io_uring/io_uring.h | 2
+ 2 files changed, 133 insertions(+), 8 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2683,6 +2683,36 @@ static int io_cqring_wait(struct io_ring
+ return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+ }
+
++static void io_pages_unmap(void *ptr, struct page ***pages,
++ unsigned short *npages)
++{
++ bool do_vunmap = false;
++
++ if (!ptr)
++ return;
++
++ if (*npages) {
++ struct page **to_free = *pages;
++ int i;
++
++ /*
++ * Only did vmap for the non-compound multiple page case.
++ * For the compound page, we just need to put the head.
++ */
++ if (PageCompound(to_free[0]))
++ *npages = 1;
++ else if (*npages > 1)
++ do_vunmap = true;
++ for (i = 0; i < *npages; i++)
++ put_page(to_free[i]);
++ }
++ if (do_vunmap)
++ vunmap(ptr);
++ kvfree(*pages);
++ *pages = NULL;
++ *npages = 0;
++}
++
+ void io_mem_free(void *ptr)
+ {
+ if (!ptr)
+@@ -2787,8 +2817,8 @@ static void *io_sqes_map(struct io_ring_
+ static void io_rings_free(struct io_ring_ctx *ctx)
+ {
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
+- io_mem_free(ctx->rings);
+- io_mem_free(ctx->sq_sqes);
++ io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages);
++ io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages);
+ } else {
+ io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
+ ctx->n_ring_pages = 0;
+@@ -2800,6 +2830,80 @@ static void io_rings_free(struct io_ring
+ ctx->sq_sqes = NULL;
+ }
+
++static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
++ size_t size, gfp_t gfp)
++{
++ struct page *page;
++ int i, order;
++
++ order = get_order(size);
++ if (order > 10)
++ return ERR_PTR(-ENOMEM);
++ else if (order)
++ gfp |= __GFP_COMP;
++
++ page = alloc_pages(gfp, order);
++ if (!page)
++ return ERR_PTR(-ENOMEM);
++
++ for (i = 0; i < nr_pages; i++)
++ pages[i] = page + i;
++
++ return page_address(page);
++}
++
++static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size,
++ gfp_t gfp)
++{
++ void *ret;
++ int i;
++
++ for (i = 0; i < nr_pages; i++) {
++ pages[i] = alloc_page(gfp);
++ if (!pages[i])
++ goto err;
++ }
++
++ ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
++ if (ret)
++ return ret;
++err:
++ while (i--)
++ put_page(pages[i]);
++ return ERR_PTR(-ENOMEM);
++}
++
++static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
++ size_t size)
++{
++ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
++ struct page **pages;
++ int nr_pages;
++ void *ret;
++
++ nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
++ pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp);
++ if (!pages)
++ return ERR_PTR(-ENOMEM);
++
++ ret = io_mem_alloc_compound(pages, nr_pages, size, gfp);
++ if (!IS_ERR(ret))
++ goto done;
++
++ ret = io_mem_alloc_single(pages, nr_pages, size, gfp);
++ if (!IS_ERR(ret)) {
++done:
++ *out_pages = pages;
++ *npages = nr_pages;
++ return ret;
++ }
++
++ kvfree(pages);
++ *out_pages = NULL;
++ *npages = 0;
++ return ret;
++}
++
+ void *io_mem_alloc(size_t size)
+ {
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+@@ -3463,14 +3567,12 @@ static void *io_uring_validate_mmap_requ
+ /* Don't allow mmap if the ring was setup without it */
+ if (ctx->flags & IORING_SETUP_NO_MMAP)
+ return ERR_PTR(-EINVAL);
+- ptr = ctx->rings;
+- break;
++ return ctx->rings;
+ case IORING_OFF_SQES:
+ /* Don't allow mmap if the ring was setup without it */
+ if (ctx->flags & IORING_SETUP_NO_MMAP)
+ return ERR_PTR(-EINVAL);
+- ptr = ctx->sq_sqes;
+- break;
++ return ctx->sq_sqes;
+ case IORING_OFF_PBUF_RING: {
+ struct io_buffer_list *bl;
+ unsigned int bgid;
+@@ -3494,11 +3596,22 @@ static void *io_uring_validate_mmap_requ
+ return ptr;
+ }
+
++int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
++ struct page **pages, int npages)
++{
++ unsigned long nr_pages = npages;
++
++ vm_flags_set(vma, VM_DONTEXPAND);
++ return vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
++}
++
+ #ifdef CONFIG_MMU
+
+ static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
+ {
++ struct io_ring_ctx *ctx = file->private_data;
+ size_t sz = vma->vm_end - vma->vm_start;
++ long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long pfn;
+ void *ptr;
+
+@@ -3506,6 +3619,16 @@ static __cold int io_uring_mmap(struct f
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
++ switch (offset & IORING_OFF_MMAP_MASK) {
++ case IORING_OFF_SQ_RING:
++ case IORING_OFF_CQ_RING:
++ return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
++ ctx->n_ring_pages);
++ case IORING_OFF_SQES:
++ return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
++ ctx->n_sqe_pages);
++ }
++
+ pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
+ return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+ }
+@@ -3795,7 +3918,7 @@ static __cold int io_allocate_scq_urings
+ return -EOVERFLOW;
+
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+- rings = io_mem_alloc(size);
++ rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size);
+ else
+ rings = io_rings_map(ctx, p->cq_off.user_addr, size);
+
+@@ -3820,7 +3943,7 @@ static __cold int io_allocate_scq_urings
+ }
+
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP))
+- ptr = io_mem_alloc(size);
++ ptr = io_pages_map(&ctx->sqe_pages, &ctx->n_sqe_pages, size);
+ else
+ ptr = io_sqes_map(ctx, p->sq_off.user_addr, size);
+
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -55,6 +55,8 @@ bool io_fill_cqe_req_aux(struct io_kiocb
+ void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
+
+ struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
++int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
++ struct page **pages, int npages);
+
+ struct file *io_file_get_normal(struct io_kiocb *req, int fd);
+ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
--- /dev/null
+From bd9194ea9dc6647ca247b948aa8587fcb348ac6e Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 12 Mar 2024 20:24:21 -0600
+Subject: io_uring/kbuf: use vm_insert_pages() for mmap'ed pbuf ring
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 87585b05757dc70545efb434669708d276125559 upstream.
+
+Rather than use remap_pfn_range() for this and manually free later,
+switch to using vm_insert_page() and have it Just Work.
+
+This requires a bit of effort on the mmap lookup side: the ctx
+uring_lock, which otherwise protects buffer_lists from being torn down,
+isn't held there, and it cannot be taken from mmap context as that would
+introduce an ABBA deadlock between the mmap lock and the ctx uring_lock.
+Instead, look up the buffer_list under RCU, as the list is already RCU
+freed. Use the existing reference count to determine whether it's safe
+to grab a reference to it (i.e. if it's not already zero),
+and drop that reference when done with the mapping. If the mmap
+reference is the last one, the buffer_list and the associated memory can
+go away, since the vma insertion has references to the inserted pages at
+that point.
+
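+The resulting lookup pattern is roughly the following (a sketch of the
+description above; the field name bl->refs is an assumption here, see
+io_pbuf_get_bl() and io_put_bl() for the actual code):
+
+  rcu_read_lock();
+  bl = xa_load(&ctx->io_bl_xa, bgid);
+  /* bl->refs: assumed name for the existing reference count; only take
+   * a reference if it hasn't already dropped to zero */
+  if (bl && !atomic_inc_not_zero(&bl->refs))
+          bl = NULL;
+  rcu_read_unlock();
+  ...
+  /* once vm_insert_pages() holds its own references to the pages */
+  io_put_bl(ctx, bl);
+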
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/io_uring_types.h | 3
+ io_uring/io_uring.c | 58 ++++-------------
+ io_uring/io_uring.h | 6 +
+ io_uring/kbuf.c | 137 ++++++++---------------------------------
+ io_uring/kbuf.h | 3
+ 5 files changed, 48 insertions(+), 159 deletions(-)
+
+--- a/include/linux/io_uring_types.h
++++ b/include/linux/io_uring_types.h
+@@ -326,9 +326,6 @@ struct io_ring_ctx {
+
+ struct list_head io_buffers_cache;
+
+- /* deferred free list, protected by ->uring_lock */
+- struct hlist_head io_buf_list;
+-
+ /* Keep this last, we don't need it for the fast path */
+ struct wait_queue_head poll_wq;
+ struct io_restriction restrictions;
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -311,7 +311,6 @@ static __cold struct io_ring_ctx *io_rin
+ INIT_LIST_HEAD(&ctx->sqd_list);
+ INIT_LIST_HEAD(&ctx->cq_overflow_list);
+ INIT_LIST_HEAD(&ctx->io_buffers_cache);
+- INIT_HLIST_HEAD(&ctx->io_buf_list);
+ io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
+ sizeof(struct io_rsrc_node));
+ io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
+@@ -2682,15 +2681,15 @@ static int io_cqring_wait(struct io_ring
+ return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
+ }
+
+-static void io_pages_unmap(void *ptr, struct page ***pages,
+- unsigned short *npages)
++void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
++ bool put_pages)
+ {
+ bool do_vunmap = false;
+
+ if (!ptr)
+ return;
+
+- if (*npages) {
++ if (put_pages && *npages) {
+ struct page **to_free = *pages;
+ int i;
+
+@@ -2712,14 +2711,6 @@ static void io_pages_unmap(void *ptr, st
+ *npages = 0;
+ }
+
+-void io_mem_free(void *ptr)
+-{
+- if (!ptr)
+- return;
+-
+- folio_put(virt_to_folio(ptr));
+-}
+-
+ static void io_pages_free(struct page ***pages, int npages)
+ {
+ struct page **page_array;
+@@ -2818,8 +2809,10 @@ static void *io_sqes_map(struct io_ring_
+ static void io_rings_free(struct io_ring_ctx *ctx)
+ {
+ if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
+- io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages);
+- io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages);
++ io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages,
++ true);
++ io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages,
++ true);
+ } else {
+ io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
+ ctx->n_ring_pages = 0;
+@@ -2876,8 +2869,8 @@ err:
+ return ERR_PTR(-ENOMEM);
+ }
+
+-static void *io_pages_map(struct page ***out_pages, unsigned short *npages,
+- size_t size)
++void *io_pages_map(struct page ***out_pages, unsigned short *npages,
++ size_t size)
+ {
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
+ struct page **pages;
+@@ -2909,17 +2902,6 @@ fail:
+ return ret;
+ }
+
+-void *io_mem_alloc(size_t size)
+-{
+- gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+- void *ret;
+-
+- ret = (void *) __get_free_pages(gfp, get_order(size));
+- if (ret)
+- return ret;
+- return ERR_PTR(-ENOMEM);
+-}
+-
+ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
+ unsigned int cq_entries, size_t *sq_offset)
+ {
+@@ -3073,7 +3055,6 @@ static __cold void io_ring_ctx_free(stru
+ ctx->mm_account = NULL;
+ }
+ io_rings_free(ctx);
+- io_kbuf_mmap_list_free(ctx);
+
+ percpu_ref_exit(&ctx->refs);
+ free_uid(ctx->user);
+@@ -3563,10 +3544,8 @@ static void *io_uring_validate_mmap_requ
+ {
+ struct io_ring_ctx *ctx = file->private_data;
+ loff_t offset = pgoff << PAGE_SHIFT;
+- struct page *page;
+- void *ptr;
+
+- switch (offset & IORING_OFF_MMAP_MASK) {
++ switch ((pgoff << PAGE_SHIFT) & IORING_OFF_MMAP_MASK) {
+ case IORING_OFF_SQ_RING:
+ case IORING_OFF_CQ_RING:
+ /* Don't allow mmap if the ring was setup without it */
+@@ -3581,6 +3560,7 @@ static void *io_uring_validate_mmap_requ
+ case IORING_OFF_PBUF_RING: {
+ struct io_buffer_list *bl;
+ unsigned int bgid;
++ void *ptr;
+
+ bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
+ bl = io_pbuf_get_bl(ctx, bgid);
+@@ -3588,17 +3568,11 @@ static void *io_uring_validate_mmap_requ
+ return bl;
+ ptr = bl->buf_ring;
+ io_put_bl(ctx, bl);
+- break;
++ return ptr;
+ }
+- default:
+- return ERR_PTR(-EINVAL);
+ }
+
+- page = virt_to_head_page(ptr);
+- if (sz > page_size(page))
+- return ERR_PTR(-EINVAL);
+-
+- return ptr;
++ return ERR_PTR(-EINVAL);
+ }
+
+ int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma,
+@@ -3618,7 +3592,6 @@ static __cold int io_uring_mmap(struct f
+ size_t sz = vma->vm_end - vma->vm_start;
+ long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned int npages;
+- unsigned long pfn;
+ void *ptr;
+
+ ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
+@@ -3633,10 +3606,11 @@ static __cold int io_uring_mmap(struct f
+ case IORING_OFF_SQES:
+ return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
+ ctx->n_sqe_pages);
++ case IORING_OFF_PBUF_RING:
++ return io_pbuf_mmap(file, vma);
+ }
+
+- pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
+- return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
++ return -EINVAL;
+ }
+
+ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
+--- a/io_uring/io_uring.h
++++ b/io_uring/io_uring.h
+@@ -93,8 +93,10 @@ bool __io_alloc_req_refill(struct io_rin
+ bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
+ bool cancel_all);
+
+-void *io_mem_alloc(size_t size);
+-void io_mem_free(void *ptr);
++void *io_pages_map(struct page ***out_pages, unsigned short *npages,
++ size_t size);
++void io_pages_unmap(void *ptr, struct page ***pages, unsigned short *npages,
++ bool put_pages);
+
+ #if defined(CONFIG_PROVE_LOCKING)
+ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -30,25 +30,12 @@ struct io_provide_buf {
+ __u16 bid;
+ };
+
+-static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+- unsigned int bgid)
+-{
+- return xa_load(&ctx->io_bl_xa, bgid);
+-}
+-
+-struct io_buf_free {
+- struct hlist_node list;
+- void *mem;
+- size_t size;
+- int inuse;
+-};
+-
+ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+ unsigned int bgid)
+ {
+ lockdep_assert_held(&ctx->uring_lock);
+
+- return __io_buffer_get_list(ctx, bgid);
++ return xa_load(&ctx->io_bl_xa, bgid);
+ }
+
+ static int io_buffer_add_list(struct io_ring_ctx *ctx,
+@@ -199,24 +186,6 @@ void __user *io_buffer_select(struct io_
+ return ret;
+ }
+
+-/*
+- * Mark the given mapped range as free for reuse
+- */
+-static void io_kbuf_mark_free(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+-{
+- struct io_buf_free *ibf;
+-
+- hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
+- if (bl->buf_ring == ibf->mem) {
+- ibf->inuse = 0;
+- return;
+- }
+- }
+-
+- /* can't happen... */
+- WARN_ON_ONCE(1);
+-}
+-
+ static int __io_remove_buffers(struct io_ring_ctx *ctx,
+ struct io_buffer_list *bl, unsigned nbufs)
+ {
+@@ -228,23 +197,16 @@ static int __io_remove_buffers(struct io
+
+ if (bl->is_mapped) {
+ i = bl->buf_ring->tail - bl->head;
+- if (bl->is_mmap) {
+- /*
+- * io_kbuf_list_free() will free the page(s) at
+- * ->release() time.
+- */
+- io_kbuf_mark_free(ctx, bl);
+- bl->buf_ring = NULL;
+- bl->is_mmap = 0;
+- } else if (bl->buf_nr_pages) {
++ if (bl->buf_nr_pages) {
+ int j;
+
+- for (j = 0; j < bl->buf_nr_pages; j++)
+- unpin_user_page(bl->buf_pages[j]);
+- kvfree(bl->buf_pages);
+- vunmap(bl->buf_ring);
+- bl->buf_pages = NULL;
+- bl->buf_nr_pages = 0;
++ if (!bl->is_mmap) {
++ for (j = 0; j < bl->buf_nr_pages; j++)
++ unpin_user_page(bl->buf_pages[j]);
++ }
++ io_pages_unmap(bl->buf_ring, &bl->buf_pages,
++ &bl->buf_nr_pages, bl->is_mmap);
++ bl->is_mmap = 0;
+ }
+ /* make sure it's seen as empty */
+ INIT_LIST_HEAD(&bl->buf_list);
+@@ -540,63 +502,17 @@ error_unpin:
+ return ret;
+ }
+
+-/*
+- * See if we have a suitable region that we can reuse, rather than allocate
+- * both a new io_buf_free and mem region again. We leave it on the list as
+- * even a reused entry will need freeing at ring release.
+- */
+-static struct io_buf_free *io_lookup_buf_free_entry(struct io_ring_ctx *ctx,
+- size_t ring_size)
+-{
+- struct io_buf_free *ibf, *best = NULL;
+- size_t best_dist;
+-
+- hlist_for_each_entry(ibf, &ctx->io_buf_list, list) {
+- size_t dist;
+-
+- if (ibf->inuse || ibf->size < ring_size)
+- continue;
+- dist = ibf->size - ring_size;
+- if (!best || dist < best_dist) {
+- best = ibf;
+- if (!dist)
+- break;
+- best_dist = dist;
+- }
+- }
+-
+- return best;
+-}
+-
+ static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
+ struct io_uring_buf_reg *reg,
+ struct io_buffer_list *bl)
+ {
+- struct io_buf_free *ibf;
+ size_t ring_size;
+- void *ptr;
+
+ ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
+
+- /* Reuse existing entry, if we can */
+- ibf = io_lookup_buf_free_entry(ctx, ring_size);
+- if (!ibf) {
+- ptr = io_mem_alloc(ring_size);
+- if (IS_ERR(ptr))
+- return PTR_ERR(ptr);
+-
+- /* Allocate and store deferred free entry */
+- ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
+- if (!ibf) {
+- io_mem_free(ptr);
+- return -ENOMEM;
+- }
+- ibf->mem = ptr;
+- ibf->size = ring_size;
+- hlist_add_head(&ibf->list, &ctx->io_buf_list);
+- }
+- ibf->inuse = 1;
+- bl->buf_ring = ibf->mem;
++ bl->buf_ring = io_pages_map(&bl->buf_pages, &bl->buf_nr_pages, ring_size);
++ if (!bl->buf_ring)
++ return -ENOMEM;
+ bl->is_mapped = 1;
+ bl->is_mmap = 1;
+ return 0;
+@@ -719,18 +635,19 @@ struct io_buffer_list *io_pbuf_get_bl(st
+ return ERR_PTR(-EINVAL);
+ }
+
+-/*
+- * Called at or after ->release(), free the mmap'ed buffers that we used
+- * for memory mapped provided buffer rings.
+- */
+-void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
+-{
+- struct io_buf_free *ibf;
+- struct hlist_node *tmp;
+-
+- hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
+- hlist_del(&ibf->list);
+- io_mem_free(ibf->mem);
+- kfree(ibf);
+- }
++int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma)
++{
++ struct io_ring_ctx *ctx = file->private_data;
++ loff_t pgoff = vma->vm_pgoff << PAGE_SHIFT;
++ struct io_buffer_list *bl;
++ int bgid, ret;
++
++ bgid = (pgoff & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
++ bl = io_pbuf_get_bl(ctx, bgid);
++ if (IS_ERR(bl))
++ return PTR_ERR(bl);
++
++ ret = io_uring_mmap_pages(ctx, vma, bl->buf_pages, bl->buf_nr_pages);
++ io_put_bl(ctx, bl);
++ return ret;
+ }
+--- a/io_uring/kbuf.h
++++ b/io_uring/kbuf.h
+@@ -54,8 +54,6 @@ int io_provide_buffers(struct io_kiocb *
+ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
+
+-void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
+-
+ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
+
+ void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
+@@ -63,6 +61,7 @@ void io_kbuf_recycle_legacy(struct io_ki
+ void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl);
+ struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+ unsigned long bgid);
++int io_pbuf_mmap(struct file *file, struct vm_area_struct *vma);
+
+ static inline void io_kbuf_recycle_ring(struct io_kiocb *req)
+ {
--- /dev/null
+From 432b583ab581f2c21cad164d396a8e9fa4754a22 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 12 Mar 2024 10:42:27 -0600
+Subject: io_uring/kbuf: vmap pinned buffer ring
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit e270bfd22a2a10d1cfbaddf23e79b6d0b405d21e upstream.
+
+This avoids needing to care about HIGHMEM, and it makes the buffer
+indexing easier as both ring provided buffer methods are now virtually
+mapped in a contiguous fashion.
+
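+Concretely, the per-page lookup that io_ring_buffer_select() needed
+before (taken from the hunk below) collapses into plain array indexing
+once the pinned pages are vmap()'ed:
+
+  /* before: locate the entry page by page */
+  index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
+  off   = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
+  buf   = (struct io_uring_buf *)page_address(bl->buf_pages[index]) + off;
+
+  /* after: the ring is virtually contiguous */
+  buf   = &br->bufs[head];
+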
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 39 +++++++++++++++------------------------
+ 1 file changed, 15 insertions(+), 24 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -7,6 +7,7 @@
+ #include <linux/slab.h>
+ #include <linux/namei.h>
+ #include <linux/poll.h>
++#include <linux/vmalloc.h>
+ #include <linux/io_uring.h>
+
+ #include <uapi/linux/io_uring.h>
+@@ -153,15 +154,7 @@ static void __user *io_ring_buffer_selec
+ return NULL;
+
+ head &= bl->mask;
+- /* mmaped buffers are always contig */
+- if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
+- buf = &br->bufs[head];
+- } else {
+- int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
+- int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
+- buf = page_address(bl->buf_pages[index]);
+- buf += off;
+- }
++ buf = &br->bufs[head];
+ if (*len == 0 || *len > buf->len)
+ *len = buf->len;
+ req->flags |= REQ_F_BUFFER_RING;
+@@ -249,6 +242,7 @@ static int __io_remove_buffers(struct io
+ for (j = 0; j < bl->buf_nr_pages; j++)
+ unpin_user_page(bl->buf_pages[j]);
+ kvfree(bl->buf_pages);
++ vunmap(bl->buf_ring);
+ bl->buf_pages = NULL;
+ bl->buf_nr_pages = 0;
+ }
+@@ -501,9 +495,9 @@ err:
+ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
+ struct io_buffer_list *bl)
+ {
+- struct io_uring_buf_ring *br;
++ struct io_uring_buf_ring *br = NULL;
++ int nr_pages, ret, i;
+ struct page **pages;
+- int i, nr_pages;
+
+ pages = io_pin_pages(reg->ring_addr,
+ flex_array_size(br, bufs, reg->ring_entries),
+@@ -511,18 +505,12 @@ static int io_pin_pbuf_ring(struct io_ur
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+- /*
+- * Apparently some 32-bit boxes (ARM) will return highmem pages,
+- * which then need to be mapped. We could support that, but it'd
+- * complicate the code and slowdown the common cases quite a bit.
+- * So just error out, returning -EINVAL just like we did on kernels
+- * that didn't support mapped buffer rings.
+- */
+- for (i = 0; i < nr_pages; i++)
+- if (PageHighMem(pages[i]))
+- goto error_unpin;
++ br = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
++ if (!br) {
++ ret = -ENOMEM;
++ goto error_unpin;
++ }
+
+- br = page_address(pages[0]);
+ #ifdef SHM_COLOUR
+ /*
+ * On platforms that have specific aliasing requirements, SHM_COLOUR
+@@ -533,8 +521,10 @@ static int io_pin_pbuf_ring(struct io_ur
+ * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
+ * this transparently.
+ */
+- if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
++ if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
++ ret = -EINVAL;
+ goto error_unpin;
++ }
+ #endif
+ bl->buf_pages = pages;
+ bl->buf_nr_pages = nr_pages;
+@@ -546,7 +536,8 @@ error_unpin:
+ for (i = 0; i < nr_pages; i++)
+ unpin_user_page(pages[i]);
+ kvfree(pages);
+- return -EINVAL;
++ vunmap(br);
++ return ret;
+ }
+
+ /*
--- /dev/null
+From b001225fa4fe09610b35b428e46193ed2a28c95f Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Fri, 5 Nov 2021 17:13:52 -0600
+Subject: io_uring: return error pointer from io_mem_alloc()
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit e27cef86a0edd4ef7f8b4670f508a03b509cbbb2 upstream.
+
+In preparation for having more than one type of ring allocator, make the
+existing one return a valid pointer or an error pointer rather than just
+NULL.
+
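+For callers this means switching from a NULL check to the usual
+error-pointer helpers, which also lets the real errno propagate:
+
+  rings = io_mem_alloc(size);
+  if (IS_ERR(rings))              /* e.g. ERR_PTR(-ENOMEM) */
+          return PTR_ERR(rings);  /* return the encoded errno */
+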
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 33597284e1cb..ebcb0680f1cc 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2528,8 +2528,12 @@ static void io_mem_free(void *ptr)
+ static void *io_mem_alloc(size_t size)
+ {
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
++ void *ret;
+
+- return (void *) __get_free_pages(gfp, get_order(size));
++ ret = (void *) __get_free_pages(gfp, get_order(size));
++ if (ret)
++ return ret;
++ return ERR_PTR(-ENOMEM);
+ }
+
+ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries,
+@@ -3422,6 +3426,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
+ {
+ struct io_rings *rings;
+ size_t size, sq_array_offset;
++ void *ptr;
+
+ /* make sure these are sane, as we already accounted them */
+ ctx->sq_entries = p->sq_entries;
+@@ -3432,8 +3437,8 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
+ return -EOVERFLOW;
+
+ rings = io_mem_alloc(size);
+- if (!rings)
+- return -ENOMEM;
++ if (IS_ERR(rings))
++ return PTR_ERR(rings);
+
+ ctx->rings = rings;
+ ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
+@@ -3452,13 +3457,14 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
+ return -EOVERFLOW;
+ }
+
+- ctx->sq_sqes = io_mem_alloc(size);
+- if (!ctx->sq_sqes) {
++ ptr = io_mem_alloc(size);
++ if (IS_ERR(ptr)) {
+ io_mem_free(ctx->rings);
+ ctx->rings = NULL;
+- return -ENOMEM;
++ return PTR_ERR(ptr);
+ }
+
++ ctx->sq_sqes = ptr;
+ return 0;
+ }
+
+--
+2.47.2
+
--- /dev/null
+From dc5ec8a2f867b4211508a5ded8616103f4d67112 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 13 Mar 2024 14:58:14 -0600
+Subject: io_uring: unify io_pin_pages()
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 1943f96b3816e0f0d3d6686374d6e1d617c8b42c upstream.
+
+Move it into io_uring.c where it belongs, and use it there as well
+rather than having two implementations of this.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 61 +++++++++++++++++++++++++++++++++++-----------------
+ io_uring/rsrc.c | 39 ---------------------------------
+ 2 files changed, 42 insertions(+), 58 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2738,33 +2738,57 @@ static void io_pages_free(struct page **
+ *pages = NULL;
+ }
+
++struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages)
++{
++ unsigned long start, end, nr_pages;
++ struct page **pages;
++ int ret;
++
++ end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
++ start = uaddr >> PAGE_SHIFT;
++ nr_pages = end - start;
++ if (WARN_ON_ONCE(!nr_pages))
++ return ERR_PTR(-EINVAL);
++
++ pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
++ if (!pages)
++ return ERR_PTR(-ENOMEM);
++
++ ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
++ pages);
++ /* success, mapped all pages */
++ if (ret == nr_pages) {
++ *npages = nr_pages;
++ return pages;
++ }
++
++ /* partial map, or didn't map anything */
++ if (ret >= 0) {
++ /* if we did partial map, release any pages we did get */
++ if (ret)
++ unpin_user_pages(pages, ret);
++ ret = -EFAULT;
++ }
++ kvfree(pages);
++ return ERR_PTR(ret);
++}
++
+ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
+ unsigned long uaddr, size_t size)
+ {
+ struct page **page_array;
+ unsigned int nr_pages;
+ void *page_addr;
+- int ret, pinned;
+
+ *npages = 0;
+
+ if (uaddr & (PAGE_SIZE - 1) || !size)
+ return ERR_PTR(-EINVAL);
+
+- nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- if (nr_pages > USHRT_MAX)
+- return ERR_PTR(-EINVAL);
+- page_array = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+- if (!page_array)
+- return ERR_PTR(-ENOMEM);
+-
+-
+- pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+- page_array);
+- if (pinned != nr_pages) {
+- ret = (pinned < 0) ? pinned : -EFAULT;
+- goto free_pages;
+- }
++ nr_pages = 0;
++ page_array = io_pin_pages(uaddr, size, &nr_pages);
++ if (IS_ERR(page_array))
++ return page_array;
+
+ page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (page_addr) {
+@@ -2772,10 +2796,9 @@ static void *__io_uaddr_map(struct page
+ *npages = nr_pages;
+ return page_addr;
+ }
+- ret = -ENOMEM;
+-free_pages:
+- io_pages_free(&page_array, pinned > 0 ? pinned : 0);
+- return ERR_PTR(ret);
++
++ io_pages_free(&page_array, nr_pages);
++ return ERR_PTR(-ENOMEM);
+ }
+
+ static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
+--- a/io_uring/rsrc.c
++++ b/io_uring/rsrc.c
+@@ -873,45 +873,6 @@ static int io_buffer_account_pin(struct
+ return ret;
+ }
+
+-struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages)
+-{
+- unsigned long start, end, nr_pages;
+- struct page **pages = NULL;
+- int pret, ret = -ENOMEM;
+-
+- end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+- start = ubuf >> PAGE_SHIFT;
+- nr_pages = end - start;
+-
+- pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
+- if (!pages)
+- goto done;
+-
+- ret = 0;
+- mmap_read_lock(current->mm);
+- pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+- pages);
+- if (pret == nr_pages)
+- *npages = nr_pages;
+- else
+- ret = pret < 0 ? pret : -EFAULT;
+-
+- mmap_read_unlock(current->mm);
+- if (ret) {
+- /* if we did partial map, release any pages we did get */
+- if (pret > 0)
+- unpin_user_pages(pages, pret);
+- goto done;
+- }
+- ret = 0;
+-done:
+- if (ret < 0) {
+- kvfree(pages);
+- pages = ERR_PTR(ret);
+- }
+- return pages;
+-}
+-
+ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
+ struct io_mapped_ubuf **pimu,
+ struct page **last_hpage)
--- /dev/null
+From c8e556f54f547266d984bcffbb44279ec3884258 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 13 Mar 2024 15:01:03 -0600
+Subject: io_uring: use unpin_user_pages() where appropriate
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 18595c0a58ae29ac6a996c5b664610119b73182d upstream.
+
+There are a few cases of open-coded loops around unpin_user_page(); use
+the generic unpin_user_pages() helper instead.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/kbuf.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/io_uring/kbuf.c
++++ b/io_uring/kbuf.c
+@@ -458,8 +458,8 @@ static int io_pin_pbuf_ring(struct io_ur
+ struct io_buffer_list *bl)
+ {
+ struct io_uring_buf_ring *br = NULL;
+- int nr_pages, ret, i;
+ struct page **pages;
++ int nr_pages, ret;
+
+ pages = io_pin_pages(reg->ring_addr,
+ flex_array_size(br, bufs, reg->ring_entries),
+@@ -495,8 +495,7 @@ static int io_pin_pbuf_ring(struct io_ur
+ bl->is_mmap = 0;
+ return 0;
+ error_unpin:
+- for (i = 0; i < nr_pages; i++)
+- unpin_user_page(pages[i]);
++ unpin_user_pages(pages, nr_pages);
+ kvfree(pages);
+ vunmap(br);
+ return ret;
--- /dev/null
+From 23cd4c4db8836b441e401328244a1864b47ac3c8 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 13 Mar 2024 14:10:40 -0600
+Subject: io_uring: use vmap() for ring mapping
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 09fc75e0c035a2cabb8caa15cec6e85159dd94f0 upstream.
+
+This is the last holdout that does odd page checking; convert it to
+vmap() just like what is done for the non-mmap path.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ io_uring/io_uring.c | 38 +++++++++-----------------------------
+ 1 file changed, 9 insertions(+), 29 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -64,7 +64,6 @@
+ #include <linux/sched/mm.h>
+ #include <linux/uaccess.h>
+ #include <linux/nospec.h>
+-#include <linux/highmem.h>
+ #include <linux/fsnotify.h>
+ #include <linux/fadvise.h>
+ #include <linux/task_work.h>
+@@ -2745,7 +2744,7 @@ static void *__io_uaddr_map(struct page
+ struct page **page_array;
+ unsigned int nr_pages;
+ void *page_addr;
+- int ret, i, pinned;
++ int ret, pinned;
+
+ *npages = 0;
+
+@@ -2767,34 +2766,13 @@ static void *__io_uaddr_map(struct page
+ goto free_pages;
+ }
+
+- page_addr = page_address(page_array[0]);
+- for (i = 0; i < nr_pages; i++) {
+- ret = -EINVAL;
+-
+- /*
+- * Can't support mapping user allocated ring memory on 32-bit
+- * archs where it could potentially reside in highmem. Just
+- * fail those with -EINVAL, just like we did on kernels that
+- * didn't support this feature.
+- */
+- if (PageHighMem(page_array[i]))
+- goto free_pages;
+-
+- /*
+- * No support for discontig pages for now, should either be a
+- * single normal page, or a huge page. Later on we can add
+- * support for remapping discontig pages, for now we will
+- * just fail them with EINVAL.
+- */
+- if (page_address(page_array[i]) != page_addr)
+- goto free_pages;
+- page_addr += PAGE_SIZE;
++ page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
++ if (page_addr) {
++ *pages = page_array;
++ *npages = nr_pages;
++ return page_addr;
+ }
+-
+- *pages = page_array;
+- *npages = nr_pages;
+- return page_to_virt(page_array[0]);
+-
++ ret = -ENOMEM;
+ free_pages:
+ io_pages_free(&page_array, pinned > 0 ? pinned : 0);
+ return ERR_PTR(ret);
+@@ -2824,6 +2802,8 @@ static void io_rings_free(struct io_ring
+ ctx->n_ring_pages = 0;
+ io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages);
+ ctx->n_sqe_pages = 0;
++ vunmap(ctx->rings);
++ vunmap(ctx->sq_sqes);
+ }
+
+ ctx->rings = NULL;
--- /dev/null
+From ac77b7bfe1633f5366bceb76d74d2f04846b2186 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Sat, 16 Mar 2024 07:21:43 -0600
+Subject: mm: add nommu variant of vm_insert_pages()
+
+From: Jens Axboe <axboe@kernel.dk>
+
+Commit 62346c6cb28b043f2a6e95337d9081ec0b37b5f5 upstream.
+
+An identical one exists for vm_insert_page(); add one for
+vm_insert_pages() to avoid needing to check for CONFIG_MMU in code that
+uses it.
+
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/nommu.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/mm/nommu.c
++++ b/mm/nommu.c
+@@ -357,6 +357,13 @@ int vm_insert_page(struct vm_area_struct
+ }
+ EXPORT_SYMBOL(vm_insert_page);
+
++int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
++ struct page **pages, unsigned long *num)
++{
++ return -EINVAL;
++}
++EXPORT_SYMBOL(vm_insert_pages);
++
+ int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num)
+ {
drm-nouveau-do-not-override-forced-connector-status.patch
net-handle-napi_schedule-calls-from-non-interrupt.patch
block-fix-kmem_cache-of-name-bio-108-already-exists.patch
+mm-add-nommu-variant-of-vm_insert_pages.patch
+io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch
+io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch
+io_uring-fix-corner-case-forgetting-to-vunmap.patch
+io_uring-use-vmap-for-ring-mapping.patch
+io_uring-unify-io_pin_pages.patch
+io_uring-kbuf-vmap-pinned-buffer-ring.patch
+io_uring-kbuf-use-vm_insert_pages-for-mmap-ed-pbuf-ring.patch
+io_uring-use-unpin_user_pages-where-appropriate.patch
+io_uring-fix-error-pbuf-checking.patch
+io_uring-add-ring-freeing-helper.patch
+io_uring-return-error-pointer-from-io_mem_alloc.patch