From: Greg Kroah-Hartman Date: Sun, 16 Mar 2025 05:45:58 +0000 (+0100) Subject: 6.1-stable patches X-Git-Tag: v6.6.84~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fa0ac0f20f0605c6c860e7cc902c8525b34b525b;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: io_uring-add-ring-freeing-helper.patch io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch io_uring-fix-corner-case-forgetting-to-vunmap.patch io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch io_uring-return-error-pointer-from-io_mem_alloc.patch mm-add-nommu-variant-of-vm_insert_pages.patch --- diff --git a/queue-6.1/io_uring-add-ring-freeing-helper.patch b/queue-6.1/io_uring-add-ring-freeing-helper.patch new file mode 100644 index 0000000000..b5a8568a7b --- /dev/null +++ b/queue-6.1/io_uring-add-ring-freeing-helper.patch @@ -0,0 +1,63 @@ +From 8c273186074a591cfdcd4370849676bc3eeb6ecb Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 5 Nov 2021 17:15:46 -0600 +Subject: io_uring: add ring freeing helper + +From: Jens Axboe + +Commit 9c189eee73af1825ea9c895fafad469de5f82641 upstream. + +We do rings and sqes separately, move them into a helper that does both +the freeing and clearing of the memory. + +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2525,6 +2525,14 @@ static void io_mem_free(void *ptr) + free_compound_page(page); + } + ++static void io_rings_free(struct io_ring_ctx *ctx) ++{ ++ io_mem_free(ctx->rings); ++ io_mem_free(ctx->sq_sqes); ++ ctx->rings = NULL; ++ ctx->sq_sqes = NULL; ++} ++ + static void *io_mem_alloc(size_t size) + { + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; +@@ -2684,8 +2692,7 @@ static __cold void io_ring_ctx_free(stru + mmdrop(ctx->mm_account); + ctx->mm_account = NULL; + } +- io_mem_free(ctx->rings); +- io_mem_free(ctx->sq_sqes); ++ io_rings_free(ctx); + + percpu_ref_exit(&ctx->refs); + free_uid(ctx->user); +@@ -3452,15 +3459,13 @@ static __cold int io_allocate_scq_urings + else + size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); + if (size == SIZE_MAX) { +- io_mem_free(ctx->rings); +- ctx->rings = NULL; ++ io_rings_free(ctx); + return -EOVERFLOW; + } + + ptr = io_mem_alloc(size); + if (IS_ERR(ptr)) { +- io_mem_free(ctx->rings); +- ctx->rings = NULL; ++ io_rings_free(ctx); + return PTR_ERR(ptr); + } + diff --git a/queue-6.1/io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch b/queue-6.1/io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch new file mode 100644 index 0000000000..b7b698e479 --- /dev/null +++ b/queue-6.1/io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch @@ -0,0 +1,51 @@ +From 521b7cfc3988765e869b145c580a2b8c9e0d2186 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 29 May 2024 09:38:38 -0600 +Subject: io_uring: don't attempt to mmap larger than what the user asks for + +From: Jens Axboe + +Commit 06fe9b1df1086b42718d632aa57e8f7cd1a66a21 upstream. + +If IORING_FEAT_SINGLE_MMAP is ignored, as can happen if an application +uses an ancient liburing or does setup manually, then 3 mmap's are +required to map the ring into userspace. The kernel will still have +collapsed the mappings, however userspace may ask for mapping them +individually. If so, then we should not use the full number of ring +pages, as it may exceed the partial mapping. 
Doing so will yield an
+-EFAULT from vm_insert_pages(), as we pass in more pages than what the
+application asked for.
+
+Cap the number of pages to match what the application asked for, for
+the particular mapping operation.
+
+Reported-by: Lucas Mülling
+Link: https://github.com/axboe/liburing/issues/1157
+Fixes: 3ab1db3c6039 ("io_uring: get rid of remap_pfn_range() for mapping rings/sqes")
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ io_uring/io_uring.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3236,6 +3236,7 @@ static __cold int io_uring_mmap(struct f
+ struct io_ring_ctx *ctx = file->private_data;
+ size_t sz = vma->vm_end - vma->vm_start;
+ long offset = vma->vm_pgoff << PAGE_SHIFT;
++ unsigned int npages;
+ unsigned long pfn;
+ void *ptr;
+
+@@ -3246,8 +3247,8 @@ static __cold int io_uring_mmap(struct f
+ switch (offset & IORING_OFF_MMAP_MASK) {
+ case IORING_OFF_SQ_RING:
+ case IORING_OFF_CQ_RING:
+- return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
+- ctx->n_ring_pages);
++ npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
++ return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
+ case IORING_OFF_SQES:
+ return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
+ ctx->n_sqe_pages);
diff --git a/queue-6.1/io_uring-fix-corner-case-forgetting-to-vunmap.patch b/queue-6.1/io_uring-fix-corner-case-forgetting-to-vunmap.patch
new file mode 100644
index 0000000000..64f58e0235
--- /dev/null
+++ b/queue-6.1/io_uring-fix-corner-case-forgetting-to-vunmap.patch
@@ -0,0 +1,49 @@
+From bcb2f3c886d4bfe279404ea44848dbcbc6e20f97 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov
+Date: Mon, 25 Nov 2024 23:10:31 +0000
+Subject: io_uring: fix corner case forgetting to vunmap
+
+From: Pavel Begunkov
+
+Commit 43eef70e7e2ac74e7767731dd806720c7fb5e010 upstream.
+
+io_pages_unmap() is a bit tricky in trying to figure whether the pages
+were previously vmap'ed or not. In particular, if there is just one page
+it believes there is no need to vunmap. The paired io_pages_map(), however,
+could've failed io_mem_alloc_compound() and attempted to
+io_mem_alloc_single(), which does vmap, and that leads to an unpaired vmap.
+
+The solution is to fail if io_mem_alloc_compound() can't allocate a
+single page. That's the easiest way to deal with it, and those two
+functions are getting removed soon, so no need to overcomplicate it.
+ +Cc: stable@vger.kernel.org +Fixes: 3ab1db3c6039e ("io_uring: get rid of remap_pfn_range() for mapping rings/sqes") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/477e75a3907a2fe83249e49c0a92cd480b2c60e0.1732569842.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2610,6 +2610,8 @@ static void *io_pages_map(struct page ** + ret = io_mem_alloc_compound(pages, nr_pages, size, gfp); + if (!IS_ERR(ret)) + goto done; ++ if (nr_pages == 1) ++ goto fail; + + ret = io_mem_alloc_single(pages, nr_pages, size, gfp); + if (!IS_ERR(ret)) { +@@ -2618,7 +2620,7 @@ done: + *npages = nr_pages; + return ret; + } +- ++fail: + kvfree(pages); + *out_pages = NULL; + *npages = 0; diff --git a/queue-6.1/io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch b/queue-6.1/io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch new file mode 100644 index 0000000000..b0f846239f --- /dev/null +++ b/queue-6.1/io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch @@ -0,0 +1,268 @@ +From 6c7c93b9211b6a6c625d4e8102659470742cb27f Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Wed, 13 Mar 2024 09:56:14 -0600 +Subject: io_uring: get rid of remap_pfn_range() for mapping rings/sqes + +From: Jens Axboe + +Commit 3ab1db3c6039e02a9deb9d5091d28d559917a645 upstream. + +Rather than use remap_pfn_range() for this and manually free later, +switch to using vm_insert_pages() and have it Just Work. + +If possible, allocate a single compound page that covers the range that +is needed. If that works, then we can just use page_address() on that +page. If we fail to get a compound page, allocate single pages and use +vmap() to map them into the kernel virtual address space. + +This just covers the rings/sqes, the other remaining user of the mmap +remap_pfn_range() user will be converted separately. Once that is done, +we can kill the old alloc/free code. + +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/io_uring_types.h | 5 + + include/uapi/linux/io_uring.h | 1 + io_uring/io_uring.c | 132 ++++++++++++++++++++++++++++++++++++----- + io_uring/io_uring.h | 2 + 4 files changed, 124 insertions(+), 16 deletions(-) + +--- a/include/linux/io_uring_types.h ++++ b/include/linux/io_uring_types.h +@@ -352,6 +352,11 @@ struct io_ring_ctx { + unsigned sq_thread_idle; + /* protected by ->completion_lock */ + unsigned evfd_last_cq_tail; ++ ++ unsigned short n_ring_pages; ++ unsigned short n_sqe_pages; ++ struct page **ring_pages; ++ struct page **sqe_pages; + }; + + enum { +--- a/include/uapi/linux/io_uring.h ++++ b/include/uapi/linux/io_uring.h +@@ -379,6 +379,7 @@ enum { + #define IORING_OFF_SQ_RING 0ULL + #define IORING_OFF_CQ_RING 0x8000000ULL + #define IORING_OFF_SQES 0x10000000ULL ++#define IORING_OFF_MMAP_MASK 0xf8000000ULL + + /* + * Filled with the offset for mmap(2) +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2513,37 +2513,118 @@ static int io_cqring_wait(struct io_ring + return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? 
ret : 0; + } + +-static void io_mem_free(void *ptr) ++static void io_pages_unmap(void *ptr, struct page ***pages, ++ unsigned short *npages) + { +- struct page *page; ++ bool do_vunmap = false; + + if (!ptr) + return; + +- page = virt_to_head_page(ptr); +- if (put_page_testzero(page)) +- free_compound_page(page); ++ if (*npages) { ++ struct page **to_free = *pages; ++ int i; ++ ++ /* ++ * Only did vmap for the non-compound multiple page case. ++ * For the compound page, we just need to put the head. ++ */ ++ if (PageCompound(to_free[0])) ++ *npages = 1; ++ else if (*npages > 1) ++ do_vunmap = true; ++ for (i = 0; i < *npages; i++) ++ put_page(to_free[i]); ++ } ++ if (do_vunmap) ++ vunmap(ptr); ++ kvfree(*pages); ++ *pages = NULL; ++ *npages = 0; + } + + static void io_rings_free(struct io_ring_ctx *ctx) + { +- io_mem_free(ctx->rings); +- io_mem_free(ctx->sq_sqes); ++ io_pages_unmap(ctx->rings, &ctx->ring_pages, &ctx->n_ring_pages); ++ io_pages_unmap(ctx->sq_sqes, &ctx->sqe_pages, &ctx->n_sqe_pages); + ctx->rings = NULL; + ctx->sq_sqes = NULL; + } + +-static void *io_mem_alloc(size_t size) ++static void *io_mem_alloc_compound(struct page **pages, int nr_pages, ++ size_t size, gfp_t gfp) ++{ ++ struct page *page; ++ int i, order; ++ ++ order = get_order(size); ++ if (order > 10) ++ return ERR_PTR(-ENOMEM); ++ else if (order) ++ gfp |= __GFP_COMP; ++ ++ page = alloc_pages(gfp, order); ++ if (!page) ++ return ERR_PTR(-ENOMEM); ++ ++ for (i = 0; i < nr_pages; i++) ++ pages[i] = page + i; ++ ++ return page_address(page); ++} ++ ++static void *io_mem_alloc_single(struct page **pages, int nr_pages, size_t size, ++ gfp_t gfp) + { +- gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; + void *ret; ++ int i; ++ ++ for (i = 0; i < nr_pages; i++) { ++ pages[i] = alloc_page(gfp); ++ if (!pages[i]) ++ goto err; ++ } + +- ret = (void *) __get_free_pages(gfp, get_order(size)); ++ ret = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + if (ret) + return ret; ++err: ++ while (i--) ++ put_page(pages[i]); + return ERR_PTR(-ENOMEM); + } + ++static void *io_pages_map(struct page ***out_pages, unsigned short *npages, ++ size_t size) ++{ ++ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; ++ struct page **pages; ++ int nr_pages; ++ void *ret; ++ ++ nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ pages = kvmalloc_array(nr_pages, sizeof(struct page *), gfp); ++ if (!pages) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = io_mem_alloc_compound(pages, nr_pages, size, gfp); ++ if (!IS_ERR(ret)) ++ goto done; ++ ++ ret = io_mem_alloc_single(pages, nr_pages, size, gfp); ++ if (!IS_ERR(ret)) { ++done: ++ *out_pages = pages; ++ *npages = nr_pages; ++ return ret; ++ } ++ ++ kvfree(pages); ++ *out_pages = NULL; ++ *npages = 0; ++ return ret; ++} ++ + static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries, + unsigned int cq_entries, size_t *sq_offset) + { +@@ -3125,11 +3206,9 @@ static void *io_uring_validate_mmap_requ + switch (offset) { + case IORING_OFF_SQ_RING: + case IORING_OFF_CQ_RING: +- ptr = ctx->rings; +- break; ++ return ctx->rings; + case IORING_OFF_SQES: +- ptr = ctx->sq_sqes; +- break; ++ return ctx->sq_sqes; + default: + return ERR_PTR(-EINVAL); + } +@@ -3141,11 +3220,22 @@ static void *io_uring_validate_mmap_requ + return ptr; + } + ++int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma, ++ struct page **pages, int npages) ++{ ++ unsigned long nr_pages = npages; ++ ++ vma->vm_flags |= VM_DONTEXPAND; ++ return vm_insert_pages(vma, 
vma->vm_start, pages, &nr_pages); ++} ++ + #ifdef CONFIG_MMU + + static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma) + { ++ struct io_ring_ctx *ctx = file->private_data; + size_t sz = vma->vm_end - vma->vm_start; ++ long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long pfn; + void *ptr; + +@@ -3153,6 +3243,16 @@ static __cold int io_uring_mmap(struct f + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + ++ switch (offset & IORING_OFF_MMAP_MASK) { ++ case IORING_OFF_SQ_RING: ++ case IORING_OFF_CQ_RING: ++ return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, ++ ctx->n_ring_pages); ++ case IORING_OFF_SQES: ++ return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages, ++ ctx->n_sqe_pages); ++ } ++ + pfn = virt_to_phys(ptr) >> PAGE_SHIFT; + return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); + } +@@ -3443,7 +3543,7 @@ static __cold int io_allocate_scq_urings + if (size == SIZE_MAX) + return -EOVERFLOW; + +- rings = io_mem_alloc(size); ++ rings = io_pages_map(&ctx->ring_pages, &ctx->n_ring_pages, size); + if (IS_ERR(rings)) + return PTR_ERR(rings); + +@@ -3463,7 +3563,7 @@ static __cold int io_allocate_scq_urings + return -EOVERFLOW; + } + +- ptr = io_mem_alloc(size); ++ ptr = io_pages_map(&ctx->sqe_pages, &ctx->n_sqe_pages, size); + if (IS_ERR(ptr)) { + io_rings_free(ctx); + return PTR_ERR(ptr); +--- a/io_uring/io_uring.h ++++ b/io_uring/io_uring.h +@@ -41,6 +41,8 @@ bool io_fill_cqe_aux(struct io_ring_ctx + void __io_commit_cqring_flush(struct io_ring_ctx *ctx); + + struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); ++int io_uring_mmap_pages(struct io_ring_ctx *ctx, struct vm_area_struct *vma, ++ struct page **pages, int npages); + + struct file *io_file_get_normal(struct io_kiocb *req, int fd); + struct file *io_file_get_fixed(struct io_kiocb *req, int fd, diff --git a/queue-6.1/io_uring-return-error-pointer-from-io_mem_alloc.patch b/queue-6.1/io_uring-return-error-pointer-from-io_mem_alloc.patch new file mode 100644 index 0000000000..8b014977f6 --- /dev/null +++ b/queue-6.1/io_uring-return-error-pointer-from-io_mem_alloc.patch @@ -0,0 +1,71 @@ +From b001225fa4fe09610b35b428e46193ed2a28c95f Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 5 Nov 2021 17:13:52 -0600 +Subject: io_uring: return error pointer from io_mem_alloc() + +From: Jens Axboe + +Commit e27cef86a0edd4ef7f8b4670f508a03b509cbbb2 upstream. + +In preparation for having more than one time of ring allocator, make the +existing one return valid/error-pointer rather than just NULL. 
+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2528,8 +2528,12 @@ static void io_mem_free(void *ptr) + static void *io_mem_alloc(size_t size) + { + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; ++ void *ret; + +- return (void *) __get_free_pages(gfp, get_order(size)); ++ ret = (void *) __get_free_pages(gfp, get_order(size)); ++ if (ret) ++ return ret; ++ return ERR_PTR(-ENOMEM); + } + + static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries, +@@ -3422,6 +3426,7 @@ static __cold int io_allocate_scq_urings + { + struct io_rings *rings; + size_t size, sq_array_offset; ++ void *ptr; + + /* make sure these are sane, as we already accounted them */ + ctx->sq_entries = p->sq_entries; +@@ -3432,8 +3437,8 @@ static __cold int io_allocate_scq_urings + return -EOVERFLOW; + + rings = io_mem_alloc(size); +- if (!rings) +- return -ENOMEM; ++ if (IS_ERR(rings)) ++ return PTR_ERR(rings); + + ctx->rings = rings; + ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); +@@ -3452,13 +3457,14 @@ static __cold int io_allocate_scq_urings + return -EOVERFLOW; + } + +- ctx->sq_sqes = io_mem_alloc(size); +- if (!ctx->sq_sqes) { ++ ptr = io_mem_alloc(size); ++ if (IS_ERR(ptr)) { + io_mem_free(ctx->rings); + ctx->rings = NULL; +- return -ENOMEM; ++ return PTR_ERR(ptr); + } + ++ ctx->sq_sqes = ptr; + return 0; + } + diff --git a/queue-6.1/mm-add-nommu-variant-of-vm_insert_pages.patch b/queue-6.1/mm-add-nommu-variant-of-vm_insert_pages.patch new file mode 100644 index 0000000000..c881d0f64f --- /dev/null +++ b/queue-6.1/mm-add-nommu-variant-of-vm_insert_pages.patch @@ -0,0 +1,36 @@ +From adaee635dadf556c959c3c4428b09ea2d8bb8d72 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sat, 16 Mar 2024 07:21:43 -0600 +Subject: mm: add nommu variant of vm_insert_pages() + +From: Jens Axboe + +Commit 62346c6cb28b043f2a6e95337d9081ec0b37b5f5 upstream. + +An identical one exists for vm_insert_page(), add one for +vm_insert_pages() to avoid needing to check for CONFIG_MMU in code using +it. + +Acked-by: Johannes Weiner +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + mm/nommu.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/mm/nommu.c ++++ b/mm/nommu.c +@@ -357,6 +357,13 @@ int vm_insert_page(struct vm_area_struct + } + EXPORT_SYMBOL(vm_insert_page); + ++int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr, ++ struct page **pages, unsigned long *num) ++{ ++ return -EINVAL; ++} ++EXPORT_SYMBOL(vm_insert_pages); ++ + int vm_map_pages(struct vm_area_struct *vma, struct page **pages, + unsigned long num) + { diff --git a/queue-6.1/series b/queue-6.1/series index 16dd80601e..0c37d02824 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -69,3 +69,9 @@ x86-irq-define-trace-events-conditionally.patch mptcp-safety-check-before-fallback.patch drm-nouveau-do-not-override-forced-connector-status.patch block-fix-kmem_cache-of-name-bio-108-already-exists.patch +io_uring-return-error-pointer-from-io_mem_alloc.patch +io_uring-add-ring-freeing-helper.patch +mm-add-nommu-variant-of-vm_insert_pages.patch +io_uring-get-rid-of-remap_pfn_range-for-mapping-rings-sqes.patch +io_uring-don-t-attempt-to-mmap-larger-than-what-the-user-asks-for.patch +io_uring-fix-corner-case-forgetting-to-vunmap.patch
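
As background for the mmap-cap fix queued above ("io_uring: don't attempt to
mmap larger than what the user asks for"): it only matters when an application
ignores IORING_FEAT_SINGLE_MMAP and maps the SQ ring, CQ ring, and SQE array
with three separate mmap() calls, each sized from the offsets returned by
io_uring_setup(2). A rough userspace sketch of that legacy triple-mmap pattern
follows; it uses only the UAPI constants visible in the patches above plus the
region sizes documented in io_uring_setup(2), the helper names are
illustrative, and error handling is pared down.

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Thin wrapper, as in the io_uring_setup(2) man page example. */
static int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
	return syscall(__NR_io_uring_setup, entries, p);
}

/* Legacy setup: one mmap() per region instead of the single collapsed map. */
static int setup_legacy_rings(unsigned entries)
{
	struct io_uring_params p = { 0 };
	int fd = io_uring_setup(entries, &p);
	if (fd < 0)
		return -1;

	/* Per-region lengths as documented for io_uring_setup(2); each region
	 * gets its own mmap(), so the kernel must not insert more pages than
	 * each of these lengths covers. */
	size_t sq_len = p.sq_off.array + p.sq_entries * sizeof(__u32);
	size_t cq_len = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
	size_t sqe_len = p.sq_entries * sizeof(struct io_uring_sqe);

	void *sq = mmap(NULL, sq_len, PROT_READ | PROT_WRITE, MAP_SHARED,
			fd, IORING_OFF_SQ_RING);
	void *cq = mmap(NULL, cq_len, PROT_READ | PROT_WRITE, MAP_SHARED,
			fd, IORING_OFF_CQ_RING);
	void *sqes = mmap(NULL, sqe_len, PROT_READ | PROT_WRITE, MAP_SHARED,
			fd, IORING_OFF_SQES);
	if (sq == MAP_FAILED || cq == MAP_FAILED || sqes == MAP_FAILED) {
		close(fd);
		return -1;
	}
	return fd;
}

With the cap in place, each of these smaller mappings inserts only as many
pages as the requested length covers, instead of tripping the -EFAULT from
vm_insert_pages() described in the commit message.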