From: Greg Kroah-Hartman Date: Mon, 12 Aug 2024 14:42:21 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.1.105~37 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=cd0665042e54c8f281f40d165e5deed7aa84dd05;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: block-call-.limit_depth-after-.hctx-has-been-set.patch block-mq-deadline-fix-the-tag-reservation-code.patch mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch --- diff --git a/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch b/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch new file mode 100644 index 00000000000..e36c960e1dd --- /dev/null +++ b/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch @@ -0,0 +1,55 @@ +From 6259151c04d4e0085e00d2dcb471ebdd1778e72e Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 9 May 2024 10:01:48 -0700 +Subject: block: Call .limit_depth() after .hctx has been set + +From: Bart Van Assche + +commit 6259151c04d4e0085e00d2dcb471ebdd1778e72e upstream. + +Call .limit_depth() after data->hctx has been set such that data->hctx can +be used in .limit_depth() implementations. + +Cc: Christoph Hellwig +Cc: Damien Le Moal +Cc: Zhiguo Niu +Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") +Signed-off-by: Bart Van Assche +Tested-by: Zhiguo Niu +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20240509170149.7639-2-bvanassche@acm.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-mq.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -439,6 +439,7 @@ __blk_mq_alloc_requests_batch(struct blk + + static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) + { ++ void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *) = NULL; + struct request_queue *q = data->q; + u64 alloc_time_ns = 0; + struct request *rq; +@@ -465,7 +466,7 @@ static struct request *__blk_mq_alloc_re + !blk_op_is_passthrough(data->cmd_flags) && + e->type->ops.limit_depth && + !(data->flags & BLK_MQ_REQ_RESERVED)) +- e->type->ops.limit_depth(data->cmd_flags, data); ++ limit_depth = e->type->ops.limit_depth; + } + + retry: +@@ -477,6 +478,9 @@ retry: + if (data->flags & BLK_MQ_REQ_RESERVED) + data->rq_flags |= RQF_RESV; + ++ if (limit_depth) ++ limit_depth(data->cmd_flags, data); ++ + /* + * Try batched alloc if we want more than 1 tag. + */ diff --git a/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch b/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch new file mode 100644 index 00000000000..7c3faf2342e --- /dev/null +++ b/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch @@ -0,0 +1,78 @@ +From 39823b47bbd40502632ffba90ebb34fff7c8b5e8 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 9 May 2024 10:01:49 -0700 +Subject: block/mq-deadline: Fix the tag reservation code + +From: Bart Van Assche + +commit 39823b47bbd40502632ffba90ebb34fff7c8b5e8 upstream. + +The current tag reservation code is based on a misunderstanding of the +meaning of data->shallow_depth. Fix the tag reservation code as follows: +* By default, do not reserve any tags for synchronous requests because + for certain use cases reserving tags reduces performance. 
See also + Harshit Mogalapalli, [bug-report] Performance regression with fio + sequential-write on a multipath setup, 2024-03-07 + (https://lore.kernel.org/linux-block/5ce2ae5d-61e2-4ede-ad55-551112602401@oracle.com/) +* Reduce min_shallow_depth to one because min_shallow_depth must be less + than or equal any shallow_depth value. +* Scale dd->async_depth from the range [1, nr_requests] to [1, + bits_per_sbitmap_word]. + +Cc: Christoph Hellwig +Cc: Damien Le Moal +Cc: Zhiguo Niu +Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") +Signed-off-by: Bart Van Assche +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20240509170149.7639-3-bvanassche@acm.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/mq-deadline.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +--- a/block/mq-deadline.c ++++ b/block/mq-deadline.c +@@ -598,6 +598,20 @@ unlock: + } + + /* ++ * 'depth' is a number in the range 1..INT_MAX representing a number of ++ * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since ++ * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow(). ++ * Values larger than q->nr_requests have the same effect as q->nr_requests. ++ */ ++static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) ++{ ++ struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; ++ const unsigned int nrr = hctx->queue->nr_requests; ++ ++ return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; ++} ++ ++/* + * Called by __blk_mq_alloc_request(). The shallow_depth value set by this + * function is used by __blk_mq_get_tag(). + */ +@@ -613,7 +627,7 @@ static void dd_limit_depth(blk_opf_t opf + * Throttle asynchronous requests and writes such that these requests + * do not block the allocation of synchronous requests. + */ +- data->shallow_depth = dd->async_depth; ++ data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth); + } + + /* Called by blk_mq_update_nr_requests(). */ +@@ -623,9 +637,9 @@ static void dd_depth_updated(struct blk_ + struct deadline_data *dd = q->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + +- dd->async_depth = max(1UL, 3 * q->nr_requests / 4); ++ dd->async_depth = q->nr_requests; + +- sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); ++ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1); + } + + /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */ diff --git a/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch b/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch new file mode 100644 index 00000000000..b0532eff971 --- /dev/null +++ b/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch @@ -0,0 +1,75 @@ +From 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Mon, 8 Jul 2024 10:51:27 +0800 +Subject: mm/hugetlb: fix potential race in __update_and_free_hugetlb_folio() + +From: Miaohe Lin + +commit 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e upstream. + +There is a potential race between __update_and_free_hugetlb_folio() and +try_memory_failure_hugetlb(): + + CPU1 CPU2 + __update_and_free_hugetlb_folio try_memory_failure_hugetlb + folio_test_hugetlb + -- It's still hugetlb folio. 
+ folio_clear_hugetlb_hwpoison + spin_lock_irq(&hugetlb_lock); + __get_huge_page_for_hwpoison + folio_set_hugetlb_hwpoison + spin_unlock_irq(&hugetlb_lock); + spin_lock_irq(&hugetlb_lock); + __folio_clear_hugetlb(folio); + -- Hugetlb flag is cleared but too late. + spin_unlock_irq(&hugetlb_lock); + +When the above race occurs, raw error page info will be leaked. Even +worse, raw error pages won't have hwpoisoned flag set and hit +pcplists/buddy. Fix this issue by deferring +folio_clear_hugetlb_hwpoison() until __folio_clear_hugetlb() is done. So +all raw error pages will have hwpoisoned flag set. + +Link: https://lkml.kernel.org/r/20240708025127.107713-1-linmiaohe@huawei.com +Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") +Signed-off-by: Miaohe Lin +Acked-by: Muchun Song +Reviewed-by: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Miaohe Lin +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1786,13 +1786,6 @@ static void __update_and_free_page(struc + } + + /* +- * Move PageHWPoison flag from head page to the raw error pages, +- * which makes any healthy subpages reusable. +- */ +- if (unlikely(PageHWPoison(page))) +- hugetlb_clear_page_hwpoison(page); +- +- /* + * If vmemmap pages were allocated above, then we need to clear the + * hugetlb destructor under the hugetlb lock. + */ +@@ -1802,6 +1795,13 @@ static void __update_and_free_page(struc + spin_unlock_irq(&hugetlb_lock); + } + ++ /* ++ * Move PageHWPoison flag from head page to the raw error pages, ++ * which makes any healthy subpages reusable. ++ */ ++ if (unlikely(PageHWPoison(page))) ++ hugetlb_clear_page_hwpoison(page); ++ + for (i = 0; i < pages_per_huge_page(h); i++) { + subpage = nth_page(page, i); + subpage->flags &= ~(1 << PG_locked | 1 << PG_error | diff --git a/queue-6.1/series b/queue-6.1/series index e335a9f3416..4172ac6e7e4 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -138,3 +138,7 @@ mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines. btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch ipv6-fix-source-address-selection-with-route-leak.patch tools-headers-arm64-sync-arm64-s-cputype.h-with-the-kernel-sources.patch +mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch +block-call-.limit_depth-after-.hctx-has-been-set.patch +block-mq-deadline-fix-the-tag-reservation-code.patch +xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch diff --git a/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch b/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch new file mode 100644 index 00000000000..cbac6128511 --- /dev/null +++ b/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch @@ -0,0 +1,72 @@ +From 45cf976008ddef4a9c9a30310c9b4fb2a9a6602a Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Tue, 30 Apr 2024 06:07:55 +0200 +Subject: xfs: fix log recovery buffer allocation for the legacy h_size fixup + +From: Christoph Hellwig + +commit 45cf976008ddef4a9c9a30310c9b4fb2a9a6602a upstream. + +Commit a70f9fe52daa ("xfs: detect and handle invalid iclog size set by +mkfs") added a fixup for incorrect h_size values used for the initial +umount record in old xfsprogs versions. 
Later commit 0c771b99d6c9
+("xfs: clean up calculation of LR header blocks") cleaned up the log
+recovery buffer calculation, but stopped using the fixed up h_size value
+to size the log recovery buffer, which can lead to an out of bounds
+access when the incorrect h_size does not come from the old mkfs
+tool, but from a fuzzer.
+
+Fix this by open coding xlog_logrec_hblks and taking the fixed h_size
+into account for this calculation.
+
+Fixes: 0c771b99d6c9 ("xfs: clean up calculation of LR header blocks")
+Reported-by: Sam Sun
+Signed-off-by: Christoph Hellwig
+Reviewed-by: Brian Foster
+Reviewed-by: "Darrick J. Wong"
+Signed-off-by: Chandan Babu R
+Signed-off-by: Kevin Berry
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/xfs/xfs_log_recover.c | 20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2960,7 +2960,7 @@ xlog_do_recovery_pass(
+ int error = 0, h_size, h_len;
+ int error2 = 0;
+ int bblks, split_bblks;
+- int hblks, split_hblks, wrapped_hblks;
++ int hblks = 1, split_hblks, wrapped_hblks;
+ int i;
+ struct hlist_head rhash[XLOG_RHASH_SIZE];
+ LIST_HEAD (buffer_list);
+@@ -3016,14 +3016,22 @@ xlog_do_recovery_pass(
+ if (error)
+ goto bread_err1;
+
+- hblks = xlog_logrec_hblks(log, rhead);
+- if (hblks != 1) {
+- kmem_free(hbp);
+- hbp = xlog_alloc_buffer(log, hblks);
++ /*
++ * This open codes xlog_logrec_hblks so that we can reuse the
++ * fixed up h_size value calculated above. Without that we'd
++ * still allocate the buffer based on the incorrect on-disk
++ * size.
++ */
++ if (h_size > XLOG_HEADER_CYCLE_SIZE &&
++ (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) {
++ hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
++ if (hblks > 1) {
++ kmem_free(hbp);
++ hbp = xlog_alloc_buffer(log, hblks);
++ }
+ }
+ } else {
+ ASSERT(log->l_sectBBsize == 1);
+- hblks = 1;
+ hbp = xlog_alloc_buffer(log, 1);
+ h_size = XLOG_BIG_RECORD_BSIZE;
+ }
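
For reference, the header-block count that the open-coded xfs hunk above computes is a ceiling division of the fixed-up h_size by the log header cycle size. The standalone C sketch below mirrors that arithmetic outside the kernel; the 32 KiB XLOG_HEADER_CYCLE_SIZE value, the hblks_for() helper name, and the sample h_size values are illustrative assumptions, not copies of the kernel sources.

/*
 * Standalone sketch of the header-block sizing from the xfs hunk above.
 * Assumes XLOG_HEADER_CYCLE_SIZE is 32768 bytes and that the caller has
 * already applied the legacy h_size fixup; this is not kernel code.
 */
#include <stdio.h>

#define XLOG_HEADER_CYCLE_SIZE (32 * 1024)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Number of log-record header blocks needed for a given h_size. */
static int hblks_for(int h_size, int is_v2_log)
{
	if (h_size > XLOG_HEADER_CYCLE_SIZE && is_v2_log)
		return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
	return 1;
}

int main(void)
{
	/* A sane 32 KiB h_size fits in a single header block. */
	printf("h_size=%6d -> hblks=%d\n", 32768, hblks_for(32768, 1));
	/*
	 * A larger (fixed-up) h_size needs several header blocks, which is
	 * why the recovery buffer must be sized from the fixed-up value
	 * rather than the raw on-disk one.
	 */
	printf("h_size=%6d -> hblks=%d\n", 131072, hblks_for(131072, 1));
	return 0;
}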