From: Greg Kroah-Hartman Date: Mon, 12 Aug 2024 14:42:21 +0000 (+0200) Subject: 6.1-stable patches X-Git-Tag: v6.1.105~37 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=cd0665042e54c8f281f40d165e5deed7aa84dd05;p=thirdparty%2Fkernel%2Fstable-queue.git 6.1-stable patches added patches: block-call-.limit_depth-after-.hctx-has-been-set.patch block-mq-deadline-fix-the-tag-reservation-code.patch mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch --- diff --git a/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch b/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch new file mode 100644 index 00000000000..e36c960e1dd --- /dev/null +++ b/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch @@ -0,0 +1,55 @@ +From 6259151c04d4e0085e00d2dcb471ebdd1778e72e Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 9 May 2024 10:01:48 -0700 +Subject: block: Call .limit_depth() after .hctx has been set + +From: Bart Van Assche + +commit 6259151c04d4e0085e00d2dcb471ebdd1778e72e upstream. + +Call .limit_depth() after data->hctx has been set such that data->hctx can +be used in .limit_depth() implementations. + +Cc: Christoph Hellwig +Cc: Damien Le Moal +Cc: Zhiguo Niu +Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") +Signed-off-by: Bart Van Assche +Tested-by: Zhiguo Niu +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20240509170149.7639-2-bvanassche@acm.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-mq.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -439,6 +439,7 @@ __blk_mq_alloc_requests_batch(struct blk + + static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) + { ++ void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *) = NULL; + struct request_queue *q = data->q; + u64 alloc_time_ns = 0; + struct request *rq; +@@ -465,7 +466,7 @@ static struct request *__blk_mq_alloc_re + !blk_op_is_passthrough(data->cmd_flags) && + e->type->ops.limit_depth && + !(data->flags & BLK_MQ_REQ_RESERVED)) +- e->type->ops.limit_depth(data->cmd_flags, data); ++ limit_depth = e->type->ops.limit_depth; + } + + retry: +@@ -477,6 +478,9 @@ retry: + if (data->flags & BLK_MQ_REQ_RESERVED) + data->rq_flags |= RQF_RESV; + ++ if (limit_depth) ++ limit_depth(data->cmd_flags, data); ++ + /* + * Try batched alloc if we want more than 1 tag. + */ diff --git a/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch b/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch new file mode 100644 index 00000000000..7c3faf2342e --- /dev/null +++ b/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch @@ -0,0 +1,78 @@ +From 39823b47bbd40502632ffba90ebb34fff7c8b5e8 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 9 May 2024 10:01:49 -0700 +Subject: block/mq-deadline: Fix the tag reservation code + +From: Bart Van Assche + +commit 39823b47bbd40502632ffba90ebb34fff7c8b5e8 upstream. + +The current tag reservation code is based on a misunderstanding of the +meaning of data->shallow_depth. Fix the tag reservation code as follows: +* By default, do not reserve any tags for synchronous requests because + for certain use cases reserving tags reduces performance. 
See also + Harshit Mogalapalli, [bug-report] Performance regression with fio + sequential-write on a multipath setup, 2024-03-07 + (https://lore.kernel.org/linux-block/5ce2ae5d-61e2-4ede-ad55-551112602401@oracle.com/) +* Reduce min_shallow_depth to one because min_shallow_depth must be less + than or equal any shallow_depth value. +* Scale dd->async_depth from the range [1, nr_requests] to [1, + bits_per_sbitmap_word]. + +Cc: Christoph Hellwig +Cc: Damien Le Moal +Cc: Zhiguo Niu +Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") +Signed-off-by: Bart Van Assche +Reviewed-by: Christoph Hellwig +Link: https://lore.kernel.org/r/20240509170149.7639-3-bvanassche@acm.org +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/mq-deadline.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +--- a/block/mq-deadline.c ++++ b/block/mq-deadline.c +@@ -598,6 +598,20 @@ unlock: + } + + /* ++ * 'depth' is a number in the range 1..INT_MAX representing a number of ++ * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since ++ * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow(). ++ * Values larger than q->nr_requests have the same effect as q->nr_requests. ++ */ ++static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) ++{ ++ struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; ++ const unsigned int nrr = hctx->queue->nr_requests; ++ ++ return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; ++} ++ ++/* + * Called by __blk_mq_alloc_request(). The shallow_depth value set by this + * function is used by __blk_mq_get_tag(). + */ +@@ -613,7 +627,7 @@ static void dd_limit_depth(blk_opf_t opf + * Throttle asynchronous requests and writes such that these requests + * do not block the allocation of synchronous requests. + */ +- data->shallow_depth = dd->async_depth; ++ data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth); + } + + /* Called by blk_mq_update_nr_requests(). */ +@@ -623,9 +637,9 @@ static void dd_depth_updated(struct blk_ + struct deadline_data *dd = q->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + +- dd->async_depth = max(1UL, 3 * q->nr_requests / 4); ++ dd->async_depth = q->nr_requests; + +- sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); ++ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1); + } + + /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */ diff --git a/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch b/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch new file mode 100644 index 00000000000..b0532eff971 --- /dev/null +++ b/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch @@ -0,0 +1,75 @@ +From 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Mon, 8 Jul 2024 10:51:27 +0800 +Subject: mm/hugetlb: fix potential race in __update_and_free_hugetlb_folio() + +From: Miaohe Lin + +commit 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e upstream. + +There is a potential race between __update_and_free_hugetlb_folio() and +try_memory_failure_hugetlb(): + + CPU1 CPU2 + __update_and_free_hugetlb_folio try_memory_failure_hugetlb + folio_test_hugetlb + -- It's still hugetlb folio. 
+ folio_clear_hugetlb_hwpoison + spin_lock_irq(&hugetlb_lock); + __get_huge_page_for_hwpoison + folio_set_hugetlb_hwpoison + spin_unlock_irq(&hugetlb_lock); + spin_lock_irq(&hugetlb_lock); + __folio_clear_hugetlb(folio); + -- Hugetlb flag is cleared but too late. + spin_unlock_irq(&hugetlb_lock); + +When the above race occurs, raw error page info will be leaked. Even +worse, raw error pages won't have hwpoisoned flag set and hit +pcplists/buddy. Fix this issue by deferring +folio_clear_hugetlb_hwpoison() until __folio_clear_hugetlb() is done. So +all raw error pages will have hwpoisoned flag set. + +Link: https://lkml.kernel.org/r/20240708025127.107713-1-linmiaohe@huawei.com +Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap") +Signed-off-by: Miaohe Lin +Acked-by: Muchun Song +Reviewed-by: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Miaohe Lin +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -1786,13 +1786,6 @@ static void __update_and_free_page(struc + } + + /* +- * Move PageHWPoison flag from head page to the raw error pages, +- * which makes any healthy subpages reusable. +- */ +- if (unlikely(PageHWPoison(page))) +- hugetlb_clear_page_hwpoison(page); +- +- /* + * If vmemmap pages were allocated above, then we need to clear the + * hugetlb destructor under the hugetlb lock. + */ +@@ -1802,6 +1795,13 @@ static void __update_and_free_page(struc + spin_unlock_irq(&hugetlb_lock); + } + ++ /* ++ * Move PageHWPoison flag from head page to the raw error pages, ++ * which makes any healthy subpages reusable. ++ */ ++ if (unlikely(PageHWPoison(page))) ++ hugetlb_clear_page_hwpoison(page); ++ + for (i = 0; i < pages_per_huge_page(h); i++) { + subpage = nth_page(page, i); + subpage->flags &= ~(1 << PG_locked | 1 << PG_error | diff --git a/queue-6.1/series b/queue-6.1/series index e335a9f3416..4172ac6e7e4 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -138,3 +138,7 @@ mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines. btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch ipv6-fix-source-address-selection-with-route-leak.patch tools-headers-arm64-sync-arm64-s-cputype.h-with-the-kernel-sources.patch +mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch +block-call-.limit_depth-after-.hctx-has-been-set.patch +block-mq-deadline-fix-the-tag-reservation-code.patch +xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch diff --git a/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch b/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch new file mode 100644 index 00000000000..cbac6128511 --- /dev/null +++ b/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch @@ -0,0 +1,72 @@ +From 45cf976008ddef4a9c9a30310c9b4fb2a9a6602a Mon Sep 17 00:00:00 2001 +From: Christoph Hellwig +Date: Tue, 30 Apr 2024 06:07:55 +0200 +Subject: xfs: fix log recovery buffer allocation for the legacy h_size fixup + +From: Christoph Hellwig + +commit 45cf976008ddef4a9c9a30310c9b4fb2a9a6602a upstream. + +Commit a70f9fe52daa ("xfs: detect and handle invalid iclog size set by +mkfs") added a fixup for incorrect h_size values used for the initial +umount record in old xfsprogs versions. 
Later commit 0c771b99d6c9
+("xfs: clean up calculation of LR header blocks") cleaned up the log
+recovery buffer calculation, but stopped using the fixed up h_size value
+to size the log recovery buffer, which can lead to an out of bounds
+access when the incorrect h_size does not come from the old mkfs
+tool, but from a fuzzer.
+
+Fix this by open coding xlog_logrec_hblks and taking the fixed h_size
+into account for this calculation.
+
+Fixes: 0c771b99d6c9 ("xfs: clean up calculation of LR header blocks")
+Reported-by: Sam Sun
+Signed-off-by: Christoph Hellwig
+Reviewed-by: Brian Foster
+Reviewed-by: "Darrick J. Wong"
+Signed-off-by: Chandan Babu R
+Signed-off-by: Kevin Berry
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/xfs/xfs_log_recover.c | 20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2960,7 +2960,7 @@ xlog_do_recovery_pass(
+ int error = 0, h_size, h_len;
+ int error2 = 0;
+ int bblks, split_bblks;
+- int hblks, split_hblks, wrapped_hblks;
++ int hblks = 1, split_hblks, wrapped_hblks;
+ int i;
+ struct hlist_head rhash[XLOG_RHASH_SIZE];
+ LIST_HEAD (buffer_list);
+@@ -3016,14 +3016,22 @@ xlog_do_recovery_pass(
+ if (error)
+ goto bread_err1;
+
+- hblks = xlog_logrec_hblks(log, rhead);
+- if (hblks != 1) {
+- kmem_free(hbp);
+- hbp = xlog_alloc_buffer(log, hblks);
++ /*
++ * This open codes xlog_logrec_hblks so that we can reuse the
++ * fixed up h_size value calculated above. Without that we'd
++ * still allocate the buffer based on the incorrect on-disk
++ * size.
++ */
++ if (h_size > XLOG_HEADER_CYCLE_SIZE &&
++ (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) {
++ hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
++ if (hblks > 1) {
++ kmem_free(hbp);
++ hbp = xlog_alloc_buffer(log, hblks);
++ }
+ }
+ } else {
+ ASSERT(log->l_sectBBsize == 1);
+- hblks = 1;
+ hbp = xlog_alloc_buffer(log, 1);
+ h_size = XLOG_BIG_RECORD_BSIZE;
+ }
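
For reference, the header-block count that the open-coded xfs hunk above computes is a ceiling division of the fixed-up h_size by the log header cycle size. The standalone C sketch below mirrors that arithmetic outside the kernel; the 32 KiB XLOG_HEADER_CYCLE_SIZE value, the hblks_for() helper name, and the sample h_size values are illustrative assumptions, not copies of the kernel sources.

/*
 * Standalone sketch of the header-block sizing from the xfs hunk above.
 * Assumes XLOG_HEADER_CYCLE_SIZE is 32768 bytes and that the caller has
 * already applied the legacy h_size fixup; this is not kernel code.
 */
#include <stdio.h>

#define XLOG_HEADER_CYCLE_SIZE (32 * 1024)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Number of log-record header blocks needed for a given h_size. */
static int hblks_for(int h_size, int is_v2_log)
{
	if (h_size > XLOG_HEADER_CYCLE_SIZE && is_v2_log)
		return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
	return 1;
}

int main(void)
{
	/* A sane 32 KiB h_size fits in a single header block. */
	printf("h_size=%6d -> hblks=%d\n", 32768, hblks_for(32768, 1));
	/*
	 * A larger (fixed-up) h_size needs several header blocks, which is
	 * why the recovery buffer must be sized from the fixed-up value
	 * rather than the raw on-disk one.
	 */
	printf("h_size=%6d -> hblks=%d\n", 131072, hblks_for(131072, 1));
	return 0;
}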