6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Aug 2024 14:42:21 +0000 (16:42 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 12 Aug 2024 14:42:21 +0000 (16:42 +0200)
added patches:
block-call-.limit_depth-after-.hctx-has-been-set.patch
block-mq-deadline-fix-the-tag-reservation-code.patch
mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch
xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch

queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch [new file with mode: 0644]
queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch [new file with mode: 0644]
queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch [new file with mode: 0644]

diff --git a/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch b/queue-6.1/block-call-.limit_depth-after-.hctx-has-been-set.patch
new file mode 100644 (file)
index 0000000..e36c960
--- /dev/null
@@ -0,0 +1,55 @@
+From 6259151c04d4e0085e00d2dcb471ebdd1778e72e Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bvanassche@acm.org>
+Date: Thu, 9 May 2024 10:01:48 -0700
+Subject: block: Call .limit_depth() after .hctx has been set
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+commit 6259151c04d4e0085e00d2dcb471ebdd1778e72e upstream.
+
+Call .limit_depth() after data->hctx has been set such that data->hctx can
+be used in .limit_depth() implementations.
+
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Damien Le Moal <dlemoal@kernel.org>
+Cc: Zhiguo Niu <zhiguo.niu@unisoc.com>
+Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests")
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Tested-by: Zhiguo Niu <zhiguo.niu@unisoc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20240509170149.7639-2-bvanassche@acm.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-mq.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/block/blk-mq.c
++++ b/block/blk-mq.c
+@@ -439,6 +439,7 @@ __blk_mq_alloc_requests_batch(struct blk
+ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
+ {
++      void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *) = NULL;
+       struct request_queue *q = data->q;
+       u64 alloc_time_ns = 0;
+       struct request *rq;
+@@ -465,7 +466,7 @@ static struct request *__blk_mq_alloc_re
+                   !blk_op_is_passthrough(data->cmd_flags) &&
+                   e->type->ops.limit_depth &&
+                   !(data->flags & BLK_MQ_REQ_RESERVED))
+-                      e->type->ops.limit_depth(data->cmd_flags, data);
++                      limit_depth = e->type->ops.limit_depth;
+       }
+ retry:
+@@ -477,6 +478,9 @@ retry:
+       if (data->flags & BLK_MQ_REQ_RESERVED)
+               data->rq_flags |= RQF_RESV;
++      if (limit_depth)
++              limit_depth(data->cmd_flags, data);
++
+       /*
+        * Try batched alloc if we want more than 1 tag.
+        */
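
Editor's note: the hunk above defers the elevator callback by stashing it in a local function pointer and only invoking it once data->hctx has been assigned. Below is a minimal user-space sketch of that deferred-callback pattern; every name in it is an illustrative stand-in, not the kernel API.

#include <stdio.h>

/* Stand-ins for the kernel structures; names are illustrative only. */
struct alloc_data {
        int cmd_flags;
        const char *hctx;               /* plays the role of data->hctx */
        unsigned int shallow_depth;
};

/* Plays the role of an elevator's ->limit_depth() implementation. */
static void sched_limit_depth(int opf, struct alloc_data *data)
{
        /* Safe to look at data->hctx: the caller sets it before invoking us. */
        printf("limit_depth on %s (opf=%d)\n", data->hctx, opf);
        data->shallow_depth = 48;
}

int main(void)
{
        struct alloc_data data = { .cmd_flags = 0 };
        void (*limit_depth)(int, struct alloc_data *) = NULL;

        /* Early phase: only remember which callback applies; do not call it. */
        limit_depth = sched_limit_depth;

        /* ... hardware context selection happens in between ... */
        data.hctx = "hctx0";

        /* Late phase: data.hctx is valid, so the callback may use it. */
        if (limit_depth)
                limit_depth(data.cmd_flags, &data);

        printf("shallow_depth = %u\n", data.shallow_depth);
        return 0;
}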
diff --git a/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch b/queue-6.1/block-mq-deadline-fix-the-tag-reservation-code.patch
new file mode 100644 (file)
index 0000000..7c3faf2
--- /dev/null
@@ -0,0 +1,78 @@
+From 39823b47bbd40502632ffba90ebb34fff7c8b5e8 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bvanassche@acm.org>
+Date: Thu, 9 May 2024 10:01:49 -0700
+Subject: block/mq-deadline: Fix the tag reservation code
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+commit 39823b47bbd40502632ffba90ebb34fff7c8b5e8 upstream.
+
+The current tag reservation code is based on a misunderstanding of the
+meaning of data->shallow_depth. Fix the tag reservation code as follows:
+* By default, do not reserve any tags for synchronous requests because
+  for certain use cases reserving tags reduces performance. See also
+  Harshit Mogalapalli, [bug-report] Performance regression with fio
+  sequential-write on a multipath setup, 2024-03-07
+  (https://lore.kernel.org/linux-block/5ce2ae5d-61e2-4ede-ad55-551112602401@oracle.com/)
+* Reduce min_shallow_depth to one because min_shallow_depth must be less
+  than or equal to any shallow_depth value.
+* Scale dd->async_depth from the range [1, nr_requests] to [1,
+  bits_per_sbitmap_word].
+
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Damien Le Moal <dlemoal@kernel.org>
+Cc: Zhiguo Niu <zhiguo.niu@unisoc.com>
+Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests")
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20240509170149.7639-3-bvanassche@acm.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/mq-deadline.c |   20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+--- a/block/mq-deadline.c
++++ b/block/mq-deadline.c
+@@ -598,6 +598,20 @@ unlock:
+ }
+ /*
++ * 'depth' is a number in the range 1..INT_MAX representing a number of
++ * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
++ * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
++ * Values larger than q->nr_requests have the same effect as q->nr_requests.
++ */
++static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
++{
++      struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
++      const unsigned int nrr = hctx->queue->nr_requests;
++
++      return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
++}
++
++/*
+  * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
+  * function is used by __blk_mq_get_tag().
+  */
+@@ -613,7 +627,7 @@ static void dd_limit_depth(blk_opf_t opf
+        * Throttle asynchronous requests and writes such that these requests
+        * do not block the allocation of synchronous requests.
+        */
+-      data->shallow_depth = dd->async_depth;
++      data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
+ }
+ /* Called by blk_mq_update_nr_requests(). */
+@@ -623,9 +637,9 @@ static void dd_depth_updated(struct blk_
+       struct deadline_data *dd = q->elevator->elevator_data;
+       struct blk_mq_tags *tags = hctx->sched_tags;
+-      dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
++      dd->async_depth = q->nr_requests;
+-      sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
++      sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
+ }
+ /* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
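
Editor's note: the new dd_to_word_depth() helper rescales a depth expressed in requests (1..nr_requests) onto the per-word range 1..(1 << shift) that sbitmap_get_shallow() expects, rounding up. A standalone sketch of that scaling, using made-up queue numbers rather than values read from a real device:

#include <stdio.h>

static unsigned int to_word_depth(unsigned int qdepth, unsigned int shift,
                                  unsigned int nr_requests)
{
        /* Same rounding-up division as the patch: ceil(qdepth * 2^shift / nrr). */
        return ((qdepth << shift) + nr_requests - 1) / nr_requests;
}

int main(void)
{
        const unsigned int shift = 6;           /* 64 bits per sbitmap word */
        const unsigned int nr_requests = 256;   /* hypothetical queue depth */

        /* The full request depth maps to a full word ... */
        printf("%u -> %u\n", 256u, to_word_depth(256, shift, nr_requests)); /* 64 */
        /* ... 75% of the requests map to 75% of a word ... */
        printf("%u -> %u\n", 192u, to_word_depth(192, shift, nr_requests)); /* 48 */
        /* ... and a depth of 1 still yields at least 1. */
        printf("%u -> %u\n", 1u, to_word_depth(1, shift, nr_requests));     /* 1 */
        return 0;
}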
diff --git a/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch b/queue-6.1/mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch
new file mode 100644 (file)
index 0000000..b0532ef
--- /dev/null
@@ -0,0 +1,75 @@
+From 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Mon, 8 Jul 2024 10:51:27 +0800
+Subject: mm/hugetlb: fix potential race in __update_and_free_hugetlb_folio()
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e upstream.
+
+There is a potential race between __update_and_free_hugetlb_folio() and
+try_memory_failure_hugetlb():
+
+ CPU1                                  CPU2
+ __update_and_free_hugetlb_folio       try_memory_failure_hugetlb
+                                        folio_test_hugetlb
+                                         -- It's still hugetlb folio.
+  folio_clear_hugetlb_hwpoison
+                                         spin_lock_irq(&hugetlb_lock);
+                                          __get_huge_page_for_hwpoison
+                                           folio_set_hugetlb_hwpoison
+                                         spin_unlock_irq(&hugetlb_lock);
+  spin_lock_irq(&hugetlb_lock);
+  __folio_clear_hugetlb(folio);
+   -- Hugetlb flag is cleared but too late.
+  spin_unlock_irq(&hugetlb_lock);
+
+When the above race occurs, raw error page info will be leaked.  Even
+worse, raw error pages won't have hwpoisoned flag set and hit
+pcplists/buddy.  Fix this issue by deferring
+folio_clear_hugetlb_hwpoison() until __folio_clear_hugetlb() is done.  So
+all raw error pages will have hwpoisoned flag set.
+
+Link: https://lkml.kernel.org/r/20240708025127.107713-1-linmiaohe@huawei.com
+Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Muchun Song <muchun.song@linux.dev>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1786,13 +1786,6 @@ static void __update_and_free_page(struc
+       }
+       /*
+-       * Move PageHWPoison flag from head page to the raw error pages,
+-       * which makes any healthy subpages reusable.
+-       */
+-      if (unlikely(PageHWPoison(page)))
+-              hugetlb_clear_page_hwpoison(page);
+-
+-      /*
+        * If vmemmap pages were allocated above, then we need to clear the
+        * hugetlb destructor under the hugetlb lock.
+        */
+@@ -1802,6 +1795,13 @@ static void __update_and_free_page(struc
+               spin_unlock_irq(&hugetlb_lock);
+       }
++      /*
++       * Move PageHWPoison flag from head page to the raw error pages,
++       * which makes any healthy subpages reusable.
++       */
++      if (unlikely(PageHWPoison(page)))
++              hugetlb_clear_page_hwpoison(page);
++
+       for (i = 0; i < pages_per_huge_page(h); i++) {
+               subpage = nth_page(page, i);
+               subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
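
Editor's note: the fix above only reorders two steps, moving the hwpoison bookkeeping to after the point where the hugetlb flag is cleared under the lock. The following single-threaded, user-space replay of the interleaving from the changelog uses purely illustrative types to show why the old order loses a racing poison mark and the new order does not:

#include <stdbool.h>
#include <stdio.h>

struct folio { bool head_hwpoison; bool subpage_hwpoison; bool is_hugetlb; };

/* Plays the role of hugetlb_clear_page_hwpoison(): move the mark to the raw page. */
static void transfer(struct folio *f)
{
        if (f->head_hwpoison) {
                f->head_hwpoison = false;
                f->subpage_hwpoison = true;
        }
}

/* Plays the role of the racing memory-failure path on CPU2. */
static void racing_mark(struct folio *f)
{
        if (f->is_hugetlb)
                f->head_hwpoison = true;
}

int main(void)
{
        /* Old ordering: transfer, then the racing mark, then clear the flag. */
        struct folio a = { .is_hugetlb = true };
        transfer(&a);
        racing_mark(&a);
        a.is_hugetlb = false;
        printf("old order: subpage flagged=%d, leaked head mark=%d\n",
               a.subpage_hwpoison, a.head_hwpoison);    /* 0, 1: info leaked */

        /* Fixed ordering: racing mark, clear the flag, then transfer. */
        struct folio b = { .is_hugetlb = true };
        racing_mark(&b);
        b.is_hugetlb = false;
        transfer(&b);
        printf("new order: subpage flagged=%d, leaked head mark=%d\n",
               b.subpage_hwpoison, b.head_hwpoison);    /* 1, 0: mark preserved */
        return 0;
}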
diff --git a/queue-6.1/series b/queue-6.1/series
index e335a9f3416f178b9d575637a4e1dc0d8356a744..4172ac6e7e4aa095309000a19194d64de5547033 100644 (file)
@@ -138,3 +138,7 @@ mm-huge_memory-use-config_64bit-to-relax-huge-page-alignment-on-32-bit-machines.
 btrfs-fix-corruption-after-buffer-fault-in-during-direct-io-append-write.patch
 ipv6-fix-source-address-selection-with-route-leak.patch
 tools-headers-arm64-sync-arm64-s-cputype.h-with-the-kernel-sources.patch
+mm-hugetlb-fix-potential-race-in-__update_and_free_hugetlb_folio.patch
+block-call-.limit_depth-after-.hctx-has-been-set.patch
+block-mq-deadline-fix-the-tag-reservation-code.patch
+xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch
diff --git a/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch b/queue-6.1/xfs-fix-log-recovery-buffer-allocation-for-the-legacy-h_size-fixup.patch
new file mode 100644 (file)
index 0000000..cbac612
--- /dev/null
@@ -0,0 +1,72 @@
+From 45cf976008ddef4a9c9a30310c9b4fb2a9a6602a Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Tue, 30 Apr 2024 06:07:55 +0200
+Subject: xfs: fix log recovery buffer allocation for the legacy h_size fixup
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 45cf976008ddef4a9c9a30310c9b4fb2a9a6602a upstream.
+
+Commit a70f9fe52daa ("xfs: detect and handle invalid iclog size set by
+mkfs") added a fixup for incorrect h_size values used for the initial
+umount record in old xfsprogs versions.  Later commit 0c771b99d6c9
+("xfs: clean up calculation of LR header blocks") cleaned up the log
+recovery buffer calculation, but stopped using the fixed up h_size value
+to size the log recovery buffer, which can lead to an out of bounds
+access when the incorrect h_size does not come from the old mkfs
+tool, but from a fuzzer.
+
+Fix this by open coding xlog_logrec_hblks and taking the fixed h_size
+into account for this calculation.
+
+Fixes: 0c771b99d6c9 ("xfs: clean up calculation of LR header blocks")
+Reported-by: Sam Sun <samsun1006219@gmail.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
+Signed-off-by: Kevin Berry <kpberry@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log_recover.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2960,7 +2960,7 @@ xlog_do_recovery_pass(
+       int                     error = 0, h_size, h_len;
+       int                     error2 = 0;
+       int                     bblks, split_bblks;
+-      int                     hblks, split_hblks, wrapped_hblks;
++      int                     hblks = 1, split_hblks, wrapped_hblks;
+       int                     i;
+       struct hlist_head       rhash[XLOG_RHASH_SIZE];
+       LIST_HEAD               (buffer_list);
+@@ -3016,14 +3016,22 @@ xlog_do_recovery_pass(
+               if (error)
+                       goto bread_err1;
+-              hblks = xlog_logrec_hblks(log, rhead);
+-              if (hblks != 1) {
+-                      kmem_free(hbp);
+-                      hbp = xlog_alloc_buffer(log, hblks);
++              /*
++               * This open codes xlog_logrec_hblks so that we can reuse the
++               * fixed up h_size value calculated above.  Without that we'd
++               * still allocate the buffer based on the incorrect on-disk
++               * size.
++               */
++              if (h_size > XLOG_HEADER_CYCLE_SIZE &&
++                  (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) {
++                      hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
++                      if (hblks > 1) {
++                              kmem_free(hbp);
++                              hbp = xlog_alloc_buffer(log, hblks);
++                      }
+               }
+       } else {
+               ASSERT(log->l_sectBBsize == 1);
+-              hblks = 1;
+               hbp = xlog_alloc_buffer(log, 1);
+               h_size = XLOG_BIG_RECORD_BSIZE;
+       }
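
Editor's note: the replacement hunk effectively recomputes the header-block count from the fixed-up h_size instead of the raw on-disk value. A standalone sketch of that sizing rule; XLOG_HEADER_CYCLE_SIZE is 32k in the kernel, and the record sizes fed in below are made-up examples:

#include <stdio.h>

#define XLOG_HEADER_CYCLE_SIZE  (32 * 1024)
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

/* Mirrors the open-coded check: only v2 logs with large records need >1 block. */
static int logrec_hblks(int h_size, int version2)
{
        if (h_size > XLOG_HEADER_CYCLE_SIZE && version2)
                return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE);
        return 1;       /* v1 logs and small record sizes use a single block */
}

int main(void)
{
        /* A v2 log with 256k records needs an 8-block header buffer. */
        printf("hblks = %d\n", logrec_hblks(256 * 1024, 1));
        /* Once the fixup clamps a bogus h_size down to 32k, one block suffices. */
        printf("hblks = %d\n", logrec_hblks(32 * 1024, 1));
        return 0;
}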