From 94ac92fb7c1d554405250226651a7aa1c361f55a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 9 May 2021 16:03:30 +0200
Subject: [PATCH] 5.12-stable patches

added patches:
	dm-raid-fix-inconclusive-reshape-layout-on-fast-raid4-5-6-table-reload-sequences.patch
	exfat-fix-erroneous-discard-when-clear-cluster-bit.patch
	fuse-fix-write-deadlock.patch
	md-raid1-properly-indicate-failure-when-ending-a-failed-write-request.patch
	mm-page_alloc-ignore-init_on_free-1-for-debug_pagealloc-1.patch
	rcu-nocb-fix-missed-nocb_timer-requeue.patch
	sfc-adjust-efx-xdp_tx_queue_count-with-the-real-number-of-initialized-queues.patch
	sfc-farch-fix-tx-queue-lookup-in-tx-event-handling.patch
	sfc-farch-fix-tx-queue-lookup-in-tx-flush-done-handling.patch
---
 ...ast-raid4-5-6-table-reload-sequences.patch | 134 +++++++++++++++
 ...neous-discard-when-clear-cluster-bit.patch |  60 +++++++
 queue-5.12/fuse-fix-write-deadlock.patch      | 162 ++++++++++++++++++
 ...e-when-ending-a-failed-write-request.patch |  35 ++++
 ...init_on_free-1-for-debug_pagealloc-1.patch | 103 +++++++++++
 ...u-nocb-fix-missed-nocb_timer-requeue.patch | 122 +++++++++++++
 queue-5.12/series                             |   9 +
 ...he-real-number-of-initialized-queues.patch |  62 +++++++
 ...tx-queue-lookup-in-tx-event-handling.patch |  43 +++++
 ...eue-lookup-in-tx-flush-done-handling.patch |  46 +++++
 10 files changed, 776 insertions(+)
 create mode 100644 queue-5.12/dm-raid-fix-inconclusive-reshape-layout-on-fast-raid4-5-6-table-reload-sequences.patch
 create mode 100644 queue-5.12/exfat-fix-erroneous-discard-when-clear-cluster-bit.patch
 create mode 100644 queue-5.12/fuse-fix-write-deadlock.patch
 create mode 100644 queue-5.12/md-raid1-properly-indicate-failure-when-ending-a-failed-write-request.patch
 create mode 100644 queue-5.12/mm-page_alloc-ignore-init_on_free-1-for-debug_pagealloc-1.patch
 create mode 100644 queue-5.12/rcu-nocb-fix-missed-nocb_timer-requeue.patch
 create mode 100644 queue-5.12/sfc-adjust-efx-xdp_tx_queue_count-with-the-real-number-of-initialized-queues.patch
 create mode 100644 queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-event-handling.patch
 create mode 100644 queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-flush-done-handling.patch

diff --git a/queue-5.12/dm-raid-fix-inconclusive-reshape-layout-on-fast-raid4-5-6-table-reload-sequences.patch b/queue-5.12/dm-raid-fix-inconclusive-reshape-layout-on-fast-raid4-5-6-table-reload-sequences.patch
new file mode 100644
index 00000000000..0c1c6fd8216
--- /dev/null
+++ b/queue-5.12/dm-raid-fix-inconclusive-reshape-layout-on-fast-raid4-5-6-table-reload-sequences.patch
@@ -0,0 +1,134 @@
+From f99a8e4373eeacb279bc9696937a55adbff7a28a Mon Sep 17 00:00:00 2001
+From: Heinz Mauelshagen <heinzm@redhat.com>
+Date: Wed, 21 Apr 2021 23:32:36 +0200
+Subject: dm raid: fix inconclusive reshape layout on fast raid4/5/6 table reload sequences
+
+From: Heinz Mauelshagen <heinzm@redhat.com>
+
+commit f99a8e4373eeacb279bc9696937a55adbff7a28a upstream.
+
+If fast table reloads occur during an ongoing reshape of raid4/5/6
+devices the target may race reading a superblock vs the MD resync
+thread; causing an inconclusive reshape state to be read in its
+constructor.
+
+lvm2 test lvconvert-raid-reshape-stripes-load-reload.sh can cause
+BUG_ON() to trigger in md_run(), e.g.:
+"kernel BUG at drivers/md/raid5.c:7567!".
+
+Scenario triggering the bug:
+
+1. the MD sync thread calls end_reshape() from raid5_sync_request()
+   when done reshaping. However end_reshape() _only_ updates the
+   reshape position to MaxSector keeping the changed layout
+   configuration though (i.e. any delta disks, chunk sector or RAID
+   algorithm changes). That inconclusive configuration is stored in
+   the superblock.
+
+2. dm-raid constructs a mapping, loading named inconsistent superblock
+   as of step 1 before step 3 is able to finish resetting the reshape
+   state completely, and calls md_run() which leads to mentioned bug
+   in raid5.c.
+
+3. the MD RAID personality's finish_reshape() is called; which resets
+   the reshape information on chunk sectors, delta disks, etc. This
+   explains why the bug is rarely seen on multi-core machines, as MD's
+   finish_reshape() superblock update races with the dm-raid
+   constructor's superblock load in step 2.
+
+Fix identifies inconclusive superblock content in the dm-raid
+constructor and resets it before calling md_run(), factoring out
+identifying checks into rs_is_layout_change() to share in existing
+rs_reshape_requested() and new rs_reset_inconclusive_reshape(). Also
+enhance a comment and remove an empty line.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/dm-raid.c |   34 ++++++++++++++++++++++++++++------
+ 1 file changed, 28 insertions(+), 6 deletions(-)
+
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -1868,6 +1868,14 @@ static bool rs_takeover_requested(struct
+ 	return rs->md.new_level != rs->md.level;
+ }
+ 
++/* True if layout is set to reshape. */
++static bool rs_is_layout_change(struct raid_set *rs, bool use_mddev)
++{
++	return (use_mddev ? rs->md.delta_disks : rs->delta_disks) ||
++	       rs->md.new_layout != rs->md.layout ||
++	       rs->md.new_chunk_sectors != rs->md.chunk_sectors;
++}
++
+ /* True if @rs is requested to reshape by ctr */
+ static bool rs_reshape_requested(struct raid_set *rs)
+ {
+@@ -1880,9 +1888,7 @@ static bool rs_reshape_requested(struct
+ 	if (rs_is_raid0(rs))
+ 		return false;
+ 
+-	change = mddev->new_layout != mddev->layout ||
+-		 mddev->new_chunk_sectors != mddev->chunk_sectors ||
+-		 rs->delta_disks;
++	change = rs_is_layout_change(rs, false);
+ 
+ 	/* Historical case to support raid1 reshape without delta disks */
+ 	if (rs_is_raid1(rs)) {
+@@ -2817,7 +2823,7 @@ static sector_t _get_reshape_sectors(str
+ }
+ 
+ /*
+- *
++ * Reshape:
+  * - change raid layout
+  * - change chunk size
+  * - add disks
+@@ -2927,6 +2933,20 @@ static int rs_setup_reshape(struct raid_
+ }
+ 
+ /*
++ * If the md resync thread has updated superblock with max reshape position
++ * at the end of a reshape but not (yet) reset the layout configuration
++ * changes -> reset the latter.
++ */
++static void rs_reset_inconclusive_reshape(struct raid_set *rs)
++{
++	if (!rs_is_reshaping(rs) && rs_is_layout_change(rs, true)) {
++		rs_set_cur(rs);
++		rs->md.delta_disks = 0;
++		rs->md.reshape_backwards = 0;
++	}
++}
++
++/*
+  * Enable/disable discard support on RAID set depending on
+  * RAID level and discard properties of underlying RAID members.
+  */
+@@ -3212,11 +3232,14 @@ size_check:
+ 	if (r)
+ 		goto bad;
+ 
++	/* Catch any inconclusive reshape superblock content. */
++	rs_reset_inconclusive_reshape(rs);
++
+ 	/* Start raid set read-only and assumed clean to change in raid_resume() */
+ 	rs->md.ro = 1;
+ 	rs->md.in_sync = 1;
+ 
+-	/* Keep array frozen */
++	/* Keep array frozen until resume. */
+ 	set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
+ 
+ 	/* Has to be held on running the array */
+@@ -3230,7 +3253,6 @@ size_check:
+ 	}
+ 
+ 	r = md_start(&rs->md);
+-
+ 	if (r) {
+ 		ti->error = "Failed to start raid array";
+ 		mddev_unlock(&rs->md);
diff --git a/queue-5.12/exfat-fix-erroneous-discard-when-clear-cluster-bit.patch b/queue-5.12/exfat-fix-erroneous-discard-when-clear-cluster-bit.patch
new file mode 100644
index 00000000000..9da46006b2d
--- /dev/null
+++ b/queue-5.12/exfat-fix-erroneous-discard-when-clear-cluster-bit.patch
@@ -0,0 +1,60 @@
+From 77edfc6e51055b61cae2f54c8e6c3bb7c762e4fe Mon Sep 17 00:00:00 2001
+From: Hyeongseok Kim <hyeongseok@gmail.com>
+Date: Thu, 4 Mar 2021 09:15:34 +0900
+Subject: exfat: fix erroneous discard when clear cluster bit
+
+From: Hyeongseok Kim <hyeongseok@gmail.com>
+
+commit 77edfc6e51055b61cae2f54c8e6c3bb7c762e4fe upstream.
+
+If mounted with discard option, exFAT issues discard command when clear
+cluster bit to remove file. But the input parameter of cluster-to-sector
+calculation is abnormally added by reserved cluster size which is 2,
+leading to discard unrelated sectors included in target+2 cluster.
+With fixing this, remove the wrong comments in set/clear/find bitmap
+functions.
+
+Fixes: 1e49a94cf707 ("exfat: add bitmap operations")
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Hyeongseok Kim <hyeongseok@gmail.com>
+Acked-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exfat/balloc.c |   11 +----------
+ 1 file changed, 1 insertion(+), 10 deletions(-)
+
+--- a/fs/exfat/balloc.c
++++ b/fs/exfat/balloc.c
+@@ -141,10 +141,6 @@ void exfat_free_bitmap(struct exfat_sb_i
+ 	kfree(sbi->vol_amap);
+ }
+ 
+-/*
+- * If the value of "clu" is 0, it means cluster 2 which is the first cluster of
+- * the cluster heap.
+- */
+ int exfat_set_bitmap(struct inode *inode, unsigned int clu)
+ {
+ 	int i, b;
+@@ -162,10 +158,6 @@ int exfat_set_bitmap(struct inode *inode
+ 	return 0;
+ }
+ 
+-/*
+- * If the value of "clu" is 0, it means cluster 2 which is the first cluster of
+- * the cluster heap.
+- */
+ void exfat_clear_bitmap(struct inode *inode, unsigned int clu, bool sync)
+ {
+ 	int i, b;
+@@ -186,8 +178,7 @@ void exfat_clear_bitmap(struct inode *in
+ 		int ret_discard;
+ 
+ 		ret_discard = sb_issue_discard(sb,
+-			exfat_cluster_to_sector(sbi, clu +
+-						EXFAT_RESERVED_CLUSTERS),
++			exfat_cluster_to_sector(sbi, clu),
+ 			(1 << sbi->sect_per_clus_bits), GFP_NOFS, 0);
+ 
+ 		if (ret_discard == -EOPNOTSUPP) {
diff --git a/queue-5.12/fuse-fix-write-deadlock.patch b/queue-5.12/fuse-fix-write-deadlock.patch
new file mode 100644
index 00000000000..da79b315964
--- /dev/null
+++ b/queue-5.12/fuse-fix-write-deadlock.patch
@@ -0,0 +1,162 @@
+From 4f06dd92b5d0a6f8eec6a34b8d6ef3e1f4ac1e10 Mon Sep 17 00:00:00 2001
+From: Vivek Goyal <vgoyal@redhat.com>
+Date: Wed, 21 Oct 2020 16:12:49 -0400
+Subject: fuse: fix write deadlock
+
+From: Vivek Goyal <vgoyal@redhat.com>
+
+commit 4f06dd92b5d0a6f8eec6a34b8d6ef3e1f4ac1e10 upstream.
+
+There are two modes for write(2) and friends in fuse:
+
+a) write through (update page cache, send sync WRITE request to userspace)
+
+b) buffered write (update page cache, async writeout later)
+
+The write through method kept all the page cache pages locked that were
+used for the request.  Keeping more than one page locked is deadlock prone
+and Qian Cai demonstrated this with trinity fuzzing.
+
+The reason for keeping the pages locked is that concurrent mapped reads
+shouldn't try to pull possibly stale data into the page cache.
+
+For full page writes, the easy way to fix this is to make the cached page
+be the authoritative source by marking the page PG_uptodate immediately.
+After this the page can be safely unlocked, since mapped/cached reads will
+take the written data from the cache.
+
+Concurrent mapped writes will now cause data in the original WRITE request
+to be updated; this however doesn't cause any data inconsistency and this
+scenario should be exceedingly rare anyway.
+
+If the WRITE request returns with an error in the above case, currently the
+page is not marked uptodate; this means that a concurrent read will always
+read consistent data.  After this patch the page is uptodate between
+writing to the cache and receiving the error: there's a window where a cached
+read will read the wrong data.  While theoretically this could be a
+regression, it is unlikely to be one in practice, since this is normal for
+buffered writes.
+
+In case of a partial page write to an already uptodate page the locking is
+also unnecessary, with the above caveats.
+
+Partial write of a not uptodate page still needs to be handled.  One way
+would be to read the complete page before doing the write.  This is not
+possible, since it might break filesystems that don't expect any READ
+requests when the file was opened O_WRONLY.
+
+The other solution is to serialize the synchronous write with reads from
+the partial pages.  The easiest way to do this is to keep the partial pages
+locked.  The problem is that a write() may involve two such pages (one head
+and one tail).  This patch fixes it by only locking the partial tail page.
+If there's a partial head page as well, then split that off as a separate
+WRITE request.
+
+Reported-by: Qian Cai <cai@lca.pw>
+Link: https://lore.kernel.org/linux-fsdevel/4794a3fa3742a5e84fb0f934944204b55730829b.camel@lca.pw/
+Fixes: ea9b9907b82a ("fuse: implement perform_write")
+Cc: <stable@vger.kernel.org> # v2.6.26
+Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c   |   41 +++++++++++++++++++++++++++++------------
+ fs/fuse/fuse_i.h |    1 +
+ 2 files changed, 30 insertions(+), 12 deletions(-)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -1099,6 +1099,7 @@ static ssize_t fuse_send_write_pages(str
+ 	struct fuse_file *ff = file->private_data;
+ 	struct fuse_mount *fm = ff->fm;
+ 	unsigned int offset, i;
++	bool short_write;
+ 	int err;
+ 
+ 	for (i = 0; i < ap->num_pages; i++)
+@@ -1113,32 +1114,38 @@ static ssize_t fuse_send_write_pages(str
+ 	if (!err && ia->write.out.size > count)
+ 		err = -EIO;
+ 
++	short_write = ia->write.out.size < count;
+ 	offset = ap->descs[0].offset;
+ 	count = ia->write.out.size;
+ 	for (i = 0; i < ap->num_pages; i++) {
+ 		struct page *page = ap->pages[i];
+ 
+-		if (!err && !offset && count >= PAGE_SIZE)
+-			SetPageUptodate(page);
+-
+-		if (count > PAGE_SIZE - offset)
+-			count -= PAGE_SIZE - offset;
+-		else
+-			count = 0;
+-		offset = 0;
+-
+-		unlock_page(page);
++		if (err) {
++			ClearPageUptodate(page);
++		} else {
++			if (count >= PAGE_SIZE - offset)
++				count -= PAGE_SIZE - offset;
++			else {
++				if (short_write)
++					ClearPageUptodate(page);
++				count = 0;
++			}
++			offset = 0;
++		}
++		if (ia->write.page_locked && (i == ap->num_pages - 1))
++			unlock_page(page);
+ 		put_page(page);
+ 	}
+ 
+ 	return err;
+ }
+ 
+-static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
++static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
+ 				     struct address_space *mapping,
+ 				     struct iov_iter *ii, loff_t pos,
+ 				     unsigned int max_pages)
+ {
++	struct fuse_args_pages *ap = &ia->ap;
+ 	struct fuse_conn *fc = get_fuse_conn(mapping->host);
+ 	unsigned offset = pos & (PAGE_SIZE - 1);
+ 	size_t count = 0;
+@@ -1191,6 +1198,16 @@ static ssize_t fuse_fill_write_pages(str
+ 		if (offset == PAGE_SIZE)
+ 			offset = 0;
+ 
++		/* If we copied full page, mark it uptodate */
++		if (tmp == PAGE_SIZE)
++			SetPageUptodate(page);
++
++		if (PageUptodate(page)) {
++			unlock_page(page);
++		} else {
++			ia->write.page_locked = true;
++			break;
++		}
+ 		if (!fc->big_writes)
+ 			break;
+ 	} while (iov_iter_count(ii) && count < fc->max_write &&
+@@ -1234,7 +1251,7 @@ static ssize_t fuse_perform_write(struct
+ 			break;
+ 		}
+ 
+-		count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
++		count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
+ 		if (count <= 0) {
+ 			err = count;
+ 		} else {
+--- a/fs/fuse/fuse_i.h
++++ b/fs/fuse/fuse_i.h
+@@ -912,6 +912,7 @@ struct fuse_io_args {
+ 		struct {
+ 			struct fuse_write_in in;
+ 			struct fuse_write_out out;
++			bool page_locked;
+ 		} write;
+ 	};
+ 	struct fuse_args_pages ap;
diff --git a/queue-5.12/md-raid1-properly-indicate-failure-when-ending-a-failed-write-request.patch b/queue-5.12/md-raid1-properly-indicate-failure-when-ending-a-failed-write-request.patch
new file mode 100644
index 00000000000..acfa801c7e9
--- /dev/null
+++ b/queue-5.12/md-raid1-properly-indicate-failure-when-ending-a-failed-write-request.patch
@@ -0,0 +1,35 @@
+From 2417b9869b81882ab90fd5ed1081a1cb2d4db1dd Mon Sep 17 00:00:00 2001
+From: Paul Clements <paul.clements@us.sios.com>
+Date: Thu, 15 Apr 2021 17:17:57 -0400
+Subject: md/raid1: properly indicate failure when ending a failed write request
+
+From: Paul Clements <paul.clements@us.sios.com>
+
+commit 2417b9869b81882ab90fd5ed1081a1cb2d4db1dd upstream.
+
+This patch addresses a data corruption bug in raid1 arrays using bitmaps.
+Without this fix, the bitmap bits for the failed I/O end up being cleared.
+
+Since we are in the failure leg of raid1_end_write_request, the request
+either needs to be retried (R1BIO_WriteError) or failed (R1BIO_Degraded).
+
+Fixes: eeba6809d8d5 ("md/raid1: end bio when the device faulty")
+Cc: stable@vger.kernel.org # v5.2+
+Signed-off-by: Paul Clements <paul.clements@us.sios.com>
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/md/raid1.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -478,6 +478,8 @@ static void raid1_end_write_request(stru
+ 		if (!test_bit(Faulty, &rdev->flags))
+ 			set_bit(R1BIO_WriteError, &r1_bio->state);
+ 		else {
++			/* Fail the request */
++			set_bit(R1BIO_Degraded, &r1_bio->state);
+ 			/* Finished with this branch */
+ 			r1_bio->bios[mirror] = NULL;
+ 			to_put = bio;
diff --git a/queue-5.12/mm-page_alloc-ignore-init_on_free-1-for-debug_pagealloc-1.patch b/queue-5.12/mm-page_alloc-ignore-init_on_free-1-for-debug_pagealloc-1.patch
new file mode 100644
index 00000000000..de75211d926
--- /dev/null
+++ b/queue-5.12/mm-page_alloc-ignore-init_on_free-1-for-debug_pagealloc-1.patch
@@ -0,0 +1,103 @@
+From 9df65f522536719682bccd24245ff94db956256c Mon Sep 17 00:00:00 2001
+From: Sergei Trofimovich <slyfox@gentoo.org>
+Date: Thu, 29 Apr 2021 23:02:11 -0700
+Subject: mm: page_alloc: ignore init_on_free=1 for debug_pagealloc=1
+
+From: Sergei Trofimovich <slyfox@gentoo.org>
+
+commit 9df65f522536719682bccd24245ff94db956256c upstream.
+
+On !ARCH_SUPPORTS_DEBUG_PAGEALLOC (like ia64) debug_pagealloc=1 implies
+page_poison=on:
+
+    if (page_poisoning_enabled() ||
+         (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
+          debug_pagealloc_enabled()))
+            static_branch_enable(&_page_poisoning_enabled);
+
+page_poison=on needs to override init_on_free=1.
+
+Before the change it did not work as expected for the following case:
+- have PAGE_POISONING=y
+- have page_poison unset
+- have !ARCH_SUPPORTS_DEBUG_PAGEALLOC arch (like ia64)
+- have init_on_free=1
+- have debug_pagealloc=1
+
+That way we get both keys enabled:
+- static_branch_enable(&init_on_free);
+- static_branch_enable(&_page_poisoning_enabled);
+
+which leads to poisoned pages returned for __GFP_ZERO pages.
+
+After the change we execute only:
+- static_branch_enable(&_page_poisoning_enabled);
+  and ignore init_on_free=1.
+
+Link: https://lkml.kernel.org/r/20210329222555.3077928-1-slyfox@gentoo.org
+Link: https://lkml.org/lkml/2021/3/26/443
+Fixes: 8db26a3d4735 ("mm, page_poison: use static key more efficiently")
+Signed-off-by: Sergei Trofimovich <slyfox@gentoo.org>
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Cc: Andrey Konovalov <andreyknvl@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c |   30 +++++++++++++++++-------------
+ 1 file changed, 17 insertions(+), 13 deletions(-)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -764,32 +764,36 @@ static inline void clear_page_guard(stru
+  */
+ void init_mem_debugging_and_hardening(void)
+ {
++	bool page_poisoning_requested = false;
++
++#ifdef CONFIG_PAGE_POISONING
++	/*
++	 * Page poisoning is debug page alloc for some arches. If
++	 * either of those options are enabled, enable poisoning.
++	 */
++	if (page_poisoning_enabled() ||
++	     (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
++	      debug_pagealloc_enabled())) {
++		static_branch_enable(&_page_poisoning_enabled);
++		page_poisoning_requested = true;
++	}
++#endif
++
+ 	if (_init_on_alloc_enabled_early) {
+-		if (page_poisoning_enabled())
++		if (page_poisoning_requested)
+ 			pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
+ 				"will take precedence over init_on_alloc\n");
+ 		else
+ 			static_branch_enable(&init_on_alloc);
+ 	}
+ 	if (_init_on_free_enabled_early) {
+-		if (page_poisoning_enabled())
++		if (page_poisoning_requested)
+ 			pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
+ 				"will take precedence over init_on_free\n");
+ 		else
+ 			static_branch_enable(&init_on_free);
+ 	}
+ 
+-#ifdef CONFIG_PAGE_POISONING
+-	/*
+-	 * Page poisoning is debug page alloc for some arches. If
+-	 * either of those options are enabled, enable poisoning.
+-	 */
+-	if (page_poisoning_enabled() ||
+-	     (!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
+-	      debug_pagealloc_enabled()))
+-		static_branch_enable(&_page_poisoning_enabled);
+-#endif
+-
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ 	if (!debug_pagealloc_enabled())
+ 		return;
diff --git a/queue-5.12/rcu-nocb-fix-missed-nocb_timer-requeue.patch b/queue-5.12/rcu-nocb-fix-missed-nocb_timer-requeue.patch
new file mode 100644
index 00000000000..040d6fac1e3
--- /dev/null
+++ b/queue-5.12/rcu-nocb-fix-missed-nocb_timer-requeue.patch
@@ -0,0 +1,122 @@
+From b2fcf2102049f6e56981e0ab3d9b633b8e2741da Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Tue, 23 Feb 2021 01:09:59 +0100
+Subject: rcu/nocb: Fix missed nocb_timer requeue
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit b2fcf2102049f6e56981e0ab3d9b633b8e2741da upstream.
+
+This sequence of events can lead to a failure to requeue a CPU's
+->nocb_timer:
+
+1.	There are no callbacks queued for any CPU covered by CPU 0-2's
+	->nocb_gp_kthread.  Note that ->nocb_gp_kthread is associated
+	with CPU 0.
+
+2.	CPU 1 enqueues its first callback with interrupts disabled, and
+	thus must defer awakening its ->nocb_gp_kthread.  It therefore
+	queues its rcu_data structure's ->nocb_timer.  At this point,
+	CPU 1's rdp->nocb_defer_wakeup is RCU_NOCB_WAKE.
+
+3.	CPU 2, which shares the same ->nocb_gp_kthread, also enqueues a
+	callback, but with interrupts enabled, allowing it to directly
+	awaken the ->nocb_gp_kthread.
+
+4.	The newly awakened ->nocb_gp_kthread associates both CPU 1's
+	and CPU 2's callbacks with a future grace period and arranges
+	for that grace period to be started.
+
+5.	This ->nocb_gp_kthread goes to sleep waiting for the end of this
+	future grace period.
+
+6.	This grace period elapses before the CPU 1's timer fires.
+	This is normally improbable given that the timer is set for only
+	one jiffy, but timers can be delayed.  Besides, it is possible
+	that kernel was built with CONFIG_RCU_STRICT_GRACE_PERIOD=y.
+
+7.	The grace period ends, so rcu_gp_kthread awakens the
+	->nocb_gp_kthread, which in turn awakens both CPU 1's and
+	CPU 2's ->nocb_cb_kthread.  Then ->nocb_gp_kthread sleeps
+	waiting for more newly queued callbacks.
+
+8.	CPU 1's ->nocb_cb_kthread invokes its callback, then sleeps
+	waiting for more invocable callbacks.
+
+9.	Note that neither kthread updated any ->nocb_timer state,
+	so CPU 1's ->nocb_defer_wakeup is still set to RCU_NOCB_WAKE.
+
+10.	CPU 1 enqueues its second callback, this time with interrupts
+	enabled so it can directly wake ->nocb_gp_kthread.
+	It does so by calling wake_nocb_gp() which also cancels the
+	pending timer that got queued in step 2. But that doesn't reset
+	CPU 1's ->nocb_defer_wakeup which is still set to RCU_NOCB_WAKE.
+	So CPU 1's ->nocb_defer_wakeup and its ->nocb_timer are now
+	desynchronized.
+
+11.	->nocb_gp_kthread associates the callback queued in 10 with a new
+	grace period, arranges for that grace period to start and sleeps
+	waiting for it to complete.
+
+12.	The grace period ends, rcu_gp_kthread awakens ->nocb_gp_kthread,
+	which in turn wakes up CPU 1's ->nocb_cb_kthread which then
+	invokes the callback queued in 10.
+
+13.	CPU 1 enqueues its third callback, this time with interrupts
+	disabled so it must queue a timer for a deferred wakeup. However
+	the value of its ->nocb_defer_wakeup is RCU_NOCB_WAKE which
+	incorrectly indicates that a timer is already queued.  Instead,
+	CPU 1's ->nocb_timer was cancelled in 10.  CPU 1 therefore fails
+	to queue the ->nocb_timer.
+
+14.	CPU 1 has its pending callback and it may go unnoticed until
+	some other CPU ever wakes up ->nocb_gp_kthread or CPU 1 ever
+	calls an explicit deferred wakeup, for example, during idle entry.
+
+This commit fixes this bug by resetting rdp->nocb_defer_wakeup every time
+we delete the ->nocb_timer.
+
+It is quite possible that there is a similar scenario involving
+->nocb_bypass_timer and ->nocb_defer_wakeup.  However, despite some
+effort from several people, a failure scenario has not yet been located.
+However, that by no means guarantees that no such scenario exists.
+Finding a failure scenario is left as an exercise for the reader, and the
+"Fixes:" tag below relates to ->nocb_bypass_timer instead of ->nocb_timer.
+
+Fixes: d1b222c6be1f (rcu/nocb: Add bypass callback queueing)
+Cc: <stable@vger.kernel.org>
+Cc: Josh Triplett <josh@joshtriplett.org>
+Cc: Lai Jiangshan <jiangshanlai@gmail.com>
+Cc: Joel Fernandes <joel@joelfernandes.org>
+Cc: Boqun Feng <boqun.feng@gmail.com>
+Reviewed-by: Neeraj Upadhyay <neeraju@codeaurora.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tree_plugin.h |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -1646,7 +1646,11 @@ static bool wake_nocb_gp(struct rcu_data
+ 		rcu_nocb_unlock_irqrestore(rdp, flags);
+ 		return false;
+ 	}
+-	del_timer(&rdp->nocb_timer);
++
++	if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
++		WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
++		del_timer(&rdp->nocb_timer);
++	}
+ 	rcu_nocb_unlock_irqrestore(rdp, flags);
+ 	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+ 	if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
+@@ -2265,7 +2269,6 @@ static bool do_nocb_deferred_wakeup_comm
+ 		return false;
+ 	}
+ 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
+-	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ 	ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
+ 
diff --git a/queue-5.12/series b/queue-5.12/series
index c71615c380f..e8e05c42e76 100644
--- a/queue-5.12/series
+++ b/queue-5.12/series
@@ -317,3 +317,12 @@ tpm-efi-use-local-variable-for-calculating-final-log-size.patch
 tpm-vtpm_proxy-avoid-reading-host-log-when-using-a-virtual-device.patch
 crypto-arm-curve25519-move-.fpu-after-.arch.patch
 crypto-rng-fix-crypto_rng_reset-refcounting-when-crypto_stats.patch
+md-raid1-properly-indicate-failure-when-ending-a-failed-write-request.patch
+dm-raid-fix-inconclusive-reshape-layout-on-fast-raid4-5-6-table-reload-sequences.patch
+fuse-fix-write-deadlock.patch
+mm-page_alloc-ignore-init_on_free-1-for-debug_pagealloc-1.patch
+exfat-fix-erroneous-discard-when-clear-cluster-bit.patch
+sfc-farch-fix-tx-queue-lookup-in-tx-flush-done-handling.patch
+sfc-farch-fix-tx-queue-lookup-in-tx-event-handling.patch
+sfc-adjust-efx-xdp_tx_queue_count-with-the-real-number-of-initialized-queues.patch
+rcu-nocb-fix-missed-nocb_timer-requeue.patch
diff --git a/queue-5.12/sfc-adjust-efx-xdp_tx_queue_count-with-the-real-number-of-initialized-queues.patch b/queue-5.12/sfc-adjust-efx-xdp_tx_queue_count-with-the-real-number-of-initialized-queues.patch
new file mode 100644
index 00000000000..3ec98834563
--- /dev/null
+++ b/queue-5.12/sfc-adjust-efx-xdp_tx_queue_count-with-the-real-number-of-initialized-queues.patch
@@ -0,0 +1,62 @@
+From 99ba0ea616aabdc8e26259fd722503e012199a76 Mon Sep 17 00:00:00 2001
+From: Ignat Korchagin <ignat@cloudflare.com>
+Date: Tue, 27 Apr 2021 22:09:38 +0100
+Subject: sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues
+
+From: Ignat Korchagin <ignat@cloudflare.com>
+
+commit 99ba0ea616aabdc8e26259fd722503e012199a76 upstream.
+
+efx->xdp_tx_queue_count is initially initialized to num_possible_cpus() and is
+later used to allocate and traverse efx->xdp_tx_queues lookup array. However,
+we may end up not initializing all the array slots with real queues during
+probing. This results, for example, in a NULL pointer dereference, when running
+"# ethtool -S <iface>", similar to below
+
+[2570283.664955][T4126959] BUG: kernel NULL pointer dereference, address: 00000000000000f8
+[2570283.681283][T4126959] #PF: supervisor read access in kernel mode
+[2570283.695678][T4126959] #PF: error_code(0x0000) - not-present page
+[2570283.710013][T4126959] PGD 0 P4D 0
+[2570283.721649][T4126959] Oops: 0000 [#1] SMP PTI
+[2570283.734108][T4126959] CPU: 23 PID: 4126959 Comm: ethtool Tainted: G           O      5.10.20-cloudflare-2021.3.1 #1
+[2570283.752641][T4126959] Hardware name: <redacted>
+[2570283.781408][T4126959] RIP: 0010:efx_ethtool_get_stats+0x2ca/0x330 [sfc]
+[2570283.796073][T4126959] Code: 00 85 c0 74 39 48 8b 95 a8 0f 00 00 48 85 d2 74 2d 31 c0 eb 07 48 8b 95 a8 0f 00 00 48 63 c8 49 83 c4 08 83 c0 01 48 8b 14 ca <48> 8b 92 f8 00 00 00 49 89 54 24 f8 39 85 a0 0f 00 00 77 d7 48 8b
+[2570283.831259][T4126959] RSP: 0018:ffffb79a77657ce8 EFLAGS: 00010202
+[2570283.845121][T4126959] RAX: 0000000000000019 RBX: ffffb799cd0c9280 RCX: 0000000000000018
+[2570283.860872][T4126959] RDX: 0000000000000000 RSI: ffff96dd970ce000 RDI: 0000000000000005
+[2570283.876525][T4126959] RBP: ffff96dd86f0a000 R08: ffff96dd970ce480 R09: 000000000000005f
+[2570283.892014][T4126959] R10: ffffb799cd0c9fff R11: ffffb799cd0c9000 R12: ffffb799cd0c94f8
+[2570283.907406][T4126959] R13: ffffffffc11b1090 R14: ffff96dd970ce000 R15: ffffffffc11cd66c
+[2570283.922705][T4126959] FS:  00007fa7723f8740(0000) GS:ffff96f51fac0000(0000) knlGS:0000000000000000
+[2570283.938848][T4126959] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[2570283.952524][T4126959] CR2: 00000000000000f8 CR3: 0000001a73e6e006 CR4: 00000000007706e0
+[2570283.967529][T4126959] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[2570283.982400][T4126959] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[2570283.997308][T4126959] PKRU: 55555554
+[2570284.007649][T4126959] Call Trace:
+[2570284.017598][T4126959]  dev_ethtool+0x1832/0x2830
+
+Fix this by adjusting efx->xdp_tx_queue_count after probing to reflect the true
+value of initialized slots in efx->xdp_tx_queues.
+
+Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
+Fixes: e26ca4b53582 ("sfc: reduce the number of requested xdp ev queues")
+Cc: <stable@vger.kernel.org> # 5.12.x
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/sfc/efx_channels.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/sfc/efx_channels.c
++++ b/drivers/net/ethernet/sfc/efx_channels.c
+@@ -914,6 +914,8 @@ int efx_set_channels(struct efx_nic *efx
+ 			}
+ 		}
+ 	}
++	if (xdp_queue_number)
++		efx->xdp_tx_queue_count = xdp_queue_number;
+ 
+ 	rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
+ 	if (rc)
diff --git a/queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-event-handling.patch b/queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-event-handling.patch
new file mode 100644
index 00000000000..abb15fd82b7
--- /dev/null
+++ b/queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-event-handling.patch
@@ -0,0 +1,43 @@
+From 83b09a1807415608b387c7bc748d329fefc5617e Mon Sep 17 00:00:00 2001
+From: Edward Cree <ecree.xilinx@gmail.com>
+Date: Tue, 20 Apr 2021 13:28:28 +0100
+Subject: sfc: farch: fix TX queue lookup in TX event handling
+
+From: Edward Cree <ecree.xilinx@gmail.com>
+
+commit 83b09a1807415608b387c7bc748d329fefc5617e upstream.
+
+We're starting from a TXQ label, not a TXQ type, so
+ efx_channel_get_tx_queue() is inappropriate (and could return NULL,
+ leading to panics).
+
+Fixes: 12804793b17c ("sfc: decouple TXQ type from label")
+Cc: stable@vger.kernel.org
+Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/sfc/farch.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/sfc/farch.c
++++ b/drivers/net/ethernet/sfc/farch.c
+@@ -835,14 +835,14 @@ efx_farch_handle_tx_event(struct efx_cha
+ 		/* Transmit completion */
+ 		tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
+ 		tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+-		tx_queue = efx_channel_get_tx_queue(
+-			channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
++		tx_queue = channel->tx_queue +
++				(tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ 		efx_xmit_done(tx_queue, tx_ev_desc_ptr);
+ 	} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
+ 		/* Rewrite the FIFO write pointer */
+ 		tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+-		tx_queue = efx_channel_get_tx_queue(
+-			channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
++		tx_queue = channel->tx_queue +
++				(tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
+ 
+ 		netif_tx_lock(efx->net_dev);
+ 		efx_farch_notify_tx_desc(tx_queue);
diff --git a/queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-flush-done-handling.patch b/queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-flush-done-handling.patch
new file mode 100644
index 00000000000..3102922a54c
--- /dev/null
+++ b/queue-5.12/sfc-farch-fix-tx-queue-lookup-in-tx-flush-done-handling.patch
@@ -0,0 +1,46 @@
+From 5b1faa92289b53cad654123ed2bc8e10f6ddd4ac Mon Sep 17 00:00:00 2001
+From: Edward Cree <ecree.xilinx@gmail.com>
+Date: Tue, 20 Apr 2021 13:27:22 +0100
+Subject: sfc: farch: fix TX queue lookup in TX flush done handling
+
+From: Edward Cree <ecree.xilinx@gmail.com>
+
+commit 5b1faa92289b53cad654123ed2bc8e10f6ddd4ac upstream.
+
+We're starting from a TXQ instance number ('qid'), not a TXQ type, so
+ efx_get_tx_queue() is inappropriate (and could return NULL, leading
+ to panics).
+
+Fixes: 12804793b17c ("sfc: decouple TXQ type from label")
+Reported-by: Trevor Hemsley <themsley@voiceflex.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/sfc/farch.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/ethernet/sfc/farch.c
++++ b/drivers/net/ethernet/sfc/farch.c
+@@ -1081,16 +1081,16 @@ static void
+ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
+ {
+ 	struct efx_tx_queue *tx_queue;
++	struct efx_channel *channel;
+ 	int qid;
+ 
+ 	qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+ 	if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
+-		tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL,
+-					    qid % EFX_MAX_TXQ_PER_CHANNEL);
+-		if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
++		channel = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL);
++		tx_queue = channel->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL);
++		if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0))
+ 			efx_farch_magic_event(tx_queue->channel,
+ 					      EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
+-		}
+ 	}
+ }
+ 
-- 
2.47.3