From 696f94896fa279af4d5080436dba21de9404c7b4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 26 Sep 2022 08:34:55 +0200
Subject: [PATCH] 5.10-stable patches

added patches:
	devdax-fix-soft-reservation-memory-description.patch
	ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch
	ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch
---
 ...-soft-reservation-memory-description.patch | 58 +++++++++++++
 ...ing-when-eh_entries-0-and-eh_depth-0.patch | 85 +++++++++++++++++++
 ...ter-discarding-preallocations-blocks.patch | 74 ++++++++++++++++
 queue-5.10/series                             |  3 +
 4 files changed, 220 insertions(+)
 create mode 100644 queue-5.10/devdax-fix-soft-reservation-memory-description.patch
 create mode 100644 queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch
 create mode 100644 queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch

diff --git a/queue-5.10/devdax-fix-soft-reservation-memory-description.patch b/queue-5.10/devdax-fix-soft-reservation-memory-description.patch
new file mode 100644
index 00000000000..ca5964fdcd3
--- /dev/null
+++ b/queue-5.10/devdax-fix-soft-reservation-memory-description.patch
@@ -0,0 +1,58 @@
+From 67feaba413ec68daf4124e9870878899b4ed9a0e Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 23 Sep 2022 15:05:56 -0700
+Subject: devdax: Fix soft-reservation memory description
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 67feaba413ec68daf4124e9870878899b4ed9a0e upstream.
+
+The "hmem" platform-devices that are created to represent the
+platform-advertised "Soft Reserved" memory ranges end up inserting a
+resource that causes the iomem_resource tree to look like this:
+
+340000000-43fffffff : hmem.0
+  340000000-43fffffff : Soft Reserved
+    340000000-43fffffff : dax0.0
+
+This is because insert_resource() reparents ranges when they completely
+intersect an existing range.
+
+This matters because code that uses region_intersects() to scan for a
+given IORES_DESC will only check that top-level 'hmem.0' resource and
+not the 'Soft Reserved' descendant.
+
+So, to support EINJ (via einj_error_inject()) to inject errors into
+memory hosted by a dax-device, be sure to describe the memory as
+IORES_DESC_SOFT_RESERVED. This is a follow-on to:
+
+commit b13a3e5fd40b ("ACPI: APEI: Fix _EINJ vs EFI_MEMORY_SP")
+
+...that fixed EINJ support for "Soft Reserved" ranges in the first
+instance.
+
+Fixes: 262b45ae3ab4 ("x86/efi: EFI soft reservation to E820 enumeration")
+Reported-by: Ricardo Sandoval Torres <ricardo.sandoval.torres@intel.com>
+Tested-by: Ricardo Sandoval Torres <ricardo.sandoval.torres@intel.com>
+Cc: <stable@vger.kernel.org>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Omar Avelar <omar.avelar@intel.com>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Mark Gross <markgross@kernel.org>
+Link: https://lore.kernel.org/r/166397075670.389916.7435722208896316387.stgit@dwillia2-xfh.jf.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dax/hmem/device.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/dax/hmem/device.c
++++ b/drivers/dax/hmem/device.c
+@@ -15,6 +15,7 @@ void hmem_register_device(int target_nid
+ 		.start = r->start,
+ 		.end = r->end,
+ 		.flags = IORESOURCE_MEM,
++		.desc = IORES_DESC_SOFT_RESERVED,
+ 	};
+ 	struct platform_device *pdev;
+ 	struct memregion_info info;
diff --git a/queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch b/queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch
new file mode 100644
index 00000000000..1527f7c5e9d
--- /dev/null
+++ b/queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch
@@ -0,0 +1,85 @@
+From 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= <lhenriques@suse.de>
+Date: Mon, 22 Aug 2022 10:42:35 +0100
+Subject: ext4: fix bug in extents parsing when eh_entries == 0 and eh_depth > 0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luís Henriques <lhenriques@suse.de>
+
+commit 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 upstream.
+
+When walking through an inode extents, the ext4_ext_binsearch_idx() function
+assumes that the extent header has been previously validated.  However, there
+are no checks that verify that the number of entries (eh->eh_entries) is
+non-zero when depth is > 0.  And this will lead to problems because the
+EXT_FIRST_INDEX() and EXT_LAST_INDEX() will return garbage and result in this:
+
+[  135.245946] ------------[ cut here ]------------
+[  135.247579] kernel BUG at fs/ext4/extents.c:2258!
+[  135.249045] invalid opcode: 0000 [#1] PREEMPT SMP
+[  135.250320] CPU: 2 PID: 238 Comm: tmp118 Not tainted 5.19.0-rc8+ #4
+[  135.252067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
+[  135.255065] RIP: 0010:ext4_ext_map_blocks+0xc20/0xcb0
+[  135.256475] Code:
+[  135.261433] RSP: 0018:ffffc900005939f8 EFLAGS: 00010246
+[  135.262847] RAX: 0000000000000024 RBX: ffffc90000593b70 RCX: 0000000000000023
+[  135.264765] RDX: ffff8880038e5f10 RSI: 0000000000000003 RDI: ffff8880046e922c
+[  135.266670] RBP: ffff8880046e9348 R08: 0000000000000001 R09: ffff888002ca580c
+[  135.268576] R10: 0000000000002602 R11: 0000000000000000 R12: 0000000000000024
+[  135.270477] R13: 0000000000000000 R14: 0000000000000024 R15: 0000000000000000
+[  135.272394] FS:  00007fdabdc56740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000
+[  135.274510] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  135.276075] CR2: 00007ffc26bd4f00 CR3: 0000000006261004 CR4: 0000000000170ea0
+[  135.277952] Call Trace:
+[  135.278635]  <TASK>
+[  135.279247]  ? preempt_count_add+0x6d/0xa0
+[  135.280358]  ? percpu_counter_add_batch+0x55/0xb0
+[  135.281612]  ? _raw_read_unlock+0x18/0x30
+[  135.282704]  ext4_map_blocks+0x294/0x5a0
+[  135.283745]  ? xa_load+0x6f/0xa0
+[  135.284562]  ext4_mpage_readpages+0x3d6/0x770
+[  135.285646]  read_pages+0x67/0x1d0
+[  135.286492]  ? folio_add_lru+0x51/0x80
+[  135.287441]  page_cache_ra_unbounded+0x124/0x170
+[  135.288510]  filemap_get_pages+0x23d/0x5a0
+[  135.289457]  ? path_openat+0xa72/0xdd0
+[  135.290332]  filemap_read+0xbf/0x300
+[  135.291158]  ? _raw_spin_lock_irqsave+0x17/0x40
+[  135.292192]  new_sync_read+0x103/0x170
+[  135.293014]  vfs_read+0x15d/0x180
+[  135.293745]  ksys_read+0xa1/0xe0
+[  135.294461]  do_syscall_64+0x3c/0x80
+[  135.295284]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+This patch simply adds an extra check in __ext4_ext_check(), verifying that
+eh_entries is not 0 when eh_depth is > 0.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215941
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216283
+Cc: Baokun Li <libaokun1@huawei.com>
+Cc: stable@kernel.org
+Signed-off-by: Luís Henriques <lhenriques@suse.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20220822094235.2690-1-lhenriques@suse.de
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/extents.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -459,6 +459,10 @@ static int __ext4_ext_check(const char *
+ 		error_msg = "invalid eh_entries";
+ 		goto corrupted;
+ 	}
++	if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
++		error_msg = "eh_entries is 0 but eh_depth is > 0";
++		goto corrupted;
++	}
+ 	if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
+ 		error_msg = "invalid extent entries";
+ 		goto corrupted;
diff --git a/queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch b/queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch
new file mode 100644
index 00000000000..f206c53b722
--- /dev/null
+++ b/queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch
@@ -0,0 +1,74 @@
+From 80fa46d6b9e7b1527bfd2197d75431fd9c382161 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Thu, 1 Sep 2022 18:03:14 -0400
+Subject: ext4: limit the number of retries after discarding preallocations blocks
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 80fa46d6b9e7b1527bfd2197d75431fd9c382161 upstream.
+
+This patch avoids threads live-locking for hours when a large number of
+threads are competing over the last few free extents as the blocks are
+getting added and removed from preallocation pools.  From our bug
+reporter:
+
+   A reliable way for triggering this has multiple writers
+   continuously write() to files when the filesystem is full, while
+   small amounts of space are freed (e.g. by truncating a large file
+   -1MiB at a time). In the local filesystem, this can be done by
+   simply not checking the return code of write (0) and/or the error
+   (ENOSPACE) that is set. Over NFS with an async mount, even clients
+   with proper error checking will behave this way since the linux NFS
+   client implementation will not propagate the server errors [the
+   write syscalls immediately return success] until the file handle is
+   closed. This leads to a situation where NFS clients send a
+   continuous stream of WRITE rpcs which result in ERRNOSPACE -- but
+   since the client isn't seeing this, the stream of writes continues
+   at maximum network speed.
+
+   When some space does appear, multiple writers will all attempt to
+   claim it for their current write. For NFS, we may see dozens to
+   hundreds of threads that do this.
+
+   The real-world scenario of this is database backup tooling (in
+   particular, github.com/mdkent/percona-xtrabackup) which may write
+   large files (>1TiB) to NFS for safe keeping. Some temporary files
+   are written, rewound, and read back -- all before closing the file
+   handle (the temp file is actually unlinked, to trigger automatic
+   deletion on close/crash.) An application like this operating on an
+   async NFS mount will not see an error code until TiB have been
+   written/read.
+
+   The lockup was observed when running this database backup on large
+   filesystems (64 TiB in this case) with a high number of block
+   groups and no free space. Fragmentation is generally not a factor
+   in this filesystem (~thousands of large files, mostly contiguous
+   except for the parts written while the filesystem is at capacity.)
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -4959,6 +4959,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
+ 	ext4_fsblk_t block = 0;
+ 	unsigned int inquota = 0;
+ 	unsigned int reserv_clstrs = 0;
++	int retries = 0;
+ 	u64 seq;
+ 
+ 	might_sleep();
+@@ -5061,7 +5062,8 @@ repeat:
+ 			ar->len = ac->ac_b_ex.fe_len;
+ 		}
+ 	} else {
+-		if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
++		if (++retries < 3 &&
++		    ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
+ 			goto repeat;
+ 		/*
+ 		 * If block allocation fails then the pa allocated above
diff --git a/queue-5.10/series b/queue-5.10/series
index 60ee4befa84..75e8a3b8444 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -135,3 +135,6 @@ i2c-imx-if-pm_runtime_get_sync-returned-1-device-acc.patch
 i2c-mlxbf-incorrect-base-address-passed-during-io-wr.patch
 i2c-mlxbf-prevent-stack-overflow-in-mlxbf_i2c_smbus_.patch
 i2c-mlxbf-fix-frequency-calculation.patch
+devdax-fix-soft-reservation-memory-description.patch
+ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch
+ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch
-- 
2.47.3