--- /dev/null
+From 67feaba413ec68daf4124e9870878899b4ed9a0e Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Fri, 23 Sep 2022 15:05:56 -0700
+Subject: devdax: Fix soft-reservation memory description
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+commit 67feaba413ec68daf4124e9870878899b4ed9a0e upstream.
+
+The "hmem" platform-devices that are created to represent the
+platform-advertised "Soft Reserved" memory ranges end up inserting a
+resource that causes the iomem_resource tree to look like this:
+
+340000000-43fffffff : hmem.0
+ 340000000-43fffffff : Soft Reserved
+ 340000000-43fffffff : dax0.0
+
+This is because insert_resource() reparents ranges when they completely
+intersect an existing range.
+
+This matters because code that uses region_intersects() to scan for a
+given IORES_DESC will only check that top-level 'hmem.0' resource and
+not the 'Soft Reserved' descendant.
+
+So, to support EINJ (via einj_error_inject()) to inject errors into
+memory hosted by a dax-device, be sure to describe the memory as
+IORES_DESC_SOFT_RESERVED. This is a follow-on to:
+
+commit b13a3e5fd40b ("ACPI: APEI: Fix _EINJ vs EFI_MEMORY_SP")
+
+...that fixed EINJ support for "Soft Reserved" ranges in the first
+instance.
+
+Fixes: 262b45ae3ab4 ("x86/efi: EFI soft reservation to E820 enumeration")
+Reported-by: Ricardo Sandoval Torres <ricardo.sandoval.torres@intel.com>
+Tested-by: Ricardo Sandoval Torres <ricardo.sandoval.torres@intel.com>
+Cc: <stable@vger.kernel.org>
+Cc: Tony Luck <tony.luck@intel.com>
+Cc: Omar Avelar <omar.avelar@intel.com>
+Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Mark Gross <markgross@kernel.org>
+Link: https://lore.kernel.org/r/166397075670.389916.7435722208896316387.stgit@dwillia2-xfh.jf.intel.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dax/hmem/device.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/dax/hmem/device.c
++++ b/drivers/dax/hmem/device.c
+@@ -15,6 +15,7 @@ void hmem_register_device(int target_nid
+ .start = r->start,
+ .end = r->end,
+ .flags = IORESOURCE_MEM,
++ .desc = IORES_DESC_SOFT_RESERVED,
+ };
+ struct platform_device *pdev;
+ struct memregion_info info;
--- /dev/null
+From 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= <lhenriques@suse.de>
+Date: Mon, 22 Aug 2022 10:42:35 +0100
+Subject: ext4: fix bug in extents parsing when eh_entries == 0 and eh_depth > 0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Luís Henriques <lhenriques@suse.de>
+
+commit 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 upstream.
+
+When walking through an inode's extents, the ext4_ext_binsearch_idx() function
+assumes that the extent header has been previously validated. However, there
+are no checks that verify that the number of entries (eh->eh_entries) is
+non-zero when depth is > 0. And this will lead to problems because the
+EXT_FIRST_INDEX() and EXT_LAST_INDEX() will return garbage and result in this:
+
+[ 135.245946] ------------[ cut here ]------------
+[ 135.247579] kernel BUG at fs/ext4/extents.c:2258!
+[ 135.249045] invalid opcode: 0000 [#1] PREEMPT SMP
+[ 135.250320] CPU: 2 PID: 238 Comm: tmp118 Not tainted 5.19.0-rc8+ #4
+[ 135.252067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
+[ 135.255065] RIP: 0010:ext4_ext_map_blocks+0xc20/0xcb0
+[ 135.256475] Code:
+[ 135.261433] RSP: 0018:ffffc900005939f8 EFLAGS: 00010246
+[ 135.262847] RAX: 0000000000000024 RBX: ffffc90000593b70 RCX: 0000000000000023
+[ 135.264765] RDX: ffff8880038e5f10 RSI: 0000000000000003 RDI: ffff8880046e922c
+[ 135.266670] RBP: ffff8880046e9348 R08: 0000000000000001 R09: ffff888002ca580c
+[ 135.268576] R10: 0000000000002602 R11: 0000000000000000 R12: 0000000000000024
+[ 135.270477] R13: 0000000000000000 R14: 0000000000000024 R15: 0000000000000000
+[ 135.272394] FS: 00007fdabdc56740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000
+[ 135.274510] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 135.276075] CR2: 00007ffc26bd4f00 CR3: 0000000006261004 CR4: 0000000000170ea0
+[ 135.277952] Call Trace:
+[ 135.278635] <TASK>
+[ 135.279247] ? preempt_count_add+0x6d/0xa0
+[ 135.280358] ? percpu_counter_add_batch+0x55/0xb0
+[ 135.281612] ? _raw_read_unlock+0x18/0x30
+[ 135.282704] ext4_map_blocks+0x294/0x5a0
+[ 135.283745] ? xa_load+0x6f/0xa0
+[ 135.284562] ext4_mpage_readpages+0x3d6/0x770
+[ 135.285646] read_pages+0x67/0x1d0
+[ 135.286492] ? folio_add_lru+0x51/0x80
+[ 135.287441] page_cache_ra_unbounded+0x124/0x170
+[ 135.288510] filemap_get_pages+0x23d/0x5a0
+[ 135.289457] ? path_openat+0xa72/0xdd0
+[ 135.290332] filemap_read+0xbf/0x300
+[ 135.291158] ? _raw_spin_lock_irqsave+0x17/0x40
+[ 135.292192] new_sync_read+0x103/0x170
+[ 135.293014] vfs_read+0x15d/0x180
+[ 135.293745] ksys_read+0xa1/0xe0
+[ 135.294461] do_syscall_64+0x3c/0x80
+[ 135.295284] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+This patch simply adds an extra check in __ext4_ext_check(), verifying that
+eh_entries is not 0 when eh_depth is > 0.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=215941
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216283
+Cc: Baokun Li <libaokun1@huawei.com>
+Cc: stable@kernel.org
+Signed-off-by: Luís Henriques <lhenriques@suse.de>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Baokun Li <libaokun1@huawei.com>
+Link: https://lore.kernel.org/r/20220822094235.2690-1-lhenriques@suse.de
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/extents.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -460,6 +460,10 @@ static int __ext4_ext_check(const char *
+ error_msg = "invalid eh_entries";
+ goto corrupted;
+ }
++ if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
++ error_msg = "eh_entries is 0 but eh_depth is > 0";
++ goto corrupted;
++ }
+ if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
+ error_msg = "invalid extent entries";
+ goto corrupted;
--- /dev/null
+From 80fa46d6b9e7b1527bfd2197d75431fd9c382161 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Thu, 1 Sep 2022 18:03:14 -0400
+Subject: ext4: limit the number of retries after discarding preallocations blocks
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 80fa46d6b9e7b1527bfd2197d75431fd9c382161 upstream.
+
+This patch avoids threads live-locking for hours when a large number of
+threads are competing over the last few free extents as blocks keep
+getting added to and removed from preallocation pools. From our bug
+reporter:
+
+ A reliable way for triggering this has multiple writers
+ continuously write() to files when the filesystem is full, while
+ small amounts of space are freed (e.g. by truncating a large file
+ -1MiB at a time). In the local filesystem, this can be done by
+ simply not checking the return code of write (0) and/or the error
+ (ENOSPC) that is set. Over NFS with an async mount, even clients
+ with proper error checking will behave this way since the linux NFS
+ client implementation will not propagate the server errors [the
+ write syscalls immediately return success] until the file handle is
+ closed. This leads to a situation where NFS clients send a
+ continuous stream of WRITE rpcs which result in ENOSPC -- but
+ since the client isn't seeing this, the stream of writes continues
+ at maximum network speed.
+
+ When some space does appear, multiple writers will all attempt to
+ claim it for their current write. For NFS, we may see dozens to
+ hundreds of threads that do this.
+
+ The real-world scenario of this is database backup tooling (in
+ particular, github.com/mdkent/percona-xtrabackup) which may write
+ large files (>1TiB) to NFS for safe keeping. Some temporary files
+ are written, rewound, and read back -- all before closing the file
+ handle (the temp file is actually unlinked, to trigger automatic
+ deletion on close/crash.) An application like this operating on an
+ async NFS mount will not see an error code until TiB have been
+ written/read.
+
+ The lockup was observed when running this database backup on large
+ filesystems (64 TiB in this case) with a high number of block
+ groups and no free space. Fragmentation is generally not a factor
+ in this filesystem (~thousands of large files, mostly contiguous
+ except for the parts written while the filesystem is at capacity.)
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -5559,6 +5559,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
+ ext4_fsblk_t block = 0;
+ unsigned int inquota = 0;
+ unsigned int reserv_clstrs = 0;
++ int retries = 0;
+ u64 seq;
+
+ might_sleep();
+@@ -5661,7 +5662,8 @@ repeat:
+ ar->len = ac->ac_b_ex.fe_len;
+ }
+ } else {
+- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
++ if (++retries < 3 &&
++ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq))
+ goto repeat;
+ /*
+ * If block allocation fails then the pa allocated above
--- /dev/null
+From 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Thu, 8 Sep 2022 11:21:24 +0200
+Subject: ext4: make mballoc try target group first even with mb_optimize_scan
+
+From: Jan Kara <jack@suse.cz>
+
+commit 4fca50d440cc5d4dc570ad5484cc0b70b381bc2a upstream.
+
+One of the side-effects of mb_optimize_scan was that the optimized
+functions to select the next group to try were called even before we tried
+the goal group. As a result, we no longer allocate files close to their
+corresponding inodes, nor do we try to expand the currently allocated
+extent in the same group. This results in a reaim regression of up to 8%
+with the workfile.disk workload with many clients on my test
+machine:
+
+ baseline mb_optimize_scan
+Hmean disk-1 2114.16 ( 0.00%) 2099.37 ( -0.70%)
+Hmean disk-41 87794.43 ( 0.00%) 83787.47 * -4.56%*
+Hmean disk-81 148170.73 ( 0.00%) 135527.05 * -8.53%*
+Hmean disk-121 177506.11 ( 0.00%) 166284.93 * -6.32%*
+Hmean disk-161 220951.51 ( 0.00%) 207563.39 * -6.06%*
+Hmean disk-201 208722.74 ( 0.00%) 203235.59 ( -2.63%)
+Hmean disk-241 222051.60 ( 0.00%) 217705.51 ( -1.96%)
+Hmean disk-281 252244.17 ( 0.00%) 241132.72 * -4.41%*
+Hmean disk-321 255844.84 ( 0.00%) 245412.84 * -4.08%*
+
+Also, this is causing a huge regression (time increased by a factor of 5 or
+so) when untarring an archive with lots of small files on some eMMC storage
+cards.
+
+Fix the problem by making sure we try goal group first.
+
+Fixes: 196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
+CC: stable@kernel.org
+Reported-and-tested-by: Stefan Wahren <stefan.wahren@i2se.com>
+Tested-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/all/20220727105123.ckwrhbilzrxqpt24@quack3/
+Link: https://lore.kernel.org/all/0d81a7c2-46b7-6010-62a4-3e6cfc1628d6@i2se.com/
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220908092136.11770-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/mballoc.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -1049,8 +1049,10 @@ static void ext4_mb_choose_next_group(st
+ {
+ *new_cr = ac->ac_criteria;
+
+- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining)
++ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
++ *group = next_linear_group(ac, *group, ngroups);
+ return;
++ }
+
+ if (*new_cr == 0) {
+ ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
+@@ -2630,7 +2632,7 @@ static noinline_for_stack int
+ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+ {
+ ext4_group_t prefetch_grp = 0, ngroups, group, i;
+- int cr = -1;
++ int cr = -1, new_cr;
+ int err = 0, first_err = 0;
+ unsigned int nr = 0, prefetch_ios = 0;
+ struct ext4_sb_info *sbi;
+@@ -2705,13 +2707,11 @@ repeat:
+ ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
+ prefetch_grp = group;
+
+- for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups),
+- i++) {
+- int ret = 0, new_cr;
++ for (i = 0, new_cr = cr; i < ngroups; i++,
++ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
++ int ret = 0;
+
+ cond_resched();
+-
+- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups);
+ if (new_cr != cr) {
+ cr = new_cr;
+ goto repeat;
certs-make-system-keyring-depend-on-built-in-x509-pa.patch
makefile.debug-set-g-unconditional-on-config_debug_i.patch
makefile.debug-re-enable-debug-info-for-.s-files.patch
+devdax-fix-soft-reservation-memory-description.patch
+ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch
+ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch
+ext4-make-mballoc-try-target-group-first-even-with-mb_optimize_scan.patch