From 696f94896fa279af4d5080436dba21de9404c7b4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 26 Sep 2022 08:34:55 +0200 Subject: [PATCH] 5.10-stable patches added patches: devdax-fix-soft-reservation-memory-description.patch ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch --- ...-soft-reservation-memory-description.patch | 58 +++++++++++++ ...ing-when-eh_entries-0-and-eh_depth-0.patch | 85 +++++++++++++++++++ ...ter-discarding-preallocations-blocks.patch | 74 ++++++++++++++++ queue-5.10/series | 3 + 4 files changed, 220 insertions(+) create mode 100644 queue-5.10/devdax-fix-soft-reservation-memory-description.patch create mode 100644 queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch create mode 100644 queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch diff --git a/queue-5.10/devdax-fix-soft-reservation-memory-description.patch b/queue-5.10/devdax-fix-soft-reservation-memory-description.patch new file mode 100644 index 00000000000..ca5964fdcd3 --- /dev/null +++ b/queue-5.10/devdax-fix-soft-reservation-memory-description.patch @@ -0,0 +1,58 @@ +From 67feaba413ec68daf4124e9870878899b4ed9a0e Mon Sep 17 00:00:00 2001 +From: Dan Williams +Date: Fri, 23 Sep 2022 15:05:56 -0700 +Subject: devdax: Fix soft-reservation memory description + +From: Dan Williams + +commit 67feaba413ec68daf4124e9870878899b4ed9a0e upstream. + +The "hmem" platform-devices that are created to represent the +platform-advertised "Soft Reserved" memory ranges end up inserting a +resource that causes the iomem_resource tree to look like this: + +340000000-43fffffff : hmem.0 + 340000000-43fffffff : Soft Reserved + 340000000-43fffffff : dax0.0 + +This is because insert_resource() reparents ranges when they completely +intersect an existing range. 
+ +This matters because code that uses region_intersects() to scan for a +given IORES_DESC will only check that top-level 'hmem.0' resource and +not the 'Soft Reserved' descendant. + +So, to support EINJ (via einj_error_inject()) to inject errors into +memory hosted by a dax-device, be sure to describe the memory as +IORES_DESC_SOFT_RESERVED. This is a follow-on to: + +commit b13a3e5fd40b ("ACPI: APEI: Fix _EINJ vs EFI_MEMORY_SP") + +...that fixed EINJ support for "Soft Reserved" ranges in the first +instance. + +Fixes: 262b45ae3ab4 ("x86/efi: EFI soft reservation to E820 enumeration") +Reported-by: Ricardo Sandoval Torres +Tested-by: Ricardo Sandoval Torres +Cc: +Cc: Tony Luck +Cc: Omar Avelar +Cc: Rafael J. Wysocki +Cc: Mark Gross +Link: https://lore.kernel.org/r/166397075670.389916.7435722208896316387.stgit@dwillia2-xfh.jf.intel.com +Signed-off-by: Dan Williams +Signed-off-by: Greg Kroah-Hartman +--- + drivers/dax/hmem/device.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/dax/hmem/device.c ++++ b/drivers/dax/hmem/device.c +@@ -15,6 +15,7 @@ void hmem_register_device(int target_nid + .start = r->start, + .end = r->end, + .flags = IORESOURCE_MEM, ++ .desc = IORES_DESC_SOFT_RESERVED, + }; + struct platform_device *pdev; + struct memregion_info info; diff --git a/queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch b/queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch new file mode 100644 index 00000000000..1527f7c5e9d --- /dev/null +++ b/queue-5.10/ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch @@ -0,0 +1,85 @@ +From 29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= +Date: Mon, 22 Aug 2022 10:42:35 +0100 +Subject: ext4: fix bug in extents parsing when eh_entries == 0 and eh_depth > 0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Luís Henriques + +commit 
29a5b8a137ac8eb410cc823653a29ac0e7b7e1b0 upstream. + +When walking through an inode extents, the ext4_ext_binsearch_idx() function +assumes that the extent header has been previously validated. However, there +are no checks that verify that the number of entries (eh->eh_entries) is +non-zero when depth is > 0. And this will lead to problems because the +EXT_FIRST_INDEX() and EXT_LAST_INDEX() will return garbage and result in this: + +[ 135.245946] ------------[ cut here ]------------ +[ 135.247579] kernel BUG at fs/ext4/extents.c:2258! +[ 135.249045] invalid opcode: 0000 [#1] PREEMPT SMP +[ 135.250320] CPU: 2 PID: 238 Comm: tmp118 Not tainted 5.19.0-rc8+ #4 +[ 135.252067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014 +[ 135.255065] RIP: 0010:ext4_ext_map_blocks+0xc20/0xcb0 +[ 135.256475] Code: +[ 135.261433] RSP: 0018:ffffc900005939f8 EFLAGS: 00010246 +[ 135.262847] RAX: 0000000000000024 RBX: ffffc90000593b70 RCX: 0000000000000023 +[ 135.264765] RDX: ffff8880038e5f10 RSI: 0000000000000003 RDI: ffff8880046e922c +[ 135.266670] RBP: ffff8880046e9348 R08: 0000000000000001 R09: ffff888002ca580c +[ 135.268576] R10: 0000000000002602 R11: 0000000000000000 R12: 0000000000000024 +[ 135.270477] R13: 0000000000000000 R14: 0000000000000024 R15: 0000000000000000 +[ 135.272394] FS: 00007fdabdc56740(0000) GS:ffff88807dd00000(0000) knlGS:0000000000000000 +[ 135.274510] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 135.276075] CR2: 00007ffc26bd4f00 CR3: 0000000006261004 CR4: 0000000000170ea0 +[ 135.277952] Call Trace: +[ 135.278635] +[ 135.279247] ? preempt_count_add+0x6d/0xa0 +[ 135.280358] ? percpu_counter_add_batch+0x55/0xb0 +[ 135.281612] ? _raw_read_unlock+0x18/0x30 +[ 135.282704] ext4_map_blocks+0x294/0x5a0 +[ 135.283745] ? xa_load+0x6f/0xa0 +[ 135.284562] ext4_mpage_readpages+0x3d6/0x770 +[ 135.285646] read_pages+0x67/0x1d0 +[ 135.286492] ? 
folio_add_lru+0x51/0x80 +[ 135.287441] page_cache_ra_unbounded+0x124/0x170 +[ 135.288510] filemap_get_pages+0x23d/0x5a0 +[ 135.289457] ? path_openat+0xa72/0xdd0 +[ 135.290332] filemap_read+0xbf/0x300 +[ 135.291158] ? _raw_spin_lock_irqsave+0x17/0x40 +[ 135.292192] new_sync_read+0x103/0x170 +[ 135.293014] vfs_read+0x15d/0x180 +[ 135.293745] ksys_read+0xa1/0xe0 +[ 135.294461] do_syscall_64+0x3c/0x80 +[ 135.295284] entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +This patch simply adds an extra check in __ext4_ext_check(), verifying that +eh_entries is not 0 when eh_depth is > 0. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=215941 +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216283 +Cc: Baokun Li +Cc: stable@kernel.org +Signed-off-by: Luís Henriques +Reviewed-by: Jan Kara +Reviewed-by: Baokun Li +Link: https://lore.kernel.org/r/20220822094235.2690-1-lhenriques@suse.de +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -459,6 +459,10 @@ static int __ext4_ext_check(const char * + error_msg = "invalid eh_entries"; + goto corrupted; + } ++ if (unlikely((eh->eh_entries == 0) && (depth > 0))) { ++ error_msg = "eh_entries is 0 but eh_depth is > 0"; ++ goto corrupted; ++ } + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { + error_msg = "invalid extent entries"; + goto corrupted; diff --git a/queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch b/queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch new file mode 100644 index 00000000000..f206c53b722 --- /dev/null +++ b/queue-5.10/ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch @@ -0,0 +1,74 @@ +From 80fa46d6b9e7b1527bfd2197d75431fd9c382161 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Thu, 1 Sep 2022 18:03:14 -0400 +Subject: ext4: limit the number of retries 
after discarding preallocations blocks + +From: Theodore Ts'o +commit 80fa46d6b9e7b1527bfd2197d75431fd9c382161 upstream. + +This patch avoids threads live-locking for hours when a large number +of threads are competing over the last few free extents as blocks +get added and removed from preallocation pools. From our bug +reporter: + + A reliable way for triggering this has multiple writers + continuously write() to files when the filesystem is full, while + small amounts of space are freed (e.g. by truncating a large file + -1MiB at a time). In the local filesystem, this can be done by + simply not checking the return code of write (0) and/or the error + (ENOSPACE) that is set. Over NFS with an async mount, even clients + with proper error checking will behave this way since the linux NFS + client implementation will not propagate the server errors [the + write syscalls immediately return success] until the file handle is + closed. This leads to a situation where NFS clients send a + continuous stream of WRITE rpcs which result in ERRNOSPACE -- but + since the client isn't seeing this, the stream of writes continues + at maximum network speed. + + When some space does appear, multiple writers will all attempt to + claim it for their current write. For NFS, we may see dozens to + hundreds of threads that do this. + + The real-world scenario of this is database backup tooling (in + particular, github.com/mdkent/percona-xtrabackup) which may write + large files (>1TiB) to NFS for safe keeping. Some temporary files + are written, rewound, and read back -- all before closing the file + handle (the temp file is actually unlinked, to trigger automatic + deletion on close/crash.) An application like this operating on an + async NFS mount will not see an error code until TiB have been + written/read. + + The lockup was observed when running this database backup on large + filesystems (64 TiB in this case) with a high number of block + groups and no free space. 
Fragmentation is generally not a factor + in this filesystem (~thousands of large files, mostly contiguous + except for the parts written while the filesystem is at capacity.) + +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4959,6 +4959,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t + ext4_fsblk_t block = 0; + unsigned int inquota = 0; + unsigned int reserv_clstrs = 0; ++ int retries = 0; + u64 seq; + + might_sleep(); +@@ -5061,7 +5062,8 @@ repeat: + ar->len = ac->ac_b_ex.fe_len; + } + } else { +- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) ++ if (++retries < 3 && ++ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) + goto repeat; + /* + * If block allocation fails then the pa allocated above diff --git a/queue-5.10/series b/queue-5.10/series index 60ee4befa84..75e8a3b8444 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -135,3 +135,6 @@ i2c-imx-if-pm_runtime_get_sync-returned-1-device-acc.patch i2c-mlxbf-incorrect-base-address-passed-during-io-wr.patch i2c-mlxbf-prevent-stack-overflow-in-mlxbf_i2c_smbus_.patch i2c-mlxbf-fix-frequency-calculation.patch +devdax-fix-soft-reservation-memory-description.patch +ext4-fix-bug-in-extents-parsing-when-eh_entries-0-and-eh_depth-0.patch +ext4-limit-the-number-of-retries-after-discarding-preallocations-blocks.patch -- 2.47.3