From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 23 Nov 2023 12:21:34 +0000 (+0000)
Subject: 4.14-stable patches
X-Git-Tag: v4.14.331~89
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2633ce4c5993042214eaf9d9b317afecfc9565c3;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	jbd2-fix-potential-data-lost-in-recovering-journal-raced-with-synchronizing-fs-bdev.patch
	mcb-fix-error-handling-for-different-scenarios-when-parsing.patch
	s390-cmma-fix-handling-of-swapper_pg_dir-and-invalid_pg_dir.patch
	s390-cmma-fix-initial-kernel-address-space-page-table-walk.patch
---

diff --git a/queue-4.14/jbd2-fix-potential-data-lost-in-recovering-journal-raced-with-synchronizing-fs-bdev.patch b/queue-4.14/jbd2-fix-potential-data-lost-in-recovering-journal-raced-with-synchronizing-fs-bdev.patch
new file mode 100644
index 00000000000..0c9fb31120b
--- /dev/null
+++ b/queue-4.14/jbd2-fix-potential-data-lost-in-recovering-journal-raced-with-synchronizing-fs-bdev.patch
@@ -0,0 +1,94 @@
+From 61187fce8600e8ef90e601be84f9d0f3222c1206 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Tue, 19 Sep 2023 09:25:25 +0800
+Subject: jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 61187fce8600e8ef90e601be84f9d0f3222c1206 upstream.
+
+JBD2 makes sure journal data has landed on the fs device by calling
+sync_blockdev(). However, another process could intercept the EIO
+information from the bdev's mapping, which makes journal recovery succeed
+even if EIO occurs while data is written back to the fs device.
+
+We found this problem in our product, where iscsi + multipath is used as the
+block device for ext4. An unstable network may trigger kpartx to rescan
+partitions in the device mapper layer. The detailed process is as follows:
+
+  mount          kpartx          irq
+jbd2_journal_recover
+ do_one_pass
+  memcpy(nbh->b_data, obh->b_data) // copy data to fs dev from journal
+  mark_buffer_dirty // mark bh dirty
+         vfs_read
+	  generic_file_read_iter // dio
+	   filemap_write_and_wait_range
+	    __filemap_fdatawrite_range
+	     do_writepages
+	      block_write_full_folio
+	       submit_bh_wbc
+	            >>  EIO occurs in disk  <<
+	                     end_buffer_async_write
+			      mark_buffer_write_io_error
+			       mapping_set_error
+			        set_bit(AS_EIO, &mapping->flags) // set!
+	    filemap_check_errors
+	     test_and_clear_bit(AS_EIO, &mapping->flags) // clear!
+ err2 = sync_blockdev
+  filemap_write_and_wait
+   filemap_check_errors
+    test_and_clear_bit(AS_EIO, &mapping->flags) // false
+ err2 = 0
+
+The filesystem is mounted successfully even though data from the journal
+failed to be written to disk, and ext4/ocfs2 could become corrupted.
+
+Fix it by comparing the wb_err state in fs block device before recovering
+and after recovering.
+
+A reproducer can be found in the kernel bugzilla referenced below.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217888
+Cc: stable@vger.kernel.org
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230919012525.1783108-1-chengzhihao1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/jbd2/recovery.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/fs/jbd2/recovery.c
++++ b/fs/jbd2/recovery.c
+@@ -250,6 +250,8 @@ int jbd2_journal_recover(journal_t *jour
+ 	journal_superblock_t *	sb;
+ 
+ 	struct recovery_info	info;
++	errseq_t		wb_err;
++	struct address_space	*mapping;
+ 
+ 	memset(&info, 0, sizeof(info));
+ 	sb = journal->j_superblock;
+@@ -267,6 +269,9 @@ int jbd2_journal_recover(journal_t *jour
+ 		return 0;
+ 	}
+ 
++	wb_err = 0;
++	mapping = journal->j_fs_dev->bd_inode->i_mapping;
++	errseq_check_and_advance(&mapping->wb_err, &wb_err);
+ 	err = do_one_pass(journal, &info, PASS_SCAN);
+ 	if (!err)
+ 		err = do_one_pass(journal, &info, PASS_REVOKE);
+@@ -287,6 +292,9 @@ int jbd2_journal_recover(journal_t *jour
+ 	err2 = sync_blockdev(journal->j_fs_dev);
+ 	if (!err)
+ 		err = err2;
++	err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
++	if (!err)
++		err = err2;
+ 	/* Make sure all replayed data is on permanent storage */
+ 	if (journal->j_flags & JBD2_BARRIER) {
+ 		err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
diff --git a/queue-4.14/mcb-fix-error-handling-for-different-scenarios-when-parsing.patch b/queue-4.14/mcb-fix-error-handling-for-different-scenarios-when-parsing.patch
new file mode 100644
index 00000000000..43e60264f79
--- /dev/null
+++ b/queue-4.14/mcb-fix-error-handling-for-different-scenarios-when-parsing.patch
@@ -0,0 +1,49 @@
+From 63ba2d07b4be72b94216d20561f43e1150b25d98 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Sanju=C3=A1n=20Garc=C3=ADa=2C=20Jorge?=
+ <Jorge.SanjuanGarcia@duagon.com>
+Date: Thu, 19 Oct 2023 14:15:34 +0000
+Subject: mcb: fix error handling for different scenarios when parsing
+
+From: Sanjuán García, Jorge <Jorge.SanjuanGarcia@duagon.com>
+
+commit 63ba2d07b4be72b94216d20561f43e1150b25d98 upstream.
+
+chameleon_parse_gdd() may fail for different reasons and end up
+in the err tag. Make sure we at least always free the mcb_device
+allocated with mcb_alloc_dev().
+
+If mcb_device_register() fails, make sure to give up the reference
+in the same place the device was added.
+
+Fixes: 728ac3389296 ("mcb: mcb-parse: fix error handing in chameleon_parse_gdd()")
+Cc: stable <stable@kernel.org>
+Reviewed-by: Jose Javier Rodriguez Barbarin <JoseJavier.Rodriguez@duagon.com>
+Signed-off-by: Jorge Sanjuan Garcia <jorge.sanjuangarcia@duagon.com>
+Link: https://lore.kernel.org/r/20231019141434.57971-2-jorge.sanjuangarcia@duagon.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mcb/mcb-core.c  |    1 +
+ drivers/mcb/mcb-parse.c |    2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/mcb/mcb-core.c
++++ b/drivers/mcb/mcb-core.c
+@@ -251,6 +251,7 @@ int mcb_device_register(struct mcb_bus *
+ 	return 0;
+ 
+ out:
++	put_device(&dev->dev);
+ 
+ 	return ret;
+ }
+--- a/drivers/mcb/mcb-parse.c
++++ b/drivers/mcb/mcb-parse.c
+@@ -105,7 +105,7 @@ static int chameleon_parse_gdd(struct mc
+ 	return 0;
+ 
+ err:
+-	put_device(&mdev->dev);
++	mcb_free_dev(mdev);
+ 
+ 	return ret;
+ }
diff --git a/queue-4.14/s390-cmma-fix-handling-of-swapper_pg_dir-and-invalid_pg_dir.patch b/queue-4.14/s390-cmma-fix-handling-of-swapper_pg_dir-and-invalid_pg_dir.patch
new file mode 100644
index 00000000000..0ab403e9821
--- /dev/null
+++ b/queue-4.14/s390-cmma-fix-handling-of-swapper_pg_dir-and-invalid_pg_dir.patch
@@ -0,0 +1,45 @@
+From 84bb41d5df48868055d159d9247b80927f1f70f9 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Tue, 24 Oct 2023 10:15:20 +0200
+Subject: s390/cmma: fix handling of swapper_pg_dir and invalid_pg_dir
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit 84bb41d5df48868055d159d9247b80927f1f70f9 upstream.
+
+If the cmma no-dat feature is available the kernel page tables are walked
+to identify and mark all pages which are used for address translation (all
+region, segment, and page tables). In a subsequent loop all other pages are
+marked as "no-dat" pages with the ESSA instruction.
+
+This information is visible to the hypervisor, so that the hypervisor can
+optimize purging of guest TLB entries. All pages used for swapper_pg_dir
+and invalid_pg_dir are incorrectly marked as no-dat, which in turn can
+result in incorrect guest TLB flushes.
+
+Fix this by marking those pages correctly as being used for DAT.
+
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/page-states.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/s390/mm/page-states.c
++++ b/arch/s390/mm/page-states.c
+@@ -204,6 +204,12 @@ void __init cmma_init_nodat(void)
+ 		return;
+ 	/* Mark pages used in kernel page tables */
+ 	mark_kernel_pgd();
++	page = virt_to_page(&swapper_pg_dir);
++	for (i = 0; i < 4; i++)
++		set_bit(PG_arch_1, &page[i].flags);
++	page = virt_to_page(&invalid_pg_dir);
++	for (i = 0; i < 4; i++)
++		set_bit(PG_arch_1, &page[i].flags);
+ 
+ 	/* Set all kernel pages not used for page tables to stable/no-dat */
+ 	for_each_memblock(memory, reg) {
diff --git a/queue-4.14/s390-cmma-fix-initial-kernel-address-space-page-table-walk.patch b/queue-4.14/s390-cmma-fix-initial-kernel-address-space-page-table-walk.patch
new file mode 100644
index 00000000000..4881f314214
--- /dev/null
+++ b/queue-4.14/s390-cmma-fix-initial-kernel-address-space-page-table-walk.patch
@@ -0,0 +1,69 @@
+From 16ba44826a04834d3eeeda4b731c2ea3481062b7 Mon Sep 17 00:00:00 2001
+From: Heiko Carstens <hca@linux.ibm.com>
+Date: Tue, 17 Oct 2023 21:07:03 +0200
+Subject: s390/cmma: fix initial kernel address space page table walk
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+commit 16ba44826a04834d3eeeda4b731c2ea3481062b7 upstream.
+
+If the cmma no-dat feature is available the kernel page tables are walked
+to identify and mark all pages which are used for address translation (all
+region, segment, and page tables). In a subsequent loop all other pages are
+marked as "no-dat" pages with the ESSA instruction.
+
+This information is visible to the hypervisor, so that the hypervisor can
+optimize purging of guest TLB entries. The initial loop however does not
+cover the complete kernel address space. This can result in pages being
+marked as not being used for dynamic address translation, even though they
+are. In turn guest TLB entries incorrectly may not be purged.
+
+Fix this by adjusting the end address of the kernel address range being
+walked.
+
+Cc: <stable@vger.kernel.org>
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/mm/page-states.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/mm/page-states.c
++++ b/arch/s390/mm/page-states.c
+@@ -167,15 +167,22 @@ static void mark_kernel_p4d(pgd_t *pgd,
+ 
+ static void mark_kernel_pgd(void)
+ {
+-	unsigned long addr, next;
++	unsigned long addr, next, max_addr;
+ 	struct page *page;
+ 	pgd_t *pgd;
+ 	int i;
+ 
+ 	addr = 0;
++	/*
++	 * Figure out maximum virtual address accessible with the
++	 * kernel ASCE. This is required to keep the page table walker
++	 * from accessing non-existent entries.
++	 */
++	max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2;
++	max_addr = 1UL << (max_addr * 11 + 31);
+ 	pgd = pgd_offset_k(addr);
+ 	do {
+-		next = pgd_addr_end(addr, MODULES_END);
++		next = pgd_addr_end(addr, max_addr);
+ 		if (pgd_none(*pgd))
+ 			continue;
+ 		if (!pgd_folded(*pgd)) {
+@@ -184,7 +191,7 @@ static void mark_kernel_pgd(void)
+ 				set_bit(PG_arch_1, &page[i].flags);
+ 		}
+ 		mark_kernel_p4d(pgd, addr, next);
+-	} while (pgd++, addr = next, addr != MODULES_END);
++	} while (pgd++, addr = next, addr != max_addr);
+ }
+ 
+ void __init cmma_init_nodat(void)
diff --git a/queue-4.14/series b/queue-4.14/series
index 3f1e7fcf731..cd4140679b3 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -37,3 +37,7 @@ pm-hibernate-use-__get_safe_page-rather-than-touching-the-list.patch
 pm-hibernate-clean-up-sync_read-handling-in-snapshot_write_next.patch
 mmc-meson-gx-remove-setting-of-cmd_cfg_error.patch
 genirq-generic_chip-make-irq_remove_generic_chip-irqdomain-aware.patch
+jbd2-fix-potential-data-lost-in-recovering-journal-raced-with-synchronizing-fs-bdev.patch
+mcb-fix-error-handling-for-different-scenarios-when-parsing.patch
+s390-cmma-fix-initial-kernel-address-space-page-table-walk.patch
+s390-cmma-fix-handling-of-swapper_pg_dir-and-invalid_pg_dir.patch