]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob
0c9fb31120ba486e2aafb0b26fc473e099ac8962
[thirdparty/kernel/stable-queue.git] /
1 From 61187fce8600e8ef90e601be84f9d0f3222c1206 Mon Sep 17 00:00:00 2001
2 From: Zhihao Cheng <chengzhihao1@huawei.com>
3 Date: Tue, 19 Sep 2023 09:25:25 +0800
4 Subject: jbd2: fix potential data lost in recovering journal raced with synchronizing fs bdev
5
6 From: Zhihao Cheng <chengzhihao1@huawei.com>
7
8 commit 61187fce8600e8ef90e601be84f9d0f3222c1206 upstream.
9
10 JBD2 makes sure journal data has landed on the fs device by calling
11 sync_blockdev(). However, another process could intercept the EIO
12 information from the bdev's mapping, which lets journal recovery succeed
13 even though EIO occurred while the data was written back to the fs device.
14
15 We found this problem in our product, where iSCSI + multipath is used as the
16 block device for ext4. An unstable network may trigger kpartx to rescan
17 partitions in the device mapper layer. The detailed process is as follows:
18
19 mount          kpartx                         irq
20 jbd2_journal_recover
21  do_one_pass
22   memcpy(nbh->b_data, obh->b_data) // copy data to fs dev from journal
23   mark_buffer_dirty // mark bh dirty
24                vfs_read
25                 generic_file_read_iter // dio
26                  filemap_write_and_wait_range
27                   __filemap_fdatawrite_range
28                    do_writepages
29                     block_write_full_folio
30                      submit_bh_wbc
31                           >> EIO occurs in disk <<
32                                               end_buffer_async_write
33                                                mark_buffer_write_io_error
34                                                 mapping_set_error
35                                                  set_bit(AS_EIO, &mapping->flags) // set!
36                   filemap_check_errors
37                    test_and_clear_bit(AS_EIO, &mapping->flags) // clear!
38  err2 = sync_blockdev
39   filemap_write_and_wait
40    filemap_check_errors
41     test_and_clear_bit(AS_EIO, &mapping->flags) // false
42  err2 = 0
43
44 The filesystem is mounted successfully even though data from the journal
45 failed to be written to disk, and ext4/ocfs2 could become corrupted.
46
47 Fix it by comparing the wb_err state of the fs block device before and
48 after recovery.
49
50 A reproducer can be found in the kernel bugzilla referenced below.
51
52 Link: https://bugzilla.kernel.org/show_bug.cgi?id=217888
53 Cc: stable@vger.kernel.org
54 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
55 Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
56 Reviewed-by: Jan Kara <jack@suse.cz>
57 Link: https://lore.kernel.org/r/20230919012525.1783108-1-chengzhihao1@huawei.com
58 Signed-off-by: Theodore Ts'o <tytso@mit.edu>
59 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
60 ---
61 fs/jbd2/recovery.c | 8 ++++++++
62 1 file changed, 8 insertions(+)
63
64 --- a/fs/jbd2/recovery.c
65 +++ b/fs/jbd2/recovery.c
66 @@ -250,6 +250,8 @@ int jbd2_journal_recover(journal_t *jour
67 journal_superblock_t * sb;
68
69 struct recovery_info info;
70 + errseq_t wb_err;
71 + struct address_space *mapping;
72
73 memset(&info, 0, sizeof(info));
74 sb = journal->j_superblock;
75 @@ -267,6 +269,9 @@ int jbd2_journal_recover(journal_t *jour
76 return 0;
77 }
78
79 + wb_err = 0;
80 + mapping = journal->j_fs_dev->bd_inode->i_mapping;
81 + errseq_check_and_advance(&mapping->wb_err, &wb_err);
82 err = do_one_pass(journal, &info, PASS_SCAN);
83 if (!err)
84 err = do_one_pass(journal, &info, PASS_REVOKE);
85 @@ -287,6 +292,9 @@ int jbd2_journal_recover(journal_t *jour
86 err2 = sync_blockdev(journal->j_fs_dev);
87 if (!err)
88 err = err2;
89 + err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
90 + if (!err)
91 + err = err2;
92 /* Make sure all replayed data is on permanent storage */
93 if (journal->j_flags & JBD2_BARRIER) {
94 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);