]>
Commit | Line | Data |
---|---|---|
d2849b99 GKH |
1 | From e6a9467ea14bae8691b0f72c500510c42ea8edb8 Mon Sep 17 00:00:00 2001 |
2 | From: "Darrick J. Wong" <darrick.wong@oracle.com> | |
3 | Date: Thu, 28 Mar 2019 20:43:38 -0700 | |
4 | Subject: ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock | |
5 | ||
6 | From: Darrick J. Wong <darrick.wong@oracle.com> | |
7 | ||
8 | commit e6a9467ea14bae8691b0f72c500510c42ea8edb8 upstream. | |
9 | ||
10 | ocfs2_reflink_inodes_lock() can swap the inode1/inode2 variables so that | |
11 | we always grab cluster locks in order of increasing inode number. | |
12 | ||
13 | Unfortunately, we forget to swap the inode record buffer head pointers | |
14 | when we've done this, which leads to incorrect bookkeepping when we're | |
15 | trying to make the two inodes have the same refcount tree. | |
16 | ||
17 | This has the effect of causing filesystem shutdowns if you're trying to | |
18 | reflink data from inode 100 into inode 97, where inode 100 already has a | |
19 | refcount tree attached and inode 97 doesn't. The reflink code decides | |
20 | to copy the refcount tree pointer from 100 to 97, but uses inode 97's | |
21 | inode record to open the tree root (which it doesn't have) and blows up. | |
22 | This issue causes filesystem shutdowns and metadata corruption! | |
23 | ||
24 | Link: http://lkml.kernel.org/r/20190312214910.GK20533@magnolia | |
25 | Fixes: 29ac8e856cb369 ("ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features") | |
26 | Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> | |
27 | Reviewed-by: Joseph Qi <jiangqi903@gmail.com> | |
28 | Cc: Mark Fasheh <mfasheh@versity.com> | |
29 | Cc: Joel Becker <jlbec@evilplan.org> | |
30 | Cc: Junxiao Bi <junxiao.bi@oracle.com> | |
31 | Cc: Joseph Qi <joseph.qi@huawei.com> | |
32 | Cc: <stable@vger.kernel.org> | |
33 | Signed-off-by: Andrew Morton <akpm@linux-foundation.org> | |
34 | Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> | |
35 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
36 | ||
37 | --- | |
38 | fs/ocfs2/refcounttree.c | 42 ++++++++++++++++++++++++------------------ | |
39 | 1 file changed, 24 insertions(+), 18 deletions(-) | |
40 | ||
41 | --- a/fs/ocfs2/refcounttree.c | |
42 | +++ b/fs/ocfs2/refcounttree.c | |
43 | @@ -4716,22 +4716,23 @@ out: | |
44 | ||
45 | /* Lock an inode and grab a bh pointing to the inode. */ | |
46 | static int ocfs2_reflink_inodes_lock(struct inode *s_inode, | |
47 | - struct buffer_head **bh1, | |
48 | + struct buffer_head **bh_s, | |
49 | struct inode *t_inode, | |
50 | - struct buffer_head **bh2) | |
51 | + struct buffer_head **bh_t) | |
52 | { | |
53 | - struct inode *inode1; | |
54 | - struct inode *inode2; | |
55 | + struct inode *inode1 = s_inode; | |
56 | + struct inode *inode2 = t_inode; | |
57 | struct ocfs2_inode_info *oi1; | |
58 | struct ocfs2_inode_info *oi2; | |
59 | + struct buffer_head *bh1 = NULL; | |
60 | + struct buffer_head *bh2 = NULL; | |
61 | bool same_inode = (s_inode == t_inode); | |
62 | + bool need_swap = (inode1->i_ino > inode2->i_ino); | |
63 | int status; | |
64 | ||
65 | /* First grab the VFS and rw locks. */ | |
66 | lock_two_nondirectories(s_inode, t_inode); | |
67 | - inode1 = s_inode; | |
68 | - inode2 = t_inode; | |
69 | - if (inode1->i_ino > inode2->i_ino) | |
70 | + if (need_swap) | |
71 | swap(inode1, inode2); | |
72 | ||
73 | status = ocfs2_rw_lock(inode1, 1); | |
74 | @@ -4754,17 +4755,13 @@ static int ocfs2_reflink_inodes_lock(str | |
75 | trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, | |
76 | (unsigned long long)oi2->ip_blkno); | |
77 | ||
78 | - if (*bh1) | |
79 | - *bh1 = NULL; | |
80 | - if (*bh2) | |
81 | - *bh2 = NULL; | |
82 | - | |
83 | /* We always want to lock the one with the lower lockid first. */ | |
84 | if (oi1->ip_blkno > oi2->ip_blkno) | |
85 | mlog_errno(-ENOLCK); | |
86 | ||
87 | /* lock id1 */ | |
88 | - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); | |
89 | + status = ocfs2_inode_lock_nested(inode1, &bh1, 1, | |
90 | + OI_LS_REFLINK_TARGET); | |
91 | if (status < 0) { | |
92 | if (status != -ENOENT) | |
93 | mlog_errno(status); | |
94 | @@ -4773,15 +4770,25 @@ static int ocfs2_reflink_inodes_lock(str | |
95 | ||
96 | /* lock id2 */ | |
97 | if (!same_inode) { | |
98 | - status = ocfs2_inode_lock_nested(inode2, bh2, 1, | |
99 | + status = ocfs2_inode_lock_nested(inode2, &bh2, 1, | |
100 | OI_LS_REFLINK_TARGET); | |
101 | if (status < 0) { | |
102 | if (status != -ENOENT) | |
103 | mlog_errno(status); | |
104 | goto out_cl1; | |
105 | } | |
106 | - } else | |
107 | - *bh2 = *bh1; | |
108 | + } else { | |
109 | + bh2 = bh1; | |
110 | + } | |
111 | + | |
112 | + /* | |
113 | + * If we swapped inode order above, we have to swap the buffer heads | |
114 | + * before passing them back to the caller. | |
115 | + */ | |
116 | + if (need_swap) | |
117 | + swap(bh1, bh2); | |
118 | + *bh_s = bh1; | |
119 | + *bh_t = bh2; | |
120 | ||
121 | trace_ocfs2_double_lock_end( | |
122 | (unsigned long long)oi1->ip_blkno, | |
123 | @@ -4791,8 +4798,7 @@ static int ocfs2_reflink_inodes_lock(str | |
124 | ||
125 | out_cl1: | |
126 | ocfs2_inode_unlock(inode1, 1); | |
127 | - brelse(*bh1); | |
128 | - *bh1 = NULL; | |
129 | + brelse(bh1); | |
130 | out_rw2: | |
131 | ocfs2_rw_unlock(inode2, 1); | |
132 | out_i2: |