]>
Commit | Line | Data |
---|---|---|
82094b55 AF |
1 | From 6b791bcc8b2ae21daf95d18cff2f1eca7a64c9a5 Mon Sep 17 00:00:00 2001 |
2 | From: Tao Ma <tao.ma@oracle.com> | |
3 | Date: Fri, 12 Jun 2009 14:18:36 +0800 | |
4 | Subject: [PATCH] ocfs2: Adjust rightmost path in ocfs2_add_branch. | |
5 | References: bnc#528427 | |
6 | ||
7 | In ocfs2_add_branch, we use the rightmost rec of the leaf extent block | |
8 | to generate the e_cpos for the newly added branch. In most cases, it | |
9 | is OK but if the parent extent block's rightmost rec covers more clusters | |
10 | than the leaf does, it will cause a kernel panic if we insert some clusters | |
11 | in it. The message is something like: | |
12 | (7445,1):ocfs2_insert_at_leaf:3775 ERROR: bug expression: | |
13 | le16_to_cpu(el->l_next_free_rec) >= le16_to_cpu(el->l_count) | |
14 | (7445,1):ocfs2_insert_at_leaf:3775 ERROR: inode 66053, depth 0, count 28, | |
15 | next free 28, rec.cpos 270, rec.clusters 1, insert.cpos 275, insert.clusters 1 | |
16 | [<fa7ad565>] ? ocfs2_do_insert_extent+0xb58/0xda0 [ocfs2] | |
17 | [<fa7b08f2>] ? ocfs2_insert_extent+0x5bd/0x6ba [ocfs2] | |
18 | [<fa7b1b8b>] ? ocfs2_add_clusters_in_btree+0x37f/0x564 [ocfs2] | |
19 | ... | |
20 | ||
21 | The panic can be easily reproduced by the following small test case | |
22 | (with bs=512, cs=4K; all the error handling is removed so that it is | |
23 | clear enough for reading). | |
24 | ||
25 | int main(int argc, char **argv) | |
26 | { | |
27 | int fd, i; | |
28 | char buf[5] = "test"; | |
29 | ||
30 | fd = open(argv[1], O_RDWR|O_CREAT); | |
31 | ||
32 | for (i = 0; i < 30; i++) { | |
33 | lseek(fd, 40960 * i, SEEK_SET); | |
34 | write(fd, buf, 5); | |
35 | } | |
36 | ||
37 | ftruncate(fd, 1146880); | |
38 | ||
39 | lseek(fd, 1126400, SEEK_SET); | |
40 | write(fd, buf, 5); | |
41 | ||
42 | close(fd); | |
43 | ||
44 | return 0; | |
45 | } | |
46 | ||
47 | The reason for the panic is that | |
48 | the 30 writes and the ftruncate make the file's extent list look like: | |
49 | ||
50 | Tree Depth: 1 Count: 19 Next Free Rec: 1 | |
51 | ## Offset Clusters Block# | |
52 | 0 0 280 86183 | |
53 | SubAlloc Bit: 7 SubAlloc Slot: 0 | |
54 | Blknum: 86183 Next Leaf: 0 | |
55 | CRC32: 00000000 ECC: 0000 | |
56 | Tree Depth: 0 Count: 28 Next Free Rec: 28 | |
57 | ## Offset Clusters Block# Flags | |
58 | 0 0 1 143368 0x0 | |
59 | 1 10 1 143376 0x0 | |
60 | ... | |
61 | 26 260 1 143576 0x0 | |
62 | 27 270 1 143584 0x0 | |
63 | ||
64 | Now another write at 1126400 (cluster 275), which lands in the gap | |
65 | between 271 and 280, will trigger ocfs2_add_branch, but the result after | |
66 | the function looks like: | |
67 | Tree Depth: 1 Count: 19 Next Free Rec: 2 | |
68 | ## Offset Clusters Block# | |
69 | 0 0 280 86183 | |
70 | 1 271 0 143592 | |
71 | So the extent records intersect, which makes the following operation bug out. | |
72 | ||
73 | This patch just tries to remove the gap before we add the new branch, so that | |
74 | the root (branch) rightmost rec will cover the same right position. So in the | |
75 | above case, before adding the branch, the tree will be changed to | |
76 | Tree Depth: 1 Count: 19 Next Free Rec: 1 | |
77 | ## Offset Clusters Block# | |
78 | 0 0 271 86183 | |
79 | SubAlloc Bit: 7 SubAlloc Slot: 0 | |
80 | Blknum: 86183 Next Leaf: 0 | |
81 | CRC32: 00000000 ECC: 0000 | |
82 | Tree Depth: 0 Count: 28 Next Free Rec: 28 | |
83 | ## Offset Clusters Block# Flags | |
84 | 0 0 1 143368 0x0 | |
85 | 1 10 1 143376 0x0 | |
86 | ... | |
87 | 26 260 1 143576 0x0 | |
88 | 27 270 1 143584 0x0 | |
89 | And after branch add, the tree looks like | |
90 | Tree Depth: 1 Count: 19 Next Free Rec: 2 | |
91 | ## Offset Clusters Block# | |
92 | 0 0 271 86183 | |
93 | 1 271 0 143592 | |
94 | ||
95 | Signed-off-by: Tao Ma <tao.ma@oracle.com> | |
96 | Acked-by: Mark Fasheh <mfasheh@suse.com> | |
97 | Signed-off-by: Joel Becker <joel.becker@oracle.com> | |
98 | Signed-off-by: Mark Fasheh <mfasheh@suse.com> | |
99 | ||
100 | --- | |
101 | fs/ocfs2/alloc.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++--- | |
102 | 1 files changed, 76 insertions(+), 4 deletions(-) | |
103 | ||
104 | Index: linux-2.6.27-SLE11_BRANCH/fs/ocfs2/alloc.c | |
105 | =================================================================== | |
106 | --- linux-2.6.27-SLE11_BRANCH.orig/fs/ocfs2/alloc.c | |
107 | +++ linux-2.6.27-SLE11_BRANCH/fs/ocfs2/alloc.c | |
108 | @@ -414,6 +414,12 @@ struct ocfs2_path { | |
109 | #define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el) | |
110 | #define path_num_items(_path) ((_path)->p_tree_depth + 1) | |
111 | ||
112 | +static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, | |
113 | + u32 cpos); | |
114 | +static void ocfs2_adjust_rightmost_records(struct inode *inode, | |
115 | + handle_t *handle, | |
116 | + struct ocfs2_path *path, | |
117 | + struct ocfs2_extent_rec *insert_rec); | |
118 | /* | |
119 | * Reset the actual path elements so that we can re-use the structure | |
120 | * to build another path. Generally, this involves freeing the buffer | |
121 | @@ -540,6 +546,11 @@ static struct ocfs2_path *ocfs2_new_path | |
122 | return path; | |
123 | } | |
124 | ||
125 | +static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) | |
126 | +{ | |
127 | + return ocfs2_new_path(et->et_root_bh, et->et_root_el); | |
128 | +} | |
129 | + | |
130 | /* | |
131 | * Convenience function to journal all components in a path. | |
132 | */ | |
133 | @@ -838,6 +849,54 @@ static inline u32 ocfs2_sum_rightmost_re | |
134 | } | |
135 | ||
136 | /* | |
137 | + * Change range of the branches in the right most path according to the leaf | |
138 | + * extent block's rightmost record. | |
139 | + */ | |
140 | +static int ocfs2_adjust_rightmost_branch(handle_t *handle, | |
141 | + struct inode *inode, | |
142 | + struct ocfs2_extent_tree *et) | |
143 | +{ | |
144 | + int status; | |
145 | + struct ocfs2_path *path = NULL; | |
146 | + struct ocfs2_extent_list *el; | |
147 | + struct ocfs2_extent_rec *rec; | |
148 | + | |
149 | + path = ocfs2_new_path_from_et(et); | |
150 | + if (!path) { | |
151 | + status = -ENOMEM; | |
152 | + return status; | |
153 | + } | |
154 | + | |
155 | + status = ocfs2_find_path(inode, path, UINT_MAX); | |
156 | + if (status < 0) { | |
157 | + mlog_errno(status); | |
158 | + goto out; | |
159 | + } | |
160 | + | |
161 | + status = ocfs2_extend_trans(handle, path_num_items(path) + | |
162 | + handle->h_buffer_credits); | |
163 | + if (status < 0) { | |
164 | + mlog_errno(status); | |
165 | + goto out; | |
166 | + } | |
167 | + | |
168 | + status = ocfs2_journal_access_path(inode, handle, path); | |
169 | + if (status < 0) { | |
170 | + mlog_errno(status); | |
171 | + goto out; | |
172 | + } | |
173 | + | |
174 | + el = path_leaf_el(path); | |
175 | + rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; | |
176 | + | |
177 | + ocfs2_adjust_rightmost_records(inode, handle, path, rec); | |
178 | + | |
179 | +out: | |
180 | + ocfs2_free_path(path); | |
181 | + return status; | |
182 | +} | |
183 | + | |
184 | +/* | |
185 | * Add an entire tree branch to our inode. eb_bh is the extent block | |
186 | * to start at, if we don't want to start the branch at the dinode | |
187 | * structure. | |
188 | @@ -863,7 +922,7 @@ static int ocfs2_add_branch(struct ocfs2 | |
189 | struct ocfs2_extent_block *eb; | |
190 | struct ocfs2_extent_list *eb_el; | |
191 | struct ocfs2_extent_list *el; | |
192 | - u32 new_cpos; | |
193 | + u32 new_cpos, root_end; | |
194 | ||
195 | mlog_entry_void(); | |
196 | ||
197 | @@ -880,6 +939,27 @@ static int ocfs2_add_branch(struct ocfs2 | |
198 | ||
199 | new_blocks = le16_to_cpu(el->l_tree_depth); | |
200 | ||
201 | + eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; | |
202 | + new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); | |
203 | + root_end = ocfs2_sum_rightmost_rec(et->et_root_el); | |
204 | + | |
205 | + /* | |
206 | + * If there is a gap between the root end and the real end | |
207 | + * of the rightmost leaf block, we need to remove the gap | |
208 | + * between new_cpos and root_end first so that the tree | |
209 | + * is consistent after we add a new branch(it will start | |
210 | + * from new_cpos). | |
211 | + */ | |
212 | + if (root_end > new_cpos) { | |
213 | + mlog(0, "adjust the cluster end from %u to %u\n", | |
214 | + root_end, new_cpos); | |
215 | + status = ocfs2_adjust_rightmost_branch(handle, inode, et); | |
216 | + if (status) { | |
217 | + mlog_errno(status); | |
218 | + goto bail; | |
219 | + } | |
220 | + } | |
221 | + | |
222 | /* allocate the number of new eb blocks we need */ | |
223 | new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *), | |
224 | GFP_KERNEL); | |
225 | @@ -896,9 +976,6 @@ static int ocfs2_add_branch(struct ocfs2 | |
226 | goto bail; | |
227 | } | |
228 | ||
229 | - eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; | |
230 | - new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); | |
231 | - | |
232 | /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be | |
233 | * linked with the rest of the tree. | |
234 | * conversly, new_eb_bhs[0] is the new bottommost leaf. |