]> git.ipfire.org Git - people/teissler/ipfire-2.x.git/blob - src/patches/suse-2.6.27.25/patches.suse/ocfs2-Abstract-ocfs2_extent_tree-in-b.patch
Updated xen patches taken from suse.
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.suse / ocfs2-Abstract-ocfs2_extent_tree-in-b.patch
1 From: Tao Ma <tao.ma@oracle.com>
2 Subject: [PATCH 03/16] ocfs2: Abstract ocfs2_extent_tree in b-tree operations.
3 Patch-mainline: 2.6.28?
4 References: FATE302067
5
6 The old extent tree operations assume that the ocfs2_extent_list in
7 ocfs2_dinode is always the tree root.
8 As xattr will also use an ocfs2_extent_list to store a large value
9 for an xattr entry, we refactor the tree operations so that xattr
10 can use them directly.
11
12 The refactoring includes 4 steps:
13 1. Abstract set/get of last_eb_blk and update_clusters since they may
14 be stored in different locations for dinode and xattr.
15 2. Add a new structure named ocfs2_extent_tree to indicate the
16 extent tree the operation will work on.
17 3. Remove all uses of fe_bh and di; use root_bh and root_el in the
18 extent tree instead. So now every fe_bh is replaced with
19 et->root_bh, and el with root_el accordingly.
20 4. Make ocfs2_lock_allocators generic. Until now it could only be used
21 in file extend allocation, but the whole function is also useful when we
22 want to store large EAs.
23
24 Note: This patch doesn't touch ocfs2_commit_truncate() since it is not used
25 for anything other than truncating inode data btrees.
26
27 Signed-off-by: Tao Ma <tao.ma@oracle.com>
28 Signed-off-by: Mark Fasheh <mfasheh@suse.com>
29 ---
30 fs/ocfs2/alloc.c | 508 +++++++++++++++++++++++++++++++++------------------
31 fs/ocfs2/alloc.h | 23 ++-
32 fs/ocfs2/aops.c | 11 +-
33 fs/ocfs2/dir.c | 7 +-
34 fs/ocfs2/file.c | 104 ++---------
35 fs/ocfs2/file.h | 4 -
36 fs/ocfs2/suballoc.c | 82 ++++++++
37 fs/ocfs2/suballoc.h | 5 +
38 8 files changed, 456 insertions(+), 288 deletions(-)
39
40 Index: linux-2.6.26/fs/ocfs2/alloc.c
41 ===================================================================
42 --- linux-2.6.26.orig/fs/ocfs2/alloc.c
43 +++ linux-2.6.26/fs/ocfs2/alloc.c
44 @@ -49,6 +49,143 @@
45
46 #include "buffer_head_io.h"
47
48 +/*
49 + * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract
50 + * the b-tree operations in ocfs2. The b-tree operations are no longer
51 + * limited to ocfs2_dinode. Any data that needs to allocate clusters for
52 + * storage can use the b-tree; it only needs to implement its own
53 + * ocfs2_extent_tree and operations.
54 + *
55 + * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
56 + * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
57 + * functions.
58 + * ocfs2_extent_tree_operations abstract the normal operations we do for
59 + * the root of extent b-tree.
60 + */
61 +struct ocfs2_extent_tree;
62 +
63 +struct ocfs2_extent_tree_operations {
64 + void (*set_last_eb_blk) (struct ocfs2_extent_tree *et, u64 blkno);
65 + u64 (*get_last_eb_blk) (struct ocfs2_extent_tree *et);
66 + void (*update_clusters) (struct inode *inode,
67 + struct ocfs2_extent_tree *et,
68 + u32 new_clusters);
69 + int (*sanity_check) (struct inode *inode, struct ocfs2_extent_tree *et);
70 +};
71 +
72 +struct ocfs2_extent_tree {
73 + enum ocfs2_extent_tree_type type;
74 + struct ocfs2_extent_tree_operations *eops;
75 + struct buffer_head *root_bh;
76 + struct ocfs2_extent_list *root_el;
77 +};
78 +
79 +static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
80 + u64 blkno)
81 +{
82 + struct ocfs2_dinode *di = (struct ocfs2_dinode *)et->root_bh->b_data;
83 +
84 + BUG_ON(et->type != OCFS2_DINODE_EXTENT);
85 + di->i_last_eb_blk = cpu_to_le64(blkno);
86 +}
87 +
88 +static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
89 +{
90 + struct ocfs2_dinode *di = (struct ocfs2_dinode *)et->root_bh->b_data;
91 +
92 + BUG_ON(et->type != OCFS2_DINODE_EXTENT);
93 + return le64_to_cpu(di->i_last_eb_blk);
94 +}
95 +
96 +static void ocfs2_dinode_update_clusters(struct inode *inode,
97 + struct ocfs2_extent_tree *et,
98 + u32 clusters)
99 +{
100 + struct ocfs2_dinode *di =
101 + (struct ocfs2_dinode *)et->root_bh->b_data;
102 +
103 + le32_add_cpu(&di->i_clusters, clusters);
104 + spin_lock(&OCFS2_I(inode)->ip_lock);
105 + OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
106 + spin_unlock(&OCFS2_I(inode)->ip_lock);
107 +}
108 +
109 +static int ocfs2_dinode_sanity_check(struct inode *inode,
110 + struct ocfs2_extent_tree *et)
111 +{
112 + int ret = 0;
113 + struct ocfs2_dinode *di;
114 +
115 + BUG_ON(et->type != OCFS2_DINODE_EXTENT);
116 +
117 + di = (struct ocfs2_dinode *)et->root_bh->b_data;
118 + if (!OCFS2_IS_VALID_DINODE(di)) {
119 + ret = -EIO;
120 + ocfs2_error(inode->i_sb,
121 + "Inode %llu has invalid path root",
122 + (unsigned long long)OCFS2_I(inode)->ip_blkno);
123 + }
124 +
125 + return ret;
126 +}
127 +
128 +static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
129 + .set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
130 + .get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
131 + .update_clusters = ocfs2_dinode_update_clusters,
132 + .sanity_check = ocfs2_dinode_sanity_check,
133 +};
134 +
135 +static struct ocfs2_extent_tree*
136 + ocfs2_new_extent_tree(struct buffer_head *bh,
137 + enum ocfs2_extent_tree_type et_type)
138 +{
139 + struct ocfs2_extent_tree *et;
140 +
141 + et = kzalloc(sizeof(*et), GFP_NOFS);
142 + if (!et)
143 + return NULL;
144 +
145 + et->type = et_type;
146 + get_bh(bh);
147 + et->root_bh = bh;
148 +
149 + /* Currently we only support dinode extents. */
150 + BUG_ON(et->type != OCFS2_DINODE_EXTENT);
151 + if (et_type == OCFS2_DINODE_EXTENT) {
152 + et->root_el = &((struct ocfs2_dinode *)bh->b_data)->id2.i_list;
153 + et->eops = &ocfs2_dinode_et_ops;
154 + }
155 +
156 + return et;
157 +}
158 +
159 +static void ocfs2_free_extent_tree(struct ocfs2_extent_tree *et)
160 +{
161 + if (et) {
162 + brelse(et->root_bh);
163 + kfree(et);
164 + }
165 +}
166 +
167 +static inline void ocfs2_set_last_eb_blk(struct ocfs2_extent_tree *et,
168 + u64 new_last_eb_blk)
169 +{
170 + et->eops->set_last_eb_blk(et, new_last_eb_blk);
171 +}
172 +
173 +static inline u64 ocfs2_get_last_eb_blk(struct ocfs2_extent_tree *et)
174 +{
175 + return et->eops->get_last_eb_blk(et);
176 +}
177 +
178 +static inline void ocfs2_update_clusters(struct inode *inode,
179 + struct ocfs2_extent_tree *et,
180 + u32 clusters)
181 +{
182 + et->eops->update_clusters(inode, et, clusters);
183 +}
184 +
185 static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
186 static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
187 struct ocfs2_extent_block *eb);
188 @@ -205,17 +342,6 @@ static struct ocfs2_path *ocfs2_new_path
189 }
190
191 /*
192 - * Allocate and initialize a new path based on a disk inode tree.
193 - */
194 -static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh)
195 -{
196 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
197 - struct ocfs2_extent_list *el = &di->id2.i_list;
198 -
199 - return ocfs2_new_path(di_bh, el);
200 -}
201 -
202 -/*
203 * Convenience function to journal all components in a path.
204 */
205 static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
206 @@ -368,24 +494,33 @@ struct ocfs2_merge_ctxt {
207 */
208 int ocfs2_num_free_extents(struct ocfs2_super *osb,
209 struct inode *inode,
210 - struct buffer_head *bh)
211 + struct buffer_head *root_bh,
212 + enum ocfs2_extent_tree_type type)
213 {
214 int retval;
215 - struct ocfs2_extent_list *el;
216 + struct ocfs2_extent_list *el = NULL;
217 struct ocfs2_extent_block *eb;
218 struct buffer_head *eb_bh = NULL;
219 - struct ocfs2_dinode *fe = (struct ocfs2_dinode *)bh->b_data;
220 + u64 last_eb_blk = 0;
221
222 mlog_entry_void();
223
224 - if (!OCFS2_IS_VALID_DINODE(fe)) {
225 - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
226 - retval = -EIO;
227 - goto bail;
228 + if (type == OCFS2_DINODE_EXTENT) {
229 + struct ocfs2_dinode *fe =
230 + (struct ocfs2_dinode *)root_bh->b_data;
231 + if (!OCFS2_IS_VALID_DINODE(fe)) {
232 + OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
233 + retval = -EIO;
234 + goto bail;
235 + }
236 +
237 + if (fe->i_last_eb_blk)
238 + last_eb_blk = le64_to_cpu(fe->i_last_eb_blk);
239 + el = &fe->id2.i_list;
240 }
241
242 - if (fe->i_last_eb_blk) {
243 - retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
244 + if (last_eb_blk) {
245 + retval = ocfs2_read_block(osb, last_eb_blk,
246 &eb_bh, OCFS2_BH_CACHED, inode);
247 if (retval < 0) {
248 mlog_errno(retval);
249 @@ -393,8 +528,7 @@ int ocfs2_num_free_extents(struct ocfs2_
250 }
251 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
252 el = &eb->h_list;
253 - } else
254 - el = &fe->id2.i_list;
255 + }
256
257 BUG_ON(el->l_tree_depth != 0);
258
259 @@ -532,7 +666,7 @@ static inline u32 ocfs2_sum_rightmost_re
260 static int ocfs2_add_branch(struct ocfs2_super *osb,
261 handle_t *handle,
262 struct inode *inode,
263 - struct buffer_head *fe_bh,
264 + struct ocfs2_extent_tree *et,
265 struct buffer_head *eb_bh,
266 struct buffer_head **last_eb_bh,
267 struct ocfs2_alloc_context *meta_ac)
268 @@ -541,7 +675,6 @@ static int ocfs2_add_branch(struct ocfs2
269 u64 next_blkno, new_last_eb_blk;
270 struct buffer_head *bh;
271 struct buffer_head **new_eb_bhs = NULL;
272 - struct ocfs2_dinode *fe;
273 struct ocfs2_extent_block *eb;
274 struct ocfs2_extent_list *eb_el;
275 struct ocfs2_extent_list *el;
276 @@ -551,13 +684,11 @@ static int ocfs2_add_branch(struct ocfs2
277
278 BUG_ON(!last_eb_bh || !*last_eb_bh);
279
280 - fe = (struct ocfs2_dinode *) fe_bh->b_data;
281 -
282 if (eb_bh) {
283 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
284 el = &eb->h_list;
285 } else
286 - el = &fe->id2.i_list;
287 + el = et->root_el;
288
289 /* we never add a branch to a leaf. */
290 BUG_ON(!el->l_tree_depth);
291 @@ -647,7 +778,7 @@ static int ocfs2_add_branch(struct ocfs2
292 mlog_errno(status);
293 goto bail;
294 }
295 - status = ocfs2_journal_access(handle, inode, fe_bh,
296 + status = ocfs2_journal_access(handle, inode, et->root_bh,
297 OCFS2_JOURNAL_ACCESS_WRITE);
298 if (status < 0) {
299 mlog_errno(status);
300 @@ -663,7 +794,7 @@ static int ocfs2_add_branch(struct ocfs2
301 }
302
303 /* Link the new branch into the rest of the tree (el will
304 - * either be on the fe, or the extent block passed in. */
305 + * either be on the root_bh, or the extent block passed in. */
306 i = le16_to_cpu(el->l_next_free_rec);
307 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
308 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
309 @@ -672,7 +803,7 @@ static int ocfs2_add_branch(struct ocfs2
310
311 /* fe needs a new last extent block pointer, as does the
312 * next_leaf on the previously last-extent-block. */
313 - fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
314 + ocfs2_set_last_eb_blk(et, new_last_eb_blk);
315
316 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
317 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
318 @@ -680,7 +811,7 @@ static int ocfs2_add_branch(struct ocfs2
319 status = ocfs2_journal_dirty(handle, *last_eb_bh);
320 if (status < 0)
321 mlog_errno(status);
322 - status = ocfs2_journal_dirty(handle, fe_bh);
323 + status = ocfs2_journal_dirty(handle, et->root_bh);
324 if (status < 0)
325 mlog_errno(status);
326 if (eb_bh) {
327 @@ -718,16 +849,15 @@ bail:
328 static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
329 handle_t *handle,
330 struct inode *inode,
331 - struct buffer_head *fe_bh,
332 + struct ocfs2_extent_tree *et,
333 struct ocfs2_alloc_context *meta_ac,
334 struct buffer_head **ret_new_eb_bh)
335 {
336 int status, i;
337 u32 new_clusters;
338 struct buffer_head *new_eb_bh = NULL;
339 - struct ocfs2_dinode *fe;
340 struct ocfs2_extent_block *eb;
341 - struct ocfs2_extent_list *fe_el;
342 + struct ocfs2_extent_list *root_el;
343 struct ocfs2_extent_list *eb_el;
344
345 mlog_entry_void();
346 @@ -747,8 +877,7 @@ static int ocfs2_shift_tree_depth(struct
347 }
348
349 eb_el = &eb->h_list;
350 - fe = (struct ocfs2_dinode *) fe_bh->b_data;
351 - fe_el = &fe->id2.i_list;
352 + root_el = et->root_el;
353
354 status = ocfs2_journal_access(handle, inode, new_eb_bh,
355 OCFS2_JOURNAL_ACCESS_CREATE);
356 @@ -757,11 +886,11 @@ static int ocfs2_shift_tree_depth(struct
357 goto bail;
358 }
359
360 - /* copy the fe data into the new extent block */
361 - eb_el->l_tree_depth = fe_el->l_tree_depth;
362 - eb_el->l_next_free_rec = fe_el->l_next_free_rec;
363 - for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
364 - eb_el->l_recs[i] = fe_el->l_recs[i];
365 + /* copy the root extent list data into the new extent block */
366 + eb_el->l_tree_depth = root_el->l_tree_depth;
367 + eb_el->l_next_free_rec = root_el->l_next_free_rec;
368 + for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
369 + eb_el->l_recs[i] = root_el->l_recs[i];
370
371 status = ocfs2_journal_dirty(handle, new_eb_bh);
372 if (status < 0) {
373 @@ -769,7 +898,7 @@ static int ocfs2_shift_tree_depth(struct
374 goto bail;
375 }
376
377 - status = ocfs2_journal_access(handle, inode, fe_bh,
378 + status = ocfs2_journal_access(handle, inode, et->root_bh,
379 OCFS2_JOURNAL_ACCESS_WRITE);
380 if (status < 0) {
381 mlog_errno(status);
382 @@ -778,21 +907,21 @@ static int ocfs2_shift_tree_depth(struct
383
384 new_clusters = ocfs2_sum_rightmost_rec(eb_el);
385
386 - /* update fe now */
387 - le16_add_cpu(&fe_el->l_tree_depth, 1);
388 - fe_el->l_recs[0].e_cpos = 0;
389 - fe_el->l_recs[0].e_blkno = eb->h_blkno;
390 - fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
391 - for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++)
392 - memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
393 - fe_el->l_next_free_rec = cpu_to_le16(1);
394 + /* update root_bh now */
395 + le16_add_cpu(&root_el->l_tree_depth, 1);
396 + root_el->l_recs[0].e_cpos = 0;
397 + root_el->l_recs[0].e_blkno = eb->h_blkno;
398 + root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
399 + for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
400 + memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
401 + root_el->l_next_free_rec = cpu_to_le16(1);
402
403 /* If this is our 1st tree depth shift, then last_eb_blk
404 * becomes the allocated extent block */
405 - if (fe_el->l_tree_depth == cpu_to_le16(1))
406 - fe->i_last_eb_blk = eb->h_blkno;
407 + if (root_el->l_tree_depth == cpu_to_le16(1))
408 + ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
409
410 - status = ocfs2_journal_dirty(handle, fe_bh);
411 + status = ocfs2_journal_dirty(handle, et->root_bh);
412 if (status < 0) {
413 mlog_errno(status);
414 goto bail;
415 @@ -818,22 +947,21 @@ bail:
416 * 1) a lowest extent block is found, then we pass it back in
417 * *lowest_eb_bh and return '0'
418 *
419 - * 2) the search fails to find anything, but the dinode has room. We
420 + * 2) the search fails to find anything, but the root_el has room. We
421 * pass NULL back in *lowest_eb_bh, but still return '0'
422 *
423 - * 3) the search fails to find anything AND the dinode is full, in
424 + * 3) the search fails to find anything AND the root_el is full, in
425 * which case we return > 0
426 *
427 * return status < 0 indicates an error.
428 */
429 static int ocfs2_find_branch_target(struct ocfs2_super *osb,
430 struct inode *inode,
431 - struct buffer_head *fe_bh,
432 + struct ocfs2_extent_tree *et,
433 struct buffer_head **target_bh)
434 {
435 int status = 0, i;
436 u64 blkno;
437 - struct ocfs2_dinode *fe;
438 struct ocfs2_extent_block *eb;
439 struct ocfs2_extent_list *el;
440 struct buffer_head *bh = NULL;
441 @@ -843,8 +971,7 @@ static int ocfs2_find_branch_target(stru
442
443 *target_bh = NULL;
444
445 - fe = (struct ocfs2_dinode *) fe_bh->b_data;
446 - el = &fe->id2.i_list;
447 + el = et->root_el;
448
449 while(le16_to_cpu(el->l_tree_depth) > 1) {
450 if (le16_to_cpu(el->l_next_free_rec) == 0) {
451 @@ -896,8 +1023,8 @@ static int ocfs2_find_branch_target(stru
452
453 /* If we didn't find one and the fe doesn't have any room,
454 * then return '1' */
455 - if (!lowest_bh
456 - && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count))
457 + el = et->root_el;
458 + if (!lowest_bh && (el->l_next_free_rec == el->l_count))
459 status = 1;
460
461 *target_bh = lowest_bh;
462 @@ -920,19 +1047,19 @@ bail:
463 * *last_eb_bh will be updated by ocfs2_add_branch().
464 */
465 static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
466 - struct buffer_head *di_bh, int *final_depth,
467 + struct ocfs2_extent_tree *et, int *final_depth,
468 struct buffer_head **last_eb_bh,
469 struct ocfs2_alloc_context *meta_ac)
470 {
471 int ret, shift;
472 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
473 - int depth = le16_to_cpu(di->id2.i_list.l_tree_depth);
474 + struct ocfs2_extent_list *el = et->root_el;
475 + int depth = le16_to_cpu(el->l_tree_depth);
476 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
477 struct buffer_head *bh = NULL;
478
479 BUG_ON(meta_ac == NULL);
480
481 - shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh);
482 + shift = ocfs2_find_branch_target(osb, inode, et, &bh);
483 if (shift < 0) {
484 ret = shift;
485 mlog_errno(ret);
486 @@ -949,7 +1076,7 @@ static int ocfs2_grow_tree(struct inode
487 /* ocfs2_shift_tree_depth will return us a buffer with
488 * the new extent block (so we can pass that to
489 * ocfs2_add_branch). */
490 - ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh,
491 + ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
492 meta_ac, &bh);
493 if (ret < 0) {
494 mlog_errno(ret);
495 @@ -976,7 +1103,7 @@ static int ocfs2_grow_tree(struct inode
496 /* call ocfs2_add_branch to add the final part of the tree with
497 * the new data. */
498 mlog(0, "add branch. bh = %p\n", bh);
499 - ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh,
500 + ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
501 meta_ac);
502 if (ret < 0) {
503 mlog_errno(ret);
504 @@ -2068,11 +2195,11 @@ static int ocfs2_rotate_subtree_left(str
505 struct ocfs2_path *right_path,
506 int subtree_index,
507 struct ocfs2_cached_dealloc_ctxt *dealloc,
508 - int *deleted)
509 + int *deleted,
510 + struct ocfs2_extent_tree *et)
511 {
512 int ret, i, del_right_subtree = 0, right_has_empty = 0;
513 - struct buffer_head *root_bh, *di_bh = path_root_bh(right_path);
514 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
515 + struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
516 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
517 struct ocfs2_extent_block *eb;
518
519 @@ -2124,7 +2251,7 @@ static int ocfs2_rotate_subtree_left(str
520 * We have to update i_last_eb_blk during the meta
521 * data delete.
522 */
523 - ret = ocfs2_journal_access(handle, inode, di_bh,
524 + ret = ocfs2_journal_access(handle, inode, et_root_bh,
525 OCFS2_JOURNAL_ACCESS_WRITE);
526 if (ret) {
527 mlog_errno(ret);
528 @@ -2199,7 +2326,7 @@ static int ocfs2_rotate_subtree_left(str
529 ocfs2_update_edge_lengths(inode, handle, left_path);
530
531 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
532 - di->i_last_eb_blk = eb->h_blkno;
533 + ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
534
535 /*
536 * Removal of the extent in the left leaf was skipped
537 @@ -2209,7 +2336,7 @@ static int ocfs2_rotate_subtree_left(str
538 if (right_has_empty)
539 ocfs2_remove_empty_extent(left_leaf_el);
540
541 - ret = ocfs2_journal_dirty(handle, di_bh);
542 + ret = ocfs2_journal_dirty(handle, et_root_bh);
543 if (ret)
544 mlog_errno(ret);
545
546 @@ -2332,7 +2459,8 @@ static int __ocfs2_rotate_tree_left(stru
547 handle_t *handle, int orig_credits,
548 struct ocfs2_path *path,
549 struct ocfs2_cached_dealloc_ctxt *dealloc,
550 - struct ocfs2_path **empty_extent_path)
551 + struct ocfs2_path **empty_extent_path,
552 + struct ocfs2_extent_tree *et)
553 {
554 int ret, subtree_root, deleted;
555 u32 right_cpos;
556 @@ -2405,7 +2533,7 @@ static int __ocfs2_rotate_tree_left(stru
557
558 ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
559 right_path, subtree_root,
560 - dealloc, &deleted);
561 + dealloc, &deleted, et);
562 if (ret == -EAGAIN) {
563 /*
564 * The rotation has to temporarily stop due to
565 @@ -2448,29 +2576,20 @@ out:
566 }
567
568 static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
569 - struct ocfs2_path *path,
570 - struct ocfs2_cached_dealloc_ctxt *dealloc)
571 + struct ocfs2_path *path,
572 + struct ocfs2_cached_dealloc_ctxt *dealloc,
573 + struct ocfs2_extent_tree *et)
574 {
575 int ret, subtree_index;
576 u32 cpos;
577 struct ocfs2_path *left_path = NULL;
578 - struct ocfs2_dinode *di;
579 struct ocfs2_extent_block *eb;
580 struct ocfs2_extent_list *el;
581
582 - /*
583 - * XXX: This code assumes that the root is an inode, which is
584 - * true for now but may change as tree code gets generic.
585 - */
586 - di = (struct ocfs2_dinode *)path_root_bh(path)->b_data;
587 - if (!OCFS2_IS_VALID_DINODE(di)) {
588 - ret = -EIO;
589 - ocfs2_error(inode->i_sb,
590 - "Inode %llu has invalid path root",
591 - (unsigned long long)OCFS2_I(inode)->ip_blkno);
592 - goto out;
593 - }
594
595 + ret = et->eops->sanity_check(inode, et);
596 + if (ret)
597 + goto out;
598 /*
599 * There's two ways we handle this depending on
600 * whether path is the only existing one.
601 @@ -2527,7 +2646,7 @@ static int ocfs2_remove_rightmost_path(s
602 ocfs2_update_edge_lengths(inode, handle, left_path);
603
604 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
605 - di->i_last_eb_blk = eb->h_blkno;
606 + ocfs2_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
607 } else {
608 /*
609 * 'path' is also the leftmost path which
610 @@ -2538,12 +2657,12 @@ static int ocfs2_remove_rightmost_path(s
611 */
612 ocfs2_unlink_path(inode, handle, dealloc, path, 1);
613
614 - el = &di->id2.i_list;
615 + el = et->root_el;
616 el->l_tree_depth = 0;
617 el->l_next_free_rec = 0;
618 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
619
620 - di->i_last_eb_blk = 0;
621 + ocfs2_set_last_eb_blk(et, 0);
622 }
623
624 ocfs2_journal_dirty(handle, path_root_bh(path));
625 @@ -2571,7 +2690,8 @@ out:
626 */
627 static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
628 struct ocfs2_path *path,
629 - struct ocfs2_cached_dealloc_ctxt *dealloc)
630 + struct ocfs2_cached_dealloc_ctxt *dealloc,
631 + struct ocfs2_extent_tree *et)
632 {
633 int ret, orig_credits = handle->h_buffer_credits;
634 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
635 @@ -2585,7 +2705,7 @@ static int ocfs2_rotate_tree_left(struct
636 if (path->p_tree_depth == 0) {
637 rightmost_no_delete:
638 /*
639 - * In-inode extents. This is trivially handled, so do
640 + * Inline extents. This is trivially handled, so do
641 * it up front.
642 */
643 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
644 @@ -2639,7 +2759,7 @@ rightmost_no_delete:
645 */
646
647 ret = ocfs2_remove_rightmost_path(inode, handle, path,
648 - dealloc);
649 + dealloc, et);
650 if (ret)
651 mlog_errno(ret);
652 goto out;
653 @@ -2651,7 +2771,7 @@ rightmost_no_delete:
654 */
655 try_rotate:
656 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
657 - dealloc, &restart_path);
658 + dealloc, &restart_path, et);
659 if (ret && ret != -EAGAIN) {
660 mlog_errno(ret);
661 goto out;
662 @@ -2663,7 +2783,7 @@ try_rotate:
663
664 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
665 tmp_path, dealloc,
666 - &restart_path);
667 + &restart_path, et);
668 if (ret && ret != -EAGAIN) {
669 mlog_errno(ret);
670 goto out;
671 @@ -2949,6 +3069,7 @@ static int ocfs2_merge_rec_left(struct i
672 handle_t *handle,
673 struct ocfs2_extent_rec *split_rec,
674 struct ocfs2_cached_dealloc_ctxt *dealloc,
675 + struct ocfs2_extent_tree *et,
676 int index)
677 {
678 int ret, i, subtree_index = 0, has_empty_extent = 0;
679 @@ -3069,7 +3190,8 @@ static int ocfs2_merge_rec_left(struct i
680 le16_to_cpu(el->l_next_free_rec) == 1) {
681
682 ret = ocfs2_remove_rightmost_path(inode, handle,
683 - right_path, dealloc);
684 + right_path,
685 + dealloc, et);
686 if (ret) {
687 mlog_errno(ret);
688 goto out;
689 @@ -3096,7 +3218,8 @@ static int ocfs2_try_to_merge_extent(str
690 int split_index,
691 struct ocfs2_extent_rec *split_rec,
692 struct ocfs2_cached_dealloc_ctxt *dealloc,
693 - struct ocfs2_merge_ctxt *ctxt)
694 + struct ocfs2_merge_ctxt *ctxt,
695 + struct ocfs2_extent_tree *et)
696
697 {
698 int ret = 0;
699 @@ -3114,7 +3237,7 @@ static int ocfs2_try_to_merge_extent(str
700 * illegal.
701 */
702 ret = ocfs2_rotate_tree_left(inode, handle, path,
703 - dealloc);
704 + dealloc, et);
705 if (ret) {
706 mlog_errno(ret);
707 goto out;
708 @@ -3157,7 +3280,8 @@ static int ocfs2_try_to_merge_extent(str
709 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
710
711 /* The merge left us with an empty extent, remove it. */
712 - ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
713 + ret = ocfs2_rotate_tree_left(inode, handle, path,
714 + dealloc, et);
715 if (ret) {
716 mlog_errno(ret);
717 goto out;
718 @@ -3171,7 +3295,7 @@ static int ocfs2_try_to_merge_extent(str
719 */
720 ret = ocfs2_merge_rec_left(inode, path,
721 handle, rec,
722 - dealloc,
723 + dealloc, et,
724 split_index);
725
726 if (ret) {
727 @@ -3180,7 +3304,7 @@ static int ocfs2_try_to_merge_extent(str
728 }
729
730 ret = ocfs2_rotate_tree_left(inode, handle, path,
731 - dealloc);
732 + dealloc, et);
733 /*
734 * Error from this last rotate is not critical, so
735 * print but don't bubble it up.
736 @@ -3200,7 +3324,7 @@ static int ocfs2_try_to_merge_extent(str
737 ret = ocfs2_merge_rec_left(inode,
738 path,
739 handle, split_rec,
740 - dealloc,
741 + dealloc, et,
742 split_index);
743 if (ret) {
744 mlog_errno(ret);
745 @@ -3223,7 +3347,7 @@ static int ocfs2_try_to_merge_extent(str
746 * our leaf. Try to rotate it away.
747 */
748 ret = ocfs2_rotate_tree_left(inode, handle, path,
749 - dealloc);
750 + dealloc, et);
751 if (ret)
752 mlog_errno(ret);
753 ret = 0;
754 @@ -3357,16 +3481,6 @@ rotate:
755 ocfs2_rotate_leaf(el, insert_rec);
756 }
757
758 -static inline void ocfs2_update_dinode_clusters(struct inode *inode,
759 - struct ocfs2_dinode *di,
760 - u32 clusters)
761 -{
762 - le32_add_cpu(&di->i_clusters, clusters);
763 - spin_lock(&OCFS2_I(inode)->ip_lock);
764 - OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
765 - spin_unlock(&OCFS2_I(inode)->ip_lock);
766 -}
767 -
768 static void ocfs2_adjust_rightmost_records(struct inode *inode,
769 handle_t *handle,
770 struct ocfs2_path *path,
771 @@ -3568,8 +3682,8 @@ static void ocfs2_split_record(struct in
772 }
773
774 /*
775 - * This function only does inserts on an allocation b-tree. For dinode
776 - * lists, ocfs2_insert_at_leaf() is called directly.
777 + * This function only does inserts on an allocation b-tree. For tree
778 + * depth = 0, ocfs2_insert_at_leaf() is called directly.
779 *
780 * right_path is the path we want to do the actual insert
781 * in. left_path should only be passed in if we need to update that
782 @@ -3666,7 +3780,7 @@ out:
783
784 static int ocfs2_do_insert_extent(struct inode *inode,
785 handle_t *handle,
786 - struct buffer_head *di_bh,
787 + struct ocfs2_extent_tree *et,
788 struct ocfs2_extent_rec *insert_rec,
789 struct ocfs2_insert_type *type)
790 {
791 @@ -3674,13 +3788,11 @@ static int ocfs2_do_insert_extent(struct
792 u32 cpos;
793 struct ocfs2_path *right_path = NULL;
794 struct ocfs2_path *left_path = NULL;
795 - struct ocfs2_dinode *di;
796 struct ocfs2_extent_list *el;
797
798 - di = (struct ocfs2_dinode *) di_bh->b_data;
799 - el = &di->id2.i_list;
800 + el = et->root_el;
801
802 - ret = ocfs2_journal_access(handle, inode, di_bh,
803 + ret = ocfs2_journal_access(handle, inode, et->root_bh,
804 OCFS2_JOURNAL_ACCESS_WRITE);
805 if (ret) {
806 mlog_errno(ret);
807 @@ -3692,7 +3804,7 @@ static int ocfs2_do_insert_extent(struct
808 goto out_update_clusters;
809 }
810
811 - right_path = ocfs2_new_inode_path(di_bh);
812 + right_path = ocfs2_new_path(et->root_bh, et->root_el);
813 if (!right_path) {
814 ret = -ENOMEM;
815 mlog_errno(ret);
816 @@ -3742,7 +3854,7 @@ static int ocfs2_do_insert_extent(struct
817 * ocfs2_rotate_tree_right() might have extended the
818 * transaction without re-journaling our tree root.
819 */
820 - ret = ocfs2_journal_access(handle, inode, di_bh,
821 + ret = ocfs2_journal_access(handle, inode, et->root_bh,
822 OCFS2_JOURNAL_ACCESS_WRITE);
823 if (ret) {
824 mlog_errno(ret);
825 @@ -3767,10 +3879,10 @@ static int ocfs2_do_insert_extent(struct
826
827 out_update_clusters:
828 if (type->ins_split == SPLIT_NONE)
829 - ocfs2_update_dinode_clusters(inode, di,
830 - le16_to_cpu(insert_rec->e_leaf_clusters));
831 + ocfs2_update_clusters(inode, et,
832 + le16_to_cpu(insert_rec->e_leaf_clusters));
833
834 - ret = ocfs2_journal_dirty(handle, di_bh);
835 + ret = ocfs2_journal_dirty(handle, et->root_bh);
836 if (ret)
837 mlog_errno(ret);
838
839 @@ -3924,8 +4036,8 @@ static void ocfs2_figure_contig_type(str
840 * ocfs2_figure_appending_type() will figure out whether we'll have to
841 * insert at the tail of the rightmost leaf.
842 *
843 - * This should also work against the dinode list for tree's with 0
844 - * depth. If we consider the dinode list to be the rightmost leaf node
845 + * This should also work against the root extent list for tree's with 0
846 + * depth. If we consider the root extent list to be the rightmost leaf node
847 * then the logic here makes sense.
848 */
849 static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
850 @@ -3976,14 +4088,13 @@ set_tail_append:
851 * structure.
852 */
853 static int ocfs2_figure_insert_type(struct inode *inode,
854 - struct buffer_head *di_bh,
855 + struct ocfs2_extent_tree *et,
856 struct buffer_head **last_eb_bh,
857 struct ocfs2_extent_rec *insert_rec,
858 int *free_records,
859 struct ocfs2_insert_type *insert)
860 {
861 int ret;
862 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
863 struct ocfs2_extent_block *eb;
864 struct ocfs2_extent_list *el;
865 struct ocfs2_path *path = NULL;
866 @@ -3991,7 +4102,7 @@ static int ocfs2_figure_insert_type(stru
867
868 insert->ins_split = SPLIT_NONE;
869
870 - el = &di->id2.i_list;
871 + el = et->root_el;
872 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
873
874 if (el->l_tree_depth) {
875 @@ -4002,7 +4113,7 @@ static int ocfs2_figure_insert_type(stru
876 * may want it later.
877 */
878 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
879 - le64_to_cpu(di->i_last_eb_blk), &bh,
880 + ocfs2_get_last_eb_blk(et), &bh,
881 OCFS2_BH_CACHED, inode);
882 if (ret) {
883 mlog_exit(ret);
884 @@ -4029,7 +4140,7 @@ static int ocfs2_figure_insert_type(stru
885 return 0;
886 }
887
888 - path = ocfs2_new_inode_path(di_bh);
889 + path = ocfs2_new_path(et->root_bh, et->root_el);
890 if (!path) {
891 ret = -ENOMEM;
892 mlog_errno(ret);
893 @@ -4079,7 +4190,8 @@ static int ocfs2_figure_insert_type(stru
894 * the case that we're doing a tail append, so maybe we can
895 * take advantage of that information somehow.
896 */
897 - if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) {
898 + if (ocfs2_get_last_eb_blk(et) ==
899 + path_leaf_bh(path)->b_blocknr) {
900 /*
901 * Ok, ocfs2_find_path() returned us the rightmost
902 * tree path. This might be an appending insert. There are
903 @@ -4109,21 +4221,30 @@ out:
904 int ocfs2_insert_extent(struct ocfs2_super *osb,
905 handle_t *handle,
906 struct inode *inode,
907 - struct buffer_head *fe_bh,
908 + struct buffer_head *root_bh,
909 u32 cpos,
910 u64 start_blk,
911 u32 new_clusters,
912 u8 flags,
913 - struct ocfs2_alloc_context *meta_ac)
914 + struct ocfs2_alloc_context *meta_ac,
915 + enum ocfs2_extent_tree_type et_type)
916 {
917 int status;
918 int uninitialized_var(free_records);
919 struct buffer_head *last_eb_bh = NULL;
920 struct ocfs2_insert_type insert = {0, };
921 struct ocfs2_extent_rec rec;
922 + struct ocfs2_extent_tree *et = NULL;
923
924 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
925
926 + et = ocfs2_new_extent_tree(root_bh, et_type);
927 + if (!et) {
928 + status = -ENOMEM;
929 + mlog_errno(status);
930 + goto bail;
931 + }
932 +
933 mlog(0, "add %u clusters at position %u to inode %llu\n",
934 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
935
936 @@ -4141,7 +4262,7 @@ int ocfs2_insert_extent(struct ocfs2_sup
937 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
938 rec.e_flags = flags;
939
940 - status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
941 + status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec,
942 &free_records, &insert);
943 if (status < 0) {
944 mlog_errno(status);
945 @@ -4155,7 +4276,7 @@ int ocfs2_insert_extent(struct ocfs2_sup
946 free_records, insert.ins_tree_depth);
947
948 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
949 - status = ocfs2_grow_tree(inode, handle, fe_bh,
950 + status = ocfs2_grow_tree(inode, handle, et,
951 &insert.ins_tree_depth, &last_eb_bh,
952 meta_ac);
953 if (status) {
954 @@ -4165,16 +4286,18 @@ int ocfs2_insert_extent(struct ocfs2_sup
955 }
956
957 /* Finally, we can add clusters. This might rotate the tree for us. */
958 - status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
959 + status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert);
960 if (status < 0)
961 mlog_errno(status);
962 - else
963 + else if (et->type == OCFS2_DINODE_EXTENT)
964 ocfs2_extent_map_insert_rec(inode, &rec);
965
966 bail:
967 if (last_eb_bh)
968 brelse(last_eb_bh);
969
970 + if (et)
971 + ocfs2_free_extent_tree(et);
972 mlog_exit(status);
973 return status;
974 }
975 @@ -4202,7 +4325,7 @@ static void ocfs2_make_right_split_rec(s
976 static int ocfs2_split_and_insert(struct inode *inode,
977 handle_t *handle,
978 struct ocfs2_path *path,
979 - struct buffer_head *di_bh,
980 + struct ocfs2_extent_tree *et,
981 struct buffer_head **last_eb_bh,
982 int split_index,
983 struct ocfs2_extent_rec *orig_split_rec,
984 @@ -4216,7 +4339,6 @@ static int ocfs2_split_and_insert(struct
985 struct ocfs2_extent_rec split_rec = *orig_split_rec;
986 struct ocfs2_insert_type insert;
987 struct ocfs2_extent_block *eb;
988 - struct ocfs2_dinode *di;
989
990 leftright:
991 /*
992 @@ -4225,8 +4347,7 @@ leftright:
993 */
994 rec = path_leaf_el(path)->l_recs[split_index];
995
996 - di = (struct ocfs2_dinode *)di_bh->b_data;
997 - rightmost_el = &di->id2.i_list;
998 + rightmost_el = et->root_el;
999
1000 depth = le16_to_cpu(rightmost_el->l_tree_depth);
1001 if (depth) {
1002 @@ -4237,8 +4358,8 @@ leftright:
1003
1004 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
1005 le16_to_cpu(rightmost_el->l_count)) {
1006 - ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
1007 - meta_ac);
1008 + ret = ocfs2_grow_tree(inode, handle, et,
1009 + &depth, last_eb_bh, meta_ac);
1010 if (ret) {
1011 mlog_errno(ret);
1012 goto out;
1013 @@ -4275,8 +4396,7 @@ leftright:
1014 do_leftright = 1;
1015 }
1016
1017 - ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
1018 - &insert);
1019 + ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
1020 if (ret) {
1021 mlog_errno(ret);
1022 goto out;
1023 @@ -4318,8 +4438,9 @@ out:
1024 * of the tree is required. All other cases will degrade into a less
1025 * optimal tree layout.
1026 *
1027 - * last_eb_bh should be the rightmost leaf block for any inode with a
1028 - * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call.
1029 + * last_eb_bh should be the rightmost leaf block for any extent
1030 + * btree. Since a split may grow the tree or a merge might shrink it,
1031 + * the caller cannot trust the contents of that buffer after this call.
1032 *
1033 * This code is optimized for readability - several passes might be
1034 * made over certain portions of the tree. All of those blocks will
1035 @@ -4327,7 +4448,7 @@ out:
1036 * extra overhead is not expressed in terms of disk reads.
1037 */
1038 static int __ocfs2_mark_extent_written(struct inode *inode,
1039 - struct buffer_head *di_bh,
1040 + struct ocfs2_extent_tree *et,
1041 handle_t *handle,
1042 struct ocfs2_path *path,
1043 int split_index,
1044 @@ -4367,10 +4488,9 @@ static int __ocfs2_mark_extent_written(s
1045 */
1046 if (path->p_tree_depth) {
1047 struct ocfs2_extent_block *eb;
1048 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1049
1050 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1051 - le64_to_cpu(di->i_last_eb_blk),
1052 + ocfs2_get_last_eb_blk(et),
1053 &last_eb_bh, OCFS2_BH_CACHED, inode);
1054 if (ret) {
1055 mlog_exit(ret);
1056 @@ -4404,7 +4524,7 @@ static int __ocfs2_mark_extent_written(s
1057 if (ctxt.c_split_covers_rec)
1058 el->l_recs[split_index] = *split_rec;
1059 else
1060 - ret = ocfs2_split_and_insert(inode, handle, path, di_bh,
1061 + ret = ocfs2_split_and_insert(inode, handle, path, et,
1062 &last_eb_bh, split_index,
1063 split_rec, meta_ac);
1064 if (ret)
1065 @@ -4412,7 +4532,7 @@ static int __ocfs2_mark_extent_written(s
1066 } else {
1067 ret = ocfs2_try_to_merge_extent(inode, handle, path,
1068 split_index, split_rec,
1069 - dealloc, &ctxt);
1070 + dealloc, &ctxt, et);
1071 if (ret)
1072 mlog_errno(ret);
1073 }
1074 @@ -4430,16 +4550,18 @@ out:
1075 *
1076 * The caller is responsible for passing down meta_ac if we'll need it.
1077 */
1078 -int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
1079 +int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *root_bh,
1080 handle_t *handle, u32 cpos, u32 len, u32 phys,
1081 struct ocfs2_alloc_context *meta_ac,
1082 - struct ocfs2_cached_dealloc_ctxt *dealloc)
1083 + struct ocfs2_cached_dealloc_ctxt *dealloc,
1084 + enum ocfs2_extent_tree_type et_type)
1085 {
1086 int ret, index;
1087 u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
1088 struct ocfs2_extent_rec split_rec;
1089 struct ocfs2_path *left_path = NULL;
1090 struct ocfs2_extent_list *el;
1091 + struct ocfs2_extent_tree *et = NULL;
1092
1093 mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
1094 inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
1095 @@ -4453,13 +4575,21 @@ int ocfs2_mark_extent_written(struct ino
1096 goto out;
1097 }
1098
1099 + et = ocfs2_new_extent_tree(root_bh, et_type);
1100 + if (!et) {
1101 + ret = -ENOMEM;
1102 + mlog_errno(ret);
1103 + goto out;
1104 + }
1105 +
1106 /*
1107 * XXX: This should be fixed up so that we just re-insert the
1108 * next extent records.
1109 */
1110 - ocfs2_extent_map_trunc(inode, 0);
1111 + if (et_type == OCFS2_DINODE_EXTENT)
1112 + ocfs2_extent_map_trunc(inode, 0);
1113
1114 - left_path = ocfs2_new_inode_path(di_bh);
1115 + left_path = ocfs2_new_path(et->root_bh, et->root_el);
1116 if (!left_path) {
1117 ret = -ENOMEM;
1118 mlog_errno(ret);
1119 @@ -4490,23 +4620,25 @@ int ocfs2_mark_extent_written(struct ino
1120 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
1121 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
1122
1123 - ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path,
1124 - index, &split_rec, meta_ac, dealloc);
1125 + ret = __ocfs2_mark_extent_written(inode, et, handle, left_path,
1126 + index, &split_rec, meta_ac,
1127 + dealloc);
1128 if (ret)
1129 mlog_errno(ret);
1130
1131 out:
1132 ocfs2_free_path(left_path);
1133 + if (et)
1134 + ocfs2_free_extent_tree(et);
1135 return ret;
1136 }
1137
1138 -static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
1139 +static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
1140 handle_t *handle, struct ocfs2_path *path,
1141 int index, u32 new_range,
1142 struct ocfs2_alloc_context *meta_ac)
1143 {
1144 int ret, depth, credits = handle->h_buffer_credits;
1145 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1146 struct buffer_head *last_eb_bh = NULL;
1147 struct ocfs2_extent_block *eb;
1148 struct ocfs2_extent_list *rightmost_el, *el;
1149 @@ -4524,7 +4656,7 @@ static int ocfs2_split_tree(struct inode
1150 depth = path->p_tree_depth;
1151 if (depth > 0) {
1152 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1153 - le64_to_cpu(di->i_last_eb_blk),
1154 + ocfs2_get_last_eb_blk(et),
1155 &last_eb_bh, OCFS2_BH_CACHED, inode);
1156 if (ret < 0) {
1157 mlog_errno(ret);
1158 @@ -4537,7 +4669,7 @@ static int ocfs2_split_tree(struct inode
1159 rightmost_el = path_leaf_el(path);
1160
1161 credits += path->p_tree_depth +
1162 - ocfs2_extend_meta_needed(&di->id2.i_list);
1163 + ocfs2_extend_meta_needed(et->root_el);
1164 ret = ocfs2_extend_trans(handle, credits);
1165 if (ret) {
1166 mlog_errno(ret);
1167 @@ -4546,7 +4678,7 @@ static int ocfs2_split_tree(struct inode
1168
1169 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
1170 le16_to_cpu(rightmost_el->l_count)) {
1171 - ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
1172 + ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh,
1173 meta_ac);
1174 if (ret) {
1175 mlog_errno(ret);
1176 @@ -4560,7 +4692,7 @@ static int ocfs2_split_tree(struct inode
1177 insert.ins_split = SPLIT_RIGHT;
1178 insert.ins_tree_depth = depth;
1179
1180 - ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
1181 + ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
1182 if (ret)
1183 mlog_errno(ret);
1184
1185 @@ -4572,7 +4704,8 @@ out:
1186 static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
1187 struct ocfs2_path *path, int index,
1188 struct ocfs2_cached_dealloc_ctxt *dealloc,
1189 - u32 cpos, u32 len)
1190 + u32 cpos, u32 len,
1191 + struct ocfs2_extent_tree *et)
1192 {
1193 int ret;
1194 u32 left_cpos, rec_range, trunc_range;
1195 @@ -4584,7 +4717,7 @@ static int ocfs2_truncate_rec(struct ino
1196 struct ocfs2_extent_block *eb;
1197
1198 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
1199 - ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
1200 + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
1201 if (ret) {
1202 mlog_errno(ret);
1203 goto out;
1204 @@ -4715,7 +4848,7 @@ static int ocfs2_truncate_rec(struct ino
1205
1206 ocfs2_journal_dirty(handle, path_leaf_bh(path));
1207
1208 - ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc);
1209 + ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
1210 if (ret) {
1211 mlog_errno(ret);
1212 goto out;
1213 @@ -4726,20 +4859,29 @@ out:
1214 return ret;
1215 }
1216
1217 -int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
1218 +int ocfs2_remove_extent(struct inode *inode, struct buffer_head *root_bh,
1219 u32 cpos, u32 len, handle_t *handle,
1220 struct ocfs2_alloc_context *meta_ac,
1221 - struct ocfs2_cached_dealloc_ctxt *dealloc)
1222 + struct ocfs2_cached_dealloc_ctxt *dealloc,
1223 + enum ocfs2_extent_tree_type et_type)
1224 {
1225 int ret, index;
1226 u32 rec_range, trunc_range;
1227 struct ocfs2_extent_rec *rec;
1228 struct ocfs2_extent_list *el;
1229 - struct ocfs2_path *path;
1230 + struct ocfs2_path *path = NULL;
1231 + struct ocfs2_extent_tree *et = NULL;
1232 +
1233 + et = ocfs2_new_extent_tree(root_bh, et_type);
1234 + if (!et) {
1235 + ret = -ENOMEM;
1236 + mlog_errno(ret);
1237 + goto out;
1238 + }
1239
1240 ocfs2_extent_map_trunc(inode, 0);
1241
1242 - path = ocfs2_new_inode_path(di_bh);
1243 + path = ocfs2_new_path(et->root_bh, et->root_el);
1244 if (!path) {
1245 ret = -ENOMEM;
1246 mlog_errno(ret);
1247 @@ -4792,13 +4934,13 @@ int ocfs2_remove_extent(struct inode *in
1248
1249 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
1250 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
1251 - cpos, len);
1252 + cpos, len, et);
1253 if (ret) {
1254 mlog_errno(ret);
1255 goto out;
1256 }
1257 } else {
1258 - ret = ocfs2_split_tree(inode, di_bh, handle, path, index,
1259 + ret = ocfs2_split_tree(inode, et, handle, path, index,
1260 trunc_range, meta_ac);
1261 if (ret) {
1262 mlog_errno(ret);
1263 @@ -4847,7 +4989,7 @@ int ocfs2_remove_extent(struct inode *in
1264 }
1265
1266 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
1267 - cpos, len);
1268 + cpos, len, et);
1269 if (ret) {
1270 mlog_errno(ret);
1271 goto out;
1272 @@ -4856,6 +4998,8 @@ int ocfs2_remove_extent(struct inode *in
1273
1274 out:
1275 ocfs2_free_path(path);
1276 + if (et)
1277 + ocfs2_free_extent_tree(et);
1278 return ret;
1279 }
1280
1281 @@ -6364,7 +6508,8 @@ int ocfs2_convert_inline_data_to_extents
1282 * the in-inode data from our pages.
1283 */
1284 ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
1285 - 0, block, 1, 0, NULL);
1286 + 0, block, 1, 0,
1287 + NULL, OCFS2_DINODE_EXTENT);
1288 if (ret) {
1289 mlog_errno(ret);
1290 goto out_commit;
1291 @@ -6406,13 +6551,14 @@ int ocfs2_commit_truncate(struct ocfs2_s
1292 handle_t *handle = NULL;
1293 struct inode *tl_inode = osb->osb_tl_inode;
1294 struct ocfs2_path *path = NULL;
1295 + struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
1296
1297 mlog_entry_void();
1298
1299 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
1300 i_size_read(inode));
1301
1302 - path = ocfs2_new_inode_path(fe_bh);
1303 + path = ocfs2_new_path(fe_bh, &di->id2.i_list);
1304 if (!path) {
1305 status = -ENOMEM;
1306 mlog_errno(status);
1307 Index: linux-2.6.26/fs/ocfs2/alloc.h
1308 ===================================================================
1309 --- linux-2.6.26.orig/fs/ocfs2/alloc.h
1310 +++ linux-2.6.26/fs/ocfs2/alloc.h
1311 @@ -26,28 +26,37 @@
1312 #ifndef OCFS2_ALLOC_H
1313 #define OCFS2_ALLOC_H
1314
1315 +enum ocfs2_extent_tree_type { /* tree kind selector taken by the extent tree prototypes below */
1316 + OCFS2_DINODE_EXTENT = 0, /* only type defined here; every caller visible in this patch passes it */
1317 +};
1318 +
1319 struct ocfs2_alloc_context;
1320 int ocfs2_insert_extent(struct ocfs2_super *osb,
1321 handle_t *handle,
1322 struct inode *inode,
1323 - struct buffer_head *fe_bh,
1324 + struct buffer_head *root_bh,
1325 u32 cpos,
1326 u64 start_blk,
1327 u32 new_clusters,
1328 u8 flags,
1329 - struct ocfs2_alloc_context *meta_ac);
1330 + struct ocfs2_alloc_context *meta_ac,
1331 + enum ocfs2_extent_tree_type et_type);
1332 struct ocfs2_cached_dealloc_ctxt;
1333 -int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
1334 +int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *root_bh,
1335 handle_t *handle, u32 cpos, u32 len, u32 phys,
1336 struct ocfs2_alloc_context *meta_ac,
1337 - struct ocfs2_cached_dealloc_ctxt *dealloc);
1338 -int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
1339 + struct ocfs2_cached_dealloc_ctxt *dealloc,
1340 + enum ocfs2_extent_tree_type et_type);
1341 +int ocfs2_remove_extent(struct inode *inode, struct buffer_head *root_bh,
1342 u32 cpos, u32 len, handle_t *handle,
1343 struct ocfs2_alloc_context *meta_ac,
1344 - struct ocfs2_cached_dealloc_ctxt *dealloc);
1345 + struct ocfs2_cached_dealloc_ctxt *dealloc,
1346 + enum ocfs2_extent_tree_type et_type);
1347 int ocfs2_num_free_extents(struct ocfs2_super *osb,
1348 struct inode *inode,
1349 - struct buffer_head *bh);
1350 + struct buffer_head *root_bh,
1351 + enum ocfs2_extent_tree_type et_type);
1352 +
1353 /*
1354 * how many new metadata chunks would an allocation need at maximum?
1355 *
1356 Index: linux-2.6.26/fs/ocfs2/aops.c
1357 ===================================================================
1358 --- linux-2.6.26.orig/fs/ocfs2/aops.c
1359 +++ linux-2.6.26/fs/ocfs2/aops.c
1360 @@ -1278,7 +1278,8 @@ static int ocfs2_write_cluster(struct ad
1361 } else if (unwritten) {
1362 ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
1363 wc->w_handle, cpos, 1, phys,
1364 - meta_ac, &wc->w_dealloc);
1365 + meta_ac, &wc->w_dealloc,
1366 + OCFS2_DINODE_EXTENT);
1367 if (ret < 0) {
1368 mlog_errno(ret);
1369 goto out;
1370 @@ -1712,7 +1713,13 @@ int ocfs2_write_begin_nolock(struct addr
1371 * ocfs2_lock_allocators(). It greatly over-estimates
1372 * the work to be done.
1373 */
1374 - ret = ocfs2_lock_allocators(inode, wc->w_di_bh,
1375 + mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
1376 + " clusters_to_add = %u, extents_to_split = %u\n",
1377 + (unsigned long long)OCFS2_I(inode)->ip_blkno,
1378 + (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1379 + clusters_to_alloc, extents_to_split);
1380 +
1381 + ret = ocfs2_lock_allocators(inode, wc->w_di_bh, &di->id2.i_list,
1382 clusters_to_alloc, extents_to_split,
1383 &data_ac, &meta_ac);
1384 if (ret) {
1385 Index: linux-2.6.26/fs/ocfs2/dir.c
1386 ===================================================================
1387 --- linux-2.6.26.orig/fs/ocfs2/dir.c
1388 +++ linux-2.6.26/fs/ocfs2/dir.c
1389 @@ -1306,7 +1306,7 @@ static int ocfs2_expand_inline_dir(struc
1390 * related blocks have been journaled already.
1391 */
1392 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
1393 - NULL);
1394 + NULL, OCFS2_DINODE_EXTENT);
1395 if (ret) {
1396 mlog_errno(ret);
1397 goto out_commit;
1398 @@ -1338,7 +1338,7 @@ static int ocfs2_expand_inline_dir(struc
1399 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
1400
1401 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
1402 - len, 0, NULL);
1403 + len, 0, NULL, OCFS2_DINODE_EXTENT);
1404 if (ret) {
1405 mlog_errno(ret);
1406 goto out_commit;
1407 @@ -1481,7 +1481,8 @@ static int ocfs2_extend_dir(struct ocfs2
1408 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
1409 spin_unlock(&OCFS2_I(dir)->ip_lock);
1410 num_free_extents = ocfs2_num_free_extents(osb, dir,
1411 - parent_fe_bh);
1412 + parent_fe_bh,
1413 + OCFS2_DINODE_EXTENT);
1414 if (num_free_extents < 0) {
1415 status = num_free_extents;
1416 mlog_errno(status);
1417 Index: linux-2.6.26/fs/ocfs2/file.c
1418 ===================================================================
1419 --- linux-2.6.26.orig/fs/ocfs2/file.c
1420 +++ linux-2.6.26/fs/ocfs2/file.c
1421 @@ -521,7 +521,8 @@ int ocfs2_do_extend_allocation(struct oc
1422 if (mark_unwritten)
1423 flags = OCFS2_EXT_UNWRITTEN;
1424
1425 - free_extents = ocfs2_num_free_extents(osb, inode, fe_bh);
1426 + free_extents = ocfs2_num_free_extents(osb, inode, fe_bh,
1427 + OCFS2_DINODE_EXTENT);
1428 if (free_extents < 0) {
1429 status = free_extents;
1430 mlog_errno(status);
1431 @@ -570,7 +571,7 @@ int ocfs2_do_extend_allocation(struct oc
1432 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
1433 status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
1434 *logical_offset, block, num_bits,
1435 - flags, meta_ac);
1436 + flags, meta_ac, OCFS2_DINODE_EXTENT);
1437 if (status < 0) {
1438 mlog_errno(status);
1439 goto leave;
1440 @@ -599,92 +600,6 @@ leave:
1441 return status;
1442 }
1443
1444 -/*
1445 - * For a given allocation, determine which allocators will need to be
1446 - * accessed, and lock them, reserving the appropriate number of bits.
1447 - *
1448 - * Sparse file systems call this from ocfs2_write_begin_nolock()
1449 - * and ocfs2_allocate_unwritten_extents().
1450 - *
1451 - * File systems which don't support holes call this from
1452 - * ocfs2_extend_allocation().
1453 - */
1454 -int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *di_bh,
1455 - u32 clusters_to_add, u32 extents_to_split,
1456 - struct ocfs2_alloc_context **data_ac,
1457 - struct ocfs2_alloc_context **meta_ac)
1458 -{
1459 - int ret = 0, num_free_extents;
1460 - unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1461 - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1462 - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1463 -
1464 - *meta_ac = NULL;
1465 - if (data_ac)
1466 - *data_ac = NULL;
1467 -
1468 - BUG_ON(clusters_to_add != 0 && data_ac == NULL);
1469 -
1470 - mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
1471 - "clusters_to_add = %u, extents_to_split = %u\n",
1472 - (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
1473 - le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
1474 -
1475 - num_free_extents = ocfs2_num_free_extents(osb, inode, di_bh);
1476 - if (num_free_extents < 0) {
1477 - ret = num_free_extents;
1478 - mlog_errno(ret);
1479 - goto out;
1480 - }
1481 -
1482 - /*
1483 - * Sparse allocation file systems need to be more conservative
1484 - * with reserving room for expansion - the actual allocation
1485 - * happens while we've got a journal handle open so re-taking
1486 - * a cluster lock (because we ran out of room for another
1487 - * extent) will violate ordering rules.
1488 - *
1489 - * Most of the time we'll only be seeing this 1 cluster at a time
1490 - * anyway.
1491 - *
1492 - * Always lock for any unwritten extents - we might want to
1493 - * add blocks during a split.
1494 - */
1495 - if (!num_free_extents ||
1496 - (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
1497 - ret = ocfs2_reserve_new_metadata(osb, &di->id2.i_list, meta_ac);
1498 - if (ret < 0) {
1499 - if (ret != -ENOSPC)
1500 - mlog_errno(ret);
1501 - goto out;
1502 - }
1503 - }
1504 -
1505 - if (clusters_to_add == 0)
1506 - goto out;
1507 -
1508 - ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
1509 - if (ret < 0) {
1510 - if (ret != -ENOSPC)
1511 - mlog_errno(ret);
1512 - goto out;
1513 - }
1514 -
1515 -out:
1516 - if (ret) {
1517 - if (*meta_ac) {
1518 - ocfs2_free_alloc_context(*meta_ac);
1519 - *meta_ac = NULL;
1520 - }
1521 -
1522 - /*
1523 - * We cannot have an error and a non null *data_ac.
1524 - */
1525 - }
1526 -
1527 - return ret;
1528 -}
1529 -
1530 static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
1531 u32 clusters_to_add, int mark_unwritten)
1532 {
1533 @@ -725,7 +640,13 @@ static int __ocfs2_extend_allocation(str
1534 restart_all:
1535 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
1536
1537 - status = ocfs2_lock_allocators(inode, bh, clusters_to_add, 0, &data_ac,
1538 + mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
1539 + "clusters_to_add = %u\n",
1540 + (unsigned long long)OCFS2_I(inode)->ip_blkno,
1541 + (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
1542 + clusters_to_add);
1543 + status = ocfs2_lock_allocators(inode, bh, &fe->id2.i_list,
1544 + clusters_to_add, 0, &data_ac,
1545 &meta_ac);
1546 if (status) {
1547 mlog_errno(status);
1548 @@ -1397,7 +1318,8 @@ static int __ocfs2_remove_inode_range(st
1549 struct ocfs2_alloc_context *meta_ac = NULL;
1550 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1551
1552 - ret = ocfs2_lock_allocators(inode, di_bh, 0, 1, NULL, &meta_ac);
1553 + ret = ocfs2_lock_allocators(inode, di_bh, &di->id2.i_list,
1554 + 0, 1, NULL, &meta_ac);
1555 if (ret) {
1556 mlog_errno(ret);
1557 return ret;
1558 @@ -1428,7 +1350,7 @@ static int __ocfs2_remove_inode_range(st
1559 }
1560
1561 ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
1562 - dealloc);
1563 + dealloc, OCFS2_DINODE_EXTENT);
1564 if (ret) {
1565 mlog_errno(ret);
1566 goto out_commit;
1567 Index: linux-2.6.26/fs/ocfs2/file.h
1568 ===================================================================
1569 --- linux-2.6.26.orig/fs/ocfs2/file.h
1570 +++ linux-2.6.26/fs/ocfs2/file.h
1571 @@ -55,10 +55,6 @@ int ocfs2_do_extend_allocation(struct oc
1572 enum ocfs2_alloc_restarted *reason_ret);
1573 int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
1574 u64 zero_to);
1575 -int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *fe,
1576 - u32 clusters_to_add, u32 extents_to_split,
1577 - struct ocfs2_alloc_context **data_ac,
1578 - struct ocfs2_alloc_context **meta_ac);
1579 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
1580 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1581 struct kstat *stat);
1582 Index: linux-2.6.26/fs/ocfs2/suballoc.c
1583 ===================================================================
1584 --- linux-2.6.26.orig/fs/ocfs2/suballoc.c
1585 +++ linux-2.6.26/fs/ocfs2/suballoc.c
1586 @@ -1891,3 +1891,85 @@ static inline void ocfs2_debug_suballoc_
1587 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
1588 }
1589 }
1590 +
1591 +/*
1592 + * For a given allocation, determine which allocators will need to be
1593 + * accessed, and lock them, reserving the appropriate number of bits.
1594 + * On error *meta_ac is freed and reset to NULL. data_ac may only be
1595 + * NULL when clusters_to_add is 0 (enforced by the BUG_ON below).
1596 + *
1597 + * Sparse file systems call this from ocfs2_write_begin_nolock() and
1598 + * ocfs2_allocate_unwritten_extents(); file systems which don't support
1599 + * holes call this from ocfs2_extend_allocation().
1600 + */
1601 +int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh,
1602 + struct ocfs2_extent_list *root_el,
1603 + u32 clusters_to_add, u32 extents_to_split,
1604 + struct ocfs2_alloc_context **data_ac,
1605 + struct ocfs2_alloc_context **meta_ac)
1606 +{
1607 + int ret = 0, num_free_extents;
1608 + unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1609 + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1610 +
1611 + *meta_ac = NULL; /* start clean so the error path below can tell what was reserved */
1612 + if (data_ac)
1613 + *data_ac = NULL;
1614 +
1615 + BUG_ON(clusters_to_add != 0 && data_ac == NULL);
1616 +
1617 + num_free_extents = ocfs2_num_free_extents(osb, inode, root_bh,
1618 + OCFS2_DINODE_EXTENT); /* NOTE(review): tree type is hard-coded to dinode although root_bh/root_el are caller-supplied -- confirm */
1619 + if (num_free_extents < 0) {
1620 + ret = num_free_extents;
1621 + mlog_errno(ret);
1622 + goto out;
1623 + }
1624 +
1625 + /*
1626 + * Sparse allocation file systems need to be more conservative
1627 + * with reserving room for expansion - the actual allocation
1628 + * happens while we've got a journal handle open so re-taking
1629 + * a cluster lock (because we ran out of room for another
1630 + * extent) will violate ordering rules.
1631 + *
1632 + * Most of the time we'll only be seeing this 1 cluster at a time
1633 + * anyway.
1634 + *
1635 + * Always lock for any unwritten extents - we might want to
1636 + * add blocks during a split.
1637 + */
1638 + if (!num_free_extents ||
1639 + (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
1640 + ret = ocfs2_reserve_new_metadata(osb, root_el, meta_ac);
1641 + if (ret < 0) {
1642 + if (ret != -ENOSPC) /* -ENOSPC is returned to the caller without being logged */
1643 + mlog_errno(ret);
1644 + goto out;
1645 + }
1646 + }
1647 +
1648 + if (clusters_to_add == 0) /* metadata-only request: no data clusters to reserve */
1649 + goto out;
1650 +
1651 + ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
1652 + if (ret < 0) {
1653 + if (ret != -ENOSPC)
1654 + mlog_errno(ret);
1655 + goto out;
1656 + }
1657 +
1658 +out:
1659 + if (ret) {
1660 + if (*meta_ac) {
1661 + ocfs2_free_alloc_context(*meta_ac);
1662 + *meta_ac = NULL;
1663 + }
1664 +
1665 + /*
1666 + * We cannot have an error and a non null *data_ac.
1667 + */
1668 + }
1669 +
1670 + return ret;
1671 +}
1672 Index: linux-2.6.26/fs/ocfs2/suballoc.h
1673 ===================================================================
1674 --- linux-2.6.26.orig/fs/ocfs2/suballoc.h
1675 +++ linux-2.6.26/fs/ocfs2/suballoc.h
1676 @@ -161,4 +161,9 @@ u64 ocfs2_which_cluster_group(struct ino
1677 int ocfs2_check_group_descriptor(struct super_block *sb,
1678 struct ocfs2_dinode *di,
1679 struct ocfs2_group_desc *gd);
1680 +int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh,
1681 + struct ocfs2_extent_list *root_el,
1682 + u32 clusters_to_add, u32 extents_to_split,
1683 + struct ocfs2_alloc_context **data_ac,
1684 + struct ocfs2_alloc_context **meta_ac);
1685 #endif /* _CHAINALLOC_H_ */