]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blob - src/patches/suse-2.6.27.25/patches.fixes/ext4-fixes-2.6.28-rc8.patch
Updated xen patches taken from suse.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.fixes / ext4-fixes-2.6.28-rc8.patch
1 From: Mingming Cao <cmm@us.ibm.com>
2 Subject: Ext4 update
3 References: fate#303783
4
5 Bring ext4 codebase to the state of 2.6.28-rc8. It has lots of bugfixes, some
6 of them really important ones (data corruption, easily triggerable kernel
7 oopses with delayed allocation, ...).
8
9 Signed-off-by: Jan Kara <jack@suse.cz>
10
11 diff -rup b/fs/ext4//acl.h a/fs/ext4///acl.h
12 --- b/fs/ext4/acl.h 2009-02-11 14:37:58.000000000 +0100
13 +++ a/fs/ext4/acl.h 2009-02-10 21:40:14.000000000 +0100
14 @@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t
15 }
16 }
17
18 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
19 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
20
21 /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
22 if the ACL has not been cached */
23 #define EXT4_ACL_NOT_CACHED ((void *)-1)
24
25 /* acl.c */
26 -extern int ext4_permission (struct inode *, int);
27 -extern int ext4_acl_chmod (struct inode *);
28 -extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
29 +extern int ext4_permission(struct inode *, int);
30 +extern int ext4_acl_chmod(struct inode *);
31 +extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
32
33 -#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */
34 +#else /* CONFIG_EXT4_FS_POSIX_ACL */
35 #include <linux/sched.h>
36 #define ext4_permission NULL
37
38 @@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct i
39 {
40 return 0;
41 }
42 -#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */
43 +#endif /* CONFIG_EXT4_FS_POSIX_ACL */
44
45 diff -rup b/fs/ext4//balloc.c a/fs/ext4///balloc.c
46 --- b/fs/ext4/balloc.c 2009-02-11 14:37:58.000000000 +0100
47 +++ a/fs/ext4/balloc.c 2009-02-10 21:40:11.000000000 +0100
48 @@ -20,6 +20,7 @@
49 #include "ext4.h"
50 #include "ext4_jbd2.h"
51 #include "group.h"
52 +#include "mballoc.h"
53
54 /*
55 * balloc.c contains the blocks allocation and deallocation routines
56 @@ -83,6 +84,7 @@ static int ext4_group_used_meta_blocks(s
57 }
58 return used_blocks;
59 }
60 +
61 /* Initializes an uninitialized block bitmap if given, and returns the
62 * number of blocks free in the group. */
63 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
64 @@ -99,10 +101,10 @@ unsigned ext4_init_block_bitmap(struct s
65 * essentially implementing a per-group read-only flag. */
66 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
67 ext4_error(sb, __func__,
68 - "Checksum bad for group %lu\n", block_group);
69 - gdp->bg_free_blocks_count = 0;
70 - gdp->bg_free_inodes_count = 0;
71 - gdp->bg_itable_unused = 0;
72 + "Checksum bad for group %u\n", block_group);
73 + ext4_free_blks_set(sb, gdp, 0);
74 + ext4_free_inodes_set(sb, gdp, 0);
75 + ext4_itable_unused_set(sb, gdp, 0);
76 memset(bh->b_data, 0xff, sb->s_blocksize);
77 return 0;
78 }
79 @@ -132,7 +134,7 @@ unsigned ext4_init_block_bitmap(struct s
80 */
81 group_blocks = ext4_blocks_count(sbi->s_es) -
82 le32_to_cpu(sbi->s_es->s_first_data_block) -
83 - (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
84 + (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
85 } else {
86 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
87 }
88 @@ -200,20 +202,20 @@ unsigned ext4_init_block_bitmap(struct s
89 * @bh: pointer to the buffer head to store the block
90 * group descriptor
91 */
92 -struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
93 +struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
94 ext4_group_t block_group,
95 - struct buffer_head ** bh)
96 + struct buffer_head **bh)
97 {
98 - unsigned long group_desc;
99 - unsigned long offset;
100 - struct ext4_group_desc * desc;
101 + unsigned int group_desc;
102 + unsigned int offset;
103 + struct ext4_group_desc *desc;
104 struct ext4_sb_info *sbi = EXT4_SB(sb);
105
106 if (block_group >= sbi->s_groups_count) {
107 - ext4_error (sb, "ext4_get_group_desc",
108 - "block_group >= groups_count - "
109 - "block_group = %lu, groups_count = %lu",
110 - block_group, sbi->s_groups_count);
111 + ext4_error(sb, "ext4_get_group_desc",
112 + "block_group >= groups_count - "
113 + "block_group = %u, groups_count = %u",
114 + block_group, sbi->s_groups_count);
115
116 return NULL;
117 }
118 @@ -222,10 +224,10 @@ struct ext4_group_desc * ext4_get_group_
119 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
120 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
121 if (!sbi->s_group_desc[group_desc]) {
122 - ext4_error (sb, "ext4_get_group_desc",
123 - "Group descriptor not loaded - "
124 - "block_group = %lu, group_desc = %lu, desc = %lu",
125 - block_group, group_desc, offset);
126 + ext4_error(sb, "ext4_get_group_desc",
127 + "Group descriptor not loaded - "
128 + "block_group = %u, group_desc = %u, desc = %u",
129 + block_group, group_desc, offset);
130 return NULL;
131 }
132
133 @@ -302,8 +304,8 @@ err_out:
134 struct buffer_head *
135 ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
136 {
137 - struct ext4_group_desc * desc;
138 - struct buffer_head * bh = NULL;
139 + struct ext4_group_desc *desc;
140 + struct buffer_head *bh = NULL;
141 ext4_fsblk_t bitmap_blk;
142
143 desc = ext4_get_group_desc(sb, block_group, NULL);
144 @@ -314,27 +316,50 @@ ext4_read_block_bitmap(struct super_bloc
145 if (unlikely(!bh)) {
146 ext4_error(sb, __func__,
147 "Cannot read block bitmap - "
148 - "block_group = %lu, block_bitmap = %llu",
149 + "block_group = %u, block_bitmap = %llu",
150 block_group, bitmap_blk);
151 return NULL;
152 }
153 - if (bh_uptodate_or_lock(bh))
154 +
155 + if (bitmap_uptodate(bh))
156 return bh;
157
158 + lock_buffer(bh);
159 + if (bitmap_uptodate(bh)) {
160 + unlock_buffer(bh);
161 + return bh;
162 + }
163 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
164 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
165 ext4_init_block_bitmap(sb, bh, block_group, desc);
166 + set_bitmap_uptodate(bh);
167 set_buffer_uptodate(bh);
168 - unlock_buffer(bh);
169 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
170 + unlock_buffer(bh);
171 return bh;
172 }
173 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
174 + if (buffer_uptodate(bh)) {
175 + /*
176 + * if not uninit if bh is uptodate,
177 + * bitmap is also uptodate
178 + */
179 + set_bitmap_uptodate(bh);
180 + unlock_buffer(bh);
181 + return bh;
182 + }
183 + /*
184 + * submit the buffer_head for read. We can
185 + * safely mark the bitmap as uptodate now.
186 + * We do it here so the bitmap uptodate bit
187 + * get set with buffer lock held.
188 + */
189 + set_bitmap_uptodate(bh);
190 if (bh_submit_read(bh) < 0) {
191 put_bh(bh);
192 ext4_error(sb, __func__,
193 "Cannot read block bitmap - "
194 - "block_group = %lu, block_bitmap = %llu",
195 + "block_group = %u, block_bitmap = %llu",
196 block_group, bitmap_blk);
197 return NULL;
198 }
199 @@ -345,356 +370,50 @@ ext4_read_block_bitmap(struct super_bloc
200 */
201 return bh;
202 }
203 -/*
204 - * The reservation window structure operations
205 - * --------------------------------------------
206 - * Operations include:
207 - * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
208 - *
209 - * We use a red-black tree to represent per-filesystem reservation
210 - * windows.
211 - *
212 - */
213 -
214 -/**
215 - * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
216 - * @rb_root: root of per-filesystem reservation rb tree
217 - * @verbose: verbose mode
218 - * @fn: function which wishes to dump the reservation map
219 - *
220 - * If verbose is turned on, it will print the whole block reservation
221 - * windows(start, end). Otherwise, it will only print out the "bad" windows,
222 - * those windows that overlap with their immediate neighbors.
223 - */
224 -#if 1
225 -static void __rsv_window_dump(struct rb_root *root, int verbose,
226 - const char *fn)
227 -{
228 - struct rb_node *n;
229 - struct ext4_reserve_window_node *rsv, *prev;
230 - int bad;
231 -
232 -restart:
233 - n = rb_first(root);
234 - bad = 0;
235 - prev = NULL;
236 -
237 - printk("Block Allocation Reservation Windows Map (%s):\n", fn);
238 - while (n) {
239 - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
240 - if (verbose)
241 - printk("reservation window 0x%p "
242 - "start: %llu, end: %llu\n",
243 - rsv, rsv->rsv_start, rsv->rsv_end);
244 - if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
245 - printk("Bad reservation %p (start >= end)\n",
246 - rsv);
247 - bad = 1;
248 - }
249 - if (prev && prev->rsv_end >= rsv->rsv_start) {
250 - printk("Bad reservation %p (prev->end >= start)\n",
251 - rsv);
252 - bad = 1;
253 - }
254 - if (bad) {
255 - if (!verbose) {
256 - printk("Restarting reservation walk in verbose mode\n");
257 - verbose = 1;
258 - goto restart;
259 - }
260 - }
261 - n = rb_next(n);
262 - prev = rsv;
263 - }
264 - printk("Window map complete.\n");
265 - BUG_ON(bad);
266 -}
267 -#define rsv_window_dump(root, verbose) \
268 - __rsv_window_dump((root), (verbose), __func__)
269 -#else
270 -#define rsv_window_dump(root, verbose) do {} while (0)
271 -#endif
272 -
273 -/**
274 - * goal_in_my_reservation()
275 - * @rsv: inode's reservation window
276 - * @grp_goal: given goal block relative to the allocation block group
277 - * @group: the current allocation block group
278 - * @sb: filesystem super block
279 - *
280 - * Test if the given goal block (group relative) is within the file's
281 - * own block reservation window range.
282 - *
283 - * If the reservation window is outside the goal allocation group, return 0;
284 - * grp_goal (given goal block) could be -1, which means no specific
285 - * goal block. In this case, always return 1.
286 - * If the goal block is within the reservation window, return 1;
287 - * otherwise, return 0;
288 - */
289 -static int
290 -goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
291 - ext4_group_t group, struct super_block *sb)
292 -{
293 - ext4_fsblk_t group_first_block, group_last_block;
294 -
295 - group_first_block = ext4_group_first_block_no(sb, group);
296 - group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
297 -
298 - if ((rsv->_rsv_start > group_last_block) ||
299 - (rsv->_rsv_end < group_first_block))
300 - return 0;
301 - if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
302 - || (grp_goal + group_first_block > rsv->_rsv_end)))
303 - return 0;
304 - return 1;
305 -}
306
307 /**
308 - * search_reserve_window()
309 - * @rb_root: root of reservation tree
310 - * @goal: target allocation block
311 - *
312 - * Find the reserved window which includes the goal, or the previous one
313 - * if the goal is not in any window.
314 - * Returns NULL if there are no windows or if all windows start after the goal.
315 - */
316 -static struct ext4_reserve_window_node *
317 -search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
318 -{
319 - struct rb_node *n = root->rb_node;
320 - struct ext4_reserve_window_node *rsv;
321 -
322 - if (!n)
323 - return NULL;
324 -
325 - do {
326 - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
327 -
328 - if (goal < rsv->rsv_start)
329 - n = n->rb_left;
330 - else if (goal > rsv->rsv_end)
331 - n = n->rb_right;
332 - else
333 - return rsv;
334 - } while (n);
335 - /*
336 - * We've fallen off the end of the tree: the goal wasn't inside
337 - * any particular node. OK, the previous node must be to one
338 - * side of the interval containing the goal. If it's the RHS,
339 - * we need to back up one.
340 - */
341 - if (rsv->rsv_start > goal) {
342 - n = rb_prev(&rsv->rsv_node);
343 - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
344 - }
345 - return rsv;
346 -}
347 -
348 -/**
349 - * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
350 - * @sb: super block
351 - * @rsv: reservation window to add
352 - *
353 - * Must be called with rsv_lock hold.
354 - */
355 -void ext4_rsv_window_add(struct super_block *sb,
356 - struct ext4_reserve_window_node *rsv)
357 -{
358 - struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
359 - struct rb_node *node = &rsv->rsv_node;
360 - ext4_fsblk_t start = rsv->rsv_start;
361 -
362 - struct rb_node ** p = &root->rb_node;
363 - struct rb_node * parent = NULL;
364 - struct ext4_reserve_window_node *this;
365 -
366 - while (*p)
367 - {
368 - parent = *p;
369 - this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
370 -
371 - if (start < this->rsv_start)
372 - p = &(*p)->rb_left;
373 - else if (start > this->rsv_end)
374 - p = &(*p)->rb_right;
375 - else {
376 - rsv_window_dump(root, 1);
377 - BUG();
378 - }
379 - }
380 -
381 - rb_link_node(node, parent, p);
382 - rb_insert_color(node, root);
383 -}
384 -
385 -/**
386 - * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
387 - * @sb: super block
388 - * @rsv: reservation window to remove
389 - *
390 - * Mark the block reservation window as not allocated, and unlink it
391 - * from the filesystem reservation window rb tree. Must be called with
392 - * rsv_lock hold.
393 - */
394 -static void rsv_window_remove(struct super_block *sb,
395 - struct ext4_reserve_window_node *rsv)
396 -{
397 - rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
398 - rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
399 - rsv->rsv_alloc_hit = 0;
400 - rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
401 -}
402 -
403 -/*
404 - * rsv_is_empty() -- Check if the reservation window is allocated.
405 - * @rsv: given reservation window to check
406 - *
407 - * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
408 - */
409 -static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
410 -{
411 - /* a valid reservation end block could not be 0 */
412 - return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
413 -}
414 -
415 -/**
416 - * ext4_init_block_alloc_info()
417 - * @inode: file inode structure
418 - *
419 - * Allocate and initialize the reservation window structure, and
420 - * link the window to the ext4 inode structure at last
421 - *
422 - * The reservation window structure is only dynamically allocated
423 - * and linked to ext4 inode the first time the open file
424 - * needs a new block. So, before every ext4_new_block(s) call, for
425 - * regular files, we should check whether the reservation window
426 - * structure exists or not. In the latter case, this function is called.
427 - * Fail to do so will result in block reservation being turned off for that
428 - * open file.
429 - *
430 - * This function is called from ext4_get_blocks_handle(), also called
431 - * when setting the reservation window size through ioctl before the file
432 - * is open for write (needs block allocation).
433 - *
434 - * Needs down_write(i_data_sem) protection prior to call this function.
435 - */
436 -void ext4_init_block_alloc_info(struct inode *inode)
437 -{
438 - struct ext4_inode_info *ei = EXT4_I(inode);
439 - struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
440 - struct super_block *sb = inode->i_sb;
441 -
442 - block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
443 - if (block_i) {
444 - struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
445 -
446 - rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
447 - rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
448 -
449 - /*
450 - * if filesystem is mounted with NORESERVATION, the goal
451 - * reservation window size is set to zero to indicate
452 - * block reservation is off
453 - */
454 - if (!test_opt(sb, RESERVATION))
455 - rsv->rsv_goal_size = 0;
456 - else
457 - rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
458 - rsv->rsv_alloc_hit = 0;
459 - block_i->last_alloc_logical_block = 0;
460 - block_i->last_alloc_physical_block = 0;
461 - }
462 - ei->i_block_alloc_info = block_i;
463 -}
464 -
465 -/**
466 - * ext4_discard_reservation()
467 - * @inode: inode
468 - *
469 - * Discard(free) block reservation window on last file close, or truncate
470 - * or at last iput().
471 - *
472 - * It is being called in three cases:
473 - * ext4_release_file(): last writer close the file
474 - * ext4_clear_inode(): last iput(), when nobody link to this file.
475 - * ext4_truncate(): when the block indirect map is about to change.
476 - *
477 - */
478 -void ext4_discard_reservation(struct inode *inode)
479 -{
480 - struct ext4_inode_info *ei = EXT4_I(inode);
481 - struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
482 - struct ext4_reserve_window_node *rsv;
483 - spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
484 -
485 - ext4_mb_discard_inode_preallocations(inode);
486 -
487 - if (!block_i)
488 - return;
489 -
490 - rsv = &block_i->rsv_window_node;
491 - if (!rsv_is_empty(&rsv->rsv_window)) {
492 - spin_lock(rsv_lock);
493 - if (!rsv_is_empty(&rsv->rsv_window))
494 - rsv_window_remove(inode->i_sb, rsv);
495 - spin_unlock(rsv_lock);
496 - }
497 -}
498 -
499 -/**
500 - * ext4_free_blocks_sb() -- Free given blocks and update quota
501 + * ext4_add_groupblocks() -- Add given blocks to an existing group
502 * @handle: handle to this transaction
503 * @sb: super block
504 - * @block: start physcial block to free
505 + * @block: start physical block to add to the block group
506 * @count: number of blocks to free
507 - * @pdquot_freed_blocks: pointer to quota
508 + *
509 + * This marks the blocks as free in the bitmap. We ask the
510 + * mballoc to reload the buddy after this by setting group
511 + * EXT4_GROUP_INFO_NEED_INIT_BIT flag
512 */
513 -void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
514 - ext4_fsblk_t block, unsigned long count,
515 - unsigned long *pdquot_freed_blocks)
516 +void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
517 + ext4_fsblk_t block, unsigned long count)
518 {
519 struct buffer_head *bitmap_bh = NULL;
520 struct buffer_head *gd_bh;
521 ext4_group_t block_group;
522 ext4_grpblk_t bit;
523 - unsigned long i;
524 - unsigned long overflow;
525 - struct ext4_group_desc * desc;
526 - struct ext4_super_block * es;
527 + unsigned int i;
528 + struct ext4_group_desc *desc;
529 + struct ext4_super_block *es;
530 struct ext4_sb_info *sbi;
531 - int err = 0, ret;
532 - ext4_grpblk_t group_freed;
533 + int err = 0, ret, blk_free_count;
534 + ext4_grpblk_t blocks_freed;
535 + struct ext4_group_info *grp;
536
537 - *pdquot_freed_blocks = 0;
538 sbi = EXT4_SB(sb);
539 es = sbi->s_es;
540 - if (block < le32_to_cpu(es->s_first_data_block) ||
541 - block + count < block ||
542 - block + count > ext4_blocks_count(es)) {
543 - ext4_error (sb, "ext4_free_blocks",
544 - "Freeing blocks not in datazone - "
545 - "block = %llu, count = %lu", block, count);
546 - goto error_return;
547 - }
548 -
549 - ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1);
550 + ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
551
552 -do_more:
553 - overflow = 0;
554 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
555 + grp = ext4_get_group_info(sb, block_group);
556 /*
557 * Check to see if we are freeing blocks across a group
558 * boundary.
559 */
560 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
561 - overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
562 - count -= overflow;
563 + goto error_return;
564 }
565 - brelse(bitmap_bh);
566 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
567 if (!bitmap_bh)
568 goto error_return;
569 - desc = ext4_get_group_desc (sb, block_group, &gd_bh);
570 + desc = ext4_get_group_desc(sb, block_group, &gd_bh);
571 if (!desc)
572 goto error_return;
573
574 @@ -703,18 +422,17 @@ do_more:
575 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
576 in_range(block + count - 1, ext4_inode_table(sb, desc),
577 sbi->s_itb_per_group)) {
578 - ext4_error (sb, "ext4_free_blocks",
579 - "Freeing blocks in system zones - "
580 - "Block = %llu, count = %lu",
581 - block, count);
582 + ext4_error(sb, __func__,
583 + "Adding blocks in system zones - "
584 + "Block = %llu, count = %lu",
585 + block, count);
586 goto error_return;
587 }
588
589 /*
590 - * We are about to start releasing blocks in the bitmap,
591 + * We are about to add blocks to the bitmap,
592 * so we need undo access.
593 */
594 - /* @@@ check errors */
595 BUFFER_TRACE(bitmap_bh, "getting undo access");
596 err = ext4_journal_get_undo_access(handle, bitmap_bh);
597 if (err)
598 @@ -729,90 +447,43 @@ do_more:
599 err = ext4_journal_get_write_access(handle, gd_bh);
600 if (err)
601 goto error_return;
602 -
603 - jbd_lock_bh_state(bitmap_bh);
604 -
605 - for (i = 0, group_freed = 0; i < count; i++) {
606 - /*
607 - * An HJ special. This is expensive...
608 - */
609 -#ifdef CONFIG_JBD2_DEBUG
610 - jbd_unlock_bh_state(bitmap_bh);
611 - {
612 - struct buffer_head *debug_bh;
613 - debug_bh = sb_find_get_block(sb, block + i);
614 - if (debug_bh) {
615 - BUFFER_TRACE(debug_bh, "Deleted!");
616 - if (!bh2jh(bitmap_bh)->b_committed_data)
617 - BUFFER_TRACE(debug_bh,
618 - "No commited data in bitmap");
619 - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
620 - __brelse(debug_bh);
621 - }
622 - }
623 - jbd_lock_bh_state(bitmap_bh);
624 -#endif
625 - if (need_resched()) {
626 - jbd_unlock_bh_state(bitmap_bh);
627 - cond_resched();
628 - jbd_lock_bh_state(bitmap_bh);
629 - }
630 - /* @@@ This prevents newly-allocated data from being
631 - * freed and then reallocated within the same
632 - * transaction.
633 - *
634 - * Ideally we would want to allow that to happen, but to
635 - * do so requires making jbd2_journal_forget() capable of
636 - * revoking the queued write of a data block, which
637 - * implies blocking on the journal lock. *forget()
638 - * cannot block due to truncate races.
639 - *
640 - * Eventually we can fix this by making jbd2_journal_forget()
641 - * return a status indicating whether or not it was able
642 - * to revoke the buffer. On successful revoke, it is
643 - * safe not to set the allocation bit in the committed
644 - * bitmap, because we know that there is no outstanding
645 - * activity on the buffer any more and so it is safe to
646 - * reallocate it.
647 - */
648 - BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
649 - J_ASSERT_BH(bitmap_bh,
650 - bh2jh(bitmap_bh)->b_committed_data != NULL);
651 - ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
652 - bh2jh(bitmap_bh)->b_committed_data);
653 -
654 - /*
655 - * We clear the bit in the bitmap after setting the committed
656 - * data bit, because this is the reverse order to that which
657 - * the allocator uses.
658 - */
659 + /*
660 + * make sure we don't allow a parallel init on other groups in the
661 + * same buddy cache
662 + */
663 + down_write(&grp->alloc_sem);
664 + for (i = 0, blocks_freed = 0; i < count; i++) {
665 BUFFER_TRACE(bitmap_bh, "clear bit");
666 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
667 bit + i, bitmap_bh->b_data)) {
668 - jbd_unlock_bh_state(bitmap_bh);
669 ext4_error(sb, __func__,
670 "bit already cleared for block %llu",
671 (ext4_fsblk_t)(block + i));
672 - jbd_lock_bh_state(bitmap_bh);
673 BUFFER_TRACE(bitmap_bh, "bit already cleared");
674 } else {
675 - group_freed++;
676 + blocks_freed++;
677 }
678 }
679 - jbd_unlock_bh_state(bitmap_bh);
680 -
681 spin_lock(sb_bgl_lock(sbi, block_group));
682 - le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
683 + blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
684 + ext4_free_blks_set(sb, desc, blk_free_count);
685 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
686 spin_unlock(sb_bgl_lock(sbi, block_group));
687 - percpu_counter_add(&sbi->s_freeblocks_counter, count);
688 + percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
689
690 if (sbi->s_log_groups_per_flex) {
691 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
692 spin_lock(sb_bgl_lock(sbi, flex_group));
693 - sbi->s_flex_groups[flex_group].free_blocks += count;
694 + sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
695 spin_unlock(sb_bgl_lock(sbi, flex_group));
696 }
697 + /*
698 + * request to reload the buddy with the
699 + * new bitmap information
700 + */
701 + set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
702 + ext4_mb_update_group_info(grp, blocks_freed);
703 + up_write(&grp->alloc_sem);
704
705 /* We dirtied the bitmap block */
706 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
707 @@ -821,15 +492,10 @@ do_more:
708 /* And the group descriptor block */
709 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
710 ret = ext4_journal_dirty_metadata(handle, gd_bh);
711 - if (!err) err = ret;
712 - *pdquot_freed_blocks += group_freed;
713 -
714 - if (overflow && !err) {
715 - block += count;
716 - count = overflow;
717 - goto do_more;
718 - }
719 + if (!err)
720 + err = ret;
721 sb->s_dirt = 1;
722 +
723 error_return:
724 brelse(bitmap_bh);
725 ext4_std_error(sb, err);
726 @@ -848,792 +514,86 @@ void ext4_free_blocks(handle_t *handle,
727 ext4_fsblk_t block, unsigned long count,
728 int metadata)
729 {
730 - struct super_block * sb;
731 + struct super_block *sb;
732 unsigned long dquot_freed_blocks;
733
734 /* this isn't the right place to decide whether block is metadata
735 * inode.c/extents.c knows better, but for safety ... */
736 - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
737 - ext4_should_journal_data(inode))
738 + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
739 + metadata = 1;
740 +
741 + /* We need to make sure we don't reuse
742 + * block released until the transaction commit.
743 + * writeback mode has weak data consistency so
744 + * don't force data as metadata when freeing block
745 + * for writeback mode.
746 + */
747 + if (metadata == 0 && !ext4_should_writeback_data(inode))
748 metadata = 1;
749
750 sb = inode->i_sb;
751
752 - if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
753 - ext4_free_blocks_sb(handle, sb, block, count,
754 - &dquot_freed_blocks);
755 - else
756 - ext4_mb_free_blocks(handle, inode, block, count,
757 - metadata, &dquot_freed_blocks);
758 + ext4_mb_free_blocks(handle, inode, block, count,
759 + metadata, &dquot_freed_blocks);
760 if (dquot_freed_blocks)
761 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
762 return;
763 }
764
765 /**
766 - * ext4_test_allocatable()
767 - * @nr: given allocation block group
768 - * @bh: bufferhead contains the bitmap of the given block group
769 - *
770 - * For ext4 allocations, we must not reuse any blocks which are
771 - * allocated in the bitmap buffer's "last committed data" copy. This
772 - * prevents deletes from freeing up the page for reuse until we have
773 - * committed the delete transaction.
774 - *
775 - * If we didn't do this, then deleting something and reallocating it as
776 - * data would allow the old block to be overwritten before the
777 - * transaction committed (because we force data to disk before commit).
778 - * This would lead to corruption if we crashed between overwriting the
779 - * data and committing the delete.
780 - *
781 - * @@@ We may want to make this allocation behaviour conditional on
782 - * data-writes at some point, and disable it for metadata allocations or
783 - * sync-data inodes.
784 - */
785 -static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
786 -{
787 - int ret;
788 - struct journal_head *jh = bh2jh(bh);
789 -
790 - if (ext4_test_bit(nr, bh->b_data))
791 - return 0;
792 -
793 - jbd_lock_bh_state(bh);
794 - if (!jh->b_committed_data)
795 - ret = 1;
796 - else
797 - ret = !ext4_test_bit(nr, jh->b_committed_data);
798 - jbd_unlock_bh_state(bh);
799 - return ret;
800 -}
801 -
802 -/**
803 - * bitmap_search_next_usable_block()
804 - * @start: the starting block (group relative) of the search
805 - * @bh: bufferhead contains the block group bitmap
806 - * @maxblocks: the ending block (group relative) of the reservation
807 - *
808 - * The bitmap search --- search forward alternately through the actual
809 - * bitmap on disk and the last-committed copy in journal, until we find a
810 - * bit free in both bitmaps.
811 - */
812 -static ext4_grpblk_t
813 -bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
814 - ext4_grpblk_t maxblocks)
815 -{
816 - ext4_grpblk_t next;
817 - struct journal_head *jh = bh2jh(bh);
818 -
819 - while (start < maxblocks) {
820 - next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
821 - if (next >= maxblocks)
822 - return -1;
823 - if (ext4_test_allocatable(next, bh))
824 - return next;
825 - jbd_lock_bh_state(bh);
826 - if (jh->b_committed_data)
827 - start = ext4_find_next_zero_bit(jh->b_committed_data,
828 - maxblocks, next);
829 - jbd_unlock_bh_state(bh);
830 - }
831 - return -1;
832 -}
833 -
834 -/**
835 - * find_next_usable_block()
836 - * @start: the starting block (group relative) to find next
837 - * allocatable block in bitmap.
838 - * @bh: bufferhead contains the block group bitmap
839 - * @maxblocks: the ending block (group relative) for the search
840 - *
841 - * Find an allocatable block in a bitmap. We honor both the bitmap and
842 - * its last-committed copy (if that exists), and perform the "most
843 - * appropriate allocation" algorithm of looking for a free block near
844 - * the initial goal; then for a free byte somewhere in the bitmap; then
845 - * for any free bit in the bitmap.
846 - */
847 -static ext4_grpblk_t
848 -find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
849 - ext4_grpblk_t maxblocks)
850 -{
851 - ext4_grpblk_t here, next;
852 - char *p, *r;
853 -
854 - if (start > 0) {
855 - /*
856 - * The goal was occupied; search forward for a free
857 - * block within the next XX blocks.
858 - *
859 - * end_goal is more or less random, but it has to be
860 - * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
861 - * next 64-bit boundary is simple..
862 - */
863 - ext4_grpblk_t end_goal = (start + 63) & ~63;
864 - if (end_goal > maxblocks)
865 - end_goal = maxblocks;
866 - here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
867 - if (here < end_goal && ext4_test_allocatable(here, bh))
868 - return here;
869 - ext4_debug("Bit not found near goal\n");
870 - }
871 -
872 - here = start;
873 - if (here < 0)
874 - here = 0;
875 -
876 - p = ((char *)bh->b_data) + (here >> 3);
877 - r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
878 - next = (r - ((char *)bh->b_data)) << 3;
879 -
880 - if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
881 - return next;
882 -
883 - /*
884 - * The bitmap search --- search forward alternately through the actual
885 - * bitmap and the last-committed copy until we find a bit free in
886 - * both
887 - */
888 - here = bitmap_search_next_usable_block(here, bh, maxblocks);
889 - return here;
890 -}
891 -
892 -/**
893 - * claim_block()
894 - * @block: the free block (group relative) to allocate
895 - * @bh: the bufferhead containts the block group bitmap
896 - *
897 - * We think we can allocate this block in this bitmap. Try to set the bit.
898 - * If that succeeds then check that nobody has allocated and then freed the
899 - * block since we saw that is was not marked in b_committed_data. If it _was_
900 - * allocated and freed then clear the bit in the bitmap again and return
901 - * zero (failure).
902 - */
903 -static inline int
904 -claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
905 -{
906 - struct journal_head *jh = bh2jh(bh);
907 - int ret;
908 -
909 - if (ext4_set_bit_atomic(lock, block, bh->b_data))
910 - return 0;
911 - jbd_lock_bh_state(bh);
912 - if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
913 - ext4_clear_bit_atomic(lock, block, bh->b_data);
914 - ret = 0;
915 - } else {
916 - ret = 1;
917 - }
918 - jbd_unlock_bh_state(bh);
919 - return ret;
920 -}
921 -
922 -/**
923 - * ext4_try_to_allocate()
924 - * @sb: superblock
925 - * @handle: handle to this transaction
926 - * @group: given allocation block group
927 - * @bitmap_bh: bufferhead holds the block bitmap
928 - * @grp_goal: given target block within the group
929 - * @count: target number of blocks to allocate
930 - * @my_rsv: reservation window
931 - *
932 - * Attempt to allocate blocks within a give range. Set the range of allocation
933 - * first, then find the first free bit(s) from the bitmap (within the range),
934 - * and at last, allocate the blocks by claiming the found free bit as allocated.
935 - *
936 - * To set the range of this allocation:
937 - * if there is a reservation window, only try to allocate block(s) from the
938 - * file's own reservation window;
939 - * Otherwise, the allocation range starts from the give goal block, ends at
940 - * the block group's last block.
941 - *
942 - * If we failed to allocate the desired block then we may end up crossing to a
943 - * new bitmap. In that case we must release write access to the old one via
944 - * ext4_journal_release_buffer(), else we'll run out of credits.
945 - */
946 -static ext4_grpblk_t
947 -ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
948 - ext4_group_t group, struct buffer_head *bitmap_bh,
949 - ext4_grpblk_t grp_goal, unsigned long *count,
950 - struct ext4_reserve_window *my_rsv)
951 -{
952 - ext4_fsblk_t group_first_block;
953 - ext4_grpblk_t start, end;
954 - unsigned long num = 0;
955 -
956 - /* we do allocation within the reservation window if we have a window */
957 - if (my_rsv) {
958 - group_first_block = ext4_group_first_block_no(sb, group);
959 - if (my_rsv->_rsv_start >= group_first_block)
960 - start = my_rsv->_rsv_start - group_first_block;
961 - else
962 - /* reservation window cross group boundary */
963 - start = 0;
964 - end = my_rsv->_rsv_end - group_first_block + 1;
965 - if (end > EXT4_BLOCKS_PER_GROUP(sb))
966 - /* reservation window crosses group boundary */
967 - end = EXT4_BLOCKS_PER_GROUP(sb);
968 - if ((start <= grp_goal) && (grp_goal < end))
969 - start = grp_goal;
970 - else
971 - grp_goal = -1;
972 - } else {
973 - if (grp_goal > 0)
974 - start = grp_goal;
975 - else
976 - start = 0;
977 - end = EXT4_BLOCKS_PER_GROUP(sb);
978 - }
979 -
980 - BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
981 -
982 -repeat:
983 - if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
984 - grp_goal = find_next_usable_block(start, bitmap_bh, end);
985 - if (grp_goal < 0)
986 - goto fail_access;
987 - if (!my_rsv) {
988 - int i;
989 -
990 - for (i = 0; i < 7 && grp_goal > start &&
991 - ext4_test_allocatable(grp_goal - 1,
992 - bitmap_bh);
993 - i++, grp_goal--)
994 - ;
995 - }
996 - }
997 - start = grp_goal;
998 -
999 - if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1000 - grp_goal, bitmap_bh)) {
1001 - /*
1002 - * The block was allocated by another thread, or it was
1003 - * allocated and then freed by another thread
1004 - */
1005 - start++;
1006 - grp_goal++;
1007 - if (start >= end)
1008 - goto fail_access;
1009 - goto repeat;
1010 - }
1011 - num++;
1012 - grp_goal++;
1013 - while (num < *count && grp_goal < end
1014 - && ext4_test_allocatable(grp_goal, bitmap_bh)
1015 - && claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1016 - grp_goal, bitmap_bh)) {
1017 - num++;
1018 - grp_goal++;
1019 - }
1020 - *count = num;
1021 - return grp_goal - num;
1022 -fail_access:
1023 - *count = num;
1024 - return -1;
1025 -}
1026 -
1027 -/**
1028 - * find_next_reservable_window():
1029 - * find a reservable space within the given range.
1030 - * It does not allocate the reservation window for now:
1031 - * alloc_new_reservation() will do the work later.
1032 - *
1033 - * @search_head: the head of the searching list;
1034 - * This is not necessarily the list head of the whole filesystem
1035 - *
1036 - * We have both head and start_block to assist the search
1037 - * for the reservable space. The list starts from head,
1038 - * but we will shift to the place where start_block is,
1039 - * then start from there, when looking for a reservable space.
1040 - *
1041 - * @size: the target new reservation window size
1042 - *
1043 - * @group_first_block: the first block we consider to start
1044 - * the real search from
1045 - *
1046 - * @last_block:
1047 - * the maximum block number that our goal reservable space
1048 - * could start from. This is normally the last block in this
1049 - * group. The search will end when we found the start of next
1050 - * possible reservable space is out of this boundary.
1051 - * This could handle the cross boundary reservation window
1052 - * request.
1053 - *
1054 - * basically we search from the given range, rather than the whole
1055 - * reservation double linked list, (start_block, last_block)
1056 - * to find a free region that is of my size and has not
1057 - * been reserved.
1058 - *
1059 - */
1060 -static int find_next_reservable_window(
1061 - struct ext4_reserve_window_node *search_head,
1062 - struct ext4_reserve_window_node *my_rsv,
1063 - struct super_block * sb,
1064 - ext4_fsblk_t start_block,
1065 - ext4_fsblk_t last_block)
1066 -{
1067 - struct rb_node *next;
1068 - struct ext4_reserve_window_node *rsv, *prev;
1069 - ext4_fsblk_t cur;
1070 - int size = my_rsv->rsv_goal_size;
1071 -
1072 - /* TODO: make the start of the reservation window byte-aligned */
1073 - /* cur = *start_block & ~7;*/
1074 - cur = start_block;
1075 - rsv = search_head;
1076 - if (!rsv)
1077 - return -1;
1078 -
1079 - while (1) {
1080 - if (cur <= rsv->rsv_end)
1081 - cur = rsv->rsv_end + 1;
1082 -
1083 - /* TODO?
1084 - * in the case we could not find a reservable space
1085 - * that is what is expected, during the re-search, we could
1086 - * remember what's the largest reservable space we could have
1087 - * and return that one.
1088 - *
1089 - * For now it will fail if we could not find the reservable
1090 - * space with expected-size (or more)...
1091 - */
1092 - if (cur > last_block)
1093 - return -1; /* fail */
1094 -
1095 - prev = rsv;
1096 - next = rb_next(&rsv->rsv_node);
1097 - rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
1098 -
1099 - /*
1100 - * Reached the last reservation, we can just append to the
1101 - * previous one.
1102 - */
1103 - if (!next)
1104 - break;
1105 -
1106 - if (cur + size <= rsv->rsv_start) {
1107 - /*
1108 - * Found a reserveable space big enough. We could
1109 - * have a reservation across the group boundary here
1110 - */
1111 - break;
1112 - }
1113 - }
1114 - /*
1115 - * we come here either :
1116 - * when we reach the end of the whole list,
1117 - * and there is empty reservable space after last entry in the list.
1118 - * append it to the end of the list.
1119 - *
1120 - * or we found one reservable space in the middle of the list,
1121 - * return the reservation window that we could append to.
1122 - * succeed.
1123 - */
1124 -
1125 - if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
1126 - rsv_window_remove(sb, my_rsv);
1127 -
1128 - /*
1129 - * Let's book the whole avaliable window for now. We will check the
1130 - * disk bitmap later and then, if there are free blocks then we adjust
1131 - * the window size if it's larger than requested.
1132 - * Otherwise, we will remove this node from the tree next time
1133 - * call find_next_reservable_window.
1134 - */
1135 - my_rsv->rsv_start = cur;
1136 - my_rsv->rsv_end = cur + size - 1;
1137 - my_rsv->rsv_alloc_hit = 0;
1138 -
1139 - if (prev != my_rsv)
1140 - ext4_rsv_window_add(sb, my_rsv);
1141 -
1142 - return 0;
1143 -}
1144 -
1145 -/**
1146 - * alloc_new_reservation()--allocate a new reservation window
1147 - *
1148 - * To make a new reservation, we search part of the filesystem
1149 - * reservation list (the list that inside the group). We try to
1150 - * allocate a new reservation window near the allocation goal,
1151 - * or the beginning of the group, if there is no goal.
1152 - *
1153 - * We first find a reservable space after the goal, then from
1154 - * there, we check the bitmap for the first free block after
1155 - * it. If there is no free block until the end of group, then the
1156 - * whole group is full, we failed. Otherwise, check if the free
1157 - * block is inside the expected reservable space, if so, we
1158 - * succeed.
1159 - * If the first free block is outside the reservable space, then
1160 - * start from the first free block, we search for next available
1161 - * space, and go on.
1162 - *
1163 - * on succeed, a new reservation will be found and inserted into the list
1164 - * It contains at least one free block, and it does not overlap with other
1165 - * reservation windows.
1166 - *
1167 - * failed: we failed to find a reservation window in this group
1168 - *
1169 - * @rsv: the reservation
1170 - *
1171 - * @grp_goal: The goal (group-relative). It is where the search for a
1172 - * free reservable space should start from.
1173 - * if we have a grp_goal(grp_goal >0 ), then start from there,
1174 - * no grp_goal(grp_goal = -1), we start from the first block
1175 - * of the group.
1176 - *
1177 - * @sb: the super block
1178 - * @group: the group we are trying to allocate in
1179 - * @bitmap_bh: the block group block bitmap
1180 + * ext4_has_free_blocks()
1181 + * @sbi: in-core super block structure.
1182 + * @nblocks: number of needed blocks
1183 *
1184 + * Check if filesystem has nblocks free & available for allocation.
1185 + * On success return 1, return 0 on failure.
1186 */
1187 -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
1188 - ext4_grpblk_t grp_goal, struct super_block *sb,
1189 - ext4_group_t group, struct buffer_head *bitmap_bh)
1190 +int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
1191 {
1192 - struct ext4_reserve_window_node *search_head;
1193 - ext4_fsblk_t group_first_block, group_end_block, start_block;
1194 - ext4_grpblk_t first_free_block;
1195 - struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
1196 - unsigned long size;
1197 - int ret;
1198 - spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1199 -
1200 - group_first_block = ext4_group_first_block_no(sb, group);
1201 - group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1202 -
1203 - if (grp_goal < 0)
1204 - start_block = group_first_block;
1205 - else
1206 - start_block = grp_goal + group_first_block;
1207 + s64 free_blocks, dirty_blocks, root_blocks;
1208 + struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
1209 + struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
1210
1211 - size = my_rsv->rsv_goal_size;
1212 + free_blocks = percpu_counter_read_positive(fbc);
1213 + dirty_blocks = percpu_counter_read_positive(dbc);
1214 + root_blocks = ext4_r_blocks_count(sbi->s_es);
1215
1216 - if (!rsv_is_empty(&my_rsv->rsv_window)) {
1217 - /*
1218 - * if the old reservation is cross group boundary
1219 - * and if the goal is inside the old reservation window,
1220 - * we will come here when we just failed to allocate from
1221 - * the first part of the window. We still have another part
1222 - * that belongs to the next group. In this case, there is no
1223 - * point to discard our window and try to allocate a new one
1224 - * in this group(which will fail). we should
1225 - * keep the reservation window, just simply move on.
1226 - *
1227 - * Maybe we could shift the start block of the reservation
1228 - * window to the first block of next group.
1229 - */
1230 -
1231 - if ((my_rsv->rsv_start <= group_end_block) &&
1232 - (my_rsv->rsv_end > group_end_block) &&
1233 - (start_block >= my_rsv->rsv_start))
1234 - return -1;
1235 -
1236 - if ((my_rsv->rsv_alloc_hit >
1237 - (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
1238 - /*
1239 - * if the previously allocation hit ratio is
1240 - * greater than 1/2, then we double the size of
1241 - * the reservation window the next time,
1242 - * otherwise we keep the same size window
1243 - */
1244 - size = size * 2;
1245 - if (size > EXT4_MAX_RESERVE_BLOCKS)
1246 - size = EXT4_MAX_RESERVE_BLOCKS;
1247 - my_rsv->rsv_goal_size= size;
1248 + if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
1249 + EXT4_FREEBLOCKS_WATERMARK) {
1250 + free_blocks = percpu_counter_sum_positive(fbc);
1251 + dirty_blocks = percpu_counter_sum_positive(dbc);
1252 + if (dirty_blocks < 0) {
1253 + printk(KERN_CRIT "Dirty block accounting "
1254 + "went wrong %lld\n",
1255 + (long long)dirty_blocks);
1256 }
1257 }
1258 -
1259 - spin_lock(rsv_lock);
1260 - /*
1261 - * shift the search start to the window near the goal block
1262 - */
1263 - search_head = search_reserve_window(fs_rsv_root, start_block);
1264 -
1265 - /*
1266 - * find_next_reservable_window() simply finds a reservable window
1267 - * inside the given range(start_block, group_end_block).
1268 - *
1269 - * To make sure the reservation window has a free bit inside it, we
1270 - * need to check the bitmap after we found a reservable window.
1271 - */
1272 -retry:
1273 - ret = find_next_reservable_window(search_head, my_rsv, sb,
1274 - start_block, group_end_block);
1275 -
1276 - if (ret == -1) {
1277 - if (!rsv_is_empty(&my_rsv->rsv_window))
1278 - rsv_window_remove(sb, my_rsv);
1279 - spin_unlock(rsv_lock);
1280 - return -1;
1281 - }
1282 -
1283 - /*
1284 - * On success, find_next_reservable_window() returns the
1285 - * reservation window where there is a reservable space after it.
1286 - * Before we reserve this reservable space, we need
1287 - * to make sure there is at least a free block inside this region.
1288 - *
1289 - * searching the first free bit on the block bitmap and copy of
1290 - * last committed bitmap alternatively, until we found a allocatable
1291 - * block. Search start from the start block of the reservable space
1292 - * we just found.
1293 - */
1294 - spin_unlock(rsv_lock);
1295 - first_free_block = bitmap_search_next_usable_block(
1296 - my_rsv->rsv_start - group_first_block,
1297 - bitmap_bh, group_end_block - group_first_block + 1);
1298 -
1299 - if (first_free_block < 0) {
1300 - /*
1301 - * no free block left on the bitmap, no point
1302 - * to reserve the space. return failed.
1303 - */
1304 - spin_lock(rsv_lock);
1305 - if (!rsv_is_empty(&my_rsv->rsv_window))
1306 - rsv_window_remove(sb, my_rsv);
1307 - spin_unlock(rsv_lock);
1308 - return -1; /* failed */
1309 - }
1310 -
1311 - start_block = first_free_block + group_first_block;
1312 - /*
1313 - * check if the first free block is within the
1314 - * free space we just reserved
1315 - */
1316 - if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
1317 - return 0; /* success */
1318 - /*
1319 - * if the first free bit we found is out of the reservable space
1320 - * continue search for next reservable space,
1321 - * start from where the free block is,
1322 - * we also shift the list head to where we stopped last time
1323 - */
1324 - search_head = my_rsv;
1325 - spin_lock(rsv_lock);
1326 - goto retry;
1327 -}
1328 -
1329 -/**
1330 - * try_to_extend_reservation()
1331 - * @my_rsv: given reservation window
1332 - * @sb: super block
1333 - * @size: the delta to extend
1334 - *
1335 - * Attempt to expand the reservation window large enough to have
1336 - * required number of free blocks
1337 - *
1338 - * Since ext4_try_to_allocate() will always allocate blocks within
1339 - * the reservation window range, if the window size is too small,
1340 - * multiple blocks allocation has to stop at the end of the reservation
1341 - * window. To make this more efficient, given the total number of
1342 - * blocks needed and the current size of the window, we try to
1343 - * expand the reservation window size if necessary on a best-effort
1344 - * basis before ext4_new_blocks() tries to allocate blocks,
1345 - */
1346 -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
1347 - struct super_block *sb, int size)
1348 -{
1349 - struct ext4_reserve_window_node *next_rsv;
1350 - struct rb_node *next;
1351 - spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1352 -
1353 - if (!spin_trylock(rsv_lock))
1354 - return;
1355 -
1356 - next = rb_next(&my_rsv->rsv_node);
1357 -
1358 - if (!next)
1359 - my_rsv->rsv_end += size;
1360 - else {
1361 - next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
1362 -
1363 - if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
1364 - my_rsv->rsv_end += size;
1365 - else
1366 - my_rsv->rsv_end = next_rsv->rsv_start - 1;
1367 - }
1368 - spin_unlock(rsv_lock);
1369 -}
1370 -
1371 -/**
1372 - * ext4_try_to_allocate_with_rsv()
1373 - * @sb: superblock
1374 - * @handle: handle to this transaction
1375 - * @group: given allocation block group
1376 - * @bitmap_bh: bufferhead holds the block bitmap
1377 - * @grp_goal: given target block within the group
1378 - * @count: target number of blocks to allocate
1379 - * @my_rsv: reservation window
1380 - * @errp: pointer to store the error code
1381 - *
1382 - * This is the main function used to allocate a new block and its reservation
1383 - * window.
1384 - *
1385 - * Each time when a new block allocation is need, first try to allocate from
1386 - * its own reservation. If it does not have a reservation window, instead of
1387 - * looking for a free bit on bitmap first, then look up the reservation list to
1388 - * see if it is inside somebody else's reservation window, we try to allocate a
1389 - * reservation window for it starting from the goal first. Then do the block
1390 - * allocation within the reservation window.
1391 - *
1392 - * This will avoid keeping on searching the reservation list again and
1393 - * again when somebody is looking for a free block (without
1394 - * reservation), and there are lots of free blocks, but they are all
1395 - * being reserved.
1396 - *
1397 - * We use a red-black tree for the per-filesystem reservation list.
1398 - *
1399 - */
1400 -static ext4_grpblk_t
1401 -ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1402 - ext4_group_t group, struct buffer_head *bitmap_bh,
1403 - ext4_grpblk_t grp_goal,
1404 - struct ext4_reserve_window_node * my_rsv,
1405 - unsigned long *count, int *errp)
1406 -{
1407 - ext4_fsblk_t group_first_block, group_last_block;
1408 - ext4_grpblk_t ret = 0;
1409 - int fatal;
1410 - unsigned long num = *count;
1411 -
1412 - *errp = 0;
1413 -
1414 - /*
1415 - * Make sure we use undo access for the bitmap, because it is critical
1416 - * that we do the frozen_data COW on bitmap buffers in all cases even
1417 - * if the buffer is in BJ_Forget state in the committing transaction.
1418 + /* Check whether we have space after
1419 + * accounting for current dirty blocks & root reserved blocks.
1420 */
1421 - BUFFER_TRACE(bitmap_bh, "get undo access for new block");
1422 - fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
1423 - if (fatal) {
1424 - *errp = fatal;
1425 - return -1;
1426 - }
1427 -
1428 - /*
1429 - * we don't deal with reservation when
1430 - * filesystem is mounted without reservation
1431 - * or the file is not a regular file
1432 - * or last attempt to allocate a block with reservation turned on failed
1433 - */
1434 - if (my_rsv == NULL ) {
1435 - ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1436 - grp_goal, count, NULL);
1437 - goto out;
1438 - }
1439 - /*
1440 - * grp_goal is a group relative block number (if there is a goal)
1441 - * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
1442 - * first block is a filesystem wide block number
1443 - * first block is the block number of the first block in this group
1444 - */
1445 - group_first_block = ext4_group_first_block_no(sb, group);
1446 - group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1447 -
1448 - /*
1449 - * Basically we will allocate a new block from inode's reservation
1450 - * window.
1451 - *
1452 - * We need to allocate a new reservation window, if:
1453 - * a) inode does not have a reservation window; or
1454 - * b) last attempt to allocate a block from existing reservation
1455 - * failed; or
1456 - * c) we come here with a goal and with a reservation window
1457 - *
1458 - * We do not need to allocate a new reservation window if we come here
1459 - * at the beginning with a goal and the goal is inside the window, or
1460 - * we don't have a goal but already have a reservation window.
1461 - * then we could go to allocate from the reservation window directly.
1462 - */
1463 - while (1) {
1464 - if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1465 - !goal_in_my_reservation(&my_rsv->rsv_window,
1466 - grp_goal, group, sb)) {
1467 - if (my_rsv->rsv_goal_size < *count)
1468 - my_rsv->rsv_goal_size = *count;
1469 - ret = alloc_new_reservation(my_rsv, grp_goal, sb,
1470 - group, bitmap_bh);
1471 - if (ret < 0)
1472 - break; /* failed */
1473 -
1474 - if (!goal_in_my_reservation(&my_rsv->rsv_window,
1475 - grp_goal, group, sb))
1476 - grp_goal = -1;
1477 - } else if (grp_goal >= 0) {
1478 - int curr = my_rsv->rsv_end -
1479 - (grp_goal + group_first_block) + 1;
1480 -
1481 - if (curr < *count)
1482 - try_to_extend_reservation(my_rsv, sb,
1483 - *count - curr);
1484 - }
1485 + if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
1486 + return 1;
1487
1488 - if ((my_rsv->rsv_start > group_last_block) ||
1489 - (my_rsv->rsv_end < group_first_block)) {
1490 - rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
1491 - BUG();
1492 - }
1493 - ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1494 - grp_goal, &num, &my_rsv->rsv_window);
1495 - if (ret >= 0) {
1496 - my_rsv->rsv_alloc_hit += num;
1497 - *count = num;
1498 - break; /* succeed */
1499 - }
1500 - num = *count;
1501 - }
1502 -out:
1503 - if (ret >= 0) {
1504 - BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
1505 - "bitmap block");
1506 - fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
1507 - if (fatal) {
1508 - *errp = fatal;
1509 - return -1;
1510 - }
1511 - return ret;
1512 + /* Hm, nope. Are (enough) root reserved blocks available? */
1513 + if (sbi->s_resuid == current_fsuid() ||
1514 + ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
1515 + capable(CAP_SYS_RESOURCE)) {
1516 + if (free_blocks >= (nblocks + dirty_blocks))
1517 + return 1;
1518 }
1519
1520 - BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
1521 - ext4_journal_release_buffer(handle, bitmap_bh);
1522 - return ret;
1523 + return 0;
1524 }
1525
1526 -/**
1527 - * ext4_has_free_blocks()
1528 - * @sbi: in-core super block structure.
1529 - * @nblocks: number of neeed blocks
1530 - *
1531 - * Check if filesystem has free blocks available for allocation.
1532 - * Return the number of blocks avaible for allocation for this request
1533 - * On success, return nblocks
1534 - */
1535 -ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1536 - ext4_fsblk_t nblocks)
1537 +int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
1538 + s64 nblocks)
1539 {
1540 - ext4_fsblk_t free_blocks;
1541 - ext4_fsblk_t root_blocks = 0;
1542 -
1543 - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1544 -
1545 - if (!capable(CAP_SYS_RESOURCE) &&
1546 - sbi->s_resuid != current->fsuid &&
1547 - (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1548 - root_blocks = ext4_r_blocks_count(sbi->s_es);
1549 -#ifdef CONFIG_SMP
1550 - if (free_blocks - root_blocks < FBC_BATCH)
1551 - free_blocks =
1552 - percpu_counter_sum(&sbi->s_freeblocks_counter);
1553 -#endif
1554 - if (free_blocks <= root_blocks)
1555 - /* we don't have free space */
1556 + if (ext4_has_free_blocks(sbi, nblocks)) {
1557 + percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
1558 return 0;
1559 - if (free_blocks - root_blocks < nblocks)
1560 - return free_blocks - root_blocks;
1561 - return nblocks;
1562 - }
1563 -
1564 + } else
1565 + return -ENOSPC;
1566 +}
1567
1568 /**
1569 * ext4_should_retry_alloc()
1570 @@ -1657,402 +617,45 @@ int ext4_should_retry_alloc(struct super
1571 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
1572 }
1573
1574 -/**
1575 - * ext4_old_new_blocks() -- core block bitmap based block allocation function
1576 - *
1577 - * @handle: handle to this transaction
1578 - * @inode: file inode
1579 - * @goal: given target block(filesystem wide)
1580 - * @count: target number of blocks to allocate
1581 - * @errp: error code
1582 - *
1583 - * ext4_old_new_blocks uses a goal block to assist allocation and look up
1584 - * the block bitmap directly to do block allocation. It tries to
1585 - * allocate block(s) from the block group contains the goal block first. If
1586 - * that fails, it will try to allocate block(s) from other block groups
1587 - * without any specific goal block.
1588 - *
1589 - * This function is called when -o nomballoc mount option is enabled
1590 - *
1591 - */
1592 -ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
1593 - ext4_fsblk_t goal, unsigned long *count, int *errp)
1594 -{
1595 - struct buffer_head *bitmap_bh = NULL;
1596 - struct buffer_head *gdp_bh;
1597 - ext4_group_t group_no;
1598 - ext4_group_t goal_group;
1599 - ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1600 - ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1601 - ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */
1602 - ext4_group_t bgi; /* blockgroup iteration index */
1603 - int fatal = 0, err;
1604 - int performed_allocation = 0;
1605 - ext4_grpblk_t free_blocks; /* number of free blocks in a group */
1606 - struct super_block *sb;
1607 - struct ext4_group_desc *gdp;
1608 - struct ext4_super_block *es;
1609 - struct ext4_sb_info *sbi;
1610 - struct ext4_reserve_window_node *my_rsv = NULL;
1611 - struct ext4_block_alloc_info *block_i;
1612 - unsigned short windowsz = 0;
1613 - ext4_group_t ngroups;
1614 - unsigned long num = *count;
1615 -
1616 - sb = inode->i_sb;
1617 - if (!sb) {
1618 - *errp = -ENODEV;
1619 - printk("ext4_new_block: nonexistent device");
1620 - return 0;
1621 - }
1622 -
1623 - sbi = EXT4_SB(sb);
1624 - if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
1625 - /*
1626 - * With delalloc we already reserved the blocks
1627 - */
1628 - *count = ext4_has_free_blocks(sbi, *count);
1629 - }
1630 - if (*count == 0) {
1631 - *errp = -ENOSPC;
1632 - return 0; /*return with ENOSPC error */
1633 - }
1634 - num = *count;
1635 -
1636 - /*
1637 - * Check quota for allocation of this block.
1638 - */
1639 - if (DQUOT_ALLOC_BLOCK(inode, num)) {
1640 - *errp = -EDQUOT;
1641 - return 0;
1642 - }
1643 -
1644 - sbi = EXT4_SB(sb);
1645 - es = EXT4_SB(sb)->s_es;
1646 - ext4_debug("goal=%llu.\n", goal);
1647 - /*
1648 - * Allocate a block from reservation only when
1649 - * filesystem is mounted with reservation(default,-o reservation), and
1650 - * it's a regular file, and
1651 - * the desired window size is greater than 0 (One could use ioctl
1652 - * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
1653 - * reservation on that particular file)
1654 - */
1655 - block_i = EXT4_I(inode)->i_block_alloc_info;
1656 - if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1657 - my_rsv = &block_i->rsv_window_node;
1658 -
1659 - /*
1660 - * First, test whether the goal block is free.
1661 - */
1662 - if (goal < le32_to_cpu(es->s_first_data_block) ||
1663 - goal >= ext4_blocks_count(es))
1664 - goal = le32_to_cpu(es->s_first_data_block);
1665 - ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
1666 - goal_group = group_no;
1667 -retry_alloc:
1668 - gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1669 - if (!gdp)
1670 - goto io_error;
1671 -
1672 - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1673 - /*
1674 - * if there is not enough free blocks to make a new resevation
1675 - * turn off reservation for this allocation
1676 - */
1677 - if (my_rsv && (free_blocks < windowsz)
1678 - && (rsv_is_empty(&my_rsv->rsv_window)))
1679 - my_rsv = NULL;
1680 -
1681 - if (free_blocks > 0) {
1682 - bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1683 - if (!bitmap_bh)
1684 - goto io_error;
1685 - grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1686 - group_no, bitmap_bh, grp_target_blk,
1687 - my_rsv, &num, &fatal);
1688 - if (fatal)
1689 - goto out;
1690 - if (grp_alloc_blk >= 0)
1691 - goto allocated;
1692 - }
1693 -
1694 - ngroups = EXT4_SB(sb)->s_groups_count;
1695 - smp_rmb();
1696 -
1697 - /*
1698 - * Now search the rest of the groups. We assume that
1699 - * group_no and gdp correctly point to the last group visited.
1700 - */
1701 - for (bgi = 0; bgi < ngroups; bgi++) {
1702 - group_no++;
1703 - if (group_no >= ngroups)
1704 - group_no = 0;
1705 - gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1706 - if (!gdp)
1707 - goto io_error;
1708 - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1709 - /*
1710 - * skip this group if the number of
1711 - * free blocks is less than half of the reservation
1712 - * window size.
1713 - */
1714 - if (free_blocks <= (windowsz/2))
1715 - continue;
1716 -
1717 - brelse(bitmap_bh);
1718 - bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1719 - if (!bitmap_bh)
1720 - goto io_error;
1721 - /*
1722 - * try to allocate block(s) from this group, without a goal(-1).
1723 - */
1724 - grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1725 - group_no, bitmap_bh, -1, my_rsv,
1726 - &num, &fatal);
1727 - if (fatal)
1728 - goto out;
1729 - if (grp_alloc_blk >= 0)
1730 - goto allocated;
1731 - }
1732 - /*
1733 - * We may end up a bogus ealier ENOSPC error due to
1734 - * filesystem is "full" of reservations, but
1735 - * there maybe indeed free blocks avaliable on disk
1736 - * In this case, we just forget about the reservations
1737 - * just do block allocation as without reservations.
1738 - */
1739 - if (my_rsv) {
1740 - my_rsv = NULL;
1741 - windowsz = 0;
1742 - group_no = goal_group;
1743 - goto retry_alloc;
1744 - }
1745 - /* No space left on the device */
1746 - *errp = -ENOSPC;
1747 - goto out;
1748 -
1749 -allocated:
1750 -
1751 - ext4_debug("using block group %lu(%d)\n",
1752 - group_no, gdp->bg_free_blocks_count);
1753 -
1754 - BUFFER_TRACE(gdp_bh, "get_write_access");
1755 - fatal = ext4_journal_get_write_access(handle, gdp_bh);
1756 - if (fatal)
1757 - goto out;
1758 -
1759 - ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
1760 -
1761 - if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
1762 - in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
1763 - in_range(ret_block, ext4_inode_table(sb, gdp),
1764 - EXT4_SB(sb)->s_itb_per_group) ||
1765 - in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
1766 - EXT4_SB(sb)->s_itb_per_group)) {
1767 - ext4_error(sb, "ext4_new_block",
1768 - "Allocating block in system zone - "
1769 - "blocks from %llu, length %lu",
1770 - ret_block, num);
1771 - /*
1772 - * claim_block marked the blocks we allocated
1773 - * as in use. So we may want to selectively
1774 - * mark some of the blocks as free
1775 - */
1776 - goto retry_alloc;
1777 - }
1778 -
1779 - performed_allocation = 1;
1780 -
1781 -#ifdef CONFIG_JBD2_DEBUG
1782 - {
1783 - struct buffer_head *debug_bh;
1784 -
1785 - /* Record bitmap buffer state in the newly allocated block */
1786 - debug_bh = sb_find_get_block(sb, ret_block);
1787 - if (debug_bh) {
1788 - BUFFER_TRACE(debug_bh, "state when allocated");
1789 - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
1790 - brelse(debug_bh);
1791 - }
1792 - }
1793 - jbd_lock_bh_state(bitmap_bh);
1794 - spin_lock(sb_bgl_lock(sbi, group_no));
1795 - if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
1796 - int i;
1797 -
1798 - for (i = 0; i < num; i++) {
1799 - if (ext4_test_bit(grp_alloc_blk+i,
1800 - bh2jh(bitmap_bh)->b_committed_data)) {
1801 - printk("%s: block was unexpectedly set in "
1802 - "b_committed_data\n", __func__);
1803 - }
1804 - }
1805 - }
1806 - ext4_debug("found bit %d\n", grp_alloc_blk);
1807 - spin_unlock(sb_bgl_lock(sbi, group_no));
1808 - jbd_unlock_bh_state(bitmap_bh);
1809 -#endif
1810 -
1811 - if (ret_block + num - 1 >= ext4_blocks_count(es)) {
1812 - ext4_error(sb, "ext4_new_block",
1813 - "block(%llu) >= blocks count(%llu) - "
1814 - "block_group = %lu, es == %p ", ret_block,
1815 - ext4_blocks_count(es), group_no, es);
1816 - goto out;
1817 - }
1818 -
1819 - /*
1820 - * It is up to the caller to add the new buffer to a journal
1821 - * list of some description. We don't know in advance whether
1822 - * the caller wants to use it as metadata or data.
1823 - */
1824 - spin_lock(sb_bgl_lock(sbi, group_no));
1825 - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1826 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1827 - le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1828 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829 - spin_unlock(sb_bgl_lock(sbi, group_no));
1830 - if (!EXT4_I(inode)->i_delalloc_reserved_flag)
1831 - percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1832 -
1833 - if (sbi->s_log_groups_per_flex) {
1834 - ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
1835 - spin_lock(sb_bgl_lock(sbi, flex_group));
1836 - sbi->s_flex_groups[flex_group].free_blocks -= num;
1837 - spin_unlock(sb_bgl_lock(sbi, flex_group));
1838 - }
1839 -
1840 - BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1841 - err = ext4_journal_dirty_metadata(handle, gdp_bh);
1842 - if (!fatal)
1843 - fatal = err;
1844 -
1845 - sb->s_dirt = 1;
1846 - if (fatal)
1847 - goto out;
1848 -
1849 - *errp = 0;
1850 - brelse(bitmap_bh);
1851 - DQUOT_FREE_BLOCK(inode, *count-num);
1852 - *count = num;
1853 - return ret_block;
1854 -
1855 -io_error:
1856 - *errp = -EIO;
1857 -out:
1858 - if (fatal) {
1859 - *errp = fatal;
1860 - ext4_std_error(sb, fatal);
1861 - }
1862 - /*
1863 - * Undo the block allocation
1864 - */
1865 - if (!performed_allocation)
1866 - DQUOT_FREE_BLOCK(inode, *count);
1867 - brelse(bitmap_bh);
1868 - return 0;
1869 -}
1870 -
1871 -#define EXT4_META_BLOCK 0x1
1872 -
1873 -static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
1874 - ext4_lblk_t iblock, ext4_fsblk_t goal,
1875 - unsigned long *count, int *errp, int flags)
1876 -{
1877 - struct ext4_allocation_request ar;
1878 - ext4_fsblk_t ret;
1879 -
1880 - if (!test_opt(inode->i_sb, MBALLOC)) {
1881 - return ext4_old_new_blocks(handle, inode, goal, count, errp);
1882 - }
1883 -
1884 - memset(&ar, 0, sizeof(ar));
1885 - /* Fill with neighbour allocated blocks */
1886 -
1887 - ar.inode = inode;
1888 - ar.goal = goal;
1889 - ar.len = *count;
1890 - ar.logical = iblock;
1891 -
1892 - if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
1893 - /* enable in-core preallocation for data block allocation */
1894 - ar.flags = EXT4_MB_HINT_DATA;
1895 - else
1896 - /* disable in-core preallocation for non-regular files */
1897 - ar.flags = 0;
1898 -
1899 - ret = ext4_mb_new_blocks(handle, &ar, errp);
1900 - *count = ar.len;
1901 - return ret;
1902 -}
1903 -
1904 /*
1905 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
1906 *
1907 * @handle: handle to this transaction
1908 * @inode: file inode
1909 * @goal: given target block(filesystem wide)
1910 - * @count: total number of blocks need
1911 + * @count: pointer to total number of blocks needed
1912 * @errp: error code
1913 *
1914 - * Return 1st allocated block numberon success, *count stores total account
1915 + * Return 1st allocated block number on success, *count stores total account
1916 * error stores in errp pointer
1917 */
1918 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1919 ext4_fsblk_t goal, unsigned long *count, int *errp)
1920 {
1921 + struct ext4_allocation_request ar;
1922 ext4_fsblk_t ret;
1923 - ret = do_blk_alloc(handle, inode, 0, goal,
1924 - count, errp, EXT4_META_BLOCK);
1925 +
1926 + memset(&ar, 0, sizeof(ar));
1927 + /* Fill with neighbour allocated blocks */
1928 + ar.inode = inode;
1929 + ar.goal = goal;
1930 + ar.len = count ? *count : 1;
1931 +
1932 + ret = ext4_mb_new_blocks(handle, &ar, errp);
1933 + if (count)
1934 + *count = ar.len;
1935 +
1936 /*
1937 * Account for the allocated meta blocks
1938 */
1939 - if (!(*errp)) {
1940 + if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
1941 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1942 - EXT4_I(inode)->i_allocated_meta_blocks += *count;
1943 + EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
1944 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1945 }
1946 return ret;
1947 }
1948
1949 -/*
1950 - * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
1951 - *
1952 - * @handle: handle to this transaction
1953 - * @inode: file inode
1954 - * @goal: given target block(filesystem wide)
1955 - * @errp: error code
1956 - *
1957 - * Return allocated block number on success
1958 - */
1959 -ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
1960 - ext4_fsblk_t goal, int *errp)
1961 -{
1962 - unsigned long count = 1;
1963 - return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
1964 -}
1965 -
1966 -/*
1967 - * ext4_new_blocks() -- allocate data blocks
1968 - *
1969 - * @handle: handle to this transaction
1970 - * @inode: file inode
1971 - * @goal: given target block(filesystem wide)
1972 - * @count: total number of blocks need
1973 - * @errp: error code
1974 - *
1975 - * Return 1st allocated block numberon success, *count stores total account
1976 - * error stores in errp pointer
1977 - */
1978 -
1979 -ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
1980 - ext4_lblk_t iblock, ext4_fsblk_t goal,
1981 - unsigned long *count, int *errp)
1982 -{
1983 - return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
1984 -}
1985 -
1986 /**
1987 * ext4_count_free_blocks() -- count filesystem free blocks
1988 * @sb: superblock
1989 @@ -2068,7 +671,7 @@ ext4_fsblk_t ext4_count_free_blocks(stru
1990 #ifdef EXT4FS_DEBUG
1991 struct ext4_super_block *es;
1992 ext4_fsblk_t bitmap_count;
1993 - unsigned long x;
1994 + unsigned int x;
1995 struct buffer_head *bitmap_bh = NULL;
1996
1997 es = EXT4_SB(sb)->s_es;
1998 @@ -2088,15 +691,14 @@ ext4_fsblk_t ext4_count_free_blocks(stru
1999 continue;
2000
2001 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
2002 - printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
2003 + printk(KERN_DEBUG "group %lu: stored = %d, counted = %u\n",
2004 i, le16_to_cpu(gdp->bg_free_blocks_count), x);
2005 bitmap_count += x;
2006 }
2007 brelse(bitmap_bh);
2008 - printk("ext4_count_free_blocks: stored = %llu"
2009 - ", computed = %llu, %llu\n",
2010 - ext4_free_blocks_count(es),
2011 - desc_count, bitmap_count);
2012 + printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
2013 + ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
2014 + desc_count, bitmap_count);
2015 return bitmap_count;
2016 #else
2017 desc_count = 0;
2018 @@ -2105,7 +707,7 @@ ext4_fsblk_t ext4_count_free_blocks(stru
2019 gdp = ext4_get_group_desc(sb, i, NULL);
2020 if (!gdp)
2021 continue;
2022 - desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
2023 + desc_count += ext4_free_blks_count(sb, gdp);
2024 }
2025
2026 return desc_count;
2027 @@ -2183,8 +785,9 @@ unsigned long ext4_bg_num_gdb(struct sup
2028
2029 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
2030 metagroup < first_meta_bg)
2031 - return ext4_bg_num_gdb_nometa(sb,group);
2032 + return ext4_bg_num_gdb_nometa(sb, group);
2033
2034 return ext4_bg_num_gdb_meta(sb,group);
2035
2036 }
2037 +
2038 diff -rup b/fs/ext4//bitmap.c a/fs/ext4///bitmap.c
2039 --- b/fs/ext4/bitmap.c 2009-02-11 14:37:58.000000000 +0100
2040 +++ a/fs/ext4/bitmap.c 2009-02-10 21:40:11.000000000 +0100
2041 @@ -15,17 +15,16 @@
2042
2043 static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
2044
2045 -unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars)
2046 +unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
2047 {
2048 - unsigned int i;
2049 - unsigned long sum = 0;
2050 + unsigned int i, sum = 0;
2051
2052 if (!map)
2053 - return (0);
2054 + return 0;
2055 for (i = 0; i < numchars; i++)
2056 sum += nibblemap[map->b_data[i] & 0xf] +
2057 nibblemap[(map->b_data[i] >> 4) & 0xf];
2058 - return (sum);
2059 + return sum;
2060 }
2061
2062 #endif /* EXT4FS_DEBUG */
2063 diff -rup b/fs/ext4//dir.c a/fs/ext4///dir.c
2064 --- b/fs/ext4/dir.c 2009-02-11 14:37:58.000000000 +0100
2065 +++ a/fs/ext4/dir.c 2009-02-10 21:40:11.000000000 +0100
2066 @@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table
2067 };
2068
2069 static int ext4_readdir(struct file *, void *, filldir_t);
2070 -static int ext4_dx_readdir(struct file * filp,
2071 - void * dirent, filldir_t filldir);
2072 -static int ext4_release_dir (struct inode * inode,
2073 - struct file * filp);
2074 +static int ext4_dx_readdir(struct file *filp,
2075 + void *dirent, filldir_t filldir);
2076 +static int ext4_release_dir(struct inode *inode,
2077 + struct file *filp);
2078
2079 const struct file_operations ext4_dir_operations = {
2080 .llseek = generic_file_llseek,
2081 @@ -61,12 +61,12 @@ static unsigned char get_dtype(struct su
2082 }
2083
2084
2085 -int ext4_check_dir_entry (const char * function, struct inode * dir,
2086 - struct ext4_dir_entry_2 * de,
2087 - struct buffer_head * bh,
2088 - unsigned long offset)
2089 +int ext4_check_dir_entry(const char *function, struct inode *dir,
2090 + struct ext4_dir_entry_2 *de,
2091 + struct buffer_head *bh,
2092 + unsigned int offset)
2093 {
2094 - const char * error_msg = NULL;
2095 + const char *error_msg = NULL;
2096 const int rlen = ext4_rec_len_from_disk(de->rec_len);
2097
2098 if (rlen < EXT4_DIR_REC_LEN(1))
2099 @@ -82,20 +82,20 @@ int ext4_check_dir_entry (const char * f
2100 error_msg = "inode out of bounds";
2101
2102 if (error_msg != NULL)
2103 - ext4_error (dir->i_sb, function,
2104 + ext4_error(dir->i_sb, function,
2105 "bad entry in directory #%lu: %s - "
2106 - "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
2107 + "offset=%u, inode=%u, rec_len=%d, name_len=%d",
2108 dir->i_ino, error_msg, offset,
2109 - (unsigned long) le32_to_cpu(de->inode),
2110 + le32_to_cpu(de->inode),
2111 rlen, de->name_len);
2112 return error_msg == NULL ? 1 : 0;
2113 }
2114
2115 -static int ext4_readdir(struct file * filp,
2116 - void * dirent, filldir_t filldir)
2117 +static int ext4_readdir(struct file *filp,
2118 + void *dirent, filldir_t filldir)
2119 {
2120 int error = 0;
2121 - unsigned long offset;
2122 + unsigned int offset;
2123 int i, stored;
2124 struct ext4_dir_entry_2 *de;
2125 struct super_block *sb;
2126 @@ -192,14 +192,14 @@ revalidate:
2127 while (!error && filp->f_pos < inode->i_size
2128 && offset < sb->s_blocksize) {
2129 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
2130 - if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
2131 - bh, offset)) {
2132 + if (!ext4_check_dir_entry("ext4_readdir", inode, de,
2133 + bh, offset)) {
2134 /*
2135 * On error, skip the f_pos to the next block
2136 */
2137 filp->f_pos = (filp->f_pos |
2138 (sb->s_blocksize - 1)) + 1;
2139 - brelse (bh);
2140 + brelse(bh);
2141 ret = stored;
2142 goto out;
2143 }
2144 @@ -223,12 +223,12 @@ revalidate:
2145 break;
2146 if (version != filp->f_version)
2147 goto revalidate;
2148 - stored ++;
2149 + stored++;
2150 }
2151 filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
2152 }
2153 offset = 0;
2154 - brelse (bh);
2155 + brelse(bh);
2156 }
2157 out:
2158 return ret;
2159 @@ -295,9 +295,9 @@ static void free_rb_tree_fname(struct rb
2160 parent = rb_parent(n);
2161 fname = rb_entry(n, struct fname, rb_hash);
2162 while (fname) {
2163 - struct fname * old = fname;
2164 + struct fname *old = fname;
2165 fname = fname->next;
2166 - kfree (old);
2167 + kfree(old);
2168 }
2169 if (!parent)
2170 root->rb_node = NULL;
2171 @@ -336,7 +336,7 @@ int ext4_htree_store_dirent(struct file
2172 struct ext4_dir_entry_2 *dirent)
2173 {
2174 struct rb_node **p, *parent = NULL;
2175 - struct fname * fname, *new_fn;
2176 + struct fname *fname, *new_fn;
2177 struct dir_private_info *info;
2178 int len;
2179
2180 @@ -393,19 +393,20 @@ int ext4_htree_store_dirent(struct file
2181 * for all entres on the fname linked list. (Normally there is only
2182 * one entry on the linked list, unless there are 62 bit hash collisions.)
2183 */
2184 -static int call_filldir(struct file * filp, void * dirent,
2185 +static int call_filldir(struct file *filp, void *dirent,
2186 filldir_t filldir, struct fname *fname)
2187 {
2188 struct dir_private_info *info = filp->private_data;
2189 loff_t curr_pos;
2190 struct inode *inode = filp->f_path.dentry->d_inode;
2191 - struct super_block * sb;
2192 + struct super_block *sb;
2193 int error;
2194
2195 sb = inode->i_sb;
2196
2197 if (!fname) {
2198 - printk("call_filldir: called with null fname?!?\n");
2199 + printk(KERN_ERR "ext4: call_filldir: called with "
2200 + "null fname?!?\n");
2201 return 0;
2202 }
2203 curr_pos = hash2pos(fname->hash, fname->minor_hash);
2204 @@ -424,8 +425,8 @@ static int call_filldir(struct file * fi
2205 return 0;
2206 }
2207
2208 -static int ext4_dx_readdir(struct file * filp,
2209 - void * dirent, filldir_t filldir)
2210 +static int ext4_dx_readdir(struct file *filp,
2211 + void *dirent, filldir_t filldir)
2212 {
2213 struct dir_private_info *info = filp->private_data;
2214 struct inode *inode = filp->f_path.dentry->d_inode;
2215 @@ -512,7 +513,7 @@ finished:
2216 return 0;
2217 }
2218
2219 -static int ext4_release_dir (struct inode * inode, struct file * filp)
2220 +static int ext4_release_dir(struct inode *inode, struct file *filp)
2221 {
2222 if (filp->private_data)
2223 ext4_htree_free_dir_info(filp->private_data);
2224 diff -rup b/fs/ext4//ext4_extents.h a/fs/ext4///ext4_extents.h
2225 --- b/fs/ext4/ext4_extents.h 2009-02-11 14:37:58.000000000 +0100
2226 +++ a/fs/ext4/ext4_extents.h 2009-02-10 21:40:14.000000000 +0100
2227 @@ -181,11 +181,6 @@ static inline unsigned short ext_depth(s
2228 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
2229 }
2230
2231 -static inline void ext4_ext_tree_changed(struct inode *inode)
2232 -{
2233 - EXT4_I(inode)->i_ext_generation++;
2234 -}
2235 -
2236 static inline void
2237 ext4_ext_invalidate_cache(struct inode *inode)
2238 {
2239 diff -rup b/fs/ext4//ext4.h a/fs/ext4///ext4.h
2240 --- b/fs/ext4/ext4.h 2009-02-11 14:37:58.000000000 +0100
2241 +++ a/fs/ext4/ext4.h 2009-02-10 21:40:14.000000000 +0100
2242 @@ -19,6 +19,7 @@
2243 #include <linux/types.h>
2244 #include <linux/blkdev.h>
2245 #include <linux/magic.h>
2246 +#include <linux/jbd2.h>
2247 #include "ext4_i.h"
2248
2249 /*
2250 @@ -44,9 +45,9 @@
2251 #ifdef EXT4FS_DEBUG
2252 #define ext4_debug(f, a...) \
2253 do { \
2254 - printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
2255 + printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
2256 __FILE__, __LINE__, __func__); \
2257 - printk (KERN_DEBUG f, ## a); \
2258 + printk(KERN_DEBUG f, ## a); \
2259 } while (0)
2260 #else
2261 #define ext4_debug(f, a...) do {} while (0)
2262 @@ -94,9 +95,9 @@ struct ext4_allocation_request {
2263 /* phys. block for ^^^ */
2264 ext4_fsblk_t pright;
2265 /* how many blocks we want to allocate */
2266 - unsigned long len;
2267 + unsigned int len;
2268 /* flags. see above EXT4_MB_HINT_* */
2269 - unsigned long flags;
2270 + unsigned int flags;
2271 };
2272
2273 /*
2274 @@ -128,7 +129,7 @@ struct ext4_allocation_request {
2275 #else
2276 # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
2277 #endif
2278 -#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
2279 +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
2280 #ifdef __KERNEL__
2281 # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
2282 #else
2283 @@ -156,12 +157,12 @@ struct ext4_group_desc
2284 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
2285 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
2286 __le32 bg_inode_table_lo; /* Inodes table block */
2287 - __le16 bg_free_blocks_count; /* Free blocks count */
2288 - __le16 bg_free_inodes_count; /* Free inodes count */
2289 - __le16 bg_used_dirs_count; /* Directories count */
2290 + __le16 bg_free_blocks_count_lo;/* Free blocks count */
2291 + __le16 bg_free_inodes_count_lo;/* Free inodes count */
2292 + __le16 bg_used_dirs_count_lo; /* Directories count */
2293 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
2294 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
2295 - __le16 bg_itable_unused; /* Unused inodes count */
2296 + __le16 bg_itable_unused_lo; /* Unused inodes count */
2297 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
2298 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
2299 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
2300 @@ -169,7 +170,7 @@ struct ext4_group_desc
2301 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
2302 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
2303 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
2304 - __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
2305 + __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
2306 __u32 bg_reserved2[3];
2307 };
2308
2309 @@ -245,7 +246,7 @@ struct flex_groups {
2310 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
2311
2312 #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
2313 -#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
2314 +#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
2315
2316 /*
2317 * Inode dynamic state flags
2318 @@ -511,7 +512,6 @@ do { \
2319 /*
2320 * Mount flags
2321 */
2322 -#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
2323 #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
2324 #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
2325 #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
2326 @@ -539,7 +539,6 @@ do { \
2327 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
2328 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
2329 #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
2330 -#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
2331 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
2332 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
2333 #ifndef _LINUX_EXT2_FS_H
2334 @@ -668,7 +667,7 @@ struct ext4_super_block {
2335 };
2336
2337 #ifdef __KERNEL__
2338 -static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
2339 +static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
2340 {
2341 return sb->s_fs_info;
2342 }
2343 @@ -726,11 +725,11 @@ static inline int ext4_valid_inum(struct
2344 */
2345
2346 #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
2347 - ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
2348 + ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
2349 #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
2350 - ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
2351 + ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
2352 #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
2353 - ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
2354 + ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
2355 #define EXT4_SET_COMPAT_FEATURE(sb,mask) \
2356 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
2357 #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
2358 @@ -790,6 +789,8 @@ static inline int ext4_valid_inum(struct
2359 #define EXT4_DEF_RESUID 0
2360 #define EXT4_DEF_RESGID 0
2361
2362 +#define EXT4_DEF_INODE_READAHEAD_BLKS 32
2363 +
2364 /*
2365 * Default mount options
2366 */
2367 @@ -889,6 +890,9 @@ static inline __le16 ext4_rec_len_to_dis
2368 #define DX_HASH_LEGACY 0
2369 #define DX_HASH_HALF_MD4 1
2370 #define DX_HASH_TEA 2
2371 +#define DX_HASH_LEGACY_UNSIGNED 3
2372 +#define DX_HASH_HALF_MD4_UNSIGNED 4
2373 +#define DX_HASH_TEA_UNSIGNED 5
2374
2375 #ifdef __KERNEL__
2376
2377 @@ -953,7 +957,25 @@ ext4_group_first_block_no(struct super_b
2378 #define ERR_BAD_DX_DIR -75000
2379
2380 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
2381 - unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
2382 + ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
2383 +
2384 +extern struct proc_dir_entry *ext4_proc_root;
2385 +
2386 +#ifdef CONFIG_PROC_FS
2387 +extern const struct file_operations ext4_ui_proc_fops;
2388 +
2389 +#define EXT4_PROC_HANDLER(name, var) \
2390 +do { \
2391 + proc = proc_create_data(name, mode, sbi->s_proc, \
2392 + &ext4_ui_proc_fops, &sbi->s_##var); \
2393 + if (proc == NULL) { \
2394 + printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
2395 + goto err_out; \
2396 + } \
2397 +} while (0)
2398 +#else
2399 +#define EXT4_PROC_HANDLER(name, var)
2400 +#endif
2401
2402 /*
2403 * Function prototypes
2404 @@ -967,6 +989,9 @@ void ext4_get_group_no_and_offset(struct
2405 # define ATTRIB_NORET __attribute__((noreturn))
2406 # define NORET_AND noreturn,
2407
2408 +/* bitmap.c */
2409 +extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
2410 +
2411 /* balloc.c */
2412 extern unsigned int ext4_block_group(struct super_block *sb,
2413 ext4_fsblk_t blocknr);
2414 @@ -975,55 +1000,44 @@ extern ext4_grpblk_t ext4_block_group_of
2415 extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
2416 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
2417 ext4_group_t group);
2418 -extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
2419 - ext4_fsblk_t goal, int *errp);
2420 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
2421 ext4_fsblk_t goal, unsigned long *count, int *errp);
2422 -extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
2423 - ext4_lblk_t iblock, ext4_fsblk_t goal,
2424 - unsigned long *count, int *errp);
2425 -extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
2426 - ext4_fsblk_t goal, unsigned long *count, int *errp);
2427 -extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
2428 - ext4_fsblk_t nblocks);
2429 -extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
2430 +extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
2431 +extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
2432 +extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
2433 ext4_fsblk_t block, unsigned long count, int metadata);
2434 -extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
2435 - ext4_fsblk_t block, unsigned long count,
2436 - unsigned long *pdquot_freed_blocks);
2437 -extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
2438 -extern void ext4_check_blocks_bitmap (struct super_block *);
2439 +extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
2440 + ext4_fsblk_t block, unsigned long count);
2441 +extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
2442 +extern void ext4_check_blocks_bitmap(struct super_block *);
2443 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
2444 ext4_group_t block_group,
2445 struct buffer_head ** bh);
2446 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
2447 -extern void ext4_init_block_alloc_info(struct inode *);
2448 -extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
2449
2450 /* dir.c */
2451 extern int ext4_check_dir_entry(const char *, struct inode *,
2452 struct ext4_dir_entry_2 *,
2453 - struct buffer_head *, unsigned long);
2454 + struct buffer_head *, unsigned int);
2455 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
2456 __u32 minor_hash,
2457 struct ext4_dir_entry_2 *dirent);
2458 extern void ext4_htree_free_dir_info(struct dir_private_info *p);
2459
2460 /* fsync.c */
2461 -extern int ext4_sync_file (struct file *, struct dentry *, int);
2462 +extern int ext4_sync_file(struct file *, struct dentry *, int);
2463
2464 /* hash.c */
2465 extern int ext4fs_dirhash(const char *name, int len, struct
2466 dx_hash_info *hinfo);
2467
2468 /* ialloc.c */
2469 -extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
2470 -extern void ext4_free_inode (handle_t *, struct inode *);
2471 -extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
2472 -extern unsigned long ext4_count_free_inodes (struct super_block *);
2473 -extern unsigned long ext4_count_dirs (struct super_block *);
2474 -extern void ext4_check_inodes_bitmap (struct super_block *);
2475 -extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
2476 +extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
2477 +extern void ext4_free_inode(handle_t *, struct inode *);
2478 +extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
2479 +extern unsigned long ext4_count_free_inodes(struct super_block *);
2480 +extern unsigned long ext4_count_dirs(struct super_block *);
2481 +extern void ext4_check_inodes_bitmap(struct super_block *);
2482
2483 /* mballoc.c */
2484 extern long ext4_mb_stats;
2485 @@ -1033,17 +1047,18 @@ extern int ext4_mb_release(struct super_
2486 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
2487 struct ext4_allocation_request *, int *);
2488 extern int ext4_mb_reserve_blocks(struct super_block *, int);
2489 -extern void ext4_mb_discard_inode_preallocations(struct inode *);
2490 +extern void ext4_discard_preallocations(struct inode *);
2491 extern int __init init_ext4_mballoc(void);
2492 extern void exit_ext4_mballoc(void);
2493 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
2494 unsigned long, unsigned long, int, unsigned long *);
2495 -extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
2496 +extern int ext4_mb_add_groupinfo(struct super_block *sb,
2497 ext4_group_t i, struct ext4_group_desc *desc);
2498 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
2499 ext4_grpblk_t add);
2500 -
2501 -
2502 +extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
2503 +extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
2504 + ext4_group_t, int);
2505 /* inode.c */
2506 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
2507 struct buffer_head *bh, ext4_fsblk_t blocknr);
2508 @@ -1051,24 +1066,19 @@ struct buffer_head *ext4_getblk(handle_t
2509 ext4_lblk_t, int, int *);
2510 struct buffer_head *ext4_bread(handle_t *, struct inode *,
2511 ext4_lblk_t, int, int *);
2512 -int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
2513 - ext4_lblk_t iblock, unsigned long maxblocks,
2514 - struct buffer_head *bh_result,
2515 - int create, int extend_disksize);
2516
2517 extern struct inode *ext4_iget(struct super_block *, unsigned long);
2518 -extern int ext4_write_inode (struct inode *, int);
2519 -extern int ext4_setattr (struct dentry *, struct iattr *);
2520 +extern int ext4_write_inode(struct inode *, int);
2521 +extern int ext4_setattr(struct dentry *, struct iattr *);
2522 extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
2523 struct kstat *stat);
2524 -extern void ext4_delete_inode (struct inode *);
2525 -extern int ext4_sync_inode (handle_t *, struct inode *);
2526 -extern void ext4_discard_reservation (struct inode *);
2527 +extern void ext4_delete_inode(struct inode *);
2528 +extern int ext4_sync_inode(handle_t *, struct inode *);
2529 extern void ext4_dirty_inode(struct inode *);
2530 extern int ext4_change_inode_journal_flag(struct inode *, int);
2531 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
2532 extern int ext4_can_truncate(struct inode *inode);
2533 -extern void ext4_truncate (struct inode *);
2534 +extern void ext4_truncate(struct inode *);
2535 extern void ext4_set_inode_flags(struct inode *);
2536 extern void ext4_get_inode_flags(struct ext4_inode_info *);
2537 extern void ext4_set_aops(struct inode *inode);
2538 @@ -1081,7 +1091,7 @@ extern int ext4_page_mkwrite(struct vm_a
2539
2540 /* ioctl.c */
2541 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
2542 -extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
2543 +extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
2544
2545 /* migrate.c */
2546 extern int ext4_ext_migrate(struct inode *);
2547 @@ -1099,14 +1109,17 @@ extern int ext4_group_extend(struct supe
2548 ext4_fsblk_t n_blocks_count);
2549
2550 /* super.c */
2551 -extern void ext4_error (struct super_block *, const char *, const char *, ...)
2552 +extern void ext4_error(struct super_block *, const char *, const char *, ...)
2553 __attribute__ ((format (printf, 3, 4)));
2554 -extern void __ext4_std_error (struct super_block *, const char *, int);
2555 -extern void ext4_abort (struct super_block *, const char *, const char *, ...)
2556 +extern void __ext4_std_error(struct super_block *, const char *, int);
2557 +extern void ext4_abort(struct super_block *, const char *, const char *, ...)
2558 __attribute__ ((format (printf, 3, 4)));
2559 -extern void ext4_warning (struct super_block *, const char *, const char *, ...)
2560 +extern void ext4_warning(struct super_block *, const char *, const char *, ...)
2561 __attribute__ ((format (printf, 3, 4)));
2562 -extern void ext4_update_dynamic_rev (struct super_block *sb);
2563 +extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
2564 + const char *, const char *, ...)
2565 + __attribute__ ((format (printf, 4, 5)));
2566 +extern void ext4_update_dynamic_rev(struct super_block *sb);
2567 extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
2568 __u32 compat);
2569 extern int ext4_update_rocompat_feature(handle_t *handle,
2570 @@ -1119,12 +1132,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(st
2571 struct ext4_group_desc *bg);
2572 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
2573 struct ext4_group_desc *bg);
2574 +extern __u32 ext4_free_blks_count(struct super_block *sb,
2575 + struct ext4_group_desc *bg);
2576 +extern __u32 ext4_free_inodes_count(struct super_block *sb,
2577 + struct ext4_group_desc *bg);
2578 +extern __u32 ext4_used_dirs_count(struct super_block *sb,
2579 + struct ext4_group_desc *bg);
2580 +extern __u32 ext4_itable_unused_count(struct super_block *sb,
2581 + struct ext4_group_desc *bg);
2582 extern void ext4_block_bitmap_set(struct super_block *sb,
2583 struct ext4_group_desc *bg, ext4_fsblk_t blk);
2584 extern void ext4_inode_bitmap_set(struct super_block *sb,
2585 struct ext4_group_desc *bg, ext4_fsblk_t blk);
2586 extern void ext4_inode_table_set(struct super_block *sb,
2587 struct ext4_group_desc *bg, ext4_fsblk_t blk);
2588 +extern void ext4_free_blks_set(struct super_block *sb,
2589 + struct ext4_group_desc *bg, __u32 count);
2590 +extern void ext4_free_inodes_set(struct super_block *sb,
2591 + struct ext4_group_desc *bg, __u32 count);
2592 +extern void ext4_used_dirs_set(struct super_block *sb,
2593 + struct ext4_group_desc *bg, __u32 count);
2594 +extern void ext4_itable_unused_set(struct super_block *sb,
2595 + struct ext4_group_desc *bg, __u32 count);
2596
2597 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
2598 {
2599 @@ -1179,7 +1208,7 @@ static inline void ext4_isize_set(struct
2600
2601 static inline
2602 struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
2603 - ext4_group_t group)
2604 + ext4_group_t group)
2605 {
2606 struct ext4_group_info ***grp_info;
2607 long indexv, indexh;
2608 @@ -1207,6 +1236,72 @@ do { \
2609 __ext4_std_error((sb), __func__, (errno)); \
2610 } while (0)
2611
2612 +#ifdef CONFIG_SMP
2613 +/* Each CPU can accumulate FBC_BATCH blocks in their local
2614 + * counters. So we need to make sure we have free blocks more
2615 + * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
2616 + */
2617 +#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
2618 +#else
2619 +#define EXT4_FREEBLOCKS_WATERMARK 0
2620 +#endif
2621 +
2622 +static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2623 +{
2624 + /*
2625 + * XXX: replace with spinlock if seen contended -bzzz
2626 + */
2627 + down_write(&EXT4_I(inode)->i_data_sem);
2628 + if (newsize > EXT4_I(inode)->i_disksize)
2629 + EXT4_I(inode)->i_disksize = newsize;
2630 + up_write(&EXT4_I(inode)->i_data_sem);
2631 + return ;
2632 +}
2633 +
2634 +struct ext4_group_info {
2635 + unsigned long bb_state;
2636 + struct rb_root bb_free_root;
2637 + unsigned short bb_first_free;
2638 + unsigned short bb_free;
2639 + unsigned short bb_fragments;
2640 + struct list_head bb_prealloc_list;
2641 +#ifdef DOUBLE_CHECK
2642 + void *bb_bitmap;
2643 +#endif
2644 + struct rw_semaphore alloc_sem;
2645 + unsigned short bb_counters[];
2646 +};
2647 +
2648 +#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
2649 +#define EXT4_GROUP_INFO_LOCKED_BIT 1
2650 +
2651 +#define EXT4_MB_GRP_NEED_INIT(grp) \
2652 + (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
2653 +
2654 +static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
2655 +{
2656 + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2657 +
2658 + bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
2659 +}
2660 +
2661 +static inline void ext4_unlock_group(struct super_block *sb,
2662 + ext4_group_t group)
2663 +{
2664 + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2665 +
2666 + bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
2667 +}
2668 +
2669 +static inline int ext4_is_group_locked(struct super_block *sb,
2670 + ext4_group_t group)
2671 +{
2672 + struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2673 +
2674 + return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
2675 + &(grinfo->bb_state));
2676 +}
2677 +
2678 /*
2679 * Inodes and files operations
2680 */
2681 @@ -1232,18 +1327,37 @@ extern int ext4_ext_writepage_trans_bloc
2682 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
2683 int chunk);
2684 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2685 - ext4_lblk_t iblock,
2686 - unsigned long max_blocks, struct buffer_head *bh_result,
2687 - int create, int extend_disksize);
2688 + ext4_lblk_t iblock, unsigned int max_blocks,
2689 + struct buffer_head *bh_result,
2690 + int create, int extend_disksize);
2691 extern void ext4_ext_truncate(struct inode *);
2692 extern void ext4_ext_init(struct super_block *);
2693 extern void ext4_ext_release(struct super_block *);
2694 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
2695 loff_t len);
2696 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
2697 - sector_t block, unsigned long max_blocks,
2698 + sector_t block, unsigned int max_blocks,
2699 struct buffer_head *bh, int create,
2700 int extend_disksize, int flag);
2701 +
2702 +#define BH_JBDPrivateStart (BH_Unshadow+1)
2703 +/*
2704 + * Add new method to test wether block and inode bitmaps are properly
2705 + * initialized. With uninit_bg reading the block from disk is not enough
2706 + * to mark the bitmap uptodate. We need to also zero-out the bitmap
2707 + */
2708 +#define BH_BITMAP_UPTODATE BH_JBDPrivateStart
2709 +
2710 +static inline int bitmap_uptodate(struct buffer_head *bh)
2711 +{
2712 + return (buffer_uptodate(bh) &&
2713 + test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
2714 +}
2715 +static inline void set_bitmap_uptodate(struct buffer_head *bh)
2716 +{
2717 + set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
2718 +}
2719 +
2720 #endif /* __KERNEL__ */
2721
2722 #endif /* _EXT4_H */
2723 diff -rup b/fs/ext4//ext4_i.h a/fs/ext4///ext4_i.h
2724 --- b/fs/ext4/ext4_i.h 2009-02-11 14:37:58.000000000 +0100
2725 +++ a/fs/ext4/ext4_i.h 2009-02-10 21:40:14.000000000 +0100
2726 @@ -31,39 +31,7 @@ typedef unsigned long long ext4_fsblk_t;
2727 typedef __u32 ext4_lblk_t;
2728
2729 /* data type for block group number */
2730 -typedef unsigned long ext4_group_t;
2731 -
2732 -struct ext4_reserve_window {
2733 - ext4_fsblk_t _rsv_start; /* First byte reserved */
2734 - ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
2735 -};
2736 -
2737 -struct ext4_reserve_window_node {
2738 - struct rb_node rsv_node;
2739 - __u32 rsv_goal_size;
2740 - __u32 rsv_alloc_hit;
2741 - struct ext4_reserve_window rsv_window;
2742 -};
2743 -
2744 -struct ext4_block_alloc_info {
2745 - /* information about reservation window */
2746 - struct ext4_reserve_window_node rsv_window_node;
2747 - /*
2748 - * was i_next_alloc_block in ext4_inode_info
2749 - * is the logical (file-relative) number of the
2750 - * most-recently-allocated block in this file.
2751 - * We use this for detecting linearly ascending allocation requests.
2752 - */
2753 - ext4_lblk_t last_alloc_logical_block;
2754 - /*
2755 - * Was i_next_alloc_goal in ext4_inode_info
2756 - * is the *physical* companion to i_next_alloc_block.
2757 - * it the physical block number of the block which was most-recentl
2758 - * allocated to this file. This give us the goal (target) for the next
2759 - * allocation when we detect linearly ascending requests.
2760 - */
2761 - ext4_fsblk_t last_alloc_physical_block;
2762 -};
2763 +typedef unsigned int ext4_group_t;
2764
2765 #define rsv_start rsv_window._rsv_start
2766 #define rsv_end rsv_window._rsv_end
2767 @@ -97,11 +65,8 @@ struct ext4_inode_info {
2768 ext4_group_t i_block_group;
2769 __u32 i_state; /* Dynamic state flags for ext4 */
2770
2771 - /* block reservation info */
2772 - struct ext4_block_alloc_info *i_block_alloc_info;
2773 -
2774 ext4_lblk_t i_dir_start_lookup;
2775 -#ifdef CONFIG_EXT4DEV_FS_XATTR
2776 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
2777 /*
2778 * Extended attributes can be read independently of the main file
2779 * data. Taking i_mutex even when reading would cause contention
2780 @@ -111,7 +76,7 @@ struct ext4_inode_info {
2781 */
2782 struct rw_semaphore xattr_sem;
2783 #endif
2784 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
2785 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
2786 struct posix_acl *i_acl;
2787 struct posix_acl *i_default_acl;
2788 #endif
2789 @@ -135,9 +100,6 @@ struct ext4_inode_info {
2790 */
2791 loff_t i_disksize;
2792
2793 - /* on-disk additional length */
2794 - __u16 i_extra_isize;
2795 -
2796 /*
2797 * i_data_sem is for serialising ext4_truncate() against
2798 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
2799 @@ -152,7 +114,6 @@ struct ext4_inode_info {
2800 struct inode vfs_inode;
2801 struct jbd2_inode jinode;
2802
2803 - unsigned long i_ext_generation;
2804 struct ext4_ext_cache i_cached_extent;
2805 /*
2806 * File creation time. Its function is same as that of
2807 @@ -165,10 +126,14 @@ struct ext4_inode_info {
2808 spinlock_t i_prealloc_lock;
2809
2810 /* allocation reservation info for delalloc */
2811 - unsigned long i_reserved_data_blocks;
2812 - unsigned long i_reserved_meta_blocks;
2813 - unsigned long i_allocated_meta_blocks;
2814 + unsigned int i_reserved_data_blocks;
2815 + unsigned int i_reserved_meta_blocks;
2816 + unsigned int i_allocated_meta_blocks;
2817 unsigned short i_delalloc_reserved_flag;
2818 +
2819 + /* on-disk additional length */
2820 + __u16 i_extra_isize;
2821 +
2822 spinlock_t i_block_reservation_lock;
2823 };
2824
2825 diff -rup b/fs/ext4//ext4_sb.h a/fs/ext4///ext4_sb.h
2826 --- b/fs/ext4/ext4_sb.h 2009-02-11 14:37:58.000000000 +0100
2827 +++ a/fs/ext4/ext4_sb.h 2009-02-10 21:40:14.000000000 +0100
2828 @@ -40,8 +40,8 @@ struct ext4_sb_info {
2829 unsigned long s_blocks_last; /* Last seen block count */
2830 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
2831 struct buffer_head * s_sbh; /* Buffer containing the super block */
2832 - struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
2833 - struct buffer_head ** s_group_desc;
2834 + struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
2835 + struct buffer_head **s_group_desc;
2836 unsigned long s_mount_opt;
2837 ext4_fsblk_t s_sb_block;
2838 uid_t s_resuid;
2839 @@ -52,23 +52,26 @@ struct ext4_sb_info {
2840 int s_desc_per_block_bits;
2841 int s_inode_size;
2842 int s_first_ino;
2843 + unsigned int s_inode_readahead_blks;
2844 spinlock_t s_next_gen_lock;
2845 u32 s_next_generation;
2846 u32 s_hash_seed[4];
2847 int s_def_hash_version;
2848 + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
2849 struct percpu_counter s_freeblocks_counter;
2850 struct percpu_counter s_freeinodes_counter;
2851 struct percpu_counter s_dirs_counter;
2852 + struct percpu_counter s_dirtyblocks_counter;
2853 struct blockgroup_lock s_blockgroup_lock;
2854 + struct proc_dir_entry *s_proc;
2855
2856 /* root of the per fs reservation window tree */
2857 spinlock_t s_rsv_window_lock;
2858 struct rb_root s_rsv_window_root;
2859 - struct ext4_reserve_window_node s_rsv_window_head;
2860
2861 /* Journaling */
2862 - struct inode * s_journal_inode;
2863 - struct journal_s * s_journal;
2864 + struct inode *s_journal_inode;
2865 + struct journal_s *s_journal;
2866 struct list_head s_orphan;
2867 unsigned long s_commit_interval;
2868 struct block_device *journal_bdev;
2869 @@ -106,12 +109,12 @@ struct ext4_sb_info {
2870
2871 /* tunables */
2872 unsigned long s_stripe;
2873 - unsigned long s_mb_stream_request;
2874 - unsigned long s_mb_max_to_scan;
2875 - unsigned long s_mb_min_to_scan;
2876 - unsigned long s_mb_stats;
2877 - unsigned long s_mb_order2_reqs;
2878 - unsigned long s_mb_group_prealloc;
2879 + unsigned int s_mb_stream_request;
2880 + unsigned int s_mb_max_to_scan;
2881 + unsigned int s_mb_min_to_scan;
2882 + unsigned int s_mb_stats;
2883 + unsigned int s_mb_order2_reqs;
2884 + unsigned int s_mb_group_prealloc;
2885 /* where last allocation was done - for stream allocation */
2886 unsigned long s_mb_last_group;
2887 unsigned long s_mb_last_start;
2888 @@ -121,7 +124,6 @@ struct ext4_sb_info {
2889 int s_mb_history_cur;
2890 int s_mb_history_max;
2891 int s_mb_history_num;
2892 - struct proc_dir_entry *s_mb_proc;
2893 spinlock_t s_mb_history_lock;
2894 int s_mb_history_filter;
2895
2896 diff -rup b/fs/ext4//extents.c a/fs/ext4///extents.c
2897 --- b/fs/ext4/extents.c 2009-02-11 14:37:58.000000000 +0100
2898 +++ a/fs/ext4/extents.c 2009-02-10 21:40:11.000000000 +0100
2899 @@ -190,7 +190,7 @@ ext4_ext_new_meta_block(handle_t *handle
2900 ext4_fsblk_t goal, newblock;
2901
2902 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
2903 - newblock = ext4_new_meta_block(handle, inode, goal, err);
2904 + newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
2905 return newblock;
2906 }
2907
2908 @@ -383,8 +383,8 @@ static void ext4_ext_show_leaf(struct in
2909 ext_debug("\n");
2910 }
2911 #else
2912 -#define ext4_ext_show_path(inode,path)
2913 -#define ext4_ext_show_leaf(inode,path)
2914 +#define ext4_ext_show_path(inode, path)
2915 +#define ext4_ext_show_leaf(inode, path)
2916 #endif
2917
2918 void ext4_ext_drop_refs(struct ext4_ext_path *path)
2919 @@ -440,9 +440,10 @@ ext4_ext_binsearch_idx(struct inode *ino
2920 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
2921 if (k != 0 &&
2922 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
2923 - printk("k=%d, ix=0x%p, first=0x%p\n", k,
2924 - ix, EXT_FIRST_INDEX(eh));
2925 - printk("%u <= %u\n",
2926 + printk(KERN_DEBUG "k=%d, ix=0x%p, "
2927 + "first=0x%p\n", k,
2928 + ix, EXT_FIRST_INDEX(eh));
2929 + printk(KERN_DEBUG "%u <= %u\n",
2930 le32_to_cpu(ix->ei_block),
2931 le32_to_cpu(ix[-1].ei_block));
2932 }
2933 @@ -1158,15 +1159,13 @@ ext4_ext_search_right(struct inode *inod
2934 while (--depth >= 0) {
2935 ix = path[depth].p_idx;
2936 if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
2937 - break;
2938 + goto got_index;
2939 }
2940
2941 - if (depth < 0) {
2942 - /* we've gone up to the root and
2943 - * found no index to the right */
2944 - return 0;
2945 - }
2946 + /* we've gone up to the root and found no index to the right */
2947 + return 0;
2948
2949 +got_index:
2950 /* we've found index to the right, let's
2951 * follow it and find the closest allocated
2952 * block to the right */
2953 @@ -1199,7 +1198,6 @@ ext4_ext_search_right(struct inode *inod
2954 *phys = ext_pblock(ex);
2955 put_bh(bh);
2956 return 0;
2957 -
2958 }
2959
2960 /*
2961 @@ -1475,7 +1473,7 @@ int ext4_ext_insert_extent(handle_t *han
2962 struct ext4_ext_path *path,
2963 struct ext4_extent *newext)
2964 {
2965 - struct ext4_extent_header * eh;
2966 + struct ext4_extent_header *eh;
2967 struct ext4_extent *ex, *fex;
2968 struct ext4_extent *nearex; /* nearest extent */
2969 struct ext4_ext_path *npath = NULL;
2970 @@ -1620,7 +1618,6 @@ cleanup:
2971 ext4_ext_drop_refs(npath);
2972 kfree(npath);
2973 }
2974 - ext4_ext_tree_changed(inode);
2975 ext4_ext_invalidate_cache(inode);
2976 return err;
2977 }
2978 @@ -2124,7 +2121,6 @@ static int ext4_ext_remove_space(struct
2979 }
2980 }
2981 out:
2982 - ext4_ext_tree_changed(inode);
2983 ext4_ext_drop_refs(path);
2984 kfree(path);
2985 ext4_journal_stop(handle);
2986 @@ -2142,7 +2138,7 @@ void ext4_ext_init(struct super_block *s
2987 */
2988
2989 if (test_opt(sb, EXTENTS)) {
2990 - printk("EXT4-fs: file extents enabled");
2991 + printk(KERN_INFO "EXT4-fs: file extents enabled");
2992 #ifdef AGGRESSIVE_TEST
2993 printk(", aggressive tests");
2994 #endif
2995 @@ -2271,7 +2267,7 @@ static int ext4_ext_convert_to_initializ
2996 struct inode *inode,
2997 struct ext4_ext_path *path,
2998 ext4_lblk_t iblock,
2999 - unsigned long max_blocks)
3000 + unsigned int max_blocks)
3001 {
3002 struct ext4_extent *ex, newex, orig_ex;
3003 struct ext4_extent *ex1 = NULL;
3004 @@ -2569,26 +2565,26 @@ fix_extent_len:
3005 */
3006 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3007 ext4_lblk_t iblock,
3008 - unsigned long max_blocks, struct buffer_head *bh_result,
3009 + unsigned int max_blocks, struct buffer_head *bh_result,
3010 int create, int extend_disksize)
3011 {
3012 struct ext4_ext_path *path = NULL;
3013 struct ext4_extent_header *eh;
3014 struct ext4_extent newex, *ex;
3015 - ext4_fsblk_t goal, newblock;
3016 - int err = 0, depth, ret;
3017 - unsigned long allocated = 0;
3018 + ext4_fsblk_t newblock;
3019 + int err = 0, depth, ret, cache_type;
3020 + unsigned int allocated = 0;
3021 struct ext4_allocation_request ar;
3022 loff_t disksize;
3023
3024 __clear_bit(BH_New, &bh_result->b_state);
3025 - ext_debug("blocks %u/%lu requested for inode %u\n",
3026 + ext_debug("blocks %u/%u requested for inode %u\n",
3027 iblock, max_blocks, inode->i_ino);
3028
3029 /* check in cache */
3030 - goal = ext4_ext_in_cache(inode, iblock, &newex);
3031 - if (goal) {
3032 - if (goal == EXT4_EXT_CACHE_GAP) {
3033 + cache_type = ext4_ext_in_cache(inode, iblock, &newex);
3034 + if (cache_type) {
3035 + if (cache_type == EXT4_EXT_CACHE_GAP) {
3036 if (!create) {
3037 /*
3038 * block isn't allocated yet and
3039 @@ -2597,7 +2593,7 @@ int ext4_ext_get_blocks(handle_t *handle
3040 goto out2;
3041 }
3042 /* we should allocate requested block */
3043 - } else if (goal == EXT4_EXT_CACHE_EXTENT) {
3044 + } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
3045 /* block is already allocated */
3046 newblock = iblock
3047 - le32_to_cpu(newex.ee_block)
3048 @@ -2696,11 +2692,8 @@ int ext4_ext_get_blocks(handle_t *handle
3049 goto out2;
3050 }
3051 /*
3052 - * Okay, we need to do block allocation. Lazily initialize the block
3053 - * allocation info here if necessary.
3054 + * Okay, we need to do block allocation.
3055 */
3056 - if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
3057 - ext4_init_block_alloc_info(inode);
3058
3059 /* find neighbour allocated blocks */
3060 ar.lleft = iblock;
3061 @@ -2748,7 +2741,7 @@ int ext4_ext_get_blocks(handle_t *handle
3062 if (!newblock)
3063 goto out2;
3064 ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
3065 - goal, newblock, allocated);
3066 + ar.goal, newblock, allocated);
3067
3068 /* try to insert new extent into found leaf and return */
3069 ext4_ext_store_pblock(&newex, newblock);
3070 @@ -2760,7 +2753,7 @@ int ext4_ext_get_blocks(handle_t *handle
3071 /* free data blocks we just allocated */
3072 /* not a good idea to call discard here directly,
3073 * but otherwise we'd need to call it every free() */
3074 - ext4_mb_discard_inode_preallocations(inode);
3075 + ext4_discard_preallocations(inode);
3076 ext4_free_blocks(handle, inode, ext_pblock(&newex),
3077 ext4_ext_get_actual_len(&newex), 0);
3078 goto out2;
3079 @@ -2824,7 +2817,7 @@ void ext4_ext_truncate(struct inode *ino
3080 down_write(&EXT4_I(inode)->i_data_sem);
3081 ext4_ext_invalidate_cache(inode);
3082
3083 - ext4_discard_reservation(inode);
3084 + ext4_discard_preallocations(inode);
3085
3086 /*
3087 * TODO: optimization is possible here.
3088 @@ -2877,10 +2870,11 @@ static void ext4_falloc_update_inode(str
3089 * Update only when preallocation was requested beyond
3090 * the file size.
3091 */
3092 - if (!(mode & FALLOC_FL_KEEP_SIZE) &&
3093 - new_size > i_size_read(inode)) {
3094 - i_size_write(inode, new_size);
3095 - EXT4_I(inode)->i_disksize = new_size;
3096 + if (!(mode & FALLOC_FL_KEEP_SIZE)) {
3097 + if (new_size > i_size_read(inode))
3098 + i_size_write(inode, new_size);
3099 + if (new_size > EXT4_I(inode)->i_disksize)
3100 + ext4_update_i_disksize(inode, new_size);
3101 }
3102
3103 }
3104 @@ -2897,7 +2891,7 @@ long ext4_fallocate(struct inode *inode,
3105 handle_t *handle;
3106 ext4_lblk_t block;
3107 loff_t new_size;
3108 - unsigned long max_blocks;
3109 + unsigned int max_blocks;
3110 int ret = 0;
3111 int ret2 = 0;
3112 int retries = 0;
3113 diff -rup b/fs/ext4//file.c a/fs/ext4///file.c
3114 --- b/fs/ext4/file.c 2009-02-11 14:37:58.000000000 +0100
3115 +++ a/fs/ext4/file.c 2009-02-10 21:40:11.000000000 +0100
3116 @@ -31,14 +31,14 @@
3117 * from ext4_file_open: open gets called at every open, but release
3118 * gets called only when /all/ the files are closed.
3119 */
3120 -static int ext4_release_file (struct inode * inode, struct file * filp)
3121 +static int ext4_release_file(struct inode *inode, struct file *filp)
3122 {
3123 /* if we are the last writer on the inode, drop the block reservation */
3124 if ((filp->f_mode & FMODE_WRITE) &&
3125 (atomic_read(&inode->i_writecount) == 1))
3126 {
3127 down_write(&EXT4_I(inode)->i_data_sem);
3128 - ext4_discard_reservation(inode);
3129 + ext4_discard_preallocations(inode);
3130 up_write(&EXT4_I(inode)->i_data_sem);
3131 }
3132 if (is_dx(inode) && filp->private_data)
3133 @@ -162,7 +162,7 @@ const struct inode_operations ext4_file_
3134 .truncate = ext4_truncate,
3135 .setattr = ext4_setattr,
3136 .getattr = ext4_getattr,
3137 -#ifdef CONFIG_EXT4DEV_FS_XATTR
3138 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
3139 .setxattr = generic_setxattr,
3140 .getxattr = generic_getxattr,
3141 .listxattr = ext4_listxattr,
3142 diff -rup b/fs/ext4//fsync.c a/fs/ext4///fsync.c
3143 --- b/fs/ext4/fsync.c 2009-02-11 14:37:58.000000000 +0100
3144 +++ a/fs/ext4/fsync.c 2009-02-10 21:40:11.000000000 +0100
3145 @@ -28,6 +28,7 @@
3146 #include <linux/writeback.h>
3147 #include <linux/jbd2.h>
3148 #include <linux/blkdev.h>
3149 +#include <linux/marker.h>
3150 #include "ext4.h"
3151 #include "ext4_jbd2.h"
3152
3153 @@ -43,7 +44,7 @@
3154 * inode to disk.
3155 */
3156
3157 -int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
3158 +int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
3159 {
3160 struct inode *inode = dentry->d_inode;
3161 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
3162 @@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, s
3163
3164 J_ASSERT(ext4_journal_current_handle() == NULL);
3165
3166 + trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
3167 + inode->i_sb->s_id, datasync, inode->i_ino,
3168 + dentry->d_parent->d_inode->i_ino);
3169 +
3170 /*
3171 * data=writeback:
3172 * The caller's filemap_fdatawrite()/wait will sync the data.
3173 diff -rup b/fs/ext4//hash.c a/fs/ext4///hash.c
3174 --- b/fs/ext4/hash.c 2009-02-11 14:37:58.000000000 +0100
3175 +++ a/fs/ext4/hash.c 2009-02-10 21:40:11.000000000 +0100
3176 @@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4],
3177 sum += DELTA;
3178 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
3179 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
3180 - } while(--n);
3181 + } while (--n);
3182
3183 buf[0] += b0;
3184 buf[1] += b1;
3185 @@ -35,23 +35,43 @@ static void TEA_transform(__u32 buf[4],
3186
3187
3188 /* The old legacy hash */
3189 -static __u32 dx_hack_hash (const char *name, int len)
3190 +static __u32 dx_hack_hash_unsigned(const char *name, int len)
3191 {
3192 - __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
3193 + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
3194 + const unsigned char *ucp = (const unsigned char *) name;
3195 +
3196 + while (len--) {
3197 + hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
3198 +
3199 + if (hash & 0x80000000)
3200 + hash -= 0x7fffffff;
3201 + hash1 = hash0;
3202 + hash0 = hash;
3203 + }
3204 + return hash0 << 1;
3205 +}
3206 +
3207 +static __u32 dx_hack_hash_signed(const char *name, int len)
3208 +{
3209 + __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
3210 + const signed char *scp = (const signed char *) name;
3211 +
3212 while (len--) {
3213 - __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
3214 + hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
3215
3216 - if (hash & 0x80000000) hash -= 0x7fffffff;
3217 + if (hash & 0x80000000)
3218 + hash -= 0x7fffffff;
3219 hash1 = hash0;
3220 hash0 = hash;
3221 }
3222 - return (hash0 << 1);
3223 + return hash0 << 1;
3224 }
3225
3226 -static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
3227 +static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
3228 {
3229 __u32 pad, val;
3230 int i;
3231 + const signed char *scp = (const signed char *) msg;
3232
3233 pad = (__u32)len | ((__u32)len << 8);
3234 pad |= pad << 16;
3235 @@ -59,10 +79,38 @@ static void str2hashbuf(const char *msg,
3236 val = pad;
3237 if (len > num*4)
3238 len = num * 4;
3239 - for (i=0; i < len; i++) {
3240 + for (i = 0; i < len; i++) {
3241 if ((i % 4) == 0)
3242 val = pad;
3243 - val = msg[i] + (val << 8);
3244 + val = ((int) scp[i]) + (val << 8);
3245 + if ((i % 4) == 3) {
3246 + *buf++ = val;
3247 + val = pad;
3248 + num--;
3249 + }
3250 + }
3251 + if (--num >= 0)
3252 + *buf++ = val;
3253 + while (--num >= 0)
3254 + *buf++ = pad;
3255 +}
3256 +
3257 +static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
3258 +{
3259 + __u32 pad, val;
3260 + int i;
3261 + const unsigned char *ucp = (const unsigned char *) msg;
3262 +
3263 + pad = (__u32)len | ((__u32)len << 8);
3264 + pad |= pad << 16;
3265 +
3266 + val = pad;
3267 + if (len > num*4)
3268 + len = num * 4;
3269 + for (i = 0; i < len; i++) {
3270 + if ((i % 4) == 0)
3271 + val = pad;
3272 + val = ((int) ucp[i]) + (val << 8);
3273 if ((i % 4) == 3) {
3274 *buf++ = val;
3275 val = pad;
3276 @@ -95,6 +143,8 @@ int ext4fs_dirhash(const char *name, int
3277 const char *p;
3278 int i;
3279 __u32 in[8], buf[4];
3280 + void (*str2hashbuf)(const char *, int, __u32 *, int) =
3281 + str2hashbuf_signed;
3282
3283 /* Initialize the default seed for the hash checksum functions */
3284 buf[0] = 0x67452301;
3285 @@ -104,7 +154,7 @@ int ext4fs_dirhash(const char *name, int
3286
3287 /* Check to see if the seed is all zero's */
3288 if (hinfo->seed) {
3289 - for (i=0; i < 4; i++) {
3290 + for (i = 0; i < 4; i++) {
3291 if (hinfo->seed[i])
3292 break;
3293 }
3294 @@ -113,13 +163,18 @@ int ext4fs_dirhash(const char *name, int
3295 }
3296
3297 switch (hinfo->hash_version) {
3298 + case DX_HASH_LEGACY_UNSIGNED:
3299 + hash = dx_hack_hash_unsigned(name, len);
3300 + break;
3301 case DX_HASH_LEGACY:
3302 - hash = dx_hack_hash(name, len);
3303 + hash = dx_hack_hash_signed(name, len);
3304 break;
3305 + case DX_HASH_HALF_MD4_UNSIGNED:
3306 + str2hashbuf = str2hashbuf_unsigned;
3307 case DX_HASH_HALF_MD4:
3308 p = name;
3309 while (len > 0) {
3310 - str2hashbuf(p, len, in, 8);
3311 + (*str2hashbuf)(p, len, in, 8);
3312 half_md4_transform(buf, in);
3313 len -= 32;
3314 p += 32;
3315 @@ -127,10 +182,12 @@ int ext4fs_dirhash(const char *name, int
3316 minor_hash = buf[2];
3317 hash = buf[1];
3318 break;
3319 + case DX_HASH_TEA_UNSIGNED:
3320 + str2hashbuf = str2hashbuf_unsigned;
3321 case DX_HASH_TEA:
3322 p = name;
3323 while (len > 0) {
3324 - str2hashbuf(p, len, in, 4);
3325 + (*str2hashbuf)(p, len, in, 4);
3326 TEA_transform(buf, in);
3327 len -= 16;
3328 p += 16;
3329 diff -rup b/fs/ext4//ialloc.c a/fs/ext4///ialloc.c
3330 --- b/fs/ext4/ialloc.c 2009-02-11 14:37:58.000000000 +0100
3331 +++ a/fs/ext4/ialloc.c 2009-02-10 21:40:11.000000000 +0100
3332 @@ -74,17 +74,17 @@ unsigned ext4_init_inode_bitmap(struct s
3333 /* If checksum is bad mark all blocks and inodes use to prevent
3334 * allocation, essentially implementing a per-group read-only flag. */
3335 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
3336 - ext4_error(sb, __func__, "Checksum bad for group %lu\n",
3337 + ext4_error(sb, __func__, "Checksum bad for group %u\n",
3338 block_group);
3339 - gdp->bg_free_blocks_count = 0;
3340 - gdp->bg_free_inodes_count = 0;
3341 - gdp->bg_itable_unused = 0;
3342 + ext4_free_blks_set(sb, gdp, 0);
3343 + ext4_free_inodes_set(sb, gdp, 0);
3344 + ext4_itable_unused_set(sb, gdp, 0);
3345 memset(bh->b_data, 0xff, sb->s_blocksize);
3346 return 0;
3347 }
3348
3349 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
3350 - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
3351 + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
3352 bh->b_data);
3353
3354 return EXT4_INODES_PER_GROUP(sb);
3355 @@ -111,27 +111,49 @@ ext4_read_inode_bitmap(struct super_bloc
3356 if (unlikely(!bh)) {
3357 ext4_error(sb, __func__,
3358 "Cannot read inode bitmap - "
3359 - "block_group = %lu, inode_bitmap = %llu",
3360 + "block_group = %u, inode_bitmap = %llu",
3361 block_group, bitmap_blk);
3362 return NULL;
3363 }
3364 - if (bh_uptodate_or_lock(bh))
3365 + if (bitmap_uptodate(bh))
3366 return bh;
3367
3368 + lock_buffer(bh);
3369 + if (bitmap_uptodate(bh)) {
3370 + unlock_buffer(bh);
3371 + return bh;
3372 + }
3373 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
3374 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
3375 ext4_init_inode_bitmap(sb, bh, block_group, desc);
3376 + set_bitmap_uptodate(bh);
3377 set_buffer_uptodate(bh);
3378 - unlock_buffer(bh);
3379 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
3380 + unlock_buffer(bh);
3381 return bh;
3382 }
3383 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
3384 + if (buffer_uptodate(bh)) {
3385 + /*
3386 + * if not uninit if bh is uptodate,
3387 + * bitmap is also uptodate
3388 + */
3389 + set_bitmap_uptodate(bh);
3390 + unlock_buffer(bh);
3391 + return bh;
3392 + }
3393 + /*
3394 + * submit the buffer_head for read. We can
3395 + * safely mark the bitmap as uptodate now.
3396 + * We do it here so the bitmap uptodate bit
3397 + * get set with buffer lock held.
3398 + */
3399 + set_bitmap_uptodate(bh);
3400 if (bh_submit_read(bh) < 0) {
3401 put_bh(bh);
3402 ext4_error(sb, __func__,
3403 "Cannot read inode bitmap - "
3404 - "block_group = %lu, inode_bitmap = %llu",
3405 + "block_group = %u, inode_bitmap = %llu",
3406 block_group, bitmap_blk);
3407 return NULL;
3408 }
3409 @@ -154,39 +176,40 @@ ext4_read_inode_bitmap(struct super_bloc
3410 * though), and then we'd have two inodes sharing the
3411 * same inode number and space on the harddisk.
3412 */
3413 -void ext4_free_inode (handle_t *handle, struct inode * inode)
3414 +void ext4_free_inode(handle_t *handle, struct inode *inode)
3415 {
3416 - struct super_block * sb = inode->i_sb;
3417 + struct super_block *sb = inode->i_sb;
3418 int is_directory;
3419 unsigned long ino;
3420 struct buffer_head *bitmap_bh = NULL;
3421 struct buffer_head *bh2;
3422 ext4_group_t block_group;
3423 unsigned long bit;
3424 - struct ext4_group_desc * gdp;
3425 - struct ext4_super_block * es;
3426 + struct ext4_group_desc *gdp;
3427 + struct ext4_super_block *es;
3428 struct ext4_sb_info *sbi;
3429 - int fatal = 0, err;
3430 + int fatal = 0, err, count;
3431 ext4_group_t flex_group;
3432
3433 if (atomic_read(&inode->i_count) > 1) {
3434 - printk ("ext4_free_inode: inode has count=%d\n",
3435 - atomic_read(&inode->i_count));
3436 + printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
3437 + atomic_read(&inode->i_count));
3438 return;
3439 }
3440 if (inode->i_nlink) {
3441 - printk ("ext4_free_inode: inode has nlink=%d\n",
3442 - inode->i_nlink);
3443 + printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
3444 + inode->i_nlink);
3445 return;
3446 }
3447 if (!sb) {
3448 - printk("ext4_free_inode: inode on nonexistent device\n");
3449 + printk(KERN_ERR "ext4_free_inode: inode on "
3450 + "nonexistent device\n");
3451 return;
3452 }
3453 sbi = EXT4_SB(sb);
3454
3455 ino = inode->i_ino;
3456 - ext4_debug ("freeing inode %lu\n", ino);
3457 + ext4_debug("freeing inode %lu\n", ino);
3458
3459 /*
3460 * Note: we must free any quota before locking the superblock,
3461 @@ -200,12 +223,12 @@ void ext4_free_inode (handle_t *handle,
3462 is_directory = S_ISDIR(inode->i_mode);
3463
3464 /* Do this BEFORE marking the inode not in use or returning an error */
3465 - clear_inode (inode);
3466 + clear_inode(inode);
3467
3468 es = EXT4_SB(sb)->s_es;
3469 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
3470 - ext4_error (sb, "ext4_free_inode",
3471 - "reserved or nonexistent inode %lu", ino);
3472 + ext4_error(sb, "ext4_free_inode",
3473 + "reserved or nonexistent inode %lu", ino);
3474 goto error_return;
3475 }
3476 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
3477 @@ -222,10 +245,10 @@ void ext4_free_inode (handle_t *handle,
3478 /* Ok, now we can actually update the inode bitmaps.. */
3479 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
3480 bit, bitmap_bh->b_data))
3481 - ext4_error (sb, "ext4_free_inode",
3482 - "bit already cleared for inode %lu", ino);
3483 + ext4_error(sb, "ext4_free_inode",
3484 + "bit already cleared for inode %lu", ino);
3485 else {
3486 - gdp = ext4_get_group_desc (sb, block_group, &bh2);
3487 + gdp = ext4_get_group_desc(sb, block_group, &bh2);
3488
3489 BUFFER_TRACE(bh2, "get_write_access");
3490 fatal = ext4_journal_get_write_access(handle, bh2);
3491 @@ -233,9 +256,12 @@ void ext4_free_inode (handle_t *handle,
3492
3493 if (gdp) {
3494 spin_lock(sb_bgl_lock(sbi, block_group));
3495 - le16_add_cpu(&gdp->bg_free_inodes_count, 1);
3496 - if (is_directory)
3497 - le16_add_cpu(&gdp->bg_used_dirs_count, -1);
3498 + count = ext4_free_inodes_count(sb, gdp) + 1;
3499 + ext4_free_inodes_set(sb, gdp, count);
3500 + if (is_directory) {
3501 + count = ext4_used_dirs_count(sb, gdp) - 1;
3502 + ext4_used_dirs_set(sb, gdp, count);
3503 + }
3504 gdp->bg_checksum = ext4_group_desc_csum(sbi,
3505 block_group, gdp);
3506 spin_unlock(sb_bgl_lock(sbi, block_group));
3507 @@ -287,14 +313,14 @@ static int find_group_dir(struct super_b
3508 avefreei = freei / ngroups;
3509
3510 for (group = 0; group < ngroups; group++) {
3511 - desc = ext4_get_group_desc (sb, group, NULL);
3512 - if (!desc || !desc->bg_free_inodes_count)
3513 + desc = ext4_get_group_desc(sb, group, NULL);
3514 + if (!desc || !ext4_free_inodes_count(sb, desc))
3515 continue;
3516 - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
3517 + if (ext4_free_inodes_count(sb, desc) < avefreei)
3518 continue;
3519 if (!best_desc ||
3520 - (le16_to_cpu(desc->bg_free_blocks_count) >
3521 - le16_to_cpu(best_desc->bg_free_blocks_count))) {
3522 + (ext4_free_blks_count(sb, desc) >
3523 + ext4_free_blks_count(sb, best_desc))) {
3524 *best_group = group;
3525 best_desc = desc;
3526 ret = 0;
3527 @@ -366,7 +392,7 @@ found_flexbg:
3528 for (i = best_flex * flex_size; i < ngroups &&
3529 i < (best_flex + 1) * flex_size; i++) {
3530 desc = ext4_get_group_desc(sb, i, &bh);
3531 - if (le16_to_cpu(desc->bg_free_inodes_count)) {
3532 + if (ext4_free_inodes_count(sb, desc)) {
3533 *best_group = i;
3534 goto out;
3535 }
3536 @@ -440,17 +466,17 @@ static int find_group_orlov(struct super
3537 for (i = 0; i < ngroups; i++) {
3538 grp = (parent_group + i) % ngroups;
3539 desc = ext4_get_group_desc(sb, grp, NULL);
3540 - if (!desc || !desc->bg_free_inodes_count)
3541 + if (!desc || !ext4_free_inodes_count(sb, desc))
3542 continue;
3543 - if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
3544 + if (ext4_used_dirs_count(sb, desc) >= best_ndir)
3545 continue;
3546 - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
3547 + if (ext4_free_inodes_count(sb, desc) < avefreei)
3548 continue;
3549 - if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
3550 + if (ext4_free_blks_count(sb, desc) < avefreeb)
3551 continue;
3552 *group = grp;
3553 ret = 0;
3554 - best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
3555 + best_ndir = ext4_used_dirs_count(sb, desc);
3556 }
3557 if (ret == 0)
3558 return ret;
3559 @@ -476,13 +502,13 @@ static int find_group_orlov(struct super
3560 for (i = 0; i < ngroups; i++) {
3561 *group = (parent_group + i) % ngroups;
3562 desc = ext4_get_group_desc(sb, *group, NULL);
3563 - if (!desc || !desc->bg_free_inodes_count)
3564 + if (!desc || !ext4_free_inodes_count(sb, desc))
3565 continue;
3566 - if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
3567 + if (ext4_used_dirs_count(sb, desc) >= max_dirs)
3568 continue;
3569 - if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
3570 + if (ext4_free_inodes_count(sb, desc) < min_inodes)
3571 continue;
3572 - if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
3573 + if (ext4_free_blks_count(sb, desc) < min_blocks)
3574 continue;
3575 return 0;
3576 }
3577 @@ -491,8 +517,8 @@ fallback:
3578 for (i = 0; i < ngroups; i++) {
3579 *group = (parent_group + i) % ngroups;
3580 desc = ext4_get_group_desc(sb, *group, NULL);
3581 - if (desc && desc->bg_free_inodes_count &&
3582 - le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
3583 + if (desc && ext4_free_inodes_count(sb, desc) &&
3584 + ext4_free_inodes_count(sb, desc) >= avefreei)
3585 return 0;
3586 }
3587
3588 @@ -521,8 +547,8 @@ static int find_group_other(struct super
3589 */
3590 *group = parent_group;
3591 desc = ext4_get_group_desc(sb, *group, NULL);
3592 - if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
3593 - le16_to_cpu(desc->bg_free_blocks_count))
3594 + if (desc && ext4_free_inodes_count(sb, desc) &&
3595 + ext4_free_blks_count(sb, desc))
3596 return 0;
3597
3598 /*
3599 @@ -545,8 +571,8 @@ static int find_group_other(struct super
3600 if (*group >= ngroups)
3601 *group -= ngroups;
3602 desc = ext4_get_group_desc(sb, *group, NULL);
3603 - if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
3604 - le16_to_cpu(desc->bg_free_blocks_count))
3605 + if (desc && ext4_free_inodes_count(sb, desc) &&
3606 + ext4_free_blks_count(sb, desc))
3607 return 0;
3608 }
3609
3610 @@ -559,7 +585,7 @@ static int find_group_other(struct super
3611 if (++*group >= ngroups)
3612 *group = 0;
3613 desc = ext4_get_group_desc(sb, *group, NULL);
3614 - if (desc && le16_to_cpu(desc->bg_free_inodes_count))
3615 + if (desc && ext4_free_inodes_count(sb, desc))
3616 return 0;
3617 }
3618
3619 @@ -567,6 +593,79 @@ static int find_group_other(struct super
3620 }
3621
3622 /*
3623 + * claim the inode from the inode bitmap. If the group
3624 + * is uninit we need to take the groups's sb_bgl_lock
3625 + * and clear the uninit flag. The inode bitmap update
3626 + * and group desc uninit flag clear should be done
3627 + * after holding sb_bgl_lock so that ext4_read_inode_bitmap
3628 + * doesn't race with the ext4_claim_inode
3629 + */
3630 +static int ext4_claim_inode(struct super_block *sb,
3631 + struct buffer_head *inode_bitmap_bh,
3632 + unsigned long ino, ext4_group_t group, int mode)
3633 +{
3634 + int free = 0, retval = 0, count;
3635 + struct ext4_sb_info *sbi = EXT4_SB(sb);
3636 + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
3637 +
3638 + spin_lock(sb_bgl_lock(sbi, group));
3639 + if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
3640 + /* not a free inode */
3641 + retval = 1;
3642 + goto err_ret;
3643 + }
3644 + ino++;
3645 + if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
3646 + ino > EXT4_INODES_PER_GROUP(sb)) {
3647 + spin_unlock(sb_bgl_lock(sbi, group));
3648 + ext4_error(sb, __func__,
3649 + "reserved inode or inode > inodes count - "
3650 + "block_group = %u, inode=%lu", group,
3651 + ino + group * EXT4_INODES_PER_GROUP(sb));
3652 + return 1;
3653 + }
3654 + /* If we didn't allocate from within the initialized part of the inode
3655 + * table then we need to initialize up to this inode. */
3656 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
3657 +
3658 + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
3659 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
3660 + /* When marking the block group with
3661 + * ~EXT4_BG_INODE_UNINIT we don't want to depend
3662 + * on the value of bg_itable_unused even though
3663 + * mke2fs could have initialized the same for us.
3664 + * Instead we calculated the value below
3665 + */
3666 +
3667 + free = 0;
3668 + } else {
3669 + free = EXT4_INODES_PER_GROUP(sb) -
3670 + ext4_itable_unused_count(sb, gdp);
3671 + }
3672 +
3673 + /*
3674 + * Check the relative inode number against the last used
3675 + * relative inode number in this group. if it is greater
3676 + * we need to update the bg_itable_unused count
3677 + *
3678 + */
3679 + if (ino > free)
3680 + ext4_itable_unused_set(sb, gdp,
3681 + (EXT4_INODES_PER_GROUP(sb) - ino));
3682 + }
3683 + count = ext4_free_inodes_count(sb, gdp) - 1;
3684 + ext4_free_inodes_set(sb, gdp, count);
3685 + if (S_ISDIR(mode)) {
3686 + count = ext4_used_dirs_count(sb, gdp) + 1;
3687 + ext4_used_dirs_set(sb, gdp, count);
3688 + }
3689 + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
3690 +err_ret:
3691 + spin_unlock(sb_bgl_lock(sbi, group));
3692 + return retval;
3693 +}
3694 +
3695 +/*
3696 * There are two policies for allocating an inode. If the new inode is
3697 * a directory, then a forward search is made for a block group with both
3698 * free space and a low directory-to-inode ratio; if that fails, then of
3699 @@ -576,16 +675,16 @@ static int find_group_other(struct super
3700 * For other inodes, search forward from the parent directory's block
3701 * group to find a free inode.
3702 */
3703 -struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
3704 +struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
3705 {
3706 struct super_block *sb;
3707 - struct buffer_head *bitmap_bh = NULL;
3708 - struct buffer_head *bh2;
3709 + struct buffer_head *inode_bitmap_bh = NULL;
3710 + struct buffer_head *group_desc_bh;
3711 ext4_group_t group = 0;
3712 unsigned long ino = 0;
3713 - struct inode * inode;
3714 - struct ext4_group_desc * gdp = NULL;
3715 - struct ext4_super_block * es;
3716 + struct inode *inode;
3717 + struct ext4_group_desc *gdp = NULL;
3718 + struct ext4_super_block *es;
3719 struct ext4_inode_info *ei;
3720 struct ext4_sb_info *sbi;
3721 int ret2, err = 0;
3722 @@ -613,7 +712,7 @@ struct inode *ext4_new_inode(handle_t *h
3723 }
3724
3725 if (S_ISDIR(mode)) {
3726 - if (test_opt (sb, OLDALLOC))
3727 + if (test_opt(sb, OLDALLOC))
3728 ret2 = find_group_dir(sb, dir, &group);
3729 else
3730 ret2 = find_group_orlov(sb, dir, &group);
3731 @@ -628,40 +727,50 @@ got_group:
3732 for (i = 0; i < sbi->s_groups_count; i++) {
3733 err = -EIO;
3734
3735 - gdp = ext4_get_group_desc(sb, group, &bh2);
3736 + gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
3737 if (!gdp)
3738 goto fail;
3739
3740 - brelse(bitmap_bh);
3741 - bitmap_bh = ext4_read_inode_bitmap(sb, group);
3742 - if (!bitmap_bh)
3743 + brelse(inode_bitmap_bh);
3744 + inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
3745 + if (!inode_bitmap_bh)
3746 goto fail;
3747
3748 ino = 0;
3749
3750 repeat_in_this_group:
3751 ino = ext4_find_next_zero_bit((unsigned long *)
3752 - bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
3753 + inode_bitmap_bh->b_data,
3754 + EXT4_INODES_PER_GROUP(sb), ino);
3755 if (ino < EXT4_INODES_PER_GROUP(sb)) {
3756
3757 - BUFFER_TRACE(bitmap_bh, "get_write_access");
3758 - err = ext4_journal_get_write_access(handle, bitmap_bh);
3759 + BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
3760 + err = ext4_journal_get_write_access(handle,
3761 + inode_bitmap_bh);
3762 if (err)
3763 goto fail;
3764
3765 - if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
3766 - ino, bitmap_bh->b_data)) {
3767 + BUFFER_TRACE(group_desc_bh, "get_write_access");
3768 + err = ext4_journal_get_write_access(handle,
3769 + group_desc_bh);
3770 + if (err)
3771 + goto fail;
3772 + if (!ext4_claim_inode(sb, inode_bitmap_bh,
3773 + ino, group, mode)) {
3774 /* we won it */
3775 - BUFFER_TRACE(bitmap_bh,
3776 + BUFFER_TRACE(inode_bitmap_bh,
3777 "call ext4_journal_dirty_metadata");
3778 err = ext4_journal_dirty_metadata(handle,
3779 - bitmap_bh);
3780 + inode_bitmap_bh);
3781 if (err)
3782 goto fail;
3783 + /* zero bit is inode number 1*/
3784 + ino++;
3785 goto got;
3786 }
3787 /* we lost it */
3788 - jbd2_journal_release_buffer(handle, bitmap_bh);
3789 + jbd2_journal_release_buffer(handle, inode_bitmap_bh);
3790 + jbd2_journal_release_buffer(handle, group_desc_bh);
3791
3792 if (++ino < EXT4_INODES_PER_GROUP(sb))
3793 goto repeat_in_this_group;
3794 @@ -681,30 +790,16 @@ repeat_in_this_group:
3795 goto out;
3796
3797 got:
3798 - ino++;
3799 - if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
3800 - ino > EXT4_INODES_PER_GROUP(sb)) {
3801 - ext4_error(sb, __func__,
3802 - "reserved inode or inode > inodes count - "
3803 - "block_group = %lu, inode=%lu", group,
3804 - ino + group * EXT4_INODES_PER_GROUP(sb));
3805 - err = -EIO;
3806 - goto fail;
3807 - }
3808 -
3809 - BUFFER_TRACE(bh2, "get_write_access");
3810 - err = ext4_journal_get_write_access(handle, bh2);
3811 - if (err) goto fail;
3812 -
3813 /* We may have to initialize the block bitmap if it isn't already */
3814 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
3815 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3816 - struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
3817 + struct buffer_head *block_bitmap_bh;
3818
3819 - BUFFER_TRACE(block_bh, "get block bitmap access");
3820 - err = ext4_journal_get_write_access(handle, block_bh);
3821 + block_bitmap_bh = ext4_read_block_bitmap(sb, group);
3822 + BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
3823 + err = ext4_journal_get_write_access(handle, block_bitmap_bh);
3824 if (err) {
3825 - brelse(block_bh);
3826 + brelse(block_bitmap_bh);
3827 goto fail;
3828 }
3829
3830 @@ -712,9 +807,9 @@ got:
3831 spin_lock(sb_bgl_lock(sbi, group));
3832 /* recheck and clear flag under lock if we still need to */
3833 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3834 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3835 free = ext4_free_blocks_after_init(sb, group, gdp);
3836 - gdp->bg_free_blocks_count = cpu_to_le16(free);
3837 + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3838 + ext4_free_blks_set(sb, gdp, free);
3839 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
3840 gdp);
3841 }
3842 @@ -722,55 +817,19 @@ got:
3843
3844 /* Don't need to dirty bitmap block if we didn't change it */
3845 if (free) {
3846 - BUFFER_TRACE(block_bh, "dirty block bitmap");
3847 - err = ext4_journal_dirty_metadata(handle, block_bh);
3848 + BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
3849 + err = ext4_journal_dirty_metadata(handle,
3850 + block_bitmap_bh);
3851 }
3852
3853 - brelse(block_bh);
3854 + brelse(block_bitmap_bh);
3855 if (err)
3856 goto fail;
3857 }
3858 -
3859 - spin_lock(sb_bgl_lock(sbi, group));
3860 - /* If we didn't allocate from within the initialized part of the inode
3861 - * table then we need to initialize up to this inode. */
3862 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
3863 - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
3864 - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
3865 -
3866 - /* When marking the block group with
3867 - * ~EXT4_BG_INODE_UNINIT we don't want to depend
3868 - * on the value of bg_itable_unused even though
3869 - * mke2fs could have initialized the same for us.
3870 - * Instead we calculated the value below
3871 - */
3872 -
3873 - free = 0;
3874 - } else {
3875 - free = EXT4_INODES_PER_GROUP(sb) -
3876 - le16_to_cpu(gdp->bg_itable_unused);
3877 - }
3878 -
3879 - /*
3880 - * Check the relative inode number against the last used
3881 - * relative inode number in this group. if it is greater
3882 - * we need to update the bg_itable_unused count
3883 - *
3884 - */
3885 - if (ino > free)
3886 - gdp->bg_itable_unused =
3887 - cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
3888 - }
3889 -
3890 - le16_add_cpu(&gdp->bg_free_inodes_count, -1);
3891 - if (S_ISDIR(mode)) {
3892 - le16_add_cpu(&gdp->bg_used_dirs_count, 1);
3893 - }
3894 - gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
3895 - spin_unlock(sb_bgl_lock(sbi, group));
3896 - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
3897 - err = ext4_journal_dirty_metadata(handle, bh2);
3898 - if (err) goto fail;
3899 + BUFFER_TRACE(group_desc_bh, "call ext4_journal_dirty_metadata");
3900 + err = ext4_journal_dirty_metadata(handle, group_desc_bh);
3901 + if (err)
3902 + goto fail;
3903
3904 percpu_counter_dec(&sbi->s_freeinodes_counter);
3905 if (S_ISDIR(mode))
3906 @@ -784,15 +843,15 @@ got:
3907 spin_unlock(sb_bgl_lock(sbi, flex_group));
3908 }
3909
3910 - inode->i_uid = current->fsuid;
3911 - if (test_opt (sb, GRPID))
3912 + inode->i_uid = current_fsuid();
3913 + if (test_opt(sb, GRPID))
3914 inode->i_gid = dir->i_gid;
3915 else if (dir->i_mode & S_ISGID) {
3916 inode->i_gid = dir->i_gid;
3917 if (S_ISDIR(mode))
3918 mode |= S_ISGID;
3919 } else
3920 - inode->i_gid = current->fsgid;
3921 + inode->i_gid = current_fsgid();
3922 inode->i_mode = mode;
3923
3924 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
3925 @@ -818,7 +877,6 @@ got:
3926 ei->i_flags &= ~EXT4_DIRSYNC_FL;
3927 ei->i_file_acl = 0;
3928 ei->i_dtime = 0;
3929 - ei->i_block_alloc_info = NULL;
3930 ei->i_block_group = group;
3931
3932 ext4_set_inode_flags(inode);
3933 @@ -834,7 +892,7 @@ got:
3934 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
3935
3936 ret = inode;
3937 - if(DQUOT_ALLOC_INODE(inode)) {
3938 + if (DQUOT_ALLOC_INODE(inode)) {
3939 err = -EDQUOT;
3940 goto fail_drop;
3941 }
3942 @@ -843,7 +901,7 @@ got:
3943 if (err)
3944 goto fail_free_drop;
3945
3946 - err = ext4_init_security(handle,inode, dir);
3947 + err = ext4_init_security(handle, inode, dir);
3948 if (err)
3949 goto fail_free_drop;
3950
3951 @@ -869,7 +927,7 @@ out:
3952 iput(inode);
3953 ret = ERR_PTR(err);
3954 really_out:
3955 - brelse(bitmap_bh);
3956 + brelse(inode_bitmap_bh);
3957 return ret;
3958
3959 fail_free_drop:
3960 @@ -880,7 +938,7 @@ fail_drop:
3961 inode->i_flags |= S_NOQUOTA;
3962 inode->i_nlink = 0;
3963 iput(inode);
3964 - brelse(bitmap_bh);
3965 + brelse(inode_bitmap_bh);
3966 return ERR_PTR(err);
3967 }
3968
3969 @@ -961,7 +1019,7 @@ error:
3970 return ERR_PTR(err);
3971 }
3972
3973 -unsigned long ext4_count_free_inodes (struct super_block * sb)
3974 +unsigned long ext4_count_free_inodes(struct super_block *sb)
3975 {
3976 unsigned long desc_count;
3977 struct ext4_group_desc *gdp;
3978 @@ -976,10 +1034,10 @@ unsigned long ext4_count_free_inodes (st
3979 bitmap_count = 0;
3980 gdp = NULL;
3981 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
3982 - gdp = ext4_get_group_desc (sb, i, NULL);
3983 + gdp = ext4_get_group_desc(sb, i, NULL);
3984 if (!gdp)
3985 continue;
3986 - desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
3987 + desc_count += ext4_free_inodes_count(sb, gdp);
3988 brelse(bitmap_bh);
3989 bitmap_bh = ext4_read_inode_bitmap(sb, i);
3990 if (!bitmap_bh)
3991 @@ -987,20 +1045,21 @@ unsigned long ext4_count_free_inodes (st
3992
3993 x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
3994 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
3995 - i, le16_to_cpu(gdp->bg_free_inodes_count), x);
3996 + i, ext4_free_inodes_count(sb, gdp), x);
3997 bitmap_count += x;
3998 }
3999 brelse(bitmap_bh);
4000 - printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n",
4001 - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
4002 + printk(KERN_DEBUG "ext4_count_free_inodes: "
4003 + "stored = %u, computed = %lu, %lu\n",
4004 + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
4005 return desc_count;
4006 #else
4007 desc_count = 0;
4008 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
4009 - gdp = ext4_get_group_desc (sb, i, NULL);
4010 + gdp = ext4_get_group_desc(sb, i, NULL);
4011 if (!gdp)
4012 continue;
4013 - desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
4014 + desc_count += ext4_free_inodes_count(sb, gdp);
4015 cond_resched();
4016 }
4017 return desc_count;
4018 @@ -1008,16 +1067,16 @@ unsigned long ext4_count_free_inodes (st
4019 }
4020
4021 /* Called at mount-time, super-block is locked */
4022 -unsigned long ext4_count_dirs (struct super_block * sb)
4023 +unsigned long ext4_count_dirs(struct super_block * sb)
4024 {
4025 unsigned long count = 0;
4026 ext4_group_t i;
4027
4028 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
4029 - struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
4030 + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
4031 if (!gdp)
4032 continue;
4033 - count += le16_to_cpu(gdp->bg_used_dirs_count);
4034 + count += ext4_used_dirs_count(sb, gdp);
4035 }
4036 return count;
4037 }
4038 diff -rup b/fs/ext4//inode.c a/fs/ext4///inode.c
4039 --- b/fs/ext4/inode.c 2009-02-11 14:37:58.000000000 +0100
4040 +++ a/fs/ext4/inode.c 2009-02-11 01:08:42.000000000 +0100
4041 @@ -190,7 +190,7 @@ static int ext4_journal_test_restart(han
4042 /*
4043 * Called at the last iput() if i_nlink is zero.
4044 */
4045 -void ext4_delete_inode (struct inode * inode)
4046 +void ext4_delete_inode(struct inode *inode)
4047 {
4048 handle_t *handle;
4049 int err;
4050 @@ -330,11 +330,11 @@ static int ext4_block_to_path(struct ino
4051 int final = 0;
4052
4053 if (i_block < 0) {
4054 - ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0");
4055 + ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
4056 } else if (i_block < direct_blocks) {
4057 offsets[n++] = i_block;
4058 final = direct_blocks;
4059 - } else if ( (i_block -= direct_blocks) < indirect_blocks) {
4060 + } else if ((i_block -= direct_blocks) < indirect_blocks) {
4061 offsets[n++] = EXT4_IND_BLOCK;
4062 offsets[n++] = i_block;
4063 final = ptrs;
4064 @@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct
4065
4066 *err = 0;
4067 /* i_data is not going away, no lock needed */
4068 - add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets);
4069 + add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
4070 if (!p->key)
4071 goto no_block;
4072 while (--depth) {
4073 bh = sb_bread(sb, le32_to_cpu(p->key));
4074 if (!bh)
4075 goto failure;
4076 - add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
4077 + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
4078 /* Reader: end */
4079 if (!p->key)
4080 goto no_block;
4081 @@ -443,7 +443,7 @@ no_block:
4082 static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
4083 {
4084 struct ext4_inode_info *ei = EXT4_I(inode);
4085 - __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
4086 + __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
4087 __le32 *p;
4088 ext4_fsblk_t bg_start;
4089 ext4_fsblk_t last_block;
4090 @@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struc
4091 static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
4092 Indirect *partial)
4093 {
4094 - struct ext4_block_alloc_info *block_i;
4095 -
4096 - block_i = EXT4_I(inode)->i_block_alloc_info;
4097 -
4098 /*
4099 - * try the heuristic for sequential allocation,
4100 - * failing that at least try to get decent locality.
4101 + * XXX need to get goal block from mballoc's data structures
4102 */
4103 - if (block_i && (block == block_i->last_alloc_logical_block + 1)
4104 - && (block_i->last_alloc_physical_block != 0)) {
4105 - return block_i->last_alloc_physical_block + 1;
4106 - }
4107
4108 return ext4_find_near(inode, partial);
4109 }
4110 @@ -514,10 +505,10 @@ static ext4_fsblk_t ext4_find_goal(struc
4111 * return the total number of blocks to be allocate, including the
4112 * direct and indirect blocks.
4113 */
4114 -static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
4115 +static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
4116 int blocks_to_boundary)
4117 {
4118 - unsigned long count = 0;
4119 + unsigned int count = 0;
4120
4121 /*
4122 * Simple case, [t,d]Indirect block(s) has not allocated yet
4123 @@ -555,6 +546,7 @@ static int ext4_alloc_blocks(handle_t *h
4124 int indirect_blks, int blks,
4125 ext4_fsblk_t new_blocks[4], int *err)
4126 {
4127 + struct ext4_allocation_request ar;
4128 int target, i;
4129 unsigned long count = 0, blk_allocated = 0;
4130 int index = 0;
4131 @@ -603,10 +595,15 @@ static int ext4_alloc_blocks(handle_t *h
4132 if (!target)
4133 goto allocated;
4134 /* Now allocate data blocks */
4135 - count = target;
4136 - /* allocating blocks for data blocks */
4137 - current_block = ext4_new_blocks(handle, inode, iblock,
4138 - goal, &count, err);
4139 + memset(&ar, 0, sizeof(ar));
4140 + ar.inode = inode;
4141 + ar.goal = goal;
4142 + ar.len = target;
4143 + ar.logical = iblock;
4144 + ar.flags = EXT4_MB_HINT_DATA;
4145 +
4146 + current_block = ext4_mb_new_blocks(handle, &ar, err);
4147 +
4148 if (*err && (target == blks)) {
4149 /*
4150 * if the allocation failed and we didn't allocate
4151 @@ -622,7 +619,7 @@ static int ext4_alloc_blocks(handle_t *h
4152 */
4153 new_blocks[index] = current_block;
4154 }
4155 - blk_allocated += count;
4156 + blk_allocated += ar.len;
4157 }
4158 allocated:
4159 /* total number of blocks allocated for direct blocks */
4160 @@ -630,7 +627,7 @@ allocated:
4161 *err = 0;
4162 return ret;
4163 failed_out:
4164 - for (i = 0; i <index; i++)
4165 + for (i = 0; i < index; i++)
4166 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
4167 return ret;
4168 }
4169 @@ -703,7 +700,7 @@ static int ext4_alloc_branch(handle_t *h
4170 branch[n].p = (__le32 *) bh->b_data + offsets[n];
4171 branch[n].key = cpu_to_le32(new_blocks[n]);
4172 *branch[n].p = branch[n].key;
4173 - if ( n == indirect_blks) {
4174 + if (n == indirect_blks) {
4175 current_block = new_blocks[n];
4176 /*
4177 * End of chain, update the last new metablock of
4178 @@ -730,7 +727,7 @@ failed:
4179 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
4180 ext4_journal_forget(handle, branch[i].bh);
4181 }
4182 - for (i = 0; i <indirect_blks; i++)
4183 + for (i = 0; i < indirect_blks; i++)
4184 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
4185
4186 ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
4187 @@ -757,10 +754,8 @@ static int ext4_splice_branch(handle_t *
4188 {
4189 int i;
4190 int err = 0;
4191 - struct ext4_block_alloc_info *block_i;
4192 ext4_fsblk_t current_block;
4193
4194 - block_i = EXT4_I(inode)->i_block_alloc_info;
4195 /*
4196 * If we're splicing into a [td]indirect block (as opposed to the
4197 * inode) then we need to get write access to the [td]indirect block
4198 @@ -783,18 +778,7 @@ static int ext4_splice_branch(handle_t *
4199 if (num == 0 && blks > 1) {
4200 current_block = le32_to_cpu(where->key) + 1;
4201 for (i = 1; i < blks; i++)
4202 - *(where->p + i ) = cpu_to_le32(current_block++);
4203 - }
4204 -
4205 - /*
4206 - * update the most recently allocated logical & physical block
4207 - * in i_block_alloc_info, to assist find the proper goal block for next
4208 - * allocation
4209 - */
4210 - if (block_i) {
4211 - block_i->last_alloc_logical_block = block + blks - 1;
4212 - block_i->last_alloc_physical_block =
4213 - le32_to_cpu(where[num].key) + blks - 1;
4214 + *(where->p + i) = cpu_to_le32(current_block++);
4215 }
4216
4217 /* We are done with atomic stuff, now do the rest of housekeeping */
4218 @@ -861,10 +845,10 @@ err_out:
4219 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
4220 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
4221 */
4222 -int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
4223 - ext4_lblk_t iblock, unsigned long maxblocks,
4224 - struct buffer_head *bh_result,
4225 - int create, int extend_disksize)
4226 +static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
4227 + ext4_lblk_t iblock, unsigned int maxblocks,
4228 + struct buffer_head *bh_result,
4229 + int create, int extend_disksize)
4230 {
4231 int err = -EIO;
4232 ext4_lblk_t offsets[4];
4233 @@ -914,12 +898,8 @@ int ext4_get_blocks_handle(handle_t *han
4234 goto cleanup;
4235
4236 /*
4237 - * Okay, we need to do block allocation. Lazily initialize the block
4238 - * allocation info here if necessary
4239 + * Okay, we need to do block allocation.
4240 */
4241 - if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
4242 - ext4_init_block_alloc_info(inode);
4243 -
4244 goal = ext4_find_goal(inode, iblock, partial);
4245
4246 /* the number of blocks need to allocate for [d,t]indirect blocks */
4247 @@ -1030,19 +1010,20 @@ static void ext4_da_update_reserve_space
4248 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
4249 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
4250
4251 - /* Account for allocated meta_blocks */
4252 - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
4253 -
4254 - /* update fs free blocks counter for truncate case */
4255 - percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
4256 + if (mdb_free) {
4257 + /* Account for allocated meta_blocks */
4258 + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
4259 +
4260 + /* update fs dirty blocks counter */
4261 + percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
4262 + EXT4_I(inode)->i_allocated_meta_blocks = 0;
4263 + EXT4_I(inode)->i_reserved_meta_blocks = mdb;
4264 + }
4265
4266 /* update per-inode reservations */
4267 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
4268 EXT4_I(inode)->i_reserved_data_blocks -= used;
4269
4270 - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
4271 - EXT4_I(inode)->i_reserved_meta_blocks = mdb;
4272 - EXT4_I(inode)->i_allocated_meta_blocks = 0;
4273 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
4274 }
4275
4276 @@ -1069,7 +1050,7 @@ static void ext4_da_update_reserve_space
4277 * It returns the error in case of allocation failure.
4278 */
4279 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
4280 - unsigned long max_blocks, struct buffer_head *bh,
4281 + unsigned int max_blocks, struct buffer_head *bh,
4282 int create, int extend_disksize, int flag)
4283 {
4284 int retval;
4285 @@ -1241,7 +1222,7 @@ struct buffer_head *ext4_getblk(handle_t
4286 BUFFER_TRACE(bh, "call get_create_access");
4287 fatal = ext4_journal_get_create_access(handle, bh);
4288 if (!fatal && !buffer_uptodate(bh)) {
4289 - memset(bh->b_data,0,inode->i_sb->s_blocksize);
4290 + memset(bh->b_data, 0, inode->i_sb->s_blocksize);
4291 set_buffer_uptodate(bh);
4292 }
4293 unlock_buffer(bh);
4294 @@ -1266,7 +1247,7 @@ err:
4295 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
4296 ext4_lblk_t block, int create, int *err)
4297 {
4298 - struct buffer_head * bh;
4299 + struct buffer_head *bh;
4300
4301 bh = ext4_getblk(handle, inode, block, create, err);
4302 if (!bh)
4303 @@ -1282,13 +1263,13 @@ struct buffer_head *ext4_bread(handle_t
4304 return NULL;
4305 }
4306
4307 -static int walk_page_buffers( handle_t *handle,
4308 - struct buffer_head *head,
4309 - unsigned from,
4310 - unsigned to,
4311 - int *partial,
4312 - int (*fn)( handle_t *handle,
4313 - struct buffer_head *bh))
4314 +static int walk_page_buffers(handle_t *handle,
4315 + struct buffer_head *head,
4316 + unsigned from,
4317 + unsigned to,
4318 + int *partial,
4319 + int (*fn)(handle_t *handle,
4320 + struct buffer_head *bh))
4321 {
4322 struct buffer_head *bh;
4323 unsigned block_start, block_end;
4324 @@ -1296,9 +1277,9 @@ static int walk_page_buffers( handle_t *
4325 int err, ret = 0;
4326 struct buffer_head *next;
4327
4328 - for ( bh = head, block_start = 0;
4329 - ret == 0 && (bh != head || !block_start);
4330 - block_start = block_end, bh = next)
4331 + for (bh = head, block_start = 0;
4332 + ret == 0 && (bh != head || !block_start);
4333 + block_start = block_end, bh = next)
4334 {
4335 next = bh->b_this_page;
4336 block_end = block_start + blocksize;
4337 @@ -1351,23 +1332,23 @@ static int ext4_write_begin(struct file
4338 loff_t pos, unsigned len, unsigned flags,
4339 struct page **pagep, void **fsdata)
4340 {
4341 - struct inode *inode = mapping->host;
4342 + struct inode *inode = mapping->host;
4343 int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
4344 handle_t *handle;
4345 int retries = 0;
4346 - struct page *page;
4347 + struct page *page;
4348 pgoff_t index;
4349 - unsigned from, to;
4350 + unsigned from, to;
4351
4352 index = pos >> PAGE_CACHE_SHIFT;
4353 - from = pos & (PAGE_CACHE_SIZE - 1);
4354 - to = from + len;
4355 + from = pos & (PAGE_CACHE_SIZE - 1);
4356 + to = from + len;
4357
4358 retry:
4359 - handle = ext4_journal_start(inode, needed_blocks);
4360 - if (IS_ERR(handle)) {
4361 - ret = PTR_ERR(handle);
4362 - goto out;
4363 + handle = ext4_journal_start(inode, needed_blocks);
4364 + if (IS_ERR(handle)) {
4365 + ret = PTR_ERR(handle);
4366 + goto out;
4367 }
4368
4369 page = grab_cache_page_write_begin(mapping, index, flags);
4370 @@ -1387,9 +1368,16 @@ retry:
4371 }
4372
4373 if (ret) {
4374 - unlock_page(page);
4375 + unlock_page(page);
4376 ext4_journal_stop(handle);
4377 - page_cache_release(page);
4378 + page_cache_release(page);
4379 + /*
4380 + * block_write_begin may have instantiated a few blocks
4381 + * outside i_size. Trim these off again. Don't need
4382 + * i_size_read because we hold i_mutex.
4383 + */
4384 + if (pos + len > inode->i_size)
4385 + vmtruncate(inode, inode->i_size);
4386 }
4387
4388 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
4389 @@ -1426,16 +1414,18 @@ static int ext4_ordered_write_end(struct
4390 ret = ext4_jbd2_file_inode(handle, inode);
4391
4392 if (ret == 0) {
4393 - /*
4394 - * generic_write_end() will run mark_inode_dirty() if i_size
4395 - * changes. So let's piggyback the i_disksize mark_inode_dirty
4396 - * into that.
4397 - */
4398 loff_t new_i_size;
4399
4400 new_i_size = pos + copied;
4401 - if (new_i_size > EXT4_I(inode)->i_disksize)
4402 - EXT4_I(inode)->i_disksize = new_i_size;
4403 + if (new_i_size > EXT4_I(inode)->i_disksize) {
4404 + ext4_update_i_disksize(inode, new_i_size);
4405 + /* We need to mark inode dirty even if
4406 + * new_i_size is less than inode->i_size
4407 + * but greater than i_disksize.(hint delalloc)
4408 + */
4409 + ext4_mark_inode_dirty(handle, inode);
4410 + }
4411 +
4412 ret2 = generic_write_end(file, mapping, pos, len, copied,
4413 page, fsdata);
4414 copied = ret2;
4415 @@ -1460,8 +1450,14 @@ static int ext4_writeback_write_end(stru
4416 loff_t new_i_size;
4417
4418 new_i_size = pos + copied;
4419 - if (new_i_size > EXT4_I(inode)->i_disksize)
4420 - EXT4_I(inode)->i_disksize = new_i_size;
4421 + if (new_i_size > EXT4_I(inode)->i_disksize) {
4422 + ext4_update_i_disksize(inode, new_i_size);
4423 + /* We need to mark inode dirty even if
4424 + * new_i_size is less that inode->i_size
4425 + * bu greater than i_disksize.(hint delalloc)
4426 + */
4427 + ext4_mark_inode_dirty(handle, inode);
4428 + }
4429
4430 ret2 = generic_write_end(file, mapping, pos, len, copied,
4431 page, fsdata);
4432 @@ -1486,6 +1482,7 @@ static int ext4_journalled_write_end(str
4433 int ret = 0, ret2;
4434 int partial = 0;
4435 unsigned from, to;
4436 + loff_t new_i_size;
4437
4438 from = pos & (PAGE_CACHE_SIZE - 1);
4439 to = from + len;
4440 @@ -1500,11 +1497,12 @@ static int ext4_journalled_write_end(str
4441 to, &partial, write_end_fn);
4442 if (!partial)
4443 SetPageUptodate(page);
4444 - if (pos+copied > inode->i_size)
4445 + new_i_size = pos + copied;
4446 + if (new_i_size > inode->i_size)
4447 i_size_write(inode, pos+copied);
4448 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
4449 - if (inode->i_size > EXT4_I(inode)->i_disksize) {
4450 - EXT4_I(inode)->i_disksize = inode->i_size;
4451 + if (new_i_size > EXT4_I(inode)->i_disksize) {
4452 + ext4_update_i_disksize(inode, new_i_size);
4453 ret2 = ext4_mark_inode_dirty(handle, inode);
4454 if (!ret)
4455 ret = ret2;
4456 @@ -1521,6 +1519,7 @@ static int ext4_journalled_write_end(str
4457
4458 static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
4459 {
4460 + int retries = 0;
4461 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4462 unsigned long md_needed, mdblocks, total = 0;
4463
4464 @@ -1529,6 +1528,7 @@ static int ext4_da_reserve_space(struct
4465 * in order to allocate nrblocks
4466 * worse case is one extent per block
4467 */
4468 +repeat:
4469 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
4470 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
4471 mdblocks = ext4_calc_metadata_amount(inode, total);
4472 @@ -1537,13 +1537,14 @@ static int ext4_da_reserve_space(struct
4473 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
4474 total = md_needed + nrblocks;
4475
4476 - if (ext4_has_free_blocks(sbi, total) < total) {
4477 + if (ext4_claim_free_blocks(sbi, total)) {
4478 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
4479 + if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
4480 + yield();
4481 + goto repeat;
4482 + }
4483 return -ENOSPC;
4484 }
4485 - /* reduce fs free blocks counter */
4486 - percpu_counter_sub(&sbi->s_freeblocks_counter, total);
4487 -
4488 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
4489 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
4490
4491 @@ -1585,8 +1586,8 @@ static void ext4_da_release_space(struct
4492
4493 release = to_free + mdb_free;
4494
4495 - /* update fs free blocks counter for truncate case */
4496 - percpu_counter_add(&sbi->s_freeblocks_counter, release);
4497 + /* update fs dirty blocks counter for truncate case */
4498 + percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
4499
4500 /* update per-inode reservations */
4501 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
4502 @@ -1629,7 +1630,8 @@ struct mpage_da_data {
4503 get_block_t *get_block;
4504 struct writeback_control *wbc;
4505 int io_done;
4506 - long pages_written;
4507 + int pages_written;
4508 + int retval;
4509 };
4510
4511 /*
4512 @@ -1648,18 +1650,25 @@ struct mpage_da_data {
4513 */
4514 static int mpage_da_submit_io(struct mpage_da_data *mpd)
4515 {
4516 - struct address_space *mapping = mpd->inode->i_mapping;
4517 - int ret = 0, err, nr_pages, i;
4518 - unsigned long index, end;
4519 + long pages_skipped;
4520 struct pagevec pvec;
4521 + unsigned long index, end;
4522 + int ret = 0, err, nr_pages, i;
4523 + struct inode *inode = mpd->inode;
4524 + struct address_space *mapping = inode->i_mapping;
4525
4526 BUG_ON(mpd->next_page <= mpd->first_page);
4527 - pagevec_init(&pvec, 0);
4528 + /*
4529 + * We need to start from the first_page to the next_page - 1
4530 + * to make sure we also write the mapped dirty buffer_heads.
4531 + * If we look at mpd->lbh.b_blocknr we would only be looking
4532 + * at the currently mapped buffer_heads.
4533 + */
4534 index = mpd->first_page;
4535 end = mpd->next_page - 1;
4536
4537 + pagevec_init(&pvec, 0);
4538 while (index <= end) {
4539 - /* XXX: optimize tail */
4540 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
4541 if (nr_pages == 0)
4542 break;
4543 @@ -1671,8 +1680,16 @@ static int mpage_da_submit_io(struct mpa
4544 break;
4545 index++;
4546
4547 + BUG_ON(!PageLocked(page));
4548 + BUG_ON(PageWriteback(page));
4549 +
4550 + pages_skipped = mpd->wbc->pages_skipped;
4551 err = mapping->a_ops->writepage(page, mpd->wbc);
4552 - if (!err)
4553 + if (!err && (pages_skipped == mpd->wbc->pages_skipped))
4554 + /*
4555 + * have successfully written the page
4556 + * without skipping the same
4557 + */
4558 mpd->pages_written++;
4559 /*
4560 * In error case, we have to continue because
4561 @@ -1783,6 +1800,57 @@ static inline void __unmap_underlying_bl
4562 unmap_underlying_metadata(bdev, bh->b_blocknr + i);
4563 }
4564
4565 +static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
4566 + sector_t logical, long blk_cnt)
4567 +{
4568 + int nr_pages, i;
4569 + pgoff_t index, end;
4570 + struct pagevec pvec;
4571 + struct inode *inode = mpd->inode;
4572 + struct address_space *mapping = inode->i_mapping;
4573 +
4574 + index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
4575 + end = (logical + blk_cnt - 1) >>
4576 + (PAGE_CACHE_SHIFT - inode->i_blkbits);
4577 + while (index <= end) {
4578 + nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
4579 + if (nr_pages == 0)
4580 + break;
4581 + for (i = 0; i < nr_pages; i++) {
4582 + struct page *page = pvec.pages[i];
4583 + index = page->index;
4584 + if (index > end)
4585 + break;
4586 + index++;
4587 +
4588 + BUG_ON(!PageLocked(page));
4589 + BUG_ON(PageWriteback(page));
4590 + block_invalidatepage(page, 0);
4591 + ClearPageUptodate(page);
4592 + unlock_page(page);
4593 + }
4594 + }
4595 + return;
4596 +}
4597 +
4598 +static void ext4_print_free_blocks(struct inode *inode)
4599 +{
4600 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4601 + printk(KERN_EMERG "Total free blocks count %lld\n",
4602 + ext4_count_free_blocks(inode->i_sb));
4603 + printk(KERN_EMERG "Free/Dirty block details\n");
4604 + printk(KERN_EMERG "free_blocks=%lld\n",
4605 + (long long)percpu_counter_sum(&sbi->s_freeblocks_counter));
4606 + printk(KERN_EMERG "dirty_blocks=%lld\n",
4607 + (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter));
4608 + printk(KERN_EMERG "Block reservation details\n");
4609 + printk(KERN_EMERG "i_reserved_data_blocks=%u\n",
4610 + EXT4_I(inode)->i_reserved_data_blocks);
4611 + printk(KERN_EMERG "i_reserved_meta_blocks=%u\n",
4612 + EXT4_I(inode)->i_reserved_meta_blocks);
4613 + return;
4614 +}
4615 +
4616 /*
4617 * mpage_da_map_blocks - go through given space
4618 *
4619 @@ -1792,32 +1860,69 @@ static inline void __unmap_underlying_bl
4620 * The function skips space we know is already mapped to disk blocks.
4621 *
4622 */
4623 -static void mpage_da_map_blocks(struct mpage_da_data *mpd)
4624 +static int mpage_da_map_blocks(struct mpage_da_data *mpd)
4625 {
4626 int err = 0;
4627 - struct buffer_head *lbh = &mpd->lbh;
4628 - sector_t next = lbh->b_blocknr;
4629 struct buffer_head new;
4630 + struct buffer_head *lbh = &mpd->lbh;
4631 + sector_t next;
4632
4633 /*
4634 * We consider only non-mapped and non-allocated blocks
4635 */
4636 if (buffer_mapped(lbh) && !buffer_delay(lbh))
4637 - return;
4638 -
4639 + return 0;
4640 new.b_state = lbh->b_state;
4641 new.b_blocknr = 0;
4642 new.b_size = lbh->b_size;
4643 -
4644 + next = lbh->b_blocknr;
4645 /*
4646 * If we didn't accumulate anything
4647 * to write simply return
4648 */
4649 if (!new.b_size)
4650 - return;
4651 + return 0;
4652 err = mpd->get_block(mpd->inode, next, &new, 1);
4653 - if (err)
4654 - return;
4655 + if (err) {
4656 +
4657 + /* If get block returns with error
4658 + * we simply return. Later writepage
4659 + * will redirty the page and writepages
4660 + * will find the dirty page again
4661 + */
4662 + if (err == -EAGAIN)
4663 + return 0;
4664 +
4665 + if (err == -ENOSPC &&
4666 + ext4_count_free_blocks(mpd->inode->i_sb)) {
4667 + mpd->retval = err;
4668 + return 0;
4669 + }
4670 +
4671 + /*
4672 + * get block failure will cause us
4673 + * to loop in writepages. Because
4674 + * a_ops->writepage won't be able to
4675 + * make progress. The page will be redirtied
4676 + * by writepage and writepages will again
4677 + * try to write the same.
4678 + */
4679 + printk(KERN_EMERG "%s block allocation failed for inode %lu "
4680 + "at logical offset %llu with max blocks "
4681 + "%zd with error %d\n",
4682 + __func__, mpd->inode->i_ino,
4683 + (unsigned long long)next,
4684 + lbh->b_size >> mpd->inode->i_blkbits, err);
4685 + printk(KERN_EMERG "This should not happen.!! "
4686 + "Data will be lost\n");
4687 + if (err == -ENOSPC) {
4688 + ext4_print_free_blocks(mpd->inode);
4689 + }
4690 + /* invlaidate all the pages */
4691 + ext4_da_block_invalidatepages(mpd, next,
4692 + lbh->b_size >> mpd->inode->i_blkbits);
4693 + return err;
4694 + }
4695 BUG_ON(new.b_size == 0);
4696
4697 if (buffer_new(&new))
4698 @@ -1830,7 +1935,7 @@ static void mpage_da_map_blocks(struct m
4699 if (buffer_delay(lbh) || buffer_unwritten(lbh))
4700 mpage_put_bnr_to_bhs(mpd, next, &new);
4701
4702 - return;
4703 + return 0;
4704 }
4705
4706 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
4707 @@ -1899,8 +2004,8 @@ flush_it:
4708 * We couldn't merge the block to our extent, so we
4709 * need to flush current extent and start new one
4710 */
4711 - mpage_da_map_blocks(mpd);
4712 - mpage_da_submit_io(mpd);
4713 + if (mpage_da_map_blocks(mpd) == 0)
4714 + mpage_da_submit_io(mpd);
4715 mpd->io_done = 1;
4716 return;
4717 }
4718 @@ -1942,8 +2047,8 @@ static int __mpage_da_writepage(struct p
4719 * and start IO on them using writepage()
4720 */
4721 if (mpd->next_page != mpd->first_page) {
4722 - mpage_da_map_blocks(mpd);
4723 - mpage_da_submit_io(mpd);
4724 + if (mpage_da_map_blocks(mpd) == 0)
4725 + mpage_da_submit_io(mpd);
4726 /*
4727 * skip rest of the page in the page_vec
4728 */
4729 @@ -1991,11 +2096,29 @@ static int __mpage_da_writepage(struct p
4730 bh = head;
4731 do {
4732 BUG_ON(buffer_locked(bh));
4733 + /*
4734 + * We need to try to allocate
4735 + * unmapped blocks in the same page.
4736 + * Otherwise we won't make progress
4737 + * with the page in ext4_da_writepage
4738 + */
4739 if (buffer_dirty(bh) &&
4740 (!buffer_mapped(bh) || buffer_delay(bh))) {
4741 mpage_add_bh_to_extent(mpd, logical, bh);
4742 if (mpd->io_done)
4743 return MPAGE_DA_EXTENT_TAIL;
4744 + } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
4745 + /*
4746 + * mapped dirty buffer. We need to update
4747 + * the b_state because we look at
4748 + * b_state in mpage_da_map_blocks. We don't
4749 + * update b_size because if we find an
4750 + * unmapped buffer_head later we need to
4751 + * use the b_state flag of that buffer_head.
4752 + */
4753 + if (mpd->lbh.b_size == 0)
4754 + mpd->lbh.b_state =
4755 + bh->b_state & BH_FLAGS;
4756 }
4757 logical++;
4758 } while ((bh = bh->b_this_page) != head);
4759 @@ -2018,39 +2141,42 @@ static int __mpage_da_writepage(struct p
4760 */
4761 static int mpage_da_writepages(struct address_space *mapping,
4762 struct writeback_control *wbc,
4763 - get_block_t get_block)
4764 + struct mpage_da_data *mpd)
4765 {
4766 - struct mpage_da_data mpd;
4767 - long to_write;
4768 int ret;
4769 + long nr_to_write;
4770 + pgoff_t index;
4771
4772 - if (!get_block)
4773 - return generic_writepages(mapping, wbc);
4774 -
4775 - mpd.wbc = wbc;
4776 - mpd.inode = mapping->host;
4777 - mpd.lbh.b_size = 0;
4778 - mpd.lbh.b_state = 0;
4779 - mpd.lbh.b_blocknr = 0;
4780 - mpd.first_page = 0;
4781 - mpd.next_page = 0;
4782 - mpd.get_block = get_block;
4783 - mpd.io_done = 0;
4784 - mpd.pages_written = 0;
4785
4786 - to_write = wbc->nr_to_write;
4787
4788 - ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
4789 + if (!mpd->get_block)
4790 + return generic_writepages(mapping, wbc);
4791
4792 + mpd->lbh.b_size = 0;
4793 + mpd->lbh.b_state = 0;
4794 + mpd->lbh.b_blocknr = 0;
4795 + mpd->first_page = 0;
4796 + mpd->next_page = 0;
4797 + mpd->io_done = 0;
4798 + mpd->pages_written = 0;
4799 + mpd->retval = 0;
4800 +
4801 + nr_to_write = wbc->nr_to_write;
4802 + index = mapping->writeback_index;
4803 + ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
4804 + wbc->nr_to_write = nr_to_write;
4805 + mapping->writeback_index = index;
4806 /*
4807 * Handle last extent of pages
4808 */
4809 - if (!mpd.io_done && mpd.next_page != mpd.first_page) {
4810 - mpage_da_map_blocks(&mpd);
4811 - mpage_da_submit_io(&mpd);
4812 - }
4813 + if (!mpd->io_done && mpd->next_page != mpd->first_page) {
4814 + if (mpage_da_map_blocks(mpd) == 0)
4815 + mpage_da_submit_io(mpd);
4816
4817 - wbc->nr_to_write = to_write - mpd.pages_written;
4818 + mpd->io_done = 1;
4819 + ret = MPAGE_DA_EXTENT_TAIL;
4820 + }
4821 + wbc->nr_to_write -= mpd->pages_written;
4822 return ret;
4823 }
4824
4825 @@ -2103,18 +2229,24 @@ static int ext4_da_get_block_write(struc
4826 handle_t *handle = NULL;
4827
4828 handle = ext4_journal_current_handle();
4829 - if (!handle) {
4830 - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
4831 - bh_result, 0, 0, 0);
4832 - BUG_ON(!ret);
4833 - } else {
4834 - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
4835 - bh_result, create, 0, EXT4_DELALLOC_RSVED);
4836 - }
4837 -
4838 + BUG_ON(!handle);
4839 + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
4840 + bh_result, create, 0, EXT4_DELALLOC_RSVED);
4841 if (ret > 0) {
4842 +
4843 bh_result->b_size = (ret << inode->i_blkbits);
4844
4845 + if (ext4_should_order_data(inode)) {
4846 + int retval;
4847 + retval = ext4_jbd2_file_inode(handle, inode);
4848 + if (retval)
4849 + /*
4850 + * Failed to add inode for ordered
4851 + * mode. Don't update file size
4852 + */
4853 + return retval;
4854 + }
4855 +
4856 /*
4857 * Update on-disk size along with block allocation
4858 * we don't use 'extend_disksize' as size may change
4859 @@ -2124,18 +2256,9 @@ static int ext4_da_get_block_write(struc
4860 if (disksize > i_size_read(inode))
4861 disksize = i_size_read(inode);
4862 if (disksize > EXT4_I(inode)->i_disksize) {
4863 - /*
4864 - * XXX: replace with spinlock if seen contended -bzzz
4865 - */
4866 - down_write(&EXT4_I(inode)->i_data_sem);
4867 - if (disksize > EXT4_I(inode)->i_disksize)
4868 - EXT4_I(inode)->i_disksize = disksize;
4869 - up_write(&EXT4_I(inode)->i_data_sem);
4870 -
4871 - if (EXT4_I(inode)->i_disksize == disksize) {
4872 - ret = ext4_mark_inode_dirty(handle, inode);
4873 - return ret;
4874 - }
4875 + ext4_update_i_disksize(inode, disksize);
4876 + ret = ext4_mark_inode_dirty(handle, inode);
4877 + return ret;
4878 }
4879 ret = 0;
4880 }
4881 @@ -2181,7 +2304,7 @@ static int ext4_da_writepage(struct page
4882 {
4883 int ret = 0;
4884 loff_t size;
4885 - unsigned long len;
4886 + unsigned int len;
4887 struct buffer_head *page_bufs;
4888 struct inode *inode = page->mapping->host;
4889
4890 @@ -2284,11 +2407,14 @@ static int ext4_da_writepages_trans_bloc
4891 static int ext4_da_writepages(struct address_space *mapping,
4892 struct writeback_control *wbc)
4893 {
4894 + pgoff_t index;
4895 + int range_whole = 0;
4896 handle_t *handle = NULL;
4897 - loff_t range_start = 0;
4898 + struct mpage_da_data mpd;
4899 struct inode *inode = mapping->host;
4900 + int pages_written = 0;
4901 + long pages_skipped;
4902 int needed_blocks, ret = 0, nr_to_writebump = 0;
4903 - long to_write, pages_skipped = 0;
4904 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
4905
4906 /*
4907 @@ -2298,6 +2424,20 @@ static int ext4_da_writepages(struct add
4908 */
4909 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
4910 return 0;
4911 +
4912 + /*
4913 + * If the filesystem has aborted, it is read-only, so return
4914 + * right away instead of dumping stack traces later on that
4915 + * will obscure the real source of the problem. We test
4916 + * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
4917 + * the latter could be true if the filesystem is mounted
4918 + * read-only, and in that case, ext4_da_writepages should
4919 + * *never* be called, so if that ever happens, we would want
4920 + * the stack trace.
4921 + */
4922 + if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
4923 + return -EROFS;
4924 +
4925 /*
4926 * Make sure nr_to_write is >= sbi->s_mb_stream_request
4927 * This make sure small files blocks are allocated in
4928 @@ -2308,20 +2448,24 @@ static int ext4_da_writepages(struct add
4929 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
4930 wbc->nr_to_write = sbi->s_mb_stream_request;
4931 }
4932 + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4933 + range_whole = 1;
4934
4935 - if (!wbc->range_cyclic)
4936 - /*
4937 - * If range_cyclic is not set force range_cont
4938 - * and save the old writeback_index
4939 - */
4940 - wbc->range_cont = 1;
4941 + if (wbc->range_cyclic)
4942 + index = mapping->writeback_index;
4943 + else
4944 + index = wbc->range_start >> PAGE_CACHE_SHIFT;
4945
4946 - range_start = wbc->range_start;
4947 + mpd.wbc = wbc;
4948 + mpd.inode = mapping->host;
4949 +
4950 + /*
4951 + * we don't want write_cache_pages to update
4952 + * nr_to_write and writeback_index
4953 + */
4954 pages_skipped = wbc->pages_skipped;
4955
4956 -restart_loop:
4957 - to_write = wbc->nr_to_write;
4958 - while (!ret && to_write > 0) {
4959 + while (!ret && wbc->nr_to_write > 0) {
4960
4961 /*
4962 * we insert one extent at a time. So we need
4963 @@ -2336,63 +2480,87 @@ restart_loop:
4964 handle = ext4_journal_start(inode, needed_blocks);
4965 if (IS_ERR(handle)) {
4966 ret = PTR_ERR(handle);
4967 - printk(KERN_EMERG "%s: jbd2_start: "
4968 + printk(KERN_CRIT "%s: jbd2_start: "
4969 "%ld pages, ino %lu; err %d\n", __func__,
4970 wbc->nr_to_write, inode->i_ino, ret);
4971 dump_stack();
4972 goto out_writepages;
4973 }
4974 - if (ext4_should_order_data(inode)) {
4975 - /*
4976 - * With ordered mode we need to add
4977 - * the inode to the journal handl
4978 - * when we do block allocation.
4979 - */
4980 - ret = ext4_jbd2_file_inode(handle, inode);
4981 - if (ret) {
4982 - ext4_journal_stop(handle);
4983 - goto out_writepages;
4984 - }
4985 - }
4986 + mpd.get_block = ext4_da_get_block_write;
4987 + ret = mpage_da_writepages(mapping, wbc, &mpd);
4988
4989 - to_write -= wbc->nr_to_write;
4990 - ret = mpage_da_writepages(mapping, wbc,
4991 - ext4_da_get_block_write);
4992 ext4_journal_stop(handle);
4993 - if (ret == MPAGE_DA_EXTENT_TAIL) {
4994 +
4995 + if (mpd.retval == -ENOSPC) {
4996 + /* commit the transaction which would
4997 + * free blocks released in the transaction
4998 + * and try again
4999 + */
5000 + jbd2_journal_force_commit_nested(sbi->s_journal);
5001 + wbc->pages_skipped = pages_skipped;
5002 + ret = 0;
5003 + } else if (ret == MPAGE_DA_EXTENT_TAIL) {
5004 /*
5005 * got one extent now try with
5006 * rest of the pages
5007 */
5008 - to_write += wbc->nr_to_write;
5009 + pages_written += mpd.pages_written;
5010 + wbc->pages_skipped = pages_skipped;
5011 ret = 0;
5012 - } else if (wbc->nr_to_write) {
5013 + } else if (wbc->nr_to_write)
5014 /*
5015 * There is no more writeout needed
5016 * or we requested for a noblocking writeout
5017 * and we found the device congested
5018 */
5019 - to_write += wbc->nr_to_write;
5020 break;
5021 - }
5022 - wbc->nr_to_write = to_write;
5023 - }
5024 -
5025 - if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
5026 - /* We skipped pages in this loop */
5027 - wbc->range_start = range_start;
5028 - wbc->nr_to_write = to_write +
5029 - wbc->pages_skipped - pages_skipped;
5030 - wbc->pages_skipped = pages_skipped;
5031 - goto restart_loop;
5032 }
5033 + if (pages_skipped != wbc->pages_skipped)
5034 + printk(KERN_EMERG "This should not happen leaving %s "
5035 + "with nr_to_write = %ld ret = %d\n",
5036 + __func__, wbc->nr_to_write, ret);
5037 +
5038 + /* Update index */
5039 + index += pages_written;
5040 + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
5041 + /*
5042 + * set the writeback_index so that range_cyclic
5043 + * mode will write it back later
5044 + */
5045 + mapping->writeback_index = index;
5046
5047 out_writepages:
5048 - wbc->nr_to_write = to_write - nr_to_writebump;
5049 - wbc->range_start = range_start;
5050 + wbc->nr_to_write -= nr_to_writebump;
5051 return ret;
5052 }
5053
5054 +#define FALL_BACK_TO_NONDELALLOC 1
5055 +static int ext4_nonda_switch(struct super_block *sb)
5056 +{
5057 + s64 free_blocks, dirty_blocks;
5058 + struct ext4_sb_info *sbi = EXT4_SB(sb);
5059 +
5060 + /*
5061 + * switch to non delalloc mode if we are running low
5062 + * on free block. The free block accounting via percpu
5063 + * counters can get slightly wrong with FBC_BATCH getting
5064 + * accumulated on each CPU without updating global counters
5065 + * Delalloc need an accurate free block accounting. So switch
5066 + * to non delalloc when we are near to error range.
5067 + */
5068 + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
5069 + dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
5070 + if (2 * free_blocks < 3 * dirty_blocks ||
5071 + free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
5072 + /*
5073 + * free block count is less that 150% of dirty blocks
5074 + * or free blocks is less that watermark
5075 + */
5076 + return 1;
5077 + }
5078 + return 0;
5079 +}
5080 +
5081 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
5082 loff_t pos, unsigned len, unsigned flags,
5083 struct page **pagep, void **fsdata)
5084 @@ -2408,6 +2576,12 @@ static int ext4_da_write_begin(struct fi
5085 from = pos & (PAGE_CACHE_SIZE - 1);
5086 to = from + len;
5087
5088 + if (ext4_nonda_switch(inode->i_sb)) {
5089 + *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
5090 + return ext4_write_begin(file, mapping, pos,
5091 + len, flags, pagep, fsdata);
5092 + }
5093 + *fsdata = (void *)0;
5094 retry:
5095 /*
5096 * With delayed allocation, we don't log the i_disksize update
5097 @@ -2435,6 +2609,13 @@ retry:
5098 unlock_page(page);
5099 ext4_journal_stop(handle);
5100 page_cache_release(page);
5101 + /*
5102 + * block_write_begin may have instantiated a few blocks
5103 + * outside i_size. Trim these off again. Don't need
5104 + * i_size_read because we hold i_mutex.
5105 + */
5106 + if (pos + len > inode->i_size)
5107 + vmtruncate(inode, inode->i_size);
5108 }
5109
5110 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5111 @@ -2458,7 +2639,7 @@ static int ext4_da_should_update_i_disks
5112 bh = page_buffers(page);
5113 idx = offset >> inode->i_blkbits;
5114
5115 - for (i=0; i < idx; i++)
5116 + for (i = 0; i < idx; i++)
5117 bh = bh->b_this_page;
5118
5119 if (!buffer_mapped(bh) || (buffer_delay(bh)))
5120 @@ -2476,9 +2657,22 @@ static int ext4_da_write_end(struct file
5121 handle_t *handle = ext4_journal_current_handle();
5122 loff_t new_i_size;
5123 unsigned long start, end;
5124 + int write_mode = (int)(unsigned long)fsdata;
5125 +
5126 + if (write_mode == FALL_BACK_TO_NONDELALLOC) {
5127 + if (ext4_should_order_data(inode)) {
5128 + return ext4_ordered_write_end(file, mapping, pos,
5129 + len, copied, page, fsdata);
5130 + } else if (ext4_should_writeback_data(inode)) {
5131 + return ext4_writeback_write_end(file, mapping, pos,
5132 + len, copied, page, fsdata);
5133 + } else {
5134 + BUG();
5135 + }
5136 + }
5137
5138 start = pos & (PAGE_CACHE_SIZE - 1);
5139 - end = start + copied -1;
5140 + end = start + copied - 1;
5141
5142 /*
5143 * generic_write_end() will run mark_inode_dirty() if i_size
5144 @@ -2502,6 +2696,11 @@ static int ext4_da_write_end(struct file
5145 EXT4_I(inode)->i_disksize = new_i_size;
5146 }
5147 up_write(&EXT4_I(inode)->i_data_sem);
5148 + /* We need to mark inode dirty even if
5149 + * new_i_size is less that inode->i_size
5150 + * bu greater than i_disksize.(hint delalloc)
5151 + */
5152 + ext4_mark_inode_dirty(handle, inode);
5153 }
5154 }
5155 ret2 = generic_write_end(file, mapping, pos, len, copied,
5156 @@ -2593,7 +2792,7 @@ static sector_t ext4_bmap(struct address
5157 return 0;
5158 }
5159
5160 - return generic_block_bmap(mapping,block,ext4_get_block);
5161 + return generic_block_bmap(mapping, block, ext4_get_block);
5162 }
5163
5164 static int bget_one(handle_t *handle, struct buffer_head *bh)
5165 @@ -3199,7 +3398,7 @@ static Indirect *ext4_find_shared(struct
5166 if (!partial->key && *partial->p)
5167 /* Writer: end */
5168 goto no_top;
5169 - for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
5170 + for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
5171 ;
5172 /*
5173 * OK, we've found the last block that must survive. The rest of our
5174 @@ -3218,7 +3417,7 @@ static Indirect *ext4_find_shared(struct
5175 }
5176 /* Writer: end */
5177
5178 - while(partial > p) {
5179 + while (partial > p) {
5180 brelse(partial->bh);
5181 partial--;
5182 }
5183 @@ -3410,9 +3609,9 @@ static void ext4_free_branches(handle_t
5184 /* This zaps the entire block. Bottom up. */
5185 BUFFER_TRACE(bh, "free child branches");
5186 ext4_free_branches(handle, inode, bh,
5187 - (__le32*)bh->b_data,
5188 - (__le32*)bh->b_data + addr_per_block,
5189 - depth);
5190 + (__le32 *) bh->b_data,
5191 + (__le32 *) bh->b_data + addr_per_block,
5192 + depth);
5193
5194 /*
5195 * We've probably journalled the indirect block several
5196 @@ -3580,7 +3779,7 @@ void ext4_truncate(struct inode *inode)
5197 */
5198 down_write(&ei->i_data_sem);
5199
5200 - ext4_discard_reservation(inode);
5201 + ext4_discard_preallocations(inode);
5202
5203 /*
5204 * The orphan list entry will now protect us from any crash which
5205 @@ -3675,41 +3874,6 @@ out_stop:
5206 ext4_journal_stop(handle);
5207 }
5208
5209 -static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
5210 - unsigned long ino, struct ext4_iloc *iloc)
5211 -{
5212 - ext4_group_t block_group;
5213 - unsigned long offset;
5214 - ext4_fsblk_t block;
5215 - struct ext4_group_desc *gdp;
5216 -
5217 - if (!ext4_valid_inum(sb, ino)) {
5218 - /*
5219 - * This error is already checked for in namei.c unless we are
5220 - * looking at an NFS filehandle, in which case no error
5221 - * report is needed
5222 - */
5223 - return 0;
5224 - }
5225 -
5226 - block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
5227 - gdp = ext4_get_group_desc(sb, block_group, NULL);
5228 - if (!gdp)
5229 - return 0;
5230 -
5231 - /*
5232 - * Figure out the offset within the block group inode table
5233 - */
5234 - offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
5235 - EXT4_INODE_SIZE(sb);
5236 - block = ext4_inode_table(sb, gdp) +
5237 - (offset >> EXT4_BLOCK_SIZE_BITS(sb));
5238 -
5239 - iloc->block_group = block_group;
5240 - iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
5241 - return block;
5242 -}
5243 -
5244 /*
5245 * ext4_get_inode_loc returns with an extra refcount against the inode's
5246 * underlying buffer_head on success. If 'in_mem' is true, we have all
5247 @@ -3719,19 +3883,35 @@ static ext4_fsblk_t ext4_get_inode_block
5248 static int __ext4_get_inode_loc(struct inode *inode,
5249 struct ext4_iloc *iloc, int in_mem)
5250 {
5251 - ext4_fsblk_t block;
5252 - struct buffer_head *bh;
5253 + struct ext4_group_desc *gdp;
5254 + struct buffer_head *bh;
5255 + struct super_block *sb = inode->i_sb;
5256 + ext4_fsblk_t block;
5257 + int inodes_per_block, inode_offset;
5258 +
5259 + iloc->bh = NULL;
5260 + if (!ext4_valid_inum(sb, inode->i_ino))
5261 + return -EIO;
5262
5263 - block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
5264 - if (!block)
5265 + iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
5266 + gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
5267 + if (!gdp)
5268 return -EIO;
5269
5270 - bh = sb_getblk(inode->i_sb, block);
5271 + /*
5272 + * Figure out the offset within the block group inode table
5273 + */
5274 + inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
5275 + inode_offset = ((inode->i_ino - 1) %
5276 + EXT4_INODES_PER_GROUP(sb));
5277 + block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
5278 + iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
5279 +
5280 + bh = sb_getblk(sb, block);
5281 if (!bh) {
5282 - ext4_error (inode->i_sb, "ext4_get_inode_loc",
5283 - "unable to read inode block - "
5284 - "inode=%lu, block=%llu",
5285 - inode->i_ino, block);
5286 + ext4_error(sb, "ext4_get_inode_loc", "unable to read "
5287 + "inode block - inode=%lu, block=%llu",
5288 + inode->i_ino, block);
5289 return -EIO;
5290 }
5291 if (!buffer_uptodate(bh)) {
5292 @@ -3759,28 +3939,12 @@ static int __ext4_get_inode_loc(struct i
5293 */
5294 if (in_mem) {
5295 struct buffer_head *bitmap_bh;
5296 - struct ext4_group_desc *desc;
5297 - int inodes_per_buffer;
5298 - int inode_offset, i;
5299 - ext4_group_t block_group;
5300 - int start;
5301 -
5302 - block_group = (inode->i_ino - 1) /
5303 - EXT4_INODES_PER_GROUP(inode->i_sb);
5304 - inodes_per_buffer = bh->b_size /
5305 - EXT4_INODE_SIZE(inode->i_sb);
5306 - inode_offset = ((inode->i_ino - 1) %
5307 - EXT4_INODES_PER_GROUP(inode->i_sb));
5308 - start = inode_offset & ~(inodes_per_buffer - 1);
5309 + int i, start;
5310
5311 - /* Is the inode bitmap in cache? */
5312 - desc = ext4_get_group_desc(inode->i_sb,
5313 - block_group, NULL);
5314 - if (!desc)
5315 - goto make_io;
5316 + start = inode_offset & ~(inodes_per_block - 1);
5317
5318 - bitmap_bh = sb_getblk(inode->i_sb,
5319 - ext4_inode_bitmap(inode->i_sb, desc));
5320 + /* Is the inode bitmap in cache? */
5321 + bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
5322 if (!bitmap_bh)
5323 goto make_io;
5324
5325 @@ -3793,14 +3957,14 @@ static int __ext4_get_inode_loc(struct i
5326 brelse(bitmap_bh);
5327 goto make_io;
5328 }
5329 - for (i = start; i < start + inodes_per_buffer; i++) {
5330 + for (i = start; i < start + inodes_per_block; i++) {
5331 if (i == inode_offset)
5332 continue;
5333 if (ext4_test_bit(i, bitmap_bh->b_data))
5334 break;
5335 }
5336 brelse(bitmap_bh);
5337 - if (i == start + inodes_per_buffer) {
5338 + if (i == start + inodes_per_block) {
5339 /* all other inodes are free, so skip I/O */
5340 memset(bh->b_data, 0, bh->b_size);
5341 set_buffer_uptodate(bh);
5342 @@ -3811,6 +3975,36 @@ static int __ext4_get_inode_loc(struct i
5343
5344 make_io:
5345 /*
5346 + * If we need to do any I/O, try to pre-readahead extra
5347 + * blocks from the inode table.
5348 + */
5349 + if (EXT4_SB(sb)->s_inode_readahead_blks) {
5350 + ext4_fsblk_t b, end, table;
5351 + unsigned num;
5352 +
5353 + table = ext4_inode_table(sb, gdp);
5354 + /* Make sure s_inode_readahead_blks is a power of 2 */
5355 + while (EXT4_SB(sb)->s_inode_readahead_blks &
5356 + (EXT4_SB(sb)->s_inode_readahead_blks-1))
5357 + EXT4_SB(sb)->s_inode_readahead_blks =
5358 + (EXT4_SB(sb)->s_inode_readahead_blks &
5359 + (EXT4_SB(sb)->s_inode_readahead_blks-1));
5360 + b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
5361 + if (table > b)
5362 + b = table;
5363 + end = b + EXT4_SB(sb)->s_inode_readahead_blks;
5364 + num = EXT4_INODES_PER_GROUP(sb);
5365 + if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5366 + EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
5367 + num -= ext4_itable_unused_count(sb, gdp);
5368 + table += num / inodes_per_block;
5369 + if (end > table)
5370 + end = table;
5371 + while (b <= end)
5372 + sb_breadahead(sb, b++);
5373 + }
5374 +
5375 + /*
5376 * There are other valid inodes in the buffer, this inode
5377 * has in-inode xattrs, or we don't have this inode in memory.
5378 * Read the block from disk.
5379 @@ -3820,10 +4014,9 @@ make_io:
5380 submit_bh(READ_META, bh);
5381 wait_on_buffer(bh);
5382 if (!buffer_uptodate(bh)) {
5383 - ext4_error(inode->i_sb, "ext4_get_inode_loc",
5384 - "unable to read inode block - "
5385 - "inode=%lu, block=%llu",
5386 - inode->i_ino, block);
5387 + ext4_error(sb, __func__,
5388 + "unable to read inode block - inode=%lu, "
5389 + "block=%llu", inode->i_ino, block);
5390 brelse(bh);
5391 return -EIO;
5392 }
5393 @@ -3915,11 +4108,10 @@ struct inode *ext4_iget(struct super_blo
5394 return inode;
5395
5396 ei = EXT4_I(inode);
5397 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
5398 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
5399 ei->i_acl = EXT4_ACL_NOT_CACHED;
5400 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
5401 #endif
5402 - ei->i_block_alloc_info = NULL;
5403
5404 ret = __ext4_get_inode_loc(inode, &iloc, 0);
5405 if (ret < 0)
5406 @@ -3929,7 +4121,7 @@ struct inode *ext4_iget(struct super_blo
5407 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
5408 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
5409 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
5410 - if(!(test_opt (inode->i_sb, NO_UID32))) {
5411 + if (!(test_opt(inode->i_sb, NO_UID32))) {
5412 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
5413 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
5414 }
5415 @@ -3947,7 +4139,7 @@ struct inode *ext4_iget(struct super_blo
5416 if (inode->i_mode == 0 ||
5417 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
5418 /* this inode is deleted */
5419 - brelse (bh);
5420 + brelse(bh);
5421 ret = -ESTALE;
5422 goto bad_inode;
5423 }
5424 @@ -3980,7 +4172,7 @@ struct inode *ext4_iget(struct super_blo
5425 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
5426 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
5427 EXT4_INODE_SIZE(inode->i_sb)) {
5428 - brelse (bh);
5429 + brelse(bh);
5430 ret = -EIO;
5431 goto bad_inode;
5432 }
5433 @@ -4033,7 +4225,7 @@ struct inode *ext4_iget(struct super_blo
5434 init_special_inode(inode, inode->i_mode,
5435 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
5436 }
5437 - brelse (iloc.bh);
5438 + brelse(iloc.bh);
5439 ext4_set_inode_flags(inode);
5440 unlock_new_inode(inode);
5441 return inode;
5442 @@ -4050,7 +4242,6 @@ static int ext4_inode_blocks_set(handle_
5443 struct inode *inode = &(ei->vfs_inode);
5444 u64 i_blocks = inode->i_blocks;
5445 struct super_block *sb = inode->i_sb;
5446 - int err = 0;
5447
5448 if (i_blocks <= ~0U) {
5449 /*
5450 @@ -4060,36 +4251,27 @@ static int ext4_inode_blocks_set(handle_
5451 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5452 raw_inode->i_blocks_high = 0;
5453 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
5454 - } else if (i_blocks <= 0xffffffffffffULL) {
5455 + return 0;
5456 + }
5457 + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
5458 + return -EFBIG;
5459 +
5460 + if (i_blocks <= 0xffffffffffffULL) {
5461 /*
5462 * i_blocks can be represented in a 48 bit variable
5463 * as multiple of 512 bytes
5464 */
5465 - err = ext4_update_rocompat_feature(handle, sb,
5466 - EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
5467 - if (err)
5468 - goto err_out;
5469 - /* i_block is stored in the split 48 bit fields */
5470 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5471 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
5472 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
5473 } else {
5474 - /*
5475 - * i_blocks should be represented in a 48 bit variable
5476 - * as multiple of file system block size
5477 - */
5478 - err = ext4_update_rocompat_feature(handle, sb,
5479 - EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
5480 - if (err)
5481 - goto err_out;
5482 ei->i_flags |= EXT4_HUGE_FILE_FL;
5483 /* i_block is stored in file system block size */
5484 i_blocks = i_blocks >> (inode->i_blkbits - 9);
5485 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5486 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
5487 }
5488 -err_out:
5489 - return err;
5490 + return 0;
5491 }
5492
5493 /*
5494 @@ -4115,14 +4297,14 @@ static int ext4_do_update_inode(handle_t
5495
5496 ext4_get_inode_flags(ei);
5497 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
5498 - if(!(test_opt(inode->i_sb, NO_UID32))) {
5499 + if (!(test_opt(inode->i_sb, NO_UID32))) {
5500 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
5501 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
5502 /*
5503 * Fix up interoperability with old kernels. Otherwise, old inodes get
5504 * re-used with the upper 16 bits of the uid/gid intact
5505 */
5506 - if(!ei->i_dtime) {
5507 + if (!ei->i_dtime) {
5508 raw_inode->i_uid_high =
5509 cpu_to_le16(high_16_bits(inode->i_uid));
5510 raw_inode->i_gid_high =
5511 @@ -4210,7 +4392,7 @@ static int ext4_do_update_inode(handle_t
5512 ei->i_state &= ~EXT4_STATE_NEW;
5513
5514 out_brelse:
5515 - brelse (bh);
5516 + brelse(bh);
5517 ext4_std_error(inode->i_sb, err);
5518 return err;
5519 }
5520 @@ -4814,6 +4996,7 @@ int ext4_page_mkwrite(struct vm_area_str
5521 loff_t size;
5522 unsigned long len;
5523 int ret = -EINVAL;
5524 + void *fsdata;
5525 struct file *file = vma->vm_file;
5526 struct inode *inode = file->f_path.dentry->d_inode;
5527 struct address_space *mapping = inode->i_mapping;
5528 @@ -4852,11 +5035,11 @@ int ext4_page_mkwrite(struct vm_area_str
5529 * on the same page though
5530 */
5531 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
5532 - len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
5533 + len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5534 if (ret < 0)
5535 goto out_unlock;
5536 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5537 - len, len, page, NULL);
5538 + len, len, page, fsdata);
5539 if (ret < 0)
5540 goto out_unlock;
5541 ret = 0;
5542 diff -rup b/fs/ext4//ioctl.c a/fs/ext4///ioctl.c
5543 --- b/fs/ext4/ioctl.c 2009-02-11 14:37:58.000000000 +0100
5544 +++ a/fs/ext4/ioctl.c 2009-02-10 21:40:11.000000000 +0100
5545 @@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsig
5546 struct inode *inode = filp->f_dentry->d_inode;
5547 struct ext4_inode_info *ei = EXT4_I(inode);
5548 unsigned int flags;
5549 - unsigned short rsv_window_size;
5550
5551 - ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg);
5552 + ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
5553
5554 switch (cmd) {
5555 case EXT4_IOC_GETFLAGS:
5556 @@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsig
5557 return put_user(flags, (int __user *) arg);
5558 case EXT4_IOC_SETFLAGS: {
5559 handle_t *handle = NULL;
5560 - int err;
5561 + int err, migrate = 0;
5562 struct ext4_iloc iloc;
5563 unsigned int oldflags;
5564 unsigned int jflag;
5565 @@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsig
5566 if (!capable(CAP_SYS_RESOURCE))
5567 goto flags_out;
5568 }
5569 + if (oldflags & EXT4_EXTENTS_FL) {
5570 + /* We don't support clearning extent flags */
5571 + if (!(flags & EXT4_EXTENTS_FL)) {
5572 + err = -EOPNOTSUPP;
5573 + goto flags_out;
5574 + }
5575 + } else if (flags & EXT4_EXTENTS_FL) {
5576 + /* migrate the file */
5577 + migrate = 1;
5578 + flags &= ~EXT4_EXTENTS_FL;
5579 + }
5580
5581 handle = ext4_journal_start(inode, 1);
5582 if (IS_ERR(handle)) {
5583 @@ -109,6 +119,10 @@ flags_err:
5584
5585 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
5586 err = ext4_change_inode_journal_flag(inode, jflag);
5587 + if (err)
5588 + goto flags_out;
5589 + if (migrate)
5590 + err = ext4_ext_migrate(inode);
5591 flags_out:
5592 mutex_unlock(&inode->i_mutex);
5593 mnt_drop_write(filp->f_path.mnt);
5594 @@ -175,53 +189,10 @@ setversion_out:
5595 return ret;
5596 }
5597 #endif
5598 - case EXT4_IOC_GETRSVSZ:
5599 - if (test_opt(inode->i_sb, RESERVATION)
5600 - && S_ISREG(inode->i_mode)
5601 - && ei->i_block_alloc_info) {
5602 - rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
5603 - return put_user(rsv_window_size, (int __user *)arg);
5604 - }
5605 - return -ENOTTY;
5606 - case EXT4_IOC_SETRSVSZ: {
5607 - int err;
5608 -
5609 - if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
5610 - return -ENOTTY;
5611 -
5612 - if (!is_owner_or_cap(inode))
5613 - return -EACCES;
5614 -
5615 - if (get_user(rsv_window_size, (int __user *)arg))
5616 - return -EFAULT;
5617 -
5618 - err = mnt_want_write(filp->f_path.mnt);
5619 - if (err)
5620 - return err;
5621 -
5622 - if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
5623 - rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
5624 -
5625 - /*
5626 - * need to allocate reservation structure for this inode
5627 - * before set the window size
5628 - */
5629 - down_write(&ei->i_data_sem);
5630 - if (!ei->i_block_alloc_info)
5631 - ext4_init_block_alloc_info(inode);
5632 -
5633 - if (ei->i_block_alloc_info){
5634 - struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
5635 - rsv->rsv_goal_size = rsv_window_size;
5636 - }
5637 - up_write(&ei->i_data_sem);
5638 - mnt_drop_write(filp->f_path.mnt);
5639 - return 0;
5640 - }
5641 case EXT4_IOC_GROUP_EXTEND: {
5642 ext4_fsblk_t n_blocks_count;
5643 struct super_block *sb = inode->i_sb;
5644 - int err;
5645 + int err, err2;
5646
5647 if (!capable(CAP_SYS_RESOURCE))
5648 return -EPERM;
5649 @@ -235,8 +206,10 @@ setversion_out:
5650
5651 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
5652 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5653 - jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5654 + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5655 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5656 + if (err == 0)
5657 + err = err2;
5658 mnt_drop_write(filp->f_path.mnt);
5659
5660 return err;
5661 @@ -244,7 +217,7 @@ setversion_out:
5662 case EXT4_IOC_GROUP_ADD: {
5663 struct ext4_new_group_data input;
5664 struct super_block *sb = inode->i_sb;
5665 - int err;
5666 + int err, err2;
5667
5668 if (!capable(CAP_SYS_RESOURCE))
5669 return -EPERM;
5670 @@ -259,8 +232,10 @@ setversion_out:
5671
5672 err = ext4_group_add(sb, &input);
5673 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5674 - jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5675 + err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5676 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5677 + if (err == 0)
5678 + err = err2;
5679 mnt_drop_write(filp->f_path.mnt);
5680
5681 return err;
5682 diff -rup b/fs/ext4//mballoc.c a/fs/ext4///mballoc.c
5683 --- b/fs/ext4/mballoc.c 2009-02-11 14:37:58.000000000 +0100
5684 +++ a/fs/ext4/mballoc.c 2009-02-10 21:40:11.000000000 +0100
5685 @@ -100,7 +100,7 @@
5686 * inode as:
5687 *
5688 * { page }
5689 - * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
5690 + * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
5691 *
5692 *
5693 * one block each for bitmap and buddy information. So for each group we
5694 @@ -330,6 +330,20 @@
5695 * object
5696 *
5697 */
5698 +static struct kmem_cache *ext4_pspace_cachep;
5699 +static struct kmem_cache *ext4_ac_cachep;
5700 +static struct kmem_cache *ext4_free_ext_cachep;
5701 +static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
5702 + ext4_group_t group);
5703 +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
5704 + ext4_group_t group);
5705 +static int ext4_mb_init_per_dev_proc(struct super_block *sb);
5706 +static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
5707 +static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
5708 +static void ext4_mb_free_committed_blocks(struct super_block *);
5709 +
5710 +
5711 +
5712
5713 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
5714 {
5715 @@ -445,9 +459,9 @@ static void mb_free_blocks_double(struct
5716 blocknr += first + i;
5717 blocknr +=
5718 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5719 -
5720 - ext4_error(sb, __func__, "double-free of inode"
5721 - " %lu's block %llu(bit %u in group %lu)\n",
5722 + ext4_grp_locked_error(sb, e4b->bd_group,
5723 + __func__, "double-free of inode"
5724 + " %lu's block %llu(bit %u in group %u)\n",
5725 inode ? inode->i_ino : 0, blocknr,
5726 first + i, e4b->bd_group);
5727 }
5728 @@ -477,9 +491,10 @@ static void mb_cmp_bitmaps(struct ext4_b
5729 b2 = (unsigned char *) bitmap;
5730 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
5731 if (b1[i] != b2[i]) {
5732 - printk("corruption in group %lu at byte %u(%u):"
5733 - " %x in copy != %x on disk/prealloc\n",
5734 - e4b->bd_group, i, i * 8, b1[i], b2[i]);
5735 + printk(KERN_ERR "corruption in group %u "
5736 + "at byte %u(%u): %x in copy != %x "
5737 + "on disk/prealloc\n",
5738 + e4b->bd_group, i, i * 8, b1[i], b2[i]);
5739 BUG();
5740 }
5741 }
5742 @@ -533,9 +548,6 @@ static int __mb_check_buddy(struct ext4_
5743 void *buddy;
5744 void *buddy2;
5745
5746 - if (!test_opt(sb, MBALLOC))
5747 - return 0;
5748 -
5749 {
5750 static int mb_check_counter;
5751 if (mb_check_counter++ % 100 != 0)
5752 @@ -692,8 +704,8 @@ static void ext4_mb_generate_buddy(struc
5753 grp->bb_fragments = fragments;
5754
5755 if (free != grp->bb_free) {
5756 - ext4_error(sb, __func__,
5757 - "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
5758 + ext4_grp_locked_error(sb, group, __func__,
5759 + "EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
5760 group, free, grp->bb_free);
5761 /*
5762 * If we intent to continue, we consider group descritor
5763 @@ -718,7 +730,7 @@ static void ext4_mb_generate_buddy(struc
5764 * stored in the inode as
5765 *
5766 * { page }
5767 - * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
5768 + * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
5769 *
5770 *
5771 * one block each for bitmap and buddy information.
5772 @@ -784,23 +796,45 @@ static int ext4_mb_init_cache(struct pag
5773 if (bh[i] == NULL)
5774 goto out;
5775
5776 - if (bh_uptodate_or_lock(bh[i]))
5777 + if (bitmap_uptodate(bh[i]))
5778 continue;
5779
5780 + lock_buffer(bh[i]);
5781 + if (bitmap_uptodate(bh[i])) {
5782 + unlock_buffer(bh[i]);
5783 + continue;
5784 + }
5785 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
5786 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
5787 ext4_init_block_bitmap(sb, bh[i],
5788 first_group + i, desc);
5789 + set_bitmap_uptodate(bh[i]);
5790 set_buffer_uptodate(bh[i]);
5791 - unlock_buffer(bh[i]);
5792 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
5793 + unlock_buffer(bh[i]);
5794 continue;
5795 }
5796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
5797 + if (buffer_uptodate(bh[i])) {
5798 + /*
5799 + * if not uninit if bh is uptodate,
5800 + * bitmap is also uptodate
5801 + */
5802 + set_bitmap_uptodate(bh[i]);
5803 + unlock_buffer(bh[i]);
5804 + continue;
5805 + }
5806 get_bh(bh[i]);
5807 + /*
5808 + * submit the buffer_head for read. We can
5809 + * safely mark the bitmap as uptodate now.
5810 + * We do it here so the bitmap uptodate bit
5811 + * get set with buffer lock held.
5812 + */
5813 + set_bitmap_uptodate(bh[i]);
5814 bh[i]->b_end_io = end_buffer_read_sync;
5815 submit_bh(READ, bh[i]);
5816 - mb_debug("read bitmap for group %lu\n", first_group + i);
5817 + mb_debug("read bitmap for group %u\n", first_group + i);
5818 }
5819
5820 /* wait for I/O completion */
5821 @@ -814,6 +848,8 @@ static int ext4_mb_init_cache(struct pag
5822
5823 err = 0;
5824 first_block = page->index * blocks_per_page;
5825 + /* init the page */
5826 + memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
5827 for (i = 0; i < blocks_per_page; i++) {
5828 int group;
5829 struct ext4_group_info *grinfo;
5830 @@ -840,7 +876,6 @@ static int ext4_mb_init_cache(struct pag
5831 BUG_ON(incore == NULL);
5832 mb_debug("put buddy for group %u in page %lu/%x\n",
5833 group, page->index, i * blocksize);
5834 - memset(data, 0xff, blocksize);
5835 grinfo = ext4_get_group_info(sb, group);
5836 grinfo->bb_fragments = 0;
5837 memset(grinfo->bb_counters, 0,
5838 @@ -848,7 +883,9 @@ static int ext4_mb_init_cache(struct pag
5839 /*
5840 * incore got set to the group block bitmap below
5841 */
5842 + ext4_lock_group(sb, group);
5843 ext4_mb_generate_buddy(sb, data, incore, group);
5844 + ext4_unlock_group(sb, group);
5845 incore = NULL;
5846 } else {
5847 /* this is block of bitmap */
5848 @@ -862,6 +899,7 @@ static int ext4_mb_init_cache(struct pag
5849
5850 /* mark all preallocated blks used in in-core bitmap */
5851 ext4_mb_generate_from_pa(sb, data, group);
5852 + ext4_mb_generate_from_freelist(sb, data, group);
5853 ext4_unlock_group(sb, group);
5854
5855 /* set incore so that the buddy information can be
5856 @@ -885,19 +923,22 @@ out:
5857 static noinline_for_stack int
5858 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
5859 struct ext4_buddy *e4b)
5860 +__acquires(e4b->alloc_semp)
5861 {
5862 - struct ext4_sb_info *sbi = EXT4_SB(sb);
5863 - struct inode *inode = sbi->s_buddy_cache;
5864 int blocks_per_page;
5865 int block;
5866 int pnum;
5867 int poff;
5868 struct page *page;
5869 int ret;
5870 + struct ext4_group_info *grp;
5871 + struct ext4_sb_info *sbi = EXT4_SB(sb);
5872 + struct inode *inode = sbi->s_buddy_cache;
5873
5874 - mb_debug("load group %lu\n", group);
5875 + mb_debug("load group %u\n", group);
5876
5877 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
5878 + grp = ext4_get_group_info(sb, group);
5879
5880 e4b->bd_blkbits = sb->s_blocksize_bits;
5881 e4b->bd_info = ext4_get_group_info(sb, group);
5882 @@ -905,6 +946,16 @@ ext4_mb_load_buddy(struct super_block *s
5883 e4b->bd_group = group;
5884 e4b->bd_buddy_page = NULL;
5885 e4b->bd_bitmap_page = NULL;
5886 + e4b->alloc_semp = &grp->alloc_sem;
5887 +
5888 + /* Take the read lock on the group alloc
5889 + * sem. This would make sure a parallel
5890 + * ext4_mb_init_group happening on other
5891 + * groups mapped by the page is blocked
5892 + * till we are done with allocation
5893 + */
5894 + down_read(e4b->alloc_semp);
5895 + __acquire(e4b->alloc_semp);
5896
5897 /*
5898 * the buddy cache inode stores the block bitmap
5899 @@ -920,6 +971,14 @@ ext4_mb_load_buddy(struct super_block *s
5900 page = find_get_page(inode->i_mapping, pnum);
5901 if (page == NULL || !PageUptodate(page)) {
5902 if (page)
5903 + /*
5904 + * drop the page reference and try
5905 + * to get the page with lock. If we
5906 + * are not uptodate that implies
5907 + * somebody just created the page but
5908 + * is yet to initialize the same. So
5909 + * wait for it to initialize.
5910 + */
5911 page_cache_release(page);
5912 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
5913 if (page) {
5914 @@ -985,15 +1044,23 @@ err:
5915 page_cache_release(e4b->bd_buddy_page);
5916 e4b->bd_buddy = NULL;
5917 e4b->bd_bitmap = NULL;
5918 +
5919 + /* Done with the buddy cache */
5920 + up_read(e4b->alloc_semp);
5921 return ret;
5922 }
5923
5924 static void ext4_mb_release_desc(struct ext4_buddy *e4b)
5925 +__releases(e4b->alloc_semp)
5926 {
5927 if (e4b->bd_bitmap_page)
5928 page_cache_release(e4b->bd_bitmap_page);
5929 if (e4b->bd_buddy_page)
5930 page_cache_release(e4b->bd_buddy_page);
5931 + /* Done with the buddy cache */
5932 + if (e4b->alloc_semp)
5933 + up_read(e4b->alloc_semp);
5934 + __release(e4b->alloc_semp);
5935 }
5936
5937
5938 @@ -1031,7 +1098,10 @@ static void mb_clear_bits(spinlock_t *lo
5939 cur += 32;
5940 continue;
5941 }
5942 - mb_clear_bit_atomic(lock, cur, bm);
5943 + if (lock)
5944 + mb_clear_bit_atomic(lock, cur, bm);
5945 + else
5946 + mb_clear_bit(cur, bm);
5947 cur++;
5948 }
5949 }
5950 @@ -1049,7 +1119,10 @@ static void mb_set_bits(spinlock_t *lock
5951 cur += 32;
5952 continue;
5953 }
5954 - mb_set_bit_atomic(lock, cur, bm);
5955 + if (lock)
5956 + mb_set_bit_atomic(lock, cur, bm);
5957 + else
5958 + mb_set_bit(cur, bm);
5959 cur++;
5960 }
5961 }
5962 @@ -1094,12 +1167,11 @@ static void mb_free_blocks(struct inode
5963 blocknr += block;
5964 blocknr +=
5965 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5966 - ext4_unlock_group(sb, e4b->bd_group);
5967 - ext4_error(sb, __func__, "double-free of inode"
5968 - " %lu's block %llu(bit %u in group %lu)\n",
5969 + ext4_grp_locked_error(sb, e4b->bd_group,
5970 + __func__, "double-free of inode"
5971 + " %lu's block %llu(bit %u in group %u)\n",
5972 inode ? inode->i_ino : 0, blocknr, block,
5973 e4b->bd_group);
5974 - ext4_lock_group(sb, e4b->bd_group);
5975 }
5976 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
5977 e4b->bd_info->bb_counters[order]++;
5978 @@ -1296,13 +1368,20 @@ static void ext4_mb_use_best_found(struc
5979 ac->ac_tail = ret & 0xffff;
5980 ac->ac_buddy = ret >> 16;
5981
5982 - /* XXXXXXX: SUCH A HORRIBLE **CK */
5983 - /*FIXME!! Why ? */
5984 + /*
5985 + * take the page reference. We want the page to be pinned
5986 + * so that we don't get a ext4_mb_init_cache_call for this
5987 + * group until we update the bitmap. That would mean we
5988 + * double allocate blocks. The reference is dropped
5989 + * in ext4_mb_release_context
5990 + */
5991 ac->ac_bitmap_page = e4b->bd_bitmap_page;
5992 get_page(ac->ac_bitmap_page);
5993 ac->ac_buddy_page = e4b->bd_buddy_page;
5994 get_page(ac->ac_buddy_page);
5995 -
5996 + /* on allocation we use ac to track the held semaphore */
5997 + ac->alloc_semp = e4b->alloc_semp;
5998 + e4b->alloc_semp = NULL;
5999 /* store last allocated for subsequent stream allocation */
6000 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
6001 spin_lock(&sbi->s_md_lock);
6002 @@ -1433,8 +1512,10 @@ static int ext4_mb_try_best_found(struct
6003
6004 BUG_ON(ex.fe_len <= 0);
6005 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
6006 - if (err)
6007 + if (err) {
6008 + __release(e4b->alloc_semp);
6009 return err;
6010 + }
6011
6012 ext4_lock_group(ac->ac_sb, group);
6013 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
6014 @@ -1464,8 +1545,10 @@ static int ext4_mb_find_by_goal(struct e
6015 return 0;
6016
6017 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
6018 - if (err)
6019 + if (err) {
6020 + __release(e4b->alloc_semp);
6021 return err;
6022 + }
6023
6024 ext4_lock_group(ac->ac_sb, group);
6025 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
6026 @@ -1575,7 +1658,8 @@ static void ext4_mb_complex_scan_group(s
6027 * free blocks even though group info says we
6028 * we have free blocks
6029 */
6030 - ext4_error(sb, __func__, "%d free blocks as per "
6031 + ext4_grp_locked_error(sb, e4b->bd_group,
6032 + __func__, "%d free blocks as per "
6033 "group info. But bitmap says 0\n",
6034 free);
6035 break;
6036 @@ -1584,7 +1668,8 @@ static void ext4_mb_complex_scan_group(s
6037 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
6038 BUG_ON(ex.fe_len <= 0);
6039 if (free < ex.fe_len) {
6040 - ext4_error(sb, __func__, "%d free blocks as per "
6041 + ext4_grp_locked_error(sb, e4b->bd_group,
6042 + __func__, "%d free blocks as per "
6043 "group info. But got %d blocks\n",
6044 free, ex.fe_len);
6045 /*
6046 @@ -1692,6 +1777,173 @@ static int ext4_mb_good_group(struct ext
6047 return 0;
6048 }
6049
6050 +/*
6051 + * lock the group_info alloc_sem of all the groups
6052 + * belonging to the same buddy cache page. This
6053 + * make sure other parallel operation on the buddy
6054 + * cache doesn't happen whild holding the buddy cache
6055 + * lock
6056 + */
6057 +int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
6058 +{
6059 + int i;
6060 + int block, pnum;
6061 + int blocks_per_page;
6062 + int groups_per_page;
6063 + ext4_group_t first_group;
6064 + struct ext4_group_info *grp;
6065 +
6066 + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6067 + /*
6068 + * the buddy cache inode stores the block bitmap
6069 + * and buddy information in consecutive blocks.
6070 + * So for each group we need two blocks.
6071 + */
6072 + block = group * 2;
6073 + pnum = block / blocks_per_page;
6074 + first_group = pnum * blocks_per_page / 2;
6075 +
6076 + groups_per_page = blocks_per_page >> 1;
6077 + if (groups_per_page == 0)
6078 + groups_per_page = 1;
6079 + /* read all groups the page covers into the cache */
6080 + for (i = 0; i < groups_per_page; i++) {
6081 +
6082 + if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
6083 + break;
6084 + grp = ext4_get_group_info(sb, first_group + i);
6085 + /* take all groups write allocation
6086 + * semaphore. This make sure there is
6087 + * no block allocation going on in any
6088 + * of that groups
6089 + */
6090 + down_write_nested(&grp->alloc_sem, i);
6091 + }
6092 + return i;
6093 +}
6094 +
6095 +void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
6096 + ext4_group_t group, int locked_group)
6097 +{
6098 + int i;
6099 + int block, pnum;
6100 + int blocks_per_page;
6101 + ext4_group_t first_group;
6102 + struct ext4_group_info *grp;
6103 +
6104 + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6105 + /*
6106 + * the buddy cache inode stores the block bitmap
6107 + * and buddy information in consecutive blocks.
6108 + * So for each group we need two blocks.
6109 + */
6110 + block = group * 2;
6111 + pnum = block / blocks_per_page;
6112 + first_group = pnum * blocks_per_page / 2;
6113 + /* release locks on all the groups */
6114 + for (i = 0; i < locked_group; i++) {
6115 +
6116 + grp = ext4_get_group_info(sb, first_group + i);
6117 + /* take all groups write allocation
6118 + * semaphore. This make sure there is
6119 + * no block allocation going on in any
6120 + * of that groups
6121 + */
6122 + up_write(&grp->alloc_sem);
6123 + }
6124 +
6125 +}
6126 +
6127 +static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
6128 +{
6129 +
6130 + int ret;
6131 + void *bitmap;
6132 + int blocks_per_page;
6133 + int block, pnum, poff;
6134 + int num_grp_locked = 0;
6135 + struct ext4_group_info *this_grp;
6136 + struct ext4_sb_info *sbi = EXT4_SB(sb);
6137 + struct inode *inode = sbi->s_buddy_cache;
6138 + struct page *page = NULL, *bitmap_page = NULL;
6139 +
6140 + mb_debug("init group %lu\n", group);
6141 + blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6142 + this_grp = ext4_get_group_info(sb, group);
6143 + /*
6144 + * This ensures we don't add group
6145 + * to this buddy cache via resize
6146 + */
6147 + num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
6148 + if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
6149 + /*
6150 + * somebody initialized the group
6151 + * return without doing anything
6152 + */
6153 + ret = 0;
6154 + goto err;
6155 + }
6156 + /*
6157 + * the buddy cache inode stores the block bitmap
6158 + * and buddy information in consecutive blocks.
6159 + * So for each group we need two blocks.
6160 + */
6161 + block = group * 2;
6162 + pnum = block / blocks_per_page;
6163 + poff = block % blocks_per_page;
6164 + page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
6165 + if (page) {
6166 + BUG_ON(page->mapping != inode->i_mapping);
6167 + ret = ext4_mb_init_cache(page, NULL);
6168 + if (ret) {
6169 + unlock_page(page);
6170 + goto err;
6171 + }
6172 + unlock_page(page);
6173 + }
6174 + if (page == NULL || !PageUptodate(page)) {
6175 + ret = -EIO;
6176 + goto err;
6177 + }
6178 + mark_page_accessed(page);
6179 + bitmap_page = page;
6180 + bitmap = page_address(page) + (poff * sb->s_blocksize);
6181 +
6182 + /* init buddy cache */
6183 + block++;
6184 + pnum = block / blocks_per_page;
6185 + poff = block % blocks_per_page;
6186 + page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
6187 + if (page == bitmap_page) {
6188 + /*
6189 + * If both the bitmap and buddy are in
6190 + * the same page we don't need to force
6191 + * init the buddy
6192 + */
6193 + unlock_page(page);
6194 + } else if (page) {
6195 + BUG_ON(page->mapping != inode->i_mapping);
6196 + ret = ext4_mb_init_cache(page, bitmap);
6197 + if (ret) {
6198 + unlock_page(page);
6199 + goto err;
6200 + }
6201 + unlock_page(page);
6202 + }
6203 + if (page == NULL || !PageUptodate(page)) {
6204 + ret = -EIO;
6205 + goto err;
6206 + }
6207 + mark_page_accessed(page);
6208 +err:
6209 + ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
6210 + if (bitmap_page)
6211 + page_cache_release(bitmap_page);
6212 + if (page)
6213 + page_cache_release(page);
6214 + return ret;
6215 +}
6216 +
6217 static noinline_for_stack int
6218 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
6219 {
6220 @@ -1775,7 +2027,7 @@ repeat:
6221 group = 0;
6222
6223 /* quick check to skip empty groups */
6224 - grp = ext4_get_group_info(ac->ac_sb, group);
6225 + grp = ext4_get_group_info(sb, group);
6226 if (grp->bb_free == 0)
6227 continue;
6228
6229 @@ -1788,10 +2040,9 @@ repeat:
6230 * we need full data about the group
6231 * to make a good selection
6232 */
6233 - err = ext4_mb_load_buddy(sb, group, &e4b);
6234 + err = ext4_mb_init_group(sb, group);
6235 if (err)
6236 goto out;
6237 - ext4_mb_release_desc(&e4b);
6238 }
6239
6240 /*
6241 @@ -1802,8 +2053,10 @@ repeat:
6242 continue;
6243
6244 err = ext4_mb_load_buddy(sb, group, &e4b);
6245 - if (err)
6246 + if (err) {
6247 + __release(e4b->alloc_semp);
6248 goto out;
6249 + }
6250
6251 ext4_lock_group(sb, group);
6252 if (!ext4_mb_good_group(ac, group, cr)) {
6253 @@ -1932,13 +2185,13 @@ static int ext4_mb_seq_history_show(stru
6254 if (hs->op == EXT4_MB_HISTORY_ALLOC) {
6255 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
6256 "%-5u %-5s %-5u %-6u\n";
6257 - sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group,
6258 + sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
6259 hs->result.fe_start, hs->result.fe_len,
6260 hs->result.fe_logical);
6261 - sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group,
6262 + sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
6263 hs->orig.fe_start, hs->orig.fe_len,
6264 hs->orig.fe_logical);
6265 - sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group,
6266 + sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group,
6267 hs->goal.fe_start, hs->goal.fe_len,
6268 hs->goal.fe_logical);
6269 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2,
6270 @@ -1947,20 +2200,20 @@ static int ext4_mb_seq_history_show(stru
6271 hs->buddy ? 1 << hs->buddy : 0);
6272 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) {
6273 fmt = "%-5u %-8u %-23s %-23s %-23s\n";
6274 - sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group,
6275 + sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
6276 hs->result.fe_start, hs->result.fe_len,
6277 hs->result.fe_logical);
6278 - sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group,
6279 + sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
6280 hs->orig.fe_start, hs->orig.fe_len,
6281 hs->orig.fe_logical);
6282 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2);
6283 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) {
6284 - sprintf(buf2, "%lu/%d/%u", hs->result.fe_group,
6285 + sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
6286 hs->result.fe_start, hs->result.fe_len);
6287 seq_printf(seq, "%-5u %-8u %-23s discard\n",
6288 hs->pid, hs->ino, buf2);
6289 } else if (hs->op == EXT4_MB_HISTORY_FREE) {
6290 - sprintf(buf2, "%lu/%d/%u", hs->result.fe_group,
6291 + sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
6292 hs->result.fe_start, hs->result.fe_len);
6293 seq_printf(seq, "%-5u %-8u %-23s free\n",
6294 hs->pid, hs->ino, buf2);
6295 @@ -2073,7 +2326,7 @@ static void *ext4_mb_seq_groups_start(st
6296 return NULL;
6297
6298 group = *pos + 1;
6299 - return (void *) group;
6300 + return (void *) ((unsigned long) group);
6301 }
6302
6303 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
6304 @@ -2086,13 +2339,13 @@ static void *ext4_mb_seq_groups_next(str
6305 if (*pos < 0 || *pos >= sbi->s_groups_count)
6306 return NULL;
6307 group = *pos + 1;
6308 - return (void *) group;;
6309 + return (void *) ((unsigned long) group);
6310 }
6311
6312 static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
6313 {
6314 struct super_block *sb = seq->private;
6315 - long group = (long) v;
6316 + ext4_group_t group = (ext4_group_t) ((unsigned long) v);
6317 int i;
6318 int err;
6319 struct ext4_buddy e4b;
6320 @@ -2114,7 +2367,8 @@ static int ext4_mb_seq_groups_show(struc
6321 sizeof(struct ext4_group_info);
6322 err = ext4_mb_load_buddy(sb, group, &e4b);
6323 if (err) {
6324 - seq_printf(seq, "#%-5lu: I/O error\n", group);
6325 + __release(e4b->alloc_semp);
6326 + seq_printf(seq, "#%-5u: I/O error\n", group);
6327 return 0;
6328 }
6329 ext4_lock_group(sb, group);
6330 @@ -2122,7 +2376,7 @@ static int ext4_mb_seq_groups_show(struc
6331 ext4_unlock_group(sb, group);
6332 ext4_mb_release_desc(&e4b);
6333
6334 - seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
6335 + seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
6336 sg.info.bb_fragments, sg.info.bb_first_free);
6337 for (i = 0; i <= 13; i++)
6338 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
6339 @@ -2169,9 +2423,10 @@ static void ext4_mb_history_release(stru
6340 {
6341 struct ext4_sb_info *sbi = EXT4_SB(sb);
6342
6343 - remove_proc_entry("mb_groups", sbi->s_mb_proc);
6344 - remove_proc_entry("mb_history", sbi->s_mb_proc);
6345 -
6346 + if (sbi->s_proc != NULL) {
6347 + remove_proc_entry("mb_groups", sbi->s_proc);
6348 + remove_proc_entry("mb_history", sbi->s_proc);
6349 + }
6350 kfree(sbi->s_mb_history);
6351 }
6352
6353 @@ -2180,10 +2435,10 @@ static void ext4_mb_history_init(struct
6354 struct ext4_sb_info *sbi = EXT4_SB(sb);
6355 int i;
6356
6357 - if (sbi->s_mb_proc != NULL) {
6358 - proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
6359 + if (sbi->s_proc != NULL) {
6360 + proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
6361 &ext4_mb_seq_history_fops, sb);
6362 - proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
6363 + proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
6364 &ext4_mb_seq_groups_fops, sb);
6365 }
6366
6367 @@ -2295,10 +2550,12 @@ int ext4_mb_add_groupinfo(struct super_b
6368 ext4_free_blocks_after_init(sb, group, desc);
6369 } else {
6370 meta_group_info[i]->bb_free =
6371 - le16_to_cpu(desc->bg_free_blocks_count);
6372 + ext4_free_blks_count(sb, desc);
6373 }
6374
6375 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
6376 + init_rwsem(&meta_group_info[i]->alloc_sem);
6377 + meta_group_info[i]->bb_free_root.rb_node = NULL;;
6378
6379 #ifdef DOUBLE_CHECK
6380 {
6381 @@ -2325,54 +2582,6 @@ exit_meta_group_info:
6382 } /* ext4_mb_add_groupinfo */
6383
6384 /*
6385 - * Add a group to the existing groups.
6386 - * This function is used for online resize
6387 - */
6388 -int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
6389 - struct ext4_group_desc *desc)
6390 -{
6391 - struct ext4_sb_info *sbi = EXT4_SB(sb);
6392 - struct inode *inode = sbi->s_buddy_cache;
6393 - int blocks_per_page;
6394 - int block;
6395 - int pnum;
6396 - struct page *page;
6397 - int err;
6398 -
6399 - /* Add group based on group descriptor*/
6400 - err = ext4_mb_add_groupinfo(sb, group, desc);
6401 - if (err)
6402 - return err;
6403 -
6404 - /*
6405 - * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
6406 - * datas) are set not up to date so that they will be re-initilaized
6407 - * during the next call to ext4_mb_load_buddy
6408 - */
6409 -
6410 - /* Set buddy page as not up to date */
6411 - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6412 - block = group * 2;
6413 - pnum = block / blocks_per_page;
6414 - page = find_get_page(inode->i_mapping, pnum);
6415 - if (page != NULL) {
6416 - ClearPageUptodate(page);
6417 - page_cache_release(page);
6418 - }
6419 -
6420 - /* Set bitmap page as not up to date */
6421 - block++;
6422 - pnum = block / blocks_per_page;
6423 - page = find_get_page(inode->i_mapping, pnum);
6424 - if (page != NULL) {
6425 - ClearPageUptodate(page);
6426 - page_cache_release(page);
6427 - }
6428 -
6429 - return 0;
6430 -}
6431 -
6432 -/*
6433 * Update an existing group.
6434 * This function is used for online resize
6435 */
6436 @@ -2455,7 +2664,7 @@ static int ext4_mb_init_backend(struct s
6437 desc = ext4_get_group_desc(sb, i, NULL);
6438 if (desc == NULL) {
6439 printk(KERN_ERR
6440 - "EXT4-fs: can't read descriptor %lu\n", i);
6441 + "EXT4-fs: can't read descriptor %u\n", i);
6442 goto err_freebuddy;
6443 }
6444 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
6445 @@ -2485,19 +2694,14 @@ int ext4_mb_init(struct super_block *sb,
6446 unsigned max;
6447 int ret;
6448
6449 - if (!test_opt(sb, MBALLOC))
6450 - return 0;
6451 -
6452 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
6453
6454 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
6455 if (sbi->s_mb_offsets == NULL) {
6456 - clear_opt(sbi->s_mount_opt, MBALLOC);
6457 return -ENOMEM;
6458 }
6459 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
6460 if (sbi->s_mb_maxs == NULL) {
6461 - clear_opt(sbi->s_mount_opt, MBALLOC);
6462 kfree(sbi->s_mb_maxs);
6463 return -ENOMEM;
6464 }
6465 @@ -2520,7 +2724,6 @@ int ext4_mb_init(struct super_block *sb,
6466 /* init file for buddy data */
6467 ret = ext4_mb_init_backend(sb);
6468 if (ret != 0) {
6469 - clear_opt(sbi->s_mount_opt, MBALLOC);
6470 kfree(sbi->s_mb_offsets);
6471 kfree(sbi->s_mb_maxs);
6472 return ret;
6473 @@ -2540,17 +2743,15 @@ int ext4_mb_init(struct super_block *sb,
6474 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
6475 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
6476
6477 - i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
6478 - sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
6479 + sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
6480 if (sbi->s_locality_groups == NULL) {
6481 - clear_opt(sbi->s_mount_opt, MBALLOC);
6482 kfree(sbi->s_mb_offsets);
6483 kfree(sbi->s_mb_maxs);
6484 return -ENOMEM;
6485 }
6486 - for (i = 0; i < nr_cpu_ids; i++) {
6487 + for_each_possible_cpu(i) {
6488 struct ext4_locality_group *lg;
6489 - lg = &sbi->s_locality_groups[i];
6490 + lg = per_cpu_ptr(sbi->s_locality_groups, i);
6491 mutex_init(&lg->lg_mutex);
6492 for (j = 0; j < PREALLOC_TB_SIZE; j++)
6493 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
6494 @@ -2560,7 +2761,7 @@ int ext4_mb_init(struct super_block *sb,
6495 ext4_mb_init_per_dev_proc(sb);
6496 ext4_mb_history_init(sb);
6497
6498 - printk("EXT4-fs: mballoc enabled\n");
6499 + printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
6500 return 0;
6501 }
6502
6503 @@ -2589,9 +2790,6 @@ int ext4_mb_release(struct super_block *
6504 struct ext4_group_info *grinfo;
6505 struct ext4_sb_info *sbi = EXT4_SB(sb);
6506
6507 - if (!test_opt(sb, MBALLOC))
6508 - return 0;
6509 -
6510 /* release freed, non-committed blocks */
6511 spin_lock(&sbi->s_md_lock);
6512 list_splice_init(&sbi->s_closed_transaction,
6513 @@ -2647,8 +2845,7 @@ int ext4_mb_release(struct super_block *
6514 atomic_read(&sbi->s_mb_discarded));
6515 }
6516
6517 - kfree(sbi->s_locality_groups);
6518 -
6519 + free_percpu(sbi->s_locality_groups);
6520 ext4_mb_history_release(sb);
6521 ext4_mb_destroy_per_dev_proc(sb);
6522
6523 @@ -2658,13 +2855,11 @@ int ext4_mb_release(struct super_block *
6524 static noinline_for_stack void
6525 ext4_mb_free_committed_blocks(struct super_block *sb)
6526 {
6527 - struct ext4_sb_info *sbi = EXT4_SB(sb);
6528 - int err;
6529 - int i;
6530 - int count = 0;
6531 - int count2 = 0;
6532 - struct ext4_free_metadata *md;
6533 struct ext4_buddy e4b;
6534 + struct ext4_group_info *db;
6535 + struct ext4_sb_info *sbi = EXT4_SB(sb);
6536 + int err, count = 0, count2 = 0;
6537 + struct ext4_free_data *entry;
6538
6539 if (list_empty(&sbi->s_committed_transaction))
6540 return;
6541 @@ -2672,44 +2867,46 @@ ext4_mb_free_committed_blocks(struct sup
6542 /* there is committed blocks to be freed yet */
6543 do {
6544 /* get next array of blocks */
6545 - md = NULL;
6546 + entry = NULL;
6547 spin_lock(&sbi->s_md_lock);
6548 if (!list_empty(&sbi->s_committed_transaction)) {
6549 - md = list_entry(sbi->s_committed_transaction.next,
6550 - struct ext4_free_metadata, list);
6551 - list_del(&md->list);
6552 + entry = list_entry(sbi->s_committed_transaction.next,
6553 + struct ext4_free_data, list);
6554 + list_del(&entry->list);
6555 }
6556 spin_unlock(&sbi->s_md_lock);
6557
6558 - if (md == NULL)
6559 + if (entry == NULL)
6560 break;
6561
6562 - mb_debug("gonna free %u blocks in group %lu (0x%p):",
6563 - md->num, md->group, md);
6564 + mb_debug("gonna free %u blocks in group %u (0x%p):",
6565 + entry->count, entry->group, entry);
6566
6567 - err = ext4_mb_load_buddy(sb, md->group, &e4b);
6568 + err = ext4_mb_load_buddy(sb, entry->group, &e4b);
6569 /* we expect to find existing buddy because it's pinned */
6570 BUG_ON(err != 0);
6571
6572 + db = e4b.bd_info;
6573 /* there are blocks to put in buddy to make them really free */
6574 - count += md->num;
6575 + count += entry->count;
6576 count2++;
6577 - ext4_lock_group(sb, md->group);
6578 - for (i = 0; i < md->num; i++) {
6579 - mb_debug(" %u", md->blocks[i]);
6580 - mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
6581 - }
6582 - mb_debug("\n");
6583 - ext4_unlock_group(sb, md->group);
6584 -
6585 - /* balance refcounts from ext4_mb_free_metadata() */
6586 - page_cache_release(e4b.bd_buddy_page);
6587 - page_cache_release(e4b.bd_bitmap_page);
6588 + ext4_lock_group(sb, entry->group);
6589 + /* Take it out of per group rb tree */
6590 + rb_erase(&entry->node, &(db->bb_free_root));
6591 + mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
6592 +
6593 + if (!db->bb_free_root.rb_node) {
6594 + /* No more items in the per group rb tree
6595 + * balance refcounts from ext4_mb_free_metadata()
6596 + */
6597 + page_cache_release(e4b.bd_buddy_page);
6598 + page_cache_release(e4b.bd_bitmap_page);
6599 + }
6600 + ext4_unlock_group(sb, entry->group);
6601
6602 - kfree(md);
6603 + kmem_cache_free(ext4_free_ext_cachep, entry);
6604 ext4_mb_release_desc(&e4b);
6605 -
6606 - } while (md);
6607 + } while (1);
6608
6609 mb_debug("freed %u blocks in %u structures\n", count, count2);
6610 }
6611 @@ -2721,129 +2918,52 @@ ext4_mb_free_committed_blocks(struct sup
6612 #define EXT4_MB_STREAM_REQ "stream_req"
6613 #define EXT4_MB_GROUP_PREALLOC "group_prealloc"
6614
6615 -
6616 -
6617 -#define MB_PROC_FOPS(name) \
6618 -static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
6619 -{ \
6620 - struct ext4_sb_info *sbi = m->private; \
6621 - \
6622 - seq_printf(m, "%ld\n", sbi->s_mb_##name); \
6623 - return 0; \
6624 -} \
6625 - \
6626 -static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
6627 -{ \
6628 - return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
6629 -} \
6630 - \
6631 -static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
6632 - const char __user *buf, size_t cnt, loff_t *ppos) \
6633 -{ \
6634 - struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
6635 - char str[32]; \
6636 - long value; \
6637 - if (cnt >= sizeof(str)) \
6638 - return -EINVAL; \
6639 - if (copy_from_user(str, buf, cnt)) \
6640 - return -EFAULT; \
6641 - value = simple_strtol(str, NULL, 0); \
6642 - if (value <= 0) \
6643 - return -ERANGE; \
6644 - sbi->s_mb_##name = value; \
6645 - return cnt; \
6646 -} \
6647 - \
6648 -static const struct file_operations ext4_mb_##name##_proc_fops = { \
6649 - .owner = THIS_MODULE, \
6650 - .open = ext4_mb_##name##_proc_open, \
6651 - .read = seq_read, \
6652 - .llseek = seq_lseek, \
6653 - .release = single_release, \
6654 - .write = ext4_mb_##name##_proc_write, \
6655 -};
6656 -
6657 -MB_PROC_FOPS(stats);
6658 -MB_PROC_FOPS(max_to_scan);
6659 -MB_PROC_FOPS(min_to_scan);
6660 -MB_PROC_FOPS(order2_reqs);
6661 -MB_PROC_FOPS(stream_request);
6662 -MB_PROC_FOPS(group_prealloc);
6663 -
6664 -#define MB_PROC_HANDLER(name, var) \
6665 -do { \
6666 - proc = proc_create_data(name, mode, sbi->s_mb_proc, \
6667 - &ext4_mb_##var##_proc_fops, sbi); \
6668 - if (proc == NULL) { \
6669 - printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
6670 - goto err_out; \
6671 - } \
6672 -} while (0)
6673 -
6674 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
6675 {
6676 +#ifdef CONFIG_PROC_FS
6677 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
6678 struct ext4_sb_info *sbi = EXT4_SB(sb);
6679 struct proc_dir_entry *proc;
6680 - char devname[BDEVNAME_SIZE], *p;
6681
6682 - if (proc_root_ext4 == NULL) {
6683 - sbi->s_mb_proc = NULL;
6684 + if (sbi->s_proc == NULL)
6685 return -EINVAL;
6686 - }
6687 - bdevname(sb->s_bdev, devname);
6688 - p = devname;
6689 - while ((p = strchr(p, '/')))
6690 - *p = '!';
6691 -
6692 - sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
6693 - if (!sbi->s_mb_proc)
6694 - goto err_create_dir;
6695 -
6696 - MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
6697 - MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
6698 - MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
6699 - MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
6700 - MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
6701 - MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
6702
6703 + EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
6704 + EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
6705 + EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
6706 + EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
6707 + EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
6708 + EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
6709 return 0;
6710
6711 err_out:
6712 - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
6713 - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
6714 - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
6715 - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
6716 - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
6717 - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
6718 - remove_proc_entry(devname, proc_root_ext4);
6719 - sbi->s_mb_proc = NULL;
6720 -err_create_dir:
6721 - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
6722 -
6723 + remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
6724 + remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
6725 + remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
6726 + remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
6727 + remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
6728 + remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
6729 return -ENOMEM;
6730 +#else
6731 + return 0;
6732 +#endif
6733 }
6734
6735 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
6736 {
6737 +#ifdef CONFIG_PROC_FS
6738 struct ext4_sb_info *sbi = EXT4_SB(sb);
6739 - char devname[BDEVNAME_SIZE], *p;
6740
6741 - if (sbi->s_mb_proc == NULL)
6742 + if (sbi->s_proc == NULL)
6743 return -EINVAL;
6744
6745 - bdevname(sb->s_bdev, devname);
6746 - p = devname;
6747 - while ((p = strchr(p, '/')))
6748 - *p = '!';
6749 - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
6750 - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
6751 - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
6752 - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
6753 - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
6754 - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
6755 - remove_proc_entry(devname, proc_root_ext4);
6756 -
6757 + remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
6758 + remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
6759 + remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
6760 + remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
6761 + remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
6762 + remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
6763 +#endif
6764 return 0;
6765 }
6766
6767 @@ -2864,11 +2984,16 @@ int __init init_ext4_mballoc(void)
6768 kmem_cache_destroy(ext4_pspace_cachep);
6769 return -ENOMEM;
6770 }
6771 -#ifdef CONFIG_PROC_FS
6772 - proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
6773 - if (proc_root_ext4 == NULL)
6774 - printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
6775 -#endif
6776 +
6777 + ext4_free_ext_cachep =
6778 + kmem_cache_create("ext4_free_block_extents",
6779 + sizeof(struct ext4_free_data),
6780 + 0, SLAB_RECLAIM_ACCOUNT, NULL);
6781 + if (ext4_free_ext_cachep == NULL) {
6782 + kmem_cache_destroy(ext4_pspace_cachep);
6783 + kmem_cache_destroy(ext4_ac_cachep);
6784 + return -ENOMEM;
6785 + }
6786 return 0;
6787 }
6788
6789 @@ -2877,9 +3002,7 @@ void exit_ext4_mballoc(void)
6790 /* XXX: synchronize_rcu(); */
6791 kmem_cache_destroy(ext4_pspace_cachep);
6792 kmem_cache_destroy(ext4_ac_cachep);
6793 -#ifdef CONFIG_PROC_FS
6794 - remove_proc_entry("fs/ext4", NULL);
6795 -#endif
6796 + kmem_cache_destroy(ext4_free_ext_cachep);
6797 }
6798
6799
6800 @@ -2889,7 +3012,7 @@ void exit_ext4_mballoc(void)
6801 */
6802 static noinline_for_stack int
6803 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
6804 - handle_t *handle)
6805 + handle_t *handle, unsigned int reserv_blks)
6806 {
6807 struct buffer_head *bitmap_bh = NULL;
6808 struct ext4_super_block *es;
6809 @@ -2922,7 +3045,7 @@ ext4_mb_mark_diskspace_used(struct ext4_
6810 if (!gdp)
6811 goto out_err;
6812
6813 - ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
6814 + ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
6815 gdp->bg_free_blocks_count);
6816
6817 err = ext4_journal_get_write_access(handle, gdp_bh);
6818 @@ -2941,8 +3064,8 @@ ext4_mb_mark_diskspace_used(struct ext4_
6819 in_range(block + len - 1, ext4_inode_table(sb, gdp),
6820 EXT4_SB(sb)->s_itb_per_group)) {
6821 ext4_error(sb, __func__,
6822 - "Allocating block in system zone - block = %llu",
6823 - block);
6824 + "Allocating block %llu in system zone of %d group\n",
6825 + block, ac->ac_b_ex.fe_group);
6826 /* File system mounted not to panic on error
6827 * Fix the bitmap and repeat the block allocation
6828 * We leak some of the blocks here.
6829 @@ -2964,29 +3087,29 @@ ext4_mb_mark_diskspace_used(struct ext4_
6830 }
6831 }
6832 #endif
6833 - mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
6834 - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
6835 -
6836 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
6837 + mb_set_bits(NULL, bitmap_bh->b_data,
6838 + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
6839 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
6840 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
6841 - gdp->bg_free_blocks_count =
6842 - cpu_to_le16(ext4_free_blocks_after_init(sb,
6843 - ac->ac_b_ex.fe_group,
6844 - gdp));
6845 + ext4_free_blks_set(sb, gdp,
6846 + ext4_free_blocks_after_init(sb,
6847 + ac->ac_b_ex.fe_group, gdp));
6848 }
6849 - le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
6850 + len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
6851 + ext4_free_blks_set(sb, gdp, len);
6852 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
6853 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
6854 -
6855 + percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
6856 /*
6857 - * free blocks account has already be reduced/reserved
6858 - * at write_begin() time for delayed allocation
6859 - * do not double accounting
6860 + * Now reduce the dirty block count also. Should not go negative
6861 */
6862 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
6863 - percpu_counter_sub(&sbi->s_freeblocks_counter,
6864 - ac->ac_b_ex.fe_len);
6865 + /* release all the reserved blocks if non delalloc */
6866 + percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
6867 + else
6868 + percpu_counter_sub(&sbi->s_dirtyblocks_counter,
6869 + ac->ac_b_ex.fe_len);
6870
6871 if (sbi->s_log_groups_per_flex) {
6872 ext4_group_t flex_group = ext4_flex_group(sbi,
6873 @@ -3128,7 +3251,7 @@ ext4_mb_normalize_request(struct ext4_al
6874 /* check we don't cross already preallocated blocks */
6875 rcu_read_lock();
6876 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
6877 - unsigned long pa_end;
6878 + ext4_lblk_t pa_end;
6879
6880 if (pa->pa_deleted)
6881 continue;
6882 @@ -3172,7 +3295,7 @@ ext4_mb_normalize_request(struct ext4_al
6883 /* XXX: extra loop to check we really don't overlap preallocations */
6884 rcu_read_lock();
6885 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
6886 - unsigned long pa_end;
6887 + ext4_lblk_t pa_end;
6888 spin_lock(&pa->pa_lock);
6889 if (pa->pa_deleted == 0) {
6890 pa_end = pa->pa_lstart + pa->pa_len;
6891 @@ -3404,6 +3527,32 @@ ext4_mb_use_preallocated(struct ext4_all
6892 }
6893
6894 /*
6895 + * the function goes through all block freed in the group
6896 + * but not yet committed and marks them used in in-core bitmap.
6897 + * buddy must be generated from this bitmap
6898 + * Need to be called with ext4 group lock (ext4_lock_group)
6899 + */
6900 +static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
6901 + ext4_group_t group)
6902 +{
6903 + struct rb_node *n;
6904 + struct ext4_group_info *grp;
6905 + struct ext4_free_data *entry;
6906 +
6907 + grp = ext4_get_group_info(sb, group);
6908 + n = rb_first(&(grp->bb_free_root));
6909 +
6910 + while (n) {
6911 + entry = rb_entry(n, struct ext4_free_data, node);
6912 + mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
6913 + bitmap, entry->start_blk,
6914 + entry->count);
6915 + n = rb_next(n);
6916 + }
6917 + return;
6918 +}
6919 +
6920 +/*
6921 * the function goes through all preallocation in this group and marks them
6922 * used in in-core bitmap. buddy must be generated from this bitmap
6923 * Need to be called with ext4 group lock (ext4_lock_group)
6924 @@ -3443,7 +3592,7 @@ static void ext4_mb_generate_from_pa(str
6925 preallocated += len;
6926 count++;
6927 }
6928 - mb_debug("prellocated %u for group %lu\n", preallocated, group);
6929 + mb_debug("prellocated %u for group %u\n", preallocated, group);
6930 }
6931
6932 static void ext4_mb_pa_callback(struct rcu_head *head)
6933 @@ -3460,7 +3609,7 @@ static void ext4_mb_pa_callback(struct r
6934 static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
6935 struct super_block *sb, struct ext4_prealloc_space *pa)
6936 {
6937 - unsigned long grp;
6938 + ext4_group_t grp;
6939
6940 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
6941 return;
6942 @@ -3676,8 +3825,8 @@ ext4_mb_release_inode_pa(struct ext4_bud
6943 {
6944 struct super_block *sb = e4b->bd_sb;
6945 struct ext4_sb_info *sbi = EXT4_SB(sb);
6946 - unsigned long end;
6947 - unsigned long next;
6948 + unsigned int end;
6949 + unsigned int next;
6950 ext4_group_t group;
6951 ext4_grpblk_t bit;
6952 sector_t start;
6953 @@ -3723,8 +3872,9 @@ ext4_mb_release_inode_pa(struct ext4_bud
6954 pa, (unsigned long) pa->pa_lstart,
6955 (unsigned long) pa->pa_pstart,
6956 (unsigned long) pa->pa_len);
6957 - ext4_error(sb, __func__, "free %u, pa_free %u\n",
6958 - free, pa->pa_free);
6959 + ext4_grp_locked_error(sb, group,
6960 + __func__, "free %u, pa_free %u\n",
6961 + free, pa->pa_free);
6962 /*
6963 * pa is already deleted so we use the value obtained
6964 * from the bitmap and continue.
6965 @@ -3789,7 +3939,7 @@ ext4_mb_discard_group_preallocations(str
6966 int busy = 0;
6967 int free = 0;
6968
6969 - mb_debug("discard preallocation for group %lu\n", group);
6970 + mb_debug("discard preallocation for group %u\n", group);
6971
6972 if (list_empty(&grp->bb_prealloc_list))
6973 return 0;
6974 @@ -3797,14 +3947,15 @@ ext4_mb_discard_group_preallocations(str
6975 bitmap_bh = ext4_read_block_bitmap(sb, group);
6976 if (bitmap_bh == NULL) {
6977 ext4_error(sb, __func__, "Error in reading block "
6978 - "bitmap for %lu\n", group);
6979 + "bitmap for %u\n", group);
6980 return 0;
6981 }
6982
6983 err = ext4_mb_load_buddy(sb, group, &e4b);
6984 if (err) {
6985 + __release(e4b->alloc_semp);
6986 ext4_error(sb, __func__, "Error in loading buddy "
6987 - "information for %lu\n", group);
6988 + "information for %u\n", group);
6989 put_bh(bitmap_bh);
6990 return 0;
6991 }
6992 @@ -3894,7 +4045,7 @@ out:
6993 *
6994 * FIXME!! Make sure it is valid at all the call sites
6995 */
6996 -void ext4_mb_discard_inode_preallocations(struct inode *inode)
6997 +void ext4_discard_preallocations(struct inode *inode)
6998 {
6999 struct ext4_inode_info *ei = EXT4_I(inode);
7000 struct super_block *sb = inode->i_sb;
7001 @@ -3906,7 +4057,7 @@ void ext4_mb_discard_inode_preallocation
7002 struct ext4_buddy e4b;
7003 int err;
7004
7005 - if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) {
7006 + if (!S_ISREG(inode->i_mode)) {
7007 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
7008 return;
7009 }
7010 @@ -3970,15 +4121,16 @@ repeat:
7011
7012 err = ext4_mb_load_buddy(sb, group, &e4b);
7013 if (err) {
7014 + __release(e4b->alloc_semp);
7015 ext4_error(sb, __func__, "Error in loading buddy "
7016 - "information for %lu\n", group);
7017 + "information for %u\n", group);
7018 continue;
7019 }
7020
7021 bitmap_bh = ext4_read_block_bitmap(sb, group);
7022 if (bitmap_bh == NULL) {
7023 ext4_error(sb, __func__, "Error in reading block "
7024 - "bitmap for %lu\n", group);
7025 + "bitmap for %u\n", group);
7026 ext4_mb_release_desc(&e4b);
7027 continue;
7028 }
7029 @@ -4104,8 +4256,7 @@ static void ext4_mb_group_or_file(struct
7030 * per cpu locality group is to reduce the contention between block
7031 * request from multiple CPUs.
7032 */
7033 - ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
7034 - put_cpu();
7035 + ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
7036
7037 /* we're going to use group allocation */
7038 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
7039 @@ -4122,8 +4273,8 @@ ext4_mb_initialize_context(struct ext4_a
7040 struct ext4_sb_info *sbi = EXT4_SB(sb);
7041 struct ext4_super_block *es = sbi->s_es;
7042 ext4_group_t group;
7043 - unsigned long len;
7044 - unsigned long goal;
7045 + unsigned int len;
7046 + ext4_fsblk_t goal;
7047 ext4_grpblk_t block;
7048
7049 /* we can't allocate > group size */
7050 @@ -4166,6 +4317,7 @@ ext4_mb_initialize_context(struct ext4_a
7051 ac->ac_pa = NULL;
7052 ac->ac_bitmap_page = NULL;
7053 ac->ac_buddy_page = NULL;
7054 + ac->alloc_semp = NULL;
7055 ac->ac_lg = NULL;
7056
7057 /* we have to define context: we'll we work with a file or
7058 @@ -4243,8 +4395,9 @@ ext4_mb_discard_lg_preallocations(struct
7059
7060 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
7061 if (ext4_mb_load_buddy(sb, group, &e4b)) {
7062 + __release(e4b->alloc_semp);
7063 ext4_error(sb, __func__, "Error in loading buddy "
7064 - "information for %lu\n", group);
7065 + "information for %u\n", group);
7066 continue;
7067 }
7068 ext4_lock_group(sb, group);
7069 @@ -4346,6 +4499,8 @@ static int ext4_mb_release_context(struc
7070 }
7071 ext4_mb_put_pa(ac, ac->ac_sb, pa);
7072 }
7073 + if (ac->alloc_semp)
7074 + up_read(ac->alloc_semp);
7075 if (ac->ac_bitmap_page)
7076 page_cache_release(ac->ac_bitmap_page);
7077 if (ac->ac_buddy_page)
7078 @@ -4379,40 +4534,39 @@ static int ext4_mb_discard_preallocation
7079 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
7080 struct ext4_allocation_request *ar, int *errp)
7081 {
7082 + int freed;
7083 struct ext4_allocation_context *ac = NULL;
7084 struct ext4_sb_info *sbi;
7085 struct super_block *sb;
7086 ext4_fsblk_t block = 0;
7087 - int freed;
7088 - int inquota;
7089 + unsigned int inquota;
7090 + unsigned int reserv_blks = 0;
7091
7092 sb = ar->inode->i_sb;
7093 sbi = EXT4_SB(sb);
7094
7095 - if (!test_opt(sb, MBALLOC)) {
7096 - block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
7097 - &(ar->len), errp);
7098 - return block;
7099 - }
7100 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
7101 /*
7102 * With delalloc we already reserved the blocks
7103 */
7104 - ar->len = ext4_has_free_blocks(sbi, ar->len);
7105 - }
7106 -
7107 - if (ar->len == 0) {
7108 - *errp = -ENOSPC;
7109 - return 0;
7110 + while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
7111 + /* let others to free the space */
7112 + yield();
7113 + ar->len = ar->len >> 1;
7114 + }
7115 + if (!ar->len) {
7116 + *errp = -ENOSPC;
7117 + return 0;
7118 + }
7119 + reserv_blks = ar->len;
7120 }
7121 -
7122 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
7123 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
7124 ar->len--;
7125 }
7126 if (ar->len == 0) {
7127 *errp = -EDQUOT;
7128 - return 0;
7129 + goto out3;
7130 }
7131 inquota = ar->len;
7132
7133 @@ -4449,10 +4603,14 @@ repeat:
7134 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
7135 ext4_mb_new_preallocation(ac);
7136 }
7137 -
7138 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
7139 - *errp = ext4_mb_mark_diskspace_used(ac, handle);
7140 + *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
7141 if (*errp == -EAGAIN) {
7142 + /*
7143 + * drop the reference that we took
7144 + * in ext4_mb_use_best_found
7145 + */
7146 + ext4_mb_release_context(ac);
7147 ac->ac_b_ex.fe_group = 0;
7148 ac->ac_b_ex.fe_start = 0;
7149 ac->ac_b_ex.fe_len = 0;
7150 @@ -4483,6 +4641,13 @@ out2:
7151 out1:
7152 if (ar->len < inquota)
7153 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
7154 +out3:
7155 + if (!ar->len) {
7156 + if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
7157 + /* release all the reserved blocks if non delalloc */
7158 + percpu_counter_sub(&sbi->s_dirtyblocks_counter,
7159 + reserv_blks);
7160 + }
7161
7162 return block;
7163 }
7164 @@ -4517,65 +4682,97 @@ static void ext4_mb_poll_new_transaction
7165 ext4_mb_free_committed_blocks(sb);
7166 }
7167
7168 +/*
7169 + * We can merge two free data extents only if the physical blocks
7170 + * are contiguous, AND the extents were freed by the same transaction,
7171 + * AND the blocks are associated with the same group.
7172 + */
7173 +static int can_merge(struct ext4_free_data *entry1,
7174 + struct ext4_free_data *entry2)
7175 +{
7176 + if ((entry1->t_tid == entry2->t_tid) &&
7177 + (entry1->group == entry2->group) &&
7178 + ((entry1->start_blk + entry1->count) == entry2->start_blk))
7179 + return 1;
7180 + return 0;
7181 +}
7182 +
7183 static noinline_for_stack int
7184 ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
7185 - ext4_group_t group, ext4_grpblk_t block, int count)
7186 + struct ext4_free_data *new_entry)
7187 {
7188 + ext4_grpblk_t block;
7189 + struct ext4_free_data *entry;
7190 struct ext4_group_info *db = e4b->bd_info;
7191 struct super_block *sb = e4b->bd_sb;
7192 struct ext4_sb_info *sbi = EXT4_SB(sb);
7193 - struct ext4_free_metadata *md;
7194 - int i;
7195 + struct rb_node **n = &db->bb_free_root.rb_node, *node;
7196 + struct rb_node *parent = NULL, *new_node;
7197
7198 BUG_ON(e4b->bd_bitmap_page == NULL);
7199 BUG_ON(e4b->bd_buddy_page == NULL);
7200
7201 - ext4_lock_group(sb, group);
7202 - for (i = 0; i < count; i++) {
7203 - md = db->bb_md_cur;
7204 - if (md && db->bb_tid != handle->h_transaction->t_tid) {
7205 - db->bb_md_cur = NULL;
7206 - md = NULL;
7207 + new_node = &new_entry->node;
7208 + block = new_entry->start_blk;
7209 +
7210 + if (!*n) {
7211 + /* first free block exent. We need to
7212 + protect buddy cache from being freed,
7213 + * otherwise we'll refresh it from
7214 + * on-disk bitmap and lose not-yet-available
7215 + * blocks */
7216 + page_cache_get(e4b->bd_buddy_page);
7217 + page_cache_get(e4b->bd_bitmap_page);
7218 + }
7219 + while (*n) {
7220 + parent = *n;
7221 + entry = rb_entry(parent, struct ext4_free_data, node);
7222 + if (block < entry->start_blk)
7223 + n = &(*n)->rb_left;
7224 + else if (block >= (entry->start_blk + entry->count))
7225 + n = &(*n)->rb_right;
7226 + else {
7227 + ext4_grp_locked_error(sb, e4b->bd_group, __func__,
7228 + "Double free of blocks %d (%d %d)\n",
7229 + block, entry->start_blk, entry->count);
7230 + return 0;
7231 }
7232 + }
7233
7234 - if (md == NULL) {
7235 - ext4_unlock_group(sb, group);
7236 - md = kmalloc(sizeof(*md), GFP_NOFS);
7237 - if (md == NULL)
7238 - return -ENOMEM;
7239 - md->num = 0;
7240 - md->group = group;
7241 + rb_link_node(new_node, parent, n);
7242 + rb_insert_color(new_node, &db->bb_free_root);
7243
7244 - ext4_lock_group(sb, group);
7245 - if (db->bb_md_cur == NULL) {
7246 - spin_lock(&sbi->s_md_lock);
7247 - list_add(&md->list, &sbi->s_active_transaction);
7248 - spin_unlock(&sbi->s_md_lock);
7249 - /* protect buddy cache from being freed,
7250 - * otherwise we'll refresh it from
7251 - * on-disk bitmap and lose not-yet-available
7252 - * blocks */
7253 - page_cache_get(e4b->bd_buddy_page);
7254 - page_cache_get(e4b->bd_bitmap_page);
7255 - db->bb_md_cur = md;
7256 - db->bb_tid = handle->h_transaction->t_tid;
7257 - mb_debug("new md 0x%p for group %lu\n",
7258 - md, md->group);
7259 - } else {
7260 - kfree(md);
7261 - md = db->bb_md_cur;
7262 - }
7263 + /* Now try to see the extent can be merged to left and right */
7264 + node = rb_prev(new_node);
7265 + if (node) {
7266 + entry = rb_entry(node, struct ext4_free_data, node);
7267 + if (can_merge(entry, new_entry)) {
7268 + new_entry->start_blk = entry->start_blk;
7269 + new_entry->count += entry->count;
7270 + rb_erase(node, &(db->bb_free_root));
7271 + spin_lock(&sbi->s_md_lock);
7272 + list_del(&entry->list);
7273 + spin_unlock(&sbi->s_md_lock);
7274 + kmem_cache_free(ext4_free_ext_cachep, entry);
7275 }
7276 + }
7277
7278 - BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
7279 - md->blocks[md->num] = block + i;
7280 - md->num++;
7281 - if (md->num == EXT4_BB_MAX_BLOCKS) {
7282 - /* no more space, put full container on a sb's list */
7283 - db->bb_md_cur = NULL;
7284 + node = rb_next(new_node);
7285 + if (node) {
7286 + entry = rb_entry(node, struct ext4_free_data, node);
7287 + if (can_merge(new_entry, entry)) {
7288 + new_entry->count += entry->count;
7289 + rb_erase(node, &(db->bb_free_root));
7290 + spin_lock(&sbi->s_md_lock);
7291 + list_del(&entry->list);
7292 + spin_unlock(&sbi->s_md_lock);
7293 + kmem_cache_free(ext4_free_ext_cachep, entry);
7294 }
7295 }
7296 - ext4_unlock_group(sb, group);
7297 + /* Add the extent to active_transaction list */
7298 + spin_lock(&sbi->s_md_lock);
7299 + list_add(&new_entry->list, &sbi->s_active_transaction);
7300 + spin_unlock(&sbi->s_md_lock);
7301 return 0;
7302 }
7303
7304 @@ -4591,7 +4788,7 @@ void ext4_mb_free_blocks(handle_t *handl
7305 struct ext4_allocation_context *ac = NULL;
7306 struct ext4_group_desc *gdp;
7307 struct ext4_super_block *es;
7308 - unsigned long overflow;
7309 + unsigned int overflow;
7310 ext4_grpblk_t bit;
7311 struct buffer_head *gd_bh;
7312 ext4_group_t block_group;
7313 @@ -4675,11 +4872,6 @@ do_more:
7314 err = ext4_journal_get_write_access(handle, gd_bh);
7315 if (err)
7316 goto error_return;
7317 -
7318 - err = ext4_mb_load_buddy(sb, block_group, &e4b);
7319 - if (err)
7320 - goto error_return;
7321 -
7322 #ifdef AGGRESSIVE_CHECK
7323 {
7324 int i;
7325 @@ -4687,13 +4879,6 @@ do_more:
7326 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
7327 }
7328 #endif
7329 - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
7330 - bit, count);
7331 -
7332 - /* We dirtied the bitmap block */
7333 - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
7334 - err = ext4_journal_dirty_metadata(handle, bitmap_bh);
7335 -
7336 if (ac) {
7337 ac->ac_b_ex.fe_group = block_group;
7338 ac->ac_b_ex.fe_start = bit;
7339 @@ -4701,19 +4886,43 @@ do_more:
7340 ext4_mb_store_history(ac);
7341 }
7342
7343 + err = ext4_mb_load_buddy(sb, block_group, &e4b);
7344 + if (err) {
7345 + __release(e4b->alloc_semp);
7346 + goto error_return;
7347 + }
7348 if (metadata) {
7349 - /* blocks being freed are metadata. these blocks shouldn't
7350 - * be used until this transaction is committed */
7351 - ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
7352 + struct ext4_free_data *new_entry;
7353 + /*
7354 + * blocks being freed are metadata. these blocks shouldn't
7355 + * be used until this transaction is committed
7356 + */
7357 + new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
7358 + new_entry->start_blk = bit;
7359 + new_entry->group = block_group;
7360 + new_entry->count = count;
7361 + new_entry->t_tid = handle->h_transaction->t_tid;
7362 + ext4_lock_group(sb, block_group);
7363 + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
7364 + bit, count);
7365 + ext4_mb_free_metadata(handle, &e4b, new_entry);
7366 + ext4_unlock_group(sb, block_group);
7367 } else {
7368 ext4_lock_group(sb, block_group);
7369 + /* need to update group_info->bb_free and bitmap
7370 + * with group lock held. generate_buddy look at
7371 + * them with group lock_held
7372 + */
7373 + mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
7374 + bit, count);
7375 mb_free_blocks(inode, &e4b, bit, count);
7376 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
7377 ext4_unlock_group(sb, block_group);
7378 }
7379
7380 spin_lock(sb_bgl_lock(sbi, block_group));
7381 - le16_add_cpu(&gdp->bg_free_blocks_count, count);
7382 + ret = ext4_free_blks_count(sb, gdp) + count;
7383 + ext4_free_blks_set(sb, gdp, ret);
7384 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
7385 spin_unlock(sb_bgl_lock(sbi, block_group));
7386 percpu_counter_add(&sbi->s_freeblocks_counter, count);
7387 @@ -4729,6 +4938,10 @@ do_more:
7388
7389 *freed += count;
7390
7391 + /* We dirtied the bitmap block */
7392 + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
7393 + err = ext4_journal_dirty_metadata(handle, bitmap_bh);
7394 +
7395 /* And the group descriptor block */
7396 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
7397 ret = ext4_journal_dirty_metadata(handle, gd_bh);
7398 diff -rup b/fs/ext4//mballoc.h a/fs/ext4///mballoc.h
7399 --- b/fs/ext4/mballoc.h 2009-02-11 14:37:58.000000000 +0100
7400 +++ a/fs/ext4/mballoc.h 2009-02-10 21:40:14.000000000 +0100
7401 @@ -18,6 +18,9 @@
7402 #include <linux/pagemap.h>
7403 #include <linux/seq_file.h>
7404 #include <linux/version.h>
7405 +#include <linux/blkdev.h>
7406 +#include <linux/marker.h>
7407 +#include <linux/mutex.h>
7408 #include "ext4_jbd2.h"
7409 #include "ext4.h"
7410 #include "group.h"
7411 @@ -96,41 +99,24 @@
7412 */
7413 #define MB_DEFAULT_GROUP_PREALLOC 512
7414
7415 -static struct kmem_cache *ext4_pspace_cachep;
7416 -static struct kmem_cache *ext4_ac_cachep;
7417
7418 -#ifdef EXT4_BB_MAX_BLOCKS
7419 -#undef EXT4_BB_MAX_BLOCKS
7420 -#endif
7421 -#define EXT4_BB_MAX_BLOCKS 30
7422 +struct ext4_free_data {
7423 + /* this links the free block information from group_info */
7424 + struct rb_node node;
7425
7426 -struct ext4_free_metadata {
7427 - ext4_group_t group;
7428 - unsigned short num;
7429 - ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
7430 + /* this links the free block information from ext4_sb_info */
7431 struct list_head list;
7432 -};
7433 -
7434 -struct ext4_group_info {
7435 - unsigned long bb_state;
7436 - unsigned long bb_tid;
7437 - struct ext4_free_metadata *bb_md_cur;
7438 - unsigned short bb_first_free;
7439 - unsigned short bb_free;
7440 - unsigned short bb_fragments;
7441 - struct list_head bb_prealloc_list;
7442 -#ifdef DOUBLE_CHECK
7443 - void *bb_bitmap;
7444 -#endif
7445 - unsigned short bb_counters[];
7446 -};
7447
7448 -#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
7449 -#define EXT4_GROUP_INFO_LOCKED_BIT 1
7450 + /* group which free block extent belongs */
7451 + ext4_group_t group;
7452
7453 -#define EXT4_MB_GRP_NEED_INIT(grp) \
7454 - (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
7455 + /* free block extent */
7456 + ext4_grpblk_t start_blk;
7457 + ext4_grpblk_t count;
7458
7459 + /* transaction which freed this extent */
7460 + tid_t t_tid;
7461 +};
7462
7463 struct ext4_prealloc_space {
7464 struct list_head pa_inode_list;
7465 @@ -209,6 +195,11 @@ struct ext4_allocation_context {
7466 __u8 ac_op; /* operation, for history only */
7467 struct page *ac_bitmap_page;
7468 struct page *ac_buddy_page;
7469 + /*
7470 + * pointer to the held semaphore upon successful
7471 + * block allocation
7472 + */
7473 + struct rw_semaphore *alloc_semp;
7474 struct ext4_prealloc_space *ac_pa;
7475 struct ext4_locality_group *ac_lg;
7476 };
7477 @@ -242,6 +233,7 @@ struct ext4_buddy {
7478 struct super_block *bd_sb;
7479 __u16 bd_blkbits;
7480 ext4_group_t bd_group;
7481 + struct rw_semaphore *alloc_semp;
7482 };
7483 #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
7484 #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
7485 @@ -251,53 +243,12 @@ static inline void ext4_mb_store_history
7486 {
7487 return;
7488 }
7489 -#else
7490 -static void ext4_mb_store_history(struct ext4_allocation_context *ac);
7491 #endif
7492
7493 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
7494
7495 -static struct proc_dir_entry *proc_root_ext4;
7496 struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
7497 -
7498 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
7499 - ext4_group_t group);
7500 -static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
7501 -static void ext4_mb_free_committed_blocks(struct super_block *);
7502 -static void ext4_mb_return_to_preallocation(struct inode *inode,
7503 - struct ext4_buddy *e4b, sector_t block,
7504 - int count);
7505 -static void ext4_mb_put_pa(struct ext4_allocation_context *,
7506 - struct super_block *, struct ext4_prealloc_space *pa);
7507 -static int ext4_mb_init_per_dev_proc(struct super_block *sb);
7508 -static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
7509 -
7510 -
7511 -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
7512 -{
7513 - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
7514 -
7515 - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
7516 -}
7517 -
7518 -static inline void ext4_unlock_group(struct super_block *sb,
7519 - ext4_group_t group)
7520 -{
7521 - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
7522 -
7523 - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
7524 -}
7525 -
7526 -static inline int ext4_is_group_locked(struct super_block *sb,
7527 - ext4_group_t group)
7528 -{
7529 - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
7530 -
7531 - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
7532 - &(grinfo->bb_state));
7533 -}
7534 -
7535 -static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
7536 +static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
7537 struct ext4_free_extent *fex)
7538 {
7539 ext4_fsblk_t block;
7540 diff -rup b/fs/ext4//namei.c a/fs/ext4///namei.c
7541 --- b/fs/ext4/namei.c 2009-02-11 14:37:58.000000000 +0100
7542 +++ a/fs/ext4/namei.c 2009-02-10 21:40:11.000000000 +0100
7543 @@ -151,34 +151,36 @@ struct dx_map_entry
7544
7545 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
7546 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
7547 -static inline unsigned dx_get_hash (struct dx_entry *entry);
7548 -static void dx_set_hash (struct dx_entry *entry, unsigned value);
7549 -static unsigned dx_get_count (struct dx_entry *entries);
7550 -static unsigned dx_get_limit (struct dx_entry *entries);
7551 -static void dx_set_count (struct dx_entry *entries, unsigned value);
7552 -static void dx_set_limit (struct dx_entry *entries, unsigned value);
7553 -static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
7554 -static unsigned dx_node_limit (struct inode *dir);
7555 -static struct dx_frame *dx_probe(struct dentry *dentry,
7556 +static inline unsigned dx_get_hash(struct dx_entry *entry);
7557 +static void dx_set_hash(struct dx_entry *entry, unsigned value);
7558 +static unsigned dx_get_count(struct dx_entry *entries);
7559 +static unsigned dx_get_limit(struct dx_entry *entries);
7560 +static void dx_set_count(struct dx_entry *entries, unsigned value);
7561 +static void dx_set_limit(struct dx_entry *entries, unsigned value);
7562 +static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
7563 +static unsigned dx_node_limit(struct inode *dir);
7564 +static struct dx_frame *dx_probe(const struct qstr *d_name,
7565 struct inode *dir,
7566 struct dx_hash_info *hinfo,
7567 struct dx_frame *frame,
7568 int *err);
7569 -static void dx_release (struct dx_frame *frames);
7570 -static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
7571 - struct dx_hash_info *hinfo, struct dx_map_entry map[]);
7572 +static void dx_release(struct dx_frame *frames);
7573 +static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
7574 + struct dx_hash_info *hinfo, struct dx_map_entry map[]);
7575 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
7576 -static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
7577 +static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
7578 struct dx_map_entry *offsets, int count);
7579 -static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
7580 +static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
7581 static void dx_insert_block(struct dx_frame *frame,
7582 u32 hash, ext4_lblk_t block);
7583 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
7584 struct dx_frame *frame,
7585 struct dx_frame *frames,
7586 __u32 *start_hash);
7587 -static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
7588 - struct ext4_dir_entry_2 **res_dir, int *err);
7589 +static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
7590 + const struct qstr *d_name,
7591 + struct ext4_dir_entry_2 **res_dir,
7592 + int *err);
7593 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
7594 struct inode *inode);
7595
7596 @@ -207,44 +209,44 @@ static inline void dx_set_block(struct d
7597 entry->block = cpu_to_le32(value);
7598 }
7599
7600 -static inline unsigned dx_get_hash (struct dx_entry *entry)
7601 +static inline unsigned dx_get_hash(struct dx_entry *entry)
7602 {
7603 return le32_to_cpu(entry->hash);
7604 }
7605
7606 -static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
7607 +static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
7608 {
7609 entry->hash = cpu_to_le32(value);
7610 }
7611
7612 -static inline unsigned dx_get_count (struct dx_entry *entries)
7613 +static inline unsigned dx_get_count(struct dx_entry *entries)
7614 {
7615 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
7616 }
7617
7618 -static inline unsigned dx_get_limit (struct dx_entry *entries)
7619 +static inline unsigned dx_get_limit(struct dx_entry *entries)
7620 {
7621 return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
7622 }
7623
7624 -static inline void dx_set_count (struct dx_entry *entries, unsigned value)
7625 +static inline void dx_set_count(struct dx_entry *entries, unsigned value)
7626 {
7627 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
7628 }
7629
7630 -static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
7631 +static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
7632 {
7633 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
7634 }
7635
7636 -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
7637 +static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
7638 {
7639 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
7640 EXT4_DIR_REC_LEN(2) - infosize;
7641 return entry_space / sizeof(struct dx_entry);
7642 }
7643
7644 -static inline unsigned dx_node_limit (struct inode *dir)
7645 +static inline unsigned dx_node_limit(struct inode *dir)
7646 {
7647 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
7648 return entry_space / sizeof(struct dx_entry);
7649 @@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (st
7650 * Debug
7651 */
7652 #ifdef DX_DEBUG
7653 -static void dx_show_index (char * label, struct dx_entry *entries)
7654 +static void dx_show_index(char * label, struct dx_entry *entries)
7655 {
7656 int i, n = dx_get_count (entries);
7657 - printk("%s index ", label);
7658 + printk(KERN_DEBUG "%s index ", label);
7659 for (i = 0; i < n; i++) {
7660 - printk("%x->%lu ", i? dx_get_hash(entries + i) :
7661 + printk("%x->%lu ", i ? dx_get_hash(entries + i) :
7662 0, (unsigned long)dx_get_block(entries + i));
7663 }
7664 printk("\n");
7665 @@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_h
7666 struct dx_entry *entries, int levels)
7667 {
7668 unsigned blocksize = dir->i_sb->s_blocksize;
7669 - unsigned count = dx_get_count (entries), names = 0, space = 0, i;
7670 + unsigned count = dx_get_count(entries), names = 0, space = 0, i;
7671 unsigned bcount = 0;
7672 struct buffer_head *bh;
7673 int err;
7674 @@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_h
7675 names += stats.names;
7676 space += stats.space;
7677 bcount += stats.bcount;
7678 - brelse (bh);
7679 + brelse(bh);
7680 }
7681 if (bcount)
7682 - printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
7683 - names, space/bcount,(space/bcount)*100/blocksize);
7684 + printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
7685 + levels ? "" : " ", names, space/bcount,
7686 + (space/bcount)*100/blocksize);
7687 return (struct stats) { names, space, bcount};
7688 }
7689 #endif /* DX_DEBUG */
7690 @@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_h
7691 * back to userspace.
7692 */
7693 static struct dx_frame *
7694 -dx_probe(struct dentry *dentry, struct inode *dir,
7695 +dx_probe(const struct qstr *d_name, struct inode *dir,
7696 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
7697 {
7698 unsigned count, indirect;
7699 @@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct i
7700 u32 hash;
7701
7702 frame->bh = NULL;
7703 - if (dentry)
7704 - dir = dentry->d_parent->d_inode;
7705 if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
7706 goto fail;
7707 root = (struct dx_root *) bh->b_data;
7708 @@ -371,9 +372,11 @@ dx_probe(struct dentry *dentry, struct i
7709 goto fail;
7710 }
7711 hinfo->hash_version = root->info.hash_version;
7712 + if (hinfo->hash_version <= DX_HASH_TEA)
7713 + hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
7714 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
7715 - if (dentry)
7716 - ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
7717 + if (d_name)
7718 + ext4fs_dirhash(d_name->name, d_name->len, hinfo);
7719 hash = hinfo->hash;
7720
7721 if (root->info.unused_flags & 1) {
7722 @@ -406,7 +409,7 @@ dx_probe(struct dentry *dentry, struct i
7723 goto fail;
7724 }
7725
7726 - dxtrace (printk("Look up %x", hash));
7727 + dxtrace(printk("Look up %x", hash));
7728 while (1)
7729 {
7730 count = dx_get_count(entries);
7731 @@ -555,7 +558,7 @@ static int ext4_htree_next_block(struct
7732 0, &err)))
7733 return err; /* Failure */
7734 p++;
7735 - brelse (p->bh);
7736 + brelse(p->bh);
7737 p->bh = bh;
7738 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
7739 }
7740 @@ -593,7 +596,7 @@ static int htree_dirblock_to_tree(struct
7741 /* On error, skip the f_pos to the next block. */
7742 dir_file->f_pos = (dir_file->f_pos |
7743 (dir->i_sb->s_blocksize - 1)) + 1;
7744 - brelse (bh);
7745 + brelse(bh);
7746 return count;
7747 }
7748 ext4fs_dirhash(de->name, de->name_len, hinfo);
7749 @@ -635,11 +638,14 @@ int ext4_htree_fill_tree(struct file *di
7750 int ret, err;
7751 __u32 hashval;
7752
7753 - dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
7754 - start_minor_hash));
7755 + dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
7756 + start_hash, start_minor_hash));
7757 dir = dir_file->f_path.dentry->d_inode;
7758 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
7759 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
7760 + if (hinfo.hash_version <= DX_HASH_TEA)
7761 + hinfo.hash_version +=
7762 + EXT4_SB(dir->i_sb)->s_hash_unsigned;
7763 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
7764 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
7765 start_hash, start_minor_hash);
7766 @@ -648,7 +654,7 @@ int ext4_htree_fill_tree(struct file *di
7767 }
7768 hinfo.hash = start_hash;
7769 hinfo.minor_hash = 0;
7770 - frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
7771 + frame = dx_probe(NULL, dir, &hinfo, frames, &err);
7772 if (!frame)
7773 return err;
7774
7775 @@ -694,8 +700,8 @@ int ext4_htree_fill_tree(struct file *di
7776 break;
7777 }
7778 dx_release(frames);
7779 - dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
7780 - count, *next_hash));
7781 + dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
7782 + "next hash: %x\n", count, *next_hash));
7783 return count;
7784 errout:
7785 dx_release(frames);
7786 @@ -802,17 +808,17 @@ static inline int ext4_match (int len, c
7787 /*
7788 * Returns 0 if not found, -1 on failure, and 1 on success
7789 */
7790 -static inline int search_dirblock(struct buffer_head * bh,
7791 +static inline int search_dirblock(struct buffer_head *bh,
7792 struct inode *dir,
7793 - struct dentry *dentry,
7794 - unsigned long offset,
7795 + const struct qstr *d_name,
7796 + unsigned int offset,
7797 struct ext4_dir_entry_2 ** res_dir)
7798 {
7799 struct ext4_dir_entry_2 * de;
7800 char * dlimit;
7801 int de_len;
7802 - const char *name = dentry->d_name.name;
7803 - int namelen = dentry->d_name.len;
7804 + const char *name = d_name->name;
7805 + int namelen = d_name->len;
7806
7807 de = (struct ext4_dir_entry_2 *) bh->b_data;
7808 dlimit = bh->b_data + dir->i_sb->s_blocksize;
7809 @@ -851,12 +857,13 @@ static inline int search_dirblock(struct
7810 * The returned buffer_head has ->b_count elevated. The caller is expected
7811 * to brelse() it when appropriate.
7812 */
7813 -static struct buffer_head * ext4_find_entry (struct dentry *dentry,
7814 +static struct buffer_head * ext4_find_entry (struct inode *dir,
7815 + const struct qstr *d_name,
7816 struct ext4_dir_entry_2 ** res_dir)
7817 {
7818 - struct super_block * sb;
7819 - struct buffer_head * bh_use[NAMEI_RA_SIZE];
7820 - struct buffer_head * bh, *ret = NULL;
7821 + struct super_block *sb;
7822 + struct buffer_head *bh_use[NAMEI_RA_SIZE];
7823 + struct buffer_head *bh, *ret = NULL;
7824 ext4_lblk_t start, block, b;
7825 int ra_max = 0; /* Number of bh's in the readahead
7826 buffer, bh_use[] */
7827 @@ -865,16 +872,15 @@ static struct buffer_head * ext4_find_en
7828 int num = 0;
7829 ext4_lblk_t nblocks;
7830 int i, err;
7831 - struct inode *dir = dentry->d_parent->d_inode;
7832 int namelen;
7833
7834 *res_dir = NULL;
7835 sb = dir->i_sb;
7836 - namelen = dentry->d_name.len;
7837 + namelen = d_name->len;
7838 if (namelen > EXT4_NAME_LEN)
7839 return NULL;
7840 if (is_dx(dir)) {
7841 - bh = ext4_dx_find_entry(dentry, res_dir, &err);
7842 + bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
7843 /*
7844 * On success, or if the error was file not found,
7845 * return. Otherwise, fall back to doing a search the
7846 @@ -882,7 +888,8 @@ static struct buffer_head * ext4_find_en
7847 */
7848 if (bh || (err != ERR_BAD_DX_DIR))
7849 return bh;
7850 - dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
7851 + dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
7852 + "falling back\n"));
7853 }
7854 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
7855 start = EXT4_I(dir)->i_dir_start_lookup;
7856 @@ -926,7 +933,7 @@ restart:
7857 brelse(bh);
7858 goto next;
7859 }
7860 - i = search_dirblock(bh, dir, dentry,
7861 + i = search_dirblock(bh, dir, d_name,
7862 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
7863 if (i == 1) {
7864 EXT4_I(dir)->i_dir_start_lookup = block;
7865 @@ -956,11 +963,11 @@ restart:
7866 cleanup_and_exit:
7867 /* Clean up the read-ahead blocks */
7868 for (; ra_ptr < ra_max; ra_ptr++)
7869 - brelse (bh_use[ra_ptr]);
7870 + brelse(bh_use[ra_ptr]);
7871 return ret;
7872 }
7873
7874 -static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
7875 +static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
7876 struct ext4_dir_entry_2 **res_dir, int *err)
7877 {
7878 struct super_block * sb;
7879 @@ -971,14 +978,13 @@ static struct buffer_head * ext4_dx_find
7880 struct buffer_head *bh;
7881 ext4_lblk_t block;
7882 int retval;
7883 - int namelen = dentry->d_name.len;
7884 - const u8 *name = dentry->d_name.name;
7885 - struct inode *dir = dentry->d_parent->d_inode;
7886 + int namelen = d_name->len;
7887 + const u8 *name = d_name->name;
7888
7889 sb = dir->i_sb;
7890 /* NFS may look up ".." - look at dx_root directory block */
7891 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
7892 - if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
7893 + if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
7894 return NULL;
7895 } else {
7896 frame = frames;
7897 @@ -1010,7 +1016,7 @@ static struct buffer_head * ext4_dx_find
7898 return bh;
7899 }
7900 }
7901 - brelse (bh);
7902 + brelse(bh);
7903 /* Check to see if we should continue to search */
7904 retval = ext4_htree_next_block(dir, hash, frame,
7905 frames, NULL);
7906 @@ -1025,28 +1031,28 @@ static struct buffer_head * ext4_dx_find
7907
7908 *err = -ENOENT;
7909 errout:
7910 - dxtrace(printk("%s not found\n", name));
7911 + dxtrace(printk(KERN_DEBUG "%s not found\n", name));
7912 dx_release (frames);
7913 return NULL;
7914 }
7915
7916 -static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
7917 +static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
7918 {
7919 - struct inode * inode;
7920 - struct ext4_dir_entry_2 * de;
7921 - struct buffer_head * bh;
7922 + struct inode *inode;
7923 + struct ext4_dir_entry_2 *de;
7924 + struct buffer_head *bh;
7925
7926 if (dentry->d_name.len > EXT4_NAME_LEN)
7927 return ERR_PTR(-ENAMETOOLONG);
7928
7929 - bh = ext4_find_entry(dentry, &de);
7930 + bh = ext4_find_entry(dir, &dentry->d_name, &de);
7931 inode = NULL;
7932 if (bh) {
7933 - unsigned long ino = le32_to_cpu(de->inode);
7934 - brelse (bh);
7935 + __u32 ino = le32_to_cpu(de->inode);
7936 + brelse(bh);
7937 if (!ext4_valid_inum(dir->i_sb, ino)) {
7938 ext4_error(dir->i_sb, "ext4_lookup",
7939 - "bad inode number: %lu", ino);
7940 + "bad inode number: %u", ino);
7941 return ERR_PTR(-EIO);
7942 }
7943 inode = ext4_iget(dir->i_sb, ino);
7944 @@ -1059,18 +1065,17 @@ static struct dentry *ext4_lookup(struct
7945
7946 struct dentry *ext4_get_parent(struct dentry *child)
7947 {
7948 - unsigned long ino;
7949 + __u32 ino;
7950 struct dentry *parent;
7951 struct inode *inode;
7952 - struct dentry dotdot;
7953 + static const struct qstr dotdot = {
7954 + .name = "..",
7955 + .len = 2,
7956 + };
7957 struct ext4_dir_entry_2 * de;
7958 struct buffer_head *bh;
7959
7960 - dotdot.d_name.name = "..";
7961 - dotdot.d_name.len = 2;
7962 - dotdot.d_parent = child; /* confusing, isn't it! */
7963 -
7964 - bh = ext4_find_entry(&dotdot, &de);
7965 + bh = ext4_find_entry(child->d_inode, &dotdot, &de);
7966 inode = NULL;
7967 if (!bh)
7968 return ERR_PTR(-ENOENT);
7969 @@ -1079,7 +1084,7 @@ struct dentry *ext4_get_parent(struct de
7970
7971 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
7972 ext4_error(child->d_inode->i_sb, "ext4_get_parent",
7973 - "bad inode number: %lu", ino);
7974 + "bad inode number: %u", ino);
7975 return ERR_PTR(-EIO);
7976 }
7977
7978 @@ -1176,9 +1181,9 @@ static struct ext4_dir_entry_2 *do_split
7979 u32 hash2;
7980 struct dx_map_entry *map;
7981 char *data1 = (*bh)->b_data, *data2;
7982 - unsigned split, move, size, i;
7983 + unsigned split, move, size;
7984 struct ext4_dir_entry_2 *de = NULL, *de2;
7985 - int err = 0;
7986 + int err = 0, i;
7987
7988 bh2 = ext4_append (handle, dir, &newblock, &err);
7989 if (!(bh2)) {
7990 @@ -1201,10 +1206,10 @@ static struct ext4_dir_entry_2 *do_split
7991
7992 /* create map in the end of data2 block */
7993 map = (struct dx_map_entry *) (data2 + blocksize);
7994 - count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
7995 + count = dx_make_map((struct ext4_dir_entry_2 *) data1,
7996 blocksize, hinfo, map);
7997 map -= count;
7998 - dx_sort_map (map, count);
7999 + dx_sort_map(map, count);
8000 /* Split the existing block in the middle, size-wise */
8001 size = 0;
8002 move = 0;
8003 @@ -1225,7 +1230,7 @@ static struct ext4_dir_entry_2 *do_split
8004
8005 /* Fancy dance to stay within two buffers */
8006 de2 = dx_move_dirents(data1, data2, map + split, count - split);
8007 - de = dx_pack_dirents(data1,blocksize);
8008 + de = dx_pack_dirents(data1, blocksize);
8009 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
8010 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
8011 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
8012 @@ -1237,15 +1242,15 @@ static struct ext4_dir_entry_2 *do_split
8013 swap(*bh, bh2);
8014 de = de2;
8015 }
8016 - dx_insert_block (frame, hash2 + continued, newblock);
8017 - err = ext4_journal_dirty_metadata (handle, bh2);
8018 + dx_insert_block(frame, hash2 + continued, newblock);
8019 + err = ext4_journal_dirty_metadata(handle, bh2);
8020 if (err)
8021 goto journal_error;
8022 - err = ext4_journal_dirty_metadata (handle, frame->bh);
8023 + err = ext4_journal_dirty_metadata(handle, frame->bh);
8024 if (err)
8025 goto journal_error;
8026 - brelse (bh2);
8027 - dxtrace(dx_show_index ("frame", frame->entries));
8028 + brelse(bh2);
8029 + dxtrace(dx_show_index("frame", frame->entries));
8030 return de;
8031
8032 journal_error:
8033 @@ -1271,12 +1276,12 @@ errout:
8034 */
8035 static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
8036 struct inode *inode, struct ext4_dir_entry_2 *de,
8037 - struct buffer_head * bh)
8038 + struct buffer_head *bh)
8039 {
8040 struct inode *dir = dentry->d_parent->d_inode;
8041 const char *name = dentry->d_name.name;
8042 int namelen = dentry->d_name.len;
8043 - unsigned long offset = 0;
8044 + unsigned int offset = 0;
8045 unsigned short reclen;
8046 int nlen, rlen, err;
8047 char *top;
8048 @@ -1288,11 +1293,11 @@ static int add_dirent_to_buf(handle_t *h
8049 while ((char *) de <= top) {
8050 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
8051 bh, offset)) {
8052 - brelse (bh);
8053 + brelse(bh);
8054 return -EIO;
8055 }
8056 - if (ext4_match (namelen, name, de)) {
8057 - brelse (bh);
8058 + if (ext4_match(namelen, name, de)) {
8059 + brelse(bh);
8060 return -EEXIST;
8061 }
8062 nlen = EXT4_DIR_REC_LEN(de->name_len);
8063 @@ -1329,7 +1334,7 @@ static int add_dirent_to_buf(handle_t *h
8064 } else
8065 de->inode = 0;
8066 de->name_len = namelen;
8067 - memcpy (de->name, name, namelen);
8068 + memcpy(de->name, name, namelen);
8069 /*
8070 * XXX shouldn't update any times until successful
8071 * completion of syscall, but too many callers depend
8072 @@ -1377,7 +1382,7 @@ static int make_indexed_dir(handle_t *ha
8073 struct fake_dirent *fde;
8074
8075 blocksize = dir->i_sb->s_blocksize;
8076 - dxtrace(printk("Creating index\n"));
8077 + dxtrace(printk(KERN_DEBUG "Creating index\n"));
8078 retval = ext4_journal_get_write_access(handle, bh);
8079 if (retval) {
8080 ext4_std_error(dir->i_sb, retval);
8081 @@ -1386,7 +1391,7 @@ static int make_indexed_dir(handle_t *ha
8082 }
8083 root = (struct dx_root *) bh->b_data;
8084
8085 - bh2 = ext4_append (handle, dir, &block, &retval);
8086 + bh2 = ext4_append(handle, dir, &block, &retval);
8087 if (!(bh2)) {
8088 brelse(bh);
8089 return retval;
8090 @@ -1412,12 +1417,14 @@ static int make_indexed_dir(handle_t *ha
8091 root->info.info_length = sizeof(root->info);
8092 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
8093 entries = root->entries;
8094 - dx_set_block (entries, 1);
8095 - dx_set_count (entries, 1);
8096 - dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
8097 + dx_set_block(entries, 1);
8098 + dx_set_count(entries, 1);
8099 + dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
8100
8101 /* Initialize as for dx_probe */
8102 hinfo.hash_version = root->info.hash_version;
8103 + if (hinfo.hash_version <= DX_HASH_TEA)
8104 + hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
8105 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
8106 ext4fs_dirhash(name, namelen, &hinfo);
8107 frame = frames;
8108 @@ -1443,14 +1450,13 @@ static int make_indexed_dir(handle_t *ha
8109 * may not sleep between calling this and putting something into
8110 * the entry, as someone else might have used it while you slept.
8111 */
8112 -static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
8113 - struct inode *inode)
8114 +static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
8115 + struct inode *inode)
8116 {
8117 struct inode *dir = dentry->d_parent->d_inode;
8118 - unsigned long offset;
8119 - struct buffer_head * bh;
8120 + struct buffer_head *bh;
8121 struct ext4_dir_entry_2 *de;
8122 - struct super_block * sb;
8123 + struct super_block *sb;
8124 int retval;
8125 int dx_fallback=0;
8126 unsigned blocksize;
8127 @@ -1469,7 +1475,7 @@ static int ext4_add_entry (handle_t *han
8128 ext4_mark_inode_dirty(handle, dir);
8129 }
8130 blocks = dir->i_size >> sb->s_blocksize_bits;
8131 - for (block = 0, offset = 0; block < blocks; block++) {
8132 + for (block = 0; block < blocks; block++) {
8133 bh = ext4_bread(handle, dir, block, 0, &retval);
8134 if(!bh)
8135 return retval;
8136 @@ -1500,13 +1506,13 @@ static int ext4_dx_add_entry(handle_t *h
8137 struct dx_frame frames[2], *frame;
8138 struct dx_entry *entries, *at;
8139 struct dx_hash_info hinfo;
8140 - struct buffer_head * bh;
8141 + struct buffer_head *bh;
8142 struct inode *dir = dentry->d_parent->d_inode;
8143 - struct super_block * sb = dir->i_sb;
8144 + struct super_block *sb = dir->i_sb;
8145 struct ext4_dir_entry_2 *de;
8146 int err;
8147
8148 - frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
8149 + frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
8150 if (!frame)
8151 return err;
8152 entries = frame->entries;
8153 @@ -1527,7 +1533,7 @@ static int ext4_dx_add_entry(handle_t *h
8154 }
8155
8156 /* Block full, should compress but for now just split */
8157 - dxtrace(printk("using %u of %u node entries\n",
8158 + dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
8159 dx_get_count(entries), dx_get_limit(entries)));
8160 /* Need to split index? */
8161 if (dx_get_count(entries) == dx_get_limit(entries)) {
8162 @@ -1559,7 +1565,8 @@ static int ext4_dx_add_entry(handle_t *h
8163 if (levels) {
8164 unsigned icount1 = icount/2, icount2 = icount - icount1;
8165 unsigned hash2 = dx_get_hash(entries + icount1);
8166 - dxtrace(printk("Split index %i/%i\n", icount1, icount2));
8167 + dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
8168 + icount1, icount2));
8169
8170 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
8171 err = ext4_journal_get_write_access(handle,
8172 @@ -1567,11 +1574,11 @@ static int ext4_dx_add_entry(handle_t *h
8173 if (err)
8174 goto journal_error;
8175
8176 - memcpy ((char *) entries2, (char *) (entries + icount1),
8177 - icount2 * sizeof(struct dx_entry));
8178 - dx_set_count (entries, icount1);
8179 - dx_set_count (entries2, icount2);
8180 - dx_set_limit (entries2, dx_node_limit(dir));
8181 + memcpy((char *) entries2, (char *) (entries + icount1),
8182 + icount2 * sizeof(struct dx_entry));
8183 + dx_set_count(entries, icount1);
8184 + dx_set_count(entries2, icount2);
8185 + dx_set_limit(entries2, dx_node_limit(dir));
8186
8187 /* Which index block gets the new entry? */
8188 if (at - entries >= icount1) {
8189 @@ -1579,16 +1586,17 @@ static int ext4_dx_add_entry(handle_t *h
8190 frame->entries = entries = entries2;
8191 swap(frame->bh, bh2);
8192 }
8193 - dx_insert_block (frames + 0, hash2, newblock);
8194 - dxtrace(dx_show_index ("node", frames[1].entries));
8195 - dxtrace(dx_show_index ("node",
8196 + dx_insert_block(frames + 0, hash2, newblock);
8197 + dxtrace(dx_show_index("node", frames[1].entries));
8198 + dxtrace(dx_show_index("node",
8199 ((struct dx_node *) bh2->b_data)->entries));
8200 err = ext4_journal_dirty_metadata(handle, bh2);
8201 if (err)
8202 goto journal_error;
8203 brelse (bh2);
8204 } else {
8205 - dxtrace(printk("Creating second level index...\n"));
8206 + dxtrace(printk(KERN_DEBUG
8207 + "Creating second level index...\n"));
8208 memcpy((char *) entries2, (char *) entries,
8209 icount * sizeof(struct dx_entry));
8210 dx_set_limit(entries2, dx_node_limit(dir));
8211 @@ -1630,12 +1638,12 @@ cleanup:
8212 * ext4_delete_entry deletes a directory entry by merging it with the
8213 * previous entry
8214 */
8215 -static int ext4_delete_entry (handle_t *handle,
8216 - struct inode * dir,
8217 - struct ext4_dir_entry_2 * de_del,
8218 - struct buffer_head * bh)
8219 +static int ext4_delete_entry(handle_t *handle,
8220 + struct inode *dir,
8221 + struct ext4_dir_entry_2 *de_del,
8222 + struct buffer_head *bh)
8223 {
8224 - struct ext4_dir_entry_2 * de, * pde;
8225 + struct ext4_dir_entry_2 *de, *pde;
8226 int i;
8227
8228 i = 0;
8229 @@ -1716,11 +1724,11 @@ static int ext4_add_nondir(handle_t *han
8230 * If the create succeeds, we fill in the inode information
8231 * with d_instantiate().
8232 */
8233 -static int ext4_create (struct inode * dir, struct dentry * dentry, int mode,
8234 - struct nameidata *nd)
8235 +static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
8236 + struct nameidata *nd)
8237 {
8238 handle_t *handle;
8239 - struct inode * inode;
8240 + struct inode *inode;
8241 int err, retries = 0;
8242
8243 retry:
8244 @@ -1747,8 +1755,8 @@ retry:
8245 return err;
8246 }
8247
8248 -static int ext4_mknod (struct inode * dir, struct dentry *dentry,
8249 - int mode, dev_t rdev)
8250 +static int ext4_mknod(struct inode *dir, struct dentry *dentry,
8251 + int mode, dev_t rdev)
8252 {
8253 handle_t *handle;
8254 struct inode *inode;
8255 @@ -1767,11 +1775,11 @@ retry:
8256 if (IS_DIRSYNC(dir))
8257 handle->h_sync = 1;
8258
8259 - inode = ext4_new_inode (handle, dir, mode);
8260 + inode = ext4_new_inode(handle, dir, mode);
8261 err = PTR_ERR(inode);
8262 if (!IS_ERR(inode)) {
8263 init_special_inode(inode, inode->i_mode, rdev);
8264 -#ifdef CONFIG_EXT4DEV_FS_XATTR
8265 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
8266 inode->i_op = &ext4_special_inode_operations;
8267 #endif
8268 err = ext4_add_nondir(handle, dentry, inode);
8269 @@ -1782,12 +1790,12 @@ retry:
8270 return err;
8271 }
8272
8273 -static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
8274 +static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
8275 {
8276 handle_t *handle;
8277 - struct inode * inode;
8278 - struct buffer_head * dir_block;
8279 - struct ext4_dir_entry_2 * de;
8280 + struct inode *inode;
8281 + struct buffer_head *dir_block;
8282 + struct ext4_dir_entry_2 *de;
8283 int err, retries = 0;
8284
8285 if (EXT4_DIR_LINK_MAX(dir))
8286 @@ -1803,7 +1811,7 @@ retry:
8287 if (IS_DIRSYNC(dir))
8288 handle->h_sync = 1;
8289
8290 - inode = ext4_new_inode (handle, dir, S_IFDIR | mode);
8291 + inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
8292 err = PTR_ERR(inode);
8293 if (IS_ERR(inode))
8294 goto out_stop;
8295 @@ -1811,7 +1819,7 @@ retry:
8296 inode->i_op = &ext4_dir_inode_operations;
8297 inode->i_fop = &ext4_dir_operations;
8298 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
8299 - dir_block = ext4_bread (handle, inode, 0, 1, &err);
8300 + dir_block = ext4_bread(handle, inode, 0, 1, &err);
8301 if (!dir_block)
8302 goto out_clear_inode;
8303 BUFFER_TRACE(dir_block, "get_write_access");
8304 @@ -1820,26 +1828,26 @@ retry:
8305 de->inode = cpu_to_le32(inode->i_ino);
8306 de->name_len = 1;
8307 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
8308 - strcpy (de->name, ".");
8309 + strcpy(de->name, ".");
8310 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
8311 de = ext4_next_entry(de);
8312 de->inode = cpu_to_le32(dir->i_ino);
8313 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
8314 EXT4_DIR_REC_LEN(1));
8315 de->name_len = 2;
8316 - strcpy (de->name, "..");
8317 + strcpy(de->name, "..");
8318 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
8319 inode->i_nlink = 2;
8320 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
8321 ext4_journal_dirty_metadata(handle, dir_block);
8322 - brelse (dir_block);
8323 + brelse(dir_block);
8324 ext4_mark_inode_dirty(handle, inode);
8325 - err = ext4_add_entry (handle, dentry, inode);
8326 + err = ext4_add_entry(handle, dentry, inode);
8327 if (err) {
8328 out_clear_inode:
8329 clear_nlink(inode);
8330 ext4_mark_inode_dirty(handle, inode);
8331 - iput (inode);
8332 + iput(inode);
8333 goto out_stop;
8334 }
8335 ext4_inc_count(handle, dir);
8336 @@ -1856,17 +1864,17 @@ out_stop:
8337 /*
8338 * routine to check that the specified directory is empty (for rmdir)
8339 */
8340 -static int empty_dir (struct inode * inode)
8341 +static int empty_dir(struct inode *inode)
8342 {
8343 - unsigned long offset;
8344 - struct buffer_head * bh;
8345 - struct ext4_dir_entry_2 * de, * de1;
8346 - struct super_block * sb;
8347 + unsigned int offset;
8348 + struct buffer_head *bh;
8349 + struct ext4_dir_entry_2 *de, *de1;
8350 + struct super_block *sb;
8351 int err = 0;
8352
8353 sb = inode->i_sb;
8354 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
8355 - !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
8356 + !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
8357 if (err)
8358 ext4_error(inode->i_sb, __func__,
8359 "error %d reading directory #%lu offset 0",
8360 @@ -1881,29 +1889,29 @@ static int empty_dir (struct inode * ino
8361 de1 = ext4_next_entry(de);
8362 if (le32_to_cpu(de->inode) != inode->i_ino ||
8363 !le32_to_cpu(de1->inode) ||
8364 - strcmp (".", de->name) ||
8365 - strcmp ("..", de1->name)) {
8366 - ext4_warning (inode->i_sb, "empty_dir",
8367 - "bad directory (dir #%lu) - no `.' or `..'",
8368 - inode->i_ino);
8369 - brelse (bh);
8370 + strcmp(".", de->name) ||
8371 + strcmp("..", de1->name)) {
8372 + ext4_warning(inode->i_sb, "empty_dir",
8373 + "bad directory (dir #%lu) - no `.' or `..'",
8374 + inode->i_ino);
8375 + brelse(bh);
8376 return 1;
8377 }
8378 offset = ext4_rec_len_from_disk(de->rec_len) +
8379 ext4_rec_len_from_disk(de1->rec_len);
8380 de = ext4_next_entry(de1);
8381 - while (offset < inode->i_size ) {
8382 + while (offset < inode->i_size) {
8383 if (!bh ||
8384 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
8385 err = 0;
8386 - brelse (bh);
8387 - bh = ext4_bread (NULL, inode,
8388 + brelse(bh);
8389 + bh = ext4_bread(NULL, inode,
8390 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
8391 if (!bh) {
8392 if (err)
8393 ext4_error(sb, __func__,
8394 "error %d reading directory"
8395 - " #%lu offset %lu",
8396 + " #%lu offset %u",
8397 err, inode->i_ino, offset);
8398 offset += sb->s_blocksize;
8399 continue;
8400 @@ -1917,13 +1925,13 @@ static int empty_dir (struct inode * ino
8401 continue;
8402 }
8403 if (le32_to_cpu(de->inode)) {
8404 - brelse (bh);
8405 + brelse(bh);
8406 return 0;
8407 }
8408 offset += ext4_rec_len_from_disk(de->rec_len);
8409 de = ext4_next_entry(de);
8410 }
8411 - brelse (bh);
8412 + brelse(bh);
8413 return 1;
8414 }
8415
8416 @@ -1954,8 +1962,8 @@ int ext4_orphan_add(handle_t *handle, st
8417 * ->i_nlink. For, say it, character device. Not a regular file,
8418 * not a directory, not a symlink and ->i_nlink > 0.
8419 */
8420 - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
8421 - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
8422 + J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
8423 + S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
8424
8425 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
8426 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
8427 @@ -2003,7 +2011,7 @@ int ext4_orphan_del(handle_t *handle, st
8428 struct list_head *prev;
8429 struct ext4_inode_info *ei = EXT4_I(inode);
8430 struct ext4_sb_info *sbi;
8431 - unsigned long ino_next;
8432 + __u32 ino_next;
8433 struct ext4_iloc iloc;
8434 int err = 0;
8435
8436 @@ -2033,7 +2041,7 @@ int ext4_orphan_del(handle_t *handle, st
8437 goto out_err;
8438
8439 if (prev == &sbi->s_orphan) {
8440 - jbd_debug(4, "superblock will point to %lu\n", ino_next);
8441 + jbd_debug(4, "superblock will point to %u\n", ino_next);
8442 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
8443 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
8444 if (err)
8445 @@ -2045,7 +2053,7 @@ int ext4_orphan_del(handle_t *handle, st
8446 struct inode *i_prev =
8447 &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
8448
8449 - jbd_debug(4, "orphan inode %lu will point to %lu\n",
8450 + jbd_debug(4, "orphan inode %lu will point to %u\n",
8451 i_prev->i_ino, ino_next);
8452 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
8453 if (err)
8454 @@ -2069,12 +2077,12 @@ out_brelse:
8455 goto out_err;
8456 }
8457
8458 -static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
8459 +static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
8460 {
8461 int retval;
8462 - struct inode * inode;
8463 - struct buffer_head * bh;
8464 - struct ext4_dir_entry_2 * de;
8465 + struct inode *inode;
8466 + struct buffer_head *bh;
8467 + struct ext4_dir_entry_2 *de;
8468 handle_t *handle;
8469
8470 /* Initialize quotas before so that eventual writes go in
8471 @@ -2085,7 +2093,7 @@ static int ext4_rmdir (struct inode * di
8472 return PTR_ERR(handle);
8473
8474 retval = -ENOENT;
8475 - bh = ext4_find_entry (dentry, &de);
8476 + bh = ext4_find_entry(dir, &dentry->d_name, &de);
8477 if (!bh)
8478 goto end_rmdir;
8479
8480 @@ -2099,16 +2107,16 @@ static int ext4_rmdir (struct inode * di
8481 goto end_rmdir;
8482
8483 retval = -ENOTEMPTY;
8484 - if (!empty_dir (inode))
8485 + if (!empty_dir(inode))
8486 goto end_rmdir;
8487
8488 retval = ext4_delete_entry(handle, dir, de, bh);
8489 if (retval)
8490 goto end_rmdir;
8491 if (!EXT4_DIR_LINK_EMPTY(inode))
8492 - ext4_warning (inode->i_sb, "ext4_rmdir",
8493 - "empty directory has too many links (%d)",
8494 - inode->i_nlink);
8495 + ext4_warning(inode->i_sb, "ext4_rmdir",
8496 + "empty directory has too many links (%d)",
8497 + inode->i_nlink);
8498 inode->i_version++;
8499 clear_nlink(inode);
8500 /* There's no need to set i_disksize: the fact that i_nlink is
8501 @@ -2124,16 +2132,16 @@ static int ext4_rmdir (struct inode * di
8502
8503 end_rmdir:
8504 ext4_journal_stop(handle);
8505 - brelse (bh);
8506 + brelse(bh);
8507 return retval;
8508 }
8509
8510 -static int ext4_unlink(struct inode * dir, struct dentry *dentry)
8511 +static int ext4_unlink(struct inode *dir, struct dentry *dentry)
8512 {
8513 int retval;
8514 - struct inode * inode;
8515 - struct buffer_head * bh;
8516 - struct ext4_dir_entry_2 * de;
8517 + struct inode *inode;
8518 + struct buffer_head *bh;
8519 + struct ext4_dir_entry_2 *de;
8520 handle_t *handle;
8521
8522 /* Initialize quotas before so that eventual writes go
8523 @@ -2147,7 +2155,7 @@ static int ext4_unlink(struct inode * di
8524 handle->h_sync = 1;
8525
8526 retval = -ENOENT;
8527 - bh = ext4_find_entry (dentry, &de);
8528 + bh = ext4_find_entry(dir, &dentry->d_name, &de);
8529 if (!bh)
8530 goto end_unlink;
8531
8532 @@ -2158,9 +2166,9 @@ static int ext4_unlink(struct inode * di
8533 goto end_unlink;
8534
8535 if (!inode->i_nlink) {
8536 - ext4_warning (inode->i_sb, "ext4_unlink",
8537 - "Deleting nonexistent file (%lu), %d",
8538 - inode->i_ino, inode->i_nlink);
8539 + ext4_warning(inode->i_sb, "ext4_unlink",
8540 + "Deleting nonexistent file (%lu), %d",
8541 + inode->i_ino, inode->i_nlink);
8542 inode->i_nlink = 1;
8543 }
8544 retval = ext4_delete_entry(handle, dir, de, bh);
8545 @@ -2178,15 +2186,15 @@ static int ext4_unlink(struct inode * di
8546
8547 end_unlink:
8548 ext4_journal_stop(handle);
8549 - brelse (bh);
8550 + brelse(bh);
8551 return retval;
8552 }
8553
8554 -static int ext4_symlink (struct inode * dir,
8555 - struct dentry *dentry, const char * symname)
8556 +static int ext4_symlink(struct inode *dir,
8557 + struct dentry *dentry, const char *symname)
8558 {
8559 handle_t *handle;
8560 - struct inode * inode;
8561 + struct inode *inode;
8562 int l, err, retries = 0;
8563
8564 l = strlen(symname)+1;
8565 @@ -2203,12 +2211,12 @@ retry:
8566 if (IS_DIRSYNC(dir))
8567 handle->h_sync = 1;
8568
8569 - inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
8570 + inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
8571 err = PTR_ERR(inode);
8572 if (IS_ERR(inode))
8573 goto out_stop;
8574
8575 - if (l > sizeof (EXT4_I(inode)->i_data)) {
8576 + if (l > sizeof(EXT4_I(inode)->i_data)) {
8577 inode->i_op = &ext4_symlink_inode_operations;
8578 ext4_set_aops(inode);
8579 /*
8580 @@ -2216,18 +2224,19 @@ retry:
8581 * We have a transaction open. All is sweetness. It also sets
8582 * i_size in generic_commit_write().
8583 */
8584 - err = __page_symlink(inode, symname, l, 1);
8585 + err = __page_symlink(inode, symname, l,
8586 + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
8587 if (err) {
8588 clear_nlink(inode);
8589 ext4_mark_inode_dirty(handle, inode);
8590 - iput (inode);
8591 + iput(inode);
8592 goto out_stop;
8593 }
8594 } else {
8595 /* clear the extent format for fast symlink */
8596 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
8597 inode->i_op = &ext4_fast_symlink_inode_operations;
8598 - memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
8599 + memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
8600 inode->i_size = l-1;
8601 }
8602 EXT4_I(inode)->i_disksize = inode->i_size;
8603 @@ -2239,8 +2248,8 @@ out_stop:
8604 return err;
8605 }
8606
8607 -static int ext4_link (struct dentry * old_dentry,
8608 - struct inode * dir, struct dentry *dentry)
8609 +static int ext4_link(struct dentry *old_dentry,
8610 + struct inode *dir, struct dentry *dentry)
8611 {
8612 handle_t *handle;
8613 struct inode *inode = old_dentry->d_inode;
8614 @@ -2283,13 +2292,13 @@ retry:
8615 * Anybody can rename anything with this: the permission checks are left to the
8616 * higher-level routines.
8617 */
8618 -static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
8619 - struct inode * new_dir,struct dentry *new_dentry)
8620 +static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
8621 + struct inode *new_dir, struct dentry *new_dentry)
8622 {
8623 handle_t *handle;
8624 - struct inode * old_inode, * new_inode;
8625 - struct buffer_head * old_bh, * new_bh, * dir_bh;
8626 - struct ext4_dir_entry_2 * old_de, * new_de;
8627 + struct inode *old_inode, *new_inode;
8628 + struct buffer_head *old_bh, *new_bh, *dir_bh;
8629 + struct ext4_dir_entry_2 *old_de, *new_de;
8630 int retval;
8631
8632 old_bh = new_bh = dir_bh = NULL;
8633 @@ -2307,7 +2316,7 @@ static int ext4_rename (struct inode * o
8634 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
8635 handle->h_sync = 1;
8636
8637 - old_bh = ext4_find_entry (old_dentry, &old_de);
8638 + old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
8639 /*
8640 * Check for inode number is _not_ due to possible IO errors.
8641 * We might rmdir the source, keep it as pwd of some process
8642 @@ -2320,32 +2329,32 @@ static int ext4_rename (struct inode * o
8643 goto end_rename;
8644
8645 new_inode = new_dentry->d_inode;
8646 - new_bh = ext4_find_entry (new_dentry, &new_de);
8647 + new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
8648 if (new_bh) {
8649 if (!new_inode) {
8650 - brelse (new_bh);
8651 + brelse(new_bh);
8652 new_bh = NULL;
8653 }
8654 }
8655 if (S_ISDIR(old_inode->i_mode)) {
8656 if (new_inode) {
8657 retval = -ENOTEMPTY;
8658 - if (!empty_dir (new_inode))
8659 + if (!empty_dir(new_inode))
8660 goto end_rename;
8661 }
8662 retval = -EIO;
8663 - dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval);
8664 + dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
8665 if (!dir_bh)
8666 goto end_rename;
8667 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
8668 goto end_rename;
8669 retval = -EMLINK;
8670 - if (!new_inode && new_dir!=old_dir &&
8671 + if (!new_inode && new_dir != old_dir &&
8672 new_dir->i_nlink >= EXT4_LINK_MAX)
8673 goto end_rename;
8674 }
8675 if (!new_bh) {
8676 - retval = ext4_add_entry (handle, new_dentry, old_inode);
8677 + retval = ext4_add_entry(handle, new_dentry, old_inode);
8678 if (retval)
8679 goto end_rename;
8680 } else {
8681 @@ -2387,7 +2396,7 @@ static int ext4_rename (struct inode * o
8682 struct buffer_head *old_bh2;
8683 struct ext4_dir_entry_2 *old_de2;
8684
8685 - old_bh2 = ext4_find_entry(old_dentry, &old_de2);
8686 + old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
8687 if (old_bh2) {
8688 retval = ext4_delete_entry(handle, old_dir,
8689 old_de2, old_bh2);
8690 @@ -2432,9 +2441,9 @@ static int ext4_rename (struct inode * o
8691 retval = 0;
8692
8693 end_rename:
8694 - brelse (dir_bh);
8695 - brelse (old_bh);
8696 - brelse (new_bh);
8697 + brelse(dir_bh);
8698 + brelse(old_bh);
8699 + brelse(new_bh);
8700 ext4_journal_stop(handle);
8701 return retval;
8702 }
8703 @@ -2453,7 +2462,7 @@ const struct inode_operations ext4_dir_i
8704 .mknod = ext4_mknod,
8705 .rename = ext4_rename,
8706 .setattr = ext4_setattr,
8707 -#ifdef CONFIG_EXT4DEV_FS_XATTR
8708 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
8709 .setxattr = generic_setxattr,
8710 .getxattr = generic_getxattr,
8711 .listxattr = ext4_listxattr,
8712 @@ -2464,7 +2473,7 @@ const struct inode_operations ext4_dir_i
8713
8714 const struct inode_operations ext4_special_inode_operations = {
8715 .setattr = ext4_setattr,
8716 -#ifdef CONFIG_EXT4DEV_FS_XATTR
8717 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
8718 .setxattr = generic_setxattr,
8719 .getxattr = generic_getxattr,
8720 .listxattr = ext4_listxattr,
8721 diff -rup b/fs/ext4//resize.c a/fs/ext4///resize.c
8722 --- b/fs/ext4/resize.c 2009-02-11 14:37:58.000000000 +0100
8723 +++ a/fs/ext4/resize.c 2009-02-10 21:40:11.000000000 +0100
8724 @@ -50,7 +50,7 @@ static int verify_group_input(struct sup
8725 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
8726 if (group != sbi->s_groups_count)
8727 ext4_warning(sb, __func__,
8728 - "Cannot add at group %u (only %lu groups)",
8729 + "Cannot add at group %u (only %u groups)",
8730 input->group, sbi->s_groups_count);
8731 else if (offset != 0)
8732 ext4_warning(sb, __func__, "Last group not full");
8733 @@ -284,11 +284,9 @@ static int setup_new_group_blocks(struct
8734 if ((err = extend_or_restart_transaction(handle, 2, bh)))
8735 goto exit_bh;
8736
8737 - mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
8738 - bh->b_data);
8739 + mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
8740 ext4_journal_dirty_metadata(handle, bh);
8741 brelse(bh);
8742 -
8743 /* Mark unused entries in inode bitmap used */
8744 ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
8745 input->inode_bitmap, input->inode_bitmap - start);
8746 @@ -297,7 +295,7 @@ static int setup_new_group_blocks(struct
8747 goto exit_journal;
8748 }
8749
8750 - mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
8751 + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
8752 bh->b_data);
8753 ext4_journal_dirty_metadata(handle, bh);
8754 exit_bh:
8755 @@ -416,8 +414,8 @@ static int add_new_gdb(handle_t *handle,
8756 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
8757 gdb_num);
8758
8759 - /*
8760 - * If we are not using the primary superblock/GDT copy don't resize,
8761 + /*
8762 + * If we are not using the primary superblock/GDT copy don't resize,
8763 * because the user tools have no way of handling this. Probably a
8764 * bad time to do it anyways.
8765 */
8766 @@ -715,7 +713,7 @@ static void update_backups(struct super_
8767 exit_err:
8768 if (err) {
8769 ext4_warning(sb, __func__,
8770 - "can't update backup for group %lu (err %d), "
8771 + "can't update backup for group %u (err %d), "
8772 "forcing fsck on next reboot", group, err);
8773 sbi->s_mount_state &= ~EXT4_VALID_FS;
8774 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
8775 @@ -747,6 +745,7 @@ int ext4_group_add(struct super_block *s
8776 struct inode *inode = NULL;
8777 handle_t *handle;
8778 int gdb_off, gdb_num;
8779 + int num_grp_locked = 0;
8780 int err, err2;
8781
8782 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
8783 @@ -787,6 +786,7 @@ int ext4_group_add(struct super_block *s
8784 }
8785 }
8786
8787 +
8788 if ((err = verify_group_input(sb, input)))
8789 goto exit_put;
8790
8791 @@ -855,6 +855,7 @@ int ext4_group_add(struct super_block *s
8792 * using the new disk blocks.
8793 */
8794
8795 + num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
8796 /* Update group descriptor block for new group */
8797 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
8798 gdb_off * EXT4_DESC_SIZE(sb));
8799 @@ -862,19 +863,21 @@ int ext4_group_add(struct super_block *s
8800 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
8801 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
8802 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
8803 - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
8804 - gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
8805 + ext4_free_blks_set(sb, gdp, input->free_blocks_count);
8806 + ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
8807 + gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
8808 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
8809
8810 /*
8811 * We can allocate memory for mb_alloc based on the new group
8812 * descriptor
8813 */
8814 - if (test_opt(sb, MBALLOC)) {
8815 - err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
8816 - if (err)
8817 - goto exit_journal;
8818 + err = ext4_mb_add_groupinfo(sb, input->group, gdp);
8819 + if (err) {
8820 + ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
8821 + goto exit_journal;
8822 }
8823 +
8824 /*
8825 * Make the new blocks and inodes valid next. We do this before
8826 * increasing the group count so that once the group is enabled,
8827 @@ -915,6 +918,7 @@ int ext4_group_add(struct super_block *s
8828
8829 /* Update the global fs size fields */
8830 sbi->s_groups_count++;
8831 + ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
8832
8833 ext4_journal_dirty_metadata(handle, primary);
8834
8835 @@ -973,12 +977,10 @@ int ext4_group_extend(struct super_block
8836 ext4_group_t o_groups_count;
8837 ext4_grpblk_t last;
8838 ext4_grpblk_t add;
8839 - struct buffer_head * bh;
8840 + struct buffer_head *bh;
8841 handle_t *handle;
8842 int err;
8843 - unsigned long freed_blocks;
8844 ext4_group_t group;
8845 - struct ext4_group_info *grp;
8846
8847 /* We don't need to worry about locking wrt other resizers just
8848 * yet: we're going to revalidate es->s_blocks_count after
8849 @@ -1077,50 +1079,13 @@ int ext4_group_extend(struct super_block
8850 unlock_super(sb);
8851 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
8852 o_blocks_count + add);
8853 - ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
8854 + /* We add the blocks to the bitmap and set the group need init bit */
8855 + ext4_add_groupblocks(handle, sb, o_blocks_count, add);
8856 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
8857 o_blocks_count + add);
8858 if ((err = ext4_journal_stop(handle)))
8859 goto exit_put;
8860
8861 - /*
8862 - * Mark mballoc pages as not up to date so that they will be updated
8863 - * next time they are loaded by ext4_mb_load_buddy.
8864 - */
8865 - if (test_opt(sb, MBALLOC)) {
8866 - struct ext4_sb_info *sbi = EXT4_SB(sb);
8867 - struct inode *inode = sbi->s_buddy_cache;
8868 - int blocks_per_page;
8869 - int block;
8870 - int pnum;
8871 - struct page *page;
8872 -
8873 - /* Set buddy page as not up to date */
8874 - blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
8875 - block = group * 2;
8876 - pnum = block / blocks_per_page;
8877 - page = find_get_page(inode->i_mapping, pnum);
8878 - if (page != NULL) {
8879 - ClearPageUptodate(page);
8880 - page_cache_release(page);
8881 - }
8882 -
8883 - /* Set bitmap page as not up to date */
8884 - block++;
8885 - pnum = block / blocks_per_page;
8886 - page = find_get_page(inode->i_mapping, pnum);
8887 - if (page != NULL) {
8888 - ClearPageUptodate(page);
8889 - page_cache_release(page);
8890 - }
8891 -
8892 - /* Get the info on the last group */
8893 - grp = ext4_get_group_info(sb, group);
8894 -
8895 - /* Update free blocks in group info */
8896 - ext4_mb_update_group_info(grp, add);
8897 - }
8898 -
8899 if (test_opt(sb, DEBUG))
8900 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
8901 ext4_blocks_count(es));
8902 diff -rup b/fs/ext4//super.c a/fs/ext4///super.c
8903 --- b/fs/ext4/super.c 2009-02-11 14:37:58.000000000 +0100
8904 +++ a/fs/ext4/super.c 2009-02-11 13:47:04.000000000 +0100
8905 @@ -34,6 +34,8 @@
8906 #include <linux/namei.h>
8907 #include <linux/quotaops.h>
8908 #include <linux/seq_file.h>
8909 +#include <linux/proc_fs.h>
8910 +#include <linux/marker.h>
8911 #include <linux/log2.h>
8912 #include <linux/crc16.h>
8913 #include <asm/uaccess.h>
8914 @@ -45,6 +47,8 @@
8915 #include "namei.h"
8916 #include "group.h"
8917
8918 +struct proc_dir_entry *ext4_proc_root;
8919 +
8920 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
8921 unsigned long journal_devnum);
8922 static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
8923 @@ -89,6 +93,38 @@ ext4_fsblk_t ext4_inode_table(struct sup
8924 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
8925 }
8926
8927 +__u32 ext4_free_blks_count(struct super_block *sb,
8928 + struct ext4_group_desc *bg)
8929 +{
8930 + return le16_to_cpu(bg->bg_free_blocks_count_lo) |
8931 + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8932 + (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
8933 +}
8934 +
8935 +__u32 ext4_free_inodes_count(struct super_block *sb,
8936 + struct ext4_group_desc *bg)
8937 +{
8938 + return le16_to_cpu(bg->bg_free_inodes_count_lo) |
8939 + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8940 + (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
8941 +}
8942 +
8943 +__u32 ext4_used_dirs_count(struct super_block *sb,
8944 + struct ext4_group_desc *bg)
8945 +{
8946 + return le16_to_cpu(bg->bg_used_dirs_count_lo) |
8947 + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8948 + (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
8949 +}
8950 +
8951 +__u32 ext4_itable_unused_count(struct super_block *sb,
8952 + struct ext4_group_desc *bg)
8953 +{
8954 + return le16_to_cpu(bg->bg_itable_unused_lo) |
8955 + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8956 + (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
8957 +}
8958 +
8959 void ext4_block_bitmap_set(struct super_block *sb,
8960 struct ext4_group_desc *bg, ext4_fsblk_t blk)
8961 {
8962 @@ -113,6 +149,38 @@ void ext4_inode_table_set(struct super_b
8963 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
8964 }
8965
8966 +void ext4_free_blks_set(struct super_block *sb,
8967 + struct ext4_group_desc *bg, __u32 count)
8968 +{
8969 + bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
8970 + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8971 + bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
8972 +}
8973 +
8974 +void ext4_free_inodes_set(struct super_block *sb,
8975 + struct ext4_group_desc *bg, __u32 count)
8976 +{
8977 + bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
8978 + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8979 + bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
8980 +}
8981 +
8982 +void ext4_used_dirs_set(struct super_block *sb,
8983 + struct ext4_group_desc *bg, __u32 count)
8984 +{
8985 + bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
8986 + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8987 + bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
8988 +}
8989 +
8990 +void ext4_itable_unused_set(struct super_block *sb,
8991 + struct ext4_group_desc *bg, __u32 count)
8992 +{
8993 + bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
8994 + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8995 + bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
8996 +}
8997 +
8998 /*
8999 * Wrappers for jbd2_journal_start/end.
9000 *
9001 @@ -329,7 +397,8 @@ void ext4_abort(struct super_block *sb,
9002 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
9003 sb->s_flags |= MS_RDONLY;
9004 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
9005 - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
9006 + if (EXT4_SB(sb)->s_journal)
9007 + jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
9008 }
9009
9010 void ext4_warning(struct super_block *sb, const char *function,
9011 @@ -345,6 +414,44 @@ void ext4_warning(struct super_block *sb
9012 va_end(args);
9013 }
9014
9015 +void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
9016 + const char *function, const char *fmt, ...)
9017 +__releases(bitlock)
9018 +__acquires(bitlock)
9019 +{
9020 + va_list args;
9021 + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
9022 +
9023 + va_start(args, fmt);
9024 + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
9025 + vprintk(fmt, args);
9026 + printk("\n");
9027 + va_end(args);
9028 +
9029 + if (test_opt(sb, ERRORS_CONT)) {
9030 + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
9031 + es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
9032 + ext4_commit_super(sb, es, 0);
9033 + return;
9034 + }
9035 + ext4_unlock_group(sb, grp);
9036 + ext4_handle_error(sb);
9037 + /*
9038 + * We only get here in the ERRORS_RO case; relocking the group
9039 + * may be dangerous, but nothing bad will happen since the
9040 + * filesystem will have already been marked read/only and the
9041 + * journal has been aborted. We return 1 as a hint to callers
9042 + * who might what to use the return value from
9043 + * ext4_grp_locked_error() to distinguish beween the
9044 + * ERRORS_CONT and ERRORS_RO case, and perhaps return more
9045 + * aggressively from the ext4 function in question, with a
9046 + * more appropriate error code.
9047 + */
9048 + ext4_lock_group(sb, grp);
9049 + return;
9050 +}
9051 +
9052 +
9053 void ext4_update_dynamic_rev(struct super_block *sb)
9054 {
9055 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
9056 @@ -370,66 +477,6 @@ void ext4_update_dynamic_rev(struct supe
9057 */
9058 }
9059
9060 -int ext4_update_compat_feature(handle_t *handle,
9061 - struct super_block *sb, __u32 compat)
9062 -{
9063 - int err = 0;
9064 - if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
9065 - err = ext4_journal_get_write_access(handle,
9066 - EXT4_SB(sb)->s_sbh);
9067 - if (err)
9068 - return err;
9069 - EXT4_SET_COMPAT_FEATURE(sb, compat);
9070 - sb->s_dirt = 1;
9071 - handle->h_sync = 1;
9072 - BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
9073 - "call ext4_journal_dirty_met adata");
9074 - err = ext4_journal_dirty_metadata(handle,
9075 - EXT4_SB(sb)->s_sbh);
9076 - }
9077 - return err;
9078 -}
9079 -
9080 -int ext4_update_rocompat_feature(handle_t *handle,
9081 - struct super_block *sb, __u32 rocompat)
9082 -{
9083 - int err = 0;
9084 - if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
9085 - err = ext4_journal_get_write_access(handle,
9086 - EXT4_SB(sb)->s_sbh);
9087 - if (err)
9088 - return err;
9089 - EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
9090 - sb->s_dirt = 1;
9091 - handle->h_sync = 1;
9092 - BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
9093 - "call ext4_journal_dirty_met adata");
9094 - err = ext4_journal_dirty_metadata(handle,
9095 - EXT4_SB(sb)->s_sbh);
9096 - }
9097 - return err;
9098 -}
9099 -
9100 -int ext4_update_incompat_feature(handle_t *handle,
9101 - struct super_block *sb, __u32 incompat)
9102 -{
9103 - int err = 0;
9104 - if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
9105 - err = ext4_journal_get_write_access(handle,
9106 - EXT4_SB(sb)->s_sbh);
9107 - if (err)
9108 - return err;
9109 - EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
9110 - sb->s_dirt = 1;
9111 - handle->h_sync = 1;
9112 - BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
9113 - "call ext4_journal_dirty_met adata");
9114 - err = ext4_journal_dirty_metadata(handle,
9115 - EXT4_SB(sb)->s_sbh);
9116 - }
9117 - return err;
9118 -}
9119 -
9120 /*
9121 * Open the external journal device
9122 */
9123 @@ -505,13 +552,16 @@ static void ext4_put_super(struct super_
9124 ext4_xattr_put_super(sb);
9125 jbd2_journal_destroy(sbi->s_journal);
9126 sbi->s_journal = NULL;
9127 +
9128 if (!(sb->s_flags & MS_RDONLY)) {
9129 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
9130 es->s_state = cpu_to_le16(sbi->s_mount_state);
9131 - BUFFER_TRACE(sbi->s_sbh, "marking dirty");
9132 - mark_buffer_dirty(sbi->s_sbh);
9133 ext4_commit_super(sb, es, 1);
9134 }
9135 + if (sbi->s_proc) {
9136 + remove_proc_entry("inode_readahead_blks", sbi->s_proc);
9137 + remove_proc_entry(sb->s_id, ext4_proc_root);
9138 + }
9139
9140 for (i = 0; i < sbi->s_gdb_count; i++)
9141 brelse(sbi->s_group_desc[i]);
9142 @@ -520,6 +570,7 @@ static void ext4_put_super(struct super_
9143 percpu_counter_destroy(&sbi->s_freeblocks_counter);
9144 percpu_counter_destroy(&sbi->s_freeinodes_counter);
9145 percpu_counter_destroy(&sbi->s_dirs_counter);
9146 + percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
9147 brelse(sbi->s_sbh);
9148 #ifdef CONFIG_QUOTA
9149 for (i = 0; i < MAXQUOTAS; i++)
9150 @@ -562,11 +613,10 @@ static struct inode *ext4_alloc_inode(st
9151 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
9152 if (!ei)
9153 return NULL;
9154 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9155 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9156 ei->i_acl = EXT4_ACL_NOT_CACHED;
9157 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
9158 #endif
9159 - ei->i_block_alloc_info = NULL;
9160 ei->vfs_inode.i_version = 1;
9161 ei->vfs_inode.i_data.writeback_index = 0;
9162 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
9163 @@ -599,7 +649,7 @@ static void init_once(void *foo)
9164 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
9165
9166 INIT_LIST_HEAD(&ei->i_orphan);
9167 -#ifdef CONFIG_EXT4DEV_FS_XATTR
9168 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9169 init_rwsem(&ei->xattr_sem);
9170 #endif
9171 init_rwsem(&ei->i_data_sem);
9172 @@ -625,8 +675,7 @@ static void destroy_inodecache(void)
9173
9174 static void ext4_clear_inode(struct inode *inode)
9175 {
9176 - struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
9177 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9178 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9179 if (EXT4_I(inode)->i_acl &&
9180 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
9181 posix_acl_release(EXT4_I(inode)->i_acl);
9182 @@ -638,10 +687,7 @@ static void ext4_clear_inode(struct inod
9183 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
9184 }
9185 #endif
9186 - ext4_discard_reservation(inode);
9187 - EXT4_I(inode)->i_block_alloc_info = NULL;
9188 - if (unlikely(rsv))
9189 - kfree(rsv);
9190 + ext4_discard_preallocations(inode);
9191 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
9192 &EXT4_I(inode)->jinode);
9193 }
9194 @@ -654,7 +700,7 @@ static inline void ext4_show_quota_optio
9195
9196 if (sbi->s_jquota_fmt)
9197 seq_printf(seq, ",jqfmt=%s",
9198 - (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
9199 + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
9200
9201 if (sbi->s_qf_names[USRQUOTA])
9202 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
9203 @@ -718,7 +764,7 @@ static int ext4_show_options(struct seq_
9204 seq_puts(seq, ",debug");
9205 if (test_opt(sb, OLDALLOC))
9206 seq_puts(seq, ",oldalloc");
9207 -#ifdef CONFIG_EXT4DEV_FS_XATTR
9208 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9209 if (test_opt(sb, XATTR_USER) &&
9210 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
9211 seq_puts(seq, ",user_xattr");
9212 @@ -727,7 +773,7 @@ static int ext4_show_options(struct seq_
9213 seq_puts(seq, ",nouser_xattr");
9214 }
9215 #endif
9216 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9217 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9218 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
9219 seq_puts(seq, ",acl");
9220 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
9221 @@ -752,8 +798,6 @@ static int ext4_show_options(struct seq_
9222 seq_puts(seq, ",nobh");
9223 if (!test_opt(sb, EXTENTS))
9224 seq_puts(seq, ",noextents");
9225 - if (!test_opt(sb, MBALLOC))
9226 - seq_puts(seq, ",nomballoc");
9227 if (test_opt(sb, I_VERSION))
9228 seq_puts(seq, ",i_version");
9229 if (!test_opt(sb, DELALLOC))
9230 @@ -773,6 +817,10 @@ static int ext4_show_options(struct seq_
9231 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
9232 seq_puts(seq, ",data=writeback");
9233
9234 + if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
9235 + seq_printf(seq, ",inode_readahead_blks=%u",
9236 + sbi->s_inode_readahead_blks);
9237 +
9238 ext4_show_quota_options(seq, sb);
9239 return 0;
9240 }
9241 @@ -822,7 +870,7 @@ static struct dentry *ext4_fh_to_parent(
9242 }
9243
9244 #ifdef CONFIG_QUOTA
9245 -#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
9246 +#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
9247 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
9248
9249 static int ext4_dquot_initialize(struct inode *inode, int type);
9250 @@ -896,7 +944,7 @@ static const struct export_operations ex
9251 enum {
9252 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
9253 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
9254 - Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
9255 + Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
9256 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
9257 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
9258 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
9259 @@ -906,10 +954,11 @@ enum {
9260 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
9261 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
9262 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
9263 - Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
9264 + Opt_stripe, Opt_delalloc, Opt_nodelalloc,
9265 + Opt_inode_readahead_blks
9266 };
9267
9268 -static match_table_t tokens = {
9269 +static const match_table_t tokens = {
9270 {Opt_bsd_df, "bsddf"},
9271 {Opt_minix_df, "minixdf"},
9272 {Opt_grpid, "grpid"},
9273 @@ -923,8 +972,6 @@ static match_table_t tokens = {
9274 {Opt_err_panic, "errors=panic"},
9275 {Opt_err_ro, "errors=remount-ro"},
9276 {Opt_nouid32, "nouid32"},
9277 - {Opt_nocheck, "nocheck"},
9278 - {Opt_nocheck, "check=none"},
9279 {Opt_debug, "debug"},
9280 {Opt_oldalloc, "oldalloc"},
9281 {Opt_orlov, "orlov"},
9282 @@ -961,12 +1008,11 @@ static match_table_t tokens = {
9283 {Opt_extents, "extents"},
9284 {Opt_noextents, "noextents"},
9285 {Opt_i_version, "i_version"},
9286 - {Opt_mballoc, "mballoc"},
9287 - {Opt_nomballoc, "nomballoc"},
9288 {Opt_stripe, "stripe=%u"},
9289 {Opt_resize, "resize"},
9290 {Opt_delalloc, "delalloc"},
9291 {Opt_nodelalloc, "nodelalloc"},
9292 + {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
9293 {Opt_err, NULL},
9294 };
9295
9296 @@ -981,7 +1027,7 @@ static ext4_fsblk_t get_sb_block(void **
9297 /*todo: use simple_strtoll with >32bit ext4 */
9298 sb_block = simple_strtoul(options, &options, 0);
9299 if (*options && *options != ',') {
9300 - printk("EXT4-fs: Invalid sb specification: %s\n",
9301 + printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
9302 (char *) *data);
9303 return 1;
9304 }
9305 @@ -1060,9 +1106,6 @@ static int parse_options(char *options,
9306 case Opt_nouid32:
9307 set_opt(sbi->s_mount_opt, NO_UID32);
9308 break;
9309 - case Opt_nocheck:
9310 - clear_opt(sbi->s_mount_opt, CHECK);
9311 - break;
9312 case Opt_debug:
9313 set_opt(sbi->s_mount_opt, DEBUG);
9314 break;
9315 @@ -1072,7 +1115,7 @@ static int parse_options(char *options,
9316 case Opt_orlov:
9317 clear_opt(sbi->s_mount_opt, OLDALLOC);
9318 break;
9319 -#ifdef CONFIG_EXT4DEV_FS_XATTR
9320 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9321 case Opt_user_xattr:
9322 set_opt(sbi->s_mount_opt, XATTR_USER);
9323 break;
9324 @@ -1082,10 +1125,11 @@ static int parse_options(char *options,
9325 #else
9326 case Opt_user_xattr:
9327 case Opt_nouser_xattr:
9328 - printk("EXT4 (no)user_xattr options not supported\n");
9329 + printk(KERN_ERR "EXT4 (no)user_xattr options "
9330 + "not supported\n");
9331 break;
9332 #endif
9333 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9334 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9335 case Opt_acl:
9336 set_opt(sbi->s_mount_opt, POSIX_ACL);
9337 break;
9338 @@ -1095,7 +1139,8 @@ static int parse_options(char *options,
9339 #else
9340 case Opt_acl:
9341 case Opt_noacl:
9342 - printk("EXT4 (no)acl options not supported\n");
9343 + printk(KERN_ERR "EXT4 (no)acl options "
9344 + "not supported\n");
9345 break;
9346 #endif
9347 case Opt_reservation:
9348 @@ -1185,12 +1230,11 @@ static int parse_options(char *options,
9349 case Opt_grpjquota:
9350 qtype = GRPQUOTA;
9351 set_qf_name:
9352 - if ((sb_any_quota_enabled(sb) ||
9353 - sb_any_quota_suspended(sb)) &&
9354 + if (sb_any_quota_loaded(sb) &&
9355 !sbi->s_qf_names[qtype]) {
9356 printk(KERN_ERR
9357 - "EXT4-fs: Cannot change journaled "
9358 - "quota options when quota turned on.\n");
9359 + "EXT4-fs: Cannot change journaled "
9360 + "quota options when quota turned on.\n");
9361 return 0;
9362 }
9363 qname = match_strdup(&args[0]);
9364 @@ -1225,8 +1269,7 @@ set_qf_name:
9365 case Opt_offgrpjquota:
9366 qtype = GRPQUOTA;
9367 clear_qf_name:
9368 - if ((sb_any_quota_enabled(sb) ||
9369 - sb_any_quota_suspended(sb)) &&
9370 + if (sb_any_quota_loaded(sb) &&
9371 sbi->s_qf_names[qtype]) {
9372 printk(KERN_ERR "EXT4-fs: Cannot change "
9373 "journaled quota options when "
9374 @@ -1245,8 +1288,7 @@ clear_qf_name:
9375 case Opt_jqfmt_vfsv0:
9376 qfmt = QFMT_VFS_V0;
9377 set_qf_format:
9378 - if ((sb_any_quota_enabled(sb) ||
9379 - sb_any_quota_suspended(sb)) &&
9380 + if (sb_any_quota_loaded(sb) &&
9381 sbi->s_jquota_fmt != qfmt) {
9382 printk(KERN_ERR "EXT4-fs: Cannot change "
9383 "journaled quota options when "
9384 @@ -1265,7 +1307,7 @@ set_qf_format:
9385 set_opt(sbi->s_mount_opt, GRPQUOTA);
9386 break;
9387 case Opt_noquota:
9388 - if (sb_any_quota_enabled(sb)) {
9389 + if (sb_any_quota_loaded(sb)) {
9390 printk(KERN_ERR "EXT4-fs: Cannot change quota "
9391 "options when quota turned on.\n");
9392 return 0;
9393 @@ -1357,12 +1399,6 @@ set_qf_format:
9394 case Opt_nodelalloc:
9395 clear_opt(sbi->s_mount_opt, DELALLOC);
9396 break;
9397 - case Opt_mballoc:
9398 - set_opt(sbi->s_mount_opt, MBALLOC);
9399 - break;
9400 - case Opt_nomballoc:
9401 - clear_opt(sbi->s_mount_opt, MBALLOC);
9402 - break;
9403 case Opt_stripe:
9404 if (match_int(&args[0], &option))
9405 return 0;
9406 @@ -1373,6 +1409,13 @@ set_qf_format:
9407 case Opt_delalloc:
9408 set_opt(sbi->s_mount_opt, DELALLOC);
9409 break;
9410 + case Opt_inode_readahead_blks:
9411 + if (match_int(&args[0], &option))
9412 + return 0;
9413 + if (option < 0 || option > (1 << 30))
9414 + return 0;
9415 + sbi->s_inode_readahead_blks = option;
9416 + break;
9417 default:
9418 printk(KERN_ERR
9419 "EXT4-fs: Unrecognized mount option \"%s\" "
9420 @@ -1465,7 +1508,7 @@ static int ext4_setup_super(struct super
9421
9422 ext4_commit_super(sb, es, 1);
9423 if (test_opt(sb, DEBUG))
9424 - printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, "
9425 + printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
9426 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
9427 sb->s_blocksize,
9428 sbi->s_groups_count,
9429 @@ -1473,14 +1516,14 @@ static int ext4_setup_super(struct super
9430 EXT4_INODES_PER_GROUP(sb),
9431 sbi->s_mount_opt);
9432
9433 - printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id);
9434 if (EXT4_SB(sb)->s_journal->j_inode == NULL) {
9435 char b[BDEVNAME_SIZE];
9436
9437 - printk("external journal on %s\n",
9438 - bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
9439 + printk(KERN_INFO "EXT4 FS on %s, external journal on %s\n",
9440 + sb->s_id, bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
9441 } else {
9442 - printk("internal journal\n");
9443 + printk(KERN_INFO "EXT4 FS on %s, internal journal\n",
9444 + sb->s_id);
9445 }
9446 return res;
9447 }
9448 @@ -1493,7 +1536,6 @@ static int ext4_fill_flex_info(struct su
9449 ext4_group_t flex_group_count;
9450 ext4_group_t flex_group;
9451 int groups_per_flex = 0;
9452 - __u64 block_bitmap = 0;
9453 int i;
9454
9455 if (!sbi->s_es->s_log_groups_per_flex) {
9456 @@ -1512,21 +1554,18 @@ static int ext4_fill_flex_info(struct su
9457 sizeof(struct flex_groups), GFP_KERNEL);
9458 if (sbi->s_flex_groups == NULL) {
9459 printk(KERN_ERR "EXT4-fs: not enough memory for "
9460 - "%lu flex groups\n", flex_group_count);
9461 + "%u flex groups\n", flex_group_count);
9462 goto failed;
9463 }
9464
9465 - gdp = ext4_get_group_desc(sb, 1, &bh);
9466 - block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
9467 -
9468 for (i = 0; i < sbi->s_groups_count; i++) {
9469 gdp = ext4_get_group_desc(sb, i, &bh);
9470
9471 flex_group = ext4_flex_group(sbi, i);
9472 sbi->s_flex_groups[flex_group].free_inodes +=
9473 - le16_to_cpu(gdp->bg_free_inodes_count);
9474 + ext4_free_inodes_count(sb, gdp);
9475 sbi->s_flex_groups[flex_group].free_blocks +=
9476 - le16_to_cpu(gdp->bg_free_blocks_count);
9477 + ext4_free_blks_count(sb, gdp);
9478 }
9479
9480 return 1;
9481 @@ -1586,7 +1625,7 @@ static int ext4_check_descriptors(struct
9482 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
9483 flexbg_flag = 1;
9484
9485 - ext4_debug ("Checking group descriptors");
9486 + ext4_debug("Checking group descriptors");
9487
9488 for (i = 0; i < sbi->s_groups_count; i++) {
9489 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
9490 @@ -1600,29 +1639,29 @@ static int ext4_check_descriptors(struct
9491 block_bitmap = ext4_block_bitmap(sb, gdp);
9492 if (block_bitmap < first_block || block_bitmap > last_block) {
9493 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9494 - "Block bitmap for group %lu not in group "
9495 - "(block %llu)!", i, block_bitmap);
9496 + "Block bitmap for group %u not in group "
9497 + "(block %llu)!\n", i, block_bitmap);
9498 return 0;
9499 }
9500 inode_bitmap = ext4_inode_bitmap(sb, gdp);
9501 if (inode_bitmap < first_block || inode_bitmap > last_block) {
9502 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9503 - "Inode bitmap for group %lu not in group "
9504 - "(block %llu)!", i, inode_bitmap);
9505 + "Inode bitmap for group %u not in group "
9506 + "(block %llu)!\n", i, inode_bitmap);
9507 return 0;
9508 }
9509 inode_table = ext4_inode_table(sb, gdp);
9510 if (inode_table < first_block ||
9511 inode_table + sbi->s_itb_per_group - 1 > last_block) {
9512 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9513 - "Inode table for group %lu not in group "
9514 - "(block %llu)!", i, inode_table);
9515 + "Inode table for group %u not in group "
9516 + "(block %llu)!\n", i, inode_table);
9517 return 0;
9518 }
9519 spin_lock(sb_bgl_lock(sbi, i));
9520 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
9521 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9522 - "Checksum for group %lu failed (%u!=%u)\n",
9523 + "Checksum for group %u failed (%u!=%u)\n",
9524 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
9525 gdp)), le16_to_cpu(gdp->bg_checksum));
9526 if (!(sb->s_flags & MS_RDONLY)) {
9527 @@ -1718,9 +1757,9 @@ static void ext4_orphan_cleanup(struct s
9528 DQUOT_INIT(inode);
9529 if (inode->i_nlink) {
9530 printk(KERN_DEBUG
9531 - "%s: truncating inode %lu to %Ld bytes\n",
9532 + "%s: truncating inode %lu to %lld bytes\n",
9533 __func__, inode->i_ino, inode->i_size);
9534 - jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
9535 + jbd_debug(2, "truncating inode %lu to %lld bytes\n",
9536 inode->i_ino, inode->i_size);
9537 ext4_truncate(inode);
9538 nr_truncates++;
9539 @@ -1761,13 +1800,13 @@ static void ext4_orphan_cleanup(struct s
9540 *
9541 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
9542 */
9543 -static loff_t ext4_max_size(int blkbits)
9544 +static loff_t ext4_max_size(int blkbits, int has_huge_files)
9545 {
9546 loff_t res;
9547 loff_t upper_limit = MAX_LFS_FILESIZE;
9548
9549 /* small i_blocks in vfs inode? */
9550 - if (sizeof(blkcnt_t) < sizeof(u64)) {
9551 + if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
9552 /*
9553 * CONFIG_LSF is not enabled implies the inode
9554 * i_block represent total blocks in 512 bytes
9555 @@ -1797,7 +1836,7 @@ static loff_t ext4_max_size(int blkbits)
9556 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
9557 * We need to be 1 filesystem block less than the 2^48 sector limit.
9558 */
9559 -static loff_t ext4_max_bitmap_size(int bits)
9560 +static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
9561 {
9562 loff_t res = EXT4_NDIR_BLOCKS;
9563 int meta_blocks;
9564 @@ -1810,11 +1849,11 @@ static loff_t ext4_max_bitmap_size(int b
9565 * total number of 512 bytes blocks of the file
9566 */
9567
9568 - if (sizeof(blkcnt_t) < sizeof(u64)) {
9569 + if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
9570 /*
9571 - * CONFIG_LSF is not enabled implies the inode
9572 - * i_block represent total blocks in 512 bytes
9573 - * 32 == size of vfs inode i_blocks * 8
9574 + * !has_huge_files or CONFIG_LSF is not enabled
9575 + * implies the inode i_block represent total blocks in
9576 + * 512 bytes 32 == size of vfs inode i_blocks * 8
9577 */
9578 upper_limit = (1LL << 32) - 1;
9579
9580 @@ -1918,12 +1957,13 @@ static int ext4_fill_super(struct super_
9581 unsigned long journal_devnum = 0;
9582 unsigned long def_mount_opts;
9583 struct inode *root;
9584 + char *cp;
9585 int ret = -EINVAL;
9586 int blocksize;
9587 int db_count;
9588 int i;
9589 - int needs_recovery;
9590 - __le32 features;
9591 + int needs_recovery, has_huge_files;
9592 + int features;
9593 __u64 blocks_count;
9594 int err;
9595
9596 @@ -1934,10 +1974,15 @@ static int ext4_fill_super(struct super_
9597 sbi->s_mount_opt = 0;
9598 sbi->s_resuid = EXT4_DEF_RESUID;
9599 sbi->s_resgid = EXT4_DEF_RESGID;
9600 + sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
9601 sbi->s_sb_block = sb_block;
9602
9603 unlock_kernel();
9604
9605 + /* Cleanup superblock name */
9606 + for (cp = sb->s_id; (cp = strchr(cp, '/'));)
9607 + *cp = '!';
9608 +
9609 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
9610 if (!blocksize) {
9611 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
9612 @@ -1977,11 +2022,11 @@ static int ext4_fill_super(struct super_
9613 set_opt(sbi->s_mount_opt, GRPID);
9614 if (def_mount_opts & EXT4_DEFM_UID16)
9615 set_opt(sbi->s_mount_opt, NO_UID32);
9616 -#ifdef CONFIG_EXT4DEV_FS_XATTR
9617 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9618 if (def_mount_opts & EXT4_DEFM_XATTR_USER)
9619 set_opt(sbi->s_mount_opt, XATTR_USER);
9620 #endif
9621 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9622 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9623 if (def_mount_opts & EXT4_DEFM_ACL)
9624 set_opt(sbi->s_mount_opt, POSIX_ACL);
9625 #endif
9626 @@ -2016,11 +2061,6 @@ static int ext4_fill_super(struct super_
9627 ext4_warning(sb, __func__,
9628 "extents feature not enabled on this filesystem, "
9629 "use tune2fs.\n");
9630 - /*
9631 - * turn on mballoc code by default in ext4 filesystem
9632 - * Use -o nomballoc to turn it off
9633 - */
9634 - set_opt(sbi->s_mount_opt, MBALLOC);
9635
9636 /*
9637 * enable delayed allocation by default
9638 @@ -2045,16 +2085,6 @@ static int ext4_fill_super(struct super_
9639 "running e2fsck is recommended\n");
9640
9641 /*
9642 - * Since ext4 is still considered development code, we require
9643 - * that the TEST_FILESYS flag in s->flags be set.
9644 - */
9645 - if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
9646 - printk(KERN_WARNING "EXT4-fs: %s: not marked "
9647 - "OK to use with test code.\n", sb->s_id);
9648 - goto failed_mount;
9649 - }
9650 -
9651 - /*
9652 * Check feature flags regardless of the revision level, since we
9653 * previously didn't change the revision level when setting the flags,
9654 * so there is a chance incompat flags are set on a rev 0 filesystem.
9655 @@ -2062,18 +2092,22 @@ static int ext4_fill_super(struct super_
9656 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
9657 if (features) {
9658 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
9659 - "unsupported optional features (%x).\n",
9660 - sb->s_id, le32_to_cpu(features));
9661 + "unsupported optional features (%x).\n", sb->s_id,
9662 + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
9663 + ~EXT4_FEATURE_INCOMPAT_SUPP));
9664 goto failed_mount;
9665 }
9666 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
9667 if (!(sb->s_flags & MS_RDONLY) && features) {
9668 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
9669 - "unsupported optional features (%x).\n",
9670 - sb->s_id, le32_to_cpu(features));
9671 + "unsupported optional features (%x).\n", sb->s_id,
9672 + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
9673 + ~EXT4_FEATURE_RO_COMPAT_SUPP));
9674 goto failed_mount;
9675 }
9676 - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
9677 + has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
9678 + EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
9679 + if (has_huge_files) {
9680 /*
9681 * Large file size enabled file system can only be
9682 * mount if kernel is build with CONFIG_LSF
9683 @@ -2123,8 +2157,9 @@ static int ext4_fill_super(struct super_
9684 }
9685 }
9686
9687 - sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
9688 - sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
9689 + sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
9690 + has_huge_files);
9691 + sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
9692
9693 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
9694 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
9695 @@ -2172,6 +2207,18 @@ static int ext4_fill_super(struct super_
9696 for (i = 0; i < 4; i++)
9697 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
9698 sbi->s_def_hash_version = es->s_def_hash_version;
9699 + i = le32_to_cpu(es->s_flags);
9700 + if (i & EXT2_FLAGS_UNSIGNED_HASH)
9701 + sbi->s_hash_unsigned = 3;
9702 + else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
9703 +#ifdef __CHAR_UNSIGNED__
9704 + es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
9705 + sbi->s_hash_unsigned = 3;
9706 +#else
9707 + es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
9708 +#endif
9709 + sb->s_dirt = 1;
9710 + }
9711
9712 if (sbi->s_blocks_per_group > blocksize * 8) {
9713 printk(KERN_ERR
9714 @@ -2223,6 +2270,16 @@ static int ext4_fill_super(struct super_
9715 goto failed_mount;
9716 }
9717
9718 +#ifdef CONFIG_PROC_FS
9719 + if (ext4_proc_root)
9720 + sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
9721 +
9722 + if (sbi->s_proc)
9723 + proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
9724 + &ext4_ui_proc_fops,
9725 + &sbi->s_inode_readahead_blks);
9726 +#endif
9727 +
9728 bgl_lock_init(&sbi->s_blockgroup_lock);
9729
9730 for (i = 0; i < db_count; i++) {
9731 @@ -2261,24 +2318,14 @@ static int ext4_fill_super(struct super_
9732 err = percpu_counter_init(&sbi->s_dirs_counter,
9733 ext4_count_dirs(sb));
9734 }
9735 + if (!err) {
9736 + err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
9737 + }
9738 if (err) {
9739 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
9740 goto failed_mount3;
9741 }
9742
9743 - /* per fileystem reservation list head & lock */
9744 - spin_lock_init(&sbi->s_rsv_window_lock);
9745 - sbi->s_rsv_window_root = RB_ROOT;
9746 - /* Add a single, static dummy reservation to the start of the
9747 - * reservation window list --- it gives us a placeholder for
9748 - * append-at-start-of-list which makes the allocation logic
9749 - * _much_ simpler. */
9750 - sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
9751 - sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
9752 - sbi->s_rsv_window_head.rsv_alloc_hit = 0;
9753 - sbi->s_rsv_window_head.rsv_goal_size = 0;
9754 - ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
9755 -
9756 sbi->s_stripe = ext4_get_stripe_size(sbi);
9757
9758 /*
9759 @@ -2498,11 +2545,16 @@ failed_mount3:
9760 percpu_counter_destroy(&sbi->s_freeblocks_counter);
9761 percpu_counter_destroy(&sbi->s_freeinodes_counter);
9762 percpu_counter_destroy(&sbi->s_dirs_counter);
9763 + percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
9764 failed_mount2:
9765 for (i = 0; i < db_count; i++)
9766 brelse(sbi->s_group_desc[i]);
9767 kfree(sbi->s_group_desc);
9768 failed_mount:
9769 + if (sbi->s_proc) {
9770 + remove_proc_entry("inode_readahead_blks", sbi->s_proc);
9771 + remove_proc_entry(sb->s_id, ext4_proc_root);
9772 + }
9773 #ifdef CONFIG_QUOTA
9774 for (i = 0; i < MAXQUOTAS; i++)
9775 kfree(sbi->s_qf_names[i]);
9776 @@ -2561,7 +2613,7 @@ static journal_t *ext4_get_journal(struc
9777 return NULL;
9778 }
9779
9780 - jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
9781 + jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
9782 journal_inode, journal_inode->i_size);
9783 if (!S_ISREG(journal_inode->i_mode)) {
9784 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
9785 @@ -2724,6 +2776,11 @@ static int ext4_load_journal(struct supe
9786 return -EINVAL;
9787 }
9788
9789 + if (journal->j_flags & JBD2_BARRIER)
9790 + printk(KERN_INFO "EXT4-fs: barriers enabled\n");
9791 + else
9792 + printk(KERN_INFO "EXT4-fs: barriers disabled\n");
9793 +
9794 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
9795 err = jbd2_journal_update_format(journal);
9796 if (err) {
9797 @@ -2823,8 +2880,11 @@ static void ext4_commit_super(struct sup
9798 set_buffer_uptodate(sbh);
9799 }
9800 es->s_wtime = cpu_to_le32(get_seconds());
9801 - ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
9802 - es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
9803 + ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
9804 + &EXT4_SB(sb)->s_freeblocks_counter));
9805 + es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
9806 + &EXT4_SB(sb)->s_freeinodes_counter));
9807 +
9808 BUFFER_TRACE(sbh, "marking dirty");
9809 mark_buffer_dirty(sbh);
9810 if (sync) {
9811 @@ -2850,7 +2910,9 @@ static void ext4_mark_recovery_complete(
9812 journal_t *journal = EXT4_SB(sb)->s_journal;
9813
9814 jbd2_journal_lock_updates(journal);
9815 - jbd2_journal_flush(journal);
9816 + if (jbd2_journal_flush(journal) < 0)
9817 + goto out;
9818 +
9819 lock_super(sb);
9820 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
9821 sb->s_flags & MS_RDONLY) {
9822 @@ -2859,6 +2921,8 @@ static void ext4_mark_recovery_complete(
9823 ext4_commit_super(sb, es, 1);
9824 }
9825 unlock_super(sb);
9826 +
9827 +out:
9828 jbd2_journal_unlock_updates(journal);
9829 }
9830
9831 @@ -2934,6 +2998,7 @@ static int ext4_sync_fs(struct super_blo
9832 {
9833 int ret = 0;
9834
9835 + trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
9836 sb->s_dirt = 0;
9837 if (wait)
9838 ret = ext4_force_commit(sb);
9839 @@ -2955,7 +3020,13 @@ static void ext4_write_super_lockfs(stru
9840
9841 /* Now we set up the journal barrier. */
9842 jbd2_journal_lock_updates(journal);
9843 - jbd2_journal_flush(journal);
9844 +
9845 + /*
9846 + * We don't want to clear needs_recovery flag when we failed
9847 + * to flush the journal.
9848 + */
9849 + if (jbd2_journal_flush(journal) < 0)
9850 + return;
9851
9852 /* Journal blocked and flushed, clear needs_recovery flag. */
9853 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
9854 @@ -3053,13 +3124,14 @@ static int ext4_remount(struct super_blo
9855 ext4_mark_recovery_complete(sb, es);
9856 lock_super(sb);
9857 } else {
9858 - __le32 ret;
9859 + int ret;
9860 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
9861 ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
9862 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
9863 "remount RDWR because of unsupported "
9864 - "optional features (%x).\n",
9865 - sb->s_id, le32_to_cpu(ret));
9866 + "optional features (%x).\n", sb->s_id,
9867 + (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
9868 + ~EXT4_FEATURE_RO_COMPAT_SUPP));
9869 err = -EROFS;
9870 goto restore_opts;
9871 }
9872 @@ -3076,7 +3148,7 @@ static int ext4_remount(struct super_blo
9873 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
9874 printk(KERN_ERR
9875 "EXT4-fs: ext4_remount: "
9876 - "Checksum for group %lu failed (%u!=%u)\n",
9877 + "Checksum for group %u failed (%u!=%u)\n",
9878 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
9879 le16_to_cpu(gdp->bg_checksum));
9880 err = -EINVAL;
9881 @@ -3189,7 +3261,8 @@ static int ext4_statfs(struct dentry *de
9882 buf->f_type = EXT4_SUPER_MAGIC;
9883 buf->f_bsize = sb->s_blocksize;
9884 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
9885 - buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
9886 + buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
9887 + percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
9888 ext4_free_blocks_count_set(es, buf->f_bfree);
9889 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
9890 if (buf->f_bfree < ext4_r_blocks_count(es))
9891 @@ -3394,8 +3467,12 @@ static int ext4_quota_on(struct super_bl
9892 * otherwise be livelocked...
9893 */
9894 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
9895 - jbd2_journal_flush(EXT4_SB(sb)->s_journal);
9896 + err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
9897 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
9898 + if (err) {
9899 + path_put(&nd.path);
9900 + return err;
9901 + }
9902 }
9903
9904 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
9905 @@ -3459,7 +3536,7 @@ static ssize_t ext4_quota_write(struct s
9906 handle_t *handle = journal_current_handle();
9907
9908 if (!handle) {
9909 - printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)"
9910 + printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
9911 " cancelled because transaction is not started.\n",
9912 (unsigned long long)off, (unsigned long long)len);
9913 return -EIO;
9914 @@ -3520,18 +3597,73 @@ static int ext4_get_sb(struct file_syste
9915 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
9916 }
9917
9918 +#ifdef CONFIG_PROC_FS
9919 +static int ext4_ui_proc_show(struct seq_file *m, void *v)
9920 +{
9921 + unsigned int *p = m->private;
9922 +
9923 + seq_printf(m, "%u\n", *p);
9924 + return 0;
9925 +}
9926 +
9927 +static int ext4_ui_proc_open(struct inode *inode, struct file *file)
9928 +{
9929 + return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
9930 +}
9931 +
9932 +static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
9933 + size_t cnt, loff_t *ppos)
9934 +{
9935 + unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
9936 + char str[32];
9937 +
9938 + if (cnt >= sizeof(str))
9939 + return -EINVAL;
9940 + if (copy_from_user(str, buf, cnt))
9941 + return -EFAULT;
9942 +
9943 + *p = simple_strtoul(str, NULL, 0);
9944 + return cnt;
9945 +}
9946 +
9947 +const struct file_operations ext4_ui_proc_fops = {
9948 + .owner = THIS_MODULE,
9949 + .open = ext4_ui_proc_open,
9950 + .read = seq_read,
9951 + .llseek = seq_lseek,
9952 + .release = single_release,
9953 + .write = ext4_ui_proc_write,
9954 +};
9955 +#endif
9956 +
9957 +static struct file_system_type ext4_fs_type = {
9958 + .owner = THIS_MODULE,
9959 + .name = "ext4",
9960 + .get_sb = ext4_get_sb,
9961 + .kill_sb = kill_block_super,
9962 + .fs_flags = FS_REQUIRES_DEV,
9963 +};
9964 +
9965 +static int ext4dev_get_sb(struct file_system_type *fs_type,
9966 + int flags, const char *dev_name, void *data, struct vfsmount *mnt)
9967 +{
9968 + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
9969 +}
9970 +
9971 static struct file_system_type ext4dev_fs_type = {
9972 .owner = THIS_MODULE,
9973 .name = "ext4dev",
9974 - .get_sb = ext4_get_sb,
9975 + .get_sb = ext4dev_get_sb,
9976 .kill_sb = kill_block_super,
9977 .fs_flags = FS_REQUIRES_DEV,
9978 };
9979 +MODULE_ALIAS("ext4dev");
9980
9981 static int __init init_ext4_fs(void)
9982 {
9983 int err;
9984
9985 + ext4_proc_root = proc_mkdir("fs/ext4", NULL);
9986 err = init_ext4_mballoc();
9987 if (err)
9988 return err;
9989 @@ -3542,9 +3674,14 @@ static int __init init_ext4_fs(void)
9990 err = init_inodecache();
9991 if (err)
9992 goto out1;
9993 - err = register_filesystem(&ext4dev_fs_type);
9994 + err = register_filesystem(&ext4_fs_type);
9995 if (err)
9996 goto out;
9997 + err = register_filesystem(&ext4dev_fs_type);
9998 + if (err) {
9999 + unregister_filesystem(&ext4_fs_type);
10000 + goto out;
10001 + }
10002 return 0;
10003 out:
10004 destroy_inodecache();
10005 @@ -3557,10 +3694,12 @@ out2:
10006
10007 static void __exit exit_ext4_fs(void)
10008 {
10009 + unregister_filesystem(&ext4_fs_type);
10010 unregister_filesystem(&ext4dev_fs_type);
10011 destroy_inodecache();
10012 exit_ext4_xattr();
10013 exit_ext4_mballoc();
10014 + remove_proc_entry("fs/ext4", NULL);
10015 }
10016
10017 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
10018 diff -rup b/fs/ext4//symlink.c a/fs/ext4///symlink.c
10019 --- b/fs/ext4/symlink.c 2009-02-11 14:37:58.000000000 +0100
10020 +++ a/fs/ext4/symlink.c 2009-02-10 21:40:11.000000000 +0100
10021 @@ -23,10 +23,10 @@
10022 #include "ext4.h"
10023 #include "xattr.h"
10024
10025 -static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
10026 +static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
10027 {
10028 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
10029 - nd_set_link(nd, (char*)ei->i_data);
10030 + nd_set_link(nd, (char *) ei->i_data);
10031 return NULL;
10032 }
10033
10034 @@ -34,7 +34,7 @@ const struct inode_operations ext4_symli
10035 .readlink = generic_readlink,
10036 .follow_link = page_follow_link_light,
10037 .put_link = page_put_link,
10038 -#ifdef CONFIG_EXT4DEV_FS_XATTR
10039 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
10040 .setxattr = generic_setxattr,
10041 .getxattr = generic_getxattr,
10042 .listxattr = ext4_listxattr,
10043 @@ -45,7 +45,7 @@ const struct inode_operations ext4_symli
10044 const struct inode_operations ext4_fast_symlink_inode_operations = {
10045 .readlink = generic_readlink,
10046 .follow_link = ext4_follow_link,
10047 -#ifdef CONFIG_EXT4DEV_FS_XATTR
10048 +#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
10049 .setxattr = generic_setxattr,
10050 .getxattr = generic_getxattr,
10051 .listxattr = ext4_listxattr,
10052 diff -rup b/fs/ext4//xattr.c a/fs/ext4///xattr.c
10053 --- b/fs/ext4/xattr.c 2009-02-11 14:37:58.000000000 +0100
10054 +++ a/fs/ext4/xattr.c 2009-02-10 21:40:11.000000000 +0100
10055 @@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache
10056
10057 static struct xattr_handler *ext4_xattr_handler_map[] = {
10058 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
10059 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
10060 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
10061 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
10062 [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
10063 #endif
10064 [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
10065 -#ifdef CONFIG_EXT4DEV_FS_SECURITY
10066 +#if defined(CONFIG_EXT4_FS_SECURITY) || defined(CONFIG_EXT4DEV_FS_SECURITY)
10067 [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
10068 #endif
10069 };
10070 @@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_
10071 struct xattr_handler *ext4_xattr_handlers[] = {
10072 &ext4_xattr_user_handler,
10073 &ext4_xattr_trusted_handler,
10074 -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
10075 +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
10076 &ext4_xattr_acl_access_handler,
10077 &ext4_xattr_acl_default_handler,
10078 #endif
10079 -#ifdef CONFIG_EXT4DEV_FS_SECURITY
10080 +#if defined(CONFIG_EXT4_FS_SECURITY) || defined(CONFIG_EXT4DEV_FS_SECURITY)
10081 &ext4_xattr_security_handler,
10082 #endif
10083 NULL
10084 @@ -810,8 +810,8 @@ inserted:
10085 /* We need to allocate a new block */
10086 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
10087 EXT4_I(inode)->i_block_group);
10088 - ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
10089 - goal, &error);
10090 + ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode,
10091 + goal, NULL, &error);
10092 if (error)
10093 goto cleanup;
10094 ea_idebug(inode, "creating block %d", block);
10095 diff -rup b/fs/ext4//xattr.h a/fs/ext4///xattr.h
10096 --- b/fs/ext4/xattr.h 2009-02-11 14:37:58.000000000 +0100
10097 +++ a/fs/ext4/xattr.h 2009-02-10 21:40:14.000000000 +0100
10098 @@ -51,8 +51,8 @@ struct ext4_xattr_entry {
10099 (((name_len) + EXT4_XATTR_ROUND + \
10100 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
10101 #define EXT4_XATTR_NEXT(entry) \
10102 - ( (struct ext4_xattr_entry *)( \
10103 - (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) )
10104 + ((struct ext4_xattr_entry *)( \
10105 + (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
10106 #define EXT4_XATTR_SIZE(size) \
10107 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
10108
10109 @@ -63,7 +63,7 @@ struct ext4_xattr_entry {
10110 EXT4_I(inode)->i_extra_isize))
10111 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
10112
10113 -# ifdef CONFIG_EXT4DEV_FS_XATTR
10114 +# if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
10115
10116 extern struct xattr_handler ext4_xattr_user_handler;
10117 extern struct xattr_handler ext4_xattr_trusted_handler;
10118 @@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void);
10119
10120 extern struct xattr_handler *ext4_xattr_handlers[];
10121
10122 -# else /* CONFIG_EXT4DEV_FS_XATTR */
10123 +# else /* CONFIG_EXT4_FS_XATTR */
10124
10125 static inline int
10126 ext4_xattr_get(struct inode *inode, int name_index, const char *name,
10127 @@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode
10128
10129 #define ext4_xattr_handlers NULL
10130
10131 -# endif /* CONFIG_EXT4DEV_FS_XATTR */
10132 +# endif /* CONFIG_EXT4_FS_XATTR */
10133
10134 -#ifdef CONFIG_EXT4DEV_FS_SECURITY
10135 +#if defined(CONFIG_EXT4_FS_SECURITY) || defined(CONFIG_EXT4DEV_FS_SECURITY)
10136 extern int ext4_init_security(handle_t *handle, struct inode *inode,
10137 struct inode *dir);
10138 #else