]> git.ipfire.org Git - people/pmueller/ipfire-2.x.git/blame - src/patches/suse-2.6.27.31/patches.fixes/ext4-fixes-2.6.28-rc8.patch
Add a patch to fix Intel E100 wake-on-lan problems.
[people/pmueller/ipfire-2.x.git] / src / patches / suse-2.6.27.31 / patches.fixes / ext4-fixes-2.6.28-rc8.patch
CommitLineData
2cb7cef9
BS
1From: Mingming Cao <cmm@us.ibm.com>
2Subject: Ext4 update
3References: fate#303783
4
5Bring ext4 codebase to the state of 2.6.28-rc8. It has lots of bugfixes, some
6of them really important ones (data corruption, easily triggerable kernel
7oopses with delayed allocation, ...).
8
9Signed-off-by: Jan Kara <jack@suse.cz>
10
11diff -rup b/fs/ext4//acl.h a/fs/ext4///acl.h
12--- b/fs/ext4/acl.h 2009-02-11 14:37:58.000000000 +0100
13+++ a/fs/ext4/acl.h 2009-02-10 21:40:14.000000000 +0100
14@@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t
15 }
16 }
17
18-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
19+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
20
21 /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
22 if the ACL has not been cached */
23 #define EXT4_ACL_NOT_CACHED ((void *)-1)
24
25 /* acl.c */
26-extern int ext4_permission (struct inode *, int);
27-extern int ext4_acl_chmod (struct inode *);
28-extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
29+extern int ext4_permission(struct inode *, int);
30+extern int ext4_acl_chmod(struct inode *);
31+extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
32
33-#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */
34+#else /* CONFIG_EXT4_FS_POSIX_ACL */
35 #include <linux/sched.h>
36 #define ext4_permission NULL
37
38@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct i
39 {
40 return 0;
41 }
42-#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */
43+#endif /* CONFIG_EXT4_FS_POSIX_ACL */
44
45diff -rup b/fs/ext4//balloc.c a/fs/ext4///balloc.c
46--- b/fs/ext4/balloc.c 2009-02-11 14:37:58.000000000 +0100
47+++ a/fs/ext4/balloc.c 2009-02-10 21:40:11.000000000 +0100
48@@ -20,6 +20,7 @@
49 #include "ext4.h"
50 #include "ext4_jbd2.h"
51 #include "group.h"
52+#include "mballoc.h"
53
54 /*
55 * balloc.c contains the blocks allocation and deallocation routines
56@@ -83,6 +84,7 @@ static int ext4_group_used_meta_blocks(s
57 }
58 return used_blocks;
59 }
60+
61 /* Initializes an uninitialized block bitmap if given, and returns the
62 * number of blocks free in the group. */
63 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
64@@ -99,10 +101,10 @@ unsigned ext4_init_block_bitmap(struct s
65 * essentially implementing a per-group read-only flag. */
66 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
67 ext4_error(sb, __func__,
68- "Checksum bad for group %lu\n", block_group);
69- gdp->bg_free_blocks_count = 0;
70- gdp->bg_free_inodes_count = 0;
71- gdp->bg_itable_unused = 0;
72+ "Checksum bad for group %u\n", block_group);
73+ ext4_free_blks_set(sb, gdp, 0);
74+ ext4_free_inodes_set(sb, gdp, 0);
75+ ext4_itable_unused_set(sb, gdp, 0);
76 memset(bh->b_data, 0xff, sb->s_blocksize);
77 return 0;
78 }
79@@ -132,7 +134,7 @@ unsigned ext4_init_block_bitmap(struct s
80 */
81 group_blocks = ext4_blocks_count(sbi->s_es) -
82 le32_to_cpu(sbi->s_es->s_first_data_block) -
83- (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1));
84+ (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
85 } else {
86 group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
87 }
88@@ -200,20 +202,20 @@ unsigned ext4_init_block_bitmap(struct s
89 * @bh: pointer to the buffer head to store the block
90 * group descriptor
91 */
92-struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
93+struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
94 ext4_group_t block_group,
95- struct buffer_head ** bh)
96+ struct buffer_head **bh)
97 {
98- unsigned long group_desc;
99- unsigned long offset;
100- struct ext4_group_desc * desc;
101+ unsigned int group_desc;
102+ unsigned int offset;
103+ struct ext4_group_desc *desc;
104 struct ext4_sb_info *sbi = EXT4_SB(sb);
105
106 if (block_group >= sbi->s_groups_count) {
107- ext4_error (sb, "ext4_get_group_desc",
108- "block_group >= groups_count - "
109- "block_group = %lu, groups_count = %lu",
110- block_group, sbi->s_groups_count);
111+ ext4_error(sb, "ext4_get_group_desc",
112+ "block_group >= groups_count - "
113+ "block_group = %u, groups_count = %u",
114+ block_group, sbi->s_groups_count);
115
116 return NULL;
117 }
118@@ -222,10 +224,10 @@ struct ext4_group_desc * ext4_get_group_
119 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
120 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
121 if (!sbi->s_group_desc[group_desc]) {
122- ext4_error (sb, "ext4_get_group_desc",
123- "Group descriptor not loaded - "
124- "block_group = %lu, group_desc = %lu, desc = %lu",
125- block_group, group_desc, offset);
126+ ext4_error(sb, "ext4_get_group_desc",
127+ "Group descriptor not loaded - "
128+ "block_group = %u, group_desc = %u, desc = %u",
129+ block_group, group_desc, offset);
130 return NULL;
131 }
132
133@@ -302,8 +304,8 @@ err_out:
134 struct buffer_head *
135 ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
136 {
137- struct ext4_group_desc * desc;
138- struct buffer_head * bh = NULL;
139+ struct ext4_group_desc *desc;
140+ struct buffer_head *bh = NULL;
141 ext4_fsblk_t bitmap_blk;
142
143 desc = ext4_get_group_desc(sb, block_group, NULL);
144@@ -314,27 +316,50 @@ ext4_read_block_bitmap(struct super_bloc
145 if (unlikely(!bh)) {
146 ext4_error(sb, __func__,
147 "Cannot read block bitmap - "
148- "block_group = %lu, block_bitmap = %llu",
149+ "block_group = %u, block_bitmap = %llu",
150 block_group, bitmap_blk);
151 return NULL;
152 }
153- if (bh_uptodate_or_lock(bh))
154+
155+ if (bitmap_uptodate(bh))
156 return bh;
157
158+ lock_buffer(bh);
159+ if (bitmap_uptodate(bh)) {
160+ unlock_buffer(bh);
161+ return bh;
162+ }
163 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
164 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
165 ext4_init_block_bitmap(sb, bh, block_group, desc);
166+ set_bitmap_uptodate(bh);
167 set_buffer_uptodate(bh);
168- unlock_buffer(bh);
169 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
170+ unlock_buffer(bh);
171 return bh;
172 }
173 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
174+ if (buffer_uptodate(bh)) {
175+ /*
176+ * if not uninit if bh is uptodate,
177+ * bitmap is also uptodate
178+ */
179+ set_bitmap_uptodate(bh);
180+ unlock_buffer(bh);
181+ return bh;
182+ }
183+ /*
184+ * submit the buffer_head for read. We can
185+ * safely mark the bitmap as uptodate now.
186+ * We do it here so the bitmap uptodate bit
187+ * get set with buffer lock held.
188+ */
189+ set_bitmap_uptodate(bh);
190 if (bh_submit_read(bh) < 0) {
191 put_bh(bh);
192 ext4_error(sb, __func__,
193 "Cannot read block bitmap - "
194- "block_group = %lu, block_bitmap = %llu",
195+ "block_group = %u, block_bitmap = %llu",
196 block_group, bitmap_blk);
197 return NULL;
198 }
199@@ -345,356 +370,50 @@ ext4_read_block_bitmap(struct super_bloc
200 */
201 return bh;
202 }
203-/*
204- * The reservation window structure operations
205- * --------------------------------------------
206- * Operations include:
207- * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
208- *
209- * We use a red-black tree to represent per-filesystem reservation
210- * windows.
211- *
212- */
213-
214-/**
215- * __rsv_window_dump() -- Dump the filesystem block allocation reservation map
216- * @rb_root: root of per-filesystem reservation rb tree
217- * @verbose: verbose mode
218- * @fn: function which wishes to dump the reservation map
219- *
220- * If verbose is turned on, it will print the whole block reservation
221- * windows(start, end). Otherwise, it will only print out the "bad" windows,
222- * those windows that overlap with their immediate neighbors.
223- */
224-#if 1
225-static void __rsv_window_dump(struct rb_root *root, int verbose,
226- const char *fn)
227-{
228- struct rb_node *n;
229- struct ext4_reserve_window_node *rsv, *prev;
230- int bad;
231-
232-restart:
233- n = rb_first(root);
234- bad = 0;
235- prev = NULL;
236-
237- printk("Block Allocation Reservation Windows Map (%s):\n", fn);
238- while (n) {
239- rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
240- if (verbose)
241- printk("reservation window 0x%p "
242- "start: %llu, end: %llu\n",
243- rsv, rsv->rsv_start, rsv->rsv_end);
244- if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
245- printk("Bad reservation %p (start >= end)\n",
246- rsv);
247- bad = 1;
248- }
249- if (prev && prev->rsv_end >= rsv->rsv_start) {
250- printk("Bad reservation %p (prev->end >= start)\n",
251- rsv);
252- bad = 1;
253- }
254- if (bad) {
255- if (!verbose) {
256- printk("Restarting reservation walk in verbose mode\n");
257- verbose = 1;
258- goto restart;
259- }
260- }
261- n = rb_next(n);
262- prev = rsv;
263- }
264- printk("Window map complete.\n");
265- BUG_ON(bad);
266-}
267-#define rsv_window_dump(root, verbose) \
268- __rsv_window_dump((root), (verbose), __func__)
269-#else
270-#define rsv_window_dump(root, verbose) do {} while (0)
271-#endif
272-
273-/**
274- * goal_in_my_reservation()
275- * @rsv: inode's reservation window
276- * @grp_goal: given goal block relative to the allocation block group
277- * @group: the current allocation block group
278- * @sb: filesystem super block
279- *
280- * Test if the given goal block (group relative) is within the file's
281- * own block reservation window range.
282- *
283- * If the reservation window is outside the goal allocation group, return 0;
284- * grp_goal (given goal block) could be -1, which means no specific
285- * goal block. In this case, always return 1.
286- * If the goal block is within the reservation window, return 1;
287- * otherwise, return 0;
288- */
289-static int
290-goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
291- ext4_group_t group, struct super_block *sb)
292-{
293- ext4_fsblk_t group_first_block, group_last_block;
294-
295- group_first_block = ext4_group_first_block_no(sb, group);
296- group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
297-
298- if ((rsv->_rsv_start > group_last_block) ||
299- (rsv->_rsv_end < group_first_block))
300- return 0;
301- if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start)
302- || (grp_goal + group_first_block > rsv->_rsv_end)))
303- return 0;
304- return 1;
305-}
306
307 /**
308- * search_reserve_window()
309- * @rb_root: root of reservation tree
310- * @goal: target allocation block
311- *
312- * Find the reserved window which includes the goal, or the previous one
313- * if the goal is not in any window.
314- * Returns NULL if there are no windows or if all windows start after the goal.
315- */
316-static struct ext4_reserve_window_node *
317-search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
318-{
319- struct rb_node *n = root->rb_node;
320- struct ext4_reserve_window_node *rsv;
321-
322- if (!n)
323- return NULL;
324-
325- do {
326- rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
327-
328- if (goal < rsv->rsv_start)
329- n = n->rb_left;
330- else if (goal > rsv->rsv_end)
331- n = n->rb_right;
332- else
333- return rsv;
334- } while (n);
335- /*
336- * We've fallen off the end of the tree: the goal wasn't inside
337- * any particular node. OK, the previous node must be to one
338- * side of the interval containing the goal. If it's the RHS,
339- * we need to back up one.
340- */
341- if (rsv->rsv_start > goal) {
342- n = rb_prev(&rsv->rsv_node);
343- rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
344- }
345- return rsv;
346-}
347-
348-/**
349- * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
350- * @sb: super block
351- * @rsv: reservation window to add
352- *
353- * Must be called with rsv_lock hold.
354- */
355-void ext4_rsv_window_add(struct super_block *sb,
356- struct ext4_reserve_window_node *rsv)
357-{
358- struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
359- struct rb_node *node = &rsv->rsv_node;
360- ext4_fsblk_t start = rsv->rsv_start;
361-
362- struct rb_node ** p = &root->rb_node;
363- struct rb_node * parent = NULL;
364- struct ext4_reserve_window_node *this;
365-
366- while (*p)
367- {
368- parent = *p;
369- this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);
370-
371- if (start < this->rsv_start)
372- p = &(*p)->rb_left;
373- else if (start > this->rsv_end)
374- p = &(*p)->rb_right;
375- else {
376- rsv_window_dump(root, 1);
377- BUG();
378- }
379- }
380-
381- rb_link_node(node, parent, p);
382- rb_insert_color(node, root);
383-}
384-
385-/**
386- * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
387- * @sb: super block
388- * @rsv: reservation window to remove
389- *
390- * Mark the block reservation window as not allocated, and unlink it
391- * from the filesystem reservation window rb tree. Must be called with
392- * rsv_lock hold.
393- */
394-static void rsv_window_remove(struct super_block *sb,
395- struct ext4_reserve_window_node *rsv)
396-{
397- rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
398- rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
399- rsv->rsv_alloc_hit = 0;
400- rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
401-}
402-
403-/*
404- * rsv_is_empty() -- Check if the reservation window is allocated.
405- * @rsv: given reservation window to check
406- *
407- * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
408- */
409-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
410-{
411- /* a valid reservation end block could not be 0 */
412- return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
413-}
414-
415-/**
416- * ext4_init_block_alloc_info()
417- * @inode: file inode structure
418- *
419- * Allocate and initialize the reservation window structure, and
420- * link the window to the ext4 inode structure at last
421- *
422- * The reservation window structure is only dynamically allocated
423- * and linked to ext4 inode the first time the open file
424- * needs a new block. So, before every ext4_new_block(s) call, for
425- * regular files, we should check whether the reservation window
426- * structure exists or not. In the latter case, this function is called.
427- * Fail to do so will result in block reservation being turned off for that
428- * open file.
429- *
430- * This function is called from ext4_get_blocks_handle(), also called
431- * when setting the reservation window size through ioctl before the file
432- * is open for write (needs block allocation).
433- *
434- * Needs down_write(i_data_sem) protection prior to call this function.
435- */
436-void ext4_init_block_alloc_info(struct inode *inode)
437-{
438- struct ext4_inode_info *ei = EXT4_I(inode);
439- struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
440- struct super_block *sb = inode->i_sb;
441-
442- block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
443- if (block_i) {
444- struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;
445-
446- rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
447- rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
448-
449- /*
450- * if filesystem is mounted with NORESERVATION, the goal
451- * reservation window size is set to zero to indicate
452- * block reservation is off
453- */
454- if (!test_opt(sb, RESERVATION))
455- rsv->rsv_goal_size = 0;
456- else
457- rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
458- rsv->rsv_alloc_hit = 0;
459- block_i->last_alloc_logical_block = 0;
460- block_i->last_alloc_physical_block = 0;
461- }
462- ei->i_block_alloc_info = block_i;
463-}
464-
465-/**
466- * ext4_discard_reservation()
467- * @inode: inode
468- *
469- * Discard(free) block reservation window on last file close, or truncate
470- * or at last iput().
471- *
472- * It is being called in three cases:
473- * ext4_release_file(): last writer close the file
474- * ext4_clear_inode(): last iput(), when nobody link to this file.
475- * ext4_truncate(): when the block indirect map is about to change.
476- *
477- */
478-void ext4_discard_reservation(struct inode *inode)
479-{
480- struct ext4_inode_info *ei = EXT4_I(inode);
481- struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
482- struct ext4_reserve_window_node *rsv;
483- spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
484-
485- ext4_mb_discard_inode_preallocations(inode);
486-
487- if (!block_i)
488- return;
489-
490- rsv = &block_i->rsv_window_node;
491- if (!rsv_is_empty(&rsv->rsv_window)) {
492- spin_lock(rsv_lock);
493- if (!rsv_is_empty(&rsv->rsv_window))
494- rsv_window_remove(inode->i_sb, rsv);
495- spin_unlock(rsv_lock);
496- }
497-}
498-
499-/**
500- * ext4_free_blocks_sb() -- Free given blocks and update quota
501+ * ext4_add_groupblocks() -- Add given blocks to an existing group
502 * @handle: handle to this transaction
503 * @sb: super block
504- * @block: start physcial block to free
505+ * @block: start physcial block to add to the block group
506 * @count: number of blocks to free
507- * @pdquot_freed_blocks: pointer to quota
508+ *
509+ * This marks the blocks as free in the bitmap. We ask the
510+ * mballoc to reload the buddy after this by setting group
511+ * EXT4_GROUP_INFO_NEED_INIT_BIT flag
512 */
513-void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
514- ext4_fsblk_t block, unsigned long count,
515- unsigned long *pdquot_freed_blocks)
516+void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
517+ ext4_fsblk_t block, unsigned long count)
518 {
519 struct buffer_head *bitmap_bh = NULL;
520 struct buffer_head *gd_bh;
521 ext4_group_t block_group;
522 ext4_grpblk_t bit;
523- unsigned long i;
524- unsigned long overflow;
525- struct ext4_group_desc * desc;
526- struct ext4_super_block * es;
527+ unsigned int i;
528+ struct ext4_group_desc *desc;
529+ struct ext4_super_block *es;
530 struct ext4_sb_info *sbi;
531- int err = 0, ret;
532- ext4_grpblk_t group_freed;
533+ int err = 0, ret, blk_free_count;
534+ ext4_grpblk_t blocks_freed;
535+ struct ext4_group_info *grp;
536
537- *pdquot_freed_blocks = 0;
538 sbi = EXT4_SB(sb);
539 es = sbi->s_es;
540- if (block < le32_to_cpu(es->s_first_data_block) ||
541- block + count < block ||
542- block + count > ext4_blocks_count(es)) {
543- ext4_error (sb, "ext4_free_blocks",
544- "Freeing blocks not in datazone - "
545- "block = %llu, count = %lu", block, count);
546- goto error_return;
547- }
548-
549- ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1);
550+ ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
551
552-do_more:
553- overflow = 0;
554 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
555+ grp = ext4_get_group_info(sb, block_group);
556 /*
557 * Check to see if we are freeing blocks across a group
558 * boundary.
559 */
560 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
561- overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
562- count -= overflow;
563+ goto error_return;
564 }
565- brelse(bitmap_bh);
566 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
567 if (!bitmap_bh)
568 goto error_return;
569- desc = ext4_get_group_desc (sb, block_group, &gd_bh);
570+ desc = ext4_get_group_desc(sb, block_group, &gd_bh);
571 if (!desc)
572 goto error_return;
573
574@@ -703,18 +422,17 @@ do_more:
575 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
576 in_range(block + count - 1, ext4_inode_table(sb, desc),
577 sbi->s_itb_per_group)) {
578- ext4_error (sb, "ext4_free_blocks",
579- "Freeing blocks in system zones - "
580- "Block = %llu, count = %lu",
581- block, count);
582+ ext4_error(sb, __func__,
583+ "Adding blocks in system zones - "
584+ "Block = %llu, count = %lu",
585+ block, count);
586 goto error_return;
587 }
588
589 /*
590- * We are about to start releasing blocks in the bitmap,
591+ * We are about to add blocks to the bitmap,
592 * so we need undo access.
593 */
594- /* @@@ check errors */
595 BUFFER_TRACE(bitmap_bh, "getting undo access");
596 err = ext4_journal_get_undo_access(handle, bitmap_bh);
597 if (err)
598@@ -729,90 +447,43 @@ do_more:
599 err = ext4_journal_get_write_access(handle, gd_bh);
600 if (err)
601 goto error_return;
602-
603- jbd_lock_bh_state(bitmap_bh);
604-
605- for (i = 0, group_freed = 0; i < count; i++) {
606- /*
607- * An HJ special. This is expensive...
608- */
609-#ifdef CONFIG_JBD2_DEBUG
610- jbd_unlock_bh_state(bitmap_bh);
611- {
612- struct buffer_head *debug_bh;
613- debug_bh = sb_find_get_block(sb, block + i);
614- if (debug_bh) {
615- BUFFER_TRACE(debug_bh, "Deleted!");
616- if (!bh2jh(bitmap_bh)->b_committed_data)
617- BUFFER_TRACE(debug_bh,
618- "No commited data in bitmap");
619- BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
620- __brelse(debug_bh);
621- }
622- }
623- jbd_lock_bh_state(bitmap_bh);
624-#endif
625- if (need_resched()) {
626- jbd_unlock_bh_state(bitmap_bh);
627- cond_resched();
628- jbd_lock_bh_state(bitmap_bh);
629- }
630- /* @@@ This prevents newly-allocated data from being
631- * freed and then reallocated within the same
632- * transaction.
633- *
634- * Ideally we would want to allow that to happen, but to
635- * do so requires making jbd2_journal_forget() capable of
636- * revoking the queued write of a data block, which
637- * implies blocking on the journal lock. *forget()
638- * cannot block due to truncate races.
639- *
640- * Eventually we can fix this by making jbd2_journal_forget()
641- * return a status indicating whether or not it was able
642- * to revoke the buffer. On successful revoke, it is
643- * safe not to set the allocation bit in the committed
644- * bitmap, because we know that there is no outstanding
645- * activity on the buffer any more and so it is safe to
646- * reallocate it.
647- */
648- BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
649- J_ASSERT_BH(bitmap_bh,
650- bh2jh(bitmap_bh)->b_committed_data != NULL);
651- ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
652- bh2jh(bitmap_bh)->b_committed_data);
653-
654- /*
655- * We clear the bit in the bitmap after setting the committed
656- * data bit, because this is the reverse order to that which
657- * the allocator uses.
658- */
659+ /*
660+ * make sure we don't allow a parallel init on other groups in the
661+ * same buddy cache
662+ */
663+ down_write(&grp->alloc_sem);
664+ for (i = 0, blocks_freed = 0; i < count; i++) {
665 BUFFER_TRACE(bitmap_bh, "clear bit");
666 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
667 bit + i, bitmap_bh->b_data)) {
668- jbd_unlock_bh_state(bitmap_bh);
669 ext4_error(sb, __func__,
670 "bit already cleared for block %llu",
671 (ext4_fsblk_t)(block + i));
672- jbd_lock_bh_state(bitmap_bh);
673 BUFFER_TRACE(bitmap_bh, "bit already cleared");
674 } else {
675- group_freed++;
676+ blocks_freed++;
677 }
678 }
679- jbd_unlock_bh_state(bitmap_bh);
680-
681 spin_lock(sb_bgl_lock(sbi, block_group));
682- le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
683+ blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
684+ ext4_free_blks_set(sb, desc, blk_free_count);
685 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
686 spin_unlock(sb_bgl_lock(sbi, block_group));
687- percpu_counter_add(&sbi->s_freeblocks_counter, count);
688+ percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
689
690 if (sbi->s_log_groups_per_flex) {
691 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
692 spin_lock(sb_bgl_lock(sbi, flex_group));
693- sbi->s_flex_groups[flex_group].free_blocks += count;
694+ sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
695 spin_unlock(sb_bgl_lock(sbi, flex_group));
696 }
697+ /*
698+ * request to reload the buddy with the
699+ * new bitmap information
700+ */
701+ set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
702+ ext4_mb_update_group_info(grp, blocks_freed);
703+ up_write(&grp->alloc_sem);
704
705 /* We dirtied the bitmap block */
706 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
707@@ -821,15 +492,10 @@ do_more:
708 /* And the group descriptor block */
709 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
710 ret = ext4_journal_dirty_metadata(handle, gd_bh);
711- if (!err) err = ret;
712- *pdquot_freed_blocks += group_freed;
713-
714- if (overflow && !err) {
715- block += count;
716- count = overflow;
717- goto do_more;
718- }
719+ if (!err)
720+ err = ret;
721 sb->s_dirt = 1;
722+
723 error_return:
724 brelse(bitmap_bh);
725 ext4_std_error(sb, err);
726@@ -848,792 +514,86 @@ void ext4_free_blocks(handle_t *handle,
727 ext4_fsblk_t block, unsigned long count,
728 int metadata)
729 {
730- struct super_block * sb;
731+ struct super_block *sb;
732 unsigned long dquot_freed_blocks;
733
734 /* this isn't the right place to decide whether block is metadata
735 * inode.c/extents.c knows better, but for safety ... */
736- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
737- ext4_should_journal_data(inode))
738+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
739+ metadata = 1;
740+
741+ /* We need to make sure we don't reuse
742+ * block released untill the transaction commit.
743+ * writeback mode have weak data consistency so
744+ * don't force data as metadata when freeing block
745+ * for writeback mode.
746+ */
747+ if (metadata == 0 && !ext4_should_writeback_data(inode))
748 metadata = 1;
749
750 sb = inode->i_sb;
751
752- if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
753- ext4_free_blocks_sb(handle, sb, block, count,
754- &dquot_freed_blocks);
755- else
756- ext4_mb_free_blocks(handle, inode, block, count,
757- metadata, &dquot_freed_blocks);
758+ ext4_mb_free_blocks(handle, inode, block, count,
759+ metadata, &dquot_freed_blocks);
760 if (dquot_freed_blocks)
761 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
762 return;
763 }
764
765 /**
766- * ext4_test_allocatable()
767- * @nr: given allocation block group
768- * @bh: bufferhead contains the bitmap of the given block group
769- *
770- * For ext4 allocations, we must not reuse any blocks which are
771- * allocated in the bitmap buffer's "last committed data" copy. This
772- * prevents deletes from freeing up the page for reuse until we have
773- * committed the delete transaction.
774- *
775- * If we didn't do this, then deleting something and reallocating it as
776- * data would allow the old block to be overwritten before the
777- * transaction committed (because we force data to disk before commit).
778- * This would lead to corruption if we crashed between overwriting the
779- * data and committing the delete.
780- *
781- * @@@ We may want to make this allocation behaviour conditional on
782- * data-writes at some point, and disable it for metadata allocations or
783- * sync-data inodes.
784- */
785-static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh)
786-{
787- int ret;
788- struct journal_head *jh = bh2jh(bh);
789-
790- if (ext4_test_bit(nr, bh->b_data))
791- return 0;
792-
793- jbd_lock_bh_state(bh);
794- if (!jh->b_committed_data)
795- ret = 1;
796- else
797- ret = !ext4_test_bit(nr, jh->b_committed_data);
798- jbd_unlock_bh_state(bh);
799- return ret;
800-}
801-
802-/**
803- * bitmap_search_next_usable_block()
804- * @start: the starting block (group relative) of the search
805- * @bh: bufferhead contains the block group bitmap
806- * @maxblocks: the ending block (group relative) of the reservation
807- *
808- * The bitmap search --- search forward alternately through the actual
809- * bitmap on disk and the last-committed copy in journal, until we find a
810- * bit free in both bitmaps.
811- */
812-static ext4_grpblk_t
813-bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
814- ext4_grpblk_t maxblocks)
815-{
816- ext4_grpblk_t next;
817- struct journal_head *jh = bh2jh(bh);
818-
819- while (start < maxblocks) {
820- next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start);
821- if (next >= maxblocks)
822- return -1;
823- if (ext4_test_allocatable(next, bh))
824- return next;
825- jbd_lock_bh_state(bh);
826- if (jh->b_committed_data)
827- start = ext4_find_next_zero_bit(jh->b_committed_data,
828- maxblocks, next);
829- jbd_unlock_bh_state(bh);
830- }
831- return -1;
832-}
833-
834-/**
835- * find_next_usable_block()
836- * @start: the starting block (group relative) to find next
837- * allocatable block in bitmap.
838- * @bh: bufferhead contains the block group bitmap
839- * @maxblocks: the ending block (group relative) for the search
840- *
841- * Find an allocatable block in a bitmap. We honor both the bitmap and
842- * its last-committed copy (if that exists), and perform the "most
843- * appropriate allocation" algorithm of looking for a free block near
844- * the initial goal; then for a free byte somewhere in the bitmap; then
845- * for any free bit in the bitmap.
846- */
847-static ext4_grpblk_t
848-find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
849- ext4_grpblk_t maxblocks)
850-{
851- ext4_grpblk_t here, next;
852- char *p, *r;
853-
854- if (start > 0) {
855- /*
856- * The goal was occupied; search forward for a free
857- * block within the next XX blocks.
858- *
859- * end_goal is more or less random, but it has to be
860- * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the
861- * next 64-bit boundary is simple..
862- */
863- ext4_grpblk_t end_goal = (start + 63) & ~63;
864- if (end_goal > maxblocks)
865- end_goal = maxblocks;
866- here = ext4_find_next_zero_bit(bh->b_data, end_goal, start);
867- if (here < end_goal && ext4_test_allocatable(here, bh))
868- return here;
869- ext4_debug("Bit not found near goal\n");
870- }
871-
872- here = start;
873- if (here < 0)
874- here = 0;
875-
876- p = ((char *)bh->b_data) + (here >> 3);
877- r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
878- next = (r - ((char *)bh->b_data)) << 3;
879-
880- if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh))
881- return next;
882-
883- /*
884- * The bitmap search --- search forward alternately through the actual
885- * bitmap and the last-committed copy until we find a bit free in
886- * both
887- */
888- here = bitmap_search_next_usable_block(here, bh, maxblocks);
889- return here;
890-}
891-
892-/**
893- * claim_block()
894- * @block: the free block (group relative) to allocate
895- * @bh: the bufferhead containts the block group bitmap
896- *
897- * We think we can allocate this block in this bitmap. Try to set the bit.
898- * If that succeeds then check that nobody has allocated and then freed the
899- * block since we saw that is was not marked in b_committed_data. If it _was_
900- * allocated and freed then clear the bit in the bitmap again and return
901- * zero (failure).
902- */
903-static inline int
904-claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
905-{
906- struct journal_head *jh = bh2jh(bh);
907- int ret;
908-
909- if (ext4_set_bit_atomic(lock, block, bh->b_data))
910- return 0;
911- jbd_lock_bh_state(bh);
912- if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) {
913- ext4_clear_bit_atomic(lock, block, bh->b_data);
914- ret = 0;
915- } else {
916- ret = 1;
917- }
918- jbd_unlock_bh_state(bh);
919- return ret;
920-}
921-
922-/**
923- * ext4_try_to_allocate()
924- * @sb: superblock
925- * @handle: handle to this transaction
926- * @group: given allocation block group
927- * @bitmap_bh: bufferhead holds the block bitmap
928- * @grp_goal: given target block within the group
929- * @count: target number of blocks to allocate
930- * @my_rsv: reservation window
931- *
932- * Attempt to allocate blocks within a give range. Set the range of allocation
933- * first, then find the first free bit(s) from the bitmap (within the range),
934- * and at last, allocate the blocks by claiming the found free bit as allocated.
935- *
936- * To set the range of this allocation:
937- * if there is a reservation window, only try to allocate block(s) from the
938- * file's own reservation window;
939- * Otherwise, the allocation range starts from the give goal block, ends at
940- * the block group's last block.
941- *
942- * If we failed to allocate the desired block then we may end up crossing to a
943- * new bitmap. In that case we must release write access to the old one via
944- * ext4_journal_release_buffer(), else we'll run out of credits.
945- */
946-static ext4_grpblk_t
947-ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
948- ext4_group_t group, struct buffer_head *bitmap_bh,
949- ext4_grpblk_t grp_goal, unsigned long *count,
950- struct ext4_reserve_window *my_rsv)
951-{
952- ext4_fsblk_t group_first_block;
953- ext4_grpblk_t start, end;
954- unsigned long num = 0;
955-
956- /* we do allocation within the reservation window if we have a window */
957- if (my_rsv) {
958- group_first_block = ext4_group_first_block_no(sb, group);
959- if (my_rsv->_rsv_start >= group_first_block)
960- start = my_rsv->_rsv_start - group_first_block;
961- else
962- /* reservation window cross group boundary */
963- start = 0;
964- end = my_rsv->_rsv_end - group_first_block + 1;
965- if (end > EXT4_BLOCKS_PER_GROUP(sb))
966- /* reservation window crosses group boundary */
967- end = EXT4_BLOCKS_PER_GROUP(sb);
968- if ((start <= grp_goal) && (grp_goal < end))
969- start = grp_goal;
970- else
971- grp_goal = -1;
972- } else {
973- if (grp_goal > 0)
974- start = grp_goal;
975- else
976- start = 0;
977- end = EXT4_BLOCKS_PER_GROUP(sb);
978- }
979-
980- BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb));
981-
982-repeat:
983- if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) {
984- grp_goal = find_next_usable_block(start, bitmap_bh, end);
985- if (grp_goal < 0)
986- goto fail_access;
987- if (!my_rsv) {
988- int i;
989-
990- for (i = 0; i < 7 && grp_goal > start &&
991- ext4_test_allocatable(grp_goal - 1,
992- bitmap_bh);
993- i++, grp_goal--)
994- ;
995- }
996- }
997- start = grp_goal;
998-
999- if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1000- grp_goal, bitmap_bh)) {
1001- /*
1002- * The block was allocated by another thread, or it was
1003- * allocated and then freed by another thread
1004- */
1005- start++;
1006- grp_goal++;
1007- if (start >= end)
1008- goto fail_access;
1009- goto repeat;
1010- }
1011- num++;
1012- grp_goal++;
1013- while (num < *count && grp_goal < end
1014- && ext4_test_allocatable(grp_goal, bitmap_bh)
1015- && claim_block(sb_bgl_lock(EXT4_SB(sb), group),
1016- grp_goal, bitmap_bh)) {
1017- num++;
1018- grp_goal++;
1019- }
1020- *count = num;
1021- return grp_goal - num;
1022-fail_access:
1023- *count = num;
1024- return -1;
1025-}
1026-
1027-/**
1028- * find_next_reservable_window():
1029- * find a reservable space within the given range.
1030- * It does not allocate the reservation window for now:
1031- * alloc_new_reservation() will do the work later.
1032- *
1033- * @search_head: the head of the searching list;
1034- * This is not necessarily the list head of the whole filesystem
1035- *
1036- * We have both head and start_block to assist the search
1037- * for the reservable space. The list starts from head,
1038- * but we will shift to the place where start_block is,
1039- * then start from there, when looking for a reservable space.
1040- *
1041- * @size: the target new reservation window size
1042- *
1043- * @group_first_block: the first block we consider to start
1044- * the real search from
1045- *
1046- * @last_block:
1047- * the maximum block number that our goal reservable space
1048- * could start from. This is normally the last block in this
1049- * group. The search will end when we found the start of next
1050- * possible reservable space is out of this boundary.
1051- * This could handle the cross boundary reservation window
1052- * request.
1053- *
1054- * basically we search from the given range, rather than the whole
1055- * reservation double linked list, (start_block, last_block)
1056- * to find a free region that is of my size and has not
1057- * been reserved.
1058- *
1059- */
1060-static int find_next_reservable_window(
1061- struct ext4_reserve_window_node *search_head,
1062- struct ext4_reserve_window_node *my_rsv,
1063- struct super_block * sb,
1064- ext4_fsblk_t start_block,
1065- ext4_fsblk_t last_block)
1066-{
1067- struct rb_node *next;
1068- struct ext4_reserve_window_node *rsv, *prev;
1069- ext4_fsblk_t cur;
1070- int size = my_rsv->rsv_goal_size;
1071-
1072- /* TODO: make the start of the reservation window byte-aligned */
1073- /* cur = *start_block & ~7;*/
1074- cur = start_block;
1075- rsv = search_head;
1076- if (!rsv)
1077- return -1;
1078-
1079- while (1) {
1080- if (cur <= rsv->rsv_end)
1081- cur = rsv->rsv_end + 1;
1082-
1083- /* TODO?
1084- * in the case we could not find a reservable space
1085- * that is what is expected, during the re-search, we could
1086- * remember what's the largest reservable space we could have
1087- * and return that one.
1088- *
1089- * For now it will fail if we could not find the reservable
1090- * space with expected-size (or more)...
1091- */
1092- if (cur > last_block)
1093- return -1; /* fail */
1094-
1095- prev = rsv;
1096- next = rb_next(&rsv->rsv_node);
1097- rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node);
1098-
1099- /*
1100- * Reached the last reservation, we can just append to the
1101- * previous one.
1102- */
1103- if (!next)
1104- break;
1105-
1106- if (cur + size <= rsv->rsv_start) {
1107- /*
1108- * Found a reserveable space big enough. We could
1109- * have a reservation across the group boundary here
1110- */
1111- break;
1112- }
1113- }
1114- /*
1115- * we come here either :
1116- * when we reach the end of the whole list,
1117- * and there is empty reservable space after last entry in the list.
1118- * append it to the end of the list.
1119- *
1120- * or we found one reservable space in the middle of the list,
1121- * return the reservation window that we could append to.
1122- * succeed.
1123- */
1124-
1125- if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
1126- rsv_window_remove(sb, my_rsv);
1127-
1128- /*
1129- * Let's book the whole avaliable window for now. We will check the
1130- * disk bitmap later and then, if there are free blocks then we adjust
1131- * the window size if it's larger than requested.
1132- * Otherwise, we will remove this node from the tree next time
1133- * call find_next_reservable_window.
1134- */
1135- my_rsv->rsv_start = cur;
1136- my_rsv->rsv_end = cur + size - 1;
1137- my_rsv->rsv_alloc_hit = 0;
1138-
1139- if (prev != my_rsv)
1140- ext4_rsv_window_add(sb, my_rsv);
1141-
1142- return 0;
1143-}
1144-
1145-/**
1146- * alloc_new_reservation()--allocate a new reservation window
1147- *
1148- * To make a new reservation, we search part of the filesystem
1149- * reservation list (the list that inside the group). We try to
1150- * allocate a new reservation window near the allocation goal,
1151- * or the beginning of the group, if there is no goal.
1152- *
1153- * We first find a reservable space after the goal, then from
1154- * there, we check the bitmap for the first free block after
1155- * it. If there is no free block until the end of group, then the
1156- * whole group is full, we failed. Otherwise, check if the free
1157- * block is inside the expected reservable space, if so, we
1158- * succeed.
1159- * If the first free block is outside the reservable space, then
1160- * start from the first free block, we search for next available
1161- * space, and go on.
1162- *
1163- * on succeed, a new reservation will be found and inserted into the list
1164- * It contains at least one free block, and it does not overlap with other
1165- * reservation windows.
1166- *
1167- * failed: we failed to find a reservation window in this group
1168- *
1169- * @rsv: the reservation
1170- *
1171- * @grp_goal: The goal (group-relative). It is where the search for a
1172- * free reservable space should start from.
1173- * if we have a grp_goal(grp_goal >0 ), then start from there,
1174- * no grp_goal(grp_goal = -1), we start from the first block
1175- * of the group.
1176- *
1177- * @sb: the super block
1178- * @group: the group we are trying to allocate in
1179- * @bitmap_bh: the block group block bitmap
1180+ * ext4_has_free_blocks()
1181+ * @sbi: in-core super block structure.
1182+ * @nblocks: number of needed blocks
1183 *
1184+ * Check if filesystem has nblocks free & available for allocation.
1185+ * On success return 1, return 0 on failure.
1186 */
1187-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
1188- ext4_grpblk_t grp_goal, struct super_block *sb,
1189- ext4_group_t group, struct buffer_head *bitmap_bh)
1190+int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
1191 {
1192- struct ext4_reserve_window_node *search_head;
1193- ext4_fsblk_t group_first_block, group_end_block, start_block;
1194- ext4_grpblk_t first_free_block;
1195- struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root;
1196- unsigned long size;
1197- int ret;
1198- spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1199-
1200- group_first_block = ext4_group_first_block_no(sb, group);
1201- group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1202-
1203- if (grp_goal < 0)
1204- start_block = group_first_block;
1205- else
1206- start_block = grp_goal + group_first_block;
1207+ s64 free_blocks, dirty_blocks, root_blocks;
1208+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
1209+ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
1210
1211- size = my_rsv->rsv_goal_size;
1212+ free_blocks = percpu_counter_read_positive(fbc);
1213+ dirty_blocks = percpu_counter_read_positive(dbc);
1214+ root_blocks = ext4_r_blocks_count(sbi->s_es);
1215
1216- if (!rsv_is_empty(&my_rsv->rsv_window)) {
1217- /*
1218- * if the old reservation is cross group boundary
1219- * and if the goal is inside the old reservation window,
1220- * we will come here when we just failed to allocate from
1221- * the first part of the window. We still have another part
1222- * that belongs to the next group. In this case, there is no
1223- * point to discard our window and try to allocate a new one
1224- * in this group(which will fail). we should
1225- * keep the reservation window, just simply move on.
1226- *
1227- * Maybe we could shift the start block of the reservation
1228- * window to the first block of next group.
1229- */
1230-
1231- if ((my_rsv->rsv_start <= group_end_block) &&
1232- (my_rsv->rsv_end > group_end_block) &&
1233- (start_block >= my_rsv->rsv_start))
1234- return -1;
1235-
1236- if ((my_rsv->rsv_alloc_hit >
1237- (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) {
1238- /*
1239- * if the previously allocation hit ratio is
1240- * greater than 1/2, then we double the size of
1241- * the reservation window the next time,
1242- * otherwise we keep the same size window
1243- */
1244- size = size * 2;
1245- if (size > EXT4_MAX_RESERVE_BLOCKS)
1246- size = EXT4_MAX_RESERVE_BLOCKS;
1247- my_rsv->rsv_goal_size= size;
1248+ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
1249+ EXT4_FREEBLOCKS_WATERMARK) {
1250+ free_blocks = percpu_counter_sum_positive(fbc);
1251+ dirty_blocks = percpu_counter_sum_positive(dbc);
1252+ if (dirty_blocks < 0) {
1253+ printk(KERN_CRIT "Dirty block accounting "
1254+ "went wrong %lld\n",
1255+ (long long)dirty_blocks);
1256 }
1257 }
1258-
1259- spin_lock(rsv_lock);
1260- /*
1261- * shift the search start to the window near the goal block
1262- */
1263- search_head = search_reserve_window(fs_rsv_root, start_block);
1264-
1265- /*
1266- * find_next_reservable_window() simply finds a reservable window
1267- * inside the given range(start_block, group_end_block).
1268- *
1269- * To make sure the reservation window has a free bit inside it, we
1270- * need to check the bitmap after we found a reservable window.
1271- */
1272-retry:
1273- ret = find_next_reservable_window(search_head, my_rsv, sb,
1274- start_block, group_end_block);
1275-
1276- if (ret == -1) {
1277- if (!rsv_is_empty(&my_rsv->rsv_window))
1278- rsv_window_remove(sb, my_rsv);
1279- spin_unlock(rsv_lock);
1280- return -1;
1281- }
1282-
1283- /*
1284- * On success, find_next_reservable_window() returns the
1285- * reservation window where there is a reservable space after it.
1286- * Before we reserve this reservable space, we need
1287- * to make sure there is at least a free block inside this region.
1288- *
1289- * searching the first free bit on the block bitmap and copy of
1290- * last committed bitmap alternatively, until we found a allocatable
1291- * block. Search start from the start block of the reservable space
1292- * we just found.
1293- */
1294- spin_unlock(rsv_lock);
1295- first_free_block = bitmap_search_next_usable_block(
1296- my_rsv->rsv_start - group_first_block,
1297- bitmap_bh, group_end_block - group_first_block + 1);
1298-
1299- if (first_free_block < 0) {
1300- /*
1301- * no free block left on the bitmap, no point
1302- * to reserve the space. return failed.
1303- */
1304- spin_lock(rsv_lock);
1305- if (!rsv_is_empty(&my_rsv->rsv_window))
1306- rsv_window_remove(sb, my_rsv);
1307- spin_unlock(rsv_lock);
1308- return -1; /* failed */
1309- }
1310-
1311- start_block = first_free_block + group_first_block;
1312- /*
1313- * check if the first free block is within the
1314- * free space we just reserved
1315- */
1316- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
1317- return 0; /* success */
1318- /*
1319- * if the first free bit we found is out of the reservable space
1320- * continue search for next reservable space,
1321- * start from where the free block is,
1322- * we also shift the list head to where we stopped last time
1323- */
1324- search_head = my_rsv;
1325- spin_lock(rsv_lock);
1326- goto retry;
1327-}
1328-
1329-/**
1330- * try_to_extend_reservation()
1331- * @my_rsv: given reservation window
1332- * @sb: super block
1333- * @size: the delta to extend
1334- *
1335- * Attempt to expand the reservation window large enough to have
1336- * required number of free blocks
1337- *
1338- * Since ext4_try_to_allocate() will always allocate blocks within
1339- * the reservation window range, if the window size is too small,
1340- * multiple blocks allocation has to stop at the end of the reservation
1341- * window. To make this more efficient, given the total number of
1342- * blocks needed and the current size of the window, we try to
1343- * expand the reservation window size if necessary on a best-effort
1344- * basis before ext4_new_blocks() tries to allocate blocks,
1345- */
1346-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
1347- struct super_block *sb, int size)
1348-{
1349- struct ext4_reserve_window_node *next_rsv;
1350- struct rb_node *next;
1351- spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;
1352-
1353- if (!spin_trylock(rsv_lock))
1354- return;
1355-
1356- next = rb_next(&my_rsv->rsv_node);
1357-
1358- if (!next)
1359- my_rsv->rsv_end += size;
1360- else {
1361- next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);
1362-
1363- if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
1364- my_rsv->rsv_end += size;
1365- else
1366- my_rsv->rsv_end = next_rsv->rsv_start - 1;
1367- }
1368- spin_unlock(rsv_lock);
1369-}
1370-
1371-/**
1372- * ext4_try_to_allocate_with_rsv()
1373- * @sb: superblock
1374- * @handle: handle to this transaction
1375- * @group: given allocation block group
1376- * @bitmap_bh: bufferhead holds the block bitmap
1377- * @grp_goal: given target block within the group
1378- * @count: target number of blocks to allocate
1379- * @my_rsv: reservation window
1380- * @errp: pointer to store the error code
1381- *
1382- * This is the main function used to allocate a new block and its reservation
1383- * window.
1384- *
1385- * Each time when a new block allocation is need, first try to allocate from
1386- * its own reservation. If it does not have a reservation window, instead of
1387- * looking for a free bit on bitmap first, then look up the reservation list to
1388- * see if it is inside somebody else's reservation window, we try to allocate a
1389- * reservation window for it starting from the goal first. Then do the block
1390- * allocation within the reservation window.
1391- *
1392- * This will avoid keeping on searching the reservation list again and
1393- * again when somebody is looking for a free block (without
1394- * reservation), and there are lots of free blocks, but they are all
1395- * being reserved.
1396- *
1397- * We use a red-black tree for the per-filesystem reservation list.
1398- *
1399- */
1400-static ext4_grpblk_t
1401-ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
1402- ext4_group_t group, struct buffer_head *bitmap_bh,
1403- ext4_grpblk_t grp_goal,
1404- struct ext4_reserve_window_node * my_rsv,
1405- unsigned long *count, int *errp)
1406-{
1407- ext4_fsblk_t group_first_block, group_last_block;
1408- ext4_grpblk_t ret = 0;
1409- int fatal;
1410- unsigned long num = *count;
1411-
1412- *errp = 0;
1413-
1414- /*
1415- * Make sure we use undo access for the bitmap, because it is critical
1416- * that we do the frozen_data COW on bitmap buffers in all cases even
1417- * if the buffer is in BJ_Forget state in the committing transaction.
1418+ /* Check whether we have space after
1419+ * accounting for current dirty blocks & root reserved blocks.
1420 */
1421- BUFFER_TRACE(bitmap_bh, "get undo access for new block");
1422- fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
1423- if (fatal) {
1424- *errp = fatal;
1425- return -1;
1426- }
1427-
1428- /*
1429- * we don't deal with reservation when
1430- * filesystem is mounted without reservation
1431- * or the file is not a regular file
1432- * or last attempt to allocate a block with reservation turned on failed
1433- */
1434- if (my_rsv == NULL ) {
1435- ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1436- grp_goal, count, NULL);
1437- goto out;
1438- }
1439- /*
1440- * grp_goal is a group relative block number (if there is a goal)
1441- * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
1442- * first block is a filesystem wide block number
1443- * first block is the block number of the first block in this group
1444- */
1445- group_first_block = ext4_group_first_block_no(sb, group);
1446- group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1447-
1448- /*
1449- * Basically we will allocate a new block from inode's reservation
1450- * window.
1451- *
1452- * We need to allocate a new reservation window, if:
1453- * a) inode does not have a reservation window; or
1454- * b) last attempt to allocate a block from existing reservation
1455- * failed; or
1456- * c) we come here with a goal and with a reservation window
1457- *
1458- * We do not need to allocate a new reservation window if we come here
1459- * at the beginning with a goal and the goal is inside the window, or
1460- * we don't have a goal but already have a reservation window.
1461- * then we could go to allocate from the reservation window directly.
1462- */
1463- while (1) {
1464- if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
1465- !goal_in_my_reservation(&my_rsv->rsv_window,
1466- grp_goal, group, sb)) {
1467- if (my_rsv->rsv_goal_size < *count)
1468- my_rsv->rsv_goal_size = *count;
1469- ret = alloc_new_reservation(my_rsv, grp_goal, sb,
1470- group, bitmap_bh);
1471- if (ret < 0)
1472- break; /* failed */
1473-
1474- if (!goal_in_my_reservation(&my_rsv->rsv_window,
1475- grp_goal, group, sb))
1476- grp_goal = -1;
1477- } else if (grp_goal >= 0) {
1478- int curr = my_rsv->rsv_end -
1479- (grp_goal + group_first_block) + 1;
1480-
1481- if (curr < *count)
1482- try_to_extend_reservation(my_rsv, sb,
1483- *count - curr);
1484- }
1485+ if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks))
1486+ return 1;
1487
1488- if ((my_rsv->rsv_start > group_last_block) ||
1489- (my_rsv->rsv_end < group_first_block)) {
1490- rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
1491- BUG();
1492- }
1493- ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
1494- grp_goal, &num, &my_rsv->rsv_window);
1495- if (ret >= 0) {
1496- my_rsv->rsv_alloc_hit += num;
1497- *count = num;
1498- break; /* succeed */
1499- }
1500- num = *count;
1501- }
1502-out:
1503- if (ret >= 0) {
1504- BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
1505- "bitmap block");
1506- fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
1507- if (fatal) {
1508- *errp = fatal;
1509- return -1;
1510- }
1511- return ret;
1512+ /* Hm, nope. Are (enough) root reserved blocks available? */
1513+ if (sbi->s_resuid == current_fsuid() ||
1514+ ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
1515+ capable(CAP_SYS_RESOURCE)) {
1516+ if (free_blocks >= (nblocks + dirty_blocks))
1517+ return 1;
1518 }
1519
1520- BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
1521- ext4_journal_release_buffer(handle, bitmap_bh);
1522- return ret;
1523+ return 0;
1524 }
1525
1526-/**
1527- * ext4_has_free_blocks()
1528- * @sbi: in-core super block structure.
1529- * @nblocks: number of neeed blocks
1530- *
1531- * Check if filesystem has free blocks available for allocation.
1532- * Return the number of blocks avaible for allocation for this request
1533- * On success, return nblocks
1534- */
1535-ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
1536- ext4_fsblk_t nblocks)
1537+int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
1538+ s64 nblocks)
1539 {
1540- ext4_fsblk_t free_blocks;
1541- ext4_fsblk_t root_blocks = 0;
1542-
1543- free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1544-
1545- if (!capable(CAP_SYS_RESOURCE) &&
1546- sbi->s_resuid != current->fsuid &&
1547- (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
1548- root_blocks = ext4_r_blocks_count(sbi->s_es);
1549-#ifdef CONFIG_SMP
1550- if (free_blocks - root_blocks < FBC_BATCH)
1551- free_blocks =
1552- percpu_counter_sum(&sbi->s_freeblocks_counter);
1553-#endif
1554- if (free_blocks <= root_blocks)
1555- /* we don't have free space */
1556+ if (ext4_has_free_blocks(sbi, nblocks)) {
1557+ percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
1558 return 0;
1559- if (free_blocks - root_blocks < nblocks)
1560- return free_blocks - root_blocks;
1561- return nblocks;
1562- }
1563-
1564+ } else
1565+ return -ENOSPC;
1566+}
1567
1568 /**
1569 * ext4_should_retry_alloc()
1570@@ -1657,402 +617,45 @@ int ext4_should_retry_alloc(struct super
1571 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
1572 }
1573
1574-/**
1575- * ext4_old_new_blocks() -- core block bitmap based block allocation function
1576- *
1577- * @handle: handle to this transaction
1578- * @inode: file inode
1579- * @goal: given target block(filesystem wide)
1580- * @count: target number of blocks to allocate
1581- * @errp: error code
1582- *
1583- * ext4_old_new_blocks uses a goal block to assist allocation and look up
1584- * the block bitmap directly to do block allocation. It tries to
1585- * allocate block(s) from the block group contains the goal block first. If
1586- * that fails, it will try to allocate block(s) from other block groups
1587- * without any specific goal block.
1588- *
1589- * This function is called when -o nomballoc mount option is enabled
1590- *
1591- */
1592-ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
1593- ext4_fsblk_t goal, unsigned long *count, int *errp)
1594-{
1595- struct buffer_head *bitmap_bh = NULL;
1596- struct buffer_head *gdp_bh;
1597- ext4_group_t group_no;
1598- ext4_group_t goal_group;
1599- ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
1600- ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
1601- ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */
1602- ext4_group_t bgi; /* blockgroup iteration index */
1603- int fatal = 0, err;
1604- int performed_allocation = 0;
1605- ext4_grpblk_t free_blocks; /* number of free blocks in a group */
1606- struct super_block *sb;
1607- struct ext4_group_desc *gdp;
1608- struct ext4_super_block *es;
1609- struct ext4_sb_info *sbi;
1610- struct ext4_reserve_window_node *my_rsv = NULL;
1611- struct ext4_block_alloc_info *block_i;
1612- unsigned short windowsz = 0;
1613- ext4_group_t ngroups;
1614- unsigned long num = *count;
1615-
1616- sb = inode->i_sb;
1617- if (!sb) {
1618- *errp = -ENODEV;
1619- printk("ext4_new_block: nonexistent device");
1620- return 0;
1621- }
1622-
1623- sbi = EXT4_SB(sb);
1624- if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
1625- /*
1626- * With delalloc we already reserved the blocks
1627- */
1628- *count = ext4_has_free_blocks(sbi, *count);
1629- }
1630- if (*count == 0) {
1631- *errp = -ENOSPC;
1632- return 0; /*return with ENOSPC error */
1633- }
1634- num = *count;
1635-
1636- /*
1637- * Check quota for allocation of this block.
1638- */
1639- if (DQUOT_ALLOC_BLOCK(inode, num)) {
1640- *errp = -EDQUOT;
1641- return 0;
1642- }
1643-
1644- sbi = EXT4_SB(sb);
1645- es = EXT4_SB(sb)->s_es;
1646- ext4_debug("goal=%llu.\n", goal);
1647- /*
1648- * Allocate a block from reservation only when
1649- * filesystem is mounted with reservation(default,-o reservation), and
1650- * it's a regular file, and
1651- * the desired window size is greater than 0 (One could use ioctl
1652- * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
1653- * reservation on that particular file)
1654- */
1655- block_i = EXT4_I(inode)->i_block_alloc_info;
1656- if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
1657- my_rsv = &block_i->rsv_window_node;
1658-
1659- /*
1660- * First, test whether the goal block is free.
1661- */
1662- if (goal < le32_to_cpu(es->s_first_data_block) ||
1663- goal >= ext4_blocks_count(es))
1664- goal = le32_to_cpu(es->s_first_data_block);
1665- ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
1666- goal_group = group_no;
1667-retry_alloc:
1668- gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1669- if (!gdp)
1670- goto io_error;
1671-
1672- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1673- /*
1674- * if there is not enough free blocks to make a new resevation
1675- * turn off reservation for this allocation
1676- */
1677- if (my_rsv && (free_blocks < windowsz)
1678- && (rsv_is_empty(&my_rsv->rsv_window)))
1679- my_rsv = NULL;
1680-
1681- if (free_blocks > 0) {
1682- bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1683- if (!bitmap_bh)
1684- goto io_error;
1685- grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1686- group_no, bitmap_bh, grp_target_blk,
1687- my_rsv, &num, &fatal);
1688- if (fatal)
1689- goto out;
1690- if (grp_alloc_blk >= 0)
1691- goto allocated;
1692- }
1693-
1694- ngroups = EXT4_SB(sb)->s_groups_count;
1695- smp_rmb();
1696-
1697- /*
1698- * Now search the rest of the groups. We assume that
1699- * group_no and gdp correctly point to the last group visited.
1700- */
1701- for (bgi = 0; bgi < ngroups; bgi++) {
1702- group_no++;
1703- if (group_no >= ngroups)
1704- group_no = 0;
1705- gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
1706- if (!gdp)
1707- goto io_error;
1708- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1709- /*
1710- * skip this group if the number of
1711- * free blocks is less than half of the reservation
1712- * window size.
1713- */
1714- if (free_blocks <= (windowsz/2))
1715- continue;
1716-
1717- brelse(bitmap_bh);
1718- bitmap_bh = ext4_read_block_bitmap(sb, group_no);
1719- if (!bitmap_bh)
1720- goto io_error;
1721- /*
1722- * try to allocate block(s) from this group, without a goal(-1).
1723- */
1724- grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
1725- group_no, bitmap_bh, -1, my_rsv,
1726- &num, &fatal);
1727- if (fatal)
1728- goto out;
1729- if (grp_alloc_blk >= 0)
1730- goto allocated;
1731- }
1732- /*
1733- * We may end up a bogus ealier ENOSPC error due to
1734- * filesystem is "full" of reservations, but
1735- * there maybe indeed free blocks avaliable on disk
1736- * In this case, we just forget about the reservations
1737- * just do block allocation as without reservations.
1738- */
1739- if (my_rsv) {
1740- my_rsv = NULL;
1741- windowsz = 0;
1742- group_no = goal_group;
1743- goto retry_alloc;
1744- }
1745- /* No space left on the device */
1746- *errp = -ENOSPC;
1747- goto out;
1748-
1749-allocated:
1750-
1751- ext4_debug("using block group %lu(%d)\n",
1752- group_no, gdp->bg_free_blocks_count);
1753-
1754- BUFFER_TRACE(gdp_bh, "get_write_access");
1755- fatal = ext4_journal_get_write_access(handle, gdp_bh);
1756- if (fatal)
1757- goto out;
1758-
1759- ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);
1760-
1761- if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
1762- in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
1763- in_range(ret_block, ext4_inode_table(sb, gdp),
1764- EXT4_SB(sb)->s_itb_per_group) ||
1765- in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
1766- EXT4_SB(sb)->s_itb_per_group)) {
1767- ext4_error(sb, "ext4_new_block",
1768- "Allocating block in system zone - "
1769- "blocks from %llu, length %lu",
1770- ret_block, num);
1771- /*
1772- * claim_block marked the blocks we allocated
1773- * as in use. So we may want to selectively
1774- * mark some of the blocks as free
1775- */
1776- goto retry_alloc;
1777- }
1778-
1779- performed_allocation = 1;
1780-
1781-#ifdef CONFIG_JBD2_DEBUG
1782- {
1783- struct buffer_head *debug_bh;
1784-
1785- /* Record bitmap buffer state in the newly allocated block */
1786- debug_bh = sb_find_get_block(sb, ret_block);
1787- if (debug_bh) {
1788- BUFFER_TRACE(debug_bh, "state when allocated");
1789- BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
1790- brelse(debug_bh);
1791- }
1792- }
1793- jbd_lock_bh_state(bitmap_bh);
1794- spin_lock(sb_bgl_lock(sbi, group_no));
1795- if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
1796- int i;
1797-
1798- for (i = 0; i < num; i++) {
1799- if (ext4_test_bit(grp_alloc_blk+i,
1800- bh2jh(bitmap_bh)->b_committed_data)) {
1801- printk("%s: block was unexpectedly set in "
1802- "b_committed_data\n", __func__);
1803- }
1804- }
1805- }
1806- ext4_debug("found bit %d\n", grp_alloc_blk);
1807- spin_unlock(sb_bgl_lock(sbi, group_no));
1808- jbd_unlock_bh_state(bitmap_bh);
1809-#endif
1810-
1811- if (ret_block + num - 1 >= ext4_blocks_count(es)) {
1812- ext4_error(sb, "ext4_new_block",
1813- "block(%llu) >= blocks count(%llu) - "
1814- "block_group = %lu, es == %p ", ret_block,
1815- ext4_blocks_count(es), group_no, es);
1816- goto out;
1817- }
1818-
1819- /*
1820- * It is up to the caller to add the new buffer to a journal
1821- * list of some description. We don't know in advance whether
1822- * the caller wants to use it as metadata or data.
1823- */
1824- spin_lock(sb_bgl_lock(sbi, group_no));
1825- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
1826- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
1827- le16_add_cpu(&gdp->bg_free_blocks_count, -num);
1828- gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
1829- spin_unlock(sb_bgl_lock(sbi, group_no));
1830- if (!EXT4_I(inode)->i_delalloc_reserved_flag)
1831- percpu_counter_sub(&sbi->s_freeblocks_counter, num);
1832-
1833- if (sbi->s_log_groups_per_flex) {
1834- ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
1835- spin_lock(sb_bgl_lock(sbi, flex_group));
1836- sbi->s_flex_groups[flex_group].free_blocks -= num;
1837- spin_unlock(sb_bgl_lock(sbi, flex_group));
1838- }
1839-
1840- BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
1841- err = ext4_journal_dirty_metadata(handle, gdp_bh);
1842- if (!fatal)
1843- fatal = err;
1844-
1845- sb->s_dirt = 1;
1846- if (fatal)
1847- goto out;
1848-
1849- *errp = 0;
1850- brelse(bitmap_bh);
1851- DQUOT_FREE_BLOCK(inode, *count-num);
1852- *count = num;
1853- return ret_block;
1854-
1855-io_error:
1856- *errp = -EIO;
1857-out:
1858- if (fatal) {
1859- *errp = fatal;
1860- ext4_std_error(sb, fatal);
1861- }
1862- /*
1863- * Undo the block allocation
1864- */
1865- if (!performed_allocation)
1866- DQUOT_FREE_BLOCK(inode, *count);
1867- brelse(bitmap_bh);
1868- return 0;
1869-}
1870-
1871-#define EXT4_META_BLOCK 0x1
1872-
1873-static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
1874- ext4_lblk_t iblock, ext4_fsblk_t goal,
1875- unsigned long *count, int *errp, int flags)
1876-{
1877- struct ext4_allocation_request ar;
1878- ext4_fsblk_t ret;
1879-
1880- if (!test_opt(inode->i_sb, MBALLOC)) {
1881- return ext4_old_new_blocks(handle, inode, goal, count, errp);
1882- }
1883-
1884- memset(&ar, 0, sizeof(ar));
1885- /* Fill with neighbour allocated blocks */
1886-
1887- ar.inode = inode;
1888- ar.goal = goal;
1889- ar.len = *count;
1890- ar.logical = iblock;
1891-
1892- if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
1893- /* enable in-core preallocation for data block allocation */
1894- ar.flags = EXT4_MB_HINT_DATA;
1895- else
1896- /* disable in-core preallocation for non-regular files */
1897- ar.flags = 0;
1898-
1899- ret = ext4_mb_new_blocks(handle, &ar, errp);
1900- *count = ar.len;
1901- return ret;
1902-}
1903-
1904 /*
1905 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
1906 *
1907 * @handle: handle to this transaction
1908 * @inode: file inode
1909 * @goal: given target block(filesystem wide)
1910- * @count: total number of blocks need
1911+ * @count: pointer to total number of blocks needed
1912 * @errp: error code
1913 *
1914- * Return 1st allocated block numberon success, *count stores total account
1915+ * Return 1st allocated block number on success, *count stores total account
1916 * error stores in errp pointer
1917 */
1918 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1919 ext4_fsblk_t goal, unsigned long *count, int *errp)
1920 {
1921+ struct ext4_allocation_request ar;
1922 ext4_fsblk_t ret;
1923- ret = do_blk_alloc(handle, inode, 0, goal,
1924- count, errp, EXT4_META_BLOCK);
1925+
1926+ memset(&ar, 0, sizeof(ar));
1927+ /* Fill with neighbour allocated blocks */
1928+ ar.inode = inode;
1929+ ar.goal = goal;
1930+ ar.len = count ? *count : 1;
1931+
1932+ ret = ext4_mb_new_blocks(handle, &ar, errp);
1933+ if (count)
1934+ *count = ar.len;
1935+
1936 /*
1937 * Account for the allocated meta blocks
1938 */
1939- if (!(*errp)) {
1940+ if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
1941 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1942- EXT4_I(inode)->i_allocated_meta_blocks += *count;
1943+ EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
1944 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1945 }
1946 return ret;
1947 }
1948
1949-/*
1950- * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
1951- *
1952- * @handle: handle to this transaction
1953- * @inode: file inode
1954- * @goal: given target block(filesystem wide)
1955- * @errp: error code
1956- *
1957- * Return allocated block number on success
1958- */
1959-ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
1960- ext4_fsblk_t goal, int *errp)
1961-{
1962- unsigned long count = 1;
1963- return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
1964-}
1965-
1966-/*
1967- * ext4_new_blocks() -- allocate data blocks
1968- *
1969- * @handle: handle to this transaction
1970- * @inode: file inode
1971- * @goal: given target block(filesystem wide)
1972- * @count: total number of blocks need
1973- * @errp: error code
1974- *
1975- * Return 1st allocated block numberon success, *count stores total account
1976- * error stores in errp pointer
1977- */
1978-
1979-ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
1980- ext4_lblk_t iblock, ext4_fsblk_t goal,
1981- unsigned long *count, int *errp)
1982-{
1983- return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
1984-}
1985-
1986 /**
1987 * ext4_count_free_blocks() -- count filesystem free blocks
1988 * @sb: superblock
1989@@ -2068,7 +671,7 @@ ext4_fsblk_t ext4_count_free_blocks(stru
1990 #ifdef EXT4FS_DEBUG
1991 struct ext4_super_block *es;
1992 ext4_fsblk_t bitmap_count;
1993- unsigned long x;
1994+ unsigned int x;
1995 struct buffer_head *bitmap_bh = NULL;
1996
1997 es = EXT4_SB(sb)->s_es;
1998@@ -2088,15 +691,14 @@ ext4_fsblk_t ext4_count_free_blocks(stru
1999 continue;
2000
2001 x = ext4_count_free(bitmap_bh, sb->s_blocksize);
2002- printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
2003+ printk(KERN_DEBUG "group %lu: stored = %d, counted = %u\n",
2004 i, le16_to_cpu(gdp->bg_free_blocks_count), x);
2005 bitmap_count += x;
2006 }
2007 brelse(bitmap_bh);
2008- printk("ext4_count_free_blocks: stored = %llu"
2009- ", computed = %llu, %llu\n",
2010- ext4_free_blocks_count(es),
2011- desc_count, bitmap_count);
2012+ printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu"
2013+ ", computed = %llu, %llu\n", ext4_free_blocks_count(es),
2014+ desc_count, bitmap_count);
2015 return bitmap_count;
2016 #else
2017 desc_count = 0;
2018@@ -2105,7 +707,7 @@ ext4_fsblk_t ext4_count_free_blocks(stru
2019 gdp = ext4_get_group_desc(sb, i, NULL);
2020 if (!gdp)
2021 continue;
2022- desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
2023+ desc_count += ext4_free_blks_count(sb, gdp);
2024 }
2025
2026 return desc_count;
2027@@ -2183,8 +785,9 @@ unsigned long ext4_bg_num_gdb(struct sup
2028
2029 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) ||
2030 metagroup < first_meta_bg)
2031- return ext4_bg_num_gdb_nometa(sb,group);
2032+ return ext4_bg_num_gdb_nometa(sb, group);
2033
2034 return ext4_bg_num_gdb_meta(sb,group);
2035
2036 }
2037+
2038diff -rup b/fs/ext4//bitmap.c a/fs/ext4///bitmap.c
2039--- b/fs/ext4/bitmap.c 2009-02-11 14:37:58.000000000 +0100
2040+++ a/fs/ext4/bitmap.c 2009-02-10 21:40:11.000000000 +0100
2041@@ -15,17 +15,16 @@
2042
2043 static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
2044
2045-unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars)
2046+unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
2047 {
2048- unsigned int i;
2049- unsigned long sum = 0;
2050+ unsigned int i, sum = 0;
2051
2052 if (!map)
2053- return (0);
2054+ return 0;
2055 for (i = 0; i < numchars; i++)
2056 sum += nibblemap[map->b_data[i] & 0xf] +
2057 nibblemap[(map->b_data[i] >> 4) & 0xf];
2058- return (sum);
2059+ return sum;
2060 }
2061
2062 #endif /* EXT4FS_DEBUG */
2063diff -rup b/fs/ext4//dir.c a/fs/ext4///dir.c
2064--- b/fs/ext4/dir.c 2009-02-11 14:37:58.000000000 +0100
2065+++ a/fs/ext4/dir.c 2009-02-10 21:40:11.000000000 +0100
2066@@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table
2067 };
2068
2069 static int ext4_readdir(struct file *, void *, filldir_t);
2070-static int ext4_dx_readdir(struct file * filp,
2071- void * dirent, filldir_t filldir);
2072-static int ext4_release_dir (struct inode * inode,
2073- struct file * filp);
2074+static int ext4_dx_readdir(struct file *filp,
2075+ void *dirent, filldir_t filldir);
2076+static int ext4_release_dir(struct inode *inode,
2077+ struct file *filp);
2078
2079 const struct file_operations ext4_dir_operations = {
2080 .llseek = generic_file_llseek,
2081@@ -61,12 +61,12 @@ static unsigned char get_dtype(struct su
2082 }
2083
2084
2085-int ext4_check_dir_entry (const char * function, struct inode * dir,
2086- struct ext4_dir_entry_2 * de,
2087- struct buffer_head * bh,
2088- unsigned long offset)
2089+int ext4_check_dir_entry(const char *function, struct inode *dir,
2090+ struct ext4_dir_entry_2 *de,
2091+ struct buffer_head *bh,
2092+ unsigned int offset)
2093 {
2094- const char * error_msg = NULL;
2095+ const char *error_msg = NULL;
2096 const int rlen = ext4_rec_len_from_disk(de->rec_len);
2097
2098 if (rlen < EXT4_DIR_REC_LEN(1))
2099@@ -82,20 +82,20 @@ int ext4_check_dir_entry (const char * f
2100 error_msg = "inode out of bounds";
2101
2102 if (error_msg != NULL)
2103- ext4_error (dir->i_sb, function,
2104+ ext4_error(dir->i_sb, function,
2105 "bad entry in directory #%lu: %s - "
2106- "offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
2107+ "offset=%u, inode=%u, rec_len=%d, name_len=%d",
2108 dir->i_ino, error_msg, offset,
2109- (unsigned long) le32_to_cpu(de->inode),
2110+ le32_to_cpu(de->inode),
2111 rlen, de->name_len);
2112 return error_msg == NULL ? 1 : 0;
2113 }
2114
2115-static int ext4_readdir(struct file * filp,
2116- void * dirent, filldir_t filldir)
2117+static int ext4_readdir(struct file *filp,
2118+ void *dirent, filldir_t filldir)
2119 {
2120 int error = 0;
2121- unsigned long offset;
2122+ unsigned int offset;
2123 int i, stored;
2124 struct ext4_dir_entry_2 *de;
2125 struct super_block *sb;
2126@@ -192,14 +192,14 @@ revalidate:
2127 while (!error && filp->f_pos < inode->i_size
2128 && offset < sb->s_blocksize) {
2129 de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
2130- if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
2131- bh, offset)) {
2132+ if (!ext4_check_dir_entry("ext4_readdir", inode, de,
2133+ bh, offset)) {
2134 /*
2135 * On error, skip the f_pos to the next block
2136 */
2137 filp->f_pos = (filp->f_pos |
2138 (sb->s_blocksize - 1)) + 1;
2139- brelse (bh);
2140+ brelse(bh);
2141 ret = stored;
2142 goto out;
2143 }
2144@@ -223,12 +223,12 @@ revalidate:
2145 break;
2146 if (version != filp->f_version)
2147 goto revalidate;
2148- stored ++;
2149+ stored++;
2150 }
2151 filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
2152 }
2153 offset = 0;
2154- brelse (bh);
2155+ brelse(bh);
2156 }
2157 out:
2158 return ret;
2159@@ -295,9 +295,9 @@ static void free_rb_tree_fname(struct rb
2160 parent = rb_parent(n);
2161 fname = rb_entry(n, struct fname, rb_hash);
2162 while (fname) {
2163- struct fname * old = fname;
2164+ struct fname *old = fname;
2165 fname = fname->next;
2166- kfree (old);
2167+ kfree(old);
2168 }
2169 if (!parent)
2170 root->rb_node = NULL;
2171@@ -336,7 +336,7 @@ int ext4_htree_store_dirent(struct file
2172 struct ext4_dir_entry_2 *dirent)
2173 {
2174 struct rb_node **p, *parent = NULL;
2175- struct fname * fname, *new_fn;
2176+ struct fname *fname, *new_fn;
2177 struct dir_private_info *info;
2178 int len;
2179
2180@@ -393,19 +393,20 @@ int ext4_htree_store_dirent(struct file
2181 * for all entres on the fname linked list. (Normally there is only
2182 * one entry on the linked list, unless there are 62 bit hash collisions.)
2183 */
2184-static int call_filldir(struct file * filp, void * dirent,
2185+static int call_filldir(struct file *filp, void *dirent,
2186 filldir_t filldir, struct fname *fname)
2187 {
2188 struct dir_private_info *info = filp->private_data;
2189 loff_t curr_pos;
2190 struct inode *inode = filp->f_path.dentry->d_inode;
2191- struct super_block * sb;
2192+ struct super_block *sb;
2193 int error;
2194
2195 sb = inode->i_sb;
2196
2197 if (!fname) {
2198- printk("call_filldir: called with null fname?!?\n");
2199+ printk(KERN_ERR "ext4: call_filldir: called with "
2200+ "null fname?!?\n");
2201 return 0;
2202 }
2203 curr_pos = hash2pos(fname->hash, fname->minor_hash);
2204@@ -424,8 +425,8 @@ static int call_filldir(struct file * fi
2205 return 0;
2206 }
2207
2208-static int ext4_dx_readdir(struct file * filp,
2209- void * dirent, filldir_t filldir)
2210+static int ext4_dx_readdir(struct file *filp,
2211+ void *dirent, filldir_t filldir)
2212 {
2213 struct dir_private_info *info = filp->private_data;
2214 struct inode *inode = filp->f_path.dentry->d_inode;
2215@@ -512,7 +513,7 @@ finished:
2216 return 0;
2217 }
2218
2219-static int ext4_release_dir (struct inode * inode, struct file * filp)
2220+static int ext4_release_dir(struct inode *inode, struct file *filp)
2221 {
2222 if (filp->private_data)
2223 ext4_htree_free_dir_info(filp->private_data);
2224diff -rup b/fs/ext4//ext4_extents.h a/fs/ext4///ext4_extents.h
2225--- b/fs/ext4/ext4_extents.h 2009-02-11 14:37:58.000000000 +0100
2226+++ a/fs/ext4/ext4_extents.h 2009-02-10 21:40:14.000000000 +0100
2227@@ -181,11 +181,6 @@ static inline unsigned short ext_depth(s
2228 return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
2229 }
2230
2231-static inline void ext4_ext_tree_changed(struct inode *inode)
2232-{
2233- EXT4_I(inode)->i_ext_generation++;
2234-}
2235-
2236 static inline void
2237 ext4_ext_invalidate_cache(struct inode *inode)
2238 {
2239diff -rup b/fs/ext4//ext4.h a/fs/ext4///ext4.h
2240--- b/fs/ext4/ext4.h 2009-02-11 14:37:58.000000000 +0100
2241+++ a/fs/ext4/ext4.h 2009-02-10 21:40:14.000000000 +0100
2242@@ -19,6 +19,7 @@
2243 #include <linux/types.h>
2244 #include <linux/blkdev.h>
2245 #include <linux/magic.h>
2246+#include <linux/jbd2.h>
2247 #include "ext4_i.h"
2248
2249 /*
2250@@ -44,9 +45,9 @@
2251 #ifdef EXT4FS_DEBUG
2252 #define ext4_debug(f, a...) \
2253 do { \
2254- printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
2255+ printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
2256 __FILE__, __LINE__, __func__); \
2257- printk (KERN_DEBUG f, ## a); \
2258+ printk(KERN_DEBUG f, ## a); \
2259 } while (0)
2260 #else
2261 #define ext4_debug(f, a...) do {} while (0)
2262@@ -94,9 +95,9 @@ struct ext4_allocation_request {
2263 /* phys. block for ^^^ */
2264 ext4_fsblk_t pright;
2265 /* how many blocks we want to allocate */
2266- unsigned long len;
2267+ unsigned int len;
2268 /* flags. see above EXT4_MB_HINT_* */
2269- unsigned long flags;
2270+ unsigned int flags;
2271 };
2272
2273 /*
2274@@ -128,7 +129,7 @@ struct ext4_allocation_request {
2275 #else
2276 # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
2277 #endif
2278-#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
2279+#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32))
2280 #ifdef __KERNEL__
2281 # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
2282 #else
2283@@ -156,12 +157,12 @@ struct ext4_group_desc
2284 __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
2285 __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
2286 __le32 bg_inode_table_lo; /* Inodes table block */
2287- __le16 bg_free_blocks_count; /* Free blocks count */
2288- __le16 bg_free_inodes_count; /* Free inodes count */
2289- __le16 bg_used_dirs_count; /* Directories count */
2290+ __le16 bg_free_blocks_count_lo;/* Free blocks count */
2291+ __le16 bg_free_inodes_count_lo;/* Free inodes count */
2292+ __le16 bg_used_dirs_count_lo; /* Directories count */
2293 __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
2294 __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
2295- __le16 bg_itable_unused; /* Unused inodes count */
2296+ __le16 bg_itable_unused_lo; /* Unused inodes count */
2297 __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
2298 __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
2299 __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
2300@@ -169,7 +170,7 @@ struct ext4_group_desc
2301 __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
2302 __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
2303 __le16 bg_used_dirs_count_hi; /* Directories count MSB */
2304- __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
2305+ __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
2306 __u32 bg_reserved2[3];
2307 };
2308
2309@@ -245,7 +246,7 @@ struct flex_groups {
2310 #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
2311
2312 #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
2313-#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
2314+#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
2315
2316 /*
2317 * Inode dynamic state flags
2318@@ -511,7 +512,6 @@ do { \
2319 /*
2320 * Mount flags
2321 */
2322-#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
2323 #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
2324 #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
2325 #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
2326@@ -539,7 +539,6 @@ do { \
2327 #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
2328 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
2329 #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
2330-#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
2331 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
2332 /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
2333 #ifndef _LINUX_EXT2_FS_H
2334@@ -668,7 +667,7 @@ struct ext4_super_block {
2335 };
2336
2337 #ifdef __KERNEL__
2338-static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
2339+static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
2340 {
2341 return sb->s_fs_info;
2342 }
2343@@ -726,11 +725,11 @@ static inline int ext4_valid_inum(struct
2344 */
2345
2346 #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
2347- ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
2348+ ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
2349 #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
2350- ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
2351+ ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
2352 #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
2353- ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
2354+ ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
2355 #define EXT4_SET_COMPAT_FEATURE(sb,mask) \
2356 EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
2357 #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
2358@@ -790,6 +789,8 @@ static inline int ext4_valid_inum(struct
2359 #define EXT4_DEF_RESUID 0
2360 #define EXT4_DEF_RESGID 0
2361
2362+#define EXT4_DEF_INODE_READAHEAD_BLKS 32
2363+
2364 /*
2365 * Default mount options
2366 */
2367@@ -889,6 +890,9 @@ static inline __le16 ext4_rec_len_to_dis
2368 #define DX_HASH_LEGACY 0
2369 #define DX_HASH_HALF_MD4 1
2370 #define DX_HASH_TEA 2
2371+#define DX_HASH_LEGACY_UNSIGNED 3
2372+#define DX_HASH_HALF_MD4_UNSIGNED 4
2373+#define DX_HASH_TEA_UNSIGNED 5
2374
2375 #ifdef __KERNEL__
2376
2377@@ -953,7 +957,25 @@ ext4_group_first_block_no(struct super_b
2378 #define ERR_BAD_DX_DIR -75000
2379
2380 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
2381- unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
2382+ ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
2383+
2384+extern struct proc_dir_entry *ext4_proc_root;
2385+
2386+#ifdef CONFIG_PROC_FS
2387+extern const struct file_operations ext4_ui_proc_fops;
2388+
2389+#define EXT4_PROC_HANDLER(name, var) \
2390+do { \
2391+ proc = proc_create_data(name, mode, sbi->s_proc, \
2392+ &ext4_ui_proc_fops, &sbi->s_##var); \
2393+ if (proc == NULL) { \
2394+ printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
2395+ goto err_out; \
2396+ } \
2397+} while (0)
2398+#else
2399+#define EXT4_PROC_HANDLER(name, var)
2400+#endif
2401
2402 /*
2403 * Function prototypes
2404@@ -967,6 +989,9 @@ void ext4_get_group_no_and_offset(struct
2405 # define ATTRIB_NORET __attribute__((noreturn))
2406 # define NORET_AND noreturn,
2407
2408+/* bitmap.c */
2409+extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
2410+
2411 /* balloc.c */
2412 extern unsigned int ext4_block_group(struct super_block *sb,
2413 ext4_fsblk_t blocknr);
2414@@ -975,55 +1000,44 @@ extern ext4_grpblk_t ext4_block_group_of
2415 extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
2416 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
2417 ext4_group_t group);
2418-extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
2419- ext4_fsblk_t goal, int *errp);
2420 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
2421 ext4_fsblk_t goal, unsigned long *count, int *errp);
2422-extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
2423- ext4_lblk_t iblock, ext4_fsblk_t goal,
2424- unsigned long *count, int *errp);
2425-extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
2426- ext4_fsblk_t goal, unsigned long *count, int *errp);
2427-extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
2428- ext4_fsblk_t nblocks);
2429-extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
2430+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
2431+extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
2432+extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
2433 ext4_fsblk_t block, unsigned long count, int metadata);
2434-extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
2435- ext4_fsblk_t block, unsigned long count,
2436- unsigned long *pdquot_freed_blocks);
2437-extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
2438-extern void ext4_check_blocks_bitmap (struct super_block *);
2439+extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
2440+ ext4_fsblk_t block, unsigned long count);
2441+extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
2442+extern void ext4_check_blocks_bitmap(struct super_block *);
2443 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
2444 ext4_group_t block_group,
2445 struct buffer_head ** bh);
2446 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
2447-extern void ext4_init_block_alloc_info(struct inode *);
2448-extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
2449
2450 /* dir.c */
2451 extern int ext4_check_dir_entry(const char *, struct inode *,
2452 struct ext4_dir_entry_2 *,
2453- struct buffer_head *, unsigned long);
2454+ struct buffer_head *, unsigned int);
2455 extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
2456 __u32 minor_hash,
2457 struct ext4_dir_entry_2 *dirent);
2458 extern void ext4_htree_free_dir_info(struct dir_private_info *p);
2459
2460 /* fsync.c */
2461-extern int ext4_sync_file (struct file *, struct dentry *, int);
2462+extern int ext4_sync_file(struct file *, struct dentry *, int);
2463
2464 /* hash.c */
2465 extern int ext4fs_dirhash(const char *name, int len, struct
2466 dx_hash_info *hinfo);
2467
2468 /* ialloc.c */
2469-extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
2470-extern void ext4_free_inode (handle_t *, struct inode *);
2471-extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
2472-extern unsigned long ext4_count_free_inodes (struct super_block *);
2473-extern unsigned long ext4_count_dirs (struct super_block *);
2474-extern void ext4_check_inodes_bitmap (struct super_block *);
2475-extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
2476+extern struct inode * ext4_new_inode(handle_t *, struct inode *, int);
2477+extern void ext4_free_inode(handle_t *, struct inode *);
2478+extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
2479+extern unsigned long ext4_count_free_inodes(struct super_block *);
2480+extern unsigned long ext4_count_dirs(struct super_block *);
2481+extern void ext4_check_inodes_bitmap(struct super_block *);
2482
2483 /* mballoc.c */
2484 extern long ext4_mb_stats;
2485@@ -1033,17 +1047,18 @@ extern int ext4_mb_release(struct super_
2486 extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
2487 struct ext4_allocation_request *, int *);
2488 extern int ext4_mb_reserve_blocks(struct super_block *, int);
2489-extern void ext4_mb_discard_inode_preallocations(struct inode *);
2490+extern void ext4_discard_preallocations(struct inode *);
2491 extern int __init init_ext4_mballoc(void);
2492 extern void exit_ext4_mballoc(void);
2493 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
2494 unsigned long, unsigned long, int, unsigned long *);
2495-extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
2496+extern int ext4_mb_add_groupinfo(struct super_block *sb,
2497 ext4_group_t i, struct ext4_group_desc *desc);
2498 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
2499 ext4_grpblk_t add);
2500-
2501-
2502+extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
2503+extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
2504+ ext4_group_t, int);
2505 /* inode.c */
2506 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
2507 struct buffer_head *bh, ext4_fsblk_t blocknr);
2508@@ -1051,24 +1066,19 @@ struct buffer_head *ext4_getblk(handle_t
2509 ext4_lblk_t, int, int *);
2510 struct buffer_head *ext4_bread(handle_t *, struct inode *,
2511 ext4_lblk_t, int, int *);
2512-int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
2513- ext4_lblk_t iblock, unsigned long maxblocks,
2514- struct buffer_head *bh_result,
2515- int create, int extend_disksize);
2516
2517 extern struct inode *ext4_iget(struct super_block *, unsigned long);
2518-extern int ext4_write_inode (struct inode *, int);
2519-extern int ext4_setattr (struct dentry *, struct iattr *);
2520+extern int ext4_write_inode(struct inode *, int);
2521+extern int ext4_setattr(struct dentry *, struct iattr *);
2522 extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
2523 struct kstat *stat);
2524-extern void ext4_delete_inode (struct inode *);
2525-extern int ext4_sync_inode (handle_t *, struct inode *);
2526-extern void ext4_discard_reservation (struct inode *);
2527+extern void ext4_delete_inode(struct inode *);
2528+extern int ext4_sync_inode(handle_t *, struct inode *);
2529 extern void ext4_dirty_inode(struct inode *);
2530 extern int ext4_change_inode_journal_flag(struct inode *, int);
2531 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
2532 extern int ext4_can_truncate(struct inode *inode);
2533-extern void ext4_truncate (struct inode *);
2534+extern void ext4_truncate(struct inode *);
2535 extern void ext4_set_inode_flags(struct inode *);
2536 extern void ext4_get_inode_flags(struct ext4_inode_info *);
2537 extern void ext4_set_aops(struct inode *inode);
2538@@ -1081,7 +1091,7 @@ extern int ext4_page_mkwrite(struct vm_a
2539
2540 /* ioctl.c */
2541 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
2542-extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
2543+extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
2544
2545 /* migrate.c */
2546 extern int ext4_ext_migrate(struct inode *);
2547@@ -1099,14 +1109,17 @@ extern int ext4_group_extend(struct supe
2548 ext4_fsblk_t n_blocks_count);
2549
2550 /* super.c */
2551-extern void ext4_error (struct super_block *, const char *, const char *, ...)
2552+extern void ext4_error(struct super_block *, const char *, const char *, ...)
2553 __attribute__ ((format (printf, 3, 4)));
2554-extern void __ext4_std_error (struct super_block *, const char *, int);
2555-extern void ext4_abort (struct super_block *, const char *, const char *, ...)
2556+extern void __ext4_std_error(struct super_block *, const char *, int);
2557+extern void ext4_abort(struct super_block *, const char *, const char *, ...)
2558 __attribute__ ((format (printf, 3, 4)));
2559-extern void ext4_warning (struct super_block *, const char *, const char *, ...)
2560+extern void ext4_warning(struct super_block *, const char *, const char *, ...)
2561 __attribute__ ((format (printf, 3, 4)));
2562-extern void ext4_update_dynamic_rev (struct super_block *sb);
2563+extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
2564+ const char *, const char *, ...)
2565+ __attribute__ ((format (printf, 4, 5)));
2566+extern void ext4_update_dynamic_rev(struct super_block *sb);
2567 extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
2568 __u32 compat);
2569 extern int ext4_update_rocompat_feature(handle_t *handle,
2570@@ -1119,12 +1132,28 @@ extern ext4_fsblk_t ext4_inode_bitmap(st
2571 struct ext4_group_desc *bg);
2572 extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
2573 struct ext4_group_desc *bg);
2574+extern __u32 ext4_free_blks_count(struct super_block *sb,
2575+ struct ext4_group_desc *bg);
2576+extern __u32 ext4_free_inodes_count(struct super_block *sb,
2577+ struct ext4_group_desc *bg);
2578+extern __u32 ext4_used_dirs_count(struct super_block *sb,
2579+ struct ext4_group_desc *bg);
2580+extern __u32 ext4_itable_unused_count(struct super_block *sb,
2581+ struct ext4_group_desc *bg);
2582 extern void ext4_block_bitmap_set(struct super_block *sb,
2583 struct ext4_group_desc *bg, ext4_fsblk_t blk);
2584 extern void ext4_inode_bitmap_set(struct super_block *sb,
2585 struct ext4_group_desc *bg, ext4_fsblk_t blk);
2586 extern void ext4_inode_table_set(struct super_block *sb,
2587 struct ext4_group_desc *bg, ext4_fsblk_t blk);
2588+extern void ext4_free_blks_set(struct super_block *sb,
2589+ struct ext4_group_desc *bg, __u32 count);
2590+extern void ext4_free_inodes_set(struct super_block *sb,
2591+ struct ext4_group_desc *bg, __u32 count);
2592+extern void ext4_used_dirs_set(struct super_block *sb,
2593+ struct ext4_group_desc *bg, __u32 count);
2594+extern void ext4_itable_unused_set(struct super_block *sb,
2595+ struct ext4_group_desc *bg, __u32 count);
2596
2597 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
2598 {
2599@@ -1179,7 +1208,7 @@ static inline void ext4_isize_set(struct
2600
2601 static inline
2602 struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
2603- ext4_group_t group)
2604+ ext4_group_t group)
2605 {
2606 struct ext4_group_info ***grp_info;
2607 long indexv, indexh;
2608@@ -1207,6 +1236,72 @@ do { \
2609 __ext4_std_error((sb), __func__, (errno)); \
2610 } while (0)
2611
2612+#ifdef CONFIG_SMP
2613+/* Each CPU can accumulate FBC_BATCH blocks in their local
2614+ * counters. So we need to make sure we have free blocks more
2615+ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
2616+ */
2617+#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
2618+#else
2619+#define EXT4_FREEBLOCKS_WATERMARK 0
2620+#endif
2621+
2622+static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2623+{
2624+ /*
2625+ * XXX: replace with spinlock if seen contended -bzzz
2626+ */
2627+ down_write(&EXT4_I(inode)->i_data_sem);
2628+ if (newsize > EXT4_I(inode)->i_disksize)
2629+ EXT4_I(inode)->i_disksize = newsize;
2630+ up_write(&EXT4_I(inode)->i_data_sem);
2631+ return ;
2632+}
2633+
2634+struct ext4_group_info {
2635+ unsigned long bb_state;
2636+ struct rb_root bb_free_root;
2637+ unsigned short bb_first_free;
2638+ unsigned short bb_free;
2639+ unsigned short bb_fragments;
2640+ struct list_head bb_prealloc_list;
2641+#ifdef DOUBLE_CHECK
2642+ void *bb_bitmap;
2643+#endif
2644+ struct rw_semaphore alloc_sem;
2645+ unsigned short bb_counters[];
2646+};
2647+
2648+#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
2649+#define EXT4_GROUP_INFO_LOCKED_BIT 1
2650+
2651+#define EXT4_MB_GRP_NEED_INIT(grp) \
2652+ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
2653+
2654+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
2655+{
2656+ struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2657+
2658+ bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
2659+}
2660+
2661+static inline void ext4_unlock_group(struct super_block *sb,
2662+ ext4_group_t group)
2663+{
2664+ struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2665+
2666+ bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
2667+}
2668+
2669+static inline int ext4_is_group_locked(struct super_block *sb,
2670+ ext4_group_t group)
2671+{
2672+ struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2673+
2674+ return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
2675+ &(grinfo->bb_state));
2676+}
2677+
2678 /*
2679 * Inodes and files operations
2680 */
2681@@ -1232,18 +1327,37 @@ extern int ext4_ext_writepage_trans_bloc
2682 extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
2683 int chunk);
2684 extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2685- ext4_lblk_t iblock,
2686- unsigned long max_blocks, struct buffer_head *bh_result,
2687- int create, int extend_disksize);
2688+ ext4_lblk_t iblock, unsigned int max_blocks,
2689+ struct buffer_head *bh_result,
2690+ int create, int extend_disksize);
2691 extern void ext4_ext_truncate(struct inode *);
2692 extern void ext4_ext_init(struct super_block *);
2693 extern void ext4_ext_release(struct super_block *);
2694 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
2695 loff_t len);
2696 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
2697- sector_t block, unsigned long max_blocks,
2698+ sector_t block, unsigned int max_blocks,
2699 struct buffer_head *bh, int create,
2700 int extend_disksize, int flag);
2701+
2702+#define BH_JBDPrivateStart (BH_Unshadow+1)
2703+/*
2704+ * Add new method to test wether block and inode bitmaps are properly
2705+ * initialized. With uninit_bg reading the block from disk is not enough
2706+ * to mark the bitmap uptodate. We need to also zero-out the bitmap
2707+ */
2708+#define BH_BITMAP_UPTODATE BH_JBDPrivateStart
2709+
2710+static inline int bitmap_uptodate(struct buffer_head *bh)
2711+{
2712+ return (buffer_uptodate(bh) &&
2713+ test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
2714+}
2715+static inline void set_bitmap_uptodate(struct buffer_head *bh)
2716+{
2717+ set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
2718+}
2719+
2720 #endif /* __KERNEL__ */
2721
2722 #endif /* _EXT4_H */
2723diff -rup b/fs/ext4//ext4_i.h a/fs/ext4///ext4_i.h
2724--- b/fs/ext4/ext4_i.h 2009-02-11 14:37:58.000000000 +0100
2725+++ a/fs/ext4/ext4_i.h 2009-02-10 21:40:14.000000000 +0100
2726@@ -31,39 +31,7 @@ typedef unsigned long long ext4_fsblk_t;
2727 typedef __u32 ext4_lblk_t;
2728
2729 /* data type for block group number */
2730-typedef unsigned long ext4_group_t;
2731-
2732-struct ext4_reserve_window {
2733- ext4_fsblk_t _rsv_start; /* First byte reserved */
2734- ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
2735-};
2736-
2737-struct ext4_reserve_window_node {
2738- struct rb_node rsv_node;
2739- __u32 rsv_goal_size;
2740- __u32 rsv_alloc_hit;
2741- struct ext4_reserve_window rsv_window;
2742-};
2743-
2744-struct ext4_block_alloc_info {
2745- /* information about reservation window */
2746- struct ext4_reserve_window_node rsv_window_node;
2747- /*
2748- * was i_next_alloc_block in ext4_inode_info
2749- * is the logical (file-relative) number of the
2750- * most-recently-allocated block in this file.
2751- * We use this for detecting linearly ascending allocation requests.
2752- */
2753- ext4_lblk_t last_alloc_logical_block;
2754- /*
2755- * Was i_next_alloc_goal in ext4_inode_info
2756- * is the *physical* companion to i_next_alloc_block.
2757- * it the physical block number of the block which was most-recentl
2758- * allocated to this file. This give us the goal (target) for the next
2759- * allocation when we detect linearly ascending requests.
2760- */
2761- ext4_fsblk_t last_alloc_physical_block;
2762-};
2763+typedef unsigned int ext4_group_t;
2764
2765 #define rsv_start rsv_window._rsv_start
2766 #define rsv_end rsv_window._rsv_end
2767@@ -97,11 +65,8 @@ struct ext4_inode_info {
2768 ext4_group_t i_block_group;
2769 __u32 i_state; /* Dynamic state flags for ext4 */
2770
2771- /* block reservation info */
2772- struct ext4_block_alloc_info *i_block_alloc_info;
2773-
2774 ext4_lblk_t i_dir_start_lookup;
2775-#ifdef CONFIG_EXT4DEV_FS_XATTR
2776+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
2777 /*
2778 * Extended attributes can be read independently of the main file
2779 * data. Taking i_mutex even when reading would cause contention
2780@@ -111,7 +76,7 @@ struct ext4_inode_info {
2781 */
2782 struct rw_semaphore xattr_sem;
2783 #endif
2784-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
2785+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
2786 struct posix_acl *i_acl;
2787 struct posix_acl *i_default_acl;
2788 #endif
2789@@ -135,9 +100,6 @@ struct ext4_inode_info {
2790 */
2791 loff_t i_disksize;
2792
2793- /* on-disk additional length */
2794- __u16 i_extra_isize;
2795-
2796 /*
2797 * i_data_sem is for serialising ext4_truncate() against
2798 * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
2799@@ -152,7 +114,6 @@ struct ext4_inode_info {
2800 struct inode vfs_inode;
2801 struct jbd2_inode jinode;
2802
2803- unsigned long i_ext_generation;
2804 struct ext4_ext_cache i_cached_extent;
2805 /*
2806 * File creation time. Its function is same as that of
2807@@ -165,10 +126,14 @@ struct ext4_inode_info {
2808 spinlock_t i_prealloc_lock;
2809
2810 /* allocation reservation info for delalloc */
2811- unsigned long i_reserved_data_blocks;
2812- unsigned long i_reserved_meta_blocks;
2813- unsigned long i_allocated_meta_blocks;
2814+ unsigned int i_reserved_data_blocks;
2815+ unsigned int i_reserved_meta_blocks;
2816+ unsigned int i_allocated_meta_blocks;
2817 unsigned short i_delalloc_reserved_flag;
2818+
2819+ /* on-disk additional length */
2820+ __u16 i_extra_isize;
2821+
2822 spinlock_t i_block_reservation_lock;
2823 };
2824
2825diff -rup b/fs/ext4//ext4_sb.h a/fs/ext4///ext4_sb.h
2826--- b/fs/ext4/ext4_sb.h 2009-02-11 14:37:58.000000000 +0100
2827+++ a/fs/ext4/ext4_sb.h 2009-02-10 21:40:14.000000000 +0100
2828@@ -40,8 +40,8 @@ struct ext4_sb_info {
2829 unsigned long s_blocks_last; /* Last seen block count */
2830 loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
2831 struct buffer_head * s_sbh; /* Buffer containing the super block */
2832- struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
2833- struct buffer_head ** s_group_desc;
2834+ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
2835+ struct buffer_head **s_group_desc;
2836 unsigned long s_mount_opt;
2837 ext4_fsblk_t s_sb_block;
2838 uid_t s_resuid;
2839@@ -52,23 +52,26 @@ struct ext4_sb_info {
2840 int s_desc_per_block_bits;
2841 int s_inode_size;
2842 int s_first_ino;
2843+ unsigned int s_inode_readahead_blks;
2844 spinlock_t s_next_gen_lock;
2845 u32 s_next_generation;
2846 u32 s_hash_seed[4];
2847 int s_def_hash_version;
2848+ int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
2849 struct percpu_counter s_freeblocks_counter;
2850 struct percpu_counter s_freeinodes_counter;
2851 struct percpu_counter s_dirs_counter;
2852+ struct percpu_counter s_dirtyblocks_counter;
2853 struct blockgroup_lock s_blockgroup_lock;
2854+ struct proc_dir_entry *s_proc;
2855
2856 /* root of the per fs reservation window tree */
2857 spinlock_t s_rsv_window_lock;
2858 struct rb_root s_rsv_window_root;
2859- struct ext4_reserve_window_node s_rsv_window_head;
2860
2861 /* Journaling */
2862- struct inode * s_journal_inode;
2863- struct journal_s * s_journal;
2864+ struct inode *s_journal_inode;
2865+ struct journal_s *s_journal;
2866 struct list_head s_orphan;
2867 unsigned long s_commit_interval;
2868 struct block_device *journal_bdev;
2869@@ -106,12 +109,12 @@ struct ext4_sb_info {
2870
2871 /* tunables */
2872 unsigned long s_stripe;
2873- unsigned long s_mb_stream_request;
2874- unsigned long s_mb_max_to_scan;
2875- unsigned long s_mb_min_to_scan;
2876- unsigned long s_mb_stats;
2877- unsigned long s_mb_order2_reqs;
2878- unsigned long s_mb_group_prealloc;
2879+ unsigned int s_mb_stream_request;
2880+ unsigned int s_mb_max_to_scan;
2881+ unsigned int s_mb_min_to_scan;
2882+ unsigned int s_mb_stats;
2883+ unsigned int s_mb_order2_reqs;
2884+ unsigned int s_mb_group_prealloc;
2885 /* where last allocation was done - for stream allocation */
2886 unsigned long s_mb_last_group;
2887 unsigned long s_mb_last_start;
2888@@ -121,7 +124,6 @@ struct ext4_sb_info {
2889 int s_mb_history_cur;
2890 int s_mb_history_max;
2891 int s_mb_history_num;
2892- struct proc_dir_entry *s_mb_proc;
2893 spinlock_t s_mb_history_lock;
2894 int s_mb_history_filter;
2895
2896diff -rup b/fs/ext4//extents.c a/fs/ext4///extents.c
2897--- b/fs/ext4/extents.c 2009-02-11 14:37:58.000000000 +0100
2898+++ a/fs/ext4/extents.c 2009-02-10 21:40:11.000000000 +0100
2899@@ -190,7 +190,7 @@ ext4_ext_new_meta_block(handle_t *handle
2900 ext4_fsblk_t goal, newblock;
2901
2902 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
2903- newblock = ext4_new_meta_block(handle, inode, goal, err);
2904+ newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
2905 return newblock;
2906 }
2907
2908@@ -383,8 +383,8 @@ static void ext4_ext_show_leaf(struct in
2909 ext_debug("\n");
2910 }
2911 #else
2912-#define ext4_ext_show_path(inode,path)
2913-#define ext4_ext_show_leaf(inode,path)
2914+#define ext4_ext_show_path(inode, path)
2915+#define ext4_ext_show_leaf(inode, path)
2916 #endif
2917
2918 void ext4_ext_drop_refs(struct ext4_ext_path *path)
2919@@ -440,9 +440,10 @@ ext4_ext_binsearch_idx(struct inode *ino
2920 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
2921 if (k != 0 &&
2922 le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
2923- printk("k=%d, ix=0x%p, first=0x%p\n", k,
2924- ix, EXT_FIRST_INDEX(eh));
2925- printk("%u <= %u\n",
2926+ printk(KERN_DEBUG "k=%d, ix=0x%p, "
2927+ "first=0x%p\n", k,
2928+ ix, EXT_FIRST_INDEX(eh));
2929+ printk(KERN_DEBUG "%u <= %u\n",
2930 le32_to_cpu(ix->ei_block),
2931 le32_to_cpu(ix[-1].ei_block));
2932 }
2933@@ -1158,15 +1159,13 @@ ext4_ext_search_right(struct inode *inod
2934 while (--depth >= 0) {
2935 ix = path[depth].p_idx;
2936 if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
2937- break;
2938+ goto got_index;
2939 }
2940
2941- if (depth < 0) {
2942- /* we've gone up to the root and
2943- * found no index to the right */
2944- return 0;
2945- }
2946+ /* we've gone up to the root and found no index to the right */
2947+ return 0;
2948
2949+got_index:
2950 /* we've found index to the right, let's
2951 * follow it and find the closest allocated
2952 * block to the right */
2953@@ -1199,7 +1198,6 @@ ext4_ext_search_right(struct inode *inod
2954 *phys = ext_pblock(ex);
2955 put_bh(bh);
2956 return 0;
2957-
2958 }
2959
2960 /*
2961@@ -1475,7 +1473,7 @@ int ext4_ext_insert_extent(handle_t *han
2962 struct ext4_ext_path *path,
2963 struct ext4_extent *newext)
2964 {
2965- struct ext4_extent_header * eh;
2966+ struct ext4_extent_header *eh;
2967 struct ext4_extent *ex, *fex;
2968 struct ext4_extent *nearex; /* nearest extent */
2969 struct ext4_ext_path *npath = NULL;
2970@@ -1620,7 +1618,6 @@ cleanup:
2971 ext4_ext_drop_refs(npath);
2972 kfree(npath);
2973 }
2974- ext4_ext_tree_changed(inode);
2975 ext4_ext_invalidate_cache(inode);
2976 return err;
2977 }
2978@@ -2124,7 +2121,6 @@ static int ext4_ext_remove_space(struct
2979 }
2980 }
2981 out:
2982- ext4_ext_tree_changed(inode);
2983 ext4_ext_drop_refs(path);
2984 kfree(path);
2985 ext4_journal_stop(handle);
2986@@ -2142,7 +2138,7 @@ void ext4_ext_init(struct super_block *s
2987 */
2988
2989 if (test_opt(sb, EXTENTS)) {
2990- printk("EXT4-fs: file extents enabled");
2991+ printk(KERN_INFO "EXT4-fs: file extents enabled");
2992 #ifdef AGGRESSIVE_TEST
2993 printk(", aggressive tests");
2994 #endif
2995@@ -2271,7 +2267,7 @@ static int ext4_ext_convert_to_initializ
2996 struct inode *inode,
2997 struct ext4_ext_path *path,
2998 ext4_lblk_t iblock,
2999- unsigned long max_blocks)
3000+ unsigned int max_blocks)
3001 {
3002 struct ext4_extent *ex, newex, orig_ex;
3003 struct ext4_extent *ex1 = NULL;
3004@@ -2569,26 +2565,26 @@ fix_extent_len:
3005 */
3006 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3007 ext4_lblk_t iblock,
3008- unsigned long max_blocks, struct buffer_head *bh_result,
3009+ unsigned int max_blocks, struct buffer_head *bh_result,
3010 int create, int extend_disksize)
3011 {
3012 struct ext4_ext_path *path = NULL;
3013 struct ext4_extent_header *eh;
3014 struct ext4_extent newex, *ex;
3015- ext4_fsblk_t goal, newblock;
3016- int err = 0, depth, ret;
3017- unsigned long allocated = 0;
3018+ ext4_fsblk_t newblock;
3019+ int err = 0, depth, ret, cache_type;
3020+ unsigned int allocated = 0;
3021 struct ext4_allocation_request ar;
3022 loff_t disksize;
3023
3024 __clear_bit(BH_New, &bh_result->b_state);
3025- ext_debug("blocks %u/%lu requested for inode %u\n",
3026+ ext_debug("blocks %u/%u requested for inode %u\n",
3027 iblock, max_blocks, inode->i_ino);
3028
3029 /* check in cache */
3030- goal = ext4_ext_in_cache(inode, iblock, &newex);
3031- if (goal) {
3032- if (goal == EXT4_EXT_CACHE_GAP) {
3033+ cache_type = ext4_ext_in_cache(inode, iblock, &newex);
3034+ if (cache_type) {
3035+ if (cache_type == EXT4_EXT_CACHE_GAP) {
3036 if (!create) {
3037 /*
3038 * block isn't allocated yet and
3039@@ -2597,7 +2593,7 @@ int ext4_ext_get_blocks(handle_t *handle
3040 goto out2;
3041 }
3042 /* we should allocate requested block */
3043- } else if (goal == EXT4_EXT_CACHE_EXTENT) {
3044+ } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
3045 /* block is already allocated */
3046 newblock = iblock
3047 - le32_to_cpu(newex.ee_block)
3048@@ -2696,11 +2692,8 @@ int ext4_ext_get_blocks(handle_t *handle
3049 goto out2;
3050 }
3051 /*
3052- * Okay, we need to do block allocation. Lazily initialize the block
3053- * allocation info here if necessary.
3054+ * Okay, we need to do block allocation.
3055 */
3056- if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
3057- ext4_init_block_alloc_info(inode);
3058
3059 /* find neighbour allocated blocks */
3060 ar.lleft = iblock;
3061@@ -2748,7 +2741,7 @@ int ext4_ext_get_blocks(handle_t *handle
3062 if (!newblock)
3063 goto out2;
3064 ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
3065- goal, newblock, allocated);
3066+ ar.goal, newblock, allocated);
3067
3068 /* try to insert new extent into found leaf and return */
3069 ext4_ext_store_pblock(&newex, newblock);
3070@@ -2760,7 +2753,7 @@ int ext4_ext_get_blocks(handle_t *handle
3071 /* free data blocks we just allocated */
3072 /* not a good idea to call discard here directly,
3073 * but otherwise we'd need to call it every free() */
3074- ext4_mb_discard_inode_preallocations(inode);
3075+ ext4_discard_preallocations(inode);
3076 ext4_free_blocks(handle, inode, ext_pblock(&newex),
3077 ext4_ext_get_actual_len(&newex), 0);
3078 goto out2;
3079@@ -2824,7 +2817,7 @@ void ext4_ext_truncate(struct inode *ino
3080 down_write(&EXT4_I(inode)->i_data_sem);
3081 ext4_ext_invalidate_cache(inode);
3082
3083- ext4_discard_reservation(inode);
3084+ ext4_discard_preallocations(inode);
3085
3086 /*
3087 * TODO: optimization is possible here.
3088@@ -2877,10 +2870,11 @@ static void ext4_falloc_update_inode(str
3089 * Update only when preallocation was requested beyond
3090 * the file size.
3091 */
3092- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
3093- new_size > i_size_read(inode)) {
3094- i_size_write(inode, new_size);
3095- EXT4_I(inode)->i_disksize = new_size;
3096+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
3097+ if (new_size > i_size_read(inode))
3098+ i_size_write(inode, new_size);
3099+ if (new_size > EXT4_I(inode)->i_disksize)
3100+ ext4_update_i_disksize(inode, new_size);
3101 }
3102
3103 }
3104@@ -2897,7 +2891,7 @@ long ext4_fallocate(struct inode *inode,
3105 handle_t *handle;
3106 ext4_lblk_t block;
3107 loff_t new_size;
3108- unsigned long max_blocks;
3109+ unsigned int max_blocks;
3110 int ret = 0;
3111 int ret2 = 0;
3112 int retries = 0;
3113diff -rup b/fs/ext4//file.c a/fs/ext4///file.c
3114--- b/fs/ext4/file.c 2009-02-11 14:37:58.000000000 +0100
3115+++ a/fs/ext4/file.c 2009-02-10 21:40:11.000000000 +0100
3116@@ -31,14 +31,14 @@
3117 * from ext4_file_open: open gets called at every open, but release
3118 * gets called only when /all/ the files are closed.
3119 */
3120-static int ext4_release_file (struct inode * inode, struct file * filp)
3121+static int ext4_release_file(struct inode *inode, struct file *filp)
3122 {
3123 /* if we are the last writer on the inode, drop the block reservation */
3124 if ((filp->f_mode & FMODE_WRITE) &&
3125 (atomic_read(&inode->i_writecount) == 1))
3126 {
3127 down_write(&EXT4_I(inode)->i_data_sem);
3128- ext4_discard_reservation(inode);
3129+ ext4_discard_preallocations(inode);
3130 up_write(&EXT4_I(inode)->i_data_sem);
3131 }
3132 if (is_dx(inode) && filp->private_data)
3133@@ -162,7 +162,7 @@ const struct inode_operations ext4_file_
3134 .truncate = ext4_truncate,
3135 .setattr = ext4_setattr,
3136 .getattr = ext4_getattr,
3137-#ifdef CONFIG_EXT4DEV_FS_XATTR
3138+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
3139 .setxattr = generic_setxattr,
3140 .getxattr = generic_getxattr,
3141 .listxattr = ext4_listxattr,
3142diff -rup b/fs/ext4//fsync.c a/fs/ext4///fsync.c
3143--- b/fs/ext4/fsync.c 2009-02-11 14:37:58.000000000 +0100
3144+++ a/fs/ext4/fsync.c 2009-02-10 21:40:11.000000000 +0100
3145@@ -28,6 +28,7 @@
3146 #include <linux/writeback.h>
3147 #include <linux/jbd2.h>
3148 #include <linux/blkdev.h>
3149+#include <linux/marker.h>
3150 #include "ext4.h"
3151 #include "ext4_jbd2.h"
3152
3153@@ -43,7 +44,7 @@
3154 * inode to disk.
3155 */
3156
3157-int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
3158+int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
3159 {
3160 struct inode *inode = dentry->d_inode;
3161 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
3162@@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, s
3163
3164 J_ASSERT(ext4_journal_current_handle() == NULL);
3165
3166+ trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld",
3167+ inode->i_sb->s_id, datasync, inode->i_ino,
3168+ dentry->d_parent->d_inode->i_ino);
3169+
3170 /*
3171 * data=writeback:
3172 * The caller's filemap_fdatawrite()/wait will sync the data.
3173diff -rup b/fs/ext4//hash.c a/fs/ext4///hash.c
3174--- b/fs/ext4/hash.c 2009-02-11 14:37:58.000000000 +0100
3175+++ a/fs/ext4/hash.c 2009-02-10 21:40:11.000000000 +0100
3176@@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4],
3177 sum += DELTA;
3178 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
3179 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
3180- } while(--n);
3181+ } while (--n);
3182
3183 buf[0] += b0;
3184 buf[1] += b1;
3185@@ -35,23 +35,43 @@ static void TEA_transform(__u32 buf[4],
3186
3187
3188 /* The old legacy hash */
3189-static __u32 dx_hack_hash (const char *name, int len)
3190+static __u32 dx_hack_hash_unsigned(const char *name, int len)
3191 {
3192- __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
3193+ __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
3194+ const unsigned char *ucp = (const unsigned char *) name;
3195+
3196+ while (len--) {
3197+ hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
3198+
3199+ if (hash & 0x80000000)
3200+ hash -= 0x7fffffff;
3201+ hash1 = hash0;
3202+ hash0 = hash;
3203+ }
3204+ return hash0 << 1;
3205+}
3206+
3207+static __u32 dx_hack_hash_signed(const char *name, int len)
3208+{
3209+ __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
3210+ const signed char *scp = (const signed char *) name;
3211+
3212 while (len--) {
3213- __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
3214+ hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));
3215
3216- if (hash & 0x80000000) hash -= 0x7fffffff;
3217+ if (hash & 0x80000000)
3218+ hash -= 0x7fffffff;
3219 hash1 = hash0;
3220 hash0 = hash;
3221 }
3222- return (hash0 << 1);
3223+ return hash0 << 1;
3224 }
3225
3226-static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
3227+static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
3228 {
3229 __u32 pad, val;
3230 int i;
3231+ const signed char *scp = (const signed char *) msg;
3232
3233 pad = (__u32)len | ((__u32)len << 8);
3234 pad |= pad << 16;
3235@@ -59,10 +79,38 @@ static void str2hashbuf(const char *msg,
3236 val = pad;
3237 if (len > num*4)
3238 len = num * 4;
3239- for (i=0; i < len; i++) {
3240+ for (i = 0; i < len; i++) {
3241 if ((i % 4) == 0)
3242 val = pad;
3243- val = msg[i] + (val << 8);
3244+ val = ((int) scp[i]) + (val << 8);
3245+ if ((i % 4) == 3) {
3246+ *buf++ = val;
3247+ val = pad;
3248+ num--;
3249+ }
3250+ }
3251+ if (--num >= 0)
3252+ *buf++ = val;
3253+ while (--num >= 0)
3254+ *buf++ = pad;
3255+}
3256+
3257+static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
3258+{
3259+ __u32 pad, val;
3260+ int i;
3261+ const unsigned char *ucp = (const unsigned char *) msg;
3262+
3263+ pad = (__u32)len | ((__u32)len << 8);
3264+ pad |= pad << 16;
3265+
3266+ val = pad;
3267+ if (len > num*4)
3268+ len = num * 4;
3269+ for (i = 0; i < len; i++) {
3270+ if ((i % 4) == 0)
3271+ val = pad;
3272+ val = ((int) ucp[i]) + (val << 8);
3273 if ((i % 4) == 3) {
3274 *buf++ = val;
3275 val = pad;
3276@@ -95,6 +143,8 @@ int ext4fs_dirhash(const char *name, int
3277 const char *p;
3278 int i;
3279 __u32 in[8], buf[4];
3280+ void (*str2hashbuf)(const char *, int, __u32 *, int) =
3281+ str2hashbuf_signed;
3282
3283 /* Initialize the default seed for the hash checksum functions */
3284 buf[0] = 0x67452301;
3285@@ -104,7 +154,7 @@ int ext4fs_dirhash(const char *name, int
3286
3287 /* Check to see if the seed is all zero's */
3288 if (hinfo->seed) {
3289- for (i=0; i < 4; i++) {
3290+ for (i = 0; i < 4; i++) {
3291 if (hinfo->seed[i])
3292 break;
3293 }
3294@@ -113,13 +163,18 @@ int ext4fs_dirhash(const char *name, int
3295 }
3296
3297 switch (hinfo->hash_version) {
3298+ case DX_HASH_LEGACY_UNSIGNED:
3299+ hash = dx_hack_hash_unsigned(name, len);
3300+ break;
3301 case DX_HASH_LEGACY:
3302- hash = dx_hack_hash(name, len);
3303+ hash = dx_hack_hash_signed(name, len);
3304 break;
3305+ case DX_HASH_HALF_MD4_UNSIGNED:
3306+ str2hashbuf = str2hashbuf_unsigned;
3307 case DX_HASH_HALF_MD4:
3308 p = name;
3309 while (len > 0) {
3310- str2hashbuf(p, len, in, 8);
3311+ (*str2hashbuf)(p, len, in, 8);
3312 half_md4_transform(buf, in);
3313 len -= 32;
3314 p += 32;
3315@@ -127,10 +182,12 @@ int ext4fs_dirhash(const char *name, int
3316 minor_hash = buf[2];
3317 hash = buf[1];
3318 break;
3319+ case DX_HASH_TEA_UNSIGNED:
3320+ str2hashbuf = str2hashbuf_unsigned;
3321 case DX_HASH_TEA:
3322 p = name;
3323 while (len > 0) {
3324- str2hashbuf(p, len, in, 4);
3325+ (*str2hashbuf)(p, len, in, 4);
3326 TEA_transform(buf, in);
3327 len -= 16;
3328 p += 16;
3329diff -rup b/fs/ext4//ialloc.c a/fs/ext4///ialloc.c
3330--- b/fs/ext4/ialloc.c 2009-02-11 14:37:58.000000000 +0100
3331+++ a/fs/ext4/ialloc.c 2009-02-10 21:40:11.000000000 +0100
3332@@ -74,17 +74,17 @@ unsigned ext4_init_inode_bitmap(struct s
3333 /* If checksum is bad mark all blocks and inodes use to prevent
3334 * allocation, essentially implementing a per-group read-only flag. */
3335 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
3336- ext4_error(sb, __func__, "Checksum bad for group %lu\n",
3337+ ext4_error(sb, __func__, "Checksum bad for group %u\n",
3338 block_group);
3339- gdp->bg_free_blocks_count = 0;
3340- gdp->bg_free_inodes_count = 0;
3341- gdp->bg_itable_unused = 0;
3342+ ext4_free_blks_set(sb, gdp, 0);
3343+ ext4_free_inodes_set(sb, gdp, 0);
3344+ ext4_itable_unused_set(sb, gdp, 0);
3345 memset(bh->b_data, 0xff, sb->s_blocksize);
3346 return 0;
3347 }
3348
3349 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
3350- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
3351+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
3352 bh->b_data);
3353
3354 return EXT4_INODES_PER_GROUP(sb);
3355@@ -111,27 +111,49 @@ ext4_read_inode_bitmap(struct super_bloc
3356 if (unlikely(!bh)) {
3357 ext4_error(sb, __func__,
3358 "Cannot read inode bitmap - "
3359- "block_group = %lu, inode_bitmap = %llu",
3360+ "block_group = %u, inode_bitmap = %llu",
3361 block_group, bitmap_blk);
3362 return NULL;
3363 }
3364- if (bh_uptodate_or_lock(bh))
3365+ if (bitmap_uptodate(bh))
3366 return bh;
3367
3368+ lock_buffer(bh);
3369+ if (bitmap_uptodate(bh)) {
3370+ unlock_buffer(bh);
3371+ return bh;
3372+ }
3373 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
3374 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
3375 ext4_init_inode_bitmap(sb, bh, block_group, desc);
3376+ set_bitmap_uptodate(bh);
3377 set_buffer_uptodate(bh);
3378- unlock_buffer(bh);
3379 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
3380+ unlock_buffer(bh);
3381 return bh;
3382 }
3383 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
3384+ if (buffer_uptodate(bh)) {
3385+ /*
3386+ * if not uninit if bh is uptodate,
3387+ * bitmap is also uptodate
3388+ */
3389+ set_bitmap_uptodate(bh);
3390+ unlock_buffer(bh);
3391+ return bh;
3392+ }
3393+ /*
3394+ * submit the buffer_head for read. We can
3395+ * safely mark the bitmap as uptodate now.
3396+ * We do it here so the bitmap uptodate bit
3397+ * get set with buffer lock held.
3398+ */
3399+ set_bitmap_uptodate(bh);
3400 if (bh_submit_read(bh) < 0) {
3401 put_bh(bh);
3402 ext4_error(sb, __func__,
3403 "Cannot read inode bitmap - "
3404- "block_group = %lu, inode_bitmap = %llu",
3405+ "block_group = %u, inode_bitmap = %llu",
3406 block_group, bitmap_blk);
3407 return NULL;
3408 }
3409@@ -154,39 +176,40 @@ ext4_read_inode_bitmap(struct super_bloc
3410 * though), and then we'd have two inodes sharing the
3411 * same inode number and space on the harddisk.
3412 */
3413-void ext4_free_inode (handle_t *handle, struct inode * inode)
3414+void ext4_free_inode(handle_t *handle, struct inode *inode)
3415 {
3416- struct super_block * sb = inode->i_sb;
3417+ struct super_block *sb = inode->i_sb;
3418 int is_directory;
3419 unsigned long ino;
3420 struct buffer_head *bitmap_bh = NULL;
3421 struct buffer_head *bh2;
3422 ext4_group_t block_group;
3423 unsigned long bit;
3424- struct ext4_group_desc * gdp;
3425- struct ext4_super_block * es;
3426+ struct ext4_group_desc *gdp;
3427+ struct ext4_super_block *es;
3428 struct ext4_sb_info *sbi;
3429- int fatal = 0, err;
3430+ int fatal = 0, err, count;
3431 ext4_group_t flex_group;
3432
3433 if (atomic_read(&inode->i_count) > 1) {
3434- printk ("ext4_free_inode: inode has count=%d\n",
3435- atomic_read(&inode->i_count));
3436+ printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
3437+ atomic_read(&inode->i_count));
3438 return;
3439 }
3440 if (inode->i_nlink) {
3441- printk ("ext4_free_inode: inode has nlink=%d\n",
3442- inode->i_nlink);
3443+ printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
3444+ inode->i_nlink);
3445 return;
3446 }
3447 if (!sb) {
3448- printk("ext4_free_inode: inode on nonexistent device\n");
3449+ printk(KERN_ERR "ext4_free_inode: inode on "
3450+ "nonexistent device\n");
3451 return;
3452 }
3453 sbi = EXT4_SB(sb);
3454
3455 ino = inode->i_ino;
3456- ext4_debug ("freeing inode %lu\n", ino);
3457+ ext4_debug("freeing inode %lu\n", ino);
3458
3459 /*
3460 * Note: we must free any quota before locking the superblock,
3461@@ -200,12 +223,12 @@ void ext4_free_inode (handle_t *handle,
3462 is_directory = S_ISDIR(inode->i_mode);
3463
3464 /* Do this BEFORE marking the inode not in use or returning an error */
3465- clear_inode (inode);
3466+ clear_inode(inode);
3467
3468 es = EXT4_SB(sb)->s_es;
3469 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
3470- ext4_error (sb, "ext4_free_inode",
3471- "reserved or nonexistent inode %lu", ino);
3472+ ext4_error(sb, "ext4_free_inode",
3473+ "reserved or nonexistent inode %lu", ino);
3474 goto error_return;
3475 }
3476 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
3477@@ -222,10 +245,10 @@ void ext4_free_inode (handle_t *handle,
3478 /* Ok, now we can actually update the inode bitmaps.. */
3479 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
3480 bit, bitmap_bh->b_data))
3481- ext4_error (sb, "ext4_free_inode",
3482- "bit already cleared for inode %lu", ino);
3483+ ext4_error(sb, "ext4_free_inode",
3484+ "bit already cleared for inode %lu", ino);
3485 else {
3486- gdp = ext4_get_group_desc (sb, block_group, &bh2);
3487+ gdp = ext4_get_group_desc(sb, block_group, &bh2);
3488
3489 BUFFER_TRACE(bh2, "get_write_access");
3490 fatal = ext4_journal_get_write_access(handle, bh2);
3491@@ -233,9 +256,12 @@ void ext4_free_inode (handle_t *handle,
3492
3493 if (gdp) {
3494 spin_lock(sb_bgl_lock(sbi, block_group));
3495- le16_add_cpu(&gdp->bg_free_inodes_count, 1);
3496- if (is_directory)
3497- le16_add_cpu(&gdp->bg_used_dirs_count, -1);
3498+ count = ext4_free_inodes_count(sb, gdp) + 1;
3499+ ext4_free_inodes_set(sb, gdp, count);
3500+ if (is_directory) {
3501+ count = ext4_used_dirs_count(sb, gdp) - 1;
3502+ ext4_used_dirs_set(sb, gdp, count);
3503+ }
3504 gdp->bg_checksum = ext4_group_desc_csum(sbi,
3505 block_group, gdp);
3506 spin_unlock(sb_bgl_lock(sbi, block_group));
3507@@ -287,14 +313,14 @@ static int find_group_dir(struct super_b
3508 avefreei = freei / ngroups;
3509
3510 for (group = 0; group < ngroups; group++) {
3511- desc = ext4_get_group_desc (sb, group, NULL);
3512- if (!desc || !desc->bg_free_inodes_count)
3513+ desc = ext4_get_group_desc(sb, group, NULL);
3514+ if (!desc || !ext4_free_inodes_count(sb, desc))
3515 continue;
3516- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
3517+ if (ext4_free_inodes_count(sb, desc) < avefreei)
3518 continue;
3519 if (!best_desc ||
3520- (le16_to_cpu(desc->bg_free_blocks_count) >
3521- le16_to_cpu(best_desc->bg_free_blocks_count))) {
3522+ (ext4_free_blks_count(sb, desc) >
3523+ ext4_free_blks_count(sb, best_desc))) {
3524 *best_group = group;
3525 best_desc = desc;
3526 ret = 0;
3527@@ -366,7 +392,7 @@ found_flexbg:
3528 for (i = best_flex * flex_size; i < ngroups &&
3529 i < (best_flex + 1) * flex_size; i++) {
3530 desc = ext4_get_group_desc(sb, i, &bh);
3531- if (le16_to_cpu(desc->bg_free_inodes_count)) {
3532+ if (ext4_free_inodes_count(sb, desc)) {
3533 *best_group = i;
3534 goto out;
3535 }
3536@@ -440,17 +466,17 @@ static int find_group_orlov(struct super
3537 for (i = 0; i < ngroups; i++) {
3538 grp = (parent_group + i) % ngroups;
3539 desc = ext4_get_group_desc(sb, grp, NULL);
3540- if (!desc || !desc->bg_free_inodes_count)
3541+ if (!desc || !ext4_free_inodes_count(sb, desc))
3542 continue;
3543- if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
3544+ if (ext4_used_dirs_count(sb, desc) >= best_ndir)
3545 continue;
3546- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
3547+ if (ext4_free_inodes_count(sb, desc) < avefreei)
3548 continue;
3549- if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
3550+ if (ext4_free_blks_count(sb, desc) < avefreeb)
3551 continue;
3552 *group = grp;
3553 ret = 0;
3554- best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
3555+ best_ndir = ext4_used_dirs_count(sb, desc);
3556 }
3557 if (ret == 0)
3558 return ret;
3559@@ -476,13 +502,13 @@ static int find_group_orlov(struct super
3560 for (i = 0; i < ngroups; i++) {
3561 *group = (parent_group + i) % ngroups;
3562 desc = ext4_get_group_desc(sb, *group, NULL);
3563- if (!desc || !desc->bg_free_inodes_count)
3564+ if (!desc || !ext4_free_inodes_count(sb, desc))
3565 continue;
3566- if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
3567+ if (ext4_used_dirs_count(sb, desc) >= max_dirs)
3568 continue;
3569- if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
3570+ if (ext4_free_inodes_count(sb, desc) < min_inodes)
3571 continue;
3572- if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
3573+ if (ext4_free_blks_count(sb, desc) < min_blocks)
3574 continue;
3575 return 0;
3576 }
3577@@ -491,8 +517,8 @@ fallback:
3578 for (i = 0; i < ngroups; i++) {
3579 *group = (parent_group + i) % ngroups;
3580 desc = ext4_get_group_desc(sb, *group, NULL);
3581- if (desc && desc->bg_free_inodes_count &&
3582- le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
3583+ if (desc && ext4_free_inodes_count(sb, desc) &&
3584+ ext4_free_inodes_count(sb, desc) >= avefreei)
3585 return 0;
3586 }
3587
3588@@ -521,8 +547,8 @@ static int find_group_other(struct super
3589 */
3590 *group = parent_group;
3591 desc = ext4_get_group_desc(sb, *group, NULL);
3592- if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
3593- le16_to_cpu(desc->bg_free_blocks_count))
3594+ if (desc && ext4_free_inodes_count(sb, desc) &&
3595+ ext4_free_blks_count(sb, desc))
3596 return 0;
3597
3598 /*
3599@@ -545,8 +571,8 @@ static int find_group_other(struct super
3600 if (*group >= ngroups)
3601 *group -= ngroups;
3602 desc = ext4_get_group_desc(sb, *group, NULL);
3603- if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
3604- le16_to_cpu(desc->bg_free_blocks_count))
3605+ if (desc && ext4_free_inodes_count(sb, desc) &&
3606+ ext4_free_blks_count(sb, desc))
3607 return 0;
3608 }
3609
3610@@ -559,7 +585,7 @@ static int find_group_other(struct super
3611 if (++*group >= ngroups)
3612 *group = 0;
3613 desc = ext4_get_group_desc(sb, *group, NULL);
3614- if (desc && le16_to_cpu(desc->bg_free_inodes_count))
3615+ if (desc && ext4_free_inodes_count(sb, desc))
3616 return 0;
3617 }
3618
3619@@ -567,6 +593,79 @@ static int find_group_other(struct super
3620 }
3621
3622 /*
3623+ * claim the inode from the inode bitmap. If the group
3624+ * is uninit we need to take the groups's sb_bgl_lock
3625+ * and clear the uninit flag. The inode bitmap update
3626+ * and group desc uninit flag clear should be done
3627+ * after holding sb_bgl_lock so that ext4_read_inode_bitmap
3628+ * doesn't race with the ext4_claim_inode
3629+ */
3630+static int ext4_claim_inode(struct super_block *sb,
3631+ struct buffer_head *inode_bitmap_bh,
3632+ unsigned long ino, ext4_group_t group, int mode)
3633+{
3634+ int free = 0, retval = 0, count;
3635+ struct ext4_sb_info *sbi = EXT4_SB(sb);
3636+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
3637+
3638+ spin_lock(sb_bgl_lock(sbi, group));
3639+ if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
3640+ /* not a free inode */
3641+ retval = 1;
3642+ goto err_ret;
3643+ }
3644+ ino++;
3645+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
3646+ ino > EXT4_INODES_PER_GROUP(sb)) {
3647+ spin_unlock(sb_bgl_lock(sbi, group));
3648+ ext4_error(sb, __func__,
3649+ "reserved inode or inode > inodes count - "
3650+ "block_group = %u, inode=%lu", group,
3651+ ino + group * EXT4_INODES_PER_GROUP(sb));
3652+ return 1;
3653+ }
3654+ /* If we didn't allocate from within the initialized part of the inode
3655+ * table then we need to initialize up to this inode. */
3656+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
3657+
3658+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
3659+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
3660+ /* When marking the block group with
3661+ * ~EXT4_BG_INODE_UNINIT we don't want to depend
3662+ * on the value of bg_itable_unused even though
3663+ * mke2fs could have initialized the same for us.
3664+ * Instead we calculated the value below
3665+ */
3666+
3667+ free = 0;
3668+ } else {
3669+ free = EXT4_INODES_PER_GROUP(sb) -
3670+ ext4_itable_unused_count(sb, gdp);
3671+ }
3672+
3673+ /*
3674+ * Check the relative inode number against the last used
3675+ * relative inode number in this group. if it is greater
3676+ * we need to update the bg_itable_unused count
3677+ *
3678+ */
3679+ if (ino > free)
3680+ ext4_itable_unused_set(sb, gdp,
3681+ (EXT4_INODES_PER_GROUP(sb) - ino));
3682+ }
3683+ count = ext4_free_inodes_count(sb, gdp) - 1;
3684+ ext4_free_inodes_set(sb, gdp, count);
3685+ if (S_ISDIR(mode)) {
3686+ count = ext4_used_dirs_count(sb, gdp) + 1;
3687+ ext4_used_dirs_set(sb, gdp, count);
3688+ }
3689+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
3690+err_ret:
3691+ spin_unlock(sb_bgl_lock(sbi, group));
3692+ return retval;
3693+}
3694+
3695+/*
3696 * There are two policies for allocating an inode. If the new inode is
3697 * a directory, then a forward search is made for a block group with both
3698 * free space and a low directory-to-inode ratio; if that fails, then of
3699@@ -576,16 +675,16 @@ static int find_group_other(struct super
3700 * For other inodes, search forward from the parent directory's block
3701 * group to find a free inode.
3702 */
3703-struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
3704+struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
3705 {
3706 struct super_block *sb;
3707- struct buffer_head *bitmap_bh = NULL;
3708- struct buffer_head *bh2;
3709+ struct buffer_head *inode_bitmap_bh = NULL;
3710+ struct buffer_head *group_desc_bh;
3711 ext4_group_t group = 0;
3712 unsigned long ino = 0;
3713- struct inode * inode;
3714- struct ext4_group_desc * gdp = NULL;
3715- struct ext4_super_block * es;
3716+ struct inode *inode;
3717+ struct ext4_group_desc *gdp = NULL;
3718+ struct ext4_super_block *es;
3719 struct ext4_inode_info *ei;
3720 struct ext4_sb_info *sbi;
3721 int ret2, err = 0;
3722@@ -613,7 +712,7 @@ struct inode *ext4_new_inode(handle_t *h
3723 }
3724
3725 if (S_ISDIR(mode)) {
3726- if (test_opt (sb, OLDALLOC))
3727+ if (test_opt(sb, OLDALLOC))
3728 ret2 = find_group_dir(sb, dir, &group);
3729 else
3730 ret2 = find_group_orlov(sb, dir, &group);
3731@@ -628,40 +727,50 @@ got_group:
3732 for (i = 0; i < sbi->s_groups_count; i++) {
3733 err = -EIO;
3734
3735- gdp = ext4_get_group_desc(sb, group, &bh2);
3736+ gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
3737 if (!gdp)
3738 goto fail;
3739
3740- brelse(bitmap_bh);
3741- bitmap_bh = ext4_read_inode_bitmap(sb, group);
3742- if (!bitmap_bh)
3743+ brelse(inode_bitmap_bh);
3744+ inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
3745+ if (!inode_bitmap_bh)
3746 goto fail;
3747
3748 ino = 0;
3749
3750 repeat_in_this_group:
3751 ino = ext4_find_next_zero_bit((unsigned long *)
3752- bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino);
3753+ inode_bitmap_bh->b_data,
3754+ EXT4_INODES_PER_GROUP(sb), ino);
3755 if (ino < EXT4_INODES_PER_GROUP(sb)) {
3756
3757- BUFFER_TRACE(bitmap_bh, "get_write_access");
3758- err = ext4_journal_get_write_access(handle, bitmap_bh);
3759+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
3760+ err = ext4_journal_get_write_access(handle,
3761+ inode_bitmap_bh);
3762 if (err)
3763 goto fail;
3764
3765- if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
3766- ino, bitmap_bh->b_data)) {
3767+ BUFFER_TRACE(group_desc_bh, "get_write_access");
3768+ err = ext4_journal_get_write_access(handle,
3769+ group_desc_bh);
3770+ if (err)
3771+ goto fail;
3772+ if (!ext4_claim_inode(sb, inode_bitmap_bh,
3773+ ino, group, mode)) {
3774 /* we won it */
3775- BUFFER_TRACE(bitmap_bh,
3776+ BUFFER_TRACE(inode_bitmap_bh,
3777 "call ext4_journal_dirty_metadata");
3778 err = ext4_journal_dirty_metadata(handle,
3779- bitmap_bh);
3780+ inode_bitmap_bh);
3781 if (err)
3782 goto fail;
3783+ /* zero bit is inode number 1*/
3784+ ino++;
3785 goto got;
3786 }
3787 /* we lost it */
3788- jbd2_journal_release_buffer(handle, bitmap_bh);
3789+ jbd2_journal_release_buffer(handle, inode_bitmap_bh);
3790+ jbd2_journal_release_buffer(handle, group_desc_bh);
3791
3792 if (++ino < EXT4_INODES_PER_GROUP(sb))
3793 goto repeat_in_this_group;
3794@@ -681,30 +790,16 @@ repeat_in_this_group:
3795 goto out;
3796
3797 got:
3798- ino++;
3799- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
3800- ino > EXT4_INODES_PER_GROUP(sb)) {
3801- ext4_error(sb, __func__,
3802- "reserved inode or inode > inodes count - "
3803- "block_group = %lu, inode=%lu", group,
3804- ino + group * EXT4_INODES_PER_GROUP(sb));
3805- err = -EIO;
3806- goto fail;
3807- }
3808-
3809- BUFFER_TRACE(bh2, "get_write_access");
3810- err = ext4_journal_get_write_access(handle, bh2);
3811- if (err) goto fail;
3812-
3813 /* We may have to initialize the block bitmap if it isn't already */
3814 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
3815 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3816- struct buffer_head *block_bh = ext4_read_block_bitmap(sb, group);
3817+ struct buffer_head *block_bitmap_bh;
3818
3819- BUFFER_TRACE(block_bh, "get block bitmap access");
3820- err = ext4_journal_get_write_access(handle, block_bh);
3821+ block_bitmap_bh = ext4_read_block_bitmap(sb, group);
3822+ BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
3823+ err = ext4_journal_get_write_access(handle, block_bitmap_bh);
3824 if (err) {
3825- brelse(block_bh);
3826+ brelse(block_bitmap_bh);
3827 goto fail;
3828 }
3829
3830@@ -712,9 +807,9 @@ got:
3831 spin_lock(sb_bgl_lock(sbi, group));
3832 /* recheck and clear flag under lock if we still need to */
3833 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
3834- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3835 free = ext4_free_blocks_after_init(sb, group, gdp);
3836- gdp->bg_free_blocks_count = cpu_to_le16(free);
3837+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3838+ ext4_free_blks_set(sb, gdp, free);
3839 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
3840 gdp);
3841 }
3842@@ -722,55 +817,19 @@ got:
3843
3844 /* Don't need to dirty bitmap block if we didn't change it */
3845 if (free) {
3846- BUFFER_TRACE(block_bh, "dirty block bitmap");
3847- err = ext4_journal_dirty_metadata(handle, block_bh);
3848+ BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
3849+ err = ext4_journal_dirty_metadata(handle,
3850+ block_bitmap_bh);
3851 }
3852
3853- brelse(block_bh);
3854+ brelse(block_bitmap_bh);
3855 if (err)
3856 goto fail;
3857 }
3858-
3859- spin_lock(sb_bgl_lock(sbi, group));
3860- /* If we didn't allocate from within the initialized part of the inode
3861- * table then we need to initialize up to this inode. */
3862- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
3863- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
3864- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
3865-
3866- /* When marking the block group with
3867- * ~EXT4_BG_INODE_UNINIT we don't want to depend
3868- * on the value of bg_itable_unused even though
3869- * mke2fs could have initialized the same for us.
3870- * Instead we calculated the value below
3871- */
3872-
3873- free = 0;
3874- } else {
3875- free = EXT4_INODES_PER_GROUP(sb) -
3876- le16_to_cpu(gdp->bg_itable_unused);
3877- }
3878-
3879- /*
3880- * Check the relative inode number against the last used
3881- * relative inode number in this group. if it is greater
3882- * we need to update the bg_itable_unused count
3883- *
3884- */
3885- if (ino > free)
3886- gdp->bg_itable_unused =
3887- cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
3888- }
3889-
3890- le16_add_cpu(&gdp->bg_free_inodes_count, -1);
3891- if (S_ISDIR(mode)) {
3892- le16_add_cpu(&gdp->bg_used_dirs_count, 1);
3893- }
3894- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
3895- spin_unlock(sb_bgl_lock(sbi, group));
3896- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
3897- err = ext4_journal_dirty_metadata(handle, bh2);
3898- if (err) goto fail;
3899+ BUFFER_TRACE(group_desc_bh, "call ext4_journal_dirty_metadata");
3900+ err = ext4_journal_dirty_metadata(handle, group_desc_bh);
3901+ if (err)
3902+ goto fail;
3903
3904 percpu_counter_dec(&sbi->s_freeinodes_counter);
3905 if (S_ISDIR(mode))
3906@@ -784,15 +843,15 @@ got:
3907 spin_unlock(sb_bgl_lock(sbi, flex_group));
3908 }
3909
3910- inode->i_uid = current->fsuid;
3911- if (test_opt (sb, GRPID))
3912+ inode->i_uid = current_fsuid();
3913+ if (test_opt(sb, GRPID))
3914 inode->i_gid = dir->i_gid;
3915 else if (dir->i_mode & S_ISGID) {
3916 inode->i_gid = dir->i_gid;
3917 if (S_ISDIR(mode))
3918 mode |= S_ISGID;
3919 } else
3920- inode->i_gid = current->fsgid;
3921+ inode->i_gid = current_fsgid();
3922 inode->i_mode = mode;
3923
3924 inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
3925@@ -818,7 +877,6 @@ got:
3926 ei->i_flags &= ~EXT4_DIRSYNC_FL;
3927 ei->i_file_acl = 0;
3928 ei->i_dtime = 0;
3929- ei->i_block_alloc_info = NULL;
3930 ei->i_block_group = group;
3931
3932 ext4_set_inode_flags(inode);
3933@@ -834,7 +892,7 @@ got:
3934 ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
3935
3936 ret = inode;
3937- if(DQUOT_ALLOC_INODE(inode)) {
3938+ if (DQUOT_ALLOC_INODE(inode)) {
3939 err = -EDQUOT;
3940 goto fail_drop;
3941 }
3942@@ -843,7 +901,7 @@ got:
3943 if (err)
3944 goto fail_free_drop;
3945
3946- err = ext4_init_security(handle,inode, dir);
3947+ err = ext4_init_security(handle, inode, dir);
3948 if (err)
3949 goto fail_free_drop;
3950
3951@@ -869,7 +927,7 @@ out:
3952 iput(inode);
3953 ret = ERR_PTR(err);
3954 really_out:
3955- brelse(bitmap_bh);
3956+ brelse(inode_bitmap_bh);
3957 return ret;
3958
3959 fail_free_drop:
3960@@ -880,7 +938,7 @@ fail_drop:
3961 inode->i_flags |= S_NOQUOTA;
3962 inode->i_nlink = 0;
3963 iput(inode);
3964- brelse(bitmap_bh);
3965+ brelse(inode_bitmap_bh);
3966 return ERR_PTR(err);
3967 }
3968
3969@@ -961,7 +1019,7 @@ error:
3970 return ERR_PTR(err);
3971 }
3972
3973-unsigned long ext4_count_free_inodes (struct super_block * sb)
3974+unsigned long ext4_count_free_inodes(struct super_block *sb)
3975 {
3976 unsigned long desc_count;
3977 struct ext4_group_desc *gdp;
3978@@ -976,10 +1034,10 @@ unsigned long ext4_count_free_inodes (st
3979 bitmap_count = 0;
3980 gdp = NULL;
3981 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
3982- gdp = ext4_get_group_desc (sb, i, NULL);
3983+ gdp = ext4_get_group_desc(sb, i, NULL);
3984 if (!gdp)
3985 continue;
3986- desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
3987+ desc_count += ext4_free_inodes_count(sb, gdp);
3988 brelse(bitmap_bh);
3989 bitmap_bh = ext4_read_inode_bitmap(sb, i);
3990 if (!bitmap_bh)
3991@@ -987,20 +1045,21 @@ unsigned long ext4_count_free_inodes (st
3992
3993 x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
3994 printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
3995- i, le16_to_cpu(gdp->bg_free_inodes_count), x);
3996+ i, ext4_free_inodes_count(sb, gdp), x);
3997 bitmap_count += x;
3998 }
3999 brelse(bitmap_bh);
4000- printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n",
4001- le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
4002+ printk(KERN_DEBUG "ext4_count_free_inodes: "
4003+ "stored = %u, computed = %lu, %lu\n",
4004+ le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
4005 return desc_count;
4006 #else
4007 desc_count = 0;
4008 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
4009- gdp = ext4_get_group_desc (sb, i, NULL);
4010+ gdp = ext4_get_group_desc(sb, i, NULL);
4011 if (!gdp)
4012 continue;
4013- desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
4014+ desc_count += ext4_free_inodes_count(sb, gdp);
4015 cond_resched();
4016 }
4017 return desc_count;
4018@@ -1008,16 +1067,16 @@ unsigned long ext4_count_free_inodes (st
4019 }
4020
4021 /* Called at mount-time, super-block is locked */
4022-unsigned long ext4_count_dirs (struct super_block * sb)
4023+unsigned long ext4_count_dirs(struct super_block * sb)
4024 {
4025 unsigned long count = 0;
4026 ext4_group_t i;
4027
4028 for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
4029- struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);
4030+ struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
4031 if (!gdp)
4032 continue;
4033- count += le16_to_cpu(gdp->bg_used_dirs_count);
4034+ count += ext4_used_dirs_count(sb, gdp);
4035 }
4036 return count;
4037 }
4038diff -rup b/fs/ext4//inode.c a/fs/ext4///inode.c
4039--- b/fs/ext4/inode.c 2009-02-11 14:37:58.000000000 +0100
4040+++ a/fs/ext4/inode.c 2009-02-11 01:08:42.000000000 +0100
4041@@ -190,7 +190,7 @@ static int ext4_journal_test_restart(han
4042 /*
4043 * Called at the last iput() if i_nlink is zero.
4044 */
4045-void ext4_delete_inode (struct inode * inode)
4046+void ext4_delete_inode(struct inode *inode)
4047 {
4048 handle_t *handle;
4049 int err;
4050@@ -330,11 +330,11 @@ static int ext4_block_to_path(struct ino
4051 int final = 0;
4052
4053 if (i_block < 0) {
4054- ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0");
4055+ ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0");
4056 } else if (i_block < direct_blocks) {
4057 offsets[n++] = i_block;
4058 final = direct_blocks;
4059- } else if ( (i_block -= direct_blocks) < indirect_blocks) {
4060+ } else if ((i_block -= direct_blocks) < indirect_blocks) {
4061 offsets[n++] = EXT4_IND_BLOCK;
4062 offsets[n++] = i_block;
4063 final = ptrs;
4064@@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct
4065
4066 *err = 0;
4067 /* i_data is not going away, no lock needed */
4068- add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets);
4069+ add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
4070 if (!p->key)
4071 goto no_block;
4072 while (--depth) {
4073 bh = sb_bread(sb, le32_to_cpu(p->key));
4074 if (!bh)
4075 goto failure;
4076- add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
4077+ add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
4078 /* Reader: end */
4079 if (!p->key)
4080 goto no_block;
4081@@ -443,7 +443,7 @@ no_block:
4082 static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
4083 {
4084 struct ext4_inode_info *ei = EXT4_I(inode);
4085- __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data;
4086+ __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
4087 __le32 *p;
4088 ext4_fsblk_t bg_start;
4089 ext4_fsblk_t last_block;
4090@@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struc
4091 static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
4092 Indirect *partial)
4093 {
4094- struct ext4_block_alloc_info *block_i;
4095-
4096- block_i = EXT4_I(inode)->i_block_alloc_info;
4097-
4098 /*
4099- * try the heuristic for sequential allocation,
4100- * failing that at least try to get decent locality.
4101+ * XXX need to get goal block from mballoc's data structures
4102 */
4103- if (block_i && (block == block_i->last_alloc_logical_block + 1)
4104- && (block_i->last_alloc_physical_block != 0)) {
4105- return block_i->last_alloc_physical_block + 1;
4106- }
4107
4108 return ext4_find_near(inode, partial);
4109 }
4110@@ -514,10 +505,10 @@ static ext4_fsblk_t ext4_find_goal(struc
4111 * return the total number of blocks to be allocate, including the
4112 * direct and indirect blocks.
4113 */
4114-static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
4115+static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
4116 int blocks_to_boundary)
4117 {
4118- unsigned long count = 0;
4119+ unsigned int count = 0;
4120
4121 /*
4122 * Simple case, [t,d]Indirect block(s) has not allocated yet
4123@@ -555,6 +546,7 @@ static int ext4_alloc_blocks(handle_t *h
4124 int indirect_blks, int blks,
4125 ext4_fsblk_t new_blocks[4], int *err)
4126 {
4127+ struct ext4_allocation_request ar;
4128 int target, i;
4129 unsigned long count = 0, blk_allocated = 0;
4130 int index = 0;
4131@@ -603,10 +595,15 @@ static int ext4_alloc_blocks(handle_t *h
4132 if (!target)
4133 goto allocated;
4134 /* Now allocate data blocks */
4135- count = target;
4136- /* allocating blocks for data blocks */
4137- current_block = ext4_new_blocks(handle, inode, iblock,
4138- goal, &count, err);
4139+ memset(&ar, 0, sizeof(ar));
4140+ ar.inode = inode;
4141+ ar.goal = goal;
4142+ ar.len = target;
4143+ ar.logical = iblock;
4144+ ar.flags = EXT4_MB_HINT_DATA;
4145+
4146+ current_block = ext4_mb_new_blocks(handle, &ar, err);
4147+
4148 if (*err && (target == blks)) {
4149 /*
4150 * if the allocation failed and we didn't allocate
4151@@ -622,7 +619,7 @@ static int ext4_alloc_blocks(handle_t *h
4152 */
4153 new_blocks[index] = current_block;
4154 }
4155- blk_allocated += count;
4156+ blk_allocated += ar.len;
4157 }
4158 allocated:
4159 /* total number of blocks allocated for direct blocks */
4160@@ -630,7 +627,7 @@ allocated:
4161 *err = 0;
4162 return ret;
4163 failed_out:
4164- for (i = 0; i <index; i++)
4165+ for (i = 0; i < index; i++)
4166 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
4167 return ret;
4168 }
4169@@ -703,7 +700,7 @@ static int ext4_alloc_branch(handle_t *h
4170 branch[n].p = (__le32 *) bh->b_data + offsets[n];
4171 branch[n].key = cpu_to_le32(new_blocks[n]);
4172 *branch[n].p = branch[n].key;
4173- if ( n == indirect_blks) {
4174+ if (n == indirect_blks) {
4175 current_block = new_blocks[n];
4176 /*
4177 * End of chain, update the last new metablock of
4178@@ -730,7 +727,7 @@ failed:
4179 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
4180 ext4_journal_forget(handle, branch[i].bh);
4181 }
4182- for (i = 0; i <indirect_blks; i++)
4183+ for (i = 0; i < indirect_blks; i++)
4184 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
4185
4186 ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
4187@@ -757,10 +754,8 @@ static int ext4_splice_branch(handle_t *
4188 {
4189 int i;
4190 int err = 0;
4191- struct ext4_block_alloc_info *block_i;
4192 ext4_fsblk_t current_block;
4193
4194- block_i = EXT4_I(inode)->i_block_alloc_info;
4195 /*
4196 * If we're splicing into a [td]indirect block (as opposed to the
4197 * inode) then we need to get write access to the [td]indirect block
4198@@ -783,18 +778,7 @@ static int ext4_splice_branch(handle_t *
4199 if (num == 0 && blks > 1) {
4200 current_block = le32_to_cpu(where->key) + 1;
4201 for (i = 1; i < blks; i++)
4202- *(where->p + i ) = cpu_to_le32(current_block++);
4203- }
4204-
4205- /*
4206- * update the most recently allocated logical & physical block
4207- * in i_block_alloc_info, to assist find the proper goal block for next
4208- * allocation
4209- */
4210- if (block_i) {
4211- block_i->last_alloc_logical_block = block + blks - 1;
4212- block_i->last_alloc_physical_block =
4213- le32_to_cpu(where[num].key) + blks - 1;
4214+ *(where->p + i) = cpu_to_le32(current_block++);
4215 }
4216
4217 /* We are done with atomic stuff, now do the rest of housekeeping */
4218@@ -861,10 +845,10 @@ err_out:
4219 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
4220 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
4221 */
4222-int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
4223- ext4_lblk_t iblock, unsigned long maxblocks,
4224- struct buffer_head *bh_result,
4225- int create, int extend_disksize)
4226+static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
4227+ ext4_lblk_t iblock, unsigned int maxblocks,
4228+ struct buffer_head *bh_result,
4229+ int create, int extend_disksize)
4230 {
4231 int err = -EIO;
4232 ext4_lblk_t offsets[4];
4233@@ -914,12 +898,8 @@ int ext4_get_blocks_handle(handle_t *han
4234 goto cleanup;
4235
4236 /*
4237- * Okay, we need to do block allocation. Lazily initialize the block
4238- * allocation info here if necessary
4239+ * Okay, we need to do block allocation.
4240 */
4241- if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
4242- ext4_init_block_alloc_info(inode);
4243-
4244 goal = ext4_find_goal(inode, iblock, partial);
4245
4246 /* the number of blocks need to allocate for [d,t]indirect blocks */
4247@@ -1030,19 +1010,20 @@ static void ext4_da_update_reserve_space
4248 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
4249 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
4250
4251- /* Account for allocated meta_blocks */
4252- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
4253-
4254- /* update fs free blocks counter for truncate case */
4255- percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
4256+ if (mdb_free) {
4257+ /* Account for allocated meta_blocks */
4258+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
4259+
4260+ /* update fs dirty blocks counter */
4261+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
4262+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
4263+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
4264+ }
4265
4266 /* update per-inode reservations */
4267 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
4268 EXT4_I(inode)->i_reserved_data_blocks -= used;
4269
4270- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
4271- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
4272- EXT4_I(inode)->i_allocated_meta_blocks = 0;
4273 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
4274 }
4275
4276@@ -1069,7 +1050,7 @@ static void ext4_da_update_reserve_space
4277 * It returns the error in case of allocation failure.
4278 */
4279 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
4280- unsigned long max_blocks, struct buffer_head *bh,
4281+ unsigned int max_blocks, struct buffer_head *bh,
4282 int create, int extend_disksize, int flag)
4283 {
4284 int retval;
4285@@ -1241,7 +1222,7 @@ struct buffer_head *ext4_getblk(handle_t
4286 BUFFER_TRACE(bh, "call get_create_access");
4287 fatal = ext4_journal_get_create_access(handle, bh);
4288 if (!fatal && !buffer_uptodate(bh)) {
4289- memset(bh->b_data,0,inode->i_sb->s_blocksize);
4290+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
4291 set_buffer_uptodate(bh);
4292 }
4293 unlock_buffer(bh);
4294@@ -1266,7 +1247,7 @@ err:
4295 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
4296 ext4_lblk_t block, int create, int *err)
4297 {
4298- struct buffer_head * bh;
4299+ struct buffer_head *bh;
4300
4301 bh = ext4_getblk(handle, inode, block, create, err);
4302 if (!bh)
4303@@ -1282,13 +1263,13 @@ struct buffer_head *ext4_bread(handle_t
4304 return NULL;
4305 }
4306
4307-static int walk_page_buffers( handle_t *handle,
4308- struct buffer_head *head,
4309- unsigned from,
4310- unsigned to,
4311- int *partial,
4312- int (*fn)( handle_t *handle,
4313- struct buffer_head *bh))
4314+static int walk_page_buffers(handle_t *handle,
4315+ struct buffer_head *head,
4316+ unsigned from,
4317+ unsigned to,
4318+ int *partial,
4319+ int (*fn)(handle_t *handle,
4320+ struct buffer_head *bh))
4321 {
4322 struct buffer_head *bh;
4323 unsigned block_start, block_end;
4324@@ -1296,9 +1277,9 @@ static int walk_page_buffers( handle_t *
4325 int err, ret = 0;
4326 struct buffer_head *next;
4327
4328- for ( bh = head, block_start = 0;
4329- ret == 0 && (bh != head || !block_start);
4330- block_start = block_end, bh = next)
4331+ for (bh = head, block_start = 0;
4332+ ret == 0 && (bh != head || !block_start);
4333+ block_start = block_end, bh = next)
4334 {
4335 next = bh->b_this_page;
4336 block_end = block_start + blocksize;
4337@@ -1351,23 +1332,23 @@ static int ext4_write_begin(struct file
4338 loff_t pos, unsigned len, unsigned flags,
4339 struct page **pagep, void **fsdata)
4340 {
4341- struct inode *inode = mapping->host;
4342+ struct inode *inode = mapping->host;
4343 int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
4344 handle_t *handle;
4345 int retries = 0;
4346- struct page *page;
4347+ struct page *page;
4348 pgoff_t index;
4349- unsigned from, to;
4350+ unsigned from, to;
4351
4352 index = pos >> PAGE_CACHE_SHIFT;
4353- from = pos & (PAGE_CACHE_SIZE - 1);
4354- to = from + len;
4355+ from = pos & (PAGE_CACHE_SIZE - 1);
4356+ to = from + len;
4357
4358 retry:
4359- handle = ext4_journal_start(inode, needed_blocks);
4360- if (IS_ERR(handle)) {
4361- ret = PTR_ERR(handle);
4362- goto out;
4363+ handle = ext4_journal_start(inode, needed_blocks);
4364+ if (IS_ERR(handle)) {
4365+ ret = PTR_ERR(handle);
4366+ goto out;
4367 }
4368
4369 page = grab_cache_page_write_begin(mapping, index, flags);
4370@@ -1387,9 +1368,16 @@ retry:
4371 }
4372
4373 if (ret) {
4374- unlock_page(page);
4375+ unlock_page(page);
4376 ext4_journal_stop(handle);
4377- page_cache_release(page);
4378+ page_cache_release(page);
4379+ /*
4380+ * block_write_begin may have instantiated a few blocks
4381+ * outside i_size. Trim these off again. Don't need
4382+ * i_size_read because we hold i_mutex.
4383+ */
4384+ if (pos + len > inode->i_size)
4385+ vmtruncate(inode, inode->i_size);
4386 }
4387
4388 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
4389@@ -1426,16 +1414,18 @@ static int ext4_ordered_write_end(struct
4390 ret = ext4_jbd2_file_inode(handle, inode);
4391
4392 if (ret == 0) {
4393- /*
4394- * generic_write_end() will run mark_inode_dirty() if i_size
4395- * changes. So let's piggyback the i_disksize mark_inode_dirty
4396- * into that.
4397- */
4398 loff_t new_i_size;
4399
4400 new_i_size = pos + copied;
4401- if (new_i_size > EXT4_I(inode)->i_disksize)
4402- EXT4_I(inode)->i_disksize = new_i_size;
4403+ if (new_i_size > EXT4_I(inode)->i_disksize) {
4404+ ext4_update_i_disksize(inode, new_i_size);
4405+ /* We need to mark inode dirty even if
4406+ * new_i_size is less that inode->i_size
4407+ * bu greater than i_disksize.(hint delalloc)
4408+ */
4409+ ext4_mark_inode_dirty(handle, inode);
4410+ }
4411+
4412 ret2 = generic_write_end(file, mapping, pos, len, copied,
4413 page, fsdata);
4414 copied = ret2;
4415@@ -1460,8 +1450,14 @@ static int ext4_writeback_write_end(stru
4416 loff_t new_i_size;
4417
4418 new_i_size = pos + copied;
4419- if (new_i_size > EXT4_I(inode)->i_disksize)
4420- EXT4_I(inode)->i_disksize = new_i_size;
4421+ if (new_i_size > EXT4_I(inode)->i_disksize) {
4422+ ext4_update_i_disksize(inode, new_i_size);
4423+ /* We need to mark inode dirty even if
4424+ * new_i_size is less that inode->i_size
4425+ * bu greater than i_disksize.(hint delalloc)
4426+ */
4427+ ext4_mark_inode_dirty(handle, inode);
4428+ }
4429
4430 ret2 = generic_write_end(file, mapping, pos, len, copied,
4431 page, fsdata);
4432@@ -1486,6 +1482,7 @@ static int ext4_journalled_write_end(str
4433 int ret = 0, ret2;
4434 int partial = 0;
4435 unsigned from, to;
4436+ loff_t new_i_size;
4437
4438 from = pos & (PAGE_CACHE_SIZE - 1);
4439 to = from + len;
4440@@ -1500,11 +1497,12 @@ static int ext4_journalled_write_end(str
4441 to, &partial, write_end_fn);
4442 if (!partial)
4443 SetPageUptodate(page);
4444- if (pos+copied > inode->i_size)
4445+ new_i_size = pos + copied;
4446+ if (new_i_size > inode->i_size)
4447 i_size_write(inode, pos+copied);
4448 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
4449- if (inode->i_size > EXT4_I(inode)->i_disksize) {
4450- EXT4_I(inode)->i_disksize = inode->i_size;
4451+ if (new_i_size > EXT4_I(inode)->i_disksize) {
4452+ ext4_update_i_disksize(inode, new_i_size);
4453 ret2 = ext4_mark_inode_dirty(handle, inode);
4454 if (!ret)
4455 ret = ret2;
4456@@ -1521,6 +1519,7 @@ static int ext4_journalled_write_end(str
4457
4458 static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
4459 {
4460+ int retries = 0;
4461 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4462 unsigned long md_needed, mdblocks, total = 0;
4463
4464@@ -1529,6 +1528,7 @@ static int ext4_da_reserve_space(struct
4465 * in order to allocate nrblocks
4466 * worse case is one extent per block
4467 */
4468+repeat:
4469 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
4470 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
4471 mdblocks = ext4_calc_metadata_amount(inode, total);
4472@@ -1537,13 +1537,14 @@ static int ext4_da_reserve_space(struct
4473 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
4474 total = md_needed + nrblocks;
4475
4476- if (ext4_has_free_blocks(sbi, total) < total) {
4477+ if (ext4_claim_free_blocks(sbi, total)) {
4478 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
4479+ if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
4480+ yield();
4481+ goto repeat;
4482+ }
4483 return -ENOSPC;
4484 }
4485- /* reduce fs free blocks counter */
4486- percpu_counter_sub(&sbi->s_freeblocks_counter, total);
4487-
4488 EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
4489 EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
4490
4491@@ -1585,8 +1586,8 @@ static void ext4_da_release_space(struct
4492
4493 release = to_free + mdb_free;
4494
4495- /* update fs free blocks counter for truncate case */
4496- percpu_counter_add(&sbi->s_freeblocks_counter, release);
4497+ /* update fs dirty blocks counter for truncate case */
4498+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
4499
4500 /* update per-inode reservations */
4501 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
4502@@ -1629,7 +1630,8 @@ struct mpage_da_data {
4503 get_block_t *get_block;
4504 struct writeback_control *wbc;
4505 int io_done;
4506- long pages_written;
4507+ int pages_written;
4508+ int retval;
4509 };
4510
4511 /*
4512@@ -1648,18 +1650,25 @@ struct mpage_da_data {
4513 */
4514 static int mpage_da_submit_io(struct mpage_da_data *mpd)
4515 {
4516- struct address_space *mapping = mpd->inode->i_mapping;
4517- int ret = 0, err, nr_pages, i;
4518- unsigned long index, end;
4519+ long pages_skipped;
4520 struct pagevec pvec;
4521+ unsigned long index, end;
4522+ int ret = 0, err, nr_pages, i;
4523+ struct inode *inode = mpd->inode;
4524+ struct address_space *mapping = inode->i_mapping;
4525
4526 BUG_ON(mpd->next_page <= mpd->first_page);
4527- pagevec_init(&pvec, 0);
4528+ /*
4529+ * We need to start from the first_page to the next_page - 1
4530+ * to make sure we also write the mapped dirty buffer_heads.
4531+ * If we look at mpd->lbh.b_blocknr we would only be looking
4532+ * at the currently mapped buffer_heads.
4533+ */
4534 index = mpd->first_page;
4535 end = mpd->next_page - 1;
4536
4537+ pagevec_init(&pvec, 0);
4538 while (index <= end) {
4539- /* XXX: optimize tail */
4540 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
4541 if (nr_pages == 0)
4542 break;
4543@@ -1671,8 +1680,16 @@ static int mpage_da_submit_io(struct mpa
4544 break;
4545 index++;
4546
4547+ BUG_ON(!PageLocked(page));
4548+ BUG_ON(PageWriteback(page));
4549+
4550+ pages_skipped = mpd->wbc->pages_skipped;
4551 err = mapping->a_ops->writepage(page, mpd->wbc);
4552- if (!err)
4553+ if (!err && (pages_skipped == mpd->wbc->pages_skipped))
4554+ /*
4555+ * have successfully written the page
4556+ * without skipping the same
4557+ */
4558 mpd->pages_written++;
4559 /*
4560 * In error case, we have to continue because
4561@@ -1783,6 +1800,57 @@ static inline void __unmap_underlying_bl
4562 unmap_underlying_metadata(bdev, bh->b_blocknr + i);
4563 }
4564
4565+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
4566+ sector_t logical, long blk_cnt)
4567+{
4568+ int nr_pages, i;
4569+ pgoff_t index, end;
4570+ struct pagevec pvec;
4571+ struct inode *inode = mpd->inode;
4572+ struct address_space *mapping = inode->i_mapping;
4573+
4574+ index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
4575+ end = (logical + blk_cnt - 1) >>
4576+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
4577+ while (index <= end) {
4578+ nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
4579+ if (nr_pages == 0)
4580+ break;
4581+ for (i = 0; i < nr_pages; i++) {
4582+ struct page *page = pvec.pages[i];
4583+ index = page->index;
4584+ if (index > end)
4585+ break;
4586+ index++;
4587+
4588+ BUG_ON(!PageLocked(page));
4589+ BUG_ON(PageWriteback(page));
4590+ block_invalidatepage(page, 0);
4591+ ClearPageUptodate(page);
4592+ unlock_page(page);
4593+ }
4594+ }
4595+ return;
4596+}
4597+
4598+static void ext4_print_free_blocks(struct inode *inode)
4599+{
4600+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
4601+ printk(KERN_EMERG "Total free blocks count %lld\n",
4602+ ext4_count_free_blocks(inode->i_sb));
4603+ printk(KERN_EMERG "Free/Dirty block details\n");
4604+ printk(KERN_EMERG "free_blocks=%lld\n",
4605+ (long long)percpu_counter_sum(&sbi->s_freeblocks_counter));
4606+ printk(KERN_EMERG "dirty_blocks=%lld\n",
4607+ (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter));
4608+ printk(KERN_EMERG "Block reservation details\n");
4609+ printk(KERN_EMERG "i_reserved_data_blocks=%u\n",
4610+ EXT4_I(inode)->i_reserved_data_blocks);
4611+ printk(KERN_EMERG "i_reserved_meta_blocks=%u\n",
4612+ EXT4_I(inode)->i_reserved_meta_blocks);
4613+ return;
4614+}
4615+
4616 /*
4617 * mpage_da_map_blocks - go through given space
4618 *
4619@@ -1792,32 +1860,69 @@ static inline void __unmap_underlying_bl
4620 * The function skips space we know is already mapped to disk blocks.
4621 *
4622 */
4623-static void mpage_da_map_blocks(struct mpage_da_data *mpd)
4624+static int mpage_da_map_blocks(struct mpage_da_data *mpd)
4625 {
4626 int err = 0;
4627- struct buffer_head *lbh = &mpd->lbh;
4628- sector_t next = lbh->b_blocknr;
4629 struct buffer_head new;
4630+ struct buffer_head *lbh = &mpd->lbh;
4631+ sector_t next;
4632
4633 /*
4634 * We consider only non-mapped and non-allocated blocks
4635 */
4636 if (buffer_mapped(lbh) && !buffer_delay(lbh))
4637- return;
4638-
4639+ return 0;
4640 new.b_state = lbh->b_state;
4641 new.b_blocknr = 0;
4642 new.b_size = lbh->b_size;
4643-
4644+ next = lbh->b_blocknr;
4645 /*
4646 * If we didn't accumulate anything
4647 * to write simply return
4648 */
4649 if (!new.b_size)
4650- return;
4651+ return 0;
4652 err = mpd->get_block(mpd->inode, next, &new, 1);
4653- if (err)
4654- return;
4655+ if (err) {
4656+
4657+ /* If get block returns with error
4658+ * we simply return. Later writepage
4659+ * will redirty the page and writepages
4660+ * will find the dirty page again
4661+ */
4662+ if (err == -EAGAIN)
4663+ return 0;
4664+
4665+ if (err == -ENOSPC &&
4666+ ext4_count_free_blocks(mpd->inode->i_sb)) {
4667+ mpd->retval = err;
4668+ return 0;
4669+ }
4670+
4671+ /*
4672+ * get block failure will cause us
4673+ * to loop in writepages. Because
4674+ * a_ops->writepage won't be able to
4675+ * make progress. The page will be redirtied
4676+ * by writepage and writepages will again
4677+ * try to write the same.
4678+ */
4679+ printk(KERN_EMERG "%s block allocation failed for inode %lu "
4680+ "at logical offset %llu with max blocks "
4681+ "%zd with error %d\n",
4682+ __func__, mpd->inode->i_ino,
4683+ (unsigned long long)next,
4684+ lbh->b_size >> mpd->inode->i_blkbits, err);
4685+ printk(KERN_EMERG "This should not happen.!! "
4686+ "Data will be lost\n");
4687+ if (err == -ENOSPC) {
4688+ ext4_print_free_blocks(mpd->inode);
4689+ }
4690+			/* invalidate all the pages */
4691+ ext4_da_block_invalidatepages(mpd, next,
4692+ lbh->b_size >> mpd->inode->i_blkbits);
4693+ return err;
4694+ }
4695 BUG_ON(new.b_size == 0);
4696
4697 if (buffer_new(&new))
4698@@ -1830,7 +1935,7 @@ static void mpage_da_map_blocks(struct m
4699 if (buffer_delay(lbh) || buffer_unwritten(lbh))
4700 mpage_put_bnr_to_bhs(mpd, next, &new);
4701
4702- return;
4703+ return 0;
4704 }
4705
4706 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
4707@@ -1899,8 +2004,8 @@ flush_it:
4708 * We couldn't merge the block to our extent, so we
4709 * need to flush current extent and start new one
4710 */
4711- mpage_da_map_blocks(mpd);
4712- mpage_da_submit_io(mpd);
4713+ if (mpage_da_map_blocks(mpd) == 0)
4714+ mpage_da_submit_io(mpd);
4715 mpd->io_done = 1;
4716 return;
4717 }
4718@@ -1942,8 +2047,8 @@ static int __mpage_da_writepage(struct p
4719 * and start IO on them using writepage()
4720 */
4721 if (mpd->next_page != mpd->first_page) {
4722- mpage_da_map_blocks(mpd);
4723- mpage_da_submit_io(mpd);
4724+ if (mpage_da_map_blocks(mpd) == 0)
4725+ mpage_da_submit_io(mpd);
4726 /*
4727 * skip rest of the page in the page_vec
4728 */
4729@@ -1991,11 +2096,29 @@ static int __mpage_da_writepage(struct p
4730 bh = head;
4731 do {
4732 BUG_ON(buffer_locked(bh));
4733+ /*
4734+ * We need to try to allocate
4735+ * unmapped blocks in the same page.
4736+ * Otherwise we won't make progress
4737+ * with the page in ext4_da_writepage
4738+ */
4739 if (buffer_dirty(bh) &&
4740 (!buffer_mapped(bh) || buffer_delay(bh))) {
4741 mpage_add_bh_to_extent(mpd, logical, bh);
4742 if (mpd->io_done)
4743 return MPAGE_DA_EXTENT_TAIL;
4744+ } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
4745+ /*
4746+ * mapped dirty buffer. We need to update
4747+ * the b_state because we look at
4748+ * b_state in mpage_da_map_blocks. We don't
4749+ * update b_size because if we find an
4750+ * unmapped buffer_head later we need to
4751+ * use the b_state flag of that buffer_head.
4752+ */
4753+ if (mpd->lbh.b_size == 0)
4754+ mpd->lbh.b_state =
4755+ bh->b_state & BH_FLAGS;
4756 }
4757 logical++;
4758 } while ((bh = bh->b_this_page) != head);
4759@@ -2018,39 +2141,42 @@ static int __mpage_da_writepage(struct p
4760 */
4761 static int mpage_da_writepages(struct address_space *mapping,
4762 struct writeback_control *wbc,
4763- get_block_t get_block)
4764+ struct mpage_da_data *mpd)
4765 {
4766- struct mpage_da_data mpd;
4767- long to_write;
4768 int ret;
4769+ long nr_to_write;
4770+ pgoff_t index;
4771
4772- if (!get_block)
4773- return generic_writepages(mapping, wbc);
4774-
4775- mpd.wbc = wbc;
4776- mpd.inode = mapping->host;
4777- mpd.lbh.b_size = 0;
4778- mpd.lbh.b_state = 0;
4779- mpd.lbh.b_blocknr = 0;
4780- mpd.first_page = 0;
4781- mpd.next_page = 0;
4782- mpd.get_block = get_block;
4783- mpd.io_done = 0;
4784- mpd.pages_written = 0;
4785
4786- to_write = wbc->nr_to_write;
4787
4788- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
4789+ if (!mpd->get_block)
4790+ return generic_writepages(mapping, wbc);
4791
4792+ mpd->lbh.b_size = 0;
4793+ mpd->lbh.b_state = 0;
4794+ mpd->lbh.b_blocknr = 0;
4795+ mpd->first_page = 0;
4796+ mpd->next_page = 0;
4797+ mpd->io_done = 0;
4798+ mpd->pages_written = 0;
4799+ mpd->retval = 0;
4800+
4801+ nr_to_write = wbc->nr_to_write;
4802+ index = mapping->writeback_index;
4803+ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
4804+ wbc->nr_to_write = nr_to_write;
4805+ mapping->writeback_index = index;
4806 /*
4807 * Handle last extent of pages
4808 */
4809- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
4810- mpage_da_map_blocks(&mpd);
4811- mpage_da_submit_io(&mpd);
4812- }
4813+ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
4814+ if (mpage_da_map_blocks(mpd) == 0)
4815+ mpage_da_submit_io(mpd);
4816
4817- wbc->nr_to_write = to_write - mpd.pages_written;
4818+ mpd->io_done = 1;
4819+ ret = MPAGE_DA_EXTENT_TAIL;
4820+ }
4821+ wbc->nr_to_write -= mpd->pages_written;
4822 return ret;
4823 }
4824
4825@@ -2103,18 +2229,24 @@ static int ext4_da_get_block_write(struc
4826 handle_t *handle = NULL;
4827
4828 handle = ext4_journal_current_handle();
4829- if (!handle) {
4830- ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
4831- bh_result, 0, 0, 0);
4832- BUG_ON(!ret);
4833- } else {
4834- ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
4835- bh_result, create, 0, EXT4_DELALLOC_RSVED);
4836- }
4837-
4838+ BUG_ON(!handle);
4839+ ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
4840+ bh_result, create, 0, EXT4_DELALLOC_RSVED);
4841 if (ret > 0) {
4842+
4843 bh_result->b_size = (ret << inode->i_blkbits);
4844
4845+ if (ext4_should_order_data(inode)) {
4846+ int retval;
4847+ retval = ext4_jbd2_file_inode(handle, inode);
4848+ if (retval)
4849+ /*
4850+ * Failed to add inode for ordered
4851+ * mode. Don't update file size
4852+ */
4853+ return retval;
4854+ }
4855+
4856 /*
4857 * Update on-disk size along with block allocation
4858 * we don't use 'extend_disksize' as size may change
4859@@ -2124,18 +2256,9 @@ static int ext4_da_get_block_write(struc
4860 if (disksize > i_size_read(inode))
4861 disksize = i_size_read(inode);
4862 if (disksize > EXT4_I(inode)->i_disksize) {
4863- /*
4864- * XXX: replace with spinlock if seen contended -bzzz
4865- */
4866- down_write(&EXT4_I(inode)->i_data_sem);
4867- if (disksize > EXT4_I(inode)->i_disksize)
4868- EXT4_I(inode)->i_disksize = disksize;
4869- up_write(&EXT4_I(inode)->i_data_sem);
4870-
4871- if (EXT4_I(inode)->i_disksize == disksize) {
4872- ret = ext4_mark_inode_dirty(handle, inode);
4873- return ret;
4874- }
4875+ ext4_update_i_disksize(inode, disksize);
4876+ ret = ext4_mark_inode_dirty(handle, inode);
4877+ return ret;
4878 }
4879 ret = 0;
4880 }
4881@@ -2181,7 +2304,7 @@ static int ext4_da_writepage(struct page
4882 {
4883 int ret = 0;
4884 loff_t size;
4885- unsigned long len;
4886+ unsigned int len;
4887 struct buffer_head *page_bufs;
4888 struct inode *inode = page->mapping->host;
4889
4890@@ -2284,11 +2407,14 @@ static int ext4_da_writepages_trans_bloc
4891 static int ext4_da_writepages(struct address_space *mapping,
4892 struct writeback_control *wbc)
4893 {
4894+ pgoff_t index;
4895+ int range_whole = 0;
4896 handle_t *handle = NULL;
4897- loff_t range_start = 0;
4898+ struct mpage_da_data mpd;
4899 struct inode *inode = mapping->host;
4900+ int pages_written = 0;
4901+ long pages_skipped;
4902 int needed_blocks, ret = 0, nr_to_writebump = 0;
4903- long to_write, pages_skipped = 0;
4904 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
4905
4906 /*
4907@@ -2298,6 +2424,20 @@ static int ext4_da_writepages(struct add
4908 */
4909 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
4910 return 0;
4911+
4912+ /*
4913+ * If the filesystem has aborted, it is read-only, so return
4914+ * right away instead of dumping stack traces later on that
4915+ * will obscure the real source of the problem. We test
4916+ * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
4917+ * the latter could be true if the filesystem is mounted
4918+ * read-only, and in that case, ext4_da_writepages should
4919+ * *never* be called, so if that ever happens, we would want
4920+ * the stack trace.
4921+ */
4922+ if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
4923+ return -EROFS;
4924+
4925 /*
4926 * Make sure nr_to_write is >= sbi->s_mb_stream_request
4927 * This make sure small files blocks are allocated in
4928@@ -2308,20 +2448,24 @@ static int ext4_da_writepages(struct add
4929 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
4930 wbc->nr_to_write = sbi->s_mb_stream_request;
4931 }
4932+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4933+ range_whole = 1;
4934
4935- if (!wbc->range_cyclic)
4936- /*
4937- * If range_cyclic is not set force range_cont
4938- * and save the old writeback_index
4939- */
4940- wbc->range_cont = 1;
4941+ if (wbc->range_cyclic)
4942+ index = mapping->writeback_index;
4943+ else
4944+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
4945
4946- range_start = wbc->range_start;
4947+ mpd.wbc = wbc;
4948+ mpd.inode = mapping->host;
4949+
4950+ /*
4951+ * we don't want write_cache_pages to update
4952+ * nr_to_write and writeback_index
4953+ */
4954 pages_skipped = wbc->pages_skipped;
4955
4956-restart_loop:
4957- to_write = wbc->nr_to_write;
4958- while (!ret && to_write > 0) {
4959+ while (!ret && wbc->nr_to_write > 0) {
4960
4961 /*
4962 * we insert one extent at a time. So we need
4963@@ -2336,63 +2480,87 @@ restart_loop:
4964 handle = ext4_journal_start(inode, needed_blocks);
4965 if (IS_ERR(handle)) {
4966 ret = PTR_ERR(handle);
4967- printk(KERN_EMERG "%s: jbd2_start: "
4968+ printk(KERN_CRIT "%s: jbd2_start: "
4969 "%ld pages, ino %lu; err %d\n", __func__,
4970 wbc->nr_to_write, inode->i_ino, ret);
4971 dump_stack();
4972 goto out_writepages;
4973 }
4974- if (ext4_should_order_data(inode)) {
4975- /*
4976- * With ordered mode we need to add
4977- * the inode to the journal handl
4978- * when we do block allocation.
4979- */
4980- ret = ext4_jbd2_file_inode(handle, inode);
4981- if (ret) {
4982- ext4_journal_stop(handle);
4983- goto out_writepages;
4984- }
4985- }
4986+ mpd.get_block = ext4_da_get_block_write;
4987+ ret = mpage_da_writepages(mapping, wbc, &mpd);
4988
4989- to_write -= wbc->nr_to_write;
4990- ret = mpage_da_writepages(mapping, wbc,
4991- ext4_da_get_block_write);
4992 ext4_journal_stop(handle);
4993- if (ret == MPAGE_DA_EXTENT_TAIL) {
4994+
4995+ if (mpd.retval == -ENOSPC) {
4996+ /* commit the transaction which would
4997+ * free blocks released in the transaction
4998+ * and try again
4999+ */
5000+ jbd2_journal_force_commit_nested(sbi->s_journal);
5001+ wbc->pages_skipped = pages_skipped;
5002+ ret = 0;
5003+ } else if (ret == MPAGE_DA_EXTENT_TAIL) {
5004 /*
5005 * got one extent now try with
5006 * rest of the pages
5007 */
5008- to_write += wbc->nr_to_write;
5009+ pages_written += mpd.pages_written;
5010+ wbc->pages_skipped = pages_skipped;
5011 ret = 0;
5012- } else if (wbc->nr_to_write) {
5013+ } else if (wbc->nr_to_write)
5014 /*
5015 * There is no more writeout needed
5016 * or we requested for a noblocking writeout
5017 * and we found the device congested
5018 */
5019- to_write += wbc->nr_to_write;
5020 break;
5021- }
5022- wbc->nr_to_write = to_write;
5023- }
5024-
5025- if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
5026- /* We skipped pages in this loop */
5027- wbc->range_start = range_start;
5028- wbc->nr_to_write = to_write +
5029- wbc->pages_skipped - pages_skipped;
5030- wbc->pages_skipped = pages_skipped;
5031- goto restart_loop;
5032 }
5033+ if (pages_skipped != wbc->pages_skipped)
5034+ printk(KERN_EMERG "This should not happen leaving %s "
5035+ "with nr_to_write = %ld ret = %d\n",
5036+ __func__, wbc->nr_to_write, ret);
5037+
5038+ /* Update index */
5039+ index += pages_written;
5040+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
5041+ /*
5042+ * set the writeback_index so that range_cyclic
5043+ * mode will write it back later
5044+ */
5045+ mapping->writeback_index = index;
5046
5047 out_writepages:
5048- wbc->nr_to_write = to_write - nr_to_writebump;
5049- wbc->range_start = range_start;
5050+ wbc->nr_to_write -= nr_to_writebump;
5051 return ret;
5052 }
5053
5054+#define FALL_BACK_TO_NONDELALLOC 1
5055+static int ext4_nonda_switch(struct super_block *sb)
5056+{
5057+ s64 free_blocks, dirty_blocks;
5058+ struct ext4_sb_info *sbi = EXT4_SB(sb);
5059+
5060+ /*
5061+ * switch to non delalloc mode if we are running low
5062+ * on free block. The free block accounting via percpu
5063+ * counters can get slightly wrong with FBC_BATCH getting
5064+ * accumulated on each CPU without updating global counters
5065+ * Delalloc need an accurate free block accounting. So switch
5066+ * to non delalloc when we are near to error range.
5067+ */
5068+ free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
5069+ dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter);
5070+ if (2 * free_blocks < 3 * dirty_blocks ||
5071+ free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
5072+ /*
5073+		 * free block count is less than 150% of dirty blocks
5074+		 * or free blocks is less than watermark
5075+ */
5076+ return 1;
5077+ }
5078+ return 0;
5079+}
5080+
5081 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
5082 loff_t pos, unsigned len, unsigned flags,
5083 struct page **pagep, void **fsdata)
5084@@ -2408,6 +2576,12 @@ static int ext4_da_write_begin(struct fi
5085 from = pos & (PAGE_CACHE_SIZE - 1);
5086 to = from + len;
5087
5088+ if (ext4_nonda_switch(inode->i_sb)) {
5089+ *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
5090+ return ext4_write_begin(file, mapping, pos,
5091+ len, flags, pagep, fsdata);
5092+ }
5093+ *fsdata = (void *)0;
5094 retry:
5095 /*
5096 * With delayed allocation, we don't log the i_disksize update
5097@@ -2435,6 +2609,13 @@ retry:
5098 unlock_page(page);
5099 ext4_journal_stop(handle);
5100 page_cache_release(page);
5101+ /*
5102+ * block_write_begin may have instantiated a few blocks
5103+ * outside i_size. Trim these off again. Don't need
5104+ * i_size_read because we hold i_mutex.
5105+ */
5106+ if (pos + len > inode->i_size)
5107+ vmtruncate(inode, inode->i_size);
5108 }
5109
5110 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5111@@ -2458,7 +2639,7 @@ static int ext4_da_should_update_i_disks
5112 bh = page_buffers(page);
5113 idx = offset >> inode->i_blkbits;
5114
5115- for (i=0; i < idx; i++)
5116+ for (i = 0; i < idx; i++)
5117 bh = bh->b_this_page;
5118
5119 if (!buffer_mapped(bh) || (buffer_delay(bh)))
5120@@ -2476,9 +2657,22 @@ static int ext4_da_write_end(struct file
5121 handle_t *handle = ext4_journal_current_handle();
5122 loff_t new_i_size;
5123 unsigned long start, end;
5124+ int write_mode = (int)(unsigned long)fsdata;
5125+
5126+ if (write_mode == FALL_BACK_TO_NONDELALLOC) {
5127+ if (ext4_should_order_data(inode)) {
5128+ return ext4_ordered_write_end(file, mapping, pos,
5129+ len, copied, page, fsdata);
5130+ } else if (ext4_should_writeback_data(inode)) {
5131+ return ext4_writeback_write_end(file, mapping, pos,
5132+ len, copied, page, fsdata);
5133+ } else {
5134+ BUG();
5135+ }
5136+ }
5137
5138 start = pos & (PAGE_CACHE_SIZE - 1);
5139- end = start + copied -1;
5140+ end = start + copied - 1;
5141
5142 /*
5143 * generic_write_end() will run mark_inode_dirty() if i_size
5144@@ -2502,6 +2696,11 @@ static int ext4_da_write_end(struct file
5145 EXT4_I(inode)->i_disksize = new_i_size;
5146 }
5147 up_write(&EXT4_I(inode)->i_data_sem);
5148+ /* We need to mark inode dirty even if
5149+	 * new_i_size is less than inode->i_size
5150+	 * but greater than i_disksize. (hint: delalloc)
5151+ */
5152+ ext4_mark_inode_dirty(handle, inode);
5153 }
5154 }
5155 ret2 = generic_write_end(file, mapping, pos, len, copied,
5156@@ -2593,7 +2792,7 @@ static sector_t ext4_bmap(struct address
5157 return 0;
5158 }
5159
5160- return generic_block_bmap(mapping,block,ext4_get_block);
5161+ return generic_block_bmap(mapping, block, ext4_get_block);
5162 }
5163
5164 static int bget_one(handle_t *handle, struct buffer_head *bh)
5165@@ -3199,7 +3398,7 @@ static Indirect *ext4_find_shared(struct
5166 if (!partial->key && *partial->p)
5167 /* Writer: end */
5168 goto no_top;
5169- for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--)
5170+ for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
5171 ;
5172 /*
5173 * OK, we've found the last block that must survive. The rest of our
5174@@ -3218,7 +3417,7 @@ static Indirect *ext4_find_shared(struct
5175 }
5176 /* Writer: end */
5177
5178- while(partial > p) {
5179+ while (partial > p) {
5180 brelse(partial->bh);
5181 partial--;
5182 }
5183@@ -3410,9 +3609,9 @@ static void ext4_free_branches(handle_t
5184 /* This zaps the entire block. Bottom up. */
5185 BUFFER_TRACE(bh, "free child branches");
5186 ext4_free_branches(handle, inode, bh,
5187- (__le32*)bh->b_data,
5188- (__le32*)bh->b_data + addr_per_block,
5189- depth);
5190+ (__le32 *) bh->b_data,
5191+ (__le32 *) bh->b_data + addr_per_block,
5192+ depth);
5193
5194 /*
5195 * We've probably journalled the indirect block several
5196@@ -3580,7 +3779,7 @@ void ext4_truncate(struct inode *inode)
5197 */
5198 down_write(&ei->i_data_sem);
5199
5200- ext4_discard_reservation(inode);
5201+ ext4_discard_preallocations(inode);
5202
5203 /*
5204 * The orphan list entry will now protect us from any crash which
5205@@ -3675,41 +3874,6 @@ out_stop:
5206 ext4_journal_stop(handle);
5207 }
5208
5209-static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
5210- unsigned long ino, struct ext4_iloc *iloc)
5211-{
5212- ext4_group_t block_group;
5213- unsigned long offset;
5214- ext4_fsblk_t block;
5215- struct ext4_group_desc *gdp;
5216-
5217- if (!ext4_valid_inum(sb, ino)) {
5218- /*
5219- * This error is already checked for in namei.c unless we are
5220- * looking at an NFS filehandle, in which case no error
5221- * report is needed
5222- */
5223- return 0;
5224- }
5225-
5226- block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
5227- gdp = ext4_get_group_desc(sb, block_group, NULL);
5228- if (!gdp)
5229- return 0;
5230-
5231- /*
5232- * Figure out the offset within the block group inode table
5233- */
5234- offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) *
5235- EXT4_INODE_SIZE(sb);
5236- block = ext4_inode_table(sb, gdp) +
5237- (offset >> EXT4_BLOCK_SIZE_BITS(sb));
5238-
5239- iloc->block_group = block_group;
5240- iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1);
5241- return block;
5242-}
5243-
5244 /*
5245 * ext4_get_inode_loc returns with an extra refcount against the inode's
5246 * underlying buffer_head on success. If 'in_mem' is true, we have all
5247@@ -3719,19 +3883,35 @@ static ext4_fsblk_t ext4_get_inode_block
5248 static int __ext4_get_inode_loc(struct inode *inode,
5249 struct ext4_iloc *iloc, int in_mem)
5250 {
5251- ext4_fsblk_t block;
5252- struct buffer_head *bh;
5253+ struct ext4_group_desc *gdp;
5254+ struct buffer_head *bh;
5255+ struct super_block *sb = inode->i_sb;
5256+ ext4_fsblk_t block;
5257+ int inodes_per_block, inode_offset;
5258+
5259+ iloc->bh = NULL;
5260+ if (!ext4_valid_inum(sb, inode->i_ino))
5261+ return -EIO;
5262
5263- block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc);
5264- if (!block)
5265+ iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
5266+ gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
5267+ if (!gdp)
5268 return -EIO;
5269
5270- bh = sb_getblk(inode->i_sb, block);
5271+ /*
5272+ * Figure out the offset within the block group inode table
5273+ */
5274+ inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
5275+ inode_offset = ((inode->i_ino - 1) %
5276+ EXT4_INODES_PER_GROUP(sb));
5277+ block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
5278+ iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
5279+
5280+ bh = sb_getblk(sb, block);
5281 if (!bh) {
5282- ext4_error (inode->i_sb, "ext4_get_inode_loc",
5283- "unable to read inode block - "
5284- "inode=%lu, block=%llu",
5285- inode->i_ino, block);
5286+ ext4_error(sb, "ext4_get_inode_loc", "unable to read "
5287+ "inode block - inode=%lu, block=%llu",
5288+ inode->i_ino, block);
5289 return -EIO;
5290 }
5291 if (!buffer_uptodate(bh)) {
5292@@ -3759,28 +3939,12 @@ static int __ext4_get_inode_loc(struct i
5293 */
5294 if (in_mem) {
5295 struct buffer_head *bitmap_bh;
5296- struct ext4_group_desc *desc;
5297- int inodes_per_buffer;
5298- int inode_offset, i;
5299- ext4_group_t block_group;
5300- int start;
5301-
5302- block_group = (inode->i_ino - 1) /
5303- EXT4_INODES_PER_GROUP(inode->i_sb);
5304- inodes_per_buffer = bh->b_size /
5305- EXT4_INODE_SIZE(inode->i_sb);
5306- inode_offset = ((inode->i_ino - 1) %
5307- EXT4_INODES_PER_GROUP(inode->i_sb));
5308- start = inode_offset & ~(inodes_per_buffer - 1);
5309+ int i, start;
5310
5311- /* Is the inode bitmap in cache? */
5312- desc = ext4_get_group_desc(inode->i_sb,
5313- block_group, NULL);
5314- if (!desc)
5315- goto make_io;
5316+ start = inode_offset & ~(inodes_per_block - 1);
5317
5318- bitmap_bh = sb_getblk(inode->i_sb,
5319- ext4_inode_bitmap(inode->i_sb, desc));
5320+ /* Is the inode bitmap in cache? */
5321+ bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
5322 if (!bitmap_bh)
5323 goto make_io;
5324
5325@@ -3793,14 +3957,14 @@ static int __ext4_get_inode_loc(struct i
5326 brelse(bitmap_bh);
5327 goto make_io;
5328 }
5329- for (i = start; i < start + inodes_per_buffer; i++) {
5330+ for (i = start; i < start + inodes_per_block; i++) {
5331 if (i == inode_offset)
5332 continue;
5333 if (ext4_test_bit(i, bitmap_bh->b_data))
5334 break;
5335 }
5336 brelse(bitmap_bh);
5337- if (i == start + inodes_per_buffer) {
5338+ if (i == start + inodes_per_block) {
5339 /* all other inodes are free, so skip I/O */
5340 memset(bh->b_data, 0, bh->b_size);
5341 set_buffer_uptodate(bh);
5342@@ -3811,6 +3975,36 @@ static int __ext4_get_inode_loc(struct i
5343
5344 make_io:
5345 /*
5346+ * If we need to do any I/O, try to pre-readahead extra
5347+ * blocks from the inode table.
5348+ */
5349+ if (EXT4_SB(sb)->s_inode_readahead_blks) {
5350+ ext4_fsblk_t b, end, table;
5351+ unsigned num;
5352+
5353+ table = ext4_inode_table(sb, gdp);
5354+ /* Make sure s_inode_readahead_blks is a power of 2 */
5355+ while (EXT4_SB(sb)->s_inode_readahead_blks &
5356+ (EXT4_SB(sb)->s_inode_readahead_blks-1))
5357+ EXT4_SB(sb)->s_inode_readahead_blks =
5358+ (EXT4_SB(sb)->s_inode_readahead_blks &
5359+ (EXT4_SB(sb)->s_inode_readahead_blks-1));
5360+ b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
5361+ if (table > b)
5362+ b = table;
5363+ end = b + EXT4_SB(sb)->s_inode_readahead_blks;
5364+ num = EXT4_INODES_PER_GROUP(sb);
5365+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
5366+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
5367+ num -= ext4_itable_unused_count(sb, gdp);
5368+ table += num / inodes_per_block;
5369+ if (end > table)
5370+ end = table;
5371+ while (b <= end)
5372+ sb_breadahead(sb, b++);
5373+ }
5374+
5375+ /*
5376 * There are other valid inodes in the buffer, this inode
5377 * has in-inode xattrs, or we don't have this inode in memory.
5378 * Read the block from disk.
5379@@ -3820,10 +4014,9 @@ make_io:
5380 submit_bh(READ_META, bh);
5381 wait_on_buffer(bh);
5382 if (!buffer_uptodate(bh)) {
5383- ext4_error(inode->i_sb, "ext4_get_inode_loc",
5384- "unable to read inode block - "
5385- "inode=%lu, block=%llu",
5386- inode->i_ino, block);
5387+ ext4_error(sb, __func__,
5388+ "unable to read inode block - inode=%lu, "
5389+ "block=%llu", inode->i_ino, block);
5390 brelse(bh);
5391 return -EIO;
5392 }
5393@@ -3915,11 +4108,10 @@ struct inode *ext4_iget(struct super_blo
5394 return inode;
5395
5396 ei = EXT4_I(inode);
5397-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
5398+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
5399 ei->i_acl = EXT4_ACL_NOT_CACHED;
5400 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
5401 #endif
5402- ei->i_block_alloc_info = NULL;
5403
5404 ret = __ext4_get_inode_loc(inode, &iloc, 0);
5405 if (ret < 0)
5406@@ -3929,7 +4121,7 @@ struct inode *ext4_iget(struct super_blo
5407 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
5408 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
5409 inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
5410- if(!(test_opt (inode->i_sb, NO_UID32))) {
5411+ if (!(test_opt(inode->i_sb, NO_UID32))) {
5412 inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
5413 inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
5414 }
5415@@ -3947,7 +4139,7 @@ struct inode *ext4_iget(struct super_blo
5416 if (inode->i_mode == 0 ||
5417 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
5418 /* this inode is deleted */
5419- brelse (bh);
5420+ brelse(bh);
5421 ret = -ESTALE;
5422 goto bad_inode;
5423 }
5424@@ -3980,7 +4172,7 @@ struct inode *ext4_iget(struct super_blo
5425 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
5426 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
5427 EXT4_INODE_SIZE(inode->i_sb)) {
5428- brelse (bh);
5429+ brelse(bh);
5430 ret = -EIO;
5431 goto bad_inode;
5432 }
5433@@ -4033,7 +4225,7 @@ struct inode *ext4_iget(struct super_blo
5434 init_special_inode(inode, inode->i_mode,
5435 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
5436 }
5437- brelse (iloc.bh);
5438+ brelse(iloc.bh);
5439 ext4_set_inode_flags(inode);
5440 unlock_new_inode(inode);
5441 return inode;
5442@@ -4050,7 +4242,6 @@ static int ext4_inode_blocks_set(handle_
5443 struct inode *inode = &(ei->vfs_inode);
5444 u64 i_blocks = inode->i_blocks;
5445 struct super_block *sb = inode->i_sb;
5446- int err = 0;
5447
5448 if (i_blocks <= ~0U) {
5449 /*
5450@@ -4060,36 +4251,27 @@ static int ext4_inode_blocks_set(handle_
5451 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5452 raw_inode->i_blocks_high = 0;
5453 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
5454- } else if (i_blocks <= 0xffffffffffffULL) {
5455+ return 0;
5456+ }
5457+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
5458+ return -EFBIG;
5459+
5460+ if (i_blocks <= 0xffffffffffffULL) {
5461 /*
5462 * i_blocks can be represented in a 48 bit variable
5463 * as multiple of 512 bytes
5464 */
5465- err = ext4_update_rocompat_feature(handle, sb,
5466- EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
5467- if (err)
5468- goto err_out;
5469- /* i_block is stored in the split 48 bit fields */
5470 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5471 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
5472 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
5473 } else {
5474- /*
5475- * i_blocks should be represented in a 48 bit variable
5476- * as multiple of file system block size
5477- */
5478- err = ext4_update_rocompat_feature(handle, sb,
5479- EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
5480- if (err)
5481- goto err_out;
5482 ei->i_flags |= EXT4_HUGE_FILE_FL;
5483 /* i_block is stored in file system block size */
5484 i_blocks = i_blocks >> (inode->i_blkbits - 9);
5485 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
5486 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
5487 }
5488-err_out:
5489- return err;
5490+ return 0;
5491 }
5492
5493 /*
5494@@ -4115,14 +4297,14 @@ static int ext4_do_update_inode(handle_t
5495
5496 ext4_get_inode_flags(ei);
5497 raw_inode->i_mode = cpu_to_le16(inode->i_mode);
5498- if(!(test_opt(inode->i_sb, NO_UID32))) {
5499+ if (!(test_opt(inode->i_sb, NO_UID32))) {
5500 raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
5501 raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
5502 /*
5503 * Fix up interoperability with old kernels. Otherwise, old inodes get
5504 * re-used with the upper 16 bits of the uid/gid intact
5505 */
5506- if(!ei->i_dtime) {
5507+ if (!ei->i_dtime) {
5508 raw_inode->i_uid_high =
5509 cpu_to_le16(high_16_bits(inode->i_uid));
5510 raw_inode->i_gid_high =
5511@@ -4210,7 +4392,7 @@ static int ext4_do_update_inode(handle_t
5512 ei->i_state &= ~EXT4_STATE_NEW;
5513
5514 out_brelse:
5515- brelse (bh);
5516+ brelse(bh);
5517 ext4_std_error(inode->i_sb, err);
5518 return err;
5519 }
5520@@ -4814,6 +4996,7 @@ int ext4_page_mkwrite(struct vm_area_str
5521 loff_t size;
5522 unsigned long len;
5523 int ret = -EINVAL;
5524+ void *fsdata;
5525 struct file *file = vma->vm_file;
5526 struct inode *inode = file->f_path.dentry->d_inode;
5527 struct address_space *mapping = inode->i_mapping;
5528@@ -4852,11 +5035,11 @@ int ext4_page_mkwrite(struct vm_area_str
5529 * on the same page though
5530 */
5531 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
5532- len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
5533+ len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5534 if (ret < 0)
5535 goto out_unlock;
5536 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5537- len, len, page, NULL);
5538+ len, len, page, fsdata);
5539 if (ret < 0)
5540 goto out_unlock;
5541 ret = 0;
5542diff -rup b/fs/ext4//ioctl.c a/fs/ext4///ioctl.c
5543--- b/fs/ext4/ioctl.c 2009-02-11 14:37:58.000000000 +0100
5544+++ a/fs/ext4/ioctl.c 2009-02-10 21:40:11.000000000 +0100
5545@@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsig
5546 struct inode *inode = filp->f_dentry->d_inode;
5547 struct ext4_inode_info *ei = EXT4_I(inode);
5548 unsigned int flags;
5549- unsigned short rsv_window_size;
5550
5551- ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg);
5552+ ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
5553
5554 switch (cmd) {
5555 case EXT4_IOC_GETFLAGS:
5556@@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsig
5557 return put_user(flags, (int __user *) arg);
5558 case EXT4_IOC_SETFLAGS: {
5559 handle_t *handle = NULL;
5560- int err;
5561+ int err, migrate = 0;
5562 struct ext4_iloc iloc;
5563 unsigned int oldflags;
5564 unsigned int jflag;
5565@@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsig
5566 if (!capable(CAP_SYS_RESOURCE))
5567 goto flags_out;
5568 }
5569+ if (oldflags & EXT4_EXTENTS_FL) {
5570+ /* We don't support clearning extent flags */
5571+ if (!(flags & EXT4_EXTENTS_FL)) {
5572+ err = -EOPNOTSUPP;
5573+ goto flags_out;
5574+ }
5575+ } else if (flags & EXT4_EXTENTS_FL) {
5576+ /* migrate the file */
5577+ migrate = 1;
5578+ flags &= ~EXT4_EXTENTS_FL;
5579+ }
5580
5581 handle = ext4_journal_start(inode, 1);
5582 if (IS_ERR(handle)) {
5583@@ -109,6 +119,10 @@ flags_err:
5584
5585 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
5586 err = ext4_change_inode_journal_flag(inode, jflag);
5587+ if (err)
5588+ goto flags_out;
5589+ if (migrate)
5590+ err = ext4_ext_migrate(inode);
5591 flags_out:
5592 mutex_unlock(&inode->i_mutex);
5593 mnt_drop_write(filp->f_path.mnt);
5594@@ -175,53 +189,10 @@ setversion_out:
5595 return ret;
5596 }
5597 #endif
5598- case EXT4_IOC_GETRSVSZ:
5599- if (test_opt(inode->i_sb, RESERVATION)
5600- && S_ISREG(inode->i_mode)
5601- && ei->i_block_alloc_info) {
5602- rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size;
5603- return put_user(rsv_window_size, (int __user *)arg);
5604- }
5605- return -ENOTTY;
5606- case EXT4_IOC_SETRSVSZ: {
5607- int err;
5608-
5609- if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
5610- return -ENOTTY;
5611-
5612- if (!is_owner_or_cap(inode))
5613- return -EACCES;
5614-
5615- if (get_user(rsv_window_size, (int __user *)arg))
5616- return -EFAULT;
5617-
5618- err = mnt_want_write(filp->f_path.mnt);
5619- if (err)
5620- return err;
5621-
5622- if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
5623- rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
5624-
5625- /*
5626- * need to allocate reservation structure for this inode
5627- * before set the window size
5628- */
5629- down_write(&ei->i_data_sem);
5630- if (!ei->i_block_alloc_info)
5631- ext4_init_block_alloc_info(inode);
5632-
5633- if (ei->i_block_alloc_info){
5634- struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
5635- rsv->rsv_goal_size = rsv_window_size;
5636- }
5637- up_write(&ei->i_data_sem);
5638- mnt_drop_write(filp->f_path.mnt);
5639- return 0;
5640- }
5641 case EXT4_IOC_GROUP_EXTEND: {
5642 ext4_fsblk_t n_blocks_count;
5643 struct super_block *sb = inode->i_sb;
5644- int err;
5645+ int err, err2;
5646
5647 if (!capable(CAP_SYS_RESOURCE))
5648 return -EPERM;
5649@@ -235,8 +206,10 @@ setversion_out:
5650
5651 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
5652 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5653- jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5654+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5655 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5656+ if (err == 0)
5657+ err = err2;
5658 mnt_drop_write(filp->f_path.mnt);
5659
5660 return err;
5661@@ -244,7 +217,7 @@ setversion_out:
5662 case EXT4_IOC_GROUP_ADD: {
5663 struct ext4_new_group_data input;
5664 struct super_block *sb = inode->i_sb;
5665- int err;
5666+ int err, err2;
5667
5668 if (!capable(CAP_SYS_RESOURCE))
5669 return -EPERM;
5670@@ -259,8 +232,10 @@ setversion_out:
5671
5672 err = ext4_group_add(sb, &input);
5673 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
5674- jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5675+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
5676 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
5677+ if (err == 0)
5678+ err = err2;
5679 mnt_drop_write(filp->f_path.mnt);
5680
5681 return err;
5682diff -rup b/fs/ext4//mballoc.c a/fs/ext4///mballoc.c
5683--- b/fs/ext4/mballoc.c 2009-02-11 14:37:58.000000000 +0100
5684+++ a/fs/ext4/mballoc.c 2009-02-10 21:40:11.000000000 +0100
5685@@ -100,7 +100,7 @@
5686 * inode as:
5687 *
5688 * { page }
5689- * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
5690+ * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
5691 *
5692 *
5693 * one block each for bitmap and buddy information. So for each group we
5694@@ -330,6 +330,20 @@
5695 * object
5696 *
5697 */
5698+static struct kmem_cache *ext4_pspace_cachep;
5699+static struct kmem_cache *ext4_ac_cachep;
5700+static struct kmem_cache *ext4_free_ext_cachep;
5701+static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
5702+ ext4_group_t group);
5703+static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
5704+ ext4_group_t group);
5705+static int ext4_mb_init_per_dev_proc(struct super_block *sb);
5706+static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
5707+static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
5708+static void ext4_mb_free_committed_blocks(struct super_block *);
5709+
5710+
5711+
5712
5713 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
5714 {
5715@@ -445,9 +459,9 @@ static void mb_free_blocks_double(struct
5716 blocknr += first + i;
5717 blocknr +=
5718 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5719-
5720- ext4_error(sb, __func__, "double-free of inode"
5721- " %lu's block %llu(bit %u in group %lu)\n",
5722+ ext4_grp_locked_error(sb, e4b->bd_group,
5723+ __func__, "double-free of inode"
5724+ " %lu's block %llu(bit %u in group %u)\n",
5725 inode ? inode->i_ino : 0, blocknr,
5726 first + i, e4b->bd_group);
5727 }
5728@@ -477,9 +491,10 @@ static void mb_cmp_bitmaps(struct ext4_b
5729 b2 = (unsigned char *) bitmap;
5730 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
5731 if (b1[i] != b2[i]) {
5732- printk("corruption in group %lu at byte %u(%u):"
5733- " %x in copy != %x on disk/prealloc\n",
5734- e4b->bd_group, i, i * 8, b1[i], b2[i]);
5735+ printk(KERN_ERR "corruption in group %u "
5736+ "at byte %u(%u): %x in copy != %x "
5737+ "on disk/prealloc\n",
5738+ e4b->bd_group, i, i * 8, b1[i], b2[i]);
5739 BUG();
5740 }
5741 }
5742@@ -533,9 +548,6 @@ static int __mb_check_buddy(struct ext4_
5743 void *buddy;
5744 void *buddy2;
5745
5746- if (!test_opt(sb, MBALLOC))
5747- return 0;
5748-
5749 {
5750 static int mb_check_counter;
5751 if (mb_check_counter++ % 100 != 0)
5752@@ -692,8 +704,8 @@ static void ext4_mb_generate_buddy(struc
5753 grp->bb_fragments = fragments;
5754
5755 if (free != grp->bb_free) {
5756- ext4_error(sb, __func__,
5757- "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
5758+ ext4_grp_locked_error(sb, group, __func__,
5759+ "EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n",
5760 group, free, grp->bb_free);
5761 /*
5762 * If we intent to continue, we consider group descritor
5763@@ -718,7 +730,7 @@ static void ext4_mb_generate_buddy(struc
5764 * stored in the inode as
5765 *
5766 * { page }
5767- * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
5768+ * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
5769 *
5770 *
5771 * one block each for bitmap and buddy information.
5772@@ -784,23 +796,45 @@ static int ext4_mb_init_cache(struct pag
5773 if (bh[i] == NULL)
5774 goto out;
5775
5776- if (bh_uptodate_or_lock(bh[i]))
5777+ if (bitmap_uptodate(bh[i]))
5778 continue;
5779
5780+ lock_buffer(bh[i]);
5781+ if (bitmap_uptodate(bh[i])) {
5782+ unlock_buffer(bh[i]);
5783+ continue;
5784+ }
5785 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
5786 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
5787 ext4_init_block_bitmap(sb, bh[i],
5788 first_group + i, desc);
5789+ set_bitmap_uptodate(bh[i]);
5790 set_buffer_uptodate(bh[i]);
5791- unlock_buffer(bh[i]);
5792 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
5793+ unlock_buffer(bh[i]);
5794 continue;
5795 }
5796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
5797+ if (buffer_uptodate(bh[i])) {
5798+ /*
5799+ * if not uninit if bh is uptodate,
5800+ * bitmap is also uptodate
5801+ */
5802+ set_bitmap_uptodate(bh[i]);
5803+ unlock_buffer(bh[i]);
5804+ continue;
5805+ }
5806 get_bh(bh[i]);
5807+ /*
5808+ * submit the buffer_head for read. We can
5809+ * safely mark the bitmap as uptodate now.
5810+ * We do it here so the bitmap uptodate bit
5811+ * get set with buffer lock held.
5812+ */
5813+ set_bitmap_uptodate(bh[i]);
5814 bh[i]->b_end_io = end_buffer_read_sync;
5815 submit_bh(READ, bh[i]);
5816- mb_debug("read bitmap for group %lu\n", first_group + i);
5817+ mb_debug("read bitmap for group %u\n", first_group + i);
5818 }
5819
5820 /* wait for I/O completion */
5821@@ -814,6 +848,8 @@ static int ext4_mb_init_cache(struct pag
5822
5823 err = 0;
5824 first_block = page->index * blocks_per_page;
5825+ /* init the page */
5826+ memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
5827 for (i = 0; i < blocks_per_page; i++) {
5828 int group;
5829 struct ext4_group_info *grinfo;
5830@@ -840,7 +876,6 @@ static int ext4_mb_init_cache(struct pag
5831 BUG_ON(incore == NULL);
5832 mb_debug("put buddy for group %u in page %lu/%x\n",
5833 group, page->index, i * blocksize);
5834- memset(data, 0xff, blocksize);
5835 grinfo = ext4_get_group_info(sb, group);
5836 grinfo->bb_fragments = 0;
5837 memset(grinfo->bb_counters, 0,
5838@@ -848,7 +883,9 @@ static int ext4_mb_init_cache(struct pag
5839 /*
5840 * incore got set to the group block bitmap below
5841 */
5842+ ext4_lock_group(sb, group);
5843 ext4_mb_generate_buddy(sb, data, incore, group);
5844+ ext4_unlock_group(sb, group);
5845 incore = NULL;
5846 } else {
5847 /* this is block of bitmap */
5848@@ -862,6 +899,7 @@ static int ext4_mb_init_cache(struct pag
5849
5850 /* mark all preallocated blks used in in-core bitmap */
5851 ext4_mb_generate_from_pa(sb, data, group);
5852+ ext4_mb_generate_from_freelist(sb, data, group);
5853 ext4_unlock_group(sb, group);
5854
5855 /* set incore so that the buddy information can be
5856@@ -885,19 +923,22 @@ out:
5857 static noinline_for_stack int
5858 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
5859 struct ext4_buddy *e4b)
5860+__acquires(e4b->alloc_semp)
5861 {
5862- struct ext4_sb_info *sbi = EXT4_SB(sb);
5863- struct inode *inode = sbi->s_buddy_cache;
5864 int blocks_per_page;
5865 int block;
5866 int pnum;
5867 int poff;
5868 struct page *page;
5869 int ret;
5870+ struct ext4_group_info *grp;
5871+ struct ext4_sb_info *sbi = EXT4_SB(sb);
5872+ struct inode *inode = sbi->s_buddy_cache;
5873
5874- mb_debug("load group %lu\n", group);
5875+ mb_debug("load group %u\n", group);
5876
5877 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
5878+ grp = ext4_get_group_info(sb, group);
5879
5880 e4b->bd_blkbits = sb->s_blocksize_bits;
5881 e4b->bd_info = ext4_get_group_info(sb, group);
5882@@ -905,6 +946,16 @@ ext4_mb_load_buddy(struct super_block *s
5883 e4b->bd_group = group;
5884 e4b->bd_buddy_page = NULL;
5885 e4b->bd_bitmap_page = NULL;
5886+ e4b->alloc_semp = &grp->alloc_sem;
5887+
5888+ /* Take the read lock on the group alloc
5889+ * sem. This would make sure a parallel
5890+ * ext4_mb_init_group happening on other
5891+ * groups mapped by the page is blocked
5892+ * till we are done with allocation
5893+ */
5894+ down_read(e4b->alloc_semp);
5895+ __acquire(e4b->alloc_semp);
5896
5897 /*
5898 * the buddy cache inode stores the block bitmap
5899@@ -920,6 +971,14 @@ ext4_mb_load_buddy(struct super_block *s
5900 page = find_get_page(inode->i_mapping, pnum);
5901 if (page == NULL || !PageUptodate(page)) {
5902 if (page)
5903+ /*
5904+ * drop the page reference and try
5905+ * to get the page with lock. If we
5906+ * are not uptodate that implies
5907+ * somebody just created the page but
5908+ * is yet to initialize the same. So
5909+ * wait for it to initialize.
5910+ */
5911 page_cache_release(page);
5912 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
5913 if (page) {
5914@@ -985,15 +1044,23 @@ err:
5915 page_cache_release(e4b->bd_buddy_page);
5916 e4b->bd_buddy = NULL;
5917 e4b->bd_bitmap = NULL;
5918+
5919+ /* Done with the buddy cache */
5920+ up_read(e4b->alloc_semp);
5921 return ret;
5922 }
5923
5924 static void ext4_mb_release_desc(struct ext4_buddy *e4b)
5925+__releases(e4b->alloc_semp)
5926 {
5927 if (e4b->bd_bitmap_page)
5928 page_cache_release(e4b->bd_bitmap_page);
5929 if (e4b->bd_buddy_page)
5930 page_cache_release(e4b->bd_buddy_page);
5931+ /* Done with the buddy cache */
5932+ if (e4b->alloc_semp)
5933+ up_read(e4b->alloc_semp);
5934+ __release(e4b->alloc_semp);
5935 }
5936
5937
5938@@ -1031,7 +1098,10 @@ static void mb_clear_bits(spinlock_t *lo
5939 cur += 32;
5940 continue;
5941 }
5942- mb_clear_bit_atomic(lock, cur, bm);
5943+ if (lock)
5944+ mb_clear_bit_atomic(lock, cur, bm);
5945+ else
5946+ mb_clear_bit(cur, bm);
5947 cur++;
5948 }
5949 }
5950@@ -1049,7 +1119,10 @@ static void mb_set_bits(spinlock_t *lock
5951 cur += 32;
5952 continue;
5953 }
5954- mb_set_bit_atomic(lock, cur, bm);
5955+ if (lock)
5956+ mb_set_bit_atomic(lock, cur, bm);
5957+ else
5958+ mb_set_bit(cur, bm);
5959 cur++;
5960 }
5961 }
5962@@ -1094,12 +1167,11 @@ static void mb_free_blocks(struct inode
5963 blocknr += block;
5964 blocknr +=
5965 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
5966- ext4_unlock_group(sb, e4b->bd_group);
5967- ext4_error(sb, __func__, "double-free of inode"
5968- " %lu's block %llu(bit %u in group %lu)\n",
5969+ ext4_grp_locked_error(sb, e4b->bd_group,
5970+ __func__, "double-free of inode"
5971+ " %lu's block %llu(bit %u in group %u)\n",
5972 inode ? inode->i_ino : 0, blocknr, block,
5973 e4b->bd_group);
5974- ext4_lock_group(sb, e4b->bd_group);
5975 }
5976 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
5977 e4b->bd_info->bb_counters[order]++;
5978@@ -1296,13 +1368,20 @@ static void ext4_mb_use_best_found(struc
5979 ac->ac_tail = ret & 0xffff;
5980 ac->ac_buddy = ret >> 16;
5981
5982- /* XXXXXXX: SUCH A HORRIBLE **CK */
5983- /*FIXME!! Why ? */
5984+ /*
5985+ * take the page reference. We want the page to be pinned
5986+ * so that we don't get a ext4_mb_init_cache_call for this
5987+ * group until we update the bitmap. That would mean we
5988+ * double allocate blocks. The reference is dropped
5989+ * in ext4_mb_release_context
5990+ */
5991 ac->ac_bitmap_page = e4b->bd_bitmap_page;
5992 get_page(ac->ac_bitmap_page);
5993 ac->ac_buddy_page = e4b->bd_buddy_page;
5994 get_page(ac->ac_buddy_page);
5995-
5996+ /* on allocation we use ac to track the held semaphore */
5997+ ac->alloc_semp = e4b->alloc_semp;
5998+ e4b->alloc_semp = NULL;
5999 /* store last allocated for subsequent stream allocation */
6000 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
6001 spin_lock(&sbi->s_md_lock);
6002@@ -1433,8 +1512,10 @@ static int ext4_mb_try_best_found(struct
6003
6004 BUG_ON(ex.fe_len <= 0);
6005 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
6006- if (err)
6007+ if (err) {
6008+ __release(e4b->alloc_semp);
6009 return err;
6010+ }
6011
6012 ext4_lock_group(ac->ac_sb, group);
6013 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
6014@@ -1464,8 +1545,10 @@ static int ext4_mb_find_by_goal(struct e
6015 return 0;
6016
6017 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
6018- if (err)
6019+ if (err) {
6020+ __release(e4b->alloc_semp);
6021 return err;
6022+ }
6023
6024 ext4_lock_group(ac->ac_sb, group);
6025 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
6026@@ -1575,7 +1658,8 @@ static void ext4_mb_complex_scan_group(s
6027 * free blocks even though group info says we
6028 * we have free blocks
6029 */
6030- ext4_error(sb, __func__, "%d free blocks as per "
6031+ ext4_grp_locked_error(sb, e4b->bd_group,
6032+ __func__, "%d free blocks as per "
6033 "group info. But bitmap says 0\n",
6034 free);
6035 break;
6036@@ -1584,7 +1668,8 @@ static void ext4_mb_complex_scan_group(s
6037 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
6038 BUG_ON(ex.fe_len <= 0);
6039 if (free < ex.fe_len) {
6040- ext4_error(sb, __func__, "%d free blocks as per "
6041+ ext4_grp_locked_error(sb, e4b->bd_group,
6042+ __func__, "%d free blocks as per "
6043 "group info. But got %d blocks\n",
6044 free, ex.fe_len);
6045 /*
6046@@ -1692,6 +1777,173 @@ static int ext4_mb_good_group(struct ext
6047 return 0;
6048 }
6049
6050+/*
6051+ * lock the group_info alloc_sem of all the groups
6052+ * belonging to the same buddy cache page. This
6053+ * make sure other parallel operation on the buddy
6054+ * cache doesn't happen whild holding the buddy cache
6055+ * lock
6056+ */
6057+int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
6058+{
6059+ int i;
6060+ int block, pnum;
6061+ int blocks_per_page;
6062+ int groups_per_page;
6063+ ext4_group_t first_group;
6064+ struct ext4_group_info *grp;
6065+
6066+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6067+ /*
6068+ * the buddy cache inode stores the block bitmap
6069+ * and buddy information in consecutive blocks.
6070+ * So for each group we need two blocks.
6071+ */
6072+ block = group * 2;
6073+ pnum = block / blocks_per_page;
6074+ first_group = pnum * blocks_per_page / 2;
6075+
6076+ groups_per_page = blocks_per_page >> 1;
6077+ if (groups_per_page == 0)
6078+ groups_per_page = 1;
6079+ /* read all groups the page covers into the cache */
6080+ for (i = 0; i < groups_per_page; i++) {
6081+
6082+ if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
6083+ break;
6084+ grp = ext4_get_group_info(sb, first_group + i);
6085+ /* take all groups write allocation
6086+ * semaphore. This make sure there is
6087+ * no block allocation going on in any
6088+ * of that groups
6089+ */
6090+ down_write_nested(&grp->alloc_sem, i);
6091+ }
6092+ return i;
6093+}
6094+
6095+void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
6096+ ext4_group_t group, int locked_group)
6097+{
6098+ int i;
6099+ int block, pnum;
6100+ int blocks_per_page;
6101+ ext4_group_t first_group;
6102+ struct ext4_group_info *grp;
6103+
6104+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6105+ /*
6106+ * the buddy cache inode stores the block bitmap
6107+ * and buddy information in consecutive blocks.
6108+ * So for each group we need two blocks.
6109+ */
6110+ block = group * 2;
6111+ pnum = block / blocks_per_page;
6112+ first_group = pnum * blocks_per_page / 2;
6113+ /* release locks on all the groups */
6114+ for (i = 0; i < locked_group; i++) {
6115+
6116+ grp = ext4_get_group_info(sb, first_group + i);
6117+ /* take all groups write allocation
6118+ * semaphore. This make sure there is
6119+ * no block allocation going on in any
6120+ * of that groups
6121+ */
6122+ up_write(&grp->alloc_sem);
6123+ }
6124+
6125+}
6126+
6127+static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
6128+{
6129+
6130+ int ret;
6131+ void *bitmap;
6132+ int blocks_per_page;
6133+ int block, pnum, poff;
6134+ int num_grp_locked = 0;
6135+ struct ext4_group_info *this_grp;
6136+ struct ext4_sb_info *sbi = EXT4_SB(sb);
6137+ struct inode *inode = sbi->s_buddy_cache;
6138+ struct page *page = NULL, *bitmap_page = NULL;
6139+
6140+ mb_debug("init group %lu\n", group);
6141+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6142+ this_grp = ext4_get_group_info(sb, group);
6143+ /*
6144+ * This ensures we don't add group
6145+ * to this buddy cache via resize
6146+ */
6147+ num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
6148+ if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
6149+ /*
6150+ * somebody initialized the group
6151+ * return without doing anything
6152+ */
6153+ ret = 0;
6154+ goto err;
6155+ }
6156+ /*
6157+ * the buddy cache inode stores the block bitmap
6158+ * and buddy information in consecutive blocks.
6159+ * So for each group we need two blocks.
6160+ */
6161+ block = group * 2;
6162+ pnum = block / blocks_per_page;
6163+ poff = block % blocks_per_page;
6164+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
6165+ if (page) {
6166+ BUG_ON(page->mapping != inode->i_mapping);
6167+ ret = ext4_mb_init_cache(page, NULL);
6168+ if (ret) {
6169+ unlock_page(page);
6170+ goto err;
6171+ }
6172+ unlock_page(page);
6173+ }
6174+ if (page == NULL || !PageUptodate(page)) {
6175+ ret = -EIO;
6176+ goto err;
6177+ }
6178+ mark_page_accessed(page);
6179+ bitmap_page = page;
6180+ bitmap = page_address(page) + (poff * sb->s_blocksize);
6181+
6182+ /* init buddy cache */
6183+ block++;
6184+ pnum = block / blocks_per_page;
6185+ poff = block % blocks_per_page;
6186+ page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
6187+ if (page == bitmap_page) {
6188+ /*
6189+ * If both the bitmap and buddy are in
6190+ * the same page we don't need to force
6191+ * init the buddy
6192+ */
6193+ unlock_page(page);
6194+ } else if (page) {
6195+ BUG_ON(page->mapping != inode->i_mapping);
6196+ ret = ext4_mb_init_cache(page, bitmap);
6197+ if (ret) {
6198+ unlock_page(page);
6199+ goto err;
6200+ }
6201+ unlock_page(page);
6202+ }
6203+ if (page == NULL || !PageUptodate(page)) {
6204+ ret = -EIO;
6205+ goto err;
6206+ }
6207+ mark_page_accessed(page);
6208+err:
6209+ ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
6210+ if (bitmap_page)
6211+ page_cache_release(bitmap_page);
6212+ if (page)
6213+ page_cache_release(page);
6214+ return ret;
6215+}
6216+
6217 static noinline_for_stack int
6218 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
6219 {
6220@@ -1775,7 +2027,7 @@ repeat:
6221 group = 0;
6222
6223 /* quick check to skip empty groups */
6224- grp = ext4_get_group_info(ac->ac_sb, group);
6225+ grp = ext4_get_group_info(sb, group);
6226 if (grp->bb_free == 0)
6227 continue;
6228
6229@@ -1788,10 +2040,9 @@ repeat:
6230 * we need full data about the group
6231 * to make a good selection
6232 */
6233- err = ext4_mb_load_buddy(sb, group, &e4b);
6234+ err = ext4_mb_init_group(sb, group);
6235 if (err)
6236 goto out;
6237- ext4_mb_release_desc(&e4b);
6238 }
6239
6240 /*
6241@@ -1802,8 +2053,10 @@ repeat:
6242 continue;
6243
6244 err = ext4_mb_load_buddy(sb, group, &e4b);
6245- if (err)
6246+ if (err) {
6247+ __release(e4b->alloc_semp);
6248 goto out;
6249+ }
6250
6251 ext4_lock_group(sb, group);
6252 if (!ext4_mb_good_group(ac, group, cr)) {
6253@@ -1932,13 +2185,13 @@ static int ext4_mb_seq_history_show(stru
6254 if (hs->op == EXT4_MB_HISTORY_ALLOC) {
6255 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
6256 "%-5u %-5s %-5u %-6u\n";
6257- sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group,
6258+ sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
6259 hs->result.fe_start, hs->result.fe_len,
6260 hs->result.fe_logical);
6261- sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group,
6262+ sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
6263 hs->orig.fe_start, hs->orig.fe_len,
6264 hs->orig.fe_logical);
6265- sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group,
6266+ sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group,
6267 hs->goal.fe_start, hs->goal.fe_len,
6268 hs->goal.fe_logical);
6269 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2,
6270@@ -1947,20 +2200,20 @@ static int ext4_mb_seq_history_show(stru
6271 hs->buddy ? 1 << hs->buddy : 0);
6272 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) {
6273 fmt = "%-5u %-8u %-23s %-23s %-23s\n";
6274- sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group,
6275+ sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
6276 hs->result.fe_start, hs->result.fe_len,
6277 hs->result.fe_logical);
6278- sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group,
6279+ sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
6280 hs->orig.fe_start, hs->orig.fe_len,
6281 hs->orig.fe_logical);
6282 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2);
6283 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) {
6284- sprintf(buf2, "%lu/%d/%u", hs->result.fe_group,
6285+ sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
6286 hs->result.fe_start, hs->result.fe_len);
6287 seq_printf(seq, "%-5u %-8u %-23s discard\n",
6288 hs->pid, hs->ino, buf2);
6289 } else if (hs->op == EXT4_MB_HISTORY_FREE) {
6290- sprintf(buf2, "%lu/%d/%u", hs->result.fe_group,
6291+ sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
6292 hs->result.fe_start, hs->result.fe_len);
6293 seq_printf(seq, "%-5u %-8u %-23s free\n",
6294 hs->pid, hs->ino, buf2);
6295@@ -2073,7 +2326,7 @@ static void *ext4_mb_seq_groups_start(st
6296 return NULL;
6297
6298 group = *pos + 1;
6299- return (void *) group;
6300+ return (void *) ((unsigned long) group);
6301 }
6302
6303 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
6304@@ -2086,13 +2339,13 @@ static void *ext4_mb_seq_groups_next(str
6305 if (*pos < 0 || *pos >= sbi->s_groups_count)
6306 return NULL;
6307 group = *pos + 1;
6308- return (void *) group;;
6309+ return (void *) ((unsigned long) group);
6310 }
6311
6312 static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
6313 {
6314 struct super_block *sb = seq->private;
6315- long group = (long) v;
6316+ ext4_group_t group = (ext4_group_t) ((unsigned long) v);
6317 int i;
6318 int err;
6319 struct ext4_buddy e4b;
6320@@ -2114,7 +2367,8 @@ static int ext4_mb_seq_groups_show(struc
6321 sizeof(struct ext4_group_info);
6322 err = ext4_mb_load_buddy(sb, group, &e4b);
6323 if (err) {
6324- seq_printf(seq, "#%-5lu: I/O error\n", group);
6325+ __release(e4b->alloc_semp);
6326+ seq_printf(seq, "#%-5u: I/O error\n", group);
6327 return 0;
6328 }
6329 ext4_lock_group(sb, group);
6330@@ -2122,7 +2376,7 @@ static int ext4_mb_seq_groups_show(struc
6331 ext4_unlock_group(sb, group);
6332 ext4_mb_release_desc(&e4b);
6333
6334- seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
6335+ seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
6336 sg.info.bb_fragments, sg.info.bb_first_free);
6337 for (i = 0; i <= 13; i++)
6338 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
6339@@ -2169,9 +2423,10 @@ static void ext4_mb_history_release(stru
6340 {
6341 struct ext4_sb_info *sbi = EXT4_SB(sb);
6342
6343- remove_proc_entry("mb_groups", sbi->s_mb_proc);
6344- remove_proc_entry("mb_history", sbi->s_mb_proc);
6345-
6346+ if (sbi->s_proc != NULL) {
6347+ remove_proc_entry("mb_groups", sbi->s_proc);
6348+ remove_proc_entry("mb_history", sbi->s_proc);
6349+ }
6350 kfree(sbi->s_mb_history);
6351 }
6352
6353@@ -2180,10 +2435,10 @@ static void ext4_mb_history_init(struct
6354 struct ext4_sb_info *sbi = EXT4_SB(sb);
6355 int i;
6356
6357- if (sbi->s_mb_proc != NULL) {
6358- proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc,
6359+ if (sbi->s_proc != NULL) {
6360+ proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
6361 &ext4_mb_seq_history_fops, sb);
6362- proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc,
6363+ proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
6364 &ext4_mb_seq_groups_fops, sb);
6365 }
6366
6367@@ -2295,10 +2550,12 @@ int ext4_mb_add_groupinfo(struct super_b
6368 ext4_free_blocks_after_init(sb, group, desc);
6369 } else {
6370 meta_group_info[i]->bb_free =
6371- le16_to_cpu(desc->bg_free_blocks_count);
6372+ ext4_free_blks_count(sb, desc);
6373 }
6374
6375 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
6376+ init_rwsem(&meta_group_info[i]->alloc_sem);
6377+ meta_group_info[i]->bb_free_root.rb_node = NULL;;
6378
6379 #ifdef DOUBLE_CHECK
6380 {
6381@@ -2325,54 +2582,6 @@ exit_meta_group_info:
6382 } /* ext4_mb_add_groupinfo */
6383
6384 /*
6385- * Add a group to the existing groups.
6386- * This function is used for online resize
6387- */
6388-int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
6389- struct ext4_group_desc *desc)
6390-{
6391- struct ext4_sb_info *sbi = EXT4_SB(sb);
6392- struct inode *inode = sbi->s_buddy_cache;
6393- int blocks_per_page;
6394- int block;
6395- int pnum;
6396- struct page *page;
6397- int err;
6398-
6399- /* Add group based on group descriptor*/
6400- err = ext4_mb_add_groupinfo(sb, group, desc);
6401- if (err)
6402- return err;
6403-
6404- /*
6405- * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
6406- * datas) are set not up to date so that they will be re-initilaized
6407- * during the next call to ext4_mb_load_buddy
6408- */
6409-
6410- /* Set buddy page as not up to date */
6411- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
6412- block = group * 2;
6413- pnum = block / blocks_per_page;
6414- page = find_get_page(inode->i_mapping, pnum);
6415- if (page != NULL) {
6416- ClearPageUptodate(page);
6417- page_cache_release(page);
6418- }
6419-
6420- /* Set bitmap page as not up to date */
6421- block++;
6422- pnum = block / blocks_per_page;
6423- page = find_get_page(inode->i_mapping, pnum);
6424- if (page != NULL) {
6425- ClearPageUptodate(page);
6426- page_cache_release(page);
6427- }
6428-
6429- return 0;
6430-}
6431-
6432-/*
6433 * Update an existing group.
6434 * This function is used for online resize
6435 */
6436@@ -2455,7 +2664,7 @@ static int ext4_mb_init_backend(struct s
6437 desc = ext4_get_group_desc(sb, i, NULL);
6438 if (desc == NULL) {
6439 printk(KERN_ERR
6440- "EXT4-fs: can't read descriptor %lu\n", i);
6441+ "EXT4-fs: can't read descriptor %u\n", i);
6442 goto err_freebuddy;
6443 }
6444 if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
6445@@ -2485,19 +2694,14 @@ int ext4_mb_init(struct super_block *sb,
6446 unsigned max;
6447 int ret;
6448
6449- if (!test_opt(sb, MBALLOC))
6450- return 0;
6451-
6452 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
6453
6454 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
6455 if (sbi->s_mb_offsets == NULL) {
6456- clear_opt(sbi->s_mount_opt, MBALLOC);
6457 return -ENOMEM;
6458 }
6459 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
6460 if (sbi->s_mb_maxs == NULL) {
6461- clear_opt(sbi->s_mount_opt, MBALLOC);
6462 kfree(sbi->s_mb_maxs);
6463 return -ENOMEM;
6464 }
6465@@ -2520,7 +2724,6 @@ int ext4_mb_init(struct super_block *sb,
6466 /* init file for buddy data */
6467 ret = ext4_mb_init_backend(sb);
6468 if (ret != 0) {
6469- clear_opt(sbi->s_mount_opt, MBALLOC);
6470 kfree(sbi->s_mb_offsets);
6471 kfree(sbi->s_mb_maxs);
6472 return ret;
6473@@ -2540,17 +2743,15 @@ int ext4_mb_init(struct super_block *sb,
6474 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
6475 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
6476
6477- i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
6478- sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
6479+ sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
6480 if (sbi->s_locality_groups == NULL) {
6481- clear_opt(sbi->s_mount_opt, MBALLOC);
6482 kfree(sbi->s_mb_offsets);
6483 kfree(sbi->s_mb_maxs);
6484 return -ENOMEM;
6485 }
6486- for (i = 0; i < nr_cpu_ids; i++) {
6487+ for_each_possible_cpu(i) {
6488 struct ext4_locality_group *lg;
6489- lg = &sbi->s_locality_groups[i];
6490+ lg = per_cpu_ptr(sbi->s_locality_groups, i);
6491 mutex_init(&lg->lg_mutex);
6492 for (j = 0; j < PREALLOC_TB_SIZE; j++)
6493 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
6494@@ -2560,7 +2761,7 @@ int ext4_mb_init(struct super_block *sb,
6495 ext4_mb_init_per_dev_proc(sb);
6496 ext4_mb_history_init(sb);
6497
6498- printk("EXT4-fs: mballoc enabled\n");
6499+ printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
6500 return 0;
6501 }
6502
6503@@ -2589,9 +2790,6 @@ int ext4_mb_release(struct super_block *
6504 struct ext4_group_info *grinfo;
6505 struct ext4_sb_info *sbi = EXT4_SB(sb);
6506
6507- if (!test_opt(sb, MBALLOC))
6508- return 0;
6509-
6510 /* release freed, non-committed blocks */
6511 spin_lock(&sbi->s_md_lock);
6512 list_splice_init(&sbi->s_closed_transaction,
6513@@ -2647,8 +2845,7 @@ int ext4_mb_release(struct super_block *
6514 atomic_read(&sbi->s_mb_discarded));
6515 }
6516
6517- kfree(sbi->s_locality_groups);
6518-
6519+ free_percpu(sbi->s_locality_groups);
6520 ext4_mb_history_release(sb);
6521 ext4_mb_destroy_per_dev_proc(sb);
6522
6523@@ -2658,13 +2855,11 @@ int ext4_mb_release(struct super_block *
6524 static noinline_for_stack void
6525 ext4_mb_free_committed_blocks(struct super_block *sb)
6526 {
6527- struct ext4_sb_info *sbi = EXT4_SB(sb);
6528- int err;
6529- int i;
6530- int count = 0;
6531- int count2 = 0;
6532- struct ext4_free_metadata *md;
6533 struct ext4_buddy e4b;
6534+ struct ext4_group_info *db;
6535+ struct ext4_sb_info *sbi = EXT4_SB(sb);
6536+ int err, count = 0, count2 = 0;
6537+ struct ext4_free_data *entry;
6538
6539 if (list_empty(&sbi->s_committed_transaction))
6540 return;
6541@@ -2672,44 +2867,46 @@ ext4_mb_free_committed_blocks(struct sup
6542 /* there is committed blocks to be freed yet */
6543 do {
6544 /* get next array of blocks */
6545- md = NULL;
6546+ entry = NULL;
6547 spin_lock(&sbi->s_md_lock);
6548 if (!list_empty(&sbi->s_committed_transaction)) {
6549- md = list_entry(sbi->s_committed_transaction.next,
6550- struct ext4_free_metadata, list);
6551- list_del(&md->list);
6552+ entry = list_entry(sbi->s_committed_transaction.next,
6553+ struct ext4_free_data, list);
6554+ list_del(&entry->list);
6555 }
6556 spin_unlock(&sbi->s_md_lock);
6557
6558- if (md == NULL)
6559+ if (entry == NULL)
6560 break;
6561
6562- mb_debug("gonna free %u blocks in group %lu (0x%p):",
6563- md->num, md->group, md);
6564+ mb_debug("gonna free %u blocks in group %u (0x%p):",
6565+ entry->count, entry->group, entry);
6566
6567- err = ext4_mb_load_buddy(sb, md->group, &e4b);
6568+ err = ext4_mb_load_buddy(sb, entry->group, &e4b);
6569 /* we expect to find existing buddy because it's pinned */
6570 BUG_ON(err != 0);
6571
6572+ db = e4b.bd_info;
6573 /* there are blocks to put in buddy to make them really free */
6574- count += md->num;
6575+ count += entry->count;
6576 count2++;
6577- ext4_lock_group(sb, md->group);
6578- for (i = 0; i < md->num; i++) {
6579- mb_debug(" %u", md->blocks[i]);
6580- mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
6581- }
6582- mb_debug("\n");
6583- ext4_unlock_group(sb, md->group);
6584-
6585- /* balance refcounts from ext4_mb_free_metadata() */
6586- page_cache_release(e4b.bd_buddy_page);
6587- page_cache_release(e4b.bd_bitmap_page);
6588+ ext4_lock_group(sb, entry->group);
6589+ /* Take it out of per group rb tree */
6590+ rb_erase(&entry->node, &(db->bb_free_root));
6591+ mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
6592+
6593+ if (!db->bb_free_root.rb_node) {
6594+ /* No more items in the per group rb tree
6595+ * balance refcounts from ext4_mb_free_metadata()
6596+ */
6597+ page_cache_release(e4b.bd_buddy_page);
6598+ page_cache_release(e4b.bd_bitmap_page);
6599+ }
6600+ ext4_unlock_group(sb, entry->group);
6601
6602- kfree(md);
6603+ kmem_cache_free(ext4_free_ext_cachep, entry);
6604 ext4_mb_release_desc(&e4b);
6605-
6606- } while (md);
6607+ } while (1);
6608
6609 mb_debug("freed %u blocks in %u structures\n", count, count2);
6610 }
6611@@ -2721,129 +2918,52 @@ ext4_mb_free_committed_blocks(struct sup
6612 #define EXT4_MB_STREAM_REQ "stream_req"
6613 #define EXT4_MB_GROUP_PREALLOC "group_prealloc"
6614
6615-
6616-
6617-#define MB_PROC_FOPS(name) \
6618-static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
6619-{ \
6620- struct ext4_sb_info *sbi = m->private; \
6621- \
6622- seq_printf(m, "%ld\n", sbi->s_mb_##name); \
6623- return 0; \
6624-} \
6625- \
6626-static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
6627-{ \
6628- return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
6629-} \
6630- \
6631-static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
6632- const char __user *buf, size_t cnt, loff_t *ppos) \
6633-{ \
6634- struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
6635- char str[32]; \
6636- long value; \
6637- if (cnt >= sizeof(str)) \
6638- return -EINVAL; \
6639- if (copy_from_user(str, buf, cnt)) \
6640- return -EFAULT; \
6641- value = simple_strtol(str, NULL, 0); \
6642- if (value <= 0) \
6643- return -ERANGE; \
6644- sbi->s_mb_##name = value; \
6645- return cnt; \
6646-} \
6647- \
6648-static const struct file_operations ext4_mb_##name##_proc_fops = { \
6649- .owner = THIS_MODULE, \
6650- .open = ext4_mb_##name##_proc_open, \
6651- .read = seq_read, \
6652- .llseek = seq_lseek, \
6653- .release = single_release, \
6654- .write = ext4_mb_##name##_proc_write, \
6655-};
6656-
6657-MB_PROC_FOPS(stats);
6658-MB_PROC_FOPS(max_to_scan);
6659-MB_PROC_FOPS(min_to_scan);
6660-MB_PROC_FOPS(order2_reqs);
6661-MB_PROC_FOPS(stream_request);
6662-MB_PROC_FOPS(group_prealloc);
6663-
6664-#define MB_PROC_HANDLER(name, var) \
6665-do { \
6666- proc = proc_create_data(name, mode, sbi->s_mb_proc, \
6667- &ext4_mb_##var##_proc_fops, sbi); \
6668- if (proc == NULL) { \
6669- printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
6670- goto err_out; \
6671- } \
6672-} while (0)
6673-
6674 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
6675 {
6676+#ifdef CONFIG_PROC_FS
6677 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
6678 struct ext4_sb_info *sbi = EXT4_SB(sb);
6679 struct proc_dir_entry *proc;
6680- char devname[BDEVNAME_SIZE], *p;
6681
6682- if (proc_root_ext4 == NULL) {
6683- sbi->s_mb_proc = NULL;
6684+ if (sbi->s_proc == NULL)
6685 return -EINVAL;
6686- }
6687- bdevname(sb->s_bdev, devname);
6688- p = devname;
6689- while ((p = strchr(p, '/')))
6690- *p = '!';
6691-
6692- sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
6693- if (!sbi->s_mb_proc)
6694- goto err_create_dir;
6695-
6696- MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
6697- MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
6698- MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
6699- MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
6700- MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
6701- MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
6702
6703+ EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
6704+ EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
6705+ EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
6706+ EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
6707+ EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
6708+ EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
6709 return 0;
6710
6711 err_out:
6712- remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
6713- remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
6714- remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
6715- remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
6716- remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
6717- remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
6718- remove_proc_entry(devname, proc_root_ext4);
6719- sbi->s_mb_proc = NULL;
6720-err_create_dir:
6721- printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
6722-
6723+ remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
6724+ remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
6725+ remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
6726+ remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
6727+ remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
6728+ remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
6729 return -ENOMEM;
6730+#else
6731+ return 0;
6732+#endif
6733 }
6734
6735 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
6736 {
6737+#ifdef CONFIG_PROC_FS
6738 struct ext4_sb_info *sbi = EXT4_SB(sb);
6739- char devname[BDEVNAME_SIZE], *p;
6740
6741- if (sbi->s_mb_proc == NULL)
6742+ if (sbi->s_proc == NULL)
6743 return -EINVAL;
6744
6745- bdevname(sb->s_bdev, devname);
6746- p = devname;
6747- while ((p = strchr(p, '/')))
6748- *p = '!';
6749- remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
6750- remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
6751- remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
6752- remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc);
6753- remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc);
6754- remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
6755- remove_proc_entry(devname, proc_root_ext4);
6756-
6757+ remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
6758+ remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
6759+ remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
6760+ remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
6761+ remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
6762+ remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
6763+#endif
6764 return 0;
6765 }
6766
6767@@ -2864,11 +2984,16 @@ int __init init_ext4_mballoc(void)
6768 kmem_cache_destroy(ext4_pspace_cachep);
6769 return -ENOMEM;
6770 }
6771-#ifdef CONFIG_PROC_FS
6772- proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
6773- if (proc_root_ext4 == NULL)
6774- printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n");
6775-#endif
6776+
6777+ ext4_free_ext_cachep =
6778+ kmem_cache_create("ext4_free_block_extents",
6779+ sizeof(struct ext4_free_data),
6780+ 0, SLAB_RECLAIM_ACCOUNT, NULL);
6781+ if (ext4_free_ext_cachep == NULL) {
6782+ kmem_cache_destroy(ext4_pspace_cachep);
6783+ kmem_cache_destroy(ext4_ac_cachep);
6784+ return -ENOMEM;
6785+ }
6786 return 0;
6787 }
6788
6789@@ -2877,9 +3002,7 @@ void exit_ext4_mballoc(void)
6790 /* XXX: synchronize_rcu(); */
6791 kmem_cache_destroy(ext4_pspace_cachep);
6792 kmem_cache_destroy(ext4_ac_cachep);
6793-#ifdef CONFIG_PROC_FS
6794- remove_proc_entry("fs/ext4", NULL);
6795-#endif
6796+ kmem_cache_destroy(ext4_free_ext_cachep);
6797 }
6798
6799
6800@@ -2889,7 +3012,7 @@ void exit_ext4_mballoc(void)
6801 */
6802 static noinline_for_stack int
6803 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
6804- handle_t *handle)
6805+ handle_t *handle, unsigned int reserv_blks)
6806 {
6807 struct buffer_head *bitmap_bh = NULL;
6808 struct ext4_super_block *es;
6809@@ -2922,7 +3045,7 @@ ext4_mb_mark_diskspace_used(struct ext4_
6810 if (!gdp)
6811 goto out_err;
6812
6813- ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
6814+ ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
6815 gdp->bg_free_blocks_count);
6816
6817 err = ext4_journal_get_write_access(handle, gdp_bh);
6818@@ -2941,8 +3064,8 @@ ext4_mb_mark_diskspace_used(struct ext4_
6819 in_range(block + len - 1, ext4_inode_table(sb, gdp),
6820 EXT4_SB(sb)->s_itb_per_group)) {
6821 ext4_error(sb, __func__,
6822- "Allocating block in system zone - block = %llu",
6823- block);
6824+ "Allocating block %llu in system zone of %d group\n",
6825+ block, ac->ac_b_ex.fe_group);
6826 /* File system mounted not to panic on error
6827 * Fix the bitmap and repeat the block allocation
6828 * We leak some of the blocks here.
6829@@ -2964,29 +3087,29 @@ ext4_mb_mark_diskspace_used(struct ext4_
6830 }
6831 }
6832 #endif
6833- mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
6834- ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
6835-
6836 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
6837+ mb_set_bits(NULL, bitmap_bh->b_data,
6838+ ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
6839 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
6840 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
6841- gdp->bg_free_blocks_count =
6842- cpu_to_le16(ext4_free_blocks_after_init(sb,
6843- ac->ac_b_ex.fe_group,
6844- gdp));
6845+ ext4_free_blks_set(sb, gdp,
6846+ ext4_free_blocks_after_init(sb,
6847+ ac->ac_b_ex.fe_group, gdp));
6848 }
6849- le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
6850+ len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
6851+ ext4_free_blks_set(sb, gdp, len);
6852 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
6853 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
6854-
6855+ percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
6856 /*
6857- * free blocks account has already be reduced/reserved
6858- * at write_begin() time for delayed allocation
6859- * do not double accounting
6860+ * Now reduce the dirty block count also. Should not go negative
6861 */
6862 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
6863- percpu_counter_sub(&sbi->s_freeblocks_counter,
6864- ac->ac_b_ex.fe_len);
6865+ /* release all the reserved blocks if non delalloc */
6866+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
6867+ else
6868+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
6869+ ac->ac_b_ex.fe_len);
6870
6871 if (sbi->s_log_groups_per_flex) {
6872 ext4_group_t flex_group = ext4_flex_group(sbi,
6873@@ -3128,7 +3251,7 @@ ext4_mb_normalize_request(struct ext4_al
6874 /* check we don't cross already preallocated blocks */
6875 rcu_read_lock();
6876 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
6877- unsigned long pa_end;
6878+ ext4_lblk_t pa_end;
6879
6880 if (pa->pa_deleted)
6881 continue;
6882@@ -3172,7 +3295,7 @@ ext4_mb_normalize_request(struct ext4_al
6883 /* XXX: extra loop to check we really don't overlap preallocations */
6884 rcu_read_lock();
6885 list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
6886- unsigned long pa_end;
6887+ ext4_lblk_t pa_end;
6888 spin_lock(&pa->pa_lock);
6889 if (pa->pa_deleted == 0) {
6890 pa_end = pa->pa_lstart + pa->pa_len;
6891@@ -3404,6 +3527,32 @@ ext4_mb_use_preallocated(struct ext4_all
6892 }
6893
6894 /*
6895+ * the function goes through all block freed in the group
6896+ * but not yet committed and marks them used in in-core bitmap.
6897+ * buddy must be generated from this bitmap
6898+ * Need to be called with ext4 group lock (ext4_lock_group)
6899+ */
6900+static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
6901+ ext4_group_t group)
6902+{
6903+ struct rb_node *n;
6904+ struct ext4_group_info *grp;
6905+ struct ext4_free_data *entry;
6906+
6907+ grp = ext4_get_group_info(sb, group);
6908+ n = rb_first(&(grp->bb_free_root));
6909+
6910+ while (n) {
6911+ entry = rb_entry(n, struct ext4_free_data, node);
6912+ mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
6913+ bitmap, entry->start_blk,
6914+ entry->count);
6915+ n = rb_next(n);
6916+ }
6917+ return;
6918+}
6919+
6920+/*
6921 * the function goes through all preallocation in this group and marks them
6922 * used in in-core bitmap. buddy must be generated from this bitmap
6923 * Need to be called with ext4 group lock (ext4_lock_group)
6924@@ -3443,7 +3592,7 @@ static void ext4_mb_generate_from_pa(str
6925 preallocated += len;
6926 count++;
6927 }
6928- mb_debug("prellocated %u for group %lu\n", preallocated, group);
6929+ mb_debug("prellocated %u for group %u\n", preallocated, group);
6930 }
6931
6932 static void ext4_mb_pa_callback(struct rcu_head *head)
6933@@ -3460,7 +3609,7 @@ static void ext4_mb_pa_callback(struct r
6934 static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
6935 struct super_block *sb, struct ext4_prealloc_space *pa)
6936 {
6937- unsigned long grp;
6938+ ext4_group_t grp;
6939
6940 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
6941 return;
6942@@ -3676,8 +3825,8 @@ ext4_mb_release_inode_pa(struct ext4_bud
6943 {
6944 struct super_block *sb = e4b->bd_sb;
6945 struct ext4_sb_info *sbi = EXT4_SB(sb);
6946- unsigned long end;
6947- unsigned long next;
6948+ unsigned int end;
6949+ unsigned int next;
6950 ext4_group_t group;
6951 ext4_grpblk_t bit;
6952 sector_t start;
6953@@ -3723,8 +3872,9 @@ ext4_mb_release_inode_pa(struct ext4_bud
6954 pa, (unsigned long) pa->pa_lstart,
6955 (unsigned long) pa->pa_pstart,
6956 (unsigned long) pa->pa_len);
6957- ext4_error(sb, __func__, "free %u, pa_free %u\n",
6958- free, pa->pa_free);
6959+ ext4_grp_locked_error(sb, group,
6960+ __func__, "free %u, pa_free %u\n",
6961+ free, pa->pa_free);
6962 /*
6963 * pa is already deleted so we use the value obtained
6964 * from the bitmap and continue.
6965@@ -3789,7 +3939,7 @@ ext4_mb_discard_group_preallocations(str
6966 int busy = 0;
6967 int free = 0;
6968
6969- mb_debug("discard preallocation for group %lu\n", group);
6970+ mb_debug("discard preallocation for group %u\n", group);
6971
6972 if (list_empty(&grp->bb_prealloc_list))
6973 return 0;
6974@@ -3797,14 +3947,15 @@ ext4_mb_discard_group_preallocations(str
6975 bitmap_bh = ext4_read_block_bitmap(sb, group);
6976 if (bitmap_bh == NULL) {
6977 ext4_error(sb, __func__, "Error in reading block "
6978- "bitmap for %lu\n", group);
6979+ "bitmap for %u\n", group);
6980 return 0;
6981 }
6982
6983 err = ext4_mb_load_buddy(sb, group, &e4b);
6984 if (err) {
6985+ __release(e4b->alloc_semp);
6986 ext4_error(sb, __func__, "Error in loading buddy "
6987- "information for %lu\n", group);
6988+ "information for %u\n", group);
6989 put_bh(bitmap_bh);
6990 return 0;
6991 }
6992@@ -3894,7 +4045,7 @@ out:
6993 *
6994 * FIXME!! Make sure it is valid at all the call sites
6995 */
6996-void ext4_mb_discard_inode_preallocations(struct inode *inode)
6997+void ext4_discard_preallocations(struct inode *inode)
6998 {
6999 struct ext4_inode_info *ei = EXT4_I(inode);
7000 struct super_block *sb = inode->i_sb;
7001@@ -3906,7 +4057,7 @@ void ext4_mb_discard_inode_preallocation
7002 struct ext4_buddy e4b;
7003 int err;
7004
7005- if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) {
7006+ if (!S_ISREG(inode->i_mode)) {
7007 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
7008 return;
7009 }
7010@@ -3970,15 +4121,16 @@ repeat:
7011
7012 err = ext4_mb_load_buddy(sb, group, &e4b);
7013 if (err) {
7014+ __release(e4b->alloc_semp);
7015 ext4_error(sb, __func__, "Error in loading buddy "
7016- "information for %lu\n", group);
7017+ "information for %u\n", group);
7018 continue;
7019 }
7020
7021 bitmap_bh = ext4_read_block_bitmap(sb, group);
7022 if (bitmap_bh == NULL) {
7023 ext4_error(sb, __func__, "Error in reading block "
7024- "bitmap for %lu\n", group);
7025+ "bitmap for %u\n", group);
7026 ext4_mb_release_desc(&e4b);
7027 continue;
7028 }
7029@@ -4104,8 +4256,7 @@ static void ext4_mb_group_or_file(struct
7030 * per cpu locality group is to reduce the contention between block
7031 * request from multiple CPUs.
7032 */
7033- ac->ac_lg = &sbi->s_locality_groups[get_cpu()];
7034- put_cpu();
7035+ ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
7036
7037 /* we're going to use group allocation */
7038 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
7039@@ -4122,8 +4273,8 @@ ext4_mb_initialize_context(struct ext4_a
7040 struct ext4_sb_info *sbi = EXT4_SB(sb);
7041 struct ext4_super_block *es = sbi->s_es;
7042 ext4_group_t group;
7043- unsigned long len;
7044- unsigned long goal;
7045+ unsigned int len;
7046+ ext4_fsblk_t goal;
7047 ext4_grpblk_t block;
7048
7049 /* we can't allocate > group size */
7050@@ -4166,6 +4317,7 @@ ext4_mb_initialize_context(struct ext4_a
7051 ac->ac_pa = NULL;
7052 ac->ac_bitmap_page = NULL;
7053 ac->ac_buddy_page = NULL;
7054+ ac->alloc_semp = NULL;
7055 ac->ac_lg = NULL;
7056
7057 /* we have to define context: we'll we work with a file or
7058@@ -4243,8 +4395,9 @@ ext4_mb_discard_lg_preallocations(struct
7059
7060 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
7061 if (ext4_mb_load_buddy(sb, group, &e4b)) {
7062+ __release(e4b->alloc_semp);
7063 ext4_error(sb, __func__, "Error in loading buddy "
7064- "information for %lu\n", group);
7065+ "information for %u\n", group);
7066 continue;
7067 }
7068 ext4_lock_group(sb, group);
7069@@ -4346,6 +4499,8 @@ static int ext4_mb_release_context(struc
7070 }
7071 ext4_mb_put_pa(ac, ac->ac_sb, pa);
7072 }
7073+ if (ac->alloc_semp)
7074+ up_read(ac->alloc_semp);
7075 if (ac->ac_bitmap_page)
7076 page_cache_release(ac->ac_bitmap_page);
7077 if (ac->ac_buddy_page)
7078@@ -4379,40 +4534,39 @@ static int ext4_mb_discard_preallocation
7079 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
7080 struct ext4_allocation_request *ar, int *errp)
7081 {
7082+ int freed;
7083 struct ext4_allocation_context *ac = NULL;
7084 struct ext4_sb_info *sbi;
7085 struct super_block *sb;
7086 ext4_fsblk_t block = 0;
7087- int freed;
7088- int inquota;
7089+ unsigned int inquota;
7090+ unsigned int reserv_blks = 0;
7091
7092 sb = ar->inode->i_sb;
7093 sbi = EXT4_SB(sb);
7094
7095- if (!test_opt(sb, MBALLOC)) {
7096- block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
7097- &(ar->len), errp);
7098- return block;
7099- }
7100 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
7101 /*
7102 * With delalloc we already reserved the blocks
7103 */
7104- ar->len = ext4_has_free_blocks(sbi, ar->len);
7105- }
7106-
7107- if (ar->len == 0) {
7108- *errp = -ENOSPC;
7109- return 0;
7110+ while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
7111+ /* let others to free the space */
7112+ yield();
7113+ ar->len = ar->len >> 1;
7114+ }
7115+ if (!ar->len) {
7116+ *errp = -ENOSPC;
7117+ return 0;
7118+ }
7119+ reserv_blks = ar->len;
7120 }
7121-
7122 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
7123 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
7124 ar->len--;
7125 }
7126 if (ar->len == 0) {
7127 *errp = -EDQUOT;
7128- return 0;
7129+ goto out3;
7130 }
7131 inquota = ar->len;
7132
7133@@ -4449,10 +4603,14 @@ repeat:
7134 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
7135 ext4_mb_new_preallocation(ac);
7136 }
7137-
7138 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
7139- *errp = ext4_mb_mark_diskspace_used(ac, handle);
7140+ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
7141 if (*errp == -EAGAIN) {
7142+ /*
7143+ * drop the reference that we took
7144+ * in ext4_mb_use_best_found
7145+ */
7146+ ext4_mb_release_context(ac);
7147 ac->ac_b_ex.fe_group = 0;
7148 ac->ac_b_ex.fe_start = 0;
7149 ac->ac_b_ex.fe_len = 0;
7150@@ -4483,6 +4641,13 @@ out2:
7151 out1:
7152 if (ar->len < inquota)
7153 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
7154+out3:
7155+ if (!ar->len) {
7156+ if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
7157+ /* release all the reserved blocks if non delalloc */
7158+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
7159+ reserv_blks);
7160+ }
7161
7162 return block;
7163 }
7164@@ -4517,65 +4682,97 @@ static void ext4_mb_poll_new_transaction
7165 ext4_mb_free_committed_blocks(sb);
7166 }
7167
7168+/*
7169+ * We can merge two free data extents only if the physical blocks
7170+ * are contiguous, AND the extents were freed by the same transaction,
7171+ * AND the blocks are associated with the same group.
7172+ */
7173+static int can_merge(struct ext4_free_data *entry1,
7174+ struct ext4_free_data *entry2)
7175+{
7176+ if ((entry1->t_tid == entry2->t_tid) &&
7177+ (entry1->group == entry2->group) &&
7178+ ((entry1->start_blk + entry1->count) == entry2->start_blk))
7179+ return 1;
7180+ return 0;
7181+}
7182+
7183 static noinline_for_stack int
7184 ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
7185- ext4_group_t group, ext4_grpblk_t block, int count)
7186+ struct ext4_free_data *new_entry)
7187 {
7188+ ext4_grpblk_t block;
7189+ struct ext4_free_data *entry;
7190 struct ext4_group_info *db = e4b->bd_info;
7191 struct super_block *sb = e4b->bd_sb;
7192 struct ext4_sb_info *sbi = EXT4_SB(sb);
7193- struct ext4_free_metadata *md;
7194- int i;
7195+ struct rb_node **n = &db->bb_free_root.rb_node, *node;
7196+ struct rb_node *parent = NULL, *new_node;
7197
7198 BUG_ON(e4b->bd_bitmap_page == NULL);
7199 BUG_ON(e4b->bd_buddy_page == NULL);
7200
7201- ext4_lock_group(sb, group);
7202- for (i = 0; i < count; i++) {
7203- md = db->bb_md_cur;
7204- if (md && db->bb_tid != handle->h_transaction->t_tid) {
7205- db->bb_md_cur = NULL;
7206- md = NULL;
7207+ new_node = &new_entry->node;
7208+ block = new_entry->start_blk;
7209+
7210+ if (!*n) {
7211+ /* first free block exent. We need to
7212+ protect buddy cache from being freed,
7213+ * otherwise we'll refresh it from
7214+ * on-disk bitmap and lose not-yet-available
7215+ * blocks */
7216+ page_cache_get(e4b->bd_buddy_page);
7217+ page_cache_get(e4b->bd_bitmap_page);
7218+ }
7219+ while (*n) {
7220+ parent = *n;
7221+ entry = rb_entry(parent, struct ext4_free_data, node);
7222+ if (block < entry->start_blk)
7223+ n = &(*n)->rb_left;
7224+ else if (block >= (entry->start_blk + entry->count))
7225+ n = &(*n)->rb_right;
7226+ else {
7227+ ext4_grp_locked_error(sb, e4b->bd_group, __func__,
7228+ "Double free of blocks %d (%d %d)\n",
7229+ block, entry->start_blk, entry->count);
7230+ return 0;
7231 }
7232+ }
7233
7234- if (md == NULL) {
7235- ext4_unlock_group(sb, group);
7236- md = kmalloc(sizeof(*md), GFP_NOFS);
7237- if (md == NULL)
7238- return -ENOMEM;
7239- md->num = 0;
7240- md->group = group;
7241+ rb_link_node(new_node, parent, n);
7242+ rb_insert_color(new_node, &db->bb_free_root);
7243
7244- ext4_lock_group(sb, group);
7245- if (db->bb_md_cur == NULL) {
7246- spin_lock(&sbi->s_md_lock);
7247- list_add(&md->list, &sbi->s_active_transaction);
7248- spin_unlock(&sbi->s_md_lock);
7249- /* protect buddy cache from being freed,
7250- * otherwise we'll refresh it from
7251- * on-disk bitmap and lose not-yet-available
7252- * blocks */
7253- page_cache_get(e4b->bd_buddy_page);
7254- page_cache_get(e4b->bd_bitmap_page);
7255- db->bb_md_cur = md;
7256- db->bb_tid = handle->h_transaction->t_tid;
7257- mb_debug("new md 0x%p for group %lu\n",
7258- md, md->group);
7259- } else {
7260- kfree(md);
7261- md = db->bb_md_cur;
7262- }
7263+ /* Now try to see the extent can be merged to left and right */
7264+ node = rb_prev(new_node);
7265+ if (node) {
7266+ entry = rb_entry(node, struct ext4_free_data, node);
7267+ if (can_merge(entry, new_entry)) {
7268+ new_entry->start_blk = entry->start_blk;
7269+ new_entry->count += entry->count;
7270+ rb_erase(node, &(db->bb_free_root));
7271+ spin_lock(&sbi->s_md_lock);
7272+ list_del(&entry->list);
7273+ spin_unlock(&sbi->s_md_lock);
7274+ kmem_cache_free(ext4_free_ext_cachep, entry);
7275 }
7276+ }
7277
7278- BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
7279- md->blocks[md->num] = block + i;
7280- md->num++;
7281- if (md->num == EXT4_BB_MAX_BLOCKS) {
7282- /* no more space, put full container on a sb's list */
7283- db->bb_md_cur = NULL;
7284+ node = rb_next(new_node);
7285+ if (node) {
7286+ entry = rb_entry(node, struct ext4_free_data, node);
7287+ if (can_merge(new_entry, entry)) {
7288+ new_entry->count += entry->count;
7289+ rb_erase(node, &(db->bb_free_root));
7290+ spin_lock(&sbi->s_md_lock);
7291+ list_del(&entry->list);
7292+ spin_unlock(&sbi->s_md_lock);
7293+ kmem_cache_free(ext4_free_ext_cachep, entry);
7294 }
7295 }
7296- ext4_unlock_group(sb, group);
7297+ /* Add the extent to active_transaction list */
7298+ spin_lock(&sbi->s_md_lock);
7299+ list_add(&new_entry->list, &sbi->s_active_transaction);
7300+ spin_unlock(&sbi->s_md_lock);
7301 return 0;
7302 }
7303
7304@@ -4591,7 +4788,7 @@ void ext4_mb_free_blocks(handle_t *handl
7305 struct ext4_allocation_context *ac = NULL;
7306 struct ext4_group_desc *gdp;
7307 struct ext4_super_block *es;
7308- unsigned long overflow;
7309+ unsigned int overflow;
7310 ext4_grpblk_t bit;
7311 struct buffer_head *gd_bh;
7312 ext4_group_t block_group;
7313@@ -4675,11 +4872,6 @@ do_more:
7314 err = ext4_journal_get_write_access(handle, gd_bh);
7315 if (err)
7316 goto error_return;
7317-
7318- err = ext4_mb_load_buddy(sb, block_group, &e4b);
7319- if (err)
7320- goto error_return;
7321-
7322 #ifdef AGGRESSIVE_CHECK
7323 {
7324 int i;
7325@@ -4687,13 +4879,6 @@ do_more:
7326 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
7327 }
7328 #endif
7329- mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
7330- bit, count);
7331-
7332- /* We dirtied the bitmap block */
7333- BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
7334- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
7335-
7336 if (ac) {
7337 ac->ac_b_ex.fe_group = block_group;
7338 ac->ac_b_ex.fe_start = bit;
7339@@ -4701,19 +4886,43 @@ do_more:
7340 ext4_mb_store_history(ac);
7341 }
7342
7343+ err = ext4_mb_load_buddy(sb, block_group, &e4b);
7344+ if (err) {
7345+ __release(e4b->alloc_semp);
7346+ goto error_return;
7347+ }
7348 if (metadata) {
7349- /* blocks being freed are metadata. these blocks shouldn't
7350- * be used until this transaction is committed */
7351- ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
7352+ struct ext4_free_data *new_entry;
7353+ /*
7354+ * blocks being freed are metadata. these blocks shouldn't
7355+ * be used until this transaction is committed
7356+ */
7357+ new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
7358+ new_entry->start_blk = bit;
7359+ new_entry->group = block_group;
7360+ new_entry->count = count;
7361+ new_entry->t_tid = handle->h_transaction->t_tid;
7362+ ext4_lock_group(sb, block_group);
7363+ mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
7364+ bit, count);
7365+ ext4_mb_free_metadata(handle, &e4b, new_entry);
7366+ ext4_unlock_group(sb, block_group);
7367 } else {
7368 ext4_lock_group(sb, block_group);
7369+ /* need to update group_info->bb_free and bitmap
7370+ * with group lock held. generate_buddy look at
7371+ * them with group lock_held
7372+ */
7373+ mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
7374+ bit, count);
7375 mb_free_blocks(inode, &e4b, bit, count);
7376 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
7377 ext4_unlock_group(sb, block_group);
7378 }
7379
7380 spin_lock(sb_bgl_lock(sbi, block_group));
7381- le16_add_cpu(&gdp->bg_free_blocks_count, count);
7382+ ret = ext4_free_blks_count(sb, gdp) + count;
7383+ ext4_free_blks_set(sb, gdp, ret);
7384 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
7385 spin_unlock(sb_bgl_lock(sbi, block_group));
7386 percpu_counter_add(&sbi->s_freeblocks_counter, count);
7387@@ -4729,6 +4938,10 @@ do_more:
7388
7389 *freed += count;
7390
7391+ /* We dirtied the bitmap block */
7392+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
7393+ err = ext4_journal_dirty_metadata(handle, bitmap_bh);
7394+
7395 /* And the group descriptor block */
7396 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
7397 ret = ext4_journal_dirty_metadata(handle, gd_bh);
7398diff -rup b/fs/ext4//mballoc.h a/fs/ext4///mballoc.h
7399--- b/fs/ext4/mballoc.h 2009-02-11 14:37:58.000000000 +0100
7400+++ a/fs/ext4/mballoc.h 2009-02-10 21:40:14.000000000 +0100
7401@@ -18,6 +18,9 @@
7402 #include <linux/pagemap.h>
7403 #include <linux/seq_file.h>
7404 #include <linux/version.h>
7405+#include <linux/blkdev.h>
7406+#include <linux/marker.h>
7407+#include <linux/mutex.h>
7408 #include "ext4_jbd2.h"
7409 #include "ext4.h"
7410 #include "group.h"
7411@@ -96,41 +99,24 @@
7412 */
7413 #define MB_DEFAULT_GROUP_PREALLOC 512
7414
7415-static struct kmem_cache *ext4_pspace_cachep;
7416-static struct kmem_cache *ext4_ac_cachep;
7417
7418-#ifdef EXT4_BB_MAX_BLOCKS
7419-#undef EXT4_BB_MAX_BLOCKS
7420-#endif
7421-#define EXT4_BB_MAX_BLOCKS 30
7422+struct ext4_free_data {
7423+ /* this links the free block information from group_info */
7424+ struct rb_node node;
7425
7426-struct ext4_free_metadata {
7427- ext4_group_t group;
7428- unsigned short num;
7429- ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
7430+ /* this links the free block information from ext4_sb_info */
7431 struct list_head list;
7432-};
7433-
7434-struct ext4_group_info {
7435- unsigned long bb_state;
7436- unsigned long bb_tid;
7437- struct ext4_free_metadata *bb_md_cur;
7438- unsigned short bb_first_free;
7439- unsigned short bb_free;
7440- unsigned short bb_fragments;
7441- struct list_head bb_prealloc_list;
7442-#ifdef DOUBLE_CHECK
7443- void *bb_bitmap;
7444-#endif
7445- unsigned short bb_counters[];
7446-};
7447
7448-#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
7449-#define EXT4_GROUP_INFO_LOCKED_BIT 1
7450+ /* group which free block extent belongs */
7451+ ext4_group_t group;
7452
7453-#define EXT4_MB_GRP_NEED_INIT(grp) \
7454- (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
7455+ /* free block extent */
7456+ ext4_grpblk_t start_blk;
7457+ ext4_grpblk_t count;
7458
7459+ /* transaction which freed this extent */
7460+ tid_t t_tid;
7461+};
7462
7463 struct ext4_prealloc_space {
7464 struct list_head pa_inode_list;
7465@@ -209,6 +195,11 @@ struct ext4_allocation_context {
7466 __u8 ac_op; /* operation, for history only */
7467 struct page *ac_bitmap_page;
7468 struct page *ac_buddy_page;
7469+ /*
7470+ * pointer to the held semaphore upon successful
7471+ * block allocation
7472+ */
7473+ struct rw_semaphore *alloc_semp;
7474 struct ext4_prealloc_space *ac_pa;
7475 struct ext4_locality_group *ac_lg;
7476 };
7477@@ -242,6 +233,7 @@ struct ext4_buddy {
7478 struct super_block *bd_sb;
7479 __u16 bd_blkbits;
7480 ext4_group_t bd_group;
7481+ struct rw_semaphore *alloc_semp;
7482 };
7483 #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
7484 #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
7485@@ -251,53 +243,12 @@ static inline void ext4_mb_store_history
7486 {
7487 return;
7488 }
7489-#else
7490-static void ext4_mb_store_history(struct ext4_allocation_context *ac);
7491 #endif
7492
7493 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
7494
7495-static struct proc_dir_entry *proc_root_ext4;
7496 struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
7497-
7498-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
7499- ext4_group_t group);
7500-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
7501-static void ext4_mb_free_committed_blocks(struct super_block *);
7502-static void ext4_mb_return_to_preallocation(struct inode *inode,
7503- struct ext4_buddy *e4b, sector_t block,
7504- int count);
7505-static void ext4_mb_put_pa(struct ext4_allocation_context *,
7506- struct super_block *, struct ext4_prealloc_space *pa);
7507-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
7508-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
7509-
7510-
7511-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
7512-{
7513- struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
7514-
7515- bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
7516-}
7517-
7518-static inline void ext4_unlock_group(struct super_block *sb,
7519- ext4_group_t group)
7520-{
7521- struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
7522-
7523- bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
7524-}
7525-
7526-static inline int ext4_is_group_locked(struct super_block *sb,
7527- ext4_group_t group)
7528-{
7529- struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
7530-
7531- return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
7532- &(grinfo->bb_state));
7533-}
7534-
7535-static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
7536+static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
7537 struct ext4_free_extent *fex)
7538 {
7539 ext4_fsblk_t block;
7540diff -rup b/fs/ext4//namei.c a/fs/ext4///namei.c
7541--- b/fs/ext4/namei.c 2009-02-11 14:37:58.000000000 +0100
7542+++ a/fs/ext4/namei.c 2009-02-10 21:40:11.000000000 +0100
7543@@ -151,34 +151,36 @@ struct dx_map_entry
7544
7545 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
7546 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
7547-static inline unsigned dx_get_hash (struct dx_entry *entry);
7548-static void dx_set_hash (struct dx_entry *entry, unsigned value);
7549-static unsigned dx_get_count (struct dx_entry *entries);
7550-static unsigned dx_get_limit (struct dx_entry *entries);
7551-static void dx_set_count (struct dx_entry *entries, unsigned value);
7552-static void dx_set_limit (struct dx_entry *entries, unsigned value);
7553-static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
7554-static unsigned dx_node_limit (struct inode *dir);
7555-static struct dx_frame *dx_probe(struct dentry *dentry,
7556+static inline unsigned dx_get_hash(struct dx_entry *entry);
7557+static void dx_set_hash(struct dx_entry *entry, unsigned value);
7558+static unsigned dx_get_count(struct dx_entry *entries);
7559+static unsigned dx_get_limit(struct dx_entry *entries);
7560+static void dx_set_count(struct dx_entry *entries, unsigned value);
7561+static void dx_set_limit(struct dx_entry *entries, unsigned value);
7562+static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
7563+static unsigned dx_node_limit(struct inode *dir);
7564+static struct dx_frame *dx_probe(const struct qstr *d_name,
7565 struct inode *dir,
7566 struct dx_hash_info *hinfo,
7567 struct dx_frame *frame,
7568 int *err);
7569-static void dx_release (struct dx_frame *frames);
7570-static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
7571- struct dx_hash_info *hinfo, struct dx_map_entry map[]);
7572+static void dx_release(struct dx_frame *frames);
7573+static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
7574+ struct dx_hash_info *hinfo, struct dx_map_entry map[]);
7575 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
7576-static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
7577+static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
7578 struct dx_map_entry *offsets, int count);
7579-static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
7580+static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
7581 static void dx_insert_block(struct dx_frame *frame,
7582 u32 hash, ext4_lblk_t block);
7583 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
7584 struct dx_frame *frame,
7585 struct dx_frame *frames,
7586 __u32 *start_hash);
7587-static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
7588- struct ext4_dir_entry_2 **res_dir, int *err);
7589+static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
7590+ const struct qstr *d_name,
7591+ struct ext4_dir_entry_2 **res_dir,
7592+ int *err);
7593 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
7594 struct inode *inode);
7595
7596@@ -207,44 +209,44 @@ static inline void dx_set_block(struct d
7597 entry->block = cpu_to_le32(value);
7598 }
7599
7600-static inline unsigned dx_get_hash (struct dx_entry *entry)
7601+static inline unsigned dx_get_hash(struct dx_entry *entry)
7602 {
7603 return le32_to_cpu(entry->hash);
7604 }
7605
7606-static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
7607+static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
7608 {
7609 entry->hash = cpu_to_le32(value);
7610 }
7611
7612-static inline unsigned dx_get_count (struct dx_entry *entries)
7613+static inline unsigned dx_get_count(struct dx_entry *entries)
7614 {
7615 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
7616 }
7617
7618-static inline unsigned dx_get_limit (struct dx_entry *entries)
7619+static inline unsigned dx_get_limit(struct dx_entry *entries)
7620 {
7621 return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
7622 }
7623
7624-static inline void dx_set_count (struct dx_entry *entries, unsigned value)
7625+static inline void dx_set_count(struct dx_entry *entries, unsigned value)
7626 {
7627 ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
7628 }
7629
7630-static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
7631+static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
7632 {
7633 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
7634 }
7635
7636-static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
7637+static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
7638 {
7639 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
7640 EXT4_DIR_REC_LEN(2) - infosize;
7641 return entry_space / sizeof(struct dx_entry);
7642 }
7643
7644-static inline unsigned dx_node_limit (struct inode *dir)
7645+static inline unsigned dx_node_limit(struct inode *dir)
7646 {
7647 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
7648 return entry_space / sizeof(struct dx_entry);
7649@@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (st
7650 * Debug
7651 */
7652 #ifdef DX_DEBUG
7653-static void dx_show_index (char * label, struct dx_entry *entries)
7654+static void dx_show_index(char * label, struct dx_entry *entries)
7655 {
7656 int i, n = dx_get_count (entries);
7657- printk("%s index ", label);
7658+ printk(KERN_DEBUG "%s index ", label);
7659 for (i = 0; i < n; i++) {
7660- printk("%x->%lu ", i? dx_get_hash(entries + i) :
7661+ printk("%x->%lu ", i ? dx_get_hash(entries + i) :
7662 0, (unsigned long)dx_get_block(entries + i));
7663 }
7664 printk("\n");
7665@@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_h
7666 struct dx_entry *entries, int levels)
7667 {
7668 unsigned blocksize = dir->i_sb->s_blocksize;
7669- unsigned count = dx_get_count (entries), names = 0, space = 0, i;
7670+ unsigned count = dx_get_count(entries), names = 0, space = 0, i;
7671 unsigned bcount = 0;
7672 struct buffer_head *bh;
7673 int err;
7674@@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_h
7675 names += stats.names;
7676 space += stats.space;
7677 bcount += stats.bcount;
7678- brelse (bh);
7679+ brelse(bh);
7680 }
7681 if (bcount)
7682- printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ",
7683- names, space/bcount,(space/bcount)*100/blocksize);
7684+ printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
7685+ levels ? "" : " ", names, space/bcount,
7686+ (space/bcount)*100/blocksize);
7687 return (struct stats) { names, space, bcount};
7688 }
7689 #endif /* DX_DEBUG */
7690@@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_h
7691 * back to userspace.
7692 */
7693 static struct dx_frame *
7694-dx_probe(struct dentry *dentry, struct inode *dir,
7695+dx_probe(const struct qstr *d_name, struct inode *dir,
7696 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
7697 {
7698 unsigned count, indirect;
7699@@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct i
7700 u32 hash;
7701
7702 frame->bh = NULL;
7703- if (dentry)
7704- dir = dentry->d_parent->d_inode;
7705 if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
7706 goto fail;
7707 root = (struct dx_root *) bh->b_data;
7708@@ -371,9 +372,11 @@ dx_probe(struct dentry *dentry, struct i
7709 goto fail;
7710 }
7711 hinfo->hash_version = root->info.hash_version;
7712+ if (hinfo->hash_version <= DX_HASH_TEA)
7713+ hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
7714 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
7715- if (dentry)
7716- ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
7717+ if (d_name)
7718+ ext4fs_dirhash(d_name->name, d_name->len, hinfo);
7719 hash = hinfo->hash;
7720
7721 if (root->info.unused_flags & 1) {
7722@@ -406,7 +409,7 @@ dx_probe(struct dentry *dentry, struct i
7723 goto fail;
7724 }
7725
7726- dxtrace (printk("Look up %x", hash));
7727+ dxtrace(printk("Look up %x", hash));
7728 while (1)
7729 {
7730 count = dx_get_count(entries);
7731@@ -555,7 +558,7 @@ static int ext4_htree_next_block(struct
7732 0, &err)))
7733 return err; /* Failure */
7734 p++;
7735- brelse (p->bh);
7736+ brelse(p->bh);
7737 p->bh = bh;
7738 p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
7739 }
7740@@ -593,7 +596,7 @@ static int htree_dirblock_to_tree(struct
7741 /* On error, skip the f_pos to the next block. */
7742 dir_file->f_pos = (dir_file->f_pos |
7743 (dir->i_sb->s_blocksize - 1)) + 1;
7744- brelse (bh);
7745+ brelse(bh);
7746 return count;
7747 }
7748 ext4fs_dirhash(de->name, de->name_len, hinfo);
7749@@ -635,11 +638,14 @@ int ext4_htree_fill_tree(struct file *di
7750 int ret, err;
7751 __u32 hashval;
7752
7753- dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
7754- start_minor_hash));
7755+ dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
7756+ start_hash, start_minor_hash));
7757 dir = dir_file->f_path.dentry->d_inode;
7758 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
7759 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
7760+ if (hinfo.hash_version <= DX_HASH_TEA)
7761+ hinfo.hash_version +=
7762+ EXT4_SB(dir->i_sb)->s_hash_unsigned;
7763 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
7764 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
7765 start_hash, start_minor_hash);
7766@@ -648,7 +654,7 @@ int ext4_htree_fill_tree(struct file *di
7767 }
7768 hinfo.hash = start_hash;
7769 hinfo.minor_hash = 0;
7770- frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
7771+ frame = dx_probe(NULL, dir, &hinfo, frames, &err);
7772 if (!frame)
7773 return err;
7774
7775@@ -694,8 +700,8 @@ int ext4_htree_fill_tree(struct file *di
7776 break;
7777 }
7778 dx_release(frames);
7779- dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
7780- count, *next_hash));
7781+ dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
7782+ "next hash: %x\n", count, *next_hash));
7783 return count;
7784 errout:
7785 dx_release(frames);
7786@@ -802,17 +808,17 @@ static inline int ext4_match (int len, c
7787 /*
7788 * Returns 0 if not found, -1 on failure, and 1 on success
7789 */
7790-static inline int search_dirblock(struct buffer_head * bh,
7791+static inline int search_dirblock(struct buffer_head *bh,
7792 struct inode *dir,
7793- struct dentry *dentry,
7794- unsigned long offset,
7795+ const struct qstr *d_name,
7796+ unsigned int offset,
7797 struct ext4_dir_entry_2 ** res_dir)
7798 {
7799 struct ext4_dir_entry_2 * de;
7800 char * dlimit;
7801 int de_len;
7802- const char *name = dentry->d_name.name;
7803- int namelen = dentry->d_name.len;
7804+ const char *name = d_name->name;
7805+ int namelen = d_name->len;
7806
7807 de = (struct ext4_dir_entry_2 *) bh->b_data;
7808 dlimit = bh->b_data + dir->i_sb->s_blocksize;
7809@@ -851,12 +857,13 @@ static inline int search_dirblock(struct
7810 * The returned buffer_head has ->b_count elevated. The caller is expected
7811 * to brelse() it when appropriate.
7812 */
7813-static struct buffer_head * ext4_find_entry (struct dentry *dentry,
7814+static struct buffer_head * ext4_find_entry (struct inode *dir,
7815+ const struct qstr *d_name,
7816 struct ext4_dir_entry_2 ** res_dir)
7817 {
7818- struct super_block * sb;
7819- struct buffer_head * bh_use[NAMEI_RA_SIZE];
7820- struct buffer_head * bh, *ret = NULL;
7821+ struct super_block *sb;
7822+ struct buffer_head *bh_use[NAMEI_RA_SIZE];
7823+ struct buffer_head *bh, *ret = NULL;
7824 ext4_lblk_t start, block, b;
7825 int ra_max = 0; /* Number of bh's in the readahead
7826 buffer, bh_use[] */
7827@@ -865,16 +872,15 @@ static struct buffer_head * ext4_find_en
7828 int num = 0;
7829 ext4_lblk_t nblocks;
7830 int i, err;
7831- struct inode *dir = dentry->d_parent->d_inode;
7832 int namelen;
7833
7834 *res_dir = NULL;
7835 sb = dir->i_sb;
7836- namelen = dentry->d_name.len;
7837+ namelen = d_name->len;
7838 if (namelen > EXT4_NAME_LEN)
7839 return NULL;
7840 if (is_dx(dir)) {
7841- bh = ext4_dx_find_entry(dentry, res_dir, &err);
7842+ bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
7843 /*
7844 * On success, or if the error was file not found,
7845 * return. Otherwise, fall back to doing a search the
7846@@ -882,7 +888,8 @@ static struct buffer_head * ext4_find_en
7847 */
7848 if (bh || (err != ERR_BAD_DX_DIR))
7849 return bh;
7850- dxtrace(printk("ext4_find_entry: dx failed, falling back\n"));
7851+ dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
7852+ "falling back\n"));
7853 }
7854 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
7855 start = EXT4_I(dir)->i_dir_start_lookup;
7856@@ -926,7 +933,7 @@ restart:
7857 brelse(bh);
7858 goto next;
7859 }
7860- i = search_dirblock(bh, dir, dentry,
7861+ i = search_dirblock(bh, dir, d_name,
7862 block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
7863 if (i == 1) {
7864 EXT4_I(dir)->i_dir_start_lookup = block;
7865@@ -956,11 +963,11 @@ restart:
7866 cleanup_and_exit:
7867 /* Clean up the read-ahead blocks */
7868 for (; ra_ptr < ra_max; ra_ptr++)
7869- brelse (bh_use[ra_ptr]);
7870+ brelse(bh_use[ra_ptr]);
7871 return ret;
7872 }
7873
7874-static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
7875+static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
7876 struct ext4_dir_entry_2 **res_dir, int *err)
7877 {
7878 struct super_block * sb;
7879@@ -971,14 +978,13 @@ static struct buffer_head * ext4_dx_find
7880 struct buffer_head *bh;
7881 ext4_lblk_t block;
7882 int retval;
7883- int namelen = dentry->d_name.len;
7884- const u8 *name = dentry->d_name.name;
7885- struct inode *dir = dentry->d_parent->d_inode;
7886+ int namelen = d_name->len;
7887+ const u8 *name = d_name->name;
7888
7889 sb = dir->i_sb;
7890 /* NFS may look up ".." - look at dx_root directory block */
7891 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
7892- if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
7893+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
7894 return NULL;
7895 } else {
7896 frame = frames;
7897@@ -1010,7 +1016,7 @@ static struct buffer_head * ext4_dx_find
7898 return bh;
7899 }
7900 }
7901- brelse (bh);
7902+ brelse(bh);
7903 /* Check to see if we should continue to search */
7904 retval = ext4_htree_next_block(dir, hash, frame,
7905 frames, NULL);
7906@@ -1025,28 +1031,28 @@ static struct buffer_head * ext4_dx_find
7907
7908 *err = -ENOENT;
7909 errout:
7910- dxtrace(printk("%s not found\n", name));
7911+ dxtrace(printk(KERN_DEBUG "%s not found\n", name));
7912 dx_release (frames);
7913 return NULL;
7914 }
7915
7916-static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
7917+static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
7918 {
7919- struct inode * inode;
7920- struct ext4_dir_entry_2 * de;
7921- struct buffer_head * bh;
7922+ struct inode *inode;
7923+ struct ext4_dir_entry_2 *de;
7924+ struct buffer_head *bh;
7925
7926 if (dentry->d_name.len > EXT4_NAME_LEN)
7927 return ERR_PTR(-ENAMETOOLONG);
7928
7929- bh = ext4_find_entry(dentry, &de);
7930+ bh = ext4_find_entry(dir, &dentry->d_name, &de);
7931 inode = NULL;
7932 if (bh) {
7933- unsigned long ino = le32_to_cpu(de->inode);
7934- brelse (bh);
7935+ __u32 ino = le32_to_cpu(de->inode);
7936+ brelse(bh);
7937 if (!ext4_valid_inum(dir->i_sb, ino)) {
7938 ext4_error(dir->i_sb, "ext4_lookup",
7939- "bad inode number: %lu", ino);
7940+ "bad inode number: %u", ino);
7941 return ERR_PTR(-EIO);
7942 }
7943 inode = ext4_iget(dir->i_sb, ino);
7944@@ -1059,18 +1065,17 @@ static struct dentry *ext4_lookup(struct
7945
7946 struct dentry *ext4_get_parent(struct dentry *child)
7947 {
7948- unsigned long ino;
7949+ __u32 ino;
7950 struct dentry *parent;
7951 struct inode *inode;
7952- struct dentry dotdot;
7953+ static const struct qstr dotdot = {
7954+ .name = "..",
7955+ .len = 2,
7956+ };
7957 struct ext4_dir_entry_2 * de;
7958 struct buffer_head *bh;
7959
7960- dotdot.d_name.name = "..";
7961- dotdot.d_name.len = 2;
7962- dotdot.d_parent = child; /* confusing, isn't it! */
7963-
7964- bh = ext4_find_entry(&dotdot, &de);
7965+ bh = ext4_find_entry(child->d_inode, &dotdot, &de);
7966 inode = NULL;
7967 if (!bh)
7968 return ERR_PTR(-ENOENT);
7969@@ -1079,7 +1084,7 @@ struct dentry *ext4_get_parent(struct de
7970
7971 if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
7972 ext4_error(child->d_inode->i_sb, "ext4_get_parent",
7973- "bad inode number: %lu", ino);
7974+ "bad inode number: %u", ino);
7975 return ERR_PTR(-EIO);
7976 }
7977
7978@@ -1176,9 +1181,9 @@ static struct ext4_dir_entry_2 *do_split
7979 u32 hash2;
7980 struct dx_map_entry *map;
7981 char *data1 = (*bh)->b_data, *data2;
7982- unsigned split, move, size, i;
7983+ unsigned split, move, size;
7984 struct ext4_dir_entry_2 *de = NULL, *de2;
7985- int err = 0;
7986+ int err = 0, i;
7987
7988 bh2 = ext4_append (handle, dir, &newblock, &err);
7989 if (!(bh2)) {
7990@@ -1201,10 +1206,10 @@ static struct ext4_dir_entry_2 *do_split
7991
7992 /* create map in the end of data2 block */
7993 map = (struct dx_map_entry *) (data2 + blocksize);
7994- count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
7995+ count = dx_make_map((struct ext4_dir_entry_2 *) data1,
7996 blocksize, hinfo, map);
7997 map -= count;
7998- dx_sort_map (map, count);
7999+ dx_sort_map(map, count);
8000 /* Split the existing block in the middle, size-wise */
8001 size = 0;
8002 move = 0;
8003@@ -1225,7 +1230,7 @@ static struct ext4_dir_entry_2 *do_split
8004
8005 /* Fancy dance to stay within two buffers */
8006 de2 = dx_move_dirents(data1, data2, map + split, count - split);
8007- de = dx_pack_dirents(data1,blocksize);
8008+ de = dx_pack_dirents(data1, blocksize);
8009 de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
8010 de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
8011 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
8012@@ -1237,15 +1242,15 @@ static struct ext4_dir_entry_2 *do_split
8013 swap(*bh, bh2);
8014 de = de2;
8015 }
8016- dx_insert_block (frame, hash2 + continued, newblock);
8017- err = ext4_journal_dirty_metadata (handle, bh2);
8018+ dx_insert_block(frame, hash2 + continued, newblock);
8019+ err = ext4_journal_dirty_metadata(handle, bh2);
8020 if (err)
8021 goto journal_error;
8022- err = ext4_journal_dirty_metadata (handle, frame->bh);
8023+ err = ext4_journal_dirty_metadata(handle, frame->bh);
8024 if (err)
8025 goto journal_error;
8026- brelse (bh2);
8027- dxtrace(dx_show_index ("frame", frame->entries));
8028+ brelse(bh2);
8029+ dxtrace(dx_show_index("frame", frame->entries));
8030 return de;
8031
8032 journal_error:
8033@@ -1271,12 +1276,12 @@ errout:
8034 */
8035 static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
8036 struct inode *inode, struct ext4_dir_entry_2 *de,
8037- struct buffer_head * bh)
8038+ struct buffer_head *bh)
8039 {
8040 struct inode *dir = dentry->d_parent->d_inode;
8041 const char *name = dentry->d_name.name;
8042 int namelen = dentry->d_name.len;
8043- unsigned long offset = 0;
8044+ unsigned int offset = 0;
8045 unsigned short reclen;
8046 int nlen, rlen, err;
8047 char *top;
8048@@ -1288,11 +1293,11 @@ static int add_dirent_to_buf(handle_t *h
8049 while ((char *) de <= top) {
8050 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
8051 bh, offset)) {
8052- brelse (bh);
8053+ brelse(bh);
8054 return -EIO;
8055 }
8056- if (ext4_match (namelen, name, de)) {
8057- brelse (bh);
8058+ if (ext4_match(namelen, name, de)) {
8059+ brelse(bh);
8060 return -EEXIST;
8061 }
8062 nlen = EXT4_DIR_REC_LEN(de->name_len);
8063@@ -1329,7 +1334,7 @@ static int add_dirent_to_buf(handle_t *h
8064 } else
8065 de->inode = 0;
8066 de->name_len = namelen;
8067- memcpy (de->name, name, namelen);
8068+ memcpy(de->name, name, namelen);
8069 /*
8070 * XXX shouldn't update any times until successful
8071 * completion of syscall, but too many callers depend
8072@@ -1377,7 +1382,7 @@ static int make_indexed_dir(handle_t *ha
8073 struct fake_dirent *fde;
8074
8075 blocksize = dir->i_sb->s_blocksize;
8076- dxtrace(printk("Creating index\n"));
8077+ dxtrace(printk(KERN_DEBUG "Creating index\n"));
8078 retval = ext4_journal_get_write_access(handle, bh);
8079 if (retval) {
8080 ext4_std_error(dir->i_sb, retval);
8081@@ -1386,7 +1391,7 @@ static int make_indexed_dir(handle_t *ha
8082 }
8083 root = (struct dx_root *) bh->b_data;
8084
8085- bh2 = ext4_append (handle, dir, &block, &retval);
8086+ bh2 = ext4_append(handle, dir, &block, &retval);
8087 if (!(bh2)) {
8088 brelse(bh);
8089 return retval;
8090@@ -1412,12 +1417,14 @@ static int make_indexed_dir(handle_t *ha
8091 root->info.info_length = sizeof(root->info);
8092 root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
8093 entries = root->entries;
8094- dx_set_block (entries, 1);
8095- dx_set_count (entries, 1);
8096- dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
8097+ dx_set_block(entries, 1);
8098+ dx_set_count(entries, 1);
8099+ dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
8100
8101 /* Initialize as for dx_probe */
8102 hinfo.hash_version = root->info.hash_version;
8103+ if (hinfo.hash_version <= DX_HASH_TEA)
8104+ hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
8105 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
8106 ext4fs_dirhash(name, namelen, &hinfo);
8107 frame = frames;
8108@@ -1443,14 +1450,13 @@ static int make_indexed_dir(handle_t *ha
8109 * may not sleep between calling this and putting something into
8110 * the entry, as someone else might have used it while you slept.
8111 */
8112-static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
8113- struct inode *inode)
8114+static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
8115+ struct inode *inode)
8116 {
8117 struct inode *dir = dentry->d_parent->d_inode;
8118- unsigned long offset;
8119- struct buffer_head * bh;
8120+ struct buffer_head *bh;
8121 struct ext4_dir_entry_2 *de;
8122- struct super_block * sb;
8123+ struct super_block *sb;
8124 int retval;
8125 int dx_fallback=0;
8126 unsigned blocksize;
8127@@ -1469,7 +1475,7 @@ static int ext4_add_entry (handle_t *han
8128 ext4_mark_inode_dirty(handle, dir);
8129 }
8130 blocks = dir->i_size >> sb->s_blocksize_bits;
8131- for (block = 0, offset = 0; block < blocks; block++) {
8132+ for (block = 0; block < blocks; block++) {
8133 bh = ext4_bread(handle, dir, block, 0, &retval);
8134 if(!bh)
8135 return retval;
8136@@ -1500,13 +1506,13 @@ static int ext4_dx_add_entry(handle_t *h
8137 struct dx_frame frames[2], *frame;
8138 struct dx_entry *entries, *at;
8139 struct dx_hash_info hinfo;
8140- struct buffer_head * bh;
8141+ struct buffer_head *bh;
8142 struct inode *dir = dentry->d_parent->d_inode;
8143- struct super_block * sb = dir->i_sb;
8144+ struct super_block *sb = dir->i_sb;
8145 struct ext4_dir_entry_2 *de;
8146 int err;
8147
8148- frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
8149+ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
8150 if (!frame)
8151 return err;
8152 entries = frame->entries;
8153@@ -1527,7 +1533,7 @@ static int ext4_dx_add_entry(handle_t *h
8154 }
8155
8156 /* Block full, should compress but for now just split */
8157- dxtrace(printk("using %u of %u node entries\n",
8158+ dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
8159 dx_get_count(entries), dx_get_limit(entries)));
8160 /* Need to split index? */
8161 if (dx_get_count(entries) == dx_get_limit(entries)) {
8162@@ -1559,7 +1565,8 @@ static int ext4_dx_add_entry(handle_t *h
8163 if (levels) {
8164 unsigned icount1 = icount/2, icount2 = icount - icount1;
8165 unsigned hash2 = dx_get_hash(entries + icount1);
8166- dxtrace(printk("Split index %i/%i\n", icount1, icount2));
8167+ dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
8168+ icount1, icount2));
8169
8170 BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
8171 err = ext4_journal_get_write_access(handle,
8172@@ -1567,11 +1574,11 @@ static int ext4_dx_add_entry(handle_t *h
8173 if (err)
8174 goto journal_error;
8175
8176- memcpy ((char *) entries2, (char *) (entries + icount1),
8177- icount2 * sizeof(struct dx_entry));
8178- dx_set_count (entries, icount1);
8179- dx_set_count (entries2, icount2);
8180- dx_set_limit (entries2, dx_node_limit(dir));
8181+ memcpy((char *) entries2, (char *) (entries + icount1),
8182+ icount2 * sizeof(struct dx_entry));
8183+ dx_set_count(entries, icount1);
8184+ dx_set_count(entries2, icount2);
8185+ dx_set_limit(entries2, dx_node_limit(dir));
8186
8187 /* Which index block gets the new entry? */
8188 if (at - entries >= icount1) {
8189@@ -1579,16 +1586,17 @@ static int ext4_dx_add_entry(handle_t *h
8190 frame->entries = entries = entries2;
8191 swap(frame->bh, bh2);
8192 }
8193- dx_insert_block (frames + 0, hash2, newblock);
8194- dxtrace(dx_show_index ("node", frames[1].entries));
8195- dxtrace(dx_show_index ("node",
8196+ dx_insert_block(frames + 0, hash2, newblock);
8197+ dxtrace(dx_show_index("node", frames[1].entries));
8198+ dxtrace(dx_show_index("node",
8199 ((struct dx_node *) bh2->b_data)->entries));
8200 err = ext4_journal_dirty_metadata(handle, bh2);
8201 if (err)
8202 goto journal_error;
8203 brelse (bh2);
8204 } else {
8205- dxtrace(printk("Creating second level index...\n"));
8206+ dxtrace(printk(KERN_DEBUG
8207+ "Creating second level index...\n"));
8208 memcpy((char *) entries2, (char *) entries,
8209 icount * sizeof(struct dx_entry));
8210 dx_set_limit(entries2, dx_node_limit(dir));
8211@@ -1630,12 +1638,12 @@ cleanup:
8212 * ext4_delete_entry deletes a directory entry by merging it with the
8213 * previous entry
8214 */
8215-static int ext4_delete_entry (handle_t *handle,
8216- struct inode * dir,
8217- struct ext4_dir_entry_2 * de_del,
8218- struct buffer_head * bh)
8219+static int ext4_delete_entry(handle_t *handle,
8220+ struct inode *dir,
8221+ struct ext4_dir_entry_2 *de_del,
8222+ struct buffer_head *bh)
8223 {
8224- struct ext4_dir_entry_2 * de, * pde;
8225+ struct ext4_dir_entry_2 *de, *pde;
8226 int i;
8227
8228 i = 0;
8229@@ -1716,11 +1724,11 @@ static int ext4_add_nondir(handle_t *han
8230 * If the create succeeds, we fill in the inode information
8231 * with d_instantiate().
8232 */
8233-static int ext4_create (struct inode * dir, struct dentry * dentry, int mode,
8234- struct nameidata *nd)
8235+static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
8236+ struct nameidata *nd)
8237 {
8238 handle_t *handle;
8239- struct inode * inode;
8240+ struct inode *inode;
8241 int err, retries = 0;
8242
8243 retry:
8244@@ -1747,8 +1755,8 @@ retry:
8245 return err;
8246 }
8247
8248-static int ext4_mknod (struct inode * dir, struct dentry *dentry,
8249- int mode, dev_t rdev)
8250+static int ext4_mknod(struct inode *dir, struct dentry *dentry,
8251+ int mode, dev_t rdev)
8252 {
8253 handle_t *handle;
8254 struct inode *inode;
8255@@ -1767,11 +1775,11 @@ retry:
8256 if (IS_DIRSYNC(dir))
8257 handle->h_sync = 1;
8258
8259- inode = ext4_new_inode (handle, dir, mode);
8260+ inode = ext4_new_inode(handle, dir, mode);
8261 err = PTR_ERR(inode);
8262 if (!IS_ERR(inode)) {
8263 init_special_inode(inode, inode->i_mode, rdev);
8264-#ifdef CONFIG_EXT4DEV_FS_XATTR
8265+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
8266 inode->i_op = &ext4_special_inode_operations;
8267 #endif
8268 err = ext4_add_nondir(handle, dentry, inode);
8269@@ -1782,12 +1790,12 @@ retry:
8270 return err;
8271 }
8272
8273-static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode)
8274+static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
8275 {
8276 handle_t *handle;
8277- struct inode * inode;
8278- struct buffer_head * dir_block;
8279- struct ext4_dir_entry_2 * de;
8280+ struct inode *inode;
8281+ struct buffer_head *dir_block;
8282+ struct ext4_dir_entry_2 *de;
8283 int err, retries = 0;
8284
8285 if (EXT4_DIR_LINK_MAX(dir))
8286@@ -1803,7 +1811,7 @@ retry:
8287 if (IS_DIRSYNC(dir))
8288 handle->h_sync = 1;
8289
8290- inode = ext4_new_inode (handle, dir, S_IFDIR | mode);
8291+ inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
8292 err = PTR_ERR(inode);
8293 if (IS_ERR(inode))
8294 goto out_stop;
8295@@ -1811,7 +1819,7 @@ retry:
8296 inode->i_op = &ext4_dir_inode_operations;
8297 inode->i_fop = &ext4_dir_operations;
8298 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
8299- dir_block = ext4_bread (handle, inode, 0, 1, &err);
8300+ dir_block = ext4_bread(handle, inode, 0, 1, &err);
8301 if (!dir_block)
8302 goto out_clear_inode;
8303 BUFFER_TRACE(dir_block, "get_write_access");
8304@@ -1820,26 +1828,26 @@ retry:
8305 de->inode = cpu_to_le32(inode->i_ino);
8306 de->name_len = 1;
8307 de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
8308- strcpy (de->name, ".");
8309+ strcpy(de->name, ".");
8310 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
8311 de = ext4_next_entry(de);
8312 de->inode = cpu_to_le32(dir->i_ino);
8313 de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
8314 EXT4_DIR_REC_LEN(1));
8315 de->name_len = 2;
8316- strcpy (de->name, "..");
8317+ strcpy(de->name, "..");
8318 ext4_set_de_type(dir->i_sb, de, S_IFDIR);
8319 inode->i_nlink = 2;
8320 BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
8321 ext4_journal_dirty_metadata(handle, dir_block);
8322- brelse (dir_block);
8323+ brelse(dir_block);
8324 ext4_mark_inode_dirty(handle, inode);
8325- err = ext4_add_entry (handle, dentry, inode);
8326+ err = ext4_add_entry(handle, dentry, inode);
8327 if (err) {
8328 out_clear_inode:
8329 clear_nlink(inode);
8330 ext4_mark_inode_dirty(handle, inode);
8331- iput (inode);
8332+ iput(inode);
8333 goto out_stop;
8334 }
8335 ext4_inc_count(handle, dir);
8336@@ -1856,17 +1864,17 @@ out_stop:
8337 /*
8338 * routine to check that the specified directory is empty (for rmdir)
8339 */
8340-static int empty_dir (struct inode * inode)
8341+static int empty_dir(struct inode *inode)
8342 {
8343- unsigned long offset;
8344- struct buffer_head * bh;
8345- struct ext4_dir_entry_2 * de, * de1;
8346- struct super_block * sb;
8347+ unsigned int offset;
8348+ struct buffer_head *bh;
8349+ struct ext4_dir_entry_2 *de, *de1;
8350+ struct super_block *sb;
8351 int err = 0;
8352
8353 sb = inode->i_sb;
8354 if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
8355- !(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
8356+ !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
8357 if (err)
8358 ext4_error(inode->i_sb, __func__,
8359 "error %d reading directory #%lu offset 0",
8360@@ -1881,29 +1889,29 @@ static int empty_dir (struct inode * ino
8361 de1 = ext4_next_entry(de);
8362 if (le32_to_cpu(de->inode) != inode->i_ino ||
8363 !le32_to_cpu(de1->inode) ||
8364- strcmp (".", de->name) ||
8365- strcmp ("..", de1->name)) {
8366- ext4_warning (inode->i_sb, "empty_dir",
8367- "bad directory (dir #%lu) - no `.' or `..'",
8368- inode->i_ino);
8369- brelse (bh);
8370+ strcmp(".", de->name) ||
8371+ strcmp("..", de1->name)) {
8372+ ext4_warning(inode->i_sb, "empty_dir",
8373+ "bad directory (dir #%lu) - no `.' or `..'",
8374+ inode->i_ino);
8375+ brelse(bh);
8376 return 1;
8377 }
8378 offset = ext4_rec_len_from_disk(de->rec_len) +
8379 ext4_rec_len_from_disk(de1->rec_len);
8380 de = ext4_next_entry(de1);
8381- while (offset < inode->i_size ) {
8382+ while (offset < inode->i_size) {
8383 if (!bh ||
8384 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
8385 err = 0;
8386- brelse (bh);
8387- bh = ext4_bread (NULL, inode,
8388+ brelse(bh);
8389+ bh = ext4_bread(NULL, inode,
8390 offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
8391 if (!bh) {
8392 if (err)
8393 ext4_error(sb, __func__,
8394 "error %d reading directory"
8395- " #%lu offset %lu",
8396+ " #%lu offset %u",
8397 err, inode->i_ino, offset);
8398 offset += sb->s_blocksize;
8399 continue;
8400@@ -1917,13 +1925,13 @@ static int empty_dir (struct inode * ino
8401 continue;
8402 }
8403 if (le32_to_cpu(de->inode)) {
8404- brelse (bh);
8405+ brelse(bh);
8406 return 0;
8407 }
8408 offset += ext4_rec_len_from_disk(de->rec_len);
8409 de = ext4_next_entry(de);
8410 }
8411- brelse (bh);
8412+ brelse(bh);
8413 return 1;
8414 }
8415
8416@@ -1954,8 +1962,8 @@ int ext4_orphan_add(handle_t *handle, st
8417 * ->i_nlink. For, say it, character device. Not a regular file,
8418 * not a directory, not a symlink and ->i_nlink > 0.
8419 */
8420- J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
8421- S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
8422+ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
8423+ S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
8424
8425 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
8426 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
8427@@ -2003,7 +2011,7 @@ int ext4_orphan_del(handle_t *handle, st
8428 struct list_head *prev;
8429 struct ext4_inode_info *ei = EXT4_I(inode);
8430 struct ext4_sb_info *sbi;
8431- unsigned long ino_next;
8432+ __u32 ino_next;
8433 struct ext4_iloc iloc;
8434 int err = 0;
8435
8436@@ -2033,7 +2041,7 @@ int ext4_orphan_del(handle_t *handle, st
8437 goto out_err;
8438
8439 if (prev == &sbi->s_orphan) {
8440- jbd_debug(4, "superblock will point to %lu\n", ino_next);
8441+ jbd_debug(4, "superblock will point to %u\n", ino_next);
8442 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
8443 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
8444 if (err)
8445@@ -2045,7 +2053,7 @@ int ext4_orphan_del(handle_t *handle, st
8446 struct inode *i_prev =
8447 &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
8448
8449- jbd_debug(4, "orphan inode %lu will point to %lu\n",
8450+ jbd_debug(4, "orphan inode %lu will point to %u\n",
8451 i_prev->i_ino, ino_next);
8452 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
8453 if (err)
8454@@ -2069,12 +2077,12 @@ out_brelse:
8455 goto out_err;
8456 }
8457
8458-static int ext4_rmdir (struct inode * dir, struct dentry *dentry)
8459+static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
8460 {
8461 int retval;
8462- struct inode * inode;
8463- struct buffer_head * bh;
8464- struct ext4_dir_entry_2 * de;
8465+ struct inode *inode;
8466+ struct buffer_head *bh;
8467+ struct ext4_dir_entry_2 *de;
8468 handle_t *handle;
8469
8470 /* Initialize quotas before so that eventual writes go in
8471@@ -2085,7 +2093,7 @@ static int ext4_rmdir (struct inode * di
8472 return PTR_ERR(handle);
8473
8474 retval = -ENOENT;
8475- bh = ext4_find_entry (dentry, &de);
8476+ bh = ext4_find_entry(dir, &dentry->d_name, &de);
8477 if (!bh)
8478 goto end_rmdir;
8479
8480@@ -2099,16 +2107,16 @@ static int ext4_rmdir (struct inode * di
8481 goto end_rmdir;
8482
8483 retval = -ENOTEMPTY;
8484- if (!empty_dir (inode))
8485+ if (!empty_dir(inode))
8486 goto end_rmdir;
8487
8488 retval = ext4_delete_entry(handle, dir, de, bh);
8489 if (retval)
8490 goto end_rmdir;
8491 if (!EXT4_DIR_LINK_EMPTY(inode))
8492- ext4_warning (inode->i_sb, "ext4_rmdir",
8493- "empty directory has too many links (%d)",
8494- inode->i_nlink);
8495+ ext4_warning(inode->i_sb, "ext4_rmdir",
8496+ "empty directory has too many links (%d)",
8497+ inode->i_nlink);
8498 inode->i_version++;
8499 clear_nlink(inode);
8500 /* There's no need to set i_disksize: the fact that i_nlink is
8501@@ -2124,16 +2132,16 @@ static int ext4_rmdir (struct inode * di
8502
8503 end_rmdir:
8504 ext4_journal_stop(handle);
8505- brelse (bh);
8506+ brelse(bh);
8507 return retval;
8508 }
8509
8510-static int ext4_unlink(struct inode * dir, struct dentry *dentry)
8511+static int ext4_unlink(struct inode *dir, struct dentry *dentry)
8512 {
8513 int retval;
8514- struct inode * inode;
8515- struct buffer_head * bh;
8516- struct ext4_dir_entry_2 * de;
8517+ struct inode *inode;
8518+ struct buffer_head *bh;
8519+ struct ext4_dir_entry_2 *de;
8520 handle_t *handle;
8521
8522 /* Initialize quotas before so that eventual writes go
8523@@ -2147,7 +2155,7 @@ static int ext4_unlink(struct inode * di
8524 handle->h_sync = 1;
8525
8526 retval = -ENOENT;
8527- bh = ext4_find_entry (dentry, &de);
8528+ bh = ext4_find_entry(dir, &dentry->d_name, &de);
8529 if (!bh)
8530 goto end_unlink;
8531
8532@@ -2158,9 +2166,9 @@ static int ext4_unlink(struct inode * di
8533 goto end_unlink;
8534
8535 if (!inode->i_nlink) {
8536- ext4_warning (inode->i_sb, "ext4_unlink",
8537- "Deleting nonexistent file (%lu), %d",
8538- inode->i_ino, inode->i_nlink);
8539+ ext4_warning(inode->i_sb, "ext4_unlink",
8540+ "Deleting nonexistent file (%lu), %d",
8541+ inode->i_ino, inode->i_nlink);
8542 inode->i_nlink = 1;
8543 }
8544 retval = ext4_delete_entry(handle, dir, de, bh);
8545@@ -2178,15 +2186,15 @@ static int ext4_unlink(struct inode * di
8546
8547 end_unlink:
8548 ext4_journal_stop(handle);
8549- brelse (bh);
8550+ brelse(bh);
8551 return retval;
8552 }
8553
8554-static int ext4_symlink (struct inode * dir,
8555- struct dentry *dentry, const char * symname)
8556+static int ext4_symlink(struct inode *dir,
8557+ struct dentry *dentry, const char *symname)
8558 {
8559 handle_t *handle;
8560- struct inode * inode;
8561+ struct inode *inode;
8562 int l, err, retries = 0;
8563
8564 l = strlen(symname)+1;
8565@@ -2203,12 +2211,12 @@ retry:
8566 if (IS_DIRSYNC(dir))
8567 handle->h_sync = 1;
8568
8569- inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
8570+ inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
8571 err = PTR_ERR(inode);
8572 if (IS_ERR(inode))
8573 goto out_stop;
8574
8575- if (l > sizeof (EXT4_I(inode)->i_data)) {
8576+ if (l > sizeof(EXT4_I(inode)->i_data)) {
8577 inode->i_op = &ext4_symlink_inode_operations;
8578 ext4_set_aops(inode);
8579 /*
8580@@ -2216,18 +2224,19 @@ retry:
8581 * We have a transaction open. All is sweetness. It also sets
8582 * i_size in generic_commit_write().
8583 */
8584- err = __page_symlink(inode, symname, l, 1);
8585+ err = __page_symlink(inode, symname, l,
8586+ mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
8587 if (err) {
8588 clear_nlink(inode);
8589 ext4_mark_inode_dirty(handle, inode);
8590- iput (inode);
8591+ iput(inode);
8592 goto out_stop;
8593 }
8594 } else {
8595 /* clear the extent format for fast symlink */
8596 EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
8597 inode->i_op = &ext4_fast_symlink_inode_operations;
8598- memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
8599+ memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
8600 inode->i_size = l-1;
8601 }
8602 EXT4_I(inode)->i_disksize = inode->i_size;
8603@@ -2239,8 +2248,8 @@ out_stop:
8604 return err;
8605 }
8606
8607-static int ext4_link (struct dentry * old_dentry,
8608- struct inode * dir, struct dentry *dentry)
8609+static int ext4_link(struct dentry *old_dentry,
8610+ struct inode *dir, struct dentry *dentry)
8611 {
8612 handle_t *handle;
8613 struct inode *inode = old_dentry->d_inode;
8614@@ -2283,13 +2292,13 @@ retry:
8615 * Anybody can rename anything with this: the permission checks are left to the
8616 * higher-level routines.
8617 */
8618-static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
8619- struct inode * new_dir,struct dentry *new_dentry)
8620+static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
8621+ struct inode *new_dir, struct dentry *new_dentry)
8622 {
8623 handle_t *handle;
8624- struct inode * old_inode, * new_inode;
8625- struct buffer_head * old_bh, * new_bh, * dir_bh;
8626- struct ext4_dir_entry_2 * old_de, * new_de;
8627+ struct inode *old_inode, *new_inode;
8628+ struct buffer_head *old_bh, *new_bh, *dir_bh;
8629+ struct ext4_dir_entry_2 *old_de, *new_de;
8630 int retval;
8631
8632 old_bh = new_bh = dir_bh = NULL;
8633@@ -2307,7 +2316,7 @@ static int ext4_rename (struct inode * o
8634 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
8635 handle->h_sync = 1;
8636
8637- old_bh = ext4_find_entry (old_dentry, &old_de);
8638+ old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
8639 /*
8640 * Check for inode number is _not_ due to possible IO errors.
8641 * We might rmdir the source, keep it as pwd of some process
8642@@ -2320,32 +2329,32 @@ static int ext4_rename (struct inode * o
8643 goto end_rename;
8644
8645 new_inode = new_dentry->d_inode;
8646- new_bh = ext4_find_entry (new_dentry, &new_de);
8647+ new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
8648 if (new_bh) {
8649 if (!new_inode) {
8650- brelse (new_bh);
8651+ brelse(new_bh);
8652 new_bh = NULL;
8653 }
8654 }
8655 if (S_ISDIR(old_inode->i_mode)) {
8656 if (new_inode) {
8657 retval = -ENOTEMPTY;
8658- if (!empty_dir (new_inode))
8659+ if (!empty_dir(new_inode))
8660 goto end_rename;
8661 }
8662 retval = -EIO;
8663- dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval);
8664+ dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
8665 if (!dir_bh)
8666 goto end_rename;
8667 if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
8668 goto end_rename;
8669 retval = -EMLINK;
8670- if (!new_inode && new_dir!=old_dir &&
8671+ if (!new_inode && new_dir != old_dir &&
8672 new_dir->i_nlink >= EXT4_LINK_MAX)
8673 goto end_rename;
8674 }
8675 if (!new_bh) {
8676- retval = ext4_add_entry (handle, new_dentry, old_inode);
8677+ retval = ext4_add_entry(handle, new_dentry, old_inode);
8678 if (retval)
8679 goto end_rename;
8680 } else {
8681@@ -2387,7 +2396,7 @@ static int ext4_rename (struct inode * o
8682 struct buffer_head *old_bh2;
8683 struct ext4_dir_entry_2 *old_de2;
8684
8685- old_bh2 = ext4_find_entry(old_dentry, &old_de2);
8686+ old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
8687 if (old_bh2) {
8688 retval = ext4_delete_entry(handle, old_dir,
8689 old_de2, old_bh2);
8690@@ -2432,9 +2441,9 @@ static int ext4_rename (struct inode * o
8691 retval = 0;
8692
8693 end_rename:
8694- brelse (dir_bh);
8695- brelse (old_bh);
8696- brelse (new_bh);
8697+ brelse(dir_bh);
8698+ brelse(old_bh);
8699+ brelse(new_bh);
8700 ext4_journal_stop(handle);
8701 return retval;
8702 }
8703@@ -2453,7 +2462,7 @@ const struct inode_operations ext4_dir_i
8704 .mknod = ext4_mknod,
8705 .rename = ext4_rename,
8706 .setattr = ext4_setattr,
8707-#ifdef CONFIG_EXT4DEV_FS_XATTR
8708+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
8709 .setxattr = generic_setxattr,
8710 .getxattr = generic_getxattr,
8711 .listxattr = ext4_listxattr,
8712@@ -2464,7 +2473,7 @@ const struct inode_operations ext4_dir_i
8713
8714 const struct inode_operations ext4_special_inode_operations = {
8715 .setattr = ext4_setattr,
8716-#ifdef CONFIG_EXT4DEV_FS_XATTR
8717+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
8718 .setxattr = generic_setxattr,
8719 .getxattr = generic_getxattr,
8720 .listxattr = ext4_listxattr,
8721diff -rup b/fs/ext4//resize.c a/fs/ext4///resize.c
8722--- b/fs/ext4/resize.c 2009-02-11 14:37:58.000000000 +0100
8723+++ a/fs/ext4/resize.c 2009-02-10 21:40:11.000000000 +0100
8724@@ -50,7 +50,7 @@ static int verify_group_input(struct sup
8725 ext4_get_group_no_and_offset(sb, start, NULL, &offset);
8726 if (group != sbi->s_groups_count)
8727 ext4_warning(sb, __func__,
8728- "Cannot add at group %u (only %lu groups)",
8729+ "Cannot add at group %u (only %u groups)",
8730 input->group, sbi->s_groups_count);
8731 else if (offset != 0)
8732 ext4_warning(sb, __func__, "Last group not full");
8733@@ -284,11 +284,9 @@ static int setup_new_group_blocks(struct
8734 if ((err = extend_or_restart_transaction(handle, 2, bh)))
8735 goto exit_bh;
8736
8737- mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
8738- bh->b_data);
8739+ mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
8740 ext4_journal_dirty_metadata(handle, bh);
8741 brelse(bh);
8742-
8743 /* Mark unused entries in inode bitmap used */
8744 ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
8745 input->inode_bitmap, input->inode_bitmap - start);
8746@@ -297,7 +295,7 @@ static int setup_new_group_blocks(struct
8747 goto exit_journal;
8748 }
8749
8750- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
8751+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
8752 bh->b_data);
8753 ext4_journal_dirty_metadata(handle, bh);
8754 exit_bh:
8755@@ -416,8 +414,8 @@ static int add_new_gdb(handle_t *handle,
8756 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
8757 gdb_num);
8758
8759- /*
8760- * If we are not using the primary superblock/GDT copy don't resize,
8761+ /*
8762+ * If we are not using the primary superblock/GDT copy don't resize,
8763 * because the user tools have no way of handling this. Probably a
8764 * bad time to do it anyways.
8765 */
8766@@ -715,7 +713,7 @@ static void update_backups(struct super_
8767 exit_err:
8768 if (err) {
8769 ext4_warning(sb, __func__,
8770- "can't update backup for group %lu (err %d), "
8771+ "can't update backup for group %u (err %d), "
8772 "forcing fsck on next reboot", group, err);
8773 sbi->s_mount_state &= ~EXT4_VALID_FS;
8774 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
8775@@ -747,6 +745,7 @@ int ext4_group_add(struct super_block *s
8776 struct inode *inode = NULL;
8777 handle_t *handle;
8778 int gdb_off, gdb_num;
8779+ int num_grp_locked = 0;
8780 int err, err2;
8781
8782 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
8783@@ -787,6 +786,7 @@ int ext4_group_add(struct super_block *s
8784 }
8785 }
8786
8787+
8788 if ((err = verify_group_input(sb, input)))
8789 goto exit_put;
8790
8791@@ -855,6 +855,7 @@ int ext4_group_add(struct super_block *s
8792 * using the new disk blocks.
8793 */
8794
8795+ num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
8796 /* Update group descriptor block for new group */
8797 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
8798 gdb_off * EXT4_DESC_SIZE(sb));
8799@@ -862,19 +863,21 @@ int ext4_group_add(struct super_block *s
8800 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
8801 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
8802 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
8803- gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
8804- gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
8805+ ext4_free_blks_set(sb, gdp, input->free_blocks_count);
8806+ ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
8807+ gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
8808 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
8809
8810 /*
8811 * We can allocate memory for mb_alloc based on the new group
8812 * descriptor
8813 */
8814- if (test_opt(sb, MBALLOC)) {
8815- err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
8816- if (err)
8817- goto exit_journal;
8818+ err = ext4_mb_add_groupinfo(sb, input->group, gdp);
8819+ if (err) {
8820+ ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
8821+ goto exit_journal;
8822 }
8823+
8824 /*
8825 * Make the new blocks and inodes valid next. We do this before
8826 * increasing the group count so that once the group is enabled,
8827@@ -915,6 +918,7 @@ int ext4_group_add(struct super_block *s
8828
8829 /* Update the global fs size fields */
8830 sbi->s_groups_count++;
8831+ ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
8832
8833 ext4_journal_dirty_metadata(handle, primary);
8834
8835@@ -973,12 +977,10 @@ int ext4_group_extend(struct super_block
8836 ext4_group_t o_groups_count;
8837 ext4_grpblk_t last;
8838 ext4_grpblk_t add;
8839- struct buffer_head * bh;
8840+ struct buffer_head *bh;
8841 handle_t *handle;
8842 int err;
8843- unsigned long freed_blocks;
8844 ext4_group_t group;
8845- struct ext4_group_info *grp;
8846
8847 /* We don't need to worry about locking wrt other resizers just
8848 * yet: we're going to revalidate es->s_blocks_count after
8849@@ -1077,50 +1079,13 @@ int ext4_group_extend(struct super_block
8850 unlock_super(sb);
8851 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
8852 o_blocks_count + add);
8853- ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
8854+ /* We add the blocks to the bitmap and set the group need init bit */
8855+ ext4_add_groupblocks(handle, sb, o_blocks_count, add);
8856 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
8857 o_blocks_count + add);
8858 if ((err = ext4_journal_stop(handle)))
8859 goto exit_put;
8860
8861- /*
8862- * Mark mballoc pages as not up to date so that they will be updated
8863- * next time they are loaded by ext4_mb_load_buddy.
8864- */
8865- if (test_opt(sb, MBALLOC)) {
8866- struct ext4_sb_info *sbi = EXT4_SB(sb);
8867- struct inode *inode = sbi->s_buddy_cache;
8868- int blocks_per_page;
8869- int block;
8870- int pnum;
8871- struct page *page;
8872-
8873- /* Set buddy page as not up to date */
8874- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
8875- block = group * 2;
8876- pnum = block / blocks_per_page;
8877- page = find_get_page(inode->i_mapping, pnum);
8878- if (page != NULL) {
8879- ClearPageUptodate(page);
8880- page_cache_release(page);
8881- }
8882-
8883- /* Set bitmap page as not up to date */
8884- block++;
8885- pnum = block / blocks_per_page;
8886- page = find_get_page(inode->i_mapping, pnum);
8887- if (page != NULL) {
8888- ClearPageUptodate(page);
8889- page_cache_release(page);
8890- }
8891-
8892- /* Get the info on the last group */
8893- grp = ext4_get_group_info(sb, group);
8894-
8895- /* Update free blocks in group info */
8896- ext4_mb_update_group_info(grp, add);
8897- }
8898-
8899 if (test_opt(sb, DEBUG))
8900 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
8901 ext4_blocks_count(es));
8902diff -rup b/fs/ext4//super.c a/fs/ext4///super.c
8903--- b/fs/ext4/super.c 2009-02-11 14:37:58.000000000 +0100
8904+++ a/fs/ext4/super.c 2009-02-11 13:47:04.000000000 +0100
8905@@ -34,6 +34,8 @@
8906 #include <linux/namei.h>
8907 #include <linux/quotaops.h>
8908 #include <linux/seq_file.h>
8909+#include <linux/proc_fs.h>
8910+#include <linux/marker.h>
8911 #include <linux/log2.h>
8912 #include <linux/crc16.h>
8913 #include <asm/uaccess.h>
8914@@ -45,6 +47,8 @@
8915 #include "namei.h"
8916 #include "group.h"
8917
8918+struct proc_dir_entry *ext4_proc_root;
8919+
8920 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
8921 unsigned long journal_devnum);
8922 static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
8923@@ -89,6 +93,38 @@ ext4_fsblk_t ext4_inode_table(struct sup
8924 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
8925 }
8926
8927+__u32 ext4_free_blks_count(struct super_block *sb,
8928+ struct ext4_group_desc *bg)
8929+{
8930+ return le16_to_cpu(bg->bg_free_blocks_count_lo) |
8931+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8932+ (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
8933+}
8934+
8935+__u32 ext4_free_inodes_count(struct super_block *sb,
8936+ struct ext4_group_desc *bg)
8937+{
8938+ return le16_to_cpu(bg->bg_free_inodes_count_lo) |
8939+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8940+ (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
8941+}
8942+
8943+__u32 ext4_used_dirs_count(struct super_block *sb,
8944+ struct ext4_group_desc *bg)
8945+{
8946+ return le16_to_cpu(bg->bg_used_dirs_count_lo) |
8947+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8948+ (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
8949+}
8950+
8951+__u32 ext4_itable_unused_count(struct super_block *sb,
8952+ struct ext4_group_desc *bg)
8953+{
8954+ return le16_to_cpu(bg->bg_itable_unused_lo) |
8955+ (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
8956+ (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
8957+}
8958+
8959 void ext4_block_bitmap_set(struct super_block *sb,
8960 struct ext4_group_desc *bg, ext4_fsblk_t blk)
8961 {
8962@@ -113,6 +149,38 @@ void ext4_inode_table_set(struct super_b
8963 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
8964 }
8965
8966+void ext4_free_blks_set(struct super_block *sb,
8967+ struct ext4_group_desc *bg, __u32 count)
8968+{
8969+ bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
8970+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8971+ bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
8972+}
8973+
8974+void ext4_free_inodes_set(struct super_block *sb,
8975+ struct ext4_group_desc *bg, __u32 count)
8976+{
8977+ bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
8978+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8979+ bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
8980+}
8981+
8982+void ext4_used_dirs_set(struct super_block *sb,
8983+ struct ext4_group_desc *bg, __u32 count)
8984+{
8985+ bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
8986+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8987+ bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
8988+}
8989+
8990+void ext4_itable_unused_set(struct super_block *sb,
8991+ struct ext4_group_desc *bg, __u32 count)
8992+{
8993+ bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
8994+ if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
8995+ bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
8996+}
8997+
8998 /*
8999 * Wrappers for jbd2_journal_start/end.
9000 *
9001@@ -329,7 +397,8 @@ void ext4_abort(struct super_block *sb,
9002 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
9003 sb->s_flags |= MS_RDONLY;
9004 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
9005- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
9006+ if (EXT4_SB(sb)->s_journal)
9007+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
9008 }
9009
9010 void ext4_warning(struct super_block *sb, const char *function,
9011@@ -345,6 +414,44 @@ void ext4_warning(struct super_block *sb
9012 va_end(args);
9013 }
9014
9015+void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
9016+ const char *function, const char *fmt, ...)
9017+__releases(bitlock)
9018+__acquires(bitlock)
9019+{
9020+ va_list args;
9021+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
9022+
9023+ va_start(args, fmt);
9024+ printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
9025+ vprintk(fmt, args);
9026+ printk("\n");
9027+ va_end(args);
9028+
9029+ if (test_opt(sb, ERRORS_CONT)) {
9030+ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
9031+ es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
9032+ ext4_commit_super(sb, es, 0);
9033+ return;
9034+ }
9035+ ext4_unlock_group(sb, grp);
9036+ ext4_handle_error(sb);
9037+ /*
9038+ * We only get here in the ERRORS_RO case; relocking the group
9039+ * may be dangerous, but nothing bad will happen since the
9040+ * filesystem will have already been marked read/only and the
9041+ * journal has been aborted. We return 1 as a hint to callers
9042+ * who might what to use the return value from
9043+ * ext4_grp_locked_error() to distinguish beween the
9044+ * ERRORS_CONT and ERRORS_RO case, and perhaps return more
9045+ * aggressively from the ext4 function in question, with a
9046+ * more appropriate error code.
9047+ */
9048+ ext4_lock_group(sb, grp);
9049+ return;
9050+}
9051+
9052+
9053 void ext4_update_dynamic_rev(struct super_block *sb)
9054 {
9055 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
9056@@ -370,66 +477,6 @@ void ext4_update_dynamic_rev(struct supe
9057 */
9058 }
9059
9060-int ext4_update_compat_feature(handle_t *handle,
9061- struct super_block *sb, __u32 compat)
9062-{
9063- int err = 0;
9064- if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
9065- err = ext4_journal_get_write_access(handle,
9066- EXT4_SB(sb)->s_sbh);
9067- if (err)
9068- return err;
9069- EXT4_SET_COMPAT_FEATURE(sb, compat);
9070- sb->s_dirt = 1;
9071- handle->h_sync = 1;
9072- BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
9073- "call ext4_journal_dirty_met adata");
9074- err = ext4_journal_dirty_metadata(handle,
9075- EXT4_SB(sb)->s_sbh);
9076- }
9077- return err;
9078-}
9079-
9080-int ext4_update_rocompat_feature(handle_t *handle,
9081- struct super_block *sb, __u32 rocompat)
9082-{
9083- int err = 0;
9084- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
9085- err = ext4_journal_get_write_access(handle,
9086- EXT4_SB(sb)->s_sbh);
9087- if (err)
9088- return err;
9089- EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
9090- sb->s_dirt = 1;
9091- handle->h_sync = 1;
9092- BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
9093- "call ext4_journal_dirty_met adata");
9094- err = ext4_journal_dirty_metadata(handle,
9095- EXT4_SB(sb)->s_sbh);
9096- }
9097- return err;
9098-}
9099-
9100-int ext4_update_incompat_feature(handle_t *handle,
9101- struct super_block *sb, __u32 incompat)
9102-{
9103- int err = 0;
9104- if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
9105- err = ext4_journal_get_write_access(handle,
9106- EXT4_SB(sb)->s_sbh);
9107- if (err)
9108- return err;
9109- EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
9110- sb->s_dirt = 1;
9111- handle->h_sync = 1;
9112- BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
9113- "call ext4_journal_dirty_met adata");
9114- err = ext4_journal_dirty_metadata(handle,
9115- EXT4_SB(sb)->s_sbh);
9116- }
9117- return err;
9118-}
9119-
9120 /*
9121 * Open the external journal device
9122 */
9123@@ -505,13 +552,16 @@ static void ext4_put_super(struct super_
9124 ext4_xattr_put_super(sb);
9125 jbd2_journal_destroy(sbi->s_journal);
9126 sbi->s_journal = NULL;
9127+
9128 if (!(sb->s_flags & MS_RDONLY)) {
9129 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
9130 es->s_state = cpu_to_le16(sbi->s_mount_state);
9131- BUFFER_TRACE(sbi->s_sbh, "marking dirty");
9132- mark_buffer_dirty(sbi->s_sbh);
9133 ext4_commit_super(sb, es, 1);
9134 }
9135+ if (sbi->s_proc) {
9136+ remove_proc_entry("inode_readahead_blks", sbi->s_proc);
9137+ remove_proc_entry(sb->s_id, ext4_proc_root);
9138+ }
9139
9140 for (i = 0; i < sbi->s_gdb_count; i++)
9141 brelse(sbi->s_group_desc[i]);
9142@@ -520,6 +570,7 @@ static void ext4_put_super(struct super_
9143 percpu_counter_destroy(&sbi->s_freeblocks_counter);
9144 percpu_counter_destroy(&sbi->s_freeinodes_counter);
9145 percpu_counter_destroy(&sbi->s_dirs_counter);
9146+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
9147 brelse(sbi->s_sbh);
9148 #ifdef CONFIG_QUOTA
9149 for (i = 0; i < MAXQUOTAS; i++)
9150@@ -562,11 +613,10 @@ static struct inode *ext4_alloc_inode(st
9151 ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
9152 if (!ei)
9153 return NULL;
9154-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9155+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9156 ei->i_acl = EXT4_ACL_NOT_CACHED;
9157 ei->i_default_acl = EXT4_ACL_NOT_CACHED;
9158 #endif
9159- ei->i_block_alloc_info = NULL;
9160 ei->vfs_inode.i_version = 1;
9161 ei->vfs_inode.i_data.writeback_index = 0;
9162 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
9163@@ -599,7 +649,7 @@ static void init_once(void *foo)
9164 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
9165
9166 INIT_LIST_HEAD(&ei->i_orphan);
9167-#ifdef CONFIG_EXT4DEV_FS_XATTR
9168+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9169 init_rwsem(&ei->xattr_sem);
9170 #endif
9171 init_rwsem(&ei->i_data_sem);
9172@@ -625,8 +675,7 @@ static void destroy_inodecache(void)
9173
9174 static void ext4_clear_inode(struct inode *inode)
9175 {
9176- struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
9177-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9178+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9179 if (EXT4_I(inode)->i_acl &&
9180 EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
9181 posix_acl_release(EXT4_I(inode)->i_acl);
9182@@ -638,10 +687,7 @@ static void ext4_clear_inode(struct inod
9183 EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
9184 }
9185 #endif
9186- ext4_discard_reservation(inode);
9187- EXT4_I(inode)->i_block_alloc_info = NULL;
9188- if (unlikely(rsv))
9189- kfree(rsv);
9190+ ext4_discard_preallocations(inode);
9191 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
9192 &EXT4_I(inode)->jinode);
9193 }
9194@@ -654,7 +700,7 @@ static inline void ext4_show_quota_optio
9195
9196 if (sbi->s_jquota_fmt)
9197 seq_printf(seq, ",jqfmt=%s",
9198- (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0");
9199+ (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0");
9200
9201 if (sbi->s_qf_names[USRQUOTA])
9202 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
9203@@ -718,7 +764,7 @@ static int ext4_show_options(struct seq_
9204 seq_puts(seq, ",debug");
9205 if (test_opt(sb, OLDALLOC))
9206 seq_puts(seq, ",oldalloc");
9207-#ifdef CONFIG_EXT4DEV_FS_XATTR
9208+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9209 if (test_opt(sb, XATTR_USER) &&
9210 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
9211 seq_puts(seq, ",user_xattr");
9212@@ -727,7 +773,7 @@ static int ext4_show_options(struct seq_
9213 seq_puts(seq, ",nouser_xattr");
9214 }
9215 #endif
9216-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9217+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9218 if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
9219 seq_puts(seq, ",acl");
9220 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
9221@@ -752,8 +798,6 @@ static int ext4_show_options(struct seq_
9222 seq_puts(seq, ",nobh");
9223 if (!test_opt(sb, EXTENTS))
9224 seq_puts(seq, ",noextents");
9225- if (!test_opt(sb, MBALLOC))
9226- seq_puts(seq, ",nomballoc");
9227 if (test_opt(sb, I_VERSION))
9228 seq_puts(seq, ",i_version");
9229 if (!test_opt(sb, DELALLOC))
9230@@ -773,6 +817,10 @@ static int ext4_show_options(struct seq_
9231 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
9232 seq_puts(seq, ",data=writeback");
9233
9234+ if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
9235+ seq_printf(seq, ",inode_readahead_blks=%u",
9236+ sbi->s_inode_readahead_blks);
9237+
9238 ext4_show_quota_options(seq, sb);
9239 return 0;
9240 }
9241@@ -822,7 +870,7 @@ static struct dentry *ext4_fh_to_parent(
9242 }
9243
9244 #ifdef CONFIG_QUOTA
9245-#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
9246+#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
9247 #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
9248
9249 static int ext4_dquot_initialize(struct inode *inode, int type);
9250@@ -896,7 +944,7 @@ static const struct export_operations ex
9251 enum {
9252 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
9253 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
9254- Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
9255+ Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
9256 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
9257 Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
9258 Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
9259@@ -906,10 +954,11 @@ enum {
9260 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
9261 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
9262 Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
9263- Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
9264+ Opt_stripe, Opt_delalloc, Opt_nodelalloc,
9265+ Opt_inode_readahead_blks
9266 };
9267
9268-static match_table_t tokens = {
9269+static const match_table_t tokens = {
9270 {Opt_bsd_df, "bsddf"},
9271 {Opt_minix_df, "minixdf"},
9272 {Opt_grpid, "grpid"},
9273@@ -923,8 +972,6 @@ static match_table_t tokens = {
9274 {Opt_err_panic, "errors=panic"},
9275 {Opt_err_ro, "errors=remount-ro"},
9276 {Opt_nouid32, "nouid32"},
9277- {Opt_nocheck, "nocheck"},
9278- {Opt_nocheck, "check=none"},
9279 {Opt_debug, "debug"},
9280 {Opt_oldalloc, "oldalloc"},
9281 {Opt_orlov, "orlov"},
9282@@ -961,12 +1008,11 @@ static match_table_t tokens = {
9283 {Opt_extents, "extents"},
9284 {Opt_noextents, "noextents"},
9285 {Opt_i_version, "i_version"},
9286- {Opt_mballoc, "mballoc"},
9287- {Opt_nomballoc, "nomballoc"},
9288 {Opt_stripe, "stripe=%u"},
9289 {Opt_resize, "resize"},
9290 {Opt_delalloc, "delalloc"},
9291 {Opt_nodelalloc, "nodelalloc"},
9292+ {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
9293 {Opt_err, NULL},
9294 };
9295
9296@@ -981,7 +1027,7 @@ static ext4_fsblk_t get_sb_block(void **
9297 /*todo: use simple_strtoll with >32bit ext4 */
9298 sb_block = simple_strtoul(options, &options, 0);
9299 if (*options && *options != ',') {
9300- printk("EXT4-fs: Invalid sb specification: %s\n",
9301+ printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
9302 (char *) *data);
9303 return 1;
9304 }
9305@@ -1060,9 +1106,6 @@ static int parse_options(char *options,
9306 case Opt_nouid32:
9307 set_opt(sbi->s_mount_opt, NO_UID32);
9308 break;
9309- case Opt_nocheck:
9310- clear_opt(sbi->s_mount_opt, CHECK);
9311- break;
9312 case Opt_debug:
9313 set_opt(sbi->s_mount_opt, DEBUG);
9314 break;
9315@@ -1072,7 +1115,7 @@ static int parse_options(char *options,
9316 case Opt_orlov:
9317 clear_opt(sbi->s_mount_opt, OLDALLOC);
9318 break;
9319-#ifdef CONFIG_EXT4DEV_FS_XATTR
9320+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9321 case Opt_user_xattr:
9322 set_opt(sbi->s_mount_opt, XATTR_USER);
9323 break;
9324@@ -1082,10 +1125,11 @@ static int parse_options(char *options,
9325 #else
9326 case Opt_user_xattr:
9327 case Opt_nouser_xattr:
9328- printk("EXT4 (no)user_xattr options not supported\n");
9329+ printk(KERN_ERR "EXT4 (no)user_xattr options "
9330+ "not supported\n");
9331 break;
9332 #endif
9333-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9334+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9335 case Opt_acl:
9336 set_opt(sbi->s_mount_opt, POSIX_ACL);
9337 break;
9338@@ -1095,7 +1139,8 @@ static int parse_options(char *options,
9339 #else
9340 case Opt_acl:
9341 case Opt_noacl:
9342- printk("EXT4 (no)acl options not supported\n");
9343+ printk(KERN_ERR "EXT4 (no)acl options "
9344+ "not supported\n");
9345 break;
9346 #endif
9347 case Opt_reservation:
9348@@ -1185,12 +1230,11 @@ static int parse_options(char *options,
9349 case Opt_grpjquota:
9350 qtype = GRPQUOTA;
9351 set_qf_name:
9352- if ((sb_any_quota_enabled(sb) ||
9353- sb_any_quota_suspended(sb)) &&
9354+ if (sb_any_quota_loaded(sb) &&
9355 !sbi->s_qf_names[qtype]) {
9356 printk(KERN_ERR
9357- "EXT4-fs: Cannot change journaled "
9358- "quota options when quota turned on.\n");
9359+ "EXT4-fs: Cannot change journaled "
9360+ "quota options when quota turned on.\n");
9361 return 0;
9362 }
9363 qname = match_strdup(&args[0]);
9364@@ -1225,8 +1269,7 @@ set_qf_name:
9365 case Opt_offgrpjquota:
9366 qtype = GRPQUOTA;
9367 clear_qf_name:
9368- if ((sb_any_quota_enabled(sb) ||
9369- sb_any_quota_suspended(sb)) &&
9370+ if (sb_any_quota_loaded(sb) &&
9371 sbi->s_qf_names[qtype]) {
9372 printk(KERN_ERR "EXT4-fs: Cannot change "
9373 "journaled quota options when "
9374@@ -1245,8 +1288,7 @@ clear_qf_name:
9375 case Opt_jqfmt_vfsv0:
9376 qfmt = QFMT_VFS_V0;
9377 set_qf_format:
9378- if ((sb_any_quota_enabled(sb) ||
9379- sb_any_quota_suspended(sb)) &&
9380+ if (sb_any_quota_loaded(sb) &&
9381 sbi->s_jquota_fmt != qfmt) {
9382 printk(KERN_ERR "EXT4-fs: Cannot change "
9383 "journaled quota options when "
9384@@ -1265,7 +1307,7 @@ set_qf_format:
9385 set_opt(sbi->s_mount_opt, GRPQUOTA);
9386 break;
9387 case Opt_noquota:
9388- if (sb_any_quota_enabled(sb)) {
9389+ if (sb_any_quota_loaded(sb)) {
9390 printk(KERN_ERR "EXT4-fs: Cannot change quota "
9391 "options when quota turned on.\n");
9392 return 0;
9393@@ -1357,12 +1399,6 @@ set_qf_format:
9394 case Opt_nodelalloc:
9395 clear_opt(sbi->s_mount_opt, DELALLOC);
9396 break;
9397- case Opt_mballoc:
9398- set_opt(sbi->s_mount_opt, MBALLOC);
9399- break;
9400- case Opt_nomballoc:
9401- clear_opt(sbi->s_mount_opt, MBALLOC);
9402- break;
9403 case Opt_stripe:
9404 if (match_int(&args[0], &option))
9405 return 0;
9406@@ -1373,6 +1409,13 @@ set_qf_format:
9407 case Opt_delalloc:
9408 set_opt(sbi->s_mount_opt, DELALLOC);
9409 break;
9410+ case Opt_inode_readahead_blks:
9411+ if (match_int(&args[0], &option))
9412+ return 0;
9413+ if (option < 0 || option > (1 << 30))
9414+ return 0;
9415+ sbi->s_inode_readahead_blks = option;
9416+ break;
9417 default:
9418 printk(KERN_ERR
9419 "EXT4-fs: Unrecognized mount option \"%s\" "
9420@@ -1465,7 +1508,7 @@ static int ext4_setup_super(struct super
9421
9422 ext4_commit_super(sb, es, 1);
9423 if (test_opt(sb, DEBUG))
9424- printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, "
9425+ printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
9426 "bpg=%lu, ipg=%lu, mo=%04lx]\n",
9427 sb->s_blocksize,
9428 sbi->s_groups_count,
9429@@ -1473,14 +1516,14 @@ static int ext4_setup_super(struct super
9430 EXT4_INODES_PER_GROUP(sb),
9431 sbi->s_mount_opt);
9432
9433- printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id);
9434 if (EXT4_SB(sb)->s_journal->j_inode == NULL) {
9435 char b[BDEVNAME_SIZE];
9436
9437- printk("external journal on %s\n",
9438- bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
9439+ printk(KERN_INFO "EXT4 FS on %s, external journal on %s\n",
9440+ sb->s_id, bdevname(EXT4_SB(sb)->s_journal->j_dev, b));
9441 } else {
9442- printk("internal journal\n");
9443+ printk(KERN_INFO "EXT4 FS on %s, internal journal\n",
9444+ sb->s_id);
9445 }
9446 return res;
9447 }
9448@@ -1493,7 +1536,6 @@ static int ext4_fill_flex_info(struct su
9449 ext4_group_t flex_group_count;
9450 ext4_group_t flex_group;
9451 int groups_per_flex = 0;
9452- __u64 block_bitmap = 0;
9453 int i;
9454
9455 if (!sbi->s_es->s_log_groups_per_flex) {
9456@@ -1512,21 +1554,18 @@ static int ext4_fill_flex_info(struct su
9457 sizeof(struct flex_groups), GFP_KERNEL);
9458 if (sbi->s_flex_groups == NULL) {
9459 printk(KERN_ERR "EXT4-fs: not enough memory for "
9460- "%lu flex groups\n", flex_group_count);
9461+ "%u flex groups\n", flex_group_count);
9462 goto failed;
9463 }
9464
9465- gdp = ext4_get_group_desc(sb, 1, &bh);
9466- block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
9467-
9468 for (i = 0; i < sbi->s_groups_count; i++) {
9469 gdp = ext4_get_group_desc(sb, i, &bh);
9470
9471 flex_group = ext4_flex_group(sbi, i);
9472 sbi->s_flex_groups[flex_group].free_inodes +=
9473- le16_to_cpu(gdp->bg_free_inodes_count);
9474+ ext4_free_inodes_count(sb, gdp);
9475 sbi->s_flex_groups[flex_group].free_blocks +=
9476- le16_to_cpu(gdp->bg_free_blocks_count);
9477+ ext4_free_blks_count(sb, gdp);
9478 }
9479
9480 return 1;
9481@@ -1586,7 +1625,7 @@ static int ext4_check_descriptors(struct
9482 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
9483 flexbg_flag = 1;
9484
9485- ext4_debug ("Checking group descriptors");
9486+ ext4_debug("Checking group descriptors");
9487
9488 for (i = 0; i < sbi->s_groups_count; i++) {
9489 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
9490@@ -1600,29 +1639,29 @@ static int ext4_check_descriptors(struct
9491 block_bitmap = ext4_block_bitmap(sb, gdp);
9492 if (block_bitmap < first_block || block_bitmap > last_block) {
9493 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9494- "Block bitmap for group %lu not in group "
9495- "(block %llu)!", i, block_bitmap);
9496+ "Block bitmap for group %u not in group "
9497+ "(block %llu)!\n", i, block_bitmap);
9498 return 0;
9499 }
9500 inode_bitmap = ext4_inode_bitmap(sb, gdp);
9501 if (inode_bitmap < first_block || inode_bitmap > last_block) {
9502 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9503- "Inode bitmap for group %lu not in group "
9504- "(block %llu)!", i, inode_bitmap);
9505+ "Inode bitmap for group %u not in group "
9506+ "(block %llu)!\n", i, inode_bitmap);
9507 return 0;
9508 }
9509 inode_table = ext4_inode_table(sb, gdp);
9510 if (inode_table < first_block ||
9511 inode_table + sbi->s_itb_per_group - 1 > last_block) {
9512 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9513- "Inode table for group %lu not in group "
9514- "(block %llu)!", i, inode_table);
9515+ "Inode table for group %u not in group "
9516+ "(block %llu)!\n", i, inode_table);
9517 return 0;
9518 }
9519 spin_lock(sb_bgl_lock(sbi, i));
9520 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
9521 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
9522- "Checksum for group %lu failed (%u!=%u)\n",
9523+ "Checksum for group %u failed (%u!=%u)\n",
9524 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
9525 gdp)), le16_to_cpu(gdp->bg_checksum));
9526 if (!(sb->s_flags & MS_RDONLY)) {
9527@@ -1718,9 +1757,9 @@ static void ext4_orphan_cleanup(struct s
9528 DQUOT_INIT(inode);
9529 if (inode->i_nlink) {
9530 printk(KERN_DEBUG
9531- "%s: truncating inode %lu to %Ld bytes\n",
9532+ "%s: truncating inode %lu to %lld bytes\n",
9533 __func__, inode->i_ino, inode->i_size);
9534- jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
9535+ jbd_debug(2, "truncating inode %lu to %lld bytes\n",
9536 inode->i_ino, inode->i_size);
9537 ext4_truncate(inode);
9538 nr_truncates++;
9539@@ -1761,13 +1800,13 @@ static void ext4_orphan_cleanup(struct s
9540 *
9541 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
9542 */
9543-static loff_t ext4_max_size(int blkbits)
9544+static loff_t ext4_max_size(int blkbits, int has_huge_files)
9545 {
9546 loff_t res;
9547 loff_t upper_limit = MAX_LFS_FILESIZE;
9548
9549 /* small i_blocks in vfs inode? */
9550- if (sizeof(blkcnt_t) < sizeof(u64)) {
9551+ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
9552 /*
9553 * CONFIG_LSF is not enabled implies the inode
9554 * i_block represent total blocks in 512 bytes
9555@@ -1797,7 +1836,7 @@ static loff_t ext4_max_size(int blkbits)
9556 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
9557 * We need to be 1 filesystem block less than the 2^48 sector limit.
9558 */
9559-static loff_t ext4_max_bitmap_size(int bits)
9560+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
9561 {
9562 loff_t res = EXT4_NDIR_BLOCKS;
9563 int meta_blocks;
9564@@ -1810,11 +1849,11 @@ static loff_t ext4_max_bitmap_size(int b
9565 * total number of 512 bytes blocks of the file
9566 */
9567
9568- if (sizeof(blkcnt_t) < sizeof(u64)) {
9569+ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
9570 /*
9571- * CONFIG_LSF is not enabled implies the inode
9572- * i_block represent total blocks in 512 bytes
9573- * 32 == size of vfs inode i_blocks * 8
9574+ * !has_huge_files or CONFIG_LSF is not enabled
9575+ * implies the inode i_block represent total blocks in
9576+ * 512 bytes 32 == size of vfs inode i_blocks * 8
9577 */
9578 upper_limit = (1LL << 32) - 1;
9579
9580@@ -1918,12 +1957,13 @@ static int ext4_fill_super(struct super_
9581 unsigned long journal_devnum = 0;
9582 unsigned long def_mount_opts;
9583 struct inode *root;
9584+ char *cp;
9585 int ret = -EINVAL;
9586 int blocksize;
9587 int db_count;
9588 int i;
9589- int needs_recovery;
9590- __le32 features;
9591+ int needs_recovery, has_huge_files;
9592+ int features;
9593 __u64 blocks_count;
9594 int err;
9595
9596@@ -1934,10 +1974,15 @@ static int ext4_fill_super(struct super_
9597 sbi->s_mount_opt = 0;
9598 sbi->s_resuid = EXT4_DEF_RESUID;
9599 sbi->s_resgid = EXT4_DEF_RESGID;
9600+ sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
9601 sbi->s_sb_block = sb_block;
9602
9603 unlock_kernel();
9604
9605+ /* Cleanup superblock name */
9606+ for (cp = sb->s_id; (cp = strchr(cp, '/'));)
9607+ *cp = '!';
9608+
9609 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
9610 if (!blocksize) {
9611 printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
9612@@ -1977,11 +2022,11 @@ static int ext4_fill_super(struct super_
9613 set_opt(sbi->s_mount_opt, GRPID);
9614 if (def_mount_opts & EXT4_DEFM_UID16)
9615 set_opt(sbi->s_mount_opt, NO_UID32);
9616-#ifdef CONFIG_EXT4DEV_FS_XATTR
9617+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
9618 if (def_mount_opts & EXT4_DEFM_XATTR_USER)
9619 set_opt(sbi->s_mount_opt, XATTR_USER);
9620 #endif
9621-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
9622+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
9623 if (def_mount_opts & EXT4_DEFM_ACL)
9624 set_opt(sbi->s_mount_opt, POSIX_ACL);
9625 #endif
9626@@ -2016,11 +2061,6 @@ static int ext4_fill_super(struct super_
9627 ext4_warning(sb, __func__,
9628 "extents feature not enabled on this filesystem, "
9629 "use tune2fs.\n");
9630- /*
9631- * turn on mballoc code by default in ext4 filesystem
9632- * Use -o nomballoc to turn it off
9633- */
9634- set_opt(sbi->s_mount_opt, MBALLOC);
9635
9636 /*
9637 * enable delayed allocation by default
9638@@ -2045,16 +2085,6 @@ static int ext4_fill_super(struct super_
9639 "running e2fsck is recommended\n");
9640
9641 /*
9642- * Since ext4 is still considered development code, we require
9643- * that the TEST_FILESYS flag in s->flags be set.
9644- */
9645- if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
9646- printk(KERN_WARNING "EXT4-fs: %s: not marked "
9647- "OK to use with test code.\n", sb->s_id);
9648- goto failed_mount;
9649- }
9650-
9651- /*
9652 * Check feature flags regardless of the revision level, since we
9653 * previously didn't change the revision level when setting the flags,
9654 * so there is a chance incompat flags are set on a rev 0 filesystem.
9655@@ -2062,18 +2092,22 @@ static int ext4_fill_super(struct super_
9656 features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
9657 if (features) {
9658 printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
9659- "unsupported optional features (%x).\n",
9660- sb->s_id, le32_to_cpu(features));
9661+ "unsupported optional features (%x).\n", sb->s_id,
9662+ (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
9663+ ~EXT4_FEATURE_INCOMPAT_SUPP));
9664 goto failed_mount;
9665 }
9666 features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
9667 if (!(sb->s_flags & MS_RDONLY) && features) {
9668 printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
9669- "unsupported optional features (%x).\n",
9670- sb->s_id, le32_to_cpu(features));
9671+ "unsupported optional features (%x).\n", sb->s_id,
9672+ (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
9673+ ~EXT4_FEATURE_RO_COMPAT_SUPP));
9674 goto failed_mount;
9675 }
9676- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
9677+ has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
9678+ EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
9679+ if (has_huge_files) {
9680 /*
9681 * Large file size enabled file system can only be
9682 * mount if kernel is build with CONFIG_LSF
9683@@ -2123,8 +2157,9 @@ static int ext4_fill_super(struct super_
9684 }
9685 }
9686
9687- sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
9688- sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
9689+ sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
9690+ has_huge_files);
9691+ sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
9692
9693 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
9694 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
9695@@ -2172,6 +2207,18 @@ static int ext4_fill_super(struct super_
9696 for (i = 0; i < 4; i++)
9697 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
9698 sbi->s_def_hash_version = es->s_def_hash_version;
9699+ i = le32_to_cpu(es->s_flags);
9700+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
9701+ sbi->s_hash_unsigned = 3;
9702+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
9703+#ifdef __CHAR_UNSIGNED__
9704+ es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
9705+ sbi->s_hash_unsigned = 3;
9706+#else
9707+ es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
9708+#endif
9709+ sb->s_dirt = 1;
9710+ }
9711
9712 if (sbi->s_blocks_per_group > blocksize * 8) {
9713 printk(KERN_ERR
9714@@ -2223,6 +2270,16 @@ static int ext4_fill_super(struct super_
9715 goto failed_mount;
9716 }
9717
9718+#ifdef CONFIG_PROC_FS
9719+ if (ext4_proc_root)
9720+ sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
9721+
9722+ if (sbi->s_proc)
9723+ proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
9724+ &ext4_ui_proc_fops,
9725+ &sbi->s_inode_readahead_blks);
9726+#endif
9727+
9728 bgl_lock_init(&sbi->s_blockgroup_lock);
9729
9730 for (i = 0; i < db_count; i++) {
9731@@ -2261,24 +2318,14 @@ static int ext4_fill_super(struct super_
9732 err = percpu_counter_init(&sbi->s_dirs_counter,
9733 ext4_count_dirs(sb));
9734 }
9735+ if (!err) {
9736+ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
9737+ }
9738 if (err) {
9739 printk(KERN_ERR "EXT4-fs: insufficient memory\n");
9740 goto failed_mount3;
9741 }
9742
9743- /* per fileystem reservation list head & lock */
9744- spin_lock_init(&sbi->s_rsv_window_lock);
9745- sbi->s_rsv_window_root = RB_ROOT;
9746- /* Add a single, static dummy reservation to the start of the
9747- * reservation window list --- it gives us a placeholder for
9748- * append-at-start-of-list which makes the allocation logic
9749- * _much_ simpler. */
9750- sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
9751- sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
9752- sbi->s_rsv_window_head.rsv_alloc_hit = 0;
9753- sbi->s_rsv_window_head.rsv_goal_size = 0;
9754- ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
9755-
9756 sbi->s_stripe = ext4_get_stripe_size(sbi);
9757
9758 /*
9759@@ -2498,11 +2545,16 @@ failed_mount3:
9760 percpu_counter_destroy(&sbi->s_freeblocks_counter);
9761 percpu_counter_destroy(&sbi->s_freeinodes_counter);
9762 percpu_counter_destroy(&sbi->s_dirs_counter);
9763+ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
9764 failed_mount2:
9765 for (i = 0; i < db_count; i++)
9766 brelse(sbi->s_group_desc[i]);
9767 kfree(sbi->s_group_desc);
9768 failed_mount:
9769+ if (sbi->s_proc) {
9770+ remove_proc_entry("inode_readahead_blks", sbi->s_proc);
9771+ remove_proc_entry(sb->s_id, ext4_proc_root);
9772+ }
9773 #ifdef CONFIG_QUOTA
9774 for (i = 0; i < MAXQUOTAS; i++)
9775 kfree(sbi->s_qf_names[i]);
9776@@ -2561,7 +2613,7 @@ static journal_t *ext4_get_journal(struc
9777 return NULL;
9778 }
9779
9780- jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
9781+ jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
9782 journal_inode, journal_inode->i_size);
9783 if (!S_ISREG(journal_inode->i_mode)) {
9784 printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
9785@@ -2724,6 +2776,11 @@ static int ext4_load_journal(struct supe
9786 return -EINVAL;
9787 }
9788
9789+ if (journal->j_flags & JBD2_BARRIER)
9790+ printk(KERN_INFO "EXT4-fs: barriers enabled\n");
9791+ else
9792+ printk(KERN_INFO "EXT4-fs: barriers disabled\n");
9793+
9794 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
9795 err = jbd2_journal_update_format(journal);
9796 if (err) {
9797@@ -2823,8 +2880,11 @@ static void ext4_commit_super(struct sup
9798 set_buffer_uptodate(sbh);
9799 }
9800 es->s_wtime = cpu_to_le32(get_seconds());
9801- ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
9802- es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
9803+ ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
9804+ &EXT4_SB(sb)->s_freeblocks_counter));
9805+ es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
9806+ &EXT4_SB(sb)->s_freeinodes_counter));
9807+
9808 BUFFER_TRACE(sbh, "marking dirty");
9809 mark_buffer_dirty(sbh);
9810 if (sync) {
9811@@ -2850,7 +2910,9 @@ static void ext4_mark_recovery_complete(
9812 journal_t *journal = EXT4_SB(sb)->s_journal;
9813
9814 jbd2_journal_lock_updates(journal);
9815- jbd2_journal_flush(journal);
9816+ if (jbd2_journal_flush(journal) < 0)
9817+ goto out;
9818+
9819 lock_super(sb);
9820 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
9821 sb->s_flags & MS_RDONLY) {
9822@@ -2859,6 +2921,8 @@ static void ext4_mark_recovery_complete(
9823 ext4_commit_super(sb, es, 1);
9824 }
9825 unlock_super(sb);
9826+
9827+out:
9828 jbd2_journal_unlock_updates(journal);
9829 }
9830
9831@@ -2934,6 +2998,7 @@ static int ext4_sync_fs(struct super_blo
9832 {
9833 int ret = 0;
9834
9835+ trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
9836 sb->s_dirt = 0;
9837 if (wait)
9838 ret = ext4_force_commit(sb);
9839@@ -2955,7 +3020,13 @@ static void ext4_write_super_lockfs(stru
9840
9841 /* Now we set up the journal barrier. */
9842 jbd2_journal_lock_updates(journal);
9843- jbd2_journal_flush(journal);
9844+
9845+ /*
9846+ * We don't want to clear needs_recovery flag when we failed
9847+ * to flush the journal.
9848+ */
9849+ if (jbd2_journal_flush(journal) < 0)
9850+ return;
9851
9852 /* Journal blocked and flushed, clear needs_recovery flag. */
9853 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
9854@@ -3053,13 +3124,14 @@ static int ext4_remount(struct super_blo
9855 ext4_mark_recovery_complete(sb, es);
9856 lock_super(sb);
9857 } else {
9858- __le32 ret;
9859+ int ret;
9860 if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
9861 ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
9862 printk(KERN_WARNING "EXT4-fs: %s: couldn't "
9863 "remount RDWR because of unsupported "
9864- "optional features (%x).\n",
9865- sb->s_id, le32_to_cpu(ret));
9866+ "optional features (%x).\n", sb->s_id,
9867+ (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
9868+ ~EXT4_FEATURE_RO_COMPAT_SUPP));
9869 err = -EROFS;
9870 goto restore_opts;
9871 }
9872@@ -3076,7 +3148,7 @@ static int ext4_remount(struct super_blo
9873 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
9874 printk(KERN_ERR
9875 "EXT4-fs: ext4_remount: "
9876- "Checksum for group %lu failed (%u!=%u)\n",
9877+ "Checksum for group %u failed (%u!=%u)\n",
9878 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
9879 le16_to_cpu(gdp->bg_checksum));
9880 err = -EINVAL;
9881@@ -3189,7 +3261,8 @@ static int ext4_statfs(struct dentry *de
9882 buf->f_type = EXT4_SUPER_MAGIC;
9883 buf->f_bsize = sb->s_blocksize;
9884 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
9885- buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
9886+ buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
9887+ percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
9888 ext4_free_blocks_count_set(es, buf->f_bfree);
9889 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
9890 if (buf->f_bfree < ext4_r_blocks_count(es))
9891@@ -3394,8 +3467,12 @@ static int ext4_quota_on(struct super_bl
9892 * otherwise be livelocked...
9893 */
9894 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
9895- jbd2_journal_flush(EXT4_SB(sb)->s_journal);
9896+ err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
9897 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
9898+ if (err) {
9899+ path_put(&nd.path);
9900+ return err;
9901+ }
9902 }
9903
9904 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
9905@@ -3459,7 +3536,7 @@ static ssize_t ext4_quota_write(struct s
9906 handle_t *handle = journal_current_handle();
9907
9908 if (!handle) {
9909- printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)"
9910+ printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
9911 " cancelled because transaction is not started.\n",
9912 (unsigned long long)off, (unsigned long long)len);
9913 return -EIO;
9914@@ -3520,18 +3597,73 @@ static int ext4_get_sb(struct file_syste
9915 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
9916 }
9917
9918+#ifdef CONFIG_PROC_FS
9919+static int ext4_ui_proc_show(struct seq_file *m, void *v)
9920+{
9921+ unsigned int *p = m->private;
9922+
9923+ seq_printf(m, "%u\n", *p);
9924+ return 0;
9925+}
9926+
9927+static int ext4_ui_proc_open(struct inode *inode, struct file *file)
9928+{
9929+ return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
9930+}
9931+
9932+static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
9933+ size_t cnt, loff_t *ppos)
9934+{
9935+ unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
9936+ char str[32];
9937+
9938+ if (cnt >= sizeof(str))
9939+ return -EINVAL;
9940+ if (copy_from_user(str, buf, cnt))
9941+ return -EFAULT;
9942+
9943+ *p = simple_strtoul(str, NULL, 0);
9944+ return cnt;
9945+}
9946+
9947+const struct file_operations ext4_ui_proc_fops = {
9948+ .owner = THIS_MODULE,
9949+ .open = ext4_ui_proc_open,
9950+ .read = seq_read,
9951+ .llseek = seq_lseek,
9952+ .release = single_release,
9953+ .write = ext4_ui_proc_write,
9954+};
9955+#endif
9956+
9957+static struct file_system_type ext4_fs_type = {
9958+ .owner = THIS_MODULE,
9959+ .name = "ext4",
9960+ .get_sb = ext4_get_sb,
9961+ .kill_sb = kill_block_super,
9962+ .fs_flags = FS_REQUIRES_DEV,
9963+};
9964+
9965+static int ext4dev_get_sb(struct file_system_type *fs_type,
9966+ int flags, const char *dev_name, void *data, struct vfsmount *mnt)
9967+{
9968+ return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
9969+}
9970+
9971 static struct file_system_type ext4dev_fs_type = {
9972 .owner = THIS_MODULE,
9973 .name = "ext4dev",
9974- .get_sb = ext4_get_sb,
9975+ .get_sb = ext4dev_get_sb,
9976 .kill_sb = kill_block_super,
9977 .fs_flags = FS_REQUIRES_DEV,
9978 };
9979+MODULE_ALIAS("ext4dev");
9980
9981 static int __init init_ext4_fs(void)
9982 {
9983 int err;
9984
9985+ ext4_proc_root = proc_mkdir("fs/ext4", NULL);
9986 err = init_ext4_mballoc();
9987 if (err)
9988 return err;
9989@@ -3542,9 +3674,14 @@ static int __init init_ext4_fs(void)
9990 err = init_inodecache();
9991 if (err)
9992 goto out1;
9993- err = register_filesystem(&ext4dev_fs_type);
9994+ err = register_filesystem(&ext4_fs_type);
9995 if (err)
9996 goto out;
9997+ err = register_filesystem(&ext4dev_fs_type);
9998+ if (err) {
9999+ unregister_filesystem(&ext4_fs_type);
10000+ goto out;
10001+ }
10002 return 0;
10003 out:
10004 destroy_inodecache();
10005@@ -3557,10 +3694,12 @@ out2:
10006
10007 static void __exit exit_ext4_fs(void)
10008 {
10009+ unregister_filesystem(&ext4_fs_type);
10010 unregister_filesystem(&ext4dev_fs_type);
10011 destroy_inodecache();
10012 exit_ext4_xattr();
10013 exit_ext4_mballoc();
10014+ remove_proc_entry("fs/ext4", NULL);
10015 }
10016
10017 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
10018diff -rup b/fs/ext4//symlink.c a/fs/ext4///symlink.c
10019--- b/fs/ext4/symlink.c 2009-02-11 14:37:58.000000000 +0100
10020+++ a/fs/ext4/symlink.c 2009-02-10 21:40:11.000000000 +0100
10021@@ -23,10 +23,10 @@
10022 #include "ext4.h"
10023 #include "xattr.h"
10024
10025-static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
10026+static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
10027 {
10028 struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
10029- nd_set_link(nd, (char*)ei->i_data);
10030+ nd_set_link(nd, (char *) ei->i_data);
10031 return NULL;
10032 }
10033
10034@@ -34,7 +34,7 @@ const struct inode_operations ext4_symli
10035 .readlink = generic_readlink,
10036 .follow_link = page_follow_link_light,
10037 .put_link = page_put_link,
10038-#ifdef CONFIG_EXT4DEV_FS_XATTR
10039+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
10040 .setxattr = generic_setxattr,
10041 .getxattr = generic_getxattr,
10042 .listxattr = ext4_listxattr,
10043@@ -45,7 +45,7 @@ const struct inode_operations ext4_symli
10044 const struct inode_operations ext4_fast_symlink_inode_operations = {
10045 .readlink = generic_readlink,
10046 .follow_link = ext4_follow_link,
10047-#ifdef CONFIG_EXT4DEV_FS_XATTR
10048+#if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
10049 .setxattr = generic_setxattr,
10050 .getxattr = generic_getxattr,
10051 .listxattr = ext4_listxattr,
10052diff -rup b/fs/ext4//xattr.c a/fs/ext4///xattr.c
10053--- b/fs/ext4/xattr.c 2009-02-11 14:37:58.000000000 +0100
10054+++ a/fs/ext4/xattr.c 2009-02-10 21:40:11.000000000 +0100
10055@@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache
10056
10057 static struct xattr_handler *ext4_xattr_handler_map[] = {
10058 [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
10059-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
10060+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
10061 [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
10062 [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
10063 #endif
10064 [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
10065-#ifdef CONFIG_EXT4DEV_FS_SECURITY
10066+#if defined(CONFIG_EXT4_FS_SECURITY) || defined(CONFIG_EXT4DEV_FS_SECURITY)
10067 [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
10068 #endif
10069 };
10070@@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_
10071 struct xattr_handler *ext4_xattr_handlers[] = {
10072 &ext4_xattr_user_handler,
10073 &ext4_xattr_trusted_handler,
10074-#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
10075+#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4DEV_FS_POSIX_ACL)
10076 &ext4_xattr_acl_access_handler,
10077 &ext4_xattr_acl_default_handler,
10078 #endif
10079-#ifdef CONFIG_EXT4DEV_FS_SECURITY
10080+#if defined(CONFIG_EXT4_FS_SECURITY) || defined(CONFIG_EXT4DEV_FS_SECURITY)
10081 &ext4_xattr_security_handler,
10082 #endif
10083 NULL
10084@@ -810,8 +810,8 @@ inserted:
10085 /* We need to allocate a new block */
10086 ext4_fsblk_t goal = ext4_group_first_block_no(sb,
10087 EXT4_I(inode)->i_block_group);
10088- ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
10089- goal, &error);
10090+ ext4_fsblk_t block = ext4_new_meta_blocks(handle, inode,
10091+ goal, NULL, &error);
10092 if (error)
10093 goto cleanup;
10094 ea_idebug(inode, "creating block %d", block);
10095diff -rup b/fs/ext4//xattr.h a/fs/ext4///xattr.h
10096--- b/fs/ext4/xattr.h 2009-02-11 14:37:58.000000000 +0100
10097+++ a/fs/ext4/xattr.h 2009-02-10 21:40:14.000000000 +0100
10098@@ -51,8 +51,8 @@ struct ext4_xattr_entry {
10099 (((name_len) + EXT4_XATTR_ROUND + \
10100 sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND)
10101 #define EXT4_XATTR_NEXT(entry) \
10102- ( (struct ext4_xattr_entry *)( \
10103- (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) )
10104+ ((struct ext4_xattr_entry *)( \
10105+ (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
10106 #define EXT4_XATTR_SIZE(size) \
10107 (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)
10108
10109@@ -63,7 +63,7 @@ struct ext4_xattr_entry {
10110 EXT4_I(inode)->i_extra_isize))
10111 #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1))
10112
10113-# ifdef CONFIG_EXT4DEV_FS_XATTR
10114+# if defined(CONFIG_EXT4_FS_XATTR) || defined(CONFIG_EXT4DEV_FS_XATTR)
10115
10116 extern struct xattr_handler ext4_xattr_user_handler;
10117 extern struct xattr_handler ext4_xattr_trusted_handler;
10118@@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void);
10119
10120 extern struct xattr_handler *ext4_xattr_handlers[];
10121
10122-# else /* CONFIG_EXT4DEV_FS_XATTR */
10123+# else /* CONFIG_EXT4_FS_XATTR */
10124
10125 static inline int
10126 ext4_xattr_get(struct inode *inode, int name_index, const char *name,
10127@@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode
10128
10129 #define ext4_xattr_handlers NULL
10130
10131-# endif /* CONFIG_EXT4DEV_FS_XATTR */
10132+# endif /* CONFIG_EXT4_FS_XATTR */
10133
10134-#ifdef CONFIG_EXT4DEV_FS_SECURITY
10135+#if defined(CONFIG_EXT4_FS_SECURITY) || defined(CONFIG_EXT4DEV_FS_SECURITY)
10136 extern int ext4_init_security(handle_t *handle, struct inode *inode,
10137 struct inode *dir);
10138 #else