From: Greg Kroah-Hartman <gregkh@suse.de>
Subject: revert ext4 changes in 2.6.27.19 and 2.6.27.20 and 2.6.27.25
Patch-mainline: no

As we are already taking a different version of ext4, revert the
changes that were made to ext4 in 2.6.27.19 and 2.6.27.20 and 2.6.27.25

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

--- b/Documentation/filesystems/ext4.txt
+++ a/Documentation/filesystems/ext4.txt
@@ -73,7 +73,7 @@
 * extent format more robust in face of on-disk corruption due to magics,
 * internal redunancy in tree
 * improved file allocation (multi-block alloc)
+* fix 32000 subdirectory limit
-* lift 32000 subdirectory limit imposed by i_links_count[1]
 * nsec timestamps for mtime, atime, ctime, create time
 * inode version field on disk (NFSv4, Lustre)
 * reduced e2fsck time via uninit_bg feature
@@ -88,9 +88,6 @@
 * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
 the ordering)

-[1] Filesystems with a block size of 1k may see a limit imposed by the
-directory hash tree having a maximum depth of two.
-
 2.2 Candidate features for future inclusion

 * Online defrag (patches available but not well tested)
reverted:
--- b/fs/ext4/balloc.c
+++ a/fs/ext4/balloc.c
@@ -20,7 +20,6 @@
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "group.h"
-#include "mballoc.h"

 /*
 * balloc.c contains the blocks allocation and deallocation routines
@@ -319,41 +318,18 @@
 block_group, bitmap_blk);
 return NULL;
 }
+ if (bh_uptodate_or_lock(bh))
-
- if (bitmap_uptodate(bh))
 return bh;

- lock_buffer(bh);
- if (bitmap_uptodate(bh)) {
- unlock_buffer(bh);
- return bh;
- }
 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 ext4_init_block_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
 set_buffer_uptodate(bh);
 unlock_buffer(bh);
 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
 return bh;
 }
 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
- if (buffer_uptodate(bh)) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh);
- unlock_buffer(bh);
- return bh;
- }
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh);
 if (bh_submit_read(bh) < 0) {
 put_bh(bh);
 ext4_error(sb, __func__,
@@ -861,136 +837,6 @@
 }

 /**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
- * @handle: handle to this transaction
- * @sb: super block
- * @block: start physcial block to add to the block group
- * @count: number of blocks to free
- *
- * This marks the blocks as free in the bitmap. We ask the
- * mballoc to reload the buddy after this by setting group
- * EXT4_GROUP_INFO_NEED_INIT_BIT flag
- */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count)
-{
- struct buffer_head *bitmap_bh = NULL;
- struct buffer_head *gd_bh;
- ext4_group_t block_group;
- ext4_grpblk_t bit;
- unsigned long i;
- struct ext4_group_desc *desc;
- struct ext4_super_block *es;
- struct ext4_sb_info *sbi;
- int err = 0, ret;
- ext4_grpblk_t blocks_freed;
- struct ext4_group_info *grp;
-
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
- ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
-
- ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
- grp = ext4_get_group_info(sb, block_group);
- /*
- * Check to see if we are freeing blocks across a group
- * boundary.
- */
- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
- goto error_return;
-
- bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh)
- goto error_return;
- desc = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!desc)
- goto error_return;
-
- if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
- in_range(ext4_inode_bitmap(sb, desc), block, count) ||
- in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
- in_range(block + count - 1, ext4_inode_table(sb, desc),
- sbi->s_itb_per_group)) {
- ext4_error(sb, __func__,
- "Adding blocks in system zones - "
- "Block = %llu, count = %lu",
- block, count);
- goto error_return;
- }
-
- /*
- * We are about to add blocks to the bitmap,
- * so we need undo access.
- */
- BUFFER_TRACE(bitmap_bh, "getting undo access");
- err = ext4_journal_get_undo_access(handle, bitmap_bh);
- if (err)
- goto error_return;
-
- /*
- * We are about to modify some metadata. Call the journal APIs
- * to unshare ->b_data if a currently-committing transaction is
- * using it
- */
- BUFFER_TRACE(gd_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, gd_bh);
- if (err)
- goto error_return;
- /*
- * make sure we don't allow a parallel init on other groups in the
- * same buddy cache
- */
- down_write(&grp->alloc_sem);
- for (i = 0, blocks_freed = 0; i < count; i++) {
- BUFFER_TRACE(bitmap_bh, "clear bit");
- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
- bit + i, bitmap_bh->b_data)) {
- ext4_error(sb, __func__,
- "bit already cleared for block %llu",
- (ext4_fsblk_t)(block + i));
- BUFFER_TRACE(bitmap_bh, "bit already cleared");
- } else {
- blocks_freed++;
- }
- }
- spin_lock(sb_bgl_lock(sbi, block_group));
- le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
- desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
- spin_unlock(sb_bgl_lock(sbi, block_group));
- percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
-
- if (sbi->s_log_groups_per_flex) {
- ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- spin_lock(sb_bgl_lock(sbi, flex_group));
- sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
- spin_unlock(sb_bgl_lock(sbi, flex_group));
- }
- /*
- * request to reload the buddy with the
- * new bitmap information
- */
- set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
- ext4_mb_update_group_info(grp, blocks_freed);
- up_write(&grp->alloc_sem);
-
- /* We dirtied the bitmap block */
- BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
-
- /* And the group descriptor block */
- BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
- if (!err)
- err = ret;
- sb->s_dirt = 1;
-
-error_return:
- brelse(bitmap_bh);
- ext4_std_error(sb, err);
- return;
-}
-
-/**
 * ext4_free_blocks() -- Free given blocks and update quota
 * @handle: handle for this transaction
 * @inode: inode
reverted:
--- b/fs/ext4/ext4.h
+++ a/fs/ext4/ext4.h
@@ -19,7 +19,6 @@
 #include <linux/types.h>
 #include <linux/blkdev.h>
 #include <linux/magic.h>
-#include <linux/jbd2.h>
 #include "ext4_i.h"

 /*
@@ -248,30 +247,6 @@
 #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
 #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */

-/* Flags that should be inherited by new inodes from their parent. */
-#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
- EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
- EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
- EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
-
-/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
-
-/* Flags that are appropriate for non-directories/regular files. */
-#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
-
-/* Mask out flags that are inappropriate for the given type of inode. */
-static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
-{
- if (S_ISDIR(mode))
- return flags;
- else if (S_ISREG(mode))
- return flags & EXT4_REG_FLMASK;
- else
- return flags & EXT4_OTHER_FLMASK;
-}
-
 /*
 * Inode dynamic state flags
 */
@@ -279,7 +254,6 @@
 #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
 #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
 #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
-#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */

 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
@@ -327,9 +301,7 @@
 #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
 #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
 #define EXT4_IOC_MIGRATE _IO('f', 9)
- /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
-#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)

 /*
 * ioctl commands in 32 bit emulation
@@ -887,7 +859,7 @@
 {
 unsigned len = le16_to_cpu(dlen);

+ if (len == EXT4_MAX_REC_LEN)
- if (len == EXT4_MAX_REC_LEN || len == 0)
 return 1 << 16;
 return len;
 }
@@ -917,9 +889,6 @@
 #define DX_HASH_LEGACY 0
 #define DX_HASH_HALF_MD4 1
 #define DX_HASH_TEA 2
-#define DX_HASH_LEGACY_UNSIGNED 3
-#define DX_HASH_HALF_MD4_UNSIGNED 4
-#define DX_HASH_TEA_UNSIGNED 5

 #ifdef __KERNEL__

@@ -1019,11 +988,9 @@
 ext4_fsblk_t nblocks);
 extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
 ext4_fsblk_t block, unsigned long count, int metadata);
+extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
+ ext4_fsblk_t block, unsigned long count,
-extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count,
 unsigned long *pdquot_freed_blocks);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
- ext4_fsblk_t block, unsigned long count);
 extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
 extern void ext4_check_blocks_bitmap (struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@ -1071,13 +1038,12 @@
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
 unsigned long, unsigned long, int, unsigned long *);
+extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
-extern int ext4_mb_add_groupinfo(struct super_block *sb,
 ext4_group_t i, struct ext4_group_desc *desc);
 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
 ext4_grpblk_t add);
+
+
-extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
-extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
- ext4_group_t, int);
 /* inode.c */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 struct buffer_head *bh, ext4_fsblk_t blocknr);
@@ -1105,14 +1071,13 @@
 extern void ext4_truncate (struct inode *);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
-extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 struct address_space *mapping, loff_t from);
+extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
-extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);

 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
@@ -1202,11 +1167,8 @@

 static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
 {
+ return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
+ le32_to_cpu(raw_inode->i_size_lo);
- if (S_ISREG(le16_to_cpu(raw_inode->i_mode)))
- return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
- le32_to_cpu(raw_inode->i_size_lo);
- else
- return (loff_t) le32_to_cpu(raw_inode->i_size_lo);
 }

 static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
@@ -1282,23 +1244,6 @@
 sector_t block, unsigned long max_blocks,
 struct buffer_head *bh, int create,
 int extend_disksize, int flag);
-/*
- * Add new method to test wether block and inode bitmaps are properly
- * initialized. With uninit_bg reading the block from disk is not enough
- * to mark the bitmap uptodate. We need to also zero-out the bitmap
- */
-#define BH_BITMAP_UPTODATE BH_JBDPrivateStart
-
-static inline int bitmap_uptodate(struct buffer_head *bh)
-{
- return (buffer_uptodate(bh) &&
- test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
-}
-static inline void set_bitmap_uptodate(struct buffer_head *bh)
-{
- set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
-}
-
 #endif /* __KERNEL__ */

 #endif /* _EXT4_H */
reverted:
--- b/fs/ext4/ext4_sb.h
+++ a/fs/ext4/ext4_sb.h
@@ -56,7 +56,6 @@
 u32 s_next_generation;
 u32 s_hash_seed[4];
 int s_def_hash_version;
- int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
 struct percpu_counter s_freeblocks_counter;
 struct percpu_counter s_freeinodes_counter;
 struct percpu_counter s_dirs_counter;
@@ -103,8 +102,7 @@
 struct list_head s_committed_transaction;
 spinlock_t s_md_lock;
 tid_t s_last_transaction;
+ unsigned short *s_mb_offsets, *s_mb_maxs;
- unsigned short *s_mb_offsets;
- unsigned int *s_mb_maxs;

 /* tunables */
 unsigned long s_stripe;
reverted:
--- b/fs/ext4/extents.c
+++ a/fs/ext4/extents.c
@@ -1118,8 +1118,7 @@
 struct ext4_extent_idx *ix;
 struct ext4_extent *ex;
 ext4_fsblk_t block;
+ int depth, ee_len;
- int depth; /* Note, NOT eh_depth; depth from top of tree */
- int ee_len;

 BUG_ON(path == NULL);
 depth = path->p_depth;
@@ -1178,8 +1177,7 @@
 if (bh == NULL)
 return -EIO;
 eh = ext_block_hdr(bh);
+ if (ext4_ext_check_header(inode, eh, depth)) {
- /* subtract from p_depth to get proper eh_depth */
- if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
 put_bh(bh);
 return -EIO;
 }
@@ -1633,13 +1631,11 @@
 {
 struct ext4_ext_cache *cex;
 BUG_ON(len == 0);
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 cex = &EXT4_I(inode)->i_cached_extent;
 cex->ec_type = type;
 cex->ec_block = block;
 cex->ec_len = len;
 cex->ec_start = start;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 }

 /*
@@ -1696,17 +1692,12 @@
 struct ext4_extent *ex)
 {
 struct ext4_ext_cache *cex;
- int ret = EXT4_EXT_CACHE_NO;

- /*
- * We borrow i_block_reservation_lock to protect i_cached_extent
- */
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 cex = &EXT4_I(inode)->i_cached_extent;

 /* has cache valid data? */
 if (cex->ec_type == EXT4_EXT_CACHE_NO)
+ return EXT4_EXT_CACHE_NO;
- goto errout;

 BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
 cex->ec_type != EXT4_EXT_CACHE_EXTENT);
@@ -1717,11 +1708,11 @@
 ext_debug("%u cached by %u:%u:%llu\n",
 block,
 cex->ec_block, cex->ec_len, cex->ec_start);
+ return cex->ec_type;
- ret = cex->ec_type;
 }
+
+ /* not in cache */
+ return EXT4_EXT_CACHE_NO;
-errout:
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return ret;
 }

 /*
@@ -2677,8 +2668,6 @@
 if (allocated > max_blocks)
 allocated = max_blocks;
 set_buffer_unwritten(bh_result);
- bh_result->b_bdev = inode->i_sb->s_bdev;
- bh_result->b_blocknr = newblock;
 goto out2;
 }

reverted:
--- b/fs/ext4/file.c
+++ a/fs/ext4/file.c
@@ -33,14 +33,9 @@
 */
 static int ext4_release_file (struct inode * inode, struct file * filp)
 {
- if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
- ext4_alloc_da_blocks(inode);
- EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
- }
 /* if we are the last writer on the inode, drop the block reservation */
 if ((filp->f_mode & FMODE_WRITE) &&
+ (atomic_read(&inode->i_writecount) == 1))
- (atomic_read(&inode->i_writecount) == 1) &&
- !EXT4_I(inode)->i_reserved_data_blocks
 {
 down_write(&EXT4_I(inode)->i_data_sem);
 ext4_discard_reservation(inode);
reverted:
--- b/fs/ext4/hash.c
+++ a/fs/ext4/hash.c
@@ -35,71 +35,23 @@


 /* The old legacy hash */
+static __u32 dx_hack_hash (const char *name, int len)
-static __u32 dx_hack_hash_unsigned(const char *name, int len)
 {
+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- const unsigned char *ucp = (const unsigned char *) name;
-
- while (len--) {
- hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373));
-
- if (hash & 0x80000000)
- hash -= 0x7fffffff;
- hash1 = hash0;
- hash0 = hash;
- }
- return hash0 << 1;
-}
-
-static __u32 dx_hack_hash_signed(const char *name, int len)
-{
- __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9;
- const signed char *scp = (const signed char *) name;
-
 while (len--) {
+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373));
- hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373));

+ if (hash & 0x80000000) hash -= 0x7fffffff;
- if (hash & 0x80000000)
- hash -= 0x7fffffff;
 hash1 = hash0;
 hash0 = hash;
 }
+ return (hash0 << 1);
- return hash0 << 1;
 }

+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
-static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
 {
 __u32 pad, val;
 int i;
- const signed char *scp = (const signed char *) msg;
-
- pad = (__u32)len | ((__u32)len << 8);
- pad |= pad << 16;
-
- val = pad;
- if (len > num*4)
- len = num * 4;
- for (i = 0; i < len; i++) {
- if ((i % 4) == 0)
- val = pad;
- val = ((int) scp[i]) + (val << 8);
- if ((i % 4) == 3) {
- *buf++ = val;
- val = pad;
- num--;
- }
- }
- if (--num >= 0)
- *buf++ = val;
- while (--num >= 0)
- *buf++ = pad;
-}
-
-static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
-{
- __u32 pad, val;
- int i;
- const unsigned char *ucp = (const unsigned char *) msg;

 pad = (__u32)len | ((__u32)len << 8);
 pad |= pad << 16;
@@ -110,7 +62,7 @@
 for (i=0; i < len; i++) {
 if ((i % 4) == 0)
 val = pad;
+ val = msg[i] + (val << 8);
- val = ((int) ucp[i]) + (val << 8);
 if ((i % 4) == 3) {
 *buf++ = val;
 val = pad;
@@ -143,8 +95,6 @@
 const char *p;
 int i;
 __u32 in[8], buf[4];
- void (*str2hashbuf)(const char *, int, __u32 *, int) =
- str2hashbuf_signed;

 /* Initialize the default seed for the hash checksum functions */
 buf[0] = 0x67452301;
@@ -163,18 +113,13 @@
 }

 switch (hinfo->hash_version) {
- case DX_HASH_LEGACY_UNSIGNED:
- hash = dx_hack_hash_unsigned(name, len);
- break;
 case DX_HASH_LEGACY:
+ hash = dx_hack_hash(name, len);
- hash = dx_hack_hash_signed(name, len);
 break;
- case DX_HASH_HALF_MD4_UNSIGNED:
- str2hashbuf = str2hashbuf_unsigned;
 case DX_HASH_HALF_MD4:
 p = name;
 while (len > 0) {
+ str2hashbuf(p, len, in, 8);
- (*str2hashbuf)(p, len, in, 8);
 half_md4_transform(buf, in);
 len -= 32;
 p += 32;
@@ -182,12 +127,10 @@
 minor_hash = buf[2];
 hash = buf[1];
 break;
- case DX_HASH_TEA_UNSIGNED:
- str2hashbuf = str2hashbuf_unsigned;
 case DX_HASH_TEA:
 p = name;
 while (len > 0) {
+ str2hashbuf(p, len, in, 4);
- (*str2hashbuf)(p, len, in, 4);
 TEA_transform(buf, in);
 len -= 16;
 p += 16;
reverted:
--- b/fs/ext4/ialloc.c
+++ a/fs/ext4/ialloc.c
@@ -84,7 +84,7 @@
 }

 memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
 bh->b_data);

 return EXT4_INODES_PER_GROUP(sb);
@@ -115,40 +115,18 @@
 block_group, bitmap_blk);
 return NULL;
 }
+ if (bh_uptodate_or_lock(bh))
- if (bitmap_uptodate(bh))
 return bh;

- lock_buffer(bh);
- if (bitmap_uptodate(bh)) {
- unlock_buffer(bh);
- return bh;
- }
 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 ext4_init_inode_bitmap(sb, bh, block_group, desc);
- set_bitmap_uptodate(bh);
 set_buffer_uptodate(bh);
 unlock_buffer(bh);
 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
 return bh;
 }
 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
- if (buffer_uptodate(bh)) {
- /*
- * if not uninit if bh is uptodate,
- * bitmap is also uptodate
- */
- set_bitmap_uptodate(bh);
- unlock_buffer(bh);
- return bh;
- }
- /*
- * submit the buffer_head for read. We can
- * safely mark the bitmap as uptodate now.
- * We do it here so the bitmap uptodate bit
- * get set with buffer lock held.
- */
- set_bitmap_uptodate(bh);
 if (bh_submit_read(bh) < 0) {
 put_bh(bh);
 ext4_error(sb, __func__,
@@ -188,7 +166,7 @@
 struct ext4_group_desc * gdp;
 struct ext4_super_block * es;
 struct ext4_sb_info *sbi;
+ int fatal = 0, err;
- int fatal = 0, err, cleared;
 ext4_group_t flex_group;

 if (atomic_read(&inode->i_count) > 1) {
@@ -242,12 +220,10 @@
 goto error_return;

 /* Ok, now we can actually update the inode bitmaps.. */
+ if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+ bit, bitmap_bh->b_data))
+ ext4_error (sb, "ext4_free_inode",
+ "bit already cleared for inode %lu", ino);
- spin_lock(sb_bgl_lock(sbi, block_group));
- cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
- spin_unlock(sb_bgl_lock(sbi, block_group));
- if (!cleared)
- ext4_error(sb, "ext4_free_inode",
- "bit already cleared for inode %lu", ino);
 else {
 gdp = ext4_get_group_desc (sb, block_group, &bh2);

@@ -591,77 +567,6 @@
 }

 /*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's sb_bgl_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding sb_bgl_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
- struct buffer_head *inode_bitmap_bh,
- unsigned long ino, ext4_group_t group, int mode)
-{
- int free = 0, retval = 0;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
- spin_lock(sb_bgl_lock(sbi, group));
- if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
- /* not a free inode */
- retval = 1;
- goto err_ret;
- }
- ino++;
- if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
- ino > EXT4_INODES_PER_GROUP(sb)) {
- spin_unlock(sb_bgl_lock(sbi, group));
- ext4_error(sb, __func__,
- "reserved inode or inode > inodes count - "
- "block_group = %lu, inode=%lu", group,
- ino + group * EXT4_INODES_PER_GROUP(sb));
- return 1;
- }
- /* If we didn't allocate from within the initialized part of the inode
- * table then we need to initialize up to this inode. */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
- if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
- gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
- /* When marking the block group with
- * ~EXT4_BG_INODE_UNINIT we don't want to depend
- * on the value of bg_itable_unused even though
- * mke2fs could have initialized the same for us.
- * Instead we calculated the value below
- */
-
- free = 0;
- } else {
- free = EXT4_INODES_PER_GROUP(sb) -
- le16_to_cpu(gdp->bg_itable_unused);
- }
-
- /*
- * Check the relative inode number against the last used
- * relative inode number in this group. if it is greater
- * we need to update the bg_itable_unused count
- *
- */
- if (ino > free)
- gdp->bg_itable_unused =
- cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
- }
- le16_add_cpu(&gdp->bg_free_inodes_count, -1);
- if (S_ISDIR(mode)) {
- le16_add_cpu(&gdp->bg_used_dirs_count, 1);
- }
- gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
- spin_unlock(sb_bgl_lock(sbi, group));
- return retval;
-}
-
-/*
 * There are two policies for allocating an inode. If the new inode is
 * a directory, then a forward search is made for a block group with both
 * free space and a low directory-to-inode ratio; if that fails, then of
@@ -687,7 +592,6 @@
 struct inode *ret;
 ext4_group_t i;
 int free = 0;
- static int once = 1;
 ext4_group_t flex_group;

 /* Cannot create files in a deleted directory */
@@ -705,15 +609,6 @@

 if (sbi->s_log_groups_per_flex) {
 ret2 = find_group_flex(sb, dir, &group);
- if (ret2 == -1) {
- ret2 = find_group_other(sb, dir, &group);
- if (ret2 == 0 && once) {
- once = 0;
- printk(KERN_NOTICE "ext4: find_group_flex "
- "failed, fallback succeeded dir %lu\n",
- dir->i_ino);
- }
- }
 goto got_group;
 }

@@ -754,12 +649,8 @@
 if (err)
 goto fail;

+ if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group),
+ ino, bitmap_bh->b_data)) {
- BUFFER_TRACE(bh2, "get_write_access");
- err = ext4_journal_get_write_access(handle, bh2);
- if (err)
- goto fail;
- if (!ext4_claim_inode(sb, bitmap_bh,
- ino, group, mode)) {
 /* we won it */
 BUFFER_TRACE(bitmap_bh,
 "call ext4_journal_dirty_metadata");
@@ -767,13 +658,10 @@
 bitmap_bh);
 if (err)
 goto fail;
- /* zero bit is inode number 1*/
- ino++;
 goto got;
 }
 /* we lost it */
 jbd2_journal_release_buffer(handle, bitmap_bh);
- jbd2_journal_release_buffer(handle, bh2);

 if (++ino < EXT4_INODES_PER_GROUP(sb))
 goto repeat_in_this_group;
@@ -793,6 +681,21 @@
 goto out;

 got:
+ ino++;
+ if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
+ ino > EXT4_INODES_PER_GROUP(sb)) {
+ ext4_error(sb, __func__,
+ "reserved inode or inode > inodes count - "
+ "block_group = %lu, inode=%lu", group,
+ ino + group * EXT4_INODES_PER_GROUP(sb));
+ err = -EIO;
+ goto fail;
+ }
+
+ BUFFER_TRACE(bh2, "get_write_access");
+ err = ext4_journal_get_write_access(handle, bh2);
+ if (err) goto fail;
+
 /* We may have to initialize the block bitmap if it isn't already */
 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
 gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -827,10 +730,47 @@
 if (err)
 goto fail;
 }
+
+ spin_lock(sb_bgl_lock(sbi, group));
+ /* If we didn't allocate from within the initialized part of the inode
+ * table then we need to initialize up to this inode. */
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+ gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+
+ /* When marking the block group with
+ * ~EXT4_BG_INODE_UNINIT we don't want to depend
+ * on the value of bg_itable_unused even though
+ * mke2fs could have initialized the same for us.
+ * Instead we calculated the value below
+ */
+
+ free = 0;
+ } else {
+ free = EXT4_INODES_PER_GROUP(sb) -
+ le16_to_cpu(gdp->bg_itable_unused);
+ }
+
+ /*
+ * Check the relative inode number against the last used
+ * relative inode number in this group. if it is greater
+ * we need to update the bg_itable_unused count
+ *
+ */
+ if (ino > free)
+ gdp->bg_itable_unused =
+ cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
+ }
+
+ le16_add_cpu(&gdp->bg_free_inodes_count, -1);
+ if (S_ISDIR(mode)) {
+ le16_add_cpu(&gdp->bg_used_dirs_count, 1);
+ }
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ spin_unlock(sb_bgl_lock(sbi, group));
+ BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
 err = ext4_journal_dirty_metadata(handle, bh2);
+ if (err) goto fail;
- if (err)
- goto fail;

 percpu_counter_dec(&sbi->s_freeinodes_counter);
 if (S_ISDIR(mode))
@@ -866,12 +806,16 @@
 ei->i_disksize = 0;

 /*
+ * Don't inherit extent flag from directory. We set extent flag on
+ * newly created directory and file only if -o extent mount option is
+ * specified
- * Don't inherit extent flag from directory, amongst others. We set
- * extent flag on newly created directory and file only if -o extent
- * mount option is specified
 */
+ ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
+ if (S_ISLNK(mode))
+ ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+ /* dirsync only applies to directories */
+ if (!S_ISDIR(mode))
+ ei->i_flags &= ~EXT4_DIRSYNC_FL;
- ei->i_flags =
- ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
 ei->i_file_acl = 0;
 ei->i_dtime = 0;
 ei->i_block_alloc_info = NULL;
reverted:
--- b/fs/ext4/inode.c
+++ a/fs/ext4/inode.c
@@ -46,10 +46,8 @@
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 loff_t new_size)
 {
+ return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
+ new_size);
- return jbd2_journal_begin_ordered_truncate(
- EXT4_SB(inode->i_sb)->s_journal,
- &EXT4_I(inode)->jinode,
- new_size);
 }

 static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -353,9 +351,9 @@
 final = ptrs;
 } else {
 ext4_warning(inode->i_sb, "ext4_block_to_path",
+ "block %lu > max",
- "block %lu > max in inode %lu",
 i_block + direct_blocks +
+ indirect_blocks + double_blocks);
- indirect_blocks + double_blocks, inode->i_ino);
 }
 if (boundary)
 *boundary = final - 1 - (i_block & (ptrs - 1));
@@ -1046,14 +1044,6 @@
 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
 EXT4_I(inode)->i_allocated_meta_blocks = 0;
 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- /*
- * If we have done all the pending block allocations and if
- * there aren't any writers on the inode, we can discard the
- * inode's preallocations.
- */
- if (!total && (atomic_read(&inode->i_writecount) == 0))
- ext4_discard_reservation(inode);
 }

 /*
@@ -1085,7 +1075,6 @@
 int retval;

 clear_buffer_mapped(bh);
- clear_buffer_unwritten(bh);

 /*
 * Try to see if we can get the block without requesting
@@ -1116,18 +1105,6 @@
 return retval;

 /*
- * When we call get_blocks without the create flag, the
- * BH_Unwritten flag could have gotten set if the blocks
- * requested were part of a uninitialized extent. We need to
- * clear this flag now that we are committed to convert all or
- * part of the uninitialized extent to be an initialized
- * extent. This is because we need to avoid the combination
- * of BH_Unwritten and BH_Mapped flags being simultaneously
- * set on the buffer_head.
- */
- clear_buffer_unwritten(bh);
-
- /*
 * New blocks allocate and/or writing to uninitialized extent
 * will possibly result in updating i_data, so we take
 * the write lock of i_data_sem, and call get_blocks()
@@ -1393,10 +1370,6 @@
 goto out;
 }

- /* We cannot recurse into the filesystem as the transaction is already
- * started */
- flags |= AOP_FLAG_NOFS;
-
 page = grab_cache_page_write_begin(mapping, index, flags);
 if (!page) {
 ext4_journal_stop(handle);
@@ -1406,7 +1379,7 @@
 *pagep = page;

 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+ ext4_get_block);
- ext4_get_block);

 if (!ret && ext4_should_journal_data(inode)) {
 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1675,25 +1648,18 @@
 */
 static int mpage_da_submit_io(struct mpage_da_data *mpd)
 {
+ struct address_space *mapping = mpd->inode->i_mapping;
+ int ret = 0, err, nr_pages, i;
+ unsigned long index, end;
- long pages_skipped;
 struct pagevec pvec;
- unsigned long index, end;
- int ret = 0, err, nr_pages, i;
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;

 BUG_ON(mpd->next_page <= mpd->first_page);
+ pagevec_init(&pvec, 0);
- /*
- * We need to start from the first_page to the next_page - 1
- * to make sure we also write the mapped dirty buffer_heads.
- * If we look at mpd->lbh.b_blocknr we would only be looking
- * at the currently mapped buffer_heads.
- */
 index = mpd->first_page;
 end = mpd->next_page - 1;

- pagevec_init(&pvec, 0);
 while (index <= end) {
+ /* XXX: optimize tail */
 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 if (nr_pages == 0)
 break;
@@ -1705,10 +1671,6 @@
 break;
 index++;

- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
-
- pages_skipped = mpd->wbc->pages_skipped;
 err = mapping->a_ops->writepage(page, mpd->wbc);
 if (!err)
 mpd->pages_written++;
@@ -2029,29 +1991,11 @@
 bh = head;
 do {
 BUG_ON(buffer_locked(bh));
- /*
- * We need to try to allocate
- * unmapped blocks in the same page.
- * Otherwise we won't make progress
- * with the page in ext4_da_writepage
- */
 if (buffer_dirty(bh) &&
 (!buffer_mapped(bh) || buffer_delay(bh))) {
 mpage_add_bh_to_extent(mpd, logical, bh);
 if (mpd->io_done)
 return MPAGE_DA_EXTENT_TAIL;
- } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
- /*
- * mapped dirty buffer. We need to update
- * the b_state because we look at
- * b_state in mpage_da_map_blocks. We don't
- * update b_size because if we find an
- * unmapped buffer_head later we need to
- * use the b_state flag of that buffer_head.
- */
- if (mpd->lbh.b_size == 0)
- mpd->lbh.b_state =
- bh->b_state & BH_FLAGS;
 }
 logical++;
 } while ((bh = bh->b_this_page) != head);
@@ -2118,10 +2062,6 @@
 struct buffer_head *bh_result, int create)
 {
 int ret = 0;
- sector_t invalid_block = ~((sector_t) 0xffff);
-
- if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
- invalid_block = ~0;

 BUG_ON(create == 0);
 BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
@@ -2143,18 +2083,11 @@
 /* not enough space to reserve */
 return ret;

+ map_bh(bh_result, inode->i_sb, 0);
- map_bh(bh_result, inode->i_sb, invalid_block);
 set_buffer_new(bh_result);
 set_buffer_delay(bh_result);
 } else if (ret > 0) {
 bh_result->b_size = (ret << inode->i_blkbits);
- /*
- * With sub-block writes into unwritten extents
- * we also need to mark the buffer as new so that
- * the unwritten parts of the buffer gets correctly zeroed.
- */
- if (buffer_unwritten(bh_result))
- set_buffer_new(bh_result);
 ret = 0;
 }

@@ -2365,20 +2298,6 @@
 */
 if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 return 0;
-
- /*
- * If the filesystem has aborted, it is read-only, so return
- * right away instead of dumping stack traces later on that
- * will obscure the real source of the problem. We test
- * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
- * the latter could be true if the filesystem is mounted
- * read-only, and in that case, ext4_da_writepages should
- * *never* be called, so if that ever happens, we would want
- * the stack trace.
- */
- if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
- return -EROFS;
-
 /*
 * Make sure nr_to_write is >= sbi->s_mb_stream_request
 * This make sure small files blocks are allocated in
@@ -2417,7 +2336,7 @@
 handle = ext4_journal_start(inode, needed_blocks);
 if (IS_ERR(handle)) {
 ret = PTR_ERR(handle);
+ printk(KERN_EMERG "%s: jbd2_start: "
- printk(KERN_CRIT "%s: jbd2_start: "
 "%ld pages, ino %lu; err %d\n", __func__,
 wbc->nr_to_write, inode->i_ino, ret);
 dump_stack();
@@ -2501,9 +2420,6 @@
 ret = PTR_ERR(handle);
 goto out;
 }
- /* We cannot recurse into the filesystem as the transaction is already
- * started */
- flags |= AOP_FLAG_NOFS;

 page = grab_cache_page_write_begin(mapping, index, flags);
 if (!page) {
@@ -2617,48 +2533,6 @@
 return;
 }

-/*
- * Force all delayed allocation blocks to be allocated for a given inode.
- */
-int ext4_alloc_da_blocks(struct inode *inode)
-{
- if (!EXT4_I(inode)->i_reserved_data_blocks &&
- !EXT4_I(inode)->i_reserved_meta_blocks)
- return 0;
-
- /*
- * We do something simple for now. The filemap_flush() will
- * also start triggering a write of the data blocks, which is
- * not strictly speaking necessary (and for users of
- * laptop_mode, not even desirable). However, to do otherwise
- * would require replicating code paths in:
- *
- * ext4_da_writepages() ->
- * write_cache_pages() ---> (via passed in callback function)
- * __mpage_da_writepage() -->
- * mpage_add_bh_to_extent()
- * mpage_da_map_blocks()
- *
- * The problem is that write_cache_pages(), located in
- * mm/page-writeback.c, marks pages clean in preparation for
- * doing I/O, which is not desirable if we're not planning on
- * doing I/O at all.
- *
- * We could call write_cache_pages(), and then redirty all of
- * the pages by calling redirty_page_for_writeback() but that
- * would be ugly in the extreme. So instead we would need to
- * replicate parts of the code in the above functions,
- * simplifying them becuase we wouldn't actually intend to
- * write out the pages, but rather only collect contiguous
- * logical block extents, call the multi-block allocator, and
- * then update the buffer heads with the block allocations.
- *
- * For now, though, we'll cheat by calling filemap_flush(),
- * which will map the blocks, and start the I/O, but not
- * actually wait for the I/O to complete.
- */
- return filemap_flush(inode->i_mapping);
-}

 /*
 * bmap() is special. It gets used by applications such as lilo and by
@@ -3668,9 +3542,6 @@
 if (!ext4_can_truncate(inode))
 return;

- if (inode->i_size == 0)
- ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
-
 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
 ext4_ext_truncate(inode);
 return;
@@ -4088,9 +3959,11 @@
 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
 inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
+ if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+ cpu_to_le32(EXT4_OS_HURD)) {
- if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
 ei->i_file_acl |=
 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+ }
 inode->i_size = ext4_isize(raw_inode);
 ei->i_disksize = inode->i_size;
 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
@@ -4137,18 +4010,6 @@
 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
 }

- if (ei->i_file_acl &&
- ((ei->i_file_acl <
- (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
- EXT4_SB(sb)->s_gdb_count)) ||
- (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
- ext4_error(sb, __func__,
- "bad extended attribute block %llu in inode #%lu",
- ei->i_file_acl, inode->i_ino);
- ret = -EIO;
- goto bad_inode;
- }
-
 if (S_ISREG(inode->i_mode)) {
 inode->i_op = &ext4_file_inode_operations;
 inode->i_fop = &ext4_file_operations;
@@ -4163,8 +4024,7 @@
 inode->i_op = &ext4_symlink_inode_operations;
 ext4_set_aops(inode);
 }
+ } else {
- } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
- S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 inode->i_op = &ext4_special_inode_operations;
 if (raw_inode->i_block[0])
 init_special_inode(inode, inode->i_mode,
@@ -4172,13 +4032,6 @@
 else
 init_special_inode(inode, inode->i_mode,
 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
- } else {
- brelse(bh);
- ret = -EIO;
- ext4_error(inode->i_sb, __func__,
- "bogus i_mode (%o) for inode=%lu",
- inode->i_mode, inode->i_ino);
- goto bad_inode;
 }
 brelse (iloc.bh);
 ext4_set_inode_flags(inode);
@@ -4956,9 +4809,8 @@
 return !buffer_mapped(bh);
 }

+int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
-int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
- struct page *page = vmf->page;
 loff_t size;
 unsigned long len;
 int ret = -EINVAL;
@@ -5009,8 +4861,6 @@
 goto out_unlock;
 ret = 0;
 out_unlock:
- if (ret)
- ret = VM_FAULT_SIGBUS;
 up_read(&inode->i_alloc_sem);
 return ret;
 }
reverted:
--- b/fs/ext4/ioctl.c
+++ a/fs/ext4/ioctl.c
@@ -49,7 +49,8 @@
 if (err)
 return err;

+ if (!S_ISDIR(inode->i_mode))
+ flags &= ~EXT4_DIRSYNC_FL;
- flags = ext4_mask_flags(inode->i_mode, flags);

 err = -EPERM;
 mutex_lock(&inode->i_mutex);
@@ -287,20 +288,6 @@
 return err;
 }

- case EXT4_IOC_ALLOC_DA_BLKS:
- {
- int err;
- if (!is_owner_or_cap(inode))
- return -EACCES;
-
- err = mnt_want_write(filp->f_path.mnt);
- if (err)
- return err;
- err = ext4_alloc_da_blocks(inode);
- mnt_drop_write(filp->f_path.mnt);
- return err;
- }
-
 default:
 return -ENOTTY;
 }
1349reverted:
1350--- b/fs/ext4/mballoc.c
1351+++ a/fs/ext4/mballoc.c
1352@@ -100,7 +100,7 @@
1353 * inode as:
1354 *
1355 * { page }
1356+ * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
1357- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
1358 *
1359 *
1360 * one block each for bitmap and buddy information. So for each group we
1361@@ -330,18 +330,6 @@
1362 * object
1363 *
1364 */
1365-static struct kmem_cache *ext4_pspace_cachep;
1366-static struct kmem_cache *ext4_ac_cachep;
1367-static struct kmem_cache *ext4_free_ext_cachep;
1368-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
1369- ext4_group_t group);
1370-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
1371- ext4_group_t group);
1372-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
1373-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
1374-static void ext4_mb_free_committed_blocks(struct super_block *);
1375-static void ext4_mb_poll_new_transaction(struct super_block *sb,
1376- handle_t *handle);
1377
1378 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
1379 {
1380@@ -730,7 +718,7 @@
1381 * stored in the inode as
1382 *
1383 * { page }
1384+ * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]...
1385- * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
1386 *
1387 *
1388 * one block each for bitmap and buddy information.
1389@@ -796,42 +784,20 @@
1390 if (bh[i] == NULL)
1391 goto out;
1392
1393+ if (bh_uptodate_or_lock(bh[i]))
1394- if (bitmap_uptodate(bh[i]))
1395 continue;
1396
1397- lock_buffer(bh[i]);
1398- if (bitmap_uptodate(bh[i])) {
1399- unlock_buffer(bh[i]);
1400- continue;
1401- }
1402 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
1403 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
1404 ext4_init_block_bitmap(sb, bh[i],
1405 first_group + i, desc);
1406- set_bitmap_uptodate(bh[i]);
1407 set_buffer_uptodate(bh[i]);
1408 unlock_buffer(bh[i]);
1409 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
1410 continue;
1411 }
1412 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
1413- if (buffer_uptodate(bh[i])) {
1414- /*
1415- * if not uninit if bh is uptodate,
1416- * bitmap is also uptodate
1417- */
1418- set_bitmap_uptodate(bh[i]);
1419- unlock_buffer(bh[i]);
1420- continue;
1421- }
1422 get_bh(bh[i]);
1423- /*
1424- * submit the buffer_head for read. We can
1425- * safely mark the bitmap as uptodate now.
1426- * We do it here so the bitmap uptodate bit
1427- * get set with buffer lock held.
1428- */
1429- set_bitmap_uptodate(bh[i]);
1430 bh[i]->b_end_io = end_buffer_read_sync;
1431 submit_bh(READ, bh[i]);
1432 mb_debug("read bitmap for group %lu\n", first_group + i);
1433@@ -848,8 +814,6 @@
1434
1435 err = 0;
1436 first_block = page->index * blocks_per_page;
1437- /* init the page */
1438- memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
1439 for (i = 0; i < blocks_per_page; i++) {
1440 int group;
1441 struct ext4_group_info *grinfo;
1442@@ -876,6 +840,7 @@
1443 BUG_ON(incore == NULL);
1444 mb_debug("put buddy for group %u in page %lu/%x\n",
1445 group, page->index, i * blocksize);
1446+ memset(data, 0xff, blocksize);
1447 grinfo = ext4_get_group_info(sb, group);
1448 grinfo->bb_fragments = 0;
1449 memset(grinfo->bb_counters, 0,
1450@@ -883,9 +848,7 @@
1451 /*
1452 * incore got set to the group block bitmap below
1453 */
1454- ext4_lock_group(sb, group);
1455 ext4_mb_generate_buddy(sb, data, incore, group);
1456- ext4_unlock_group(sb, group);
1457 incore = NULL;
1458 } else {
1459 /* this is block of bitmap */
1460@@ -899,7 +862,6 @@
1461
1462 /* mark all preallocated blks used in in-core bitmap */
1463 ext4_mb_generate_from_pa(sb, data, group);
1464- ext4_mb_generate_from_freelist(sb, data, group);
1465 ext4_unlock_group(sb, group);
1466
1467 /* set incore so that the buddy information can be
1468@@ -924,20 +886,18 @@
1469 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1470 struct ext4_buddy *e4b)
1471 {
1472+ struct ext4_sb_info *sbi = EXT4_SB(sb);
1473+ struct inode *inode = sbi->s_buddy_cache;
1474 int blocks_per_page;
1475 int block;
1476 int pnum;
1477 int poff;
1478 struct page *page;
1479 int ret;
1480- struct ext4_group_info *grp;
1481- struct ext4_sb_info *sbi = EXT4_SB(sb);
1482- struct inode *inode = sbi->s_buddy_cache;
1483
1484 mb_debug("load group %lu\n", group);
1485
1486 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1487- grp = ext4_get_group_info(sb, group);
1488
1489 e4b->bd_blkbits = sb->s_blocksize_bits;
1490 e4b->bd_info = ext4_get_group_info(sb, group);
1491@@ -945,15 +905,6 @@
1492 e4b->bd_group = group;
1493 e4b->bd_buddy_page = NULL;
1494 e4b->bd_bitmap_page = NULL;
1495- e4b->alloc_semp = &grp->alloc_sem;
1496-
1497- /* Take the read lock on the group alloc
1498- * sem. This would make sure a parallel
1499- * ext4_mb_init_group happening on other
1500- * groups mapped by the page is blocked
1501- * till we are done with allocation
1502- */
1503- down_read(e4b->alloc_semp);
1504
1505 /*
1506 * the buddy cache inode stores the block bitmap
1507@@ -969,14 +920,6 @@
1508 page = find_get_page(inode->i_mapping, pnum);
1509 if (page == NULL || !PageUptodate(page)) {
1510 if (page)
1511- /*
1512- * drop the page reference and try
1513- * to get the page with lock. If we
1514- * are not uptodate that implies
1515- * somebody just created the page but
1516- * is yet to initialize the same. So
1517- * wait for it to initialize.
1518- */
1519 page_cache_release(page);
1520 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1521 if (page) {
1522@@ -1042,9 +985,6 @@
1523 page_cache_release(e4b->bd_buddy_page);
1524 e4b->bd_buddy = NULL;
1525 e4b->bd_bitmap = NULL;
1526-
1527- /* Done with the buddy cache */
1528- up_read(e4b->alloc_semp);
1529 return ret;
1530 }
1531
1532@@ -1054,9 +994,6 @@
1533 page_cache_release(e4b->bd_bitmap_page);
1534 if (e4b->bd_buddy_page)
1535 page_cache_release(e4b->bd_buddy_page);
1536- /* Done with the buddy cache */
1537- if (e4b->alloc_semp)
1538- up_read(e4b->alloc_semp);
1539 }
1540
1541
1542@@ -1094,10 +1031,7 @@
1543 cur += 32;
1544 continue;
1545 }
1546+ mb_clear_bit_atomic(lock, cur, bm);
1547- if (lock)
1548- mb_clear_bit_atomic(lock, cur, bm);
1549- else
1550- mb_clear_bit(cur, bm);
1551 cur++;
1552 }
1553 }
1554@@ -1115,10 +1049,7 @@
1555 cur += 32;
1556 continue;
1557 }
1558+ mb_set_bit_atomic(lock, cur, bm);
1559- if (lock)
1560- mb_set_bit_atomic(lock, cur, bm);
1561- else
1562- mb_set_bit(cur, bm);
1563 cur++;
1564 }
1565 }
1566@@ -1365,20 +1296,13 @@
1567 ac->ac_tail = ret & 0xffff;
1568 ac->ac_buddy = ret >> 16;
1569
1570+ /* XXXXXXX: SUCH A HORRIBLE **CK */
1571+ /*FIXME!! Why ? */
1572- /*
1573- * take the page reference. We want the page to be pinned
1574- * so that we don't get a ext4_mb_init_cache_call for this
1575- * group until we update the bitmap. That would mean we
1576- * double allocate blocks. The reference is dropped
1577- * in ext4_mb_release_context
1578- */
1579 ac->ac_bitmap_page = e4b->bd_bitmap_page;
1580 get_page(ac->ac_bitmap_page);
1581 ac->ac_buddy_page = e4b->bd_buddy_page;
1582 get_page(ac->ac_buddy_page);
1583+
1584- /* on allocation we use ac to track the held semaphore */
1585- ac->alloc_semp = e4b->alloc_semp;
1586- e4b->alloc_semp = NULL;
1587 /* store last allocated for subsequent stream allocation */
1588 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
1589 spin_lock(&sbi->s_md_lock);
1590@@ -1402,8 +1326,6 @@
1591 struct ext4_free_extent ex;
1592 int max;
1593
1594- if (ac->ac_status == AC_STATUS_FOUND)
1595- return;
1596 /*
1597 * We don't want to scan for a whole year
1598 */
1599@@ -1450,7 +1372,7 @@
1600 struct ext4_free_extent *gex = &ac->ac_g_ex;
1601
1602 BUG_ON(ex->fe_len <= 0);
1603+ BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1604- BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1605 BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
1606 BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
1607
1608@@ -1770,173 +1692,6 @@
1609 return 0;
1610 }
1611
1612-/*
1613- * lock the group_info alloc_sem of all the groups
1614- * belonging to the same buddy cache page. This
1615- * make sure other parallel operation on the buddy
1616- * cache doesn't happen whild holding the buddy cache
1617- * lock
1618- */
1619-int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1620-{
1621- int i;
1622- int block, pnum;
1623- int blocks_per_page;
1624- int groups_per_page;
1625- ext4_group_t first_group;
1626- struct ext4_group_info *grp;
1627-
1628- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1629- /*
1630- * the buddy cache inode stores the block bitmap
1631- * and buddy information in consecutive blocks.
1632- * So for each group we need two blocks.
1633- */
1634- block = group * 2;
1635- pnum = block / blocks_per_page;
1636- first_group = pnum * blocks_per_page / 2;
1637-
1638- groups_per_page = blocks_per_page >> 1;
1639- if (groups_per_page == 0)
1640- groups_per_page = 1;
1641- /* read all groups the page covers into the cache */
1642- for (i = 0; i < groups_per_page; i++) {
1643-
1644- if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
1645- break;
1646- grp = ext4_get_group_info(sb, first_group + i);
1647- /* take all groups write allocation
1648- * semaphore. This make sure there is
1649- * no block allocation going on in any
1650- * of that groups
1651- */
1652- down_write(&grp->alloc_sem);
1653- }
1654- return i;
1655-}
1656-
1657-void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1658- ext4_group_t group, int locked_group)
1659-{
1660- int i;
1661- int block, pnum;
1662- int blocks_per_page;
1663- ext4_group_t first_group;
1664- struct ext4_group_info *grp;
1665-
1666- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1667- /*
1668- * the buddy cache inode stores the block bitmap
1669- * and buddy information in consecutive blocks.
1670- * So for each group we need two blocks.
1671- */
1672- block = group * 2;
1673- pnum = block / blocks_per_page;
1674- first_group = pnum * blocks_per_page / 2;
1675- /* release locks on all the groups */
1676- for (i = 0; i < locked_group; i++) {
1677-
1678- grp = ext4_get_group_info(sb, first_group + i);
1679- /* take all groups write allocation
1680- * semaphore. This make sure there is
1681- * no block allocation going on in any
1682- * of that groups
1683- */
1684- up_write(&grp->alloc_sem);
1685- }
1686-
1687-}
1688-
1689-static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1690-{
1691-
1692- int ret;
1693- void *bitmap;
1694- int blocks_per_page;
1695- int block, pnum, poff;
1696- int num_grp_locked = 0;
1697- struct ext4_group_info *this_grp;
1698- struct ext4_sb_info *sbi = EXT4_SB(sb);
1699- struct inode *inode = sbi->s_buddy_cache;
1700- struct page *page = NULL, *bitmap_page = NULL;
1701-
1702- mb_debug("init group %lu\n", group);
1703- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1704- this_grp = ext4_get_group_info(sb, group);
1705- /*
1706- * This ensures we don't add group
1707- * to this buddy cache via resize
1708- */
1709- num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
1710- if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
1711- /*
1712- * somebody initialized the group
1713- * return without doing anything
1714- */
1715- ret = 0;
1716- goto err;
1717- }
1718- /*
1719- * the buddy cache inode stores the block bitmap
1720- * and buddy information in consecutive blocks.
1721- * So for each group we need two blocks.
1722- */
1723- block = group * 2;
1724- pnum = block / blocks_per_page;
1725- poff = block % blocks_per_page;
1726- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1727- if (page) {
1728- BUG_ON(page->mapping != inode->i_mapping);
1729- ret = ext4_mb_init_cache(page, NULL);
1730- if (ret) {
1731- unlock_page(page);
1732- goto err;
1733- }
1734- unlock_page(page);
1735- }
1736- if (page == NULL || !PageUptodate(page)) {
1737- ret = -EIO;
1738- goto err;
1739- }
1740- mark_page_accessed(page);
1741- bitmap_page = page;
1742- bitmap = page_address(page) + (poff * sb->s_blocksize);
1743-
1744- /* init buddy cache */
1745- block++;
1746- pnum = block / blocks_per_page;
1747- poff = block % blocks_per_page;
1748- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1749- if (page == bitmap_page) {
1750- /*
1751- * If both the bitmap and buddy are in
1752- * the same page we don't need to force
1753- * init the buddy
1754- */
1755- unlock_page(page);
1756- } else if (page) {
1757- BUG_ON(page->mapping != inode->i_mapping);
1758- ret = ext4_mb_init_cache(page, bitmap);
1759- if (ret) {
1760- unlock_page(page);
1761- goto err;
1762- }
1763- unlock_page(page);
1764- }
1765- if (page == NULL || !PageUptodate(page)) {
1766- ret = -EIO;
1767- goto err;
1768- }
1769- mark_page_accessed(page);
1770-err:
1771- ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
1772- if (bitmap_page)
1773- page_cache_release(bitmap_page);
1774- if (page)
1775- page_cache_release(page);
1776- return ret;
1777-}
1778-
1779 static noinline_for_stack int
1780 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1781 {
1782@@ -2020,7 +1775,7 @@
1783 group = 0;
1784
1785 /* quick check to skip empty groups */
1786+ grp = ext4_get_group_info(ac->ac_sb, group);
1787- grp = ext4_get_group_info(sb, group);
1788 if (grp->bb_free == 0)
1789 continue;
1790
1791@@ -2033,9 +1788,10 @@
1792 * we need full data about the group
1793 * to make a good selection
1794 */
1795+ err = ext4_mb_load_buddy(sb, group, &e4b);
1796- err = ext4_mb_init_group(sb, group);
1797 if (err)
1798 goto out;
1799+ ext4_mb_release_desc(&e4b);
1800 }
1801
1802 /*
1803@@ -2543,8 +2299,6 @@
1804 }
1805
1806 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
1807- init_rwsem(&meta_group_info[i]->alloc_sem);
1808-		meta_group_info[i]->bb_free_root.rb_node = NULL;
1809
1810 #ifdef DOUBLE_CHECK
1811 {
1812@@ -2571,6 +2325,54 @@
1813 } /* ext4_mb_add_groupinfo */
1814
1815 /*
1816+ * Add a group to the existing groups.
1817+ * This function is used for online resize
1818+ */
1819+int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
1820+ struct ext4_group_desc *desc)
1821+{
1822+ struct ext4_sb_info *sbi = EXT4_SB(sb);
1823+ struct inode *inode = sbi->s_buddy_cache;
1824+ int blocks_per_page;
1825+ int block;
1826+ int pnum;
1827+ struct page *page;
1828+ int err;
1829+
1830+	/* Add group based on group descriptor */
1831+ err = ext4_mb_add_groupinfo(sb, group, desc);
1832+ if (err)
1833+ return err;
1834+
1835+ /*
1836+	 * Cache pages containing dynamic mb_alloc data (buddy and bitmap
1837+	 * data) are marked not up to date so that they will be re-initialized
1838+	 * during the next call to ext4_mb_load_buddy
1839+ */
1840+
1841+ /* Set buddy page as not up to date */
1842+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1843+ block = group * 2;
1844+ pnum = block / blocks_per_page;
1845+ page = find_get_page(inode->i_mapping, pnum);
1846+ if (page != NULL) {
1847+ ClearPageUptodate(page);
1848+ page_cache_release(page);
1849+ }
1850+
1851+ /* Set bitmap page as not up to date */
1852+ block++;
1853+ pnum = block / blocks_per_page;
1854+ page = find_get_page(inode->i_mapping, pnum);
1855+ if (page != NULL) {
1856+ ClearPageUptodate(page);
1857+ page_cache_release(page);
1858+ }
1859+
1860+ return 0;
1861+}
1862+
1863+/*
1864 * Update an existing group.
1865 * This function is used for online resize
1866 */
1867@@ -2693,12 +2495,10 @@
1868 clear_opt(sbi->s_mount_opt, MBALLOC);
1869 return -ENOMEM;
1870 }
1871-
1872- i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
1873 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
1874 if (sbi->s_mb_maxs == NULL) {
1875 clear_opt(sbi->s_mount_opt, MBALLOC);
1876+ kfree(sbi->s_mb_maxs);
1877- kfree(sbi->s_mb_offsets);
1878 return -ENOMEM;
1879 }
1880
1881@@ -2858,11 +2658,13 @@
1882 static noinline_for_stack void
1883 ext4_mb_free_committed_blocks(struct super_block *sb)
1884 {
1885+ struct ext4_sb_info *sbi = EXT4_SB(sb);
1886+ int err;
1887+ int i;
1888+ int count = 0;
1889+ int count2 = 0;
1890+ struct ext4_free_metadata *md;
1891 struct ext4_buddy e4b;
1892- struct ext4_group_info *db;
1893- struct ext4_sb_info *sbi = EXT4_SB(sb);
1894- int err, count = 0, count2 = 0;
1895- struct ext4_free_data *entry;
1896
1897 if (list_empty(&sbi->s_committed_transaction))
1898 return;
1899@@ -2870,46 +2672,44 @@
1900 	/* there are still committed blocks to be freed */
1901 do {
1902 /* get next array of blocks */
1903+ md = NULL;
1904- entry = NULL;
1905 spin_lock(&sbi->s_md_lock);
1906 if (!list_empty(&sbi->s_committed_transaction)) {
1907+ md = list_entry(sbi->s_committed_transaction.next,
1908+ struct ext4_free_metadata, list);
1909+ list_del(&md->list);
1910- entry = list_entry(sbi->s_committed_transaction.next,
1911- struct ext4_free_data, list);
1912- list_del(&entry->list);
1913 }
1914 spin_unlock(&sbi->s_md_lock);
1915
1916+ if (md == NULL)
1917- if (entry == NULL)
1918 break;
1919
1920 mb_debug("gonna free %u blocks in group %lu (0x%p):",
1921+ md->num, md->group, md);
1922- entry->count, entry->group, entry);
1923
1924+ err = ext4_mb_load_buddy(sb, md->group, &e4b);
1925- err = ext4_mb_load_buddy(sb, entry->group, &e4b);
1926 /* we expect to find existing buddy because it's pinned */
1927 BUG_ON(err != 0);
1928
1929- db = e4b.bd_info;
1930 /* there are blocks to put in buddy to make them really free */
1931+ count += md->num;
1932- count += entry->count;
1933 count2++;
1934+ ext4_lock_group(sb, md->group);
1935+ for (i = 0; i < md->num; i++) {
1936+ mb_debug(" %u", md->blocks[i]);
1937+ mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
1938- ext4_lock_group(sb, entry->group);
1939- /* Take it out of per group rb tree */
1940- rb_erase(&entry->node, &(db->bb_free_root));
1941- mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
1942-
1943- if (!db->bb_free_root.rb_node) {
1944- /* No more items in the per group rb tree
1945- * balance refcounts from ext4_mb_free_metadata()
1946- */
1947- page_cache_release(e4b.bd_buddy_page);
1948- page_cache_release(e4b.bd_bitmap_page);
1949 }
1950+ mb_debug("\n");
1951+ ext4_unlock_group(sb, md->group);
1952- ext4_unlock_group(sb, entry->group);
1953
1954+ /* balance refcounts from ext4_mb_free_metadata() */
1955+ page_cache_release(e4b.bd_buddy_page);
1956+ page_cache_release(e4b.bd_bitmap_page);
1957+
1958+ kfree(md);
1959- kmem_cache_free(ext4_free_ext_cachep, entry);
1960 ext4_mb_release_desc(&e4b);
1961+
1962+ } while (md);
1963- } while (1);
1964
1965 mb_debug("freed %u blocks in %u structures\n", count, count2);
1966 }
1967@@ -3064,16 +2864,6 @@
1968 kmem_cache_destroy(ext4_pspace_cachep);
1969 return -ENOMEM;
1970 }
1971-
1972- ext4_free_ext_cachep =
1973- kmem_cache_create("ext4_free_block_extents",
1974- sizeof(struct ext4_free_data),
1975- 0, SLAB_RECLAIM_ACCOUNT, NULL);
1976- if (ext4_free_ext_cachep == NULL) {
1977- kmem_cache_destroy(ext4_pspace_cachep);
1978- kmem_cache_destroy(ext4_ac_cachep);
1979- return -ENOMEM;
1980- }
1981 #ifdef CONFIG_PROC_FS
1982 proc_root_ext4 = proc_mkdir("fs/ext4", NULL);
1983 if (proc_root_ext4 == NULL)
1984@@ -3090,7 +2880,6 @@
1985 #ifdef CONFIG_PROC_FS
1986 remove_proc_entry("fs/ext4", NULL);
1987 #endif
1988- kmem_cache_destroy(ext4_free_ext_cachep);
1989 }
1990
1991
1992@@ -3152,8 +2941,8 @@
1993 in_range(block + len - 1, ext4_inode_table(sb, gdp),
1994 EXT4_SB(sb)->s_itb_per_group)) {
1995 ext4_error(sb, __func__,
1996+ "Allocating block in system zone - block = %llu",
1997+ block);
1998- "Allocating block %llu in system zone of %lu group\n",
1999- block, ac->ac_b_ex.fe_group);
2000 		/* The file system is mounted not to panic on error:
2001 		 * fix the bitmap and repeat the block allocation.
2002 		 * We leak some of the blocks here.
2003@@ -3175,9 +2964,10 @@
2004 }
2005 }
2006 #endif
2007+ mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data,
2008+ ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
2009+
2010 spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2011- mb_set_bits(NULL, bitmap_bh->b_data,
2012- ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
2013 if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
2014 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
2015 gdp->bg_free_blocks_count =
2016@@ -3400,7 +3190,7 @@
2017 }
2018 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
2019 start > ac->ac_o_ex.fe_logical);
2020+ BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
2021- BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
2022
2023 /* now prepare goal request */
2024
2025@@ -3610,37 +3400,10 @@
2026 ac->ac_criteria = 20;
2027 return 1;
2028 }
2029-
2030 return 0;
2031 }
2032
2033 /*
2034- * the function goes through all blocks freed in the group
2035- * but not yet committed and marks them as used in the in-core bitmap.
2036- * the buddy must be generated from this bitmap
2037- * Needs to be called with the ext4 group lock (ext4_lock_group)
2038- */
2039-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
2040- ext4_group_t group)
2041-{
2042- struct rb_node *n;
2043- struct ext4_group_info *grp;
2044- struct ext4_free_data *entry;
2045-
2046- grp = ext4_get_group_info(sb, group);
2047- n = rb_first(&(grp->bb_free_root));
2048-
2049- while (n) {
2050- entry = rb_entry(n, struct ext4_free_data, node);
2051- mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
2052- bitmap, entry->start_blk,
2053- entry->count);
2054- n = rb_next(n);
2055- }
2056- return;
2057-}
2058-
2059-/*
2060  * the function goes through all preallocations in this group and marks them
2061  * as used in the in-core bitmap. the buddy must be generated from this bitmap
2062  * Needs to be called with the ext4 group lock (ext4_lock_group)
2063@@ -3698,7 +3461,6 @@
2064 struct super_block *sb, struct ext4_prealloc_space *pa)
2065 {
2066 unsigned long grp;
2067- ext4_fsblk_t grp_blk;
2068
2069 if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
2070 return;
2071@@ -3713,12 +3475,8 @@
2072 pa->pa_deleted = 1;
2073 spin_unlock(&pa->pa_lock);
2074
2075+ /* -1 is to protect from crossing allocation group */
2076+ ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
2077- grp_blk = pa->pa_pstart;
2078- /* If linear, pa_pstart may be in the next group when pa is used up */
2079- if (pa->pa_linear)
2080- grp_blk--;
2081-
2082- ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
2083
2084 /*
2085 * possible race:
2086@@ -3807,8 +3565,6 @@
2087 pa->pa_free = pa->pa_len;
2088 atomic_set(&pa->pa_count, 1);
2089 spin_lock_init(&pa->pa_lock);
2090- INIT_LIST_HEAD(&pa->pa_inode_list);
2091- INIT_LIST_HEAD(&pa->pa_group_list);
2092 pa->pa_deleted = 0;
2093 pa->pa_linear = 0;
2094
2095@@ -3867,7 +3623,6 @@
2096 atomic_set(&pa->pa_count, 1);
2097 spin_lock_init(&pa->pa_lock);
2098 INIT_LIST_HEAD(&pa->pa_inode_list);
2099- INIT_LIST_HEAD(&pa->pa_group_list);
2100 pa->pa_deleted = 0;
2101 pa->pa_linear = 1;
2102
2103@@ -4411,7 +4166,6 @@
2104 ac->ac_pa = NULL;
2105 ac->ac_bitmap_page = NULL;
2106 ac->ac_buddy_page = NULL;
2107- ac->alloc_semp = NULL;
2108 ac->ac_lg = NULL;
2109
2110 	/* we have to define context: we'll work with a file or
2111@@ -4532,7 +4286,7 @@
2112 pa_inode_list) {
2113 spin_lock(&tmp_pa->pa_lock);
2114 if (tmp_pa->pa_deleted) {
2115+ spin_unlock(&pa->pa_lock);
2116- spin_unlock(&tmp_pa->pa_lock);
2117 continue;
2118 }
2119 if (!added && pa->pa_free < tmp_pa->pa_free) {
2120@@ -4577,23 +4331,18 @@
2121 pa->pa_free -= ac->ac_b_ex.fe_len;
2122 pa->pa_len -= ac->ac_b_ex.fe_len;
2123 spin_unlock(&pa->pa_lock);
2124+ /*
2125+ * We want to add the pa to the right bucket.
2126+ * Remove it from the list and while adding
2127+ * make sure the list to which we are adding
2128+ * doesn't grow big.
2129+ */
2130+ if (likely(pa->pa_free)) {
2131+ spin_lock(pa->pa_obj_lock);
2132+ list_del_rcu(&pa->pa_inode_list);
2133+ spin_unlock(pa->pa_obj_lock);
2134+ ext4_mb_add_n_trim(ac);
2135+ }
2136- }
2137- }
2138- if (ac->alloc_semp)
2139- up_read(ac->alloc_semp);
2140- if (pa) {
2141- /*
2142- * We want to add the pa to the right bucket.
2143- * Remove it from the list and while adding
2144- * make sure the list to which we are adding
2145- * doesn't grow big. We need to release
2146- * alloc_semp before calling ext4_mb_add_n_trim()
2147- */
2148- if (pa->pa_linear && likely(pa->pa_free)) {
2149- spin_lock(pa->pa_obj_lock);
2150- list_del_rcu(&pa->pa_inode_list);
2151- spin_unlock(pa->pa_obj_lock);
2152- ext4_mb_add_n_trim(ac);
2153 }
2154 ext4_mb_put_pa(ac, ac->ac_sb, pa);
2155 }
2156@@ -4700,14 +4449,10 @@
2157 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
2158 ext4_mb_new_preallocation(ac);
2159 }
2160+
2161 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
2162 *errp = ext4_mb_mark_diskspace_used(ac, handle);
2163 if (*errp == -EAGAIN) {
2164- /*
2165- * drop the reference that we took
2166- * in ext4_mb_use_best_found
2167- */
2168- ext4_mb_release_context(ac);
2169 ac->ac_b_ex.fe_group = 0;
2170 ac->ac_b_ex.fe_start = 0;
2171 ac->ac_b_ex.fe_len = 0;
2172@@ -4772,97 +4517,65 @@
2173 ext4_mb_free_committed_blocks(sb);
2174 }
2175
2176-/*
2177- * We can merge two free data extents only if the physical blocks
2178- * are contiguous, AND the extents were freed by the same transaction,
2179- * AND the blocks are associated with the same group.
2180- */
2181-static int can_merge(struct ext4_free_data *entry1,
2182- struct ext4_free_data *entry2)
2183-{
2184- if ((entry1->t_tid == entry2->t_tid) &&
2185- (entry1->group == entry2->group) &&
2186- ((entry1->start_blk + entry1->count) == entry2->start_blk))
2187- return 1;
2188- return 0;
2189-}
2190-
2191 static noinline_for_stack int
2192 ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
2193+ ext4_group_t group, ext4_grpblk_t block, int count)
2194- struct ext4_free_data *new_entry)
2195 {
2196- ext4_grpblk_t block;
2197- struct ext4_free_data *entry;
2198 struct ext4_group_info *db = e4b->bd_info;
2199 struct super_block *sb = e4b->bd_sb;
2200 struct ext4_sb_info *sbi = EXT4_SB(sb);
2201+ struct ext4_free_metadata *md;
2202+ int i;
2203- struct rb_node **n = &db->bb_free_root.rb_node, *node;
2204- struct rb_node *parent = NULL, *new_node;
2205
2206 BUG_ON(e4b->bd_bitmap_page == NULL);
2207 BUG_ON(e4b->bd_buddy_page == NULL);
2208
2209+ ext4_lock_group(sb, group);
2210+ for (i = 0; i < count; i++) {
2211+ md = db->bb_md_cur;
2212+ if (md && db->bb_tid != handle->h_transaction->t_tid) {
2213+ db->bb_md_cur = NULL;
2214+ md = NULL;
2215- new_node = &new_entry->node;
2216- block = new_entry->start_blk;
2217-
2218- if (!*n) {
2219-		/* first free block extent. We need to
2220-		 * protect the buddy cache from being freed,
2221-		 * otherwise we'll refresh it from
2222-		 * the on-disk bitmap and lose not-yet-available
2223-		 * blocks */
2224- page_cache_get(e4b->bd_buddy_page);
2225- page_cache_get(e4b->bd_bitmap_page);
2226- }
2227- while (*n) {
2228- parent = *n;
2229- entry = rb_entry(parent, struct ext4_free_data, node);
2230- if (block < entry->start_blk)
2231- n = &(*n)->rb_left;
2232- else if (block >= (entry->start_blk + entry->count))
2233- n = &(*n)->rb_right;
2234- else {
2235- ext4_error(sb, __func__,
2236- "Double free of blocks %d (%d %d)\n",
2237- block, entry->start_blk, entry->count);
2238- return 0;
2239 }
2240- }
2241
2242+ if (md == NULL) {
2243+ ext4_unlock_group(sb, group);
2244+ md = kmalloc(sizeof(*md), GFP_NOFS);
2245+ if (md == NULL)
2246+ return -ENOMEM;
2247+ md->num = 0;
2248+ md->group = group;
2249+
2250+ ext4_lock_group(sb, group);
2251+ if (db->bb_md_cur == NULL) {
2252+ spin_lock(&sbi->s_md_lock);
2253+ list_add(&md->list, &sbi->s_active_transaction);
2254+ spin_unlock(&sbi->s_md_lock);
2255+ /* protect buddy cache from being freed,
2256+ * otherwise we'll refresh it from
2257+ * on-disk bitmap and lose not-yet-available
2258+ * blocks */
2259+ page_cache_get(e4b->bd_buddy_page);
2260+ page_cache_get(e4b->bd_bitmap_page);
2261+ db->bb_md_cur = md;
2262+ db->bb_tid = handle->h_transaction->t_tid;
2263+ mb_debug("new md 0x%p for group %lu\n",
2264+ md, md->group);
2265+ } else {
2266+ kfree(md);
2267+ md = db->bb_md_cur;
2268+ }
2269- rb_link_node(new_node, parent, n);
2270- rb_insert_color(new_node, &db->bb_free_root);
2271-
2272- /* Now try to see the extent can be merged to left and right */
2273- node = rb_prev(new_node);
2274- if (node) {
2275- entry = rb_entry(node, struct ext4_free_data, node);
2276- if (can_merge(entry, new_entry)) {
2277- new_entry->start_blk = entry->start_blk;
2278- new_entry->count += entry->count;
2279- rb_erase(node, &(db->bb_free_root));
2280- spin_lock(&sbi->s_md_lock);
2281- list_del(&entry->list);
2282- spin_unlock(&sbi->s_md_lock);
2283- kmem_cache_free(ext4_free_ext_cachep, entry);
2284 }
2285- }
2286
2287+ BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
2288+ md->blocks[md->num] = block + i;
2289+ md->num++;
2290+ if (md->num == EXT4_BB_MAX_BLOCKS) {
2291+ /* no more space, put full container on a sb's list */
2292+ db->bb_md_cur = NULL;
2293- node = rb_next(new_node);
2294- if (node) {
2295- entry = rb_entry(node, struct ext4_free_data, node);
2296- if (can_merge(new_entry, entry)) {
2297- new_entry->count += entry->count;
2298- rb_erase(node, &(db->bb_free_root));
2299- spin_lock(&sbi->s_md_lock);
2300- list_del(&entry->list);
2301- spin_unlock(&sbi->s_md_lock);
2302- kmem_cache_free(ext4_free_ext_cachep, entry);
2303 }
2304 }
2305+ ext4_unlock_group(sb, group);
2306- /* Add the extent to active_transaction list */
2307- spin_lock(&sbi->s_md_lock);
2308- list_add(&new_entry->list, &sbi->s_active_transaction);
2309- spin_unlock(&sbi->s_md_lock);
2310 return 0;
2311 }
2312
2313@@ -4962,6 +4675,11 @@
2314 err = ext4_journal_get_write_access(handle, gd_bh);
2315 if (err)
2316 goto error_return;
2317+
2318+ err = ext4_mb_load_buddy(sb, block_group, &e4b);
2319+ if (err)
2320+ goto error_return;
2321+
2322 #ifdef AGGRESSIVE_CHECK
2323 {
2324 int i;
2325@@ -4969,6 +4687,13 @@
2326 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
2327 }
2328 #endif
2329+ mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
2330+ bit, count);
2331+
2332+ /* We dirtied the bitmap block */
2333+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
2334+ err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2335+
2336 if (ac) {
2337 ac->ac_b_ex.fe_group = block_group;
2338 ac->ac_b_ex.fe_start = bit;
2339@@ -4976,33 +4701,12 @@
2340 ext4_mb_store_history(ac);
2341 }
2342
2343- err = ext4_mb_load_buddy(sb, block_group, &e4b);
2344- if (err)
2345- goto error_return;
2346 if (metadata) {
2347+ /* blocks being freed are metadata. these blocks shouldn't
2348+ * be used until this transaction is committed */
2349+ ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
2350- struct ext4_free_data *new_entry;
2351- /*
2352- * blocks being freed are metadata. these blocks shouldn't
2353- * be used until this transaction is committed
2354- */
2355- new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
2356- new_entry->start_blk = bit;
2357- new_entry->group = block_group;
2358- new_entry->count = count;
2359- new_entry->t_tid = handle->h_transaction->t_tid;
2360- ext4_lock_group(sb, block_group);
2361- mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
2362- bit, count);
2363- ext4_mb_free_metadata(handle, &e4b, new_entry);
2364- ext4_unlock_group(sb, block_group);
2365 } else {
2366 ext4_lock_group(sb, block_group);
2367- /* need to update group_info->bb_free and bitmap
2368- * with group lock held. generate_buddy look at
2369- * them with group lock_held
2370- */
2371- mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
2372- bit, count);
2373 mb_free_blocks(inode, &e4b, bit, count);
2374 ext4_mb_return_to_preallocation(inode, &e4b, block, count);
2375 ext4_unlock_group(sb, block_group);
2376@@ -5025,10 +4729,6 @@
2377
2378 *freed += count;
2379
2380- /* We dirtied the bitmap block */
2381- BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
2382- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
2383-
2384 /* And the group descriptor block */
2385 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
2386 ret = ext4_journal_dirty_metadata(handle, gd_bh);
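
The can_merge() helper removed from this file coalesces pending free extents only under three conditions: freed by the same transaction, in the same group, and physically contiguous. A minimal stand-alone model of that rule, with simplified stand-in types rather than the kernel structs:

#include <stdio.h>

struct free_extent {
	unsigned int tid;	/* transaction that freed the blocks */
	unsigned int group;	/* block group */
	unsigned int start_blk;	/* first block within the group */
	unsigned int count;	/* number of blocks */
};

static int can_merge(const struct free_extent *a, const struct free_extent *b)
{
	return a->tid == b->tid &&
	       a->group == b->group &&
	       a->start_blk + a->count == b->start_blk;
}

int main(void)
{
	struct free_extent a = { 7, 3, 100, 16 };
	struct free_extent b = { 7, 3, 116, 8 };

	printf("mergeable: %d\n", can_merge(&a, &b));	/* prints 1 */
	b.tid = 8;					/* different transaction */
	printf("mergeable: %d\n", can_merge(&a, &b));	/* prints 0 */
	return 0;
}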
2387reverted:
2388--- b/fs/ext4/mballoc.h
2389+++ a/fs/ext4/mballoc.h
2390@@ -18,7 +18,6 @@
2391 #include <linux/pagemap.h>
2392 #include <linux/seq_file.h>
2393 #include <linux/version.h>
2394-#include <linux/mutex.h>
2395 #include "ext4_jbd2.h"
2396 #include "ext4.h"
2397 #include "group.h"
2398@@ -97,27 +96,25 @@
2399 */
2400 #define MB_DEFAULT_GROUP_PREALLOC 512
2401
2402+static struct kmem_cache *ext4_pspace_cachep;
2403+static struct kmem_cache *ext4_ac_cachep;
2404-struct ext4_free_data {
2405- /* this links the free block information from group_info */
2406- struct rb_node node;
2407
2408+#ifdef EXT4_BB_MAX_BLOCKS
2409+#undef EXT4_BB_MAX_BLOCKS
2410+#endif
2411+#define EXT4_BB_MAX_BLOCKS 30
2412- /* this links the free block information from ext4_sb_info */
2413- struct list_head list;
2414
2415+struct ext4_free_metadata {
2416- /* group which free block extent belongs */
2417 ext4_group_t group;
2418+ unsigned short num;
2419+ ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
2420+ struct list_head list;
2421-
2422- /* free block extent */
2423- ext4_grpblk_t start_blk;
2424- ext4_grpblk_t count;
2425-
2426- /* transaction which freed this extent */
2427- tid_t t_tid;
2428 };
2429
2430 struct ext4_group_info {
2431 unsigned long bb_state;
2432+ unsigned long bb_tid;
2433+ struct ext4_free_metadata *bb_md_cur;
2434- struct rb_root bb_free_root;
2435 unsigned short bb_first_free;
2436 unsigned short bb_free;
2437 unsigned short bb_fragments;
2438@@ -125,7 +122,6 @@
2439 #ifdef DOUBLE_CHECK
2440 void *bb_bitmap;
2441 #endif
2442- struct rw_semaphore alloc_sem;
2443 unsigned short bb_counters[];
2444 };
2445
2446@@ -213,11 +209,6 @@
2447 __u8 ac_op; /* operation, for history only */
2448 struct page *ac_bitmap_page;
2449 struct page *ac_buddy_page;
2450- /*
2451- * pointer to the held semaphore upon successful
2452- * block allocation
2453- */
2454- struct rw_semaphore *alloc_semp;
2455 struct ext4_prealloc_space *ac_pa;
2456 struct ext4_locality_group *ac_lg;
2457 };
2458@@ -251,7 +242,6 @@
2459 struct super_block *bd_sb;
2460 __u16 bd_blkbits;
2461 ext4_group_t bd_group;
2462- struct rw_semaphore *alloc_semp;
2463 };
2464 #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
2465 #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
2466@@ -261,6 +251,8 @@
2467 {
2468 return;
2469 }
2470+#else
2471+static void ext4_mb_store_history(struct ext4_allocation_context *ac);
2472 #endif
2473
2474 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
2475@@ -268,6 +260,19 @@
2476 static struct proc_dir_entry *proc_root_ext4;
2477 struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
2478
2479+static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
2480+ ext4_group_t group);
2481+static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
2482+static void ext4_mb_free_committed_blocks(struct super_block *);
2483+static void ext4_mb_return_to_preallocation(struct inode *inode,
2484+ struct ext4_buddy *e4b, sector_t block,
2485+ int count);
2486+static void ext4_mb_put_pa(struct ext4_allocation_context *,
2487+ struct super_block *, struct ext4_prealloc_space *pa);
2488+static int ext4_mb_init_per_dev_proc(struct super_block *sb);
2489+static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
2490+
2491+
2492 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
2493 {
2494 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
2495@@ -292,7 +297,7 @@
2496 &(grinfo->bb_state));
2497 }
2498
2499+static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
2500-static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
2501 struct ext4_free_extent *fex)
2502 {
2503 ext4_fsblk_t block;
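
The struct ext4_free_metadata restored above batches freed block numbers in a fixed 30-entry array per group instead of the rb-tree of extents. A simplified userspace model of that container — not the kernel structure; the queueing of a full container is reduced to a printf:

#include <stdio.h>
#include <string.h>

#define BB_MAX_BLOCKS 30

struct free_metadata {
	unsigned int group;
	unsigned short num;
	unsigned int blocks[BB_MAX_BLOCKS];
};

/* Returns 1 when the container filled up and should be queued for the
 * commit-time free pass. */
static int record_freed_block(struct free_metadata *md, unsigned int block)
{
	md->blocks[md->num++] = block;
	return md->num == BB_MAX_BLOCKS;
}

int main(void)
{
	struct free_metadata md;
	unsigned int b;

	memset(&md, 0, sizeof(md));
	md.group = 5;
	for (b = 0; b < 35; b++) {
		if (record_freed_block(&md, b)) {
			printf("container full at block %u, queueing\n", b);
			memset(&md, 0, sizeof(md));
			md.group = 5;
		}
	}
	printf("%d blocks left in the open container\n", md.num);
	return 0;
}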
2504reverted:
2505--- b/fs/ext4/migrate.c
2506+++ a/fs/ext4/migrate.c
2507@@ -480,7 +480,7 @@
2508 + 1);
2509 if (IS_ERR(handle)) {
2510 retval = PTR_ERR(handle);
2511+ goto err_out;
2512- return retval;
2513 }
2514 tmp_inode = ext4_new_inode(handle,
2515 inode->i_sb->s_root->d_inode,
2516@@ -488,7 +488,8 @@
2517 if (IS_ERR(tmp_inode)) {
2518 retval = -ENOMEM;
2519 ext4_journal_stop(handle);
2520+ tmp_inode = NULL;
2521+ goto err_out;
2522- return retval;
2523 }
2524 i_size_write(tmp_inode, i_size_read(inode));
2525 /*
2526@@ -616,7 +617,8 @@
2527
2528 ext4_journal_stop(handle);
2529
2530+ if (tmp_inode)
2531+ iput(tmp_inode);
2532- iput(tmp_inode);
2533
2534 return retval;
2535 }
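
The migrate.c hunks drop a centralized error path; for reference, the pattern being reverted funnels every failure through one label and releases the temporary inode only if it was actually obtained. A toy sketch with stub helpers standing in for ext4_new_inode() and iput():

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>

struct inode { int users; };

static struct inode *new_tmp_inode(int fail)
{
	return fail ? NULL : calloc(1, sizeof(struct inode));
}

static void iput(struct inode *inode)
{
	free(inode);
}

static int migrate(int fail_inode)
{
	struct inode *tmp_inode = NULL;
	int retval = 0;

	tmp_inode = new_tmp_inode(fail_inode);
	if (tmp_inode == NULL) {
		retval = -ENOMEM;
		goto err_out;
	}
	/* ... migration work would happen here ... */
err_out:
	if (tmp_inode)		/* guard: may be NULL on early failure */
		iput(tmp_inode);
	return retval;
}

int main(void)
{
	printf("ok path: %d\n", migrate(0));
	printf("failure path: %d\n", migrate(1));
	return 0;
}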
2536reverted:
2537--- b/fs/ext4/namei.c
2538+++ a/fs/ext4/namei.c
2539@@ -371,8 +371,6 @@
2540 goto fail;
2541 }
2542 hinfo->hash_version = root->info.hash_version;
2543- if (hinfo->hash_version <= DX_HASH_TEA)
2544- hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2545 hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2546 if (dentry)
2547 ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
2548@@ -642,9 +640,6 @@
2549 dir = dir_file->f_path.dentry->d_inode;
2550 if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
2551 hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
2552- if (hinfo.hash_version <= DX_HASH_TEA)
2553- hinfo.hash_version +=
2554- EXT4_SB(dir->i_sb)->s_hash_unsigned;
2555 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2556 count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
2557 start_hash, start_minor_hash);
2558@@ -1055,16 +1050,8 @@
2559 return ERR_PTR(-EIO);
2560 }
2561 inode = ext4_iget(dir->i_sb, ino);
2562+ if (IS_ERR(inode))
2563+ return ERR_CAST(inode);
2564- if (unlikely(IS_ERR(inode))) {
2565- if (PTR_ERR(inode) == -ESTALE) {
2566- ext4_error(dir->i_sb, __func__,
2567- "deleted inode referenced: %u",
2568- ino);
2569- return ERR_PTR(-EIO);
2570- } else {
2571- return ERR_CAST(inode);
2572- }
2573- }
2574 }
2575 return d_splice_alias(inode, dentry);
2576 }
2577@@ -1390,7 +1377,7 @@
2578 struct fake_dirent *fde;
2579
2580 blocksize = dir->i_sb->s_blocksize;
2581+ dxtrace(printk("Creating index\n"));
2582- dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2583 retval = ext4_journal_get_write_access(handle, bh);
2584 if (retval) {
2585 ext4_std_error(dir->i_sb, retval);
2586@@ -1399,20 +1386,6 @@
2587 }
2588 root = (struct dx_root *) bh->b_data;
2589
2590- /* The 0th block becomes the root, move the dirents out */
2591- fde = &root->dotdot;
2592- de = (struct ext4_dir_entry_2 *)((char *)fde +
2593- ext4_rec_len_from_disk(fde->rec_len));
2594- if ((char *) de >= (((char *) root) + blocksize)) {
2595- ext4_error(dir->i_sb, __func__,
2596- "invalid rec_len for '..' in inode %lu",
2597- dir->i_ino);
2598- brelse(bh);
2599- return -EIO;
2600- }
2601- len = ((char *) root) + blocksize - (char *) de;
2602-
2603- /* Allocate new block for the 0th block's dirents */
2604 bh2 = ext4_append (handle, dir, &block, &retval);
2605 if (!(bh2)) {
2606 brelse(bh);
2607@@ -1421,6 +1394,11 @@
2608 EXT4_I(dir)->i_flags |= EXT4_INDEX_FL;
2609 data1 = bh2->b_data;
2610
2611+ /* The 0th block becomes the root, move the dirents out */
2612+ fde = &root->dotdot;
2613+ de = (struct ext4_dir_entry_2 *)((char *)fde +
2614+ ext4_rec_len_from_disk(fde->rec_len));
2615+ len = ((char *) root) + blocksize - (char *) de;
2616 memcpy (data1, de, len);
2617 de = (struct ext4_dir_entry_2 *) data1;
2618 top = data1 + len;
2619@@ -1440,8 +1418,6 @@
2620
2621 /* Initialize as for dx_probe */
2622 hinfo.hash_version = root->info.hash_version;
2623- if (hinfo.hash_version <= DX_HASH_TEA)
2624- hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2625 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2626 ext4fs_dirhash(name, namelen, &hinfo);
2627 frame = frames;
2628@@ -2314,7 +2290,7 @@
2629 struct inode * old_inode, * new_inode;
2630 struct buffer_head * old_bh, * new_bh, * dir_bh;
2631 struct ext4_dir_entry_2 * old_de, * new_de;
2632+ int retval;
2633- int retval, force_da_alloc = 0;
2634
2635 old_bh = new_bh = dir_bh = NULL;
2636
2637@@ -2452,7 +2428,6 @@
2638 ext4_mark_inode_dirty(handle, new_inode);
2639 if (!new_inode->i_nlink)
2640 ext4_orphan_add(handle, new_inode);
2641- force_da_alloc = 1;
2642 }
2643 retval = 0;
2644
2645@@ -2461,8 +2436,6 @@
2646 brelse (old_bh);
2647 brelse (new_bh);
2648 ext4_journal_stop(handle);
2649- if (retval == 0 && force_da_alloc)
2650- ext4_alloc_da_blocks(old_inode);
2651 return retval;
2652 }
2653
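
The s_hash_unsigned logic removed above exists because the legacy dirhash folded name bytes through plain char, whose signedness varies by architecture, so the same name could hash differently on different machines. The toy fold below is not the real dx hash; it only demonstrates where the sign of char changes the result for bytes >= 0x80:

#include <stdio.h>

static unsigned int fold(const char *name, int len, int treat_unsigned)
{
	unsigned int hash = 0x12a3fe2d;
	int i;

	for (i = 0; i < len; i++) {
		int c = treat_unsigned ? (unsigned char)name[i]
				       : (signed char)name[i];
		hash = hash * 31 + c;
	}
	return hash;
}

int main(void)
{
	/* A name containing a byte >= 0x80 exposes the difference. */
	const char name[] = { 'f', (char)0xe9, 'e', 0 };

	printf("signed fold:   %#x\n", fold(name, 3, 0));
	printf("unsigned fold: %#x\n", fold(name, 3, 1));
	return 0;
}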
2654reverted:
2655--- b/fs/ext4/resize.c
2656+++ a/fs/ext4/resize.c
2657@@ -284,9 +284,11 @@
2658 if ((err = extend_or_restart_transaction(handle, 2, bh)))
2659 goto exit_bh;
2660
2661+ mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
2662+ bh->b_data);
2663- mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
2664 ext4_journal_dirty_metadata(handle, bh);
2665 brelse(bh);
2666+
2667 /* Mark unused entries in inode bitmap used */
2668 ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
2669 input->inode_bitmap, input->inode_bitmap - start);
2670@@ -295,7 +297,7 @@
2671 goto exit_journal;
2672 }
2673
2674+ mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
2675- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
2676 bh->b_data);
2677 ext4_journal_dirty_metadata(handle, bh);
2678 exit_bh:
2679@@ -745,7 +747,6 @@
2680 struct inode *inode = NULL;
2681 handle_t *handle;
2682 int gdb_off, gdb_num;
2683- int num_grp_locked = 0;
2684 int err, err2;
2685
2686 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
2687@@ -786,7 +787,6 @@
2688 }
2689 }
2690
2691-
2692 if ((err = verify_group_input(sb, input)))
2693 goto exit_put;
2694
2695@@ -855,18 +855,15 @@
2696 * using the new disk blocks.
2697 */
2698
2699- num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
2700 /* Update group descriptor block for new group */
2701 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
2702 gdb_off * EXT4_DESC_SIZE(sb));
2703
2704- memset(gdp, 0, EXT4_DESC_SIZE(sb));
2705 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
2706 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
2707 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
2708 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
2709 gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb));
2710- gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
2711 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
2712
2713 /*
2714@@ -874,11 +871,9 @@
2715 * descriptor
2716 */
2717 if (test_opt(sb, MBALLOC)) {
2718+ err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
2719+ if (err)
2720- err = ext4_mb_add_groupinfo(sb, input->group, gdp);
2721- if (err) {
2722- ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
2723 goto exit_journal;
2724- }
2725 }
2726 /*
2727 * Make the new blocks and inodes valid next. We do this before
2728@@ -920,7 +915,6 @@
2729
2730 /* Update the global fs size fields */
2731 sbi->s_groups_count++;
2732- ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
2733
2734 ext4_journal_dirty_metadata(handle, primary);
2735
2736@@ -982,7 +976,9 @@
2737 struct buffer_head * bh;
2738 handle_t *handle;
2739 int err;
2740+ unsigned long freed_blocks;
2741 ext4_group_t group;
2742+ struct ext4_group_info *grp;
2743
2744 /* We don't need to worry about locking wrt other resizers just
2745 * yet: we're going to revalidate es->s_blocks_count after
2746@@ -1081,13 +1077,50 @@
2747 unlock_super(sb);
2748 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
2749 o_blocks_count + add);
2750+ ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
2751- /* We add the blocks to the bitmap and set the group need init bit */
2752- ext4_add_groupblocks(handle, sb, o_blocks_count, add);
2753 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
2754 o_blocks_count + add);
2755 if ((err = ext4_journal_stop(handle)))
2756 goto exit_put;
2757
2758+ /*
2759+ * Mark mballoc pages as not up to date so that they will be updated
2760+ * next time they are loaded by ext4_mb_load_buddy.
2761+ */
2762+ if (test_opt(sb, MBALLOC)) {
2763+ struct ext4_sb_info *sbi = EXT4_SB(sb);
2764+ struct inode *inode = sbi->s_buddy_cache;
2765+ int blocks_per_page;
2766+ int block;
2767+ int pnum;
2768+ struct page *page;
2769+
2770+ /* Set buddy page as not up to date */
2771+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
2772+ block = group * 2;
2773+ pnum = block / blocks_per_page;
2774+ page = find_get_page(inode->i_mapping, pnum);
2775+ if (page != NULL) {
2776+ ClearPageUptodate(page);
2777+ page_cache_release(page);
2778+ }
2779+
2780+ /* Set bitmap page as not up to date */
2781+ block++;
2782+ pnum = block / blocks_per_page;
2783+ page = find_get_page(inode->i_mapping, pnum);
2784+ if (page != NULL) {
2785+ ClearPageUptodate(page);
2786+ page_cache_release(page);
2787+ }
2788+
2789+ /* Get the info on the last group */
2790+ grp = ext4_get_group_info(sb, group);
2791+
2792+ /* Update free blocks in group info */
2793+ ext4_mb_update_group_info(grp, add);
2794+ }
2795+
2796 if (test_opt(sb, DEBUG))
2797 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
2798 ext4_blocks_count(es));
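
mark_bitmap_end(), whose arguments change in the hunks above, pads the tail of the last group's bitmap with 1s so the allocator never hands out blocks past the end of the device. A simplified userspace version operating on a plain byte array rather than a buffer_head:

#include <stdio.h>

static void mark_bitmap_end(int start_bit, int end_bit, unsigned char *bitmap)
{
	int i;

	/* set every bit in [start_bit, end_bit) */
	for (i = start_bit; i < end_bit; i++)
		bitmap[i >> 3] |= 1 << (i & 7);
}

int main(void)
{
	unsigned char bitmap[4] = { 0 };	/* 32 bits */

	/* 20 real blocks in a 32-bit bitmap: pad bits 20..31 with 1s. */
	mark_bitmap_end(20, 32, bitmap);
	printf("%02x %02x %02x %02x\n",
	       bitmap[0], bitmap[1], bitmap[2], bitmap[3]);
	return 0;
}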
2799reverted:
2800--- b/fs/ext4/super.c
2801+++ a/fs/ext4/super.c
2802@@ -1493,6 +1493,7 @@
2803 ext4_group_t flex_group_count;
2804 ext4_group_t flex_group;
2805 int groups_per_flex = 0;
2806+ __u64 block_bitmap = 0;
2807 int i;
2808
2809 if (!sbi->s_es->s_log_groups_per_flex) {
2810@@ -1515,6 +1516,9 @@
2811 goto failed;
2812 }
2813
2814+ gdp = ext4_get_group_desc(sb, 1, &bh);
2815+ block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
2816+
2817 for (i = 0; i < sbi->s_groups_count; i++) {
2818 gdp = ext4_get_group_desc(sb, i, &bh);
2819
2820@@ -1916,8 +1920,8 @@
2821 struct inode *root;
2822 int ret = -EINVAL;
2823 int blocksize;
2824+ int db_count;
2825+ int i;
2826- unsigned int db_count;
2827- unsigned int i;
2828 int needs_recovery;
2829 __le32 features;
2830 __u64 blocks_count;
2831@@ -2168,18 +2172,6 @@
2832 for (i = 0; i < 4; i++)
2833 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2834 sbi->s_def_hash_version = es->s_def_hash_version;
2835- i = le32_to_cpu(es->s_flags);
2836- if (i & EXT2_FLAGS_UNSIGNED_HASH)
2837- sbi->s_hash_unsigned = 3;
2838- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2839-#ifdef __CHAR_UNSIGNED__
2840- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2841- sbi->s_hash_unsigned = 3;
2842-#else
2843- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2844-#endif
2845- sb->s_dirt = 1;
2846- }
2847
2848 if (sbi->s_blocks_per_group > blocksize * 8) {
2849 printk(KERN_ERR
2850@@ -2207,30 +2199,20 @@
2851 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2852 goto cantfind_ext4;
2853
2854+ /* ensure blocks_count calculation below doesn't sign-extend */
2855+ if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) <
2856+ le32_to_cpu(es->s_first_data_block) + 1) {
2857+ printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, "
2858+ "first data block %u, blocks per group %lu\n",
2859+ ext4_blocks_count(es),
2860+ le32_to_cpu(es->s_first_data_block),
2861+ EXT4_BLOCKS_PER_GROUP(sb));
2862- /*
2863- * It makes no sense for the first data block to be beyond the end
2864- * of the filesystem.
2865- */
2866- if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2867- printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2868- "block %u is beyond end of filesystem (%llu)\n",
2869- le32_to_cpu(es->s_first_data_block),
2870- ext4_blocks_count(es));
2871 goto failed_mount;
2872 }
2873 blocks_count = (ext4_blocks_count(es) -
2874 le32_to_cpu(es->s_first_data_block) +
2875 EXT4_BLOCKS_PER_GROUP(sb) - 1);
2876 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2877- if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2878- printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2879- "(block count %llu, first data block %u, "
2880- "blocks per group %lu)\n", sbi->s_groups_count,
2881- ext4_blocks_count(es),
2882- le32_to_cpu(es->s_first_data_block),
2883- EXT4_BLOCKS_PER_GROUP(sb));
2884- goto failed_mount;
2885- }
2886 sbi->s_groups_count = blocks_count;
2887 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2888 EXT4_DESC_PER_BLOCK(sb);
2889@@ -2950,14 +2932,14 @@
2890
2891 static int ext4_sync_fs(struct super_block *sb, int wait)
2892 {
2893+ int ret = 0;
2894- tid_t target;
2895
2896 sb->s_dirt = 0;
2897+ if (wait)
2898+ ret = ext4_force_commit(sb);
2899+ else
2900+ jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
2901+ return ret;
2902- if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
2903- if (wait)
2904- jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
2905- }
2906- return 0;
2907 }
2908
2909 /*
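
The geometry check swapped out above computes the group count in 64-bit arithmetic and rejects filesystems whose group count would overflow the 32-bit counters, less the slack consumed by group descriptors. A sketch of that calculation with illustrative values, not ones taken from a real superblock:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blocks_count = 1ULL << 40;	/* total fs blocks */
	uint32_t first_data_block = 1;
	uint64_t blocks_per_group = 8192;
	uint64_t desc_per_block = 128;
	uint64_t groups;

	groups = (blocks_count - first_data_block + blocks_per_group - 1)
			/ blocks_per_group;
	if (groups > ((uint64_t)1 << 32) - desc_per_block) {
		printf("groups count too large: %llu\n",
		       (unsigned long long)groups);
		return 1;
	}
	printf("groups: %llu\n", (unsigned long long)groups);
	return 0;
}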
2910reverted:
2911--- b/fs/jbd2/commit.c
2912+++ a/fs/jbd2/commit.c
2913@@ -24,7 +24,6 @@
2914 #include <linux/crc32.h>
2915 #include <linux/writeback.h>
2916 #include <linux/backing-dev.h>
2917-#include <linux/bio.h>
2918
2919 /*
2920 * Default IO end handler for temporary BJ_IO buffer_heads.
2921@@ -171,34 +170,12 @@
2922 * This function along with journal_submit_commit_record
2923 * allows to write the commit record asynchronously.
2924 */
2925+static int journal_wait_on_commit_record(struct buffer_head *bh)
2926-static int journal_wait_on_commit_record(journal_t *journal,
2927- struct buffer_head *bh)
2928 {
2929 int ret = 0;
2930
2931-retry:
2932 clear_buffer_dirty(bh);
2933 wait_on_buffer(bh);
2934- if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
2935- printk(KERN_WARNING
2936- "JBD2: wait_on_commit_record: sync failed on %s - "
2937- "disabling barriers\n", journal->j_devname);
2938- spin_lock(&journal->j_state_lock);
2939- journal->j_flags &= ~JBD2_BARRIER;
2940- spin_unlock(&journal->j_state_lock);
2941-
2942- lock_buffer(bh);
2943- clear_buffer_dirty(bh);
2944- set_buffer_uptodate(bh);
2945- bh->b_end_io = journal_end_buffer_io_sync;
2946-
2947- ret = submit_bh(WRITE_SYNC, bh);
2948- if (ret) {
2949- unlock_buffer(bh);
2950- return ret;
2951- }
2952- goto retry;
2953- }
2954
2955 if (unlikely(!buffer_uptodate(bh)))
2956 ret = -EIO;
2957@@ -818,7 +795,7 @@
2958 __jbd2_journal_abort_hard(journal);
2959 }
2960 if (!err && !is_journal_aborted(journal))
2961+ err = journal_wait_on_commit_record(cbh);
2962- err = journal_wait_on_commit_record(journal, cbh);
2963
2964 if (err)
2965 jbd2_journal_abort(journal, err);
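
The code removed from journal_wait_on_commit_record() implements a barrier fallback: if the device rejects a barrier write with EOPNOTSUPP, the journal clears its barrier flag and resubmits the same buffer as an ordinary write. A stand-alone model of that retry loop, with a stub in place of submit_bh():

#include <stdio.h>
#include <errno.h>

static int device_supports_barriers = 0;

static int submit_write(int barrier)
{
	if (barrier && !device_supports_barriers)
		return -EOPNOTSUPP;
	return 0;		/* write completed */
}

int main(void)
{
	int use_barrier = 1;
	int ret;

retry:
	ret = submit_write(use_barrier);
	if (ret == -EOPNOTSUPP && use_barrier) {
		fprintf(stderr, "barriers unsupported, disabling\n");
		use_barrier = 0;	/* like clearing JBD2_BARRIER */
		goto retry;
	}
	printf("commit record written, ret=%d\n", ret);
	return 0;
}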
2966reverted:
2967--- b/fs/jbd2/journal.c
2968+++ a/fs/jbd2/journal.c
2969@@ -430,7 +430,7 @@
2970 }
2971
2972 /*
2973+ * Called under j_state_lock. Returns true if a transaction was started.
2974- * Called under j_state_lock. Returns true if a transaction commit was started.
2975 */
2976 int __jbd2_log_start_commit(journal_t *journal, tid_t target)
2977 {
2978@@ -498,8 +498,7 @@
2979
2980 /*
2981 * Start a commit of the current running transaction (if any). Returns true
2982+ * if a transaction was started, and fills its tid in at *ptid
2983- * if a transaction is going to be committed (or is currently already
2984- * committing), and fills its tid in at *ptid
2985 */
2986 int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
2987 {
2988@@ -509,19 +508,15 @@
2989 if (journal->j_running_transaction) {
2990 tid_t tid = journal->j_running_transaction->t_tid;
2991
2992+ ret = __jbd2_log_start_commit(journal, tid);
2993+ if (ret && ptid)
2994- __jbd2_log_start_commit(journal, tid);
2995- /* There's a running transaction and we've just made sure
2996- * it's commit has been scheduled. */
2997- if (ptid)
2998 *ptid = tid;
2999+ } else if (journal->j_committing_transaction && ptid) {
3000- ret = 1;
3001- } else if (journal->j_committing_transaction) {
3002 /*
3003 * If ext3_write_super() recently started a commit, then we
3004 * have to wait for completion of that transaction
3005 */
3006+ *ptid = journal->j_committing_transaction->t_tid;
3007- if (ptid)
3008- *ptid = journal->j_committing_transaction->t_tid;
3009 ret = 1;
3010 }
3011 spin_unlock(&journal->j_state_lock);
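
The jbd2_journal_start_commit() fix being reverted changes the return convention: true whenever a commit was scheduled or is already in flight, with the tid filled in for the caller to wait on. A toy model of that caller-visible behaviour — this is not jbd2 itself, just the two-branch logic:

#include <stdio.h>

struct journal {
	int running_tid;	/* 0 means no running transaction */
	int committing_tid;	/* 0 means nothing committing */
};

/* Returns 1 and fills *ptid when a commit was started or is in flight. */
static int start_commit(struct journal *j, int *ptid)
{
	if (j->running_tid) {
		*ptid = j->running_tid;
		j->committing_tid = j->running_tid;
		j->running_tid = 0;
		return 1;
	}
	if (j->committing_tid) {
		*ptid = j->committing_tid;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct journal j = { .running_tid = 42 };
	int tid;

	if (start_commit(&j, &tid))
		printf("waiting for commit of tid %d\n", tid);
	else
		printf("nothing to commit\n");
	return 0;
}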
3012reverted:
3013--- b/fs/jbd2/revoke.c
3014+++ a/fs/jbd2/revoke.c
3015@@ -55,25 +55,6 @@
3016 * need do nothing.
3017 * RevokeValid set, Revoked set:
3018 * buffer has been revoked.
3019- *
3020- * Locking rules:
3021- * We keep two hash tables of revoke records. One hashtable belongs to the
3022- * running transaction (is pointed to by journal->j_revoke), the other one
3023- * belongs to the committing transaction. Accesses to the second hash table
3024- * happen only from the kjournald and no other thread touches this table. Also
3025- * journal_switch_revoke_table() which switches which hashtable belongs to the
3026- * running and which to the committing transaction is called only from
3027- * kjournald. Therefore we need no locks when accessing the hashtable belonging
3028- * to the committing transaction.
3029- *
3030- * All users operating on the hash table belonging to the running transaction
3031- * have a handle to the transaction. Therefore they are safe from kjournald
3032- * switching hash tables under them. For operations on the lists of entries in
3033- * the hash table j_revoke_lock is used.
3034- *
3035- * Finally, also replay code uses the hash tables but at this moment noone else
3036- * can touch them (filesystem isn't mounted yet) and hence no locking is
3037- * needed.
3038 */
3039
3040 #ifndef __KERNEL__
3041@@ -420,6 +401,8 @@
3042 * the second time we would still have a pending revoke to cancel. So,
3043 * do not trust the Revoked bit on buffers unless RevokeValid is also
3044 * set.
3045+ *
3046+ * The caller must have the journal locked.
3047 */
3048 int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
3049 {
3050@@ -497,7 +480,10 @@
3051 /*
3052 * Write revoke records to the journal for all entries in the current
3053 * revoke hash, deleting the entries as we go.
3054+ *
3055+ * Called with the journal lock held.
3056 */
3057+
3058 void jbd2_journal_write_revoke_records(journal_t *journal,
3059 transaction_t *transaction)
3060 {
3061reverted:
3062--- b/fs/jbd2/transaction.c
3063+++ a/fs/jbd2/transaction.c
3064@@ -2049,46 +2049,26 @@
3065 }
3066
3067 /*
3068+ * This function must be called when an inode is journaled in ordered mode
3069+ * before truncation happens. It starts writeout of the truncated part in
3070+ * case it is in the committing transaction, so that we uphold the
3071+ * ordered-mode consistency guarantees.
3072- * File truncate and transaction commit interact with each other in a
3073- * non-trivial way. If a transaction writing data block A is
3074- * committing, we cannot discard the data by truncate until we have
3075- * written them. Otherwise if we crashed after the transaction with
3076- * write has committed but before the transaction with truncate has
3077- * committed, we could see stale data in block A. This function is a
3078- * helper to solve this problem. It starts writeout of the truncated
3079- * part in case it is in the committing transaction.
3080- *
3081- * Filesystem code must call this function when inode is journaled in
3082- * ordered mode before truncation happens and after the inode has been
3083- * placed on orphan list with the new inode size. The second condition
3084- * avoids the race that someone writes new data and we start
3085- * committing the transaction after this function has been called but
3086- * before a transaction for truncate is started (and furthermore it
3087- * allows us to optimize the case where the addition to orphan list
3088- * happens in the same transaction as write --- we don't have to write
3089- * any data in such case).
3090 */
3091+int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
3092-int jbd2_journal_begin_ordered_truncate(journal_t *journal,
3093- struct jbd2_inode *jinode,
3094 loff_t new_size)
3095 {
3096+ journal_t *journal;
3097+ transaction_t *commit_trans;
3098- transaction_t *inode_trans, *commit_trans;
3099 int ret = 0;
3100
3101+ if (!inode->i_transaction && !inode->i_next_transaction)
3102- /* This is a quick check to avoid locking if not necessary */
3103- if (!jinode->i_transaction)
3104 goto out;
3105+ journal = inode->i_transaction->t_journal;
3106- /* Locks are here just to force reading of recent values, it is
3107- * enough that the transaction was not committing before we started
3108- * a transaction adding the inode to orphan list */
3109 spin_lock(&journal->j_state_lock);
3110 commit_trans = journal->j_committing_transaction;
3111 spin_unlock(&journal->j_state_lock);
3112+ if (inode->i_transaction == commit_trans) {
3113+ ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
3114- spin_lock(&journal->j_list_lock);
3115- inode_trans = jinode->i_transaction;
3116- spin_unlock(&journal->j_list_lock);
3117- if (inode_trans == commit_trans) {
3118- ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
3119 new_size, LLONG_MAX);
3120 if (ret)
3121 jbd2_journal_abort(journal, ret);
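
The rewritten comment and code above implement the ordered-truncate rule: if the inode's journaled data belongs to the transaction currently committing, the about-to-be-truncated range must hit disk before truncate proceeds, or a crash could expose stale blocks. A sketch with stubs standing in for jbd2 and the page cache:

#include <stdio.h>

struct transaction { int tid; };

static int writeout_range(long long from)
{
	printf("writing dirty pages from offset %lld to EOF\n", from);
	return 0;
}

static int begin_ordered_truncate(struct transaction *inode_trans,
				  struct transaction *committing,
				  long long new_size)
{
	if (inode_trans == NULL)
		return 0;	/* no journaled data, nothing to do */
	if (inode_trans == committing)
		return writeout_range(new_size);
	return 0;
}

int main(void)
{
	struct transaction t = { 17 };

	begin_ordered_truncate(&t, &t, 4096);	/* forces writeout */
	begin_ordered_truncate(&t, NULL, 4096);	/* no-op */
	return 0;
}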
3122reverted:
3123--- b/include/linux/jbd2.h
3124+++ a/include/linux/jbd2.h
3125@@ -308,8 +308,7 @@
3126 int val = (expr); \
3127 if (!val) { \
3128 printk(KERN_ERR \
3129+ "EXT3-fs unexpected failure: %s;\n",# expr); \
3130- "JBD2 unexpected failure: %s: %s;\n", \
3131- __func__, #expr); \
3132 printk(KERN_ERR why "\n"); \
3133 } \
3134 val; \
3135@@ -330,7 +329,6 @@
3136 BH_State, /* Pins most journal_head state */
3137 BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
3138 BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */
3139- BH_JBDPrivateStart, /* First bit available for private use by FS */
3140 };
3141
3142 BUFFER_FNS(JBD, jbd)
3143@@ -1075,8 +1073,7 @@
3144 extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
3145 extern int jbd2_journal_force_commit(journal_t *);
3146 extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
3147+extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
3148-extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
3149- struct jbd2_inode *inode, loff_t new_size);
3150 extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
3151 extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
3152