From: Greg Kroah-Hartman Date: Mon, 19 Apr 2010 17:26:16 +0000 (-0700) Subject: .27 ext4 patches X-Git-Tag: v2.6.32.12~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d7b4be12a43a5b3c85891ac50ae3d16114a74550;p=thirdparty%2Fkernel%2Fstable-queue.git .27 ext4 patches --- diff --git a/queue-2.6.27/ext4-add-percpu-dirty-block-accounting.patch b/queue-2.6.27/ext4-add-percpu-dirty-block-accounting.patch new file mode 100644 index 00000000000..a86a935b047 --- /dev/null +++ b/queue-2.6.27/ext4-add-percpu-dirty-block-accounting.patch @@ -0,0 +1,312 @@ +From tytso@mit.edu Mon Apr 19 10:21:01 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:25:58 -0400 +Subject: ext4: Add percpu dirty block accounting. +To: stable@kernel.org +Cc: "Theodore Ts'o" , Ext4 Developers List , Mingming Cao , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-5-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit 6bc6e63fcd7dac9e633ea29f1fddd9580ab28f3f upstream. + +This patch adds dirty block accounting using percpu_counters. Delayed +allocation block reservation is now done by updating dirty block +counter. In a later patch we switch to non delalloc mode if the +filesystem free blocks is greater than 150% of total filesystem dirty +blocks + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Mingming Cao +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/balloc.c | 62 ++++++++++++++++++++++++++++++++++-------------------- + fs/ext4/ext4_sb.h | 1 + fs/ext4/inode.c | 22 +++++++++---------- + fs/ext4/mballoc.c | 31 ++++++++++++--------------- + fs/ext4/super.c | 8 ++++++ + 5 files changed, 73 insertions(+), 51 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -1757,26 +1757,38 @@ out: + int ext4_claim_free_blocks(struct ext4_sb_info *sbi, + ext4_fsblk_t nblocks) + { +- s64 free_blocks; ++ s64 free_blocks, dirty_blocks; + ext4_fsblk_t root_blocks = 0; + struct percpu_counter *fbc = &sbi->s_freeblocks_counter; ++ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; + +- free_blocks = percpu_counter_read(fbc); ++ free_blocks = percpu_counter_read_positive(fbc); ++ dirty_blocks = percpu_counter_read_positive(dbc); + + if (!capable(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) + root_blocks = ext4_r_blocks_count(sbi->s_es); + +- if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) +- free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter); +- +- if (free_blocks < (root_blocks + nblocks)) ++ if (free_blocks - (nblocks + root_blocks + dirty_blocks) < ++ EXT4_FREEBLOCKS_WATERMARK) { ++ free_blocks = percpu_counter_sum(fbc); ++ dirty_blocks = percpu_counter_sum(dbc); ++ if (dirty_blocks < 0) { ++ printk(KERN_CRIT "Dirty block accounting " ++ "went wrong %lld\n", ++ dirty_blocks); ++ } ++ } ++ /* Check whether we have space after ++ * accounting for current dirty blocks ++ */ ++ if (free_blocks < ((s64)(root_blocks + nblocks) + dirty_blocks)) + /* we don't have free space */ + return -ENOSPC; + +- /* reduce fs free blocks counter */ +- percpu_counter_sub(fbc, nblocks); ++ /* Add the blocks to nblocks */ ++ percpu_counter_add(dbc, nblocks); + return 0; + } + +@@ -1792,23 +1804,28 @@ int ext4_claim_free_blocks(struct ext4_s + ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, + ext4_fsblk_t nblocks) + { +- ext4_fsblk_t free_blocks; ++ ext4_fsblk_t free_blocks, dirty_blocks; + 
ext4_fsblk_t root_blocks = 0; ++ struct percpu_counter *fbc = &sbi->s_freeblocks_counter; ++ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; + +- free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); ++ free_blocks = percpu_counter_read_positive(fbc); ++ dirty_blocks = percpu_counter_read_positive(dbc); + + if (!capable(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) + root_blocks = ext4_r_blocks_count(sbi->s_es); + +- if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) +- free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); +- +- if (free_blocks <= root_blocks) ++ if (free_blocks - (nblocks + root_blocks + dirty_blocks) < ++ EXT4_FREEBLOCKS_WATERMARK) { ++ free_blocks = percpu_counter_sum_positive(fbc); ++ dirty_blocks = percpu_counter_sum_positive(dbc); ++ } ++ if (free_blocks <= (root_blocks + dirty_blocks)) + /* we don't have free space */ + return 0; +- if (free_blocks - root_blocks < nblocks) ++ if (free_blocks - (root_blocks + dirty_blocks) < nblocks) + return free_blocks - root_blocks; + return nblocks; + } +@@ -2089,13 +2106,14 @@ allocated: + le16_add_cpu(&gdp->bg_free_blocks_count, -num); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); + spin_unlock(sb_bgl_lock(sbi, group_no)); +- if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) { +- /* +- * we allocated less blocks than we +- * claimed. Add the difference back. +- */ +- percpu_counter_add(&sbi->s_freeblocks_counter, *count - num); +- } ++ percpu_counter_sub(&sbi->s_freeblocks_counter, num); ++ /* ++ * Now reduce the dirty block count also. Should not go negative ++ */ ++ if (!EXT4_I(inode)->i_delalloc_reserved_flag) ++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, *count); ++ else ++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, num); + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group_no); + spin_lock(sb_bgl_lock(sbi, flex_group)); +--- a/fs/ext4/ext4_sb.h ++++ b/fs/ext4/ext4_sb.h +@@ -60,6 +60,7 @@ struct ext4_sb_info { + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; ++ struct percpu_counter s_dirtyblocks_counter; + struct blockgroup_lock s_blockgroup_lock; + + /* root of the per fs reservation window tree */ +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1032,19 +1032,20 @@ static void ext4_da_update_reserve_space + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; + +- /* Account for allocated meta_blocks */ +- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; +- +- /* update fs free blocks counter for truncate case */ +- percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); ++ if (mdb_free) { ++ /* Account for allocated meta_blocks */ ++ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; ++ ++ /* update fs dirty blocks counter */ ++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); ++ EXT4_I(inode)->i_allocated_meta_blocks = 0; ++ EXT4_I(inode)->i_reserved_meta_blocks = mdb; ++ } + + /* update per-inode reservations */ + BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); + EXT4_I(inode)->i_reserved_data_blocks -= used; + +- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); +- EXT4_I(inode)->i_reserved_meta_blocks = mdb; +- EXT4_I(inode)->i_allocated_meta_blocks = 0; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + + /* +@@ -1609,8 +1610,8 @@ 
static void ext4_da_release_space(struct + + release = to_free + mdb_free; + +- /* update fs free blocks counter for truncate case */ +- percpu_counter_add(&sbi->s_freeblocks_counter, release); ++ /* update fs dirty blocks counter for truncate case */ ++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); + + /* update per-inode reservations */ + BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); +@@ -2546,7 +2547,6 @@ static int ext4_da_write_begin(struct fi + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; +- + retry: + /* + * With delayed allocation, we don't log the i_disksize update +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3100,7 +3100,7 @@ void exit_ext4_mballoc(void) + */ + static noinline_for_stack int + ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, +- handle_t *handle) ++ handle_t *handle, unsigned long reserv_blks) + { + struct buffer_head *bitmap_bh = NULL; + struct ext4_super_block *es; +@@ -3188,21 +3188,16 @@ ext4_mb_mark_diskspace_used(struct ext4_ + le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); + gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); + spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); +- ++ percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); + /* +- * free blocks account has already be reduced/reserved +- * at write_begin() time for delayed allocation +- * do not double accounting ++ * Now reduce the dirty block count also. Should not go negative + */ +- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) && +- ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) { +- /* +- * we allocated less blocks than we calimed +- * Add the difference back +- */ +- percpu_counter_add(&sbi->s_freeblocks_counter, +- ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len); +- } ++ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) ++ /* release all the reserved blocks if non delalloc */ ++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); ++ else ++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, ++ ac->ac_b_ex.fe_len); + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, +@@ -4636,12 +4631,13 @@ static int ext4_mb_discard_preallocation + ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + struct ext4_allocation_request *ar, int *errp) + { ++ int freed; + struct ext4_allocation_context *ac = NULL; + struct ext4_sb_info *sbi; + struct super_block *sb; + ext4_fsblk_t block = 0; +- int freed; +- int inquota; ++ unsigned long inquota; ++ unsigned long reserv_blks = 0; + + sb = ar->inode->i_sb; + sbi = EXT4_SB(sb); +@@ -4659,6 +4655,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t + *errp = -ENOSPC; + return 0; + } ++ reserv_blks = ar->len; + } + while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { + ar->flags |= EXT4_MB_HINT_NOPREALLOC; +@@ -4704,7 +4701,7 @@ repeat: + ext4_mb_new_preallocation(ac); + } + if (likely(ac->ac_status == AC_STATUS_FOUND)) { +- *errp = ext4_mb_mark_diskspace_used(ac, handle); ++ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); + if (*errp == -EAGAIN) { + /* + * drop the reference that we took +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -521,6 +521,7 @@ static void ext4_put_super(struct super_ + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); ++ percpu_counter_destroy(&sbi->s_dirtyblocks_counter); + brelse(sbi->s_sbh); + #ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; 
i++) +@@ -2280,6 +2281,9 @@ static int ext4_fill_super(struct super_ + err = percpu_counter_init(&sbi->s_dirs_counter, + ext4_count_dirs(sb)); + } ++ if (!err) { ++ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); ++ } + if (err) { + printk(KERN_ERR "EXT4-fs: insufficient memory\n"); + goto failed_mount3; +@@ -2517,6 +2521,7 @@ failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); ++ percpu_counter_destroy(&sbi->s_dirtyblocks_counter); + failed_mount2: + for (i = 0; i < db_count; i++) + brelse(sbi->s_group_desc[i]); +@@ -3208,7 +3213,8 @@ static int ext4_statfs(struct dentry *de + buf->f_type = EXT4_SUPER_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; +- buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); ++ buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - ++ percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); + ext4_free_blocks_count_set(es, buf->f_bfree); + buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); + if (buf->f_bfree < ext4_r_blocks_count(es)) diff --git a/queue-2.6.27/ext4-fix-file-fragmentation-during-large-file-write.patch b/queue-2.6.27/ext4-fix-file-fragmentation-during-large-file-write.patch new file mode 100644 index 00000000000..00bd9778a92 --- /dev/null +++ b/queue-2.6.27/ext4-fix-file-fragmentation-during-large-file-write.patch @@ -0,0 +1,199 @@ +From tytso@mit.edu Mon Apr 19 10:23:42 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:26:04 -0400 +Subject: ext4: Fix file fragmentation during large file write. +To: stable@kernel.org +Cc: Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-11-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit 22208dedbd7626e5fc4339c417f8d24cc21f79d7 upstream. + +The range_cyclic writeback mode uses the address_space writeback_index +as the start index for writeback. With delayed allocation we were +updating writeback_index wrongly resulting in highly fragmented file. +This patch reduces the number of extents reduced from 4000 to 27 for a +3GB file. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Theodore Ts'o +[dev@jaysonking.com: Some changed lines from the original version of this patch were dropped, since they were rolled up with another cherry-picked patch applied to 2.6.27.y earlier.] +Signed-off-by: Jayson R. 
King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 88 +++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 55 insertions(+), 33 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1721,7 +1721,11 @@ static int mpage_da_submit_io(struct mpa + + pages_skipped = mpd->wbc->pages_skipped; + err = mapping->a_ops->writepage(page, mpd->wbc); +- if (!err) ++ if (!err && (pages_skipped == mpd->wbc->pages_skipped)) ++ /* ++ * have successfully written the page ++ * without skipping the same ++ */ + mpd->pages_written++; + /* + * In error case, we have to continue because +@@ -2175,7 +2179,6 @@ static int mpage_da_writepages(struct ad + struct writeback_control *wbc, + struct mpage_da_data *mpd) + { +- long to_write; + int ret; + + if (!mpd->get_block) +@@ -2190,19 +2193,18 @@ static int mpage_da_writepages(struct ad + mpd->pages_written = 0; + mpd->retval = 0; + +- to_write = wbc->nr_to_write; +- + ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); +- + /* + * Handle last extent of pages + */ + if (!mpd->io_done && mpd->next_page != mpd->first_page) { + if (mpage_da_map_blocks(mpd) == 0) + mpage_da_submit_io(mpd); +- } + +- wbc->nr_to_write = to_write - mpd->pages_written; ++ mpd->io_done = 1; ++ ret = MPAGE_DA_EXTENT_TAIL; ++ } ++ wbc->nr_to_write -= mpd->pages_written; + return ret; + } + +@@ -2447,11 +2449,14 @@ static int ext4_da_writepages_trans_bloc + static int ext4_da_writepages(struct address_space *mapping, + struct writeback_control *wbc) + { ++ pgoff_t index; ++ int range_whole = 0; + handle_t *handle = NULL; + struct mpage_da_data mpd; + struct inode *inode = mapping->host; ++ int no_nrwrite_index_update; ++ long pages_written = 0, pages_skipped; + int needed_blocks, ret = 0, nr_to_writebump = 0; +- long to_write, pages_skipped = 0; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + + /* +@@ -2485,16 +2490,26 @@ static int ext4_da_writepages(struct add + nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; + wbc->nr_to_write = sbi->s_mb_stream_request; + } ++ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) ++ range_whole = 1; + +- +- pages_skipped = wbc->pages_skipped; ++ if (wbc->range_cyclic) ++ index = mapping->writeback_index; ++ else ++ index = wbc->range_start >> PAGE_CACHE_SHIFT; + + mpd.wbc = wbc; + mpd.inode = mapping->host; + +-restart_loop: +- to_write = wbc->nr_to_write; +- while (!ret && to_write > 0) { ++ /* ++ * we don't want write_cache_pages to update ++ * nr_to_write and writeback_index ++ */ ++ no_nrwrite_index_update = wbc->no_nrwrite_index_update; ++ wbc->no_nrwrite_index_update = 1; ++ pages_skipped = wbc->pages_skipped; ++ ++ while (!ret && wbc->nr_to_write > 0) { + + /* + * we insert one extent at a time. 
So we need +@@ -2527,46 +2542,53 @@ restart_loop: + goto out_writepages; + } + } +- to_write -= wbc->nr_to_write; +- + mpd.get_block = ext4_da_get_block_write; + ret = mpage_da_writepages(mapping, wbc, &mpd); + + ext4_journal_stop(handle); + +- if (mpd.retval == -ENOSPC) ++ if (mpd.retval == -ENOSPC) { ++ /* commit the transaction which would ++ * free blocks released in the transaction ++ * and try again ++ */ + jbd2_journal_force_commit_nested(sbi->s_journal); +- +- /* reset the retry count */ +- if (ret == MPAGE_DA_EXTENT_TAIL) { ++ wbc->pages_skipped = pages_skipped; ++ ret = 0; ++ } else if (ret == MPAGE_DA_EXTENT_TAIL) { + /* + * got one extent now try with + * rest of the pages + */ +- to_write += wbc->nr_to_write; ++ pages_written += mpd.pages_written; ++ wbc->pages_skipped = pages_skipped; + ret = 0; +- } else if (wbc->nr_to_write) { ++ } else if (wbc->nr_to_write) + /* + * There is no more writeout needed + * or we requested for a noblocking writeout + * and we found the device congested + */ +- to_write += wbc->nr_to_write; + break; +- } +- wbc->nr_to_write = to_write; +- } +- +- if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) { +- /* We skipped pages in this loop */ +- wbc->nr_to_write = to_write + +- wbc->pages_skipped - pages_skipped; +- wbc->pages_skipped = pages_skipped; +- goto restart_loop; + } ++ if (pages_skipped != wbc->pages_skipped) ++ printk(KERN_EMERG "This should not happen leaving %s " ++ "with nr_to_write = %ld ret = %d\n", ++ __func__, wbc->nr_to_write, ret); ++ ++ /* Update index */ ++ index += pages_written; ++ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) ++ /* ++ * set the writeback_index so that range_cyclic ++ * mode will write it back later ++ */ ++ mapping->writeback_index = index; + + out_writepages: +- wbc->nr_to_write = to_write - nr_to_writebump; ++ if (!no_nrwrite_index_update) ++ wbc->no_nrwrite_index_update = 0; ++ wbc->nr_to_write -= nr_to_writebump; + return ret; + } + diff --git a/queue-2.6.27/ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch b/queue-2.6.27/ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch new file mode 100644 index 00000000000..e3e06329c3d --- /dev/null +++ b/queue-2.6.27/ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch @@ -0,0 +1,103 @@ +From tytso@mit.edu Mon Apr 19 10:24:03 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:26:05 -0400 +Subject: ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages +To: stable@kernel.org +Cc: Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-12-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit 2acf2c261b823d9d9ed954f348b97620297a36b5 upstream. + +With delayed allocation we lock the page in write_cache_pages() and +try to build an in memory extent of contiguous blocks. This is needed +so that we can get large contiguous blocks request. If range_cyclic +mode is enabled, write_cache_pages() will loop back to the 0 index if +no I/O has been done yet, and try to start writing from the beginning +of the range. That causes an attempt to take the page lock of lower +index page while holding the page lock of higher index page, which can +cause a dead lock with another writeback thread. + +The solution is to implement the range_cyclic behavior in +ext4_da_writepages() instead. 
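A rough standalone illustration of that two-pass scheme follows (toy types and names, not the kernel's; the real code drives struct writeback_control and page locks, as the hunks below show):

    #include <stdio.h>

    /*
     * Toy model: one linear pass from writeback_index to the end of the
     * file, and only if that pass wrote nothing, a single bounded second
     * pass from index 0 up to the old starting point.  The generic
     * wrap-around in write_cache_pages() is what could re-take a lower
     * page lock while a higher one was still held.
     */
    #define NPAGES 16

    static void write_range(const int *dirty, int start, int end, int *written)
    {
        for (int i = start; i <= end && i < NPAGES; i++)
            if (dirty[i])
                (*written)++;
    }

    int main(void)
    {
        int dirty[NPAGES] = { [2] = 1 };  /* one dirty page below the start */
        int writeback_index = 5;          /* where the previous pass stopped */
        int written = 0;
        int cycled = (writeback_index == 0);

        /* first pass: writeback_index .. end of file, never wrapping */
        write_range(dirty, writeback_index, NPAGES - 1, &written);
        if (!written && !cycled) {
            /* nothing written yet: cycle exactly once from the start */
            cycled = 1;
            write_range(dirty, 0, writeback_index - 1, &written);
        }
        printf("pages written: %d\n", written);  /* page 2, on the 2nd pass */
        return 0;
    }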
+ +http://bugzilla.kernel.org/show_bug.cgi?id=12579 + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 21 +++++++++++++++++++-- + 1 file changed, 19 insertions(+), 2 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2456,6 +2456,7 @@ static int ext4_da_writepages(struct add + struct inode *inode = mapping->host; + int no_nrwrite_index_update; + long pages_written = 0, pages_skipped; ++ int range_cyclic, cycled = 1, io_done = 0; + int needed_blocks, ret = 0, nr_to_writebump = 0; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + +@@ -2493,9 +2494,15 @@ static int ext4_da_writepages(struct add + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + +- if (wbc->range_cyclic) ++ range_cyclic = wbc->range_cyclic; ++ if (wbc->range_cyclic) { + index = mapping->writeback_index; +- else ++ if (index) ++ cycled = 0; ++ wbc->range_start = index << PAGE_CACHE_SHIFT; ++ wbc->range_end = LLONG_MAX; ++ wbc->range_cyclic = 0; ++ } else + index = wbc->range_start >> PAGE_CACHE_SHIFT; + + mpd.wbc = wbc; +@@ -2509,6 +2516,7 @@ static int ext4_da_writepages(struct add + wbc->no_nrwrite_index_update = 1; + pages_skipped = wbc->pages_skipped; + ++retry: + while (!ret && wbc->nr_to_write > 0) { + + /* +@@ -2563,6 +2571,7 @@ static int ext4_da_writepages(struct add + pages_written += mpd.pages_written; + wbc->pages_skipped = pages_skipped; + ret = 0; ++ io_done = 1; + } else if (wbc->nr_to_write) + /* + * There is no more writeout needed +@@ -2571,6 +2580,13 @@ static int ext4_da_writepages(struct add + */ + break; + } ++ if (!io_done && !cycled) { ++ cycled = 1; ++ index = 0; ++ wbc->range_start = index << PAGE_CACHE_SHIFT; ++ wbc->range_end = mapping->writeback_index - 1; ++ goto retry; ++ } + if (pages_skipped != wbc->pages_skipped) + printk(KERN_EMERG "This should not happen leaving %s " + "with nr_to_write = %ld ret = %d\n", +@@ -2578,6 +2594,7 @@ static int ext4_da_writepages(struct add + + /* Update index */ + index += pages_written; ++ wbc->range_cyclic = range_cyclic; + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + /* + * set the writeback_index so that range_cyclic diff --git a/queue-2.6.27/ext4-invalidate-pages-if-delalloc-block-allocation-fails.patch b/queue-2.6.27/ext4-invalidate-pages-if-delalloc-block-allocation-fails.patch new file mode 100644 index 00000000000..33b971309ab --- /dev/null +++ b/queue-2.6.27/ext4-invalidate-pages-if-delalloc-block-allocation-fails.patch @@ -0,0 +1,171 @@ +From tytso@mit.edu Mon Apr 19 10:19:40 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:25:55 -0400 +Subject: ext4: invalidate pages if delalloc block allocation fails. +To: stable@kernel.org +Cc: Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-2-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit c4a0c46ec92c194c873232b88debce4e1a448483 upstream. + +We are a bit agressive in invalidating all the pages. But +it is ok because we really don't know why the block allocation +failed and it is better to come of the writeback path +so that user can look for more info. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Jayson R. 
King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 73 insertions(+), 12 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1821,6 +1821,39 @@ static inline void __unmap_underlying_bl + unmap_underlying_metadata(bdev, bh->b_blocknr + i); + } + ++static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, ++ sector_t logical, long blk_cnt) ++{ ++ int nr_pages, i; ++ pgoff_t index, end; ++ struct pagevec pvec; ++ struct inode *inode = mpd->inode; ++ struct address_space *mapping = inode->i_mapping; ++ ++ index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); ++ end = (logical + blk_cnt - 1) >> ++ (PAGE_CACHE_SHIFT - inode->i_blkbits); ++ while (index <= end) { ++ nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); ++ if (nr_pages == 0) ++ break; ++ for (i = 0; i < nr_pages; i++) { ++ struct page *page = pvec.pages[i]; ++ index = page->index; ++ if (index > end) ++ break; ++ index++; ++ ++ BUG_ON(!PageLocked(page)); ++ BUG_ON(PageWriteback(page)); ++ block_invalidatepage(page, 0); ++ ClearPageUptodate(page); ++ unlock_page(page); ++ } ++ } ++ return; ++} ++ + /* + * mpage_da_map_blocks - go through given space + * +@@ -1830,7 +1863,7 @@ static inline void __unmap_underlying_bl + * The function skips space we know is already mapped to disk blocks. + * + */ +-static void mpage_da_map_blocks(struct mpage_da_data *mpd) ++static int mpage_da_map_blocks(struct mpage_da_data *mpd) + { + int err = 0; + struct buffer_head *lbh = &mpd->lbh; +@@ -1841,7 +1874,7 @@ static void mpage_da_map_blocks(struct m + * We consider only non-mapped and non-allocated blocks + */ + if (buffer_mapped(lbh) && !buffer_delay(lbh)) +- return; ++ return 0; + + new.b_state = lbh->b_state; + new.b_blocknr = 0; +@@ -1852,10 +1885,38 @@ static void mpage_da_map_blocks(struct m + * to write simply return + */ + if (!new.b_size) +- return; ++ return 0; + err = mpd->get_block(mpd->inode, next, &new, 1); +- if (err) +- return; ++ if (err) { ++ ++ /* If get block returns with error ++ * we simply return. Later writepage ++ * will redirty the page and writepages ++ * will find the dirty page again ++ */ ++ if (err == -EAGAIN) ++ return 0; ++ /* ++ * get block failure will cause us ++ * to loop in writepages. Because ++ * a_ops->writepage won't be able to ++ * make progress. The page will be redirtied ++ * by writepage and writepages will again ++ * try to write the same. ++ */ ++ printk(KERN_EMERG "%s block allocation failed for inode %lu " ++ "at logical offset %llu with max blocks " ++ "%zd with error %d\n", ++ __func__, mpd->inode->i_ino, ++ (unsigned long long)next, ++ lbh->b_size >> mpd->inode->i_blkbits, err); ++ printk(KERN_EMERG "This should not happen.!! 
" ++ "Data will be lost\n"); ++ /* invlaidate all the pages */ ++ ext4_da_block_invalidatepages(mpd, next, ++ lbh->b_size >> mpd->inode->i_blkbits); ++ return err; ++ } + BUG_ON(new.b_size == 0); + + if (buffer_new(&new)) +@@ -1868,7 +1929,7 @@ static void mpage_da_map_blocks(struct m + if (buffer_delay(lbh) || buffer_unwritten(lbh)) + mpage_put_bnr_to_bhs(mpd, next, &new); + +- return; ++ return 0; + } + + #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ +@@ -1937,8 +1998,8 @@ flush_it: + * We couldn't merge the block to our extent, so we + * need to flush current extent and start new one + */ +- mpage_da_map_blocks(mpd); +- mpage_da_submit_io(mpd); ++ if (mpage_da_map_blocks(mpd) == 0) ++ mpage_da_submit_io(mpd); + mpd->io_done = 1; + return; + } +@@ -1980,8 +2041,8 @@ static int __mpage_da_writepage(struct p + * and start IO on them using writepage() + */ + if (mpd->next_page != mpd->first_page) { +- mpage_da_map_blocks(mpd); +- mpage_da_submit_io(mpd); ++ if (mpage_da_map_blocks(mpd) == 0) ++ mpage_da_submit_io(mpd); + /* + * skip rest of the page in the page_vec + */ +@@ -2102,8 +2163,8 @@ static int mpage_da_writepages(struct ad + * Handle last extent of pages + */ + if (!mpd.io_done && mpd.next_page != mpd.first_page) { +- mpage_da_map_blocks(&mpd); +- mpage_da_submit_io(&mpd); ++ if (mpage_da_map_blocks(&mpd) == 0) ++ mpage_da_submit_io(&mpd); + } + + wbc->nr_to_write = to_write - mpd.pages_written; diff --git a/queue-2.6.27/ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch b/queue-2.6.27/ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch new file mode 100644 index 00000000000..56313d64fb2 --- /dev/null +++ b/queue-2.6.27/ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch @@ -0,0 +1,218 @@ +From tytso@mit.edu Mon Apr 19 10:20:41 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:25:57 -0400 +Subject: ext4: Make sure all the block allocation paths reserve blocks +To: stable@kernel.org +Cc: Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-4-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit a30d542a0035b886ffaafd0057ced0a2b28c3a4f upstream. + +With delayed allocation we need to make sure block are reserved before +we attempt to allocate them. Otherwise we get block allocation failure +(ENOSPC) during writepages which cannot be handled. This would mean +silent data loss (We do a printk stating data will be lost). This patch +updates the DIO and fallocate code path to do block reservation before +block allocation. This is needed to make sure parallel DIO and fallocate +request doesn't take block out of delayed reserve space. + +When free blocks count go below a threshold we switch to a slow patch +which looks at other CPU's accumulated percpu counter values. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. 
King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/balloc.c | 58 +++++++++++++++++++++++++++++++++++++++--------------- + fs/ext4/ext4.h | 13 ++++++++++++ + fs/ext4/inode.c | 5 ---- + fs/ext4/mballoc.c | 23 ++++++++++++--------- + 4 files changed, 69 insertions(+), 30 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -1754,6 +1754,32 @@ out: + return ret; + } + ++int ext4_claim_free_blocks(struct ext4_sb_info *sbi, ++ ext4_fsblk_t nblocks) ++{ ++ s64 free_blocks; ++ ext4_fsblk_t root_blocks = 0; ++ struct percpu_counter *fbc = &sbi->s_freeblocks_counter; ++ ++ free_blocks = percpu_counter_read(fbc); ++ ++ if (!capable(CAP_SYS_RESOURCE) && ++ sbi->s_resuid != current->fsuid && ++ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) ++ root_blocks = ext4_r_blocks_count(sbi->s_es); ++ ++ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) ++ free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter); ++ ++ if (free_blocks < (root_blocks + nblocks)) ++ /* we don't have free space */ ++ return -ENOSPC; ++ ++ /* reduce fs free blocks counter */ ++ percpu_counter_sub(fbc, nblocks); ++ return 0; ++} ++ + /** + * ext4_has_free_blocks() + * @sbi: in-core super block structure. +@@ -1775,18 +1801,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) + root_blocks = ext4_r_blocks_count(sbi->s_es); +-#ifdef CONFIG_SMP +- if (free_blocks - root_blocks < FBC_BATCH) +- free_blocks = +- percpu_counter_sum(&sbi->s_freeblocks_counter); +-#endif ++ ++ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK) ++ free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); ++ + if (free_blocks <= root_blocks) + /* we don't have free space */ + return 0; + if (free_blocks - root_blocks < nblocks) + return free_blocks - root_blocks; + return nblocks; +- } ++} + + + /** +@@ -1865,14 +1890,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_ + /* + * With delalloc we already reserved the blocks + */ +- *count = ext4_has_free_blocks(sbi, *count); +- } +- if (*count == 0) { +- *errp = -ENOSPC; +- return 0; /*return with ENOSPC error */ ++ if (ext4_claim_free_blocks(sbi, *count)) { ++ *errp = -ENOSPC; ++ return 0; /*return with ENOSPC error */ ++ } + } +- num = *count; +- + /* + * Check quota for allocation of this block. + */ +@@ -2067,9 +2089,13 @@ allocated: + le16_add_cpu(&gdp->bg_free_blocks_count, -num); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); + spin_unlock(sb_bgl_lock(sbi, group_no)); +- if (!EXT4_I(inode)->i_delalloc_reserved_flag) +- percpu_counter_sub(&sbi->s_freeblocks_counter, num); +- ++ if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) { ++ /* ++ * we allocated less blocks than we ++ * claimed. Add the difference back. 
++ */ ++ percpu_counter_add(&sbi->s_freeblocks_counter, *count - num); ++ } + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, group_no); + spin_lock(sb_bgl_lock(sbi, flex_group)); +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1015,6 +1015,8 @@ extern ext4_fsblk_t ext4_new_blocks(hand + unsigned long *count, int *errp); + extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); ++extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, ++ ext4_fsblk_t nblocks); + extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, + ext4_fsblk_t nblocks); + extern void ext4_free_blocks (handle_t *handle, struct inode *inode, +@@ -1245,6 +1247,17 @@ do { \ + __ext4_std_error((sb), __func__, (errno)); \ + } while (0) + ++#ifdef CONFIG_SMP ++/* Each CPU can accumulate FBC_BATCH blocks in their local ++ * counters. So we need to make sure we have free blocks more ++ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. ++ */ ++#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) ++#else ++#define EXT4_FREEBLOCKS_WATERMARK 0 ++#endif ++ ++ + /* + * Inodes and files operations + */ +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1564,13 +1564,10 @@ static int ext4_da_reserve_space(struct + md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; + total = md_needed + nrblocks; + +- if (ext4_has_free_blocks(sbi, total) < total) { ++ if (ext4_claim_free_blocks(sbi, total)) { + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return -ENOSPC; + } +- /* reduce fs free blocks counter */ +- percpu_counter_sub(&sbi->s_freeblocks_counter, total); +- + EXT4_I(inode)->i_reserved_data_blocks += nrblocks; + EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3194,9 +3194,15 @@ ext4_mb_mark_diskspace_used(struct ext4_ + * at write_begin() time for delayed allocation + * do not double accounting + */ +- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) +- percpu_counter_sub(&sbi->s_freeblocks_counter, +- ac->ac_b_ex.fe_len); ++ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) && ++ ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) { ++ /* ++ * we allocated less blocks than we calimed ++ * Add the difference back ++ */ ++ percpu_counter_add(&sbi->s_freeblocks_counter, ++ ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len); ++ } + + if (sbi->s_log_groups_per_flex) { + ext4_group_t flex_group = ext4_flex_group(sbi, +@@ -4649,14 +4655,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t + /* + * With delalloc we already reserved the blocks + */ +- ar->len = ext4_has_free_blocks(sbi, ar->len); +- } +- +- if (ar->len == 0) { +- *errp = -ENOSPC; +- return 0; ++ if (ext4_claim_free_blocks(sbi, ar->len)) { ++ *errp = -ENOSPC; ++ return 0; ++ } + } +- + while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { + ar->flags |= EXT4_MB_HINT_NOPREALLOC; + ar->len--; diff --git a/queue-2.6.27/ext4-retry-block-allocation-if-we-have-free-blocks-left.patch b/queue-2.6.27/ext4-retry-block-allocation-if-we-have-free-blocks-left.patch new file mode 100644 index 00000000000..76a9962be1f --- /dev/null +++ b/queue-2.6.27/ext4-retry-block-allocation-if-we-have-free-blocks-left.patch @@ -0,0 +1,200 @@ +From tytso@mit.edu Mon Apr 19 10:22:08 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:26:00 -0400 +Subject: ext4: Retry block allocation if we have free blocks left +To: stable@kernel.org +Cc: "Theodore Ts'o" , Ext4 Developers List , Mingming Cao , 
"Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-7-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit df22291ff0fde0d350cf15dac3e5cc33ac528875 upstream. + +When we truncate files, the meta-data blocks released are not reused +untill we commit the truncate transaction. That means delayed get_block +request will return ENOSPC even if we have free blocks left. Force a +journal commit and retry block allocation if we get ENOSPC with free +blocks left. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Mingming Cao +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 81 +++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 57 insertions(+), 24 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1661,6 +1661,7 @@ struct mpage_da_data { + struct writeback_control *wbc; + int io_done; + long pages_written; ++ int retval; + }; + + /* +@@ -1858,6 +1859,24 @@ static void ext4_da_block_invalidatepage + return; + } + ++static void ext4_print_free_blocks(struct inode *inode) ++{ ++ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ++ printk(KERN_EMERG "Total free blocks count %lld\n", ++ ext4_count_free_blocks(inode->i_sb)); ++ printk(KERN_EMERG "Free/Dirty block details\n"); ++ printk(KERN_EMERG "free_blocks=%lld\n", ++ percpu_counter_sum(&sbi->s_freeblocks_counter)); ++ printk(KERN_EMERG "dirty_blocks=%lld\n", ++ percpu_counter_sum(&sbi->s_dirtyblocks_counter)); ++ printk(KERN_EMERG "Block reservation details\n"); ++ printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", ++ EXT4_I(inode)->i_reserved_data_blocks); ++ printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", ++ EXT4_I(inode)->i_reserved_meta_blocks); ++ return; ++} ++ + /* + * mpage_da_map_blocks - go through given space + * +@@ -1872,7 +1891,7 @@ static int mpage_da_map_blocks(struct m + int err = 0; + struct buffer_head new; + struct buffer_head *lbh = &mpd->lbh; +- sector_t next = lbh->b_blocknr; ++ sector_t next; + + /* + * We consider only non-mapped and non-allocated blocks +@@ -1882,6 +1901,7 @@ static int mpage_da_map_blocks(struct m + new.b_state = lbh->b_state; + new.b_blocknr = 0; + new.b_size = lbh->b_size; ++ next = lbh->b_blocknr; + /* + * If we didn't accumulate anything + * to write simply return +@@ -1898,6 +1918,13 @@ static int mpage_da_map_blocks(struct m + */ + if (err == -EAGAIN) + return 0; ++ ++ if (err == -ENOSPC && ++ ext4_count_free_blocks(mpd->inode->i_sb)) { ++ mpd->retval = err; ++ return 0; ++ } ++ + /* + * get block failure will cause us + * to loop in writepages. Because +@@ -1915,8 +1942,7 @@ static int mpage_da_map_blocks(struct m + printk(KERN_EMERG "This should not happen.!! 
" + "Data will be lost\n"); + if (err == -ENOSPC) { +- printk(KERN_CRIT "Total free blocks count %lld\n", +- ext4_count_free_blocks(mpd->inode->i_sb)); ++ ext4_print_free_blocks(mpd->inode); + } + /* invlaidate all the pages */ + ext4_da_block_invalidatepages(mpd, next, +@@ -2141,39 +2167,36 @@ static int __mpage_da_writepage(struct p + */ + static int mpage_da_writepages(struct address_space *mapping, + struct writeback_control *wbc, +- get_block_t get_block) ++ struct mpage_da_data *mpd) + { +- struct mpage_da_data mpd; + long to_write; + int ret; + +- if (!get_block) ++ if (!mpd->get_block) + return generic_writepages(mapping, wbc); + +- mpd.wbc = wbc; +- mpd.inode = mapping->host; +- mpd.lbh.b_size = 0; +- mpd.lbh.b_state = 0; +- mpd.lbh.b_blocknr = 0; +- mpd.first_page = 0; +- mpd.next_page = 0; +- mpd.get_block = get_block; +- mpd.io_done = 0; +- mpd.pages_written = 0; ++ mpd->lbh.b_size = 0; ++ mpd->lbh.b_state = 0; ++ mpd->lbh.b_blocknr = 0; ++ mpd->first_page = 0; ++ mpd->next_page = 0; ++ mpd->io_done = 0; ++ mpd->pages_written = 0; ++ mpd->retval = 0; + + to_write = wbc->nr_to_write; + +- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); ++ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); + + /* + * Handle last extent of pages + */ +- if (!mpd.io_done && mpd.next_page != mpd.first_page) { +- if (mpage_da_map_blocks(&mpd) == 0) +- mpage_da_submit_io(&mpd); ++ if (!mpd->io_done && mpd->next_page != mpd->first_page) { ++ if (mpage_da_map_blocks(mpd) == 0) ++ mpage_da_submit_io(mpd); + } + +- wbc->nr_to_write = to_write - mpd.pages_written; ++ wbc->nr_to_write = to_write - mpd->pages_written; + return ret; + } + +@@ -2420,6 +2443,7 @@ static int ext4_da_writepages(struct add + { + handle_t *handle = NULL; + loff_t range_start = 0; ++ struct mpage_da_data mpd; + struct inode *inode = mapping->host; + int needed_blocks, ret = 0, nr_to_writebump = 0; + long to_write, pages_skipped = 0; +@@ -2467,6 +2491,9 @@ static int ext4_da_writepages(struct add + range_start = wbc->range_start; + pages_skipped = wbc->pages_skipped; + ++ mpd.wbc = wbc; ++ mpd.inode = mapping->host; ++ + restart_loop: + to_write = wbc->nr_to_write; + while (!ret && to_write > 0) { +@@ -2502,11 +2529,17 @@ restart_loop: + goto out_writepages; + } + } +- + to_write -= wbc->nr_to_write; +- ret = mpage_da_writepages(mapping, wbc, +- ext4_da_get_block_write); ++ ++ mpd.get_block = ext4_da_get_block_write; ++ ret = mpage_da_writepages(mapping, wbc, &mpd); ++ + ext4_journal_stop(handle); ++ ++ if (mpd.retval == -ENOSPC) ++ jbd2_journal_force_commit_nested(sbi->s_journal); ++ ++ /* reset the retry count */ + if (ret == MPAGE_DA_EXTENT_TAIL) { + /* + * got one extent now try with diff --git a/queue-2.6.27/ext4-retry-block-reservation.patch b/queue-2.6.27/ext4-retry-block-reservation.patch new file mode 100644 index 00000000000..d4e3cde902d --- /dev/null +++ b/queue-2.6.27/ext4-retry-block-reservation.patch @@ -0,0 +1,131 @@ +From tytso@mit.edu Mon Apr 19 10:21:18 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:25:59 -0400 +Subject: ext4: Retry block reservation +To: stable@kernel.org +Cc: "Theodore Ts'o" , Ext4 Developers List , Mingming Cao , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-6-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit 030ba6bc67b4f2bc5cd174f57785a1745c929abe upstream. + +During block reservation if we don't have enough blocks left, retry +block reservation with smaller block counts. 
This makes sure we try +fallocate and DIO with smaller request size and don't fail early. The +delayed allocation reservation cannot try with smaller block count. So +retry block reservation to handle temporary disk full conditions. Also +print free blocks details if we fail block allocation during writepages. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Mingming Cao +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/balloc.c | 8 +++++++- + fs/ext4/inode.c | 14 +++++++++++--- + fs/ext4/mballoc.c | 7 ++++++- + 3 files changed, 24 insertions(+), 5 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -1907,10 +1907,16 @@ ext4_fsblk_t ext4_old_new_blocks(handle_ + /* + * With delalloc we already reserved the blocks + */ +- if (ext4_claim_free_blocks(sbi, *count)) { ++ while (*count && ext4_claim_free_blocks(sbi, *count)) { ++ /* let others to free the space */ ++ yield(); ++ *count = *count >> 1; ++ } ++ if (!*count) { + *errp = -ENOSPC; + return 0; /*return with ENOSPC error */ + } ++ num = *count; + } + /* + * Check quota for allocation of this block. +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1549,6 +1549,7 @@ static int ext4_journalled_write_end(str + + static int ext4_da_reserve_space(struct inode *inode, int nrblocks) + { ++ int retries = 0; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + unsigned long md_needed, mdblocks, total = 0; + +@@ -1557,6 +1558,7 @@ static int ext4_da_reserve_space(struct + * in order to allocate nrblocks + * worse case is one extent per block + */ ++repeat: + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; + mdblocks = ext4_calc_metadata_amount(inode, total); +@@ -1567,6 +1569,10 @@ static int ext4_da_reserve_space(struct + + if (ext4_claim_free_blocks(sbi, total)) { + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); ++ if (ext4_should_retry_alloc(inode->i_sb, &retries)) { ++ yield(); ++ goto repeat; ++ } + return -ENOSPC; + } + EXT4_I(inode)->i_reserved_data_blocks += nrblocks; +@@ -1864,20 +1870,18 @@ static void ext4_da_block_invalidatepage + static int mpage_da_map_blocks(struct mpage_da_data *mpd) + { + int err = 0; ++ struct buffer_head new; + struct buffer_head *lbh = &mpd->lbh; + sector_t next = lbh->b_blocknr; +- struct buffer_head new; + + /* + * We consider only non-mapped and non-allocated blocks + */ + if (buffer_mapped(lbh) && !buffer_delay(lbh)) + return 0; +- + new.b_state = lbh->b_state; + new.b_blocknr = 0; + new.b_size = lbh->b_size; +- + /* + * If we didn't accumulate anything + * to write simply return +@@ -1910,6 +1914,10 @@ static int mpage_da_map_blocks(struct m + lbh->b_size >> mpd->inode->i_blkbits, err); + printk(KERN_EMERG "This should not happen.!! 
" + "Data will be lost\n"); ++ if (err == -ENOSPC) { ++ printk(KERN_CRIT "Total free blocks count %lld\n", ++ ext4_count_free_blocks(mpd->inode->i_sb)); ++ } + /* invlaidate all the pages */ + ext4_da_block_invalidatepages(mpd, next, + lbh->b_size >> mpd->inode->i_blkbits); +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4651,7 +4651,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t + /* + * With delalloc we already reserved the blocks + */ +- if (ext4_claim_free_blocks(sbi, ar->len)) { ++ while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { ++ /* let others to free the space */ ++ yield(); ++ ar->len = ar->len >> 1; ++ } ++ if (!ar->len) { + *errp = -ENOSPC; + return 0; + } diff --git a/queue-2.6.27/ext4-use-tag-dirty-lookup-during-mpage_da_submit_io.patch b/queue-2.6.27/ext4-use-tag-dirty-lookup-during-mpage_da_submit_io.patch new file mode 100644 index 00000000000..b908a0badc5 --- /dev/null +++ b/queue-2.6.27/ext4-use-tag-dirty-lookup-during-mpage_da_submit_io.patch @@ -0,0 +1,99 @@ +From tytso@mit.edu Mon Apr 19 10:22:28 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:26:01 -0400 +Subject: ext4: Use tag dirty lookup during mpage_da_submit_io +To: stable@kernel.org +Cc: Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-8-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit af6f029d3836eb7264cd3fbb13a6baf0e5fdb5ea upstream. + +This enables us to drop the range_cont writeback mode +use from ext4_da_writepages. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 30 +++++++++++++----------------- + 1 file changed, 13 insertions(+), 17 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1699,17 +1699,23 @@ static int mpage_da_submit_io(struct mpa + + pagevec_init(&pvec, 0); + while (index <= end) { +- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); ++ /* ++ * We can use PAGECACHE_TAG_DIRTY lookup here because ++ * even though we have cleared the dirty flag on the page ++ * We still keep the page in the radix tree with tag ++ * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io. ++ * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback ++ * which is called via the below writepage callback. 
++ */ ++ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, ++ PAGECACHE_TAG_DIRTY, ++ min(end - index, ++ (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + +- index = page->index; +- if (index > end) +- break; +- index++; +- + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + +@@ -2442,7 +2448,6 @@ static int ext4_da_writepages(struct add + struct writeback_control *wbc) + { + handle_t *handle = NULL; +- loff_t range_start = 0; + struct mpage_da_data mpd; + struct inode *inode = mapping->host; + int needed_blocks, ret = 0, nr_to_writebump = 0; +@@ -2481,14 +2486,7 @@ static int ext4_da_writepages(struct add + wbc->nr_to_write = sbi->s_mb_stream_request; + } + +- if (!wbc->range_cyclic) +- /* +- * If range_cyclic is not set force range_cont +- * and save the old writeback_index +- */ +- wbc->range_cont = 1; + +- range_start = wbc->range_start; + pages_skipped = wbc->pages_skipped; + + mpd.wbc = wbc; +@@ -2559,9 +2557,8 @@ restart_loop: + wbc->nr_to_write = to_write; + } + +- if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { ++ if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) { + /* We skipped pages in this loop */ +- wbc->range_start = range_start; + wbc->nr_to_write = to_write + + wbc->pages_skipped - pages_skipped; + wbc->pages_skipped = pages_skipped; +@@ -2570,7 +2567,6 @@ restart_loop: + + out_writepages: + wbc->nr_to_write = to_write - nr_to_writebump; +- wbc->range_start = range_start; + return ret; + } + diff --git a/queue-2.6.27/percpu-counter-clean-up-percpu_counter_sum_and_set.patch b/queue-2.6.27/percpu-counter-clean-up-percpu_counter_sum_and_set.patch new file mode 100644 index 00000000000..86d95f9e164 --- /dev/null +++ b/queue-2.6.27/percpu-counter-clean-up-percpu_counter_sum_and_set.patch @@ -0,0 +1,104 @@ +From tytso@mit.edu Mon Apr 19 10:20:04 2010 +From: Mingming Cao +Date: Mon, 15 Mar 2010 20:25:56 -0400 +Subject: percpu counter: clean up percpu_counter_sum_and_set() +To: stable@kernel.org +Cc: "Theodore Ts'o" , Andrew Morton , Ext4 Developers List , Mingming Cao , "Jayson R. King" +Message-ID: <1268699165-17461-3-git-send-email-tytso@mit.edu> + + +From: Mingming Cao + +commit 1f7c14c62ce63805f9574664a6c6de3633d4a354 upstream. + +percpu_counter_sum_and_set() and percpu_counter_sum() is the same except +the former updates the global counter after accounting. Since we are +taking the fbc->lock to calculate the precise value of the counter in +percpu_counter_sum() anyway, it should simply set fbc->count too, as the +percpu_counter_sum_and_set() does. + +This patch merges these two interfaces into one. + +Signed-off-by: Mingming Cao +Acked-by: Peter Zijlstra +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Jayson R. 
King +Signed-off-by: Theodore Ts'o + +--- + fs/ext4/balloc.c | 2 +- + include/linux/percpu_counter.h | 12 +++--------- + lib/percpu_counter.c | 8 +++----- + 3 files changed, 7 insertions(+), 15 deletions(-) + +--- a/fs/ext4/balloc.c ++++ b/fs/ext4/balloc.c +@@ -1778,7 +1778,7 @@ ext4_fsblk_t ext4_has_free_blocks(struct + #ifdef CONFIG_SMP + if (free_blocks - root_blocks < FBC_BATCH) + free_blocks = +- percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); ++ percpu_counter_sum(&sbi->s_freeblocks_counter); + #endif + if (free_blocks <= root_blocks) + /* we don't have free space */ +--- a/include/linux/percpu_counter.h ++++ b/include/linux/percpu_counter.h +@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percp + void percpu_counter_destroy(struct percpu_counter *fbc); + void percpu_counter_set(struct percpu_counter *fbc, s64 amount); + void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); +-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); ++s64 __percpu_counter_sum(struct percpu_counter *fbc); + + static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) + { +@@ -44,19 +44,13 @@ static inline void percpu_counter_add(st + + static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) + { +- s64 ret = __percpu_counter_sum(fbc, 0); ++ s64 ret = __percpu_counter_sum(fbc); + return ret < 0 ? 0 : ret; + } + +-static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +-{ +- return __percpu_counter_sum(fbc, 1); +-} +- +- + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) + { +- return __percpu_counter_sum(fbc, 0); ++ return __percpu_counter_sum(fbc); + } + + static inline s64 percpu_counter_read(struct percpu_counter *fbc) +--- a/lib/percpu_counter.c ++++ b/lib/percpu_counter.c +@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); + * Add up all the per-cpu counts, return the result. 
This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) ++s64 __percpu_counter_sum(struct percpu_counter *fbc) + { + s64 ret; + int cpu; +@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_c + for_each_online_cpu(cpu) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; +- if (set) +- *pcount = 0; ++ *pcount = 0; + } +- if (set) +- fbc->count = ret; ++ fbc->count = ret; + + spin_unlock(&fbc->lock); + return ret; diff --git a/queue-2.6.27/series b/queue-2.6.27/series index 9091fb02c6d..3041f8329e3 100644 --- a/queue-2.6.27/series +++ b/queue-2.6.27/series @@ -1 +1,12 @@ alsa-mixart-range-checking-proc-file.patch +ext4-invalidate-pages-if-delalloc-block-allocation-fails.patch +percpu-counter-clean-up-percpu_counter_sum_and_set.patch +ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch +ext4-add-percpu-dirty-block-accounting.patch +ext4-retry-block-reservation.patch +ext4-retry-block-allocation-if-we-have-free-blocks-left.patch +ext4-use-tag-dirty-lookup-during-mpage_da_submit_io.patch +vfs-remove-the-range_cont-writeback-mode.patch +vfs-add-no_nrwrite_index_update-writeback-control-flag.patch +ext4-fix-file-fragmentation-during-large-file-write.patch +ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch diff --git a/queue-2.6.27/vfs-add-no_nrwrite_index_update-writeback-control-flag.patch b/queue-2.6.27/vfs-add-no_nrwrite_index_update-writeback-control-flag.patch new file mode 100644 index 00000000000..a6b528652cd --- /dev/null +++ b/queue-2.6.27/vfs-add-no_nrwrite_index_update-writeback-control-flag.patch @@ -0,0 +1,87 @@ +From tytso@mit.edu Mon Apr 19 10:23:14 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:26:03 -0400 +Subject: vfs: Add no_nrwrite_index_update writeback control flag +To: stable@kernel.org +Cc: linux-fsdevel@vger.kernel.org, Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-10-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit 17bc6c30cf6bfffd816bdc53682dd46fc34a2cf4 upstream. + +If no_nrwrite_index_update is set we don't update nr_to_write and +address space writeback_index in write_cache_pages. This change +enables a file system to skip these updates in write_cache_pages and do +them in the writepages() callback. This patch will be followed by an +ext4 patch that make use of these new flags. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +CC: linux-fsdevel@vger.kernel.org +[dev@jaysonking.com: Modified the patch to account for subsequent changes in mainline being cherry-picked earlier for 2.6.27.y.] +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/writeback.h | 9 +++++++++ + mm/page-writeback.c | 14 +++++++++----- + 2 files changed, 18 insertions(+), 5 deletions(-) + +--- a/include/linux/writeback.h ++++ b/include/linux/writeback.h +@@ -62,6 +62,15 @@ struct writeback_control { + unsigned for_writepages:1; /* This is a writepages() call */ + unsigned range_cyclic:1; /* range_start is cyclic */ + unsigned more_io:1; /* more io to be dispatched */ ++ /* ++ * write_cache_pages() won't update wbc->nr_to_write and ++ * mapping->writeback_index if no_nrwrite_index_update ++ * is set. 
write_cache_pages() may write more than we ++ * requested and we want to make sure nr_to_write and ++ * writeback_index are updated in a consistent manner ++ * so we use a single control to update them ++ */ ++ unsigned no_nrwrite_index_update:1; + }; + + /* +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -878,6 +878,7 @@ int write_cache_pages(struct address_spa + pgoff_t done_index; + int cycled; + int range_whole = 0; ++ long nr_to_write = wbc->nr_to_write; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; +@@ -985,9 +986,9 @@ continue_unlock: + } + } + +- if (wbc->nr_to_write > 0) { +- wbc->nr_to_write--; +- if (wbc->nr_to_write == 0 && ++ if (nr_to_write > 0) { ++ nr_to_write--; ++ if (nr_to_write == 0 && + wbc->sync_mode == WB_SYNC_NONE) { + /* + * We stop writing back only if we are +@@ -1024,8 +1025,11 @@ continue_unlock: + end = writeback_index - 1; + goto retry; + } +- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) +- mapping->writeback_index = done_index; ++ if (!wbc->no_nrwrite_index_update) { ++ if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) ++ mapping->writeback_index = done_index; ++ wbc->nr_to_write = nr_to_write; ++ } + + return ret; + } diff --git a/queue-2.6.27/vfs-remove-the-range_cont-writeback-mode.patch b/queue-2.6.27/vfs-remove-the-range_cont-writeback-mode.patch new file mode 100644 index 00000000000..5e5a70c269e --- /dev/null +++ b/queue-2.6.27/vfs-remove-the-range_cont-writeback-mode.patch @@ -0,0 +1,50 @@ +From tytso@mit.edu Mon Apr 19 10:22:47 2010 +From: Aneesh Kumar K.V +Date: Mon, 15 Mar 2010 20:26:02 -0400 +Subject: vfs: Remove the range_cont writeback mode. +To: stable@kernel.org +Cc: linux-fsdevel@vger.kernel.org, Ext4 Developers List , "Theodore Ts'o" , "Jayson R. King" , "Aneesh Kumar K.V" +Message-ID: <1268699165-17461-9-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +commit 74baaaaec8b4f22e1ae279f5ecca4ff705b28912 upstream. + +Ext4 was the only user of range_cont writeback mode and ext4 switched +to a different method. So remove the range_cont mode which is not used +in the kernel. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +CC: linux-fsdevel@vger.kernel.org +Signed-off-by: Jayson R. King +Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/writeback.h | 1 - + mm/page-writeback.c | 2 -- + 2 files changed, 3 deletions(-) + +--- a/include/linux/writeback.h ++++ b/include/linux/writeback.h +@@ -62,7 +62,6 @@ struct writeback_control { + unsigned for_writepages:1; /* This is a writepages() call */ + unsigned range_cyclic:1; /* range_start is cyclic */ + unsigned more_io:1; /* more io to be dispatched */ +- unsigned range_cont:1; + }; + + /* +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -1027,8 +1027,6 @@ continue_unlock: + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = done_index; + +- if (wbc->range_cont) +- wbc->range_start = index << PAGE_CACHE_SHIFT; + return ret; + } + EXPORT_SYMBOL(write_cache_pages);
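The recurring accounting pattern in this series is worth a closing illustration: a percpu counter read is cheap but can be stale by up to BATCH * NR_CPUS, so the claim path trusts the approximate value only while the request stays clear of that error margin, and falls back to the exact (slow, locked) sum near the watermark. The sketch below models that with plain arrays; all names are illustrative, and the kernel's struct percpu_counter, locking, and root-reservation checks are omitted:

    #include <stdio.h>

    #define NR_CPUS 4
    #define BATCH 32
    #define WATERMARK (4 * BATCH * NR_CPUS)  /* like EXT4_FREEBLOCKS_WATERMARK */

    struct toy_counter {
        long long count;      /* global value, folded in every BATCH events */
        long local[NR_CPUS];  /* per-CPU residue not yet folded in */
    };

    static long long approx_read(const struct toy_counter *c)
    {
        return c->count;      /* fast path: ignores the per-CPU residue */
    }

    static long long exact_sum(const struct toy_counter *c)
    {
        long long sum = c->count;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            sum += c->local[cpu];
        return sum;
    }

    /* claim nblocks: account them as dirty instead of decrementing free */
    static int claim_blocks(struct toy_counter *freeb,
                            struct toy_counter *dirtyb, long long nblocks)
    {
        long long f = approx_read(freeb);
        long long d = approx_read(dirtyb);

        if (f - (nblocks + d) < WATERMARK) {  /* too close to call */
            f = exact_sum(freeb);
            d = exact_sum(dirtyb);
        }
        if (f < nblocks + d)
            return -1;                        /* would be -ENOSPC */
        dirtyb->count += nblocks;             /* reserved, not yet allocated */
        return 0;
    }

    int main(void)
    {
        struct toy_counter freeb  = { .count = 600, .local = { 10, -5, 20, 0 } };
        struct toy_counter dirtyb = { .count = 100 };

        printf("claim 400: %s\n",
               claim_blocks(&freeb, &dirtyb, 400) ? "ENOSPC" : "ok");
        printf("claim 400 again: %s\n",
               claim_blocks(&freeb, &dirtyb, 400) ? "ENOSPC" : "ok");
        return 0;
    }

At actual allocation time the patches above then move the claim out of the dirty counter as the free counter is decremented (percpu_counter_sub on both), which is also why the ext4_statfs hunk reports f_bfree as the free count minus the dirty count.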