--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:21:01 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:25:58 -0400
+Subject: ext4: Add percpu dirty block accounting.
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Mingming Cao <cmm@us.ibm.com>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-5-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 6bc6e63fcd7dac9e633ea29f1fddd9580ab28f3f upstream.
+
+This patch adds dirty block accounting using percpu_counters. Delayed
+allocation block reservation is now done by updating the dirty block
+counter. In a later patch we switch to non-delalloc mode if the
+filesystem's free blocks are greater than 150% of the total filesystem
+dirty blocks.
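+
+As an illustration only, here is a minimal userspace sketch of this flow
+(plain long long variables stand in for the percpu counters, and the
+root-reserved-block handling is left out):
+
+    #include <stdio.h>
+
+    static long long free_blocks = 1000;   /* models s_freeblocks_counter  */
+    static long long dirty_blocks;         /* models s_dirtyblocks_counter */
+
+    /* Reservation (the ext4_claim_free_blocks() idea): bump dirty only. */
+    static int claim_free_blocks(long long nblocks)
+    {
+        if (free_blocks - dirty_blocks < nblocks)
+            return -1;                     /* -ENOSPC */
+        dirty_blocks += nblocks;
+        return 0;
+    }
+
+    /* Real allocation: the charge moves from dirty to free. */
+    static void allocate_blocks(long long nblocks)
+    {
+        free_blocks -= nblocks;
+        dirty_blocks -= nblocks;
+    }
+
+    int main(void)
+    {
+        if (claim_free_blocks(100) == 0)
+            allocate_blocks(100);
+        printf("free=%lld dirty=%lld\n", free_blocks, dirty_blocks);
+        return 0;
+    }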
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/balloc.c | 62 ++++++++++++++++++++++++++++++++++--------------------
+ fs/ext4/ext4_sb.h | 1
+ fs/ext4/inode.c | 22 +++++++++----------
+ fs/ext4/mballoc.c | 31 ++++++++++++---------------
+ fs/ext4/super.c | 8 ++++++
+ 5 files changed, 73 insertions(+), 51 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -1757,26 +1757,38 @@ out:
+ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+ ext4_fsblk_t nblocks)
+ {
+- s64 free_blocks;
++ s64 free_blocks, dirty_blocks;
+ ext4_fsblk_t root_blocks = 0;
+ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
++ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
+
+- free_blocks = percpu_counter_read(fbc);
++ free_blocks = percpu_counter_read_positive(fbc);
++ dirty_blocks = percpu_counter_read_positive(dbc);
+
+ if (!capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+ root_blocks = ext4_r_blocks_count(sbi->s_es);
+
+- if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
+- free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
+-
+- if (free_blocks < (root_blocks + nblocks))
++ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
++ EXT4_FREEBLOCKS_WATERMARK) {
++ free_blocks = percpu_counter_sum(fbc);
++ dirty_blocks = percpu_counter_sum(dbc);
++ if (dirty_blocks < 0) {
++ printk(KERN_CRIT "Dirty block accounting "
++ "went wrong %lld\n",
++ dirty_blocks);
++ }
++ }
++ /* Check whether we have space after
++ * accounting for current dirty blocks
++ */
++ if (free_blocks < ((s64)(root_blocks + nblocks) + dirty_blocks))
+ /* we don't have free space */
+ return -ENOSPC;
+
+- /* reduce fs free blocks counter */
+- percpu_counter_sub(fbc, nblocks);
++ /* Add the blocks to nblocks */
++ percpu_counter_add(dbc, nblocks);
+ return 0;
+ }
+
+@@ -1792,23 +1804,28 @@ int ext4_claim_free_blocks(struct ext4_s
+ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+ ext4_fsblk_t nblocks)
+ {
+- ext4_fsblk_t free_blocks;
++ ext4_fsblk_t free_blocks, dirty_blocks;
+ ext4_fsblk_t root_blocks = 0;
++ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
++ struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter;
+
+- free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
++ free_blocks = percpu_counter_read_positive(fbc);
++ dirty_blocks = percpu_counter_read_positive(dbc);
+
+ if (!capable(CAP_SYS_RESOURCE) &&
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+ root_blocks = ext4_r_blocks_count(sbi->s_es);
+
+- if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
+- free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
+-
+- if (free_blocks <= root_blocks)
++ if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
++ EXT4_FREEBLOCKS_WATERMARK) {
++ free_blocks = percpu_counter_sum_positive(fbc);
++ dirty_blocks = percpu_counter_sum_positive(dbc);
++ }
++ if (free_blocks <= (root_blocks + dirty_blocks))
+ /* we don't have free space */
+ return 0;
+- if (free_blocks - root_blocks < nblocks)
++ if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
+ return free_blocks - root_blocks;
+ return nblocks;
+ }
+@@ -2089,13 +2106,14 @@ allocated:
+ le16_add_cpu(&gdp->bg_free_blocks_count, -num);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
+ spin_unlock(sb_bgl_lock(sbi, group_no));
+- if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
+- /*
+- * we allocated less blocks than we
+- * claimed. Add the difference back.
+- */
+- percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
+- }
++ percpu_counter_sub(&sbi->s_freeblocks_counter, num);
++ /*
++ * Now reduce the dirty block count also. Should not go negative
++ */
++ if (!EXT4_I(inode)->i_delalloc_reserved_flag)
++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, *count);
++ else
++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, num);
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
+ spin_lock(sb_bgl_lock(sbi, flex_group));
+--- a/fs/ext4/ext4_sb.h
++++ b/fs/ext4/ext4_sb.h
+@@ -60,6 +60,7 @@ struct ext4_sb_info {
+ struct percpu_counter s_freeblocks_counter;
+ struct percpu_counter s_freeinodes_counter;
+ struct percpu_counter s_dirs_counter;
++ struct percpu_counter s_dirtyblocks_counter;
+ struct blockgroup_lock s_blockgroup_lock;
+
+ /* root of the per fs reservation window tree */
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1032,19 +1032,20 @@ static void ext4_da_update_reserve_space
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+- /* Account for allocated meta_blocks */
+- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+-
+- /* update fs free blocks counter for truncate case */
+- percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
++ if (mdb_free) {
++ /* Account for allocated meta_blocks */
++ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
++
++ /* update fs dirty blocks counter */
++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
++ EXT4_I(inode)->i_allocated_meta_blocks = 0;
++ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
++ }
+
+ /* update per-inode reservations */
+ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= used;
+
+- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+- EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
+ /*
+@@ -1609,8 +1610,8 @@ static void ext4_da_release_space(struct
+
+ release = to_free + mdb_free;
+
+- /* update fs free blocks counter for truncate case */
+- percpu_counter_add(&sbi->s_freeblocks_counter, release);
++ /* update fs dirty blocks counter for truncate case */
++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+
+ /* update per-inode reservations */
+ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
+@@ -2546,7 +2547,6 @@ static int ext4_da_write_begin(struct fi
+ index = pos >> PAGE_CACHE_SHIFT;
+ from = pos & (PAGE_CACHE_SIZE - 1);
+ to = from + len;
+-
+ retry:
+ /*
+ * With delayed allocation, we don't log the i_disksize update
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3100,7 +3100,7 @@ void exit_ext4_mballoc(void)
+ */
+ static noinline_for_stack int
+ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+- handle_t *handle)
++ handle_t *handle, unsigned long reserv_blks)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct ext4_super_block *es;
+@@ -3188,21 +3188,16 @@ ext4_mb_mark_diskspace_used(struct ext4_
+ le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
+ spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
+-
++ percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+ /*
+- * free blocks account has already be reduced/reserved
+- * at write_begin() time for delayed allocation
+- * do not double accounting
++ * Now reduce the dirty block count also. Should not go negative
+ */
+- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
+- ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
+- /*
+- * we allocated less blocks than we calimed
+- * Add the difference back
+- */
+- percpu_counter_add(&sbi->s_freeblocks_counter,
+- ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
+- }
++ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
++ /* release all the reserved blocks if non delalloc */
++ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
++ else
++ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
++ ac->ac_b_ex.fe_len);
+
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi,
+@@ -4636,12 +4631,13 @@ static int ext4_mb_discard_preallocation
+ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
+ struct ext4_allocation_request *ar, int *errp)
+ {
++ int freed;
+ struct ext4_allocation_context *ac = NULL;
+ struct ext4_sb_info *sbi;
+ struct super_block *sb;
+ ext4_fsblk_t block = 0;
+- int freed;
+- int inquota;
++ unsigned long inquota;
++ unsigned long reserv_blks = 0;
+
+ sb = ar->inode->i_sb;
+ sbi = EXT4_SB(sb);
+@@ -4659,6 +4655,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
+ *errp = -ENOSPC;
+ return 0;
+ }
++ reserv_blks = ar->len;
+ }
+ while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
+ ar->flags |= EXT4_MB_HINT_NOPREALLOC;
+@@ -4704,7 +4701,7 @@ repeat:
+ ext4_mb_new_preallocation(ac);
+ }
+ if (likely(ac->ac_status == AC_STATUS_FOUND)) {
+- *errp = ext4_mb_mark_diskspace_used(ac, handle);
++ *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
+ if (*errp == -EAGAIN) {
+ /*
+ * drop the reference that we took
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -521,6 +521,7 @@ static void ext4_put_super(struct super_
+ percpu_counter_destroy(&sbi->s_freeblocks_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
++ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+ brelse(sbi->s_sbh);
+ #ifdef CONFIG_QUOTA
+ for (i = 0; i < MAXQUOTAS; i++)
+@@ -2280,6 +2281,9 @@ static int ext4_fill_super(struct super_
+ err = percpu_counter_init(&sbi->s_dirs_counter,
+ ext4_count_dirs(sb));
+ }
++ if (!err) {
++ err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
++ }
+ if (err) {
+ printk(KERN_ERR "EXT4-fs: insufficient memory\n");
+ goto failed_mount3;
+@@ -2517,6 +2521,7 @@ failed_mount3:
+ percpu_counter_destroy(&sbi->s_freeblocks_counter);
+ percpu_counter_destroy(&sbi->s_freeinodes_counter);
+ percpu_counter_destroy(&sbi->s_dirs_counter);
++ percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+ failed_mount2:
+ for (i = 0; i < db_count; i++)
+ brelse(sbi->s_group_desc[i]);
+@@ -3208,7 +3213,8 @@ static int ext4_statfs(struct dentry *de
+ buf->f_type = EXT4_SUPER_MAGIC;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
+- buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
++ buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
++ percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+ ext4_free_blocks_count_set(es, buf->f_bfree);
+ buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
+ if (buf->f_bfree < ext4_r_blocks_count(es))
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:23:42 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:26:04 -0400
+Subject: ext4: Fix file fragmentation during large file write.
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-11-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 22208dedbd7626e5fc4339c417f8d24cc21f79d7 upstream.
+
+The range_cyclic writeback mode uses the address_space writeback_index
+as the start index for writeback. With delayed allocation we were
+updating writeback_index wrongly, resulting in highly fragmented files.
+This patch reduces the number of extents from roughly 4000 to 27 for a
+3GB file.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+[dev@jaysonking.com: Some changed lines from the original version of this patch were dropped, since they were rolled up with another cherry-picked patch applied to 2.6.27.y earlier.]
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 88 +++++++++++++++++++++++++++++++++++---------------------
+ 1 file changed, 55 insertions(+), 33 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1721,7 +1721,11 @@ static int mpage_da_submit_io(struct mpa
+
+ pages_skipped = mpd->wbc->pages_skipped;
+ err = mapping->a_ops->writepage(page, mpd->wbc);
+- if (!err)
++ if (!err && (pages_skipped == mpd->wbc->pages_skipped))
++ /*
++ * have successfully written the page
++ * without skipping the same
++ */
+ mpd->pages_written++;
+ /*
+ * In error case, we have to continue because
+@@ -2175,7 +2179,6 @@ static int mpage_da_writepages(struct ad
+ struct writeback_control *wbc,
+ struct mpage_da_data *mpd)
+ {
+- long to_write;
+ int ret;
+
+ if (!mpd->get_block)
+@@ -2190,19 +2193,18 @@ static int mpage_da_writepages(struct ad
+ mpd->pages_written = 0;
+ mpd->retval = 0;
+
+- to_write = wbc->nr_to_write;
+-
+ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
+-
+ /*
+ * Handle last extent of pages
+ */
+ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
+ if (mpage_da_map_blocks(mpd) == 0)
+ mpage_da_submit_io(mpd);
+- }
+
+- wbc->nr_to_write = to_write - mpd->pages_written;
++ mpd->io_done = 1;
++ ret = MPAGE_DA_EXTENT_TAIL;
++ }
++ wbc->nr_to_write -= mpd->pages_written;
+ return ret;
+ }
+
+@@ -2447,11 +2449,14 @@ static int ext4_da_writepages_trans_bloc
+ static int ext4_da_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+ {
++ pgoff_t index;
++ int range_whole = 0;
+ handle_t *handle = NULL;
+ struct mpage_da_data mpd;
+ struct inode *inode = mapping->host;
++ int no_nrwrite_index_update;
++ long pages_written = 0, pages_skipped;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+- long to_write, pages_skipped = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+ /*
+@@ -2485,16 +2490,26 @@ static int ext4_da_writepages(struct add
+ nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
+ wbc->nr_to_write = sbi->s_mb_stream_request;
+ }
++ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
++ range_whole = 1;
+
+-
+- pages_skipped = wbc->pages_skipped;
++ if (wbc->range_cyclic)
++ index = mapping->writeback_index;
++ else
++ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+
+ mpd.wbc = wbc;
+ mpd.inode = mapping->host;
+
+-restart_loop:
+- to_write = wbc->nr_to_write;
+- while (!ret && to_write > 0) {
++ /*
++ * we don't want write_cache_pages to update
++ * nr_to_write and writeback_index
++ */
++ no_nrwrite_index_update = wbc->no_nrwrite_index_update;
++ wbc->no_nrwrite_index_update = 1;
++ pages_skipped = wbc->pages_skipped;
++
++ while (!ret && wbc->nr_to_write > 0) {
+
+ /*
+ * we insert one extent at a time. So we need
+@@ -2527,46 +2542,53 @@ restart_loop:
+ goto out_writepages;
+ }
+ }
+- to_write -= wbc->nr_to_write;
+-
+ mpd.get_block = ext4_da_get_block_write;
+ ret = mpage_da_writepages(mapping, wbc, &mpd);
+
+ ext4_journal_stop(handle);
+
+- if (mpd.retval == -ENOSPC)
++ if (mpd.retval == -ENOSPC) {
++ /* commit the transaction which would
++ * free blocks released in the transaction
++ * and try again
++ */
+ jbd2_journal_force_commit_nested(sbi->s_journal);
+-
+- /* reset the retry count */
+- if (ret == MPAGE_DA_EXTENT_TAIL) {
++ wbc->pages_skipped = pages_skipped;
++ ret = 0;
++ } else if (ret == MPAGE_DA_EXTENT_TAIL) {
+ /*
+ * got one extent now try with
+ * rest of the pages
+ */
+- to_write += wbc->nr_to_write;
++ pages_written += mpd.pages_written;
++ wbc->pages_skipped = pages_skipped;
+ ret = 0;
+- } else if (wbc->nr_to_write) {
++ } else if (wbc->nr_to_write)
+ /*
+ * There is no more writeout needed
+ * or we requested for a noblocking writeout
+ * and we found the device congested
+ */
+- to_write += wbc->nr_to_write;
+ break;
+- }
+- wbc->nr_to_write = to_write;
+- }
+-
+- if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
+- /* We skipped pages in this loop */
+- wbc->nr_to_write = to_write +
+- wbc->pages_skipped - pages_skipped;
+- wbc->pages_skipped = pages_skipped;
+- goto restart_loop;
+ }
++ if (pages_skipped != wbc->pages_skipped)
++ printk(KERN_EMERG "This should not happen leaving %s "
++ "with nr_to_write = %ld ret = %d\n",
++ __func__, wbc->nr_to_write, ret);
++
++ /* Update index */
++ index += pages_written;
++ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
++ /*
++ * set the writeback_index so that range_cyclic
++ * mode will write it back later
++ */
++ mapping->writeback_index = index;
+
+ out_writepages:
+- wbc->nr_to_write = to_write - nr_to_writebump;
++ if (!no_nrwrite_index_update)
++ wbc->no_nrwrite_index_update = 0;
++ wbc->nr_to_write -= nr_to_writebump;
+ return ret;
+ }
+
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:24:03 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:26:05 -0400
+Subject: ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-12-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 2acf2c261b823d9d9ed954f348b97620297a36b5 upstream.
+
+With delayed allocation we lock the page in write_cache_pages() and
+try to build an in-memory extent of contiguous blocks. This is needed
+so that we can issue large contiguous block requests. If range_cyclic
+mode is enabled, write_cache_pages() will loop back to index 0 if
+no I/O has been done yet, and try to start writing from the beginning
+of the range. That causes an attempt to take the page lock of a lower
+index page while holding the page lock of a higher index page, which
+can deadlock with another writeback thread.
+
+The solution is to implement the range_cyclic behavior in
+ext4_da_writepages() instead.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=12579
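+
+A toy userspace model of the two-pass scheme (write_range() is a
+hypothetical stand-in for one walk over the page cache; the real code
+clears and restores wbc->range_cyclic around the walk):
+
+    #include <stdio.h>
+
+    /* One writeback pass over [start, end]; returns pages written. */
+    static long write_range(unsigned long start, unsigned long end)
+    {
+        printf("writeback pass over pages %lu..%lu\n", start, end);
+        return 0;                        /* pretend no I/O was possible */
+    }
+
+    int main(void)
+    {
+        unsigned long writeback_index = 42;  /* resume point from last pass */
+        int cycled = (writeback_index == 0); /* nothing to wrap back to     */
+        long written;
+
+        written = write_range(writeback_index, ~0UL);
+        if (!written && !cycled)             /* wrap around exactly once */
+            write_range(0, writeback_index - 1);
+        return 0;
+    }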
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2456,6 +2456,7 @@ static int ext4_da_writepages(struct add
+ struct inode *inode = mapping->host;
+ int no_nrwrite_index_update;
+ long pages_written = 0, pages_skipped;
++ int range_cyclic, cycled = 1, io_done = 0;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+@@ -2493,9 +2494,15 @@ static int ext4_da_writepages(struct add
+ if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+ range_whole = 1;
+
+- if (wbc->range_cyclic)
++ range_cyclic = wbc->range_cyclic;
++ if (wbc->range_cyclic) {
+ index = mapping->writeback_index;
+- else
++ if (index)
++ cycled = 0;
++ wbc->range_start = index << PAGE_CACHE_SHIFT;
++ wbc->range_end = LLONG_MAX;
++ wbc->range_cyclic = 0;
++ } else
+ index = wbc->range_start >> PAGE_CACHE_SHIFT;
+
+ mpd.wbc = wbc;
+@@ -2509,6 +2516,7 @@ static int ext4_da_writepages(struct add
+ wbc->no_nrwrite_index_update = 1;
+ pages_skipped = wbc->pages_skipped;
+
++retry:
+ while (!ret && wbc->nr_to_write > 0) {
+
+ /*
+@@ -2563,6 +2571,7 @@ static int ext4_da_writepages(struct add
+ pages_written += mpd.pages_written;
+ wbc->pages_skipped = pages_skipped;
+ ret = 0;
++ io_done = 1;
+ } else if (wbc->nr_to_write)
+ /*
+ * There is no more writeout needed
+@@ -2571,6 +2580,13 @@ static int ext4_da_writepages(struct add
+ */
+ break;
+ }
++ if (!io_done && !cycled) {
++ cycled = 1;
++ index = 0;
++ wbc->range_start = index << PAGE_CACHE_SHIFT;
++ wbc->range_end = mapping->writeback_index - 1;
++ goto retry;
++ }
+ if (pages_skipped != wbc->pages_skipped)
+ printk(KERN_EMERG "This should not happen leaving %s "
+ "with nr_to_write = %ld ret = %d\n",
+@@ -2578,6 +2594,7 @@ static int ext4_da_writepages(struct add
+
+ /* Update index */
+ index += pages_written;
++ wbc->range_cyclic = range_cyclic;
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ /*
+ * set the writeback_index so that range_cyclic
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:19:40 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:25:55 -0400
+Subject: ext4: invalidate pages if delalloc block allocation fails.
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-2-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit c4a0c46ec92c194c873232b88debce4e1a448483 upstream.
+
+We are a bit aggressive in invalidating all the pages. But
+it is ok because we really don't know why the block allocation
+failed, and it is better to come off the writeback path
+so that the user can look for more info.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 73 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1821,6 +1821,39 @@ static inline void __unmap_underlying_bl
+ unmap_underlying_metadata(bdev, bh->b_blocknr + i);
+ }
+
++static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
++ sector_t logical, long blk_cnt)
++{
++ int nr_pages, i;
++ pgoff_t index, end;
++ struct pagevec pvec;
++ struct inode *inode = mpd->inode;
++ struct address_space *mapping = inode->i_mapping;
++
++ index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
++ end = (logical + blk_cnt - 1) >>
++ (PAGE_CACHE_SHIFT - inode->i_blkbits);
++ while (index <= end) {
++ nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
++ if (nr_pages == 0)
++ break;
++ for (i = 0; i < nr_pages; i++) {
++ struct page *page = pvec.pages[i];
++ index = page->index;
++ if (index > end)
++ break;
++ index++;
++
++ BUG_ON(!PageLocked(page));
++ BUG_ON(PageWriteback(page));
++ block_invalidatepage(page, 0);
++ ClearPageUptodate(page);
++ unlock_page(page);
++ }
++ }
++ return;
++}
++
+ /*
+ * mpage_da_map_blocks - go through given space
+ *
+@@ -1830,7 +1863,7 @@ static inline void __unmap_underlying_bl
+ * The function skips space we know is already mapped to disk blocks.
+ *
+ */
+-static void mpage_da_map_blocks(struct mpage_da_data *mpd)
++static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+ {
+ int err = 0;
+ struct buffer_head *lbh = &mpd->lbh;
+@@ -1841,7 +1874,7 @@ static void mpage_da_map_blocks(struct m
+ * We consider only non-mapped and non-allocated blocks
+ */
+ if (buffer_mapped(lbh) && !buffer_delay(lbh))
+- return;
++ return 0;
+
+ new.b_state = lbh->b_state;
+ new.b_blocknr = 0;
+@@ -1852,10 +1885,38 @@ static void mpage_da_map_blocks(struct m
+ * to write simply return
+ */
+ if (!new.b_size)
+- return;
++ return 0;
+ err = mpd->get_block(mpd->inode, next, &new, 1);
+- if (err)
+- return;
++ if (err) {
++
++ /* If get block returns with error
++ * we simply return. Later writepage
++ * will redirty the page and writepages
++ * will find the dirty page again
++ */
++ if (err == -EAGAIN)
++ return 0;
++ /*
++ * get block failure will cause us
++ * to loop in writepages. Because
++ * a_ops->writepage won't be able to
++ * make progress. The page will be redirtied
++ * by writepage and writepages will again
++ * try to write the same.
++ */
++ printk(KERN_EMERG "%s block allocation failed for inode %lu "
++ "at logical offset %llu with max blocks "
++ "%zd with error %d\n",
++ __func__, mpd->inode->i_ino,
++ (unsigned long long)next,
++ lbh->b_size >> mpd->inode->i_blkbits, err);
++ printk(KERN_EMERG "This should not happen.!! "
++ "Data will be lost\n");
++ /* invlaidate all the pages */
++ ext4_da_block_invalidatepages(mpd, next,
++ lbh->b_size >> mpd->inode->i_blkbits);
++ return err;
++ }
+ BUG_ON(new.b_size == 0);
+
+ if (buffer_new(&new))
+@@ -1868,7 +1929,7 @@ static void mpage_da_map_blocks(struct m
+ if (buffer_delay(lbh) || buffer_unwritten(lbh))
+ mpage_put_bnr_to_bhs(mpd, next, &new);
+
+- return;
++ return 0;
+ }
+
+ #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
+@@ -1937,8 +1998,8 @@ flush_it:
+ * We couldn't merge the block to our extent, so we
+ * need to flush current extent and start new one
+ */
+- mpage_da_map_blocks(mpd);
+- mpage_da_submit_io(mpd);
++ if (mpage_da_map_blocks(mpd) == 0)
++ mpage_da_submit_io(mpd);
+ mpd->io_done = 1;
+ return;
+ }
+@@ -1980,8 +2041,8 @@ static int __mpage_da_writepage(struct p
+ * and start IO on them using writepage()
+ */
+ if (mpd->next_page != mpd->first_page) {
+- mpage_da_map_blocks(mpd);
+- mpage_da_submit_io(mpd);
++ if (mpage_da_map_blocks(mpd) == 0)
++ mpage_da_submit_io(mpd);
+ /*
+ * skip rest of the page in the page_vec
+ */
+@@ -2102,8 +2163,8 @@ static int mpage_da_writepages(struct ad
+ * Handle last extent of pages
+ */
+ if (!mpd.io_done && mpd.next_page != mpd.first_page) {
+- mpage_da_map_blocks(&mpd);
+- mpage_da_submit_io(&mpd);
++ if (mpage_da_map_blocks(&mpd) == 0)
++ mpage_da_submit_io(&mpd);
+ }
+
+ wbc->nr_to_write = to_write - mpd.pages_written;
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:20:41 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:25:57 -0400
+Subject: ext4: Make sure all the block allocation paths reserve blocks
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-4-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit a30d542a0035b886ffaafd0057ced0a2b28c3a4f upstream.
+
+With delayed allocation we need to make sure blocks are reserved before
+we attempt to allocate them. Otherwise we get a block allocation failure
+(ENOSPC) during writepages which cannot be handled. This would mean
+silent data loss (we do a printk stating data will be lost). This patch
+updates the DIO and fallocate code paths to do block reservation before
+block allocation. This is needed to make sure parallel DIO and fallocate
+requests don't take blocks out of the delayed reserve space.
+
+When the free blocks count goes below a threshold we switch to a slow
+path which looks at the other CPUs' accumulated percpu counter values.
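+
+A userspace sketch of the watermark idea (NR_CPUS and both counter
+helpers are invented stubs here; the kernel check also accounts for the
+dirty-block counter added in a later patch):
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    #define FBC_BATCH             32
+    #define NR_CPUS               8
+    #define FREEBLOCKS_WATERMARK  (4 * (FBC_BATCH * NR_CPUS))
+
+    static long long exact_free = 5000;  /* the "real" counter value */
+
+    static long long counter_read_fast(void) { return exact_free - 100; }
+    static long long counter_sum_slow(void)  { return exact_free; }
+
+    static bool have_free_blocks(long long nblocks, long long root_blocks)
+    {
+        long long free_blocks = counter_read_fast();
+
+        /* Pay for the exact sum only when the answer is too close to call. */
+        if (free_blocks - (nblocks + root_blocks) < FREEBLOCKS_WATERMARK)
+            free_blocks = counter_sum_slow();
+
+        return free_blocks >= nblocks + root_blocks;
+    }
+
+    int main(void)
+    {
+        printf("%d\n", have_free_blocks(4000, 50));
+        return 0;
+    }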
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/balloc.c | 58 +++++++++++++++++++++++++++++++++++++++---------------
+ fs/ext4/ext4.h | 13 ++++++++++++
+ fs/ext4/inode.c | 5 ----
+ fs/ext4/mballoc.c | 23 ++++++++++++---------
+ 4 files changed, 69 insertions(+), 30 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -1754,6 +1754,32 @@ out:
+ return ret;
+ }
+
++int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
++ ext4_fsblk_t nblocks)
++{
++ s64 free_blocks;
++ ext4_fsblk_t root_blocks = 0;
++ struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
++
++ free_blocks = percpu_counter_read(fbc);
++
++ if (!capable(CAP_SYS_RESOURCE) &&
++ sbi->s_resuid != current->fsuid &&
++ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
++ root_blocks = ext4_r_blocks_count(sbi->s_es);
++
++ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
++ free_blocks = percpu_counter_sum(&sbi->s_freeblocks_counter);
++
++ if (free_blocks < (root_blocks + nblocks))
++ /* we don't have free space */
++ return -ENOSPC;
++
++ /* reduce fs free blocks counter */
++ percpu_counter_sub(fbc, nblocks);
++ return 0;
++}
++
+ /**
+ * ext4_has_free_blocks()
+ * @sbi: in-core super block structure.
+@@ -1775,18 +1801,17 @@ ext4_fsblk_t ext4_has_free_blocks(struct
+ sbi->s_resuid != current->fsuid &&
+ (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
+ root_blocks = ext4_r_blocks_count(sbi->s_es);
+-#ifdef CONFIG_SMP
+- if (free_blocks - root_blocks < FBC_BATCH)
+- free_blocks =
+- percpu_counter_sum(&sbi->s_freeblocks_counter);
+-#endif
++
++ if (free_blocks - (nblocks + root_blocks) < EXT4_FREEBLOCKS_WATERMARK)
++ free_blocks = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
++
+ if (free_blocks <= root_blocks)
+ /* we don't have free space */
+ return 0;
+ if (free_blocks - root_blocks < nblocks)
+ return free_blocks - root_blocks;
+ return nblocks;
+- }
++}
+
+
+ /**
+@@ -1865,14 +1890,11 @@ ext4_fsblk_t ext4_old_new_blocks(handle_
+ /*
+ * With delalloc we already reserved the blocks
+ */
+- *count = ext4_has_free_blocks(sbi, *count);
+- }
+- if (*count == 0) {
+- *errp = -ENOSPC;
+- return 0; /*return with ENOSPC error */
++ if (ext4_claim_free_blocks(sbi, *count)) {
++ *errp = -ENOSPC;
++ return 0; /*return with ENOSPC error */
++ }
+ }
+- num = *count;
+-
+ /*
+ * Check quota for allocation of this block.
+ */
+@@ -2067,9 +2089,13 @@ allocated:
+ le16_add_cpu(&gdp->bg_free_blocks_count, -num);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
+ spin_unlock(sb_bgl_lock(sbi, group_no));
+- if (!EXT4_I(inode)->i_delalloc_reserved_flag)
+- percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+-
++ if (!EXT4_I(inode)->i_delalloc_reserved_flag && (*count != num)) {
++ /*
++ * we allocated less blocks than we
++ * claimed. Add the difference back.
++ */
++ percpu_counter_add(&sbi->s_freeblocks_counter, *count - num);
++ }
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
+ spin_lock(sb_bgl_lock(sbi, flex_group));
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1015,6 +1015,8 @@ extern ext4_fsblk_t ext4_new_blocks(hand
+ unsigned long *count, int *errp);
+ extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, unsigned long *count, int *errp);
++extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
++ ext4_fsblk_t nblocks);
+ extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
+ ext4_fsblk_t nblocks);
+ extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
+@@ -1245,6 +1247,17 @@ do { \
+ __ext4_std_error((sb), __func__, (errno)); \
+ } while (0)
+
++#ifdef CONFIG_SMP
++/* Each CPU can accumulate FBC_BATCH blocks in their local
++ * counters. So we need to make sure we have free blocks more
++ * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times.
++ */
++#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
++#else
++#define EXT4_FREEBLOCKS_WATERMARK 0
++#endif
++
++
+ /*
+ * Inodes and files operations
+ */
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1564,13 +1564,10 @@ static int ext4_da_reserve_space(struct
+ md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+ total = md_needed + nrblocks;
+
+- if (ext4_has_free_blocks(sbi, total) < total) {
++ if (ext4_claim_free_blocks(sbi, total)) {
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return -ENOSPC;
+ }
+- /* reduce fs free blocks counter */
+- percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+-
+ EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3194,9 +3194,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
+ * at write_begin() time for delayed allocation
+ * do not double accounting
+ */
+- if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+- percpu_counter_sub(&sbi->s_freeblocks_counter,
+- ac->ac_b_ex.fe_len);
++ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED) &&
++ ac->ac_o_ex.fe_len != ac->ac_b_ex.fe_len) {
++ /*
++ * we allocated less blocks than we calimed
++ * Add the difference back
++ */
++ percpu_counter_add(&sbi->s_freeblocks_counter,
++ ac->ac_o_ex.fe_len - ac->ac_b_ex.fe_len);
++ }
+
+ if (sbi->s_log_groups_per_flex) {
+ ext4_group_t flex_group = ext4_flex_group(sbi,
+@@ -4649,14 +4655,11 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
+ /*
+ * With delalloc we already reserved the blocks
+ */
+- ar->len = ext4_has_free_blocks(sbi, ar->len);
+- }
+-
+- if (ar->len == 0) {
+- *errp = -ENOSPC;
+- return 0;
++ if (ext4_claim_free_blocks(sbi, ar->len)) {
++ *errp = -ENOSPC;
++ return 0;
++ }
+ }
+-
+ while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
+ ar->flags |= EXT4_MB_HINT_NOPREALLOC;
+ ar->len--;
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:22:08 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:26:00 -0400
+Subject: ext4: Retry block allocation if we have free blocks left
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Mingming Cao <cmm@us.ibm.com>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-7-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit df22291ff0fde0d350cf15dac3e5cc33ac528875 upstream.
+
+When we truncate files, the metadata blocks released are not reused
+until we commit the truncate transaction. That means a delayed get_block
+request will return ENOSPC even if we have free blocks left. Force a
+journal commit and retry block allocation if we get ENOSPC with free
+blocks left.
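+
+A toy userspace model of that retry (all names are invented; in the
+kernel the forced commit is jbd2_journal_force_commit_nested()):
+
+    #include <stdio.h>
+
+    static long long usable_blocks;       /* allocatable right now           */
+    static long long pending_free = 128;  /* freed, but commit still pending */
+
+    static int allocate(long long n)
+    {
+        if (usable_blocks < n)
+            return -1;                    /* -ENOSPC */
+        usable_blocks -= n;
+        return 0;
+    }
+
+    static void force_commit(void)
+    {
+        usable_blocks += pending_free;    /* committed: blocks reusable again */
+        pending_free = 0;
+    }
+
+    int main(void)
+    {
+        if (allocate(64) && pending_free) {   /* ENOSPC, yet blocks exist */
+            force_commit();
+            printf("retry: %s\n", allocate(64) ? "ENOSPC" : "ok");
+        }
+        return 0;
+    }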
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 81 +++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 57 insertions(+), 24 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1661,6 +1661,7 @@ struct mpage_da_data {
+ struct writeback_control *wbc;
+ int io_done;
+ long pages_written;
++ int retval;
+ };
+
+ /*
+@@ -1858,6 +1859,24 @@ static void ext4_da_block_invalidatepage
+ return;
+ }
+
++static void ext4_print_free_blocks(struct inode *inode)
++{
++ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
++ printk(KERN_EMERG "Total free blocks count %lld\n",
++ ext4_count_free_blocks(inode->i_sb));
++ printk(KERN_EMERG "Free/Dirty block details\n");
++ printk(KERN_EMERG "free_blocks=%lld\n",
++ percpu_counter_sum(&sbi->s_freeblocks_counter));
++ printk(KERN_EMERG "dirty_blocks=%lld\n",
++ percpu_counter_sum(&sbi->s_dirtyblocks_counter));
++ printk(KERN_EMERG "Block reservation details\n");
++ printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
++ EXT4_I(inode)->i_reserved_data_blocks);
++ printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
++ EXT4_I(inode)->i_reserved_meta_blocks);
++ return;
++}
++
+ /*
+ * mpage_da_map_blocks - go through given space
+ *
+@@ -1872,7 +1891,7 @@ static int mpage_da_map_blocks(struct m
+ int err = 0;
+ struct buffer_head new;
+ struct buffer_head *lbh = &mpd->lbh;
+- sector_t next = lbh->b_blocknr;
++ sector_t next;
+
+ /*
+ * We consider only non-mapped and non-allocated blocks
+@@ -1882,6 +1901,7 @@ static int mpage_da_map_blocks(struct m
+ new.b_state = lbh->b_state;
+ new.b_blocknr = 0;
+ new.b_size = lbh->b_size;
++ next = lbh->b_blocknr;
+ /*
+ * If we didn't accumulate anything
+ * to write simply return
+@@ -1898,6 +1918,13 @@ static int mpage_da_map_blocks(struct m
+ */
+ if (err == -EAGAIN)
+ return 0;
++
++ if (err == -ENOSPC &&
++ ext4_count_free_blocks(mpd->inode->i_sb)) {
++ mpd->retval = err;
++ return 0;
++ }
++
+ /*
+ * get block failure will cause us
+ * to loop in writepages. Because
+@@ -1915,8 +1942,7 @@ static int mpage_da_map_blocks(struct m
+ printk(KERN_EMERG "This should not happen.!! "
+ "Data will be lost\n");
+ if (err == -ENOSPC) {
+- printk(KERN_CRIT "Total free blocks count %lld\n",
+- ext4_count_free_blocks(mpd->inode->i_sb));
++ ext4_print_free_blocks(mpd->inode);
+ }
+ /* invlaidate all the pages */
+ ext4_da_block_invalidatepages(mpd, next,
+@@ -2141,39 +2167,36 @@ static int __mpage_da_writepage(struct p
+ */
+ static int mpage_da_writepages(struct address_space *mapping,
+ struct writeback_control *wbc,
+- get_block_t get_block)
++ struct mpage_da_data *mpd)
+ {
+- struct mpage_da_data mpd;
+ long to_write;
+ int ret;
+
+- if (!get_block)
++ if (!mpd->get_block)
+ return generic_writepages(mapping, wbc);
+
+- mpd.wbc = wbc;
+- mpd.inode = mapping->host;
+- mpd.lbh.b_size = 0;
+- mpd.lbh.b_state = 0;
+- mpd.lbh.b_blocknr = 0;
+- mpd.first_page = 0;
+- mpd.next_page = 0;
+- mpd.get_block = get_block;
+- mpd.io_done = 0;
+- mpd.pages_written = 0;
++ mpd->lbh.b_size = 0;
++ mpd->lbh.b_state = 0;
++ mpd->lbh.b_blocknr = 0;
++ mpd->first_page = 0;
++ mpd->next_page = 0;
++ mpd->io_done = 0;
++ mpd->pages_written = 0;
++ mpd->retval = 0;
+
+ to_write = wbc->nr_to_write;
+
+- ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
++ ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
+
+ /*
+ * Handle last extent of pages
+ */
+- if (!mpd.io_done && mpd.next_page != mpd.first_page) {
+- if (mpage_da_map_blocks(&mpd) == 0)
+- mpage_da_submit_io(&mpd);
++ if (!mpd->io_done && mpd->next_page != mpd->first_page) {
++ if (mpage_da_map_blocks(mpd) == 0)
++ mpage_da_submit_io(mpd);
+ }
+
+- wbc->nr_to_write = to_write - mpd.pages_written;
++ wbc->nr_to_write = to_write - mpd->pages_written;
+ return ret;
+ }
+
+@@ -2420,6 +2443,7 @@ static int ext4_da_writepages(struct add
+ {
+ handle_t *handle = NULL;
+ loff_t range_start = 0;
++ struct mpage_da_data mpd;
+ struct inode *inode = mapping->host;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+ long to_write, pages_skipped = 0;
+@@ -2467,6 +2491,9 @@ static int ext4_da_writepages(struct add
+ range_start = wbc->range_start;
+ pages_skipped = wbc->pages_skipped;
+
++ mpd.wbc = wbc;
++ mpd.inode = mapping->host;
++
+ restart_loop:
+ to_write = wbc->nr_to_write;
+ while (!ret && to_write > 0) {
+@@ -2502,11 +2529,17 @@ restart_loop:
+ goto out_writepages;
+ }
+ }
+-
+ to_write -= wbc->nr_to_write;
+- ret = mpage_da_writepages(mapping, wbc,
+- ext4_da_get_block_write);
++
++ mpd.get_block = ext4_da_get_block_write;
++ ret = mpage_da_writepages(mapping, wbc, &mpd);
++
+ ext4_journal_stop(handle);
++
++ if (mpd.retval == -ENOSPC)
++ jbd2_journal_force_commit_nested(sbi->s_journal);
++
++ /* reset the retry count */
+ if (ret == MPAGE_DA_EXTENT_TAIL) {
+ /*
+ * got one extent now try with
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:21:18 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:25:59 -0400
+Subject: ext4: Retry block reservation
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Mingming Cao <cmm@us.ibm.com>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-6-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 030ba6bc67b4f2bc5cd174f57785a1745c929abe upstream.
+
+During block reservation, if we don't have enough blocks left, retry
+the block reservation with smaller block counts. This makes sure we retry
+fallocate and DIO with a smaller request size and don't fail early. The
+delayed allocation reservation cannot retry with a smaller block count,
+so retry the block reservation to handle temporary disk-full conditions.
+Also print free block details if we fail block allocation during
+writepages.
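+
+A minimal sketch of the halving retry (userspace model; claim_blocks()
+is a hypothetical stand-in for ext4_claim_free_blocks(), and the yield()
+between attempts is left out):
+
+    #include <stdio.h>
+
+    static long long free_blocks = 100;
+
+    /* Returns 0 on success, nonzero when the request does not fit. */
+    static int claim_blocks(unsigned long n)
+    {
+        if (free_blocks < (long long)n)
+            return -1;
+        free_blocks -= n;
+        return 0;
+    }
+
+    static int reserve_with_backoff(unsigned long *count)
+    {
+        while (*count && claim_blocks(*count))
+            *count >>= 1;            /* too big: let others run, try half */
+        return *count ? 0 : -1;      /* shrank to zero: real ENOSPC       */
+    }
+
+    int main(void)
+    {
+        unsigned long want = 512;
+        printf("ret=%d got=%lu\n", reserve_with_backoff(&want), want);
+        return 0;
+    }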
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Mingming Cao <cmm@us.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/balloc.c | 8 +++++++-
+ fs/ext4/inode.c | 14 +++++++++++---
+ fs/ext4/mballoc.c | 7 ++++++-
+ 3 files changed, 24 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -1907,10 +1907,16 @@ ext4_fsblk_t ext4_old_new_blocks(handle_
+ /*
+ * With delalloc we already reserved the blocks
+ */
+- if (ext4_claim_free_blocks(sbi, *count)) {
++ while (*count && ext4_claim_free_blocks(sbi, *count)) {
++ /* let others to free the space */
++ yield();
++ *count = *count >> 1;
++ }
++ if (!*count) {
+ *errp = -ENOSPC;
+ return 0; /*return with ENOSPC error */
+ }
++ num = *count;
+ }
+ /*
+ * Check quota for allocation of this block.
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1549,6 +1549,7 @@ static int ext4_journalled_write_end(str
+
+ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+ {
++ int retries = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned long md_needed, mdblocks, total = 0;
+
+@@ -1557,6 +1558,7 @@ static int ext4_da_reserve_space(struct
+ * in order to allocate nrblocks
+ * worse case is one extent per block
+ */
++repeat:
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+ mdblocks = ext4_calc_metadata_amount(inode, total);
+@@ -1567,6 +1569,10 @@ static int ext4_da_reserve_space(struct
+
+ if (ext4_claim_free_blocks(sbi, total)) {
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
++ if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
++ yield();
++ goto repeat;
++ }
+ return -ENOSPC;
+ }
+ EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+@@ -1864,20 +1870,18 @@ static void ext4_da_block_invalidatepage
+ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+ {
+ int err = 0;
++ struct buffer_head new;
+ struct buffer_head *lbh = &mpd->lbh;
+ sector_t next = lbh->b_blocknr;
+- struct buffer_head new;
+
+ /*
+ * We consider only non-mapped and non-allocated blocks
+ */
+ if (buffer_mapped(lbh) && !buffer_delay(lbh))
+ return 0;
+-
+ new.b_state = lbh->b_state;
+ new.b_blocknr = 0;
+ new.b_size = lbh->b_size;
+-
+ /*
+ * If we didn't accumulate anything
+ * to write simply return
+@@ -1910,6 +1914,10 @@ static int mpage_da_map_blocks(struct m
+ lbh->b_size >> mpd->inode->i_blkbits, err);
+ printk(KERN_EMERG "This should not happen.!! "
+ "Data will be lost\n");
++ if (err == -ENOSPC) {
++ printk(KERN_CRIT "Total free blocks count %lld\n",
++ ext4_count_free_blocks(mpd->inode->i_sb));
++ }
+ /* invlaidate all the pages */
+ ext4_da_block_invalidatepages(mpd, next,
+ lbh->b_size >> mpd->inode->i_blkbits);
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -4651,7 +4651,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
+ /*
+ * With delalloc we already reserved the blocks
+ */
+- if (ext4_claim_free_blocks(sbi, ar->len)) {
++ while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
++ /* let others to free the space */
++ yield();
++ ar->len = ar->len >> 1;
++ }
++ if (!ar->len) {
+ *errp = -ENOSPC;
+ return 0;
+ }
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:22:28 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:26:01 -0400
+Subject: ext4: Use tag dirty lookup during mpage_da_submit_io
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-8-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit af6f029d3836eb7264cd3fbb13a6baf0e5fdb5ea upstream.
+
+This enables us to drop the use of the range_cont writeback mode
+from ext4_da_writepages().
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 30 +++++++++++++-----------------
+ 1 file changed, 13 insertions(+), 17 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1699,17 +1699,23 @@ static int mpage_da_submit_io(struct mpa
+
+ pagevec_init(&pvec, 0);
+ while (index <= end) {
+- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
++ /*
++ * We can use PAGECACHE_TAG_DIRTY lookup here because
++ * even though we have cleared the dirty flag on the page
++ * We still keep the page in the radix tree with tag
++ * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
++ * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
++ * which is called via the below writepage callback.
++ */
++ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
++ PAGECACHE_TAG_DIRTY,
++ min(end - index,
++ (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (nr_pages == 0)
+ break;
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+- index = page->index;
+- if (index > end)
+- break;
+- index++;
+-
+ BUG_ON(!PageLocked(page));
+ BUG_ON(PageWriteback(page));
+
+@@ -2442,7 +2448,6 @@ static int ext4_da_writepages(struct add
+ struct writeback_control *wbc)
+ {
+ handle_t *handle = NULL;
+- loff_t range_start = 0;
+ struct mpage_da_data mpd;
+ struct inode *inode = mapping->host;
+ int needed_blocks, ret = 0, nr_to_writebump = 0;
+@@ -2481,14 +2486,7 @@ static int ext4_da_writepages(struct add
+ wbc->nr_to_write = sbi->s_mb_stream_request;
+ }
+
+- if (!wbc->range_cyclic)
+- /*
+- * If range_cyclic is not set force range_cont
+- * and save the old writeback_index
+- */
+- wbc->range_cont = 1;
+
+- range_start = wbc->range_start;
+ pages_skipped = wbc->pages_skipped;
+
+ mpd.wbc = wbc;
+@@ -2559,9 +2557,8 @@ restart_loop:
+ wbc->nr_to_write = to_write;
+ }
+
+- if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
++ if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
+ /* We skipped pages in this loop */
+- wbc->range_start = range_start;
+ wbc->nr_to_write = to_write +
+ wbc->pages_skipped - pages_skipped;
+ wbc->pages_skipped = pages_skipped;
+@@ -2570,7 +2567,6 @@ restart_loop:
+
+ out_writepages:
+ wbc->nr_to_write = to_write - nr_to_writebump;
+- wbc->range_start = range_start;
+ return ret;
+ }
+
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:20:04 2010
+From: Mingming Cao <cmm@us.ibm.com>
+Date: Mon, 15 Mar 2010 20:25:56 -0400
+Subject: percpu counter: clean up percpu_counter_sum_and_set()
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Andrew Morton <akpm@linux-foundation.org>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Mingming Cao <cmm@us.ibm.com>, "Jayson R. King" <dev@jaysonking.com>
+Message-ID: <1268699165-17461-3-git-send-email-tytso@mit.edu>
+
+
+From: Mingming Cao <cmm@us.ibm.com>
+
+commit 1f7c14c62ce63805f9574664a6c6de3633d4a354 upstream.
+
+percpu_counter_sum_and_set() and percpu_counter_sum() are the same except
+that the former updates the global counter after accounting. Since we are
+taking fbc->lock to calculate the precise value of the counter in
+percpu_counter_sum() anyway, it should simply set fbc->count too, as
+percpu_counter_sum_and_set() does.
+
+This patch merges these two interfaces into one.
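+
+A userspace sketch of the merged behaviour (locking omitted; a plain
+array stands in for the per-cpu counters):
+
+    #include <stdio.h>
+
+    #define NR_CPUS 4
+
+    static long long count;                        /* fbc->count        */
+    static int deltas[NR_CPUS] = { 3, -1, 7, 2 };  /* per-cpu counters  */
+
+    static long long counter_sum(void)
+    {
+        long long ret = count;
+        int cpu;
+
+        for (cpu = 0; cpu < NR_CPUS; cpu++) {
+            ret += deltas[cpu];
+            deltas[cpu] = 0;     /* fold each delta into the global count */
+        }
+        count = ret;             /* ...so the sum also "sets" the counter */
+        return ret;
+    }
+
+    int main(void)
+    {
+        printf("%lld\n", counter_sum());
+        return 0;
+    }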
+
+Signed-off-by: Mingming Cao <cmm@us.ibm.com>
+Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+
+---
+ fs/ext4/balloc.c | 2 +-
+ include/linux/percpu_counter.h | 12 +++---------
+ lib/percpu_counter.c | 8 +++-----
+ 3 files changed, 7 insertions(+), 15 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -1778,7 +1778,7 @@ ext4_fsblk_t ext4_has_free_blocks(struct
+ #ifdef CONFIG_SMP
+ if (free_blocks - root_blocks < FBC_BATCH)
+ free_blocks =
+- percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
++ percpu_counter_sum(&sbi->s_freeblocks_counter);
+ #endif
+ if (free_blocks <= root_blocks)
+ /* we don't have free space */
+--- a/include/linux/percpu_counter.h
++++ b/include/linux/percpu_counter.h
+@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percp
+ void percpu_counter_destroy(struct percpu_counter *fbc);
+ void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
+ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
+-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
++s64 __percpu_counter_sum(struct percpu_counter *fbc);
+
+ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
+ {
+@@ -44,19 +44,13 @@ static inline void percpu_counter_add(st
+
+ static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
+ {
+- s64 ret = __percpu_counter_sum(fbc, 0);
++ s64 ret = __percpu_counter_sum(fbc);
+ return ret < 0 ? 0 : ret;
+ }
+
+-static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
+-{
+- return __percpu_counter_sum(fbc, 1);
+-}
+-
+-
+ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
+ {
+- return __percpu_counter_sum(fbc, 0);
++ return __percpu_counter_sum(fbc);
+ }
+
+ static inline s64 percpu_counter_read(struct percpu_counter *fbc)
+--- a/lib/percpu_counter.c
++++ b/lib/percpu_counter.c
+@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
+ * Add up all the per-cpu counts, return the result. This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
++s64 __percpu_counter_sum(struct percpu_counter *fbc)
+ {
+ s64 ret;
+ int cpu;
+@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_c
+ for_each_online_cpu(cpu) {
+ s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+ ret += *pcount;
+- if (set)
+- *pcount = 0;
++ *pcount = 0;
+ }
+- if (set)
+- fbc->count = ret;
++ fbc->count = ret;
+
+ spin_unlock(&fbc->lock);
+ return ret;
alsa-mixart-range-checking-proc-file.patch
+ext4-invalidate-pages-if-delalloc-block-allocation-fails.patch
+percpu-counter-clean-up-percpu_counter_sum_and_set.patch
+ext4-make-sure-all-the-block-allocation-paths-reserve-blocks.patch
+ext4-add-percpu-dirty-block-accounting.patch
+ext4-retry-block-reservation.patch
+ext4-retry-block-allocation-if-we-have-free-blocks-left.patch
+ext4-use-tag-dirty-lookup-during-mpage_da_submit_io.patch
+vfs-remove-the-range_cont-writeback-mode.patch
+vfs-add-no_nrwrite_index_update-writeback-control-flag.patch
+ext4-fix-file-fragmentation-during-large-file-write.patch
+ext4-implement-range_cyclic-in-ext4_da_writepages-instead-of-write_cache_pages.patch
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:23:14 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:26:03 -0400
+Subject: vfs: Add no_nrwrite_index_update writeback control flag
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-10-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 17bc6c30cf6bfffd816bdc53682dd46fc34a2cf4 upstream.
+
+If no_nrwrite_index_update is set we don't update nr_to_write and the
+address_space writeback_index in write_cache_pages(). This change
+enables a file system to skip these updates in write_cache_pages() and
+do them in its writepages() callback. This patch will be followed by an
+ext4 patch that makes use of this new flag.
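+
+A userspace sketch of the contract (walk_pages() is a hypothetical
+stand-in for write_cache_pages(), and the real writeback_control has
+many more fields):
+
+    #include <stdio.h>
+
+    struct writeback_control {
+        long nr_to_write;
+        unsigned no_nrwrite_index_update:1;
+    };
+
+    static unsigned long writeback_index;  /* models mapping->writeback_index */
+
+    static long walk_pages(struct writeback_control *wbc)
+    {
+        long written = 5;                      /* pretend 5 pages went out */
+        if (!wbc->no_nrwrite_index_update) {   /* generic bookkeeping      */
+            wbc->nr_to_write -= written;
+            writeback_index += written;
+        }
+        return written;
+    }
+
+    /* A filesystem's ->writepages() doing the bookkeeping itself, once. */
+    static void fs_writepages(struct writeback_control *wbc)
+    {
+        long written;
+
+        wbc->no_nrwrite_index_update = 1;
+        written = walk_pages(wbc);
+        wbc->nr_to_write -= written;
+        writeback_index += written;
+    }
+
+    int main(void)
+    {
+        struct writeback_control wbc = { .nr_to_write = 16 };
+        fs_writepages(&wbc);
+        printf("nr_to_write=%ld index=%lu\n", wbc.nr_to_write, writeback_index);
+        return 0;
+    }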
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+CC: linux-fsdevel@vger.kernel.org
+[dev@jaysonking.com: Modified the patch to account for subsequent changes in mainline being cherry-picked earlier for 2.6.27.y.]
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/writeback.h | 9 +++++++++
+ mm/page-writeback.c | 14 +++++++++-----
+ 2 files changed, 18 insertions(+), 5 deletions(-)
+
+--- a/include/linux/writeback.h
++++ b/include/linux/writeback.h
+@@ -62,6 +62,15 @@ struct writeback_control {
+ unsigned for_writepages:1; /* This is a writepages() call */
+ unsigned range_cyclic:1; /* range_start is cyclic */
+ unsigned more_io:1; /* more io to be dispatched */
++ /*
++ * write_cache_pages() won't update wbc->nr_to_write and
++ * mapping->writeback_index if no_nrwrite_index_update
++ * is set. write_cache_pages() may write more than we
++ * requested and we want to make sure nr_to_write and
++ * writeback_index are updated in a consistent manner
++ * so we use a single control to update them
++ */
++ unsigned no_nrwrite_index_update:1;
+ };
+
+ /*
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -878,6 +878,7 @@ int write_cache_pages(struct address_spa
+ pgoff_t done_index;
+ int cycled;
+ int range_whole = 0;
++ long nr_to_write = wbc->nr_to_write;
+
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+@@ -985,9 +986,9 @@ continue_unlock:
+ }
+ }
+
+- if (wbc->nr_to_write > 0) {
+- wbc->nr_to_write--;
+- if (wbc->nr_to_write == 0 &&
++ if (nr_to_write > 0) {
++ nr_to_write--;
++ if (nr_to_write == 0 &&
+ wbc->sync_mode == WB_SYNC_NONE) {
+ /*
+ * We stop writing back only if we are
+@@ -1024,8 +1025,11 @@ continue_unlock:
+ end = writeback_index - 1;
+ goto retry;
+ }
+- if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+- mapping->writeback_index = done_index;
++ if (!wbc->no_nrwrite_index_update) {
++ if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
++ mapping->writeback_index = done_index;
++ wbc->nr_to_write = nr_to_write;
++ }
+
+ return ret;
+ }
--- /dev/null
+From tytso@mit.edu Mon Apr 19 10:22:47 2010
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Mon, 15 Mar 2010 20:26:02 -0400
+Subject: vfs: Remove the range_cont writeback mode.
+To: stable@kernel.org
+Cc: linux-fsdevel@vger.kernel.org, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Jayson R. King" <dev@jaysonking.com>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1268699165-17461-9-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+commit 74baaaaec8b4f22e1ae279f5ecca4ff705b28912 upstream.
+
+Ext4 was the only user of the range_cont writeback mode, and ext4 has
+switched to a different method, so remove the range_cont mode, which is
+no longer used in the kernel.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+CC: linux-fsdevel@vger.kernel.org
+Signed-off-by: Jayson R. King <dev@jaysonking.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/writeback.h | 1 -
+ mm/page-writeback.c | 2 --
+ 2 files changed, 3 deletions(-)
+
+--- a/include/linux/writeback.h
++++ b/include/linux/writeback.h
+@@ -62,7 +62,6 @@ struct writeback_control {
+ unsigned for_writepages:1; /* This is a writepages() call */
+ unsigned range_cyclic:1; /* range_start is cyclic */
+ unsigned more_io:1; /* more io to be dispatched */
+- unsigned range_cont:1;
+ };
+
+ /*
+--- a/mm/page-writeback.c
++++ b/mm/page-writeback.c
+@@ -1027,8 +1027,6 @@ continue_unlock:
+ if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+ mapping->writeback_index = done_index;
+
+- if (wbc->range_cont)
+- wbc->range_start = index << PAGE_CACHE_SHIFT;
+ return ret;
+ }
+ EXPORT_SYMBOL(write_cache_pages);