From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 8 Oct 2012 19:41:11 +0000 (-0700)
Subject: 3.6-stable patches
X-Git-Tag: v3.0.46~39
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a195d12b3ec6331f3cd0810a5905f202c6badd90;p=thirdparty%2Fkernel%2Fstable-queue.git

3.6-stable patches

added patches:
	ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch
	ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch
	ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch
	ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch
---

diff --git a/queue-3.6/ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch b/queue-3.6/ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch
new file mode 100644
index 00000000000..b20707a409f
--- /dev/null
+++ b/queue-3.6/ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch
@@ -0,0 +1,43 @@
+From 2ebd1704ded88a8ae29b5f3998b13959c715c4be Mon Sep 17 00:00:00 2001
+From: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Date: Wed, 5 Sep 2012 01:27:50 -0400
+Subject: ext4: avoid duplicate writes of the backup bg descriptor blocks
+
+From: Yongqiang Yang <xiaoqiangnk@gmail.com>
+
+commit 2ebd1704ded88a8ae29b5f3998b13959c715c4be upstream.
+
+The resize code was needlessly writing the backup block group
+descriptor blocks multiple times (once per block group) during an
+online resize.
+
+Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/resize.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1358,13 +1358,15 @@ exit_journal:
+ 		err = err2;
+ 
+ 	if (!err) {
+-		int i;
++		int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
++		int gdb_num_end = ((group + flex_gd->count - 1) /
++				   EXT4_DESC_PER_BLOCK(sb));
++
+ 		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
+ 			       sizeof(struct ext4_super_block));
+-		for (i = 0; i < flex_gd->count; i++, group++) {
++		for (; gdb_num <= gdb_num_end; gdb_num++) {
+ 			struct buffer_head *gdb_bh;
+-			int gdb_num;
+-			gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
++
+ 			gdb_bh = sbi->s_group_desc[gdb_num];
+ 			update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
+ 				       gdb_bh->b_size);
diff --git a/queue-3.6/ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch b/queue-3.6/ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch
new file mode 100644
index 00000000000..76c33f3ee19
--- /dev/null
+++ b/queue-3.6/ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch
@@ -0,0 +1,44 @@
+From 6df935ad2fced9033ab52078825fcaf6365f34b7 Mon Sep 17 00:00:00 2001
+From: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Date: Wed, 5 Sep 2012 01:25:50 -0400
+Subject: ext4: don't copy non-existent gdt blocks when resizing
+
+From: Yongqiang Yang <xiaoqiangnk@gmail.com>
+
+commit 6df935ad2fced9033ab52078825fcaf6365f34b7 upstream.
+
+The resize code was copying blocks at the beginning of each block
+group in order to copy the superblock and block group descriptor table
+(gdt) blocks.  This was, unfortunately, being done even for block
+groups that did not have super blocks or gdt blocks.  This is a
+complete waste of perfectly good I/O bandwidth, so skip writing those
+blocks for sparse bg's.
+
+Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/resize.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -456,6 +456,9 @@ static int setup_new_flex_group_blocks(s
+ 		gdblocks = ext4_bg_num_gdb(sb, group);
+ 		start = ext4_group_first_block_no(sb, group);
+ 
++		if (!ext4_bg_has_super(sb, group))
++			goto handle_itb;
++
+ 		/* Copy all of the GDT blocks into the backup in this group */
+ 		for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
+ 			struct buffer_head *gdb;
+@@ -498,6 +501,7 @@ static int setup_new_flex_group_blocks(s
+ 				goto out;
+ 		}
+ 
++handle_itb:
+ 		/* Initialize group tables of the grop @group */
+ 		if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
+ 			goto handle_bb;
diff --git a/queue-3.6/ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch b/queue-3.6/ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch
new file mode 100644
index 00000000000..05c6b68c6f3
--- /dev/null
+++ b/queue-3.6/ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch
@@ -0,0 +1,124 @@
+From 00d4e7362ed01987183e9528295de3213031309c Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Wed, 19 Sep 2012 22:42:36 -0400
+Subject: ext4: fix potential deadlock in ext4_nonda_switch()
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 00d4e7362ed01987183e9528295de3213031309c upstream.
+
+In ext4_nonda_switch(), if the file system is getting full we used to
+call writeback_inodes_sb_if_idle().  The problem is that we can be
+holding i_mutex already, and this causes a potential deadlock when
+writeback_inodes_sb_if_idle() tries to take s_umount.  (See
+lockdep output below).
+
+As it turns out we don't need to hold s_umount; the fact that we
+are in the middle of the write(2) system call will keep the superblock
+pinned.  Unfortunately writeback_inodes_sb() checks to make sure
+s_umount is taken, and the VFS uses a different mechanism for making
+sure the file system doesn't get unmounted out from under us.  The
+simplest way of dealing with this is to just simply grab s_umount
+using a trylock, and skip kicking the writeback flusher thread in the
+very unlikely case that we can't take a read lock on s_umount without
+blocking.
+
+Also, we now check the criteria for kicking the writeback thread
+before we decide whether to fall back to non-delayed writeback, so
+if there are any outstanding delayed allocation writes, we try to get
+them resolved as soon as possible.
+
+   [ INFO: possible circular locking dependency detected ]
+   3.6.0-rc1-00042-gce894ca #367 Not tainted
+   -------------------------------------------------------
+   dd/8298 is trying to acquire lock:
+    (&type->s_umount_key#18){++++..}, at: [<c02277d4>] writeback_inodes_sb_if_idle+0x28/0x46
+
+   but task is already holding lock:
+    (&sb->s_type->i_mutex_key#8){+.+...}, at: [<c01ddcce>] generic_file_aio_write+0x5f/0xd3
+
+   which lock already depends on the new lock.
+
+   2 locks held by dd/8298:
+    #0:  (sb_writers#2){.+.+.+}, at: [<c01ddcc5>] generic_file_aio_write+0x56/0xd3
+    #1:  (&sb->s_type->i_mutex_key#8){+.+...}, at: [<c01ddcce>] generic_file_aio_write+0x5f/0xd3
+
+   stack backtrace:
+   Pid: 8298, comm: dd Not tainted 3.6.0-rc1-00042-gce894ca #367
+   Call Trace:
+    [<c015b79c>] ? console_unlock+0x345/0x372
+    [<c06d62a1>] print_circular_bug+0x190/0x19d
+    [<c019906c>] __lock_acquire+0x86d/0xb6c
+    [<c01999db>] ? mark_held_locks+0x5c/0x7b
+    [<c0199724>] lock_acquire+0x66/0xb9
+    [<c02277d4>] ? writeback_inodes_sb_if_idle+0x28/0x46
+    [<c06db935>] down_read+0x28/0x58
+    [<c02277d4>] ? writeback_inodes_sb_if_idle+0x28/0x46
+    [<c02277d4>] writeback_inodes_sb_if_idle+0x28/0x46
+    [<c026f3b2>] ext4_nonda_switch+0xe1/0xf4
+    [<c0271ece>] ext4_da_write_begin+0x27/0x193
+    [<c01dcdb0>] generic_file_buffered_write+0xc8/0x1bb
+    [<c01ddc47>] __generic_file_aio_write+0x1dd/0x205
+    [<c01ddce7>] generic_file_aio_write+0x78/0xd3
+    [<c026d336>] ext4_file_write+0x480/0x4a6
+    [<c0198c1d>] ? __lock_acquire+0x41e/0xb6c
+    [<c0180944>] ? sched_clock_cpu+0x11a/0x13e
+    [<c01967e9>] ? trace_hardirqs_off+0xb/0xd
+    [<c018099f>] ? local_clock+0x37/0x4e
+    [<c0209f2c>] do_sync_write+0x67/0x9d
+    [<c0209ec5>] ? wait_on_retry_sync_kiocb+0x44/0x44
+    [<c020a7b9>] vfs_write+0x7b/0xe6
+    [<c020a9a6>] sys_write+0x3b/0x64
+    [<c06dd4bd>] syscall_call+0x7/0xb
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/inode.c   |   17 ++++++++++-------
+ fs/fs-writeback.c |    1 +
+ 2 files changed, 11 insertions(+), 7 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2463,6 +2463,16 @@ static int ext4_nonda_switch(struct supe
+ 	free_blocks  = EXT4_C2B(sbi,
+ 		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
+ 	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
++	/*
++	 * Start pushing delalloc when 1/2 of free blocks are dirty.
++	 */
++	if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
++	    !writeback_in_progress(sb->s_bdi) &&
++	    down_read_trylock(&sb->s_umount)) {
++		writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
++		up_read(&sb->s_umount);
++	}
++
+ 	if (2 * free_blocks < 3 * dirty_blocks ||
+ 		free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
+ 		/*
+@@ -2471,13 +2481,6 @@ static int ext4_nonda_switch(struct supe
+ 		 */
+ 		return 1;
+ 	}
+-	/*
+-	 * Even if we don't switch but are nearing capacity,
+-	 * start pushing delalloc when 1/2 of free blocks are dirty.
+-	 */
+-	if (free_blocks < 2 * dirty_blocks)
+-		writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
+-
+ 	return 0;
+ }
+ 
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing
+ {
+ 	return test_bit(BDI_writeback_running, &bdi->state);
+ }
++EXPORT_SYMBOL(writeback_in_progress);
+ 
+ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
+ {
diff --git a/queue-3.6/ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch b/queue-3.6/ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch
new file mode 100644
index 00000000000..acbeb802109
--- /dev/null
+++ b/queue-3.6/ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch
@@ -0,0 +1,71 @@
+From 03c1c29053f678234dbd51bf3d65f3b7529021de Mon Sep 17 00:00:00 2001
+From: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Date: Wed, 5 Sep 2012 01:21:50 -0400
+Subject: ext4: ignore last group w/o enough space when resizing instead of BUG'ing
+
+From: Yongqiang Yang <xiaoqiangnk@gmail.com>
+
+commit 03c1c29053f678234dbd51bf3d65f3b7529021de upstream.
+
+If the last group does not have enough space for group tables, ignore
+it instead of calling BUG_ON().
+
+Reported-by: Daniel Drake <dsd@laptop.org>
+Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/resize.c |   12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -200,8 +200,11 @@ static void free_flex_gd(struct ext4_new
+  * be a partial of a flex group.
+  *
+  * @sb: super block of fs to which the groups belongs
++ *
++ * Returns 0 on a successful allocation of the metadata blocks in the
++ * block group.
+  */
+-static void ext4_alloc_group_tables(struct super_block *sb,
++static int ext4_alloc_group_tables(struct super_block *sb,
+ 				struct ext4_new_flex_group_data *flex_gd,
+ 				int flexbg_size)
+ {
+@@ -226,6 +229,8 @@ static void ext4_alloc_group_tables(stru
+ 	       (last_group & ~(flexbg_size - 1))));
+ next_group:
+ 	group = group_data[0].group;
++	if (src_group >= group_data[0].group + flex_gd->count)
++		return -ENOSPC;
+ 	start_blk = ext4_group_first_block_no(sb, src_group);
+ 	last_blk = start_blk + group_data[src_group - group].blocks_count;
+ 
+@@ -235,7 +240,6 @@ next_group:
+ 
+ 	start_blk += overhead;
+ 
+-	BUG_ON(src_group >= group_data[0].group + flex_gd->count);
+ 	/* We collect contiguous blocks as much as possible. */
+ 	src_group++;
+ 	for (; src_group <= last_group; src_group++)
+@@ -300,6 +304,7 @@ next_group:
+ 			       group_data[i].free_blocks_count);
+ 		}
+ 	}
++	return 0;
+ }
+ 
+ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
+@@ -1729,7 +1734,8 @@ int ext4_resize_fs(struct super_block *s
+ 	 */
+ 	while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
+ 					      flexbg_size)) {
+-		ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
++		if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
++			break;
+ 		err = ext4_flex_group_add(sb, resize_inode, flex_gd);
+ 		if (unlikely(err))
+ 			break;
diff --git a/queue-3.6/series b/queue-3.6/series
index d45de26ca4e..806edd1464d 100644
--- a/queue-3.6/series
+++ b/queue-3.6/series
@@ -43,3 +43,7 @@ scsi-zfcp-remove-invalid-reference-to-list-iterator-variable.patch
 scsi-zfcp-restore-refcount-check-on-port_remove.patch
 scsi-zfcp-only-access-zfcp_scsi_dev-for-valid-scsi_device.patch
 pci-check-p2p-bridge-for-invalid-secondary-subordinate-range.patch
+ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch
+ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch
+ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch
+ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch