From: Greg Kroah-Hartman Date: Mon, 8 Oct 2012 19:41:11 +0000 (-0700) Subject: 3.6-stable patches X-Git-Tag: v3.0.46~39 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a195d12b3ec6331f3cd0810a5905f202c6badd90;p=thirdparty%2Fkernel%2Fstable-queue.git 3.6-stable patches added patches: ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch --- diff --git a/queue-3.6/ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch b/queue-3.6/ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch new file mode 100644 index 00000000000..b20707a409f --- /dev/null +++ b/queue-3.6/ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch @@ -0,0 +1,43 @@ +From 2ebd1704ded88a8ae29b5f3998b13959c715c4be Mon Sep 17 00:00:00 2001 +From: Yongqiang Yang +Date: Wed, 5 Sep 2012 01:27:50 -0400 +Subject: ext4: avoid duplicate writes of the backup bg descriptor blocks + +From: Yongqiang Yang + +commit 2ebd1704ded88a8ae29b5f3998b13959c715c4be upstream. + +The resize code was needlessly writing the backup block group +descriptor blocks multiple times (once per block group) during an +online resize. 
+ +Signed-off-by: Yongqiang Yang +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1358,13 +1358,15 @@ exit_journal: + err = err2; + + if (!err) { +- int i; ++ int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); ++ int gdb_num_end = ((group + flex_gd->count - 1) / ++ EXT4_DESC_PER_BLOCK(sb)); ++ + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext4_super_block)); +- for (i = 0; i < flex_gd->count; i++, group++) { ++ for (; gdb_num <= gdb_num_end; gdb_num++) { + struct buffer_head *gdb_bh; +- int gdb_num; +- gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); ++ + gdb_bh = sbi->s_group_desc[gdb_num]; + update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, + gdb_bh->b_size); diff --git a/queue-3.6/ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch b/queue-3.6/ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch new file mode 100644 index 00000000000..76c33f3ee19 --- /dev/null +++ b/queue-3.6/ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch @@ -0,0 +1,44 @@ +From 6df935ad2fced9033ab52078825fcaf6365f34b7 Mon Sep 17 00:00:00 2001 +From: Yongqiang Yang +Date: Wed, 5 Sep 2012 01:25:50 -0400 +Subject: ext4: don't copy non-existent gdt blocks when resizing + +From: Yongqiang Yang + +commit 6df935ad2fced9033ab52078825fcaf6365f34b7 upstream. + +The resize code was copying blocks at the beginning of each block +group in order to copy the superblock and block group descriptor table +(gdt) blocks. This was, unfortunately, being done even for block +groups that did not have super blocks or gdt blocks. This is a +complete waste of perfectly good I/O bandwidth, so skip writing those +blocks for sparse bg's. 
+ +Signed-off-by: Yongqiang Yang +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -456,6 +456,9 @@ static int setup_new_flex_group_blocks(s + gdblocks = ext4_bg_num_gdb(sb, group); + start = ext4_group_first_block_no(sb, group); + ++ if (!ext4_bg_has_super(sb, group)) ++ goto handle_itb; ++ + /* Copy all of the GDT blocks into the backup in this group */ + for (j = 0, block = start + 1; j < gdblocks; j++, block++) { + struct buffer_head *gdb; +@@ -498,6 +501,7 @@ static int setup_new_flex_group_blocks(s + goto out; + } + ++handle_itb: + /* Initialize group tables of the grop @group */ + if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) + goto handle_bb; diff --git a/queue-3.6/ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch b/queue-3.6/ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch new file mode 100644 index 00000000000..05c6b68c6f3 --- /dev/null +++ b/queue-3.6/ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch @@ -0,0 +1,124 @@ +From 00d4e7362ed01987183e9528295de3213031309c Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Wed, 19 Sep 2012 22:42:36 -0400 +Subject: ext4: fix potential deadlock in ext4_nonda_switch() + +From: Theodore Ts'o + +commit 00d4e7362ed01987183e9528295de3213031309c upstream. + +In ext4_nonda_switch(), if the file system is getting full we used to +call writeback_inodes_sb_if_idle(). The problem is that we can be +holding i_mutex already, and this causes a potential deadlock when +writeback_inodes_sb_if_idle() tries to take s_umount. (See +lockdep output below). + +As it turns out we don't need to hold s_umount; the fact that we +are in the middle of the write(2) system call will keep the superblock +pinned. 
Unfortunately writeback_inodes_sb() checks to make sure +s_umount is taken, and the VFS uses a different mechanism for making +sure the file system doesn't get unmounted out from under us. The +simplest way of dealing with this is to just simply grab s_umount +using a trylock, and skip kicking the writeback flusher thread in the +very unlikely case that we can't take a read lock on s_umount without +blocking. + +Also, we now check the criteria for kicking the writeback thread +before we decide whether to fall back to non-delayed writeback, so +if there are any outstanding delayed allocation writes, we try to get +them resolved as soon as possible. + + [ INFO: possible circular locking dependency detected ] + 3.6.0-rc1-00042-gce894ca #367 Not tainted + ------------------------------------------------------- + dd/8298 is trying to acquire lock: + (&type->s_umount_key#18){++++..}, at: [] writeback_inodes_sb_if_idle+0x28/0x46 + + but task is already holding lock: + (&sb->s_type->i_mutex_key#8){+.+...}, at: [] generic_file_aio_write+0x5f/0xd3 + + which lock already depends on the new lock. + + 2 locks held by dd/8298: + #0: (sb_writers#2){.+.+.+}, at: [] generic_file_aio_write+0x56/0xd3 + #1: (&sb->s_type->i_mutex_key#8){+.+...}, at: [] generic_file_aio_write+0x5f/0xd3 + + stack backtrace: + Pid: 8298, comm: dd Not tainted 3.6.0-rc1-00042-gce894ca #367 + Call Trace: + [] ? console_unlock+0x345/0x372 + [] print_circular_bug+0x190/0x19d + [] __lock_acquire+0x86d/0xb6c + [] ? mark_held_locks+0x5c/0x7b + [] lock_acquire+0x66/0xb9 + [] ? writeback_inodes_sb_if_idle+0x28/0x46 + [] down_read+0x28/0x58 + [] ? writeback_inodes_sb_if_idle+0x28/0x46 + [] writeback_inodes_sb_if_idle+0x28/0x46 + [] ext4_nonda_switch+0xe1/0xf4 + [] ext4_da_write_begin+0x27/0x193 + [] generic_file_buffered_write+0xc8/0x1bb + [] __generic_file_aio_write+0x1dd/0x205 + [] generic_file_aio_write+0x78/0xd3 + [] ext4_file_write+0x480/0x4a6 + [] ? __lock_acquire+0x41e/0xb6c + [] ? 
sched_clock_cpu+0x11a/0x13e + [] ? trace_hardirqs_off+0xb/0xd + [] ? local_clock+0x37/0x4e + [] do_sync_write+0x67/0x9d + [] ? wait_on_retry_sync_kiocb+0x44/0x44 + [] vfs_write+0x7b/0xe6 + [] sys_write+0x3b/0x64 + [] syscall_call+0x7/0xb + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 17 ++++++++++------- + fs/fs-writeback.c | 1 + + 2 files changed, 11 insertions(+), 7 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2463,6 +2463,16 @@ static int ext4_nonda_switch(struct supe + free_blocks = EXT4_C2B(sbi, + percpu_counter_read_positive(&sbi->s_freeclusters_counter)); + dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); ++ /* ++ * Start pushing delalloc when 1/2 of free blocks are dirty. ++ */ ++ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && ++ !writeback_in_progress(sb->s_bdi) && ++ down_read_trylock(&sb->s_umount)) { ++ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); ++ up_read(&sb->s_umount); ++ } ++ + if (2 * free_blocks < 3 * dirty_blocks || + free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { + /* +@@ -2471,13 +2481,6 @@ static int ext4_nonda_switch(struct supe + */ + return 1; + } +- /* +- * Even if we don't switch but are nearing capacity, +- * start pushing delalloc when 1/2 of free blocks are dirty. 
+- */ +- if (free_blocks < 2 * dirty_blocks) +- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); +- + return 0; + } + +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing + { + return test_bit(BDI_writeback_running, &bdi->state); + } ++EXPORT_SYMBOL(writeback_in_progress); + + static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) + { diff --git a/queue-3.6/ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch b/queue-3.6/ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch new file mode 100644 index 00000000000..acbeb802109 --- /dev/null +++ b/queue-3.6/ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch @@ -0,0 +1,71 @@ +From 03c1c29053f678234dbd51bf3d65f3b7529021de Mon Sep 17 00:00:00 2001 +From: Yongqiang Yang +Date: Wed, 5 Sep 2012 01:21:50 -0400 +Subject: ext4: ignore last group w/o enough space when resizing instead of BUG'ing + +From: Yongqiang Yang + +commit 03c1c29053f678234dbd51bf3d65f3b7529021de upstream. + +If the last group does not have enough space for group tables, ignore +it instead of calling BUG_ON(). + +Reported-by: Daniel Drake +Signed-off-by: Yongqiang Yang +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -200,8 +200,11 @@ static void free_flex_gd(struct ext4_new + * be a partial of a flex group. + * + * @sb: super block of fs to which the groups belongs ++ * ++ * Returns 0 on a successful allocation of the metadata blocks in the ++ * block group. 
+ */ +-static void ext4_alloc_group_tables(struct super_block *sb, ++static int ext4_alloc_group_tables(struct super_block *sb, + struct ext4_new_flex_group_data *flex_gd, + int flexbg_size) + { +@@ -226,6 +229,8 @@ static void ext4_alloc_group_tables(stru + (last_group & ~(flexbg_size - 1)))); + next_group: + group = group_data[0].group; ++ if (src_group >= group_data[0].group + flex_gd->count) ++ return -ENOSPC; + start_blk = ext4_group_first_block_no(sb, src_group); + last_blk = start_blk + group_data[src_group - group].blocks_count; + +@@ -235,7 +240,6 @@ next_group: + + start_blk += overhead; + +- BUG_ON(src_group >= group_data[0].group + flex_gd->count); + /* We collect contiguous blocks as much as possible. */ + src_group++; + for (; src_group <= last_group; src_group++) +@@ -300,6 +304,7 @@ next_group: + group_data[i].free_blocks_count); + } + } ++ return 0; + } + + static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, +@@ -1729,7 +1734,8 @@ int ext4_resize_fs(struct super_block *s + */ + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, + flexbg_size)) { +- ext4_alloc_group_tables(sb, flex_gd, flexbg_size); ++ if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) ++ break; + err = ext4_flex_group_add(sb, resize_inode, flex_gd); + if (unlikely(err)) + break; diff --git a/queue-3.6/series b/queue-3.6/series index d45de26ca4e..806edd1464d 100644 --- a/queue-3.6/series +++ b/queue-3.6/series @@ -43,3 +43,7 @@ scsi-zfcp-remove-invalid-reference-to-list-iterator-variable.patch scsi-zfcp-restore-refcount-check-on-port_remove.patch scsi-zfcp-only-access-zfcp_scsi_dev-for-valid-scsi_device.patch pci-check-p2p-bridge-for-invalid-secondary-subordinate-range.patch +ext4-ignore-last-group-w-o-enough-space-when-resizing-instead-of-bug-ing.patch +ext4-don-t-copy-non-existent-gdt-blocks-when-resizing.patch +ext4-avoid-duplicate-writes-of-the-backup-bg-descriptor-blocks.patch +ext4-fix-potential-deadlock-in-ext4_nonda_switch.patch