lots of ext4 and some ext3 and one ext2 patch for 2.6.27

author Greg Kroah-Hartman <gregkh@suse.de>

Wed, 3 Dec 2008 18:55:16 +0000 (10:55 -0800)

committer Greg Kroah-Hartman <gregkh@suse.de>

Wed, 3 Dec 2008 18:55:16 +0000 (10:55 -0800)
author Greg Kroah-Hartman <gregkh@suse.de>
Wed, 3 Dec 2008 18:55:16 +0000 (10:55 -0800)
committer Greg Kroah-Hartman <gregkh@suse.de>
Wed, 3 Dec 2008 18:55:16 +0000 (10:55 -0800)
diff --git a/queue-2.6.27/ext2-fix-ext2-block-reservation-early-enospc-issue.patch b/queue-2.6.27/ext2-fix-ext2-block-reservation-early-enospc-issue.patch

new file mode 100644 (file)

index 0000000..517c1da
--- /dev/null
+++ b/queue-2.6.27/ext2-fix-ext2-block-reservation-early-enospc-issue.patch
@@ -0,0 +1,64 @@
+From d707d31c972b657dfc2efefd0b99cc4e14223dab Mon Sep 17 00:00:00 2001
+From: Mingming Cao <cmm@us.ibm.com>
+Date: Wed, 15 Oct 2008 22:04:01 -0700
+Subject: ext2: fix ext2 block reservation early ENOSPC issue
+
+From: Mingming Cao <cmm@us.ibm.com>
+
+commit d707d31c972b657dfc2efefd0b99cc4e14223dab upstream.
+
+We could run into an ENOSPC error on ext2, even when there are free blocks on
+the filesystem.
+
+The problem is triggered in the case the goal block group has 0 free
+blocks, and the rest of the block groups are skipped due to the check of
+"free_blocks < windowsz/2".  Current code could fall back to non
+reservation allocation to prevent early ENOSPC after examining all the block
+groups with reservation on, but this code was bypassed if the reservation
+window is turned off already, which is true in this case.
+
+This patch fixed two issues:
+1) We don't need to turn off block reservation if the goal block group has
+0 free blocks left and continue search for the rest of block groups.
+
+The intention of the current code is to turn off the block reservation if
+the goal allocation group has a few (some) free blocks left (not enough to
+make the desired reservation window), to try to allocate in the goal
+block group, to get better locality.  But if the goal block group has 0 free
+blocks, it should leave the block reservation on and continue searching
+the next block groups, rather than turn off block reservation completely.
+
+2) we don't need to check the window size if the block reservation is off.
+
+The problem was originally found and fixed in ext4.
+
+Signed-off-by: Mingming Cao <cmm@us.ibm.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext2/balloc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext2/balloc.c
++++ b/fs/ext2/balloc.c
+@@ -1295,6 +1295,7 @@ retry_alloc:
+        * turn off reservation for this allocation
+        */
+       if (my_rsv && (free_blocks < windowsz)
++              && (free_blocks > 0)
+               && (rsv_is_empty(&my_rsv->rsv_window)))
+               my_rsv = NULL;
+ 
+@@ -1332,7 +1333,7 @@ retry_alloc:
+                * free blocks is less than half of the reservation
+                * window size.
+                */
+-              if (free_blocks <= (windowsz/2))
++              if (my_rsv && (free_blocks <= (windowsz/2)))
+                       continue;
+ 
+               brelse(bitmap_bh);
diff --git a/queue-2.6.27/ext3-don-t-try-to-resize-if-there-are-no-reserved-gdt-blocks-left.patch b/queue-2.6.27/ext3-don-t-try-to-resize-if-there-are-no-reserved-gdt-blocks-left.patch

new file mode 100644 (file)

index 0000000..664a1cb
--- /dev/null
+++ b/queue-2.6.27/ext3-don-t-try-to-resize-if-there-are-no-reserved-gdt-blocks-left.patch
@@ -0,0 +1,40 @@
+From 972fbf779832e5ad15effa7712789aeff9224c37 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <jbacik@redhat.com>
+Date: Sat, 18 Oct 2008 20:27:55 -0700
+Subject: ext3: don't try to resize if there are no reserved gdt blocks left
+
+From: Josef Bacik <jbacik@redhat.com>
+
+commit 972fbf779832e5ad15effa7712789aeff9224c37 upstream.
+
+When trying to resize an ext3 fs and you run out of reserved gdt blocks,
+you get an error that doesn't actually tell you what went wrong, it just
+says that the gdb it picked is not correct, which is the case since you
+don't have any reserved gdt blocks left.  This patch adds a check to make
+sure you have reserved gdt blocks to use, and if not prints out a more
+relevant error.
+
+Signed-off-by: Josef Bacik <jbacik@redhat.com>
+Cc: <linux-ext4@vger.kernel.org>
+Cc: Andreas Dilger <adilger@sun.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext3/resize.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext3/resize.c
++++ b/fs/ext3/resize.c
+@@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *s
+ 
+       if (reserved_gdb || gdb_off == 0) {
+               if (!EXT3_HAS_COMPAT_FEATURE(sb,
+-                                           EXT3_FEATURE_COMPAT_RESIZE_INODE)){
++                                           EXT3_FEATURE_COMPAT_RESIZE_INODE)
++                  || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
+                       ext3_warning(sb, __func__,
+                                    "No reserved GDT blocks, can't resize");
+                       return -EPERM;
diff --git a/queue-2.6.27/ext3-fix-duplicate-entries-returned-from-getdents-system-call.patch b/queue-2.6.27/ext3-fix-duplicate-entries-returned-from-getdents-system-call.patch

new file mode 100644 (file)

index 0000000..2124aac
--- /dev/null
+++ b/queue-2.6.27/ext3-fix-duplicate-entries-returned-from-getdents-system-call.patch
@@ -0,0 +1,68 @@
+From 8c9fa93d51123c5540762b1a9e1919d6f9c4af7c Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sat, 25 Oct 2008 11:38:37 -0400
+Subject: ext3: Fix duplicate entries returned from getdents() system call
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 8c9fa93d51123c5540762b1a9e1919d6f9c4af7c upstream.
+
+Fix a regression caused by commit 6a897cf4, "ext3: fix ext3_dx_readdir
+hash collision handling", where deleting files in a large directory
+(requiring more than one getdents system call), results in some
+filenames being returned twice.  This was caused by a failure to
+update info->curr_hash and info->curr_minor_hash, so that if the
+directory had gotten modified since the last getdents() system call
+(as would be the case if the user is running "rm -r" or "git clean"),
+a directory entry would get returned twice to the userspace.
+
+This patch fixes the bug reported by Markus Trippelsdorf at:
+http://bugzilla.kernel.org/show_bug.cgi?id=11844
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Tested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext3/dir.c |   20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+--- a/fs/ext3/dir.c
++++ b/fs/ext3/dir.c
+@@ -456,17 +456,8 @@ static int ext3_dx_readdir(struct file *
+       if (info->extra_fname) {
+               if (call_filldir(filp, dirent, filldir, info->extra_fname))
+                       goto finished;
+-
+               info->extra_fname = NULL;
+-              info->curr_node = rb_next(info->curr_node);
+-              if (!info->curr_node) {
+-                      if (info->next_hash == ~0) {
+-                              filp->f_pos = EXT3_HTREE_EOF;
+-                              goto finished;
+-                      }
+-                      info->curr_hash = info->next_hash;
+-                      info->curr_minor_hash = 0;
+-              }
++              goto next_node;
+       } else if (!info->curr_node)
+               info->curr_node = rb_first(&info->root);
+ 
+@@ -498,9 +489,14 @@ static int ext3_dx_readdir(struct file *
+               info->curr_minor_hash = fname->minor_hash;
+               if (call_filldir(filp, dirent, filldir, fname))
+                       break;
+-
++      next_node:
+               info->curr_node = rb_next(info->curr_node);
+-              if (!info->curr_node) {
++              if (info->curr_node) {
++                      fname = rb_entry(info->curr_node, struct fname,
++                                       rb_hash);
++                      info->curr_hash = fname->hash;
++                      info->curr_minor_hash = fname->minor_hash;
++              } else {
+                       if (info->next_hash == ~0) {
+                               filp->f_pos = EXT3_HTREE_EOF;
+                               break;
diff --git a/queue-2.6.27/ext3-fix-ext3-block-reservation-early-enospc-issue.patch b/queue-2.6.27/ext3-fix-ext3-block-reservation-early-enospc-issue.patch

new file mode 100644 (file)

index 0000000..7e10152
--- /dev/null
+++ b/queue-2.6.27/ext3-fix-ext3-block-reservation-early-enospc-issue.patch
@@ -0,0 +1,64 @@
+From 46d01a225e694f1a4343beea44f1e85105aedd7e Mon Sep 17 00:00:00 2001
+From: Mingming Cao <cmm@us.ibm.com>
+Date: Sat, 18 Oct 2008 20:27:56 -0700
+Subject: ext3: fix ext3 block reservation early ENOSPC issue
+
+From: Mingming Cao <cmm@us.ibm.com>
+
+commit 46d01a225e694f1a4343beea44f1e85105aedd7e upstream.
+
+We could run into an ENOSPC error on ext3, even when there are free blocks on
+the filesystem.
+
+The problem is triggered in the case the goal block group has 0 free
+blocks, and the rest of the block groups are skipped due to the check of
+"free_blocks < windowsz/2".  Current code could fall back to non
+reservation allocation to prevent early ENOSPC after examining all the block
+groups with reservation on, but this code was bypassed if the reservation
+window is turned off already, which is true in this case.
+
+This patch fixed two issues:
+1) We don't need to turn off block reservation if the goal block group has
+0 free blocks left and continue search for the rest of block groups.
+
+The intention of the current code is to turn off the block reservation if
+the goal allocation group has a few (some) free blocks left (not enough to
+make the desired reservation window), to try to allocate in the goal
+block group, to get better locality.  But if the goal block group has 0 free
+blocks, it should leave the block reservation on and continue searching
+the next block groups, rather than turn off block reservation completely.
+
+2) we don't need to check the window size if the block reservation is off.
+
+The problem was originally found and fixed in ext4.
+
+Signed-off-by: Mingming Cao <cmm@us.ibm.com>
+Cc: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext3/balloc.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext3/balloc.c
++++ b/fs/ext3/balloc.c
+@@ -1547,6 +1547,7 @@ retry_alloc:
+        * turn off reservation for this allocation
+        */
+       if (my_rsv && (free_blocks < windowsz)
++              && (free_blocks > 0)
+               && (rsv_is_empty(&my_rsv->rsv_window)))
+               my_rsv = NULL;
+ 
+@@ -1585,7 +1586,7 @@ retry_alloc:
+                * free blocks is less than half of the reservation
+                * window size.
+                */
+-              if (free_blocks <= (windowsz/2))
++              if (my_rsv && (free_blocks <= (windowsz/2)))
+                       continue;
+ 
+               brelse(bitmap_bh);
diff --git a/queue-2.6.27/ext3-fix-ext3_dx_readdir-hash-collision-handling.patch b/queue-2.6.27/ext3-fix-ext3_dx_readdir-hash-collision-handling.patch

new file mode 100644 (file)

index 0000000..f1fa250
--- /dev/null
+++ b/queue-2.6.27/ext3-fix-ext3_dx_readdir-hash-collision-handling.patch
@@ -0,0 +1,62 @@
+From 6a897cf447a83c9c3fd1b85a1e525c02d6eada7d Mon Sep 17 00:00:00 2001
+From: Eugene Dashevsky <eugene@ibrix.com>
+Date: Sat, 18 Oct 2008 20:27:59 -0700
+Subject: ext3: fix ext3_dx_readdir hash collision handling
+
+From: Eugene Dashevsky <eugene@ibrix.com>
+
+commit 6a897cf447a83c9c3fd1b85a1e525c02d6eada7d upstream.
+
+This fixes a bug where readdir() would return a directory entry twice
+if there was a hash collision in a hash-tree-indexed directory.
+
+[akpm@linux-foundation.org: coding-style fixes]
+Signed-off-by: Eugene Dashevsky <eugene@ibrix.com>
+Signed-off-by: Mike Snitzer <msnitzer@ibrix.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext3/dir.c |   20 +++++++++++++++-----
+ 1 file changed, 15 insertions(+), 5 deletions(-)
+
+--- a/fs/ext3/dir.c
++++ b/fs/ext3/dir.c
+@@ -414,7 +414,7 @@ static int call_filldir(struct file * fi
+                               get_dtype(sb, fname->file_type));
+               if (error) {
+                       filp->f_pos = curr_pos;
+-                      info->extra_fname = fname->next;
++                      info->extra_fname = fname;
+                       return error;
+               }
+               fname = fname->next;
+@@ -453,11 +453,21 @@ static int ext3_dx_readdir(struct file *
+        * If there are any leftover names on the hash collision
+        * chain, return them first.
+        */
+-      if (info->extra_fname &&
+-          call_filldir(filp, dirent, filldir, info->extra_fname))
+-              goto finished;
++      if (info->extra_fname) {
++              if (call_filldir(filp, dirent, filldir, info->extra_fname))
++                      goto finished;
+ 
+-      if (!info->curr_node)
++              info->extra_fname = NULL;
++              info->curr_node = rb_next(info->curr_node);
++              if (!info->curr_node) {
++                      if (info->next_hash == ~0) {
++                              filp->f_pos = EXT3_HTREE_EOF;
++                              goto finished;
++                      }
++                      info->curr_hash = info->next_hash;
++                      info->curr_minor_hash = 0;
++              }
++      } else if (!info->curr_node)
+               info->curr_node = rb_first(&info->root);
+ 
+       while (1) {
diff --git a/queue-2.6.27/ext4-add-checksum-calculation-when-clearing-uninit-flag-in-ext4_new_inode.patch b/queue-2.6.27/ext4-add-checksum-calculation-when-clearing-uninit-flag-in-ext4_new_inode.patch

new file mode 100644 (file)

index 0000000..ce01198
--- /dev/null
+++ b/queue-2.6.27/ext4-add-checksum-calculation-when-clearing-uninit-flag-in-ext4_new_inode.patch
@@ -0,0 +1,39 @@
+From tytso@mit.edu  Wed Dec  3 10:45:18 2008
+From: Frederic Bohe <frederic.bohe@bull.net>
+Date: Sun, 16 Nov 2008 11:05:40 -0500
+Subject: ext4: add checksum calculation when clearing UNINIT flag in ext4_new_inode
+To: stable@kernel.org
+Cc: Frederic Bohe <frederic.bohe@bull.net>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-21-git-send-email-tytso@mit.edu>
+
+From: Frederic Bohe <frederic.bohe@bull.net>
+
+(cherry picked from commit 23712a9c28b9f80a8cf70c8490358d5f562d2465)
+
+When initializing an uninitialized block group in ext4_new_inode(),
+its block group checksum must be re-calculated.  This fixes a race
+when several threads try to allocate a new inode in an UNINIT'd group.
+
+There is some question whether we need to be initializing the block
+bitmap in ext4_new_inode() at all, but for now, if we are going to
+init the block group, let's eliminate the race.
+
+Signed-off-by: Frederic Bohe <frederic.bohe@bull.net>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/ialloc.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -717,6 +717,8 @@ got:
+                       gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+                       free = ext4_free_blocks_after_init(sb, group, gdp);
+                       gdp->bg_free_blocks_count = cpu_to_le16(free);
++                      gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
++                                                              gdp);
+               }
+               spin_unlock(sb_bgl_lock(sbi, group));
+ 
diff --git a/queue-2.6.27/ext4-add-missing-unlock-in-ext4_check_descriptors-on-error-path.patch b/queue-2.6.27/ext4-add-missing-unlock-in-ext4_check_descriptors-on-error-path.patch

new file mode 100644 (file)

index 0000000..f6bb964
--- /dev/null
+++ b/queue-2.6.27/ext4-add-missing-unlock-in-ext4_check_descriptors-on-error-path.patch
@@ -0,0 +1,39 @@
+From tytso@mit.edu  Wed Dec  3 09:57:50 2008
+From: Li Zefan <lizf@cn.fujitsu.com>
+Date: Sun, 16 Nov 2008 11:05:24 -0500
+Subject: ext4: add missing unlock in ext4_check_descriptors() on error path
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Li Zefan <lizf@cn.fujitsu.com>
+Message-ID: <1226851540-8032-5-git-send-email-tytso@mit.edu>
+
+
+From: Li Zefan <lizf@cn.fujitsu.com>
+
+(cherry picked from commit 7ee1ec4ca30c6df8e989615cdaacb75f2af4fa6b)
+
+If the group descriptors are corrupted we need to unlock the block
+group lock before returning from the function; else we will oops when
+freeing a spinlock which is still being held.
+
+Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/super.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1626,8 +1626,10 @@ static int ext4_check_descriptors(struct
+                              "Checksum for group %lu failed (%u!=%u)\n",
+                              i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+                              gdp)), le16_to_cpu(gdp->bg_checksum));
+-                      if (!(sb->s_flags & MS_RDONLY))
++                      if (!(sb->s_flags & MS_RDONLY)) {
++                              spin_unlock(sb_bgl_lock(sbi, i));
+                               return 0;
++                      }
+               }
+               spin_unlock(sb_bgl_lock(sbi, i));
+               if (!flexbg_flag)
diff --git a/queue-2.6.27/ext4-calculate-journal-credits-correctly.patch b/queue-2.6.27/ext4-calculate-journal-credits-correctly.patch

new file mode 100644 (file)

index 0000000..2b4be26
--- /dev/null
+++ b/queue-2.6.27/ext4-calculate-journal-credits-correctly.patch
@@ -0,0 +1,46 @@
+From tytso@mit.edu  Wed Dec  3 10:44:36 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:38 -0500
+Subject: ext4: calculate journal credits correctly
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-19-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit ac51d83705c2a38c71f39cde99708b14e6212a60)
+
+This fixes a 2.6.27 regression which was introduced in commit a02908f1.
+
+We weren't passing the chunk parameter down to the two subfunctions,
+ext4_indirect_trans_blocks() and ext4_ext_index_trans_blocks(), with
+the result that we massively overestimate the amount of credits needed by
+ext4_da_writepages, especially in the non-extents case.  This causes
+failures especially on /boot partitions, which tend to be small and
+non-extent using since GRUB doesn't handle extents.
+
+This patch fixes the bug reported by Joseph Fannin at:
+http://bugzilla.kernel.org/show_bug.cgi?id=11964
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4444,9 +4444,10 @@ static int ext4_indirect_trans_blocks(st
+ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+ {
+       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+-              return ext4_indirect_trans_blocks(inode, nrblocks, 0);
+-      return ext4_ext_index_trans_blocks(inode, nrblocks, 0);
++              return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
++      return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
+ }
++
+ /*
+  * Account for index blocks, block groups bitmaps and block group
+  * descriptor blocks if modify datablocks and index blocks
diff --git a/queue-2.6.27/ext4-convert-to-host-order-before-using-the-values.patch b/queue-2.6.27/ext4-convert-to-host-order-before-using-the-values.patch

new file mode 100644 (file)

index 0000000..1e2a8fb
--- /dev/null
+++ b/queue-2.6.27/ext4-convert-to-host-order-before-using-the-values.patch
@@ -0,0 +1,37 @@
+From tytso@mit.edu  Wed Dec  3 10:43:46 2008
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Sun, 16 Nov 2008 11:05:36 -0500
+Subject: ext4: Convert to host order before using the values.
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1226851540-8032-17-git-send-email-tytso@mit.edu>
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit d94e99a64c3beece22dbfb2b335771a59184eb0a)
+
+Use le16_to_cpu to read the s_reserved_gdt_blocks values
+from super block.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/super.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1506,9 +1506,8 @@ static int ext4_fill_flex_info(struct su
+ 
+       /* We allocate both existing and potentially added groups */
+       flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
+-                          ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
+-                            EXT4_DESC_PER_BLOCK_BITS(sb))) /
+-                         groups_per_flex;
++                      ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
++                            EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
+       sbi->s_flex_groups = kzalloc(flex_group_count *
+                                    sizeof(struct flex_groups), GFP_KERNEL);
+       if (sbi->s_flex_groups == NULL) {
diff --git a/queue-2.6.27/ext4-do-mballoc-init-before-doing-filesystem-recovery.patch b/queue-2.6.27/ext4-do-mballoc-init-before-doing-filesystem-recovery.patch

new file mode 100644 (file)

index 0000000..c0b3aee
--- /dev/null
+++ b/queue-2.6.27/ext4-do-mballoc-init-before-doing-filesystem-recovery.patch
@@ -0,0 +1,65 @@
+From tytso@mit.edu  Wed Dec  3 10:42:48 2008
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Sun, 16 Nov 2008 11:05:33 -0500
+Subject: ext4: Do mballoc init before doing filesystem recovery
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1226851540-8032-14-git-send-email-tytso@mit.edu>
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit c2774d84fd6cab2bfa2a2fae0b1ca8d8ebde48a2)
+
+During filesystem recovery we may be doing a truncate
+which expects some of the mballoc data structures to
+be initialized. So do ext4_mb_init before recovery.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/super.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2449,6 +2449,21 @@ static int ext4_fill_super(struct super_
+                       "available.\n");
+       }
+ 
++      if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
++              printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
++                              "requested data journaling mode\n");
++              clear_opt(sbi->s_mount_opt, DELALLOC);
++      } else if (test_opt(sb, DELALLOC))
++              printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
++
++      ext4_ext_init(sb);
++      err = ext4_mb_init(sb, needs_recovery);
++      if (err) {
++              printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
++                     err);
++              goto failed_mount4;
++      }
++
+       /*
+        * akpm: core read_super() calls in here with the superblock locked.
+        * That deadlocks, because orphan cleanup needs to lock the superblock
+@@ -2468,16 +2483,6 @@ static int ext4_fill_super(struct super_
+              test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+              "writeback");
+ 
+-      if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+-              printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+-                              "requested data journaling mode\n");
+-              clear_opt(sbi->s_mount_opt, DELALLOC);
+-      } else if (test_opt(sb, DELALLOC))
+-              printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+-
+-      ext4_ext_init(sb);
+-      ext4_mb_init(sb, needs_recovery);
+-
+       lock_kernel();
+       return 0;
+ 
diff --git a/queue-2.6.27/ext4-elevate-write-count-for-migrate-ioctl.patch b/queue-2.6.27/ext4-elevate-write-count-for-migrate-ioctl.patch

new file mode 100644 (file)

index 0000000..9a0ab37
--- /dev/null
+++ b/queue-2.6.27/ext4-elevate-write-count-for-migrate-ioctl.patch
@@ -0,0 +1,100 @@
+From tytso@mit.edu  Wed Dec  3 09:58:51 2008
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Sun, 16 Nov 2008 11:05:25 -0500
+Subject: ext4: elevate write count for migrate ioctl
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1226851540-8032-6-git-send-email-tytso@mit.edu>
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 2a43a878001cc5cb7c3c7be2e8dad0a1aeb939b0)
+
+The migrate ioctl writes to the filesystem, so we need to elevate the
+write count.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/ext4.h    |    3 +--
+ fs/ext4/ioctl.c   |   21 ++++++++++++++++++++-
+ fs/ext4/migrate.c |   10 +---------
+ 3 files changed, 22 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1083,8 +1083,7 @@ extern long ext4_ioctl(struct file *, un
+ extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
+ 
+ /* migrate.c */
+-extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
+-                     unsigned long);
++extern int ext4_ext_migrate(struct inode *);
+ /* namei.c */
+ extern int ext4_orphan_add(handle_t *, struct inode *);
+ extern int ext4_orphan_del(handle_t *, struct inode *);
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -267,7 +267,26 @@ setversion_out:
+       }
+ 
+       case EXT4_IOC_MIGRATE:
+-              return ext4_ext_migrate(inode, filp, cmd, arg);
++      {
++              int err;
++              if (!is_owner_or_cap(inode))
++                      return -EACCES;
++
++              err = mnt_want_write(filp->f_path.mnt);
++              if (err)
++                      return err;
++              /*
++               * inode_mutex prevent write and truncate on the file.
++               * Read still goes through. We take i_data_sem in
++               * ext4_ext_swap_inode_data before we switch the
++               * inode format to prevent read.
++               */
++              mutex_lock(&(inode->i_mutex));
++              err = ext4_ext_migrate(inode);
++              mutex_unlock(&(inode->i_mutex));
++              mnt_drop_write(filp->f_path.mnt);
++              return err;
++      }
+ 
+       default:
+               return -ENOTTY;
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -447,8 +447,7 @@ static int free_ext_block(handle_t *hand
+ 
+ }
+ 
+-int ext4_ext_migrate(struct inode *inode, struct file *filp,
+-                              unsigned int cmd, unsigned long arg)
++int ext4_ext_migrate(struct inode *inode)
+ {
+       handle_t *handle;
+       int retval = 0, i;
+@@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode
+        * when we add extents we extent the journal
+        */
+       /*
+-       * inode_mutex prevent write and truncate on the file. Read still goes
+-       * through. We take i_data_sem in ext4_ext_swap_inode_data before we
+-       * switch the inode format to prevent read.
+-       */
+-      mutex_lock(&(inode->i_mutex));
+-      /*
+        * Even though we take i_mutex we can still cause block allocation
+        * via mmap write to holes. If we have allocated new blocks we fail
+        * migrate.  New block allocation will clear EXT4_EXT_MIGRATE flag.
+@@ -623,7 +616,6 @@ err_out:
+       tmp_inode->i_nlink = 0;
+ 
+       ext4_journal_stop(handle);
+-      mutex_unlock(&(inode->i_mutex));
+ 
+       if (tmp_inode)
+               iput(tmp_inode);
diff --git a/queue-2.6.27/ext4-fix-11321-create-proc-ext4-stats-more-carefully.patch b/queue-2.6.27/ext4-fix-11321-create-proc-ext4-stats-more-carefully.patch

new file mode 100644 (file)

index 0000000..b5122ee
--- /dev/null
+++ b/queue-2.6.27/ext4-fix-11321-create-proc-ext4-stats-more-carefully.patch
@@ -0,0 +1,92 @@
+From tytso@mit.edu  Wed Dec  3 09:56:22 2008
+From: Alexey Dobriyan <adobriyan@gmail.com>
+Date: Sun, 16 Nov 2008 11:05:22 -0500
+Subject: ext4: fix #11321: create /proc/ext4/*/stats more carefully
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, Alexey Dobriyan <adobriyan@gmail.com>
+Message-ID: <1226851540-8032-3-git-send-email-tytso@mit.edu>
+
+
+From: Alexey Dobriyan <adobriyan@gmail.com>
+
+(cherry picked from commit 899fc1a4cf404747de2666534d508804597ee22f)
+
+ext4 creates a per-superblock directory in /proc/ext4/.  The name used as
+the basis is taken from bdevname, which, surprise, can contain a slash.
+
+However, proc while allowing to use proc_create("a/b", parent) form of
+PDE creation, assumes that parent/a was already created.
+
+The bdevname in question is 'cciss/c0d0p9'; the directory is not created and
+all this stuff goes directly into /proc (which is a real bug).
+
+Warning comes when _second_ partition is mounted.
+
+http://bugzilla.kernel.org/show_bug.cgi?id=11321
+
+Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/mballoc.c |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2785,14 +2785,20 @@ static int ext4_mb_init_per_dev_proc(str
+       mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct proc_dir_entry *proc;
+-      char devname[64];
++      char devname[BDEVNAME_SIZE], *p;
+ 
+       if (proc_root_ext4 == NULL) {
+               sbi->s_mb_proc = NULL;
+               return -EINVAL;
+       }
+       bdevname(sb->s_bdev, devname);
++      p = devname;
++      while ((p = strchr(p, '/')))
++              *p = '!';
++
+       sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
++      if (!sbi->s_mb_proc)
++              goto err_create_dir;
+ 
+       MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
+       MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
+@@ -2804,7 +2810,6 @@ static int ext4_mb_init_per_dev_proc(str
+       return 0;
+ 
+ err_out:
+-      printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
+       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
+       remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
+       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
+@@ -2813,6 +2818,8 @@ err_out:
+       remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
+       remove_proc_entry(devname, proc_root_ext4);
+       sbi->s_mb_proc = NULL;
++err_create_dir:
++      printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
+ 
+       return -ENOMEM;
+ }
+@@ -2820,12 +2827,15 @@ err_out:
+ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+-      char devname[64];
++      char devname[BDEVNAME_SIZE], *p;
+ 
+       if (sbi->s_mb_proc == NULL)
+               return -EINVAL;
+ 
+       bdevname(sb->s_bdev, devname);
++      p = devname;
++      while ((p = strchr(p, '/')))
++              *p = '!';
+       remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
+       remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
+       remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
diff --git a/queue-2.6.27/ext4-fix-duplicate-entries-returned-from-getdents-system-call.patch b/queue-2.6.27/ext4-fix-duplicate-entries-returned-from-getdents-system-call.patch

new file mode 100644 (file)

index 0000000..07cd22c
--- /dev/null
+++ b/queue-2.6.27/ext4-fix-duplicate-entries-returned-from-getdents-system-call.patch
@@ -0,0 +1,72 @@
+From tytso@mit.edu  Wed Dec  3 10:43:07 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:34 -0500
+Subject: ext4: Fix duplicate entries returned from getdents() system call
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-15-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 3c37fc86d20fe35be656f070997d62f75c2e4874)
+
+Fix a regression caused by commit d0156417, "ext4: fix ext4_dx_readdir
+hash collision handling", where deleting files in a large directory
+(requiring more than one getdents system call), results in some
+filenames being returned twice.  This was caused by a failure to
+update info->curr_hash and info->curr_minor_hash, so that if the
+directory had gotten modified since the last getdents() system call
+(as would be the case if the user is running "rm -r" or "git clean"),
+a directory entry would get returned twice to the userspace.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
+This patch fixes the bug reported by Markus Trippelsdorf at:
+http://bugzilla.kernel.org/show_bug.cgi?id=11844
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Tested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/dir.c |   20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+--- a/fs/ext4/dir.c
++++ b/fs/ext4/dir.c
+@@ -458,17 +458,8 @@ static int ext4_dx_readdir(struct file *
+       if (info->extra_fname) {
+               if (call_filldir(filp, dirent, filldir, info->extra_fname))
+                       goto finished;
+-
+               info->extra_fname = NULL;
+-              info->curr_node = rb_next(info->curr_node);
+-              if (!info->curr_node) {
+-                      if (info->next_hash == ~0) {
+-                              filp->f_pos = EXT4_HTREE_EOF;
+-                              goto finished;
+-                      }
+-                      info->curr_hash = info->next_hash;
+-                      info->curr_minor_hash = 0;
+-              }
++              goto next_node;
+       } else if (!info->curr_node)
+               info->curr_node = rb_first(&info->root);
+ 
+@@ -500,9 +491,14 @@ static int ext4_dx_readdir(struct file *
+               info->curr_minor_hash = fname->minor_hash;
+               if (call_filldir(filp, dirent, filldir, fname))
+                       break;
+-
++      next_node:
+               info->curr_node = rb_next(info->curr_node);
+-              if (!info->curr_node) {
++              if (info->curr_node) {
++                      fname = rb_entry(info->curr_node, struct fname,
++                                       rb_hash);
++                      info->curr_hash = fname->hash;
++                      info->curr_minor_hash = fname->minor_hash;
++              } else {
+                       if (info->next_hash == ~0) {
+                               filp->f_pos = EXT4_HTREE_EOF;
+                               break;
diff --git a/queue-2.6.27/ext4-fix-initialization-of-uninit-bitmap-blocks.patch b/queue-2.6.27/ext4-fix-initialization-of-uninit-bitmap-blocks.patch

new file mode 100644 (file)

index 0000000..bd52f8a
--- /dev/null
+++ b/queue-2.6.27/ext4-fix-initialization-of-uninit-bitmap-blocks.patch
@@ -0,0 +1,86 @@
+From tytso@mit.edu  Wed Dec  3 10:03:38 2008
+From: Frederic Bohe <frederic.bohe@bull.net>
+Date: Sun, 16 Nov 2008 11:05:28 -0500
+Subject: ext4: fix initialization of UNINIT bitmap blocks
+To: stable@kernel.org
+Cc: Frederic Bohe <frederic.bohe@bull.net>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-9-git-send-email-tytso@mit.edu>
+
+From: Frederic Bohe <frederic.bohe@bull.net>
+
+(cherry picked from commit c806e68f5647109350ec546fee5b526962970fd2)
+
+This fixes a bug which caused on-line resizing of filesystems with a
+1k blocksize to fail.  The root cause of this bug was the fact that if
+an uninitialized bitmap block gets read in by userspace (which
+e2fsprogs does try to avoid, but can happen when the blocksize is less
+than the pagesize and an adjacent block is read into memory)
+ext4_read_block_bitmap() was erroneously depending on the buffer
+uptodate flag to decide whether it needed to initialize the bitmap
+block in memory --- i.e., to set the standard set of blocks in use by
+a block group (superblock, bitmaps, inode table, etc.).  Essentially,
+ext4_read_block_bitmap() assumed it was the only routine that might
+try to read a block containing a block bitmap, which is simply not
+true.
+
+To fix this, ext4_read_block_bitmap() and ext4_read_inode_bitmap()
+must always initialize uninitialized bitmap blocks.  Once a block or
+inode is allocated out of that bitmap, it will be marked as
+initialized in the block group descriptor, so in general this won't
+result in any extra unnecessary work.
+
+Signed-off-by: Frederic Bohe <frederic.bohe@bull.net>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/balloc.c  |    4 +++-
+ fs/ext4/ialloc.c  |    4 +++-
+ fs/ext4/mballoc.c |    4 +++-
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -318,9 +318,11 @@ ext4_read_block_bitmap(struct super_bloc
+                           block_group, bitmap_blk);
+               return NULL;
+       }
+-      if (bh_uptodate_or_lock(bh))
++      if (buffer_uptodate(bh) &&
++          !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
+               return bh;
+ 
++      lock_buffer(bh);
+       spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+       if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+               ext4_init_block_bitmap(sb, bh, block_group, desc);
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_bloc
+                           block_group, bitmap_blk);
+               return NULL;
+       }
+-      if (bh_uptodate_or_lock(bh))
++      if (buffer_uptodate(bh) &&
++          !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+               return bh;
+ 
++      lock_buffer(bh);
+       spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
+       if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+               ext4_init_inode_bitmap(sb, bh, block_group, desc);
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -784,9 +784,11 @@ static int ext4_mb_init_cache(struct pag
+               if (bh[i] == NULL)
+                       goto out;
+ 
+-              if (bh_uptodate_or_lock(bh[i]))
++              if (buffer_uptodate(bh[i]) &&
++                  !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
+                       continue;
+ 
++              lock_buffer(bh[i]);
+               spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+               if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+                       ext4_init_block_bitmap(sb, bh[i],
diff --git a/queue-2.6.27/ext4-fix-xattr-deadlock.patch b/queue-2.6.27/ext4-fix-xattr-deadlock.patch

new file mode 100644 (file)

index 0000000..7da443e
--- /dev/null
+++ b/queue-2.6.27/ext4-fix-xattr-deadlock.patch
@@ -0,0 +1,58 @@
+From tytso@mit.edu  Wed Dec  3 10:42:04 2008
+From: Kalpak Shah <kalpak.shah@sun.com>
+Date: Sun, 16 Nov 2008 11:05:31 -0500
+Subject: ext4: fix xattr deadlock
+To: stable@kernel.org
+Cc: Kalpak Shah <kalpak.shah@sun.com>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-12-git-send-email-tytso@mit.edu>
+
+
+From: Kalpak Shah <kalpak.shah@sun.com>
+
+(cherry picked from commit 4d20c685fa365766a8f13584b4c8178a15ab7103)
+
+ext4_xattr_set_handle() eventually ends up calling
+ext4_mark_inode_dirty() which tries to expand the inode by shifting
+the EAs.  This leads to the xattr_sem being downed again, resulting
+in a deadlock.
+
+This patch makes sure that if ext4_xattr_set_handle() is in the
+call-chain, ext4_mark_inode_dirty() will not expand the inode.
+
+Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/xattr.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, 
+       struct ext4_xattr_block_find bs = {
+               .s = { .not_found = -ENODATA, },
+       };
++      unsigned long no_expand;
+       int error;
+ 
+       if (!name)
+@@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, 
+       if (strlen(name) > 255)
+               return -ERANGE;
+       down_write(&EXT4_I(inode)->xattr_sem);
++      no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND;
++      EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
++
+       error = ext4_get_inode_loc(inode, &is.iloc);
+       if (error)
+               goto cleanup;
+@@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, 
+ cleanup:
+       brelse(is.iloc.bh);
+       brelse(bs.bh);
++      if (no_expand == 0)
++              EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
+       up_write(&EXT4_I(inode)->xattr_sem);
+       return error;
+ }
diff --git a/queue-2.6.27/ext4-free-ext4_prealloc_space-using-kmem_cache_free.patch b/queue-2.6.27/ext4-free-ext4_prealloc_space-using-kmem_cache_free.patch

new file mode 100644 (file)

index 0000000..06d81d5
--- /dev/null
+++ b/queue-2.6.27/ext4-free-ext4_prealloc_space-using-kmem_cache_free.patch
@@ -0,0 +1,34 @@
+From tytso@mit.edu  Wed Dec  3 10:42:23 2008
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Sun, 16 Nov 2008 11:05:32 -0500
+Subject: ext4: Free ext4_prealloc_space using kmem_cache_free
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1226851540-8032-13-git-send-email-tytso@mit.edu>
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 688f05a01983711a4e715b1d6e15a89a89c96a66)
+
+We should use kmem_cache_free to free memory allocated
+via kmem_cache_alloc
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/mballoc.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2577,7 +2577,7 @@ static void ext4_mb_cleanup_pa(struct ex
+               pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
+               list_del(&pa->pa_group_list);
+               count++;
+-              kfree(pa);
++              kmem_cache_free(ext4_pspace_cachep, pa);
+       }
+       if (count)
+               mb_debug("mballoc: %u PAs left\n", count);
diff --git a/queue-2.6.27/ext4-jbd2-avoid-warn-messages-when-failing-to-write-to-the-superblock.patch b/queue-2.6.27/ext4-jbd2-avoid-warn-messages-when-failing-to-write-to-the-superblock.patch

new file mode 100644 (file)

index 0000000..16e6615
--- /dev/null
+++ b/queue-2.6.27/ext4-jbd2-avoid-warn-messages-when-failing-to-write-to-the-superblock.patch
@@ -0,0 +1,104 @@
+From tytso@mit.edu  Wed Dec  3 10:02:23 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:27 -0500
+Subject: ext4/jbd2: Avoid WARN() messages when failing to write to the superblock
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-8-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 914258bf2cb22bf4336a1b1d90c551b4b11ca5aa)
+
+This fixes some very common warnings reported by kerneloops.org
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/super.c   |   23 ++++++++++++++++++++++-
+ fs/jbd2/journal.c |   27 +++++++++++++++++++++++++--
+ 2 files changed, 47 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2804,13 +2804,34 @@ static void ext4_commit_super(struct sup
+ 
+       if (!sbh)
+               return;
++      if (buffer_write_io_error(sbh)) {
++              /*
++               * Oh, dear.  A previous attempt to write the
++               * superblock failed.  This could happen because the
++               * USB device was yanked out.  Or it could happen to
++               * be a transient write error and maybe the block will
++               * be remapped.  Nothing we can do but to retry the
++               * write and hope for the best.
++               */
++              printk(KERN_ERR "ext4: previous I/O error to "
++                     "superblock detected for %s.\n", sb->s_id);
++              clear_buffer_write_io_error(sbh);
++              set_buffer_uptodate(sbh);
++      }
+       es->s_wtime = cpu_to_le32(get_seconds());
+       ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
+       es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+       BUFFER_TRACE(sbh, "marking dirty");
+       mark_buffer_dirty(sbh);
+-      if (sync)
++      if (sync) {
+               sync_dirty_buffer(sbh);
++              if (buffer_write_io_error(sbh)) {
++                      printk(KERN_ERR "ext4: I/O error while writing "
++                             "superblock for %s.\n", sb->s_id);
++                      clear_buffer_write_io_error(sbh);
++                      set_buffer_uptodate(sbh);
++              }
++      }
+ }
+ 
+ 
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -1259,6 +1259,22 @@ void jbd2_journal_update_superblock(jour
+               goto out;
+       }
+ 
++      if (buffer_write_io_error(bh)) {
++              /*
++               * Oh, dear.  A previous attempt to write the journal
++               * superblock failed.  This could happen because the
++               * USB device was yanked out.  Or it could happen to
++               * be a transient write error and maybe the block will
++               * be remapped.  Nothing we can do but to retry the
++               * write and hope for the best.
++               */
++              printk(KERN_ERR "JBD2: previous I/O error detected "
++                     "for journal superblock update for %s.\n",
++                     journal->j_devname);
++              clear_buffer_write_io_error(bh);
++              set_buffer_uptodate(bh);
++      }
++
+       spin_lock(&journal->j_state_lock);
+       jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+                 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+@@ -1270,9 +1286,16 @@ void jbd2_journal_update_superblock(jour
+ 
+       BUFFER_TRACE(bh, "marking dirty");
+       mark_buffer_dirty(bh);
+-      if (wait)
++      if (wait) {
+               sync_dirty_buffer(bh);
+-      else
++              if (buffer_write_io_error(bh)) {
++                      printk(KERN_ERR "JBD2: I/O error detected "
++                             "when updating journal superblock for %s.\n",
++                             journal->j_devname);
++                      clear_buffer_write_io_error(bh);
++                      set_buffer_uptodate(bh);
++              }
++      } else
+               ll_rw_block(SWRITE, 1, &bh);
+ 
+ out:
diff --git a/queue-2.6.27/ext4-mark-the-buffer_heads-as-dirty-and-uptodate-after-prepare_write.patch b/queue-2.6.27/ext4-mark-the-buffer_heads-as-dirty-and-uptodate-after-prepare_write.patch

new file mode 100644 (file)

index 0000000..792ec6c
--- /dev/null
+++ b/queue-2.6.27/ext4-mark-the-buffer_heads-as-dirty-and-uptodate-after-prepare_write.patch
@@ -0,0 +1,36 @@
+From tytso@mit.edu  Wed Dec  3 10:44:57 2008
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Sun, 16 Nov 2008 11:05:39 -0500
+Subject: ext4: Mark the buffer_heads as dirty and uptodate after prepare_write
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1226851540-8032-20-git-send-email-tytso@mit.edu>
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit ed9b3e3379731e9f9d2f73f3d7fd9e7d2ce3df4a)
+
+We need to make sure we mark the buffer_heads as dirty and uptodate
+so that block_write_full_page write them correctly.
+
+This fixes mmap corruptions that can occur in low memory situations.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2242,6 +2242,8 @@ static int ext4_da_writepage(struct page
+                       unlock_page(page);
+                       return 0;
+               }
++              /* now mark the buffer_heads as dirty and uptodate */
++              block_commit_write(page, 0, PAGE_CACHE_SIZE);
+       }
+ 
+       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
diff --git a/queue-2.6.27/ext4-renumber-ext4_ioc_migrate.patch b/queue-2.6.27/ext4-renumber-ext4_ioc_migrate.patch

new file mode 100644 (file)

index 0000000..4bbe5e6
--- /dev/null
+++ b/queue-2.6.27/ext4-renumber-ext4_ioc_migrate.patch
@@ -0,0 +1,50 @@
+From tytso@mit.edu  Wed Dec  3 09:59:19 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:26 -0500
+Subject: ext4: Renumber EXT4_IOC_MIGRATE
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-7-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 8eea80d52b9d87cfd771055534bd2c24f73704d7)
+
+Pick an ioctl number for EXT4_IOC_MIGRATE that won't conflict with
+other ext4 ioctl's.  Since there haven't been any major userspace
+users of this ioctl, we can afford to change this now, to avoid
+potential problems later.
+
+Also, reorder the ioctl numbers in ext4.h to avoid this sort of
+mistake in the future.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/ext4.h |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -291,8 +291,6 @@ struct ext4_new_group_data {
+ #define       EXT4_IOC_SETFLAGS               FS_IOC_SETFLAGS
+ #define       EXT4_IOC_GETVERSION             _IOR('f', 3, long)
+ #define       EXT4_IOC_SETVERSION             _IOW('f', 4, long)
+-#define EXT4_IOC_GROUP_EXTEND         _IOW('f', 7, unsigned long)
+-#define EXT4_IOC_GROUP_ADD            _IOW('f', 8,struct ext4_new_group_input)
+ #define       EXT4_IOC_GETVERSION_OLD         FS_IOC_GETVERSION
+ #define       EXT4_IOC_SETVERSION_OLD         FS_IOC_SETVERSION
+ #ifdef CONFIG_JBD2_DEBUG
+@@ -300,7 +298,10 @@ struct ext4_new_group_data {
+ #endif
+ #define EXT4_IOC_GETRSVSZ             _IOR('f', 5, long)
+ #define EXT4_IOC_SETRSVSZ             _IOW('f', 6, long)
+-#define EXT4_IOC_MIGRATE              _IO('f', 7)
++#define EXT4_IOC_GROUP_EXTEND         _IOW('f', 7, unsigned long)
++#define EXT4_IOC_GROUP_ADD            _IOW('f', 8, struct ext4_new_group_input)
++#define EXT4_IOC_MIGRATE              _IO('f', 9)
++ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
+ 
+ /*
+  * ioctl commands in 32 bit emulation
diff --git a/queue-2.6.27/ext4-update-flex_bg-free-blocks-and-free-inodes-counters-when-resizing.patch b/queue-2.6.27/ext4-update-flex_bg-free-blocks-and-free-inodes-counters-when-resizing.patch

new file mode 100644 (file)

index 0000000..db37c09
--- /dev/null
+++ b/queue-2.6.27/ext4-update-flex_bg-free-blocks-and-free-inodes-counters-when-resizing.patch
@@ -0,0 +1,59 @@
+From tytso@mit.edu  Wed Dec  3 09:54:56 2008
+From: Frederic Bohe <frederic.bohe@bull.net>
+Date: Sun, 16 Nov 2008 11:05:21 -0500
+Subject: ext4: Update flex_bg free blocks and free inodes counters when resizing.
+To: stable@kernel.org
+Cc: Frederic Bohe <frederic.bohe@bull.net>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-2-git-send-email-tytso@mit.edu>
+
+
+From: Frederic Bohe <frederic.bohe@bull.net>
+
+(cherry picked from commit c62a11fd9555007b1caab83b5bcbb443a43e32bb)
+
+This fixes a bug which prevented the newly created inodes after a
+resize from being used on filesystems with flex_bg.
+
+Signed-off-by: Frederic Bohe <frederic.bohe@bull.net>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/resize.c |    9 +++++++++
+ fs/ext4/super.c  |    7 +++++--
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -929,6 +929,15 @@ int ext4_group_add(struct super_block *s
+       percpu_counter_add(&sbi->s_freeinodes_counter,
+                          EXT4_INODES_PER_GROUP(sb));
+ 
++      if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
++              ext4_group_t flex_group;
++              flex_group = ext4_flex_group(sbi, input->group);
++              sbi->s_flex_groups[flex_group].free_blocks +=
++                      input->free_blocks_count;
++              sbi->s_flex_groups[flex_group].free_inodes +=
++                      EXT4_INODES_PER_GROUP(sb);
++      }
++
+       ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+       sb->s_dirt = 1;
+ 
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -1504,8 +1504,11 @@ static int ext4_fill_flex_info(struct su
+       sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
+       groups_per_flex = 1 << sbi->s_log_groups_per_flex;
+ 
+-      flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
+-              groups_per_flex;
++      /* We allocate both existing and potentially added groups */
++      flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
++                          ((sbi->s_es->s_reserved_gdt_blocks +1 ) <<
++                            EXT4_DESC_PER_BLOCK_BITS(sb))) /
++                         groups_per_flex;
+       sbi->s_flex_groups = kzalloc(flex_group_count *
+                                    sizeof(struct flex_groups), GFP_KERNEL);
+       if (sbi->s_flex_groups == NULL) {
diff --git a/queue-2.6.27/ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch b/queue-2.6.27/ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch

new file mode 100644 (file)

index 0000000..296676a
--- /dev/null
+++ b/queue-2.6.27/ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch
@@ -0,0 +1,75 @@
+From tytso@mit.edu  Wed Dec  3 10:44:07 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:37 -0500
+Subject: ext4: wait on all pending commits in ext4_sync_fs()
+To: stable@kernel.org
+Cc: Eric Sandeen <sandeen@redhat.com>, Andrew Morton <akpm@linux-foundation.org>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, Arthur Jones <ajones@riverbed.com>
+Message-ID: <1226851540-8032-18-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 14ce0cb411c88681ab8f3a4c9caa7f42e97a3184)
+
+In ext4_sync_fs, we only wait for a commit to finish if we started it,
+but there may be one already in progress which will not be synced.
+
+In the case of a data=ordered umount with pending long symlinks which
+are delayed due to a long list of other I/O on the backing block
+device, this causes the buffer associated with the long symlinks to
+not be moved to the inode dirty list in the second phase of
+fsync_super.  Then, before they can be dirtied again, kjournald exits,
+seeing the UMOUNT flag and the dirty pages are never written to the
+backing block device, causing long symlink corruption and exposing new
+or previously freed block data to userspace.
+
+To ensure all commits are synced, we flush all journal commits now
+when sync_fs'ing ext4.
+
+Signed-off-by: Arthur Jones <ajones@riverbed.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/super.c |   19 ++++++++-----------
+ 1 file changed, 8 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -2920,12 +2920,9 @@ int ext4_force_commit(struct super_block
+ /*
+  * Ext4 always journals updates to the superblock itself, so we don't
+  * have to propagate any other updates to the superblock on disk at this
+- * point.  Just start an async writeback to get the buffers on their way
+- * to the disk.
+- *
+- * This implicitly triggers the writebehind on sync().
++ * point.  (We can probably nuke this function altogether, and remove
++ * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
+  */
+-
+ static void ext4_write_super(struct super_block *sb)
+ {
+       if (mutex_trylock(&sb->s_lock) != 0)
+@@ -2935,14 +2932,14 @@ static void ext4_write_super(struct supe
+ 
+ static int ext4_sync_fs(struct super_block *sb, int wait)
+ {
+-      tid_t target;
++      int ret = 0;
+ 
+       sb->s_dirt = 0;
+-      if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) {
+-              if (wait)
+-                      jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target);
+-      }
+-      return 0;
++      if (wait)
++              ret = ext4_force_commit(sb);
++      else
++              jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
++      return ret;
+ }
+ 
+ /*
diff --git a/queue-2.6.27/jbd-ordered-data-integrity-fix.patch b/queue-2.6.27/jbd-ordered-data-integrity-fix.patch

new file mode 100644 (file)

index 0000000..db14a0e
--- /dev/null
+++ b/queue-2.6.27/jbd-ordered-data-integrity-fix.patch
@@ -0,0 +1,71 @@
+From 960a22ae60c8a723bd17da3b929fe0bcea6d007e Mon Sep 17 00:00:00 2001
+From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
+Date: Sat, 18 Oct 2008 20:27:58 -0700
+Subject: jbd: ordered data integrity fix
+
+From: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
+
+commit 960a22ae60c8a723bd17da3b929fe0bcea6d007e upstream.
+
+In ordered mode, if a file data buffer being dirtied exists in the
+committing transaction, we write the buffer to the disk, move it from the
+committing transaction to the running transaction, then dirty it.  But we
+must not remove the buffer from the committing transaction when the
+buffer couldn't be written out; otherwise it would miss the error and the
+committing transaction would not abort.
+
+This patch adds an error check before removing the buffer from the
+committing transaction.
+
+Signed-off-by: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
+Acked-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/jbd/transaction.c |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/fs/jbd/transaction.c
++++ b/fs/jbd/transaction.c
+@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle,
+       journal_t *journal = handle->h_transaction->t_journal;
+       int need_brelse = 0;
+       struct journal_head *jh;
++      int ret = 0;
+ 
+       if (is_handle_aborted(handle))
+-              return 0;
++              return ret;
+ 
+       jh = journal_add_journal_head(bh);
+       JBUFFER_TRACE(jh, "entry");
+@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle,
+                                  time if it is redirtied */
+                       }
+ 
+-                      /* journal_clean_data_list() may have got there first */
++                      /*
++                       * We cannot remove the buffer with io error from the
++                       * committing transaction, because otherwise it would
++                       * miss the error and the commit would not abort.
++                       */
++                      if (unlikely(!buffer_uptodate(bh))) {
++                              ret = -EIO;
++                              goto no_journal;
++                      }
++
+                       if (jh->b_transaction != NULL) {
+                               JBUFFER_TRACE(jh, "unfile from commit");
+                               __journal_temp_unlink_buffer(jh);
+@@ -1108,7 +1118,7 @@ no_journal:
+       }
+       JBUFFER_TRACE(jh, "exit");
+       journal_put_journal_head(jh);
+-      return 0;
++      return ret;
+ }
+ 
+ /**
diff --git a/queue-2.6.27/jbd2-abort-instead-of-waiting-for-nonexistent-transaction.patch b/queue-2.6.27/jbd2-abort-instead-of-waiting-for-nonexistent-transaction.patch

new file mode 100644 (file)

index 0000000..58b5aa9
--- /dev/null
+++ b/queue-2.6.27/jbd2-abort-instead-of-waiting-for-nonexistent-transaction.patch
@@ -0,0 +1,70 @@
+From tytso@mit.edu  Wed Dec  3 10:04:03 2008
+From: Duane Griffin <duaneg@dghda.com>
+Date: Sun, 16 Nov 2008 11:05:29 -0500
+Subject: jbd2: abort instead of waiting for nonexistent transaction
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Andrew Morton <akpm@linux-foundation.org>, Ext4 Developers List <linux-ext4@vger.kernel.org>, Sami Liedes <sliedes@cc.hut.fi>, Duane Griffin <duaneg@dghda.com>
+Message-ID: <1226851540-8032-10-git-send-email-tytso@mit.edu>
+
+
+From: Duane Griffin <duaneg@dghda.com>
+
+(cherry picked from commit 23f8b79eae8a74e42a006ffa7c456e295c7e1c0d)
+
+The __jbd2_log_wait_for_space function sits in a loop checkpointing
+transactions until there is sufficient space free in the journal.
+However, if there are no transactions to be processed (e.g.  because the
+free space calculation is wrong due to a corrupted filesystem) it will
+never progress.
+
+Check for space being required when no transactions are outstanding and
+abort the journal instead of endlessly looping.
+
+This patch fixes the bug reported by Sami Liedes at:
+http://bugzilla.kernel.org/show_bug.cgi?id=10976
+
+Signed-off-by: Duane Griffin <duaneg@dghda.com>
+Cc: Sami Liedes <sliedes@cc.hut.fi>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/jbd2/checkpoint.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -126,14 +126,29 @@ void __jbd2_log_wait_for_space(journal_t
+ 
+               /*
+                * Test again, another process may have checkpointed while we
+-               * were waiting for the checkpoint lock
++               * were waiting for the checkpoint lock. If there are no
++               * outstanding transactions there is nothing to checkpoint and
++               * we can't make progress. Abort the journal in this case.
+                */
+               spin_lock(&journal->j_state_lock);
++              spin_lock(&journal->j_list_lock);
+               nblocks = jbd_space_needed(journal);
+               if (__jbd2_log_space_left(journal) < nblocks) {
++                      int chkpt = journal->j_checkpoint_transactions != NULL;
++
++                      spin_unlock(&journal->j_list_lock);
+                       spin_unlock(&journal->j_state_lock);
+-                      jbd2_log_do_checkpoint(journal);
++                      if (chkpt) {
++                              jbd2_log_do_checkpoint(journal);
++                      } else {
++                              printk(KERN_ERR "%s: no transactions\n",
++                                     __func__);
++                              jbd2_journal_abort(journal, 0);
++                      }
++
+                       spin_lock(&journal->j_state_lock);
++              } else {
++                      spin_unlock(&journal->j_list_lock);
+               }
+               mutex_unlock(&journal->j_checkpoint_mutex);
+       }
diff --git a/queue-2.6.27/jbd2-don-t-give-up-looking-for-space-so-easily-in-__jbd2_log_wait_for_space.patch b/queue-2.6.27/jbd2-don-t-give-up-looking-for-space-so-easily-in-__jbd2_log_wait_for_space.patch

new file mode 100644 (file)

index 0000000..fb9ddc9
--- /dev/null
+++ b/queue-2.6.27/jbd2-don-t-give-up-looking-for-space-so-easily-in-__jbd2_log_wait_for_space.patch
@@ -0,0 +1,98 @@
+From tytso@mit.edu  Wed Dec  3 10:43:25 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:35 -0500
+Subject: jbd2: don't give up looking for space so easily in __jbd2_log_wait_for_space
+To: stable@kernel.org
+Cc: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>, Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>, Duane Griffin <duaneg@dghda.com>
+Message-ID: <1226851540-8032-16-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 8c3f25d8950c3e9fe6c9849f88679b3f2a071550)
+
+Commit 23f8b79e introduced a regression because it assumed that if
+there were no transactions ready to be checkpointed, that no progress
+could be made on making space available in the journal, and so the
+journal should be aborted.  This assumption is false; it could be the
+case that simply calling jbd2_cleanup_journal_tail() will recover the
+necessary space, or, for small journals, the currently committing
+transaction could be responsible for chewing up the required space in
+the log, so we need to wait for the currently committing transaction
+to finish before trying to force a checkpoint operation.
+
+This patch fixes a bug reported by Mihai Harpau at:
+https://bugzilla.redhat.com/show_bug.cgi?id=469582
+
+This patch fixes a bug reported by François Valenduc at:
+http://bugzilla.kernel.org/show_bug.cgi?id=11840
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: Duane Griffin <duaneg@dghda.com>
+Cc: Toshiyuki Okajima <toshi.okajima@jp.fujitsu.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/jbd2/checkpoint.c |   32 +++++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+--- a/fs/jbd2/checkpoint.c
++++ b/fs/jbd2/checkpoint.c
+@@ -114,7 +114,7 @@ static int __try_to_free_cp_buf(struct j
+  */
+ void __jbd2_log_wait_for_space(journal_t *journal)
+ {
+-      int nblocks;
++      int nblocks, space_left;
+       assert_spin_locked(&journal->j_state_lock);
+ 
+       nblocks = jbd_space_needed(journal);
+@@ -127,25 +127,43 @@ void __jbd2_log_wait_for_space(journal_t
+               /*
+                * Test again, another process may have checkpointed while we
+                * were waiting for the checkpoint lock. If there are no
+-               * outstanding transactions there is nothing to checkpoint and
+-               * we can't make progress. Abort the journal in this case.
++               * transactions ready to be checkpointed, try to recover
++               * journal space by calling cleanup_journal_tail(), and if
++               * that doesn't work, by waiting for the currently committing
++               * transaction to complete.  If there is absolutely no way
++               * to make progress, this is either a BUG or corrupted
++               * filesystem, so abort the journal and leave a stack
++               * trace for forensic evidence.
+                */
+               spin_lock(&journal->j_state_lock);
+               spin_lock(&journal->j_list_lock);
+               nblocks = jbd_space_needed(journal);
+-              if (__jbd2_log_space_left(journal) < nblocks) {
++              space_left = __jbd2_log_space_left(journal);
++              if (space_left < nblocks) {
+                       int chkpt = journal->j_checkpoint_transactions != NULL;
++                      tid_t tid = 0;
+ 
++                      if (journal->j_committing_transaction)
++                              tid = journal->j_committing_transaction->t_tid;
+                       spin_unlock(&journal->j_list_lock);
+                       spin_unlock(&journal->j_state_lock);
+                       if (chkpt) {
+                               jbd2_log_do_checkpoint(journal);
++                      } else if (jbd2_cleanup_journal_tail(journal) == 0) {
++                              /* We were able to recover space; yay! */
++                              ;
++                      } else if (tid) {
++                              jbd2_log_wait_commit(journal, tid);
+                       } else {
+-                              printk(KERN_ERR "%s: no transactions\n",
+-                                     __func__);
++                              printk(KERN_ERR "%s: needed %d blocks and "
++                                     "only had %d space available\n",
++                                     __func__, nblocks, space_left);
++                              printk(KERN_ERR "%s: no way to get more "
++                                     "journal space in %s\n", __func__,
++                                     journal->j_devname);
++                              WARN_ON(1);
+                               jbd2_journal_abort(journal, 0);
+                       }
+-
+                       spin_lock(&journal->j_state_lock);
+               } else {
+                       spin_unlock(&journal->j_list_lock);
diff --git a/queue-2.6.27/jbd2-fix-buffer-head-leak-when-writing-the-commit-block.patch b/queue-2.6.27/jbd2-fix-buffer-head-leak-when-writing-the-commit-block.patch

new file mode 100644 (file)

index 0000000..595912d
--- /dev/null
+++ b/queue-2.6.27/jbd2-fix-buffer-head-leak-when-writing-the-commit-block.patch
@@ -0,0 +1,45 @@
+From tytso@mit.edu  Wed Dec  3 10:04:23 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:30 -0500
+Subject: jbd2: Fix buffer head leak when writing the commit block
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-11-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 45a90bfd90c1215bf824c0f705b409723f52361b)
+
+Also make sure the buffer heads are marked clean before submitting bh
+for writing.  The previous code was marking the buffer head dirty,
+which would have forced an unneeded write (and seek) to the journal
+for no good reason.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/jbd2/commit.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -126,8 +126,7 @@ static int journal_submit_commit_record(
+ 
+       JBUFFER_TRACE(descriptor, "submit commit block");
+       lock_buffer(bh);
+-      get_bh(bh);
+-      set_buffer_dirty(bh);
++      clear_buffer_dirty(bh);
+       set_buffer_uptodate(bh);
+       bh->b_end_io = journal_end_buffer_io_sync;
+ 
+@@ -160,7 +159,7 @@ static int journal_submit_commit_record(
+               /* And try again, without the barrier */
+               lock_buffer(bh);
+               set_buffer_uptodate(bh);
+-              set_buffer_dirty(bh);
++              clear_buffer_dirty(bh);
+               ret = submit_bh(WRITE, bh);
+       }
+       *cbh = bh;
diff --git a/queue-2.6.27/jbd2-fix-proc-setup-for-devices-that-contain-in-their-names.patch b/queue-2.6.27/jbd2-fix-proc-setup-for-devices-that-contain-in-their-names.patch

new file mode 100644 (file)

index 0000000..2d30dd0
--- /dev/null
+++ b/queue-2.6.27/jbd2-fix-proc-setup-for-devices-that-contain-in-their-names.patch
@@ -0,0 +1,105 @@
+From tytso@mit.edu  Wed Dec  3 09:56:53 2008
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Sun, 16 Nov 2008 11:05:23 -0500
+Subject: jbd2: fix /proc setup for devices that contain '/' in their names
+To: stable@kernel.org
+Cc: Ext4 Developers List <linux-ext4@vger.kernel.org>, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1226851540-8032-4-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+trimmed down version of commit 05496769e5da83ce22ed97345afd9c7b71d6bd24 upstream.
+
+Some devices such as "cciss/c0d0p9" will cause jbd2 setup and teardown
+failures when /proc filenames are created with embedded slashes.  This
+is a slimmed down version of commit 05496769, with the stack reduction
+aspects of the patch omitted to meet the -stable criteria.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/jbd2/journal.c    |   22 ++++++++++++++--------
+ include/linux/jbd2.h |    3 ++-
+ 2 files changed, 16 insertions(+), 9 deletions(-)
+
+--- a/fs/jbd2/journal.c
++++ b/fs/jbd2/journal.c
+@@ -901,10 +901,7 @@ static struct proc_dir_entry *proc_jbd2_
+ 
+ static void jbd2_stats_proc_init(journal_t *journal)
+ {
+-      char name[BDEVNAME_SIZE];
+-
+-      bdevname(journal->j_dev, name);
+-      journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
++      journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
+       if (journal->j_proc_entry) {
+               proc_create_data("history", S_IRUGO, journal->j_proc_entry,
+                                &jbd2_seq_history_fops, journal);
+@@ -915,12 +912,9 @@ static void jbd2_stats_proc_init(journal
+ 
+ static void jbd2_stats_proc_exit(journal_t *journal)
+ {
+-      char name[BDEVNAME_SIZE];
+-
+-      bdevname(journal->j_dev, name);
+       remove_proc_entry("info", journal->j_proc_entry);
+       remove_proc_entry("history", journal->j_proc_entry);
+-      remove_proc_entry(name, proc_jbd2_stats);
++      remove_proc_entry(journal->j_devname, proc_jbd2_stats);
+ }
+ 
+ static void journal_init_stats(journal_t *journal)
+@@ -1018,6 +1012,7 @@ journal_t * jbd2_journal_init_dev(struct
+ {
+       journal_t *journal = journal_init_common();
+       struct buffer_head *bh;
++      char *p;
+       int n;
+ 
+       if (!journal)
+@@ -1039,6 +1034,10 @@ journal_t * jbd2_journal_init_dev(struct
+       journal->j_fs_dev = fs_dev;
+       journal->j_blk_offset = start;
+       journal->j_maxlen = len;
++      bdevname(journal->j_dev, journal->j_devname);
++      p = journal->j_devname;
++      while ((p = strchr(p, '/')))
++              *p = '!';
+       jbd2_stats_proc_init(journal);
+ 
+       bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+@@ -1061,6 +1060,7 @@ journal_t * jbd2_journal_init_inode (str
+ {
+       struct buffer_head *bh;
+       journal_t *journal = journal_init_common();
++      char *p;
+       int err;
+       int n;
+       unsigned long long blocknr;
+@@ -1070,6 +1070,12 @@ journal_t * jbd2_journal_init_inode (str
+ 
+       journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
+       journal->j_inode = inode;
++      bdevname(journal->j_dev, journal->j_devname);
++      p = journal->j_devname;
++      while ((p = strchr(p, '/')))
++              *p = '!';
++      p = journal->j_devname + strlen(journal->j_devname);
++      sprintf(p, ":%lu", journal->j_inode->i_ino);
+       jbd_debug(1,
+                 "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
+                 journal, inode->i_sb->s_id, inode->i_ino,
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -850,7 +850,8 @@ struct journal_s
+        */
+       struct block_device     *j_dev;
+       int                     j_blocksize;
+-      unsigned long long              j_blk_offset;
++      unsigned long long      j_blk_offset;
++      char                    j_devname[BDEVNAME_SIZE+24];
+ 
+       /*
+        * Device which holds the client fs.  For internal journal this will be
diff --git a/queue-2.6.27/series b/queue-2.6.27/series

index 82b514b199a6e73478eb17f6c1fa5c41fbf5a1e9..98b8928833278830a315be1449eb5ce6efaa8990 100644 (file)
--- a/queue-2.6.27/series
+++ b/queue-2.6.27/series
@@ -76,3 +76,29 @@ cifs-fix-build-break.patch
  cifs-fix-check-for-tcon-seal-setting-and-fix-oops-on-failed-mount-from-earlier-patch.patch
  cifs-prevent-cifs_writepages-from-skipping-unwritten-pages.patch
  cifs-fix-check-for-dead-tcon-in-smb_init.patch
+ext4-update-flex_bg-free-blocks-and-free-inodes-counters-when-resizing.patch
+ext4-fix-11321-create-proc-ext4-stats-more-carefully.patch
+jbd2-fix-proc-setup-for-devices-that-contain-in-their-names.patch
+ext4-add-missing-unlock-in-ext4_check_descriptors-on-error-path.patch
+ext4-elevate-write-count-for-migrate-ioctl.patch
+ext4-renumber-ext4_ioc_migrate.patch
+ext4-jbd2-avoid-warn-messages-when-failing-to-write-to-the-superblock.patch
+ext4-fix-initialization-of-uninit-bitmap-blocks.patch
+jbd2-abort-instead-of-waiting-for-nonexistent-transaction.patch
+jbd2-fix-buffer-head-leak-when-writing-the-commit-block.patch
+ext4-fix-xattr-deadlock.patch
+ext4-free-ext4_prealloc_space-using-kmem_cache_free.patch
+ext4-do-mballoc-init-before-doing-filesystem-recovery.patch
+ext4-fix-duplicate-entries-returned-from-getdents-system-call.patch
+jbd2-don-t-give-up-looking-for-space-so-easily-in-__jbd2_log_wait_for_space.patch
+ext4-convert-to-host-order-before-using-the-values.patch
+ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch
+ext4-calculate-journal-credits-correctly.patch
+ext4-mark-the-buffer_heads-as-dirty-and-uptodate-after-prepare_write.patch
+ext4-add-checksum-calculation-when-clearing-uninit-flag-in-ext4_new_inode.patch
+ext3-fix-ext3_dx_readdir-hash-collision-handling.patch
+ext3-fix-duplicate-entries-returned-from-getdents-system-call.patch
+ext3-don-t-try-to-resize-if-there-are-no-reserved-gdt-blocks-left.patch
+ext2-fix-ext2-block-reservation-early-enospc-issue.patch
+ext3-fix-ext3-block-reservation-early-enospc-issue.patch
+jbd-ordered-data-integrity-fix.patch
author	Greg Kroah-Hartman <gregkh@suse.de>
	Wed, 3 Dec 2008 18:55:16 +0000 (10:55 -0800)
committer	Greg Kroah-Hartman <gregkh@suse.de>
	Wed, 3 Dec 2008 18:55:16 +0000 (10:55 -0800)
queue-2.6.27/ext2-fix-ext2-block-reservation-early-enospc-issue.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext3-don-t-try-to-resize-if-there-are-no-reserved-gdt-blocks-left.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext3-fix-duplicate-entries-returned-from-getdents-system-call.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext3-fix-ext3-block-reservation-early-enospc-issue.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext3-fix-ext3_dx_readdir-hash-collision-handling.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-add-checksum-calculation-when-clearing-uninit-flag-in-ext4_new_inode.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-add-missing-unlock-in-ext4_check_descriptors-on-error-path.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-calculate-journal-credits-correctly.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-convert-to-host-order-before-using-the-values.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-do-mballoc-init-before-doing-filesystem-recovery.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-elevate-write-count-for-migrate-ioctl.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-fix-11321-create-proc-ext4-stats-more-carefully.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-fix-duplicate-entries-returned-from-getdents-system-call.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-fix-initialization-of-uninit-bitmap-blocks.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-fix-xattr-deadlock.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-free-ext4_prealloc_space-using-kmem_cache_free.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-jbd2-avoid-warn-messages-when-failing-to-write-to-the-superblock.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-mark-the-buffer_heads-as-dirty-and-uptodate-after-prepare_write.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-renumber-ext4_ioc_migrate.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-update-flex_bg-free-blocks-and-free-inodes-counters-when-resizing.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/jbd-ordered-data-integrity-fix.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/jbd2-abort-instead-of-waiting-for-nonexistent-transaction.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/jbd2-don-t-give-up-looking-for-space-so-easily-in-__jbd2_log_wait_for_space.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/jbd2-fix-buffer-head-leak-when-writing-the-commit-block.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/jbd2-fix-proc-setup-for-devices-that-contain-in-their-names.patch	[new file with mode: 0644]	patch \| blob
queue-2.6.27/series		patch \| blob \| blame \| history