--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:55 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:22 -0400
+Subject: ext4: add EXT4_IOC_ALLOC_DA_BLKS ioctl
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-10-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit ccd2506bd43113659aa904d5bea5d1300605e2a6)
+
+Add an ioctl which forces all of the delay allocated blocks to be
+allocated. This also provides a function ext4_alloc_da_blocks() which
+will be used by the following commits to force files to be fully
+allocated to preserve application-expected ext3 behaviour.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h | 3 +++
+ fs/ext4/inode.c | 42 ++++++++++++++++++++++++++++++++++++++++++
+ fs/ext4/ioctl.c | 14 ++++++++++++++
+ 3 files changed, 59 insertions(+)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -326,7 +326,9 @@ struct ext4_new_group_data {
+ #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
+ #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
+ #define EXT4_IOC_MIGRATE _IO('f', 9)
++ /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
+ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
++#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
+
+ /*
+ * ioctl commands in 32 bit emulation
+@@ -1102,6 +1104,7 @@ extern int ext4_can_truncate(struct inod
+ extern void ext4_truncate (struct inode *);
+ extern void ext4_set_inode_flags(struct inode *);
+ extern void ext4_get_inode_flags(struct ext4_inode_info *);
++extern int ext4_alloc_da_blocks(struct inode *inode);
+ extern void ext4_set_aops(struct inode *inode);
+ extern int ext4_writepage_trans_blocks(struct inode *);
+ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2585,6 +2585,48 @@ out:
+ return;
+ }
+
++/*
++ * Force all delayed allocation blocks to be allocated for a given inode.
++ */
++int ext4_alloc_da_blocks(struct inode *inode)
++{
++ if (!EXT4_I(inode)->i_reserved_data_blocks &&
++ !EXT4_I(inode)->i_reserved_meta_blocks)
++ return 0;
++
++ /*
++ * We do something simple for now. The filemap_flush() will
++ * also start triggering a write of the data blocks, which is
++ * not strictly speaking necessary (and for users of
++ * laptop_mode, not even desirable). However, to do otherwise
++ * would require replicating code paths in:
++ *
++ * ext4_da_writepages() ->
++ * write_cache_pages() ---> (via passed in callback function)
++ * __mpage_da_writepage() -->
++ * mpage_add_bh_to_extent()
++ * mpage_da_map_blocks()
++ *
++ * The problem is that write_cache_pages(), located in
++ * mm/page-writeback.c, marks pages clean in preparation for
++ * doing I/O, which is not desirable if we're not planning on
++ * doing I/O at all.
++ *
++ * We could call write_cache_pages(), and then redirty all of
++ * the pages by calling redirty_page_for_writeback() but that
++ * would be ugly in the extreme. So instead we would need to
++ * replicate parts of the code in the above functions,
++ * simplifying them becuase we wouldn't actually intend to
++ * write out the pages, but rather only collect contiguous
++ * logical block extents, call the multi-block allocator, and
++ * then update the buffer heads with the block allocations.
++ *
++ * For now, though, we'll cheat by calling filemap_flush(),
++ * which will map the blocks, and start the I/O, but not
++ * actually wait for the I/O to complete.
++ */
++ return filemap_flush(inode->i_mapping);
++}
+
+ /*
+ * bmap() is special. It gets used by applications such as lilo and by
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -287,6 +287,20 @@ setversion_out:
+ return err;
+ }
+
++ case EXT4_IOC_ALLOC_DA_BLKS:
++ {
++ int err;
++ if (!is_owner_or_cap(inode))
++ return -EACCES;
++
++ err = mnt_want_write(filp->f_path.mnt);
++ if (err)
++ return err;
++ err = ext4_alloc_da_blocks(inode);
++ mnt_drop_write(filp->f_path.mnt);
++ return err;
++ }
++
+ default:
+ return -ENOTTY;
+ }
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:50 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:21 -0400
+Subject: ext4: Add fine print for the 32000 subdirectory limit
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-9-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 722bde6875bfb49a0c84e5601eb82dd7ac02d27c)
+
+Some people are reading the ext4 feature list too literally and create
+dubious test cases involving very long filenames and 1k blocksize and
+then complain when they run into an htree-imposed limit. So add fine
+print to the "fix 32000 subdirectory limit" ext4 feature.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ Documentation/filesystems/ext4.txt | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/Documentation/filesystems/ext4.txt
++++ b/Documentation/filesystems/ext4.txt
+@@ -73,7 +73,7 @@ Mailing list: linux-ext4@vger.kernel.org
+ * extent format more robust in face of on-disk corruption due to magics,
+ * internal redunancy in tree
+ * improved file allocation (multi-block alloc)
+-* fix 32000 subdirectory limit
++* lift 32000 subdirectory limit imposed by i_links_count[1]
+ * nsec timestamps for mtime, atime, ctime, create time
+ * inode version field on disk (NFSv4, Lustre)
+ * reduced e2fsck time via uninit_bg feature
+@@ -88,6 +88,9 @@ Mailing list: linux-ext4@vger.kernel.org
+ * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
+ the ordering)
+
++[1] Filesystems with a block size of 1k may see a limit imposed by the
++directory hash tree having a maximum depth of two.
++
+ 2.2 Candidate features for future inclusion
+
+ * Online defrag (patches available but not well tested)
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:39 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:23 -0400
+Subject: ext4: Automatically allocate delay allocated blocks on close
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-11-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 7d8f9f7d150dded7b68e61ca6403a1f166fb4edf)
+
+When closing a file that had been previously truncated, force any
+delay allocated blocks to be allocated so that if the filesystem
+is mounted with data=ordered, the data blocks will be pushed out to
+disk along with the journal commit. Many application programs expect
+this, so we do this to avoid zero length files if the system crashes
+unexpectedly.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h | 1 +
+ fs/ext4/file.c | 4 ++++
+ fs/ext4/inode.c | 3 +++
+ 3 files changed, 8 insertions(+)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -279,6 +279,7 @@ static inline __u32 ext4_mask_flags(umod
+ #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
+ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
+ #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
++#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
+
+ /* Used to pass group descriptor data when online resize is done */
+ struct ext4_new_group_input {
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -33,6 +33,10 @@
+ */
+ static int ext4_release_file (struct inode * inode, struct file * filp)
+ {
++ if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
++ ext4_alloc_da_blocks(inode);
++ EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
++ }
+ /* if we are the last writer on the inode, drop the block reservation */
+ if ((filp->f_mode & FMODE_WRITE) &&
+ (atomic_read(&inode->i_writecount) == 1))
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3636,6 +3636,9 @@ void ext4_truncate(struct inode *inode)
+ if (!ext4_can_truncate(inode))
+ return;
+
++ if (inode->i_size == 0)
++ ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
++
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+ ext4_ext_truncate(inode);
+ return;
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:13:03 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:24 -0400
+Subject: ext4: Automatically allocate delay allocated blocks on rename
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-12-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 8750c6d5fcbd3342b3d908d157f81d345c5325a7)
+
+When renaming a file such that a link to another inode is overwritten,
+force any delay allocated blocks to be allocated so that if the
+filesystem is mounted with data=ordered, the data blocks will be
+pushed out to disk along with the journal commit. Many application
+programs expect this, so we do this to avoid zero length files if the
+system crashes unexpectedly.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -2314,7 +2314,7 @@ static int ext4_rename (struct inode * o
+ struct inode * old_inode, * new_inode;
+ struct buffer_head * old_bh, * new_bh, * dir_bh;
+ struct ext4_dir_entry_2 * old_de, * new_de;
+- int retval;
++ int retval, force_da_alloc = 0;
+
+ old_bh = new_bh = dir_bh = NULL;
+
+@@ -2452,6 +2452,7 @@ static int ext4_rename (struct inode * o
+ ext4_mark_inode_dirty(handle, new_inode);
+ if (!new_inode->i_nlink)
+ ext4_orphan_add(handle, new_inode);
++ force_da_alloc = 1;
+ }
+ retval = 0;
+
+@@ -2460,6 +2461,8 @@ end_rename:
+ brelse (old_bh);
+ brelse (new_bh);
+ ext4_journal_stop(handle);
++ if (retval == 0 && force_da_alloc)
++ ext4_alloc_da_blocks(old_inode);
+ return retval;
+ }
+
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:50 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:26 -0400
+Subject: ext4: Check for a valid i_mode when reading the inode from disk
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-14-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 563bdd61fe4dbd6b58cf7eb06f8d8f14479ae1dc)
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4129,7 +4129,8 @@ struct inode *ext4_iget(struct super_blo
+ inode->i_op = &ext4_symlink_inode_operations;
+ ext4_set_aops(inode);
+ }
+- } else {
++ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
++ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ inode->i_op = &ext4_special_inode_operations;
+ if (raw_inode->i_block[0])
+ init_special_inode(inode, inode->i_mode,
+@@ -4137,6 +4138,13 @@ struct inode *ext4_iget(struct super_blo
+ else
+ init_special_inode(inode, inode->i_mode,
+ new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
++ } else {
++ brelse(bh);
++ ret = -EIO;
++ ext4_error(inode->i_sb, __func__,
++ "bogus i_mode (%o) for inode=%lu",
++ inode->i_mode, inode->i_ino);
++ goto bad_inode;
+ }
+ brelse (iloc.bh);
+ ext4_set_inode_flags(inode);
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:35 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue, 2 Jun 2009 08:09:35 -0400
+Subject: ext4: Clear the unwritten buffer_head flag after the extent is initialized
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944576-20915-23-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 2a8964d63d50dd2d65d71d342bc7fb6ef4117614)
+
+The BH_Unwritten flag indicates that the buffer is allocated on disk
+but has not been written; that is, the disk was part of a persistent
+preallocation area. That flag should only be set when a get_blocks()
+function is looking up an inode's logical to physical block mapping.
+
+When ext4_get_blocks_wrap() is called with create=1, the uninitialized
+extent is converted into an initialized one, so the BH_Unwritten flag
+is no longer appropriate. Hence, we need to make sure the
+BH_Unwritten is not left set, since the combination of BH_Mapped and
+BH_Unwritten is not allowed; among other things, it will cause ext4's
+get_block() to be called over and over again during the write_begin
+phase of write(2).
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1085,6 +1085,7 @@ int ext4_get_blocks_wrap(handle_t *handl
+ int retval;
+
+ clear_buffer_mapped(bh);
++ clear_buffer_unwritten(bh);
+
+ /*
+ * Try to see if we can get the block without requesting
+@@ -1115,6 +1116,18 @@ int ext4_get_blocks_wrap(handle_t *handl
+ return retval;
+
+ /*
++ * When we call get_blocks without the create flag, the
++ * BH_Unwritten flag could have gotten set if the blocks
++ * requested were part of a uninitialized extent. We need to
++ * clear this flag now that we are committed to convert all or
++ * part of the uninitialized extent to be an initialized
++ * extent. This is because we need to avoid the combination
++ * of BH_Unwritten and BH_Mapped flags being simultaneously
++ * set on the buffer_head.
++ */
++ clear_buffer_unwritten(bh);
++
++ /*
+ * New blocks allocate and/or writing to uninitialized extent
+ * will possibly result in updating i_data, so we take
+ * the write lock of i_data_sem, and call get_blocks()
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:17 2009
+From: Duane Griffin <duaneg@dghda.com>
+Date: Tue, 2 Jun 2009 08:09:18 -0400
+Subject: ext4: don't inherit inappropriate inode flags from parent
+To: stable@kernel.org
+Cc: Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Duane Griffin <duaneg@dghda.com>
+Message-ID: <1243944576-20915-6-git-send-email-tytso@mit.edu>
+
+
+From: Duane Griffin <duaneg@dghda.com>
+
+(cherry picked from commit 8fa43a81b97853fc69417bb6054182e78f95cbeb)
+
+At present INDEX and EXTENTS are the only flags that new ext4 inodes do
+NOT inherit from their parent. In addition prevent the flags DIRTY,
+ECOMPR, IMAGIC, TOPDIR, HUGE_FILE and EXT_MIGRATE from being inherited.
+List inheritable flags explicitly to prevent future flags from
+accidentally being inherited.
+
+This fixes the TOPDIR flag inheritance bug reported at
+http://bugzilla.kernel.org/show_bug.cgi?id=9866.
+
+Signed-off-by: Duane Griffin <duaneg@dghda.com>
+Acked-by: Andreas Dilger <adilger@sun.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h | 7 +++++++
+ fs/ext4/ialloc.c | 2 +-
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -248,6 +248,13 @@ struct flex_groups {
+ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
+ #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+
++/* Flags that should be inherited by new inodes from their parent. */
++#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
++ EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
++ EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
++ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
++ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
++
+ /*
+ * Inode dynamic state flags
+ */
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -869,7 +869,7 @@ got:
+ * newly created directory and file only if -o extent mount option is
+ * specified
+ */
+- ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
++ ei->i_flags = EXT4_I(dir)->i_flags & EXT4_FL_INHERITED;
+ if (S_ISLNK(mode))
+ ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+ /* dirsync only applies to directories */
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:21 2009
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Tue, 2 Jun 2009 08:09:17 -0400
+Subject: ext4: fix bb_prealloc_list corruption due to wrong group locking
+To: stable@kernel.org
+Cc: Eric Sandeen <sandeen@redhat.com>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-5-git-send-email-tytso@mit.edu>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+(cherry-picked from commit d33a1976fbee1ee321d6f014333d8f03a39d526c)
+
+This is for Red Hat bug 490026: EXT4 panic, list corruption in
+ext4_mb_new_inode_pa
+
+ext4_lock_group(sb, group) is supposed to protect this list for
+each group, and a common code flow to remove a pa (prealloc space) is like
+this:
+
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &grp, NULL);
+ ext4_lock_group(sb, grp);
+ list_del(&pa->pa_group_list);
+ ext4_unlock_group(sb, grp);
+
+so it's critical that we get the right group number back for
+this prealloc context, to lock the right group (the one
+associated with this pa) and prevent concurrent list manipulation.
+
+however, ext4_mb_put_pa() passes in (pa->pa_pstart - 1) with a
+comment, "-1 is to protect from crossing allocation group".
+
+This makes sense for the group_pa, where pa_pstart is advanced
+by the length which has been used (in ext4_mb_release_context()),
+and when the entire length has been used, pa_pstart has been
+advanced to the first block of the next group.
+
+However, for inode_pa, pa_pstart is never advanced; it's just
+set once to the first block in the group and not moved after
+that. So in this case, if we subtract one in ext4_mb_put_pa(),
+we are actually locking the *previous* group, and opening the
+race with the other threads which do not subtract off the extra
+block.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -3698,6 +3698,7 @@ static void ext4_mb_put_pa(struct ext4_a
+ struct super_block *sb, struct ext4_prealloc_space *pa)
+ {
+ unsigned long grp;
++ ext4_fsblk_t grp_blk;
+
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
+ return;
+@@ -3712,8 +3713,12 @@ static void ext4_mb_put_pa(struct ext4_a
+ pa->pa_deleted = 1;
+ spin_unlock(&pa->pa_lock);
+
+- /* -1 is to protect from crossing allocation group */
+- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
++ grp_blk = pa->pa_pstart;
++ /* If linear, pa_pstart may be in the next group when pa is used up */
++ if (pa->pa_linear)
++ grp_blk--;
++
++ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
+
+ /*
+ * possible race:
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:17 2009
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Tue, 2 Jun 2009 08:09:16 -0400
+Subject: ext4: fix bogus BUG_ONs in mballoc code
+To: stable@kernel.org
+Cc: Eric Sandeen <sandeen@redhat.com>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-4-git-send-email-tytso@mit.edu>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+(cherry picked from commit 8d03c7a0c550e7ab24cadcef5e66656bfadec8b9)
+
+Thiemo Nagel reported that:
+
+# dd if=/dev/zero of=image.ext4 bs=1M count=2
+# mkfs.ext4 -v -F -b 1024 -m 0 -g 512 -G 4 -I 128 -N 1 \
+ -O large_file,dir_index,flex_bg,extent,sparse_super image.ext4
+# mount -o loop image.ext4 mnt/
+# dd if=/dev/zero of=mnt/file
+
+oopsed, with a BUG_ON in ext4_mb_normalize_request because
+size == EXT4_BLOCKS_PER_GROUP
+
+It appears to me (esp. after talking to Andreas) that the BUG_ON
+is bogus; a request of exactly EXT4_BLOCKS_PER_GROUP should
+be allowed, though larger sizes do indicate a problem.
+
+Fix that and another (apparently rare) codepath with a similar check.
+
+Reported-by: Thiemo Nagel <thiemo.nagel@ph.tum.de>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -1450,7 +1450,7 @@ static void ext4_mb_measure_extent(struc
+ struct ext4_free_extent *gex = &ac->ac_g_ex;
+
+ BUG_ON(ex->fe_len <= 0);
+- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
++ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+ BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
+
+@@ -3400,7 +3400,7 @@ ext4_mb_normalize_request(struct ext4_al
+ }
+ BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
+ start > ac->ac_o_ex.fe_logical);
+- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
++ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+
+ /* now prepare goal request */
+
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:25 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue, 2 Jun 2009 08:09:25 -0400
+Subject: ext4: Fix discard of inode prealloc space with delayed allocation.
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944576-20915-13-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit d6014301b5599fba395c42a1e96a7fe86f7d0b2d)
+
+With delayed allocation we should not/cannot discard inode prealloc
+space during file close. We would still have dirty pages for which we
+haven't allocated blocks yet. With this fix after each get_blocks
+request we check whether we have zero reserved blocks and if yes and
+we don't have any writers on the file we discard inode prealloc space.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/file.c | 3 ++-
+ fs/ext4/inode.c | 8 ++++++++
+ 2 files changed, 10 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/file.c
++++ b/fs/ext4/file.c
+@@ -39,7 +39,8 @@ static int ext4_release_file (struct ino
+ }
+ /* if we are the last writer on the inode, drop the block reservation */
+ if ((filp->f_mode & FMODE_WRITE) &&
+- (atomic_read(&inode->i_writecount) == 1))
++ (atomic_read(&inode->i_writecount) == 1) &&
++ !EXT4_I(inode)->i_reserved_data_blocks)
+ {
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_reservation(inode);
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1046,6 +1046,14 @@ static void ext4_da_update_reserve_space
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
++
++ /*
++ * If we have done all the pending block allocations and if
++ * there aren't any writers on the inode, we can discard the
++ * inode's preallocations.
++ */
++ if (!total && (atomic_read(&inode->i_writecount) == 0))
++ ext4_discard_reservation(inode);
+ }
+
+ /*
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:04 2009
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Tue, 2 Jun 2009 08:09:13 -0400
+Subject: ext4: fix ext4_free_inode() vs. ext4_claim_inode() race
+To: stable@kernel.org
+Cc: Eric Sandeen <sandeen@redhat.com>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-1-git-send-email-tytso@mit.edu>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+(cherry picked from commit 7ce9d5d1f3c8736511daa413c64985a05b2feee3)
+
+I was seeing fsck errors on inode bitmaps after a 4 thread
+dbench run on a 4 cpu machine:
+
+Inode bitmap differences: -50736 -(50752--50753) etc...
+
+I believe that this is because ext4_free_inode() uses atomic
+bitops, and although ext4_new_inode() *used* to also use atomic
+bitops for synchronization, commit
+393418676a7602e1d7d3f6e560159c65c8cbd50e changed this to use
+the sb_bgl_lock, so that we could also synchronize against
+read_inode_bitmap and initialization of uninit inode tables.
+
+However, that change left ext4_free_inode using atomic bitops,
+which I think leaves no synchronization between setting &
+unsetting bits in the inode table.
+
+The below patch fixes it for me, although I wonder if we're
+getting at all heavy-handed with this spinlock...
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ialloc.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -188,7 +188,7 @@ void ext4_free_inode (handle_t *handle,
+ struct ext4_group_desc * gdp;
+ struct ext4_super_block * es;
+ struct ext4_sb_info *sbi;
+- int fatal = 0, err;
++ int fatal = 0, err, cleared;
+ ext4_group_t flex_group;
+
+ if (atomic_read(&inode->i_count) > 1) {
+@@ -242,10 +242,12 @@ void ext4_free_inode (handle_t *handle,
+ goto error_return;
+
+ /* Ok, now we can actually update the inode bitmaps.. */
+- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+- bit, bitmap_bh->b_data))
+- ext4_error (sb, "ext4_free_inode",
+- "bit already cleared for inode %lu", ino);
++ spin_lock(sb_bgl_lock(sbi, block_group));
++ cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
++ spin_unlock(sb_bgl_lock(sbi, block_group));
++ if (!cleared)
++ ext4_error(sb, "ext4_free_inode",
++ "bit already cleared for inode %lu", ino);
+ else {
+ gdp = ext4_get_group_desc (sb, block_group, &bh2);
+
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:10 2009
+From: Eric Sandeen <sandeen@redhat.com>
+Date: Tue, 2 Jun 2009 08:09:14 -0400
+Subject: ext4: fix header check in ext4_ext_search_right() for deep extent trees.
+To: stable@kernel.org
+Cc: Eric Sandeen <sandeen@redhat.com>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-2-git-send-email-tytso@mit.edu>
+
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+(cherry picked from commit 395a87bfefbc400011417e9eaae33169f9f036c0)
+
+The ext4_ext_search_right() function is confusing; it uses a
+"depth" variable which is 0 at the root and maximum at the leaves,
+but the on-disk metadata uses a "depth" (actually eh_depth) which
+is opposite: maximum at the root, and 0 at the leaves.
+
+The ext4_ext_check_header() function is given a depth and checks
+the header against that depth; it expects the on-disk semantics,
+but we are giving it the opposite in the while loop in this
+function. We should be giving it the on-disk notion of "depth"
+which we can get from (p_depth - depth) - and if you look, the last
+(more commonly hit) call to ext4_ext_check_header() does just this.
+
+Sending in the wrong depth results in (incorrect) messages
+about corruption:
+
+EXT4-fs error (device sdb1): ext4_ext_search_right: bad header
+in inode #2621457: unexpected eh_depth - magic f30a, entries 340,
+max 340(0), depth 1(2)
+
+http://bugzilla.kernel.org/show_bug.cgi?id=12821
+
+Reported-by: David Dindorp <ddi@dubex.dk>
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1118,7 +1118,8 @@ ext4_ext_search_right(struct inode *inod
+ struct ext4_extent_idx *ix;
+ struct ext4_extent *ex;
+ ext4_fsblk_t block;
+- int depth, ee_len;
++ int depth; /* Note, NOT eh_depth; depth from top of tree */
++ int ee_len;
+
+ BUG_ON(path == NULL);
+ depth = path->p_depth;
+@@ -1177,7 +1178,8 @@ ext4_ext_search_right(struct inode *inod
+ if (bh == NULL)
+ return -EIO;
+ eh = ext_block_hdr(bh);
+- if (ext4_ext_check_header(inode, eh, depth)) {
++ /* subtract from p_depth to get proper eh_depth */
++ if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
+ put_bh(bh);
+ return -EIO;
+ }
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:26 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:29 -0400
+Subject: ext4: fix locking typo in mballoc which could cause soft lockup hangs
+To: stable@kernel.org
+Cc: Chris Wright <chrisw@sous-sol.org>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-17-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+upstream commit: e7c9e3e99adf6c49c5d593a51375916acc039d1e
+
+Smatch (http://repo.or.cz/w/smatch.git/) complains about the locking in
+ext4_mb_add_n_trim() from fs/ext4/mballoc.c
+
+ 4438 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
+ 4439 pa_inode_list) {
+ 4440 spin_lock(&tmp_pa->pa_lock);
+ 4441 if (tmp_pa->pa_deleted) {
+ 4442 spin_unlock(&pa->pa_lock);
+ 4443 continue;
+ 4444 }
+
+Brown paper bag time...
+
+Reported-by: Dan Carpenter <error27@gmail.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -4532,7 +4532,7 @@ static void ext4_mb_add_n_trim(struct ex
+ pa_inode_list) {
+ spin_lock(&tmp_pa->pa_lock);
+ if (tmp_pa->pa_deleted) {
+- spin_unlock(&pa->pa_lock);
++ spin_unlock(&tmp_pa->pa_lock);
+ continue;
+ }
+ if (!added && pa->pa_free < tmp_pa->pa_free) {
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:01 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:36 -0400
+Subject: ext4: Fix race in ext4_inode_info.i_cached_extent
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-24-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 2ec0ae3acec47f628179ee95fe2c4da01b5e9fc4)
+
+If two CPUs call ext4_ext_get_blocks() at the same time, there is
+nothing protecting the i_cached_extent structure from
+being used and updated at the same time. This could potentially cause
+the wrong location on disk to be read or written to, including
+potentially causing the corruption of the block group descriptors
+and/or inode table.
+
+This bug has been in the ext4 code since almost the very beginning of
+ext4's development. Fortunately once the data is stored in the page
+cache, ext4_get_blocks() doesn't need to be called, so trying to
+replicate this problem to the point where we could identify its root
+cause was *extremely* difficult. Many thanks to Kevin Shanahan for
+working over several months to be able to reproduce this easily so we
+could finally nail down the cause of the corruption.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -1633,11 +1633,13 @@ ext4_ext_put_in_cache(struct inode *inod
+ {
+ struct ext4_ext_cache *cex;
+ BUG_ON(len == 0);
++ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ cex = &EXT4_I(inode)->i_cached_extent;
+ cex->ec_type = type;
+ cex->ec_block = block;
+ cex->ec_len = len;
+ cex->ec_start = start;
++ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ }
+
+ /*
+@@ -1694,12 +1696,17 @@ ext4_ext_in_cache(struct inode *inode, e
+ struct ext4_extent *ex)
+ {
+ struct ext4_ext_cache *cex;
++ int ret = EXT4_EXT_CACHE_NO;
+
++ /*
++ * We borrow i_block_reservation_lock to protect i_cached_extent
++ */
++ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ cex = &EXT4_I(inode)->i_cached_extent;
+
+ /* has cache valid data? */
+ if (cex->ec_type == EXT4_EXT_CACHE_NO)
+- return EXT4_EXT_CACHE_NO;
++ goto errout;
+
+ BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
+ cex->ec_type != EXT4_EXT_CACHE_EXTENT);
+@@ -1710,11 +1717,11 @@ ext4_ext_in_cache(struct inode *inode, e
+ ext_debug("%u cached by %u:%u:%llu\n",
+ block,
+ cex->ec_block, cex->ec_len, cex->ec_start);
+- return cex->ec_type;
++ ret = cex->ec_type;
+ }
+-
+- /* not in cache */
+- return EXT4_EXT_CACHE_NO;
++errout:
++ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
++ return ret;
+ }
+
+ /*
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:11 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:31 -0400
+Subject: ext4: Fix softlockup caused by illegal i_file_acl value in on-disk inode
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-19-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 485c26ec70f823f2a9cf45982b724893e53a859e)
+
+If the block number of the external extended attribute block (which is
+stored in i_file_acl and i_file_acl_high) lies beyond the end of the
+on-disk filesystem, the process which tried to access the extended attributes
+will endlessly issue kernel printks complaining that
+"__find_get_block_slow() failed", locking up that CPU until the system
+is forcibly rebooted.
+
+So when we read in the inode, make sure the i_file_acl value is legal,
+and if not, flag the filesystem as being corrupted.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4115,6 +4115,18 @@ struct inode *ext4_iget(struct super_blo
+ (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
+ }
+
++ if (ei->i_file_acl &&
++ ((ei->i_file_acl <
++ (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
++ EXT4_SB(sb)->s_gdb_count)) ||
++ (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
++ ext4_error(sb, __func__,
++ "bad extended attribute block %llu in inode #%lu",
++ ei->i_file_acl, inode->i_ino);
++ ret = -EIO;
++ goto bad_inode;
++ }
++
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &ext4_file_inode_operations;
+ inode->i_fop = &ext4_file_operations;
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:45 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue, 2 Jun 2009 08:09:33 -0400
+Subject: ext4: Fix sub-block zeroing for writes into preallocated extents
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944576-20915-21-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 9c1ee184a30394e54165fa4c15923cabd952c106)
+
+We need to mark the buffer_head mapping preallocated space as new
+during write_begin. Otherwise we don't zero out the page cache content
+properly for a partial write. This will cause file corruption with
+preallocation.
+
+Now that we mark the buffer_head new we also need to have a valid
+buffer_head blocknr so that unmap_underlying_metadata() unmaps the
+correct block.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/extents.c | 2 ++
+ fs/ext4/inode.c | 7 +++++++
+ 2 files changed, 9 insertions(+)
+
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -2670,6 +2670,8 @@ int ext4_ext_get_blocks(handle_t *handle
+ if (allocated > max_blocks)
+ allocated = max_blocks;
+ set_buffer_unwritten(bh_result);
++ bh_result->b_bdev = inode->i_sb->s_bdev;
++ bh_result->b_blocknr = newblock;
+ goto out2;
+ }
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2131,6 +2131,13 @@ static int ext4_da_get_block_prep(struct
+ set_buffer_delay(bh_result);
+ } else if (ret > 0) {
+ bh_result->b_size = (ret << inode->i_blkbits);
++ /*
++ * With sub-block writes into unwritten extents
++ * we also need to mark the buffer as new so that
++ * the unwritten parts of the buffer gets correctly zeroed.
++ */
++ if (buffer_unwritten(bh_result))
++ set_buffer_new(bh_result);
+ ret = 0;
+ }
+
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:50 2009
+From: Dan Carpenter <error27@gmail.com>
+Date: Tue, 2 Jun 2009 08:09:28 -0400
+Subject: ext4: fix typo which causes a memory leak on error path
+To: stable@kernel.org
+Cc: Chris Wright <chrisw@sous-sol.org>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Dan Carpenter <error27@gmail.com>
+Message-ID: <1243944576-20915-16-git-send-email-tytso@mit.edu>
+
+
+From: Dan Carpenter <error27@gmail.com>
+
+upstream commit: a7b19448ddbdc34b2b8fedc048ba154ca798667b
+
+This was found by smatch (http://repo.or.cz/w/smatch.git/)
+
+Signed-off-by: Dan Carpenter <error27@gmail.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Chris Wright <chrisw@sous-sol.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/mballoc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2698,7 +2698,7 @@ int ext4_mb_init(struct super_block *sb,
+ sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
+ if (sbi->s_mb_maxs == NULL) {
+ clear_opt(sbi->s_mount_opt, MBALLOC);
+- kfree(sbi->s_mb_maxs);
++ kfree(sbi->s_mb_offsets);
+ return -ENOMEM;
+ }
+
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:19 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:32 -0400
+Subject: ext4: Ignore i_file_acl_high unless EXT4_FEATURE_INCOMPAT_64BIT is present
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-20-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit a9e817425dc0baede8ebe5fbc9984a640257432b)
+
+Don't try to look at i_file_acl_high unless the INCOMPAT_64BIT feature
+bit is set. The field is normally zero, but older versions of e2fsck
+didn't automatically check to make sure of this, so in the spirit of
+"be liberal in what you accept", don't look at i_file_acl_high unless
+we are using a 64-bit filesystem.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/ext4/inode.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4064,11 +4064,9 @@ struct inode *ext4_iget(struct super_blo
+ ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+ inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
+- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
+- cpu_to_le32(EXT4_OS_HURD)) {
++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
+ ei->i_file_acl |=
+ ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
+- }
+ inode->i_size = ext4_isize(raw_inode);
+ ei->i_disksize = inode->i_size;
+ inode->i_generation = le32_to_cpu(raw_inode->i_generation);
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:00 2009
+From: "Theodore Ts'o" <tytso@mit.edu>
+Date: Tue, 2 Jun 2009 08:09:15 -0400
+Subject: ext4: Print the find_group_flex() warning only once
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
+Message-ID: <1243944576-20915-3-git-send-email-tytso@mit.edu>
+
+From: "Theodore Ts'o" <tytso@mit.edu>
+
+(cherry picked from commit 2842c3b5449f31470b61db716f1926b594fb6156)
+
+This is a short-term warning, and even printk_ratelimit() can result
+in too much noise in system logs. So only print it once as a warning.
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ialloc.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -687,6 +687,7 @@ struct inode *ext4_new_inode(handle_t *h
+ struct inode *ret;
+ ext4_group_t i;
+ int free = 0;
++ static int once = 1;
+ ext4_group_t flex_group;
+
+ /* Cannot create files in a deleted directory */
+@@ -706,7 +707,8 @@ struct inode *ext4_new_inode(handle_t *h
+ ret2 = find_group_flex(sb, dir, &group);
+ if (ret2 == -1) {
+ ret2 = find_group_other(sb, dir, &group);
+- if (ret2 == 0 && printk_ratelimit())
++ if (ret2 == 0 && once)
++ once = 0;
+ printk(KERN_NOTICE "ext4: find_group_flex "
+ "failed, fallback succeeded dir %lu\n",
+ dir->i_ino);
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:21 2009
+From: Chuck Ebbert <cebbert@redhat.com>
+Date: Tue, 2 Jun 2009 08:09:30 -0400
+Subject: ext4: really print the find_group_flex fallback warning only once
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Chuck Ebbert <cebbert@redhat.com>
+Message-ID: <1243944576-20915-18-git-send-email-tytso@mit.edu>
+
+
+From: Chuck Ebbert <cebbert@redhat.com>
+
+(cherry picked from commit 6b82f3cb2d480b7714eb0ff61aee99c22160389e)
+
+Missing braces caused the warning to print more than once.
+
+Signed-Off-By: Chuck Ebbert <cebbert@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ialloc.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -707,11 +707,12 @@ struct inode *ext4_new_inode(handle_t *h
+ ret2 = find_group_flex(sb, dir, &group);
+ if (ret2 == -1) {
+ ret2 = find_group_other(sb, dir, &group);
+- if (ret2 == 0 && once)
++ if (ret2 == 0 && once) {
+ once = 0;
+ printk(KERN_NOTICE "ext4: find_group_flex "
+ "failed, fallback succeeded dir %lu\n",
+ dir->i_ino);
++ }
+ }
+ goto got_group;
+ }
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:56 2009
+From: Bryan Donlan <bdonlan@gmail.com>
+Date: Tue, 2 Jun 2009 08:09:20 -0400
+Subject: ext4: return -EIO not -ESTALE on directory traversal through deleted inode
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, Bryan Donlan <bdonlan@gmail.com>
+Message-ID: <1243944576-20915-8-git-send-email-tytso@mit.edu>
+
+
+From: Bryan Donlan <bdonlan@gmail.com>
+
+(cherry picked from commit e6f009b0b45220c004672d41a58865e94946104d)
+
+ext4_iget() returns -ESTALE if invoked on a deleted inode, in order to
+report errors to NFS properly. However, in ext4_lookup(), this
+-ESTALE can be propagated to userspace if the filesystem is corrupted
+such that a directory entry references a deleted inode. This leads to
+a misleading error message - "Stale NFS file handle" - and confusion
+on the part of the admin.
+
+The bug can be easily reproduced by creating a new filesystem, making
+a link to an unused inode using debugfs, then mounting and attempting
+to ls -l said link.
+
+This patch thus changes ext4_lookup to return -EIO if it receives
+-ESTALE from ext4_iget(), as ext4 does for other filesystem metadata
+corruption; and also invokes the appropriate ext*_error functions when
+this case is detected.
+
+Signed-off-by: Bryan Donlan <bdonlan@gmail.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/namei.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -1055,8 +1055,16 @@ static struct dentry *ext4_lookup(struct
+ return ERR_PTR(-EIO);
+ }
+ inode = ext4_iget(dir->i_sb, ino);
+- if (IS_ERR(inode))
+- return ERR_CAST(inode);
++ if (unlikely(IS_ERR(inode))) {
++ if (PTR_ERR(inode) == -ESTALE) {
++ ext4_error(dir->i_sb, __func__,
++ "deleted inode referenced: %u",
++ ino);
++ return ERR_PTR(-EIO);
++ } else {
++ return ERR_CAST(inode);
++ }
++ }
+ }
+ return d_splice_alias(inode, dentry);
+ }
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:12:06 2009
+From: Duane Griffin <duaneg@dghda.com>
+Date: Tue, 2 Jun 2009 08:09:19 -0400
+Subject: ext4: tighten restrictions on inode flags
+To: stable@kernel.org
+Cc: Andrew Morton <akpm@linux-foundation.org>, linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, Duane Griffin <duaneg@dghda.com>
+Message-ID: <1243944576-20915-7-git-send-email-tytso@mit.edu>
+
+
+From: Duane Griffin <duaneg@dghda.com>
+
+(cherry picked from commit 2dc6b0d48ca0599837df21b14bb8393d0804af57)
+
+At the moment there are few restrictions on which flags may be set on
+which inodes. Specifically DIRSYNC may only be set on directories and
+IMMUTABLE and APPEND may not be set on links. Tighten that to disallow
+TOPDIR being set on non-directories and to allow only NODUMP and NOATIME
+to be set on inodes that are neither regular files nor directories.
+
+Introduce a flags-masking function which masks flags based on mode, and
+use it during inode creation and when flags are set via the ioctl to
+facilitate future consistency.
+
+Signed-off-by: Duane Griffin <duaneg@dghda.com>
+Acked-by: Andreas Dilger <adilger@sun.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/ext4.h | 17 +++++++++++++++++
+ fs/ext4/ialloc.c | 14 +++++---------
+ fs/ext4/ioctl.c | 3 +--
+ 3 files changed, 23 insertions(+), 11 deletions(-)
+
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -255,6 +255,23 @@ struct flex_groups {
+ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+
++/* Flags that are appropriate for regular files (all but dir-specific ones). */
++#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
++
++/* Flags that are appropriate for non-directories/regular files. */
++#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
++
++/* Mask out flags that are inappropriate for the given type of inode. */
++static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
++{
++ if (S_ISDIR(mode))
++ return flags;
++ else if (S_ISREG(mode))
++ return flags & EXT4_REG_FLMASK;
++ else
++ return flags & EXT4_OTHER_FLMASK;
++}
++
+ /*
+ * Inode dynamic state flags
+ */
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -865,16 +865,12 @@ got:
+ ei->i_disksize = 0;
+
+ /*
+- * Don't inherit extent flag from directory. We set extent flag on
+- * newly created directory and file only if -o extent mount option is
+- * specified
++ * Don't inherit extent flag from directory, amongst others. We set
++ * extent flag on newly created directory and file only if -o extent
++ * mount option is specified
+ */
+- ei->i_flags = EXT4_I(dir)->i_flags & EXT4_FL_INHERITED;
+- if (S_ISLNK(mode))
+- ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
+- /* dirsync only applies to directories */
+- if (!S_ISDIR(mode))
+- ei->i_flags &= ~EXT4_DIRSYNC_FL;
++ ei->i_flags =
++ ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
+ ei->i_file_acl = 0;
+ ei->i_dtime = 0;
+ ei->i_block_alloc_info = NULL;
+--- a/fs/ext4/ioctl.c
++++ b/fs/ext4/ioctl.c
+@@ -49,8 +49,7 @@ long ext4_ioctl(struct file *filp, unsig
+ if (err)
+ return err;
+
+- if (!S_ISDIR(inode->i_mode))
+- flags &= ~EXT4_DIRSYNC_FL;
++ flags = ext4_mask_flags(inode->i_mode, flags);
+
+ err = -EPERM;
+ mutex_lock(&inode->i_mutex);
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:11:31 2009
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Date: Tue, 2 Jun 2009 08:09:34 -0400
+Subject: ext4: Use a fake block number for delayed new buffer_head
+To: stable@kernel.org
+Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>, "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
+Message-ID: <1243944576-20915-22-git-send-email-tytso@mit.edu>
+
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+
+(cherry picked from commit 33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0)
+
+Use a very large unsigned number (~0xffff) as the fake block number
+for the delayed new buffer. The VFS should never try to write out this
+number, but if it does, this will make it obvious.
+
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/ext4/inode.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -2105,6 +2105,10 @@ static int ext4_da_get_block_prep(struct
+ struct buffer_head *bh_result, int create)
+ {
+ int ret = 0;
++ sector_t invalid_block = ~((sector_t) 0xffff);
++
++ if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
++ invalid_block = ~0;
+
+ BUG_ON(create == 0);
+ BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
+@@ -2126,7 +2130,7 @@ static int ext4_da_get_block_prep(struct
+ /* not enough space to reserve */
+ return ret;
+
+- map_bh(bh_result, inode->i_sb, 0);
++ map_bh(bh_result, inode->i_sb, invalid_block);
+ set_buffer_new(bh_result);
+ set_buffer_delay(bh_result);
+ } else if (ret > 0) {
--- /dev/null
+From stable-bounces@linux.kernel.org Tue Jun 2 05:13:07 2009
+From: Jan Kara <jack@suse.cz>
+Date: Tue, 2 Jun 2009 08:09:27 -0400
+Subject: jbd2: Update locking coments
+To: stable@kernel.org
+Cc: "Theodore Ts'o" <tytso@mit.edu>, linux-ext4@vger.kernel.org, Jan Kara <jack@suse.cz>
+Message-ID: <1243944576-20915-15-git-send-email-tytso@mit.edu>
+
+
+From: Jan Kara <jack@suse.cz>
+
+(cherry picked from commit 86db97c87f744364d5889ca8a4134ca2048b8f83)
+
+Update information about locking in JBD2 revoke code. Inconsistency in
+comments found by Lin Tan <tammy000@gmail.com>.
+
+CC: Lin Tan <tammy000@gmail.com>.
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+---
+ fs/jbd2/revoke.c | 24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+--- a/fs/jbd2/revoke.c
++++ b/fs/jbd2/revoke.c
+@@ -55,6 +55,25 @@
+ * need do nothing.
+ * RevokeValid set, Revoked set:
+ * buffer has been revoked.
++ *
++ * Locking rules:
++ * We keep two hash tables of revoke records. One hashtable belongs to the
++ * running transaction (is pointed to by journal->j_revoke), the other one
++ * belongs to the committing transaction. Accesses to the second hash table
++ * happen only from the kjournald and no other thread touches this table. Also
++ * journal_switch_revoke_table() which switches which hashtable belongs to the
++ * running and which to the committing transaction is called only from
++ * kjournald. Therefore we need no locks when accessing the hashtable belonging
++ * to the committing transaction.
++ *
++ * All users operating on the hash table belonging to the running transaction
++ * have a handle to the transaction. Therefore they are safe from kjournald
++ * switching hash tables under them. For operations on the lists of entries in
++ * the hash table j_revoke_lock is used.
++ *
++ * Finally, also replay code uses the hash tables but at this moment noone else
++ * can touch them (filesystem isn't mounted yet) and hence no locking is
++ * needed.
+ */
+
+ #ifndef __KERNEL__
+@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle
+ * the second time we would still have a pending revoke to cancel. So,
+ * do not trust the Revoked bit on buffers unless RevokeValid is also
+ * set.
+- *
+- * The caller must have the journal locked.
+ */
+ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
+ {
+@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo
+ /*
+ * Write revoke records to the journal for all entries in the current
+ * revoke hash, deleting the entries as we go.
+- *
+- * Called with the journal lock held.
+ */
+-
+ void jbd2_journal_write_revoke_records(journal_t *journal,
+ transaction_t *transaction)
+ {
v4l-dvb-cx88-prevent-general-protection-fault-on-rmmod.patch
x86-fix-dmi-on-efi.patch
mac80211-pid-fix-memory-corruption.patch
+ext4-fix-ext4_free_inode-vs.-ext4_claim_inode-race.patch
+ext4-fix-header-check-in-ext4_ext_search_right-for-deep-extent-trees.patch
+ext4-print-the-find_group_flex-warning-only-once.patch
+ext4-fix-bogus-bug_ons-in-in-mballoc-code.patch
+ext4-fix-bb_prealloc_list-corruption-due-to-wrong-group-locking.patch
+ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch
+ext4-tighten-restrictions-on-inode-flags.patch
+ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch
+ext4-add-fine-print-for-the-32000-subdirectory-limit.patch
+ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch
+ext4-automatically-allocate-delay-allocated-blocks-on-close.patch
+ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch
+ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch
+ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch
+jbd2-update-locking-coments.patch
+ext4-fix-typo-which-causes-a-memory-leak-on-error-path.patch
+ext4-fix-locking-typo-in-mballoc-which-could-cause-soft-lockup-hangs.patch
+ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch
+ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch
+ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch
+ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch
+ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch
+ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch
+ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch