From db5bd50c5640c7855a725d433a234b87ee57ffc0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 9 Jun 2009 17:01:06 -0700 Subject: [PATCH] ext4 patches for .27 --- ...xt4-add-ext4_ioc_alloc_da_blks-ioctl.patch | 119 ++++++++++++++++++ ...int-for-the-32000-subdirectory-limit.patch | 44 +++++++ ...cate-delay-allocated-blocks-on-close.patch | 62 +++++++++ ...ate-delay-allocated-blocks-on-rename.patch | 53 ++++++++ ...ode-when-reading-the-inode-from-disk.patch | 44 +++++++ ...flag-after-the-extent-is-initialized.patch | 62 +++++++++ ...nappropriate-inode-flags-from-parent.patch | 60 +++++++++ ...orruption-due-to-wrong-group-locking.patch | 76 +++++++++++ ...fix-bogus-bug_ons-in-in-mballoc-code.patch | 58 +++++++++ ...ealloc-space-with-delayed-allocation.patch | 56 +++++++++ ...free_inode-vs.-ext4_claim_inode-race.patch | 68 ++++++++++ ...t_search_right-for-deep-extent-trees.patch | 64 ++++++++++ ...-which-could-cause-soft-lockup-hangs.patch | 47 +++++++ ...e-in-ext4_inode_info.i_cached_extent.patch | 85 +++++++++++++ ...al-i_file_acl-value-in-on-disk-inode.patch | 49 ++++++++ ...for-writes-into-preallocated-extents.patch | 57 +++++++++ ...h-causes-a-memory-leak-on-error-path.patch | 35 ++++++ ...t4_feature_incompat_64bit-is-present.patch | 40 ++++++ ...he-find_group_flex-warning-only-once.patch | 41 ++++++ ...roup_flex-fallback-warning-only-once.patch | 38 ++++++ ...tory-traversal-through-deleted-inode.patch | 59 +++++++++ ...-tighten-restrictions-on-inode-flags.patch | 97 ++++++++++++++ ...k-number-for-delayed-new-buffer_head.patch | 46 +++++++ .../jbd2-update-locking-coments.patch | 72 +++++++++++ queue-2.6.27/series | 24 ++++ 25 files changed, 1456 insertions(+) create mode 100644 queue-2.6.27/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch create mode 100644 queue-2.6.27/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch create mode 100644 queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch create mode 100644 
queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch create mode 100644 queue-2.6.27/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch create mode 100644 queue-2.6.27/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch create mode 100644 queue-2.6.27/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch create mode 100644 queue-2.6.27/ext4-fix-bb_prealloc_list-corruption-due-to-wrong-group-locking.patch create mode 100644 queue-2.6.27/ext4-fix-bogus-bug_ons-in-in-mballoc-code.patch create mode 100644 queue-2.6.27/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch create mode 100644 queue-2.6.27/ext4-fix-ext4_free_inode-vs.-ext4_claim_inode-race.patch create mode 100644 queue-2.6.27/ext4-fix-header-check-in-ext4_ext_search_right-for-deep-extent-trees.patch create mode 100644 queue-2.6.27/ext4-fix-locking-typo-in-mballoc-which-could-cause-soft-lockup-hangs.patch create mode 100644 queue-2.6.27/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch create mode 100644 queue-2.6.27/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch create mode 100644 queue-2.6.27/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch create mode 100644 queue-2.6.27/ext4-fix-typo-which-causes-a-memory-leak-on-error-path.patch create mode 100644 queue-2.6.27/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch create mode 100644 queue-2.6.27/ext4-print-the-find_group_flex-warning-only-once.patch create mode 100644 queue-2.6.27/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch create mode 100644 queue-2.6.27/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch create mode 100644 queue-2.6.27/ext4-tighten-restrictions-on-inode-flags.patch create mode 100644 queue-2.6.27/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch create mode 100644 
queue-2.6.27/jbd2-update-locking-coments.patch diff --git a/queue-2.6.27/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch b/queue-2.6.27/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch new file mode 100644 index 00000000000..4ccedc5d986 --- /dev/null +++ b/queue-2.6.27/ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch @@ -0,0 +1,119 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:55 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:22 -0400 +Subject: ext4: add EXT4_IOC_ALLOC_DA_BLKS ioctl +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-10-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit ccd2506bd43113659aa904d5bea5d1300605e2a6) + +Add an ioctl which forces all of the delay allocated blocks to be +allocated. This also provides a function ext4_alloc_da_blocks() which +will be used by the following commits to force files to be fully +allocated to preserve application-expected ext3 behaviour. + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ext4.h | 3 +++ + fs/ext4/inode.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + fs/ext4/ioctl.c | 14 ++++++++++++++ + 3 files changed, 59 insertions(+) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -326,7 +326,9 @@ struct ext4_new_group_data { + #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) + #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) + #define EXT4_IOC_MIGRATE _IO('f', 9) ++ /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ + /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ ++#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) + + /* + * ioctl commands in 32 bit emulation +@@ -1102,6 +1104,7 @@ extern int ext4_can_truncate(struct inod + extern void ext4_truncate (struct inode *); + extern void ext4_set_inode_flags(struct inode *); + extern void ext4_get_inode_flags(struct ext4_inode_info *); ++extern int ext4_alloc_da_blocks(struct 
inode *inode); + extern void ext4_set_aops(struct inode *inode); + extern int ext4_writepage_trans_blocks(struct inode *); + extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2585,6 +2585,48 @@ out: + return; + } + ++/* ++ * Force all delayed allocation blocks to be allocated for a given inode. ++ */ ++int ext4_alloc_da_blocks(struct inode *inode) ++{ ++ if (!EXT4_I(inode)->i_reserved_data_blocks && ++ !EXT4_I(inode)->i_reserved_meta_blocks) ++ return 0; ++ ++ /* ++ * We do something simple for now. The filemap_flush() will ++ * also start triggering a write of the data blocks, which is ++ * not strictly speaking necessary (and for users of ++ * laptop_mode, not even desirable). However, to do otherwise ++ * would require replicating code paths in: ++ * ++ * ext4_da_writepages() -> ++ * write_cache_pages() ---> (via passed in callback function) ++ * __mpage_da_writepage() --> ++ * mpage_add_bh_to_extent() ++ * mpage_da_map_blocks() ++ * ++ * The problem is that write_cache_pages(), located in ++ * mm/page-writeback.c, marks pages clean in preparation for ++ * doing I/O, which is not desirable if we're not planning on ++ * doing I/O at all. ++ * ++ * We could call write_cache_pages(), and then redirty all of ++ * the pages by calling redirty_page_for_writeback() but that ++ * would be ugly in the extreme. So instead we would need to ++ * replicate parts of the code in the above functions, ++ * simplifying them becuase we wouldn't actually intend to ++ * write out the pages, but rather only collect contiguous ++ * logical block extents, call the multi-block allocator, and ++ * then update the buffer heads with the block allocations. ++ * ++ * For now, though, we'll cheat by calling filemap_flush(), ++ * which will map the blocks, and start the I/O, but not ++ * actually wait for the I/O to complete. ++ */ ++ return filemap_flush(inode->i_mapping); ++} + + /* + * bmap() is special. 
It gets used by applications such as lilo and by +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -287,6 +287,20 @@ setversion_out: + return err; + } + ++ case EXT4_IOC_ALLOC_DA_BLKS: ++ { ++ int err; ++ if (!is_owner_or_cap(inode)) ++ return -EACCES; ++ ++ err = mnt_want_write(filp->f_path.mnt); ++ if (err) ++ return err; ++ err = ext4_alloc_da_blocks(inode); ++ mnt_drop_write(filp->f_path.mnt); ++ return err; ++ } ++ + default: + return -ENOTTY; + } diff --git a/queue-2.6.27/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch b/queue-2.6.27/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch new file mode 100644 index 00000000000..35a6d096142 --- /dev/null +++ b/queue-2.6.27/ext4-add-fine-print-for-the-32000-subdirectory-limit.patch @@ -0,0 +1,44 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:50 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:21 -0400 +Subject: ext4: Add fine print for the 32000 subdirectory limit +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-9-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 722bde6875bfb49a0c84e5601eb82dd7ac02d27c) + +Some people are reading the ext4 feature list too literally and create +dubious test cases involving very long filenames and 1k blocksize and +then complain when they run into an htree-imposed limit. So add fine +print to the "fix 32000 subdirectory limit" ext4 feature. 
+ +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/filesystems/ext4.txt | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/Documentation/filesystems/ext4.txt ++++ b/Documentation/filesystems/ext4.txt +@@ -73,7 +73,7 @@ Mailing list: linux-ext4@vger.kernel.org + * extent format more robust in face of on-disk corruption due to magics, + * internal redunancy in tree + * improved file allocation (multi-block alloc) +-* fix 32000 subdirectory limit ++* lift 32000 subdirectory limit imposed by i_links_count[1] + * nsec timestamps for mtime, atime, ctime, create time + * inode version field on disk (NFSv4, Lustre) + * reduced e2fsck time via uninit_bg feature +@@ -88,6 +88,9 @@ Mailing list: linux-ext4@vger.kernel.org + * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force + the ordering) + ++[1] Filesystems with a block size of 1k may see a limit imposed by the ++directory hash tree having a maximum depth of two. ++ + 2.2 Candidate features for future inclusion + + * Online defrag (patches available but not well tested) diff --git a/queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch b/queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch new file mode 100644 index 00000000000..2b39cc1e4f8 --- /dev/null +++ b/queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-close.patch @@ -0,0 +1,62 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:39 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:23 -0400 +Subject: ext4: Automatically allocate delay allocated blocks on close +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-11-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 7d8f9f7d150dded7b68e61ca6403a1f166fb4edf) + +When closing a file that had been previously truncated, force any +delay allocated blocks that to be allocated so that 
if the filesystem +is mounted with data=ordered, the data blocks will be pushed out to +disk along with the journal commit. Many application programs expect +this, so we do this to avoid zero length files if the system crashes +unexpectedly. + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ext4.h | 1 + + fs/ext4/file.c | 4 ++++ + fs/ext4/inode.c | 3 +++ + 3 files changed, 8 insertions(+) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -279,6 +279,7 @@ static inline __u32 ext4_mask_flags(umod + #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ + #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ + #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ ++#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ + + /* Used to pass group descriptor data when online resize is done */ + struct ext4_new_group_input { +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -33,6 +33,10 @@ + */ + static int ext4_release_file (struct inode * inode, struct file * filp) + { ++ if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { ++ ext4_alloc_da_blocks(inode); ++ EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; ++ } + /* if we are the last writer on the inode, drop the block reservation */ + if ((filp->f_mode & FMODE_WRITE) && + (atomic_read(&inode->i_writecount) == 1)) +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3636,6 +3636,9 @@ void ext4_truncate(struct inode *inode) + if (!ext4_can_truncate(inode)) + return; + ++ if (inode->i_size == 0) ++ ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; ++ + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { + ext4_ext_truncate(inode); + return; diff --git a/queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch b/queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch new file mode 100644 index 00000000000..c27b42e4896 --- /dev/null +++ 
b/queue-2.6.27/ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch @@ -0,0 +1,53 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:13:03 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:24 -0400 +Subject: ext4: Automatically allocate delay allocated blocks on rename +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-12-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 8750c6d5fcbd3342b3d908d157f81d345c5325a7) + +When renaming a file such that a link to another inode is overwritten, +force any delay allocated blocks that to be allocated so that if the +filesystem is mounted with data=ordered, the data blocks will be +pushed out to disk along with the journal commit. Many application +programs expect this, so we do this to avoid zero length files if the +system crashes unexpectedly. + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -2314,7 +2314,7 @@ static int ext4_rename (struct inode * o + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct ext4_dir_entry_2 * old_de, * new_de; +- int retval; ++ int retval, force_da_alloc = 0; + + old_bh = new_bh = dir_bh = NULL; + +@@ -2452,6 +2452,7 @@ static int ext4_rename (struct inode * o + ext4_mark_inode_dirty(handle, new_inode); + if (!new_inode->i_nlink) + ext4_orphan_add(handle, new_inode); ++ force_da_alloc = 1; + } + retval = 0; + +@@ -2460,6 +2461,8 @@ end_rename: + brelse (old_bh); + brelse (new_bh); + ext4_journal_stop(handle); ++ if (retval == 0 && force_da_alloc) ++ ext4_alloc_da_blocks(old_inode); + return retval; + } + diff --git a/queue-2.6.27/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch b/queue-2.6.27/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch new 
file mode 100644 index 00000000000..40e00c91520 --- /dev/null +++ b/queue-2.6.27/ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch @@ -0,0 +1,44 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:50 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:26 -0400 +Subject: ext4: Check for an valid i_mode when reading the inode from disk +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-14-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 563bdd61fe4dbd6b58cf7eb06f8d8f14479ae1dc) + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4129,7 +4129,8 @@ struct inode *ext4_iget(struct super_blo + inode->i_op = &ext4_symlink_inode_operations; + ext4_set_aops(inode); + } +- } else { ++ } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || ++ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + inode->i_op = &ext4_special_inode_operations; + if (raw_inode->i_block[0]) + init_special_inode(inode, inode->i_mode, +@@ -4137,6 +4138,13 @@ struct inode *ext4_iget(struct super_blo + else + init_special_inode(inode, inode->i_mode, + new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); ++ } else { ++ brelse(bh); ++ ret = -EIO; ++ ext4_error(inode->i_sb, __func__, ++ "bogus i_mode (%o) for inode=%lu", ++ inode->i_mode, inode->i_ino); ++ goto bad_inode; + } + brelse (iloc.bh); + ext4_set_inode_flags(inode); diff --git a/queue-2.6.27/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch b/queue-2.6.27/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch new file mode 100644 index 00000000000..8035b2a8dda --- /dev/null +++ b/queue-2.6.27/ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch @@ -0,0 +1,62 @@ +From 
stable-bounces@linux.kernel.org Tue Jun 2 05:11:35 2009 +From: Aneesh Kumar K.V +Date: Tue, 2 Jun 2009 08:09:35 -0400 +Subject: ext4: Clear the unwritten buffer_head flag after the extent is initialized +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" , "Aneesh Kumar K.V" +Message-ID: <1243944576-20915-23-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +(cherry picked from commit 2a8964d63d50dd2d65d71d342bc7fb6ef4117614) + +The BH_Unwritten flag indicates that the buffer is allocated on disk +but has not been written; that is, the disk was part of a persistent +preallocation area. That flag should only be set when a get_blocks() +function is looking up an inode's logical to physical block mapping. + +When ext4_get_blocks_wrap() is called with create=1, the uninitialized +extent is converted into an initialized one, so the BH_Unwritten flag +is no longer appropriate. Hence, we need to make sure the +BH_Unwritten is not left set, since the combination of BH_Mapped and +BH_Unwritten is not allowed; among other things, it will result in ext4's +get_block() being called over and over again during the write_begin +phase of write(2). + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1085,6 +1085,7 @@ int ext4_get_blocks_wrap(handle_t *handl + int retval; + + clear_buffer_mapped(bh); ++ clear_buffer_unwritten(bh); + + /* + * Try to see if we can get the block without requesting +@@ -1115,6 +1116,18 @@ int ext4_get_blocks_wrap(handle_t *handl + return retval; + + /* ++ * When we call get_blocks without the create flag, the ++ * BH_Unwritten flag could have gotten set if the blocks ++ * requested were part of a uninitialized extent. 
We need to ++ * clear this flag now that we are committed to convert all or ++ * part of the uninitialized extent to be an initialized ++ * extent. This is because we need to avoid the combination ++ * of BH_Unwritten and BH_Mapped flags being simultaneously ++ * set on the buffer_head. ++ */ ++ clear_buffer_unwritten(bh); ++ ++ /* + * New blocks allocate and/or writing to uninitialized extent + * will possibly result in updating i_data, so we take + * the write lock of i_data_sem, and call get_blocks() diff --git a/queue-2.6.27/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch b/queue-2.6.27/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch new file mode 100644 index 00000000000..cdc55440917 --- /dev/null +++ b/queue-2.6.27/ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch @@ -0,0 +1,60 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:17 2009 +From: Duane Griffin +Date: Tue, 2 Jun 2009 08:09:18 -0400 +Subject: ext4: don't inherit inappropriate inode flags from parent +To: stable@kernel.org +Cc: Andrew Morton , linux-ext4@vger.kernel.org, "Theodore Ts'o" , Duane Griffin +Message-ID: <1243944576-20915-6-git-send-email-tytso@mit.edu> + + +From: Duane Griffin + +(cherry picked from commit 8fa43a81b97853fc69417bb6054182e78f95cbeb) + +At present INDEX and EXTENTS are the only flags that new ext4 inodes do +NOT inherit from their parent. In addition prevent the flags DIRTY, +ECOMPR, IMAGIC, TOPDIR, HUGE_FILE and EXT_MIGRATE from being inherited. +List inheritable flags explicitly to prevent future flags from +accidentally being inherited. + +This fixes the TOPDIR flag inheritance bug reported at +http://bugzilla.kernel.org/show_bug.cgi?id=9866. 
+ +Signed-off-by: Duane Griffin +Acked-by: Andreas Dilger +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ext4.h | 7 +++++++ + fs/ext4/ialloc.c | 2 +- + 2 files changed, 8 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -248,6 +248,13 @@ struct flex_groups { + #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ + #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + ++/* Flags that should be inherited by new inodes from their parent. */ ++#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\ ++ EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\ ++ EXT4_NODUMP_FL | EXT4_NOATIME_FL |\ ++ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ ++ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) ++ + /* + * Inode dynamic state flags + */ +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -869,7 +869,7 @@ got: + * newly created directory and file only if -o extent mount option is + * specified + */ +- ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL); ++ ei->i_flags = EXT4_I(dir)->i_flags & EXT4_FL_INHERITED; + if (S_ISLNK(mode)) + ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); + /* dirsync only applies to directories */ diff --git a/queue-2.6.27/ext4-fix-bb_prealloc_list-corruption-due-to-wrong-group-locking.patch b/queue-2.6.27/ext4-fix-bb_prealloc_list-corruption-due-to-wrong-group-locking.patch new file mode 100644 index 00000000000..f4f857806c9 --- /dev/null +++ b/queue-2.6.27/ext4-fix-bb_prealloc_list-corruption-due-to-wrong-group-locking.patch @@ -0,0 +1,76 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:21 2009 +From: Eric Sandeen +Date: Tue, 2 Jun 2009 08:09:17 -0400 +Subject: ext4: fix bb_prealloc_list corruption due to wrong group locking +To: stable@kernel.org +Cc: Eric Sandeen , linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-5-git-send-email-tytso@mit.edu> + + +From: Eric 
Sandeen + +(cherry-picked from commit d33a1976fbee1ee321d6f014333d8f03a39d526c) + +This is for Red Hat bug 490026: EXT4 panic, list corruption in +ext4_mb_new_inode_pa + +ext4_lock_group(sb, group) is supposed to protect this list for +each group, and a common code flow to remove an album is like +this: + + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &grp, NULL); + ext4_lock_group(sb, grp); + list_del(&pa->pa_group_list); + ext4_unlock_group(sb, grp); + +so it's critical that we get the right group number back for +this prealloc context, to lock the right group (the one +associated with this pa) and prevent concurrent list manipulation. + +however, ext4_mb_put_pa() passes in (pa->pa_pstart - 1) with a +comment, "-1 is to protect from crossing allocation group". + +This makes sense for the group_pa, where pa_pstart is advanced +by the length which has been used (in ext4_mb_release_context()), +and when the entire length has been used, pa_pstart has been +advanced to the first block of the next group. + +However, for inode_pa, pa_pstart is never advanced; it's just +set once to the first block in the group and not moved after +that. So in this case, if we subtract one in ext4_mb_put_pa(), +we are actually locking the *previous* group, and opening the +race with the other threads which do not subtract off the extra +block. 
+ +Signed-off-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3698,6 +3698,7 @@ static void ext4_mb_put_pa(struct ext4_a + struct super_block *sb, struct ext4_prealloc_space *pa) + { + unsigned long grp; ++ ext4_fsblk_t grp_blk; + + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) + return; +@@ -3712,8 +3713,12 @@ static void ext4_mb_put_pa(struct ext4_a + pa->pa_deleted = 1; + spin_unlock(&pa->pa_lock); + +- /* -1 is to protect from crossing allocation group */ +- ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL); ++ grp_blk = pa->pa_pstart; ++ /* If linear, pa_pstart may be in the next group when pa is used up */ ++ if (pa->pa_linear) ++ grp_blk--; ++ ++ ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL); + + /* + * possible race: diff --git a/queue-2.6.27/ext4-fix-bogus-bug_ons-in-in-mballoc-code.patch b/queue-2.6.27/ext4-fix-bogus-bug_ons-in-in-mballoc-code.patch new file mode 100644 index 00000000000..d07073bb908 --- /dev/null +++ b/queue-2.6.27/ext4-fix-bogus-bug_ons-in-in-mballoc-code.patch @@ -0,0 +1,58 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:17 2009 +From: Eric Sandeen +Date: Tue, 2 Jun 2009 08:09:16 -0400 +Subject: ext4: fix bogus BUG_ONs in in mballoc code +To: stable@kernel.org +Cc: Eric Sandeen , linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-4-git-send-email-tytso@mit.edu> + + +From: Eric Sandeen + +(cherry picked from commit 8d03c7a0c550e7ab24cadcef5e66656bfadec8b9) + +Thiemo Nagel reported that: + +# dd if=/dev/zero of=image.ext4 bs=1M count=2 +# mkfs.ext4 -v -F -b 1024 -m 0 -g 512 -G 4 -I 128 -N 1 \ + -O large_file,dir_index,flex_bg,extent,sparse_super image.ext4 +# mount -o loop image.ext4 mnt/ +# dd if=/dev/zero of=mnt/file + +oopsed, with a BUG_ON in ext4_mb_normalize_request because +size == 
EXT4_BLOCKS_PER_GROUP + +It appears to me (esp. after talking to Andreas) that the BUG_ON +is bogus; a request of exactly EXT4_BLOCKS_PER_GROUP should +be allowed, though larger sizes do indicate a problem. + +Fix that and another (apparently rare) codepath with a similar check. + +Reported-by: Thiemo Nagel +Signed-off-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -1450,7 +1450,7 @@ static void ext4_mb_measure_extent(struc + struct ext4_free_extent *gex = &ac->ac_g_ex; + + BUG_ON(ex->fe_len <= 0); +- BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); ++ BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); + BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); + BUG_ON(ac->ac_status != AC_STATUS_CONTINUE); + +@@ -3400,7 +3400,7 @@ ext4_mb_normalize_request(struct ext4_al + } + BUG_ON(start + size <= ac->ac_o_ex.fe_logical && + start > ac->ac_o_ex.fe_logical); +- BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); ++ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); + + /* now prepare goal request */ + diff --git a/queue-2.6.27/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch b/queue-2.6.27/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch new file mode 100644 index 00000000000..c534148efbe --- /dev/null +++ b/queue-2.6.27/ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch @@ -0,0 +1,56 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:25 2009 +From: Aneesh Kumar K.V +Date: Tue, 2 Jun 2009 08:09:25 -0400 +Subject: ext4: Fix discard of inode prealloc space with delayed allocation. 
+To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" , "Aneesh Kumar K.V" +Message-ID: <1243944576-20915-13-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +(cherry picked from commit d6014301b5599fba395c42a1e96a7fe86f7d0b2d) + +With delayed allocation we should not/cannot discard inode prealloc +space during file close. We would still have dirty pages for which we +haven't allocated blocks yet. With this fix after each get_blocks +request we check whether we have zero reserved blocks and if yes and +we don't have any writers on the file we discard inode prealloc space. + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/file.c | 3 ++- + fs/ext4/inode.c | 8 ++++++++ + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/fs/ext4/file.c ++++ b/fs/ext4/file.c +@@ -39,7 +39,8 @@ static int ext4_release_file (struct ino + } + /* if we are the last writer on the inode, drop the block reservation */ + if ((filp->f_mode & FMODE_WRITE) && +- (atomic_read(&inode->i_writecount) == 1)) ++ (atomic_read(&inode->i_writecount) == 1) && ++ !EXT4_I(inode)->i_reserved_data_blocks) + { + down_write(&EXT4_I(inode)->i_data_sem); + ext4_discard_reservation(inode); +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1046,6 +1046,14 @@ static void ext4_da_update_reserve_space + EXT4_I(inode)->i_reserved_meta_blocks = mdb; + EXT4_I(inode)->i_allocated_meta_blocks = 0; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); ++ ++ /* ++ * If we have done all the pending block allocations and if ++ * there aren't any writers on the inode, we can discard the ++ * inode's preallocations. 
++ */ ++ if (!total && (atomic_read(&inode->i_writecount) == 0)) ++ ext4_discard_reservation(inode); + } + + /* diff --git a/queue-2.6.27/ext4-fix-ext4_free_inode-vs.-ext4_claim_inode-race.patch b/queue-2.6.27/ext4-fix-ext4_free_inode-vs.-ext4_claim_inode-race.patch new file mode 100644 index 00000000000..4a722daf799 --- /dev/null +++ b/queue-2.6.27/ext4-fix-ext4_free_inode-vs.-ext4_claim_inode-race.patch @@ -0,0 +1,68 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:04 2009 +From: Eric Sandeen +Date: Tue, 2 Jun 2009 08:09:13 -0400 +Subject: ext4: fix ext4_free_inode() vs. ext4_claim_inode() race +To: stable@kernel.org +Cc: Eric Sandeen , linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-1-git-send-email-tytso@mit.edu> + + +From: Eric Sandeen + +(cherry picked from commit 7ce9d5d1f3c8736511daa413c64985a05b2feee3) + +I was seeing fsck errors on inode bitmaps after a 4 thread +dbench run on a 4 cpu machine: + +Inode bitmap differences: -50736 -(50752--50753) etc... + +I believe that this is because ext4_free_inode() uses atomic +bitops, and although ext4_new_inode() *used* to also use atomic +bitops for synchronization, commit +393418676a7602e1d7d3f6e560159c65c8cbd50e changed this to use +the sb_bgl_lock, so that we could also synchronize against +read_inode_bitmap and initialization of uninit inode tables. + +However, that change left ext4_free_inode using atomic bitops, +which I think leaves no synchronization between setting & +unsetting bits in the inode table. + +The below patch fixes it for me, although I wonder if we're +getting at all heavy-handed with this spinlock... 
+ +Signed-off-by: Eric Sandeen +Reviewed-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ialloc.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -188,7 +188,7 @@ void ext4_free_inode (handle_t *handle, + struct ext4_group_desc * gdp; + struct ext4_super_block * es; + struct ext4_sb_info *sbi; +- int fatal = 0, err; ++ int fatal = 0, err, cleared; + ext4_group_t flex_group; + + if (atomic_read(&inode->i_count) > 1) { +@@ -242,10 +242,12 @@ void ext4_free_inode (handle_t *handle, + goto error_return; + + /* Ok, now we can actually update the inode bitmaps.. */ +- if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), +- bit, bitmap_bh->b_data)) +- ext4_error (sb, "ext4_free_inode", +- "bit already cleared for inode %lu", ino); ++ spin_lock(sb_bgl_lock(sbi, block_group)); ++ cleared = ext4_clear_bit(bit, bitmap_bh->b_data); ++ spin_unlock(sb_bgl_lock(sbi, block_group)); ++ if (!cleared) ++ ext4_error(sb, "ext4_free_inode", ++ "bit already cleared for inode %lu", ino); + else { + gdp = ext4_get_group_desc (sb, block_group, &bh2); + diff --git a/queue-2.6.27/ext4-fix-header-check-in-ext4_ext_search_right-for-deep-extent-trees.patch b/queue-2.6.27/ext4-fix-header-check-in-ext4_ext_search_right-for-deep-extent-trees.patch new file mode 100644 index 00000000000..6329659db12 --- /dev/null +++ b/queue-2.6.27/ext4-fix-header-check-in-ext4_ext_search_right-for-deep-extent-trees.patch @@ -0,0 +1,64 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:10 2009 +From: Eric Sandeen +Date: Tue, 2 Jun 2009 08:09:14 -0400 +Subject: ext4: fix header check in ext4_ext_search_right() for deep extent trees. 
+To: stable@kernel.org +Cc: Eric Sandeen , linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-2-git-send-email-tytso@mit.edu> + + +From: Eric Sandeen + +(cherry picked from commit 395a87bfefbc400011417e9eaae33169f9f036c0) + +The ext4_ext_search_right() function is confusing; it uses a +"depth" variable which is 0 at the root and maximum at the leaves, +but the on-disk metadata uses a "depth" (actually eh_depth) which +is opposite: maximum at the root, and 0 at the leaves. + +The ext4_ext_check_header() function is given a depth and checks +the header against that depth; it expects the on-disk semantics, +but we are giving it the opposite in the while loop in this +function. We should be giving it the on-disk notion of "depth" +which we can get from (p_depth - depth) - and if you look, the last +(more commonly hit) call to ext4_ext_check_header() does just this. + +Sending in the wrong depth results in (incorrect) messages +about corruption: + +EXT4-fs error (device sdb1): ext4_ext_search_right: bad header +in inode #2621457: unexpected eh_depth - magic f30a, entries 340, +max 340(0), depth 1(2) + +http://bugzilla.kernel.org/show_bug.cgi?id=12821 + +Reported-by: David Dindorp +Signed-off-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -1118,7 +1118,8 @@ ext4_ext_search_right(struct inode *inod + struct ext4_extent_idx *ix; + struct ext4_extent *ex; + ext4_fsblk_t block; +- int depth, ee_len; ++ int depth; /* Note, NOT eh_depth; depth from top of tree */ ++ int ee_len; + + BUG_ON(path == NULL); + depth = path->p_depth; +@@ -1177,7 +1178,8 @@ ext4_ext_search_right(struct inode *inod + if (bh == NULL) + return -EIO; + eh = ext_block_hdr(bh); +- if (ext4_ext_check_header(inode, eh, depth)) { ++ /* subtract from p_depth to get proper eh_depth */ ++ if
(ext4_ext_check_header(inode, eh, path->p_depth - depth)) { + put_bh(bh); + return -EIO; + } diff --git a/queue-2.6.27/ext4-fix-locking-typo-in-mballoc-which-could-cause-soft-lockup-hangs.patch b/queue-2.6.27/ext4-fix-locking-typo-in-mballoc-which-could-cause-soft-lockup-hangs.patch new file mode 100644 index 00000000000..39a5a5c0ef3 --- /dev/null +++ b/queue-2.6.27/ext4-fix-locking-typo-in-mballoc-which-could-cause-soft-lockup-hangs.patch @@ -0,0 +1,47 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:26 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:29 -0400 +Subject: ext4: fix locking typo in mballoc which could cause soft lockup hangs +To: stable@kernel.org +Cc: Chris Wright , linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-17-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +upstream commit: e7c9e3e99adf6c49c5d593a51375916acc039d1e + +Smatch (http://repo.or.cz/w/smatch.git/) complains about the locking in +ext4_mb_add_n_trim() from fs/ext4/mballoc.c + + 4438 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], + 4439 pa_inode_list) { + 4440 spin_lock(&tmp_pa->pa_lock); + 4441 if (tmp_pa->pa_deleted) { + 4442 spin_unlock(&pa->pa_lock); + 4443 continue; + 4444 } + +Brown paper bag time... 
+ +Reported-by: Dan Carpenter +Reviewed-by: Eric Sandeen +Reviewed-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Cc: stable@kernel.org +Signed-off-by: Chris Wright +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4532,7 +4532,7 @@ static void ext4_mb_add_n_trim(struct ex + pa_inode_list) { + spin_lock(&tmp_pa->pa_lock); + if (tmp_pa->pa_deleted) { +- spin_unlock(&pa->pa_lock); ++ spin_unlock(&tmp_pa->pa_lock); + continue; + } + if (!added && pa->pa_free < tmp_pa->pa_free) { diff --git a/queue-2.6.27/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch b/queue-2.6.27/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch new file mode 100644 index 00000000000..c2cc584c621 --- /dev/null +++ b/queue-2.6.27/ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch @@ -0,0 +1,85 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:01 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:36 -0400 +Subject: ext4: Fix race in ext4_inode_info.i_cached_extent +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-24-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 2ec0ae3acec47f628179ee95fe2c4da01b5e9fc4) + +If two CPU's simultaneously call ext4_ext_get_blocks() at the same +time, there is nothing protecting the i_cached_extent structure from +being used and updated at the same time. This could potentially cause +the wrong location on disk to be read or written to, including +potentially causing the corruption of the block group descriptors +and/or inode table. + +This bug has been in the ext4 code since almost the very beginning of +ext4's development. 
Fortunately once the data is stored in the page +cache, ext4_get_blocks() doesn't need to be called, so trying to +replicate this problem to the point where we could identify its root +cause was *extremely* difficult. Many thanks to Kevin Shanahan for +working over several months to be able to reproduce this easily so we +could finally nail down the cause of the corruption. + +Signed-off-by: "Theodore Ts'o" +Reviewed-by: "Aneesh Kumar K.V" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -1633,11 +1633,13 @@ ext4_ext_put_in_cache(struct inode *inod + { + struct ext4_ext_cache *cex; + BUG_ON(len == 0); ++ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + cex = &EXT4_I(inode)->i_cached_extent; + cex->ec_type = type; + cex->ec_block = block; + cex->ec_len = len; + cex->ec_start = start; ++ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + } + + /* +@@ -1694,12 +1696,17 @@ ext4_ext_in_cache(struct inode *inode, e + struct ext4_extent *ex) + { + struct ext4_ext_cache *cex; ++ int ret = EXT4_EXT_CACHE_NO; + ++ /* ++ * We borrow i_block_reservation_lock to protect i_cached_extent ++ */ ++ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + cex = &EXT4_I(inode)->i_cached_extent; + + /* has cache valid data?
*/ + if (cex->ec_type == EXT4_EXT_CACHE_NO) +- return EXT4_EXT_CACHE_NO; ++ goto errout; + + BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && + cex->ec_type != EXT4_EXT_CACHE_EXTENT); +@@ -1710,11 +1717,11 @@ ext4_ext_in_cache(struct inode *inode, e + ext_debug("%u cached by %u:%u:%llu\n", + block, + cex->ec_block, cex->ec_len, cex->ec_start); +- return cex->ec_type; ++ ret = cex->ec_type; + } +- +- /* not in cache */ +- return EXT4_EXT_CACHE_NO; ++errout: ++ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); ++ return ret; + } + + /* diff --git a/queue-2.6.27/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch b/queue-2.6.27/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch new file mode 100644 index 00000000000..f03d1a54d0c --- /dev/null +++ b/queue-2.6.27/ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch @@ -0,0 +1,49 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:11 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:31 -0400 +Subject: ext4: Fix softlockup caused by illegal i_file_acl value in on-disk inode +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-19-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 485c26ec70f823f2a9cf45982b724893e53a859e) + +If the block containing external extended attributes (which is stored +in i_file_acl and i_file_acl_high) is larger than the on-disk +filesystem, the process which tried to access the extended attributes +will endlessly issue kernel printks complaining that +"__find_get_block_slow() failed", locking up that CPU until the system +is forcibly rebooted. + +So when we read in the inode, make sure the i_file_acl value is legal, +and if not, flag the filesystem as being corrupted. 
+ +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4115,6 +4115,18 @@ struct inode *ext4_iget(struct super_blo + (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; + } + ++ if (ei->i_file_acl && ++ ((ei->i_file_acl < ++ (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + ++ EXT4_SB(sb)->s_gdb_count)) || ++ (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { ++ ext4_error(sb, __func__, ++ "bad extended attribute block %llu in inode #%lu", ++ ei->i_file_acl, inode->i_ino); ++ ret = -EIO; ++ goto bad_inode; ++ } ++ + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; diff --git a/queue-2.6.27/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch b/queue-2.6.27/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch new file mode 100644 index 00000000000..a26f6e4b074 --- /dev/null +++ b/queue-2.6.27/ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch @@ -0,0 +1,57 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:45 2009 +From: Aneesh Kumar K.V +Date: Tue, 2 Jun 2009 08:09:33 -0400 +Subject: ext4: Fix sub-block zeroing for writes into preallocated extents +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" , "Aneesh Kumar K.V" +Message-ID: <1243944576-20915-21-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +(cherry picked from commit 9c1ee184a30394e54165fa4c15923cabd952c106) + +We need to mark the buffer_head mapping preallocated space as new +during write_begin. Otherwise we don't zero out the page cache content +properly for a partial write. This will cause file corruption with +preallocation. + +Now that we mark the buffer_head new we also need to have a valid +buffer_head blocknr so that unmap_underlying_metadata() unmaps the +correct block. 
+ +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/extents.c | 2 ++ + fs/ext4/inode.c | 7 +++++++ + 2 files changed, 9 insertions(+) + +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -2670,6 +2670,8 @@ int ext4_ext_get_blocks(handle_t *handle + if (allocated > max_blocks) + allocated = max_blocks; + set_buffer_unwritten(bh_result); ++ bh_result->b_bdev = inode->i_sb->s_bdev; ++ bh_result->b_blocknr = newblock; + goto out2; + } + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2131,6 +2131,13 @@ static int ext4_da_get_block_prep(struct + set_buffer_delay(bh_result); + } else if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); ++ /* ++ * With sub-block writes into unwritten extents ++ * we also need to mark the buffer as new so that ++ * the unwritten parts of the buffer gets correctly zeroed. ++ */ ++ if (buffer_unwritten(bh_result)) ++ set_buffer_new(bh_result); + ret = 0; + } + diff --git a/queue-2.6.27/ext4-fix-typo-which-causes-a-memory-leak-on-error-path.patch b/queue-2.6.27/ext4-fix-typo-which-causes-a-memory-leak-on-error-path.patch new file mode 100644 index 00000000000..740ee48ea9a --- /dev/null +++ b/queue-2.6.27/ext4-fix-typo-which-causes-a-memory-leak-on-error-path.patch @@ -0,0 +1,35 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:50 2009 +From: Dan Carpenter +Date: Tue, 2 Jun 2009 08:09:28 -0400 +Subject: ext4: fix typo which causes a memory leak on error path +To: stable@kernel.org +Cc: Chris Wright , linux-ext4@vger.kernel.org, "Theodore Ts'o" , Dan Carpenter +Message-ID: <1243944576-20915-16-git-send-email-tytso@mit.edu> + + +From: Dan Carpenter + +upstream commit: a7b19448ddbdc34b2b8fedc048ba154ca798667b + +This was found by smatch (http://repo.or.cz/w/smatch.git/) + +Signed-off-by: Dan Carpenter +Signed-off-by: "Theodore Ts'o" +Cc: stable@kernel.org +Signed-off-by: Chris Wright +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/mballoc.c | 2 +- + 1 file 
changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -2698,7 +2698,7 @@ int ext4_mb_init(struct super_block *sb, + sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); + if (sbi->s_mb_maxs == NULL) { + clear_opt(sbi->s_mount_opt, MBALLOC); +- kfree(sbi->s_mb_maxs); ++ kfree(sbi->s_mb_offsets); + return -ENOMEM; + } + diff --git a/queue-2.6.27/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch b/queue-2.6.27/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch new file mode 100644 index 00000000000..b40bc322e3e --- /dev/null +++ b/queue-2.6.27/ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch @@ -0,0 +1,40 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:19 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:32 -0400 +Subject: ext4: Ignore i_file_acl_high unless EXT4_FEATURE_INCOMPAT_64BIT is present +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-20-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit a9e817425dc0baede8ebe5fbc9984a640257432b) + +Don't try to look at i_file_acl_high unless the INCOMPAT_64BIT feature +bit is set. The field is normally zero, but older versions of e2fsck +didn't automatically check to make sure of this, so in the spirit of +"be liberal in what you accept", don't look at i_file_acl_high unless +we are using a 64-bit filesystem. 
+ +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -4064,11 +4064,9 @@ struct inode *ext4_iget(struct super_blo + ei->i_flags = le32_to_cpu(raw_inode->i_flags); + inode->i_blocks = ext4_inode_blocks(raw_inode, ei); + ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); +- if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != +- cpu_to_le32(EXT4_OS_HURD)) { ++ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) + ei->i_file_acl |= + ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; +- } + inode->i_size = ext4_isize(raw_inode); + ei->i_disksize = inode->i_size; + inode->i_generation = le32_to_cpu(raw_inode->i_generation); diff --git a/queue-2.6.27/ext4-print-the-find_group_flex-warning-only-once.patch b/queue-2.6.27/ext4-print-the-find_group_flex-warning-only-once.patch new file mode 100644 index 00000000000..350fd7f6320 --- /dev/null +++ b/queue-2.6.27/ext4-print-the-find_group_flex-warning-only-once.patch @@ -0,0 +1,41 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:00 2009 +From: "Theodore Ts'o" +Date: Tue, 2 Jun 2009 08:09:15 -0400 +Subject: ext4: Print the find_group_flex() warning only once +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" +Message-ID: <1243944576-20915-3-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 2842c3b5449f31470b61db716f1926b594fb6156) + +This is a short-term warning, and even printk_ratelimit() can result +in too much noise in system logs. So only print it once as a warning. 
+ +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ialloc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -687,6 +687,7 @@ struct inode *ext4_new_inode(handle_t *h + struct inode *ret; + ext4_group_t i; + int free = 0; ++ static int once = 1; + ext4_group_t flex_group; + + /* Cannot create files in a deleted directory */ +@@ -706,7 +707,8 @@ struct inode *ext4_new_inode(handle_t *h + ret2 = find_group_flex(sb, dir, &group); + if (ret2 == -1) { + ret2 = find_group_other(sb, dir, &group); +- if (ret2 == 0 && printk_ratelimit()) ++ if (ret2 == 0 && once) ++ once = 0; + printk(KERN_NOTICE "ext4: find_group_flex " + "failed, fallback succeeded dir %lu\n", + dir->i_ino); diff --git a/queue-2.6.27/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch b/queue-2.6.27/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch new file mode 100644 index 00000000000..482c0fe1db2 --- /dev/null +++ b/queue-2.6.27/ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch @@ -0,0 +1,38 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:21 2009 +From: Chuck Ebbert +Date: Tue, 2 Jun 2009 08:09:30 -0400 +Subject: ext4: really print the find_group_flex fallback warning only once +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" , Chuck Ebbert +Message-ID: <1243944576-20915-18-git-send-email-tytso@mit.edu> + + +From: Chuck Ebbert + +(cherry picked from commit 6b82f3cb2d480b7714eb0ff61aee99c22160389e) + +Missing braces caused the warning to print more than once. 
+ +Signed-Off-By: Chuck Ebbert +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ialloc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -707,11 +707,12 @@ struct inode *ext4_new_inode(handle_t *h + ret2 = find_group_flex(sb, dir, &group); + if (ret2 == -1) { + ret2 = find_group_other(sb, dir, &group); +- if (ret2 == 0 && once) ++ if (ret2 == 0 && once) { + once = 0; + printk(KERN_NOTICE "ext4: find_group_flex " + "failed, fallback succeeded dir %lu\n", + dir->i_ino); ++ } + } + goto got_group; + } diff --git a/queue-2.6.27/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch b/queue-2.6.27/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch new file mode 100644 index 00000000000..be401ab4ea0 --- /dev/null +++ b/queue-2.6.27/ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch @@ -0,0 +1,59 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:56 2009 +From: Bryan Donlan +Date: Tue, 2 Jun 2009 08:09:20 -0400 +Subject: ext4: return -EIO not -ESTALE on directory traversal through deleted inode +To: stable@kernel.org +Cc: "Theodore Ts'o" , Andrew Morton , linux-ext4@vger.kernel.org, Bryan Donlan +Message-ID: <1243944576-20915-8-git-send-email-tytso@mit.edu> + + +From: Bryan Donlan + +(cherry picked from commit e6f009b0b45220c004672d41a58865e94946104d) + +ext4_iget() returns -ESTALE if invoked on a deleted inode, in order to +report errors to NFS properly. However, in ext4_lookup(), this +-ESTALE can be propagated to userspace if the filesystem is corrupted +such that a directory entry references a deleted inode. This leads to +a misleading error message - "Stale NFS file handle" - and confusion +on the part of the admin. + +The bug can be easily reproduced by creating a new filesystem, making +a link to an unused inode using debugfs, then mounting and attempting +to ls -l said link. 
+ +This patch thus changes ext4_lookup to return -EIO if it receives +-ESTALE from ext4_iget(), as ext4 does for other filesystem metadata +corruption; and also invokes the appropriate ext*_error functions when +this case is detected. + +Signed-off-by: Bryan Donlan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/namei.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -1055,8 +1055,16 @@ static struct dentry *ext4_lookup(struct + return ERR_PTR(-EIO); + } + inode = ext4_iget(dir->i_sb, ino); +- if (IS_ERR(inode)) +- return ERR_CAST(inode); ++ if (unlikely(IS_ERR(inode))) { ++ if (PTR_ERR(inode) == -ESTALE) { ++ ext4_error(dir->i_sb, __func__, ++ "deleted inode referenced: %u", ++ ino); ++ return ERR_PTR(-EIO); ++ } else { ++ return ERR_CAST(inode); ++ } ++ } + } + return d_splice_alias(inode, dentry); + } diff --git a/queue-2.6.27/ext4-tighten-restrictions-on-inode-flags.patch b/queue-2.6.27/ext4-tighten-restrictions-on-inode-flags.patch new file mode 100644 index 00000000000..995eb18019d --- /dev/null +++ b/queue-2.6.27/ext4-tighten-restrictions-on-inode-flags.patch @@ -0,0 +1,97 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:12:06 2009 +From: Duane Griffin +Date: Tue, 2 Jun 2009 08:09:19 -0400 +Subject: ext4: tighten restrictions on inode flags +To: stable@kernel.org +Cc: Andrew Morton , linux-ext4@vger.kernel.org, "Theodore Ts'o" , Duane Griffin +Message-ID: <1243944576-20915-7-git-send-email-tytso@mit.edu> + + +From: Duane Griffin + +(cherry picked from commit 2dc6b0d48ca0599837df21b14bb8393d0804af57) + +At the moment there are few restrictions on which flags may be set on +which inodes. Specifically DIRSYNC may only be set on directories and +IMMUTABLE and APPEND may not be set on links. 
Tighten that to disallow +TOPDIR being set on non-directories and only NODUMP and NOATIME to be set +on non-regular file, non-directories. + +Introduces a flags masking function which masks flags based on mode and +use it during inode creation and when flags are set via the ioctl to +facilitate future consistency. + +Signed-off-by: Duane Griffin +Acked-by: Andreas Dilger +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/ext4.h | 17 +++++++++++++++++ + fs/ext4/ialloc.c | 14 +++++--------- + fs/ext4/ioctl.c | 3 +-- + 3 files changed, 23 insertions(+), 11 deletions(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -255,6 +255,23 @@ struct flex_groups { + EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\ + EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL) + ++/* Flags that are appropriate for regular files (all but dir-specific ones). */ ++#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL)) ++ ++/* Flags that are appropriate for non-directories/regular files. */ ++#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) ++ ++/* Mask out flags that are inappropriate for the given type of inode. */ ++static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) ++{ ++ if (S_ISDIR(mode)) ++ return flags; ++ else if (S_ISREG(mode)) ++ return flags & EXT4_REG_FLMASK; ++ else ++ return flags & EXT4_OTHER_FLMASK; ++} ++ + /* + * Inode dynamic state flags + */ +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -865,16 +865,12 @@ got: + ei->i_disksize = 0; + + /* +- * Don't inherit extent flag from directory. We set extent flag on +- * newly created directory and file only if -o extent mount option is +- * specified ++ * Don't inherit extent flag from directory, amongst others. 
We set ++ * extent flag on newly created directory and file only if -o extent ++ * mount option is specified + */ +- ei->i_flags = EXT4_I(dir)->i_flags & EXT4_FL_INHERITED; +- if (S_ISLNK(mode)) +- ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); +- /* dirsync only applies to directories */ +- if (!S_ISDIR(mode)) +- ei->i_flags &= ~EXT4_DIRSYNC_FL; ++ ei->i_flags = ++ ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); + ei->i_file_acl = 0; + ei->i_dtime = 0; + ei->i_block_alloc_info = NULL; +--- a/fs/ext4/ioctl.c ++++ b/fs/ext4/ioctl.c +@@ -49,8 +49,7 @@ long ext4_ioctl(struct file *filp, unsig + if (err) + return err; + +- if (!S_ISDIR(inode->i_mode)) +- flags &= ~EXT4_DIRSYNC_FL; ++ flags = ext4_mask_flags(inode->i_mode, flags); + + err = -EPERM; + mutex_lock(&inode->i_mutex); diff --git a/queue-2.6.27/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch b/queue-2.6.27/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch new file mode 100644 index 00000000000..196a00816d0 --- /dev/null +++ b/queue-2.6.27/ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch @@ -0,0 +1,46 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:11:31 2009 +From: Aneesh Kumar K.V +Date: Tue, 2 Jun 2009 08:09:34 -0400 +Subject: ext4: Use a fake block number for delayed new buffer_head +To: stable@kernel.org +Cc: linux-ext4@vger.kernel.org, "Theodore Ts'o" , "Aneesh Kumar K.V" +Message-ID: <1243944576-20915-22-git-send-email-tytso@mit.edu> + + +From: Aneesh Kumar K.V + +(cherry picked from commit 33b9817e2ae097c7b8d256e3510ac6c54fc6d9d0) + +Use a very large unsigned number (~0xffff) as the fake block number +for the delayed new buffer. The VFS should never try to write out this +number, but if it does, this will make it obvious.
+ +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/ext4/inode.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -2105,6 +2105,10 @@ static int ext4_da_get_block_prep(struct + struct buffer_head *bh_result, int create) + { + int ret = 0; ++ sector_t invalid_block = ~((sector_t) 0xffff); ++ ++ if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) ++ invalid_block = ~0; + + BUG_ON(create == 0); + BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); +@@ -2126,7 +2130,7 @@ static int ext4_da_get_block_prep(struct + /* not enough space to reserve */ + return ret; + +- map_bh(bh_result, inode->i_sb, 0); ++ map_bh(bh_result, inode->i_sb, invalid_block); + set_buffer_new(bh_result); + set_buffer_delay(bh_result); + } else if (ret > 0) { diff --git a/queue-2.6.27/jbd2-update-locking-coments.patch b/queue-2.6.27/jbd2-update-locking-coments.patch new file mode 100644 index 00000000000..4aa96d1e17b --- /dev/null +++ b/queue-2.6.27/jbd2-update-locking-coments.patch @@ -0,0 +1,72 @@ +From stable-bounces@linux.kernel.org Tue Jun 2 05:13:07 2009 +From: Jan Kara +Date: Tue, 2 Jun 2009 08:09:27 -0400 +Subject: jbd2: Update locking coments +To: stable@kernel.org +Cc: "Theodore Ts'o" , linux-ext4@vger.kernel.org, Jan Kara +Message-ID: <1243944576-20915-15-git-send-email-tytso@mit.edu> + + +From: Jan Kara + +(cherry picked from commit 86db97c87f744364d5889ca8a4134ca2048b8f83) + +Update information about locking in JBD2 revoke code. Inconsistency in +comments found by Lin Tan . + +CC: Lin Tan . +Signed-off-by: Jan Kara +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman +--- + fs/jbd2/revoke.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +--- a/fs/jbd2/revoke.c ++++ b/fs/jbd2/revoke.c +@@ -55,6 +55,25 @@ + * need do nothing. + * RevokeValid set, Revoked set: + * buffer has been revoked. 
++ * ++ * Locking rules: ++ * We keep two hash tables of revoke records. One hashtable belongs to the ++ * running transaction (is pointed to by journal->j_revoke), the other one ++ * belongs to the committing transaction. Accesses to the second hash table ++ * happen only from the kjournald and no other thread touches this table. Also ++ * journal_switch_revoke_table() which switches which hashtable belongs to the ++ * running and which to the committing transaction is called only from ++ * kjournald. Therefore we need no locks when accessing the hashtable belonging ++ * to the committing transaction. ++ * ++ * All users operating on the hash table belonging to the running transaction ++ * have a handle to the transaction. Therefore they are safe from kjournald ++ * switching hash tables under them. For operations on the lists of entries in ++ * the hash table j_revoke_lock is used. ++ * ++ * Finally, also replay code uses the hash tables but at this moment no one else ++ * can touch them (filesystem isn't mounted yet) and hence no locking is ++ * needed. + */ + + #ifndef __KERNEL__ +@@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle + * the second time we would still have a pending revoke to cancel. So, + * do not trust the Revoked bit on buffers unless RevokeValid is also + * set. +- * +- * The caller must have the journal locked. + */ + int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) + { +@@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo + /* + * Write revoke records to the journal for all entries in the current + * revoke hash, deleting the entries as we go. +- * +- * Called with the journal lock held.
+ */ +- + void jbd2_journal_write_revoke_records(journal_t *journal, + transaction_t *transaction) + { diff --git a/queue-2.6.27/series b/queue-2.6.27/series index b192e0a8847..8bab2edce04 100644 --- a/queue-2.6.27/series +++ b/queue-2.6.27/series @@ -34,3 +34,27 @@ x86-pci-fix-mmconfig-detection-with-32bit-near-4g.patch v4l-dvb-cx88-prevent-general-protection-fault-on-rmmod.patch x86-fix-dmi-on-efi.patch mac80211-pid-fix-memory-corruption.patch +ext4-fix-ext4_free_inode-vs.-ext4_claim_inode-race.patch +ext4-fix-header-check-in-ext4_ext_search_right-for-deep-extent-trees.patch +ext4-print-the-find_group_flex-warning-only-once.patch +ext4-fix-bogus-bug_ons-in-in-mballoc-code.patch +ext4-fix-bb_prealloc_list-corruption-due-to-wrong-group-locking.patch +ext4-don-t-inherit-inappropriate-inode-flags-from-parent.patch +ext4-tighten-restrictions-on-inode-flags.patch +ext4-return-eio-not-estale-on-directory-traversal-through-deleted-inode.patch +ext4-add-fine-print-for-the-32000-subdirectory-limit.patch +ext4-add-ext4_ioc_alloc_da_blks-ioctl.patch +ext4-automatically-allocate-delay-allocated-blocks-on-close.patch +ext4-automatically-allocate-delay-allocated-blocks-on-rename.patch +ext4-fix-discard-of-inode-prealloc-space-with-delayed-allocation.patch +ext4-check-for-an-valid-i_mode-when-reading-the-inode-from-disk.patch +jbd2-update-locking-coments.patch +ext4-fix-typo-which-causes-a-memory-leak-on-error-path.patch +ext4-fix-locking-typo-in-mballoc-which-could-cause-soft-lockup-hangs.patch +ext4-really-print-the-find_group_flex-fallback-warning-only-once.patch +ext4-fix-softlockup-caused-by-illegal-i_file_acl-value-in-on-disk-inode.patch +ext4-ignore-i_file_acl_high-unless-ext4_feature_incompat_64bit-is-present.patch +ext4-fix-sub-block-zeroing-for-writes-into-preallocated-extents.patch +ext4-use-a-fake-block-number-for-delayed-new-buffer_head.patch +ext4-clear-the-unwritten-buffer_head-flag-after-the-extent-is-initialized.patch 
+ext4-fix-race-in-ext4_inode_info.i_cached_extent.patch -- 2.47.2