From c62ac1bfe22e5dcea4e3ced4a7ddda6b4f2a02f8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Mar 2009 23:33:32 -0700 Subject: [PATCH] more .27 patches --- ...dd-i2c_board_info-for-riscpc-pcf8583.patch | 69 ++++++++++ ...xt4-add-fallback-for-find_group_flex.patch | 49 +++++++ ..._write_begin-and-ext4_da_write_begin.patch | 61 +++++++++ queue-2.6.27/ext4-fix-lockdep-warning.patch | 78 +++++++++++ ...in-ext4_ext_migrate-s-error-handling.patch | 55 ++++++++ ...ty-directory-blocks-correctly-in-64k.patch | 46 +++++++ ...e-preallocation-list_head-s-properly.patch | 44 +++++++ .../fix-no_timer_check-on-x86_64.patch | 45 +++++++ ...-jbd2_journal_begin_ordered_truncate.patch | 124 ++++++++++++++++++ ...n-value-of-jbd2_journal_start_commit.patch | 103 +++++++++++++++ ...-all-pending-commits-in-ext4_sync_fs.patch | 57 ++++++++ queue-2.6.27/series | 11 ++ 12 files changed, 742 insertions(+) create mode 100644 queue-2.6.27/arm-add-i2c_board_info-for-riscpc-pcf8583.patch create mode 100644 queue-2.6.27/ext4-add-fallback-for-find_group_flex.patch create mode 100644 queue-2.6.27/ext4-fix-deadlock-in-ext4_write_begin-and-ext4_da_write_begin.patch create mode 100644 queue-2.6.27/ext4-fix-lockdep-warning.patch create mode 100644 queue-2.6.27/ext4-fix-null-dereference-in-ext4_ext_migrate-s-error-handling.patch create mode 100644 queue-2.6.27/ext4-fix-to-read-empty-directory-blocks-correctly-in-64k.patch create mode 100644 queue-2.6.27/ext4-initialize-preallocation-list_head-s-properly.patch create mode 100644 queue-2.6.27/fix-no_timer_check-on-x86_64.patch create mode 100644 queue-2.6.27/jbd2-avoid-possible-null-dereference-in-jbd2_journal_begin_ordered_truncate.patch create mode 100644 queue-2.6.27/jbd2-fix-return-value-of-jbd2_journal_start_commit.patch create mode 100644 queue-2.6.27/revert-ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch diff --git a/queue-2.6.27/arm-add-i2c_board_info-for-riscpc-pcf8583.patch 
b/queue-2.6.27/arm-add-i2c_board_info-for-riscpc-pcf8583.patch new file mode 100644 index 00000000000..0f906c95e00 --- /dev/null +++ b/queue-2.6.27/arm-add-i2c_board_info-for-riscpc-pcf8583.patch @@ -0,0 +1,69 @@ +From khali@linux-fr.org Thu Mar 12 23:24:26 2009 +From: Russell King +Date: Wed, 25 Feb 2009 21:36:29 +0100 +Subject: ARM: Add i2c_board_info for RiscPC PCF8583 +To: stable@kernel.org +Cc: Russell King +Message-ID: <20090225213629.566fb4cb@hyperion.delvare> + + +From: Russell King + +commit 531660ef5604c75de6fdead9da1304051af17c09 upstream + +Add the necessary i2c_board_info structure to fix the lack of PCF8583 +RTC on RiscPC. + +Signed-off-by: Russell King +Signed-off-by: Jean Delvare +Cc: Alessandro Zummo +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/mach-rpc/riscpc.c | 6 ++++++ + drivers/i2c/busses/i2c-acorn.c | 3 ++- + 2 files changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/arm/mach-rpc/riscpc.c ++++ b/arch/arm/mach-rpc/riscpc.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -201,8 +202,13 @@ static struct platform_device *devs[] __ + &pata_device, + }; + ++static struct i2c_board_info i2c_rtc = { ++ I2C_BOARD_INFO("pcf8583", 0x50) ++}; ++ + static int __init rpc_init(void) + { ++ i2c_register_board_info(0, &i2c_rtc, 1); + return platform_add_devices(devs, ARRAY_SIZE(devs)); + } + +--- a/drivers/i2c/busses/i2c-acorn.c ++++ b/drivers/i2c/busses/i2c-acorn.c +@@ -84,6 +84,7 @@ static struct i2c_algo_bit_data ioc_data + + static struct i2c_adapter ioc_ops = { + .id = I2C_HW_B_IOC, ++ .nr = 0, + .algo_data = &ioc_data, + }; + +@@ -91,7 +92,7 @@ static int __init i2c_ioc_init(void) + { + force_ones = FORCE_ONES | SCL | SDA; + +- return i2c_bit_add_bus(&ioc_ops); ++ return i2c_bit_add_numbered_bus(&ioc_ops); + } + + module_init(i2c_ioc_init); diff --git a/queue-2.6.27/ext4-add-fallback-for-find_group_flex.patch b/queue-2.6.27/ext4-add-fallback-for-find_group_flex.patch new file mode 100644 index 
00000000000..4a199259a5c --- /dev/null +++ b/queue-2.6.27/ext4-add-fallback-for-find_group_flex.patch @@ -0,0 +1,49 @@ +From tytso@mit.edu Thu Mar 12 23:32:27 2009 +From: "Theodore Ts'o" +Date: Thu, 5 Mar 2009 02:34:06 -0500 +Subject: ext4: Add fallback for find_group_flex +To: stable@kernel.org +Cc: Ext4 Developers List , "Theodore Ts'o" +Message-ID: <1236238447-8341-1-git-send-email-tytso@mit.edu> + +From: "Theodore Ts'o" + +(cherry picked from commit 05bf9e839d9de4e8a094274a0a2fd07beb47eaf1) + +This is a workaround for find_group_flex() which badly needs to be +replaced. One of its problems (besides ignoring the Orlov algorithm) +is that it is a bit hyperactive about returning failure under +suspicious circumstances. This can lead to spurious ENOSPC failures +even when there are inodes still available. + +Work around this for now by retrying the search using +find_group_other() if find_group_flex() returns -1. If +find_group_other() succeeds when find_group_flex() has failed, log a +warning message. + +A better block/inode allocator that will fix this problem for real has +been queued up for the next merge window. 
+ +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ialloc.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -702,6 +702,13 @@ struct inode *ext4_new_inode(handle_t *h + + if (sbi->s_log_groups_per_flex) { + ret2 = find_group_flex(sb, dir, &group); ++ if (ret2 == -1) { ++ ret2 = find_group_other(sb, dir, &group); ++ if (ret2 == 0 && printk_ratelimit()) ++ printk(KERN_NOTICE "ext4: find_group_flex " ++ "failed, fallback succeeded dir %lu\n", ++ dir->i_ino); ++ } + goto got_group; + } + diff --git a/queue-2.6.27/ext4-fix-deadlock-in-ext4_write_begin-and-ext4_da_write_begin.patch b/queue-2.6.27/ext4-fix-deadlock-in-ext4_write_begin-and-ext4_da_write_begin.patch new file mode 100644 index 00000000000..a2e970cd5f8 --- /dev/null +++ b/queue-2.6.27/ext4-fix-deadlock-in-ext4_write_begin-and-ext4_da_write_begin.patch @@ -0,0 +1,61 @@ +From tytso@mit.edu Thu Mar 12 23:32:47 2009 +From: Jan Kara +Date: Thu, 5 Mar 2009 02:34:07 -0500 +Subject: ext4: Fix deadlock in ext4_write_begin() and ext4_da_write_begin() +To: stable@kernel.org +Cc: "Theodore Ts'o" , Ext4 Developers List , Jan Kara +Message-ID: <1236238447-8341-2-git-send-email-tytso@mit.edu> + +From: Jan Kara + +(cherry picked from commit ebd3610b110bbb18ea6f9f2aeed1e1068c537227) + +Functions ext4_write_begin() and ext4_da_write_begin() call +grab_cache_page_write_begin() without AOP_FLAG_NOFS. Thus it +can happen that page reclaim is triggered in that function +and it recurses back into the filesystem (or some other filesystem). +But this can lead to various problems as a transaction is already +started at that point. Add the necessary flag. 
+ +http://bugzilla.kernel.org/show_bug.cgi?id=11688 + +Signed-off-by: Jan Kara +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1372,6 +1372,10 @@ retry: + goto out; + } + ++ /* We cannot recurse into the filesystem as the transaction is already ++ * started */ ++ flags |= AOP_FLAG_NOFS; ++ + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) { + ext4_journal_stop(handle); +@@ -1381,7 +1385,7 @@ retry: + *pagep = page; + + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, +- ext4_get_block); ++ ext4_get_block); + + if (!ret && ext4_should_journal_data(inode)) { + ret = walk_page_buffers(handle, page_buffers(page), +@@ -2465,6 +2469,9 @@ retry: + ret = PTR_ERR(handle); + goto out; + } ++ /* We cannot recurse into the filesystem as the transaction is already ++ * started */ ++ flags |= AOP_FLAG_NOFS; + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) { diff --git a/queue-2.6.27/ext4-fix-lockdep-warning.patch b/queue-2.6.27/ext4-fix-lockdep-warning.patch new file mode 100644 index 00000000000..a36bf6eff7a --- /dev/null +++ b/queue-2.6.27/ext4-fix-lockdep-warning.patch @@ -0,0 +1,78 @@ +From tytso@mit.edu Thu Mar 12 23:30:39 2009 +From: Aneesh Kumar K.V +Date: Tue, 24 Feb 2009 12:14:52 -0500 +Subject: ext4: Fix lockdep warning +To: stable@kernel.org +Cc: "Theodore Ts'o" , "Aneesh Kumar K.V" +Message-ID: <1235495694-8116-5-git-send-email-tytso@mit.edu> + +From: Aneesh Kumar K.V + +(cherry picked from commit ba4439165f0f0d25b2fe065cf0c1ff8130b802eb) + +We should not call ext4_mb_add_n_trim while holding alloc_semp. 
+ + ============================================= + [ INFO: possible recursive locking detected ] + 2.6.29-rc4-git1-dirty #124 + --------------------------------------------- + ffsb/3116 is trying to acquire lock: + (&meta_group_info[i]->alloc_sem){----}, at: [] + ext4_mb_load_buddy+0xd2/0x343 + + but task is already holding lock: + (&meta_group_info[i]->alloc_sem){----}, at: [] + ext4_mb_load_buddy+0xd2/0x343 + +http://bugzilla.kernel.org/show_bug.cgi?id=12672 + +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/mballoc.c | 29 ++++++++++++++++------------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4569,23 +4569,26 @@ static int ext4_mb_release_context(struc + pa->pa_free -= ac->ac_b_ex.fe_len; + pa->pa_len -= ac->ac_b_ex.fe_len; + spin_unlock(&pa->pa_lock); +- /* +- * We want to add the pa to the right bucket. +- * Remove it from the list and while adding +- * make sure the list to which we are adding +- * doesn't grow big. +- */ +- if (likely(pa->pa_free)) { +- spin_lock(pa->pa_obj_lock); +- list_del_rcu(&pa->pa_inode_list); +- spin_unlock(pa->pa_obj_lock); +- ext4_mb_add_n_trim(ac); +- } + } +- ext4_mb_put_pa(ac, ac->ac_sb, pa); + } + if (ac->alloc_semp) + up_read(ac->alloc_semp); ++ if (pa) { ++ /* ++ * We want to add the pa to the right bucket. ++ * Remove it from the list and while adding ++ * make sure the list to which we are adding ++ * doesn't grow big. 
We need to release ++ * alloc_semp before calling ext4_mb_add_n_trim() ++ */ ++ if (pa->pa_linear && likely(pa->pa_free)) { ++ spin_lock(pa->pa_obj_lock); ++ list_del_rcu(&pa->pa_inode_list); ++ spin_unlock(pa->pa_obj_lock); ++ ext4_mb_add_n_trim(ac); ++ } ++ ext4_mb_put_pa(ac, ac->ac_sb, pa); ++ } + if (ac->ac_bitmap_page) + page_cache_release(ac->ac_bitmap_page); + if (ac->ac_buddy_page) diff --git a/queue-2.6.27/ext4-fix-null-dereference-in-ext4_ext_migrate-s-error-handling.patch b/queue-2.6.27/ext4-fix-null-dereference-in-ext4_ext_migrate-s-error-handling.patch new file mode 100644 index 00000000000..df525aac5ab --- /dev/null +++ b/queue-2.6.27/ext4-fix-null-dereference-in-ext4_ext_migrate-s-error-handling.patch @@ -0,0 +1,55 @@ +From tytso@mit.edu Thu Mar 12 23:31:40 2009 +From: "Theodore Ts'o" +Date: Tue, 24 Feb 2009 12:14:54 -0500 +Subject: ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling +To: stable@kernel.org +Cc: "Theodore Ts'o" , Dan Carpenter +Message-ID: <1235495694-8116-7-git-send-email-tytso@mit.edu> + +From: Dan Carpenter + +(cherry picked from commit 090542641de833c6f756895fc2f139f046e298f9) + +This was found through a code checker (http://repo.or.cz/w/smatch.git/). +It looks like you might be able to trigger the error by trying to migrate +a readonly file system. 
+ +Signed-off-by: Dan Carpenter +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/migrate.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/fs/ext4/migrate.c ++++ b/fs/ext4/migrate.c +@@ -480,7 +480,7 @@ int ext4_ext_migrate(struct inode *inode + + 1); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); +- goto err_out; ++ return retval; + } + tmp_inode = ext4_new_inode(handle, + inode->i_sb->s_root->d_inode, +@@ -488,8 +488,7 @@ int ext4_ext_migrate(struct inode *inode + if (IS_ERR(tmp_inode)) { + retval = -ENOMEM; + ext4_journal_stop(handle); +- tmp_inode = NULL; +- goto err_out; ++ return retval; + } + i_size_write(tmp_inode, i_size_read(inode)); + /* +@@ -617,8 +616,7 @@ err_out: + + ext4_journal_stop(handle); + +- if (tmp_inode) +- iput(tmp_inode); ++ iput(tmp_inode); + + return retval; + } diff --git a/queue-2.6.27/ext4-fix-to-read-empty-directory-blocks-correctly-in-64k.patch b/queue-2.6.27/ext4-fix-to-read-empty-directory-blocks-correctly-in-64k.patch new file mode 100644 index 00000000000..5255c97b748 --- /dev/null +++ b/queue-2.6.27/ext4-fix-to-read-empty-directory-blocks-correctly-in-64k.patch @@ -0,0 +1,46 @@ +From tytso@mit.edu Thu Mar 12 23:29:58 2009 +From: Wei Yongjun +Date: Tue, 24 Feb 2009 12:14:51 -0500 +Subject: ext4: Fix to read empty directory blocks correctly in 64k +To: stable@kernel.org +Cc: "Theodore Ts'o" , Wei Yongjun +Message-ID: <1235495694-8116-4-git-send-email-tytso@mit.edu> + +From: Wei Yongjun + +(cherry picked from commit 7be2baaa0322c59ba888aa5260a8c130666acd41) + +The rec_len field in the directory entry is 16 bits, so there was a +problem representing rec_len for filesystems with a 64k block size in +the case where the directory entry takes the entire 64k block. +Unfortunately, there were two schemes that were proposed; one where +all zeros meant 65536 and one where all ones (65535) meant 65536. +E2fsprogs used 0, whereas the kernel used 65535. Oops. 
Fortunately +this case happens extremely rarely, with the most common case being +the lost+found directory, created by mke2fs. + +So we will be liberal in what we accept, and accept both encodings, +but we will continue to encode 65536 as 65535. This will require a +change in e2fsprogs, but fortunately ext4 filesystems normally +have the dir_index feature enabled, which precludes having a +completely empty directory block. + +Signed-off-by: Wei Yongjun +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -860,7 +860,7 @@ static inline unsigned ext4_rec_len_from + { + unsigned len = le16_to_cpu(dlen); + +- if (len == EXT4_MAX_REC_LEN) ++ if (len == EXT4_MAX_REC_LEN || len == 0) + return 1 << 16; + return len; + } diff --git a/queue-2.6.27/ext4-initialize-preallocation-list_head-s-properly.patch b/queue-2.6.27/ext4-initialize-preallocation-list_head-s-properly.patch new file mode 100644 index 00000000000..91e13806093 --- /dev/null +++ b/queue-2.6.27/ext4-initialize-preallocation-list_head-s-properly.patch @@ -0,0 +1,44 @@ +From tytso@mit.edu Thu Mar 12 23:31:06 2009 +From: "Theodore Ts'o" +Date: Tue, 24 Feb 2009 12:14:53 -0500 +Subject: ext4: Initialize preallocation list_head's properly +To: stable@kernel.org +Cc: "Theodore Ts'o" , "Aneesh Kumar K.V" +Message-ID: <1235495694-8116-6-git-send-email-tytso@mit.edu> + +From: Aneesh Kumar K.V + +(cherry picked from commit d794bf8e0936dce45104565cd48c571061f4c1e3) + +When creating a new ext4_prealloc_space structure, we have to +initialize its list_head pointers before we add them to any prealloc +lists. Otherwise, with list debug enabled, we will get list +corruption warnings. 
+ +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/mballoc.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -3802,6 +3802,8 @@ ext4_mb_new_inode_pa(struct ext4_allocat + pa->pa_free = pa->pa_len; + atomic_set(&pa->pa_count, 1); + spin_lock_init(&pa->pa_lock); ++ INIT_LIST_HEAD(&pa->pa_inode_list); ++ INIT_LIST_HEAD(&pa->pa_group_list); + pa->pa_deleted = 0; + pa->pa_linear = 0; + +@@ -3860,6 +3862,7 @@ ext4_mb_new_group_pa(struct ext4_allocat + atomic_set(&pa->pa_count, 1); + spin_lock_init(&pa->pa_lock); + INIT_LIST_HEAD(&pa->pa_inode_list); ++ INIT_LIST_HEAD(&pa->pa_group_list); + pa->pa_deleted = 0; + pa->pa_linear = 1; + diff --git a/queue-2.6.27/fix-no_timer_check-on-x86_64.patch b/queue-2.6.27/fix-no_timer_check-on-x86_64.patch new file mode 100644 index 00000000000..28bb87fa254 --- /dev/null +++ b/queue-2.6.27/fix-no_timer_check-on-x86_64.patch @@ -0,0 +1,45 @@ +From agraf@suse.de Thu Mar 12 23:25:09 2009 +From: Alexander Graf +Date: Tue, 10 Mar 2009 12:58:20 +0100 +Subject: Fix no_timer_check on x86_64 +To: stable@kernel.org +Cc: mtosatti@redhat.com +Message-ID: <1236686300-14054-1-git-send-email-agraf@suse.de> + +From: Alexander Graf + +fixed upstream in 2.6.28 in merge of ioapic*.c for x86 + +In io_apic_32.c the logic of no_timer_check is "always make timer_irq_works +return 1". + +Io_apic_64.c on the other hand checks for + if (!no_timer_check && timer_irq_works()) +basically meaning "make timer_irq_works fail" in the crucial first check. + +Now, in order to not move too much code, we can just reverse the logic here +and should be fine off, basically rendering no_timer_check useful again. + +This issue seems to be resolved as of 2.6.28 by the merge of io_apic*.c, +but still exists for at least 2.6.27. 
+ +Signed-off-by: Alexander Graf +Acked-by: Marcelo Tosatti +Signed-off-by: Greg Kroah-Hartman + + +--- + arch/x86/kernel/io_apic_64.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/io_apic_64.c ++++ b/arch/x86/kernel/io_apic_64.c +@@ -1729,7 +1729,7 @@ static inline void __init check_timer(vo + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } + unmask_IO_APIC_irq(0); +- if (!no_timer_check && timer_irq_works()) { ++ if (no_timer_check || timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + enable_8259A_irq(0); diff --git a/queue-2.6.27/jbd2-avoid-possible-null-dereference-in-jbd2_journal_begin_ordered_truncate.patch b/queue-2.6.27/jbd2-avoid-possible-null-dereference-in-jbd2_journal_begin_ordered_truncate.patch new file mode 100644 index 00000000000..92823980d82 --- /dev/null +++ b/queue-2.6.27/jbd2-avoid-possible-null-dereference-in-jbd2_journal_begin_ordered_truncate.patch @@ -0,0 +1,124 @@ +From tytso@mit.edu Thu Mar 12 23:28:55 2009 +From: Jan Kara +Date: Tue, 24 Feb 2009 16:13:18 -0500 +Subject: jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate() +To: stable@kernel.org +Cc: mfasheh@suse.de, linux-ext4@vger.kernel.org, Jan Kara , Dan Carpenter , ocfs2-devel@oss.oracle.com +Message-ID: <20090224211318.GC7064@mit.edu> + +From: Jan Kara + +(cherry picked from commit 7f5aa215088b817add9c71914b83650bdd49f8a9) + +If we race with commit code setting i_transaction to NULL, we could +possibly dereference it. Proper locking requires the journal pointer +(to access journal->j_list_lock), which we don't have. So we have to +change the prototype of the function so that filesystem passes us the +journal pointer. Also add a more detailed comment about why the +function jbd2_journal_begin_ordered_truncate() does what it does and +how it should be used. + +Thanks to Dan Carpenter for pointing to the +suspicious code. 
+ +Signed-off-by: Jan Kara +Signed-off-by: "Theodore Ts'o" +Acked-by: Joel Becker +CC: linux-ext4@vger.kernel.org +CC: mfasheh@suse.de +CC: Dan Carpenter +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 6 ++++-- + fs/jbd2/transaction.c | 42 +++++++++++++++++++++++++++++++----------- + include/linux/jbd2.h | 3 ++- + 3 files changed, 37 insertions(+), 14 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -46,8 +46,10 @@ + static inline int ext4_begin_ordered_truncate(struct inode *inode, + loff_t new_size) + { +- return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, +- new_size); ++ return jbd2_journal_begin_ordered_truncate( ++ EXT4_SB(inode->i_sb)->s_journal, ++ &EXT4_I(inode)->jinode, ++ new_size); + } + + static void ext4_invalidatepage(struct page *page, unsigned long offset); +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -2049,26 +2049,46 @@ done: + } + + /* +- * This function must be called when inode is journaled in ordered mode +- * before truncation happens. It starts writeout of truncated part in +- * case it is in the committing transaction so that we stand to ordered +- * mode consistency guarantees. ++ * File truncate and transaction commit interact with each other in a ++ * non-trivial way. If a transaction writing data block A is ++ * committing, we cannot discard the data by truncate until we have ++ * written them. Otherwise if we crashed after the transaction with ++ * write has committed but before the transaction with truncate has ++ * committed, we could see stale data in block A. This function is a ++ * helper to solve this problem. It starts writeout of the truncated ++ * part in case it is in the committing transaction. ++ * ++ * Filesystem code must call this function when inode is journaled in ++ * ordered mode before truncation happens and after the inode has been ++ * placed on orphan list with the new inode size. 
The second condition ++ * avoids the race that someone writes new data and we start ++ * committing the transaction after this function has been called but ++ * before a transaction for truncate is started (and furthermore it ++ * allows us to optimize the case where the addition to orphan list ++ * happens in the same transaction as write --- we don't have to write ++ * any data in such case). + */ +-int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, ++int jbd2_journal_begin_ordered_truncate(journal_t *journal, ++ struct jbd2_inode *jinode, + loff_t new_size) + { +- journal_t *journal; +- transaction_t *commit_trans; ++ transaction_t *inode_trans, *commit_trans; + int ret = 0; + +- if (!inode->i_transaction && !inode->i_next_transaction) ++ /* This is a quick check to avoid locking if not necessary */ ++ if (!jinode->i_transaction) + goto out; +- journal = inode->i_transaction->t_journal; ++ /* Locks are here just to force reading of recent values, it is ++ * enough that the transaction was not committing before we started ++ * a transaction adding the inode to orphan list */ + spin_lock(&journal->j_state_lock); + commit_trans = journal->j_committing_transaction; + spin_unlock(&journal->j_state_lock); +- if (inode->i_transaction == commit_trans) { +- ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, ++ spin_lock(&journal->j_list_lock); ++ inode_trans = jinode->i_transaction; ++ spin_unlock(&journal->j_list_lock); ++ if (inode_trans == commit_trans) { ++ ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, + new_size, LLONG_MAX); + if (ret) + jbd2_journal_abort(journal, ret); +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1075,7 +1075,8 @@ extern int jbd2_journal_clear_err (j + extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); + extern int jbd2_journal_force_commit(journal_t *); + extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); +-extern int 
jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); ++extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, ++ struct jbd2_inode *inode, loff_t new_size); + extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); + extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); + diff --git a/queue-2.6.27/jbd2-fix-return-value-of-jbd2_journal_start_commit.patch b/queue-2.6.27/jbd2-fix-return-value-of-jbd2_journal_start_commit.patch new file mode 100644 index 00000000000..695a6243f8d --- /dev/null +++ b/queue-2.6.27/jbd2-fix-return-value-of-jbd2_journal_start_commit.patch @@ -0,0 +1,103 @@ +From tytso@mit.edu Thu Mar 12 23:26:22 2009 +From: Jan Kara +Date: Tue, 24 Feb 2009 12:14:48 -0500 +Subject: jbd2: Fix return value of jbd2_journal_start_commit() +To: stable@kernel.org +Cc: "Theodore Ts'o" , linux-ext4@vger.kernel.org, Jan Kara , Eric Sandeen +Message-ID: <1235495694-8116-1-git-send-email-tytso@mit.edu> + + +From: Jan Kara + +(cherry picked from commit c88ccea3143975294f5a52097546bcbb75975f52) + +The function jbd2_journal_start_commit() returns 1 if either a +transaction is committing or the function has queued a transaction +commit. But it returns 0 if we raced with somebody queueing the +transaction commit as well. This resulted in ext4_sync_fs() not +functioning correctly (description from Arthur Jones): + + In the case of a data=ordered umount with pending long symlinks + which are delayed due to a long list of other I/O on the backing + block device, this causes the buffer associated with the long + symlinks to not be moved to the inode dirty list in the second + phase of fsync_super. Then, before they can be dirtied again, + kjournald exits, seeing the UMOUNT flag and the dirty pages are + never written to the backing block device, causing long symlink + corruption and exposing new or previously freed block data to + userspace. 
+ +This can be reproduced with a script created by Eric Sandeen +: + + #!/bin/bash + + umount /mnt/test2 + mount /dev/sdb4 /mnt/test2 + rm -f /mnt/test2/* + dd if=/dev/zero of=/mnt/test2/bigfile bs=1M count=512 + touch /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename + ln -s /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename + /mnt/test2/link + umount /mnt/test2 + mount /dev/sdb4 /mnt/test2 + ls /mnt/test2/ + +This patch fixes jbd2_journal_start_commit() to always return 1 when +there's a transaction committing or queued for commit. + +Signed-off-by: Jan Kara +Signed-off-by: "Theodore Ts'o" +CC: Eric Sandeen +CC: linux-ext4@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/journal.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -430,7 +430,7 @@ int __jbd2_log_space_left(journal_t *jou + } + + /* +- * Called under j_state_lock. Returns true if a transaction was started. ++ * Called under j_state_lock. Returns true if a transaction commit was started. + */ + int __jbd2_log_start_commit(journal_t *journal, tid_t target) + { +@@ -498,7 +498,8 @@ int jbd2_journal_force_commit_nested(jou + + /* + * Start a commit of the current running transaction (if any). Returns true +- * if a transaction was started, and fills its tid in at *ptid ++ * if a transaction is going to be committed (or is currently already ++ * committing), and fills its tid in at *ptid + */ + int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) + { +@@ -508,15 +509,19 @@ int jbd2_journal_start_commit(journal_t + if (journal->j_running_transaction) { + tid_t tid = journal->j_running_transaction->t_tid; + +- ret = __jbd2_log_start_commit(journal, tid); +- if (ret && ptid) ++ __jbd2_log_start_commit(journal, tid); ++ /* There's a running transaction and we've just made sure ++ * it's commit has been scheduled. 
*/ ++ if (ptid) + *ptid = tid; +- } else if (journal->j_committing_transaction && ptid) { ++ ret = 1; ++ } else if (journal->j_committing_transaction) { + /* + * If ext3_write_super() recently started a commit, then we + * have to wait for completion of that transaction + */ +- *ptid = journal->j_committing_transaction->t_tid; ++ if (ptid) ++ *ptid = journal->j_committing_transaction->t_tid; + ret = 1; + } + spin_unlock(&journal->j_state_lock); diff --git a/queue-2.6.27/revert-ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch b/queue-2.6.27/revert-ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch new file mode 100644 index 00000000000..088c1b15550 --- /dev/null +++ b/queue-2.6.27/revert-ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch @@ -0,0 +1,57 @@ +From tytso@mit.edu Thu Mar 12 23:27:03 2009 +From: Jan Kara +Date: Tue, 24 Feb 2009 12:14:49 -0500 +Subject: Revert "ext4: wait on all pending commits in ext4_sync_fs()" +To: stable@kernel.org +Cc: "Theodore Ts'o" , linux-ext4@vger.kernel.org, Jan Kara , Eric Sandeen +Message-ID: <1235495694-8116-2-git-send-email-tytso@mit.edu> + + +From: Jan Kara + +(cherry picked from commit 9eddacf9e9c03578ef2c07c9534423e823d677f8) + +This undoes commit 14ce0cb411c88681ab8f3a4c9caa7f42e97a3184. + +Since jbd2_journal_start_commit() is now fixed to return 1 when we +started a transaction commit, there's some transaction waiting to be +committed or there's a transaction already committing, we don't +need to call ext4_force_commit() in ext4_sync_fs(). Furthermore +ext4_force_commit() can unnecessarily create sync transaction which is +expensive so it's worthwhile to remove it when we can. 
+ +http://bugzilla.kernel.org/show_bug.cgi?id=12224 + +Signed-off-by: Jan Kara +Signed-off-by: "Theodore Ts'o" +Cc: Eric Sandeen +Cc: linux-ext4@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2950,14 +2950,14 @@ static void ext4_write_super(struct supe + + static int ext4_sync_fs(struct super_block *sb, int wait) + { +- int ret = 0; ++ tid_t target; + + sb->s_dirt = 0; +- if (wait) +- ret = ext4_force_commit(sb); +- else +- jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); +- return ret; ++ if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { ++ if (wait) ++ jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); ++ } ++ return 0; + } + + /* diff --git a/queue-2.6.27/series b/queue-2.6.27/series index 8906cb52c11..1f926e70692 100644 --- a/queue-2.6.27/series +++ b/queue-2.6.27/series @@ -81,3 +81,14 @@ hwmon-hide-misleading-error-message.patch i2c-fix-misplaced-parentheses.patch i2c-timeouts-reach-1.patch ide-iops-fix-odd-length-atapi-pio-transfers.patch +arm-add-i2c_board_info-for-riscpc-pcf8583.patch +fix-no_timer_check-on-x86_64.patch +jbd2-fix-return-value-of-jbd2_journal_start_commit.patch +revert-ext4-wait-on-all-pending-commits-in-ext4_sync_fs.patch +jbd2-avoid-possible-null-dereference-in-jbd2_journal_begin_ordered_truncate.patch +ext4-fix-to-read-empty-directory-blocks-correctly-in-64k.patch +ext4-fix-lockdep-warning.patch +ext4-initialize-preallocation-list_head-s-properly.patch +ext4-fix-null-dereference-in-ext4_ext_migrate-s-error-handling.patch +ext4-add-fallback-for-find_group_flex.patch +ext4-fix-deadlock-in-ext4_write_begin-and-ext4_da_write_begin.patch -- 2.47.3