From: Greg Kroah-Hartman Date: Fri, 3 May 2013 23:17:01 +0000 (-0700) Subject: 3.9-stable patches X-Git-Tag: v3.0.77~12 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1599a5c4c7bc83e98552187df97aa45ba983c941;p=thirdparty%2Fkernel%2Fstable-queue.git 3.9-stable patches added patches: e1000e-fix-numeric-overflow-in-phc-settime-method.patch ext4-fix-big-endian-bug-in-metadata-checksum-calculations.patch ext4-fix-journal-callback-list-traversal.patch ext4-fix-kconfig-documentation-for-config_ext4_debug.patch ext4-fix-online-resizing-for-ext3-compat-file-systems.patch ext4-jbd2-don-t-wait-forever-for-stale-tid-caused-by-wraparound.patch ext4-unregister-es_shrinker-if-mount-failed.patch ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch x86-64-init-do-not-set-nx-bits-on-non-nx-capable-hardware.patch --- diff --git a/queue-3.9/e1000e-fix-numeric-overflow-in-phc-settime-method.patch b/queue-3.9/e1000e-fix-numeric-overflow-in-phc-settime-method.patch new file mode 100644 index 00000000000..91c660502a4 --- /dev/null +++ b/queue-3.9/e1000e-fix-numeric-overflow-in-phc-settime-method.patch @@ -0,0 +1,36 @@ +From 73e3dd6b45c4c870fc2641eb04c24e3f12dab1e0 Mon Sep 17 00:00:00 2001 +From: Richard Cochran +Date: Tue, 23 Apr 2013 01:56:34 +0000 +Subject: e1000e: fix numeric overflow in phc settime method + +From: Richard Cochran + +commit 73e3dd6b45c4c870fc2641eb04c24e3f12dab1e0 upstream. + +The PTP Hardware Clock settime function in the e1000e driver +computes nanoseconds from a struct timespec. The code converts the +seconds field .tv_sec by multiplying it with NSEC_PER_SEC. However, +both operands are of type long, resulting in an unintended overflow. +The patch fixes the issue by using the helper function from time.h. 
+ +Signed-off-by: Richard Cochran +Tested-by: Aaron Brown +Signed-off-by: Jeff Kirsher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/intel/e1000e/ptp.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/ethernet/intel/e1000e/ptp.c ++++ b/drivers/net/ethernet/intel/e1000e/ptp.c +@@ -145,8 +145,7 @@ static int e1000e_phc_settime(struct ptp + unsigned long flags; + u64 ns; + +- ns = ts->tv_sec * NSEC_PER_SEC; +- ns += ts->tv_nsec; ++ ns = timespec_to_ns(ts); + + /* reset the timecounter */ + spin_lock_irqsave(&adapter->systim_lock, flags); diff --git a/queue-3.9/ext4-fix-big-endian-bug-in-metadata-checksum-calculations.patch b/queue-3.9/ext4-fix-big-endian-bug-in-metadata-checksum-calculations.patch new file mode 100644 index 00000000000..cd0c007f764 --- /dev/null +++ b/queue-3.9/ext4-fix-big-endian-bug-in-metadata-checksum-calculations.patch @@ -0,0 +1,57 @@ +From 171a7f21a76a0958c225b97c00a97a10390d40ee Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Tue, 9 Apr 2013 23:56:48 -0400 +Subject: ext4: fix big-endian bug in metadata checksum calculations + +From: Dmitry Monakhov + +commit 171a7f21a76a0958c225b97c00a97a10390d40ee upstream. 
+ +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/inode.c | 8 ++++---- + fs/ext4/mmp.c | 2 +- + 2 files changed, 5 insertions(+), 5 deletions(-) + +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -55,21 +55,21 @@ static __u32 ext4_inode_csum(struct inod + __u16 csum_hi = 0; + __u32 csum; + +- csum_lo = raw->i_checksum_lo; ++ csum_lo = le16_to_cpu(raw->i_checksum_lo); + raw->i_checksum_lo = 0; + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { +- csum_hi = raw->i_checksum_hi; ++ csum_hi = le16_to_cpu(raw->i_checksum_hi); + raw->i_checksum_hi = 0; + } + + csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, + EXT4_INODE_SIZE(inode->i_sb)); + +- raw->i_checksum_lo = csum_lo; ++ raw->i_checksum_lo = cpu_to_le16(csum_lo); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && + EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) +- raw->i_checksum_hi = csum_hi; ++ raw->i_checksum_hi = cpu_to_le16(csum_hi); + + return csum; + } +--- a/fs/ext4/mmp.c ++++ b/fs/ext4/mmp.c +@@ -7,7 +7,7 @@ + #include "ext4.h" + + /* Checksumming functions */ +-static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) ++static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); + int offset = offsetof(struct mmp_struct, mmp_checksum); diff --git a/queue-3.9/ext4-fix-journal-callback-list-traversal.patch b/queue-3.9/ext4-fix-journal-callback-list-traversal.patch new file mode 100644 index 00000000000..a43f7258118 --- /dev/null +++ b/queue-3.9/ext4-fix-journal-callback-list-traversal.patch @@ -0,0 +1,127 @@ +From 5d3ee20855e28169d711b394857ee608a5023094 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Wed, 3 Apr 2013 22:08:52 -0400 +Subject: ext4: fix journal callback list traversal + +From: Dmitry Monakhov + +commit 5d3ee20855e28169d711b394857ee608a5023094 upstream. 
+ +It is incorrect to use list_for_each_entry_safe() for journal callback +traversal because ->next may be removed by another task: +->ext4_mb_free_metadata() + ->ext4_mb_free_metadata() + ->ext4_journal_callback_del() + +This results in the following issue: + +WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250() +Hardware name: +list_del corruption. prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b +Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod +Pid: 16400, comm: jbd2/dm-1-8 Tainted: G W 3.8.0-rc3+ #107 +Call Trace: + [] warn_slowpath_common+0xad/0xf0 + [] warn_slowpath_fmt+0x46/0x50 + [] ? ext4_journal_commit_callback+0x99/0xc0 + [] __list_del_entry+0x1c0/0x250 + [] ext4_journal_commit_callback+0x6f/0xc0 + [] jbd2_journal_commit_transaction+0x23a6/0x2570 + [] ? try_to_del_timer_sync+0x82/0xa0 + [] ? del_timer_sync+0x91/0x1e0 + [] kjournald2+0x19f/0x6a0 + [] ? wake_up_bit+0x40/0x40 + [] ? bit_spin_lock+0x80/0x80 + [] kthread+0x10e/0x120 + [] ? __init_kthread_worker+0x70/0x70 + [] ret_from_fork+0x7c/0xb0 + [] ? 
__init_kthread_worker+0x70/0x70 + +This patch fixes the issue as follows: +- ext4_journal_commit_callback(): make list traversal truly safe + simply by always starting from list_head +- fix race between two ext4_journal_callback_del() and + ext4_journal_callback_try_del() + +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Reviewed-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4_jbd2.h | 6 +++++- + fs/ext4/mballoc.c | 8 ++++---- + fs/ext4/super.c | 7 +++++-- + 3 files changed, 14 insertions(+), 7 deletions(-) + +--- a/fs/ext4/ext4_jbd2.h ++++ b/fs/ext4/ext4_jbd2.h +@@ -194,16 +194,20 @@ static inline void ext4_journal_callback + * ext4_journal_callback_del: delete a registered callback + * @handle: active journal transaction handle on which callback was registered + * @jce: registered journal callback entry to unregister ++ * Return true if object was successfully removed + */ +-static inline void ext4_journal_callback_del(handle_t *handle, ++static inline bool ext4_journal_callback_try_del(handle_t *handle, + struct ext4_journal_cb_entry *jce) + { ++ bool deleted; + struct ext4_sb_info *sbi = + EXT4_SB(handle->h_transaction->t_journal->j_private); + + spin_lock(&sbi->s_md_lock); ++ deleted = !list_empty(&jce->jce_list); + list_del_init(&jce->jce_list); + spin_unlock(&sbi->s_md_lock); ++ return deleted; + } + + int +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4420,11 +4420,11 @@ ext4_mb_free_metadata(handle_t *handle, + node = rb_prev(new_node); + if (node) { + entry = rb_entry(node, struct ext4_free_data, efd_node); +- if (can_merge(entry, new_entry)) { ++ if (can_merge(entry, new_entry) && ++ ext4_journal_callback_try_del(handle, &entry->efd_jce)) { + new_entry->efd_start_cluster = entry->efd_start_cluster; + new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- ext4_journal_callback_del(handle, &entry->efd_jce); + kmem_cache_free(ext4_free_data_cachep, entry); + } + } +@@ -4432,10 +4432,10 
@@ ext4_mb_free_metadata(handle_t *handle, + node = rb_next(new_node); + if (node) { + entry = rb_entry(node, struct ext4_free_data, efd_node); +- if (can_merge(new_entry, entry)) { ++ if (can_merge(new_entry, entry) && ++ ext4_journal_callback_try_del(handle, &entry->efd_jce)) { + new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- ext4_journal_callback_del(handle, &entry->efd_jce); + kmem_cache_free(ext4_free_data_cachep, entry); + } + } +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -353,10 +353,13 @@ static void ext4_journal_commit_callback + struct super_block *sb = journal->j_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int error = is_journal_aborted(journal); +- struct ext4_journal_cb_entry *jce, *tmp; ++ struct ext4_journal_cb_entry *jce; + ++ BUG_ON(txn->t_state == T_FINISHED); + spin_lock(&sbi->s_md_lock); +- list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { ++ while (!list_empty(&txn->t_private_list)) { ++ jce = list_entry(txn->t_private_list.next, ++ struct ext4_journal_cb_entry, jce_list); + list_del_init(&jce->jce_list); + spin_unlock(&sbi->s_md_lock); + jce->jce_func(sb, jce, error); diff --git a/queue-3.9/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch b/queue-3.9/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch new file mode 100644 index 00000000000..61b8098a438 --- /dev/null +++ b/queue-3.9/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch @@ -0,0 +1,29 @@ +From 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Sun, 21 Apr 2013 20:32:03 -0400 +Subject: ext4: fix Kconfig documentation for CONFIG_EXT4_DEBUG + +From: Theodore Ts'o + +commit 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f upstream. 
+ +Fix the Kconfig documentation for CONFIG_EXT4_DEBUG to match the +change made by commit a0b30c1229: ext4: use module parameters instead +of debugfs for mballoc_debug + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/Kconfig | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ext4/Kconfig ++++ b/fs/ext4/Kconfig +@@ -71,4 +71,5 @@ config EXT4_DEBUG + Enables run-time debugging support for the ext4 filesystem. + + If you select Y here, then you will be able to turn on debugging +- with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" ++ with a command such as: ++ echo 1 > /sys/module/ext4/parameters/mballoc_debug diff --git a/queue-3.9/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch b/queue-3.9/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch new file mode 100644 index 00000000000..5caa45e944d --- /dev/null +++ b/queue-3.9/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch @@ -0,0 +1,33 @@ +From c5c72d814cf0f650010337c73638b25e6d14d2d4 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Sun, 21 Apr 2013 20:19:43 -0400 +Subject: ext4: fix online resizing for ext3-compat file systems + +From: Theodore Ts'o + +commit c5c72d814cf0f650010337c73638b25e6d14d2d4 upstream. + +Commit fb0a387dcdc restricts block allocations for indirect-mapped +files to block groups less than s_blockfile_groups. However, the +online resizing code wasn't setting s_blockfile_groups, so the newly +added block groups were not available for non-extent mapped files. 
+ +Reported-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1341,6 +1341,8 @@ static void ext4_update_super(struct sup + + /* Update the global fs size fields */ + sbi->s_groups_count += flex_gd->count; ++ sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, ++ (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + + /* Update the reserved block counts only once the new group is + * active. */ diff --git a/queue-3.9/ext4-jbd2-don-t-wait-forever-for-stale-tid-caused-by-wraparound.patch b/queue-3.9/ext4-jbd2-don-t-wait-forever-for-stale-tid-caused-by-wraparound.patch new file mode 100644 index 00000000000..11f59a1d2a3 --- /dev/null +++ b/queue-3.9/ext4-jbd2-don-t-wait-forever-for-stale-tid-caused-by-wraparound.patch @@ -0,0 +1,118 @@ +From d76a3a77113db020d9bb1e894822869410450bd9 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Wed, 3 Apr 2013 22:02:52 -0400 +Subject: ext4/jbd2: don't wait (forever) for stale tid caused by wraparound + +From: Theodore Ts'o + +commit d76a3a77113db020d9bb1e894822869410450bd9 upstream. + +In the case where an inode has a very stale transaction id (tid) in +i_datasync_tid or i_sync_tid, it's possible that after a very large +(2**31) number of transactions, that the tid number space might wrap, +causing tid_geq()'s calculations to fail. + +Commit deeeaf13 "jbd2: fix fsync() tid wraparound bug", later modified +by commit e7b04ac0 "jbd2: don't wake kjournald unnecessarily", +attempted to fix this problem, but it only avoided kjournald spinning +forever by fixing the logic in jbd2_log_start_commit(). + +Unfortunately, in the codepaths in fs/ext4/fsync.c and fs/ext4/inode.c +that might call jbd2_log_start_commit() with a stale tid, those +functions will subsequently call jbd2_log_wait_commit() with the same +stale tid, and then wait for a very long time. 
To fix this, we +replace the calls to jbd2_log_start_commit() and +jbd2_log_wait_commit() with a call to a new function, +jbd2_complete_transaction(), which will correctly handle stale tid's. + +As a bonus, jbd2_complete_transaction() will avoid locking +j_state_lock for writing unless a commit needs to be started. This +should have a small (but probably not measurable) improvement for +ext4's scalability. + +Signed-off-by: "Theodore Ts'o" +Reported-by: Ben Hutchings +Reported-by: George Barnett +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/fsync.c | 3 +-- + fs/ext4/inode.c | 3 +-- + fs/jbd2/journal.c | 31 +++++++++++++++++++++++++++++++ + include/linux/jbd2.h | 1 + + 4 files changed, 34 insertions(+), 4 deletions(-) + +--- a/fs/ext4/fsync.c ++++ b/fs/ext4/fsync.c +@@ -166,8 +166,7 @@ int ext4_sync_file(struct file *file, lo + if (journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(journal, commit_tid)) + needs_barrier = true; +- jbd2_log_start_commit(journal, commit_tid); +- ret = jbd2_log_wait_commit(journal, commit_tid); ++ ret = jbd2_complete_transaction(journal, commit_tid); + if (needs_barrier) { + err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (!ret) +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -210,8 +210,7 @@ void ext4_evict_inode(struct inode *inod + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; + +- jbd2_log_start_commit(journal, commit_tid); +- jbd2_log_wait_commit(journal, commit_tid); ++ jbd2_complete_transaction(journal, commit_tid); + filemap_write_and_wait(&inode->i_data); + } + truncate_inode_pages(&inode->i_data, 0); +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -710,6 +710,37 @@ int jbd2_log_wait_commit(journal_t *jour + } + + /* ++ * When this function returns the transaction corresponding to tid ++ * will be completed. 
If the transaction has currently running, start ++ * committing that transaction before waiting for it to complete. If ++ * the transaction id is stale, it is by definition already completed, ++ * so just return SUCCESS. ++ */ ++int jbd2_complete_transaction(journal_t *journal, tid_t tid) ++{ ++ int need_to_wait = 1; ++ ++ read_lock(&journal->j_state_lock); ++ if (journal->j_running_transaction && ++ journal->j_running_transaction->t_tid == tid) { ++ if (journal->j_commit_request != tid) { ++ /* transaction not yet started, so request it */ ++ read_unlock(&journal->j_state_lock); ++ jbd2_log_start_commit(journal, tid); ++ goto wait_commit; ++ } ++ } else if (!(journal->j_committing_transaction && ++ journal->j_committing_transaction->t_tid == tid)) ++ need_to_wait = 0; ++ read_unlock(&journal->j_state_lock); ++ if (!need_to_wait) ++ return 0; ++wait_commit: ++ return jbd2_log_wait_commit(journal, tid); ++} ++EXPORT_SYMBOL(jbd2_complete_transaction); ++ ++/* + * Log buffer allocation routines: + */ + +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1200,6 +1200,7 @@ int __jbd2_log_start_commit(journal_t *j + int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); + int jbd2_journal_force_commit_nested(journal_t *journal); + int jbd2_log_wait_commit(journal_t *journal, tid_t tid); ++int jbd2_complete_transaction(journal_t *journal, tid_t tid); + int jbd2_log_do_checkpoint(journal_t *journal); + int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); + diff --git a/queue-3.9/ext4-unregister-es_shrinker-if-mount-failed.patch b/queue-3.9/ext4-unregister-es_shrinker-if-mount-failed.patch new file mode 100644 index 00000000000..8c80dbf43e0 --- /dev/null +++ b/queue-3.9/ext4-unregister-es_shrinker-if-mount-failed.patch @@ -0,0 +1,91 @@ +From a75ae78f087f933ab3432e98bb4dbbf2196cf6d5 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Wed, 3 Apr 2013 22:10:52 -0400 +Subject: ext4: unregister es_shrinker if mount failed + +From: Dmitry 
Monakhov + +commit a75ae78f087f933ab3432e98bb4dbbf2196cf6d5 upstream. + +Otherwise the destroyed ext4_sb_info will be part of the global shrinker list +and result in the following OOPS: + +JBD2: corrupted journal superblock +JBD2: recovery failed +EXT4-fs (dm-2): error loading journal +general protection fault: 0000 [#1] SMP +Modules linked in: fuse acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel microcode sg button sd_mod crc_t10dif ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_\ +mod +CPU 1 +Pid: 2758, comm: mount Not tainted 3.8.0-rc3+ #136 /DH55TC +RIP: 0010:[] [] unregister_shrinker+0xad/0xe0 +RSP: 0000:ffff88011d5cbcd8 EFLAGS: 00010207 +RAX: 6b6b6b6b6b6b6b6b RBX: 6b6b6b6b6b6b6b53 RCX: 0000000000000006 +RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000246 +RBP: ffff88011d5cbce8 R08: 0000000000000002 R09: 0000000000000001 +R10: 0000000000000001 R11: 0000000000000000 R12: ffff88011cd3f848 +R13: ffff88011cd3f830 R14: ffff88011cd3f000 R15: 0000000000000000 +FS: 00007f7b721dd7e0(0000) GS:ffff880121a00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b +CR2: 00007fffa6f75038 CR3: 000000011bc1c000 CR4: 00000000000007e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 +Process mount (pid: 2758, threadinfo ffff88011d5ca000, task ffff880116aacb80) +Stack: +ffff88011cd3f000 ffffffff8209b6c0 ffff88011d5cbd18 ffffffff812482f1 +00000000000003f3 00000000ffffffea ffff880115f4c200 0000000000000000 +ffff88011d5cbda8 ffffffff81249381 ffff8801219d8bf8 ffffffff00000000 +Call Trace: +[] deactivate_locked_super+0x91/0xb0 +[] mount_bdev+0x331/0x340 +[] ? 
ext4_alloc_flex_bg_array+0x180/0x180 +[] ext4_mount+0x15/0x20 +[] mount_fs+0x9a/0x2e0 +[] vfs_kern_mount+0xc5/0x170 +[] do_new_mount+0x172/0x2e0 +[] do_mount+0x376/0x380 +[] sys_mount+0x138/0x150 +[] system_call_fastpath+0x16/0x1b +Code: 8b 05 88 04 eb 00 48 3d 90 ff 06 82 48 8d 58 e8 75 19 4c 89 e7 e8 e4 d7 2c 00 48 c7 c7 00 ff 06 82 e8 58 5f ef ff 5b 41 5c c9 c3 <48> 8b 4b 18 48 8b 73 20 48 89 da 31 c0 48 c7 c7 c5 a0 e4 81 e\ +8 +RIP [] unregister_shrinker+0xad/0xe0 +RSP + +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/super.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -3701,6 +3701,9 @@ static int ext4_fill_super(struct super_ + sbi->s_err_report.function = print_daily_error_info; + sbi->s_err_report.data = (unsigned long) sb; + ++ /* Register extent status tree shrinker */ ++ ext4_es_register_shrinker(sb); ++ + err = percpu_counter_init(&sbi->s_freeclusters_counter, + ext4_count_free_clusters(sb)); + if (!err) { +@@ -3726,9 +3729,6 @@ static int ext4_fill_super(struct super_ + sbi->s_max_writeback_mb_bump = 128; + sbi->s_extent_max_zeroout_kb = 32; + +- /* Register extent status tree shrinker */ +- ext4_es_register_shrinker(sb); +- + /* + * set up enough so that it can read an inode + */ +@@ -4013,6 +4013,7 @@ failed_mount_wq: + sbi->s_journal = NULL; + } + failed_mount3: ++ ext4_es_unregister_shrinker(sb); + del_timer(&sbi->s_err_report); + if (sbi->s_flex_groups) + ext4_kvfree(sbi->s_flex_groups); diff --git a/queue-3.9/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch b/queue-3.9/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch new file mode 100644 index 00000000000..8eb7c44f38b --- /dev/null +++ b/queue-3.9/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch @@ -0,0 +1,46 @@ +From d87d830720a1446403ed38bfc2da268be0d356d1 Mon Sep 17 00:00:00 2001 +From: Jacob Keller +Date: Sat, 2 Mar 2013 07:51:42 +0000 +Subject: ixgbe: fix EICR 
write in ixgbe_msix_other + +From: Jacob Keller + +commit d87d830720a1446403ed38bfc2da268be0d356d1 upstream. + +Previously, the ixgbe_msix_other was writing the full 32bits of the set +interrupts, instead of only the ones which the ixgbe_msix_other is +handling. This resulted in a loss of performance when the X540's PPS feature is +enabled due to sometimes clearing queue interrupts which resulted in the driver +not getting the interrupt for cleaning the q_vector rings often enough. The fix +is to simply mask the lower 16bits off so that this handler does not write them +in the EICR, which causes them to remain high and be properly handled by the +clean_rings interrupt routine as normal. + +Signed-off-by: Jacob Keller +Tested-by: Phil Schmitt +Signed-off-by: Jeff Kirsher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -2405,6 +2405,16 @@ static irqreturn_t ixgbe_msix_other(int + * with the write to EICR. + */ + eicr = IXGBE_READ_REG(hw, IXGBE_EICS); ++ ++ /* The lower 16bits of the EICR register are for the queue interrupts ++ * which should be masked here in order to not accidentally clear them if ++ * the bits are high when ixgbe_msix_other is called. 
There is a race ++ * condition otherwise which results in possible performance loss ++ * especially if the ixgbe_msix_other interrupt is triggering ++ * consistently (as it would when PPS is turned on for the X540 device) ++ */ ++ eicr &= 0xFFFF0000; ++ + IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr); + + if (eicr & IXGBE_EICR_LSC) diff --git a/queue-3.9/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch b/queue-3.9/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch new file mode 100644 index 00000000000..4be9269605f --- /dev/null +++ b/queue-3.9/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch @@ -0,0 +1,154 @@ +From 794446c6946513c684d448205fbd76fa35f38b72 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Wed, 3 Apr 2013 22:06:52 -0400 +Subject: jbd2: fix race between jbd2_journal_remove_checkpoint and ->j_commit_callback + +From: Dmitry Monakhov + +commit 794446c6946513c684d448205fbd76fa35f38b72 upstream. + +The following race is possible: + +[kjournald2] other_task +jbd2_journal_commit_transaction() + j_state = T_FINISHED; + spin_unlock(&journal->j_list_lock); + ->jbd2_journal_remove_checkpoint() + ->jbd2_journal_free_transaction(); + ->kmem_cache_free(transaction) + ->j_commit_callback(journal, transaction); + -> USE_AFTER_FREE + +WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250() +Hardware name: +list_del corruption. prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b +Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod +Pid: 16400, comm: jbd2/dm-1-8 Tainted: G W 3.8.0-rc3+ #107 +Call Trace: + [] warn_slowpath_common+0xad/0xf0 + [] warn_slowpath_fmt+0x46/0x50 + [] ? 
ext4_journal_commit_callback+0x99/0xc0 + [] __list_del_entry+0x1c0/0x250 + [] ext4_journal_commit_callback+0x6f/0xc0 + [] jbd2_journal_commit_transaction+0x23a6/0x2570 + [] ? try_to_del_timer_sync+0x82/0xa0 + [] ? del_timer_sync+0x91/0x1e0 + [] kjournald2+0x19f/0x6a0 + [] ? wake_up_bit+0x40/0x40 + [] ? bit_spin_lock+0x80/0x80 + [] kthread+0x10e/0x120 + [] ? __init_kthread_worker+0x70/0x70 + [] ret_from_fork+0x7c/0xb0 + [] ? __init_kthread_worker+0x70/0x70 + +In order to demonstrate this issue one should mount ext4 with the mount -o +discard option on an SSD disk. This makes the callback take longer and the race +window becomes wider. + +In order to fix this we should mark the transaction as finished only after +the callbacks have completed. + +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/commit.c | 50 ++++++++++++++++++++++++++++---------------------- + include/linux/jbd2.h | 1 + + 2 files changed, 29 insertions(+), 22 deletions(-) + +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -382,7 +382,7 @@ void jbd2_journal_commit_transaction(jou + int space_left = 0; + int first_tag = 0; + int tag_flag; +- int i, to_free = 0; ++ int i; + int tag_bytes = journal_tag_bytes(journal); + struct buffer_head *cbh = NULL; /* For transactional checksums */ + __u32 crc32_sum = ~0; +@@ -1134,7 +1134,7 @@ restart_loop: + journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; + spin_unlock(&journal->j_history_lock); + +- commit_transaction->t_state = T_FINISHED; ++ commit_transaction->t_state = T_COMMIT_CALLBACK; + J_ASSERT(commit_transaction == journal->j_committing_transaction); + journal->j_commit_sequence = commit_transaction->t_tid; + journal->j_committing_transaction = NULL; +@@ -1149,38 +1149,44 @@ restart_loop: + journal->j_average_commit_time*3) / 4; + else + journal->j_average_commit_time = commit_time; ++ + write_unlock(&journal->j_state_lock); + +- if (commit_transaction->t_checkpoint_list == NULL && +- 
commit_transaction->t_checkpoint_io_list == NULL) { +- __jbd2_journal_drop_transaction(journal, commit_transaction); +- to_free = 1; ++ if (journal->j_checkpoint_transactions == NULL) { ++ journal->j_checkpoint_transactions = commit_transaction; ++ commit_transaction->t_cpnext = commit_transaction; ++ commit_transaction->t_cpprev = commit_transaction; + } else { +- if (journal->j_checkpoint_transactions == NULL) { +- journal->j_checkpoint_transactions = commit_transaction; +- commit_transaction->t_cpnext = commit_transaction; +- commit_transaction->t_cpprev = commit_transaction; +- } else { +- commit_transaction->t_cpnext = +- journal->j_checkpoint_transactions; +- commit_transaction->t_cpprev = +- commit_transaction->t_cpnext->t_cpprev; +- commit_transaction->t_cpnext->t_cpprev = +- commit_transaction; +- commit_transaction->t_cpprev->t_cpnext = ++ commit_transaction->t_cpnext = ++ journal->j_checkpoint_transactions; ++ commit_transaction->t_cpprev = ++ commit_transaction->t_cpnext->t_cpprev; ++ commit_transaction->t_cpnext->t_cpprev = ++ commit_transaction; ++ commit_transaction->t_cpprev->t_cpnext = + commit_transaction; +- } + } + spin_unlock(&journal->j_list_lock); +- ++ /* Drop all spin_locks because commit_callback may be block. 
++ * __journal_remove_checkpoint() can not destroy transaction ++ * under us because it is not marked as T_FINISHED yet */ + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + + trace_jbd2_end_commit(journal, commit_transaction); + jbd_debug(1, "JBD2: commit %d complete, head %d\n", + journal->j_commit_sequence, journal->j_tail_sequence); +- if (to_free) +- jbd2_journal_free_transaction(commit_transaction); + ++ write_lock(&journal->j_state_lock); ++ spin_lock(&journal->j_list_lock); ++ commit_transaction->t_state = T_FINISHED; ++ /* Recheck checkpoint lists after j_list_lock was dropped */ ++ if (commit_transaction->t_checkpoint_list == NULL && ++ commit_transaction->t_checkpoint_io_list == NULL) { ++ __jbd2_journal_drop_transaction(journal, commit_transaction); ++ jbd2_journal_free_transaction(commit_transaction); ++ } ++ spin_unlock(&journal->j_list_lock); ++ write_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_done_commit); + } +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -480,6 +480,7 @@ struct transaction_s + T_COMMIT, + T_COMMIT_DFLUSH, + T_COMMIT_JFLUSH, ++ T_COMMIT_CALLBACK, + T_FINISHED + } t_state; + diff --git a/queue-3.9/series b/queue-3.9/series index 2586b4ef074..6a44b60a3ce 100644 --- a/queue-3.9/series +++ b/queue-3.9/series @@ -94,3 +94,13 @@ nfsd-use-kmem_cache_free-instead-of-kfree.patch nfsd-decode-and-send-64bit-time-values.patch wireless-regulatory-fix-channel-disabling-race-condition.patch ipc-sysv-shared-memory-limited-to-8tib.patch +ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch +e1000e-fix-numeric-overflow-in-phc-settime-method.patch +x86-64-init-do-not-set-nx-bits-on-non-nx-capable-hardware.patch +ext4-jbd2-don-t-wait-forever-for-stale-tid-caused-by-wraparound.patch +jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch +ext4-fix-journal-callback-list-traversal.patch +ext4-unregister-es_shrinker-if-mount-failed.patch 
+ext4-fix-big-endian-bug-in-metadata-checksum-calculations.patch +ext4-fix-online-resizing-for-ext3-compat-file-systems.patch +ext4-fix-kconfig-documentation-for-config_ext4_debug.patch diff --git a/queue-3.9/x86-64-init-do-not-set-nx-bits-on-non-nx-capable-hardware.patch b/queue-3.9/x86-64-init-do-not-set-nx-bits-on-non-nx-capable-hardware.patch new file mode 100644 index 00000000000..55d5732cef9 --- /dev/null +++ b/queue-3.9/x86-64-init-do-not-set-nx-bits-on-non-nx-capable-hardware.patch @@ -0,0 +1,54 @@ +From 78d77df71510a96e042de7ba6dbd7998103642cb Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Thu, 2 May 2013 10:33:46 -0700 +Subject: x86-64, init: Do not set NX bits on non-NX capable hardware + +From: "H. Peter Anvin" + +commit 78d77df71510a96e042de7ba6dbd7998103642cb upstream. + +During early init, we would incorrectly set the NX bit even if the NX +feature was not supported. Instead, only set this bit if NX is +actually available and enabled. We already do very early detection of +the NX bit to enable it in EFER, this simply extends this detection to +the early page table mask. + +Reported-by: Fernando Luis Vázquez Cao +Signed-off-by: H. 
Peter Anvin +Link: http://lkml.kernel.org/r/1367476850.5660.2.camel@nexus +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/head64.c | 3 ++- + arch/x86/kernel/head_64.S | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/head64.c ++++ b/arch/x86/kernel/head64.c +@@ -34,6 +34,7 @@ + extern pgd_t early_level4_pgt[PTRS_PER_PGD]; + extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; + static unsigned int __initdata next_early_pgt = 2; ++pmdval_t __initdata early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); + + /* Wipe all early page tables except for the kernel symbol map */ + static void __init reset_early_page_tables(void) +@@ -99,7 +100,7 @@ again: + pmd_p[i] = 0; + *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + } +- pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL); ++ pmd = (physaddr & PMD_MASK) + early_pmd_flags; + pmd_p[pmd_index(address)] = pmd; + + return 0; +--- a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -200,6 +200,7 @@ ENTRY(secondary_startup_64) + btl $20,%edi /* No Execute supported? */ + jnc 1f + btsl $_EFER_NX, %eax ++ btsq $_PAGE_BIT_NX,early_pmd_flags(%rip) + 1: wrmsr /* Make changes effective */ + + /* Setup cr0 */