From: Greg Kroah-Hartman Date: Fri, 3 May 2013 23:16:01 +0000 (-0700) Subject: 3.4-stable patches X-Git-Tag: v3.0.77~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=033b09185a272f29c2fe197755f78a6c45e605ca;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: ext4-fix-journal-callback-list-traversal.patch ext4-fix-kconfig-documentation-for-config_ext4_debug.patch ext4-fix-online-resizing-for-ext3-compat-file-systems.patch ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch --- diff --git a/queue-3.4/ext4-fix-journal-callback-list-traversal.patch b/queue-3.4/ext4-fix-journal-callback-list-traversal.patch new file mode 100644 index 00000000000..8d20ea034c9 --- /dev/null +++ b/queue-3.4/ext4-fix-journal-callback-list-traversal.patch @@ -0,0 +1,127 @@ +From 5d3ee20855e28169d711b394857ee608a5023094 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Wed, 3 Apr 2013 22:08:52 -0400 +Subject: ext4: fix journal callback list traversal + +From: Dmitry Monakhov + +commit 5d3ee20855e28169d711b394857ee608a5023094 upstream. + +It is incorrect to use list_for_each_entry_safe() for journal callback +traversial because ->next may be removed by other task: +->ext4_mb_free_metadata() + ->ext4_mb_free_metadata() + ->ext4_journal_callback_del() + +This results in the following issue: + +WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250() +Hardware name: +list_del corruption. 
prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b +Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod +Pid: 16400, comm: jbd2/dm-1-8 Tainted: G W 3.8.0-rc3+ #107 +Call Trace: + [] warn_slowpath_common+0xad/0xf0 + [] warn_slowpath_fmt+0x46/0x50 + [] ? ext4_journal_commit_callback+0x99/0xc0 + [] __list_del_entry+0x1c0/0x250 + [] ext4_journal_commit_callback+0x6f/0xc0 + [] jbd2_journal_commit_transaction+0x23a6/0x2570 + [] ? try_to_del_timer_sync+0x82/0xa0 + [] ? del_timer_sync+0x91/0x1e0 + [] kjournald2+0x19f/0x6a0 + [] ? wake_up_bit+0x40/0x40 + [] ? bit_spin_lock+0x80/0x80 + [] kthread+0x10e/0x120 + [] ? __init_kthread_worker+0x70/0x70 + [] ret_from_fork+0x7c/0xb0 + [] ? __init_kthread_worker+0x70/0x70 + +This patch fix the issue as follows: +- ext4_journal_commit_callback() make list truly traversial safe + simply by always starting from list_head +- fix race between two ext4_journal_callback_del() and + ext4_journal_callback_try_del() + +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Reviewed-by: Jan Kara +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/ext4_jbd2.h | 6 +++++- + fs/ext4/mballoc.c | 8 ++++---- + fs/ext4/super.c | 7 +++++-- + 3 files changed, 14 insertions(+), 7 deletions(-) + +--- a/fs/ext4/ext4_jbd2.h ++++ b/fs/ext4/ext4_jbd2.h +@@ -164,16 +164,20 @@ static inline void ext4_journal_callback + * ext4_journal_callback_del: delete a registered callback + * @handle: active journal transaction handle on which callback was registered + * @jce: registered journal callback entry to unregister ++ * Return true if object was sucessfully removed + */ +-static inline void ext4_journal_callback_del(handle_t *handle, ++static inline bool ext4_journal_callback_try_del(handle_t 
*handle, + struct ext4_journal_cb_entry *jce) + { ++ bool deleted; + struct ext4_sb_info *sbi = + EXT4_SB(handle->h_transaction->t_journal->j_private); + + spin_lock(&sbi->s_md_lock); ++ deleted = !list_empty(&jce->jce_list); + list_del_init(&jce->jce_list); + spin_unlock(&sbi->s_md_lock); ++ return deleted; + } + + int +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -4436,11 +4436,11 @@ ext4_mb_free_metadata(handle_t *handle, + node = rb_prev(new_node); + if (node) { + entry = rb_entry(node, struct ext4_free_data, efd_node); +- if (can_merge(entry, new_entry)) { ++ if (can_merge(entry, new_entry) && ++ ext4_journal_callback_try_del(handle, &entry->efd_jce)) { + new_entry->efd_start_cluster = entry->efd_start_cluster; + new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- ext4_journal_callback_del(handle, &entry->efd_jce); + kmem_cache_free(ext4_free_data_cachep, entry); + } + } +@@ -4448,10 +4448,10 @@ ext4_mb_free_metadata(handle_t *handle, + node = rb_next(new_node); + if (node) { + entry = rb_entry(node, struct ext4_free_data, efd_node); +- if (can_merge(new_entry, entry)) { ++ if (can_merge(new_entry, entry) && ++ ext4_journal_callback_try_del(handle, &entry->efd_jce)) { + new_entry->efd_count += entry->efd_count; + rb_erase(node, &(db->bb_free_root)); +- ext4_journal_callback_del(handle, &entry->efd_jce); + kmem_cache_free(ext4_free_data_cachep, entry); + } + } +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -437,10 +437,13 @@ static void ext4_journal_commit_callback + struct super_block *sb = journal->j_private; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int error = is_journal_aborted(journal); +- struct ext4_journal_cb_entry *jce, *tmp; ++ struct ext4_journal_cb_entry *jce; + ++ BUG_ON(txn->t_state == T_FINISHED); + spin_lock(&sbi->s_md_lock); +- list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { ++ while (!list_empty(&txn->t_private_list)) { ++ jce = list_entry(txn->t_private_list.next, ++ struct 
ext4_journal_cb_entry, jce_list); + list_del_init(&jce->jce_list); + spin_unlock(&sbi->s_md_lock); + jce->jce_func(sb, jce, error); diff --git a/queue-3.4/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch b/queue-3.4/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch new file mode 100644 index 00000000000..f6e6c69c58a --- /dev/null +++ b/queue-3.4/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch @@ -0,0 +1,29 @@ +From 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Sun, 21 Apr 2013 20:32:03 -0400 +Subject: ext4: fix Kconfig documentation for CONFIG_EXT4_DEBUG + +From: Theodore Ts'o + +commit 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f upstream. + +Fix the Kconfig documentation for CONFIG_EXT4_DEBUG to match the +change made by commit a0b30c1229: ext4: use module parameters instead +of debugfs for mballoc_debug + +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/Kconfig | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/ext4/Kconfig ++++ b/fs/ext4/Kconfig +@@ -82,4 +82,5 @@ config EXT4_DEBUG + Enables run-time debugging support for the ext4 filesystem. 
+ + If you select Y here, then you will be able to turn on debugging +- with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" ++ with a command such as: ++ echo 1 > /sys/module/ext4/parameters/mballoc_debug diff --git a/queue-3.4/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch b/queue-3.4/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch new file mode 100644 index 00000000000..9fbb6270569 --- /dev/null +++ b/queue-3.4/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch @@ -0,0 +1,33 @@ +From c5c72d814cf0f650010337c73638b25e6d14d2d4 Mon Sep 17 00:00:00 2001 +From: Theodore Ts'o +Date: Sun, 21 Apr 2013 20:19:43 -0400 +Subject: ext4: fix online resizing for ext3-compat file systems + +From: Theodore Ts'o + +commit c5c72d814cf0f650010337c73638b25e6d14d2d4 upstream. + +Commit fb0a387dcdc restricts block allocations for indirect-mapped +files to block groups less than s_blockfile_groups. However, the +online resizing code wasn't setting s_blockfile_groups, so the newly +added block groups were not available for non-extent mapped files. + +Reported-by: Eric Sandeen +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext4/resize.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ext4/resize.c ++++ b/fs/ext4/resize.c +@@ -1202,6 +1202,8 @@ static void ext4_update_super(struct sup + + /* Update the global fs size fields */ + sbi->s_groups_count += flex_gd->count; ++ sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, ++ (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + + /* Update the reserved block counts only once the new group is + * active. 
*/ diff --git a/queue-3.4/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch b/queue-3.4/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch new file mode 100644 index 00000000000..857b51293d7 --- /dev/null +++ b/queue-3.4/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch @@ -0,0 +1,46 @@ +From d87d830720a1446403ed38bfc2da268be0d356d1 Mon Sep 17 00:00:00 2001 +From: Jacob Keller +Date: Sat, 2 Mar 2013 07:51:42 +0000 +Subject: ixgbe: fix EICR write in ixgbe_msix_other + +From: Jacob Keller + +commit d87d830720a1446403ed38bfc2da268be0d356d1 upstream. + +Previously, the ixgbe_msix_other was writing the full 32bits of the set +interrupts, instead of only the ones which the ixgbe_msix_other is +handling. This resulted in a loss of performance when the X540's PPS feature is +enabled due to sometimes clearing queue interrupts which resulted in the driver +not getting the interrupt for cleaning the q_vector rings often enough. The fix +is to simply mask the lower 16bits off so that this handler does not write them +in the EICR, which causes them to remain high and be properly handled by the +clean_rings interrupt routine as normal. + +Signed-off-by: Jacob Keller +Tested-by: Phil Schmitt +Signed-off-by: Jeff Kirsher +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -2263,6 +2263,16 @@ static irqreturn_t ixgbe_msix_other(int + * with the write to EICR. + */ + eicr = IXGBE_READ_REG(hw, IXGBE_EICS); ++ ++ /* The lower 16bits of the EICR register are for the queue interrupts ++ * which should be masked here in order to not accidently clear them if ++ * the bits are high when ixgbe_msix_other is called. 
There is a race ++ * condition otherwise which results in possible performance loss ++ * especially if the ixgbe_msix_other interrupt is triggering ++ * consistently (as it would when PPS is turned on for the X540 device) ++ */ ++ eicr &= 0xFFFF0000; ++ + IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr); + + if (eicr & IXGBE_EICR_LSC) diff --git a/queue-3.4/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch b/queue-3.4/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch new file mode 100644 index 00000000000..37d99e1533b --- /dev/null +++ b/queue-3.4/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch @@ -0,0 +1,154 @@ +From 794446c6946513c684d448205fbd76fa35f38b72 Mon Sep 17 00:00:00 2001 +From: Dmitry Monakhov +Date: Wed, 3 Apr 2013 22:06:52 -0400 +Subject: jbd2: fix race between jbd2_journal_remove_checkpoint and ->j_commit_callback + +From: Dmitry Monakhov + +commit 794446c6946513c684d448205fbd76fa35f38b72 upstream. + +The following race is possible: + +[kjournald2] other_task +jbd2_journal_commit_transaction() + j_state = T_FINISHED; + spin_unlock(&journal->j_list_lock); + ->jbd2_journal_remove_checkpoint() + ->jbd2_journal_free_transaction(); + ->kmem_cache_free(transaction) + ->j_commit_callback(journal, transaction); + -> USE_AFTER_FREE + +WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250() +Hardware name: +list_del corruption. prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b +Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod +Pid: 16400, comm: jbd2/dm-1-8 Tainted: G W 3.8.0-rc3+ #107 +Call Trace: + [] warn_slowpath_common+0xad/0xf0 + [] warn_slowpath_fmt+0x46/0x50 + [] ? 
ext4_journal_commit_callback+0x99/0xc0 + [] __list_del_entry+0x1c0/0x250 + [] ext4_journal_commit_callback+0x6f/0xc0 + [] jbd2_journal_commit_transaction+0x23a6/0x2570 + [] ? try_to_del_timer_sync+0x82/0xa0 + [] ? del_timer_sync+0x91/0x1e0 + [] kjournald2+0x19f/0x6a0 + [] ? wake_up_bit+0x40/0x40 + [] ? bit_spin_lock+0x80/0x80 + [] kthread+0x10e/0x120 + [] ? __init_kthread_worker+0x70/0x70 + [] ret_from_fork+0x7c/0xb0 + [] ? __init_kthread_worker+0x70/0x70 + +In order to demonstrate this issue one should mount ext4 with mount -o +discard option on SSD disk. This makes callback longer and race +window becomes wider. + +In order to fix this we should mark transaction as finished only after +callbacks have completed + +Signed-off-by: Dmitry Monakhov +Signed-off-by: "Theodore Ts'o" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd2/commit.c | 50 ++++++++++++++++++++++++++++---------------------- + include/linux/jbd2.h | 1 + + 2 files changed, 29 insertions(+), 22 deletions(-) + +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -325,7 +325,7 @@ void jbd2_journal_commit_transaction(jou + int space_left = 0; + int first_tag = 0; + int tag_flag; +- int i, to_free = 0; ++ int i; + int tag_bytes = journal_tag_bytes(journal); + struct buffer_head *cbh = NULL; /* For transactional checksums */ + __u32 crc32_sum = ~0; +@@ -1044,7 +1044,7 @@ restart_loop: + journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; + spin_unlock(&journal->j_history_lock); + +- commit_transaction->t_state = T_FINISHED; ++ commit_transaction->t_state = T_COMMIT_CALLBACK; + J_ASSERT(commit_transaction == journal->j_committing_transaction); + journal->j_commit_sequence = commit_transaction->t_tid; + journal->j_committing_transaction = NULL; +@@ -1059,38 +1059,44 @@ restart_loop: + journal->j_average_commit_time*3) / 4; + else + journal->j_average_commit_time = commit_time; ++ + write_unlock(&journal->j_state_lock); + +- if (commit_transaction->t_checkpoint_list == NULL && +- 
commit_transaction->t_checkpoint_io_list == NULL) { +- __jbd2_journal_drop_transaction(journal, commit_transaction); +- to_free = 1; ++ if (journal->j_checkpoint_transactions == NULL) { ++ journal->j_checkpoint_transactions = commit_transaction; ++ commit_transaction->t_cpnext = commit_transaction; ++ commit_transaction->t_cpprev = commit_transaction; + } else { +- if (journal->j_checkpoint_transactions == NULL) { +- journal->j_checkpoint_transactions = commit_transaction; +- commit_transaction->t_cpnext = commit_transaction; +- commit_transaction->t_cpprev = commit_transaction; +- } else { +- commit_transaction->t_cpnext = +- journal->j_checkpoint_transactions; +- commit_transaction->t_cpprev = +- commit_transaction->t_cpnext->t_cpprev; +- commit_transaction->t_cpnext->t_cpprev = +- commit_transaction; +- commit_transaction->t_cpprev->t_cpnext = ++ commit_transaction->t_cpnext = ++ journal->j_checkpoint_transactions; ++ commit_transaction->t_cpprev = ++ commit_transaction->t_cpnext->t_cpprev; ++ commit_transaction->t_cpnext->t_cpprev = ++ commit_transaction; ++ commit_transaction->t_cpprev->t_cpnext = + commit_transaction; +- } + } + spin_unlock(&journal->j_list_lock); +- ++ /* Drop all spin_locks because commit_callback may be block. 
++ * __journal_remove_checkpoint() can not destroy transaction ++ * under us because it is not marked as T_FINISHED yet */ + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + + trace_jbd2_end_commit(journal, commit_transaction); + jbd_debug(1, "JBD2: commit %d complete, head %d\n", + journal->j_commit_sequence, journal->j_tail_sequence); +- if (to_free) +- jbd2_journal_free_transaction(commit_transaction); + ++ write_lock(&journal->j_state_lock); ++ spin_lock(&journal->j_list_lock); ++ commit_transaction->t_state = T_FINISHED; ++ /* Recheck checkpoint lists after j_list_lock was dropped */ ++ if (commit_transaction->t_checkpoint_list == NULL && ++ commit_transaction->t_checkpoint_io_list == NULL) { ++ __jbd2_journal_drop_transaction(journal, commit_transaction); ++ jbd2_journal_free_transaction(commit_transaction); ++ } ++ spin_unlock(&journal->j_list_lock); ++ write_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_done_commit); + } +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -470,6 +470,7 @@ struct transaction_s + T_COMMIT, + T_COMMIT_DFLUSH, + T_COMMIT_JFLUSH, ++ T_COMMIT_CALLBACK, + T_FINISHED + } t_state; + diff --git a/queue-3.4/series b/queue-3.4/series index 3617d42ad49..b29d8fb840a 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -51,3 +51,8 @@ nfsd4-don-t-close-read-write-opens-too-soon.patch nfsd-decode-and-send-64bit-time-values.patch wireless-regulatory-fix-channel-disabling-race-condition.patch ipc-sysv-shared-memory-limited-to-8tib.patch +ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch +jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch +ext4-fix-journal-callback-list-traversal.patch +ext4-fix-online-resizing-for-ext3-compat-file-systems.patch +ext4-fix-kconfig-documentation-for-config_ext4_debug.patch