]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 May 2013 23:16:01 +0000 (16:16 -0700)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 May 2013 23:16:01 +0000 (16:16 -0700)
added patches:
ext4-fix-journal-callback-list-traversal.patch
ext4-fix-kconfig-documentation-for-config_ext4_debug.patch
ext4-fix-online-resizing-for-ext3-compat-file-systems.patch
ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch
jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch

queue-3.4/ext4-fix-journal-callback-list-traversal.patch [new file with mode: 0644]
queue-3.4/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch [new file with mode: 0644]
queue-3.4/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch [new file with mode: 0644]
queue-3.4/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch [new file with mode: 0644]
queue-3.4/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/ext4-fix-journal-callback-list-traversal.patch b/queue-3.4/ext4-fix-journal-callback-list-traversal.patch
new file mode 100644 (file)
index 0000000..8d20ea0
--- /dev/null
@@ -0,0 +1,127 @@
+From 5d3ee20855e28169d711b394857ee608a5023094 Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Wed, 3 Apr 2013 22:08:52 -0400
+Subject: ext4: fix journal callback list traversal
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 5d3ee20855e28169d711b394857ee608a5023094 upstream.
+
+It is incorrect to use list_for_each_entry_safe() for journal callback
+traversal because ->next may be removed by another task:
+->ext4_mb_free_metadata()
+  ->ext4_mb_free_metadata()
+    ->ext4_journal_callback_del()
+
+This results in the following issue:
+
+WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250()
+Hardware name:
+list_del corruption. prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b
+Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod
+Pid: 16400, comm: jbd2/dm-1-8 Tainted: G        W    3.8.0-rc3+ #107
+Call Trace:
+ [<ffffffff8106fb0d>] warn_slowpath_common+0xad/0xf0
+ [<ffffffff8106fc06>] warn_slowpath_fmt+0x46/0x50
+ [<ffffffff813637e9>] ? ext4_journal_commit_callback+0x99/0xc0
+ [<ffffffff8148cae0>] __list_del_entry+0x1c0/0x250
+ [<ffffffff813637bf>] ext4_journal_commit_callback+0x6f/0xc0
+ [<ffffffff813ca336>] jbd2_journal_commit_transaction+0x23a6/0x2570
+ [<ffffffff8108aa42>] ? try_to_del_timer_sync+0x82/0xa0
+ [<ffffffff8108b491>] ? del_timer_sync+0x91/0x1e0
+ [<ffffffff813d3ecf>] kjournald2+0x19f/0x6a0
+ [<ffffffff810ad630>] ? wake_up_bit+0x40/0x40
+ [<ffffffff813d3d30>] ? bit_spin_lock+0x80/0x80
+ [<ffffffff810ac6be>] kthread+0x10e/0x120
+ [<ffffffff810ac5b0>] ? __init_kthread_worker+0x70/0x70
+ [<ffffffff818ff6ac>] ret_from_fork+0x7c/0xb0
+ [<ffffffff810ac5b0>] ? __init_kthread_worker+0x70/0x70
+
+This patch fixes the issue as follows:
+- ext4_journal_commit_callback(): make list traversal truly safe
+  simply by always starting from the list head
+- fix race between two ext4_journal_callback_del() and
+  ext4_journal_callback_try_del()
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/ext4_jbd2.h |    6 +++++-
+ fs/ext4/mballoc.c   |    8 ++++----
+ fs/ext4/super.c     |    7 +++++--
+ 3 files changed, 14 insertions(+), 7 deletions(-)
+
+--- a/fs/ext4/ext4_jbd2.h
++++ b/fs/ext4/ext4_jbd2.h
+@@ -164,16 +164,20 @@ static inline void ext4_journal_callback
+  * ext4_journal_callback_del: delete a registered callback
+  * @handle: active journal transaction handle on which callback was registered
+  * @jce: registered journal callback entry to unregister
++ * Return true if object was successfully removed
+  */
+-static inline void ext4_journal_callback_del(handle_t *handle,
++static inline bool ext4_journal_callback_try_del(handle_t *handle,
+                                            struct ext4_journal_cb_entry *jce)
+ {
++      bool deleted;
+       struct ext4_sb_info *sbi =
+                       EXT4_SB(handle->h_transaction->t_journal->j_private);
+       spin_lock(&sbi->s_md_lock);
++      deleted = !list_empty(&jce->jce_list);
+       list_del_init(&jce->jce_list);
+       spin_unlock(&sbi->s_md_lock);
++      return deleted;
+ }
+ int
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -4436,11 +4436,11 @@ ext4_mb_free_metadata(handle_t *handle,
+       node = rb_prev(new_node);
+       if (node) {
+               entry = rb_entry(node, struct ext4_free_data, efd_node);
+-              if (can_merge(entry, new_entry)) {
++              if (can_merge(entry, new_entry) &&
++                  ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
+                       new_entry->efd_start_cluster = entry->efd_start_cluster;
+                       new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      ext4_journal_callback_del(handle, &entry->efd_jce);
+                       kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+@@ -4448,10 +4448,10 @@ ext4_mb_free_metadata(handle_t *handle,
+       node = rb_next(new_node);
+       if (node) {
+               entry = rb_entry(node, struct ext4_free_data, efd_node);
+-              if (can_merge(new_entry, entry)) {
++              if (can_merge(new_entry, entry) &&
++                  ext4_journal_callback_try_del(handle, &entry->efd_jce)) {
+                       new_entry->efd_count += entry->efd_count;
+                       rb_erase(node, &(db->bb_free_root));
+-                      ext4_journal_callback_del(handle, &entry->efd_jce);
+                       kmem_cache_free(ext4_free_data_cachep, entry);
+               }
+       }
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -437,10 +437,13 @@ static void ext4_journal_commit_callback
+       struct super_block              *sb = journal->j_private;
+       struct ext4_sb_info             *sbi = EXT4_SB(sb);
+       int                             error = is_journal_aborted(journal);
+-      struct ext4_journal_cb_entry    *jce, *tmp;
++      struct ext4_journal_cb_entry    *jce;
++      BUG_ON(txn->t_state == T_FINISHED);
+       spin_lock(&sbi->s_md_lock);
+-      list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
++      while (!list_empty(&txn->t_private_list)) {
++              jce = list_entry(txn->t_private_list.next,
++                               struct ext4_journal_cb_entry, jce_list);
+               list_del_init(&jce->jce_list);
+               spin_unlock(&sbi->s_md_lock);
+               jce->jce_func(sb, jce, error);
diff --git a/queue-3.4/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch b/queue-3.4/ext4-fix-kconfig-documentation-for-config_ext4_debug.patch
new file mode 100644 (file)
index 0000000..f6e6c69
--- /dev/null
@@ -0,0 +1,29 @@
+From 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sun, 21 Apr 2013 20:32:03 -0400
+Subject: ext4: fix Kconfig documentation for CONFIG_EXT4_DEBUG
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit 7f3e3c7cfcec148ccca9c0dd2dbfd7b00b7ac10f upstream.
+
+Fix the Kconfig documentation for CONFIG_EXT4_DEBUG to match the
+change made by commit a0b30c1229: ext4: use module parameters instead
+of debugfs for mballoc_debug
+
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/Kconfig |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/ext4/Kconfig
++++ b/fs/ext4/Kconfig
+@@ -82,4 +82,5 @@ config EXT4_DEBUG
+         Enables run-time debugging support for the ext4 filesystem.
+         If you select Y here, then you will be able to turn on debugging
+-        with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug"
++        with a command such as:
++              echo 1 > /sys/module/ext4/parameters/mballoc_debug
diff --git a/queue-3.4/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch b/queue-3.4/ext4-fix-online-resizing-for-ext3-compat-file-systems.patch
new file mode 100644 (file)
index 0000000..9fbb627
--- /dev/null
@@ -0,0 +1,33 @@
+From c5c72d814cf0f650010337c73638b25e6d14d2d4 Mon Sep 17 00:00:00 2001
+From: Theodore Ts'o <tytso@mit.edu>
+Date: Sun, 21 Apr 2013 20:19:43 -0400
+Subject: ext4: fix online resizing for ext3-compat file systems
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+commit c5c72d814cf0f650010337c73638b25e6d14d2d4 upstream.
+
+Commit fb0a387dcdc restricts block allocations for indirect-mapped
+files to block groups less than s_blockfile_groups.  However, the
+online resizing code wasn't setting s_blockfile_groups, so the newly
+added block groups were not available for non-extent mapped files.
+
+Reported-by: Eric Sandeen <sandeen@redhat.com>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/ext4/resize.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1202,6 +1202,8 @@ static void ext4_update_super(struct sup
+       /* Update the global fs size fields */
+       sbi->s_groups_count += flex_gd->count;
++      sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
++                      (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+       /* Update the reserved block counts only once the new group is
+        * active. */
diff --git a/queue-3.4/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch b/queue-3.4/ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch
new file mode 100644 (file)
index 0000000..857b512
--- /dev/null
@@ -0,0 +1,46 @@
+From d87d830720a1446403ed38bfc2da268be0d356d1 Mon Sep 17 00:00:00 2001
+From: Jacob Keller <jacob.e.keller@intel.com>
+Date: Sat, 2 Mar 2013 07:51:42 +0000
+Subject: ixgbe: fix EICR write in ixgbe_msix_other
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+commit d87d830720a1446403ed38bfc2da268be0d356d1 upstream.
+
+Previously, the ixgbe_msix_other was writing the full 32bits of the set
+interrupts, instead of only the ones which the ixgbe_msix_other is
+handling. This resulted in a loss of performance when the X540's PPS feature is
+enabled due to sometimes clearing queue interrupts which resulted in the driver
+not getting the interrupt for cleaning the q_vector rings often enough. The fix
+is to simply mask the lower 16bits off so that this handler does not write them
+in the EICR, which causes them to remain high and be properly handled by the
+clean_rings interrupt routine as normal.
+
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
+Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+@@ -2263,6 +2263,16 @@ static irqreturn_t ixgbe_msix_other(int
+        * with the write to EICR.
+        */
+       eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
++
++      /* The lower 16bits of the EICR register are for the queue interrupts
++       * which should be masked here in order to not accidentally clear them if
++       * the bits are high when ixgbe_msix_other is called. There is a race
++       * condition otherwise which results in possible performance loss
++       * especially if the ixgbe_msix_other interrupt is triggering
++       * consistently (as it would when PPS is turned on for the X540 device)
++       */
++      eicr &= 0xFFFF0000;
++
+       IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
+       if (eicr & IXGBE_EICR_LSC)
diff --git a/queue-3.4/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch b/queue-3.4/jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch
new file mode 100644 (file)
index 0000000..37d99e1
--- /dev/null
@@ -0,0 +1,154 @@
+From 794446c6946513c684d448205fbd76fa35f38b72 Mon Sep 17 00:00:00 2001
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+Date: Wed, 3 Apr 2013 22:06:52 -0400
+Subject: jbd2: fix race between jbd2_journal_remove_checkpoint and ->j_commit_callback
+
+From: Dmitry Monakhov <dmonakhov@openvz.org>
+
+commit 794446c6946513c684d448205fbd76fa35f38b72 upstream.
+
+The following race is possible:
+
+[kjournald2]                              other_task
+jbd2_journal_commit_transaction()
+  j_state = T_FINISHED;
+  spin_unlock(&journal->j_list_lock);
+                                         ->jbd2_journal_remove_checkpoint()
+                                          ->jbd2_journal_free_transaction();
+                                            ->kmem_cache_free(transaction)
+  ->j_commit_callback(journal, transaction);
+    -> USE_AFTER_FREE
+
+WARNING: at lib/list_debug.c:62 __list_del_entry+0x1c0/0x250()
+Hardware name:
+list_del corruption. prev->next should be ffff88019a4ec198, but was 6b6b6b6b6b6b6b6b
+Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table mperf coretemp kvm_intel kvm crc32c_intel ghash_clmulni_intel microcode sg xhci_hcd button sd_mod crc_t10dif aesni_intel ablk_helper cryptd lrw aes_x86_64 xts gf128mul ahci libahci pata_acpi ata_generic dm_mirror dm_region_hash dm_log dm_mod
+Pid: 16400, comm: jbd2/dm-1-8 Tainted: G        W    3.8.0-rc3+ #107
+Call Trace:
+ [<ffffffff8106fb0d>] warn_slowpath_common+0xad/0xf0
+ [<ffffffff8106fc06>] warn_slowpath_fmt+0x46/0x50
+ [<ffffffff813637e9>] ? ext4_journal_commit_callback+0x99/0xc0
+ [<ffffffff8148cae0>] __list_del_entry+0x1c0/0x250
+ [<ffffffff813637bf>] ext4_journal_commit_callback+0x6f/0xc0
+ [<ffffffff813ca336>] jbd2_journal_commit_transaction+0x23a6/0x2570
+ [<ffffffff8108aa42>] ? try_to_del_timer_sync+0x82/0xa0
+ [<ffffffff8108b491>] ? del_timer_sync+0x91/0x1e0
+ [<ffffffff813d3ecf>] kjournald2+0x19f/0x6a0
+ [<ffffffff810ad630>] ? wake_up_bit+0x40/0x40
+ [<ffffffff813d3d30>] ? bit_spin_lock+0x80/0x80
+ [<ffffffff810ac6be>] kthread+0x10e/0x120
+ [<ffffffff810ac5b0>] ? __init_kthread_worker+0x70/0x70
+ [<ffffffff818ff6ac>] ret_from_fork+0x7c/0xb0
+ [<ffffffff810ac5b0>] ? __init_kthread_worker+0x70/0x70
+
+In order to demonstrate this issue one should mount ext4 with the
+"-o discard" option on an SSD.  This makes the callback take longer,
+so the race window becomes wider.
+
+In order to fix this we should mark transaction as finished only after
+callbacks have completed
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
+Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/jbd2/commit.c     |   50 ++++++++++++++++++++++++++++----------------------
+ include/linux/jbd2.h |    1 +
+ 2 files changed, 29 insertions(+), 22 deletions(-)
+
+--- a/fs/jbd2/commit.c
++++ b/fs/jbd2/commit.c
+@@ -325,7 +325,7 @@ void jbd2_journal_commit_transaction(jou
+       int space_left = 0;
+       int first_tag = 0;
+       int tag_flag;
+-      int i, to_free = 0;
++      int i;
+       int tag_bytes = journal_tag_bytes(journal);
+       struct buffer_head *cbh = NULL; /* For transactional checksums */
+       __u32 crc32_sum = ~0;
+@@ -1044,7 +1044,7 @@ restart_loop:
+       journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
+       spin_unlock(&journal->j_history_lock);
+-      commit_transaction->t_state = T_FINISHED;
++      commit_transaction->t_state = T_COMMIT_CALLBACK;
+       J_ASSERT(commit_transaction == journal->j_committing_transaction);
+       journal->j_commit_sequence = commit_transaction->t_tid;
+       journal->j_committing_transaction = NULL;
+@@ -1059,38 +1059,44 @@ restart_loop:
+                               journal->j_average_commit_time*3) / 4;
+       else
+               journal->j_average_commit_time = commit_time;
++
+       write_unlock(&journal->j_state_lock);
+-      if (commit_transaction->t_checkpoint_list == NULL &&
+-          commit_transaction->t_checkpoint_io_list == NULL) {
+-              __jbd2_journal_drop_transaction(journal, commit_transaction);
+-              to_free = 1;
++      if (journal->j_checkpoint_transactions == NULL) {
++              journal->j_checkpoint_transactions = commit_transaction;
++              commit_transaction->t_cpnext = commit_transaction;
++              commit_transaction->t_cpprev = commit_transaction;
+       } else {
+-              if (journal->j_checkpoint_transactions == NULL) {
+-                      journal->j_checkpoint_transactions = commit_transaction;
+-                      commit_transaction->t_cpnext = commit_transaction;
+-                      commit_transaction->t_cpprev = commit_transaction;
+-              } else {
+-                      commit_transaction->t_cpnext =
+-                              journal->j_checkpoint_transactions;
+-                      commit_transaction->t_cpprev =
+-                              commit_transaction->t_cpnext->t_cpprev;
+-                      commit_transaction->t_cpnext->t_cpprev =
+-                              commit_transaction;
+-                      commit_transaction->t_cpprev->t_cpnext =
++              commit_transaction->t_cpnext =
++                      journal->j_checkpoint_transactions;
++              commit_transaction->t_cpprev =
++                      commit_transaction->t_cpnext->t_cpprev;
++              commit_transaction->t_cpnext->t_cpprev =
++                      commit_transaction;
++              commit_transaction->t_cpprev->t_cpnext =
+                               commit_transaction;
+-              }
+       }
+       spin_unlock(&journal->j_list_lock);
+-
++      /* Drop all spin_locks because commit_callback may block.
++       * __journal_remove_checkpoint() can not destroy transaction
++       * under us because it is not marked as T_FINISHED yet */
+       if (journal->j_commit_callback)
+               journal->j_commit_callback(journal, commit_transaction);
+       trace_jbd2_end_commit(journal, commit_transaction);
+       jbd_debug(1, "JBD2: commit %d complete, head %d\n",
+                 journal->j_commit_sequence, journal->j_tail_sequence);
+-      if (to_free)
+-              jbd2_journal_free_transaction(commit_transaction);
++      write_lock(&journal->j_state_lock);
++      spin_lock(&journal->j_list_lock);
++      commit_transaction->t_state = T_FINISHED;
++      /* Recheck checkpoint lists after j_list_lock was dropped */
++      if (commit_transaction->t_checkpoint_list == NULL &&
++          commit_transaction->t_checkpoint_io_list == NULL) {
++              __jbd2_journal_drop_transaction(journal, commit_transaction);
++              jbd2_journal_free_transaction(commit_transaction);
++      }
++      spin_unlock(&journal->j_list_lock);
++      write_unlock(&journal->j_state_lock);
+       wake_up(&journal->j_wait_done_commit);
+ }
+--- a/include/linux/jbd2.h
++++ b/include/linux/jbd2.h
+@@ -470,6 +470,7 @@ struct transaction_s
+               T_COMMIT,
+               T_COMMIT_DFLUSH,
+               T_COMMIT_JFLUSH,
++              T_COMMIT_CALLBACK,
+               T_FINISHED
+       }                       t_state;
index 3617d42ad497ac1e899d426cdd9eee7c92189e8f..b29d8fb840aedc4aee3c9686ae0335f3a82f46d3 100644 (file)
@@ -51,3 +51,8 @@ nfsd4-don-t-close-read-write-opens-too-soon.patch
 nfsd-decode-and-send-64bit-time-values.patch
 wireless-regulatory-fix-channel-disabling-race-condition.patch
 ipc-sysv-shared-memory-limited-to-8tib.patch
+ixgbe-fix-eicr-write-in-ixgbe_msix_other.patch
+jbd2-fix-race-between-jbd2_journal_remove_checkpoint-and-j_commit_callback.patch
+ext4-fix-journal-callback-list-traversal.patch
+ext4-fix-online-resizing-for-ext3-compat-file-systems.patch
+ext4-fix-kconfig-documentation-for-config_ext4_debug.patch