git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.14-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 25 Jan 2015 17:54:21 +0000 (09:54 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 25 Jan 2015 17:54:21 +0000 (09:54 -0800)
added patches:
fsnotify-next_i-is-freed-during-fsnotify_unmount_inodes.patch

queue-3.14/fsnotify-next_i-is-freed-during-fsnotify_unmount_inodes.patch [new file with mode: 0644]
queue-3.14/series

diff --git a/queue-3.14/fsnotify-next_i-is-freed-during-fsnotify_unmount_inodes.patch b/queue-3.14/fsnotify-next_i-is-freed-during-fsnotify_unmount_inodes.patch
new file mode 100644 (file)
index 0000000..3e3ab2f
--- /dev/null
@@ -0,0 +1,108 @@
+From 6424babfd68dd8a83d9c60a5242d27038856599f Mon Sep 17 00:00:00 2001
+From: Jerry Hoemann <jerry.hoemann@hp.com>
+Date: Wed, 29 Oct 2014 14:50:22 -0700
+Subject: fsnotify: next_i is freed during fsnotify_unmount_inodes.
+
+From: Jerry Hoemann <jerry.hoemann@hp.com>
+
+commit 6424babfd68dd8a83d9c60a5242d27038856599f upstream.
+
+During file system stress testing on 3.10 and 3.12 based kernels, the
+umount command occasionally hung in fsnotify_unmount_inodes in the
+section of code:
+
+                spin_lock(&inode->i_lock);
+                if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+                        spin_unlock(&inode->i_lock);
+                        continue;
+                }
+
+As this section of code holds the global inode_sb_list_lock, eventually
+the system hangs trying to acquire the lock.
+
+Multiple crash dumps showed:
+
+The inode->i_state == 0x60 and i_count == 0 and i_sb_list would point
+back at itself.  As this is not the value of list upon entry to the
+function, the kernel never exits the loop.
+
+To help narrow down the problem, the call to list_del_init in
+inode_sb_list_del was changed to list_del.  This poisons the pointers in
+the i_sb_list and causes the kernel to panic if it traverses a freed
+inode.
+
+Subsequent stress testing panicked in fsnotify_unmount_inodes at the
+bottom of the list_for_each_entry_safe loop, showing next_i had been
+freed.
+
+We believe the root cause of the problem is that next_i is being freed
+during the window of time that the list_for_each_entry_safe loop
+temporarily releases inode_sb_list_lock to call fsnotify and
+fsnotify_inode_delete.
+
+The code in fsnotify_unmount_inodes attempts to prevent the freeing of
+inode and next_i by calling __iget.  However, the code doesn't do the
+__iget call on next_i:
+
+       if i_count == 0 or
+       if i_state & (I_FREEING | I_WILL_FREE)
+
+The patch addresses this issue by advancing next_i in the above two cases
+until we either find a next_i which we can __iget or we reach the end of
+the list.  This makes the handling of next_i more closely match the
+handling of the variable "inode."
+
+The time to reproduce the hang is highly variable (from hours to days.) We
+ran the stress test on a 3.10 kernel with the proposed patch for a week
+without failure.
+
+During list_for_each_entry_safe, next_i is becoming free causing
+the loop to never terminate.  Advance next_i in those cases where
+__iget is not done.
+
+Signed-off-by: Jerry Hoemann <jerry.hoemann@hp.com>
+Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+Cc: Ken Helias <kenhelias@firemail.de>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/notify/inode_mark.c |   17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+--- a/fs/notify/inode_mark.c
++++ b/fs/notify/inode_mark.c
+@@ -288,20 +288,25 @@ void fsnotify_unmount_inodes(struct list
+               spin_unlock(&inode->i_lock);
+               /* In case the dropping of a reference would nuke next_i. */
+-              if ((&next_i->i_sb_list != list) &&
+-                  atomic_read(&next_i->i_count)) {
++              while (&next_i->i_sb_list != list) {
+                       spin_lock(&next_i->i_lock);
+-                      if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
++                      if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
++                                              atomic_read(&next_i->i_count)) {
+                               __iget(next_i);
+                               need_iput = next_i;
++                              spin_unlock(&next_i->i_lock);
++                              break;
+                       }
+                       spin_unlock(&next_i->i_lock);
++                      next_i = list_entry(next_i->i_sb_list.next,
++                                              struct inode, i_sb_list);
+               }
+               /*
+-               * We can safely drop inode_sb_list_lock here because we hold
+-               * references on both inode and next_i.  Also no new inodes
+-               * will be added since the umount has begun.
++               * We can safely drop inode_sb_list_lock here because either
++               * we actually hold references on both inode and next_i or
++               * end of list.  Also no new inodes will be added since the
++               * umount has begun.
+                */
+               spin_unlock(&inode_sb_list_lock);
index 715cef709eabb1c76271c671823b1b136d0d4b79..2470deda13b75a78c16ba2c6f699d3d1944179ea 100644 (file)
@@ -93,3 +93,4 @@ kvm-nvmx-disable-unrestricted-mode-if-ept-0.patch
 netfilter-ipset-small-potential-read-beyond-the-end-of-buffer.patch
 net-prevent-of-emerging-cross-namespace-symlinks.patch
 net-fix-creation-adjacent-device-symlinks.patch
+fsnotify-next_i-is-freed-during-fsnotify_unmount_inodes.patch