]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 25 Nov 2013 22:50:20 +0000 (14:50 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 25 Nov 2013 22:50:20 +0000 (14:50 -0800)
added patches:
exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
nest-rename_lock-inside-vfsmount_lock.patch

queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch [new file with mode: 0644]
queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch [new file with mode: 0644]
queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch b/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
new file mode 100644 (file)
index 0000000..3091e00
--- /dev/null
@@ -0,0 +1,109 @@
+From e56fb2874015370e3b7f8d85051f6dce26051df9 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Tue, 30 Apr 2013 15:28:20 -0700
+Subject: exec: do not abuse ->cred_guard_mutex in threadgroup_lock()
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit e56fb2874015370e3b7f8d85051f6dce26051df9 upstream.
+
+threadgroup_lock() takes signal->cred_guard_mutex to ensure that
+thread_group_leader() is stable.  This doesn't look nice, the scope of
+this lock in do_execve() is huge.
+
+And as Dave pointed out this can lead to deadlock, we have the
+following dependencies:
+
+       do_execve:              cred_guard_mutex -> i_mutex
+       cgroup_mount:           i_mutex -> cgroup_mutex
+       attach_task_by_pid:     cgroup_mutex -> cred_guard_mutex
+
+Change de_thread() to take threadgroup_change_begin() around the
+switch-the-leader code and change threadgroup_lock() to avoid
+->cred_guard_mutex.
+
+Note that de_thread() can't sleep with ->group_rwsem held, this can
+obviously deadlock with the exiting leader if the writer is active, so it
+does threadgroup_change_end() before schedule().
+
+Reported-by: Dave Jones <davej@redhat.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Acked-by: Li Zefan <lizefan@huawei.com>
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[ zhj: adjust context ]
+Signed-off-by: Zhao Hongjiang <zhaohongjiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exec.c             |    3 +++
+ include/linux/sched.h |   18 ++++--------------
+ 2 files changed, 7 insertions(+), 14 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -909,11 +909,13 @@ static int de_thread(struct task_struct
+               sig->notify_count = -1; /* for exit_notify() */
+               for (;;) {
++                      threadgroup_change_begin(tsk);
+                       write_lock_irq(&tasklist_lock);
+                       if (likely(leader->exit_state))
+                               break;
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tasklist_lock);
++                      threadgroup_change_end(tsk);
+                       schedule();
+               }
+@@ -969,6 +971,7 @@ static int de_thread(struct task_struct
+               if (unlikely(leader->ptrace))
+                       __wake_up_parent(leader, leader->parent);
+               write_unlock_irq(&tasklist_lock);
++              threadgroup_change_end(tsk);
+               release_task(leader);
+       }
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2466,27 +2466,18 @@ static inline void threadgroup_change_en
+  *
+  * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+  * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+- * perform exec.  This is useful for cases where the threadgroup needs to
+- * stay stable across blockable operations.
++ * change ->group_leader/pid.  This is useful for cases where the threadgroup
++ * needs to stay stable across blockable operations.
+  *
+  * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
+  * synchronization.  While held, no new task will be added to threadgroup
+  * and no existing live task will have its PF_EXITING set.
+  *
+- * During exec, a task goes and puts its thread group through unusual
+- * changes.  After de-threading, exclusive access is assumed to resources
+- * which are usually shared by tasks in the same group - e.g. sighand may
+- * be replaced with a new one.  Also, the exec'ing task takes over group
+- * leader role including its pid.  Exclude these changes while locked by
+- * grabbing cred_guard_mutex which is used to synchronize exec path.
++ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
++ * sub-thread becomes a new leader.
+  */
+ static inline void threadgroup_lock(struct task_struct *tsk)
+ {
+-      /*
+-       * exec uses exit for de-threading nesting group_rwsem inside
+-       * cred_guard_mutex. Grab cred_guard_mutex first.
+-       */
+-      mutex_lock(&tsk->signal->cred_guard_mutex);
+       down_write(&tsk->signal->group_rwsem);
+ }
+@@ -2499,7 +2490,6 @@ static inline void threadgroup_lock(stru
+ static inline void threadgroup_unlock(struct task_struct *tsk)
+ {
+       up_write(&tsk->signal->group_rwsem);
+-      mutex_unlock(&tsk->signal->cred_guard_mutex);
+ }
+ #else
+ static inline void threadgroup_change_begin(struct task_struct *tsk) {}
diff --git a/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch b/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
new file mode 100644 (file)
index 0000000..c53f3bd
--- /dev/null
@@ -0,0 +1,47 @@
+From 74e3d1e17b2e11d175970b85acd44f5927000ba2 Mon Sep 17 00:00:00 2001
+From: Fan Du <fan.du@windriver.com>
+Date: Tue, 30 Apr 2013 15:27:27 -0700
+Subject: include/linux/fs.h: disable preempt when acquire i_size_seqcount write lock
+
+From: Fan Du <fan.du@windriver.com>
+
+commit 74e3d1e17b2e11d175970b85acd44f5927000ba2 upstream.
+
+Two rt tasks bind to one CPU core.
+
+The higher priority rt task A preempts a lower priority rt task B which
+has already taken the write seq lock, and then the higher priority rt
+task A try to acquire read seq lock, it's doomed to lockup.
+
+rt task B with lower priority: call write
+i_size_write                                        rt task A with higher priority: call sync, and preempt task B
+  write_seqcount_begin(&inode->i_size_seqcount);    i_size_read
+  inode->i_size = i_size;                             read_seqcount_begin <-- lockup here...
+
+So disabling preemption when acquiring every i_size_seqcount *write* lock
+will cure the problem.
+
+Signed-off-by: Fan Du <fan.du@windriver.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Zhao Hongjiang <zhaohongjiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/fs.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -915,9 +915,11 @@ static inline loff_t i_size_read(const s
+ static inline void i_size_write(struct inode *inode, loff_t i_size)
+ {
+ #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++      preempt_disable();
+       write_seqcount_begin(&inode->i_size_seqcount);
+       inode->i_size = i_size;
+       write_seqcount_end(&inode->i_size_seqcount);
++      preempt_enable();
+ #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+       preempt_disable();
+       inode->i_size = i_size;
diff --git a/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch b/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch
new file mode 100644 (file)
index 0000000..362e301
--- /dev/null
@@ -0,0 +1,124 @@
+From 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Tue, 26 Mar 2013 18:25:57 -0400
+Subject: Nest rename_lock inside vfsmount_lock
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a upstream.
+
+... lest we get livelocks between path_is_under() and d_path() and friends.
+
+The thing is, wrt fairness lglocks are more similar to rwsems than to rwlocks;
+it is possible to have thread B spin on attempt to take lock shared while thread
+A is already holding it shared, if B is on lower-numbered CPU than A and there's
+a thread C spinning on attempt to take the same lock exclusive.
+
+As the result, we need consistent ordering between vfsmount_lock (lglock) and
+rename_lock (seq_lock), even though everything that takes both is going to take
+vfsmount_lock only shared.
+
+Spotted-by: Brad Spengler <spender@grsecurity.net>
+Cc: stable@vger.kernel.org
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+[ zhj: backport to 3.4:
+  - Adjust context
+  - s/&vfsmount_lock/vfsmount_lock/]
+Signed-off-by: Zhao Hongjiang <zhaohongjiang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/dcache.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2513,7 +2513,6 @@ static int prepend_path(const struct pat
+       bool slash = false;
+       int error = 0;
+-      br_read_lock(vfsmount_lock);
+       while (dentry != root->dentry || vfsmnt != root->mnt) {
+               struct dentry * parent;
+@@ -2543,8 +2542,6 @@ static int prepend_path(const struct pat
+       if (!error && !slash)
+               error = prepend(buffer, buflen, "/", 1);
+-out:
+-      br_read_unlock(vfsmount_lock);
+       return error;
+ global_root:
+@@ -2561,7 +2558,7 @@ global_root:
+               error = prepend(buffer, buflen, "/", 1);
+       if (!error)
+               error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
+-      goto out;
++      return error;
+ }
+ /**
+@@ -2588,9 +2585,11 @@ char *__d_path(const struct path *path,
+       int error;
+       prepend(&res, &buflen, "\0", 1);
++      br_read_lock(vfsmount_lock);
+       write_seqlock(&rename_lock);
+       error = prepend_path(path, root, &res, &buflen);
+       write_sequnlock(&rename_lock);
++      br_read_unlock(vfsmount_lock);
+       if (error < 0)
+               return ERR_PTR(error);
+@@ -2607,9 +2606,11 @@ char *d_absolute_path(const struct path
+       int error;
+       prepend(&res, &buflen, "\0", 1);
++      br_read_lock(vfsmount_lock);
+       write_seqlock(&rename_lock);
+       error = prepend_path(path, &root, &res, &buflen);
+       write_sequnlock(&rename_lock);
++      br_read_unlock(vfsmount_lock);
+       if (error > 1)
+               error = -EINVAL;
+@@ -2673,11 +2674,13 @@ char *d_path(const struct path *path, ch
+               return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+       get_fs_root(current->fs, &root);
++      br_read_lock(vfsmount_lock);
+       write_seqlock(&rename_lock);
+       error = path_with_deleted(path, &root, &res, &buflen);
++      write_sequnlock(&rename_lock);
++      br_read_unlock(vfsmount_lock);
+       if (error < 0)
+               res = ERR_PTR(error);
+-      write_sequnlock(&rename_lock);
+       path_put(&root);
+       return res;
+ }
+@@ -2832,6 +2835,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+       get_fs_root_and_pwd(current->fs, &root, &pwd);
+       error = -ENOENT;
++      br_read_lock(vfsmount_lock);
+       write_seqlock(&rename_lock);
+       if (!d_unlinked(pwd.dentry)) {
+               unsigned long len;
+@@ -2841,6 +2845,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+               prepend(&cwd, &buflen, "\0", 1);
+               error = prepend_path(&pwd, &root, &cwd, &buflen);
+               write_sequnlock(&rename_lock);
++              br_read_unlock(vfsmount_lock);
+               if (error < 0)
+                       goto out;
+@@ -2861,6 +2866,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+               }
+       } else {
+               write_sequnlock(&rename_lock);
++              br_read_unlock(vfsmount_lock);
+       }
+ out:
index 44529cb7f720512469c5df2b6847e2db17147a2b..9c34137bced6f31065e2eca6ee3b8a5c957c0ffd 100644 (file)
@@ -13,3 +13,6 @@ can-c_can-fix-rx-message-handling-handle-lost-message-before-eob.patch
 fix-a-few-incorrectly-checked-remap_pfn_range-calls.patch
 sunrpc-handle-ekeyexpired-in-call_refreshresult.patch
 sunrpc-don-t-map-ekeyexpired-to-eacces-in-call_refreshresult.patch
+nest-rename_lock-inside-vfsmount_lock.patch
+exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
+include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch