From 294bf07e2c13afad94e63e753b2cc7b5b4eefe35 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 25 Nov 2013 14:50:20 -0800
Subject: [PATCH] 3.4-stable patches

added patches:
      exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
      include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
      nest-rename_lock-inside-vfsmount_lock.patch
---
 ...cred_guard_mutex-in-threadgroup_lock.patch | 109 +++++++++++++++
 ...n-acquire-i_size_seqcount-write-lock.patch |  47 +++++++
 ...est-rename_lock-inside-vfsmount_lock.patch | 124 ++++++++++++++++++
 queue-3.4/series                              |   3 +
 4 files changed, 283 insertions(+)
 create mode 100644 queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
 create mode 100644 queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
 create mode 100644 queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch

diff --git a/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch b/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
new file mode 100644
index 00000000000..3091e00ec05
--- /dev/null
+++ b/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
@@ -0,0 +1,109 @@
+From e56fb2874015370e3b7f8d85051f6dce26051df9 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov
+Date: Tue, 30 Apr 2013 15:28:20 -0700
+Subject: exec: do not abuse ->cred_guard_mutex in threadgroup_lock()
+
+From: Oleg Nesterov
+
+commit e56fb2874015370e3b7f8d85051f6dce26051df9 upstream.
+
+threadgroup_lock() takes signal->cred_guard_mutex to ensure that
+thread_group_leader() is stable.  This doesn't look nice, the scope of
+this lock in do_execve() is huge.
+
+And as Dave pointed out this can lead to deadlock, we have the
+following dependencies:
+
+	do_execve:		cred_guard_mutex -> i_mutex
+	cgroup_mount:		i_mutex -> cgroup_mutex
+	attach_task_by_pid:	cgroup_mutex -> cred_guard_mutex
+
+Change de_thread() to take threadgroup_change_begin() around the
+switch-the-leader code and change threadgroup_lock() to avoid
+->cred_guard_mutex.
+
+Note that de_thread() can't sleep with ->group_rwsem held, this can
+obviously deadlock with the exiting leader if the writer is active, so it
+does threadgroup_change_end() before schedule().
+
+Reported-by: Dave Jones
+Acked-by: Tejun Heo
+Acked-by: Li Zefan
+Signed-off-by: Oleg Nesterov
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+[ zhj: adjust context ]
+Signed-off-by: Zhao Hongjiang
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/exec.c             |    3 +++
+ include/linux/sched.h |   18 ++++--------------
+ 2 files changed, 7 insertions(+), 14 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -909,11 +909,13 @@ static int de_thread(struct task_struct
+ 
+         sig->notify_count = -1;  /* for exit_notify() */
+         for (;;) {
++                threadgroup_change_begin(tsk);
+                 write_lock_irq(&tasklist_lock);
+                 if (likely(leader->exit_state))
+                         break;
+                 __set_current_state(TASK_UNINTERRUPTIBLE);
+                 write_unlock_irq(&tasklist_lock);
++                threadgroup_change_end(tsk);
+                 schedule();
+         }
+ 
+@@ -969,6 +971,7 @@ static int de_thread(struct task_struct
+                 if (unlikely(leader->ptrace))
+                         __wake_up_parent(leader, leader->parent);
+                 write_unlock_irq(&tasklist_lock);
++                threadgroup_change_end(tsk);
+ 
+                 release_task(leader);
+         }
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2466,27 +2466,18 @@ static inline void threadgroup_change_en
+  *
+  * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+  * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+- * perform exec.  This is useful for cases where the threadgroup needs to
+- * stay stable across blockable operations.
++ * change ->group_leader/pid.  This is useful for cases where the threadgroup
++ * needs to stay stable across blockable operations.
+  *
+  * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
+  * synchronization.  While held, no new task will be added to threadgroup
+  * and no existing live task will have its PF_EXITING set.
+  *
+- * During exec, a task goes and puts its thread group through unusual
+- * changes.  After de-threading, exclusive access is assumed to resources
+- * which are usually shared by tasks in the same group - e.g. sighand may
+- * be replaced with a new one.  Also, the exec'ing task takes over group
+- * leader role including its pid.  Exclude these changes while locked by
+- * grabbing cred_guard_mutex which is used to synchronize exec path.
++ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
++ * sub-thread becomes a new leader.
+  */
+ static inline void threadgroup_lock(struct task_struct *tsk)
+ {
+-        /*
+-         * exec uses exit for de-threading nesting group_rwsem inside
+-         * cred_guard_mutex.  Grab cred_guard_mutex first.
+-         */
+-        mutex_lock(&tsk->signal->cred_guard_mutex);
+         down_write(&tsk->signal->group_rwsem);
+ }
+ 
+@@ -2499,7 +2490,6 @@ static inline void threadgroup_lock(stru
+ static inline void threadgroup_unlock(struct task_struct *tsk)
+ {
+         up_write(&tsk->signal->group_rwsem);
+-        mutex_unlock(&tsk->signal->cred_guard_mutex);
+ }
+ #else
+ static inline void threadgroup_change_begin(struct task_struct *tsk) {}
diff --git a/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch b/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
new file mode 100644
index 00000000000..c53f3bd8f25
--- /dev/null
+++ b/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
@@ -0,0 +1,47 @@
+From 74e3d1e17b2e11d175970b85acd44f5927000ba2 Mon Sep 17 00:00:00 2001
+From: Fan Du
+Date: Tue, 30 Apr 2013 15:27:27 -0700
+Subject: include/linux/fs.h: disable preempt when acquire i_size_seqcount write lock
+
+From: Fan Du
+
+commit 74e3d1e17b2e11d175970b85acd44f5927000ba2 upstream.
+
+Two rt tasks bind to one CPU core.
+
+The higher priority rt task A preempts a lower priority rt task B which
+has already taken the write seq lock, and then the higher priority rt
+task A try to acquire read seq lock, it's doomed to lockup.
+
+rt task A with lower priority: call write
+i_size_write                                        rt task B with higher priority: call sync, and preempt task A
+  write_seqcount_begin(&inode->i_size_seqcount);      i_size_read
+  inode->i_size = i_size;                               read_seqcount_begin <-- lockup here...
+
+So disable preempt when acquiring every i_size_seqcount *write* lock will
+cure the problem.
+
+Signed-off-by: Fan Du
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Cc: Zhao Hongjiang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/fs.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -915,9 +915,11 @@ static inline loff_t i_size_read(const s
+ static inline void i_size_write(struct inode *inode, loff_t i_size)
+ {
+ #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++        preempt_disable();
+         write_seqcount_begin(&inode->i_size_seqcount);
+         inode->i_size = i_size;
+         write_seqcount_end(&inode->i_size_seqcount);
++        preempt_enable();
+ #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+         preempt_disable();
+         inode->i_size = i_size;
diff --git a/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch b/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch
new file mode 100644
index 00000000000..362e3010d23
--- /dev/null
+++ b/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch
@@ -0,0 +1,124 @@
+From 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a Mon Sep 17 00:00:00 2001
+From: Al Viro
+Date: Tue, 26 Mar 2013 18:25:57 -0400
+Subject: Nest rename_lock inside vfsmount_lock
+
+From: Al Viro
+
+commit 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a upstream.
+
+... lest we get livelocks between path_is_under() and d_path() and friends.
+
+The thing is, wrt fairness lglocks are more similar to rwsems than to rwlocks;
+it is possible to have thread B spin on attempt to take lock shared while thread
+A is already holding it shared, if B is on lower-numbered CPU than A and there's
+a thread C spinning on attempt to take the same lock exclusive.
+
+As the result, we need consistent ordering between vfsmount_lock (lglock) and
+rename_lock (seq_lock), even though everything that takes both is going to take
+vfsmount_lock only shared.
+
+Spotted-by: Brad Spengler
+Cc: stable@vger.kernel.org
+Signed-off-by: Al Viro
+[ zhj: backport to 3.4:
+  - Adjust context
+  - s/&vfsmount_lock/vfsmount_lock/]
+Signed-off-by: Zhao Hongjiang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/dcache.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2513,7 +2513,6 @@ static int prepend_path(const struct pat
+         bool slash = false;
+         int error = 0;
+ 
+-        br_read_lock(vfsmount_lock);
+         while (dentry != root->dentry || vfsmnt != root->mnt) {
+                 struct dentry * parent;
+ 
+@@ -2543,8 +2542,6 @@ static int prepend_path(const struct pat
+         if (!error && !slash)
+                 error = prepend(buffer, buflen, "/", 1);
+ 
+-out:
+-        br_read_unlock(vfsmount_lock);
+         return error;
+ 
+ global_root:
+@@ -2561,7 +2558,7 @@ global_root:
+         error = prepend(buffer, buflen, "/", 1);
+         if (!error)
+                 error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
+-        goto out;
++        return error;
+ }
+ 
+ /**
+@@ -2588,9 +2585,11 @@ char *__d_path(const struct path *path,
+         int error;
+ 
+         prepend(&res, &buflen, "\0", 1);
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         error = prepend_path(path, root, &res, &buflen);
+         write_sequnlock(&rename_lock);
++        br_read_unlock(vfsmount_lock);
+ 
+         if (error < 0)
+                 return ERR_PTR(error);
+@@ -2607,9 +2606,11 @@ char *d_absolute_path(const struct path
+         int error;
+ 
+         prepend(&res, &buflen, "\0", 1);
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         error = prepend_path(path, &root, &res, &buflen);
+         write_sequnlock(&rename_lock);
++        br_read_unlock(vfsmount_lock);
+ 
+         if (error > 1)
+                 error = -EINVAL;
+@@ -2673,11 +2674,13 @@ char *d_path(const struct path *path, ch
+                 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+ 
+         get_fs_root(current->fs, &root);
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         error = path_with_deleted(path, &root, &res, &buflen);
++        write_sequnlock(&rename_lock);
++        br_read_unlock(vfsmount_lock);
+         if (error < 0)
+                 res = ERR_PTR(error);
+-        write_sequnlock(&rename_lock);
+         path_put(&root);
+         return res;
+ }
+@@ -2832,6 +2835,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+         get_fs_root_and_pwd(current->fs, &root, &pwd);
+ 
+         error = -ENOENT;
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         if (!d_unlinked(pwd.dentry)) {
+                 unsigned long len;
+@@ -2841,6 +2845,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+                 prepend(&cwd, &buflen, "\0", 1);
+                 error = prepend_path(&pwd, &root, &cwd, &buflen);
+                 write_sequnlock(&rename_lock);
++                br_read_unlock(vfsmount_lock);
+ 
+                 if (error < 0)
+                         goto out;
+@@ -2861,6 +2866,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+                 }
+         } else {
+                 write_sequnlock(&rename_lock);
++                br_read_unlock(vfsmount_lock);
+         }
+ 
+ out:
diff --git a/queue-3.4/series b/queue-3.4/series
index 44529cb7f72..9c34137bced 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -13,3 +13,6 @@ can-c_can-fix-rx-message-handling-handle-lost-message-before-eob.patch
 fix-a-few-incorrectly-checked-remap_pfn_range-calls.patch
 sunrpc-handle-ekeyexpired-in-call_refreshresult.patch
 sunrpc-don-t-map-ekeyexpired-to-eacces-in-call_refreshresult.patch
+nest-rename_lock-inside-vfsmount_lock.patch
+exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
+include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
-- 
2.47.3
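
Editor's note, not part of the commit above: for readers unfamiliar with the seqcount
pattern that the include/linux/fs.h patch protects, here is a minimal user-space sketch
of the i_size_read()/i_size_write() protocol on 32-bit SMP kernels.  It is an
illustration only, not kernel code: the mock_* names are invented, memory barriers and
the volatile accesses of the real seqcount are omitted, and the actual implementation
lives in include/linux/fs.h.  It only shows why a writer preempted between the two
sequence increments leaves a higher-priority reader on the same CPU spinning in its
retry loop, which is exactly what the added preempt_disable()/preempt_enable() pair
prevents.

#include <stdint.h>
#include <stdio.h>

/* Stripped-down model of a seqcount-protected 64-bit size field. */
struct mock_inode {
        unsigned seq;       /* even: no write in progress, odd: write in progress */
        int64_t i_size;     /* 64-bit value a 32-bit CPU cannot load atomically    */
};

static void mock_i_size_write(struct mock_inode *inode, int64_t i_size)
{
        /*
         * The upstream fix wraps this section in preempt_disable()/preempt_enable().
         * If a higher-priority task preempts the writer here, while seq is odd, a
         * reader running on the same CPU spins in the retry loop below and the
         * writer never gets to finish: the lockup described in the commit message.
         */
        inode->seq++;                   /* like write_seqcount_begin(): seq goes odd  */
        inode->i_size = i_size;
        inode->seq++;                   /* like write_seqcount_end(): seq goes even   */
}

static int64_t mock_i_size_read(const struct mock_inode *inode)
{
        unsigned start;
        int64_t size;

        do {
                start = inode->seq;     /* like read_seqcount_begin(), simplified */
                size = inode->i_size;
        } while ((start & 1) || start != inode->seq);   /* retry while a write is in flight */

        return size;
}

int main(void)
{
        struct mock_inode inode = { .seq = 0, .i_size = 0 };

        mock_i_size_write(&inode, 4096);
        printf("i_size = %lld\n", (long long)mock_i_size_read(&inode));
        return 0;
}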