From 294bf07e2c13afad94e63e753b2cc7b5b4eefe35 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 25 Nov 2013 14:50:20 -0800
Subject: [PATCH] 3.4-stable patches

added patches:
      exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
      include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
      nest-rename_lock-inside-vfsmount_lock.patch
---
 ...cred_guard_mutex-in-threadgroup_lock.patch | 109 +++++++++++++++
 ...n-acquire-i_size_seqcount-write-lock.patch |  47 +++++++
 ...est-rename_lock-inside-vfsmount_lock.patch | 124 ++++++++++++++++++
 queue-3.4/series                              |   3 +
 4 files changed, 283 insertions(+)
 create mode 100644 queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
 create mode 100644 queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
 create mode 100644 queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch

diff --git a/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch b/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
new file mode 100644
index 00000000000..3091e00ec05
--- /dev/null
+++ b/queue-3.4/exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
@@ -0,0 +1,109 @@
+From e56fb2874015370e3b7f8d85051f6dce26051df9 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov
+Date: Tue, 30 Apr 2013 15:28:20 -0700
+Subject: exec: do not abuse ->cred_guard_mutex in threadgroup_lock()
+
+From: Oleg Nesterov
+
+commit e56fb2874015370e3b7f8d85051f6dce26051df9 upstream.
+
+threadgroup_lock() takes signal->cred_guard_mutex to ensure that
+thread_group_leader() is stable.  This doesn't look nice, the scope of
+this lock in do_execve() is huge.
+
+And as Dave pointed out this can lead to deadlock, we have the
+following dependencies:
+
+	do_execve:		cred_guard_mutex -> i_mutex
+	cgroup_mount:		i_mutex -> cgroup_mutex
+	attach_task_by_pid:	cgroup_mutex -> cred_guard_mutex
+
+Change de_thread() to take threadgroup_change_begin() around the
+switch-the-leader code and change threadgroup_lock() to avoid
+->cred_guard_mutex.
+
+Note that de_thread() can't sleep with ->group_rwsem held, this can
+obviously deadlock with the exiting leader if the writer is active, so it
+does threadgroup_change_end() before schedule().
+
+Reported-by: Dave Jones
+Acked-by: Tejun Heo
+Acked-by: Li Zefan
+Signed-off-by: Oleg Nesterov
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+[ zhj: adjust context ]
+Signed-off-by: Zhao Hongjiang
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/exec.c             |    3 +++
+ include/linux/sched.h |   18 ++++--------------
+ 2 files changed, 7 insertions(+), 14 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -909,11 +909,13 @@ static int de_thread(struct task_struct
+ 
+         sig->notify_count = -1;  /* for exit_notify() */
+         for (;;) {
++                threadgroup_change_begin(tsk);
+                 write_lock_irq(&tasklist_lock);
+                 if (likely(leader->exit_state))
+                         break;
+                 __set_current_state(TASK_UNINTERRUPTIBLE);
+                 write_unlock_irq(&tasklist_lock);
++                threadgroup_change_end(tsk);
+                 schedule();
+         }
+ 
+@@ -969,6 +971,7 @@ static int de_thread(struct task_struct
+                 if (unlikely(leader->ptrace))
+                         __wake_up_parent(leader, leader->parent);
+                 write_unlock_irq(&tasklist_lock);
++                threadgroup_change_end(tsk);
+ 
+                 release_task(leader);
+         }
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -2466,27 +2466,18 @@ static inline void threadgroup_change_en
+  *
+  * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+  * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+- * perform exec.  This is useful for cases where the threadgroup needs to
+- * stay stable across blockable operations.
++ * change ->group_leader/pid.  This is useful for cases where the threadgroup
++ * needs to stay stable across blockable operations.
+  *
+  * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
+  * synchronization.  While held, no new task will be added to threadgroup
+  * and no existing live task will have its PF_EXITING set.
+  *
+- * During exec, a task goes and puts its thread group through unusual
+- * changes.  After de-threading, exclusive access is assumed to resources
+- * which are usually shared by tasks in the same group - e.g. sighand may
+- * be replaced with a new one.  Also, the exec'ing task takes over group
+- * leader role including its pid.  Exclude these changes while locked by
+- * grabbing cred_guard_mutex which is used to synchronize exec path.
++ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
++ * sub-thread becomes a new leader.
+  */
+ static inline void threadgroup_lock(struct task_struct *tsk)
+ {
+-        /*
+-         * exec uses exit for de-threading nesting group_rwsem inside
+-         * cred_guard_mutex.  Grab cred_guard_mutex first.
+-         */
+-        mutex_lock(&tsk->signal->cred_guard_mutex);
+         down_write(&tsk->signal->group_rwsem);
+ }
+ 
+@@ -2499,7 +2490,6 @@ static inline void threadgroup_lock(stru
+ static inline void threadgroup_unlock(struct task_struct *tsk)
+ {
+         up_write(&tsk->signal->group_rwsem);
+-        mutex_unlock(&tsk->signal->cred_guard_mutex);
+ }
+ #else
+ static inline void threadgroup_change_begin(struct task_struct *tsk) {}
diff --git a/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch b/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
new file mode 100644
index 00000000000..c53f3bd8f25
--- /dev/null
+++ b/queue-3.4/include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
@@ -0,0 +1,47 @@
+From 74e3d1e17b2e11d175970b85acd44f5927000ba2 Mon Sep 17 00:00:00 2001
+From: Fan Du
+Date: Tue, 30 Apr 2013 15:27:27 -0700
+Subject: include/linux/fs.h: disable preempt when acquire i_size_seqcount write lock
+
+From: Fan Du
+
+commit 74e3d1e17b2e11d175970b85acd44f5927000ba2 upstream.
+
+Two rt tasks bind to one CPU core.
+
+The higher priority rt task A preempts a lower priority rt task B which
+has already taken the write seq lock, and then the higher priority rt
+task A try to acquire read seq lock, it's doomed to lockup.
+
+rt task A with lower priority: call write
+i_size_write                                        rt task B with higher priority: call sync, and preempt task A
+  write_seqcount_begin(&inode->i_size_seqcount);      i_size_read
+  inode->i_size = i_size;                               read_seqcount_begin <-- lockup here...
+
+So disable preempt when acquiring every i_size_seqcount *write* lock will
+cure the problem.
+
+Signed-off-by: Fan Du
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Cc: Zhao Hongjiang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/fs.h |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -915,9 +915,11 @@ static inline loff_t i_size_read(const s
+ static inline void i_size_write(struct inode *inode, loff_t i_size)
+ {
+ #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
++        preempt_disable();
+         write_seqcount_begin(&inode->i_size_seqcount);
+         inode->i_size = i_size;
+         write_seqcount_end(&inode->i_size_seqcount);
++        preempt_enable();
+ #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+         preempt_disable();
+         inode->i_size = i_size;
diff --git a/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch b/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch
new file mode 100644
index 00000000000..362e3010d23
--- /dev/null
+++ b/queue-3.4/nest-rename_lock-inside-vfsmount_lock.patch
@@ -0,0 +1,124 @@
+From 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a Mon Sep 17 00:00:00 2001
+From: Al Viro
+Date: Tue, 26 Mar 2013 18:25:57 -0400
+Subject: Nest rename_lock inside vfsmount_lock
+
+From: Al Viro
+
+commit 7ea600b5314529f9d1b9d6d3c41cb26fce6a7a4a upstream.
+
+... lest we get livelocks between path_is_under() and d_path() and friends.
+
+The thing is, wrt fairness lglocks are more similar to rwsems than to rwlocks;
+it is possible to have thread B spin on attempt to take lock shared while thread
+A is already holding it shared, if B is on lower-numbered CPU than A and there's
+a thread C spinning on attempt to take the same lock exclusive.
+
+As the result, we need consistent ordering between vfsmount_lock (lglock) and
+rename_lock (seq_lock), even though everything that takes both is going to take
+vfsmount_lock only shared.
+
+Spotted-by: Brad Spengler
+Cc: stable@vger.kernel.org
+Signed-off-by: Al Viro
+[ zhj: backport to 3.4:
+  - Adjust context
+  - s/&vfsmount_lock/vfsmount_lock/]
+Signed-off-by: Zhao Hongjiang
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/dcache.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/fs/dcache.c
++++ b/fs/dcache.c
+@@ -2513,7 +2513,6 @@ static int prepend_path(const struct pat
+         bool slash = false;
+         int error = 0;
+ 
+-        br_read_lock(vfsmount_lock);
+         while (dentry != root->dentry || vfsmnt != root->mnt) {
+                 struct dentry * parent;
+ 
+@@ -2543,8 +2542,6 @@ static int prepend_path(const struct pat
+         if (!error && !slash)
+                 error = prepend(buffer, buflen, "/", 1);
+ 
+-out:
+-        br_read_unlock(vfsmount_lock);
+         return error;
+ 
+ global_root:
+@@ -2561,7 +2558,7 @@ global_root:
+         error = prepend(buffer, buflen, "/", 1);
+         if (!error)
+                 error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
+-        goto out;
++        return error;
+ }
+ 
+ /**
+@@ -2588,9 +2585,11 @@ char *__d_path(const struct path *path,
+         int error;
+ 
+         prepend(&res, &buflen, "\0", 1);
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         error = prepend_path(path, root, &res, &buflen);
+         write_sequnlock(&rename_lock);
++        br_read_unlock(vfsmount_lock);
+ 
+         if (error < 0)
+                 return ERR_PTR(error);
+@@ -2607,9 +2606,11 @@ char *d_absolute_path(const struct path
+         int error;
+ 
+         prepend(&res, &buflen, "\0", 1);
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         error = prepend_path(path, &root, &res, &buflen);
+         write_sequnlock(&rename_lock);
++        br_read_unlock(vfsmount_lock);
+ 
+         if (error > 1)
+                 error = -EINVAL;
+@@ -2673,11 +2674,13 @@ char *d_path(const struct path *path, ch
+                 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
+ 
+         get_fs_root(current->fs, &root);
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         error = path_with_deleted(path, &root, &res, &buflen);
++        write_sequnlock(&rename_lock);
++        br_read_unlock(vfsmount_lock);
+         if (error < 0)
+                 res = ERR_PTR(error);
+-        write_sequnlock(&rename_lock);
+         path_put(&root);
+         return res;
+ }
+@@ -2832,6 +2835,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+         get_fs_root_and_pwd(current->fs, &root, &pwd);
+ 
+         error = -ENOENT;
++        br_read_lock(vfsmount_lock);
+         write_seqlock(&rename_lock);
+         if (!d_unlinked(pwd.dentry)) {
+                 unsigned long len;
+@@ -2841,6 +2845,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+                 prepend(&cwd, &buflen, "\0", 1);
+                 error = prepend_path(&pwd, &root, &cwd, &buflen);
+                 write_sequnlock(&rename_lock);
++                br_read_unlock(vfsmount_lock);
+ 
+                 if (error < 0)
+                         goto out;
+@@ -2861,6 +2866,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, b
+                 }
+         } else {
+                 write_sequnlock(&rename_lock);
++                br_read_unlock(vfsmount_lock);
+         }
+ 
+ out:
diff --git a/queue-3.4/series b/queue-3.4/series
index 44529cb7f72..9c34137bced 100644
--- a/queue-3.4/series
+++ b/queue-3.4/series
@@ -13,3 +13,6 @@ can-c_can-fix-rx-message-handling-handle-lost-message-before-eob.patch
 fix-a-few-incorrectly-checked-remap_pfn_range-calls.patch
 sunrpc-handle-ekeyexpired-in-call_refreshresult.patch
 sunrpc-don-t-map-ekeyexpired-to-eacces-in-call_refreshresult.patch
+nest-rename_lock-inside-vfsmount_lock.patch
+exec-do-not-abuse-cred_guard_mutex-in-threadgroup_lock.patch
+include-linux-fs.h-disable-preempt-when-acquire-i_size_seqcount-write-lock.patch
-- 
2.47.3
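
Editor's note, not part of the commit above: for readers unfamiliar with the seqcount
pattern that the include/linux/fs.h patch protects, here is a minimal user-space sketch
of the i_size_read()/i_size_write() protocol on 32-bit SMP kernels.  It is an
illustration only, not kernel code: the mock_* names are invented, memory barriers and
the volatile accesses of the real seqcount are omitted, and the actual implementation
lives in include/linux/fs.h.  It only shows why a writer preempted between the two
sequence increments leaves a higher-priority reader on the same CPU spinning in its
retry loop, which is exactly what the added preempt_disable()/preempt_enable() pair
prevents.

#include <stdint.h>
#include <stdio.h>

/* Stripped-down model of a seqcount-protected 64-bit size field. */
struct mock_inode {
        unsigned seq;       /* even: no write in progress, odd: write in progress */
        int64_t i_size;     /* 64-bit value a 32-bit CPU cannot load atomically    */
};

static void mock_i_size_write(struct mock_inode *inode, int64_t i_size)
{
        /*
         * The upstream fix wraps this section in preempt_disable()/preempt_enable().
         * If a higher-priority task preempts the writer here, while seq is odd, a
         * reader running on the same CPU spins in the retry loop below and the
         * writer never gets to finish: the lockup described in the commit message.
         */
        inode->seq++;                   /* like write_seqcount_begin(): seq goes odd  */
        inode->i_size = i_size;
        inode->seq++;                   /* like write_seqcount_end(): seq goes even   */
}

static int64_t mock_i_size_read(const struct mock_inode *inode)
{
        unsigned start;
        int64_t size;

        do {
                start = inode->seq;     /* like read_seqcount_begin(), simplified */
                size = inode->i_size;
        } while ((start & 1) || start != inode->seq);   /* retry while a write is in flight */

        return size;
}

int main(void)
{
        struct mock_inode inode = { .seq = 0, .i_size = 0 };

        mock_i_size_write(&inode, 4096);
        printf("i_size = %lld\n", (long long)mock_i_size_read(&inode));
        return 0;
}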