From: Greg Kroah-Hartman Date: Mon, 4 May 2009 23:53:45 +0000 (-0700) Subject: more .29 patches added X-Git-Tag: v2.6.27.23~8 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=62a3ba13b2d9939ab3dc67acc2124ceef8b249c7;p=thirdparty%2Fkernel%2Fstable-queue.git more .29 patches added --- diff --git a/queue-2.6.29/annotate-struct-fs_struct-s-usage-count-restriction.patch b/queue-2.6.29/annotate-struct-fs_struct-s-usage-count-restriction.patch new file mode 100644 index 00000000000..5d3c4b5cef0 --- /dev/null +++ b/queue-2.6.29/annotate-struct-fs_struct-s-usage-count-restriction.patch @@ -0,0 +1,37 @@ +From 795e2fe0a3b69dbc040d7efcf517e0cbad6901d0 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Sat, 28 Mar 2009 23:23:01 +0000 +Subject: Annotate struct fs_struct's usage count restriction + +From: David Howells + +commit 795e2fe0a3b69dbc040d7efcf517e0cbad6901d0 upstream. + +Annotate struct fs_struct's usage count to indicate the restrictions upon it. +It may not be incremented, except by clone(CLONE_FS), as this affects the +check in check_unsafe_exec() in fs/exec.c. + +Signed-off-by: David Howells +Signed-off-by: Hugh Dickins +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/fs_struct.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/include/linux/fs_struct.h ++++ b/include/linux/fs_struct.h +@@ -4,7 +4,10 @@ + #include + + struct fs_struct { +- atomic_t count; ++ atomic_t count; /* This usage count is used by check_unsafe_exec() for ++ * security checking purposes - therefore it may not be ++ * incremented, except by clone(CLONE_FS). 
++ */ + rwlock_t lock; + int umask; + struct path root, pwd; diff --git a/queue-2.6.29/check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch b/queue-2.6.29/check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch new file mode 100644 index 00000000000..29ad81997e7 --- /dev/null +++ b/queue-2.6.29/check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch @@ -0,0 +1,46 @@ +From f1191b50ec11c8e2ca766d6d99eb5bb9d2c084a3 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Mon, 30 Mar 2009 07:35:18 -0400 +Subject: check_unsafe_exec() doesn't care about signal handlers sharing + +From: Al Viro + +commit f1191b50ec11c8e2ca766d6d99eb5bb9d2c084a3 upstream. + +... since we'll unshare sighand anyway + +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1053,23 +1053,20 @@ int check_unsafe_exec(struct linux_binpr + { + struct task_struct *p = current, *t; + unsigned long flags; +- unsigned n_fs, n_sighand; ++ unsigned n_fs; + int res = 0; + + bprm->unsafe = tracehook_unsafe_exec(p); + + n_fs = 1; +- n_sighand = 1; + write_lock(&p->fs->lock); + lock_task_sighand(p, &flags); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if (t->fs == p->fs) + n_fs++; +- n_sighand++; + } + +- if (p->fs->users > n_fs || +- atomic_read(&p->sighand->count) > n_sighand) { ++ if (p->fs->users > n_fs) { + bprm->unsafe |= LSM_UNSAFE_SHARE; + } else { + if (p->fs->in_exec) diff --git a/queue-2.6.29/check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch b/queue-2.6.29/check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch new file mode 100644 index 00000000000..62ff83ce644 --- /dev/null +++ b/queue-2.6.29/check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch @@ -0,0 +1,62 @@ +From 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Fri, 24 Apr 2009 01:02:45 +0200 +Subject: 
check_unsafe_exec: s/lock_task_sighand/rcu_read_lock/ + +From: Oleg Nesterov + +commit 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 upstream. + +write_lock(&current->fs->lock) guarantees we can't wrongly miss +LSM_UNSAFE_SHARE, this is what we care about. Use rcu_read_lock() +instead of ->siglock to iterate over the sub-threads. We must see +all CLONE_THREAD|CLONE_FS threads which didn't pass exit_fs(), it +takes fs->lock too. + +With or without this patch we can miss the freshly cloned thread +and set LSM_UNSAFE_SHARE, we don't care. + +Signed-off-by: Oleg Nesterov +Acked-by: Roland McGrath +[ Fixed lock/unlock typo - Hugh ] +Acked-by: Hugh Dickins +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/exec.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1052,7 +1052,6 @@ EXPORT_SYMBOL(install_exec_creds); + int check_unsafe_exec(struct linux_binprm *bprm) + { + struct task_struct *p = current, *t; +- unsigned long flags; + unsigned n_fs; + int res = 0; + +@@ -1060,11 +1059,12 @@ int check_unsafe_exec(struct linux_binpr + + n_fs = 1; + write_lock(&p->fs->lock); +- lock_task_sighand(p, &flags); ++ rcu_read_lock(); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if (t->fs == p->fs) + n_fs++; + } ++ rcu_read_unlock(); + + if (p->fs->users > n_fs) { + bprm->unsafe |= LSM_UNSAFE_SHARE; +@@ -1075,8 +1075,6 @@ int check_unsafe_exec(struct linux_binpr + res = 1; + } + } +- +- unlock_task_sighand(p, &flags); + write_unlock(&p->fs->lock); + + return res; diff --git a/queue-2.6.29/compat_do_execve-should-unshare_files.patch b/queue-2.6.29/compat_do_execve-should-unshare_files.patch new file mode 100644 index 00000000000..e88a45d3c4e --- /dev/null +++ b/queue-2.6.29/compat_do_execve-should-unshare_files.patch @@ -0,0 +1,70 @@ +From 53e9309e01277ec99c38e84e0ca16921287cf470 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Sat, 28 Mar 2009 23:16:03 +0000 +Subject: compat_do_execve should 
unshare_files + +From: Hugh Dickins + +commit 53e9309e01277ec99c38e84e0ca16921287cf470 upstream. + +2.6.26's commit fd8328be874f4190a811c58cd4778ec2c74d2c05 +"sanitize handling of shared descriptor tables in failing execve()" +moved the unshare_files() from flush_old_exec() and several binfmts +to the head of do_execve(); but forgot to make the same change to +compat_do_execve(), leaving a CLONE_FILES files_struct shared across +exec from a 32-bit process on a 64-bit kernel. + +It's arguable whether the files_struct really ought to be unshared +across exec; but 2.6.1 made that so to stop the loading binary's fd +leaking into other threads, and a 32-bit process on a 64-bit kernel +ought to behave in the same way as 32 on 32 and 64 on 64. + +Signed-off-by: Hugh Dickins +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/compat.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -1392,12 +1392,17 @@ int compat_do_execve(char * filename, + { + struct linux_binprm *bprm; + struct file *file; ++ struct files_struct *displaced; + int retval; + ++ retval = unshare_files(&displaced); ++ if (retval) ++ goto out_ret; ++ + retval = -ENOMEM; + bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + if (!bprm) +- goto out_ret; ++ goto out_files; + + retval = mutex_lock_interruptible(&current->cred_exec_mutex); + if (retval < 0) +@@ -1457,6 +1462,8 @@ int compat_do_execve(char * filename, + mutex_unlock(&current->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); ++ if (displaced) ++ put_files_struct(displaced); + return retval; + + out: +@@ -1475,6 +1482,9 @@ out_unlock: + out_free: + free_bprm(bprm); + ++out_files: ++ if (displaced) ++ reset_files_struct(displaced); + out_ret: + return retval; + } diff --git a/queue-2.6.29/do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch 
b/queue-2.6.29/do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch new file mode 100644 index 00000000000..034918b226e --- /dev/null +++ b/queue-2.6.29/do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch @@ -0,0 +1,154 @@ +From 8c652f96d3852b97a49c331cd0bb02d22f3cb31b Mon Sep 17 00:00:00 2001 +From: Oleg Nesterov +Date: Fri, 24 Apr 2009 01:01:56 +0200 +Subject: do_execve() must not clear fs->in_exec if it was set by another thread + +From: Oleg Nesterov + +commit 8c652f96d3852b97a49c331cd0bb02d22f3cb31b upstream. + +If do_execve() fails after check_unsafe_exec(), it clears fs->in_exec +unconditionally. This is wrong if we race with our sub-thread which +also does do_execve: + + Two threads T1 and T2 and another process P, all share the same + ->fs. + + T1 starts do_execve(BAD_FILE). It calls check_unsafe_exec(), since + ->fs is shared, we set LSM_UNSAFE but not ->in_exec. + + P exits and decrements fs->users. + + T2 starts do_execve(), calls check_unsafe_exec(), now ->fs is not + shared, we set fs->in_exec. + + T1 continues, open_exec(BAD_FILE) fails, we clear ->in_exec and + return to the user-space. + + T1 does clone(CLONE_FS /* without CLONE_THREAD */). + + T2 continues without LSM_UNSAFE_SHARE while ->fs is shared with + another process. + +Change check_unsafe_exec() to return res = 1 if we set ->in_exec, and change +do_execve() to clear ->in_exec depending on res. + +When do_execve() suceeds, it is safe to clear ->in_exec unconditionally. +It can be set only if we don't share ->fs with another process, and since +we already killed all sub-threads either ->in_exec == 0 or we are the +only user of this ->fs. + +Also, we do not need fs->lock to clear fs->in_exec. 
+ +Signed-off-by: Oleg Nesterov +Acked-by: Roland McGrath +Acked-by: Hugh Dickins +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/compat.c | 11 +++++------ + fs/exec.c | 19 ++++++++++--------- + 2 files changed, 15 insertions(+), 15 deletions(-) + +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -1394,6 +1394,7 @@ int compat_do_execve(char * filename, + struct linux_binprm *bprm; + struct file *file; + struct files_struct *displaced; ++ bool clear_in_exec; + int retval; + + retval = unshare_files(&displaced); +@@ -1415,8 +1416,9 @@ int compat_do_execve(char * filename, + goto out_unlock; + + retval = check_unsafe_exec(bprm); +- if (retval) ++ if (retval < 0) + goto out_unlock; ++ clear_in_exec = retval; + + file = open_exec(filename); + retval = PTR_ERR(file); +@@ -1463,9 +1465,7 @@ int compat_do_execve(char * filename, + goto out; + + /* execve succeeded */ +- write_lock(&current->fs->lock); + current->fs->in_exec = 0; +- write_unlock(&current->fs->lock); + mutex_unlock(&current->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); +@@ -1484,9 +1484,8 @@ out_file: + } + + out_unmark: +- write_lock(&current->fs->lock); +- current->fs->in_exec = 0; +- write_unlock(&current->fs->lock); ++ if (clear_in_exec) ++ current->fs->in_exec = 0; + + out_unlock: + mutex_unlock(&current->cred_exec_mutex); +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1069,9 +1069,11 @@ int check_unsafe_exec(struct linux_binpr + if (p->fs->users > n_fs) { + bprm->unsafe |= LSM_UNSAFE_SHARE; + } else { +- if (p->fs->in_exec) +- res = -EAGAIN; +- p->fs->in_exec = 1; ++ res = -EAGAIN; ++ if (!p->fs->in_exec) { ++ p->fs->in_exec = 1; ++ res = 1; ++ } + } + + unlock_task_sighand(p, &flags); +@@ -1273,6 +1275,7 @@ int do_execve(char * filename, + struct linux_binprm *bprm; + struct file *file; + struct files_struct *displaced; ++ bool clear_in_exec; + int retval; + + retval = unshare_files(&displaced); +@@ -1294,8 +1297,9 @@ int do_execve(char * filename, + goto out_unlock; + + retval = 
check_unsafe_exec(bprm); +- if (retval) ++ if (retval < 0) + goto out_unlock; ++ clear_in_exec = retval; + + file = open_exec(filename); + retval = PTR_ERR(file); +@@ -1343,9 +1347,7 @@ int do_execve(char * filename, + goto out; + + /* execve succeeded */ +- write_lock(&current->fs->lock); + current->fs->in_exec = 0; +- write_unlock(&current->fs->lock); + mutex_unlock(&current->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); +@@ -1364,9 +1366,8 @@ out_file: + } + + out_unmark: +- write_lock(&current->fs->lock); +- current->fs->in_exec = 0; +- write_unlock(&current->fs->lock); ++ if (clear_in_exec) ++ current->fs->in_exec = 0; + + out_unlock: + mutex_unlock(&current->cred_exec_mutex); diff --git a/queue-2.6.29/fix-setuid-sometimes-doesn-t.patch b/queue-2.6.29/fix-setuid-sometimes-doesn-t.patch new file mode 100644 index 00000000000..d12f40b2b8c --- /dev/null +++ b/queue-2.6.29/fix-setuid-sometimes-doesn-t.patch @@ -0,0 +1,104 @@ +From e426b64c412aaa3e9eb3e4b261dc5be0d5a83e78 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Sat, 28 Mar 2009 23:20:19 +0000 +Subject: fix setuid sometimes doesn't + +From: Hugh Dickins + +commit e426b64c412aaa3e9eb3e4b261dc5be0d5a83e78 upstream. + +Joe Malicki reports that setuid sometimes doesn't: very rarely, +a setuid root program does not get root euid; and, by the way, +they have a health check running lsof every few minutes. + +Right, check_unsafe_exec() notes whether the files_struct is being +shared by more threads than will get killed by the exec, and if so +sets LSM_UNSAFE_SHARE to make bprm_set_creds() careful about euid. +But /proc/<pid>/fd and /proc/<pid>/fdinfo lookups make transient +use of get_files_struct(), which also raises that sharing count. + +There's a rather simple fix for this: exec's check on files->count +has been redundant ever since 2.6.1 made it unshare_files() (except +while compat_do_execve() omitted to do so) - just remove that check. 
+ +[Note to -stable: this patch will not apply before 2.6.29: earlier +releases should just remove the files->count line from unsafe_exec().] + +Reported-by: Joe Malicki +Narrowed-down-by: Michael Itz +Tested-by: Joe Malicki +Signed-off-by: Hugh Dickins +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/compat.c | 2 +- + fs/exec.c | 10 +++------- + fs/internal.h | 2 +- + 3 files changed, 5 insertions(+), 9 deletions(-) + +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -1412,7 +1412,7 @@ int compat_do_execve(char * filename, + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; +- check_unsafe_exec(bprm, current->files); ++ check_unsafe_exec(bprm); + + file = open_exec(filename); + retval = PTR_ERR(file); +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1049,28 +1049,24 @@ EXPORT_SYMBOL(install_exec_creds); + * - the caller must hold current->cred_exec_mutex to protect against + * PTRACE_ATTACH + */ +-void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files) ++void check_unsafe_exec(struct linux_binprm *bprm) + { + struct task_struct *p = current, *t; + unsigned long flags; +- unsigned n_fs, n_files, n_sighand; ++ unsigned n_fs, n_sighand; + + bprm->unsafe = tracehook_unsafe_exec(p); + + n_fs = 1; +- n_files = 1; + n_sighand = 1; + lock_task_sighand(p, &flags); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if (t->fs == p->fs) + n_fs++; +- if (t->files == files) +- n_files++; + n_sighand++; + } + + if (atomic_read(&p->fs->count) > n_fs || +- atomic_read(&p->files->count) > n_files || + atomic_read(&p->sighand->count) > n_sighand) + bprm->unsafe |= LSM_UNSAFE_SHARE; + +@@ -1289,7 +1285,7 @@ int do_execve(char * filename, + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; +- check_unsafe_exec(bprm, displaced); ++ check_unsafe_exec(bprm); + + file = open_exec(filename); + retval = PTR_ERR(file); +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -43,7 +43,7 @@ 
extern void __init chrdev_init(void); + /* + * exec.c + */ +-extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *); ++extern void check_unsafe_exec(struct linux_binprm *); + + /* + * namespace.c diff --git a/queue-2.6.29/fix-setuid-sometimes-wouldn-t.patch b/queue-2.6.29/fix-setuid-sometimes-wouldn-t.patch new file mode 100644 index 00000000000..d67220a1dcf --- /dev/null +++ b/queue-2.6.29/fix-setuid-sometimes-wouldn-t.patch @@ -0,0 +1,139 @@ +From 7c2c7d993044cddc5010f6f429b100c63bc7dffb Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Sat, 28 Mar 2009 23:21:27 +0000 +Subject: fix setuid sometimes wouldn't + +From: Hugh Dickins + +commit 7c2c7d993044cddc5010f6f429b100c63bc7dffb upstream. + +check_unsafe_exec() also notes whether the fs_struct is being +shared by more threads than will get killed by the exec, and if so +sets LSM_UNSAFE_SHARE to make bprm_set_creds() careful about euid. +But /proc/<pid>/cwd and /proc/<pid>/root lookups make transient +use of get_fs_struct(), which also raises that sharing count. + +This might occasionally cause a setuid program not to change euid, +in the same way as happened with files->count (check_unsafe_exec +also looks at sighand->count, but /proc doesn't raise that one). + +We'd prefer exec not to unshare fs_struct: so fix this in procfs, +replacing get_fs_struct() by get_fs_path(), which does path_get +while still holding task_lock, instead of raising fs->count. 
+ +Signed-off-by: Hugh Dickins +Cc: stable@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + fs/proc/base.c | 50 ++++++++++++++++---------------------------------- + 1 file changed, 16 insertions(+), 34 deletions(-) + +--- a/fs/proc/base.c ++++ b/fs/proc/base.c +@@ -146,15 +146,22 @@ static unsigned int pid_entry_count_dirs + return count; + } + +-static struct fs_struct *get_fs_struct(struct task_struct *task) ++static int get_fs_path(struct task_struct *task, struct path *path, bool root) + { + struct fs_struct *fs; ++ int result = -ENOENT; ++ + task_lock(task); + fs = task->fs; +- if(fs) +- atomic_inc(&fs->count); ++ if (fs) { ++ read_lock(&fs->lock); ++ *path = root ? fs->root : fs->pwd; ++ path_get(path); ++ read_unlock(&fs->lock); ++ result = 0; ++ } + task_unlock(task); +- return fs; ++ return result; + } + + static int get_nr_threads(struct task_struct *tsk) +@@ -172,42 +179,24 @@ static int get_nr_threads(struct task_st + static int proc_cwd_link(struct inode *inode, struct path *path) + { + struct task_struct *task = get_proc_task(inode); +- struct fs_struct *fs = NULL; + int result = -ENOENT; + + if (task) { +- fs = get_fs_struct(task); ++ result = get_fs_path(task, path, 0); + put_task_struct(task); + } +- if (fs) { +- read_lock(&fs->lock); +- *path = fs->pwd; +- path_get(&fs->pwd); +- read_unlock(&fs->lock); +- result = 0; +- put_fs_struct(fs); +- } + return result; + } + + static int proc_root_link(struct inode *inode, struct path *path) + { + struct task_struct *task = get_proc_task(inode); +- struct fs_struct *fs = NULL; + int result = -ENOENT; + + if (task) { +- fs = get_fs_struct(task); ++ result = get_fs_path(task, path, 1); + put_task_struct(task); + } +- if (fs) { +- read_lock(&fs->lock); +- *path = fs->root; +- path_get(&fs->root); +- read_unlock(&fs->lock); +- result = 0; +- put_fs_struct(fs); +- } + return result; + } + +@@ -596,7 +585,6 @@ static int mounts_open_common(struct ino + struct task_struct *task = get_proc_task(inode); + 
struct nsproxy *nsp; + struct mnt_namespace *ns = NULL; +- struct fs_struct *fs = NULL; + struct path root; + struct proc_mounts *p; + int ret = -EINVAL; +@@ -610,22 +598,16 @@ static int mounts_open_common(struct ino + get_mnt_ns(ns); + } + rcu_read_unlock(); +- if (ns) +- fs = get_fs_struct(task); ++ if (ns && get_fs_path(task, &root, 1) == 0) ++ ret = 0; + put_task_struct(task); + } + + if (!ns) + goto err; +- if (!fs) ++ if (ret) + goto err_put_ns; + +- read_lock(&fs->lock); +- root = fs->root; +- path_get(&root); +- read_unlock(&fs->lock); +- put_fs_struct(fs); +- + ret = -ENOMEM; + p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); + if (!p) diff --git a/queue-2.6.29/get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch b/queue-2.6.29/get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch new file mode 100644 index 00000000000..4203ac15e9b --- /dev/null +++ b/queue-2.6.29/get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch @@ -0,0 +1,65 @@ +From f8ef3ed2bebd2c4cb9ece92efa185d7aead8831a Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Mon, 30 Mar 2009 20:36:33 -0400 +Subject: Get rid of bumping fs_struct refcount in pivot_root(2) + +From: Al Viro + +commit f8ef3ed2bebd2c4cb9ece92efa185d7aead8831a upstream. + +Not because execve races with _that_ are serious - we really +need a situation when final drop of fs_struct refcount is +done by something that used to have it as current->fs. 
+ +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/namespace.c | 26 +++++++++++++++++--------- + 1 file changed, 17 insertions(+), 9 deletions(-) + +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -2127,25 +2127,33 @@ static void chroot_fs_refs(struct path * + { + struct task_struct *g, *p; + struct fs_struct *fs; ++ int count = 0; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { +- atomic_inc(&fs->count); +- task_unlock(p); ++ write_lock(&fs->lock); + if (fs->root.dentry == old_root->dentry +- && fs->root.mnt == old_root->mnt) +- set_fs_root(fs, new_root); ++ && fs->root.mnt == old_root->mnt) { ++ path_get(new_root); ++ fs->root = *new_root; ++ count++; ++ } + if (fs->pwd.dentry == old_root->dentry +- && fs->pwd.mnt == old_root->mnt) +- set_fs_pwd(fs, new_root); +- put_fs_struct(fs); +- } else +- task_unlock(p); ++ && fs->pwd.mnt == old_root->mnt) { ++ path_get(new_root); ++ fs->pwd = *new_root; ++ count++; ++ } ++ write_unlock(&fs->lock); ++ } ++ task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); ++ while (count--) ++ path_put(old_root); + } + + /* diff --git a/queue-2.6.29/kill-unsharing-fs_struct-in-__set_personality.patch b/queue-2.6.29/kill-unsharing-fs_struct-in-__set_personality.patch new file mode 100644 index 00000000000..bc24c69588a --- /dev/null +++ b/queue-2.6.29/kill-unsharing-fs_struct-in-__set_personality.patch @@ -0,0 +1,46 @@ +From 11d06b2a1e5658f448a308aa3beb97bacd64a940 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Mon, 30 Mar 2009 05:45:36 -0400 +Subject: Kill unsharing fs_struct in __set_personality() + +From: Al Viro + +commit 11d06b2a1e5658f448a308aa3beb97bacd64a940 upstream. + +That's a rudiment of altroot support. I.e. it should've been buried +a long time ago. 
+ +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- a/kernel/exec_domain.c ++++ b/kernel/exec_domain.c +@@ -145,28 +145,6 @@ __set_personality(u_long personality) + return 0; + } + +- if (atomic_read(&current->fs->count) != 1) { +- struct fs_struct *fsp, *ofsp; +- +- fsp = copy_fs_struct(current->fs); +- if (fsp == NULL) { +- module_put(ep->module); +- return -ENOMEM; +- } +- +- task_lock(current); +- ofsp = current->fs; +- current->fs = fsp; +- task_unlock(current); +- +- put_fs_struct(ofsp); +- } +- +- /* +- * At that point we are guaranteed to be the sole owner of +- * current->fs. +- */ +- + current->personality = personality; + oep = current_thread_info()->exec_domain; + current_thread_info()->exec_domain = ep; diff --git a/queue-2.6.29/new-locking-refcounting-for-fs_struct.patch b/queue-2.6.29/new-locking-refcounting-for-fs_struct.patch new file mode 100644 index 00000000000..37d22f116ec --- /dev/null +++ b/queue-2.6.29/new-locking-refcounting-for-fs_struct.patch @@ -0,0 +1,401 @@ +From 498052bba55ecaff58db6a1436b0e25bfd75a7ff Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Mon, 30 Mar 2009 07:20:30 -0400 +Subject: New locking/refcounting for fs_struct + +From: Al Viro + +commit 498052bba55ecaff58db6a1436b0e25bfd75a7ff upstream. + +* all changes of current->fs are done under task_lock and write_lock of + old fs->lock +* refcount is not atomic anymore (same protection) +* its decrements are done when removing reference from current; at the + same time we decide whether to free it. +* put_fs_struct() is gone +* new field - ->in_exec. Set by check_unsafe_exec() if we are trying to do + execve() and only subthreads share fs_struct. Cleared when finishing exec + (success and failure alike). Makes CLONE_FS fail with -EAGAIN if set. +* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread + is in progress. 
+ +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/compat.c | 16 +++++++++- + fs/exec.c | 31 +++++++++++++++++--- + fs/fs_struct.c | 69 ++++++++++++++++++++++++++++++++-------------- + fs/internal.h | 2 - + fs/proc/task_nommu.c | 2 - + include/linux/fs_struct.h | 8 ++--- + kernel/fork.c | 37 ++++++++++++++++++------ + 7 files changed, 121 insertions(+), 44 deletions(-) + +--- a/fs/compat.c ++++ b/fs/compat.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1412,12 +1413,15 @@ int compat_do_execve(char * filename, + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; +- check_unsafe_exec(bprm); ++ ++ retval = check_unsafe_exec(bprm); ++ if (retval) ++ goto out_unlock; + + file = open_exec(filename); + retval = PTR_ERR(file); + if (IS_ERR(file)) +- goto out_unlock; ++ goto out_unmark; + + sched_exec(); + +@@ -1459,6 +1463,9 @@ int compat_do_execve(char * filename, + goto out; + + /* execve succeeded */ ++ write_lock(&current->fs->lock); ++ current->fs->in_exec = 0; ++ write_unlock(&current->fs->lock); + mutex_unlock(&current->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); +@@ -1476,6 +1483,11 @@ out_file: + fput(bprm->file); + } + ++out_unmark: ++ write_lock(&current->fs->lock); ++ current->fs->in_exec = 0; ++ write_unlock(&current->fs->lock); ++ + out_unlock: + mutex_unlock(&current->cred_exec_mutex); + +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1049,16 +1049,18 @@ EXPORT_SYMBOL(install_exec_creds); + * - the caller must hold current->cred_exec_mutex to protect against + * PTRACE_ATTACH + */ +-void check_unsafe_exec(struct linux_binprm *bprm) ++int check_unsafe_exec(struct linux_binprm *bprm) + { + struct task_struct *p = current, *t; + unsigned long flags; + unsigned n_fs, n_sighand; ++ int res = 0; + + bprm->unsafe = tracehook_unsafe_exec(p); + + n_fs = 1; + n_sighand = 1; ++ write_lock(&p->fs->lock); + lock_task_sighand(p, &flags); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if 
(t->fs == p->fs) +@@ -1066,11 +1068,19 @@ void check_unsafe_exec(struct linux_binp + n_sighand++; + } + +- if (atomic_read(&p->fs->count) > n_fs || +- atomic_read(&p->sighand->count) > n_sighand) ++ if (p->fs->users > n_fs || ++ atomic_read(&p->sighand->count) > n_sighand) { + bprm->unsafe |= LSM_UNSAFE_SHARE; ++ } else { ++ if (p->fs->in_exec) ++ res = -EAGAIN; ++ p->fs->in_exec = 1; ++ } + + unlock_task_sighand(p, &flags); ++ write_unlock(&p->fs->lock); ++ ++ return res; + } + + /* +@@ -1285,12 +1295,15 @@ int do_execve(char * filename, + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; +- check_unsafe_exec(bprm); ++ ++ retval = check_unsafe_exec(bprm); ++ if (retval) ++ goto out_unlock; + + file = open_exec(filename); + retval = PTR_ERR(file); + if (IS_ERR(file)) +- goto out_unlock; ++ goto out_unmark; + + sched_exec(); + +@@ -1333,6 +1346,9 @@ int do_execve(char * filename, + goto out; + + /* execve succeeded */ ++ write_lock(&current->fs->lock); ++ current->fs->in_exec = 0; ++ write_unlock(&current->fs->lock); + mutex_unlock(&current->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); +@@ -1350,6 +1366,11 @@ out_file: + fput(bprm->file); + } + ++out_unmark: ++ write_lock(&current->fs->lock); ++ current->fs->in_exec = 0; ++ write_unlock(&current->fs->lock); ++ + out_unlock: + mutex_unlock(&current->cred_exec_mutex); + +--- a/fs/fs_struct.c ++++ b/fs/fs_struct.c +@@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_roo + path_put(old_root); + } + +-void put_fs_struct(struct fs_struct *fs) ++void free_fs_struct(struct fs_struct *fs) + { +- /* No need to hold fs->lock if we are killing it */ +- if (atomic_dec_and_test(&fs->count)) { +- path_put(&fs->root); +- path_put(&fs->pwd); +- kmem_cache_free(fs_cachep, fs); +- } ++ path_put(&fs->root); ++ path_put(&fs->pwd); ++ kmem_cache_free(fs_cachep, fs); + } + + void exit_fs(struct task_struct *tsk) + { +- struct fs_struct * fs = tsk->fs; ++ struct fs_struct *fs = tsk->fs; + + if (fs) { ++ int kill; + 
task_lock(tsk); ++ write_lock(&fs->lock); + tsk->fs = NULL; ++ kill = !--fs->users; ++ write_unlock(&fs->lock); + task_unlock(tsk); +- put_fs_struct(fs); ++ if (kill) ++ free_fs_struct(fs); + } + } + +@@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct + struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); + /* We don't need to lock fs - think why ;-) */ + if (fs) { +- atomic_set(&fs->count, 1); ++ fs->users = 1; ++ fs->in_exec = 0; + rwlock_init(&fs->lock); + fs->umask = old->umask; + read_lock(&old->lock); +@@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct + + int unshare_fs_struct(void) + { +- struct fs_struct *fsp = copy_fs_struct(current->fs); +- if (!fsp) ++ struct fs_struct *fs = current->fs; ++ struct fs_struct *new_fs = copy_fs_struct(fs); ++ int kill; ++ ++ if (!new_fs) + return -ENOMEM; +- exit_fs(current); +- current->fs = fsp; ++ ++ task_lock(current); ++ write_lock(&fs->lock); ++ kill = !--fs->users; ++ current->fs = new_fs; ++ write_unlock(&fs->lock); ++ task_unlock(current); ++ ++ if (kill) ++ free_fs_struct(fs); ++ + return 0; + } + EXPORT_SYMBOL_GPL(unshare_fs_struct); + + /* to be mentioned only in INIT_TASK */ + struct fs_struct init_fs = { +- .count = ATOMIC_INIT(1), ++ .users = 1, + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, + }; + + void daemonize_fs_struct(void) + { +- struct fs_struct *fs; ++ struct fs_struct *fs = current->fs; + +- exit_fs(current); /* current->fs->count--; */ +- fs = &init_fs; +- current->fs = fs; +- atomic_inc(&fs->count); ++ if (fs) { ++ int kill; ++ ++ task_lock(current); ++ ++ write_lock(&init_fs.lock); ++ init_fs.users++; ++ write_unlock(&init_fs.lock); ++ ++ write_lock(&fs->lock); ++ current->fs = &init_fs; ++ kill = !--fs->users; ++ write_unlock(&fs->lock); ++ ++ task_unlock(current); ++ if (kill) ++ free_fs_struct(fs); ++ } + } +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -44,7 +44,7 @@ extern void __init chrdev_init(void); + /* + * exec.c + */ +-extern void 
check_unsafe_exec(struct linux_binprm *); ++extern int check_unsafe_exec(struct linux_binprm *); + + /* + * namespace.c +--- a/fs/proc/task_nommu.c ++++ b/fs/proc/task_nommu.c +@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct + else + bytes += kobjsize(mm); + +- if (current->fs && atomic_read(&current->fs->count) > 1) ++ if (current->fs && current->fs->users > 1) + sbytes += kobjsize(current->fs); + else + bytes += kobjsize(current->fs); +--- a/include/linux/fs_struct.h ++++ b/include/linux/fs_struct.h +@@ -4,12 +4,10 @@ + #include + + struct fs_struct { +- atomic_t count; /* This usage count is used by check_unsafe_exec() for +- * security checking purposes - therefore it may not be +- * incremented, except by clone(CLONE_FS). +- */ ++ int users; + rwlock_t lock; + int umask; ++ int in_exec; + struct path root, pwd; + }; + +@@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct * + extern void set_fs_root(struct fs_struct *, struct path *); + extern void set_fs_pwd(struct fs_struct *, struct path *); + extern struct fs_struct *copy_fs_struct(struct fs_struct *); +-extern void put_fs_struct(struct fs_struct *); ++extern void free_fs_struct(struct fs_struct *); + extern void daemonize_fs_struct(void); + extern int unshare_fs_struct(void); + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -678,11 +678,19 @@ fail_nomem: + + static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) + { ++ struct fs_struct *fs = current->fs; + if (clone_flags & CLONE_FS) { +- atomic_inc(&current->fs->count); ++ /* tsk->fs is already what we want */ ++ write_lock(&fs->lock); ++ if (fs->in_exec) { ++ write_unlock(&fs->lock); ++ return -EAGAIN; ++ } ++ fs->users++; ++ write_unlock(&fs->lock); + return 0; + } +- tsk->fs = copy_fs_struct(current->fs); ++ tsk->fs = copy_fs_struct(fs); + if (!tsk->fs) + return -ENOMEM; + return 0; +@@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unsh + { + struct fs_struct *fs = current->fs; + +- if ((unshare_flags & CLONE_FS) && +- (fs 
&& atomic_read(&fs->count) > 1)) { +- *new_fsp = copy_fs_struct(current->fs); +- if (!*new_fsp) +- return -ENOMEM; +- } ++ if (!(unshare_flags & CLONE_FS) || !fs) ++ return 0; ++ ++ /* don't need lock here; in the worst case we'll do useless copy */ ++ if (fs->users == 1) ++ return 0; ++ ++ *new_fsp = copy_fs_struct(fs); ++ if (!*new_fsp) ++ return -ENOMEM; + + return 0; + } +@@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, + + if (new_fs) { + fs = current->fs; ++ write_lock(&fs->lock); + current->fs = new_fs; +- new_fs = fs; ++ if (--fs->users) ++ new_fs = NULL; ++ else ++ new_fs = fs; ++ write_unlock(&fs->lock); + } + + if (new_mm) { +@@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh: + + bad_unshare_cleanup_fs: + if (new_fs) +- put_fs_struct(new_fs); ++ free_fs_struct(new_fs); + + bad_unshare_cleanup_thread: + bad_unshare_out: diff --git a/queue-2.6.29/series b/queue-2.6.29/series index d67c88646cf..e8fcd40754a 100644 --- a/queue-2.6.29/series +++ b/queue-2.6.29/series @@ -31,3 +31,14 @@ ptrace-ptrace_attach-fix-the-usage-of-cred_exec_mutex.patch usb-serial-fix-lifetime-and-locking-problems.patch acpi-revert-conflicting-workaround-for-bios-w-mangled-prt-entries.patch powerpc-sanitize-stack-pointer-in-signal-handling-code.patch +compat_do_execve-should-unshare_files.patch +fix-setuid-sometimes-doesn-t.patch +fix-setuid-sometimes-wouldn-t.patch +annotate-struct-fs_struct-s-usage-count-restriction.patch +kill-unsharing-fs_struct-in-__set_personality.patch +get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch +take-fs_struct-handling-to-new-file.patch +new-locking-refcounting-for-fs_struct.patch +check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch +do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch +check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch diff --git a/queue-2.6.29/take-fs_struct-handling-to-new-file.patch b/queue-2.6.29/take-fs_struct-handling-to-new-file.patch new file mode 100644 index 
00000000000..98efafabfd2 --- /dev/null +++ b/queue-2.6.29/take-fs_struct-handling-to-new-file.patch @@ -0,0 +1,435 @@ +From 3e93cd671813e204c258f1e6c797959920cf7772 Mon Sep 17 00:00:00 2001 +From: Al Viro +Date: Sun, 29 Mar 2009 19:00:13 -0400 +Subject: Take fs_struct handling to new file (fs/fs_struct.c) + +From: Al Viro + +commit 3e93cd671813e204c258f1e6c797959920cf7772 upstream. + +Pure code move; two new helper functions for nfsd and daemonize +(unshare_fs_struct() and daemonize_fs_struct() resp.; for now - +the same code as used to be in callers). unshare_fs_struct() +exported (for nfsd, as copy_fs_struct()/exit_fs() used to be), +copy_fs_struct() and exit_fs() don't need exports anymore. + +Signed-off-by: Al Viro +Signed-off-by: Greg Kroah-Hartman + +--- + fs/Makefile | 2 + fs/fs_struct.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++ + fs/internal.h | 6 + + fs/namei.c | 7 -- + fs/namespace.c | 68 ---------------------- + fs/nfsd/nfssvc.c | 7 -- + include/linux/fs_struct.h | 2 + kernel/exit.c | 31 ---------- + kernel/fork.c | 29 --------- + 9 files changed, 155 insertions(+), 138 deletions(-) + +--- /dev/null ++++ b/fs/fs_struct.c +@@ -0,0 +1,141 @@ ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. ++ * It can block. ++ */ ++void set_fs_root(struct fs_struct *fs, struct path *path) ++{ ++ struct path old_root; ++ ++ write_lock(&fs->lock); ++ old_root = fs->root; ++ fs->root = *path; ++ path_get(path); ++ write_unlock(&fs->lock); ++ if (old_root.dentry) ++ path_put(&old_root); ++} ++ ++/* ++ * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. ++ * It can block. 
++ */ ++void set_fs_pwd(struct fs_struct *fs, struct path *path) ++{ ++ struct path old_pwd; ++ ++ write_lock(&fs->lock); ++ old_pwd = fs->pwd; ++ fs->pwd = *path; ++ path_get(path); ++ write_unlock(&fs->lock); ++ ++ if (old_pwd.dentry) ++ path_put(&old_pwd); ++} ++ ++void chroot_fs_refs(struct path *old_root, struct path *new_root) ++{ ++ struct task_struct *g, *p; ++ struct fs_struct *fs; ++ int count = 0; ++ ++ read_lock(&tasklist_lock); ++ do_each_thread(g, p) { ++ task_lock(p); ++ fs = p->fs; ++ if (fs) { ++ write_lock(&fs->lock); ++ if (fs->root.dentry == old_root->dentry ++ && fs->root.mnt == old_root->mnt) { ++ path_get(new_root); ++ fs->root = *new_root; ++ count++; ++ } ++ if (fs->pwd.dentry == old_root->dentry ++ && fs->pwd.mnt == old_root->mnt) { ++ path_get(new_root); ++ fs->pwd = *new_root; ++ count++; ++ } ++ write_unlock(&fs->lock); ++ } ++ task_unlock(p); ++ } while_each_thread(g, p); ++ read_unlock(&tasklist_lock); ++ while (count--) ++ path_put(old_root); ++} ++ ++void put_fs_struct(struct fs_struct *fs) ++{ ++ /* No need to hold fs->lock if we are killing it */ ++ if (atomic_dec_and_test(&fs->count)) { ++ path_put(&fs->root); ++ path_put(&fs->pwd); ++ kmem_cache_free(fs_cachep, fs); ++ } ++} ++ ++void exit_fs(struct task_struct *tsk) ++{ ++ struct fs_struct * fs = tsk->fs; ++ ++ if (fs) { ++ task_lock(tsk); ++ tsk->fs = NULL; ++ task_unlock(tsk); ++ put_fs_struct(fs); ++ } ++} ++ ++struct fs_struct *copy_fs_struct(struct fs_struct *old) ++{ ++ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); ++ /* We don't need to lock fs - think why ;-) */ ++ if (fs) { ++ atomic_set(&fs->count, 1); ++ rwlock_init(&fs->lock); ++ fs->umask = old->umask; ++ read_lock(&old->lock); ++ fs->root = old->root; ++ path_get(&old->root); ++ fs->pwd = old->pwd; ++ path_get(&old->pwd); ++ read_unlock(&old->lock); ++ } ++ return fs; ++} ++ ++int unshare_fs_struct(void) ++{ ++ struct fs_struct *fsp = copy_fs_struct(current->fs); ++ if (!fsp) ++ return -ENOMEM; 
++ exit_fs(current); ++ current->fs = fsp; ++ return 0; ++} ++EXPORT_SYMBOL_GPL(unshare_fs_struct); ++ ++/* to be mentioned only in INIT_TASK */ ++struct fs_struct init_fs = { ++ .count = ATOMIC_INIT(1), ++ .lock = __RW_LOCK_UNLOCKED(init_fs.lock), ++ .umask = 0022, ++}; ++ ++void daemonize_fs_struct(void) ++{ ++ struct fs_struct *fs; ++ ++ exit_fs(current); /* current->fs->count--; */ ++ fs = &init_fs; ++ current->fs = fs; ++ atomic_inc(&fs->count); ++} +--- a/fs/internal.h ++++ b/fs/internal.h +@@ -11,6 +11,7 @@ + + struct super_block; + struct linux_binprm; ++struct path; + + /* + * block_dev.c +@@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount + extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); + + extern void __init mnt_init(void); ++ ++/* ++ * fs_struct.c ++ */ ++extern void chroot_fs_refs(struct path *, struct path *); +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table. + attr.o bad_inode.o file.o filesystems.o namespace.o \ + seq_file.o xattr.o libfs.o fs-writeback.o \ + pnode.o drop_caches.o splice.o sync.o utimes.o \ +- stack.o ++ stack.o fs_struct.o + + ifeq ($(CONFIG_BLOCK),y) + obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o +--- a/fs/namei.c ++++ b/fs/namei.c +@@ -2891,10 +2891,3 @@ EXPORT_SYMBOL(vfs_symlink); + EXPORT_SYMBOL(vfs_unlink); + EXPORT_SYMBOL(dentry_unhash); + EXPORT_SYMBOL(generic_readlink); +- +-/* to be mentioned only in INIT_TASK */ +-struct fs_struct init_fs = { +- .count = ATOMIC_INIT(1), +- .lock = __RW_LOCK_UNLOCKED(init_fs.lock), +- .umask = 0022, +-}; +--- a/fs/namespace.c ++++ b/fs/namespace.c +@@ -2089,74 +2089,6 @@ out1: + } + + /* +- * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. +- * It can block. Requires the big lock held. 
+- */ +-void set_fs_root(struct fs_struct *fs, struct path *path) +-{ +- struct path old_root; +- +- write_lock(&fs->lock); +- old_root = fs->root; +- fs->root = *path; +- path_get(path); +- write_unlock(&fs->lock); +- if (old_root.dentry) +- path_put(&old_root); +-} +- +-/* +- * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. +- * It can block. Requires the big lock held. +- */ +-void set_fs_pwd(struct fs_struct *fs, struct path *path) +-{ +- struct path old_pwd; +- +- write_lock(&fs->lock); +- old_pwd = fs->pwd; +- fs->pwd = *path; +- path_get(path); +- write_unlock(&fs->lock); +- +- if (old_pwd.dentry) +- path_put(&old_pwd); +-} +- +-static void chroot_fs_refs(struct path *old_root, struct path *new_root) +-{ +- struct task_struct *g, *p; +- struct fs_struct *fs; +- int count = 0; +- +- read_lock(&tasklist_lock); +- do_each_thread(g, p) { +- task_lock(p); +- fs = p->fs; +- if (fs) { +- write_lock(&fs->lock); +- if (fs->root.dentry == old_root->dentry +- && fs->root.mnt == old_root->mnt) { +- path_get(new_root); +- fs->root = *new_root; +- count++; +- } +- if (fs->pwd.dentry == old_root->dentry +- && fs->pwd.mnt == old_root->mnt) { +- path_get(new_root); +- fs->pwd = *new_root; +- count++; +- } +- write_unlock(&fs->lock); +- } +- task_unlock(p); +- } while_each_thread(g, p); +- read_unlock(&tasklist_lock); +- while (count--) +- path_put(old_root); +-} +- +-/* + * pivot_root Semantics: + * Moves the root file system of the current process to the directory put_old, + * makes new_root as the new root file system of the current process, and sets +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -404,7 +404,6 @@ static int + nfsd(void *vrqstp) + { + struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; +- struct fs_struct *fsp; + int err, preverr = 0; + + /* Lock module and set up kernel thread */ +@@ -413,13 +412,11 @@ nfsd(void *vrqstp) + /* At this point, the thread shares current->fs + * with the init process. 
We need to create files with a + * umask of 0 instead of init's umask. */ +- fsp = copy_fs_struct(current->fs); +- if (!fsp) { ++ if (unshare_fs_struct() < 0) { + printk("Unable to start nfsd thread: out of memory\n"); + goto out; + } +- exit_fs(current); +- current->fs = fsp; ++ + current->fs->umask = 0; + + /* +--- a/include/linux/fs_struct.h ++++ b/include/linux/fs_struct.h +@@ -20,5 +20,7 @@ extern void set_fs_root(struct fs_struct + extern void set_fs_pwd(struct fs_struct *, struct path *); + extern struct fs_struct *copy_fs_struct(struct fs_struct *); + extern void put_fs_struct(struct fs_struct *); ++extern void daemonize_fs_struct(void); ++extern int unshare_fs_struct(void); + + #endif /* _LINUX_FS_STRUCT_H */ +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -429,7 +429,6 @@ EXPORT_SYMBOL(disallow_signal); + void daemonize(const char *name, ...) + { + va_list args; +- struct fs_struct *fs; + sigset_t blocked; + + va_start(args, name); +@@ -462,11 +461,7 @@ void daemonize(const char *name, ...) 
+ + /* Become as one with the init task */ + +- exit_fs(current); /* current->fs->count--; */ +- fs = init_task.fs; +- current->fs = fs; +- atomic_inc(&fs->count); +- ++ daemonize_fs_struct(); + exit_files(current); + current->files = init_task.files; + atomic_inc(&current->files->count); +@@ -565,30 +560,6 @@ void exit_files(struct task_struct *tsk) + } + } + +-void put_fs_struct(struct fs_struct *fs) +-{ +- /* No need to hold fs->lock if we are killing it */ +- if (atomic_dec_and_test(&fs->count)) { +- path_put(&fs->root); +- path_put(&fs->pwd); +- kmem_cache_free(fs_cachep, fs); +- } +-} +- +-void exit_fs(struct task_struct *tsk) +-{ +- struct fs_struct * fs = tsk->fs; +- +- if (fs) { +- task_lock(tsk); +- tsk->fs = NULL; +- task_unlock(tsk); +- put_fs_struct(fs); +- } +-} +- +-EXPORT_SYMBOL_GPL(exit_fs); +- + #ifdef CONFIG_MM_OWNER + /* + * Task p is exiting and it owned mm, lets find a new owner for it +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -676,38 +676,13 @@ fail_nomem: + return retval; + } + +-static struct fs_struct *__copy_fs_struct(struct fs_struct *old) +-{ +- struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); +- /* We don't need to lock fs - think why ;-) */ +- if (fs) { +- atomic_set(&fs->count, 1); +- rwlock_init(&fs->lock); +- fs->umask = old->umask; +- read_lock(&old->lock); +- fs->root = old->root; +- path_get(&old->root); +- fs->pwd = old->pwd; +- path_get(&old->pwd); +- read_unlock(&old->lock); +- } +- return fs; +-} +- +-struct fs_struct *copy_fs_struct(struct fs_struct *old) +-{ +- return __copy_fs_struct(old); +-} +- +-EXPORT_SYMBOL_GPL(copy_fs_struct); +- + static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) + { + if (clone_flags & CLONE_FS) { + atomic_inc(&current->fs->count); + return 0; + } +- tsk->fs = __copy_fs_struct(current->fs); ++ tsk->fs = copy_fs_struct(current->fs); + if (!tsk->fs) + return -ENOMEM; + return 0; +@@ -1545,7 +1520,7 @@ static int unshare_fs(unsigned long unsh + + if ((unshare_flags &
CLONE_FS) && + (fs && atomic_read(&fs->count) > 1)) { +- *new_fsp = __copy_fs_struct(current->fs); ++ *new_fsp = copy_fs_struct(current->fs); + if (!*new_fsp) + return -ENOMEM; + }