--- /dev/null
+From 795e2fe0a3b69dbc040d7efcf517e0cbad6901d0 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Sat, 28 Mar 2009 23:23:01 +0000
+Subject: Annotate struct fs_struct's usage count restriction
+
+From: David Howells <dhowells@redhat.com>
+
+commit 795e2fe0a3b69dbc040d7efcf517e0cbad6901d0 upstream.
+
+Annotate struct fs_struct's usage count to indicate the restrictions upon it.
+It may not be incremented, except by clone(CLONE_FS), as this affects the
+check in check_unsafe_exec() in fs/exec.c.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/fs_struct.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -4,7 +4,10 @@
+ #include <linux/path.h>
+
+ struct fs_struct {
+- atomic_t count;
++ atomic_t count; /* This usage count is used by check_unsafe_exec() for
++ * security checking purposes - therefore it may not be
++ * incremented, except by clone(CLONE_FS).
++ */
+ rwlock_t lock;
+ int umask;
+ struct path root, pwd;
--- /dev/null
+From f1191b50ec11c8e2ca766d6d99eb5bb9d2c084a3 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 07:35:18 -0400
+Subject: check_unsafe_exec() doesn't care about signal handlers sharing
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit f1191b50ec11c8e2ca766d6d99eb5bb9d2c084a3 upstream.
+
+... since we'll unshare sighand anyway
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/exec.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1053,23 +1053,20 @@ int check_unsafe_exec(struct linux_binpr
+ {
+ struct task_struct *p = current, *t;
+ unsigned long flags;
+- unsigned n_fs, n_sighand;
++ unsigned n_fs;
+ int res = 0;
+
+ bprm->unsafe = tracehook_unsafe_exec(p);
+
+ n_fs = 1;
+- n_sighand = 1;
+ write_lock(&p->fs->lock);
+ lock_task_sighand(p, &flags);
+ for (t = next_thread(p); t != p; t = next_thread(t)) {
+ if (t->fs == p->fs)
+ n_fs++;
+- n_sighand++;
+ }
+
+- if (p->fs->users > n_fs ||
+- atomic_read(&p->sighand->count) > n_sighand) {
++ if (p->fs->users > n_fs) {
+ bprm->unsafe |= LSM_UNSAFE_SHARE;
+ } else {
+ if (p->fs->in_exec)
--- /dev/null
+From 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Fri, 24 Apr 2009 01:02:45 +0200
+Subject: check_unsafe_exec: s/lock_task_sighand/rcu_read_lock/
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 upstream.
+
+write_lock(&current->fs->lock) guarantees we can't wrongly miss
+LSM_UNSAFE_SHARE, this is what we care about. Use rcu_read_lock()
+instead of ->siglock to iterate over the sub-threads. We must see
+all CLONE_THREAD|CLONE_FS threads which didn't pass exit_fs(), it
+takes fs->lock too.
+
+With or without this patch we can miss the freshly cloned thread
+and set LSM_UNSAFE_SHARE, we don't care.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Roland McGrath <roland@redhat.com>
+[ Fixed lock/unlock typo - Hugh ]
+Acked-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/exec.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1052,7 +1052,6 @@ EXPORT_SYMBOL(install_exec_creds);
+ int check_unsafe_exec(struct linux_binprm *bprm)
+ {
+ struct task_struct *p = current, *t;
+- unsigned long flags;
+ unsigned n_fs;
+ int res = 0;
+
+@@ -1060,11 +1059,12 @@ int check_unsafe_exec(struct linux_binpr
+
+ n_fs = 1;
+ write_lock(&p->fs->lock);
+- lock_task_sighand(p, &flags);
++ rcu_read_lock();
+ for (t = next_thread(p); t != p; t = next_thread(t)) {
+ if (t->fs == p->fs)
+ n_fs++;
+ }
++ rcu_read_unlock();
+
+ if (p->fs->users > n_fs) {
+ bprm->unsafe |= LSM_UNSAFE_SHARE;
+@@ -1075,8 +1075,6 @@ int check_unsafe_exec(struct linux_binpr
+ res = 1;
+ }
+ }
+-
+- unlock_task_sighand(p, &flags);
+ write_unlock(&p->fs->lock);
+
+ return res;
--- /dev/null
+From 53e9309e01277ec99c38e84e0ca16921287cf470 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hugh@veritas.com>
+Date: Sat, 28 Mar 2009 23:16:03 +0000
+Subject: compat_do_execve should unshare_files
+
+From: Hugh Dickins <hugh@veritas.com>
+
+commit 53e9309e01277ec99c38e84e0ca16921287cf470 upstream.
+
+2.6.26's commit fd8328be874f4190a811c58cd4778ec2c74d2c05
+"sanitize handling of shared descriptor tables in failing execve()"
+moved the unshare_files() from flush_old_exec() and several binfmts
+to the head of do_execve(); but forgot to make the same change to
+compat_do_execve(), leaving a CLONE_FILES files_struct shared across
+exec from a 32-bit process on a 64-bit kernel.
+
+It's arguable whether the files_struct really ought to be unshared
+across exec; but 2.6.1 made that so to stop the loading binary's fd
+leaking into other threads, and a 32-bit process on a 64-bit kernel
+ought to behave in the same way as 32 on 32 and 64 on 64.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -1392,12 +1392,17 @@ int compat_do_execve(char * filename,
+ {
+ struct linux_binprm *bprm;
+ struct file *file;
++ struct files_struct *displaced;
+ int retval;
+
++ retval = unshare_files(&displaced);
++ if (retval)
++ goto out_ret;
++
+ retval = -ENOMEM;
+ bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+ if (!bprm)
+- goto out_ret;
++ goto out_files;
+
+ retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+ if (retval < 0)
+@@ -1457,6 +1462,8 @@ int compat_do_execve(char * filename,
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
++ if (displaced)
++ put_files_struct(displaced);
+ return retval;
+
+ out:
+@@ -1475,6 +1482,9 @@ out_unlock:
+ out_free:
+ free_bprm(bprm);
+
++out_files:
++ if (displaced)
++ reset_files_struct(displaced);
+ out_ret:
+ return retval;
+ }
--- /dev/null
+From 8c652f96d3852b97a49c331cd0bb02d22f3cb31b Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Fri, 24 Apr 2009 01:01:56 +0200
+Subject: do_execve() must not clear fs->in_exec if it was set by another thread
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 8c652f96d3852b97a49c331cd0bb02d22f3cb31b upstream.
+
+If do_execve() fails after check_unsafe_exec(), it clears fs->in_exec
+unconditionally. This is wrong if we race with our sub-thread which
+also does do_execve:
+
+ Two threads T1 and T2 and another process P, all share the same
+ ->fs.
+
+ T1 starts do_execve(BAD_FILE). It calls check_unsafe_exec(), since
+ ->fs is shared, we set LSM_UNSAFE_SHARE but not ->in_exec.
+
+ P exits and decrements fs->users.
+
+ T2 starts do_execve(), calls check_unsafe_exec(), now ->fs is not
+ shared, we set fs->in_exec.
+
+ T1 continues, open_exec(BAD_FILE) fails, we clear ->in_exec and
+ return to the user-space.
+
+ T1 does clone(CLONE_FS /* without CLONE_THREAD */).
+
+ T2 continues without LSM_UNSAFE_SHARE while ->fs is shared with
+ another process.
+
+Change check_unsafe_exec() to return res = 1 if we set ->in_exec, and change
+do_execve() to clear ->in_exec depending on res.
+
+When do_execve() succeeds, it is safe to clear ->in_exec unconditionally.
+It can be set only if we don't share ->fs with another process, and since
+we already killed all sub-threads either ->in_exec == 0 or we are the
+only user of this ->fs.
+
+Also, we do not need fs->lock to clear fs->in_exec.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Roland McGrath <roland@redhat.com>
+Acked-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c | 11 +++++------
+ fs/exec.c | 19 ++++++++++---------
+ 2 files changed, 15 insertions(+), 15 deletions(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -1394,6 +1394,7 @@ int compat_do_execve(char * filename,
+ struct linux_binprm *bprm;
+ struct file *file;
+ struct files_struct *displaced;
++ bool clear_in_exec;
+ int retval;
+
+ retval = unshare_files(&displaced);
+@@ -1415,8 +1416,9 @@ int compat_do_execve(char * filename,
+ goto out_unlock;
+
+ retval = check_unsafe_exec(bprm);
+- if (retval)
++ if (retval < 0)
+ goto out_unlock;
++ clear_in_exec = retval;
+
+ file = open_exec(filename);
+ retval = PTR_ERR(file);
+@@ -1463,9 +1465,7 @@ int compat_do_execve(char * filename,
+ goto out;
+
+ /* execve succeeded */
+- write_lock(&current->fs->lock);
+ current->fs->in_exec = 0;
+- write_unlock(&current->fs->lock);
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+@@ -1484,9 +1484,8 @@ out_file:
+ }
+
+ out_unmark:
+- write_lock(&current->fs->lock);
+- current->fs->in_exec = 0;
+- write_unlock(&current->fs->lock);
++ if (clear_in_exec)
++ current->fs->in_exec = 0;
+
+ out_unlock:
+ mutex_unlock(&current->cred_exec_mutex);
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1069,9 +1069,11 @@ int check_unsafe_exec(struct linux_binpr
+ if (p->fs->users > n_fs) {
+ bprm->unsafe |= LSM_UNSAFE_SHARE;
+ } else {
+- if (p->fs->in_exec)
+- res = -EAGAIN;
+- p->fs->in_exec = 1;
++ res = -EAGAIN;
++ if (!p->fs->in_exec) {
++ p->fs->in_exec = 1;
++ res = 1;
++ }
+ }
+
+ unlock_task_sighand(p, &flags);
+@@ -1273,6 +1275,7 @@ int do_execve(char * filename,
+ struct linux_binprm *bprm;
+ struct file *file;
+ struct files_struct *displaced;
++ bool clear_in_exec;
+ int retval;
+
+ retval = unshare_files(&displaced);
+@@ -1294,8 +1297,9 @@ int do_execve(char * filename,
+ goto out_unlock;
+
+ retval = check_unsafe_exec(bprm);
+- if (retval)
++ if (retval < 0)
+ goto out_unlock;
++ clear_in_exec = retval;
+
+ file = open_exec(filename);
+ retval = PTR_ERR(file);
+@@ -1343,9 +1347,7 @@ int do_execve(char * filename,
+ goto out;
+
+ /* execve succeeded */
+- write_lock(&current->fs->lock);
+ current->fs->in_exec = 0;
+- write_unlock(&current->fs->lock);
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+@@ -1364,9 +1366,8 @@ out_file:
+ }
+
+ out_unmark:
+- write_lock(&current->fs->lock);
+- current->fs->in_exec = 0;
+- write_unlock(&current->fs->lock);
++ if (clear_in_exec)
++ current->fs->in_exec = 0;
+
+ out_unlock:
+ mutex_unlock(&current->cred_exec_mutex);
--- /dev/null
+From e426b64c412aaa3e9eb3e4b261dc5be0d5a83e78 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hugh@veritas.com>
+Date: Sat, 28 Mar 2009 23:20:19 +0000
+Subject: fix setuid sometimes doesn't
+
+From: Hugh Dickins <hugh@veritas.com>
+
+commit e426b64c412aaa3e9eb3e4b261dc5be0d5a83e78 upstream.
+
+Joe Malicki reports that setuid sometimes doesn't: very rarely,
+a setuid root program does not get root euid; and, by the way,
+they have a health check running lsof every few minutes.
+
+Right, check_unsafe_exec() notes whether the files_struct is being
+shared by more threads than will get killed by the exec, and if so
+sets LSM_UNSAFE_SHARE to make bprm_set_creds() careful about euid.
+But /proc/<pid>/fd and /proc/<pid>/fdinfo lookups make transient
+use of get_files_struct(), which also raises that sharing count.
+
+There's a rather simple fix for this: exec's check on files->count
+has been redundant ever since 2.6.1 made it unshare_files() (except
+while compat_do_execve() omitted to do so) - just remove that check.
+
+[Note to -stable: this patch will not apply before 2.6.29: earlier
+releases should just remove the files->count line from unsafe_exec().]
+
+Reported-by: Joe Malicki <jmalicki@metacarta.com>
+Narrowed-down-by: Michael Itz <mitz@metacarta.com>
+Tested-by: Joe Malicki <jmalicki@metacarta.com>
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c | 2 +-
+ fs/exec.c | 10 +++-------
+ fs/internal.h | 2 +-
+ 3 files changed, 5 insertions(+), 9 deletions(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -1412,7 +1412,7 @@ int compat_do_execve(char * filename,
+ bprm->cred = prepare_exec_creds();
+ if (!bprm->cred)
+ goto out_unlock;
+- check_unsafe_exec(bprm, current->files);
++ check_unsafe_exec(bprm);
+
+ file = open_exec(filename);
+ retval = PTR_ERR(file);
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1049,28 +1049,24 @@ EXPORT_SYMBOL(install_exec_creds);
+ * - the caller must hold current->cred_exec_mutex to protect against
+ * PTRACE_ATTACH
+ */
+-void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files)
++void check_unsafe_exec(struct linux_binprm *bprm)
+ {
+ struct task_struct *p = current, *t;
+ unsigned long flags;
+- unsigned n_fs, n_files, n_sighand;
++ unsigned n_fs, n_sighand;
+
+ bprm->unsafe = tracehook_unsafe_exec(p);
+
+ n_fs = 1;
+- n_files = 1;
+ n_sighand = 1;
+ lock_task_sighand(p, &flags);
+ for (t = next_thread(p); t != p; t = next_thread(t)) {
+ if (t->fs == p->fs)
+ n_fs++;
+- if (t->files == files)
+- n_files++;
+ n_sighand++;
+ }
+
+ if (atomic_read(&p->fs->count) > n_fs ||
+- atomic_read(&p->files->count) > n_files ||
+ atomic_read(&p->sighand->count) > n_sighand)
+ bprm->unsafe |= LSM_UNSAFE_SHARE;
+
+@@ -1289,7 +1285,7 @@ int do_execve(char * filename,
+ bprm->cred = prepare_exec_creds();
+ if (!bprm->cred)
+ goto out_unlock;
+- check_unsafe_exec(bprm, displaced);
++ check_unsafe_exec(bprm);
+
+ file = open_exec(filename);
+ retval = PTR_ERR(file);
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -43,7 +43,7 @@ extern void __init chrdev_init(void);
+ /*
+ * exec.c
+ */
+-extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *);
++extern void check_unsafe_exec(struct linux_binprm *);
+
+ /*
+ * namespace.c
--- /dev/null
+From 7c2c7d993044cddc5010f6f429b100c63bc7dffb Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hugh@veritas.com>
+Date: Sat, 28 Mar 2009 23:21:27 +0000
+Subject: fix setuid sometimes wouldn't
+
+From: Hugh Dickins <hugh@veritas.com>
+
+commit 7c2c7d993044cddc5010f6f429b100c63bc7dffb upstream.
+
+check_unsafe_exec() also notes whether the fs_struct is being
+shared by more threads than will get killed by the exec, and if so
+sets LSM_UNSAFE_SHARE to make bprm_set_creds() careful about euid.
+But /proc/<pid>/cwd and /proc/<pid>/root lookups make transient
+use of get_fs_struct(), which also raises that sharing count.
+
+This might occasionally cause a setuid program not to change euid,
+in the same way as happened with files->count (check_unsafe_exec
+also looks at sighand->count, but /proc doesn't raise that one).
+
+We'd prefer exec not to unshare fs_struct: so fix this in procfs,
+replacing get_fs_struct() by get_fs_path(), which does path_get
+while still holding task_lock, instead of raising fs->count.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/base.c | 50 ++++++++++++++++----------------------------------
+ 1 file changed, 16 insertions(+), 34 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -146,15 +146,22 @@ static unsigned int pid_entry_count_dirs
+ return count;
+ }
+
+-static struct fs_struct *get_fs_struct(struct task_struct *task)
++static int get_fs_path(struct task_struct *task, struct path *path, bool root)
+ {
+ struct fs_struct *fs;
++ int result = -ENOENT;
++
+ task_lock(task);
+ fs = task->fs;
+- if(fs)
+- atomic_inc(&fs->count);
++ if (fs) {
++ read_lock(&fs->lock);
++ *path = root ? fs->root : fs->pwd;
++ path_get(path);
++ read_unlock(&fs->lock);
++ result = 0;
++ }
+ task_unlock(task);
+- return fs;
++ return result;
+ }
+
+ static int get_nr_threads(struct task_struct *tsk)
+@@ -172,42 +179,24 @@ static int get_nr_threads(struct task_st
+ static int proc_cwd_link(struct inode *inode, struct path *path)
+ {
+ struct task_struct *task = get_proc_task(inode);
+- struct fs_struct *fs = NULL;
+ int result = -ENOENT;
+
+ if (task) {
+- fs = get_fs_struct(task);
++ result = get_fs_path(task, path, 0);
+ put_task_struct(task);
+ }
+- if (fs) {
+- read_lock(&fs->lock);
+- *path = fs->pwd;
+- path_get(&fs->pwd);
+- read_unlock(&fs->lock);
+- result = 0;
+- put_fs_struct(fs);
+- }
+ return result;
+ }
+
+ static int proc_root_link(struct inode *inode, struct path *path)
+ {
+ struct task_struct *task = get_proc_task(inode);
+- struct fs_struct *fs = NULL;
+ int result = -ENOENT;
+
+ if (task) {
+- fs = get_fs_struct(task);
++ result = get_fs_path(task, path, 1);
+ put_task_struct(task);
+ }
+- if (fs) {
+- read_lock(&fs->lock);
+- *path = fs->root;
+- path_get(&fs->root);
+- read_unlock(&fs->lock);
+- result = 0;
+- put_fs_struct(fs);
+- }
+ return result;
+ }
+
+@@ -596,7 +585,6 @@ static int mounts_open_common(struct ino
+ struct task_struct *task = get_proc_task(inode);
+ struct nsproxy *nsp;
+ struct mnt_namespace *ns = NULL;
+- struct fs_struct *fs = NULL;
+ struct path root;
+ struct proc_mounts *p;
+ int ret = -EINVAL;
+@@ -610,22 +598,16 @@ static int mounts_open_common(struct ino
+ get_mnt_ns(ns);
+ }
+ rcu_read_unlock();
+- if (ns)
+- fs = get_fs_struct(task);
++ if (ns && get_fs_path(task, &root, 1) == 0)
++ ret = 0;
+ put_task_struct(task);
+ }
+
+ if (!ns)
+ goto err;
+- if (!fs)
++ if (ret)
+ goto err_put_ns;
+
+- read_lock(&fs->lock);
+- root = fs->root;
+- path_get(&root);
+- read_unlock(&fs->lock);
+- put_fs_struct(fs);
+-
+ ret = -ENOMEM;
+ p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
+ if (!p)
--- /dev/null
+From f8ef3ed2bebd2c4cb9ece92efa185d7aead8831a Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 20:36:33 -0400
+Subject: Get rid of bumping fs_struct refcount in pivot_root(2)
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit f8ef3ed2bebd2c4cb9ece92efa185d7aead8831a upstream.
+
+Not because execve races with _that_ are serious - we really
+need a situation when final drop of fs_struct refcount is
+done by something that used to have it as current->fs.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/namespace.c | 26 +++++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2127,25 +2127,33 @@ static void chroot_fs_refs(struct path *
+ {
+ struct task_struct *g, *p;
+ struct fs_struct *fs;
++ int count = 0;
+
+ read_lock(&tasklist_lock);
+ do_each_thread(g, p) {
+ task_lock(p);
+ fs = p->fs;
+ if (fs) {
+- atomic_inc(&fs->count);
+- task_unlock(p);
++ write_lock(&fs->lock);
+ if (fs->root.dentry == old_root->dentry
+- && fs->root.mnt == old_root->mnt)
+- set_fs_root(fs, new_root);
++ && fs->root.mnt == old_root->mnt) {
++ path_get(new_root);
++ fs->root = *new_root;
++ count++;
++ }
+ if (fs->pwd.dentry == old_root->dentry
+- && fs->pwd.mnt == old_root->mnt)
+- set_fs_pwd(fs, new_root);
+- put_fs_struct(fs);
+- } else
+- task_unlock(p);
++ && fs->pwd.mnt == old_root->mnt) {
++ path_get(new_root);
++ fs->pwd = *new_root;
++ count++;
++ }
++ write_unlock(&fs->lock);
++ }
++ task_unlock(p);
+ } while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
++ while (count--)
++ path_put(old_root);
+ }
+
+ /*
--- /dev/null
+From 11d06b2a1e5658f448a308aa3beb97bacd64a940 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 05:45:36 -0400
+Subject: Kill unsharing fs_struct in __set_personality()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 11d06b2a1e5658f448a308aa3beb97bacd64a940 upstream.
+
+That's a rudiment of altroot support. I.e. it should've been buried
+a long time ago.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/kernel/exec_domain.c
++++ b/kernel/exec_domain.c
+@@ -145,28 +145,6 @@ __set_personality(u_long personality)
+ return 0;
+ }
+
+- if (atomic_read(&current->fs->count) != 1) {
+- struct fs_struct *fsp, *ofsp;
+-
+- fsp = copy_fs_struct(current->fs);
+- if (fsp == NULL) {
+- module_put(ep->module);
+- return -ENOMEM;
+- }
+-
+- task_lock(current);
+- ofsp = current->fs;
+- current->fs = fsp;
+- task_unlock(current);
+-
+- put_fs_struct(ofsp);
+- }
+-
+- /*
+- * At that point we are guaranteed to be the sole owner of
+- * current->fs.
+- */
+-
+ current->personality = personality;
+ oep = current_thread_info()->exec_domain;
+ current_thread_info()->exec_domain = ep;
--- /dev/null
+From 498052bba55ecaff58db6a1436b0e25bfd75a7ff Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 07:20:30 -0400
+Subject: New locking/refcounting for fs_struct
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 498052bba55ecaff58db6a1436b0e25bfd75a7ff upstream.
+
+* all changes of current->fs are done under task_lock and write_lock of
+ old fs->lock
+* refcount is not atomic anymore (same protection)
+* its decrements are done when removing reference from current; at the
+ same time we decide whether to free it.
+* put_fs_struct() is gone
+* new field - ->in_exec. Set by check_unsafe_exec() if we are trying to do
+ execve() and only subthreads share fs_struct. Cleared when finishing exec
+ (success and failure alike). Makes CLONE_FS fail with -EAGAIN if set.
+* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread
+ is in progress.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c | 16 +++++++++-
+ fs/exec.c | 31 +++++++++++++++++---
+ fs/fs_struct.c | 69 ++++++++++++++++++++++++++++++++--------------
+ fs/internal.h | 2 -
+ fs/proc/task_nommu.c | 2 -
+ include/linux/fs_struct.h | 8 ++---
+ kernel/fork.c | 37 ++++++++++++++++++------
+ 7 files changed, 121 insertions(+), 44 deletions(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -51,6 +51,7 @@
+ #include <linux/poll.h>
+ #include <linux/mm.h>
+ #include <linux/eventpoll.h>
++#include <linux/fs_struct.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+@@ -1412,12 +1413,15 @@ int compat_do_execve(char * filename,
+ bprm->cred = prepare_exec_creds();
+ if (!bprm->cred)
+ goto out_unlock;
+- check_unsafe_exec(bprm);
++
++ retval = check_unsafe_exec(bprm);
++ if (retval)
++ goto out_unlock;
+
+ file = open_exec(filename);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
+- goto out_unlock;
++ goto out_unmark;
+
+ sched_exec();
+
+@@ -1459,6 +1463,9 @@ int compat_do_execve(char * filename,
+ goto out;
+
+ /* execve succeeded */
++ write_lock(&current->fs->lock);
++ current->fs->in_exec = 0;
++ write_unlock(&current->fs->lock);
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+@@ -1476,6 +1483,11 @@ out_file:
+ fput(bprm->file);
+ }
+
++out_unmark:
++ write_lock(&current->fs->lock);
++ current->fs->in_exec = 0;
++ write_unlock(&current->fs->lock);
++
+ out_unlock:
+ mutex_unlock(&current->cred_exec_mutex);
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1049,16 +1049,18 @@ EXPORT_SYMBOL(install_exec_creds);
+ * - the caller must hold current->cred_exec_mutex to protect against
+ * PTRACE_ATTACH
+ */
+-void check_unsafe_exec(struct linux_binprm *bprm)
++int check_unsafe_exec(struct linux_binprm *bprm)
+ {
+ struct task_struct *p = current, *t;
+ unsigned long flags;
+ unsigned n_fs, n_sighand;
++ int res = 0;
+
+ bprm->unsafe = tracehook_unsafe_exec(p);
+
+ n_fs = 1;
+ n_sighand = 1;
++ write_lock(&p->fs->lock);
+ lock_task_sighand(p, &flags);
+ for (t = next_thread(p); t != p; t = next_thread(t)) {
+ if (t->fs == p->fs)
+@@ -1066,11 +1068,19 @@ void check_unsafe_exec(struct linux_binp
+ n_sighand++;
+ }
+
+- if (atomic_read(&p->fs->count) > n_fs ||
+- atomic_read(&p->sighand->count) > n_sighand)
++ if (p->fs->users > n_fs ||
++ atomic_read(&p->sighand->count) > n_sighand) {
+ bprm->unsafe |= LSM_UNSAFE_SHARE;
++ } else {
++ if (p->fs->in_exec)
++ res = -EAGAIN;
++ p->fs->in_exec = 1;
++ }
+
+ unlock_task_sighand(p, &flags);
++ write_unlock(&p->fs->lock);
++
++ return res;
+ }
+
+ /*
+@@ -1285,12 +1295,15 @@ int do_execve(char * filename,
+ bprm->cred = prepare_exec_creds();
+ if (!bprm->cred)
+ goto out_unlock;
+- check_unsafe_exec(bprm);
++
++ retval = check_unsafe_exec(bprm);
++ if (retval)
++ goto out_unlock;
+
+ file = open_exec(filename);
+ retval = PTR_ERR(file);
+ if (IS_ERR(file))
+- goto out_unlock;
++ goto out_unmark;
+
+ sched_exec();
+
+@@ -1333,6 +1346,9 @@ int do_execve(char * filename,
+ goto out;
+
+ /* execve succeeded */
++ write_lock(&current->fs->lock);
++ current->fs->in_exec = 0;
++ write_unlock(&current->fs->lock);
+ mutex_unlock(&current->cred_exec_mutex);
+ acct_update_integrals(current);
+ free_bprm(bprm);
+@@ -1350,6 +1366,11 @@ out_file:
+ fput(bprm->file);
+ }
+
++out_unmark:
++ write_lock(&current->fs->lock);
++ current->fs->in_exec = 0;
++ write_unlock(&current->fs->lock);
++
+ out_unlock:
+ mutex_unlock(&current->cred_exec_mutex);
+
+--- a/fs/fs_struct.c
++++ b/fs/fs_struct.c
+@@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_roo
+ path_put(old_root);
+ }
+
+-void put_fs_struct(struct fs_struct *fs)
++void free_fs_struct(struct fs_struct *fs)
+ {
+- /* No need to hold fs->lock if we are killing it */
+- if (atomic_dec_and_test(&fs->count)) {
+- path_put(&fs->root);
+- path_put(&fs->pwd);
+- kmem_cache_free(fs_cachep, fs);
+- }
++ path_put(&fs->root);
++ path_put(&fs->pwd);
++ kmem_cache_free(fs_cachep, fs);
+ }
+
+ void exit_fs(struct task_struct *tsk)
+ {
+- struct fs_struct * fs = tsk->fs;
++ struct fs_struct *fs = tsk->fs;
+
+ if (fs) {
++ int kill;
+ task_lock(tsk);
++ write_lock(&fs->lock);
+ tsk->fs = NULL;
++ kill = !--fs->users;
++ write_unlock(&fs->lock);
+ task_unlock(tsk);
+- put_fs_struct(fs);
++ if (kill)
++ free_fs_struct(fs);
+ }
+ }
+
+@@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct
+ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+ /* We don't need to lock fs - think why ;-) */
+ if (fs) {
+- atomic_set(&fs->count, 1);
++ fs->users = 1;
++ fs->in_exec = 0;
+ rwlock_init(&fs->lock);
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+@@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct
+
+ int unshare_fs_struct(void)
+ {
+- struct fs_struct *fsp = copy_fs_struct(current->fs);
+- if (!fsp)
++ struct fs_struct *fs = current->fs;
++ struct fs_struct *new_fs = copy_fs_struct(fs);
++ int kill;
++
++ if (!new_fs)
+ return -ENOMEM;
+- exit_fs(current);
+- current->fs = fsp;
++
++ task_lock(current);
++ write_lock(&fs->lock);
++ kill = !--fs->users;
++ current->fs = new_fs;
++ write_unlock(&fs->lock);
++ task_unlock(current);
++
++ if (kill)
++ free_fs_struct(fs);
++
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(unshare_fs_struct);
+
+ /* to be mentioned only in INIT_TASK */
+ struct fs_struct init_fs = {
+- .count = ATOMIC_INIT(1),
++ .users = 1,
+ .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
+ .umask = 0022,
+ };
+
+ void daemonize_fs_struct(void)
+ {
+- struct fs_struct *fs;
++ struct fs_struct *fs = current->fs;
+
+- exit_fs(current); /* current->fs->count--; */
+- fs = &init_fs;
+- current->fs = fs;
+- atomic_inc(&fs->count);
++ if (fs) {
++ int kill;
++
++ task_lock(current);
++
++ write_lock(&init_fs.lock);
++ init_fs.users++;
++ write_unlock(&init_fs.lock);
++
++ write_lock(&fs->lock);
++ current->fs = &init_fs;
++ kill = !--fs->users;
++ write_unlock(&fs->lock);
++
++ task_unlock(current);
++ if (kill)
++ free_fs_struct(fs);
++ }
+ }
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -44,7 +44,7 @@ extern void __init chrdev_init(void);
+ /*
+ * exec.c
+ */
+-extern void check_unsafe_exec(struct linux_binprm *);
++extern int check_unsafe_exec(struct linux_binprm *);
+
+ /*
+ * namespace.c
+--- a/fs/proc/task_nommu.c
++++ b/fs/proc/task_nommu.c
+@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct
+ else
+ bytes += kobjsize(mm);
+
+- if (current->fs && atomic_read(&current->fs->count) > 1)
++ if (current->fs && current->fs->users > 1)
+ sbytes += kobjsize(current->fs);
+ else
+ bytes += kobjsize(current->fs);
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -4,12 +4,10 @@
+ #include <linux/path.h>
+
+ struct fs_struct {
+- atomic_t count; /* This usage count is used by check_unsafe_exec() for
+- * security checking purposes - therefore it may not be
+- * incremented, except by clone(CLONE_FS).
+- */
++ int users;
+ rwlock_t lock;
+ int umask;
++ int in_exec;
+ struct path root, pwd;
+ };
+
+@@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *
+ extern void set_fs_root(struct fs_struct *, struct path *);
+ extern void set_fs_pwd(struct fs_struct *, struct path *);
+ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
+-extern void put_fs_struct(struct fs_struct *);
++extern void free_fs_struct(struct fs_struct *);
+ extern void daemonize_fs_struct(void);
+ extern int unshare_fs_struct(void);
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -678,11 +678,19 @@ fail_nomem:
+
+ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
+ {
++ struct fs_struct *fs = current->fs;
+ if (clone_flags & CLONE_FS) {
+- atomic_inc(&current->fs->count);
++ /* tsk->fs is already what we want */
++ write_lock(&fs->lock);
++ if (fs->in_exec) {
++ write_unlock(&fs->lock);
++ return -EAGAIN;
++ }
++ fs->users++;
++ write_unlock(&fs->lock);
+ return 0;
+ }
+- tsk->fs = copy_fs_struct(current->fs);
++ tsk->fs = copy_fs_struct(fs);
+ if (!tsk->fs)
+ return -ENOMEM;
+ return 0;
+@@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unsh
+ {
+ struct fs_struct *fs = current->fs;
+
+- if ((unshare_flags & CLONE_FS) &&
+- (fs && atomic_read(&fs->count) > 1)) {
+- *new_fsp = copy_fs_struct(current->fs);
+- if (!*new_fsp)
+- return -ENOMEM;
+- }
++ if (!(unshare_flags & CLONE_FS) || !fs)
++ return 0;
++
++ /* don't need lock here; in the worst case we'll do useless copy */
++ if (fs->users == 1)
++ return 0;
++
++ *new_fsp = copy_fs_struct(fs);
++ if (!*new_fsp)
++ return -ENOMEM;
+
+ return 0;
+ }
+@@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long,
+
+ if (new_fs) {
+ fs = current->fs;
++ write_lock(&fs->lock);
+ current->fs = new_fs;
+- new_fs = fs;
++ if (--fs->users)
++ new_fs = NULL;
++ else
++ new_fs = fs;
++ write_unlock(&fs->lock);
+ }
+
+ if (new_mm) {
+@@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh:
+
+ bad_unshare_cleanup_fs:
+ if (new_fs)
+- put_fs_struct(new_fs);
++ free_fs_struct(new_fs);
+
+ bad_unshare_cleanup_thread:
+ bad_unshare_out:
usb-serial-fix-lifetime-and-locking-problems.patch
acpi-revert-conflicting-workaround-for-bios-w-mangled-prt-entries.patch
powerpc-sanitize-stack-pointer-in-signal-handling-code.patch
+compat_do_execve-should-unshare_files.patch
+fix-setuid-sometimes-doesn-t.patch
+fix-setuid-sometimes-wouldn-t.patch
+annotate-struct-fs_struct-s-usage-count-restriction.patch
+kill-unsharing-fs_struct-in-__set_personality.patch
+get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch
+take-fs_struct-handling-to-new-file.patch
+new-locking-refcounting-for-fs_struct.patch
+check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch
+do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch
+check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch
--- /dev/null
+From 3e93cd671813e204c258f1e6c797959920cf7772 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 29 Mar 2009 19:00:13 -0400
+Subject: Take fs_struct handling to new file (fs/fs_struct.c)
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 3e93cd671813e204c258f1e6c797959920cf7772 upstream.
+
+Pure code move; two new helper functions for nfsd and daemonize
+(unshare_fs_struct() and daemonize_fs_struct() resp.; for now -
+the same code as used to be in callers). unshare_fs_struct()
+exported (for nfsd, as copy_fs_struct()/exit_fs() used to be),
+copy_fs_struct() and exit_fs() don't need exports anymore.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/Makefile | 2
+ fs/fs_struct.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++
+ fs/internal.h | 6 +
+ fs/namei.c | 7 --
+ fs/namespace.c | 68 ----------------------
+ fs/nfsd/nfssvc.c | 7 --
+ include/linux/fs_struct.h | 2
+ kernel/exit.c | 31 ----------
+ kernel/fork.c | 29 ---------
+ 9 files changed, 155 insertions(+), 138 deletions(-)
+
+--- /dev/null
++++ b/fs/fs_struct.c
+@@ -0,0 +1,141 @@
++#include <linux/module.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/path.h>
++#include <linux/slab.h>
++
++/*
++ * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
++ * It can block.
++ */
++void set_fs_root(struct fs_struct *fs, struct path *path)
++{
++ struct path old_root;
++
++ write_lock(&fs->lock);
++ old_root = fs->root;
++ fs->root = *path;
++ path_get(path);
++ write_unlock(&fs->lock);
++ if (old_root.dentry)
++ path_put(&old_root);
++}
++
++/*
++ * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
++ * It can block.
++ */
++void set_fs_pwd(struct fs_struct *fs, struct path *path)
++{
++ struct path old_pwd;
++
++ write_lock(&fs->lock);
++ old_pwd = fs->pwd;
++ fs->pwd = *path;
++ path_get(path);
++ write_unlock(&fs->lock);
++
++ if (old_pwd.dentry)
++ path_put(&old_pwd);
++}
++
++void chroot_fs_refs(struct path *old_root, struct path *new_root)
++{
++ struct task_struct *g, *p;
++ struct fs_struct *fs;
++ int count = 0;
++
++ read_lock(&tasklist_lock);
++ do_each_thread(g, p) {
++ task_lock(p);
++ fs = p->fs;
++ if (fs) {
++ write_lock(&fs->lock);
++ if (fs->root.dentry == old_root->dentry
++ && fs->root.mnt == old_root->mnt) {
++ path_get(new_root);
++ fs->root = *new_root;
++ count++;
++ }
++ if (fs->pwd.dentry == old_root->dentry
++ && fs->pwd.mnt == old_root->mnt) {
++ path_get(new_root);
++ fs->pwd = *new_root;
++ count++;
++ }
++ write_unlock(&fs->lock);
++ }
++ task_unlock(p);
++ } while_each_thread(g, p);
++ read_unlock(&tasklist_lock);
++ while (count--)
++ path_put(old_root);
++}
++
++void put_fs_struct(struct fs_struct *fs)
++{
++ /* No need to hold fs->lock if we are killing it */
++ if (atomic_dec_and_test(&fs->count)) {
++ path_put(&fs->root);
++ path_put(&fs->pwd);
++ kmem_cache_free(fs_cachep, fs);
++ }
++}
++
++void exit_fs(struct task_struct *tsk)
++{
++ struct fs_struct * fs = tsk->fs;
++
++ if (fs) {
++ task_lock(tsk);
++ tsk->fs = NULL;
++ task_unlock(tsk);
++ put_fs_struct(fs);
++ }
++}
++
++struct fs_struct *copy_fs_struct(struct fs_struct *old)
++{
++ struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
++ /* We don't need to lock fs - think why ;-) */
++ if (fs) {
++ atomic_set(&fs->count, 1);
++ rwlock_init(&fs->lock);
++ fs->umask = old->umask;
++ read_lock(&old->lock);
++ fs->root = old->root;
++ path_get(&old->root);
++ fs->pwd = old->pwd;
++ path_get(&old->pwd);
++ read_unlock(&old->lock);
++ }
++ return fs;
++}
++
++int unshare_fs_struct(void)
++{
++ struct fs_struct *fsp = copy_fs_struct(current->fs);
++ if (!fsp)
++ return -ENOMEM;
++ exit_fs(current);
++ current->fs = fsp;
++ return 0;
++}
++EXPORT_SYMBOL_GPL(unshare_fs_struct);
++
++/* to be mentioned only in INIT_TASK */
++struct fs_struct init_fs = {
++ .count = ATOMIC_INIT(1),
++ .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
++ .umask = 0022,
++};
++
++void daemonize_fs_struct(void)
++{
++ struct fs_struct *fs;
++
++ exit_fs(current); /* current->fs->count--; */
++ fs = &init_fs;
++ current->fs = fs;
++ atomic_inc(&fs->count);
++}
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -11,6 +11,7 @@
+
+ struct super_block;
+ struct linux_binprm;
++struct path;
+
+ /*
+ * block_dev.c
+@@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount
+ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+
+ extern void __init mnt_init(void);
++
++/*
++ * fs_struct.c
++ */
++extern void chroot_fs_refs(struct path *, struct path *);
+--- a/fs/Makefile
++++ b/fs/Makefile
+@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.
+ attr.o bad_inode.o file.o filesystems.o namespace.o \
+ seq_file.o xattr.o libfs.o fs-writeback.o \
+ pnode.o drop_caches.o splice.o sync.o utimes.o \
+- stack.o
++ stack.o fs_struct.o
+
+ ifeq ($(CONFIG_BLOCK),y)
+ obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -2891,10 +2891,3 @@ EXPORT_SYMBOL(vfs_symlink);
+ EXPORT_SYMBOL(vfs_unlink);
+ EXPORT_SYMBOL(dentry_unhash);
+ EXPORT_SYMBOL(generic_readlink);
+-
+-/* to be mentioned only in INIT_TASK */
+-struct fs_struct init_fs = {
+- .count = ATOMIC_INIT(1),
+- .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
+- .umask = 0022,
+-};
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2089,74 +2089,6 @@ out1:
+ }
+
+ /*
+- * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
+- * It can block. Requires the big lock held.
+- */
+-void set_fs_root(struct fs_struct *fs, struct path *path)
+-{
+- struct path old_root;
+-
+- write_lock(&fs->lock);
+- old_root = fs->root;
+- fs->root = *path;
+- path_get(path);
+- write_unlock(&fs->lock);
+- if (old_root.dentry)
+- path_put(&old_root);
+-}
+-
+-/*
+- * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
+- * It can block. Requires the big lock held.
+- */
+-void set_fs_pwd(struct fs_struct *fs, struct path *path)
+-{
+- struct path old_pwd;
+-
+- write_lock(&fs->lock);
+- old_pwd = fs->pwd;
+- fs->pwd = *path;
+- path_get(path);
+- write_unlock(&fs->lock);
+-
+- if (old_pwd.dentry)
+- path_put(&old_pwd);
+-}
+-
+-static void chroot_fs_refs(struct path *old_root, struct path *new_root)
+-{
+- struct task_struct *g, *p;
+- struct fs_struct *fs;
+- int count = 0;
+-
+- read_lock(&tasklist_lock);
+- do_each_thread(g, p) {
+- task_lock(p);
+- fs = p->fs;
+- if (fs) {
+- write_lock(&fs->lock);
+- if (fs->root.dentry == old_root->dentry
+- && fs->root.mnt == old_root->mnt) {
+- path_get(new_root);
+- fs->root = *new_root;
+- count++;
+- }
+- if (fs->pwd.dentry == old_root->dentry
+- && fs->pwd.mnt == old_root->mnt) {
+- path_get(new_root);
+- fs->pwd = *new_root;
+- count++;
+- }
+- write_unlock(&fs->lock);
+- }
+- task_unlock(p);
+- } while_each_thread(g, p);
+- read_unlock(&tasklist_lock);
+- while (count--)
+- path_put(old_root);
+-}
+-
+-/*
+ * pivot_root Semantics:
+ * Moves the root file system of the current process to the directory put_old,
+ * makes new_root as the new root file system of the current process, and sets
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -404,7 +404,6 @@ static int
+ nfsd(void *vrqstp)
+ {
+ struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
+- struct fs_struct *fsp;
+ int err, preverr = 0;
+
+ /* Lock module and set up kernel thread */
+@@ -413,13 +412,11 @@ nfsd(void *vrqstp)
+ /* At this point, the thread shares current->fs
+ * with the init process. We need to create files with a
+ * umask of 0 instead of init's umask. */
+- fsp = copy_fs_struct(current->fs);
+- if (!fsp) {
++ if (unshare_fs_struct() < 0) {
+ printk("Unable to start nfsd thread: out of memory\n");
+ goto out;
+ }
+- exit_fs(current);
+- current->fs = fsp;
++
+ current->fs->umask = 0;
+
+ /*
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -20,5 +20,7 @@ extern void set_fs_root(struct fs_struct
+ extern void set_fs_pwd(struct fs_struct *, struct path *);
+ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
+ extern void put_fs_struct(struct fs_struct *);
++extern void daemonize_fs_struct(void);
++extern int unshare_fs_struct(void);
+
+ #endif /* _LINUX_FS_STRUCT_H */
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -429,7 +429,6 @@ EXPORT_SYMBOL(disallow_signal);
+ void daemonize(const char *name, ...)
+ {
+ va_list args;
+- struct fs_struct *fs;
+ sigset_t blocked;
+
+ va_start(args, name);
+@@ -462,11 +461,7 @@ void daemonize(const char *name, ...)
+
+ /* Become as one with the init task */
+
+- exit_fs(current); /* current->fs->count--; */
+- fs = init_task.fs;
+- current->fs = fs;
+- atomic_inc(&fs->count);
+-
++ daemonize_fs_struct();
+ exit_files(current);
+ current->files = init_task.files;
+ atomic_inc(&current->files->count);
+@@ -565,30 +560,6 @@ void exit_files(struct task_struct *tsk)
+ }
+ }
+
+-void put_fs_struct(struct fs_struct *fs)
+-{
+- /* No need to hold fs->lock if we are killing it */
+- if (atomic_dec_and_test(&fs->count)) {
+- path_put(&fs->root);
+- path_put(&fs->pwd);
+- kmem_cache_free(fs_cachep, fs);
+- }
+-}
+-
+-void exit_fs(struct task_struct *tsk)
+-{
+- struct fs_struct * fs = tsk->fs;
+-
+- if (fs) {
+- task_lock(tsk);
+- tsk->fs = NULL;
+- task_unlock(tsk);
+- put_fs_struct(fs);
+- }
+-}
+-
+-EXPORT_SYMBOL_GPL(exit_fs);
+-
+ #ifdef CONFIG_MM_OWNER
+ /*
+ * Task p is exiting and it owned mm, lets find a new owner for it
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -676,38 +676,13 @@ fail_nomem:
+ return retval;
+ }
+
+-static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+-{
+- struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+- /* We don't need to lock fs - think why ;-) */
+- if (fs) {
+- atomic_set(&fs->count, 1);
+- rwlock_init(&fs->lock);
+- fs->umask = old->umask;
+- read_lock(&old->lock);
+- fs->root = old->root;
+- path_get(&old->root);
+- fs->pwd = old->pwd;
+- path_get(&old->pwd);
+- read_unlock(&old->lock);
+- }
+- return fs;
+-}
+-
+-struct fs_struct *copy_fs_struct(struct fs_struct *old)
+-{
+- return __copy_fs_struct(old);
+-}
+-
+-EXPORT_SYMBOL_GPL(copy_fs_struct);
+-
+ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
+ {
+ if (clone_flags & CLONE_FS) {
+ atomic_inc(&current->fs->count);
+ return 0;
+ }
+- tsk->fs = __copy_fs_struct(current->fs);
++ tsk->fs = copy_fs_struct(current->fs);
+ if (!tsk->fs)
+ return -ENOMEM;
+ return 0;
+@@ -1545,7 +1520,7 @@ static int unshare_fs(unsigned long unsh
+
+ if ((unshare_flags & CLONE_FS) &&
+ (fs && atomic_read(&fs->count) > 1)) {
+- *new_fsp = __copy_fs_struct(current->fs);
++ *new_fsp = copy_fs_struct(current->fs);
+ if (!*new_fsp)
+ return -ENOMEM;
+ }