]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
more .29 patches added
author Greg Kroah-Hartman <gregkh@suse.de>
Mon, 4 May 2009 23:53:45 +0000 (16:53 -0700)
committer Greg Kroah-Hartman <gregkh@suse.de>
Mon, 4 May 2009 23:53:45 +0000 (16:53 -0700)
12 files changed:
queue-2.6.29/annotate-struct-fs_struct-s-usage-count-restriction.patch [new file with mode: 0644]
queue-2.6.29/check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch [new file with mode: 0644]
queue-2.6.29/check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch [new file with mode: 0644]
queue-2.6.29/compat_do_execve-should-unshare_files.patch [new file with mode: 0644]
queue-2.6.29/do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch [new file with mode: 0644]
queue-2.6.29/fix-setuid-sometimes-doesn-t.patch [new file with mode: 0644]
queue-2.6.29/fix-setuid-sometimes-wouldn-t.patch [new file with mode: 0644]
queue-2.6.29/get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch [new file with mode: 0644]
queue-2.6.29/kill-unsharing-fs_struct-in-__set_personality.patch [new file with mode: 0644]
queue-2.6.29/new-locking-refcounting-for-fs_struct.patch [new file with mode: 0644]
queue-2.6.29/series
queue-2.6.29/take-fs_struct-handling-to-new-file.patch [new file with mode: 0644]

diff --git a/queue-2.6.29/annotate-struct-fs_struct-s-usage-count-restriction.patch b/queue-2.6.29/annotate-struct-fs_struct-s-usage-count-restriction.patch
new file mode 100644 (file)
index 0000000..5d3c4b5
--- /dev/null
@@ -0,0 +1,37 @@
+From 795e2fe0a3b69dbc040d7efcf517e0cbad6901d0 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Sat, 28 Mar 2009 23:23:01 +0000
+Subject: Annotate struct fs_struct's usage count restriction
+
+From: David Howells <dhowells@redhat.com>
+
+commit 795e2fe0a3b69dbc040d7efcf517e0cbad6901d0 upstream.
+
+Annotate struct fs_struct's usage count to indicate the restrictions upon it.
+It may not be incremented, except by clone(CLONE_FS), as this affects the
+check in check_unsafe_exec() in fs/exec.c.
+
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ include/linux/fs_struct.h |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -4,7 +4,10 @@
+ #include <linux/path.h>
+ struct fs_struct {
+-      atomic_t count;
++      atomic_t count; /* This usage count is used by check_unsafe_exec() for
++                       * security checking purposes - therefore it may not be
++                       * incremented, except by clone(CLONE_FS).
++                       */
+       rwlock_t lock;
+       int umask;
+       struct path root, pwd;
diff --git a/queue-2.6.29/check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch b/queue-2.6.29/check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch
new file mode 100644 (file)
index 0000000..29ad819
--- /dev/null
@@ -0,0 +1,46 @@
+From f1191b50ec11c8e2ca766d6d99eb5bb9d2c084a3 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 07:35:18 -0400
+Subject: check_unsafe_exec() doesn't care about signal handlers sharing
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit f1191b50ec11c8e2ca766d6d99eb5bb9d2c084a3 upstream.
+
+... since we'll unshare sighand anyway
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/exec.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1053,23 +1053,20 @@ int check_unsafe_exec(struct linux_binpr
+ {
+       struct task_struct *p = current, *t;
+       unsigned long flags;
+-      unsigned n_fs, n_sighand;
++      unsigned n_fs;
+       int res = 0;
+       bprm->unsafe = tracehook_unsafe_exec(p);
+       n_fs = 1;
+-      n_sighand = 1;
+       write_lock(&p->fs->lock);
+       lock_task_sighand(p, &flags);
+       for (t = next_thread(p); t != p; t = next_thread(t)) {
+               if (t->fs == p->fs)
+                       n_fs++;
+-              n_sighand++;
+       }
+-      if (p->fs->users > n_fs ||
+-          atomic_read(&p->sighand->count) > n_sighand) {
++      if (p->fs->users > n_fs) {
+               bprm->unsafe |= LSM_UNSAFE_SHARE;
+       } else {
+               if (p->fs->in_exec)
diff --git a/queue-2.6.29/check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch b/queue-2.6.29/check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch
new file mode 100644 (file)
index 0000000..62ff83c
--- /dev/null
@@ -0,0 +1,62 @@
+From 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Fri, 24 Apr 2009 01:02:45 +0200
+Subject: check_unsafe_exec: s/lock_task_sighand/rcu_read_lock/
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 437f7fdb607f32b737e4da9f14bebcfdac2c90c3 upstream.
+
+write_lock(&current->fs->lock) guarantees we can't wrongly miss
+LSM_UNSAFE_SHARE, this is what we care about. Use rcu_read_lock()
+instead of ->siglock to iterate over the sub-threads. We must see
+all CLONE_THREAD|CLONE_FS threads which didn't pass exit_fs(), it
+takes fs->lock too.
+
+With or without this patch we can miss the freshly cloned thread
+and set LSM_UNSAFE_SHARE, we don't care.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Roland McGrath <roland@redhat.com>
+[ Fixed lock/unlock typo  - Hugh ]
+Acked-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/exec.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1052,7 +1052,6 @@ EXPORT_SYMBOL(install_exec_creds);
+ int check_unsafe_exec(struct linux_binprm *bprm)
+ {
+       struct task_struct *p = current, *t;
+-      unsigned long flags;
+       unsigned n_fs;
+       int res = 0;
+@@ -1060,11 +1059,12 @@ int check_unsafe_exec(struct linux_binpr
+       n_fs = 1;
+       write_lock(&p->fs->lock);
+-      lock_task_sighand(p, &flags);
++      rcu_read_lock();
+       for (t = next_thread(p); t != p; t = next_thread(t)) {
+               if (t->fs == p->fs)
+                       n_fs++;
+       }
++      rcu_read_unlock();
+       if (p->fs->users > n_fs) {
+               bprm->unsafe |= LSM_UNSAFE_SHARE;
+@@ -1075,8 +1075,6 @@ int check_unsafe_exec(struct linux_binpr
+                       res = 1;
+               }
+       }
+-
+-      unlock_task_sighand(p, &flags);
+       write_unlock(&p->fs->lock);
+       return res;
diff --git a/queue-2.6.29/compat_do_execve-should-unshare_files.patch b/queue-2.6.29/compat_do_execve-should-unshare_files.patch
new file mode 100644 (file)
index 0000000..e88a45d
--- /dev/null
@@ -0,0 +1,70 @@
+From 53e9309e01277ec99c38e84e0ca16921287cf470 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hugh@veritas.com>
+Date: Sat, 28 Mar 2009 23:16:03 +0000
+Subject: compat_do_execve should unshare_files
+
+From: Hugh Dickins <hugh@veritas.com>
+
+commit 53e9309e01277ec99c38e84e0ca16921287cf470 upstream.
+
+2.6.26's commit fd8328be874f4190a811c58cd4778ec2c74d2c05
+"sanitize handling of shared descriptor tables in failing execve()"
+moved the unshare_files() from flush_old_exec() and several binfmts
+to the head of do_execve(); but forgot to make the same change to
+compat_do_execve(), leaving a CLONE_FILES files_struct shared across
+exec from a 32-bit process on a 64-bit kernel.
+
+It's arguable whether the files_struct really ought to be unshared
+across exec; but 2.6.1 made that so to stop the loading binary's fd
+leaking into other threads, and a 32-bit process on a 64-bit kernel
+ought to behave in the same way as 32 on 32 and 64 on 64.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -1392,12 +1392,17 @@ int compat_do_execve(char * filename,
+ {
+       struct linux_binprm *bprm;
+       struct file *file;
++      struct files_struct *displaced;
+       int retval;
++      retval = unshare_files(&displaced);
++      if (retval)
++              goto out_ret;
++
+       retval = -ENOMEM;
+       bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+       if (!bprm)
+-              goto out_ret;
++              goto out_files;
+       retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+       if (retval < 0)
+@@ -1457,6 +1462,8 @@ int compat_do_execve(char * filename,
+       mutex_unlock(&current->cred_exec_mutex);
+       acct_update_integrals(current);
+       free_bprm(bprm);
++      if (displaced)
++              put_files_struct(displaced);
+       return retval;
+ out:
+@@ -1475,6 +1482,9 @@ out_unlock:
+ out_free:
+       free_bprm(bprm);
++out_files:
++      if (displaced)
++              reset_files_struct(displaced);
+ out_ret:
+       return retval;
+ }
diff --git a/queue-2.6.29/do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch b/queue-2.6.29/do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch
new file mode 100644 (file)
index 0000000..034918b
--- /dev/null
@@ -0,0 +1,154 @@
+From 8c652f96d3852b97a49c331cd0bb02d22f3cb31b Mon Sep 17 00:00:00 2001
+From: Oleg Nesterov <oleg@redhat.com>
+Date: Fri, 24 Apr 2009 01:01:56 +0200
+Subject: do_execve() must not clear fs->in_exec if it was set by another thread
+
+From: Oleg Nesterov <oleg@redhat.com>
+
+commit 8c652f96d3852b97a49c331cd0bb02d22f3cb31b upstream.
+
+If do_execve() fails after check_unsafe_exec(), it clears fs->in_exec
+unconditionally. This is wrong if we race with our sub-thread which
+also does do_execve:
+
+       Two threads T1 and T2 and another process P, all share the same
+       ->fs.
+
+       T1 starts do_execve(BAD_FILE). It calls check_unsafe_exec(), since
+       ->fs is shared, we set LSM_UNSAFE but not ->in_exec.
+
+       P exits and decrements fs->users.
+
+       T2 starts do_execve(), calls check_unsafe_exec(), now ->fs is not
+       shared, we set fs->in_exec.
+
+       T1 continues, open_exec(BAD_FILE) fails, we clear ->in_exec and
+       return to the user-space.
+
+       T1 does clone(CLONE_FS /* without CLONE_THREAD */).
+
+       T2 continues without LSM_UNSAFE_SHARE while ->fs is shared with
+       another process.
+
+Change check_unsafe_exec() to return res = 1 if we set ->in_exec, and change
+do_execve() to clear ->in_exec depending on res.
+
+When do_execve() succeeds, it is safe to clear ->in_exec unconditionally.
+It can be set only if we don't share ->fs with another process, and since
+we already killed all sub-threads either ->in_exec == 0 or we are the
+only user of this ->fs.
+
+Also, we do not need fs->lock to clear fs->in_exec.
+
+Signed-off-by: Oleg Nesterov <oleg@redhat.com>
+Acked-by: Roland McGrath <roland@redhat.com>
+Acked-by: Hugh Dickins <hugh@veritas.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c |   11 +++++------
+ fs/exec.c   |   19 ++++++++++---------
+ 2 files changed, 15 insertions(+), 15 deletions(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -1394,6 +1394,7 @@ int compat_do_execve(char * filename,
+       struct linux_binprm *bprm;
+       struct file *file;
+       struct files_struct *displaced;
++      bool clear_in_exec;
+       int retval;
+       retval = unshare_files(&displaced);
+@@ -1415,8 +1416,9 @@ int compat_do_execve(char * filename,
+               goto out_unlock;
+       retval = check_unsafe_exec(bprm);
+-      if (retval)
++      if (retval < 0)
+               goto out_unlock;
++      clear_in_exec = retval;
+       file = open_exec(filename);
+       retval = PTR_ERR(file);
+@@ -1463,9 +1465,7 @@ int compat_do_execve(char * filename,
+               goto out;
+       /* execve succeeded */
+-      write_lock(&current->fs->lock);
+       current->fs->in_exec = 0;
+-      write_unlock(&current->fs->lock);
+       mutex_unlock(&current->cred_exec_mutex);
+       acct_update_integrals(current);
+       free_bprm(bprm);
+@@ -1484,9 +1484,8 @@ out_file:
+       }
+ out_unmark:
+-      write_lock(&current->fs->lock);
+-      current->fs->in_exec = 0;
+-      write_unlock(&current->fs->lock);
++      if (clear_in_exec)
++              current->fs->in_exec = 0;
+ out_unlock:
+       mutex_unlock(&current->cred_exec_mutex);
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1069,9 +1069,11 @@ int check_unsafe_exec(struct linux_binpr
+       if (p->fs->users > n_fs) {
+               bprm->unsafe |= LSM_UNSAFE_SHARE;
+       } else {
+-              if (p->fs->in_exec)
+-                      res = -EAGAIN;
+-              p->fs->in_exec = 1;
++              res = -EAGAIN;
++              if (!p->fs->in_exec) {
++                      p->fs->in_exec = 1;
++                      res = 1;
++              }
+       }
+       unlock_task_sighand(p, &flags);
+@@ -1273,6 +1275,7 @@ int do_execve(char * filename,
+       struct linux_binprm *bprm;
+       struct file *file;
+       struct files_struct *displaced;
++      bool clear_in_exec;
+       int retval;
+       retval = unshare_files(&displaced);
+@@ -1294,8 +1297,9 @@ int do_execve(char * filename,
+               goto out_unlock;
+       retval = check_unsafe_exec(bprm);
+-      if (retval)
++      if (retval < 0)
+               goto out_unlock;
++      clear_in_exec = retval;
+       file = open_exec(filename);
+       retval = PTR_ERR(file);
+@@ -1343,9 +1347,7 @@ int do_execve(char * filename,
+               goto out;
+       /* execve succeeded */
+-      write_lock(&current->fs->lock);
+       current->fs->in_exec = 0;
+-      write_unlock(&current->fs->lock);
+       mutex_unlock(&current->cred_exec_mutex);
+       acct_update_integrals(current);
+       free_bprm(bprm);
+@@ -1364,9 +1366,8 @@ out_file:
+       }
+ out_unmark:
+-      write_lock(&current->fs->lock);
+-      current->fs->in_exec = 0;
+-      write_unlock(&current->fs->lock);
++      if (clear_in_exec)
++              current->fs->in_exec = 0;
+ out_unlock:
+       mutex_unlock(&current->cred_exec_mutex);
diff --git a/queue-2.6.29/fix-setuid-sometimes-doesn-t.patch b/queue-2.6.29/fix-setuid-sometimes-doesn-t.patch
new file mode 100644 (file)
index 0000000..d12f40b
--- /dev/null
@@ -0,0 +1,104 @@
+From e426b64c412aaa3e9eb3e4b261dc5be0d5a83e78 Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hugh@veritas.com>
+Date: Sat, 28 Mar 2009 23:20:19 +0000
+Subject: fix setuid sometimes doesn't
+
+From: Hugh Dickins <hugh@veritas.com>
+
+commit e426b64c412aaa3e9eb3e4b261dc5be0d5a83e78 upstream.
+
+Joe Malicki reports that setuid sometimes doesn't: very rarely,
+a setuid root program does not get root euid; and, by the way,
+they have a health check running lsof every few minutes.
+
+Right, check_unsafe_exec() notes whether the files_struct is being
+shared by more threads than will get killed by the exec, and if so
+sets LSM_UNSAFE_SHARE to make bprm_set_creds() careful about euid.
+But /proc/<pid>/fd and /proc/<pid>/fdinfo lookups make transient
+use of get_files_struct(), which also raises that sharing count.
+
+There's a rather simple fix for this: exec's check on files->count
+has been redundant ever since 2.6.1 made it unshare_files() (except
+while compat_do_execve() omitted to do so) - just remove that check.
+
+[Note to -stable: this patch will not apply before 2.6.29: earlier
+releases should just remove the files->count line from unsafe_exec().]
+
+Reported-by: Joe Malicki <jmalicki@metacarta.com>
+Narrowed-down-by: Michael Itz <mitz@metacarta.com>
+Tested-by: Joe Malicki <jmalicki@metacarta.com>
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c   |    2 +-
+ fs/exec.c     |   10 +++-------
+ fs/internal.h |    2 +-
+ 3 files changed, 5 insertions(+), 9 deletions(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -1412,7 +1412,7 @@ int compat_do_execve(char * filename,
+       bprm->cred = prepare_exec_creds();
+       if (!bprm->cred)
+               goto out_unlock;
+-      check_unsafe_exec(bprm, current->files);
++      check_unsafe_exec(bprm);
+       file = open_exec(filename);
+       retval = PTR_ERR(file);
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1049,28 +1049,24 @@ EXPORT_SYMBOL(install_exec_creds);
+  * - the caller must hold current->cred_exec_mutex to protect against
+  *   PTRACE_ATTACH
+  */
+-void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files)
++void check_unsafe_exec(struct linux_binprm *bprm)
+ {
+       struct task_struct *p = current, *t;
+       unsigned long flags;
+-      unsigned n_fs, n_files, n_sighand;
++      unsigned n_fs, n_sighand;
+       bprm->unsafe = tracehook_unsafe_exec(p);
+       n_fs = 1;
+-      n_files = 1;
+       n_sighand = 1;
+       lock_task_sighand(p, &flags);
+       for (t = next_thread(p); t != p; t = next_thread(t)) {
+               if (t->fs == p->fs)
+                       n_fs++;
+-              if (t->files == files)
+-                      n_files++;
+               n_sighand++;
+       }
+       if (atomic_read(&p->fs->count) > n_fs ||
+-          atomic_read(&p->files->count) > n_files ||
+           atomic_read(&p->sighand->count) > n_sighand)
+               bprm->unsafe |= LSM_UNSAFE_SHARE;
+@@ -1289,7 +1285,7 @@ int do_execve(char * filename,
+       bprm->cred = prepare_exec_creds();
+       if (!bprm->cred)
+               goto out_unlock;
+-      check_unsafe_exec(bprm, displaced);
++      check_unsafe_exec(bprm);
+       file = open_exec(filename);
+       retval = PTR_ERR(file);
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -43,7 +43,7 @@ extern void __init chrdev_init(void);
+ /*
+  * exec.c
+  */
+-extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *);
++extern void check_unsafe_exec(struct linux_binprm *);
+ /*
+  * namespace.c
diff --git a/queue-2.6.29/fix-setuid-sometimes-wouldn-t.patch b/queue-2.6.29/fix-setuid-sometimes-wouldn-t.patch
new file mode 100644 (file)
index 0000000..d67220a
--- /dev/null
@@ -0,0 +1,139 @@
+From 7c2c7d993044cddc5010f6f429b100c63bc7dffb Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hugh@veritas.com>
+Date: Sat, 28 Mar 2009 23:21:27 +0000
+Subject: fix setuid sometimes wouldn't
+
+From: Hugh Dickins <hugh@veritas.com>
+
+commit 7c2c7d993044cddc5010f6f429b100c63bc7dffb upstream.
+
+check_unsafe_exec() also notes whether the fs_struct is being
+shared by more threads than will get killed by the exec, and if so
+sets LSM_UNSAFE_SHARE to make bprm_set_creds() careful about euid.
+But /proc/<pid>/cwd and /proc/<pid>/root lookups make transient
+use of get_fs_struct(), which also raises that sharing count.
+
+This might occasionally cause a setuid program not to change euid,
+in the same way as happened with files->count (check_unsafe_exec
+also looks at sighand->count, but /proc doesn't raise that one).
+
+We'd prefer exec not to unshare fs_struct: so fix this in procfs,
+replacing get_fs_struct() by get_fs_path(), which does path_get
+while still holding task_lock, instead of raising fs->count.
+
+Signed-off-by: Hugh Dickins <hugh@veritas.com>
+Cc: stable@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/proc/base.c |   50 ++++++++++++++++----------------------------------
+ 1 file changed, 16 insertions(+), 34 deletions(-)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -146,15 +146,22 @@ static unsigned int pid_entry_count_dirs
+       return count;
+ }
+-static struct fs_struct *get_fs_struct(struct task_struct *task)
++static int get_fs_path(struct task_struct *task, struct path *path, bool root)
+ {
+       struct fs_struct *fs;
++      int result = -ENOENT;
++
+       task_lock(task);
+       fs = task->fs;
+-      if(fs)
+-              atomic_inc(&fs->count);
++      if (fs) {
++              read_lock(&fs->lock);
++              *path = root ? fs->root : fs->pwd;
++              path_get(path);
++              read_unlock(&fs->lock);
++              result = 0;
++      }
+       task_unlock(task);
+-      return fs;
++      return result;
+ }
+ static int get_nr_threads(struct task_struct *tsk)
+@@ -172,42 +179,24 @@ static int get_nr_threads(struct task_st
+ static int proc_cwd_link(struct inode *inode, struct path *path)
+ {
+       struct task_struct *task = get_proc_task(inode);
+-      struct fs_struct *fs = NULL;
+       int result = -ENOENT;
+       if (task) {
+-              fs = get_fs_struct(task);
++              result = get_fs_path(task, path, 0);
+               put_task_struct(task);
+       }
+-      if (fs) {
+-              read_lock(&fs->lock);
+-              *path = fs->pwd;
+-              path_get(&fs->pwd);
+-              read_unlock(&fs->lock);
+-              result = 0;
+-              put_fs_struct(fs);
+-      }
+       return result;
+ }
+ static int proc_root_link(struct inode *inode, struct path *path)
+ {
+       struct task_struct *task = get_proc_task(inode);
+-      struct fs_struct *fs = NULL;
+       int result = -ENOENT;
+       if (task) {
+-              fs = get_fs_struct(task);
++              result = get_fs_path(task, path, 1);
+               put_task_struct(task);
+       }
+-      if (fs) {
+-              read_lock(&fs->lock);
+-              *path = fs->root;
+-              path_get(&fs->root);
+-              read_unlock(&fs->lock);
+-              result = 0;
+-              put_fs_struct(fs);
+-      }
+       return result;
+ }
+@@ -596,7 +585,6 @@ static int mounts_open_common(struct ino
+       struct task_struct *task = get_proc_task(inode);
+       struct nsproxy *nsp;
+       struct mnt_namespace *ns = NULL;
+-      struct fs_struct *fs = NULL;
+       struct path root;
+       struct proc_mounts *p;
+       int ret = -EINVAL;
+@@ -610,22 +598,16 @@ static int mounts_open_common(struct ino
+                               get_mnt_ns(ns);
+               }
+               rcu_read_unlock();
+-              if (ns)
+-                      fs = get_fs_struct(task);
++              if (ns && get_fs_path(task, &root, 1) == 0)
++                      ret = 0;
+               put_task_struct(task);
+       }
+       if (!ns)
+               goto err;
+-      if (!fs)
++      if (ret)
+               goto err_put_ns;
+-      read_lock(&fs->lock);
+-      root = fs->root;
+-      path_get(&root);
+-      read_unlock(&fs->lock);
+-      put_fs_struct(fs);
+-
+       ret = -ENOMEM;
+       p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
+       if (!p)
diff --git a/queue-2.6.29/get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch b/queue-2.6.29/get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch
new file mode 100644 (file)
index 0000000..4203ac1
--- /dev/null
@@ -0,0 +1,65 @@
+From f8ef3ed2bebd2c4cb9ece92efa185d7aead8831a Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 20:36:33 -0400
+Subject: Get rid of bumping fs_struct refcount in pivot_root(2)
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit f8ef3ed2bebd2c4cb9ece92efa185d7aead8831a upstream.
+
+Not because execve races with _that_ are serious - we really
+need a situation when final drop of fs_struct refcount is
+done by something that used to have it as current->fs.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/namespace.c |   26 +++++++++++++++++---------
+ 1 file changed, 17 insertions(+), 9 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2127,25 +2127,33 @@ static void chroot_fs_refs(struct path *
+ {
+       struct task_struct *g, *p;
+       struct fs_struct *fs;
++      int count = 0;
+       read_lock(&tasklist_lock);
+       do_each_thread(g, p) {
+               task_lock(p);
+               fs = p->fs;
+               if (fs) {
+-                      atomic_inc(&fs->count);
+-                      task_unlock(p);
++                      write_lock(&fs->lock);
+                       if (fs->root.dentry == old_root->dentry
+-                          && fs->root.mnt == old_root->mnt)
+-                              set_fs_root(fs, new_root);
++                          && fs->root.mnt == old_root->mnt) {
++                              path_get(new_root);
++                              fs->root = *new_root;
++                              count++;
++                      }
+                       if (fs->pwd.dentry == old_root->dentry
+-                          && fs->pwd.mnt == old_root->mnt)
+-                              set_fs_pwd(fs, new_root);
+-                      put_fs_struct(fs);
+-              } else
+-                      task_unlock(p);
++                          && fs->pwd.mnt == old_root->mnt) {
++                              path_get(new_root);
++                              fs->pwd = *new_root;
++                              count++;
++                      }
++                      write_unlock(&fs->lock);
++              }
++              task_unlock(p);
+       } while_each_thread(g, p);
+       read_unlock(&tasklist_lock);
++      while (count--)
++              path_put(old_root);
+ }
+ /*
diff --git a/queue-2.6.29/kill-unsharing-fs_struct-in-__set_personality.patch b/queue-2.6.29/kill-unsharing-fs_struct-in-__set_personality.patch
new file mode 100644 (file)
index 0000000..bc24c69
--- /dev/null
@@ -0,0 +1,46 @@
+From 11d06b2a1e5658f448a308aa3beb97bacd64a940 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 05:45:36 -0400
+Subject: Kill unsharing fs_struct in __set_personality()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 11d06b2a1e5658f448a308aa3beb97bacd64a940 upstream.
+
+That's a rudiment of altroot support.  I.e. it should've been buried
+a long time ago.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+--- a/kernel/exec_domain.c
++++ b/kernel/exec_domain.c
+@@ -145,28 +145,6 @@ __set_personality(u_long personality)
+               return 0;
+       }
+-      if (atomic_read(&current->fs->count) != 1) {
+-              struct fs_struct *fsp, *ofsp;
+-
+-              fsp = copy_fs_struct(current->fs);
+-              if (fsp == NULL) {
+-                      module_put(ep->module);
+-                      return -ENOMEM;
+-              }
+-
+-              task_lock(current);
+-              ofsp = current->fs;
+-              current->fs = fsp;
+-              task_unlock(current);
+-
+-              put_fs_struct(ofsp);
+-      }
+-
+-      /*
+-       * At that point we are guaranteed to be the sole owner of
+-       * current->fs.
+-       */
+-
+       current->personality = personality;
+       oep = current_thread_info()->exec_domain;
+       current_thread_info()->exec_domain = ep;
diff --git a/queue-2.6.29/new-locking-refcounting-for-fs_struct.patch b/queue-2.6.29/new-locking-refcounting-for-fs_struct.patch
new file mode 100644 (file)
index 0000000..37d22f1
--- /dev/null
@@ -0,0 +1,401 @@
+From 498052bba55ecaff58db6a1436b0e25bfd75a7ff Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Mon, 30 Mar 2009 07:20:30 -0400
+Subject: New locking/refcounting for fs_struct
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 498052bba55ecaff58db6a1436b0e25bfd75a7ff upstream.
+
+* all changes of current->fs are done under task_lock and write_lock of
+  old fs->lock
+* refcount is not atomic anymore (same protection)
+* its decrements are done when removing reference from current; at the
+  same time we decide whether to free it.
+* put_fs_struct() is gone
+* new field - ->in_exec.  Set by check_unsafe_exec() if we are trying to do
+  execve() and only subthreads share fs_struct.  Cleared when finishing exec
+  (success and failure alike).  Makes CLONE_FS fail with -EAGAIN if set.
+* check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread
+  is in progress.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/compat.c               |   16 +++++++++-
+ fs/exec.c                 |   31 +++++++++++++++++---
+ fs/fs_struct.c            |   69 ++++++++++++++++++++++++++++++++--------------
+ fs/internal.h             |    2 -
+ fs/proc/task_nommu.c      |    2 -
+ include/linux/fs_struct.h |    8 ++---
+ kernel/fork.c             |   37 ++++++++++++++++++------
+ 7 files changed, 121 insertions(+), 44 deletions(-)
+
+--- a/fs/compat.c
++++ b/fs/compat.c
+@@ -51,6 +51,7 @@
+ #include <linux/poll.h>
+ #include <linux/mm.h>
+ #include <linux/eventpoll.h>
++#include <linux/fs_struct.h>
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+@@ -1412,12 +1413,15 @@ int compat_do_execve(char * filename,
+       bprm->cred = prepare_exec_creds();
+       if (!bprm->cred)
+               goto out_unlock;
+-      check_unsafe_exec(bprm);
++
++      retval = check_unsafe_exec(bprm);
++      if (retval)
++              goto out_unlock;
+       file = open_exec(filename);
+       retval = PTR_ERR(file);
+       if (IS_ERR(file))
+-              goto out_unlock;
++              goto out_unmark;
+       sched_exec();
+@@ -1459,6 +1463,9 @@ int compat_do_execve(char * filename,
+               goto out;
+       /* execve succeeded */
++      write_lock(&current->fs->lock);
++      current->fs->in_exec = 0;
++      write_unlock(&current->fs->lock);
+       mutex_unlock(&current->cred_exec_mutex);
+       acct_update_integrals(current);
+       free_bprm(bprm);
+@@ -1476,6 +1483,11 @@ out_file:
+               fput(bprm->file);
+       }
++out_unmark:
++      write_lock(&current->fs->lock);
++      current->fs->in_exec = 0;
++      write_unlock(&current->fs->lock);
++
+ out_unlock:
+       mutex_unlock(&current->cred_exec_mutex);
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1049,16 +1049,18 @@ EXPORT_SYMBOL(install_exec_creds);
+  * - the caller must hold current->cred_exec_mutex to protect against
+  *   PTRACE_ATTACH
+  */
+-void check_unsafe_exec(struct linux_binprm *bprm)
++int check_unsafe_exec(struct linux_binprm *bprm)
+ {
+       struct task_struct *p = current, *t;
+       unsigned long flags;
+       unsigned n_fs, n_sighand;
++      int res = 0;
+       bprm->unsafe = tracehook_unsafe_exec(p);
+       n_fs = 1;
+       n_sighand = 1;
++      write_lock(&p->fs->lock);
+       lock_task_sighand(p, &flags);
+       for (t = next_thread(p); t != p; t = next_thread(t)) {
+               if (t->fs == p->fs)
+@@ -1066,11 +1068,19 @@ void check_unsafe_exec(struct linux_binp
+               n_sighand++;
+       }
+-      if (atomic_read(&p->fs->count) > n_fs ||
+-          atomic_read(&p->sighand->count) > n_sighand)
++      if (p->fs->users > n_fs ||
++          atomic_read(&p->sighand->count) > n_sighand) {
+               bprm->unsafe |= LSM_UNSAFE_SHARE;
++      } else {
++              if (p->fs->in_exec)
++                      res = -EAGAIN;
++              p->fs->in_exec = 1;
++      }
+       unlock_task_sighand(p, &flags);
++      write_unlock(&p->fs->lock);
++
++      return res;
+ }
+ /* 
+@@ -1285,12 +1295,15 @@ int do_execve(char * filename,
+       bprm->cred = prepare_exec_creds();
+       if (!bprm->cred)
+               goto out_unlock;
+-      check_unsafe_exec(bprm);
++
++      retval = check_unsafe_exec(bprm);
++      if (retval)
++              goto out_unlock;
+       file = open_exec(filename);
+       retval = PTR_ERR(file);
+       if (IS_ERR(file))
+-              goto out_unlock;
++              goto out_unmark;
+       sched_exec();
+@@ -1333,6 +1346,9 @@ int do_execve(char * filename,
+               goto out;
+       /* execve succeeded */
++      write_lock(&current->fs->lock);
++      current->fs->in_exec = 0;
++      write_unlock(&current->fs->lock);
+       mutex_unlock(&current->cred_exec_mutex);
+       acct_update_integrals(current);
+       free_bprm(bprm);
+@@ -1350,6 +1366,11 @@ out_file:
+               fput(bprm->file);
+       }
++out_unmark:
++      write_lock(&current->fs->lock);
++      current->fs->in_exec = 0;
++      write_unlock(&current->fs->lock);
++
+ out_unlock:
+       mutex_unlock(&current->cred_exec_mutex);
+--- a/fs/fs_struct.c
++++ b/fs/fs_struct.c
+@@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_roo
+               path_put(old_root);
+ }
+-void put_fs_struct(struct fs_struct *fs)
++void free_fs_struct(struct fs_struct *fs)
+ {
+-      /* No need to hold fs->lock if we are killing it */
+-      if (atomic_dec_and_test(&fs->count)) {
+-              path_put(&fs->root);
+-              path_put(&fs->pwd);
+-              kmem_cache_free(fs_cachep, fs);
+-      }
++      path_put(&fs->root);
++      path_put(&fs->pwd);
++      kmem_cache_free(fs_cachep, fs);
+ }
+ void exit_fs(struct task_struct *tsk)
+ {
+-      struct fs_struct * fs = tsk->fs;
++      struct fs_struct *fs = tsk->fs;
+       if (fs) {
++              int kill;
+               task_lock(tsk);
++              write_lock(&fs->lock);
+               tsk->fs = NULL;
++              kill = !--fs->users;
++              write_unlock(&fs->lock);
+               task_unlock(tsk);
+-              put_fs_struct(fs);
++              if (kill)
++                      free_fs_struct(fs);
+       }
+ }
+@@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct 
+       struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+       /* We don't need to lock fs - think why ;-) */
+       if (fs) {
+-              atomic_set(&fs->count, 1);
++              fs->users = 1;
++              fs->in_exec = 0;
+               rwlock_init(&fs->lock);
+               fs->umask = old->umask;
+               read_lock(&old->lock);
+@@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct 
+ int unshare_fs_struct(void)
+ {
+-      struct fs_struct *fsp = copy_fs_struct(current->fs);
+-      if (!fsp)
++      struct fs_struct *fs = current->fs;
++      struct fs_struct *new_fs = copy_fs_struct(fs);
++      int kill;
++
++      if (!new_fs)
+               return -ENOMEM;
+-      exit_fs(current);
+-      current->fs = fsp;
++
++      task_lock(current);
++      write_lock(&fs->lock);
++      kill = !--fs->users;
++      current->fs = new_fs;
++      write_unlock(&fs->lock);
++      task_unlock(current);
++
++      if (kill)
++              free_fs_struct(fs);
++
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(unshare_fs_struct);
+ /* to be mentioned only in INIT_TASK */
+ struct fs_struct init_fs = {
+-      .count          = ATOMIC_INIT(1),
++      .users          = 1,
+       .lock           = __RW_LOCK_UNLOCKED(init_fs.lock),
+       .umask          = 0022,
+ };
+ void daemonize_fs_struct(void)
+ {
+-      struct fs_struct *fs;
++      struct fs_struct *fs = current->fs;
+-      exit_fs(current);       /* current->fs->count--; */
+-      fs = &init_fs;
+-      current->fs = fs;
+-      atomic_inc(&fs->count);
++      if (fs) {
++              int kill;
++
++              task_lock(current);
++
++              write_lock(&init_fs.lock);
++              init_fs.users++;
++              write_unlock(&init_fs.lock);
++
++              write_lock(&fs->lock);
++              current->fs = &init_fs;
++              kill = !--fs->users;
++              write_unlock(&fs->lock);
++
++              task_unlock(current);
++              if (kill)
++                      free_fs_struct(fs);
++      }
+ }
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -44,7 +44,7 @@ extern void __init chrdev_init(void);
+ /*
+  * exec.c
+  */
+-extern void check_unsafe_exec(struct linux_binprm *);
++extern int check_unsafe_exec(struct linux_binprm *);
+ /*
+  * namespace.c
+--- a/fs/proc/task_nommu.c
++++ b/fs/proc/task_nommu.c
+@@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct
+       else
+               bytes += kobjsize(mm);
+       
+-      if (current->fs && atomic_read(&current->fs->count) > 1)
++      if (current->fs && current->fs->users > 1)
+               sbytes += kobjsize(current->fs);
+       else
+               bytes += kobjsize(current->fs);
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -4,12 +4,10 @@
+ #include <linux/path.h>
+ struct fs_struct {
+-      atomic_t count; /* This usage count is used by check_unsafe_exec() for
+-                       * security checking purposes - therefore it may not be
+-                       * incremented, except by clone(CLONE_FS).
+-                       */
++      int users;
+       rwlock_t lock;
+       int umask;
++      int in_exec;
+       struct path root, pwd;
+ };
+@@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *
+ extern void set_fs_root(struct fs_struct *, struct path *);
+ extern void set_fs_pwd(struct fs_struct *, struct path *);
+ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
+-extern void put_fs_struct(struct fs_struct *);
++extern void free_fs_struct(struct fs_struct *);
+ extern void daemonize_fs_struct(void);
+ extern int unshare_fs_struct(void);
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -678,11 +678,19 @@ fail_nomem:
+ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
+ {
++      struct fs_struct *fs = current->fs;
+       if (clone_flags & CLONE_FS) {
+-              atomic_inc(&current->fs->count);
++              /* tsk->fs is already what we want */
++              write_lock(&fs->lock);
++              if (fs->in_exec) {
++                      write_unlock(&fs->lock);
++                      return -EAGAIN;
++              }
++              fs->users++;
++              write_unlock(&fs->lock);
+               return 0;
+       }
+-      tsk->fs = copy_fs_struct(current->fs);
++      tsk->fs = copy_fs_struct(fs);
+       if (!tsk->fs)
+               return -ENOMEM;
+       return 0;
+@@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unsh
+ {
+       struct fs_struct *fs = current->fs;
+-      if ((unshare_flags & CLONE_FS) &&
+-          (fs && atomic_read(&fs->count) > 1)) {
+-              *new_fsp = copy_fs_struct(current->fs);
+-              if (!*new_fsp)
+-                      return -ENOMEM;
+-      }
++      if (!(unshare_flags & CLONE_FS) || !fs)
++              return 0;
++
++      /* don't need lock here; in the worst case we'll do useless copy */
++      if (fs->users == 1)
++              return 0;
++
++      *new_fsp = copy_fs_struct(fs);
++      if (!*new_fsp)
++              return -ENOMEM;
+       return 0;
+ }
+@@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, 
+               if (new_fs) {
+                       fs = current->fs;
++                      write_lock(&fs->lock);
+                       current->fs = new_fs;
+-                      new_fs = fs;
++                      if (--fs->users)
++                              new_fs = NULL;
++                      else
++                              new_fs = fs;
++                      write_unlock(&fs->lock);
+               }
+               if (new_mm) {
+@@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh:
+ bad_unshare_cleanup_fs:
+       if (new_fs)
+-              put_fs_struct(new_fs);
++              free_fs_struct(new_fs);
+ bad_unshare_cleanup_thread:
+ bad_unshare_out:
index d67c88646cfa3b9605bab0b4613037ccf250b90a..e8fcd40754a627bd3208579df042dfe251a3605c 100644 (file)
@@ -31,3 +31,14 @@ ptrace-ptrace_attach-fix-the-usage-of-cred_exec_mutex.patch
 usb-serial-fix-lifetime-and-locking-problems.patch
 acpi-revert-conflicting-workaround-for-bios-w-mangled-prt-entries.patch
 powerpc-sanitize-stack-pointer-in-signal-handling-code.patch
+compat_do_execve-should-unshare_files.patch
+fix-setuid-sometimes-doesn-t.patch
+fix-setuid-sometimes-wouldn-t.patch
+annotate-struct-fs_struct-s-usage-count-restriction.patch
+kill-unsharing-fs_struct-in-__set_personality.patch
+get-rid-of-bumping-fs_struct-refcount-in-pivot_root.patch
+take-fs_struct-handling-to-new-file.patch
+new-locking-refcounting-for-fs_struct.patch
+check_unsafe_exec-doesn-t-care-about-signal-handlers-sharing.patch
+do_execve-must-not-clear-fs-in_exec-if-it-was-set-by-another-thread.patch
+check_unsafe_exec-s-lock_task_sighand-rcu_read_lock.patch
diff --git a/queue-2.6.29/take-fs_struct-handling-to-new-file.patch b/queue-2.6.29/take-fs_struct-handling-to-new-file.patch
new file mode 100644 (file)
index 0000000..98efafa
--- /dev/null
@@ -0,0 +1,435 @@
+From 3e93cd671813e204c258f1e6c797959920cf7772 Mon Sep 17 00:00:00 2001
+From: Al Viro <viro@zeniv.linux.org.uk>
+Date: Sun, 29 Mar 2009 19:00:13 -0400
+Subject: Take fs_struct handling to new file (fs/fs_struct.c)
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+commit 3e93cd671813e204c258f1e6c797959920cf7772 upstream.
+
+Pure code move; two new helper functions for nfsd and daemonize
+(unshare_fs_struct() and daemonize_fs_struct() resp.; for now -
+the same code as used to be in callers).  unshare_fs_struct()
+exported (for nfsd, as copy_fs_struct()/exit_fs() used to be),
+copy_fs_struct() and exit_fs() don't need exports anymore.
+
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ fs/Makefile               |    2 
+ fs/fs_struct.c            |  141 ++++++++++++++++++++++++++++++++++++++++++++++
+ fs/internal.h             |    6 +
+ fs/namei.c                |    7 --
+ fs/namespace.c            |   68 ----------------------
+ fs/nfsd/nfssvc.c          |    7 --
+ include/linux/fs_struct.h |    2 
+ kernel/exit.c             |   31 ----------
+ kernel/fork.c             |   29 ---------
+ 9 files changed, 155 insertions(+), 138 deletions(-)
+
+--- /dev/null
++++ b/fs/fs_struct.c
+@@ -0,0 +1,141 @@
++#include <linux/module.h>
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/path.h>
++#include <linux/slab.h>
++
++/*
++ * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
++ * It can block.
++ */
++void set_fs_root(struct fs_struct *fs, struct path *path)
++{
++      struct path old_root;
++
++      write_lock(&fs->lock);
++      old_root = fs->root;
++      fs->root = *path;
++      path_get(path);
++      write_unlock(&fs->lock);
++      if (old_root.dentry)
++              path_put(&old_root);
++}
++
++/*
++ * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
++ * It can block.
++ */
++void set_fs_pwd(struct fs_struct *fs, struct path *path)
++{
++      struct path old_pwd;
++
++      write_lock(&fs->lock);
++      old_pwd = fs->pwd;
++      fs->pwd = *path;
++      path_get(path);
++      write_unlock(&fs->lock);
++
++      if (old_pwd.dentry)
++              path_put(&old_pwd);
++}
++
++void chroot_fs_refs(struct path *old_root, struct path *new_root)
++{
++      struct task_struct *g, *p;
++      struct fs_struct *fs;
++      int count = 0;
++
++      read_lock(&tasklist_lock);
++      do_each_thread(g, p) {
++              task_lock(p);
++              fs = p->fs;
++              if (fs) {
++                      write_lock(&fs->lock);
++                      if (fs->root.dentry == old_root->dentry
++                          && fs->root.mnt == old_root->mnt) {
++                              path_get(new_root);
++                              fs->root = *new_root;
++                              count++;
++                      }
++                      if (fs->pwd.dentry == old_root->dentry
++                          && fs->pwd.mnt == old_root->mnt) {
++                              path_get(new_root);
++                              fs->pwd = *new_root;
++                              count++;
++                      }
++                      write_unlock(&fs->lock);
++              }
++              task_unlock(p);
++      } while_each_thread(g, p);
++      read_unlock(&tasklist_lock);
++      while (count--)
++              path_put(old_root);
++}
++
++void put_fs_struct(struct fs_struct *fs)
++{
++      /* No need to hold fs->lock if we are killing it */
++      if (atomic_dec_and_test(&fs->count)) {
++              path_put(&fs->root);
++              path_put(&fs->pwd);
++              kmem_cache_free(fs_cachep, fs);
++      }
++}
++
++void exit_fs(struct task_struct *tsk)
++{
++      struct fs_struct * fs = tsk->fs;
++
++      if (fs) {
++              task_lock(tsk);
++              tsk->fs = NULL;
++              task_unlock(tsk);
++              put_fs_struct(fs);
++      }
++}
++
++struct fs_struct *copy_fs_struct(struct fs_struct *old)
++{
++      struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
++      /* We don't need to lock fs - think why ;-) */
++      if (fs) {
++              atomic_set(&fs->count, 1);
++              rwlock_init(&fs->lock);
++              fs->umask = old->umask;
++              read_lock(&old->lock);
++              fs->root = old->root;
++              path_get(&old->root);
++              fs->pwd = old->pwd;
++              path_get(&old->pwd);
++              read_unlock(&old->lock);
++      }
++      return fs;
++}
++
++int unshare_fs_struct(void)
++{
++      struct fs_struct *fsp = copy_fs_struct(current->fs);
++      if (!fsp)
++              return -ENOMEM;
++      exit_fs(current);
++      current->fs = fsp;
++      return 0;
++}
++EXPORT_SYMBOL_GPL(unshare_fs_struct);
++
++/* to be mentioned only in INIT_TASK */
++struct fs_struct init_fs = {
++      .count          = ATOMIC_INIT(1),
++      .lock           = __RW_LOCK_UNLOCKED(init_fs.lock),
++      .umask          = 0022,
++};
++
++void daemonize_fs_struct(void)
++{
++      struct fs_struct *fs;
++
++      exit_fs(current);       /* current->fs->count--; */
++      fs = &init_fs;
++      current->fs = fs;
++      atomic_inc(&fs->count);
++}
+--- a/fs/internal.h
++++ b/fs/internal.h
+@@ -11,6 +11,7 @@
+ struct super_block;
+ struct linux_binprm;
++struct path;
+ /*
+  * block_dev.c
+@@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount 
+ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
+ extern void __init mnt_init(void);
++
++/*
++ * fs_struct.c
++ */
++extern void chroot_fs_refs(struct path *, struct path *);
+--- a/fs/Makefile
++++ b/fs/Makefile
+@@ -11,7 +11,7 @@ obj-y :=     open.o read_write.o file_table.
+               attr.o bad_inode.o file.o filesystems.o namespace.o \
+               seq_file.o xattr.o libfs.o fs-writeback.o \
+               pnode.o drop_caches.o splice.o sync.o utimes.o \
+-              stack.o
++              stack.o fs_struct.o
+ ifeq ($(CONFIG_BLOCK),y)
+ obj-y +=      buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -2891,10 +2891,3 @@ EXPORT_SYMBOL(vfs_symlink);
+ EXPORT_SYMBOL(vfs_unlink);
+ EXPORT_SYMBOL(dentry_unhash);
+ EXPORT_SYMBOL(generic_readlink);
+-
+-/* to be mentioned only in INIT_TASK */
+-struct fs_struct init_fs = {
+-      .count          = ATOMIC_INIT(1),
+-      .lock           = __RW_LOCK_UNLOCKED(init_fs.lock),
+-      .umask          = 0022,
+-};
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2089,74 +2089,6 @@ out1:
+ }
+ /*
+- * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
+- * It can block. Requires the big lock held.
+- */
+-void set_fs_root(struct fs_struct *fs, struct path *path)
+-{
+-      struct path old_root;
+-
+-      write_lock(&fs->lock);
+-      old_root = fs->root;
+-      fs->root = *path;
+-      path_get(path);
+-      write_unlock(&fs->lock);
+-      if (old_root.dentry)
+-              path_put(&old_root);
+-}
+-
+-/*
+- * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
+- * It can block. Requires the big lock held.
+- */
+-void set_fs_pwd(struct fs_struct *fs, struct path *path)
+-{
+-      struct path old_pwd;
+-
+-      write_lock(&fs->lock);
+-      old_pwd = fs->pwd;
+-      fs->pwd = *path;
+-      path_get(path);
+-      write_unlock(&fs->lock);
+-
+-      if (old_pwd.dentry)
+-              path_put(&old_pwd);
+-}
+-
+-static void chroot_fs_refs(struct path *old_root, struct path *new_root)
+-{
+-      struct task_struct *g, *p;
+-      struct fs_struct *fs;
+-      int count = 0;
+-
+-      read_lock(&tasklist_lock);
+-      do_each_thread(g, p) {
+-              task_lock(p);
+-              fs = p->fs;
+-              if (fs) {
+-                      write_lock(&fs->lock);
+-                      if (fs->root.dentry == old_root->dentry
+-                          && fs->root.mnt == old_root->mnt) {
+-                              path_get(new_root);
+-                              fs->root = *new_root;
+-                              count++;
+-                      }
+-                      if (fs->pwd.dentry == old_root->dentry
+-                          && fs->pwd.mnt == old_root->mnt) {
+-                              path_get(new_root);
+-                              fs->pwd = *new_root;
+-                              count++;
+-                      }
+-                      write_unlock(&fs->lock);
+-              }
+-              task_unlock(p);
+-      } while_each_thread(g, p);
+-      read_unlock(&tasklist_lock);
+-      while (count--)
+-              path_put(old_root);
+-}
+-
+-/*
+  * pivot_root Semantics:
+  * Moves the root file system of the current process to the directory put_old,
+  * makes new_root as the new root file system of the current process, and sets
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -404,7 +404,6 @@ static int
+ nfsd(void *vrqstp)
+ {
+       struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
+-      struct fs_struct *fsp;
+       int err, preverr = 0;
+       /* Lock module and set up kernel thread */
+@@ -413,13 +412,11 @@ nfsd(void *vrqstp)
+       /* At this point, the thread shares current->fs
+        * with the init process. We need to create files with a
+        * umask of 0 instead of init's umask. */
+-      fsp = copy_fs_struct(current->fs);
+-      if (!fsp) {
++      if (unshare_fs_struct() < 0) {
+               printk("Unable to start nfsd thread: out of memory\n");
+               goto out;
+       }
+-      exit_fs(current);
+-      current->fs = fsp;
++
+       current->fs->umask = 0;
+       /*
+--- a/include/linux/fs_struct.h
++++ b/include/linux/fs_struct.h
+@@ -20,5 +20,7 @@ extern void set_fs_root(struct fs_struct
+ extern void set_fs_pwd(struct fs_struct *, struct path *);
+ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
+ extern void put_fs_struct(struct fs_struct *);
++extern void daemonize_fs_struct(void);
++extern int unshare_fs_struct(void);
+ #endif /* _LINUX_FS_STRUCT_H */
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -429,7 +429,6 @@ EXPORT_SYMBOL(disallow_signal);
+ void daemonize(const char *name, ...)
+ {
+       va_list args;
+-      struct fs_struct *fs;
+       sigset_t blocked;
+       va_start(args, name);
+@@ -462,11 +461,7 @@ void daemonize(const char *name, ...)
+       /* Become as one with the init task */
+-      exit_fs(current);       /* current->fs->count--; */
+-      fs = init_task.fs;
+-      current->fs = fs;
+-      atomic_inc(&fs->count);
+-
++      daemonize_fs_struct();
+       exit_files(current);
+       current->files = init_task.files;
+       atomic_inc(&current->files->count);
+@@ -565,30 +560,6 @@ void exit_files(struct task_struct *tsk)
+       }
+ }
+-void put_fs_struct(struct fs_struct *fs)
+-{
+-      /* No need to hold fs->lock if we are killing it */
+-      if (atomic_dec_and_test(&fs->count)) {
+-              path_put(&fs->root);
+-              path_put(&fs->pwd);
+-              kmem_cache_free(fs_cachep, fs);
+-      }
+-}
+-
+-void exit_fs(struct task_struct *tsk)
+-{
+-      struct fs_struct * fs = tsk->fs;
+-
+-      if (fs) {
+-              task_lock(tsk);
+-              tsk->fs = NULL;
+-              task_unlock(tsk);
+-              put_fs_struct(fs);
+-      }
+-}
+-
+-EXPORT_SYMBOL_GPL(exit_fs);
+-
+ #ifdef CONFIG_MM_OWNER
+ /*
+  * Task p is exiting and it owned mm, lets find a new owner for it
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -676,38 +676,13 @@ fail_nomem:
+       return retval;
+ }
+-static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+-{
+-      struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+-      /* We don't need to lock fs - think why ;-) */
+-      if (fs) {
+-              atomic_set(&fs->count, 1);
+-              rwlock_init(&fs->lock);
+-              fs->umask = old->umask;
+-              read_lock(&old->lock);
+-              fs->root = old->root;
+-              path_get(&old->root);
+-              fs->pwd = old->pwd;
+-              path_get(&old->pwd);
+-              read_unlock(&old->lock);
+-      }
+-      return fs;
+-}
+-
+-struct fs_struct *copy_fs_struct(struct fs_struct *old)
+-{
+-      return __copy_fs_struct(old);
+-}
+-
+-EXPORT_SYMBOL_GPL(copy_fs_struct);
+-
+ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
+ {
+       if (clone_flags & CLONE_FS) {
+               atomic_inc(&current->fs->count);
+               return 0;
+       }
+-      tsk->fs = __copy_fs_struct(current->fs);
++      tsk->fs = copy_fs_struct(current->fs);
+       if (!tsk->fs)
+               return -ENOMEM;
+       return 0;
+@@ -1545,7 +1520,7 @@ static int unshare_fs(unsigned long unsh
+       if ((unshare_flags & CLONE_FS) &&
+           (fs && atomic_read(&fs->count) > 1)) {
+-              *new_fsp = __copy_fs_struct(current->fs);
++              *new_fsp = copy_fs_struct(current->fs);
+               if (!*new_fsp)
+                       return -ENOMEM;
+       }