]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
next/ patches added for after this release...
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 9 Jul 2015 06:08:55 +0000 (23:08 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 9 Jul 2015 06:08:55 +0000 (23:08 -0700)
20 files changed:
next/4.0/fs-add-helper-functions-for-permanently-empty-directories.patch [new file with mode: 0644]
next/4.0/kernfs-add-support-for-always-empty-directories.patch [new file with mode: 0644]
next/4.0/mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch [new file with mode: 0644]
next/4.0/mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch [new file with mode: 0644]
next/4.0/mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch [new file with mode: 0644]
next/4.0/proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch [new file with mode: 0644]
next/4.0/series [new file with mode: 0644]
next/4.0/sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch [new file with mode: 0644]
next/4.0/sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch [new file with mode: 0644]
next/4.0/sysfs-create-mountpoints-with-sysfs_create_mount_point.patch [new file with mode: 0644]
next/4.1/fs-add-helper-functions-for-permanently-empty-directories.patch [new file with mode: 0644]
next/4.1/kernfs-add-support-for-always-empty-directories.patch [new file with mode: 0644]
next/4.1/mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch [new file with mode: 0644]
next/4.1/mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch [new file with mode: 0644]
next/4.1/mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch [new file with mode: 0644]
next/4.1/proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch [new file with mode: 0644]
next/4.1/series [new file with mode: 0644]
next/4.1/sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch [new file with mode: 0644]
next/4.1/sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch [new file with mode: 0644]
next/4.1/sysfs-create-mountpoints-with-sysfs_create_mount_point.patch [new file with mode: 0644]

diff --git a/next/4.0/fs-add-helper-functions-for-permanently-empty-directories.patch b/next/4.0/fs-add-helper-functions-for-permanently-empty-directories.patch
new file mode 100644 (file)
index 0000000..e119884
--- /dev/null
@@ -0,0 +1,141 @@
+From fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 9 May 2015 15:54:49 -0500
+Subject: fs: Add helper functions for permanently empty directories.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d upstream.
+
+To ensure it is safe to mount proc and sysfs I need to check if
+filesystems that are mounted on top of them are mounted on truly empty
+directories.  Given that some directories can gain entries over time,
+knowing that a directory is empty right now is insufficient.
+
+Therefore add supporting infrastructure for permanently empty
+directories that proc and sysfs can use when they create mount points
+for filesystems and fs_fully_visible can use to test for permanently
+empty directories to ensure that nothing will be gained by mounting a
+fresh copy of proc or sysfs.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/libfs.c         |   96 +++++++++++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/fs.h |    2 +
+ 2 files changed, 98 insertions(+)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, lon
+       return -EINVAL;
+ }
+ EXPORT_SYMBOL(simple_nosetlease);
++
++
++/*
++ * Operations for a permanently empty directory.
++ */
++static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
++{
++      return ERR_PTR(-ENOENT);
++}
++
++static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
++                               struct kstat *stat)
++{
++      struct inode *inode = d_inode(dentry);
++      generic_fillattr(inode, stat);
++      return 0;
++}
++
++static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
++{
++      return -EPERM;
++}
++
++static int empty_dir_setxattr(struct dentry *dentry, const char *name,
++                            const void *value, size_t size, int flags)
++{
++      return -EOPNOTSUPP;
++}
++
++static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name,
++                                void *value, size_t size)
++{
++      return -EOPNOTSUPP;
++}
++
++static int empty_dir_removexattr(struct dentry *dentry, const char *name)
++{
++      return -EOPNOTSUPP;
++}
++
++static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
++{
++      return -EOPNOTSUPP;
++}
++
++static const struct inode_operations empty_dir_inode_operations = {
++      .lookup         = empty_dir_lookup,
++      .permission     = generic_permission,
++      .setattr        = empty_dir_setattr,
++      .getattr        = empty_dir_getattr,
++      .setxattr       = empty_dir_setxattr,
++      .getxattr       = empty_dir_getxattr,
++      .removexattr    = empty_dir_removexattr,
++      .listxattr      = empty_dir_listxattr,
++};
++
++static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
++{
++      /* An empty directory has two entries . and .. at offsets 0 and 1 */
++      return generic_file_llseek_size(file, offset, whence, 2, 2);
++}
++
++static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
++{
++      dir_emit_dots(file, ctx);
++      return 0;
++}
++
++static const struct file_operations empty_dir_operations = {
++      .llseek         = empty_dir_llseek,
++      .read           = generic_read_dir,
++      .iterate        = empty_dir_readdir,
++      .fsync          = noop_fsync,
++};
++
++
++void make_empty_dir_inode(struct inode *inode)
++{
++      set_nlink(inode, 2);
++      inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
++      inode->i_uid = GLOBAL_ROOT_UID;
++      inode->i_gid = GLOBAL_ROOT_GID;
++      inode->i_rdev = 0;
++      inode->i_size = 2;
++      inode->i_blkbits = PAGE_SHIFT;
++      inode->i_blocks = 0;
++
++      inode->i_op = &empty_dir_inode_operations;
++      inode->i_fop = &empty_dir_operations;
++}
++
++bool is_empty_dir_inode(struct inode *inode)
++{
++      return (inode->i_fop == &empty_dir_operations) &&
++              (inode->i_op == &empty_dir_inode_operations);
++}
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2721,6 +2721,8 @@ extern struct dentry *simple_lookup(stru
+ extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
+ extern const struct file_operations simple_dir_operations;
+ extern const struct inode_operations simple_dir_inode_operations;
++extern void make_empty_dir_inode(struct inode *inode);
++extern bool is_empty_dir_inode(struct inode *inode);
+ struct tree_descr { char *name; const struct file_operations *ops; int mode; };
+ struct dentry *d_alloc_name(struct dentry *, const char *);
+ extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
diff --git a/next/4.0/kernfs-add-support-for-always-empty-directories.patch b/next/4.0/kernfs-add-support-for-always-empty-directories.patch
new file mode 100644 (file)
index 0000000..ef38af3
--- /dev/null
@@ -0,0 +1,117 @@
+From ea015218f2f7ace2dad9cedd21ed95bdba2886d7 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 16:09:29 -0500
+Subject: kernfs: Add support for always empty directories.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit ea015218f2f7ace2dad9cedd21ed95bdba2886d7 upstream.
+
+Add a new function kernfs_create_empty_dir that can be used to create a
+directory that can not be modified.
+
+Update the code to use make_empty_dir_inode when reporting a
+permanently empty directory to the vfs.
+
+Update the code to not allow adding to permanently empty directories.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/kernfs/dir.c        |   38 +++++++++++++++++++++++++++++++++++++-
+ fs/kernfs/inode.c      |    2 ++
+ include/linux/kernfs.h |    3 +++
+ 3 files changed, 42 insertions(+), 1 deletion(-)
+
+--- a/fs/kernfs/dir.c
++++ b/fs/kernfs/dir.c
+@@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *k
+               goto out_unlock;
+       ret = -ENOENT;
++      if (parent->flags & KERNFS_EMPTY_DIR)
++              goto out_unlock;
++
+       if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+               goto out_unlock;
+@@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns
+       return ERR_PTR(rc);
+ }
++/**
++ * kernfs_create_empty_dir - create an always empty directory
++ * @parent: parent in which to create a new directory
++ * @name: name of the new directory
++ *
++ * Returns the created node on success, ERR_PTR() value on failure.
++ */
++struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
++                                          const char *name)
++{
++      struct kernfs_node *kn;
++      int rc;
++
++      /* allocate */
++      kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
++      if (!kn)
++              return ERR_PTR(-ENOMEM);
++
++      kn->flags |= KERNFS_EMPTY_DIR;
++      kn->dir.root = parent->dir.root;
++      kn->ns = NULL;
++      kn->priv = NULL;
++
++      /* link in */
++      rc = kernfs_add_one(kn);
++      if (!rc)
++              return kn;
++
++      kernfs_put(kn);
++      return ERR_PTR(rc);
++}
++
+ static struct dentry *kernfs_iop_lookup(struct inode *dir,
+                                       struct dentry *dentry,
+                                       unsigned int flags)
+@@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node
+       mutex_lock(&kernfs_mutex);
+       error = -ENOENT;
+-      if (!kernfs_active(kn) || !kernfs_active(new_parent))
++      if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
++          (new_parent->flags & KERNFS_EMPTY_DIR))
+               goto out;
+       error = 0;
+--- a/fs/kernfs/inode.c
++++ b/fs/kernfs/inode.c
+@@ -296,6 +296,8 @@ static void kernfs_init_inode(struct ker
+       case KERNFS_DIR:
+               inode->i_op = &kernfs_dir_iops;
+               inode->i_fop = &kernfs_dir_fops;
++              if (kn->flags & KERNFS_EMPTY_DIR)
++                      make_empty_dir_inode(inode);
+               break;
+       case KERNFS_FILE:
+               inode->i_size = kn->attr.size;
+--- a/include/linux/kernfs.h
++++ b/include/linux/kernfs.h
+@@ -45,6 +45,7 @@ enum kernfs_node_flag {
+       KERNFS_LOCKDEP          = 0x0100,
+       KERNFS_SUICIDAL         = 0x0400,
+       KERNFS_SUICIDED         = 0x0800,
++      KERNFS_EMPTY_DIR        = 0x1000,
+ };
+ /* @flags for kernfs_create_root() */
+@@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_r
+ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
+                                        const char *name, umode_t mode,
+                                        void *priv, const void *ns);
++struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
++                                          const char *name);
+ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
+                                        const char *name,
+                                        umode_t mode, loff_t size,
diff --git a/next/4.0/mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch b/next/4.0/mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch
new file mode 100644 (file)
index 0000000..c724bd2
--- /dev/null
@@ -0,0 +1,93 @@
+From 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 May 2015 23:49:47 -0500
+Subject: mnt: Modify fs_fully_visible to deal with locked ro nodev and atime
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 upstream.
+
+Ignore an existing mount if the locked readonly, nodev or atime
+attributes are less permissive than the desired attributes
+of the new mount.
+
+On success ensure the new mount locks all of the same readonly, nodev and
+atime attributes as the old mount.
+
+The nosuid and noexec attributes are not checked here as this change
+is destined for stable and enforcing those attributes causes a
+regression in lxc and libvirt-lxc where those applications will not
+start and there are no known executables on sysfs or proc and no known
+way to create executables without code modifications.
+
+Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted")
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |   24 +++++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2334,7 +2334,7 @@ unlock:
+       return err;
+ }
+-static bool fs_fully_visible(struct file_system_type *fs_type);
++static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
+ /*
+  * create a new mount for userspace and request it to be added into the
+@@ -2368,7 +2368,7 @@ static int do_new_mount(struct path *pat
+                       mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+               }
+               if (type->fs_flags & FS_USERNS_VISIBLE) {
+-                      if (!fs_fully_visible(type))
++                      if (!fs_fully_visible(type, &mnt_flags))
+                               return -EPERM;
+               }
+       }
+@@ -3172,9 +3172,10 @@ bool current_chrooted(void)
+       return chrooted;
+ }
+-static bool fs_fully_visible(struct file_system_type *type)
++static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+ {
+       struct mnt_namespace *ns = current->nsproxy->mnt_ns;
++      int new_flags = *new_mnt_flags;
+       struct mount *mnt;
+       bool visible = false;
+@@ -3193,6 +3194,19 @@ static bool fs_fully_visible(struct file
+               if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
+                       continue;
++              /* Verify the mount flags are equal to or more permissive
++               * than the proposed new mount.
++               */
++              if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
++                  !(new_flags & MNT_READONLY))
++                      continue;
++              if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
++                  !(new_flags & MNT_NODEV))
++                      continue;
++              if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
++                  ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
++                      continue;
++
+               /* This mount is not fully visible if there are any
+                * locked child mounts that cover anything except for
+                * empty directories.
+@@ -3206,6 +3220,10 @@ static bool fs_fully_visible(struct file
+                       if (!is_empty_dir_inode(inode))
+                               goto next;
+               }
++              /* Preserve the locked attributes */
++              *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \
++                                                      MNT_LOCK_NODEV    | \
++                                                      MNT_LOCK_ATIME);
+               visible = true;
+               goto found;
+       next:   ;
diff --git a/next/4.0/mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch b/next/4.0/mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch
new file mode 100644 (file)
index 0000000..214f25f
--- /dev/null
@@ -0,0 +1,122 @@
+From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 May 2015 23:22:29 -0500
+Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 1b852bceb0d111e510d1a15826ecc4a19358d512 upstream.
+
+Fresh mounts of proc and sysfs are a very special case that works very
+much like a bind mount.  Unfortunately the current structure can not
+preserve the MNT_LOCK... mount flags.  Therefore refactor the logic
+into a form that can be modified to preserve those lock bits.
+
+Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount
+of the filesystem be fully visible in the current mount namespace,
+before the filesystem may be mounted.
+
+Move the logic for calling fs_fully_visible from proc and sysfs into
+fs/namespace.c where it has greater access to mount namespace state.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c     |    8 +++++++-
+ fs/proc/root.c     |    5 +----
+ fs/sysfs/mount.c   |    5 +----
+ include/linux/fs.h |    2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2334,6 +2334,8 @@ unlock:
+       return err;
+ }
++static bool fs_fully_visible(struct file_system_type *fs_type);
++
+ /*
+  * create a new mount for userspace and request it to be added into the
+  * namespace's tree
+@@ -2365,6 +2367,10 @@ static int do_new_mount(struct path *pat
+                       flags |= MS_NODEV;
+                       mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+               }
++              if (type->fs_flags & FS_USERNS_VISIBLE) {
++                      if (!fs_fully_visible(type))
++                              return -EPERM;
++              }
+       }
+       mnt = vfs_kern_mount(type, flags, name, data);
+@@ -3166,7 +3172,7 @@ bool current_chrooted(void)
+       return chrooted;
+ }
+-bool fs_fully_visible(struct file_system_type *type)
++static bool fs_fully_visible(struct file_system_type *type)
+ {
+       struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+       struct mount *mnt;
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct
+               ns = task_active_pid_ns(current);
+               options = data;
+-              if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+-                      return ERR_PTR(-EPERM);
+-
+               /* Does the mounter have privilege over the pid namespace? */
+               if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+                       return ERR_PTR(-EPERM);
+@@ -159,7 +156,7 @@ static struct file_system_type proc_fs_t
+       .name           = "proc",
+       .mount          = proc_mount,
+       .kill_sb        = proc_kill_sb,
+-      .fs_flags       = FS_USERNS_MOUNT,
++      .fs_flags       = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ };
+ void __init proc_root_init(void)
+--- a/fs/sysfs/mount.c
++++ b/fs/sysfs/mount.c
+@@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct
+       bool new_sb;
+       if (!(flags & MS_KERNMOUNT)) {
+-              if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+-                      return ERR_PTR(-EPERM);
+-
+               if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
+                       return ERR_PTR(-EPERM);
+       }
+@@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_
+       .name           = "sysfs",
+       .mount          = sysfs_mount,
+       .kill_sb        = sysfs_kill_sb,
+-      .fs_flags       = FS_USERNS_MOUNT,
++      .fs_flags       = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ };
+ int __init sysfs_init(void)
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1863,6 +1863,7 @@ struct file_system_type {
+ #define FS_HAS_SUBTYPE                4
+ #define FS_USERNS_MOUNT               8       /* Can be mounted by userns root */
+ #define FS_USERNS_DEV_MOUNT   16 /* A userns mount does not imply MNT_NODEV */
++#define FS_USERNS_VISIBLE     32      /* FS must already be visible */
+ #define FS_RENAME_DOES_D_MOVE 32768   /* FS will handle d_move() during rename() internally. */
+       struct dentry *(*mount) (struct file_system_type *, int,
+                      const char *, void *);
+@@ -1950,7 +1951,6 @@ extern int vfs_ustat(dev_t, struct kstat
+ extern int freeze_super(struct super_block *super);
+ extern int thaw_super(struct super_block *super);
+ extern bool our_mnt(struct vfsmount *mnt);
+-extern bool fs_fully_visible(struct file_system_type *);
+ extern int current_umask(void);
diff --git a/next/4.0/mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch b/next/4.0/mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch
new file mode 100644 (file)
index 0000000..4765f80
--- /dev/null
@@ -0,0 +1,50 @@
+From 7236c85e1be51a9e25ba0f6e087a66ca89605a49 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 20:51:09 -0500
+Subject: mnt: Update fs_fully_visible to test for permanently empty directories
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 7236c85e1be51a9e25ba0f6e087a66ca89605a49 upstream.
+
+fs_fully_visible attempts to make fresh mounts of proc and sysfs give
+the mounter no more access to proc and sysfs than they could have
+by creating a bind mount.  One aspect of proc and sysfs that makes
+this particularly tricky is that there are other filesystems that
+typically mount on top of proc and sysfs.  As those filesystems are
+mounted on empty directories in practice it is safe to ignore them.
+However testing to ensure filesystems are mounted on empty directories
+has not been something the in kernel data structures have supported so
+the current test for an empty directory which checks to see
+if nlink <= 2 is a bit lacking.
+
+proc and sysfs have recently been modified to use the new empty_dir
+infrastructure to create all of their dedicated mount points.  Instead
+of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a
+directory is empty, test for is_empty_dir_inode(inode).  That small
+change guarantees mounts found on proc and sysfs really are safe to
+ignore, because the directories are not only empty but nothing can
+ever be added to them.  This guarantees there is nothing to worry
+about when mounting proc and sysfs.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -3196,9 +3196,8 @@ bool fs_fully_visible(struct file_system
+                       /* Only worry about locked mounts */
+                       if (!(mnt->mnt.mnt_flags & MNT_LOCKED))
+                               continue;
+-                      if (!S_ISDIR(inode->i_mode))
+-                              goto next;
+-                      if (inode->i_nlink > 2)
++                      /* Is the directory permanetly empty? */
++                      if (!is_empty_dir_inode(inode))
+                               goto next;
+               }
+               visible = true;
diff --git a/next/4.0/proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch b/next/4.0/proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch
new file mode 100644 (file)
index 0000000..2ab325a
--- /dev/null
@@ -0,0 +1,114 @@
+From eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 11 May 2015 16:44:25 -0500
+Subject: proc: Allow creating permanently empty directories that serve as mount points
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 upstream.
+
+Add a new function proc_create_mount_point that when used creates a
+directory that can not be added to.
+
+Add a new function is_empty_pde to test if a proc_dir_entry is a mount
+point.
+
+Update the code to use make_empty_dir_inode when reporting
+a permanently empty directory to the vfs.
+
+Update the code to not allow adding to permanently empty directories.
+
+Update /proc/openprom and /proc/fs/nfsd to be permanently empty directories.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/generic.c  |   23 +++++++++++++++++++++++
+ fs/proc/inode.c    |    4 ++++
+ fs/proc/internal.h |    6 ++++++
+ fs/proc/root.c     |    4 ++--
+ 4 files changed, 35 insertions(+), 2 deletions(-)
+
+--- a/fs/proc/generic.c
++++ b/fs/proc/generic.c
+@@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_cre
+               WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+               return NULL;
+       }
++      if (is_empty_pde(*parent)) {
++              WARN(1, "attempt to add to permanently empty directory");
++              return NULL;
++      }
+       ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
+       if (!ent)
+@@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const
+ }
+ EXPORT_SYMBOL(proc_mkdir);
++struct proc_dir_entry *proc_create_mount_point(const char *name)
++{
++      umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO;
++      struct proc_dir_entry *ent, *parent = NULL;
++
++      ent = __proc_create(&parent, name, mode, 2);
++      if (ent) {
++              ent->data = NULL;
++              ent->proc_fops = NULL;
++              ent->proc_iops = NULL;
++              if (proc_register(parent, ent) < 0) {
++                      kfree(ent);
++                      parent->nlink--;
++                      ent = NULL;
++              }
++      }
++      return ent;
++}
++
+ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+                                       struct proc_dir_entry *parent,
+                                       const struct file_operations *proc_fops,
+--- a/fs/proc/inode.c
++++ b/fs/proc/inode.c
+@@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct supe
+               inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+               PROC_I(inode)->pde = de;
++              if (is_empty_pde(de)) {
++                      make_empty_dir_inode(inode);
++                      return inode;
++              }
+               if (de->mode) {
+                       inode->i_mode = de->mode;
+                       inode->i_uid = de->uid;
+--- a/fs/proc/internal.h
++++ b/fs/proc/internal.h
+@@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde
+ }
+ extern void pde_put(struct proc_dir_entry *);
++static inline bool is_empty_pde(const struct proc_dir_entry *pde)
++{
++      return S_ISDIR(pde->mode) && !pde->proc_iops;
++}
++struct proc_dir_entry *proc_create_mount_point(const char *name);
++
+ /*
+  * inode.c
+  */
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -182,10 +182,10 @@ void __init proc_root_init(void)
+ #endif
+       proc_mkdir("fs", NULL);
+       proc_mkdir("driver", NULL);
+-      proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
++      proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
+ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
+       /* just give it a mountpoint */
+-      proc_mkdir("openprom", NULL);
++      proc_create_mount_point("openprom");
+ #endif
+       proc_tty_init();
+       proc_mkdir("bus", NULL);
diff --git a/next/4.0/series b/next/4.0/series
new file mode 100644 (file)
index 0000000..d37c86b
--- /dev/null
@@ -0,0 +1,9 @@
+fs-add-helper-functions-for-permanently-empty-directories.patch
+sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch
+proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch
+kernfs-add-support-for-always-empty-directories.patch
+sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch
+mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch
+mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch
+mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch
+sysfs-create-mountpoints-with-sysfs_create_mount_point.patch
diff --git a/next/4.0/sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch b/next/4.0/sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch
new file mode 100644 (file)
index 0000000..04a8ea1
--- /dev/null
@@ -0,0 +1,131 @@
+From f9bd6733d3f11e24f3949becf277507d422ee1eb Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 9 May 2015 22:09:14 -0500
+Subject: sysctl: Allow creating permanently empty directories that serve as mountpoints.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f9bd6733d3f11e24f3949becf277507d422ee1eb upstream.
+
+Add a magic sysctl table sysctl_mount_point that when used to
+create a directory forces that directory to be permanently empty.
+
+Update the code to use make_empty_dir_inode when accessing permanently
+empty directories.
+
+Update the code to not allow adding to permanently empty directories.
+
+Update /proc/sys/fs/binfmt_misc to be a permanently empty directory.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/proc_sysctl.c  |   37 +++++++++++++++++++++++++++++++++++++
+ include/linux/sysctl.h |    3 +++
+ kernel/sysctl.c        |    8 +-------
+ 3 files changed, 41 insertions(+), 7 deletions(-)
+
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -19,6 +19,28 @@ static const struct inode_operations pro
+ static const struct file_operations proc_sys_dir_file_operations;
+ static const struct inode_operations proc_sys_dir_operations;
++/* Support for permanently empty directories */
++
++struct ctl_table sysctl_mount_point[] = {
++      { }
++};
++
++static bool is_empty_dir(struct ctl_table_header *head)
++{
++      return head->ctl_table[0].child == sysctl_mount_point;
++}
++
++static void set_empty_dir(struct ctl_dir *dir)
++{
++      dir->header.ctl_table[0].child = sysctl_mount_point;
++}
++
++static void clear_empty_dir(struct ctl_dir *dir)
++
++{
++      dir->header.ctl_table[0].child = NULL;
++}
++
+ void proc_sys_poll_notify(struct ctl_table_poll *poll)
+ {
+       if (!poll)
+@@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir
+       struct ctl_table *entry;
+       int err;
++      /* Is this a permanently empty directory? */
++      if (is_empty_dir(&dir->header))
++              return -EROFS;
++
++      /* Am I creating a permanently empty directory? */
++      if (header->ctl_table == sysctl_mount_point) {
++              if (!RB_EMPTY_ROOT(&dir->root))
++                      return -EINVAL;
++              set_empty_dir(dir);
++      }
++
+       dir->header.nreg++;
+       header->parent = dir;
+       err = insert_links(header);
+@@ -202,6 +235,8 @@ fail:
+       erase_header(header);
+       put_links(header);
+ fail_links:
++      if (header->ctl_table == sysctl_mount_point)
++              clear_empty_dir(dir);
+       header->parent = NULL;
+       drop_sysctl_table(&dir->header);
+       return err;
+@@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode
+               inode->i_mode |= S_IFDIR;
+               inode->i_op = &proc_sys_dir_operations;
+               inode->i_fop = &proc_sys_dir_file_operations;
++              if (is_empty_dir(head))
++                      make_empty_dir_inode(inode);
+       }
+ out:
+       return inode;
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl
+ void unregister_sysctl_table(struct ctl_table_header * table);
+ extern int sysctl_init(void);
++
++extern struct ctl_table sysctl_mount_point[];
++
+ #else /* CONFIG_SYSCTL */
+ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
+ {
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1510,12 +1510,6 @@ static struct ctl_table vm_table[] = {
+       { }
+ };
+-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+-static struct ctl_table binfmt_misc_table[] = {
+-      { }
+-};
+-#endif
+-
+ static struct ctl_table fs_table[] = {
+       {
+               .procname       = "inode-nr",
+@@ -1669,7 +1663,7 @@ static struct ctl_table fs_table[] = {
+       {
+               .procname       = "binfmt_misc",
+               .mode           = 0555,
+-              .child          = binfmt_misc_table,
++              .child          = sysctl_mount_point,
+       },
+ #endif
+       {
diff --git a/next/4.0/sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch b/next/4.0/sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch
new file mode 100644 (file)
index 0000000..abeaf93
--- /dev/null
@@ -0,0 +1,95 @@
+From 87d2846fcf88113fae2341da1ca9a71f0d916f2c Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 16:31:40 -0500
+Subject: sysfs: Add support for permanently empty directories to serve as mount points.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 87d2846fcf88113fae2341da1ca9a71f0d916f2c upstream.
+
+Add two functions sysfs_create_mount_point and
+sysfs_remove_mount_point that hang a permanently empty directory off
+of a kobject or remove a permanently empty directory hanging from a
+kobject.  Export these new functions so modular filesystems can use
+them.
+
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/sysfs/dir.c        |   34 ++++++++++++++++++++++++++++++++++
+ include/linux/sysfs.h |   15 +++++++++++++++
+ 2 files changed, 49 insertions(+)
+
+--- a/fs/sysfs/dir.c
++++ b/fs/sysfs/dir.c
+@@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *ko
+       return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
+ }
++
++/**
++ * sysfs_create_mount_point - create an always empty directory
++ * @parent_kobj:  kobject that will contain this always empty directory
++ * @name: The name of the always empty directory to add
++ */
++int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name)
++{
++      struct kernfs_node *kn, *parent = parent_kobj->sd;
++
++      kn = kernfs_create_empty_dir(parent, name);
++      if (IS_ERR(kn)) {
++              if (PTR_ERR(kn) == -EEXIST)
++                      sysfs_warn_dup(parent, name);
++              return PTR_ERR(kn);
++      }
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(sysfs_create_mount_point);
++
++/**
++ *    sysfs_remove_mount_point - remove an always empty directory.
++ *    @parent_kobj: kobject that will contain this always empty directory
++ *    @name: The name of the always empty directory to remove
++ *
++ */
++void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name)
++{
++      struct kernfs_node *parent = parent_kobj->sd;
++
++      kernfs_remove_by_name_ns(parent, name, NULL);
++}
++EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
+--- a/include/linux/sysfs.h
++++ b/include/linux/sysfs.h
+@@ -195,6 +195,10 @@ int __must_check sysfs_rename_dir_ns(str
+ int __must_check sysfs_move_dir_ns(struct kobject *kobj,
+                                  struct kobject *new_parent_kobj,
+                                  const void *new_ns);
++int __must_check sysfs_create_mount_point(struct kobject *parent_kobj,
++                                        const char *name);
++void sysfs_remove_mount_point(struct kobject *parent_kobj,
++                            const char *name);
+ int __must_check sysfs_create_file_ns(struct kobject *kobj,
+                                     const struct attribute *attr,
+@@ -283,6 +287,17 @@ static inline int sysfs_move_dir_ns(stru
+       return 0;
+ }
++static inline int sysfs_create_mount_point(struct kobject *parent_kobj,
++                                         const char *name)
++{
++      return 0;
++}
++
++static inline void sysfs_remove_mount_point(struct kobject *parent_kobj,
++                                          const char *name)
++{
++}
++
+ static inline int sysfs_create_file_ns(struct kobject *kobj,
+                                      const struct attribute *attr,
+                                      const void *ns)
diff --git a/next/4.0/sysfs-create-mountpoints-with-sysfs_create_mount_point.patch b/next/4.0/sysfs-create-mountpoints-with-sysfs_create_mount_point.patch
new file mode 100644 (file)
index 0000000..1f44e73
--- /dev/null
@@ -0,0 +1,352 @@
+From f9bb48825a6b5d02f4cabcc78967c75db903dcdc Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 17:35:41 -0500
+Subject: sysfs: Create mountpoints with sysfs_create_mount_point
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f9bb48825a6b5d02f4cabcc78967c75db903dcdc upstream.
+
+This allows for better documentation in the code and
+it allows for a simpler and fully correct version of
+fs_fully_visible to be written.
+
+The mount points converted and their filesystems are:
+/sys/hypervisor/s390/       s390_hypfs
+/sys/kernel/config/         configfs
+/sys/kernel/debug/          debugfs
+/sys/firmware/efi/efivars/  efivarfs
+/sys/fs/fuse/connections/   fusectl
+/sys/fs/pstore/             pstore
+/sys/kernel/tracing/        tracefs
+/sys/fs/cgroup/             cgroup
+/sys/kernel/security/       securityfs
+/sys/fs/selinux/            selinuxfs
+/sys/fs/smackfs/            smackfs
+
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/hypfs/inode.c      |   12 ++++--------
+ drivers/firmware/efi/efi.c   |    6 ++----
+ fs/configfs/mount.c          |   10 ++++------
+ fs/debugfs/inode.c           |   11 ++++-------
+ fs/fuse/inode.c              |    9 +++------
+ fs/pstore/inode.c            |   12 ++++--------
+ kernel/cgroup.c              |   10 ++++------
+ security/inode.c             |   10 ++++------
+ security/selinux/selinuxfs.c |   11 +++++------
+ security/smack/smackfs.c     |    8 ++++----
+ 10 files changed, 38 insertions(+), 61 deletions(-)
+
+--- a/arch/s390/hypfs/inode.c
++++ b/arch/s390/hypfs/inode.c
+@@ -458,8 +458,6 @@ static const struct super_operations hyp
+       .show_options   = hypfs_show_options,
+ };
+-static struct kobject *s390_kobj;
+-
+ static int __init hypfs_init(void)
+ {
+       int rc;
+@@ -483,18 +481,16 @@ static int __init hypfs_init(void)
+               rc = -ENODATA;
+               goto fail_hypfs_sprp_exit;
+       }
+-      s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
+-      if (!s390_kobj) {
+-              rc = -ENOMEM;
++      rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
++      if (rc)
+               goto fail_hypfs_diag0c_exit;
+-      }
+       rc = register_filesystem(&hypfs_type);
+       if (rc)
+               goto fail_filesystem;
+       return 0;
+ fail_filesystem:
+-      kobject_put(s390_kobj);
++      sysfs_remove_mount_point(hypervisor_kobj, "s390");
+ fail_hypfs_diag0c_exit:
+       hypfs_diag0c_exit();
+ fail_hypfs_sprp_exit:
+@@ -512,7 +508,7 @@ fail_dbfs_exit:
+ static void __exit hypfs_exit(void)
+ {
+       unregister_filesystem(&hypfs_type);
+-      kobject_put(s390_kobj);
++      sysfs_remove_mount_point(hypervisor_kobj, "s390");
+       hypfs_diag0c_exit();
+       hypfs_sprp_exit();
+       hypfs_vm_exit();
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -65,7 +65,6 @@ static int __init parse_efi_cmdline(char
+ early_param("efi", parse_efi_cmdline);
+ static struct kobject *efi_kobj;
+-static struct kobject *efivars_kobj;
+ /*
+  * Let's not leave out systab information that snuck into
+@@ -212,10 +211,9 @@ static int __init efisubsys_init(void)
+               goto err_remove_group;
+       /* and the standard mountpoint for efivarfs */
+-      efivars_kobj = kobject_create_and_add("efivars", efi_kobj);
+-      if (!efivars_kobj) {
++      error = sysfs_create_mount_point(efi_kobj, "efivars");
++      if (error) {
+               pr_err("efivars: Subsystem registration failed.\n");
+-              error = -ENOMEM;
+               goto err_remove_group;
+       }
+--- a/fs/configfs/mount.c
++++ b/fs/configfs/mount.c
+@@ -129,8 +129,6 @@ void configfs_release_fs(void)
+ }
+-static struct kobject *config_kobj;
+-
+ static int __init configfs_init(void)
+ {
+       int err = -ENOMEM;
+@@ -141,8 +139,8 @@ static int __init configfs_init(void)
+       if (!configfs_dir_cachep)
+               goto out;
+-      config_kobj = kobject_create_and_add("config", kernel_kobj);
+-      if (!config_kobj)
++      err = sysfs_create_mount_point(kernel_kobj, "config");
++      if (err)
+               goto out2;
+       err = register_filesystem(&configfs_fs_type);
+@@ -152,7 +150,7 @@ static int __init configfs_init(void)
+       return 0;
+ out3:
+       pr_err("Unable to register filesystem!\n");
+-      kobject_put(config_kobj);
++      sysfs_remove_mount_point(kernel_kobj, "config");
+ out2:
+       kmem_cache_destroy(configfs_dir_cachep);
+       configfs_dir_cachep = NULL;
+@@ -163,7 +161,7 @@ out:
+ static void __exit configfs_exit(void)
+ {
+       unregister_filesystem(&configfs_fs_type);
+-      kobject_put(config_kobj);
++      sysfs_remove_mount_point(kernel_kobj, "config");
+       kmem_cache_destroy(configfs_dir_cachep);
+       configfs_dir_cachep = NULL;
+ }
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -713,20 +713,17 @@ bool debugfs_initialized(void)
+ }
+ EXPORT_SYMBOL_GPL(debugfs_initialized);
+-
+-static struct kobject *debug_kobj;
+-
+ static int __init debugfs_init(void)
+ {
+       int retval;
+-      debug_kobj = kobject_create_and_add("debug", kernel_kobj);
+-      if (!debug_kobj)
+-              return -EINVAL;
++      retval = sysfs_create_mount_point(kernel_kobj, "debug");
++      if (retval)
++              return retval;
+       retval = register_filesystem(&debug_fs_type);
+       if (retval)
+-              kobject_put(debug_kobj);
++              sysfs_remove_mount_point(kernel_kobj, "debug");
+       else
+               debugfs_registered = true;
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void)
+ }
+ static struct kobject *fuse_kobj;
+-static struct kobject *connections_kobj;
+ static int fuse_sysfs_init(void)
+ {
+@@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void)
+               goto out_err;
+       }
+-      connections_kobj = kobject_create_and_add("connections", fuse_kobj);
+-      if (!connections_kobj) {
+-              err = -ENOMEM;
++      err = sysfs_create_mount_point(fuse_kobj, "connections");
++      if (err)
+               goto out_fuse_unregister;
+-      }
+       return 0;
+@@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void)
+ static void fuse_sysfs_cleanup(void)
+ {
+-      kobject_put(connections_kobj);
++      sysfs_remove_mount_point(fuse_kobj, "connections");
+       kobject_put(fuse_kobj);
+ }
+--- a/fs/pstore/inode.c
++++ b/fs/pstore/inode.c
+@@ -458,22 +458,18 @@ static struct file_system_type pstore_fs
+       .kill_sb        = pstore_kill_sb,
+ };
+-static struct kobject *pstore_kobj;
+-
+ static int __init init_pstore_fs(void)
+ {
+-      int err = 0;
++      int err;
+       /* Create a convenient mount point for people to access pstore */
+-      pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
+-      if (!pstore_kobj) {
+-              err = -ENOMEM;
++      err = sysfs_create_mount_point(fs_kobj, "pstore");
++      if (err)
+               goto out;
+-      }
+       err = register_filesystem(&pstore_fs_type);
+       if (err < 0)
+-              kobject_put(pstore_kobj);
++              sysfs_remove_mount_point(fs_kobj, "pstore");
+ out:
+       return err;
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -1924,8 +1924,6 @@ static struct file_system_type cgroup_fs
+       .kill_sb = cgroup_kill_sb,
+ };
+-static struct kobject *cgroup_kobj;
+-
+ /**
+  * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
+  * @task: target task
+@@ -5042,13 +5040,13 @@ int __init cgroup_init(void)
+               }
+       }
+-      cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
+-      if (!cgroup_kobj)
+-              return -ENOMEM;
++      err = sysfs_create_mount_point(fs_kobj, "cgroup");
++      if (err)
++              return err;
+       err = register_filesystem(&cgroup_fs_type);
+       if (err < 0) {
+-              kobject_put(cgroup_kobj);
++              sysfs_remove_mount_point(fs_kobj, "cgroup");
+               return err;
+       }
+--- a/security/inode.c
++++ b/security/inode.c
+@@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *de
+ }
+ EXPORT_SYMBOL_GPL(securityfs_remove);
+-static struct kobject *security_kobj;
+-
+ static int __init securityfs_init(void)
+ {
+       int retval;
+-      security_kobj = kobject_create_and_add("security", kernel_kobj);
+-      if (!security_kobj)
+-              return -EINVAL;
++      retval = sysfs_create_mount_point(kernel_kobj, "security");
++      if (retval)
++              return retval;
+       retval = register_filesystem(&fs_type);
+       if (retval)
+-              kobject_put(security_kobj);
++              sysfs_remove_mount_point(kernel_kobj, "security");
+       return retval;
+ }
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_ty
+ };
+ struct vfsmount *selinuxfs_mount;
+-static struct kobject *selinuxfs_kobj;
+ static int __init init_sel_fs(void)
+ {
+@@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void)
+       if (!selinux_enabled)
+               return 0;
+-      selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj);
+-      if (!selinuxfs_kobj)
+-              return -ENOMEM;
++      err = sysfs_create_mount_point(fs_kobj, "selinux");
++      if (err)
++              return err;
+       err = register_filesystem(&sel_fs_type);
+       if (err) {
+-              kobject_put(selinuxfs_kobj);
++              sysfs_remove_mount_point(fs_kobj, "selinux");
+               return err;
+       }
+@@ -1887,7 +1886,7 @@ __initcall(init_sel_fs);
+ #ifdef CONFIG_SECURITY_SELINUX_DISABLE
+ void exit_sel_fs(void)
+ {
+-      kobject_put(selinuxfs_kobj);
++      sysfs_remove_mount_point(fs_kobj, "selinux");
+       kern_unmount(selinuxfs_mount);
+       unregister_filesystem(&sel_fs_type);
+ }
+--- a/security/smack/smackfs.c
++++ b/security/smack/smackfs.c
+@@ -2150,16 +2150,16 @@ static const struct file_operations smk_
+       .llseek         = generic_file_llseek,
+ };
+-static struct kset *smackfs_kset;
+ /**
+  * smk_init_sysfs - initialize /sys/fs/smackfs
+  *
+  */
+ static int smk_init_sysfs(void)
+ {
+-      smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj);
+-      if (!smackfs_kset)
+-              return -ENOMEM;
++      int err;
++      err = sysfs_create_mount_point(fs_kobj, "smackfs");
++      if (err)
++              return err;
+       return 0;
+ }
diff --git a/next/4.1/fs-add-helper-functions-for-permanently-empty-directories.patch b/next/4.1/fs-add-helper-functions-for-permanently-empty-directories.patch
new file mode 100644 (file)
index 0000000..96a55c0
--- /dev/null
@@ -0,0 +1,141 @@
+From fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 9 May 2015 15:54:49 -0500
+Subject: fs: Add helper functions for permanently empty directories.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d upstream.
+
+To ensure it is safe to mount proc and sysfs I need to check if
+filesystems that are mounted on top of them are mounted on truly empty
+directories.  Given that some directories can gain entries over time,
+knowing that a directory is empty right now is insufficient.
+
+Therefore add supporting infrastructure for permanently empty
+directories that proc and sysfs can use when they create mount points
+for filesystems and fs_fully_visible can use to test for permanently
+empty directories to ensure that nothing will be gained by mounting a
+fresh copy of proc or sysfs.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/libfs.c         |   96 +++++++++++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/fs.h |    2 +
+ 2 files changed, 98 insertions(+)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, lon
+       return -EINVAL;
+ }
+ EXPORT_SYMBOL(simple_nosetlease);
++
++
++/*
++ * Operations for a permanently empty directory.
++ */
++static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
++{
++      return ERR_PTR(-ENOENT);
++}
++
++static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
++                               struct kstat *stat)
++{
++      struct inode *inode = d_inode(dentry);
++      generic_fillattr(inode, stat);
++      return 0;
++}
++
++static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
++{
++      return -EPERM;
++}
++
++static int empty_dir_setxattr(struct dentry *dentry, const char *name,
++                            const void *value, size_t size, int flags)
++{
++      return -EOPNOTSUPP;
++}
++
++static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name,
++                                void *value, size_t size)
++{
++      return -EOPNOTSUPP;
++}
++
++static int empty_dir_removexattr(struct dentry *dentry, const char *name)
++{
++      return -EOPNOTSUPP;
++}
++
++static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
++{
++      return -EOPNOTSUPP;
++}
++
++static const struct inode_operations empty_dir_inode_operations = {
++      .lookup         = empty_dir_lookup,
++      .permission     = generic_permission,
++      .setattr        = empty_dir_setattr,
++      .getattr        = empty_dir_getattr,
++      .setxattr       = empty_dir_setxattr,
++      .getxattr       = empty_dir_getxattr,
++      .removexattr    = empty_dir_removexattr,
++      .listxattr      = empty_dir_listxattr,
++};
++
++static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
++{
++      /* An empty directory has two entries . and .. at offsets 0 and 1 */
++      return generic_file_llseek_size(file, offset, whence, 2, 2);
++}
++
++static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
++{
++      dir_emit_dots(file, ctx);
++      return 0;
++}
++
++static const struct file_operations empty_dir_operations = {
++      .llseek         = empty_dir_llseek,
++      .read           = generic_read_dir,
++      .iterate        = empty_dir_readdir,
++      .fsync          = noop_fsync,
++};
++
++
++void make_empty_dir_inode(struct inode *inode)
++{
++      set_nlink(inode, 2);
++      inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
++      inode->i_uid = GLOBAL_ROOT_UID;
++      inode->i_gid = GLOBAL_ROOT_GID;
++      inode->i_rdev = 0;
++      inode->i_size = 2;
++      inode->i_blkbits = PAGE_SHIFT;
++      inode->i_blocks = 0;
++
++      inode->i_op = &empty_dir_inode_operations;
++      inode->i_fop = &empty_dir_operations;
++}
++
++bool is_empty_dir_inode(struct inode *inode)
++{
++      return (inode->i_fop == &empty_dir_operations) &&
++              (inode->i_op == &empty_dir_inode_operations);
++}
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2780,6 +2780,8 @@ extern struct dentry *simple_lookup(stru
+ extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
+ extern const struct file_operations simple_dir_operations;
+ extern const struct inode_operations simple_dir_inode_operations;
++extern void make_empty_dir_inode(struct inode *inode);
++extern bool is_empty_dir_inode(struct inode *inode);
+ struct tree_descr { char *name; const struct file_operations *ops; int mode; };
+ struct dentry *d_alloc_name(struct dentry *, const char *);
+ extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
diff --git a/next/4.1/kernfs-add-support-for-always-empty-directories.patch b/next/4.1/kernfs-add-support-for-always-empty-directories.patch
new file mode 100644 (file)
index 0000000..ef38af3
--- /dev/null
@@ -0,0 +1,117 @@
+From ea015218f2f7ace2dad9cedd21ed95bdba2886d7 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 16:09:29 -0500
+Subject: kernfs: Add support for always empty directories.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit ea015218f2f7ace2dad9cedd21ed95bdba2886d7 upstream.
+
+Add a new function kernfs_create_empty_dir that can be used to create
+a directory that can not be modified.
+
+Update the code to use make_empty_dir_inode when reporting a
+permanently empty directory to the vfs.
+
+Update the code to not allow adding to permanently empty directories.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/kernfs/dir.c        |   38 +++++++++++++++++++++++++++++++++++++-
+ fs/kernfs/inode.c      |    2 ++
+ include/linux/kernfs.h |    3 +++
+ 3 files changed, 42 insertions(+), 1 deletion(-)
+
+--- a/fs/kernfs/dir.c
++++ b/fs/kernfs/dir.c
+@@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *k
+               goto out_unlock;
+       ret = -ENOENT;
++      if (parent->flags & KERNFS_EMPTY_DIR)
++              goto out_unlock;
++
+       if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+               goto out_unlock;
+@@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns
+       return ERR_PTR(rc);
+ }
++/**
++ * kernfs_create_empty_dir - create an always empty directory
++ * @parent: parent in which to create a new directory
++ * @name: name of the new directory
++ *
++ * Returns the created node on success, ERR_PTR() value on failure.
++ */
++struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
++                                          const char *name)
++{
++      struct kernfs_node *kn;
++      int rc;
++
++      /* allocate */
++      kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
++      if (!kn)
++              return ERR_PTR(-ENOMEM);
++
++      kn->flags |= KERNFS_EMPTY_DIR;
++      kn->dir.root = parent->dir.root;
++      kn->ns = NULL;
++      kn->priv = NULL;
++
++      /* link in */
++      rc = kernfs_add_one(kn);
++      if (!rc)
++              return kn;
++
++      kernfs_put(kn);
++      return ERR_PTR(rc);
++}
++
+ static struct dentry *kernfs_iop_lookup(struct inode *dir,
+                                       struct dentry *dentry,
+                                       unsigned int flags)
+@@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node
+       mutex_lock(&kernfs_mutex);
+       error = -ENOENT;
+-      if (!kernfs_active(kn) || !kernfs_active(new_parent))
++      if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
++          (new_parent->flags & KERNFS_EMPTY_DIR))
+               goto out;
+       error = 0;
+--- a/fs/kernfs/inode.c
++++ b/fs/kernfs/inode.c
+@@ -296,6 +296,8 @@ static void kernfs_init_inode(struct ker
+       case KERNFS_DIR:
+               inode->i_op = &kernfs_dir_iops;
+               inode->i_fop = &kernfs_dir_fops;
++              if (kn->flags & KERNFS_EMPTY_DIR)
++                      make_empty_dir_inode(inode);
+               break;
+       case KERNFS_FILE:
+               inode->i_size = kn->attr.size;
+--- a/include/linux/kernfs.h
++++ b/include/linux/kernfs.h
+@@ -45,6 +45,7 @@ enum kernfs_node_flag {
+       KERNFS_LOCKDEP          = 0x0100,
+       KERNFS_SUICIDAL         = 0x0400,
+       KERNFS_SUICIDED         = 0x0800,
++      KERNFS_EMPTY_DIR        = 0x1000,
+ };
+ /* @flags for kernfs_create_root() */
+@@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_r
+ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
+                                        const char *name, umode_t mode,
+                                        void *priv, const void *ns);
++struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
++                                          const char *name);
+ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
+                                        const char *name,
+                                        umode_t mode, loff_t size,
diff --git a/next/4.1/mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch b/next/4.1/mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch
new file mode 100644 (file)
index 0000000..7f5ac4a
--- /dev/null
@@ -0,0 +1,93 @@
+From 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 May 2015 23:49:47 -0500
+Subject: mnt: Modify fs_fully_visible to deal with locked ro nodev and atime
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 upstream.
+
+Ignore an existing mount if the locked readonly, nodev or atime
+attributes are less permissive than the desired attributes
+of the new mount.
+
+On success ensure the new mount locks all of the same readonly, nodev and
+atime attributes as the old mount.
+
+The nosuid and noexec attributes are not checked here as this change
+is destined for stable and enforcing those attributes causes a
+regression in lxc and libvirt-lxc where those applications will not
+start and there are no known executables on sysfs or proc and no known
+way to create executables without code modifications.
+
+Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted")
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |   24 +++++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2332,7 +2332,7 @@ unlock:
+       return err;
+ }
+-static bool fs_fully_visible(struct file_system_type *fs_type);
++static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
+ /*
+  * create a new mount for userspace and request it to be added into the
+@@ -2366,7 +2366,7 @@ static int do_new_mount(struct path *pat
+                       mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+               }
+               if (type->fs_flags & FS_USERNS_VISIBLE) {
+-                      if (!fs_fully_visible(type))
++                      if (!fs_fully_visible(type, &mnt_flags))
+                               return -EPERM;
+               }
+       }
+@@ -3170,9 +3170,10 @@ bool current_chrooted(void)
+       return chrooted;
+ }
+-static bool fs_fully_visible(struct file_system_type *type)
++static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+ {
+       struct mnt_namespace *ns = current->nsproxy->mnt_ns;
++      int new_flags = *new_mnt_flags;
+       struct mount *mnt;
+       bool visible = false;
+@@ -3191,6 +3192,19 @@ static bool fs_fully_visible(struct file
+               if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
+                       continue;
++              /* Verify the mount flags are equal to or more permissive
++               * than the proposed new mount.
++               */
++              if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
++                  !(new_flags & MNT_READONLY))
++                      continue;
++              if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
++                  !(new_flags & MNT_NODEV))
++                      continue;
++              if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
++                  ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
++                      continue;
++
+               /* This mount is not fully visible if there are any
+                * locked child mounts that cover anything except for
+                * empty directories.
+@@ -3204,6 +3218,10 @@ static bool fs_fully_visible(struct file
+                       if (!is_empty_dir_inode(inode))
+                               goto next;
+               }
++              /* Preserve the locked attributes */
++              *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \
++                                                      MNT_LOCK_NODEV    | \
++                                                      MNT_LOCK_ATIME);
+               visible = true;
+               goto found;
+       next:   ;
diff --git a/next/4.1/mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch b/next/4.1/mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch
new file mode 100644 (file)
index 0000000..29618ce
--- /dev/null
@@ -0,0 +1,122 @@
+From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 May 2015 23:22:29 -0500
+Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 1b852bceb0d111e510d1a15826ecc4a19358d512 upstream.
+
+Fresh mounts of proc and sysfs are a very special case that works very
+much like a bind mount.  Unfortunately the current structure can not
+preserve the MNT_LOCK... mount flags.  Therefore refactor the logic
+into a form that can be modified to preserve those lock bits.
+
+Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount
+of the filesystem be fully visible in the current mount namespace,
+before the filesystem may be mounted.
+
+Move the logic for calling fs_fully_visible from proc and sysfs into
+fs/namespace.c where it has greater access to mount namespace state.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c     |    8 +++++++-
+ fs/proc/root.c     |    5 +----
+ fs/sysfs/mount.c   |    5 +----
+ include/linux/fs.h |    2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2332,6 +2332,8 @@ unlock:
+       return err;
+ }
++static bool fs_fully_visible(struct file_system_type *fs_type);
++
+ /*
+  * create a new mount for userspace and request it to be added into the
+  * namespace's tree
+@@ -2363,6 +2365,10 @@ static int do_new_mount(struct path *pat
+                       flags |= MS_NODEV;
+                       mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+               }
++              if (type->fs_flags & FS_USERNS_VISIBLE) {
++                      if (!fs_fully_visible(type))
++                              return -EPERM;
++              }
+       }
+       mnt = vfs_kern_mount(type, flags, name, data);
+@@ -3164,7 +3170,7 @@ bool current_chrooted(void)
+       return chrooted;
+ }
+-bool fs_fully_visible(struct file_system_type *type)
++static bool fs_fully_visible(struct file_system_type *type)
+ {
+       struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+       struct mount *mnt;
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct
+               ns = task_active_pid_ns(current);
+               options = data;
+-              if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+-                      return ERR_PTR(-EPERM);
+-
+               /* Does the mounter have privilege over the pid namespace? */
+               if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+                       return ERR_PTR(-EPERM);
+@@ -159,7 +156,7 @@ static struct file_system_type proc_fs_t
+       .name           = "proc",
+       .mount          = proc_mount,
+       .kill_sb        = proc_kill_sb,
+-      .fs_flags       = FS_USERNS_MOUNT,
++      .fs_flags       = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ };
+ void __init proc_root_init(void)
+--- a/fs/sysfs/mount.c
++++ b/fs/sysfs/mount.c
+@@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct
+       bool new_sb;
+       if (!(flags & MS_KERNMOUNT)) {
+-              if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+-                      return ERR_PTR(-EPERM);
+-
+               if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
+                       return ERR_PTR(-EPERM);
+       }
+@@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_
+       .name           = "sysfs",
+       .mount          = sysfs_mount,
+       .kill_sb        = sysfs_kill_sb,
+-      .fs_flags       = FS_USERNS_MOUNT,
++      .fs_flags       = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ };
+ int __init sysfs_init(void)
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1897,6 +1897,7 @@ struct file_system_type {
+ #define FS_HAS_SUBTYPE                4
+ #define FS_USERNS_MOUNT               8       /* Can be mounted by userns root */
+ #define FS_USERNS_DEV_MOUNT   16 /* A userns mount does not imply MNT_NODEV */
++#define FS_USERNS_VISIBLE     32      /* FS must already be visible */
+ #define FS_RENAME_DOES_D_MOVE 32768   /* FS will handle d_move() during rename() internally. */
+       struct dentry *(*mount) (struct file_system_type *, int,
+                      const char *, void *);
+@@ -1984,7 +1985,6 @@ extern int vfs_ustat(dev_t, struct kstat
+ extern int freeze_super(struct super_block *super);
+ extern int thaw_super(struct super_block *super);
+ extern bool our_mnt(struct vfsmount *mnt);
+-extern bool fs_fully_visible(struct file_system_type *);
+ extern int current_umask(void);
diff --git a/next/4.1/mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch b/next/4.1/mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch
new file mode 100644 (file)
index 0000000..4687293
--- /dev/null
@@ -0,0 +1,50 @@
+From 7236c85e1be51a9e25ba0f6e087a66ca89605a49 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 20:51:09 -0500
+Subject: mnt: Update fs_fully_visible to test for permanently empty directories
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 7236c85e1be51a9e25ba0f6e087a66ca89605a49 upstream.
+
+fs_fully_visible attempts to make fresh mounts of proc and sysfs give
+the mounter no more access to proc and sysfs than they could have gained
+by creating a bind mount.  One aspect of proc and sysfs that makes
+this particularly tricky is that there are other filesystems that
+typically mount on top of proc and sysfs.  As those filesystems are
+mounted on empty directories in practice it is safe to ignore them.
+However testing to ensure filesystems are mounted on empty directories
+has not been something the in kernel data structures have supported so
+the current test for an empty directory which checks to see
+if nlink <= 2 is a bit lacking.
+
+proc and sysfs have recently been modified to use the new empty_dir
+infrastructure to create all of their dedicated mount points.  Instead
+of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a
+directory is empty, test for is_empty_dir_inode(inode).  That small
+change guarantees mounts found on proc and sysfs really are safe to
+ignore, because the directories are not only empty but nothing can
+ever be added to them.  This guarantees there is nothing to worry
+about when mounting proc and sysfs.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -3194,9 +3194,8 @@ bool fs_fully_visible(struct file_system
+                       /* Only worry about locked mounts */
+                       if (!(mnt->mnt.mnt_flags & MNT_LOCKED))
+                               continue;
+-                      if (!S_ISDIR(inode->i_mode))
+-                              goto next;
+-                      if (inode->i_nlink > 2)
++                      /* Is the directory permanently empty? */
++                      if (!is_empty_dir_inode(inode))
+                               goto next;
+               }
+               visible = true;
diff --git a/next/4.1/proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch b/next/4.1/proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch
new file mode 100644 (file)
index 0000000..2ab325a
--- /dev/null
@@ -0,0 +1,114 @@
+From eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 11 May 2015 16:44:25 -0500
+Subject: proc: Allow creating permanently empty directories that serve as mount points
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 upstream.
+
+Add a new function proc_create_mount_point that when used creates a
+directory that can not be added to.
+
+Add a new function is_empty_pde to test if a proc directory entry is a
+mount point.
+
+Update the code to use make_empty_dir_inode when reporting
+a permanently empty directory to the vfs.
+
+Update the code to not allow adding to permanently empty directories.
+
+Update /proc/openprom and /proc/fs/nfsd to be permanently empty directories.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/generic.c  |   23 +++++++++++++++++++++++
+ fs/proc/inode.c    |    4 ++++
+ fs/proc/internal.h |    6 ++++++
+ fs/proc/root.c     |    4 ++--
+ 4 files changed, 35 insertions(+), 2 deletions(-)
+
+--- a/fs/proc/generic.c
++++ b/fs/proc/generic.c
+@@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_cre
+               WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+               return NULL;
+       }
++      if (is_empty_pde(*parent)) {
++              WARN(1, "attempt to add to permanently empty directory");
++              return NULL;
++      }
+       ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
+       if (!ent)
+@@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const
+ }
+ EXPORT_SYMBOL(proc_mkdir);
++struct proc_dir_entry *proc_create_mount_point(const char *name)
++{
++      umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO;
++      struct proc_dir_entry *ent, *parent = NULL;
++
++      ent = __proc_create(&parent, name, mode, 2);
++      if (ent) {
++              ent->data = NULL;
++              ent->proc_fops = NULL;
++              ent->proc_iops = NULL;
++              if (proc_register(parent, ent) < 0) {
++                      kfree(ent);
++                      parent->nlink--;
++                      ent = NULL;
++              }
++      }
++      return ent;
++}
++
+ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+                                       struct proc_dir_entry *parent,
+                                       const struct file_operations *proc_fops,
+--- a/fs/proc/inode.c
++++ b/fs/proc/inode.c
+@@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct supe
+               inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+               PROC_I(inode)->pde = de;
++              if (is_empty_pde(de)) {
++                      make_empty_dir_inode(inode);
++                      return inode;
++              }
+               if (de->mode) {
+                       inode->i_mode = de->mode;
+                       inode->i_uid = de->uid;
+--- a/fs/proc/internal.h
++++ b/fs/proc/internal.h
+@@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde
+ }
+ extern void pde_put(struct proc_dir_entry *);
++static inline bool is_empty_pde(const struct proc_dir_entry *pde)
++{
++      return S_ISDIR(pde->mode) && !pde->proc_iops;
++}
++struct proc_dir_entry *proc_create_mount_point(const char *name);
++
+ /*
+  * inode.c
+  */
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -182,10 +182,10 @@ void __init proc_root_init(void)
+ #endif
+       proc_mkdir("fs", NULL);
+       proc_mkdir("driver", NULL);
+-      proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
++      proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
+ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
+       /* just give it a mountpoint */
+-      proc_mkdir("openprom", NULL);
++      proc_create_mount_point("openprom");
+ #endif
+       proc_tty_init();
+       proc_mkdir("bus", NULL);
diff --git a/next/4.1/series b/next/4.1/series
new file mode 100644 (file)
index 0000000..c8867fb
--- /dev/null
@@ -0,0 +1,9 @@
+fs-add-helper-functions-for-permanently-empty-directories.patch
+sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch
+proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch
+kernfs-add-support-for-always-empty-directories.patch
+sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch
+sysfs-create-mountpoints-with-sysfs_create_mount_point.patch
+mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch
+mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch
+mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch
diff --git a/next/4.1/sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch b/next/4.1/sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch
new file mode 100644 (file)
index 0000000..659925e
--- /dev/null
@@ -0,0 +1,131 @@
+From f9bd6733d3f11e24f3949becf277507d422ee1eb Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 9 May 2015 22:09:14 -0500
+Subject: sysctl: Allow creating permanently empty directories that serve as mountpoints.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f9bd6733d3f11e24f3949becf277507d422ee1eb upstream.
+
+Add a magic sysctl table sysctl_mount_point that when used to
+create a directory forces that directory to be permanently empty.
+
+Update the code to use make_empty_dir_inode when accessing permanently
+empty directories.
+
+Update the code to not allow adding to permanently empty directories.
+
+Update /proc/sys/fs/binfmt_misc to be a permanently empty directory.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/proc_sysctl.c  |   37 +++++++++++++++++++++++++++++++++++++
+ include/linux/sysctl.h |    3 +++
+ kernel/sysctl.c        |    8 +-------
+ 3 files changed, 41 insertions(+), 7 deletions(-)
+
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -19,6 +19,28 @@ static const struct inode_operations pro
+ static const struct file_operations proc_sys_dir_file_operations;
+ static const struct inode_operations proc_sys_dir_operations;
++/* Support for permanently empty directories */
++
++struct ctl_table sysctl_mount_point[] = {
++      { }
++};
++
++static bool is_empty_dir(struct ctl_table_header *head)
++{
++      return head->ctl_table[0].child == sysctl_mount_point;
++}
++
++static void set_empty_dir(struct ctl_dir *dir)
++{
++      dir->header.ctl_table[0].child = sysctl_mount_point;
++}
++
++static void clear_empty_dir(struct ctl_dir *dir)
++
++{
++      dir->header.ctl_table[0].child = NULL;
++}
++
+ void proc_sys_poll_notify(struct ctl_table_poll *poll)
+ {
+       if (!poll)
+@@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir
+       struct ctl_table *entry;
+       int err;
++      /* Is this a permanently empty directory? */
++      if (is_empty_dir(&dir->header))
++              return -EROFS;
++
++      /* Am I creating a permanently empty directory? */
++      if (header->ctl_table == sysctl_mount_point) {
++              if (!RB_EMPTY_ROOT(&dir->root))
++                      return -EINVAL;
++              set_empty_dir(dir);
++      }
++
+       dir->header.nreg++;
+       header->parent = dir;
+       err = insert_links(header);
+@@ -202,6 +235,8 @@ fail:
+       erase_header(header);
+       put_links(header);
+ fail_links:
++      if (header->ctl_table == sysctl_mount_point)
++              clear_empty_dir(dir);
+       header->parent = NULL;
+       drop_sysctl_table(&dir->header);
+       return err;
+@@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode
+               inode->i_mode |= S_IFDIR;
+               inode->i_op = &proc_sys_dir_operations;
+               inode->i_fop = &proc_sys_dir_file_operations;
++              if (is_empty_dir(head))
++                      make_empty_dir_inode(inode);
+       }
+ out:
+       return inode;
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl
+ void unregister_sysctl_table(struct ctl_table_header * table);
+ extern int sysctl_init(void);
++
++extern struct ctl_table sysctl_mount_point[];
++
+ #else /* CONFIG_SYSCTL */
+ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
+ {
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1531,12 +1531,6 @@ static struct ctl_table vm_table[] = {
+       { }
+ };
+-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+-static struct ctl_table binfmt_misc_table[] = {
+-      { }
+-};
+-#endif
+-
+ static struct ctl_table fs_table[] = {
+       {
+               .procname       = "inode-nr",
+@@ -1690,7 +1684,7 @@ static struct ctl_table fs_table[] = {
+       {
+               .procname       = "binfmt_misc",
+               .mode           = 0555,
+-              .child          = binfmt_misc_table,
++              .child          = sysctl_mount_point,
+       },
+ #endif
+       {
diff --git a/next/4.1/sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch b/next/4.1/sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch
new file mode 100644 (file)
index 0000000..829e153
--- /dev/null
@@ -0,0 +1,95 @@
+From 87d2846fcf88113fae2341da1ca9a71f0d916f2c Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 16:31:40 -0500
+Subject: sysfs: Add support for permanently empty directories to serve as mount points.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 87d2846fcf88113fae2341da1ca9a71f0d916f2c upstream.
+
+Add two functions sysfs_create_mount_point and
+sysfs_remove_mount_point that hang a permanently empty directory off
+of a kobject or remove a permanently empty directory hanging from a
+kobject.  Export these new functions so modular filesystems can use
+them.
+
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/sysfs/dir.c        |   34 ++++++++++++++++++++++++++++++++++
+ include/linux/sysfs.h |   15 +++++++++++++++
+ 2 files changed, 49 insertions(+)
+
+--- a/fs/sysfs/dir.c
++++ b/fs/sysfs/dir.c
+@@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *ko
+       return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
+ }
++
++/**
++ * sysfs_create_mount_point - create an always empty directory
++ * @parent_kobj:  kobject that will contain this always empty directory
++ * @name: The name of the always empty directory to add
++ */
++int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name)
++{
++      struct kernfs_node *kn, *parent = parent_kobj->sd;
++
++      kn = kernfs_create_empty_dir(parent, name);
++      if (IS_ERR(kn)) {
++              if (PTR_ERR(kn) == -EEXIST)
++                      sysfs_warn_dup(parent, name);
++              return PTR_ERR(kn);
++      }
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(sysfs_create_mount_point);
++
++/**
++ *    sysfs_remove_mount_point - remove an always empty directory.
++ *    @parent_kobj: kobject that contains this always empty directory
++ *    @name: The name of the always empty directory to remove
++ *
++ */
++void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name)
++{
++      struct kernfs_node *parent = parent_kobj->sd;
++
++      kernfs_remove_by_name_ns(parent, name, NULL);
++}
++EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
+--- a/include/linux/sysfs.h
++++ b/include/linux/sysfs.h
+@@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(str
+ int __must_check sysfs_move_dir_ns(struct kobject *kobj,
+                                  struct kobject *new_parent_kobj,
+                                  const void *new_ns);
++int __must_check sysfs_create_mount_point(struct kobject *parent_kobj,
++                                        const char *name);
++void sysfs_remove_mount_point(struct kobject *parent_kobj,
++                            const char *name);
+ int __must_check sysfs_create_file_ns(struct kobject *kobj,
+                                     const struct attribute *attr,
+@@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(stru
+       return 0;
+ }
++static inline int sysfs_create_mount_point(struct kobject *parent_kobj,
++                                         const char *name)
++{
++      return 0;
++}
++
++static inline void sysfs_remove_mount_point(struct kobject *parent_kobj,
++                                          const char *name)
++{
++}
++
+ static inline int sysfs_create_file_ns(struct kobject *kobj,
+                                      const struct attribute *attr,
+                                      const void *ns)
diff --git a/next/4.1/sysfs-create-mountpoints-with-sysfs_create_mount_point.patch b/next/4.1/sysfs-create-mountpoints-with-sysfs_create_mount_point.patch
new file mode 100644 (file)
index 0000000..12d8d4c
--- /dev/null
@@ -0,0 +1,372 @@
+From f9bb48825a6b5d02f4cabcc78967c75db903dcdc Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 17:35:41 -0500
+Subject: sysfs: Create mountpoints with sysfs_create_mount_point
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f9bb48825a6b5d02f4cabcc78967c75db903dcdc upstream.
+
+This allows for better documentation in the code and
+it allows for a simpler and fully correct version of
+fs_fully_visible to be written.
+
+The mount points converted and their filesystems are:
+/sys/hypervisor/s390/       s390_hypfs
+/sys/kernel/config/         configfs
+/sys/kernel/debug/          debugfs
+/sys/firmware/efi/efivars/  efivarfs
+/sys/fs/fuse/connections/   fusectl
+/sys/fs/pstore/             pstore
+/sys/kernel/tracing/        tracefs
+/sys/fs/cgroup/             cgroup
+/sys/kernel/security/       securityfs
+/sys/fs/selinux/            selinuxfs
+/sys/fs/smackfs/            smackfs
+
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/hypfs/inode.c      |   12 ++++--------
+ drivers/firmware/efi/efi.c   |    6 ++----
+ fs/configfs/mount.c          |   10 ++++------
+ fs/debugfs/inode.c           |   11 ++++-------
+ fs/fuse/inode.c              |    9 +++------
+ fs/pstore/inode.c            |   12 ++++--------
+ fs/tracefs/inode.c           |    6 ++----
+ kernel/cgroup.c              |   10 ++++------
+ security/inode.c             |   10 ++++------
+ security/selinux/selinuxfs.c |   11 +++++------
+ security/smack/smackfs.c     |    8 ++++----
+ 11 files changed, 40 insertions(+), 65 deletions(-)
+
+--- a/arch/s390/hypfs/inode.c
++++ b/arch/s390/hypfs/inode.c
+@@ -456,8 +456,6 @@ static const struct super_operations hyp
+       .show_options   = hypfs_show_options,
+ };
+-static struct kobject *s390_kobj;
+-
+ static int __init hypfs_init(void)
+ {
+       int rc;
+@@ -481,18 +479,16 @@ static int __init hypfs_init(void)
+               rc = -ENODATA;
+               goto fail_hypfs_sprp_exit;
+       }
+-      s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
+-      if (!s390_kobj) {
+-              rc = -ENOMEM;
++      rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
++      if (rc)
+               goto fail_hypfs_diag0c_exit;
+-      }
+       rc = register_filesystem(&hypfs_type);
+       if (rc)
+               goto fail_filesystem;
+       return 0;
+ fail_filesystem:
+-      kobject_put(s390_kobj);
++      sysfs_remove_mount_point(hypervisor_kobj, "s390");
+ fail_hypfs_diag0c_exit:
+       hypfs_diag0c_exit();
+ fail_hypfs_sprp_exit:
+@@ -510,7 +506,7 @@ fail_dbfs_exit:
+ static void __exit hypfs_exit(void)
+ {
+       unregister_filesystem(&hypfs_type);
+-      kobject_put(s390_kobj);
++      sysfs_remove_mount_point(hypervisor_kobj, "s390");
+       hypfs_diag0c_exit();
+       hypfs_sprp_exit();
+       hypfs_vm_exit();
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -65,7 +65,6 @@ static int __init parse_efi_cmdline(char
+ early_param("efi", parse_efi_cmdline);
+ static struct kobject *efi_kobj;
+-static struct kobject *efivars_kobj;
+ /*
+  * Let's not leave out systab information that snuck into
+@@ -212,10 +211,9 @@ static int __init efisubsys_init(void)
+               goto err_remove_group;
+       /* and the standard mountpoint for efivarfs */
+-      efivars_kobj = kobject_create_and_add("efivars", efi_kobj);
+-      if (!efivars_kobj) {
++      error = sysfs_create_mount_point(efi_kobj, "efivars");
++      if (error) {
+               pr_err("efivars: Subsystem registration failed.\n");
+-              error = -ENOMEM;
+               goto err_remove_group;
+       }
+--- a/fs/configfs/mount.c
++++ b/fs/configfs/mount.c
+@@ -129,8 +129,6 @@ void configfs_release_fs(void)
+ }
+-static struct kobject *config_kobj;
+-
+ static int __init configfs_init(void)
+ {
+       int err = -ENOMEM;
+@@ -141,8 +139,8 @@ static int __init configfs_init(void)
+       if (!configfs_dir_cachep)
+               goto out;
+-      config_kobj = kobject_create_and_add("config", kernel_kobj);
+-      if (!config_kobj)
++      err = sysfs_create_mount_point(kernel_kobj, "config");
++      if (err)
+               goto out2;
+       err = register_filesystem(&configfs_fs_type);
+@@ -152,7 +150,7 @@ static int __init configfs_init(void)
+       return 0;
+ out3:
+       pr_err("Unable to register filesystem!\n");
+-      kobject_put(config_kobj);
++      sysfs_remove_mount_point(kernel_kobj, "config");
+ out2:
+       kmem_cache_destroy(configfs_dir_cachep);
+       configfs_dir_cachep = NULL;
+@@ -163,7 +161,7 @@ out:
+ static void __exit configfs_exit(void)
+ {
+       unregister_filesystem(&configfs_fs_type);
+-      kobject_put(config_kobj);
++      sysfs_remove_mount_point(kernel_kobj, "config");
+       kmem_cache_destroy(configfs_dir_cachep);
+       configfs_dir_cachep = NULL;
+ }
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -716,20 +716,17 @@ bool debugfs_initialized(void)
+ }
+ EXPORT_SYMBOL_GPL(debugfs_initialized);
+-
+-static struct kobject *debug_kobj;
+-
+ static int __init debugfs_init(void)
+ {
+       int retval;
+-      debug_kobj = kobject_create_and_add("debug", kernel_kobj);
+-      if (!debug_kobj)
+-              return -EINVAL;
++      retval = sysfs_create_mount_point(kernel_kobj, "debug");
++      if (retval)
++              return retval;
+       retval = register_filesystem(&debug_fs_type);
+       if (retval)
+-              kobject_put(debug_kobj);
++              sysfs_remove_mount_point(kernel_kobj, "debug");
+       else
+               debugfs_registered = true;
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void)
+ }
+ static struct kobject *fuse_kobj;
+-static struct kobject *connections_kobj;
+ static int fuse_sysfs_init(void)
+ {
+@@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void)
+               goto out_err;
+       }
+-      connections_kobj = kobject_create_and_add("connections", fuse_kobj);
+-      if (!connections_kobj) {
+-              err = -ENOMEM;
++      err = sysfs_create_mount_point(fuse_kobj, "connections");
++      if (err)
+               goto out_fuse_unregister;
+-      }
+       return 0;
+@@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void)
+ static void fuse_sysfs_cleanup(void)
+ {
+-      kobject_put(connections_kobj);
++      sysfs_remove_mount_point(fuse_kobj, "connections");
+       kobject_put(fuse_kobj);
+ }
+--- a/fs/pstore/inode.c
++++ b/fs/pstore/inode.c
+@@ -461,22 +461,18 @@ static struct file_system_type pstore_fs
+       .kill_sb        = pstore_kill_sb,
+ };
+-static struct kobject *pstore_kobj;
+-
+ static int __init init_pstore_fs(void)
+ {
+-      int err = 0;
++      int err;
+       /* Create a convenient mount point for people to access pstore */
+-      pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
+-      if (!pstore_kobj) {
+-              err = -ENOMEM;
++      err = sysfs_create_mount_point(fs_kobj, "pstore");
++      if (err)
+               goto out;
+-      }
+       err = register_filesystem(&pstore_fs_type);
+       if (err < 0)
+-              kobject_put(pstore_kobj);
++              sysfs_remove_mount_point(fs_kobj, "pstore");
+ out:
+       return err;
+--- a/fs/tracefs/inode.c
++++ b/fs/tracefs/inode.c
+@@ -631,14 +631,12 @@ bool tracefs_initialized(void)
+       return tracefs_registered;
+ }
+-static struct kobject *trace_kobj;
+-
+ static int __init tracefs_init(void)
+ {
+       int retval;
+-      trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
+-      if (!trace_kobj)
++      retval = sysfs_create_mount_point(kernel_kobj, "tracing");
++      if (retval)
+               return -EINVAL;
+       retval = register_filesystem(&trace_fs_type);
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -1924,8 +1924,6 @@ static struct file_system_type cgroup_fs
+       .kill_sb = cgroup_kill_sb,
+ };
+-static struct kobject *cgroup_kobj;
+-
+ /**
+  * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
+  * @task: target task
+@@ -5044,13 +5042,13 @@ int __init cgroup_init(void)
+                       ss->bind(init_css_set.subsys[ssid]);
+       }
+-      cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
+-      if (!cgroup_kobj)
+-              return -ENOMEM;
++      err = sysfs_create_mount_point(fs_kobj, "cgroup");
++      if (err)
++              return err;
+       err = register_filesystem(&cgroup_fs_type);
+       if (err < 0) {
+-              kobject_put(cgroup_kobj);
++              sysfs_remove_mount_point(fs_kobj, "cgroup");
+               return err;
+       }
+--- a/security/inode.c
++++ b/security/inode.c
+@@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *de
+ }
+ EXPORT_SYMBOL_GPL(securityfs_remove);
+-static struct kobject *security_kobj;
+-
+ static int __init securityfs_init(void)
+ {
+       int retval;
+-      security_kobj = kobject_create_and_add("security", kernel_kobj);
+-      if (!security_kobj)
+-              return -EINVAL;
++      retval = sysfs_create_mount_point(kernel_kobj, "security");
++      if (retval)
++              return retval;
+       retval = register_filesystem(&fs_type);
+       if (retval)
+-              kobject_put(security_kobj);
++              sysfs_remove_mount_point(kernel_kobj, "security");
+       return retval;
+ }
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_ty
+ };
+ struct vfsmount *selinuxfs_mount;
+-static struct kobject *selinuxfs_kobj;
+ static int __init init_sel_fs(void)
+ {
+@@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void)
+       if (!selinux_enabled)
+               return 0;
+-      selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj);
+-      if (!selinuxfs_kobj)
+-              return -ENOMEM;
++      err = sysfs_create_mount_point(fs_kobj, "selinux");
++      if (err)
++              return err;
+       err = register_filesystem(&sel_fs_type);
+       if (err) {
+-              kobject_put(selinuxfs_kobj);
++              sysfs_remove_mount_point(fs_kobj, "selinux");
+               return err;
+       }
+@@ -1887,7 +1886,7 @@ __initcall(init_sel_fs);
+ #ifdef CONFIG_SECURITY_SELINUX_DISABLE
+ void exit_sel_fs(void)
+ {
+-      kobject_put(selinuxfs_kobj);
++      sysfs_remove_mount_point(fs_kobj, "selinux");
+       kern_unmount(selinuxfs_mount);
+       unregister_filesystem(&sel_fs_type);
+ }
+--- a/security/smack/smackfs.c
++++ b/security/smack/smackfs.c
+@@ -2241,16 +2241,16 @@ static const struct file_operations smk_
+       .llseek         = generic_file_llseek,
+ };
+-static struct kset *smackfs_kset;
+ /**
+  * smk_init_sysfs - initialize /sys/fs/smackfs
+  *
+  */
+ static int smk_init_sysfs(void)
+ {
+-      smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj);
+-      if (!smackfs_kset)
+-              return -ENOMEM;
++      int err;
++      err = sysfs_create_mount_point(fs_kobj, "smackfs");
++      if (err)
++              return err;
+       return 0;
+ }