--- /dev/null
+From fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 9 May 2015 15:54:49 -0500
+Subject: fs: Add helper functions for permanently empty directories.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit fbabfd0f4ee2e8847bf56edf481249ad1bb8c44d upstream.
+
+To ensure it is safe to mount proc and sysfs I need to check if
+filesystems that are mounted on top of them are mounted on truly empty
+directories. Given that some directories can gain entries over time,
+knowing that a directory is empty right now is insufficient.
+
+Therefore add supporting infrastructure for permanently empty
+directories that proc and sysfs can use when they create mount points
+for filesystems and fs_fully_visible can use to test for permanently
+empty directories to ensure that nothing will be gained by mounting a
+fresh copy of proc or sysfs.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/libfs.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/fs.h | 2 +
+ 2 files changed, 98 insertions(+)
+
+--- a/fs/libfs.c
++++ b/fs/libfs.c
+@@ -1093,3 +1093,99 @@ simple_nosetlease(struct file *filp, lon
+ return -EINVAL;
+ }
+ EXPORT_SYMBOL(simple_nosetlease);
++
++
++/*
++ * Operations for a permanently empty directory.
++ */
++static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
++{
++ return ERR_PTR(-ENOENT);
++}
++
++static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ struct inode *inode = d_inode(dentry);
++ generic_fillattr(inode, stat);
++ return 0;
++}
++
++static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
++{
++ return -EPERM;
++}
++
++static int empty_dir_setxattr(struct dentry *dentry, const char *name,
++ const void *value, size_t size, int flags)
++{
++ return -EOPNOTSUPP;
++}
++
++static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name,
++ void *value, size_t size)
++{
++ return -EOPNOTSUPP;
++}
++
++static int empty_dir_removexattr(struct dentry *dentry, const char *name)
++{
++ return -EOPNOTSUPP;
++}
++
++static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
++{
++ return -EOPNOTSUPP;
++}
++
++static const struct inode_operations empty_dir_inode_operations = {
++ .lookup = empty_dir_lookup,
++ .permission = generic_permission,
++ .setattr = empty_dir_setattr,
++ .getattr = empty_dir_getattr,
++ .setxattr = empty_dir_setxattr,
++ .getxattr = empty_dir_getxattr,
++ .removexattr = empty_dir_removexattr,
++ .listxattr = empty_dir_listxattr,
++};
++
++static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
++{
++ /* An empty directory has two entries . and .. at offsets 0 and 1 */
++ return generic_file_llseek_size(file, offset, whence, 2, 2);
++}
++
++static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
++{
++ dir_emit_dots(file, ctx);
++ return 0;
++}
++
++static const struct file_operations empty_dir_operations = {
++ .llseek = empty_dir_llseek,
++ .read = generic_read_dir,
++ .iterate = empty_dir_readdir,
++ .fsync = noop_fsync,
++};
++
++
++void make_empty_dir_inode(struct inode *inode)
++{
++ set_nlink(inode, 2);
++ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
++ inode->i_uid = GLOBAL_ROOT_UID;
++ inode->i_gid = GLOBAL_ROOT_GID;
++ inode->i_rdev = 0;
++ inode->i_size = 2;
++ inode->i_blkbits = PAGE_SHIFT;
++ inode->i_blocks = 0;
++
++ inode->i_op = &empty_dir_inode_operations;
++ inode->i_fop = &empty_dir_operations;
++}
++
++bool is_empty_dir_inode(struct inode *inode)
++{
++ return (inode->i_fop == &empty_dir_operations) &&
++ (inode->i_op == &empty_dir_inode_operations);
++}
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -2721,6 +2721,8 @@ extern struct dentry *simple_lookup(stru
+ extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
+ extern const struct file_operations simple_dir_operations;
+ extern const struct inode_operations simple_dir_inode_operations;
++extern void make_empty_dir_inode(struct inode *inode);
++extern bool is_empty_dir_inode(struct inode *inode);
+ struct tree_descr { char *name; const struct file_operations *ops; int mode; };
+ struct dentry *d_alloc_name(struct dentry *, const char *);
+ extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
--- /dev/null
+From ea015218f2f7ace2dad9cedd21ed95bdba2886d7 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 16:09:29 -0500
+Subject: kernfs: Add support for always empty directories.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit ea015218f2f7ace2dad9cedd21ed95bdba2886d7 upstream.
+
+Add a new function kernfs_create_empty_dir that can be used to create a
+directory that cannot be modified.
+
+Update the code to use make_empty_dir_inode when reporting a
+permanently empty directory to the vfs.
+
+Update the code to not allow adding to permanently empty directories.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/kernfs/dir.c | 38 +++++++++++++++++++++++++++++++++++++-
+ fs/kernfs/inode.c | 2 ++
+ include/linux/kernfs.h | 3 +++
+ 3 files changed, 42 insertions(+), 1 deletion(-)
+
+--- a/fs/kernfs/dir.c
++++ b/fs/kernfs/dir.c
+@@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *k
+ goto out_unlock;
+
+ ret = -ENOENT;
++ if (parent->flags & KERNFS_EMPTY_DIR)
++ goto out_unlock;
++
+ if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+ goto out_unlock;
+
+@@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns
+ return ERR_PTR(rc);
+ }
+
++/**
++ * kernfs_create_empty_dir - create an always empty directory
++ * @parent: parent in which to create a new directory
++ * @name: name of the new directory
++ *
++ * Returns the created node on success, ERR_PTR() value on failure.
++ */
++struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
++ const char *name)
++{
++ struct kernfs_node *kn;
++ int rc;
++
++ /* allocate */
++ kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
++ if (!kn)
++ return ERR_PTR(-ENOMEM);
++
++ kn->flags |= KERNFS_EMPTY_DIR;
++ kn->dir.root = parent->dir.root;
++ kn->ns = NULL;
++ kn->priv = NULL;
++
++ /* link in */
++ rc = kernfs_add_one(kn);
++ if (!rc)
++ return kn;
++
++ kernfs_put(kn);
++ return ERR_PTR(rc);
++}
++
+ static struct dentry *kernfs_iop_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+@@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node
+ mutex_lock(&kernfs_mutex);
+
+ error = -ENOENT;
+- if (!kernfs_active(kn) || !kernfs_active(new_parent))
++ if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
++ (new_parent->flags & KERNFS_EMPTY_DIR))
+ goto out;
+
+ error = 0;
+--- a/fs/kernfs/inode.c
++++ b/fs/kernfs/inode.c
+@@ -296,6 +296,8 @@ static void kernfs_init_inode(struct ker
+ case KERNFS_DIR:
+ inode->i_op = &kernfs_dir_iops;
+ inode->i_fop = &kernfs_dir_fops;
++ if (kn->flags & KERNFS_EMPTY_DIR)
++ make_empty_dir_inode(inode);
+ break;
+ case KERNFS_FILE:
+ inode->i_size = kn->attr.size;
+--- a/include/linux/kernfs.h
++++ b/include/linux/kernfs.h
+@@ -45,6 +45,7 @@ enum kernfs_node_flag {
+ KERNFS_LOCKDEP = 0x0100,
+ KERNFS_SUICIDAL = 0x0400,
+ KERNFS_SUICIDED = 0x0800,
++ KERNFS_EMPTY_DIR = 0x1000,
+ };
+
+ /* @flags for kernfs_create_root() */
+@@ -285,6 +286,8 @@ void kernfs_destroy_root(struct kernfs_r
+ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
+ const char *name, umode_t mode,
+ void *priv, const void *ns);
++struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
++ const char *name);
+ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
+ const char *name,
+ umode_t mode, loff_t size,
--- /dev/null
+From 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 May 2015 23:49:47 -0500
+Subject: mnt: Modify fs_fully_visible to deal with locked ro nodev and atime
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 8c6cf9cc829fcd0b179b59f7fe288941d0e31108 upstream.
+
+Ignore an existing mount if the locked readonly, nodev or atime
+attributes are less permissive than the desired attributes
+of the new mount.
+
+On success ensure the new mount locks all of the same readonly, nodev and
+atime attributes as the old mount.
+
+The nosuid and noexec attributes are not checked here as this change
+is destined for stable and enforcing those attributes causes a
+regression in lxc and libvirt-lxc where those applications will not
+start and there are no known executables on sysfs or proc and no known
+way to create executables without code modifications.
+
+Fixes: e51db73532955 ("userns: Better restrictions on when proc and sysfs can be mounted")
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 24 +++++++++++++++++++++---
+ 1 file changed, 21 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2334,7 +2334,7 @@ unlock:
+ return err;
+ }
+
+-static bool fs_fully_visible(struct file_system_type *fs_type);
++static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
+
+ /*
+ * create a new mount for userspace and request it to be added into the
+@@ -2368,7 +2368,7 @@ static int do_new_mount(struct path *pat
+ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+ }
+ if (type->fs_flags & FS_USERNS_VISIBLE) {
+- if (!fs_fully_visible(type))
++ if (!fs_fully_visible(type, &mnt_flags))
+ return -EPERM;
+ }
+ }
+@@ -3172,9 +3172,10 @@ bool current_chrooted(void)
+ return chrooted;
+ }
+
+-static bool fs_fully_visible(struct file_system_type *type)
++static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
+ {
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
++ int new_flags = *new_mnt_flags;
+ struct mount *mnt;
+ bool visible = false;
+
+@@ -3193,6 +3194,19 @@ static bool fs_fully_visible(struct file
+ if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
+ continue;
+
++ /* Verify the mount flags are equal to or more permissive
++ * than the proposed new mount.
++ */
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
++ !(new_flags & MNT_READONLY))
++ continue;
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
++ !(new_flags & MNT_NODEV))
++ continue;
++ if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
++ ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
++ continue;
++
+ /* This mount is not fully visible if there are any
+ * locked child mounts that cover anything except for
+ * empty directories.
+@@ -3206,6 +3220,10 @@ static bool fs_fully_visible(struct file
+ if (!is_empty_dir_inode(inode))
+ goto next;
+ }
++ /* Preserve the locked attributes */
++ *new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \
++ MNT_LOCK_NODEV | \
++ MNT_LOCK_ATIME);
+ visible = true;
+ goto found;
+ next: ;
--- /dev/null
+From 1b852bceb0d111e510d1a15826ecc4a19358d512 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 8 May 2015 23:22:29 -0500
+Subject: mnt: Refactor the logic for mounting sysfs and proc in a user namespace
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 1b852bceb0d111e510d1a15826ecc4a19358d512 upstream.
+
+Fresh mounts of proc and sysfs are a very special case that works very
+much like a bind mount. Unfortunately the current structure can not
+preserve the MNT_LOCK... mount flags. Therefore refactor the logic
+into a form that can be modified to preserve those lock bits.
+
+Add a new filesystem flag FS_USERNS_VISIBLE that requires some mount
+of the filesystem be fully visible in the current mount namespace,
+before the filesystem may be mounted.
+
+Move the logic for calling fs_fully_visible from proc and sysfs into
+fs/namespace.c where it has greater access to mount namespace state.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 8 +++++++-
+ fs/proc/root.c | 5 +----
+ fs/sysfs/mount.c | 5 +----
+ include/linux/fs.h | 2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -2334,6 +2334,8 @@ unlock:
+ return err;
+ }
+
++static bool fs_fully_visible(struct file_system_type *fs_type);
++
+ /*
+ * create a new mount for userspace and request it to be added into the
+ * namespace's tree
+@@ -2365,6 +2367,10 @@ static int do_new_mount(struct path *pat
+ flags |= MS_NODEV;
+ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
+ }
++ if (type->fs_flags & FS_USERNS_VISIBLE) {
++ if (!fs_fully_visible(type))
++ return -EPERM;
++ }
+ }
+
+ mnt = vfs_kern_mount(type, flags, name, data);
+@@ -3166,7 +3172,7 @@ bool current_chrooted(void)
+ return chrooted;
+ }
+
+-bool fs_fully_visible(struct file_system_type *type)
++static bool fs_fully_visible(struct file_system_type *type)
+ {
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ struct mount *mnt;
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct
+ ns = task_active_pid_ns(current);
+ options = data;
+
+- if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+- return ERR_PTR(-EPERM);
+-
+ /* Does the mounter have privilege over the pid namespace? */
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+@@ -159,7 +156,7 @@ static struct file_system_type proc_fs_t
+ .name = "proc",
+ .mount = proc_mount,
+ .kill_sb = proc_kill_sb,
+- .fs_flags = FS_USERNS_MOUNT,
++ .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ };
+
+ void __init proc_root_init(void)
+--- a/fs/sysfs/mount.c
++++ b/fs/sysfs/mount.c
+@@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct
+ bool new_sb;
+
+ if (!(flags & MS_KERNMOUNT)) {
+- if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
+- return ERR_PTR(-EPERM);
+-
+ if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
+ return ERR_PTR(-EPERM);
+ }
+@@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_
+ .name = "sysfs",
+ .mount = sysfs_mount,
+ .kill_sb = sysfs_kill_sb,
+- .fs_flags = FS_USERNS_MOUNT,
++ .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
+ };
+
+ int __init sysfs_init(void)
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1863,6 +1863,7 @@ struct file_system_type {
+ #define FS_HAS_SUBTYPE 4
+ #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
+ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
++#define FS_USERNS_VISIBLE 32 /* FS must already be visible */
+ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
+ struct dentry *(*mount) (struct file_system_type *, int,
+ const char *, void *);
+@@ -1950,7 +1951,6 @@ extern int vfs_ustat(dev_t, struct kstat
+ extern int freeze_super(struct super_block *super);
+ extern int thaw_super(struct super_block *super);
+ extern bool our_mnt(struct vfsmount *mnt);
+-extern bool fs_fully_visible(struct file_system_type *);
+
+ extern int current_umask(void);
+
--- /dev/null
+From 7236c85e1be51a9e25ba0f6e087a66ca89605a49 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 20:51:09 -0500
+Subject: mnt: Update fs_fully_visible to test for permanently empty directories
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 7236c85e1be51a9e25ba0f6e087a66ca89605a49 upstream.
+
+fs_fully_visible attempts to make fresh mounts of proc and sysfs give
+the mounter no more access to proc and sysfs than they could have
+obtained by creating a bind mount. One aspect of proc and sysfs that makes
+this particularly tricky is that there are other filesystems that
+typically mount on top of proc and sysfs. As those filesystems are
+mounted on empty directories in practice it is safe to ignore them.
+However testing to ensure filesystems are mounted on empty directories
+has not been something the in kernel data structures have supported so
+the current test for an empty directory which checks to see
+if nlink <= 2 is a bit lacking.
+
+proc and sysfs have recently been modified to use the new empty_dir
+infrastructure to create all of their dedicated mount points. Instead
+of testing for S_ISDIR(inode->i_mode) && i_nlink <= 2 to see if a
+directory is empty, test for is_empty_dir_inode(inode). That small
+change guarantees mounts found on proc and sysfs really are safe to
+ignore, because the directories are not only empty but nothing can
+ever be added to them. This guarantees there is nothing to worry
+about when mounting proc and sysfs.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -3196,9 +3196,8 @@ bool fs_fully_visible(struct file_system
+ /* Only worry about locked mounts */
+ if (!(mnt->mnt.mnt_flags & MNT_LOCKED))
+ continue;
+- if (!S_ISDIR(inode->i_mode))
+- goto next;
+- if (inode->i_nlink > 2)
++ /* Is the directory permanently empty? */
++ if (!is_empty_dir_inode(inode))
+ goto next;
+ }
+ visible = true;
--- /dev/null
+From eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Mon, 11 May 2015 16:44:25 -0500
+Subject: proc: Allow creating permanently empty directories that serve as mount points
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit eb6d38d5427b3ad42f5268da0f1dd31bb0af1264 upstream.
+
+Add a new function proc_create_mount_point that creates a
+directory that cannot be added to.
+
+Add a new function is_empty_pde to test if a proc directory entry is a
+mount point.
+
+Update the code to use make_empty_dir_inode when reporting
+a permanently empty directory to the vfs.
+
+Update the code to not allow adding to permanently empty directories.
+
+Update /proc/openprom and /proc/fs/nfsd to be permanently empty directories.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/generic.c | 23 +++++++++++++++++++++++
+ fs/proc/inode.c | 4 ++++
+ fs/proc/internal.h | 6 ++++++
+ fs/proc/root.c | 4 ++--
+ 4 files changed, 35 insertions(+), 2 deletions(-)
+
+--- a/fs/proc/generic.c
++++ b/fs/proc/generic.c
+@@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_cre
+ WARN(1, "create '/proc/%s' by hand\n", qstr.name);
+ return NULL;
+ }
++ if (is_empty_pde(*parent)) {
++ WARN(1, "attempt to add to permanently empty directory");
++ return NULL;
++ }
+
+ ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
+ if (!ent)
+@@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const
+ }
+ EXPORT_SYMBOL(proc_mkdir);
+
++struct proc_dir_entry *proc_create_mount_point(const char *name)
++{
++ umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO;
++ struct proc_dir_entry *ent, *parent = NULL;
++
++ ent = __proc_create(&parent, name, mode, 2);
++ if (ent) {
++ ent->data = NULL;
++ ent->proc_fops = NULL;
++ ent->proc_iops = NULL;
++ if (proc_register(parent, ent) < 0) {
++ kfree(ent);
++ parent->nlink--;
++ ent = NULL;
++ }
++ }
++ return ent;
++}
++
+ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent,
+ const struct file_operations *proc_fops,
+--- a/fs/proc/inode.c
++++ b/fs/proc/inode.c
+@@ -423,6 +423,10 @@ struct inode *proc_get_inode(struct supe
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ PROC_I(inode)->pde = de;
+
++ if (is_empty_pde(de)) {
++ make_empty_dir_inode(inode);
++ return inode;
++ }
+ if (de->mode) {
+ inode->i_mode = de->mode;
+ inode->i_uid = de->uid;
+--- a/fs/proc/internal.h
++++ b/fs/proc/internal.h
+@@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde
+ }
+ extern void pde_put(struct proc_dir_entry *);
+
++static inline bool is_empty_pde(const struct proc_dir_entry *pde)
++{
++ return S_ISDIR(pde->mode) && !pde->proc_iops;
++}
++struct proc_dir_entry *proc_create_mount_point(const char *name);
++
+ /*
+ * inode.c
+ */
+--- a/fs/proc/root.c
++++ b/fs/proc/root.c
+@@ -182,10 +182,10 @@ void __init proc_root_init(void)
+ #endif
+ proc_mkdir("fs", NULL);
+ proc_mkdir("driver", NULL);
+- proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
++ proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
+ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
+ /* just give it a mountpoint */
+- proc_mkdir("openprom", NULL);
++ proc_create_mount_point("openprom");
+ #endif
+ proc_tty_init();
+ proc_mkdir("bus", NULL);
--- /dev/null
+fs-add-helper-functions-for-permanently-empty-directories.patch
+sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch
+proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch
+kernfs-add-support-for-always-empty-directories.patch
+sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch
+mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch
+mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch
+mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch
+sysfs-create-mountpoints-with-sysfs_create_mount_point.patch
--- /dev/null
+From f9bd6733d3f11e24f3949becf277507d422ee1eb Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 9 May 2015 22:09:14 -0500
+Subject: sysctl: Allow creating permanently empty directories that serve as mountpoints.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f9bd6733d3f11e24f3949becf277507d422ee1eb upstream.
+
+Add a magic sysctl table sysctl_mount_point that when used to
+create a directory forces that directory to be permanently empty.
+
+Update the code to use make_empty_dir_inode when accessing permanently
+empty directories.
+
+Update the code to not allow adding to permanently empty directories.
+
+Update /proc/sys/fs/binfmt_misc to be a permanently empty directory.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/proc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++++
+ include/linux/sysctl.h | 3 +++
+ kernel/sysctl.c | 8 +-------
+ 3 files changed, 41 insertions(+), 7 deletions(-)
+
+--- a/fs/proc/proc_sysctl.c
++++ b/fs/proc/proc_sysctl.c
+@@ -19,6 +19,28 @@ static const struct inode_operations pro
+ static const struct file_operations proc_sys_dir_file_operations;
+ static const struct inode_operations proc_sys_dir_operations;
+
++/* Support for permanently empty directories */
++
++struct ctl_table sysctl_mount_point[] = {
++ { }
++};
++
++static bool is_empty_dir(struct ctl_table_header *head)
++{
++ return head->ctl_table[0].child == sysctl_mount_point;
++}
++
++static void set_empty_dir(struct ctl_dir *dir)
++{
++ dir->header.ctl_table[0].child = sysctl_mount_point;
++}
++
++static void clear_empty_dir(struct ctl_dir *dir)
++
++{
++ dir->header.ctl_table[0].child = NULL;
++}
++
+ void proc_sys_poll_notify(struct ctl_table_poll *poll)
+ {
+ if (!poll)
+@@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir
+ struct ctl_table *entry;
+ int err;
+
++ /* Is this a permanently empty directory? */
++ if (is_empty_dir(&dir->header))
++ return -EROFS;
++
++ /* Am I creating a permanently empty directory? */
++ if (header->ctl_table == sysctl_mount_point) {
++ if (!RB_EMPTY_ROOT(&dir->root))
++ return -EINVAL;
++ set_empty_dir(dir);
++ }
++
+ dir->header.nreg++;
+ header->parent = dir;
+ err = insert_links(header);
+@@ -202,6 +235,8 @@ fail:
+ erase_header(header);
+ put_links(header);
+ fail_links:
++ if (header->ctl_table == sysctl_mount_point)
++ clear_empty_dir(dir);
+ header->parent = NULL;
+ drop_sysctl_table(&dir->header);
+ return err;
+@@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode
+ inode->i_mode |= S_IFDIR;
+ inode->i_op = &proc_sys_dir_operations;
+ inode->i_fop = &proc_sys_dir_file_operations;
++ if (is_empty_dir(head))
++ make_empty_dir_inode(inode);
+ }
+ out:
+ return inode;
+--- a/include/linux/sysctl.h
++++ b/include/linux/sysctl.h
+@@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl
+ void unregister_sysctl_table(struct ctl_table_header * table);
+
+ extern int sysctl_init(void);
++
++extern struct ctl_table sysctl_mount_point[];
++
+ #else /* CONFIG_SYSCTL */
+ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
+ {
+--- a/kernel/sysctl.c
++++ b/kernel/sysctl.c
+@@ -1510,12 +1510,6 @@ static struct ctl_table vm_table[] = {
+ { }
+ };
+
+-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+-static struct ctl_table binfmt_misc_table[] = {
+- { }
+-};
+-#endif
+-
+ static struct ctl_table fs_table[] = {
+ {
+ .procname = "inode-nr",
+@@ -1669,7 +1663,7 @@ static struct ctl_table fs_table[] = {
+ {
+ .procname = "binfmt_misc",
+ .mode = 0555,
+- .child = binfmt_misc_table,
++ .child = sysctl_mount_point,
+ },
+ #endif
+ {
--- /dev/null
+From 87d2846fcf88113fae2341da1ca9a71f0d916f2c Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 16:31:40 -0500
+Subject: sysfs: Add support for permanently empty directories to serve as mount points.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 87d2846fcf88113fae2341da1ca9a71f0d916f2c upstream.
+
+Add two functions sysfs_create_mount_point and
+sysfs_remove_mount_point that hang a permanently empty directory off
+of a kobject or remove a permanently empty directory hanging from a
+kobject. Export these new functions so modular filesystems can use
+them.
+
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/sysfs/dir.c | 34 ++++++++++++++++++++++++++++++++++
+ include/linux/sysfs.h | 15 +++++++++++++++
+ 2 files changed, 49 insertions(+)
+
+--- a/fs/sysfs/dir.c
++++ b/fs/sysfs/dir.c
+@@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *ko
+
+ return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
+ }
++
++/**
++ * sysfs_create_mount_point - create an always empty directory
++ * @parent_kobj: kobject that will contain this always empty directory
++ * @name: The name of the always empty directory to add
++ */
++int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name)
++{
++ struct kernfs_node *kn, *parent = parent_kobj->sd;
++
++ kn = kernfs_create_empty_dir(parent, name);
++ if (IS_ERR(kn)) {
++ if (PTR_ERR(kn) == -EEXIST)
++ sysfs_warn_dup(parent, name);
++ return PTR_ERR(kn);
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(sysfs_create_mount_point);
++
++/**
++ * sysfs_remove_mount_point - remove an always empty directory.
++ * @parent_kobj: kobject that contains this always empty directory
++ * @name: The name of the always empty directory to remove
++ *
++ */
++void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name)
++{
++ struct kernfs_node *parent = parent_kobj->sd;
++
++ kernfs_remove_by_name_ns(parent, name, NULL);
++}
++EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
+--- a/include/linux/sysfs.h
++++ b/include/linux/sysfs.h
+@@ -195,6 +195,10 @@ int __must_check sysfs_rename_dir_ns(str
+ int __must_check sysfs_move_dir_ns(struct kobject *kobj,
+ struct kobject *new_parent_kobj,
+ const void *new_ns);
++int __must_check sysfs_create_mount_point(struct kobject *parent_kobj,
++ const char *name);
++void sysfs_remove_mount_point(struct kobject *parent_kobj,
++ const char *name);
+
+ int __must_check sysfs_create_file_ns(struct kobject *kobj,
+ const struct attribute *attr,
+@@ -283,6 +287,17 @@ static inline int sysfs_move_dir_ns(stru
+ return 0;
+ }
+
++static inline int sysfs_create_mount_point(struct kobject *parent_kobj,
++ const char *name)
++{
++ return 0;
++}
++
++static inline void sysfs_remove_mount_point(struct kobject *parent_kobj,
++ const char *name)
++{
++}
++
+ static inline int sysfs_create_file_ns(struct kobject *kobj,
+ const struct attribute *attr,
+ const void *ns)
--- /dev/null
+From f9bb48825a6b5d02f4cabcc78967c75db903dcdc Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 May 2015 17:35:41 -0500
+Subject: sysfs: Create mountpoints with sysfs_create_mount_point
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f9bb48825a6b5d02f4cabcc78967c75db903dcdc upstream.
+
+This allows for better documentation in the code and
+it allows for a simpler and fully correct version of
+fs_fully_visible to be written.
+
+The mount points converted and their filesystems are:
+/sys/hypervisor/s390/ s390_hypfs
+/sys/kernel/config/ configfs
+/sys/kernel/debug/ debugfs
+/sys/firmware/efi/efivars/ efivarfs
+/sys/fs/fuse/connections/ fusectl
+/sys/fs/pstore/ pstore
+/sys/kernel/tracing/ tracefs
+/sys/fs/cgroup/ cgroup
+/sys/kernel/security/ securityfs
+/sys/fs/selinux/ selinuxfs
+/sys/fs/smackfs/ smackfs
+
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/hypfs/inode.c | 12 ++++--------
+ drivers/firmware/efi/efi.c | 6 ++----
+ fs/configfs/mount.c | 10 ++++------
+ fs/debugfs/inode.c | 11 ++++-------
+ fs/fuse/inode.c | 9 +++------
+ fs/pstore/inode.c | 12 ++++--------
+ kernel/cgroup.c | 10 ++++------
+ security/inode.c | 10 ++++------
+ security/selinux/selinuxfs.c | 11 +++++------
+ security/smack/smackfs.c | 8 ++++----
+ 10 files changed, 38 insertions(+), 61 deletions(-)
+
+--- a/arch/s390/hypfs/inode.c
++++ b/arch/s390/hypfs/inode.c
+@@ -458,8 +458,6 @@ static const struct super_operations hyp
+ .show_options = hypfs_show_options,
+ };
+
+-static struct kobject *s390_kobj;
+-
+ static int __init hypfs_init(void)
+ {
+ int rc;
+@@ -483,18 +481,16 @@ static int __init hypfs_init(void)
+ rc = -ENODATA;
+ goto fail_hypfs_sprp_exit;
+ }
+- s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
+- if (!s390_kobj) {
+- rc = -ENOMEM;
++ rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
++ if (rc)
+ goto fail_hypfs_diag0c_exit;
+- }
+ rc = register_filesystem(&hypfs_type);
+ if (rc)
+ goto fail_filesystem;
+ return 0;
+
+ fail_filesystem:
+- kobject_put(s390_kobj);
++ sysfs_remove_mount_point(hypervisor_kobj, "s390");
+ fail_hypfs_diag0c_exit:
+ hypfs_diag0c_exit();
+ fail_hypfs_sprp_exit:
+@@ -512,7 +508,7 @@ fail_dbfs_exit:
+ static void __exit hypfs_exit(void)
+ {
+ unregister_filesystem(&hypfs_type);
+- kobject_put(s390_kobj);
++ sysfs_remove_mount_point(hypervisor_kobj, "s390");
+ hypfs_diag0c_exit();
+ hypfs_sprp_exit();
+ hypfs_vm_exit();
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -65,7 +65,6 @@ static int __init parse_efi_cmdline(char
+ early_param("efi", parse_efi_cmdline);
+
+ static struct kobject *efi_kobj;
+-static struct kobject *efivars_kobj;
+
+ /*
+ * Let's not leave out systab information that snuck into
+@@ -212,10 +211,9 @@ static int __init efisubsys_init(void)
+ goto err_remove_group;
+
+ /* and the standard mountpoint for efivarfs */
+- efivars_kobj = kobject_create_and_add("efivars", efi_kobj);
+- if (!efivars_kobj) {
++ error = sysfs_create_mount_point(efi_kobj, "efivars");
++ if (error) {
+ pr_err("efivars: Subsystem registration failed.\n");
+- error = -ENOMEM;
+ goto err_remove_group;
+ }
+
+--- a/fs/configfs/mount.c
++++ b/fs/configfs/mount.c
+@@ -129,8 +129,6 @@ void configfs_release_fs(void)
+ }
+
+
+-static struct kobject *config_kobj;
+-
+ static int __init configfs_init(void)
+ {
+ int err = -ENOMEM;
+@@ -141,8 +139,8 @@ static int __init configfs_init(void)
+ if (!configfs_dir_cachep)
+ goto out;
+
+- config_kobj = kobject_create_and_add("config", kernel_kobj);
+- if (!config_kobj)
++ err = sysfs_create_mount_point(kernel_kobj, "config");
++ if (err)
+ goto out2;
+
+ err = register_filesystem(&configfs_fs_type);
+@@ -152,7 +150,7 @@ static int __init configfs_init(void)
+ return 0;
+ out3:
+ pr_err("Unable to register filesystem!\n");
+- kobject_put(config_kobj);
++ sysfs_remove_mount_point(kernel_kobj, "config");
+ out2:
+ kmem_cache_destroy(configfs_dir_cachep);
+ configfs_dir_cachep = NULL;
+@@ -163,7 +161,7 @@ out:
+ static void __exit configfs_exit(void)
+ {
+ unregister_filesystem(&configfs_fs_type);
+- kobject_put(config_kobj);
++ sysfs_remove_mount_point(kernel_kobj, "config");
+ kmem_cache_destroy(configfs_dir_cachep);
+ configfs_dir_cachep = NULL;
+ }
+--- a/fs/debugfs/inode.c
++++ b/fs/debugfs/inode.c
+@@ -713,20 +713,17 @@ bool debugfs_initialized(void)
+ }
+ EXPORT_SYMBOL_GPL(debugfs_initialized);
+
+-
+-static struct kobject *debug_kobj;
+-
+ static int __init debugfs_init(void)
+ {
+ int retval;
+
+- debug_kobj = kobject_create_and_add("debug", kernel_kobj);
+- if (!debug_kobj)
+- return -EINVAL;
++ retval = sysfs_create_mount_point(kernel_kobj, "debug");
++ if (retval)
++ return retval;
+
+ retval = register_filesystem(&debug_fs_type);
+ if (retval)
+- kobject_put(debug_kobj);
++ sysfs_remove_mount_point(kernel_kobj, "debug");
+ else
+ debugfs_registered = true;
+
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -1238,7 +1238,6 @@ static void fuse_fs_cleanup(void)
+ }
+
+ static struct kobject *fuse_kobj;
+-static struct kobject *connections_kobj;
+
+ static int fuse_sysfs_init(void)
+ {
+@@ -1250,11 +1249,9 @@ static int fuse_sysfs_init(void)
+ goto out_err;
+ }
+
+- connections_kobj = kobject_create_and_add("connections", fuse_kobj);
+- if (!connections_kobj) {
+- err = -ENOMEM;
++ err = sysfs_create_mount_point(fuse_kobj, "connections");
++ if (err)
+ goto out_fuse_unregister;
+- }
+
+ return 0;
+
+@@ -1266,7 +1263,7 @@ static int fuse_sysfs_init(void)
+
+ static void fuse_sysfs_cleanup(void)
+ {
+- kobject_put(connections_kobj);
++ sysfs_remove_mount_point(fuse_kobj, "connections");
+ kobject_put(fuse_kobj);
+ }
+
+--- a/fs/pstore/inode.c
++++ b/fs/pstore/inode.c
+@@ -458,22 +458,18 @@ static struct file_system_type pstore_fs
+ .kill_sb = pstore_kill_sb,
+ };
+
+-static struct kobject *pstore_kobj;
+-
+ static int __init init_pstore_fs(void)
+ {
+- int err = 0;
++ int err;
+
+ /* Create a convenient mount point for people to access pstore */
+- pstore_kobj = kobject_create_and_add("pstore", fs_kobj);
+- if (!pstore_kobj) {
+- err = -ENOMEM;
++ err = sysfs_create_mount_point(fs_kobj, "pstore");
++ if (err)
+ goto out;
+- }
+
+ err = register_filesystem(&pstore_fs_type);
+ if (err < 0)
+- kobject_put(pstore_kobj);
++ sysfs_remove_mount_point(fs_kobj, "pstore");
+
+ out:
+ return err;
+--- a/kernel/cgroup.c
++++ b/kernel/cgroup.c
+@@ -1924,8 +1924,6 @@ static struct file_system_type cgroup_fs
+ .kill_sb = cgroup_kill_sb,
+ };
+
+-static struct kobject *cgroup_kobj;
+-
+ /**
+ * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
+ * @task: target task
+@@ -5042,13 +5040,13 @@ int __init cgroup_init(void)
+ }
+ }
+
+- cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
+- if (!cgroup_kobj)
+- return -ENOMEM;
++ err = sysfs_create_mount_point(fs_kobj, "cgroup");
++ if (err)
++ return err;
+
+ err = register_filesystem(&cgroup_fs_type);
+ if (err < 0) {
+- kobject_put(cgroup_kobj);
++ sysfs_remove_mount_point(fs_kobj, "cgroup");
+ return err;
+ }
+
+--- a/security/inode.c
++++ b/security/inode.c
+@@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *de
+ }
+ EXPORT_SYMBOL_GPL(securityfs_remove);
+
+-static struct kobject *security_kobj;
+-
+ static int __init securityfs_init(void)
+ {
+ int retval;
+
+- security_kobj = kobject_create_and_add("security", kernel_kobj);
+- if (!security_kobj)
+- return -EINVAL;
++ retval = sysfs_create_mount_point(kernel_kobj, "security");
++ if (retval)
++ return retval;
+
+ retval = register_filesystem(&fs_type);
+ if (retval)
+- kobject_put(security_kobj);
++ sysfs_remove_mount_point(kernel_kobj, "security");
+ return retval;
+ }
+
+--- a/security/selinux/selinuxfs.c
++++ b/security/selinux/selinuxfs.c
+@@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_ty
+ };
+
+ struct vfsmount *selinuxfs_mount;
+-static struct kobject *selinuxfs_kobj;
+
+ static int __init init_sel_fs(void)
+ {
+@@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void)
+ if (!selinux_enabled)
+ return 0;
+
+- selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj);
+- if (!selinuxfs_kobj)
+- return -ENOMEM;
++ err = sysfs_create_mount_point(fs_kobj, "selinux");
++ if (err)
++ return err;
+
+ err = register_filesystem(&sel_fs_type);
+ if (err) {
+- kobject_put(selinuxfs_kobj);
++ sysfs_remove_mount_point(fs_kobj, "selinux");
+ return err;
+ }
+
+@@ -1887,7 +1886,7 @@ __initcall(init_sel_fs);
+ #ifdef CONFIG_SECURITY_SELINUX_DISABLE
+ void exit_sel_fs(void)
+ {
+- kobject_put(selinuxfs_kobj);
++ sysfs_remove_mount_point(fs_kobj, "selinux");
+ kern_unmount(selinuxfs_mount);
+ unregister_filesystem(&sel_fs_type);
+ }
+--- a/security/smack/smackfs.c
++++ b/security/smack/smackfs.c
+@@ -2150,16 +2150,16 @@ static const struct file_operations smk_
+ .llseek = generic_file_llseek,
+ };
+
+-static struct kset *smackfs_kset;
+ /**
+ * smk_init_sysfs - initialize /sys/fs/smackfs
+ *
+ */
+ static int smk_init_sysfs(void)
+ {
+- smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj);
+- if (!smackfs_kset)
+- return -ENOMEM;
++ int err;
++ err = sysfs_create_mount_point(fs_kobj, "smackfs");
++ if (err)
++ return err;
+ return 0;
+ }
+
--- /dev/null
+fs-add-helper-functions-for-permanently-empty-directories.patch
+sysctl-allow-creating-permanently-empty-directories-that-serve-as-mountpoints.patch
+proc-allow-creating-permanently-empty-directories-that-serve-as-mount-points.patch
+kernfs-add-support-for-always-empty-directories.patch
+sysfs-add-support-for-permanently-empty-directories-to-serve-as-mount-points.patch
+sysfs-create-mountpoints-with-sysfs_create_mount_point.patch
+mnt-update-fs_fully_visible-to-test-for-permanently-empty-directories.patch
+mnt-refactor-the-logic-for-mounting-sysfs-and-proc-in-a-user-namespace.patch
+mnt-modify-fs_fully_visible-to-deal-with-locked-ro-nodev-and-atime.patch