--- /dev/null
+From 7ff4d90b4c24a03666f296c3d4878cd39001e81e Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 5 Dec 2014 17:19:27 -0600
+Subject: groups: Consolidate the setgroups permission checks
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 7ff4d90b4c24a03666f296c3d4878cd39001e81e upstream.
+
+Today there are 3 instances of setgroups and due to an oversight their
+permission checking has diverged. Add a common function so that
+they may all share the same permission checking code.
+
+This corrects the current oversight in the current permission checks
+and adds a helper to avoid this in the future.
+
+A user namespace security fix will update this new helper, shortly.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/s390/kernel/compat_linux.c | 2 +-
+ include/linux/cred.h | 1 +
+ kernel/groups.c | 9 ++++++++-
+ kernel/uid16.c | 2 +-
+ 4 files changed, 11 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/kernel/compat_linux.c
++++ b/arch/s390/kernel/compat_linux.c
+@@ -248,7 +248,7 @@ asmlinkage long sys32_setgroups16(int gi
+ struct group_info *group_info;
+ int retval;
+
+- if (!capable(CAP_SETGID))
++ if (!may_setgroups())
+ return -EPERM;
+ if ((unsigned)gidsetsize > NGROUPS_MAX)
+ return -EINVAL;
+--- a/include/linux/cred.h
++++ b/include/linux/cred.h
+@@ -68,6 +68,7 @@ extern void groups_free(struct group_inf
+ extern int set_current_groups(struct group_info *);
+ extern int set_groups(struct cred *, struct group_info *);
+ extern int groups_search(const struct group_info *, kgid_t);
++extern bool may_setgroups(void);
+
+ /* access the groups "array" with this macro */
+ #define GROUP_AT(gi, i) \
+--- a/kernel/groups.c
++++ b/kernel/groups.c
+@@ -223,6 +223,13 @@ out:
+ return i;
+ }
+
++bool may_setgroups(void)
++{
++ struct user_namespace *user_ns = current_user_ns();
++
++ return ns_capable(user_ns, CAP_SETGID);
++}
++
+ /*
+ * SMP: Our groups are copy-on-write. We can set them safely
+ * without another task interfering.
+@@ -233,7 +240,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsi
+ struct group_info *group_info;
+ int retval;
+
+- if (!nsown_capable(CAP_SETGID))
++ if (!may_setgroups())
+ return -EPERM;
+ if ((unsigned)gidsetsize > NGROUPS_MAX)
+ return -EINVAL;
+--- a/kernel/uid16.c
++++ b/kernel/uid16.c
+@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidset
+ struct group_info *group_info;
+ int retval;
+
+- if (!nsown_capable(CAP_SETGID))
++ if (!may_setgroups())
+ return -EPERM;
+ if ((unsigned)gidsetsize > NGROUPS_MAX)
+ return -EINVAL;
--- /dev/null
+From 3e1866410f11356a9fd869beb3e95983dc79c067 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 13 Aug 2014 01:33:38 -0700
+Subject: mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 3e1866410f11356a9fd869beb3e95983dc79c067 upstream.
+
+Now that remount is properly enforcing the rule that you can't remove
+nodev, at least sandstorm.io is breaking when performing a remount.
+
+It turns out that there is an easy, intuitive solution: implicitly
+add nodev on remount when nodev was implicitly added on mount.
+
+Tested-by: Cedric Bosdonnat <cbosdonnat@suse.com>
+Tested-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1816,7 +1816,13 @@ static int do_remount(struct path *path,
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV)) {
+- return -EPERM;
++ /* Was the nodev implicitly added in mount? */
++ if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
++ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
++ mnt_flags |= MNT_NODEV;
++ } else {
++ return -EPERM;
++ }
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID)) {
--- /dev/null
+From 4a44a19b470a886997d6647a77bb3e38dcbfa8c5 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 22 Aug 2014 16:39:03 -0500
+Subject: mnt: Update unprivileged remount test
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 4a44a19b470a886997d6647a77bb3e38dcbfa8c5 upstream.
+
+- MNT_NODEV should be irrelevant except when reading back mount flags,
+ no longer specify MNT_NODEV on remount.
+
+- Test MNT_NODEV on devpts where it is meaningful even for unprivileged mounts.
+
+- Add a test to verify that remount of a preexisting mount with the same flags
+ is allowed and does not change those flags.
+
+- Clean up the definitions of MS_REC, MS_RELATIME, MS_STRICTATIME that are used
+ when the code is built in an environment without them.
+
+- Correct the test error messages when tests fail. There were not 5 tests
+ that tested MS_RELATIME.
+
+Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ tools/testing/selftests/mount/unprivileged-remount-test.c | 172 +++++++++++---
+ 1 file changed, 142 insertions(+), 30 deletions(-)
+
+--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
++++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
+@@ -6,6 +6,8 @@
+ #include <sys/types.h>
+ #include <sys/mount.h>
+ #include <sys/wait.h>
++#include <sys/vfs.h>
++#include <sys/statvfs.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <fcntl.h>
+@@ -32,11 +34,14 @@
+ # define CLONE_NEWPID 0x20000000
+ #endif
+
++#ifndef MS_REC
++# define MS_REC 16384
++#endif
+ #ifndef MS_RELATIME
+-#define MS_RELATIME (1 << 21)
++# define MS_RELATIME (1 << 21)
+ #endif
+ #ifndef MS_STRICTATIME
+-#define MS_STRICTATIME (1 << 24)
++# define MS_STRICTATIME (1 << 24)
+ #endif
+
+ static void die(char *fmt, ...)
+@@ -87,6 +92,45 @@ static void write_file(char *filename, c
+ }
+ }
+
++static int read_mnt_flags(const char *path)
++{
++ int ret;
++ struct statvfs stat;
++ int mnt_flags;
++
++ ret = statvfs(path, &stat);
++ if (ret != 0) {
++ die("statvfs of %s failed: %s\n",
++ path, strerror(errno));
++ }
++ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
++ ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
++ ST_SYNCHRONOUS | ST_MANDLOCK)) {
++ die("Unrecognized mount flags\n");
++ }
++ mnt_flags = 0;
++ if (stat.f_flag & ST_RDONLY)
++ mnt_flags |= MS_RDONLY;
++ if (stat.f_flag & ST_NOSUID)
++ mnt_flags |= MS_NOSUID;
++ if (stat.f_flag & ST_NODEV)
++ mnt_flags |= MS_NODEV;
++ if (stat.f_flag & ST_NOEXEC)
++ mnt_flags |= MS_NOEXEC;
++ if (stat.f_flag & ST_NOATIME)
++ mnt_flags |= MS_NOATIME;
++ if (stat.f_flag & ST_NODIRATIME)
++ mnt_flags |= MS_NODIRATIME;
++ if (stat.f_flag & ST_RELATIME)
++ mnt_flags |= MS_RELATIME;
++ if (stat.f_flag & ST_SYNCHRONOUS)
++ mnt_flags |= MS_SYNCHRONOUS;
++ if (stat.f_flag & ST_MANDLOCK)
++ mnt_flags |= ST_MANDLOCK;
++
++ return mnt_flags;
++}
++
+ static void create_and_enter_userns(void)
+ {
+ uid_t uid;
+@@ -118,7 +162,8 @@ static void create_and_enter_userns(void
+ }
+
+ static
+-bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
++bool test_unpriv_remount(const char *fstype, const char *mount_options,
++ int mount_flags, int remount_flags, int invalid_flags)
+ {
+ pid_t child;
+
+@@ -151,9 +196,11 @@ bool test_unpriv_remount(int mount_flags
+ strerror(errno));
+ }
+
+- if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
+- die("mount of /tmp failed: %s\n",
+- strerror(errno));
++ if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
++ die("mount of %s with options '%s' on /tmp failed: %s\n",
++ fstype,
++ mount_options? mount_options : "",
++ strerror(errno));
+ }
+
+ create_and_enter_userns();
+@@ -181,62 +228,127 @@ bool test_unpriv_remount(int mount_flags
+
+ static bool test_unpriv_remount_simple(int mount_flags)
+ {
+- return test_unpriv_remount(mount_flags, mount_flags, 0);
++ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
+ }
+
+ static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+ {
+- return test_unpriv_remount(mount_flags, mount_flags, invalid_flags);
++ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
++ invalid_flags);
++}
++
++static bool test_priv_mount_unpriv_remount(void)
++{
++ pid_t child;
++ int ret;
++ const char *orig_path = "/dev";
++ const char *dest_path = "/tmp";
++ int orig_mnt_flags, remount_mnt_flags;
++
++ child = fork();
++ if (child == -1) {
++ die("fork failed: %s\n",
++ strerror(errno));
++ }
++ if (child != 0) { /* parent */
++ pid_t pid;
++ int status;
++ pid = waitpid(child, &status, 0);
++ if (pid == -1) {
++ die("waitpid failed: %s\n",
++ strerror(errno));
++ }
++ if (pid != child) {
++ die("waited for %d got %d\n",
++ child, pid);
++ }
++ if (!WIFEXITED(status)) {
++ die("child did not terminate cleanly\n");
++ }
++ return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
++ }
++
++ orig_mnt_flags = read_mnt_flags(orig_path);
++
++ create_and_enter_userns();
++ ret = unshare(CLONE_NEWNS);
++ if (ret != 0) {
++ die("unshare(CLONE_NEWNS) failed: %s\n",
++ strerror(errno));
++ }
++
++ ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
++ if (ret != 0) {
++ die("recursive bind mount of %s onto %s failed: %s\n",
++ orig_path, dest_path, strerror(errno));
++ }
++
++ ret = mount(dest_path, dest_path, "none",
++ MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
++ if (ret != 0) {
++ /* system("cat /proc/self/mounts"); */
++ die("remount of /tmp failed: %s\n",
++ strerror(errno));
++ }
++
++ remount_mnt_flags = read_mnt_flags(dest_path);
++ if (orig_mnt_flags != remount_mnt_flags) {
++ die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
++ dest_path, orig_path);
++ }
++ exit(EXIT_SUCCESS);
+ }
+
+ int main(int argc, char **argv)
+ {
+- if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
++ if (!test_unpriv_remount_simple(MS_RDONLY)) {
+ die("MS_RDONLY malfunctions\n");
+ }
+- if (!test_unpriv_remount_simple(MS_NODEV)) {
++ if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
+ die("MS_NODEV malfunctions\n");
+ }
+- if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
++ if (!test_unpriv_remount_simple(MS_NOSUID)) {
+ die("MS_NOSUID malfunctions\n");
+ }
+- if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
++ if (!test_unpriv_remount_simple(MS_NOEXEC)) {
+ die("MS_NOEXEC malfunctions\n");
+ }
+- if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
+- MS_NOATIME|MS_NODEV))
++ if (!test_unpriv_remount_atime(MS_RELATIME,
++ MS_NOATIME))
+ {
+ die("MS_RELATIME malfunctions\n");
+ }
+- if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
+- MS_NOATIME|MS_NODEV))
++ if (!test_unpriv_remount_atime(MS_STRICTATIME,
++ MS_NOATIME))
+ {
+ die("MS_STRICTATIME malfunctions\n");
+ }
+- if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
+- MS_STRICTATIME|MS_NODEV))
++ if (!test_unpriv_remount_atime(MS_NOATIME,
++ MS_STRICTATIME))
+ {
+- die("MS_RELATIME malfunctions\n");
++ die("MS_NOATIME malfunctions\n");
+ }
+- if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
+- MS_NOATIME|MS_NODEV))
++ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
++ MS_NOATIME))
+ {
+- die("MS_RELATIME malfunctions\n");
++ die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
+ }
+- if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
+- MS_NOATIME|MS_NODEV))
++ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
++ MS_NOATIME))
+ {
+- die("MS_RELATIME malfunctions\n");
++ die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
+ }
+- if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
+- MS_STRICTATIME|MS_NODEV))
++ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
++ MS_STRICTATIME))
+ {
+- die("MS_RELATIME malfunctions\n");
++ die("MS_NOATIME|MS_DIRATIME malfunctions\n");
+ }
+- if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
+- MS_NOATIME|MS_NODEV))
++ if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
+ {
+ die("Default atime malfunctions\n");
+ }
++ if (!test_priv_mount_unpriv_remount()) {
++ die("Mount flags unexpectedly changed after remount\n");
++ }
+ return EXIT_SUCCESS;
+ }
keys-fix-stale-key-registration-at-error-path.patch
mac80211-fix-multicast-led-blinking-and-counter.patch
mac80211-free-management-frame-keys-when-removing-station.patch
+mnt-implicitly-add-mnt_nodev-on-remount-when-it-was-implicitly-added-by-mount.patch
+mnt-update-unprivileged-remount-test.patch
+umount-disallow-unprivileged-mount-force.patch
+groups-consolidate-the-setgroups-permission-checks.patch
+userns-document-what-the-invariant-required-for-safe-unprivileged-mappings.patch
+userns-don-t-allow-setgroups-until-a-gid-mapping-has-been-setablished.patch
+userns-don-t-allow-unprivileged-creation-of-gid-mappings.patch
+userns-check-euid-no-fsuid-when-establishing-an-unprivileged-uid-mapping.patch
+userns-only-allow-the-creator-of-the-userns-unprivileged-mappings.patch
+userns-rename-id_map_mutex-to-userns_state_mutex.patch
+userns-add-a-knob-to-disable-setgroups-on-a-per-user-namespace-basis.patch
+userns-allow-setting-gid_maps-without-privilege-when-setgroups-is-disabled.patch
userns-unbreak-the-unprivileged-remount-tests.patch
--- /dev/null
+From b2f5d4dc38e034eecb7987e513255265ff9aa1cf Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Sat, 4 Oct 2014 14:44:03 -0700
+Subject: umount: Disallow unprivileged mount force
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit b2f5d4dc38e034eecb7987e513255265ff9aa1cf upstream.
+
+Forced unmount affects not just the mount namespace but the underlying
+superblock as well. Restrict forced unmount to the global root user
+for now. Otherwise it becomes possible for a user in a less privileged
+mount namespace to force the shutdown of a superblock of a filesystem
+in a more privileged mount namespace, allowing a DOS attack on root.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/namespace.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/namespace.c
++++ b/fs/namespace.c
+@@ -1342,6 +1342,9 @@ SYSCALL_DEFINE2(umount, char __user *, n
+ goto dput_and_out;
+ if (!check_mnt(mnt))
+ goto dput_and_out;
++ retval = -EPERM;
++ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
++ goto dput_and_out;
+
+ retval = do_umount(mnt, flags);
+ dput_and_out:
--- /dev/null
+From 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 2 Dec 2014 12:27:26 -0600
+Subject: userns: Add a knob to disable setgroups on a per user namespace basis
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8 upstream.
+
+- Expose the knob to user space through a proc file /proc/<pid>/setgroups
+
+ A value of "deny" means the setgroups system call is disabled in the
+ current processes user namespace and can not be enabled in the
+ future in this user namespace.
+
+ A value of "allow" means the setgroups system call is enabled.
+
+- Descendant user namespaces inherit the value of setgroups from
+ their parents.
+
+- A proc file is used (instead of a sysctl) as sysctls currently do
+ not allow checking the permissions at open time.
+
+- Writing to the proc file is restricted to before the gid_map
+ for the user namespace is set.
+
+ This ensures that disabling setgroups at a user namespace
+ level will never remove the ability to call setgroups
+ from a process that already has that ability.
+
+ A process may opt in to the setgroups disable for itself by
+ creating, entering and configuring a user namespace or by calling
+ setns on an existing user namespace with setgroups disabled.
+ Processes without privileges already can not call setgroups so this
+ is a noop. Processes with privilege become processes without
+ privilege when entering a user namespace and as with any other path
+ to dropping privilege they would not have the ability to call
+ setgroups. So this remains within the bounds of what is possible
+ without a knob to disable setgroups permanently in a user namespace.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/proc/base.c | 53 +++++++++++++++++++++++++
+ include/linux/user_namespace.h | 7 +++
+ kernel/user.c | 1
+ kernel/user_namespace.c | 85 +++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 146 insertions(+)
+
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -2612,6 +2612,57 @@ static const struct file_operations proc
+ .llseek = seq_lseek,
+ .release = proc_id_map_release,
+ };
++
++static int proc_setgroups_open(struct inode *inode, struct file *file)
++{
++ struct user_namespace *ns = NULL;
++ struct task_struct *task;
++ int ret;
++
++ ret = -ESRCH;
++ task = get_proc_task(inode);
++ if (task) {
++ rcu_read_lock();
++ ns = get_user_ns(task_cred_xxx(task, user_ns));
++ rcu_read_unlock();
++ put_task_struct(task);
++ }
++ if (!ns)
++ goto err;
++
++ if (file->f_mode & FMODE_WRITE) {
++ ret = -EACCES;
++ if (!ns_capable(ns, CAP_SYS_ADMIN))
++ goto err_put_ns;
++ }
++
++ ret = single_open(file, &proc_setgroups_show, ns);
++ if (ret)
++ goto err_put_ns;
++
++ return 0;
++err_put_ns:
++ put_user_ns(ns);
++err:
++ return ret;
++}
++
++static int proc_setgroups_release(struct inode *inode, struct file *file)
++{
++ struct seq_file *seq = file->private_data;
++ struct user_namespace *ns = seq->private;
++ int ret = single_release(inode, file);
++ put_user_ns(ns);
++ return ret;
++}
++
++static const struct file_operations proc_setgroups_operations = {
++ .open = proc_setgroups_open,
++ .write = proc_setgroups_write,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = proc_setgroups_release,
++};
+ #endif /* CONFIG_USER_NS */
+
+ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
+@@ -2720,6 +2771,7 @@ static const struct pid_entry tgid_base_
+ REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
+ REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
+ REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
++ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
+ #endif
+ #ifdef CONFIG_CHECKPOINT_RESTORE
+ REG("timers", S_IRUGO, proc_timers_operations),
+@@ -3073,6 +3125,7 @@ static const struct pid_entry tid_base_s
+ REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
+ REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
+ REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
++ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
+ #endif
+ };
+
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 ca
+ } extent[UID_GID_MAP_MAX_EXTENTS];
+ };
+
++#define USERNS_SETGROUPS_ALLOWED 1UL
++
++#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED
++
+ struct user_namespace {
+ struct uid_gid_map uid_map;
+ struct uid_gid_map gid_map;
+@@ -27,6 +31,7 @@ struct user_namespace {
+ kuid_t owner;
+ kgid_t group;
+ unsigned int proc_inum;
++ unsigned long flags;
+ bool may_mount_sysfs;
+ bool may_mount_proc;
+ };
+@@ -59,6 +64,8 @@ extern struct seq_operations proc_projid
+ extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
++extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *);
++extern int proc_setgroups_show(struct seq_file *m, void *v);
+ extern bool userns_may_setgroups(const struct user_namespace *ns);
+ #else
+
+--- a/kernel/user.c
++++ b/kernel/user.c
+@@ -51,6 +51,7 @@ struct user_namespace init_user_ns = {
+ .owner = GLOBAL_ROOT_UID,
+ .group = GLOBAL_ROOT_GID,
+ .proc_inum = PROC_USER_INIT_INO,
++ .flags = USERNS_INIT_FLAGS,
+ .may_mount_sysfs = true,
+ .may_mount_proc = true,
+ };
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -100,6 +100,11 @@ int create_user_ns(struct cred *new)
+ ns->owner = owner;
+ ns->group = group;
+
++ /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
++ mutex_lock(&userns_state_mutex);
++ ns->flags = parent_ns->flags;
++ mutex_unlock(&userns_state_mutex);
++
+ set_cred_user_ns(new, ns);
+
+ update_mnt_policy(ns);
+@@ -827,6 +832,84 @@ static bool new_idmap_permitted(const st
+ return false;
+ }
+
++int proc_setgroups_show(struct seq_file *seq, void *v)
++{
++ struct user_namespace *ns = seq->private;
++ unsigned long userns_flags = ACCESS_ONCE(ns->flags);
++
++ seq_printf(seq, "%s\n",
++ (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
++ "allow" : "deny");
++ return 0;
++}
++
++ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
++ size_t count, loff_t *ppos)
++{
++ struct seq_file *seq = file->private_data;
++ struct user_namespace *ns = seq->private;
++ char kbuf[8], *pos;
++ bool setgroups_allowed;
++ ssize_t ret;
++
++ /* Only allow a very narrow range of strings to be written */
++ ret = -EINVAL;
++ if ((*ppos != 0) || (count >= sizeof(kbuf)))
++ goto out;
++
++ /* What was written? */
++ ret = -EFAULT;
++ if (copy_from_user(kbuf, buf, count))
++ goto out;
++ kbuf[count] = '\0';
++ pos = kbuf;
++
++ /* What is being requested? */
++ ret = -EINVAL;
++ if (strncmp(pos, "allow", 5) == 0) {
++ pos += 5;
++ setgroups_allowed = true;
++ }
++ else if (strncmp(pos, "deny", 4) == 0) {
++ pos += 4;
++ setgroups_allowed = false;
++ }
++ else
++ goto out;
++
++ /* Verify there is not trailing junk on the line */
++ pos = skip_spaces(pos);
++ if (*pos != '\0')
++ goto out;
++
++ ret = -EPERM;
++ mutex_lock(&userns_state_mutex);
++ if (setgroups_allowed) {
++ /* Enabling setgroups after setgroups has been disabled
++ * is not allowed.
++ */
++ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
++ goto out_unlock;
++ } else {
++ /* Permanently disabling setgroups after setgroups has
++ * been enabled by writing the gid_map is not allowed.
++ */
++ if (ns->gid_map.nr_extents != 0)
++ goto out_unlock;
++ ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
++ }
++ mutex_unlock(&userns_state_mutex);
++
++ /* Report a successful write */
++ *ppos = count;
++ ret = count;
++out:
++ return ret;
++out_unlock:
++ mutex_unlock(&userns_state_mutex);
++ goto out;
++}
++
+ bool userns_may_setgroups(const struct user_namespace *ns)
+ {
+ bool allowed;
+@@ -836,6 +919,8 @@ bool userns_may_setgroups(const struct u
+ * the user namespace has been established.
+ */
+ allowed = ns->gid_map.nr_extents != 0;
++ /* Is setgroups allowed? */
++ allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
+ mutex_unlock(&userns_state_mutex);
+
+ return allowed;
--- /dev/null
+From 66d2f338ee4c449396b6f99f5e75cd18eb6df272 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 5 Dec 2014 19:36:04 -0600
+Subject: userns: Allow setting gid_maps without privilege when setgroups is disabled
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 66d2f338ee4c449396b6f99f5e75cd18eb6df272 upstream.
+
+Now that setgroups can be disabled and not reenabled, setting gid_map
+without privilege can now be enabled when setgroups is disabled.
+
+This restores most of the functionality that was lost when unprivileged
+setting of gid_map was removed. Applications that use this functionality
+will need to check to see if they use setgroups or init_groups, and if they
+don't they can be fixed by simply disabling setgroups before writing to
+gid_map.
+
+Reviewed-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -814,6 +814,11 @@ static bool new_idmap_permitted(const st
+ kuid_t uid = make_kuid(ns->parent, id);
+ if (uid_eq(uid, cred->euid))
+ return true;
++ } else if (cap_setid == CAP_SETGID) {
++ kgid_t gid = make_kgid(ns->parent, id);
++ if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
++ gid_eq(gid, cred->egid))
++ return true;
+ }
+ }
+
--- /dev/null
+From 80dd00a23784b384ccea049bfb3f259d3f973b9d Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 5 Dec 2014 18:26:30 -0600
+Subject: userns: Check euid no fsuid when establishing an unprivileged uid mapping
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 80dd00a23784b384ccea049bfb3f259d3f973b9d upstream.
+
+setresuid allows the euid to be set to any of uid, euid, suid, and
+fsuid. Therefore it is safe to allow an unprivileged user to map
+their euid and use CAP_SETUID privilege with exactly that uid,
+as no new credentials can be obtained.
+
+I can not find a combination of existing system calls that allows setting
+uid, euid, suid, and fsuid from the fsuid making the previous use
+of fsuid for allowing unprivileged mappings a bug.
+
+This is part of a fix for CVE-2014-8989.
+
+Reviewed-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -807,7 +807,7 @@ static bool new_idmap_permitted(const st
+ u32 id = new_map->extent[0].lower_first;
+ if (cap_setid == CAP_SETUID) {
+ kuid_t uid = make_kuid(ns->parent, id);
+- if (uid_eq(uid, file->f_cred->fsuid))
++ if (uid_eq(uid, file->f_cred->euid))
+ return true;
+ }
+ }
--- /dev/null
+From 0542f17bf2c1f2430d368f44c8fcf2f82ec9e53e Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 5 Dec 2014 17:51:47 -0600
+Subject: userns: Document what the invariant required for safe unprivileged mappings.
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 0542f17bf2c1f2430d368f44c8fcf2f82ec9e53e upstream.
+
+The rule is simple. Don't allow anything that wouldn't be allowed
+without unprivileged mappings.
+
+It was previously overlooked that establishing gid mappings would
+allow dropping groups and potentially gaining permission to files and
+directories that had lesser permissions for a specific group than for
+all other users.
+
+This is the rule needed to fix CVE-2014-8989 and prevent any other
+security issues with new_idmap_permitted.
+
+The reason for this rule is that the unix permission model is old and
+there are programs out there somewhere that take advantage of every
+little corner of it. So allowing a uid or gid mapping to be
+established without privilege that would allow anything that would not
+be allowed without that mapping will result in expectations from some
+code somewhere being violated. Violated expectations about the
+behavior of the OS is a long way to say a security issue.
+
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -800,7 +800,9 @@ static bool new_idmap_permitted(const st
+ struct user_namespace *ns, int cap_setid,
+ struct uid_gid_map *new_map)
+ {
+- /* Allow mapping to your own filesystem ids */
++ /* Don't allow mappings that would allow anything that wouldn't
++ * be allowed without the establishment of unprivileged mappings.
++ */
+ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
+ u32 id = new_map->extent[0].lower_first;
+ if (cap_setid == CAP_SETUID) {
--- /dev/null
+From 273d2c67c3e179adb1e74f403d1e9a06e3f841b5 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 5 Dec 2014 18:01:11 -0600
+Subject: userns: Don't allow setgroups until a gid mapping has been setablished
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit 273d2c67c3e179adb1e74f403d1e9a06e3f841b5 upstream.
+
+setgroups is unique in not needing a valid mapping before it can be called,
+in the case of setgroups(0, NULL) which drops all supplemental groups.
+
+The design of the user namespace assumes that CAP_SETGID can not actually
+be used until a gid mapping is established. Therefore add a helper function
+to see if the user namespace gid mapping has been established and call
+that function in the setgroups permission check.
+
+This is part of the fix for CVE-2014-8989, being able to drop groups
+without privilege using user namespaces.
+
+Reviewed-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ include/linux/user_namespace.h | 5 +++++
+ kernel/groups.c | 4 +++-
+ kernel/user_namespace.c | 14 ++++++++++++++
+ 3 files changed, 22 insertions(+), 1 deletion(-)
+
+--- a/include/linux/user_namespace.h
++++ b/include/linux/user_namespace.h
+@@ -59,6 +59,7 @@ extern struct seq_operations proc_projid
+ extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
+ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
++extern bool userns_may_setgroups(const struct user_namespace *ns);
+ #else
+
+ static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
+@@ -83,6 +84,10 @@ static inline void put_user_ns(struct us
+ {
+ }
+
++static inline bool userns_may_setgroups(const struct user_namespace *ns)
++{
++ return true;
++}
+ #endif
+
+ void update_mnt_policy(struct user_namespace *userns);
+--- a/kernel/groups.c
++++ b/kernel/groups.c
+@@ -6,6 +6,7 @@
+ #include <linux/slab.h>
+ #include <linux/security.h>
+ #include <linux/syscalls.h>
++#include <linux/user_namespace.h>
+ #include <asm/uaccess.h>
+
+ /* init to 2 - one for init_task, one to ensure it is never freed */
+@@ -227,7 +228,8 @@ bool may_setgroups(void)
+ {
+ struct user_namespace *user_ns = current_user_ns();
+
+- return ns_capable(user_ns, CAP_SETGID);
++ return ns_capable(user_ns, CAP_SETGID) &&
++ userns_may_setgroups(user_ns);
+ }
+
+ /*
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -832,6 +832,20 @@ static bool new_idmap_permitted(const st
+ return false;
+ }
+
++bool userns_may_setgroups(const struct user_namespace *ns)
++{
++ bool allowed;
++
++ mutex_lock(&id_map_mutex);
++ /* It is not safe to use setgroups until a gid mapping in
++ * the user namespace has been established.
++ */
++ allowed = ns->gid_map.nr_extents != 0;
++ mutex_unlock(&id_map_mutex);
++
++ return allowed;
++}
++
+ static void *userns_get(struct task_struct *task)
+ {
+ struct user_namespace *user_ns;
--- /dev/null
+From be7c6dba2332cef0677fbabb606e279ae76652c3 Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Fri, 5 Dec 2014 18:14:19 -0600
+Subject: userns: Don't allow unprivileged creation of gid mappings
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit be7c6dba2332cef0677fbabb606e279ae76652c3 upstream.
+
+As any gid mapping will allow and must allow for backwards
+compatibility dropping groups don't allow any gid mappings to be
+established without CAP_SETGID in the parent user namespace.
+
+For a small class of applications this change breaks userspace
+and removes useful functionality. This small class of applications
+includes tools/testing/selftests/mount/unprivileged-remount-test.c
+
+Most of the removed functionality will be added back with the addition
+of a one way knob to disable setgroups. Once setgroups is disabled
+setting the gid_map becomes as safe as setting the uid_map.
+
+For more common applications that set the uid_map and the gid_map
+with privilege this change will have no effect.
+
+This is part of a fix for CVE-2014-8989.
+
+Reviewed-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -810,11 +810,6 @@ static bool new_idmap_permitted(const st
+ if (uid_eq(uid, file->f_cred->fsuid))
+ return true;
+ }
+- else if (cap_setid == CAP_SETGID) {
+- kgid_t gid = make_kgid(ns->parent, id);
+- if (gid_eq(gid, file->f_cred->fsgid))
+- return true;
+- }
+ }
+
+ /* Allow anyone to set a mapping that doesn't require privilege */
--- /dev/null
+From f95d7918bd1e724675de4940039f2865e5eec5fe Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Wed, 26 Nov 2014 23:22:14 -0600
+Subject: userns: Only allow the creator of the userns unprivileged mappings
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f95d7918bd1e724675de4940039f2865e5eec5fe upstream.
+
+If you did not create the user namespace and are allowed
+to write to uid_map or gid_map you should already have the necessary
+privilege in the parent user namespace to establish any mapping
+you want so this will not affect userspace in practice.
+
+Limiting unprivileged uid mapping establishment to the creator of the
+user namespace makes it easier to verify all credentials obtained with
+the uid mapping can be obtained without the uid mapping without
+privilege.
+
+Limiting unprivileged gid mapping establishment (which is temporarily
+absent) to the creator of the user namespace also ensures that the
+combination of uid and gid can already be obtained without privilege.
+
+This is part of the fix for CVE-2014-8989.
+
+Reviewed-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -800,14 +800,16 @@ static bool new_idmap_permitted(const st
+ struct user_namespace *ns, int cap_setid,
+ struct uid_gid_map *new_map)
+ {
++ const struct cred *cred = file->f_cred;
+ /* Don't allow mappings that would allow anything that wouldn't
+ * be allowed without the establishment of unprivileged mappings.
+ */
+- if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) {
++ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
++ uid_eq(ns->owner, cred->euid)) {
+ u32 id = new_map->extent[0].lower_first;
+ if (cap_setid == CAP_SETUID) {
+ kuid_t uid = make_kuid(ns->parent, id);
+- if (uid_eq(uid, file->f_cred->euid))
++ if (uid_eq(uid, cred->euid))
+ return true;
+ }
+ }
--- /dev/null
+From f0d62aec931e4ae3333c797d346dc4f188f454ba Mon Sep 17 00:00:00 2001
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+Date: Tue, 9 Dec 2014 14:03:14 -0600
+Subject: userns: Rename id_map_mutex to userns_state_mutex
+
+From: "Eric W. Biederman" <ebiederm@xmission.com>
+
+commit f0d62aec931e4ae3333c797d346dc4f188f454ba upstream.
+
+Generalize id_map_mutex so it can be used for more state of a user namespace.
+
+Reviewed-by: Andy Lutomirski <luto@amacapital.net>
+Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/user_namespace.c | 14 ++++++--------
+ 1 file changed, 6 insertions(+), 8 deletions(-)
+
+--- a/kernel/user_namespace.c
++++ b/kernel/user_namespace.c
+@@ -24,6 +24,7 @@
+ #include <linux/fs_struct.h>
+
+ static struct kmem_cache *user_ns_cachep __read_mostly;
++static DEFINE_MUTEX(userns_state_mutex);
+
+ static bool new_idmap_permitted(const struct file *file,
+ struct user_namespace *ns, int cap_setid,
+@@ -577,9 +578,6 @@ static bool mappings_overlap(struct uid_
+ return false;
+ }
+
+-
+-static DEFINE_MUTEX(id_map_mutex);
+-
+ static ssize_t map_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos,
+ int cap_setid,
+@@ -596,7 +594,7 @@ static ssize_t map_write(struct file *fi
+ ssize_t ret = -EINVAL;
+
+ /*
+- * The id_map_mutex serializes all writes to any given map.
++ * The userns_state_mutex serializes all writes to any given map.
+ *
+ * Any map is only ever written once.
+ *
+@@ -614,7 +612,7 @@ static ssize_t map_write(struct file *fi
+ * order and smp_rmb() is guaranteed that we don't have crazy
+ * architectures returning stale data.
+ */
+- mutex_lock(&id_map_mutex);
++ mutex_lock(&userns_state_mutex);
+
+ ret = -EPERM;
+ /* Only allow one successful write to the map */
+@@ -741,7 +739,7 @@ static ssize_t map_write(struct file *fi
+ *ppos = count;
+ ret = count;
+ out:
+- mutex_unlock(&id_map_mutex);
++ mutex_unlock(&userns_state_mutex);
+ if (page)
+ free_page(page);
+ return ret;
+@@ -833,12 +831,12 @@ bool userns_may_setgroups(const struct u
+ {
+ bool allowed;
+
+- mutex_lock(&id_map_mutex);
++ mutex_lock(&userns_state_mutex);
+ /* It is not safe to use setgroups until a gid mapping in
+ * the user namespace has been established.
+ */
+ allowed = ns->gid_map.nr_extents != 0;
+- mutex_unlock(&id_map_mutex);
++ mutex_unlock(&userns_state_mutex);
+
+ return allowed;
+ }
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
-@@ -48,17 +48,14 @@ static void die(char *fmt, ...)
+@@ -53,17 +53,14 @@ static void die(char *fmt, ...)
exit(EXIT_FAILURE);
}
if (buf_len < 0) {
die("vsnprintf failed: %s\n",
strerror(errno));
-@@ -69,6 +66,8 @@ static void write_file(char *filename, c
+@@ -74,6 +71,8 @@ static void write_file(char *filename, c
fd = open(filename, O_WRONLY);
if (fd < 0) {
die("open of %s failed: %s\n",
filename, strerror(errno));
}
-@@ -87,6 +86,26 @@ static void write_file(char *filename, c
+@@ -92,6 +91,26 @@ static void write_file(char *filename, c
}
}
+
+}
+
- static void create_and_enter_userns(void)
+ static int read_mnt_flags(const char *path)
{
- uid_t uid;
-@@ -100,13 +119,10 @@ static void create_and_enter_userns(void
+ int ret;
+@@ -144,13 +163,10 @@ static void create_and_enter_userns(void
strerror(errno));
}