]> git.ipfire.org Git - thirdparty/util-linux.git/blobdiff - sys-utils/unshare.c
docs: update year in libs docs
[thirdparty/util-linux.git] / sys-utils / unshare.c
index b5e0d6608c8241130a5fa000e7f54b932358bd94..8d33f2273524657fe4f48da4765f80ca34098e0a 100644 (file)
 #include <sys/mount.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/prctl.h>
+#include <grp.h>
 
 /* we only need some defines missing in sys/mount.h, no libmount linkage */
 #include <libmount.h>
 
 #include "nls.h"
 #include "c.h"
+#include "caputils.h"
 #include "closestream.h"
 #include "namespace.h"
 #include "exec_shell.h"
 #include "xalloc.h"
 #include "pathnames.h"
 #include "all-io.h"
+#include "signames.h"
+#include "strutils.h"
 
 /* synchronize parent and child by pipe */
 #define PIPE_SYNC_BYTE 0x06
@@ -65,13 +70,18 @@ static struct namespace_file {
 
 static int npersists;  /* number of persistent namespaces */
 
-
 enum {
        SETGROUPS_NONE = -1,
        SETGROUPS_DENY = 0,
        SETGROUPS_ALLOW = 1,
 };
 
+enum {                         /* uid/gid mapping mode chosen on the command line */
+       MAP_USER_NONE,          /* default: no uid/gid map is written */
+       MAP_USER_ROOT,          /* -r: map the caller's euid/egid to 0 */
+       MAP_USER_CURRENT,       /* -c: map the caller's euid/egid to themselves */
+};
+
 static const char *setgroups_strings[] =
 {
        [SETGROUPS_DENY] = "deny",
@@ -257,12 +267,23 @@ static void __attribute__((__noreturn__)) usage(void)
        fputs(_(" -p, --pid[=<file>]        unshare pid namespace\n"), out);
        fputs(_(" -U, --user[=<file>]       unshare user namespace\n"), out);
        fputs(_(" -C, --cgroup[=<file>]     unshare cgroup namespace\n"), out);
+       fputs(USAGE_SEPARATOR, out);
        fputs(_(" -f, --fork                fork before launching <program>\n"), out);
-       fputs(_("     --mount-proc[=<dir>]  mount proc filesystem first (implies --mount)\n"), out);
        fputs(_(" -r, --map-root-user       map current user to root (implies --user)\n"), out);
-       fputs(_("     --propagation slave|shared|private|unchanged\n"
+       fputs(_(" -c, --map-current-user    map current user to itself (implies --user)\n"), out);
+       fputs(USAGE_SEPARATOR, out);
+       fputs(_(" --kill-child[=<signame>]  when dying, kill the forked child (implies --fork)\n"
+               "                             defaults to SIGKILL\n"), out);
+       fputs(_(" --mount-proc[=<dir>]      mount proc filesystem first (implies --mount)\n"), out);
+       fputs(_(" --propagation slave|shared|private|unchanged\n"
                "                           modify mount propagation in mount namespace\n"), out);
-       fputs(_(" -s, --setgroups allow|deny  control the setgroups syscall in user namespaces\n"), out);
+       fputs(_(" --setgroups allow|deny    control the setgroups syscall in user namespaces\n"), out);
+       fputs(_(" --keep-caps               retain capabilities granted in user namespaces\n"), out);
+       fputs(USAGE_SEPARATOR, out);
+       fputs(_(" -R, --root=<dir>          run the command with root directory set to <dir>\n"), out);
+       fputs(_(" -w, --wd=<dir>            change working directory to <dir>\n"), out);
+       fputs(_(" -S, --setuid <uid>        set uid in entered namespace\n"), out);
+       fputs(_(" -G, --setgid <gid>        set gid in entered namespace\n"), out);
 
        fputs(USAGE_SEPARATOR, out);
        printf(USAGE_HELP_OPTIONS(27));
@@ -276,7 +297,9 @@ int main(int argc, char *argv[])
        enum {
                OPT_MOUNTPROC = CHAR_MAX + 1,
                OPT_PROPAGATION,
-               OPT_SETGROUPS
+               OPT_SETGROUPS,
+               OPT_KILLCHILD,
+               OPT_KEEPCAPS,
        };
        static const struct option longopts[] = {
                { "help",          no_argument,       NULL, 'h'             },
@@ -291,39 +314,46 @@ int main(int argc, char *argv[])
                { "cgroup",        optional_argument, NULL, 'C'             },
 
                { "fork",          no_argument,       NULL, 'f'             },
+               { "kill-child",    optional_argument, NULL, OPT_KILLCHILD   },
                { "mount-proc",    optional_argument, NULL, OPT_MOUNTPROC   },
                { "map-root-user", no_argument,       NULL, 'r'             },
+               { "map-current-user", no_argument,    NULL, 'c'             },
                { "propagation",   required_argument, NULL, OPT_PROPAGATION },
                { "setgroups",     required_argument, NULL, OPT_SETGROUPS   },
+               { "keep-caps",     no_argument,       NULL, OPT_KEEPCAPS    },
+               { "setuid",        required_argument, NULL, 'S'             },
+               { "setgid",        required_argument, NULL, 'G'             },
+               { "root",          required_argument, NULL, 'R'             },
+               { "wd",            required_argument, NULL, 'w'             },
                { NULL, 0, NULL, 0 }
        };
 
        int setgrpcmd = SETGROUPS_NONE;
        int unshare_flags = 0;
-       int c, forkit = 0, maproot = 0;
+       int c, forkit = 0, mapuser = MAP_USER_NONE;
+       int kill_child_signo = 0; /* 0 means --kill-child was not used */
        const char *procmnt = NULL;
+       const char *newroot = NULL;
+       const char *newdir = NULL;
        pid_t pid = 0;
        int fds[2];
        int status;
        unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
-       uid_t real_euid = geteuid();
-       gid_t real_egid = getegid();
+       int force_uid = 0, force_gid = 0;
+       uid_t uid = 0, real_euid = geteuid();
+       gid_t gid = 0, real_egid = getegid();
+       int keepcaps = 0;
 
        setlocale(LC_ALL, "");
        bindtextdomain(PACKAGE, LOCALEDIR);
        textdomain(PACKAGE);
-       atexit(close_stdout);
+       close_stdout_atexit();
 
-       while ((c = getopt_long(argc, argv, "+fhVmuinpCUr", longopts, NULL)) != -1) {
+       while ((c = getopt_long(argc, argv, "+fhVmuinpCUrR:w:S:G:c", longopts, NULL)) != -1) {
                switch (c) {
                case 'f':
                        forkit = 1;
                        break;
-               case 'h':
-                       usage();
-               case 'V':
-                       printf(UTIL_LINUX_VERSION);
-                       return EXIT_SUCCESS;
                case 'm':
                        unshare_flags |= CLONE_NEWNS;
                        if (optarg)
@@ -364,8 +394,20 @@ int main(int argc, char *argv[])
                        procmnt = optarg ? optarg : "/proc";
                        break;
                case 'r':
+                       if (mapuser == MAP_USER_CURRENT)
+                               errx(EXIT_FAILURE, _("options --map-root-user and "
+                                       "--map-current-user are mutually exclusive"));
+
+                       unshare_flags |= CLONE_NEWUSER;
+                       mapuser = MAP_USER_ROOT;
+                       break;
+               case 'c':
+                       if (mapuser == MAP_USER_ROOT)
+                               errx(EXIT_FAILURE, _("options --map-root-user and "
+                                       "--map-current-user are mutually exclusive"));
+
                        unshare_flags |= CLONE_NEWUSER;
-                       maproot = 1;
+                       mapuser = MAP_USER_CURRENT;
                        break;
                case OPT_SETGROUPS:
                        setgrpcmd = setgroups_str2id(optarg);
@@ -373,6 +415,39 @@ int main(int argc, char *argv[])
                case OPT_PROPAGATION:
                        propagation = parse_propagation(optarg);
                        break;
+               case OPT_KILLCHILD:
+                       forkit = 1;
+                       if (optarg) {
+                               if ((kill_child_signo = signame_to_signum(optarg)) < 0)
+                                       errx(EXIT_FAILURE, _("unknown signal: %s"),
+                                            optarg);
+                       } else {
+                               kill_child_signo = SIGKILL;
+                       }
+                       break;
+                case OPT_KEEPCAPS:
+                       keepcaps = 1;
+                       cap_last_cap(); /* Force last cap to be cached before we fork. */
+                       break;
+               case 'S':
+                       uid = strtoul_or_err(optarg, _("failed to parse uid"));
+                       force_uid = 1;
+                       break;
+               case 'G':
+                       gid = strtoul_or_err(optarg, _("failed to parse gid"));
+                       force_gid = 1;
+                       break;
+               case 'R':
+                       newroot = optarg;
+                       break;
+               case 'w':
+                       newdir = optarg;
+                       break;
+
+               case 'h':
+                       usage();
+               case 'V':
+                       print_version(EXIT_SUCCESS);
                default:
                        errtryhelp(EXIT_FAILURE);
                }
@@ -430,34 +505,106 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0)
+               err(EXIT_FAILURE, "prctl failed");
 
-       if (maproot) {
+        /* Since Linux 3.19 unprivileged writing of /proc/self/gid_map
+         * has been disabled unless /proc/self/setgroups is written
+         * first to permanently disable the ability to call setgroups
+         * in that user namespace. */
+        switch (mapuser) {
+        case MAP_USER_ROOT:
                if (setgrpcmd == SETGROUPS_ALLOW)
                        errx(EXIT_FAILURE, _("options --setgroups=allow and "
                                        "--map-root-user are mutually exclusive"));
 
-               /* since Linux 3.19 unprivileged writing of /proc/self/gid_map
-                * has s been disabled unless /proc/self/setgroups is written
-                * first to permanently disable the ability to call setgroups
-                * in that user namespace. */
                setgroups_control(SETGROUPS_DENY);
                map_id(_PATH_PROC_UIDMAP, 0, real_euid);
                map_id(_PATH_PROC_GIDMAP, 0, real_egid);
+                break;
+        case MAP_USER_CURRENT:
+               if (setgrpcmd == SETGROUPS_ALLOW)
+                       errx(EXIT_FAILURE, _("options --setgroups=allow and "
+                                       "--map-current-user are mutually exclusive"));
 
-       } else if (setgrpcmd != SETGROUPS_NONE)
-               setgroups_control(setgrpcmd);
+               setgroups_control(SETGROUPS_DENY);
+               map_id(_PATH_PROC_UIDMAP, real_euid, real_euid);
+               map_id(_PATH_PROC_GIDMAP, real_egid, real_egid);
+                break;
+        case MAP_USER_NONE:
+               if (setgrpcmd != SETGROUPS_NONE)
+                       setgroups_control(setgrpcmd);
+        }
 
        if ((unshare_flags & CLONE_NEWNS) && propagation)
                set_propagation(propagation);
 
-       if (procmnt &&
-           (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 ||
-            mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))
+       if (newroot) {
+               if (chroot(newroot) != 0)
+                       err(EXIT_FAILURE,
+                           _("cannot change root directory to '%s'"), newroot);
+               newdir = newdir ?: "/";
+       }
+       if (newdir && chdir(newdir))
+               err(EXIT_FAILURE, _("cannot chdir to '%s'"), newdir);
+
+       if (procmnt) {
+               if (!newroot && mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0)
+                       err(EXIT_FAILURE, _("umount %s failed"), procmnt);
+               if (mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0)
                        err(EXIT_FAILURE, _("mount %s failed"), procmnt);
+       }
+
+       if (force_gid) {
+               if (setgroups(0, NULL) != 0)    /* drop supplementary groups */
+                       err(EXIT_FAILURE, _("setgroups failed"));
+               if (setgid(gid) < 0)            /* change GID */
+                       err(EXIT_FAILURE, _("setgid failed"));
+       }
+       if (force_uid && setuid(uid) < 0)       /* change UID */
+               err(EXIT_FAILURE, _("setuid failed"));
+
+       /* We use capabilities system calls to propagate the permitted
+        * capabilities into the ambient set because we have already
+        * forked so are in async-signal-safe context. */
+       if (keepcaps && (unshare_flags & CLONE_NEWUSER)) {
+               struct __user_cap_header_struct header = {
+                       .version = _LINUX_CAPABILITY_VERSION_3,
+                       .pid = 0,
+               };
+
+               struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = { 0 };
+               int cap;
+               uint64_t effective;
+
+               if (capget(&header, payload) < 0)
+                       err(EXIT_FAILURE, _("capget failed"));
+
+               /* In order to make capabilities ambient, we first need to ensure
+                * that they are all inheritable. */
+               payload[0].inheritable = payload[0].permitted;
+               payload[1].inheritable = payload[1].permitted;
+
+               if (capset(&header, payload) < 0)
+                       err(EXIT_FAILURE, _("capset failed"));
+
+               effective = ((uint64_t)payload[1].effective << 32) |  (uint64_t)payload[0].effective;
+
+               for (cap = 0; cap < 64; cap++) {
+                       /* This is the same check as cap_valid(), but using
+                        * the runtime value for the last valid cap. */
+                       if (cap > cap_last_cap())
+                               continue;
+
+                       if ((effective & (1 << cap))
+                           && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0)
+                                       err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed"));
+                }
+        }
 
        if (optind < argc) {
                execvp(argv[optind], argv + optind);
-               err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]);
+               errexec(argv[optind]);
        }
        exec_shell();
 }