2 * SPDX-License-Identifier: GPL-2.0
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the
6 * Free Software Foundation; version 2.
8 * Copyright (C) 2012-2023 Eric Biederman <ebiederm@xmission.com>
10 * nsenter(1) - command-line interface for setns(2)
21 #include <sys/types.h>
25 #include <sys/statfs.h>
27 #include <sys/ioctl.h>
28 #ifdef HAVE_LINUX_NSFS_H
29 # include <linux/nsfs.h>
32 # define NS_GET_USERNS _IO(0xb7, 0x1)
35 #ifdef HAVE_LIBSELINUX
36 # include <selinux/selinux.h>
42 #include "closestream.h"
43 #include "namespace.h"
44 #include "exec_shell.h"
50 #include "statfs_magic.h"
51 #include "pathnames.h"
/*
 * Table of the namespace types handled by this file. Each visible entry
 * pairs a CLONE_NEW* flag (.nstype) with a path component (.name, used
 * below /proc/<pid>/ by open_namespace_fd) and a descriptor slot (.fd)
 * that starts at -1. The last entry has .nstype 0 and .name NULL; the
 * loops over this table stop when they reach a zero .nstype.
 * NOTE(review): the member declarations of struct namespace_file and the
 * closing of the initializer are not visible in this listing - confirm
 * the member types against the complete file.
 */
53 static struct namespace_file
{
57 } namespace_files
[] = {
58 /* Careful the order is significant in this array.
60 * The user namespace comes either first or last: first if
61 * you're using it to increase your privilege and last if
62 * you're using it to decrease. We enter the namespaces in
63 * two passes starting initially from offset 1 and then offset
66 { .nstype
= CLONE_NEWUSER
, .name
= "ns/user", .fd
= -1 },
67 { .nstype
= CLONE_NEWCGROUP
,.name
= "ns/cgroup", .fd
= -1 },
68 { .nstype
= CLONE_NEWIPC
, .name
= "ns/ipc", .fd
= -1 },
69 { .nstype
= CLONE_NEWUTS
, .name
= "ns/uts", .fd
= -1 },
70 { .nstype
= CLONE_NEWNET
, .name
= "ns/net", .fd
= -1 },
71 { .nstype
= CLONE_NEWPID
, .name
= "ns/pid", .fd
= -1 },
72 { .nstype
= CLONE_NEWNS
, .name
= "ns/mnt", .fd
= -1 },
73 { .nstype
= CLONE_NEWTIME
, .name
= "ns/time", .fd
= -1 },
74 { .nstype
= 0, .name
= NULL
, .fd
= -1 }
/*
 * Print the command synopsis and the option summary to the stream named
 * out, one fputs()/fprintf() call per piece of help text. The function is
 * declared with the noreturn attribute.
 * NOTE(review): neither the declaration of out nor the statement that ends
 * the function is visible in this listing - confirm both. The -Z line is
 * guarded by HAVE_LIBSELINUX; the matching #endif is not visible either.
 * NOTE(review): the help text shows --setuid and --setgid with an optional
 * argument and --wdns with a mandatory one, whereas the longopts table in
 * main() declares setuid and setgid as required_argument and wdns as
 * optional_argument - confirm which is intended.
 */
77 static void __attribute__((__noreturn__
)) usage(void)
81 fputs(USAGE_HEADER
, out
);
/* Synopsis line; the program name comes from program_invocation_short_name. */
82 fprintf(out
, _(" %s [options] [<program> [<argument>...]]\n"),
83 program_invocation_short_name
);
85 fputs(USAGE_SEPARATOR
, out
);
86 fputs(_("Run a program with namespaces of other processes.\n"), out
);
/* One line per supported option follows. */
88 fputs(USAGE_OPTIONS
, out
);
89 fputs(_(" -a, --all enter all namespaces\n"), out
);
90 fputs(_(" -t, --target <pid> target process to get namespaces from\n"), out
);
91 fputs(_(" -m, --mount[=<file>] enter mount namespace\n"), out
);
92 fputs(_(" -u, --uts[=<file>] enter UTS namespace (hostname etc)\n"), out
);
93 fputs(_(" -i, --ipc[=<file>] enter System V IPC namespace\n"), out
);
94 fputs(_(" -n, --net[=<file>] enter network namespace\n"), out
);
95 fputs(_(" -p, --pid[=<file>] enter pid namespace\n"), out
);
96 fputs(_(" -C, --cgroup[=<file>] enter cgroup namespace\n"), out
);
97 fputs(_(" -U, --user[=<file>] enter user namespace\n"), out
);
98 fputs(_(" --user-parent enter parent user namespace\n"), out
);
99 fputs(_(" -T, --time[=<file>] enter time namespace\n"), out
);
100 fputs(_(" -S, --setuid[=<uid>] set uid in entered namespace\n"), out
);
101 fputs(_(" -G, --setgid[=<gid>] set gid in entered namespace\n"), out
);
102 fputs(_(" --preserve-credentials do not touch uids or gids\n"), out
);
103 fputs(_(" --keep-caps retain capabilities granted in user namespaces\n"), out
);
104 fputs(_(" -r, --root[=<dir>] set the root directory\n"), out
);
105 fputs(_(" -w, --wd[=<dir>] set the working directory\n"), out
);
106 fputs(_(" -W, --wdns <dir> set the working directory in namespace\n"), out
);
107 fputs(_(" -e, --env inherit environment variables from target process\n"), out
);
108 fputs(_(" -F, --no-fork do not fork before exec'ing <program>\n"), out
);
109 fputs(_(" -c, --join-cgroup join the cgroup of the target process\n"), out
);
110 #ifdef HAVE_LIBSELINUX
111 fputs(_(" -Z, --follow-context set SELinux context according to --target PID\n"), out
);
/* Standard trailer: help/version options and the manual page pointer. */
114 fputs(USAGE_SEPARATOR
, out
);
115 fprintf(out
, USAGE_HELP_OPTIONS(24));
116 fprintf(out
, USAGE_MAN_TAIL("nsenter(1)"));
/*
 * File-scope state shared by the helpers below and by main(). Every
 * descriptor starts at -1; later code tests them with >= 0 / < 0 to see
 * whether they have been opened.
 */
/* PID of the target process; assigned in main() from the parsed pid argument. */
121 static pid_t namespace_target_pid
= 0;
/* Descriptor opened by main() through open_target_fd() with type root. */
122 static int root_fd
= -1;
/* Descriptor opened with type cwd, or on the current or in-namespace directory. */
123 static int wd_fd
= -1;
/* Descriptor opened with type environ; read by env_from_fd() in main(). */
124 static int env_fd
= -1;
/* Descriptor opened with an empty type (the /proc/<pid> entry itself); main() calls fstat() on it. */
125 static int uid_gid_fd
= -1;
/* Write descriptor for the cgroup.procs file, set by open_cgroup_procs(). */
126 static int cgroup_procs_fd
= -1;
/*
 * Store into the CLONE_NEWUSER entry of namespace_files a descriptor
 * obtained with the NS_GET_USERNS ioctl on the descriptor of a table
 * entry.
 *
 * Visible behaviour: walks namespace_files, remembers the CLONE_NEWUSER
 * entry in user_nsfile, tests each entry for fd == -1, issues the ioctl
 * on nsfile->fd, and has two fatal exits (err() naming the entry whose
 * parent could not be opened, errx() when there is no namespace to query).
 * The last visible statement assigns parent_ns to user_nsfile->fd.
 * NOTE(review): the declaration of parent_ns, the statements controlled by
 * the if conditions and the end of the loop are not visible in this
 * listing - confirm which entries are skipped and when the loop stops.
 * See ioctl_ns(2) for the meaning of NS_GET_USERNS.
 */
128 static void set_parent_user_ns_fd(void)
130 struct namespace_file
*nsfile
= NULL
;
131 struct namespace_file
*user_nsfile
= NULL
;
/* The loop ends on the table terminator, whose nstype is 0. */
134 for (nsfile
= namespace_files
; nsfile
->nstype
; nsfile
++) {
135 if (nsfile
->nstype
== CLONE_NEWUSER
)
136 user_nsfile
= nsfile
;
/* fd == -1 marks an entry whose descriptor was never opened. */
138 if (nsfile
->fd
== -1)
141 parent_ns
= ioctl(nsfile
->fd
, NS_GET_USERNS
);
143 err(EXIT_FAILURE
, _("failed to open parent ns of %s"), nsfile
->name
);
149 errx(EXIT_FAILURE
, _("no namespaces to get parent of"));
151 user_nsfile
->fd
= parent_ns
;
/*
 * Open a file that belongs to the target and store the descriptor in *fd.
 *
 * fd:   receives the value returned by open()
 * type: path component below /proc/<pid>/, used when path is NULL
 * path: explicit file name, or NULL to use the target pid
 *
 * When path is NULL and namespace_target_pid is non-zero the name
 * /proc/<namespace_target_pid>/<type> is formatted into pathbuf. The file
 * is opened O_RDONLY. Two fatal messages are visible: one for the case
 * where neither a file name nor a target pid was supplied, one for a
 * failing open().
 * NOTE(review): several lines are missing from this listing (presumably
 * the assignment of pathbuf to path, the else branch and the check of the
 * open() result) - confirm. namespace_target_pid is a pid_t but is
 * formatted with %u.
 */
155 static void open_target_fd(int *fd
, const char *type
, const char *path
)
157 char pathbuf
[PATH_MAX
];
159 if (!path
&& namespace_target_pid
) {
160 snprintf(pathbuf
, sizeof(pathbuf
), "/proc/%u/%s",
161 namespace_target_pid
, type
);
166 _("neither filename nor target pid supplied for %s"),
172 *fd
= open(path
, O_RDONLY
);
174 err(EXIT_FAILURE
, _("cannot open %s"), path
);
/*
 * Open the descriptor of the namespace_files entry whose nstype equals
 * the nstype argument.
 *
 * nstype: CLONE_NEW* flag identifying the table entry
 * path:   explicit namespace file, or NULL (forwarded to open_target_fd)
 *
 * The entry name (for example ns/net) is passed as the type argument of
 * open_target_fd() and the descriptor lands in the entry fd member.
 * NOTE(review): the statements after the nstype comparison and after the
 * open_target_fd() call are not visible here; presumably non-matching
 * entries are skipped and a match returns - confirm.
 */
177 static void open_namespace_fd(int nstype
, const char *path
)
179 struct namespace_file
*nsfile
;
181 for (nsfile
= namespace_files
; nsfile
->nstype
; nsfile
++) {
182 if (nstype
!= nsfile
->nstype
)
185 open_target_fd(&nsfile
->fd
, nsfile
->name
, path
);
/* Reached with nsfile on the terminator (nstype 0) when nothing matched. */
188 /* This should never happen */
189 assert(nsfile
->nstype
);
/*
 * Query path with stat() on behalf of a caller that wants an inode number.
 *
 * path: file to stat
 * ino:  output parameter for the inode number
 *
 * is_usable_namespace() treats a non-zero return value as failure.
 * NOTE(review): only the stat() test is visible in this listing; the
 * declaration of st, the error value returned and the store into *ino
 * must be confirmed against the complete file.
 */
192 static int get_ns_ino(const char *path
, ino_t
*ino
)
196 if (stat(path
, &st
) != 0)
/*
 * Locate the cgroup of the target and open its cgroup.procs file for
 * writing, leaving the descriptor in cgroup_procs_fd.
 *
 * Visible steps: open the target cgroup file through open_target_fd()
 * (type cgroup, explicit path taken from optarg), read it completely with
 * read_all_alloc(), take the first line with strtok(), find the last
 * colon in that line with strrchr(), format
 * _PATH_SYS_CGROUP/<path>/cgroup.procs into fdpath and open it with
 * O_WRONLY | O_APPEND. Each visible failure path ends the program with
 * err().
 * NOTE(review): the declaration of cgroup_fd, the tests around strrchr()
 * and any adjustment of path before formatting are not visible in this
 * listing; whether buf is freed and cgroup_fd closed cannot be told from
 * here - confirm.
 * NOTE(review): strtok() keeps static state, and the second
 * failed-to-get-cgroup-path exit reports errno through err() although it
 * appears to follow a parsing step rather than a system call - consider
 * errx() after confirming.
 */
202 static void open_cgroup_procs(void)
204 char *buf
= NULL
, *path
= NULL
, *p
;
206 char fdpath
[PATH_MAX
];
208 open_target_fd(&cgroup_fd
, "cgroup", optarg
);
/* A result below 1 (error or empty file) is fatal. */
210 if (read_all_alloc(cgroup_fd
, &buf
) < 1)
211 err(EXIT_FAILURE
, _("failed to get cgroup path"));
/* Only the first line of the file is examined. */
213 p
= strtok(buf
, "\n");
215 path
= strrchr(p
, ':');
217 err(EXIT_FAILURE
, _("failed to get cgroup path"));
220 snprintf(fdpath
, sizeof(fdpath
), _PATH_SYS_CGROUP
"/%s/cgroup.procs", path
);
222 if ((cgroup_procs_fd
= open(fdpath
, O_WRONLY
| O_APPEND
)) < 0)
223 err(EXIT_FAILURE
, _("failed to open cgroup.procs"));
/*
 * Report whether the filesystem mounted at _PATH_SYS_CGROUP is cgroup v2.
 *
 * Calls statfs() on _PATH_SYS_CGROUP and returns the result of comparing
 * the reported f_type with STATFS_CGROUP2_MAGIC through F_TYPE_EQUAL().
 * A fatal err() exit with the statfs-failed message is visible.
 * NOTE(review): the declaration of rc and the condition guarding the
 * err() call are not visible in this listing - confirm.
 */
228 static int is_cgroup2(void)
230 struct statfs fs_stat
;
233 rc
= statfs(_PATH_SYS_CGROUP
, &fs_stat
);
235 err(EXIT_FAILURE
, _("statfs %s failed"), _PATH_SYS_CGROUP
);
236 return F_TYPE_EQUAL(fs_stat
.f_type
, STATFS_CGROUP2_MAGIC
);
/*
 * Write a process id, formatted as decimal text, to cgroup_procs_fd.
 *
 * The text is produced by snprintf() into buf, whose size is that of the
 * string form of UINT32_MAX, and written with write_all(); a non-zero
 * result from write_all() is fatal.
 * NOTE(review): the declarations of pid and len and the assignment of pid
 * are not visible in this listing (presumably pid is the calling process)
 * - confirm.
 */
239 static void join_into_cgroup(void)
242 char buf
[ sizeof(stringify_value(UINT32_MAX
)) ];
/* pid is cast to size_t to match the %zu conversion. */
246 len
= snprintf(buf
, sizeof(buf
), "%zu", (size_t) pid
);
247 if (write_all(cgroup_procs_fd
, buf
, len
))
248 err(EXIT_FAILURE
, _("write cgroup.procs failed"));
/*
 * Decide whether the namespace described by nsfile can be entered for the
 * given target.
 *
 * target: pid of the process whose namespace would be entered
 * nsfile: namespace_files entry being considered
 *
 * Visible behaviour: the namespace file of the calling process
 * (/proc/<getpid()>/<name>) is looked up with get_ns_ino(); one outcome of
 * that lookup returns false (namespace type unsupported). For entries
 * with the CLONE_NEWUSER bit the namespace file of the target is looked
 * up as well (a failure there is fatal) and the two inode numbers are
 * compared. The function returns true when every check passes.
 * NOTE(review): the declarations of path, rc and my_ino, the exact test
 * applied to rc and the statement executed when both inode numbers match
 * are not visible in this listing - confirm. The pids are formatted with
 * %u.
 */
251 static int is_usable_namespace(pid_t target
, const struct namespace_file
*nsfile
)
257 /* Check NS accessibility */
258 snprintf(path
, sizeof(path
), "/proc/%u/%s", getpid(), nsfile
->name
);
259 rc
= get_ns_ino(path
, &my_ino
);
261 return false; /* Unsupported NS */
263 /* It is not permitted to use setns(2) to reenter the caller's
264 * current user namespace; see setns(2) man page for more details.
266 if (nsfile
->nstype
& CLONE_NEWUSER
) {
267 ino_t target_ino
= 0;
269 snprintf(path
, sizeof(path
), "/proc/%u/%s", target
, nsfile
->name
);
270 if (get_ns_ino(path
, &target_ino
) != 0)
271 err(EXIT_FAILURE
, _("stat of %s failed"), path
);
273 if (my_ino
== target_ino
)
277 return true; /* All pass */
/*
 * Fork and let the child carry on while the parent waits for it and then
 * mirrors how the child ended.
 *
 * Visible behaviour: SIGCHLD handling is reset to the default, a fork
 * failure is fatal, and per the existing comment only the child returns
 * to the caller. The parent calls waitpid() with WUNTRACED; when the
 * child is reported as stopped the parent stops itself with SIGSTOP and
 * then sends SIGCONT to the child. Afterwards the parent exits with the
 * exit status of the child, or sends itself the signal that terminated
 * the child.
 * NOTE(review): the declarations of child, status and ret, the fork()
 * call itself and the loop around waitpid() are not visible in this
 * listing - confirm the exact control flow.
 */
280 static void continue_as_child(void)
286 /* Clear any inherited settings */
287 signal(SIGCHLD
, SIG_DFL
);
291 err(EXIT_FAILURE
, _("fork failed"));
293 /* Only the child returns */
/* WUNTRACED makes waitpid() report a stopped child as well. */
298 ret
= waitpid(child
, &status
, WUNTRACED
);
299 if ((ret
== child
) && (WIFSTOPPED(status
))) {
300 /* The child suspended so suspend us as well */
301 kill(getpid(), SIGSTOP
);
302 kill(child
, SIGCONT
);
307 /* Return the child's exit code if possible */
308 if (WIFEXITED(status
)) {
309 exit(WEXITSTATUS(status
));
310 } else if (WIFSIGNALED(status
)) {
311 kill(getpid(), WTERMSIG(status
));
/*
 * Program entry point. The visible code:
 *  - declares the long options and parses the command line with
 *    getopt_long(), opening namespace, root and cwd descriptors as options
 *    are seen and collecting the requested CLONE_NEW* flags in namespaces;
 *  - when built with HAVE_LIBSELINUX, reads the SELinux context of the
 *    target with getpidcon() and installs it with setexeccon();
 *  - for --all, requires a target pid and adds the namespaces accepted by
 *    is_usable_namespace();
 *  - opens the remaining descriptors (namespaces, root, cwd, environ and
 *    the /proc/<pid> entry used for uid/gid), rejects --join-cgroup
 *    outside cgroup v2 and calls set_parent_user_ns_fd();
 *  - calls setns() on the table entries in two passes over
 *    namespace_files, the first starting at index 1 and the second at
 *    index 0;
 *  - changes the root and working directory, imports the environment of
 *    the target, acts on cgroup_procs_fd when it is open, sets groups,
 *    gid and uid, calls cap_permitted_to_ambient() when keepcaps is set
 *    together with CLONE_NEWUSER, and finally execvp()s argv[optind].
 * NOTE(review): many lines of main() (case labels, several declarations,
 * branch bodies and the closing brace) are missing from this listing, so
 * the exact control flow must be confirmed against the complete file.
 */
316 int main(int argc
, char *argv
[])
319 OPT_PRESERVE_CRED
= CHAR_MAX
+ 1,
323 static const struct option longopts
[] = {
324 { "all", no_argument
, NULL
, 'a' },
325 { "help", no_argument
, NULL
, 'h' },
326 { "version", no_argument
, NULL
, 'V'},
327 { "target", required_argument
, NULL
, 't' },
328 { "mount", optional_argument
, NULL
, 'm' },
329 { "uts", optional_argument
, NULL
, 'u' },
330 { "ipc", optional_argument
, NULL
, 'i' },
331 { "net", optional_argument
, NULL
, 'n' },
332 { "pid", optional_argument
, NULL
, 'p' },
333 { "user", optional_argument
, NULL
, 'U' },
334 { "cgroup", optional_argument
, NULL
, 'C' },
335 { "time", optional_argument
, NULL
, 'T' },
336 { "setuid", required_argument
, NULL
, 'S' },
337 { "setgid", required_argument
, NULL
, 'G' },
338 { "root", optional_argument
, NULL
, 'r' },
339 { "wd", optional_argument
, NULL
, 'w' },
/* NOTE(review): wdns takes optional_argument here (and W:: in the option
 * string) while usage() prints it as --wdns <dir> - confirm. */
340 { "wdns", optional_argument
, NULL
, 'W' },
341 { "env", no_argument
, NULL
, 'e' },
342 { "no-fork", no_argument
, NULL
, 'F' },
343 { "join-cgroup", no_argument
, NULL
, 'c'},
344 { "preserve-credentials", no_argument
, NULL
, OPT_PRESERVE_CRED
},
345 { "keep-caps", no_argument
, NULL
, OPT_KEEPCAPS
},
346 { "user-parent", no_argument
, NULL
, OPT_USER_PARENT
},
347 #ifdef HAVE_LIBSELINUX
348 { "follow-context", no_argument
, NULL
, 'Z' },
352 static const ul_excl_t excl
[] = { /* rows and cols in ASCII order */
356 int excl_st
[ARRAY_SIZE(excl
)] = UL_EXCL_STATUS_INIT
;
358 struct namespace_file
*nsfile
;
359 int c
, pass
, namespaces
= 0, setgroups_nerrs
= 0, preserve_cred
= 0;
360 bool do_rd
= false, do_wd
= false, do_uid
= false, force_uid
= false,
361 do_gid
= false, force_gid
= false, do_env
= false, do_all
= false,
362 do_join_cgroup
= false, do_user_parent
= false;
363 int do_fork
= -1; /* unknown yet */
368 struct ul_env_list
*envls
;
369 #ifdef HAVE_LIBSELINUX
373 setlocale(LC_ALL
, "");
374 bindtextdomain(PACKAGE
, LOCALEDIR
);
376 close_stdout_atexit();
/* Option parsing loop; the leading + in the option string stops parsing at
 * the first non-option argument (see getopt(3)). */
379 getopt_long(argc
, argv
, "+ahVt:m::u::i::n::p::C::U::T::S:G:r::w::W::ecFZ",
380 longopts
, NULL
)) != -1) {
382 err_exclusive_options(c
, longopts
, excl
, excl_st
);
389 namespace_target_pid
=
390 strtoul_or_err(optarg
, _("failed to parse pid"));
394 open_namespace_fd(CLONE_NEWNS
, optarg
);
396 namespaces
|= CLONE_NEWNS
;
400 open_namespace_fd(CLONE_NEWUTS
, optarg
);
402 namespaces
|= CLONE_NEWUTS
;
406 open_namespace_fd(CLONE_NEWIPC
, optarg
);
408 namespaces
|= CLONE_NEWIPC
;
412 open_namespace_fd(CLONE_NEWNET
, optarg
);
414 namespaces
|= CLONE_NEWNET
;
418 open_namespace_fd(CLONE_NEWPID
, optarg
);
420 namespaces
|= CLONE_NEWPID
;
424 open_namespace_fd(CLONE_NEWCGROUP
, optarg
);
426 namespaces
|= CLONE_NEWCGROUP
;
430 open_namespace_fd(CLONE_NEWUSER
, optarg
);
432 namespaces
|= CLONE_NEWUSER
;
436 open_namespace_fd(CLONE_NEWTIME
, optarg
);
438 namespaces
|= CLONE_NEWTIME
;
441 if (strcmp(optarg
, "follow") == 0)
444 uid
= strtoul_or_err(optarg
, _("failed to parse uid"));
448 if (strcmp(optarg
, "follow") == 0)
451 gid
= strtoul_or_err(optarg
, _("failed to parse gid"));
458 do_join_cgroup
= true;
462 open_target_fd(&root_fd
, "root", optarg
);
468 open_target_fd(&wd_fd
, "cwd", optarg
);
478 case OPT_PRESERVE_CRED
:
484 case OPT_USER_PARENT
:
485 do_user_parent
= true;
487 #ifdef HAVE_LIBSELINUX
495 print_version(EXIT_SUCCESS
);
497 errtryhelp(EXIT_FAILURE
);
501 #ifdef HAVE_LIBSELINUX
502 if (selinux
&& is_selinux_enabled() > 0) {
505 if (!namespace_target_pid
)
506 errx(EXIT_FAILURE
, _("no target PID specified for --follow-context"));
507 if (getpidcon(namespace_target_pid
, &scon
) < 0)
508 errx(EXIT_FAILURE
, _("failed to get %d SELinux context"),
509 (int) namespace_target_pid
);
510 if (setexeccon(scon
) < 0)
511 errx(EXIT_FAILURE
, _("failed to set exec context to '%s'"), scon
);
517 if (!namespace_target_pid
)
518 errx(EXIT_FAILURE
, _("no target PID specified for --all"));
519 for (nsfile
= namespace_files
; nsfile
->nstype
; nsfile
++) {
521 continue; /* namespace already specified */
523 if (!is_usable_namespace(namespace_target_pid
, nsfile
))
526 namespaces
|= nsfile
->nstype
;
531 * Open remaining namespace and directory descriptors.
533 for (nsfile
= namespace_files
; nsfile
->nstype
; nsfile
++)
534 if (nsfile
->nstype
& namespaces
)
535 open_namespace_fd(nsfile
->nstype
, NULL
);
537 open_target_fd(&root_fd
, "root", NULL
);
539 open_target_fd(&wd_fd
, "cwd", NULL
);
541 open_target_fd(&env_fd
, "environ", NULL
);
542 if (do_uid
|| do_gid
)
543 open_target_fd(&uid_gid_fd
, "", NULL
);
544 if (do_join_cgroup
) {
546 errx(EXIT_FAILURE
, _("--join-cgroup is only supported in cgroup v2"));
551 * Get parent userns from any available ns.
554 set_parent_user_ns_fd();
557 * Update namespaces variable to contain all requested namespaces
559 for (nsfile
= namespace_files
; nsfile
->nstype
; nsfile
++) {
562 namespaces
|= nsfile
->nstype
;
565 /* for user namespaces we always set UID and GID (default is 0)
566 * and clear root's groups if --preserve-credentials is not specified */
567 if ((namespaces
& CLONE_NEWUSER
) && !preserve_cred
) {
568 force_uid
= true, force_gid
= true;
570 /* We call setgroups() before and after we enter user namespace,
571 * let's complain only if both fail */
572 if (setgroups(0, NULL
) != 0)
577 * Now that we know which namespaces we want to enter, enter
578 * them. Do this in two passes, not entering the user
579 * namespace on the first pass. So if we're deprivileging the
580 * container we'll enter the user namespace last and if we're
581 * privileging it then we enter the user namespace first
582 * (because the initial setns will fail).
584 for (pass
= 0; pass
< 2; pass
++) {
585 for (nsfile
= namespace_files
+ 1 - pass
; nsfile
->nstype
; nsfile
++) {
588 if (nsfile
->nstype
== CLONE_NEWPID
&& do_fork
== -1)
590 if (setns(nsfile
->fd
, nsfile
->nstype
)) {
593 _("reassociate to namespace '%s' failed"),
604 /* Remember the current working directory if I'm not changing it */
605 if (root_fd
>= 0 && wd_fd
< 0 && wdns
== NULL
) {
606 wd_fd
= open(".", O_RDONLY
);
609 _("cannot open current working directory"));
612 /* Change the root directory */
614 if (fchdir(root_fd
) < 0)
616 _("change directory by root file descriptor failed"));
619 err(EXIT_FAILURE
, _("chroot failed"));
621 err(EXIT_FAILURE
, _("cannot change directory to %s"), "/");
627 /* working directory specified as in-namespace path */
629 wd_fd
= open(wdns
, O_RDONLY
);
632 _("cannot open current working directory"));
635 /* Change the working directory */
637 if (fchdir(wd_fd
) < 0)
639 _("change directory by working directory file descriptor failed"));
645 /* Pass environment variables of the target process to the spawned process */
647 if ((envls
= env_from_fd(env_fd
)) == NULL
)
648 err(EXIT_FAILURE
, _("failed to get environment variables"));
650 if (env_list_setenv(envls
) < 0)
651 err(EXIT_FAILURE
, _("failed to set environment variables"));
652 env_list_free(envls
);
656 // Join into the target cgroup
657 if (cgroup_procs_fd
>= 0)
660 if (uid_gid_fd
>= 0) {
/* NOTE(review): fstat(2) returns 0 on success and -1 on failure, so a test
 * for a result greater than 0 can never report an error; a test for
 * less than 0 (or not equal to 0) looks intended - confirm and fix. */
663 if (fstat(uid_gid_fd
, &st
) > 0)
664 err(EXIT_FAILURE
, _("can not get process stat"));
678 if (force_uid
|| force_gid
) {
679 if (force_gid
&& setgroups(0, NULL
) != 0 && setgroups_nerrs
) /* drop supplementary groups */
680 err(EXIT_FAILURE
, _("setgroups failed"));
681 if (force_gid
&& setgid(gid
) < 0) /* change GID */
682 err(EXIT_FAILURE
, _("setgid failed"));
683 if (force_uid
&& setuid(uid
) < 0) /* change UID */
684 err(EXIT_FAILURE
, _("setuid failed"));
687 if (keepcaps
&& (namespaces
& CLONE_NEWUSER
))
688 cap_permitted_to_ambient();
/* execvp() returns only on failure; errexec() then reports it. */
691 execvp(argv
[optind
], argv
+ optind
);
692 errexec(argv
[optind
]);