From 0d5260b66c5581c8a5855a5f49e298e48e8baf82 Mon Sep 17 00:00:00 2001 From: michael-dev Date: Wed, 15 Apr 2020 23:16:53 +0200 Subject: [PATCH] unshare: Fix PID and TIME namespace persistence After unshare(...) is called, /proc/self/ns/pid does not change. Instead, only /proc/self/ns/pid_for_children is affected. So bind-mounting /proc/self/ns/pid results in the original namespace getting bind-mounted. Fix this by instead bind-mounting ns/pid_for_children. [kzak@redhat.com: - add ns/time_for_children - remove C++ comments - resolve commit conflicts] Signed-off-by: Michael Braun Signed-off-by: Karel Zak --- sys-utils/unshare.1 | 4 +++ sys-utils/unshare.c | 68 ++++++++++++++++++++++++++------------------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/sys-utils/unshare.1 b/sys-utils/unshare.1 index 2e8d76c727..a58821921c 100644 --- a/sys-utils/unshare.1 +++ b/sys-utils/unshare.1 @@ -20,6 +20,10 @@ Once a persistent \%namespace is no longer needed, it can be unpersisted with .BR umount (8). See the \fBEXAMPLES\fR section for more details. .PP +.B unshare +since util-linux version 2.36 uses \fI/proc/[pid]/ns/pid_for_children\fP and \fI/proc/[pid]/ns/time_for_children\fP +files for persistent PID and TIME namespaces. This change requires Linux kernel 4.17 or newer. +.PP The namespaces to be unshared are indicated via options. 
Unshareable namespaces are: .TP .B mount namespace diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c index 6211aacb5c..7005acfc45 100644 --- a/sys-utils/unshare.c +++ b/sys-utils/unshare.c @@ -64,9 +64,9 @@ static struct namespace_file { { .type = CLONE_NEWIPC, .name = "ns/ipc" }, { .type = CLONE_NEWUTS, .name = "ns/uts" }, { .type = CLONE_NEWNET, .name = "ns/net" }, - { .type = CLONE_NEWPID, .name = "ns/pid" }, + { .type = CLONE_NEWPID, .name = "ns/pid_for_children" }, { .type = CLONE_NEWNS, .name = "ns/mnt" }, - { .type = CLONE_NEWTIME, .name = "ns/time" }, + { .type = CLONE_NEWTIME, .name = "ns/time_for_children" }, { .name = NULL } }; @@ -400,6 +400,7 @@ int main(int argc, char *argv[]) const char *procmnt = NULL; const char *newroot = NULL; const char *newdir = NULL; + pid_t pid_bind = 0; pid_t pid = 0; int fds[2]; int status; @@ -542,13 +543,37 @@ int main(int argc, char *argv[]) "unsharing of a time namespace (-t)")); if (npersists && (unshare_flags & CLONE_NEWNS)) - bind_ns_files_from_child(&pid, fds); + bind_ns_files_from_child(&pid_bind, fds); if (-1 == unshare(unshare_flags)) err(EXIT_FAILURE, _("unshare failed")); - if (npersists) { - if (pid && (unshare_flags & CLONE_NEWNS)) { + if (force_boottime) + settime(boottime, CLOCK_BOOTTIME); + + if (force_monotonic) + settime(monotonic, CLOCK_MONOTONIC); + + if (forkit) { + /* force child forking before mountspace binding + * so pid_for_children is populated */ + pid = fork(); + + switch(pid) { + case -1: + err(EXIT_FAILURE, _("fork failed")); + case 0: /* child */ + if (pid_bind && (unshare_flags & CLONE_NEWNS)) + close(fds[1]); + break; + default: /* parent */ + break; + } + } + + if (npersists && (pid || !forkit)) { + /* run in parent */ + if (pid_bind && (unshare_flags & CLONE_NEWNS)) { int rc; char ch = PIPE_SYNC_BYTE; @@ -559,7 +584,7 @@ int main(int argc, char *argv[]) /* wait for bind_ns_files_from_child() */ do { - rc = waitpid(pid, &status, 0); + rc = waitpid(pid_bind, &status, 0); if (rc < 0) 
{ if (errno == EINTR) continue; @@ -574,29 +599,14 @@ int main(int argc, char *argv[]) bind_ns_files(getpid()); } - if (force_boottime) - settime(boottime, CLOCK_BOOTTIME); - - if (force_monotonic) - settime(monotonic, CLOCK_MONOTONIC); - - if (forkit) { - pid = fork(); - - switch(pid) { - case -1: - err(EXIT_FAILURE, _("fork failed")); - case 0: /* child */ - break; - default: /* parent */ - if (waitpid(pid, &status, 0) == -1) - err(EXIT_FAILURE, _("waitpid failed")); - if (WIFEXITED(status)) - return WEXITSTATUS(status); - if (WIFSIGNALED(status)) - kill(getpid(), WTERMSIG(status)); - err(EXIT_FAILURE, _("child exit failed")); - } + if (pid) { + if (waitpid(pid, &status, 0) == -1) + err(EXIT_FAILURE, _("waitpid failed")); + if (WIFEXITED(status)) + return WEXITSTATUS(status); + if (WIFSIGNALED(status)) + kill(getpid(), WTERMSIG(status)); + err(EXIT_FAILURE, _("child exit failed")); } if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0) -- 2.47.2