console_codes.4: Document that \e[1;n] and \e[2;n] support 16 colors

[thirdparty/man-pages.git] / man7 / user_namespaces.7
diff --git a/man7/user_namespaces.7 b/man7/user_namespaces.7

index 5af09c48eeb4f67c472df3a94fa833dd17cc6887..49e9a5eed734fca3159cd18e6f3ea6b508869bef 100644 (file)
--- a/man7/user_namespaces.7
+++ b/man7/user_namespaces.7
@@ -24,20 +24,20 @@
  .\" %%%LICENSE_END
  .\"
  .\"
-.TH USER_NAMESPACES 7 2016-07-17 "Linux" "Linux Programmer's Manual"
+.TH USER_NAMESPACES 7 2019-03-06 "Linux" "Linux Programmer's Manual"
  .SH NAME
  user_namespaces \- overview of Linux user namespaces
  .SH DESCRIPTION
  For an overview of namespaces, see
  .BR namespaces (7).
-
+.PP
  User namespaces isolate security-related identifiers and attributes,
  in particular,
  user IDs and group IDs (see
  .BR credentials (7)),
  the root directory,
  keys (see
-.BR keyctl (2)),
+.BR keyrings (7)),
  .\" FIXME: This page says very little about the interaction
  .\" of user namespaces and keys. Add something on this topic.
  and capabilities (see
@@ -66,7 +66,7 @@ or
  with the
  .BR CLONE_NEWUSER
  flag.
-
+.PP
  The kernel imposes (since version 3.11) a limit of 32 nested levels of
  .\" commit 8742f229b635bf1c1c84a3dfe5e47c814c20b5c8
  user namespaces.
@@ -77,7 +77,7 @@ or
  .BR clone (2)
  that would cause this limit to be exceeded fail with the error
  .BR EUSERS .
-
+.PP
  Each process is a member of exactly one user namespace.
  A process created via
  .BR fork (2)
@@ -92,7 +92,7 @@ if it has the
  .BR CAP_SYS_ADMIN
  in that namespace;
  upon doing so, it gains a full set of capabilities in that namespace.
-
+.PP
  A call to
  .BR clone (2)
  or
@@ -104,13 +104,13 @@ flag makes the new child process (for
  or the caller (for
  .BR unshare (2))
  a member of the new user namespace created by the call.
-
+.PP
  The
  .BR NS_GET_PARENT
  .BR ioctl (2)
  operation can be used to discover the parental relationship
  between user namespaces; see
-.BR namespaces (7).
+.BR ioctl_ns (2).
  .\"
  .\" ============================================================
  .\"
@@ -136,7 +136,7 @@ and
  user namespace,
  even if the new namespace is created or joined by the root user
  (i.e., a process with user ID 0 in the root namespace).
-
+.PP
  Note that a call to
  .BR execve (2)
  will cause a process's capabilities to be recalculated in the usual way (see
@@ -146,7 +146,7 @@ unless the process has a user ID of 0 within the namespace,
  or the executable file has a nonempty inheritable capabilities mask,
  the process will lose all capabilities.
  See the discussion of user and group ID mappings, below.
-
+.PP
  A call to
  .BR clone (2),
  .BR unshare (2),
@@ -171,7 +171,7 @@ retaining its user namespace membership by using a pair of
  .BR setns (2)
  calls to move to another user namespace and then return to
  its original user namespace.
-
+.PP
  The rules for determining whether or not a process has a capability
  in a particular user namespace are as follows:
  .IP 1. 3
@@ -212,6 +212,12 @@ has all capabilities in the namespace.
  By virtue of the previous rule,
  this means that the process has all capabilities in all
  further removed descendant user namespaces as well.
+The
+.B NS_GET_OWNER_UID
+.BR ioctl (2)
+operation can be used to discover the user ID of the owner of the namespace;
+see
+.BR ioctl_ns (2).
  .\"
  .\" ============================================================
  .\"
@@ -221,8 +227,9 @@ permits a process to perform operations (that require privilege)
  only on resources governed by that namespace.
  In other words, having a capability in a user namespace permits a process
  to perform privileged operations on resources that are governed by (nonuser)
-namespaces associated with the user namespace (see the next subsection).
-
+namespaces owned by (associated with) the user namespace
+(see the next subsection).
+.PP
  On the other hand, there are many privileged operations that affect
  resources that are not associated with any namespace type,
  for example, changing the system time (governed by
@@ -234,14 +241,14 @@ and creating a device (governed by
  Only a process with privileges in the
  .I initial
  user namespace can perform such operations.
-
+.PP
  Holding
  .B CAP_SYS_ADMIN
-within the user namespace associated with a process's mount namespace
+within the user namespace that owns a process's mount namespace
  allows that process to create bind mounts
  and mount the following types of filesystems:
  .\" fs_flags = FS_USERNS_MOUNT in kernel sources
-
+.PP
  .RS 4
  .PD 0
  .IP * 2
@@ -271,22 +278,22 @@ and mount the following types of filesystems:
  .PP
  Holding
  .B CAP_SYS_ADMIN
-within the user namespace associated with a process's cgroup namespace
+within the user namespace that owns a process's cgroup namespace
  allows (since Linux 4.6)
-that process to the mount cgroup version 2 filesystem and
+that process to mount the cgroup version 2 filesystem and
  cgroup version 1 named hierarchies
  (i.e., cgroup filesystems mounted with the
-.BR """none,name="""
+.IR """none,name="""
  option).
-
+.PP
  Holding
  .B CAP_SYS_ADMIN
-within the user namespace associated with a process's PID namespace
+within the user namespace that owns a process's PID namespace
  allows (since Linux 3.8)
  that process to mount
  .I /proc
  filesystems.
-
+.PP
  Note however, that mounting block-based filesystems can be done
  only by a process that holds
  .BR CAP_SYS_ADMIN
@@ -296,16 +303,16 @@ in the initial user namespace.
  .\"
  .SS Interaction of user namespaces and other types of namespaces
  Starting in Linux 3.8, unprivileged processes can create user namespaces,
-and other the other types of namespaces can be created with just the
+and the other types of namespaces can be created with just the
  .B CAP_SYS_ADMIN
  capability in the caller's user namespace.
-
-When a non-user-namespace is created,
+.PP
+When a nonuser namespace is created,
  it is owned by the user namespace in which the creating process
  was a member at the time of the creation of the namespace.
-Actions on the non-user-namespace
+Actions on the nonuser namespace
  require capabilities in the corresponding user namespace.
-
+.PP
  If
  .BR CLONE_NEWUSER
  is specified along with other
@@ -322,12 +329,12 @@ or caller
  privileges over the remaining namespaces created by the call.
  Thus, it is possible for an unprivileged caller to specify this combination
  of flags.
-
+.PP
  When a new namespace (other than a user namespace) is created via
  .BR clone (2)
  or
  .BR unshare (2),
-the kernel records the user namespace of the creating process against
+the kernel records the user namespace of the creating process as the owner of
  the new namespace.
  (This association can't be changed.)
  When a process in the new namespace subsequently performs
@@ -339,18 +346,18 @@ For example, suppose that a process attempts to change the hostname
  .RB ( sethostname (2)),
  a resource governed by the UTS namespace.
  In this case,
-the kernel will determine which user namespace is associated with
+the kernel will determine which user namespace owns
  the process's UTS namespace, and check whether the process has the
  required capability
  .RB ( CAP_SYS_ADMIN )
  in that user namespace.
-
+.PP
  The
  .BR NS_GET_USERNS
  .BR ioctl (2)
-operation can be used to discover the user namespace with which
-a non-user namespace is associated; see
-.BR namespaces (7).
+operation can be used to discover the user namespace
+that owns a nonuser namespace; see
+.BR ioctl_ns (2).
  .\"
  .\" ============================================================
  .\"
@@ -369,13 +376,13 @@ inside the user namespace for the process
  .IR pid .
  These files can be read to view the mappings in a user namespace and
  written to (once) to define the mappings.
-
+.PP
  The description in the following paragraphs explains the details for
  .IR uid_map ;
  .IR gid_map
  is exactly the same,
  but each instance of "user ID" is replaced by "group ID".
-
+.PP
  The
  .I uid_map
  file exposes the mapping of user IDs from the user namespace
@@ -389,7 +396,7 @@ will potentially see different values when reading from a particular
  .I uid_map
  file, depending on the user ID mappings for the user namespaces
  of the reading processes.
-
+.PP
  Each line in the
  .I uid_map
  file specifies a 1-to-1 mapping of a range of contiguous
@@ -441,7 +448,7 @@ System calls that return user IDs (group IDs)\(emfor example,
  and the credential fields in the structure returned by
  .BR stat (2)\(emreturn
  the user ID (group ID) mapped into the caller's user namespace.
-
+.PP
  When a process accesses a file, its user and group IDs
  are mapped into the initial user namespace for the purpose of permission
  checking and assigning IDs when creating a file.
@@ -449,7 +456,7 @@ When a process retrieves file user and group IDs via
  .BR stat (2),
  the IDs are mapped in the opposite direction,
  to produce values relative to the process user and group ID mappings.
-
+.PP
  The initial user namespace has no parent namespace,
  but, for consistency, the kernel provides dummy user and group
  ID mapping files for this namespace.
@@ -458,14 +465,14 @@ Looking at the
  file
  .RI ( gid_map
  is the same) from a shell in the initial namespace shows:
-
+.PP
  .in +4n
-.nf
+.EX
  $ \fBcat /proc/$$/uid_map\fP
           0          0 4294967295
-.fi
+.EE
  .in
-
+.PP
  This mapping tells us
  that the range starting at user ID 0 in this namespace
  maps to a range starting at 0 in the (nonexistent) parent namespace,
@@ -499,7 +506,7 @@ file in a user namespace fails with the error
  Similar rules apply for
  .I gid_map
  files.
-
+.PP
  The lines written to
  .IR uid_map
  .RI ( gid_map )
@@ -510,13 +517,15 @@ and the last field must be greater than 0.
  .IP *
  Lines are terminated by newline characters.
  .IP *
-There is an (arbitrary) limit on the number of lines in the file.
-As at Linux 3.18, the limit is five lines.
+There is a limit on the number of lines in the file.
+In Linux 4.14 and earlier, this limit was (arbitrarily)
+.\" 5*12-byte records could fit in a 64B cache line
+set at 5 lines.
+Since Linux 4.15,
+.\" commit 6397fac4915ab3002dc15aae751455da1a852f25
+the limit is 340 lines.
  In addition, the number of bytes written to
  the file must be less than the system page size,
-.\" FIXME(Eric): the restriction "less than" rather than "less than or equal"
-.\" seems strangely arbitrary. Furthermore, the comment does not agree
-.\" with the code in kernel/user_namespace.c. Which is correct?
  and the write must be performed at the start of the file (i.e.,
  .BR lseek (2)
  and
@@ -540,7 +549,7 @@ At least one line must be written to the file.
  .PP
  Writes that violate the above rules fail with the error
  .BR EINVAL .
-
+.PP
  In order for a process to write to the
  .I /proc/[pid]/uid_map
  .RI ( /proc/[pid]/gid_map )
@@ -623,7 +632,7 @@ and
  .I gid_map
  files have been written, only the mapped values may be used in
  system calls that change user and group IDs.
-
+.PP
  For user IDs, the relevant system calls include
  .BR setuid (2),
  .BR setfsuid (2),
@@ -637,7 +646,7 @@ For group IDs, the relevant system calls include
  .BR setresgid (2),
  and
  .BR setgroups (2).
-
+.PP
  Writing
  .RI \(dq deny \(dq
  to the
@@ -685,7 +694,7 @@ file (and regardless of the process's capabilities), calls to
  are also not permitted if
  .IR /proc/[pid]/gid_map
  has not yet been set.
-
+.PP
  A privileged process (one with the
  .BR CAP_SYS_ADMIN
  capability in the namespace) may write either of the strings
@@ -701,7 +710,7 @@ Writing the string
  .RI \(dq deny \(dq
  prevents any process in the user namespace from employing
  .BR setgroups (2).
-
+.PP
  The essence of the restrictions described in the preceding
  paragraph is that it is permitted to write to
  .I /proc/[pid]/setgroups
@@ -720,10 +729,10 @@ a process can transition only from
  being disallowed to
  .BR setgroups (2)
  being allowed.
-
+.PP
  The default value of this file in the initial user namespace is
  .RI \(dq allow \(dq.
-
+.PP
  Once
  .IR /proc/[pid]/gid_map
  has been written to
@@ -738,11 +747,11 @@ to
  .IR /proc/[pid]/setgroups
  (the write fails with the error
  .BR EPERM ).
-
+.PP
  A child user namespace inherits the
  .IR /proc/[pid]/setgroups
  setting from its parent.
-
+.PP
  If the
  .I setgroups
  file has the value
@@ -752,11 +761,11 @@ then the
  system call can't subsequently be reenabled (by writing
  .RI \(dq allow \(dq
  to the file) in this user namespace.
-(Attempts to do so will fail with the error
+(Attempts to do so fail with the error
  .BR EPERM .)
  This restriction also propagates down to all child user namespaces of
  this user namespace.
-
+.PP
  The
  .I /proc/[pid]/setgroups
  file was added in Linux 3.19,
@@ -790,7 +799,7 @@ by denying any pathway for an unprivileged process to drop groups with
  .\"    * Must write BEFORE writing to /proc/PID/gid_map
  .\"
  .\" setgroups()
-.\"    * Must already have written to gid_maps
+.\"    * Must already have written to gid_map
  .\"    * /proc/PID/setgroups must be "allow"
  .\"
  .\" /proc/PID/gid_map -- writing
@@ -815,7 +824,7 @@ and
  .IR /proc/sys/kernel/overflowgid
  in
  .BR proc (5).
-
+.PP
  The cases where unmapped IDs are mapped in this fashion include
  system calls that return user IDs
  .RB ( getuid (2),
@@ -843,7 +852,7 @@ credentials written to the process accounting file (see
  .BR acct (5)),
  and credentials returned with POSIX message queue notifications (see
  .BR mq_notify (3)).
-
+.PP
  There is one notable case where unmapped user and group IDs are
  .I not
  .\" from_kuid(), from_kgid()
@@ -854,7 +863,7 @@ When viewing a
  or
  .I gid_map
  file in which there is no mapping for the second field,
-that field is displayed as 4294967295 (\-1 as an unsigned integer);
+that field is displayed as 4294967295 (\-1 as an unsigned integer).
  .\"
  .\" ============================================================
  .\"
@@ -909,7 +918,7 @@ User namespaces require support in a range of subsystems across
  the kernel.
  When an unsupported subsystem is configured into the kernel,
  it is not possible to configure user namespaces support.
-
+.PP
  As at Linux 3.8, most relevant subsystems supported user namespaces,
  but a number of filesystems did not have the infrastructure needed
  to map user and group IDs between user namespaces.
@@ -929,20 +938,20 @@ The comments and
  .I usage()
  function inside the program provide a full explanation of the program.
  The following shell session demonstrates its use.
-
+.PP
  First, we look at the run-time environment:
-
+.PP
  .in +4n
-.nf
+.EX
  $ \fBuname \-rs\fP     # Need Linux 3.8 or later
  Linux 3.8.0
  $ \fBid \-u\fP         # Running as unprivileged user
  1000
  $ \fBid \-g\fP
  1000
-.fi
+.EE
  .in
-
+.PP
  Now start a new shell in new user
  .RI ( \-U ),
  mount
@@ -954,56 +963,56 @@ namespaces, with user ID
  and group ID
  .RI ( \-G )
  1000 mapped to 0 inside the user namespace:
-
+.PP
  .in +4n
-.nf
+.EX
  $ \fB./userns_child_exec \-p \-m \-U \-M '0 1000 1' \-G '0 1000 1' bash\fP
-.fi
+.EE
  .in
-
+.PP
  The shell has PID 1, because it is the first process in the new
  PID namespace:
-
+.PP
  .in +4n
-.nf
+.EX
  bash$ \fBecho $$\fP
  1
-.fi
-.in
-
-Inside the user namespace, the shell has user and group ID 0,
-and a full set of permitted and effective capabilities:
-
-.in +4n
-.nf
-bash$ \fBcat /proc/$$/status | egrep '^[UG]id'\fP
-Uid:   0       0       0       0
-Gid:   0       0       0       0
-bash$ \fBcat /proc/$$/status | egrep '^Cap(Prm|Inh|Eff)'\fP
-CapInh:        0000000000000000
-CapPrm:        0000001fffffffff
-CapEff:        0000001fffffffff
-.fi
+.EE
  .in
-
+.PP
  Mounting a new
  .I /proc
  filesystem and listing all of the processes visible
  in the new PID namespace shows that the shell can't see
  any processes outside the PID namespace:
-
+.PP
  .in +4n
-.nf
+.EX
  bash$ \fBmount \-t proc proc /proc\fP
  bash$ \fBps ax\fP
    PID TTY      STAT   TIME COMMAND
      1 pts/3    S      0:00 bash
     22 pts/3    R+     0:00 ps ax
-.fi
+.EE
+.in
+.PP
+Inside the user namespace, the shell has user and group ID 0,
+and a full set of permitted and effective capabilities:
+.PP
+.in +4n
+.EX
+bash$ \fBcat /proc/$$/status | egrep '^[UG]id'\fP
+Uid:   0       0       0       0
+Gid:   0       0       0       0
+bash$ \fBcat /proc/$$/status | egrep '^Cap(Prm|Inh|Eff)'\fP
+CapInh:        0000000000000000
+CapPrm:        0000001fffffffff
+CapEff:        0000001fffffffff
+.EE
  .in
  .SS Program source
  \&
-.nf
+.EX
  /* userns_child_exec.c
  
     Licensed under GNU General Public License v2 or later
@@ -1027,7 +1036,7 @@ bash$ \fBps ax\fP
  /* A simple error\-handling function: print an error message based
     on the value in \(aqerrno\(aq and terminate the calling process */
  
-#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \\
+#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \e
                          } while (0)
  
  struct child_args {
@@ -1040,35 +1049,35 @@ static int verbose;
  static void
  usage(char *pname)
  {
-    fprintf(stderr, "Usage: %s [options] cmd [arg...]\\n\\n", pname);
+    fprintf(stderr, "Usage: %s [options] cmd [arg...]\en\en", pname);
      fprintf(stderr, "Create a child process that executes a shell "
-            "command in a new user namespace,\\n"
-            "and possibly also other new namespace(s).\\n\\n");
-    fprintf(stderr, "Options can be:\\n\\n");
+            "command in a new user namespace,\en"
+            "and possibly also other new namespace(s).\en\en");
+    fprintf(stderr, "Options can be:\en\en");
  #define fpe(str) fprintf(stderr, "    %s", str);
-    fpe("\-i          New IPC namespace\\n");
-    fpe("\-m          New mount namespace\\n");
-    fpe("\-n          New network namespace\\n");
-    fpe("\-p          New PID namespace\\n");
-    fpe("\-u          New UTS namespace\\n");
-    fpe("\-U          New user namespace\\n");
-    fpe("\-M uid_map  Specify UID map for user namespace\\n");
-    fpe("\-G gid_map  Specify GID map for user namespace\\n");
-    fpe("\-z          Map user\(aqs UID and GID to 0 in user namespace\\n");
-    fpe("            (equivalent to: \-M \(aq0 <uid> 1\(aq \-G \(aq0 <gid> 1\(aq)\\n");
-    fpe("\-v          Display verbose messages\\n");
-    fpe("\\n");
-    fpe("If \-z, \-M, or \-G is specified, \-U is required.\\n");
-    fpe("It is not permitted to specify both \-z and either \-M or \-G.\\n");
-    fpe("\\n");
-    fpe("Map strings for \-M and \-G consist of records of the form:\\n");
-    fpe("\\n");
-    fpe("    ID\-inside\-ns   ID\-outside\-ns   len\\n");
-    fpe("\\n");
+    fpe("\-i          New IPC namespace\en");
+    fpe("\-m          New mount namespace\en");
+    fpe("\-n          New network namespace\en");
+    fpe("\-p          New PID namespace\en");
+    fpe("\-u          New UTS namespace\en");
+    fpe("\-U          New user namespace\en");
+    fpe("\-M uid_map  Specify UID map for user namespace\en");
+    fpe("\-G gid_map  Specify GID map for user namespace\en");
+    fpe("\-z          Map user\(aqs UID and GID to 0 in user namespace\en");
+    fpe("            (equivalent to: \-M \(aq0 <uid> 1\(aq \-G \(aq0 <gid> 1\(aq)\en");
+    fpe("\-v          Display verbose messages\en");
+    fpe("\en");
+    fpe("If \-z, \-M, or \-G is specified, \-U is required.\en");
+    fpe("It is not permitted to specify both \-z and either \-M or \-G.\en");
+    fpe("\en");
+    fpe("Map strings for \-M and \-G consist of records of the form:\en");
+    fpe("\en");
+    fpe("    ID\-inside\-ns   ID\-outside\-ns   len\en");
+    fpe("\en");
      fpe("A map string can contain multiple records, separated"
-        " by commas;\\n");
+        " by commas;\en");
      fpe("the commas are replaced by newlines before writing"
-        " to map files.\\n");
+        " to map files.\en");
  
      exit(EXIT_FAILURE);
  }
@@ -1096,17 +1105,17 @@ update_map(char *mapping, char *map_file)
      map_len = strlen(mapping);
      for (j = 0; j < map_len; j++)
          if (mapping[j] == \(aq,\(aq)
-            mapping[j] = \(aq\\n\(aq;
+            mapping[j] = \(aq\en\(aq;
  
      fd = open(map_file, O_RDWR);
      if (fd == \-1) {
-        fprintf(stderr, "ERROR: open %s: %s\\n", map_file,
+        fprintf(stderr, "ERROR: open %s: %s\en", map_file,
                  strerror(errno));
          exit(EXIT_FAILURE);
      }
  
      if (write(fd, mapping, map_len) != map_len) {
-        fprintf(stderr, "ERROR: write %s: %s\\n", map_file,
+        fprintf(stderr, "ERROR: write %s: %s\en", map_file,
                  strerror(errno));
          exit(EXIT_FAILURE);
      }
@@ -1146,13 +1155,13 @@ proc_setgroups_write(pid_t child_pid, char *str)
             user know. */
  
          if (errno != ENOENT)
-            fprintf(stderr, "ERROR: open %s: %s\\n", setgroups_path,
+            fprintf(stderr, "ERROR: open %s: %s\en", setgroups_path,
                  strerror(errno));
          return;
      }
  
      if (write(fd, str, strlen(str)) == \-1)
-        fprintf(stderr, "ERROR: write %s: %s\\n", setgroups_path,
+        fprintf(stderr, "ERROR: write %s: %s\en", setgroups_path,
              strerror(errno));
  
      close(fd);
@@ -1174,13 +1183,15 @@ childFunc(void *arg)
                                     when parent closes its descriptor */
      if (read(args\->pipe_fd[0], &ch, 1) != 0) {
          fprintf(stderr,
-                "Failure in child: read from pipe returned != 0\\n");
+                "Failure in child: read from pipe returned != 0\en");
          exit(EXIT_FAILURE);
      }
  
+    close(args\->pipe_fd[0]);
+
      /* Execute a shell command */
  
-    printf("About to exec %s\\n", args\->argv[0]);
+    printf("About to exec %s\en", args\->argv[0]);
      execvp(args\->argv[0], args\->argv);
      errExit("execvp");
  }
@@ -1260,7 +1271,7 @@ main(int argc, char *argv[])
      /* Parent falls through to here */
  
      if (verbose)
-        printf("%s: PID of child created by clone() is %ld\\n",
+        printf("%s: PID of child created by clone() is %ld\en",
                  argv[0], (long) child_pid);
  
      /* Update the UID and GID maps in the child */
@@ -1296,11 +1307,11 @@ main(int argc, char *argv[])
          errExit("waitpid");
  
      if (verbose)
-        printf("%s: terminating\\n", argv[0]);
+        printf("%s: terminating\en", argv[0]);
  
      exit(EXIT_SUCCESS);
  }
-.fi
+.EE
  .SH SEE ALSO
  .BR newgidmap (1),      \" From the shadow package
  .BR newuidmap (1),      \" From the shadow package
@@ -1316,6 +1327,6 @@ main(int argc, char *argv[])
  .BR credentials (7),
  .BR namespaces (7),
  .BR pid_namespaces (7)
-.sp
+.PP
  The kernel source file
  .IR Documentation/namespaces/resource-control.txt .