analyze: add 'capability' verb for dumping all known and unknown caps

[thirdparty/systemd.git] / man / systemd.exec.xml
diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml

index 6d6eca09a133dba29192f93bb71b6e2e8b27087e..2b53002f78fa1f630b6cc8c431ecebd1b6e7a575 100644 (file)
--- a/man/systemd.exec.xml
+++ b/man/systemd.exec.xml
@@ -145,6 +145,61 @@
          <xi:include href="system-only.xml" xpointer="singular"/></listitem>
        </varlistentry>
  
+      <varlistentry>
+        <term><varname>RootImageOptions=</varname></term>
+
+        <listitem><para>Takes a comma-separated list of mount options that will be used on disk images specified by
+        <varname>RootImage=</varname>. Optionally a partition name can be prefixed, followed by colon, in
+        case the image has multiple partitions, otherwise partition name <literal>root</literal> is implied.
+        Options for multiple partitions can be specified in a single line with space separators. Assigning an empty
+        string removes previous assignments. Duplicated options are ignored. For a list of valid mount options, please
+        refer to <citerefentry><refentrytitle>mount</refentrytitle><manvolnum>8</manvolnum></citerefentry>.</para>
+
+        <para>Valid partition names follow the <ulink url="https://systemd.io/DISCOVERABLE_PARTITIONS">Discoverable
+        Partitions Specification</ulink>.</para>
+
+        <table>
+          <title>Accepted partition names</title>
+
+          <tgroup cols='1'>
+            <colspec colname='partition' />
+            <thead>
+              <row>
+                <entry>Partition Name</entry>
+              </row>
+            </thead>
+            <tbody>
+              <row>
+                <entry>root</entry>
+              </row>
+              <row>
+                <entry>root-secondary</entry>
+              </row>
+              <row>
+                <entry>home</entry>
+              </row>
+              <row>
+                <entry>srv</entry>
+              </row>
+              <row>
+                <entry>esp</entry>
+              </row>
+              <row>
+                <entry>xbootldr</entry>
+              </row>
+              <row>
+                <entry>tmp</entry>
+              </row>
+              <row>
+                <entry>var</entry>
+              </row>
+            </tbody>
+          </tgroup>
+        </table>
+
+        <xi:include href="system-only.xml" xpointer="singular"/></listitem>
+      </varlistentry>
+
        <varlistentry>
          <term><varname>RootHash=</varname></term>
  
@@ -188,10 +243,10 @@
          the same name (except if the image has the <filename>.raw</filename> suffix, in which case the verity data file must
          not have it in its name), the verity data is read from it and automatically used.</para>
  
-        <para>This option is supported only for disk images that contain a single file system, without an enveloping partition
-        table. Images that contain a GPT partition table should instead include both root file system and matching Verity
-        data in the same image, implementing the
-        [Discoverable Partition Specification](https://systemd.io/DISCOVERABLE_PARTITIONS)</para>
+        <para>This option is supported only for disk images that contain a single file system, without an
+        enveloping partition table. Images that contain a GPT partition table should instead include both
+        root file system and matching Verity data in the same image, implementing the <ulink
+        url="https://systemd.io/DISCOVERABLE_PARTITIONS">Discoverable Partition Specification</ulink>.</para>
  
          <xi:include href="system-only.xml" xpointer="singular"/></listitem>
        </varlistentry>
@@ -212,6 +267,55 @@
          <xi:include href="system-only.xml" xpointer="singular"/></listitem>
        </varlistentry>
  
+      <varlistentry>
+        <term><varname>ProtectProc=</varname></term>
+
+        <listitem><para>Takes one of <literal>noaccess</literal>, <literal>invisible</literal>,
+        <literal>ptraceable</literal> or <literal>default</literal> (which it defaults to). When set, this
+        controls the <literal>hidepid=</literal> mount option of the <literal>procfs</literal> instance for
+        the unit that controls which directories with process metainformation
+        (<filename>/proc/<replaceable>PID</replaceable></filename>) are visible and accessible: when set to
+        <literal>noaccess</literal> the ability to access most of other users' process metadata in
+        <filename>/proc/</filename> is taken away for processes of the service. When set to
+        <literal>invisible</literal> processes owned by other users are hidden from
+        <filename>/proc/</filename>. If <literal>ptraceable</literal>, all processes that cannot be
+        <function>ptrace()</function>'ed by a process are hidden from it. If <literal>default</literal>, no
+        restrictions on <filename>/proc/</filename> access or visibility are made. For further details see
+        <ulink url="https://www.kernel.org/doc/html/latest/filesystems/proc.html#mount-options">The /proc
+        Filesystem</ulink>. It is generally recommended to run most system services with this option set to
+        <literal>invisible</literal>. This option is implemented via file system namespacing, and thus cannot
+        be used with services that shall be able to install mount points in the host file system
+        hierarchy. It also cannot be used for services that need to access metainformation about other users'
+        processes. This option implies <varname>MountAPIVFS=</varname>.</para>
+
+        <para>If the kernel doesn't support per-mount point <option>hidepid=</option> mount options this
+        setting remains without effect, and the unit's processes will be able to access and see other
+        processes as if the option was not used.</para>
+
+        <xi:include href="system-only.xml" xpointer="singular"/></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>ProcSubset=</varname></term>
+
+        <listitem><para>Takes one of <literal>all</literal> (the default) and <literal>pid</literal>. If
+        the latter, all files and directories not directly associated with process management and introspection
+        are made invisible in the <filename>/proc/</filename> file system configured for the unit's
+        processes. This controls the <literal>subset=</literal> mount option of the <literal>procfs</literal>
+        instance for the unit. For further details see <ulink
+        url="https://www.kernel.org/doc/html/latest/filesystems/proc.html#mount-options">The /proc
+        Filesystem</ulink>. Note that Linux exposes various kernel APIs via <filename>/proc/</filename>,
+        which are made unavailable with this setting. Since these APIs are used frequently this option is
+        useful only in a few, specific cases, and is not suitable for most non-trivial programs.</para>
+
+        <para>Much like <varname>ProtectProc=</varname> above, this is implemented via file system mount
+        namespacing, and hence the same restrictions apply: it is only available to system services, it
+        disables mount propagation to the host mount table, and it implies
+        <varname>MountAPIVFS=</varname>. Also, like <varname>ProtectProc=</varname> this setting is gracefully
+        disabled if the used kernel does not support the <literal>subset=</literal> mount option of
+        <literal>procfs</literal>.</para></listitem>
+      </varlistentry>
+
        <varlistentry>
          <term><varname>BindPaths=</varname></term>
          <term><varname>BindReadOnlyPaths=</varname></term>
@@ -248,6 +352,48 @@
          <xi:include href="system-only.xml" xpointer="singular"/></listitem>
        </varlistentry>
  
+      <varlistentry>
+        <term><varname>MountImages=</varname></term>
+
+        <listitem><para>This setting is similar to <varname>RootImage=</varname> in that it mounts a file
+        system hierarchy from a block device node or loopback file, but the destination directory can be
+        specified as well as mount options. This option expects a whitespace separated list of mount
+        definitions. Each definition consists of a colon-separated tuple of source path and destination
+        definitions, optionally followed by another colon and a list of mount options.</para>
+
+        <para>Mount options may be defined as a single comma-separated list of options, in which case they
+        will be implicitly applied to the root partition on the image, or a series of colon-separated tuples
+        of partition name and mount options. Valid partition names and mount options are the same as for
+        <varname>RootImageOptions=</varname> setting described above.</para>
+
+        <para>Each mount definition may be prefixed with <literal>-</literal>, in which case it will be
+        ignored when its source path does not exist. The source argument is a path to a block device node or
+        regular file. If source or destination contain a <literal>:</literal>, it needs to be escaped as
+        <literal>\:</literal>. The device node or file system image file needs to follow the same rules as
+        specified for <varname>RootImage=</varname>. Any mounts created with this option are specific to the
+        unit, and are not visible in the host's mount table.</para>
+
+        <para>These settings may be used more than once, each usage appends to the unit's list of mount
+        paths. If the empty string is assigned, the entire list of mount paths defined prior to this is
+        reset.</para>
+
+        <para>Note that the destination directory must exist or systemd must be able to create it.  Thus, it
+        is not possible to use those options for mount points nested underneath paths specified in
+        <varname>InaccessiblePaths=</varname>, or under <filename>/home/</filename> and other protected
+        directories if <varname>ProtectHome=yes</varname> is specified.</para>
+
+        <para>When <varname>DevicePolicy=</varname> is set to <literal>closed</literal> or
+        <literal>strict</literal>, or set to <literal>auto</literal> and <varname>DeviceAllow=</varname> is
+        set, then this setting adds <filename>/dev/loop-control</filename> with <constant>rw</constant> mode,
+        <literal>block-loop</literal> and <literal>block-blkext</literal> with <constant>rwm</constant> mode
+        to <varname>DeviceAllow=</varname>. See
+        <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
+        for the details about <varname>DevicePolicy=</varname> or <varname>DeviceAllow=</varname>. Also, see
+        <varname>PrivateDevices=</varname> below, as it may change the setting of
+        <varname>DevicePolicy=</varname>.</para>
+
+        <xi:include href="system-only.xml" xpointer="singular"/></listitem>
+      </varlistentry>
      </variablelist>
    </refsect1>
  
@@ -398,22 +544,28 @@
        <varlistentry>
          <term><varname>CapabilityBoundingSet=</varname></term>
  
-        <listitem><para>Controls which capabilities to include in the capability bounding set for the executed
-        process. See <citerefentry
-        project='man-pages'><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
-        details. Takes a whitespace-separated list of capability names, e.g. <constant>CAP_SYS_ADMIN</constant>,
-        <constant>CAP_DAC_OVERRIDE</constant>, <constant>CAP_SYS_PTRACE</constant>. Capabilities listed will be
-        included in the bounding set, all others are removed. If the list of capabilities is prefixed with
-        <literal>~</literal>, all but the listed capabilities will be included, the effect of the assignment
-        inverted. Note that this option also affects the respective capabilities in the effective, permitted and
-        inheritable capability sets. If this option is not used, the capability bounding set is not modified on process
-        execution, hence no limits on the capabilities of the process are enforced. This option may appear more than
-        once, in which case the bounding sets are merged by <constant>OR</constant>, or by <constant>AND</constant> if
-        the lines are prefixed with <literal>~</literal> (see below). If the empty string is assigned to this option,
-        the bounding set is reset to the empty capability set, and all prior settings have no effect.  If set to
-        <literal>~</literal> (without any further argument), the bounding set is reset to the full set of available
-        capabilities, also undoing any previous settings. This does not affect commands prefixed with
-        <literal>+</literal>.</para>
+        <listitem><para>Controls which capabilities to include in the capability bounding set for the
+        executed process. See <citerefentry
+        project='man-pages'><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+        for details. Takes a whitespace-separated list of capability names,
+        e.g. <constant>CAP_SYS_ADMIN</constant>, <constant>CAP_DAC_OVERRIDE</constant>,
+        <constant>CAP_SYS_PTRACE</constant>. Capabilities listed will be included in the bounding set, all
+        others are removed. If the list of capabilities is prefixed with <literal>~</literal>, all but the
+        listed capabilities will be included, the effect of the assignment inverted. Note that this option
+        also affects the respective capabilities in the effective, permitted and inheritable capability
+        sets. If this option is not used, the capability bounding set is not modified on process execution,
+        hence no limits on the capabilities of the process are enforced. This option may appear more than
+        once, in which case the bounding sets are merged by <constant>OR</constant>, or by
+        <constant>AND</constant> if the lines are prefixed with <literal>~</literal> (see below). If the
+        empty string is assigned to this option, the bounding set is reset to the empty capability set, and
+        all prior settings have no effect.  If set to <literal>~</literal> (without any further argument),
+        the bounding set is reset to the full set of available capabilities, also undoing any previous
+        settings. This does not affect commands prefixed with <literal>+</literal>.</para>
+
+        <para>Use
+        <citerefentry><refentrytitle>systemd-analyze</refentrytitle><manvolnum>1</manvolnum></citerefentry>'s
+        <command>capability</command> command to retrieve a list of capabilities defined on the local
+        system.</para>
  
          <para>Example: if a unit has the following,
          <programlisting>CapabilityBoundingSet=CAP_A CAP_B
@@ -1884,6 +2036,10 @@ RestrictNamespaces=~cgroup net</programlisting>
                  <entry>@timer</entry>
                  <entry>System calls for scheduling operations by time (<citerefentry project='man-pages'><refentrytitle>alarm</refentrytitle><manvolnum>2</manvolnum></citerefentry>, <citerefentry project='man-pages'><refentrytitle>timer_create</refentrytitle><manvolnum>2</manvolnum></citerefentry>, …)</entry>
                </row>
+              <row>
+                <entry>@known</entry>
+                <entry>All system calls defined by the kernel. This list is defined statically in systemd based on a kernel version that was available when this systemd version was released. It will become progressively more out-of-date as the kernel is updated.</entry>
+              </row>
              </tbody>
            </tgroup>
          </table>
@@ -2004,11 +2160,13 @@ SystemCallErrorNumber=EPERM</programlisting>
          project='man-pages'><refentrytitle>environ</refentrytitle><manvolnum>7</manvolnum></citerefentry> for details
          about environment variables.</para>
  
-        <para>Note that environment variables are not suitable for passing secrets (such as passwords, key material, …)
-        to service processes. Environment variables set for a unit are exposed to unprivileged clients via D-Bus IPC,
-        and generally not understood as being data that requires protection. Moreover, environment variables are
-        propagated down the process tree, including across security boundaries (such as setuid/setgid executables), and
-        hence might leak to processes that should not have access to the secret data.</para></listitem>
+        <para>Note that environment variables are not suitable for passing secrets (such as passwords, key
+        material, …)  to service processes. Environment variables set for a unit are exposed to unprivileged
+        clients via D-Bus IPC, and generally not understood as being data that requires protection. Moreover,
+        environment variables are propagated down the process tree, including across security boundaries
+        (such as setuid/setgid executables), and hence might leak to processes that should not have access to
+        the secret data. Use <varname>LoadCredential=</varname> (see below) to pass data to unit processes
+        securely.</para></listitem>
        </varlistentry>
  
        <varlistentry>
@@ -2163,13 +2321,7 @@ SystemCallErrorNumber=EPERM</programlisting>
          <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry> for more
          details about named file descriptors and their ordering.</para>
  
-        <para>This setting defaults to <option>null</option>.</para>
-
-        <para>Note that services which specify <option>DefaultDependencies=no</option> and use
-        <varname>StandardInput=</varname> or <varname>StandardOutput=</varname> with
-        <option>tty</option>/<option>tty-force</option>/<option>tty-fail</option>, should specify
-        <option>After=systemd-vconsole-setup.service</option>, to make sure that the tty initialization is
-        finished before they start.</para></listitem>
+        <para>This setting defaults to <option>null</option>.</para></listitem>
        </varlistentry>
  
        <varlistentry>
@@ -2384,7 +2536,9 @@ StandardInputData=SWNrIHNpdHplIGRhIHVuJyBlc3NlIEtsb3BzLAp1ZmYgZWVtYWwga2xvcHAncy
          so that they are automatically established prior to the unit starting up. Note that when this option
          is used log output of this service does not appear in the regular
          <citerefentry><refentrytitle>journalctl</refentrytitle><manvolnum>1</manvolnum></citerefentry>
-        output, unless the <option>--namespace=</option> option is used.</para></listitem>
+        output, unless the <option>--namespace=</option> option is used.</para>
+
+        <xi:include href="system-only.xml" xpointer="singular"/></listitem>
        </varlistentry>
  
        <varlistentry>
@@ -2478,6 +2632,73 @@ StandardInputData=SWNrIHNpdHplIGRhIHVuJyBlc3NlIEtsb3BzLAp1ZmYgZWVtYWwga2xvcHAncy
      </variablelist>
    </refsect1>
  
+  <refsect1>
+    <title>Credentials</title>
+
+    <variablelist class='unit-directives'>
+
+      <varlistentry>
+        <term><varname>LoadCredential=</varname><replaceable>ID</replaceable>:<replaceable>PATH</replaceable></term>
+
+        <listitem><para>Pass a credential to the unit. Credentials are limited-size binary or textual objects
+        that may be passed to unit processes. They are primarily used for passing cryptographic keys (both
+        public and private) or certificates, user account information or identity information from host to
+        services. The data is accessible from the unit's processes via the file system, at a read-only
+        location that (if possible and permitted) is backed by non-swappable memory. The data is only
+        accessible to the user associated with the unit, via the
+        <varname>User=</varname>/<varname>DynamicUser=</varname> settings (as well as the superuser). When
+        available, the location of credentials is exported as the <varname>$CREDENTIALS_DIRECTORY</varname>
+        environment variable to the unit's processes.</para>
+
+        <para>The <varname>LoadCredential=</varname> setting takes a textual ID to use as name for a
+        credential plus a file system path. The ID must be a short ASCII string suitable as filename in the
+        filesystem, and may be chosen freely by the user. If the specified path is absolute it is opened as
+        regular file and the credential data is read from it. If the absolute path refers to an
+        <constant>AF_UNIX</constant> stream socket in the file system a connection is made to it and the
+        credential data read from the connection, providing an easy IPC integration point for dynamically
+        providing credentials from other services. If the specified path is not absolute and itself qualifies
+        as valid credential identifier it is understood to refer to a credential that the service manager
+        itself received via the <varname>$CREDENTIALS_DIRECTORY</varname> environment variable, which may be
+        used to propagate credentials from an invoking environment (e.g. a container manager that invoked the
+        service manager) into a service. The contents of the file/socket may be arbitrary binary or textual
+        data, including newline characters and NUL bytes. This option may be used multiple times, each time
+        defining an additional credential to pass to the unit.</para>
+
+        <para>The credential files/IPC sockets must be accessible to the service manager, but don't have to
+        be directly accessible to the unit's processes: the credential data is read and copied into separate,
+        read-only copies for the unit that are accessible to appropriately privileged processes. This is
+        particularly useful in combination with <varname>DynamicUser=</varname> as this way privileged data
+        can be made available to processes running under a dynamic UID (i.e. not a previously known one)
+        without having to open up access to all users.</para>
+
+        <para>In order to reference the path a credential may be read from within an
+        <varname>ExecStart=</varname> command line use <literal>${CREDENTIALS_DIRECTORY}/mycred</literal>,
+        e.g. <literal>ExecStart=cat ${CREDENTIALS_DIRECTORY}/mycred</literal>.</para>
+
+        <para>Currently, an accumulated credential size limit of 1M bytes per unit is
+        enforced.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>SetCredential=</varname><replaceable>ID</replaceable>:<replaceable>VALUE</replaceable></term>
+
+        <listitem><para>The <varname>SetCredential=</varname> setting is similar to
+        <varname>LoadCredential=</varname> but accepts a literal value to use as data for the credential,
+        instead of a file system path to read the data from. Do not use this option for data that is supposed
+        to be secret, as it is accessible to unprivileged processes via IPC. It's only safe to use this for
+        user IDs, public key material and similar non-sensitive data. For everything else use
+        <varname>LoadCredential=</varname>. In order to embed binary data into the credential data use
+        C-style escaping (i.e. <literal>\n</literal> to embed a newline, or <literal>\x00</literal> to embed
+        a NUL byte).</para>
+
+        <para>If a credential of the same ID is listed in both <varname>LoadCredential=</varname> and
+        <varname>SetCredential=</varname>, the latter will act as default if the former cannot be
+        retrieved. In this case not being able to retrieve the credential from the path specified in
+        <varname>LoadCredential=</varname> is not considered fatal.</para></listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
    <refsect1>
      <title>System V Compatibility</title>
      <variablelist class='unit-directives'>
@@ -2633,6 +2854,16 @@ StandardInputData=SWNrIHNpdHplIGRhIHVuJyBlc3NlIEtsb3BzLAp1ZmYgZWVtYWwga2xvcHAncy
          </listitem>
        </varlistentry>
  
+      <varlistentry>
+        <term><varname>$CREDENTIALS_DIRECTORY</varname></term>
+
+        <listitem><para>An absolute path to the per-unit directory with credentials configured via
+        <varname>LoadCredential=</varname>/<varname>SetCredential=</varname>. The directory is marked
+        read-only and is placed in unswappable memory (if supported and permitted), and is only accessible to
+        the UID associated with the unit via <varname>User=</varname> or <varname>DynamicUser=</varname> (and
+        the superuser).</para></listitem>
+      </varlistentry>
+
        <varlistentry>
          <term><varname>$MAINPID</varname></term>
  
@@ -3234,7 +3465,11 @@ StandardInputData=SWNrIHNpdHplIGRhIHVuJyBlc3NlIEtsb3BzLAp1ZmYgZWVtYWwga2xvcHAncy
              <entry><constant>EXIT_NUMA_POLICY</constant></entry>
              <entry>Failed to set up unit's NUMA memory policy. See <varname>NUMAPolicy=</varname> and <varname>NUMAMask=</varname> above.</entry>
            </row>
-
+          <row>
+            <entry>243</entry>
+            <entry><constant>EXIT_CREDENTIALS</constant></entry>
+            <entry>Failed to set up unit's credentials. See <varname>LoadCredential=</varname> and <varname>SetCredential=</varname> above.</entry>
+          </row>
          </tbody>
        </tgroup>
      </table>