git.ipfire.org Git - thirdparty/systemd.git/commitdiff
nspawn: Add --restrict-address-families= option
author: Daan De Meyer <daan.j.demeyer@gmail.com>
Mon, 22 Dec 2025 10:22:34 +0000 (11:22 +0100)
committer: Daan De Meyer <daan.j.demeyer@gmail.com>
Mon, 13 Apr 2026 09:14:11 +0000 (11:14 +0200)
Add a new --restrict-address-families= command line option and
corresponding RestrictAddressFamilies= setting for .nspawn files to
restrict which socket address families may be used inside a container.

Many address families such as AF_VSOCK and AF_NETLINK are not
network-namespaced, so restricting access to them in containers
improves isolation. The option supports allowlist and denylist modes
(via ~ prefix), as well as "none" to block all families, matching the
semantics of RestrictAddressFamilies= in unit files.

The address family parsing logic is extracted into a shared
parse_address_families() helper in parse-helpers.c, which is now also
used by config_parse_address_families() in load-fragment.c.

This is currently opt-in. In a future version, the default will be
changed to restrict address families to AF_INET, AF_INET6 and AF_UNIX.

14 files changed:
NEWS
man/systemd-nspawn.xml
man/systemd.nspawn.xml
shell-completion/bash/systemd-nspawn
shell-completion/zsh/_systemd-nspawn
src/core/load-fragment.c
src/nspawn/nspawn-gperf.gperf
src/nspawn/nspawn-seccomp.c
src/nspawn/nspawn-seccomp.h
src/nspawn/nspawn-settings.c
src/nspawn/nspawn-settings.h
src/nspawn/nspawn.c
src/shared/parse-helpers.c
src/shared/parse-helpers.h

diff --git a/NEWS b/NEWS
index 2d32bd08b4a015c2f25e0aa72803060f7379a8e2..b440af59396188eb12002e0ad009c85be8681baa 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -30,6 +30,13 @@ CHANGES WITH 261 in spe:
           attestation environments which use hardware CC registers and not the
           TPM quote.
 
+        * systemd-nspawn gained a new --restrict-address-families= option (and
+          corresponding RestrictAddressFamilies= setting in .nspawn files) to
+          restrict which socket address families may be used in the container.
+          This is currently opt-in. In a future version, the default will be
+          changed to restrict socket address families to AF_INET, AF_INET6 and
+          AF_UNIX.
+
         New features:
 
         * A new tmpfiles.d/root.conf has been added that sets permissions
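diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index 5c7acf51594bc9f7faf921c16483de4266474370..045aa60db81f7390d764cfa439ff7ec1f9e55428 100644 (file)
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml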
@@ -1340,6 +1340,28 @@ After=sys-subsystem-net-devices-ens1.device</programlisting>
         <xi:include href="version-info.xml" xpointer="v235"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><option>--restrict-address-families=</option></term>
+
+        <listitem><para>Restrict the socket address families accessible to the container. Takes a
+        space-separated list of address family names, such as <constant>AF_INET</constant>,
+        <constant>AF_INET6</constant> or <constant>AF_UNIX</constant>. When prefixed with
+        <literal>~</literal> the listed address families will be prohibited, otherwise they will be permitted
+        (allowlisted). Use the special value <literal>none</literal> to prohibit all address families. This
+        option may be specified more than once, in which case the configured lists are combined. If both a
+        positive and a negative list are configured, the negative list takes precedence over the positive
+        list.</para>
+
+        <para>Note that currently this option defaults to no restrictions, i.e. all address families are
+        accessible. In a future version of systemd, the default will be changed to restrict address families to
+        <constant>AF_INET</constant>, <constant>AF_INET6</constant> and <constant>AF_UNIX</constant>. Use
+        <option>--restrict-address-families=</option> (with an empty argument) or assign the empty string to
+        <varname>RestrictAddressFamilies=</varname> in a <filename>.nspawn</filename> file to opt out of
+        filtering explicitly.</para>
+
+        <xi:include href="version-info.xml" xpointer="v261"/></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><option>-Z</option></term>
         <term><option>--selinux-context=</option></term>
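diff --git a/man/systemd.nspawn.xml b/man/systemd.nspawn.xml
index bf9526df8069f71805ed582d33eb5ae7b2d0f51c..29279806852509d7ba02b2888ed3f61d8e4db7f7 100644 (file)
--- a/man/systemd.nspawn.xml
+++ b/man/systemd.nspawn.xml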
         <xi:include href="version-info.xml" xpointer="v235"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>RestrictAddressFamilies=</varname></term>
+
+        <listitem><para>Restricts the socket address families accessible to the container. This is equivalent
+        to the <option>--restrict-address-families=</option> command line switch, and takes the same list
+        parameter. See
+        <citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry> for
+        details.</para>
+
+        <xi:include href="version-info.xml" xpointer="v261"/></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>LimitCPU=</varname></term>
         <term><varname>LimitFSIZE=</varname></term>
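diff --git a/shell-completion/bash/systemd-nspawn b/shell-completion/bash/systemd-nspawn
index 08ff25d906c1f9368110f13119155c38302af74c..b39d3cbd6d85422997baef9fc220b56279982b7c 100644 (file)
--- a/shell-completion/bash/systemd-nspawn
+++ b/shell-completion/bash/systemd-nspawn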
@@ -77,7 +77,8 @@ _systemd_nspawn() {
                       --pivot-root --property --private-users --private-users-ownership --network-namespace-path
                       --network-ipvlan --network-veth-extra --network-zone -p --port --system-call-filter --overlay
                       --overlay-ro --settings --rlimit --hostname --no-new-privileges --oom-score-adjust --cpu-affinity
-                      --resolv-conf --timezone --root-hash-sig --background --oci-bundle --verity-data'
+                      --resolv-conf --timezone --root-hash-sig --background --oci-bundle --verity-data
+                      --restrict-address-families'
     )
 
     _init_completion || return
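diff --git a/shell-completion/zsh/_systemd-nspawn b/shell-completion/zsh/_systemd-nspawn
index fa79b7f8d86794690af3d20c2bea2959e8552b10..ee28fa74759ab82ff7e5583d2119012317ec8229 100644 (file)
--- a/shell-completion/zsh/_systemd-nspawn
+++ b/shell-completion/zsh/_systemd-nspawn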
@@ -53,4 +53,5 @@ _arguments \
     '--volatile=[Run the system in volatile mode.]:volatile:(no yes state)' \
     "--notify-ready=[Control when the ready notification is sent]:options:(yes no)" \
     "--suppress-sync=[Control whether to suppress disk synchronization for the container payload]:options:(yes no)" \
+    '--restrict-address-families=[Restrict socket address families accessible in the container.]: : _message "address families"' \
     '*:: : _normal'
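diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c
index 274fd82514d4aba656c23cf981f1d08488d1df2c..52005c8c43600b6ed1b854cb17e9d99ae82386c3 100644 (file)
--- a/src/core/load-fragment.c
+++ b/src/core/load-fragment.c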
@@ -10,7 +10,6 @@
 #include "sd-bus.h"
 #include "sd-messages.h"
 
-#include "af-list.h"
 #include "all-units.h"
 #include "alloc-util.h"
 #include "bpf-program.h"
@@ -3474,72 +3473,26 @@ int config_parse_address_families(
                 void *userdata) {
 
         ExecContext *c = data;
-        bool invert = false;
+        bool is_allowlist = c->address_families_allow_list;
         int r;
 
         assert(filename);
         assert(lvalue);
         assert(rvalue);
 
-        if (isempty(rvalue)) {
-                /* Empty assignment resets the list */
-                c->address_families = set_free(c->address_families);
-                c->address_families_allow_list = false;
-                return 0;
-        }
-
-        if (streq(rvalue, "none")) {
-                /* Forbid all address families. */
-                c->address_families = set_free(c->address_families);
-                c->address_families_allow_list = true;
+        r = parse_address_families(rvalue, &c->address_families, &is_allowlist);
+        /* Copy back unconditionally: parse_address_families() may have partially populated
+         * c->address_families before failing, so keep is_allowlist in sync with that state. */
+        c->address_families_allow_list = is_allowlist;
+        if (r == -ENOMEM)
+                return log_oom();
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r,
+                           "Failed to parse address family, ignoring: %s", rvalue);
                 return 0;
         }
 
-        if (rvalue[0] == '~') {
-                invert = true;
-                rvalue++;
-        }
-
-        if (!c->address_families) {
-                c->address_families = set_new(NULL);
-                if (!c->address_families)
-                        return log_oom();
-
-                c->address_families_allow_list = !invert;
-        }
-
-        for (const char *p = rvalue;;) {
-                _cleanup_free_ char *word = NULL;
-                int af;
-
-                r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
-                if (r == -ENOMEM)
-                        return log_oom();
-                if (r < 0) {
-                        log_syntax(unit, LOG_WARNING, filename, line, r,
-                                   "Invalid syntax, ignoring: %s", rvalue);
-                        return 0;
-                }
-                if (r == 0)
-                        return 0;
-
-                af = af_from_name(word);
-                if (af < 0) {
-                        log_syntax(unit, LOG_WARNING, filename, line, af,
-                                   "Failed to parse address family, ignoring: %s", word);
-                        continue;
-                }
-
-                /* If we previously wanted to forbid an address family and now
-                 * we want to allow it, then just remove it from the list.
-                 */
-                if (!invert == c->address_families_allow_list)  {
-                        r = set_put(c->address_families, INT_TO_PTR(af));
-                        if (r < 0)
-                                return log_oom();
-                } else
-                        set_remove(c->address_families, INT_TO_PTR(af));
-        }
+        return 0;
 }
 #endif
 
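diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf
index cdad70706e6056ff456ceabe5a684773671cf89c..439e176e458b591d7a6f678c1e4565cab92aa47e 100644 (file)
--- a/src/nspawn/nspawn-gperf.gperf
+++ b/src/nspawn/nspawn-gperf.gperf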
@@ -19,67 +19,68 @@ struct ConfigPerfItem;
 %struct-type
 %includes
 %%
-Exec.Boot,                    config_parse_boot,               0,                        0
-Exec.Ephemeral,               config_parse_tristate,           0,                        offsetof(Settings, ephemeral)
-Exec.ProcessTwo,              config_parse_pid2,               0,                        0
-Exec.Parameters,              config_parse_strv,               0,                        offsetof(Settings, parameters)
-Exec.Environment,             config_parse_strv,               0,                        offsetof(Settings, environment)
-Exec.User,                    config_parse_string,             CONFIG_PARSE_STRING_SAFE, offsetof(Settings, user)
-Exec.Capability,              config_parse_capability,         0,                        offsetof(Settings, capability)
-Exec.AmbientCapability,       config_parse_capability,         0,                        offsetof(Settings, ambient_capability)
-Exec.DropCapability,          config_parse_capability,         0,                        offsetof(Settings, drop_capability)
-Exec.KillSignal,              config_parse_signal,             0,                        offsetof(Settings, kill_signal)
-Exec.Personality,             config_parse_personality,        0,                        offsetof(Settings, personality)
-Exec.MachineID,               config_parse_id128,              0,                        offsetof(Settings, machine_id)
-Exec.WorkingDirectory,        config_parse_path,               0,                        offsetof(Settings, working_directory)
-Exec.PivotRoot,               config_parse_pivot_root,         0,                        0
-Exec.PrivateUsers,            config_parse_private_users,      0,                        0
-Exec.PrivateUsersDelegate,    config_parse_unsigned,           0,                        offsetof(Settings, delegate_container_ranges)
-Exec.NotifyReady,             config_parse_tristate,           0,                        offsetof(Settings, notify_ready)
-Exec.SystemCallFilter,        config_parse_syscall_filter,     0,                        0
-Exec.LimitCPU,                config_parse_rlimit,             RLIMIT_CPU,               offsetof(Settings, rlimit)
-Exec.LimitFSIZE,              config_parse_rlimit,             RLIMIT_FSIZE,             offsetof(Settings, rlimit)
-Exec.LimitDATA,               config_parse_rlimit,             RLIMIT_DATA,              offsetof(Settings, rlimit)
-Exec.LimitSTACK,              config_parse_rlimit,             RLIMIT_STACK,             offsetof(Settings, rlimit)
-Exec.LimitCORE,               config_parse_rlimit,             RLIMIT_CORE,              offsetof(Settings, rlimit)
-Exec.LimitRSS,                config_parse_rlimit,             RLIMIT_RSS,               offsetof(Settings, rlimit)
-Exec.LimitNOFILE,             config_parse_rlimit,             RLIMIT_NOFILE,            offsetof(Settings, rlimit)
-Exec.LimitAS,                 config_parse_rlimit,             RLIMIT_AS,                offsetof(Settings, rlimit)
-Exec.LimitNPROC,              config_parse_rlimit,             RLIMIT_NPROC,             offsetof(Settings, rlimit)
-Exec.LimitMEMLOCK,            config_parse_rlimit,             RLIMIT_MEMLOCK,           offsetof(Settings, rlimit)
-Exec.LimitLOCKS,              config_parse_rlimit,             RLIMIT_LOCKS,             offsetof(Settings, rlimit)
-Exec.LimitSIGPENDING,         config_parse_rlimit,             RLIMIT_SIGPENDING,        offsetof(Settings, rlimit)
-Exec.LimitMSGQUEUE,           config_parse_rlimit,             RLIMIT_MSGQUEUE,          offsetof(Settings, rlimit)
-Exec.LimitNICE,               config_parse_rlimit,             RLIMIT_NICE,              offsetof(Settings, rlimit)
-Exec.LimitRTPRIO,             config_parse_rlimit,             RLIMIT_RTPRIO,            offsetof(Settings, rlimit)
-Exec.LimitRTTIME,             config_parse_rlimit,             RLIMIT_RTTIME,            offsetof(Settings, rlimit)
-Exec.Hostname,                config_parse_hostname,           0,                        offsetof(Settings, hostname)
-Exec.NoNewPrivileges,         config_parse_tristate,           0,                        offsetof(Settings, no_new_privileges)
-Exec.OOMScoreAdjust,          config_parse_oom_score_adjust,   0,                        0
-Exec.CPUAffinity,             config_parse_cpu_set,            0,                        offsetof(Settings, cpu_set)
-Exec.ResolvConf,              config_parse_resolv_conf,        0,                        offsetof(Settings, resolv_conf)
-Exec.LinkJournal,             config_parse_link_journal,       0,                        0
-Exec.Timezone,                config_parse_timezone_mode,      0,                        offsetof(Settings, timezone)
-Exec.SuppressSync,            config_parse_tristate,           0,                        offsetof(Settings, suppress_sync)
-Files.ReadOnly,               config_parse_tristate,           0,                        offsetof(Settings, read_only)
-Files.Volatile,               config_parse_volatile_mode,      0,                        offsetof(Settings, volatile_mode)
-Files.Bind,                   config_parse_bind,               0,                        0
-Files.BindReadOnly,           config_parse_bind,               1,                        0
-Files.TemporaryFileSystem,    config_parse_tmpfs,              0,                        0
-Files.Inaccessible,           config_parse_inaccessible,       0,                        0
-Files.Overlay,                config_parse_overlay,            0,                        0
-Files.OverlayReadOnly,        config_parse_overlay,            1,                        0
-Files.PrivateUsersChown,      config_parse_userns_chown,       0,                        offsetof(Settings, userns_ownership)
-Files.PrivateUsersOwnership,  config_parse_userns_ownership,   0,                        offsetof(Settings, userns_ownership)
-Files.BindUser,               config_parse_bind_user,          0,                        offsetof(Settings, bind_user)
-Files.BindUserShell,          config_parse_bind_user_shell,    0,                        0
-Network.Private,              config_parse_tristate,           0,                        offsetof(Settings, private_network)
-Network.NamespacePath,        config_parse_path,               0,                        offsetof(Settings, network_namespace_path)
-Network.Interface,            config_parse_network_iface_pair, 0,                        offsetof(Settings, network_interfaces)
-Network.MACVLAN,              config_parse_macvlan_iface_pair, 0,                        offsetof(Settings, network_macvlan)
-Network.IPVLAN,               config_parse_ipvlan_iface_pair,  0,                        offsetof(Settings, network_ipvlan)
-Network.VirtualEthernet,      config_parse_tristate,           0,                        offsetof(Settings, network_veth)
-Network.VirtualEthernetExtra, config_parse_veth_extra,         0,                        0
-Network.Bridge,               config_parse_ifname,             0,                        offsetof(Settings, network_bridge)
-Network.Zone,                 config_parse_network_zone,       0,                        0
-Network.Port,                 config_parse_expose_port,        0,                        0
+Exec.Boot,                    config_parse_boot,                      0,                        0
+Exec.Ephemeral,               config_parse_tristate,                  0,                        offsetof(Settings, ephemeral)
+Exec.ProcessTwo,              config_parse_pid2,                      0,                        0
+Exec.Parameters,              config_parse_strv,                      0,                        offsetof(Settings, parameters)
+Exec.Environment,             config_parse_strv,                      0,                        offsetof(Settings, environment)
+Exec.User,                    config_parse_string,                    CONFIG_PARSE_STRING_SAFE, offsetof(Settings, user)
+Exec.Capability,              config_parse_capability,                0,                        offsetof(Settings, capability)
+Exec.AmbientCapability,       config_parse_capability,                0,                        offsetof(Settings, ambient_capability)
+Exec.DropCapability,          config_parse_capability,                0,                        offsetof(Settings, drop_capability)
+Exec.KillSignal,              config_parse_signal,                    0,                        offsetof(Settings, kill_signal)
+Exec.Personality,             config_parse_personality,               0,                        offsetof(Settings, personality)
+Exec.MachineID,               config_parse_id128,                     0,                        offsetof(Settings, machine_id)
+Exec.WorkingDirectory,        config_parse_path,                      0,                        offsetof(Settings, working_directory)
+Exec.PivotRoot,               config_parse_pivot_root,                0,                        0
+Exec.PrivateUsers,            config_parse_private_users,             0,                        0
+Exec.PrivateUsersDelegate,    config_parse_unsigned,                  0,                        offsetof(Settings, delegate_container_ranges)
+Exec.NotifyReady,             config_parse_tristate,                  0,                        offsetof(Settings, notify_ready)
+Exec.SystemCallFilter,        config_parse_syscall_filter,            0,                        0
+Exec.LimitCPU,                config_parse_rlimit,                    RLIMIT_CPU,               offsetof(Settings, rlimit)
+Exec.LimitFSIZE,              config_parse_rlimit,                    RLIMIT_FSIZE,             offsetof(Settings, rlimit)
+Exec.LimitDATA,               config_parse_rlimit,                    RLIMIT_DATA,              offsetof(Settings, rlimit)
+Exec.LimitSTACK,              config_parse_rlimit,                    RLIMIT_STACK,             offsetof(Settings, rlimit)
+Exec.LimitCORE,               config_parse_rlimit,                    RLIMIT_CORE,              offsetof(Settings, rlimit)
+Exec.LimitRSS,                config_parse_rlimit,                    RLIMIT_RSS,               offsetof(Settings, rlimit)
+Exec.LimitNOFILE,             config_parse_rlimit,                    RLIMIT_NOFILE,            offsetof(Settings, rlimit)
+Exec.LimitAS,                 config_parse_rlimit,                    RLIMIT_AS,                offsetof(Settings, rlimit)
+Exec.LimitNPROC,              config_parse_rlimit,                    RLIMIT_NPROC,             offsetof(Settings, rlimit)
+Exec.LimitMEMLOCK,            config_parse_rlimit,                    RLIMIT_MEMLOCK,           offsetof(Settings, rlimit)
+Exec.LimitLOCKS,              config_parse_rlimit,                    RLIMIT_LOCKS,             offsetof(Settings, rlimit)
+Exec.LimitSIGPENDING,         config_parse_rlimit,                    RLIMIT_SIGPENDING,        offsetof(Settings, rlimit)
+Exec.LimitMSGQUEUE,           config_parse_rlimit,                    RLIMIT_MSGQUEUE,          offsetof(Settings, rlimit)
+Exec.LimitNICE,               config_parse_rlimit,                    RLIMIT_NICE,              offsetof(Settings, rlimit)
+Exec.LimitRTPRIO,             config_parse_rlimit,                    RLIMIT_RTPRIO,            offsetof(Settings, rlimit)
+Exec.LimitRTTIME,             config_parse_rlimit,                    RLIMIT_RTTIME,            offsetof(Settings, rlimit)
+Exec.Hostname,                config_parse_hostname,                  0,                        offsetof(Settings, hostname)
+Exec.NoNewPrivileges,         config_parse_tristate,                  0,                        offsetof(Settings, no_new_privileges)
+Exec.OOMScoreAdjust,          config_parse_oom_score_adjust,          0,                        0
+Exec.CPUAffinity,             config_parse_cpu_set,                   0,                        offsetof(Settings, cpu_set)
+Exec.ResolvConf,              config_parse_resolv_conf,               0,                        offsetof(Settings, resolv_conf)
+Exec.LinkJournal,             config_parse_link_journal,              0,                        0
+Exec.Timezone,                config_parse_timezone_mode,             0,                        offsetof(Settings, timezone)
+Exec.SuppressSync,            config_parse_tristate,                  0,                        offsetof(Settings, suppress_sync)
+Exec.RestrictAddressFamilies, config_parse_restrict_address_families, 0,                        0
+Files.ReadOnly,               config_parse_tristate,                  0,                        offsetof(Settings, read_only)
+Files.Volatile,               config_parse_volatile_mode,             0,                        offsetof(Settings, volatile_mode)
+Files.Bind,                   config_parse_bind,                      0,                        0
+Files.BindReadOnly,           config_parse_bind,                      1,                        0
+Files.TemporaryFileSystem,    config_parse_tmpfs,                     0,                        0
+Files.Inaccessible,           config_parse_inaccessible,              0,                        0
+Files.Overlay,                config_parse_overlay,                   0,                        0
+Files.OverlayReadOnly,        config_parse_overlay,                   1,                        0
+Files.PrivateUsersChown,      config_parse_userns_chown,              0,                        offsetof(Settings, userns_ownership)
+Files.PrivateUsersOwnership,  config_parse_userns_ownership,          0,                        offsetof(Settings, userns_ownership)
+Files.BindUser,               config_parse_bind_user,                 0,                        offsetof(Settings, bind_user)
+Files.BindUserShell,          config_parse_bind_user_shell,           0,                        0
+Network.Private,              config_parse_tristate,                  0,                        offsetof(Settings, private_network)
+Network.NamespacePath,        config_parse_path,                      0,                        offsetof(Settings, network_namespace_path)
+Network.Interface,            config_parse_network_iface_pair,        0,                        offsetof(Settings, network_interfaces)
+Network.MACVLAN,              config_parse_macvlan_iface_pair,        0,                        offsetof(Settings, network_macvlan)
+Network.IPVLAN,               config_parse_ipvlan_iface_pair,         0,                        offsetof(Settings, network_ipvlan)
+Network.VirtualEthernet,      config_parse_tristate,                  0,                        offsetof(Settings, network_veth)
+Network.VirtualEthernetExtra, config_parse_veth_extra,                0,                        0
+Network.Bridge,               config_parse_ifname,                    0,                        offsetof(Settings, network_bridge)
+Network.Zone,                 config_parse_network_zone,              0,                        0
+Network.Port,                 config_parse_expose_port,               0,                        0
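diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c
index d85a30ee9f9cf136c8a67dae2cd2ef4ec26a9d9a..beffd5da8a862dd9eb35c7e41125bb887f789189 100644 (file)
--- a/src/nspawn/nspawn-seccomp.c
+++ b/src/nspawn/nspawn-seccomp.c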
@@ -7,6 +7,7 @@
 #include "log.h"
 #include "nspawn-seccomp.h"
 #include "seccomp-util.h"
+#include "set.h"
 #include "strv.h"
 
 #if HAVE_SECCOMP
@@ -172,7 +173,13 @@ static int add_syscall_filters(
         return 0;
 }
 
-int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list) {
+int setup_seccomp(
+                uint64_t cap_list_retain,
+                char **syscall_allow_list,
+                char **syscall_deny_list,
+                Set *restrict_address_families,
+                bool restrict_address_families_is_allowlist) {
+
         uint32_t arch;
         int r;
 
@@ -241,12 +248,18 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **sy
                                         seccomp_arch_to_string(arch));
         }
 
+        if (restrict_address_families_is_allowlist || !set_isempty(restrict_address_families)) {
+                r = seccomp_restrict_address_families(restrict_address_families, restrict_address_families_is_allowlist);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to install address family filter: %m");
+        }
+
         return 0;
 }
 
 #else
 
-int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list) {
+int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list, Set *restrict_address_families, bool restrict_address_families_is_allowlist) {
         return 0;
 }
 
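diff --git a/src/nspawn/nspawn-seccomp.h b/src/nspawn/nspawn-seccomp.h
index 31520a09300d3659834d6c606b43e57c0039faab..52232ad56aebb9a839e0af2157af6e022ce01dca 100644 (file)
--- a/src/nspawn/nspawn-seccomp.h
+++ b/src/nspawn/nspawn-seccomp.h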
@@ -3,4 +3,9 @@
 
 #include "shared-forward.h"
 
-int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list);
+int setup_seccomp(
+                uint64_t cap_list_retain,
+                char **syscall_allow_list,
+                char **syscall_deny_list,
+                Set *restrict_address_families,
+                bool restrict_address_families_is_allowlist);
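diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c
index c058ab28f71deb4e9abbac39d814ac3a442e224f..9abd5024a5049b4250529f8f116c60c47fcad8a0 100644 (file)
--- a/src/nspawn/nspawn-settings.c
+++ b/src/nspawn/nspawn-settings.c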
 #include "nspawn-mount.h"
 #include "nspawn-network.h"
 #include "nspawn-settings.h"
+#include "parse-helpers.h"
 #include "parse-util.h"
 #include "process-util.h"
 #include "rlimit-util.h"
+#include "set.h"
 #include "socket-util.h"
 #include "string-table.h"
 #include "string-util.h"
@@ -137,6 +139,7 @@ Settings* settings_free(Settings *s) {
         rlimit_free_all(s->rlimit);
         free(s->hostname);
         cpu_set_done(&s->cpu_set);
+        set_free(s->restrict_address_families);
         strv_free(s->bind_user);
         free(s->bind_user_shell);
 
@@ -1054,3 +1057,32 @@ int config_parse_bind_user_shell(
 
         return 0;
 }
+
+int config_parse_restrict_address_families(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        Settings *settings = ASSERT_PTR(data);
+        int r;
+
+        assert(rvalue);
+
+        r = parse_address_families(rvalue, &settings->restrict_address_families, &settings->restrict_address_families_is_allowlist);
+        if (r == -ENOMEM)
+                return log_oom();
+        if (r < 0) {
+                log_syntax(unit, LOG_WARNING, filename, line, r,
+                           "Failed to parse address family, ignoring: %s", rvalue);
+                return 0;
+        }
+
+        return 0;
+}
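diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
index 84c342b83c1ebf261bbcc7b26775ab89df3d5bdf..c2e079f0563c1cd156714d68644a5a33ef6c940e 100644 (file)
--- a/src/nspawn/nspawn-settings.h
+++ b/src/nspawn/nspawn-settings.h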
@@ -92,43 +92,44 @@ typedef enum ConsoleMode {
 } ConsoleMode;
 
 typedef enum SettingsMask {
-        SETTING_START_MODE        = UINT64_C(1) << 0,
-        SETTING_ENVIRONMENT       = UINT64_C(1) << 1,
-        SETTING_USER              = UINT64_C(1) << 2,
-        SETTING_CAPABILITY        = UINT64_C(1) << 3,
-        SETTING_KILL_SIGNAL       = UINT64_C(1) << 4,
-        SETTING_PERSONALITY       = UINT64_C(1) << 5,
-        SETTING_MACHINE_ID        = UINT64_C(1) << 6,
-        SETTING_NETWORK           = UINT64_C(1) << 7,
-        SETTING_EXPOSE_PORTS      = UINT64_C(1) << 8,
-        SETTING_READ_ONLY         = UINT64_C(1) << 9,
-        SETTING_VOLATILE_MODE     = UINT64_C(1) << 10,
-        SETTING_CUSTOM_MOUNTS     = UINT64_C(1) << 11,
-        SETTING_WORKING_DIRECTORY = UINT64_C(1) << 12,
-        SETTING_USERNS            = UINT64_C(1) << 13,
-        SETTING_NOTIFY_READY      = UINT64_C(1) << 14,
-        SETTING_PIVOT_ROOT        = UINT64_C(1) << 15,
-        SETTING_SYSCALL_FILTER    = UINT64_C(1) << 16,
-        SETTING_HOSTNAME          = UINT64_C(1) << 17,
-        SETTING_NO_NEW_PRIVILEGES = UINT64_C(1) << 18,
-        SETTING_OOM_SCORE_ADJUST  = UINT64_C(1) << 19,
-        SETTING_CPU_AFFINITY      = UINT64_C(1) << 20,
-        SETTING_RESOLV_CONF       = UINT64_C(1) << 21,
-        SETTING_LINK_JOURNAL      = UINT64_C(1) << 22,
-        SETTING_TIMEZONE          = UINT64_C(1) << 23,
-        SETTING_EPHEMERAL         = UINT64_C(1) << 24,
-        SETTING_SLICE             = UINT64_C(1) << 25,
-        SETTING_DIRECTORY         = UINT64_C(1) << 26,
-        SETTING_USE_CGNS          = UINT64_C(1) << 27,
-        SETTING_CLONE_NS_FLAGS    = UINT64_C(1) << 28,
-        SETTING_CONSOLE_MODE      = UINT64_C(1) << 29,
-        SETTING_CREDENTIALS       = UINT64_C(1) << 30,
-        SETTING_BIND_USER         = UINT64_C(1) << 31,
-        SETTING_BIND_USER_SHELL   = UINT64_C(1) << 32,
-        SETTING_SUPPRESS_SYNC     = UINT64_C(1) << 33,
-        SETTING_RLIMIT_FIRST      = UINT64_C(1) << 34, /* we define one bit per resource limit here */
-        SETTING_RLIMIT_LAST       = UINT64_C(1) << (34 + _RLIMIT_MAX - 1),
-        _SETTINGS_MASK_ALL        = (UINT64_C(1) << (34 + _RLIMIT_MAX)) -1,
+        SETTING_START_MODE                 = UINT64_C(1) << 0,
+        SETTING_ENVIRONMENT                = UINT64_C(1) << 1,
+        SETTING_USER                       = UINT64_C(1) << 2,
+        SETTING_CAPABILITY                 = UINT64_C(1) << 3,
+        SETTING_KILL_SIGNAL                = UINT64_C(1) << 4,
+        SETTING_PERSONALITY                = UINT64_C(1) << 5,
+        SETTING_MACHINE_ID                 = UINT64_C(1) << 6,
+        SETTING_NETWORK                    = UINT64_C(1) << 7,
+        SETTING_EXPOSE_PORTS               = UINT64_C(1) << 8,
+        SETTING_READ_ONLY                  = UINT64_C(1) << 9,
+        SETTING_VOLATILE_MODE              = UINT64_C(1) << 10,
+        SETTING_CUSTOM_MOUNTS              = UINT64_C(1) << 11,
+        SETTING_WORKING_DIRECTORY          = UINT64_C(1) << 12,
+        SETTING_USERNS                     = UINT64_C(1) << 13,
+        SETTING_NOTIFY_READY               = UINT64_C(1) << 14,
+        SETTING_PIVOT_ROOT                 = UINT64_C(1) << 15,
+        SETTING_SYSCALL_FILTER             = UINT64_C(1) << 16,
+        SETTING_HOSTNAME                   = UINT64_C(1) << 17,
+        SETTING_NO_NEW_PRIVILEGES          = UINT64_C(1) << 18,
+        SETTING_OOM_SCORE_ADJUST           = UINT64_C(1) << 19,
+        SETTING_CPU_AFFINITY               = UINT64_C(1) << 20,
+        SETTING_RESOLV_CONF                = UINT64_C(1) << 21,
+        SETTING_LINK_JOURNAL               = UINT64_C(1) << 22,
+        SETTING_TIMEZONE                   = UINT64_C(1) << 23,
+        SETTING_EPHEMERAL                  = UINT64_C(1) << 24,
+        SETTING_SLICE                      = UINT64_C(1) << 25,
+        SETTING_DIRECTORY                  = UINT64_C(1) << 26,
+        SETTING_USE_CGNS                   = UINT64_C(1) << 27,
+        SETTING_CLONE_NS_FLAGS             = UINT64_C(1) << 28,
+        SETTING_CONSOLE_MODE               = UINT64_C(1) << 29,
+        SETTING_CREDENTIALS                = UINT64_C(1) << 30,
+        SETTING_BIND_USER                  = UINT64_C(1) << 31,
+        SETTING_BIND_USER_SHELL            = UINT64_C(1) << 32,
+        SETTING_SUPPRESS_SYNC              = UINT64_C(1) << 33,
+        SETTING_RESTRICT_ADDRESS_FAMILIES  = UINT64_C(1) << 34,
+        SETTING_RLIMIT_FIRST               = UINT64_C(1) << 35, /* we define one bit per resource limit here */
+        SETTING_RLIMIT_LAST                = UINT64_C(1) << (35 + _RLIMIT_MAX - 1),
+        _SETTINGS_MASK_ALL                 = (UINT64_C(1) << (35 + _RLIMIT_MAX)) -1,
         _SETTING_FORCE_ENUM_WIDTH = UINT64_MAX
 } SettingsMask;
 
@@ -190,6 +191,8 @@ typedef struct Settings {
         bool link_journal_try;
         TimezoneMode timezone;
         int suppress_sync;
+        Set *restrict_address_families;
+        bool restrict_address_families_is_allowlist;
 
         /* [Files] */
         int read_only;
@@ -277,6 +280,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_userns_chown);
 CONFIG_PARSER_PROTOTYPE(config_parse_userns_ownership);
 CONFIG_PARSER_PROTOTYPE(config_parse_bind_user);
 CONFIG_PARSER_PROTOTYPE(config_parse_bind_user_shell);
+CONFIG_PARSER_PROTOTYPE(config_parse_restrict_address_families);
 
 DECLARE_STRING_TABLE_LOOKUP(resolv_conf_mode, ResolvConfMode);
 
index accf448ea97f2876e8dde76e1473fb93c3b0a6d2..b6332844db80c379933e6f4204479d985b7471d8 100644 (file)
@@ -89,6 +89,7 @@
 #include "nspawn.h"
 #include "nsresource.h"
 #include "os-util.h"
+#include "parse-helpers.h"
 #include "osc-context.h"
 #include "options.h"
 #include "pager.h"
 #include "runtime-scope.h"
 #include "seccomp-util.h"
 #include "selinux-util.h"
+#include "set.h"
 #include "shift-uid.h"
 #include "signal-util.h"
 #include "siphash24.h"
@@ -251,6 +253,8 @@ static char *arg_bind_user_shell = NULL;
 static bool arg_bind_user_shell_copy = false;
 static char **arg_bind_user_groups = NULL;
 static bool arg_suppress_sync = false;
+static Set *arg_restrict_address_families = NULL;
+static bool arg_restrict_address_families_is_allowlist = false;
 static char *arg_settings_filename = NULL;
 static Architecture arg_architecture = _ARCHITECTURE_INVALID;
 static ImagePolicy *arg_image_policy = NULL;
@@ -295,6 +299,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_sysctl, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_bind_user, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_bind_user_shell, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_bind_user_groups, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_restrict_address_families, set_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_settings_filename, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
@@ -1122,6 +1127,14 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                OPTION_LONG("restrict-address-families", "LIST", "Restrict socket address families to the given allowlist"):
+                        r = parse_address_families(optarg, &arg_restrict_address_families, &arg_restrict_address_families_is_allowlist);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse --restrict-address-families= argument: %s", optarg);
+
+                        arg_settings_mask |= SETTING_RESTRICT_ADDRESS_FAMILIES;
+                        break;
+
                 OPTION('Z', "selinux-context", "SECLABEL",
                        "Set the SELinux security context to be used by processes in the container"):
                         arg_selinux_context = arg;
@@ -3456,7 +3469,7 @@ static int inner_child(
         } else
 #endif
         {
-                r = setup_seccomp(arg_caps_retain, arg_syscall_allow_list, arg_syscall_deny_list);
+                r = setup_seccomp(arg_caps_retain, arg_syscall_allow_list, arg_syscall_deny_list, arg_restrict_address_families, arg_restrict_address_families_is_allowlist);
                 if (r < 0)
                         return r;
         }
@@ -4944,6 +4957,12 @@ static int merge_settings(Settings *settings, const char *path) {
             settings->suppress_sync >= 0)
                 arg_suppress_sync = settings->suppress_sync;
 
+        if (!FLAGS_SET(arg_settings_mask, SETTING_RESTRICT_ADDRESS_FAMILIES) &&
+            (settings->restrict_address_families || settings->restrict_address_families_is_allowlist)) {
+                set_free_and_replace(arg_restrict_address_families, settings->restrict_address_families);
+                arg_restrict_address_families_is_allowlist = settings->restrict_address_families_is_allowlist;
+        }
+
         /* The following properties can only be set through the OCI settings logic, not from the command line, hence we
          * don't consult arg_settings_mask for them. */
 
@@ -5976,6 +5995,12 @@ static int run(int argc, char *argv[]) {
         if (r < 0)
                 goto finish;
 
+        if (!FLAGS_SET(arg_settings_mask, SETTING_RESTRICT_ADDRESS_FAMILIES) && !arg_restrict_address_families)
+                log_notice("Note: in a future version of systemd-nspawn the default set of permitted socket address"
+                           " families will be restricted to AF_INET, AF_INET6 and AF_UNIX."
+                           " Use --restrict-address-families= to configure the set of permitted socket address"
+                           " families, or set RestrictAddressFamilies= in a .nspawn file.");
+
         /* If we're not unsharing the network namespace and are unsharing the user namespace, we won't have
          * permissions to bind ports in the container, so let's drop the CAP_NET_BIND_SERVICE capability to
          * indicate that. */
index 8a61f2e66997bdccb3c1113467549b25dbf309ae..4e524bef37ed945f47dd22473aff5aa5105b2a18 100644 (file)
@@ -11,6 +11,7 @@
 #include "parse-helpers.h"
 #include "parse-util.h"
 #include "path-util.h"
+#include "set.h"
 #include "string-util.h"
 #include "utf8.h"
 
@@ -86,6 +87,63 @@ int path_simplify_and_warn(
         return 0;
 }
 
+/* Parses the address family list in 'rvalue' and merges it into the set '*families'.
+ *
+ * An empty string drops the set and clears '*is_allowlist'. The literal "none" drops the set as well, but
+ * sets '*is_allowlist'. Otherwise a leading '~' inverts the polarity of the listed families. If no set
+ * exists yet, one is allocated and '*is_allowlist' is initialized to true unless the list is inverted.
+ * Each listed family is then added to the set if the polarity of this list matches '*is_allowlist', and
+ * removed from the set otherwise.
+ *
+ * Returns 0 on success, and a negative error code if allocation fails, a word cannot be extracted or a
+ * name is not recognized by af_from_name(). On failure '*families' and '*is_allowlist' may already have
+ * been modified. */
+int parse_address_families(const char *rvalue, Set **families, bool *is_allowlist) {
+        const char *cursor;
+        bool negate;
+
+        assert(rvalue);
+        assert(families);
+        assert(is_allowlist);
+
+        /* Both "" and "none" discard the set; they differ only in the allowlist flag they leave behind. */
+        if (isempty(rvalue) || streq(rvalue, "none")) {
+                *families = set_free(*families);
+                *is_allowlist = !isempty(rvalue);
+                return 0;
+        }
+
+        negate = rvalue[0] == '~';
+        cursor = negate ? rvalue + 1 : rvalue;
+
+        /* When no set exists yet, this list decides whether the set is an allowlist or a denylist. */
+        if (!*families) {
+                *families = set_new(NULL);
+                if (!*families)
+                        return -ENOMEM;
+
+                *is_allowlist = !negate;
+        }
+
+        for (;;) {
+                _cleanup_free_ char *name = NULL;
+                int family, k;
+
+                k = extract_first_word(&cursor, &name, NULL, EXTRACT_UNQUOTE);
+                if (k < 0)
+                        return k;
+                if (k == 0)
+                        return 0;
+
+                family = af_from_name(name);
+                if (family < 0)
+                        return family;
+
+                /* An entry whose polarity differs from the set's is not recorded, it cancels an earlier
+                 * entry instead: e.g. allowing a family that sits in a denylist removes it from there. */
+                if (negate == *is_allowlist) {
+                        set_remove(*families, INT_TO_PTR(family));
+                        continue;
+                }
+
+                k = set_put(*families, INT_TO_PTR(family));
+                if (k < 0)
+                        return k;
+        }
+}
+
 static int parse_af_token(
                 const char *token,
                 int *family,
index 402147cbf38a59d3f273cef91fb6e88054daa282..a906dfdaefdb5f2d02088c25a3596ba64c00a3cb 100644 (file)
@@ -20,6 +20,8 @@ int path_simplify_and_warn(
                 unsigned line,
                 const char *lvalue);
 
+int parse_address_families(const char *rvalue, Set **families, bool *is_allowlist);
+
 int parse_socket_bind_item(
                 const char *str,
                 int *address_family,