From: Daan De Meyer Date: Mon, 22 Dec 2025 10:22:34 +0000 (+0100) Subject: nspawn: Add --restrict-address-families= option X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4bbdc8a6a2eaca3b717810bbae0265eb375ab68c;p=thirdparty%2Fsystemd.git nspawn: Add --restrict-address-families= option Add a new --restrict-address-families= command line option and corresponding RestrictAddressFamilies= setting for .nspawn files to restrict which socket address families may be used inside a container. Many address families such as AF_VSOCK and AF_NETLINK are not network-namespaced, so restricting access to them in containers improves isolation. The option supports allowlist and denylist modes (via ~ prefix), as well as "none" to block all families, matching the semantics of RestrictAddressFamilies= in unit files. The address family parsing logic is extracted into a shared parse_address_families() helper in parse-helpers.c, which is now also used by config_parse_address_families() in load-fragment.c. This is currently opt-in. In a future version, the default will be changed to restrict address families to AF_INET, AF_INET6 and AF_UNIX. --- diff --git a/NEWS b/NEWS index 2d32bd08b4a..b440af59396 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,13 @@ CHANGES WITH 261 in spe: attestation environments which use hardware CC registers and not the TPM quote. + * systemd-nspawn gained a new --restrict-address-families= option (and + corresponding RestrictAddressFamilies= setting in .nspawn files) to + restrict which socket address families may be used in the container. + This is currently opt-in. In a future version, the default will be + changed to restrict socket address families to AF_INET, AF_INET6 and + AF_UNIX. 
+ New features: * A new tmpfiles.d/root.conf has been added that sets permissions diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index 5c7acf51594..045aa60db81 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -1340,6 +1340,28 @@ After=sys-subsystem-net-devices-ens1.device + + --restrict-address-families= + + Restrict the socket address families accessible to the container. Takes a + space-separated list of address family names, such as AF_INET, + AF_INET6 or AF_UNIX. When prefixed with + ~ the listed address families will be prohibited, otherwise they will be permitted + (allowlisted). Use the special value none to prohibit all address families. This + option may be specified more than once, in which case the configured lists are combined. If both a + positive and a negative list are configured, the negative list takes precedence over the positive + list. + + Note that currently this option defaults to no restrictions, i.e. all address families are + accessible. In a future version of systemd, the default will be changed to restrict address families to + AF_INET, AF_INET6 and AF_UNIX. Use + --restrict-address-families= (with an empty argument) or set + RestrictAddressFamilies= in a .nspawn file to opt out of + filtering explicitly. + + + + diff --git a/man/systemd.nspawn.xml b/man/systemd.nspawn.xml index bf9526df806..29279806852 100644 --- a/man/systemd.nspawn.xml +++ b/man/systemd.nspawn.xml @@ -340,6 +340,18 @@ + + RestrictAddressFamilies= + + Restricts the socket address families accessible to the container. This is equivalent + to the --restrict-address-families= command line switch, and takes the same list + parameter. See + systemd-nspawn(1) for + details. 
+ + + + LimitCPU= LimitFSIZE= diff --git a/shell-completion/bash/systemd-nspawn b/shell-completion/bash/systemd-nspawn index 08ff25d906c..b39d3cbd6d8 100644 --- a/shell-completion/bash/systemd-nspawn +++ b/shell-completion/bash/systemd-nspawn @@ -77,7 +77,8 @@ _systemd_nspawn() { --pivot-root --property --private-users --private-users-ownership --network-namespace-path --network-ipvlan --network-veth-extra --network-zone -p --port --system-call-filter --overlay --overlay-ro --settings --rlimit --hostname --no-new-privileges --oom-score-adjust --cpu-affinity - --resolv-conf --timezone --root-hash-sig --background --oci-bundle --verity-data' + --resolv-conf --timezone --root-hash-sig --background --oci-bundle --verity-data + --restrict-address-families' ) _init_completion || return diff --git a/shell-completion/zsh/_systemd-nspawn b/shell-completion/zsh/_systemd-nspawn index fa79b7f8d86..ee28fa74759 100644 --- a/shell-completion/zsh/_systemd-nspawn +++ b/shell-completion/zsh/_systemd-nspawn @@ -53,4 +53,5 @@ _arguments \ '--volatile=[Run the system in volatile mode.]:volatile:(no yes state)' \ "--notify-ready=[Control when the ready notification is sent]:options:(yes no)" \ "--suppress-sync=[Control whether to suppress disk synchronization for the container payload]:options:(yes no)" \ + '--restrict-address-families=[Restrict socket address families accessible in the container.]: : _message "address families"' \ '*:: : _normal' diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 274fd82514d..52005c8c436 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -10,7 +10,6 @@ #include "sd-bus.h" #include "sd-messages.h" -#include "af-list.h" #include "all-units.h" #include "alloc-util.h" #include "bpf-program.h" @@ -3474,72 +3473,26 @@ int config_parse_address_families( void *userdata) { ExecContext *c = data; - bool invert = false; + bool is_allowlist = c->address_families_allow_list; int r; assert(filename); assert(lvalue); 
assert(rvalue); - if (isempty(rvalue)) { - /* Empty assignment resets the list */ - c->address_families = set_free(c->address_families); - c->address_families_allow_list = false; - return 0; - } - - if (streq(rvalue, "none")) { - /* Forbid all address families. */ - c->address_families = set_free(c->address_families); - c->address_families_allow_list = true; + r = parse_address_families(rvalue, &c->address_families, &is_allowlist); + /* Copy back unconditionally: parse_address_families() may have partially populated + * c->address_families before failing, so keep is_allowlist in sync with that state. */ + c->address_families_allow_list = is_allowlist; + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse address family, ignoring: %s", rvalue); return 0; } - if (rvalue[0] == '~') { - invert = true; - rvalue++; - } - - if (!c->address_families) { - c->address_families = set_new(NULL); - if (!c->address_families) - return log_oom(); - - c->address_families_allow_list = !invert; - } - - for (const char *p = rvalue;;) { - _cleanup_free_ char *word = NULL; - int af; - - r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE); - if (r == -ENOMEM) - return log_oom(); - if (r < 0) { - log_syntax(unit, LOG_WARNING, filename, line, r, - "Invalid syntax, ignoring: %s", rvalue); - return 0; - } - if (r == 0) - return 0; - - af = af_from_name(word); - if (af < 0) { - log_syntax(unit, LOG_WARNING, filename, line, af, - "Failed to parse address family, ignoring: %s", word); - continue; - } - - /* If we previously wanted to forbid an address family and now - * we want to allow it, then just remove it from the list. 
- */ - if (!invert == c->address_families_allow_list) { - r = set_put(c->address_families, INT_TO_PTR(af)); - if (r < 0) - return log_oom(); - } else - set_remove(c->address_families, INT_TO_PTR(af)); - } + return 0; } #endif diff --git a/src/nspawn/nspawn-gperf.gperf b/src/nspawn/nspawn-gperf.gperf index cdad70706e6..439e176e458 100644 --- a/src/nspawn/nspawn-gperf.gperf +++ b/src/nspawn/nspawn-gperf.gperf @@ -19,67 +19,68 @@ struct ConfigPerfItem; %struct-type %includes %% -Exec.Boot, config_parse_boot, 0, 0 -Exec.Ephemeral, config_parse_tristate, 0, offsetof(Settings, ephemeral) -Exec.ProcessTwo, config_parse_pid2, 0, 0 -Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters) -Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment) -Exec.User, config_parse_string, CONFIG_PARSE_STRING_SAFE, offsetof(Settings, user) -Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability) -Exec.AmbientCapability, config_parse_capability, 0, offsetof(Settings, ambient_capability) -Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, drop_capability) -Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal) -Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) -Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id) -Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory) -Exec.PivotRoot, config_parse_pivot_root, 0, 0 -Exec.PrivateUsers, config_parse_private_users, 0, 0 -Exec.PrivateUsersDelegate, config_parse_unsigned, 0, offsetof(Settings, delegate_container_ranges) -Exec.NotifyReady, config_parse_tristate, 0, offsetof(Settings, notify_ready) -Exec.SystemCallFilter, config_parse_syscall_filter, 0, 0 -Exec.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof(Settings, rlimit) -Exec.LimitFSIZE, config_parse_rlimit, RLIMIT_FSIZE, offsetof(Settings, rlimit) -Exec.LimitDATA, config_parse_rlimit, RLIMIT_DATA, offsetof(Settings, rlimit) 
-Exec.LimitSTACK, config_parse_rlimit, RLIMIT_STACK, offsetof(Settings, rlimit) -Exec.LimitCORE, config_parse_rlimit, RLIMIT_CORE, offsetof(Settings, rlimit) -Exec.LimitRSS, config_parse_rlimit, RLIMIT_RSS, offsetof(Settings, rlimit) -Exec.LimitNOFILE, config_parse_rlimit, RLIMIT_NOFILE, offsetof(Settings, rlimit) -Exec.LimitAS, config_parse_rlimit, RLIMIT_AS, offsetof(Settings, rlimit) -Exec.LimitNPROC, config_parse_rlimit, RLIMIT_NPROC, offsetof(Settings, rlimit) -Exec.LimitMEMLOCK, config_parse_rlimit, RLIMIT_MEMLOCK, offsetof(Settings, rlimit) -Exec.LimitLOCKS, config_parse_rlimit, RLIMIT_LOCKS, offsetof(Settings, rlimit) -Exec.LimitSIGPENDING, config_parse_rlimit, RLIMIT_SIGPENDING, offsetof(Settings, rlimit) -Exec.LimitMSGQUEUE, config_parse_rlimit, RLIMIT_MSGQUEUE, offsetof(Settings, rlimit) -Exec.LimitNICE, config_parse_rlimit, RLIMIT_NICE, offsetof(Settings, rlimit) -Exec.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof(Settings, rlimit) -Exec.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof(Settings, rlimit) -Exec.Hostname, config_parse_hostname, 0, offsetof(Settings, hostname) -Exec.NoNewPrivileges, config_parse_tristate, 0, offsetof(Settings, no_new_privileges) -Exec.OOMScoreAdjust, config_parse_oom_score_adjust, 0, 0 -Exec.CPUAffinity, config_parse_cpu_set, 0, offsetof(Settings, cpu_set) -Exec.ResolvConf, config_parse_resolv_conf, 0, offsetof(Settings, resolv_conf) -Exec.LinkJournal, config_parse_link_journal, 0, 0 -Exec.Timezone, config_parse_timezone_mode, 0, offsetof(Settings, timezone) -Exec.SuppressSync, config_parse_tristate, 0, offsetof(Settings, suppress_sync) -Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) -Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode) -Files.Bind, config_parse_bind, 0, 0 -Files.BindReadOnly, config_parse_bind, 1, 0 -Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0 -Files.Inaccessible, config_parse_inaccessible, 0, 0 -Files.Overlay, 
config_parse_overlay, 0, 0 -Files.OverlayReadOnly, config_parse_overlay, 1, 0 -Files.PrivateUsersChown, config_parse_userns_chown, 0, offsetof(Settings, userns_ownership) -Files.PrivateUsersOwnership, config_parse_userns_ownership, 0, offsetof(Settings, userns_ownership) -Files.BindUser, config_parse_bind_user, 0, offsetof(Settings, bind_user) -Files.BindUserShell, config_parse_bind_user_shell, 0, 0 -Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network) -Network.NamespacePath, config_parse_path, 0, offsetof(Settings, network_namespace_path) -Network.Interface, config_parse_network_iface_pair, 0, offsetof(Settings, network_interfaces) -Network.MACVLAN, config_parse_macvlan_iface_pair, 0, offsetof(Settings, network_macvlan) -Network.IPVLAN, config_parse_ipvlan_iface_pair, 0, offsetof(Settings, network_ipvlan) -Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth) -Network.VirtualEthernetExtra, config_parse_veth_extra, 0, 0 -Network.Bridge, config_parse_ifname, 0, offsetof(Settings, network_bridge) -Network.Zone, config_parse_network_zone, 0, 0 -Network.Port, config_parse_expose_port, 0, 0 +Exec.Boot, config_parse_boot, 0, 0 +Exec.Ephemeral, config_parse_tristate, 0, offsetof(Settings, ephemeral) +Exec.ProcessTwo, config_parse_pid2, 0, 0 +Exec.Parameters, config_parse_strv, 0, offsetof(Settings, parameters) +Exec.Environment, config_parse_strv, 0, offsetof(Settings, environment) +Exec.User, config_parse_string, CONFIG_PARSE_STRING_SAFE, offsetof(Settings, user) +Exec.Capability, config_parse_capability, 0, offsetof(Settings, capability) +Exec.AmbientCapability, config_parse_capability, 0, offsetof(Settings, ambient_capability) +Exec.DropCapability, config_parse_capability, 0, offsetof(Settings, drop_capability) +Exec.KillSignal, config_parse_signal, 0, offsetof(Settings, kill_signal) +Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality) +Exec.MachineID, config_parse_id128, 0, 
offsetof(Settings, machine_id) +Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory) +Exec.PivotRoot, config_parse_pivot_root, 0, 0 +Exec.PrivateUsers, config_parse_private_users, 0, 0 +Exec.PrivateUsersDelegate, config_parse_unsigned, 0, offsetof(Settings, delegate_container_ranges) +Exec.NotifyReady, config_parse_tristate, 0, offsetof(Settings, notify_ready) +Exec.SystemCallFilter, config_parse_syscall_filter, 0, 0 +Exec.LimitCPU, config_parse_rlimit, RLIMIT_CPU, offsetof(Settings, rlimit) +Exec.LimitFSIZE, config_parse_rlimit, RLIMIT_FSIZE, offsetof(Settings, rlimit) +Exec.LimitDATA, config_parse_rlimit, RLIMIT_DATA, offsetof(Settings, rlimit) +Exec.LimitSTACK, config_parse_rlimit, RLIMIT_STACK, offsetof(Settings, rlimit) +Exec.LimitCORE, config_parse_rlimit, RLIMIT_CORE, offsetof(Settings, rlimit) +Exec.LimitRSS, config_parse_rlimit, RLIMIT_RSS, offsetof(Settings, rlimit) +Exec.LimitNOFILE, config_parse_rlimit, RLIMIT_NOFILE, offsetof(Settings, rlimit) +Exec.LimitAS, config_parse_rlimit, RLIMIT_AS, offsetof(Settings, rlimit) +Exec.LimitNPROC, config_parse_rlimit, RLIMIT_NPROC, offsetof(Settings, rlimit) +Exec.LimitMEMLOCK, config_parse_rlimit, RLIMIT_MEMLOCK, offsetof(Settings, rlimit) +Exec.LimitLOCKS, config_parse_rlimit, RLIMIT_LOCKS, offsetof(Settings, rlimit) +Exec.LimitSIGPENDING, config_parse_rlimit, RLIMIT_SIGPENDING, offsetof(Settings, rlimit) +Exec.LimitMSGQUEUE, config_parse_rlimit, RLIMIT_MSGQUEUE, offsetof(Settings, rlimit) +Exec.LimitNICE, config_parse_rlimit, RLIMIT_NICE, offsetof(Settings, rlimit) +Exec.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof(Settings, rlimit) +Exec.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof(Settings, rlimit) +Exec.Hostname, config_parse_hostname, 0, offsetof(Settings, hostname) +Exec.NoNewPrivileges, config_parse_tristate, 0, offsetof(Settings, no_new_privileges) +Exec.OOMScoreAdjust, config_parse_oom_score_adjust, 0, 0 +Exec.CPUAffinity, config_parse_cpu_set, 0, 
offsetof(Settings, cpu_set) +Exec.ResolvConf, config_parse_resolv_conf, 0, offsetof(Settings, resolv_conf) +Exec.LinkJournal, config_parse_link_journal, 0, 0 +Exec.Timezone, config_parse_timezone_mode, 0, offsetof(Settings, timezone) +Exec.SuppressSync, config_parse_tristate, 0, offsetof(Settings, suppress_sync) +Exec.RestrictAddressFamilies, config_parse_restrict_address_families, 0, 0 +Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only) +Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode) +Files.Bind, config_parse_bind, 0, 0 +Files.BindReadOnly, config_parse_bind, 1, 0 +Files.TemporaryFileSystem, config_parse_tmpfs, 0, 0 +Files.Inaccessible, config_parse_inaccessible, 0, 0 +Files.Overlay, config_parse_overlay, 0, 0 +Files.OverlayReadOnly, config_parse_overlay, 1, 0 +Files.PrivateUsersChown, config_parse_userns_chown, 0, offsetof(Settings, userns_ownership) +Files.PrivateUsersOwnership, config_parse_userns_ownership, 0, offsetof(Settings, userns_ownership) +Files.BindUser, config_parse_bind_user, 0, offsetof(Settings, bind_user) +Files.BindUserShell, config_parse_bind_user_shell, 0, 0 +Network.Private, config_parse_tristate, 0, offsetof(Settings, private_network) +Network.NamespacePath, config_parse_path, 0, offsetof(Settings, network_namespace_path) +Network.Interface, config_parse_network_iface_pair, 0, offsetof(Settings, network_interfaces) +Network.MACVLAN, config_parse_macvlan_iface_pair, 0, offsetof(Settings, network_macvlan) +Network.IPVLAN, config_parse_ipvlan_iface_pair, 0, offsetof(Settings, network_ipvlan) +Network.VirtualEthernet, config_parse_tristate, 0, offsetof(Settings, network_veth) +Network.VirtualEthernetExtra, config_parse_veth_extra, 0, 0 +Network.Bridge, config_parse_ifname, 0, offsetof(Settings, network_bridge) +Network.Zone, config_parse_network_zone, 0, 0 +Network.Port, config_parse_expose_port, 0, 0 diff --git a/src/nspawn/nspawn-seccomp.c b/src/nspawn/nspawn-seccomp.c index 
d85a30ee9f9..beffd5da8a8 100644 --- a/src/nspawn/nspawn-seccomp.c +++ b/src/nspawn/nspawn-seccomp.c @@ -7,6 +7,7 @@ #include "log.h" #include "nspawn-seccomp.h" #include "seccomp-util.h" +#include "set.h" #include "strv.h" #if HAVE_SECCOMP @@ -172,7 +173,13 @@ static int add_syscall_filters( return 0; } -int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list) { +int setup_seccomp( + uint64_t cap_list_retain, + char **syscall_allow_list, + char **syscall_deny_list, + Set *restrict_address_families, + bool restrict_address_families_is_allowlist) { + uint32_t arch; int r; @@ -241,12 +248,18 @@ int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **sy seccomp_arch_to_string(arch)); } + if (restrict_address_families_is_allowlist || !set_isempty(restrict_address_families)) { + r = seccomp_restrict_address_families(restrict_address_families, restrict_address_families_is_allowlist); + if (r < 0) + return log_error_errno(r, "Failed to install address family filter: %m"); + } + return 0; } #else -int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list) { +int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list, Set *restrict_address_families, bool restrict_address_families_is_allowlist) { return 0; } diff --git a/src/nspawn/nspawn-seccomp.h b/src/nspawn/nspawn-seccomp.h index 31520a09300..52232ad56ae 100644 --- a/src/nspawn/nspawn-seccomp.h +++ b/src/nspawn/nspawn-seccomp.h @@ -3,4 +3,9 @@ #include "shared-forward.h" -int setup_seccomp(uint64_t cap_list_retain, char **syscall_allow_list, char **syscall_deny_list); +int setup_seccomp( + uint64_t cap_list_retain, + char **syscall_allow_list, + char **syscall_deny_list, + Set *restrict_address_families, + bool restrict_address_families_is_allowlist); diff --git a/src/nspawn/nspawn-settings.c b/src/nspawn/nspawn-settings.c index c058ab28f71..9abd5024a50 100644 --- 
a/src/nspawn/nspawn-settings.c
+++ b/src/nspawn/nspawn-settings.c
@@ -12,9 +12,11 @@
 #include "nspawn-mount.h"
 #include "nspawn-network.h"
 #include "nspawn-settings.h"
+#include "parse-helpers.h"
 #include "parse-util.h"
 #include "process-util.h"
 #include "rlimit-util.h"
+#include "set.h"
 #include "socket-util.h"
 #include "string-table.h"
 #include "string-util.h"
@@ -137,6 +139,7 @@ Settings* settings_free(Settings *s) {
         rlimit_free_all(s->rlimit);
         free(s->hostname);
         cpu_set_done(&s->cpu_set);
+        set_free(s->restrict_address_families);
         strv_free(s->bind_user);
         free(s->bind_user_shell);
 
@@ -1054,3 +1057,35 @@ int config_parse_bind_user_shell(
 
         return 0;
 }
+
+/* Config parser for RestrictAddressFamilies= in .nspawn files: feeds rvalue into parse_address_families(),
+ * which updates the address family set and the allow-list flag stored in the Settings object passed as
+ * data. Allocation failures are reported via log_oom(); any other parse failure is logged as a warning and
+ * otherwise ignored (0 is returned). */
+int config_parse_restrict_address_families(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        Settings *s = ASSERT_PTR(data);
+        int r;
+
+        assert(rvalue);
+
+        r = parse_address_families(rvalue, &s->restrict_address_families, &s->restrict_address_families_is_allowlist);
+        if (r >= 0)
+                return 0;
+        if (r == -ENOMEM)
+                return log_oom();
+
+        log_syntax(unit, LOG_WARNING, filename, line, r,
+                   "Failed to parse address family, ignoring: %s", rvalue);
+        return 0;
+}
diff --git a/src/nspawn/nspawn-settings.h b/src/nspawn/nspawn-settings.h
index 84c342b83c1..c2e079f0563 100644
--- a/src/nspawn/nspawn-settings.h
+++ b/src/nspawn/nspawn-settings.h
@@ -92,43 +92,44 @@ typedef enum ConsoleMode {
 } ConsoleMode;
 
 typedef enum SettingsMask {
-        SETTING_START_MODE        = UINT64_C(1) << 0,
-        SETTING_ENVIRONMENT       = UINT64_C(1) << 1,
-        SETTING_USER              = UINT64_C(1) << 2,
-        SETTING_CAPABILITY        = UINT64_C(1) << 3,
-        SETTING_KILL_SIGNAL       = UINT64_C(1) << 4,
-        SETTING_PERSONALITY       = UINT64_C(1) << 5,
-        SETTING_MACHINE_ID        = UINT64_C(1) << 6,
-        SETTING_NETWORK           = UINT64_C(1) << 7,
-        SETTING_EXPOSE_PORTS      = UINT64_C(1) << 8,
-        SETTING_READ_ONLY         = UINT64_C(1) << 9,
-        
SETTING_VOLATILE_MODE = UINT64_C(1) << 10, - SETTING_CUSTOM_MOUNTS = UINT64_C(1) << 11, - SETTING_WORKING_DIRECTORY = UINT64_C(1) << 12, - SETTING_USERNS = UINT64_C(1) << 13, - SETTING_NOTIFY_READY = UINT64_C(1) << 14, - SETTING_PIVOT_ROOT = UINT64_C(1) << 15, - SETTING_SYSCALL_FILTER = UINT64_C(1) << 16, - SETTING_HOSTNAME = UINT64_C(1) << 17, - SETTING_NO_NEW_PRIVILEGES = UINT64_C(1) << 18, - SETTING_OOM_SCORE_ADJUST = UINT64_C(1) << 19, - SETTING_CPU_AFFINITY = UINT64_C(1) << 20, - SETTING_RESOLV_CONF = UINT64_C(1) << 21, - SETTING_LINK_JOURNAL = UINT64_C(1) << 22, - SETTING_TIMEZONE = UINT64_C(1) << 23, - SETTING_EPHEMERAL = UINT64_C(1) << 24, - SETTING_SLICE = UINT64_C(1) << 25, - SETTING_DIRECTORY = UINT64_C(1) << 26, - SETTING_USE_CGNS = UINT64_C(1) << 27, - SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28, - SETTING_CONSOLE_MODE = UINT64_C(1) << 29, - SETTING_CREDENTIALS = UINT64_C(1) << 30, - SETTING_BIND_USER = UINT64_C(1) << 31, - SETTING_BIND_USER_SHELL = UINT64_C(1) << 32, - SETTING_SUPPRESS_SYNC = UINT64_C(1) << 33, - SETTING_RLIMIT_FIRST = UINT64_C(1) << 34, /* we define one bit per resource limit here */ - SETTING_RLIMIT_LAST = UINT64_C(1) << (34 + _RLIMIT_MAX - 1), - _SETTINGS_MASK_ALL = (UINT64_C(1) << (34 + _RLIMIT_MAX)) -1, + SETTING_START_MODE = UINT64_C(1) << 0, + SETTING_ENVIRONMENT = UINT64_C(1) << 1, + SETTING_USER = UINT64_C(1) << 2, + SETTING_CAPABILITY = UINT64_C(1) << 3, + SETTING_KILL_SIGNAL = UINT64_C(1) << 4, + SETTING_PERSONALITY = UINT64_C(1) << 5, + SETTING_MACHINE_ID = UINT64_C(1) << 6, + SETTING_NETWORK = UINT64_C(1) << 7, + SETTING_EXPOSE_PORTS = UINT64_C(1) << 8, + SETTING_READ_ONLY = UINT64_C(1) << 9, + SETTING_VOLATILE_MODE = UINT64_C(1) << 10, + SETTING_CUSTOM_MOUNTS = UINT64_C(1) << 11, + SETTING_WORKING_DIRECTORY = UINT64_C(1) << 12, + SETTING_USERNS = UINT64_C(1) << 13, + SETTING_NOTIFY_READY = UINT64_C(1) << 14, + SETTING_PIVOT_ROOT = UINT64_C(1) << 15, + SETTING_SYSCALL_FILTER = UINT64_C(1) << 16, + SETTING_HOSTNAME = 
UINT64_C(1) << 17, + SETTING_NO_NEW_PRIVILEGES = UINT64_C(1) << 18, + SETTING_OOM_SCORE_ADJUST = UINT64_C(1) << 19, + SETTING_CPU_AFFINITY = UINT64_C(1) << 20, + SETTING_RESOLV_CONF = UINT64_C(1) << 21, + SETTING_LINK_JOURNAL = UINT64_C(1) << 22, + SETTING_TIMEZONE = UINT64_C(1) << 23, + SETTING_EPHEMERAL = UINT64_C(1) << 24, + SETTING_SLICE = UINT64_C(1) << 25, + SETTING_DIRECTORY = UINT64_C(1) << 26, + SETTING_USE_CGNS = UINT64_C(1) << 27, + SETTING_CLONE_NS_FLAGS = UINT64_C(1) << 28, + SETTING_CONSOLE_MODE = UINT64_C(1) << 29, + SETTING_CREDENTIALS = UINT64_C(1) << 30, + SETTING_BIND_USER = UINT64_C(1) << 31, + SETTING_BIND_USER_SHELL = UINT64_C(1) << 32, + SETTING_SUPPRESS_SYNC = UINT64_C(1) << 33, + SETTING_RESTRICT_ADDRESS_FAMILIES = UINT64_C(1) << 34, + SETTING_RLIMIT_FIRST = UINT64_C(1) << 35, /* we define one bit per resource limit here */ + SETTING_RLIMIT_LAST = UINT64_C(1) << (35 + _RLIMIT_MAX - 1), + _SETTINGS_MASK_ALL = (UINT64_C(1) << (35 + _RLIMIT_MAX)) -1, _SETTING_FORCE_ENUM_WIDTH = UINT64_MAX } SettingsMask; @@ -190,6 +191,8 @@ typedef struct Settings { bool link_journal_try; TimezoneMode timezone; int suppress_sync; + Set *restrict_address_families; + bool restrict_address_families_is_allowlist; /* [Files] */ int read_only; @@ -277,6 +280,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_userns_chown); CONFIG_PARSER_PROTOTYPE(config_parse_userns_ownership); CONFIG_PARSER_PROTOTYPE(config_parse_bind_user); CONFIG_PARSER_PROTOTYPE(config_parse_bind_user_shell); +CONFIG_PARSER_PROTOTYPE(config_parse_restrict_address_families); DECLARE_STRING_TABLE_LOOKUP(resolv_conf_mode, ResolvConfMode); diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index accf448ea97..b6332844db8 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -89,6 +89,7 @@ #include "nspawn.h" #include "nsresource.h" #include "os-util.h" +#include "parse-helpers.h" #include "osc-context.h" #include "options.h" #include "pager.h" @@ -108,6 +109,7 @@ #include "runtime-scope.h" 
#include "seccomp-util.h"
 #include "selinux-util.h"
+#include "set.h"
 #include "shift-uid.h"
 #include "signal-util.h"
 #include "siphash24.h"
@@ -251,6 +253,8 @@ static char *arg_bind_user_shell = NULL;
 static bool arg_bind_user_shell_copy = false;
 static char **arg_bind_user_groups = NULL;
 static bool arg_suppress_sync = false;
+static Set *arg_restrict_address_families = NULL;
+static bool arg_restrict_address_families_is_allowlist = false;
 static char *arg_settings_filename = NULL;
 static Architecture arg_architecture = _ARCHITECTURE_INVALID;
 static ImagePolicy *arg_image_policy = NULL;
@@ -295,6 +299,7 @@ STATIC_DESTRUCTOR_REGISTER(arg_sysctl, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_bind_user, strv_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_bind_user_shell, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_bind_user_groups, strv_freep);
+STATIC_DESTRUCTOR_REGISTER(arg_restrict_address_families, set_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_settings_filename, freep);
 STATIC_DESTRUCTOR_REGISTER(arg_image_policy, image_policy_freep);
 STATIC_DESTRUCTOR_REGISTER(arg_background, freep);
@@ -1122,6 +1127,17 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                OPTION_LONG("restrict-address-families", "LIST", "Restrict socket address families to the given allowlist"):
+                        /* Same syntax as RestrictAddressFamilies= in unit files. Setting the mask bit below
+                         * makes the command line take precedence over a .nspawn file. Note that the argument
+                         * is passed in via "arg", exactly as for the other options of this switch. */
+                        r = parse_address_families(arg, &arg_restrict_address_families, &arg_restrict_address_families_is_allowlist);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse --restrict-address-families= argument: %s", arg);
+
+                        arg_settings_mask |= SETTING_RESTRICT_ADDRESS_FAMILIES;
+                        break;
+
                 OPTION('Z', "selinux-context", "SECLABEL",
                        "Set the SELinux security context to be used by processes in the container"):
                         arg_selinux_context = arg;
@@ -3456,7 +3472,7 @@ static int inner_child(
         } else
 #endif
         {
-                r = setup_seccomp(arg_caps_retain, arg_syscall_allow_list, arg_syscall_deny_list);
+                r = setup_seccomp(arg_caps_retain, arg_syscall_allow_list, arg_syscall_deny_list, arg_restrict_address_families, 
arg_restrict_address_families_is_allowlist);
                 if (r < 0)
                         return r;
         }
@@ -4944,6 +4957,12 @@ static int merge_settings(Settings *settings, const char *path) {
             settings->suppress_sync >= 0)
                 arg_suppress_sync = settings->suppress_sync;
 
+        if (!FLAGS_SET(arg_settings_mask, SETTING_RESTRICT_ADDRESS_FAMILIES) &&
+            (settings->restrict_address_families || settings->restrict_address_families_is_allowlist)) {
+                set_free_and_replace(arg_restrict_address_families, settings->restrict_address_families);
+                arg_restrict_address_families_is_allowlist = settings->restrict_address_families_is_allowlist;
+        }
+
         /* The following properties can only be set through the OCI settings logic, not from the command line, hence we
          * don't consult arg_settings_mask for them. */
 
@@ -5976,6 +5995,16 @@ static int run(int argc, char *argv[]) {
         if (r < 0)
                 goto finish;
 
+        /* Skip the notice when address families were configured, either on the command line (mask bit)
+         * or through settings merged into the arg_* variables. "none" is stored as an allow-list flag
+         * without a set, hence checking the set alone is not sufficient. */
+        if (!FLAGS_SET(arg_settings_mask, SETTING_RESTRICT_ADDRESS_FAMILIES) &&
+            !arg_restrict_address_families && !arg_restrict_address_families_is_allowlist)
+                log_notice("Note: in a future version of systemd-nspawn the default set of permitted socket address"
+                           " families will be restricted to AF_INET, AF_INET6 and AF_UNIX."
+                           " Use --restrict-address-families= to configure the set of permitted socket address"
+                           " families, or set RestrictAddressFamilies= in a .nspawn file.");
+
         /* If we're not unsharing the network namespace and are unsharing the user namespace, we won't have
          * permissions to bind ports in the container, so let's drop the CAP_NET_BIND_SERVICE capability to
          * indicate that. 
*/
diff --git a/src/shared/parse-helpers.c b/src/shared/parse-helpers.c
index 8a61f2e6699..4e524bef37e 100644
--- a/src/shared/parse-helpers.c
+++ b/src/shared/parse-helpers.c
@@ -11,6 +11,7 @@
 #include "parse-helpers.h"
 #include "parse-util.h"
 #include "path-util.h"
+#include "set.h"
 #include "string-util.h"
 #include "utf8.h"
 
@@ -86,6 +87,92 @@ int path_simplify_and_warn(
         return 0;
 }
 
+/* Parses one RestrictAddressFamilies=-style assignment and merges it into *families / *is_allowlist.
+ *
+ * An empty string resets the state to "no set, deny-list", and "none" resets it to "no set, allow-list".
+ * Anything else is a list of address family names (AF_INET, AF_UNIX, ...), optionally prefixed with "~" to
+ * invert it. If *families is unset, the polarity of this assignment decides whether it becomes an
+ * allow-list or a deny-list; entries whose polarity differs from the stored one are removed from the set
+ * instead of being added.
+ *
+ * Returns 0 on success, or a negative error on failure. The list is validated as a whole before anything
+ * is modified: a misspelled name must not leave behind a freshly allocated, empty allow-list (which would
+ * mean "forbid everything") when the caller logs that the line is being ignored. */
+int parse_address_families(const char *rvalue, Set **families, bool *is_allowlist) {
+        bool invert = false;
+        int r;
+
+        assert(rvalue);
+        assert(families);
+        assert(is_allowlist);
+
+        if (isempty(rvalue)) {
+                *families = set_free(*families);
+                *is_allowlist = false;
+                return 0;
+        }
+
+        if (streq(rvalue, "none")) {
+                *families = set_free(*families);
+                *is_allowlist = true;
+                return 0;
+        }
+
+        if (rvalue[0] == '~') {
+                invert = true;
+                rvalue++;
+        }
+
+        /* First pass: only check that every word names a known address family, without touching the
+         * output parameters yet. */
+        for (const char *p = rvalue;;) {
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        break;
+
+                r = af_from_name(word);
+                if (r < 0)
+                        return r;
+        }
+
+        if (!*families) {
+                *families = set_new(NULL);
+                if (!*families)
+                        return -ENOMEM;
+
+                *is_allowlist = !invert;
+        }
+
+        /* Second pass: apply the list. All names were accepted above, so the failures that remain
+         * possible here are expected to be allocation failures only. */
+        for (const char *p = rvalue;;) {
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, NULL, EXTRACT_UNQUOTE);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return 0;
+
+                int af = af_from_name(word);
+                if (af < 0)
+                        return af;
+
+                /* If we previously wanted to forbid an address family and now we want to allow it, then
+                 * just remove it from the list. */
+                if (!invert == *is_allowlist) {
+                        r = set_put(*families, INT_TO_PTR(af));
+                        if (r < 0)
+                                return r;
+                } else
+                        set_remove(*families, INT_TO_PTR(af));
+        }
+}
+
 static int parse_af_token(
                 const char *token,
                 int *family,
diff --git a/src/shared/parse-helpers.h b/src/shared/parse-helpers.h
index 402147cbf38..a906dfdaefd 100644
--- a/src/shared/parse-helpers.h
+++ b/src/shared/parse-helpers.h
@@ -20,6 +20,8 @@ int path_simplify_and_warn(
                 unsigned line,
                 const char *lvalue);
 
+int parse_address_families(const char *rvalue, Set **families, bool *is_allowlist);
+
 int parse_socket_bind_item(
                 const char *str,
                 int *address_family,