]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/nspawn/nspawn.c
nspawn: add new .nspawn files for container settings
[thirdparty/systemd.git] / src / nspawn / nspawn.c
index e8a023d023b6748f46ae1cc623d31eacafa5e5b3..a8afcd1466fed899f914116d9593983485b50b55 100644 (file)
 #include "seccomp-util.h"
 #endif
 
-typedef struct ExposePort {
-        int protocol;
-        uint16_t host_port;
-        uint16_t container_port;
-        LIST_FIELDS(struct ExposePort, ports);
-} ExposePort;
+#include "nspawn.h"
+#include "nspawn-settings.h"
 
 typedef enum ContainerStatus {
         CONTAINER_TERMINATED,
@@ -121,28 +117,6 @@ typedef enum LinkJournal {
         LINK_GUEST
 } LinkJournal;
 
-typedef enum Volatile {
-        VOLATILE_NO,
-        VOLATILE_YES,
-        VOLATILE_STATE,
-} Volatile;
-
-typedef enum CustomMountType {
-        CUSTOM_MOUNT_BIND,
-        CUSTOM_MOUNT_TMPFS,
-        CUSTOM_MOUNT_OVERLAY,
-} CustomMountType;
-
-typedef struct CustomMount {
-        CustomMountType type;
-        bool read_only;
-        char *source; /* for overlayfs this is the upper directory */
-        char *destination;
-        char *options;
-        char *work_dir;
-        char **lower;
-} CustomMount;
-
 static char *arg_directory = NULL;
 static char *arg_template = NULL;
 static char *arg_user = NULL;
@@ -195,15 +169,19 @@ static char **arg_network_interfaces = NULL;
 static char **arg_network_macvlan = NULL;
 static char **arg_network_ipvlan = NULL;
 static bool arg_network_veth = false;
-static const char *arg_network_bridge = NULL;
+static char *arg_network_bridge = NULL;
 static unsigned long arg_personality = PERSONALITY_INVALID;
 static char *arg_image = NULL;
-static Volatile arg_volatile = VOLATILE_NO;
+static VolatileMode arg_volatile_mode = VOLATILE_NO;
 static ExposePort *arg_expose_ports = NULL;
 static char **arg_property = NULL;
 static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
 static bool arg_userns = false;
 static int arg_kill_signal = 0;
+static bool arg_unified_cgroup_hierarchy = false;
+static SettingsMask arg_settings_mask = 0;
+static int arg_settings_trusted = -1;
+static char **arg_parameters = NULL;
 
 static void help(void) {
         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
@@ -257,9 +235,11 @@ static void help(void) {
                "                            try-guest, try-host\n"
                "  -j                        Equivalent to --link-journal=try-guest\n"
                "     --read-only            Mount the root directory read-only\n"
-               "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
+               "     --bind=PATH[:PATH[:OPTIONS]]\n"
+               "                            Bind mount a file or directory from the host into\n"
                "                            the container\n"
-               "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
+               "     --bind-ro=PATH[:PATH[:OPTIONS]]\n"
+               "                            Similar, but creates a read-only bind mount\n"
                "     --tmpfs=PATH:[OPTIONS] Mount an empty tmpfs to the specified directory\n"
                "     --overlay=PATH[:PATH...]:PATH\n"
                "                            Create an overlay mount from the host to \n"
@@ -272,30 +252,36 @@ static void help(void) {
                "     --keep-unit            Do not register a scope for the machine, reuse\n"
                "                            the service unit nspawn is running in\n"
                "     --volatile[=MODE]      Run the system in volatile mode\n"
+               "     --settings=BOOLEAN     Load additional settings from .nspawn file\n"
                , program_invocation_short_name);
 }
 
-static CustomMount* custom_mount_add(CustomMountType t) {
+static CustomMount* custom_mount_add(CustomMount **l, unsigned *n, CustomMountType t) {
         CustomMount *c, *ret;
 
-        c = realloc(arg_custom_mounts, (arg_n_custom_mounts + 1) * sizeof(CustomMount));
+        assert(l);
+        assert(n);
+        assert(t >= 0);
+        assert(t < _CUSTOM_MOUNT_TYPE_MAX);
+
+        c = realloc(*l, (*n + 1) * sizeof(CustomMount));
         if (!c)
                 return NULL;
 
-        arg_custom_mounts = c;
-        ret = arg_custom_mounts + arg_n_custom_mounts;
-        arg_n_custom_mounts++;
+        *l = c;
+        ret = *l + *n;
+        (*n)++;
 
         *ret = (CustomMount) { .type = t };
 
         return ret;
 }
 
-static void custom_mount_free_all(void) {
+void custom_mount_free_all(CustomMount *l, unsigned n) {
         unsigned i;
 
-        for (i = 0; i < arg_n_custom_mounts; i++) {
-                CustomMount *m = &arg_custom_mounts[i];
+        for (i = 0; i < n; i++) {
+                CustomMount *m = l + i;
 
                 free(m->source);
                 free(m->destination);
@@ -309,8 +295,7 @@ static void custom_mount_free_all(void) {
                 strv_free(m->lower);
         }
 
-        arg_custom_mounts = mfree(arg_custom_mounts);
-        arg_n_custom_mounts = 0;
+        free(l);
 }
 
 static int custom_mount_compare(const void *a, const void *b) {
@@ -383,6 +368,185 @@ static int set_sanitized_path(char **b, const char *path) {
         return 0;
 }
 
+/* Decides whether the unified cgroup hierarchy shall be used and records the
+ * decision in arg_unified_cgroup_hierarchy. If $UNIFIED_CGROUP_HIERARCHY is
+ * set it is parsed as a boolean and takes precedence; otherwise the result of
+ * cg_unified() is used. Returns 0 on success; on failure the error is logged
+ * and the value of log_error_errno() is returned. */
+static int detect_unified_cgroup_hierarchy(void) {
+        const char *env;
+        int unified;
+
+        env = getenv("UNIFIED_CGROUP_HIERARCHY");
+        if (!env) {
+                /* No explicit request from the user: inherit the host's default */
+                unified = cg_unified();
+                if (unified < 0)
+                        return log_error_errno(unified, "Failed to determine whether the unified cgroups hierarchy is used: %m");
+        } else {
+                /* The user controls the choice through the environment */
+                unified = parse_boolean(env);
+                if (unified < 0)
+                        return log_error_errno(unified, "Failed to parse $UNIFIED_CGROUP_HIERARCHY.");
+        }
+
+        arg_unified_cgroup_hierarchy = unified;
+        return 0;
+}
+
+/* Translates a string into a VolatileMode. Strings accepted by
+ * parse_boolean() map to VOLATILE_YES or VOLATILE_NO, the literal "state"
+ * maps to VOLATILE_STATE. Everything else, including input that isempty()
+ * rejects, yields _VOLATILE_MODE_INVALID. */
+VolatileMode volatile_mode_from_string(const char *s) {
+        int truth;
+
+        if (isempty(s))
+                return _VOLATILE_MODE_INVALID;
+
+        /* Boolean spellings are tried first, "state" is the only extra keyword */
+        truth = parse_boolean(s);
+        if (truth >= 0)
+                return truth > 0 ? VOLATILE_YES : VOLATILE_NO;
+
+        return streq(s, "state") ? VOLATILE_STATE : _VOLATILE_MODE_INVALID;
+}
+
+/* Parses a port exposure specification of the form
+ * "[tcp:|udp:]HOST_PORT[:CONTAINER_PORT]" and prepends a matching entry to
+ * the list *l. Without a protocol prefix TCP is assumed; without an explicit
+ * container port the host port is used for both sides.
+ *
+ * Returns 0 on success, -EINVAL if a port cannot be parsed or is 0, -EEXIST
+ * if *l already contains an entry with the same protocol and host port, and
+ * -ENOMEM if the new entry cannot be allocated. Nothing is logged here; that
+ * is left to the caller. */
+int expose_port_parse(ExposePort **l, const char *s) {
+
+        const char *split, *e;
+        uint16_t container_port, host_port;
+        int protocol;
+        ExposePort *p;
+        int r;
+
+        assert(l);
+        assert(s);
+
+        if ((e = startswith(s, "tcp:")))
+                protocol = IPPROTO_TCP;
+        else if ((e = startswith(s, "udp:")))
+                protocol = IPPROTO_UDP;
+        else {
+                e = s;
+                protocol = IPPROTO_TCP;
+        }
+
+        split = strchr(e, ':');
+        if (split) {
+                /* Copy out the part before the colon so it can be parsed as a NUL-terminated string */
+                char v[split - e + 1];
+
+                memcpy(v, e, split - e);
+                v[split - e] = 0;
+
+                r = safe_atou16(v, &host_port);
+                if (r < 0 || host_port <= 0)
+                        return -EINVAL;
+
+                r = safe_atou16(split + 1, &container_port);
+        } else {
+                r = safe_atou16(e, &container_port);
+                host_port = container_port;
+        }
+
+        if (r < 0 || container_port <= 0)
+                return -EINVAL;
+
+        /* Look for duplicates in the list we were handed, not in the global
+         * command line list: callers may be filling in a different list. */
+        LIST_FOREACH(ports, p, *l)
+                if (p->protocol == protocol && p->host_port == host_port)
+                        return -EEXIST;
+
+        p = new(ExposePort, 1);
+        if (!p)
+                return -ENOMEM;
+
+        p->protocol = protocol;
+        p->host_port = host_port;
+        p->container_port = container_port;
+
+        LIST_PREPEND(ports, *l, p);
+
+        return 0;
+}
+
+/* Parses a bind mount specification "SOURCE[:DESTINATION[:OPTIONS]]" and
+ * appends a CUSTOM_MOUNT_BIND entry to the array *l, whose element count is
+ * tracked in *n. If no destination is given the source path is used for it.
+ * Whatever follows the second colon is stored verbatim as the option string.
+ * Both paths must be absolute.
+ *
+ * Returns 0 on success, -EINVAL for an empty or non-absolute specification,
+ * -ENOMEM on allocation failure, or the negative error returned by
+ * extract_many_words(). Nothing is logged here, so that the caller can report
+ * the failure exactly once. */
+int bind_mount_parse(CustomMount **l, unsigned *n, const char *s, bool read_only) {
+        _cleanup_free_ char *source = NULL, *destination = NULL, *opts = NULL;
+        const char *p = s;
+        CustomMount *m;
+        int r;
+
+        assert(l);
+        assert(n);
+        assert(s);
+
+        r = extract_many_words(&p, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
+        if (r < 0)
+                return r;
+        if (r == 0)
+                return -EINVAL;
+
+        if (r == 1) {
+                /* Only one path given: mount it at the same location inside the container */
+                destination = strdup(source);
+                if (!destination)
+                        return -ENOMEM;
+        }
+
+        if (r == 2 && !isempty(p)) {
+                /* The unparsed remainder is the mount option string */
+                opts = strdup(p);
+                if (!opts)
+                        return -ENOMEM;
+        }
+
+        if (!path_is_absolute(source))
+                return -EINVAL;
+
+        if (!path_is_absolute(destination))
+                return -EINVAL;
+
+        m = custom_mount_add(l, n, CUSTOM_MOUNT_BIND);
+        if (!m)
+                return -ENOMEM;
+
+        m->source = source;
+        m->destination = destination;
+        m->read_only = read_only;
+        m->options = opts;
+
+        /* Ownership moved into the entry, keep the cleanup handlers from freeing the strings */
+        source = destination = opts = NULL;
+        return 0;
+}
+
+/* Parses a tmpfs specification "PATH[:OPTIONS]" and appends a
+ * CUSTOM_MOUNT_TMPFS entry to the array *l, whose element count is tracked in
+ * *n. When nothing follows the path, "mode=0755" is used as option string.
+ * The path must be absolute.
+ *
+ * Returns 0 on success, -EINVAL for an empty or non-absolute path, -ENOMEM on
+ * allocation failure, or the negative error from extract_first_word(). */
+int tmpfs_mount_parse(CustomMount **l, unsigned *n, const char *s) {
+        _cleanup_free_ char *where = NULL, *mount_options = NULL;
+        const char *rest = s;
+        CustomMount *entry;
+        int k;
+
+        assert(l);
+        assert(n);
+        assert(s);
+
+        k = extract_first_word(&rest, &where, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
+        if (k < 0)
+                return k;
+        if (k == 0)
+                return -EINVAL;
+
+        /* Fall back to the default mode if no options were specified */
+        mount_options = strdup(isempty(rest) ? "mode=0755" : rest);
+        if (!mount_options)
+                return -ENOMEM;
+
+        if (!path_is_absolute(where))
+                return -EINVAL;
+
+        entry = custom_mount_add(l, n, CUSTOM_MOUNT_TMPFS);
+        if (!entry)
+                return -ENOMEM;
+
+        entry->destination = where;
+        entry->options = mount_options;
+
+        /* The entry owns the strings now */
+        where = mount_options = NULL;
+        return 0;
+}
+
 static int parse_argv(int argc, char *argv[]) {
 
         enum {
@@ -412,6 +576,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_PROPERTY,
                 ARG_PRIVATE_USERS,
                 ARG_KILL_SIGNAL,
+                ARG_SETTINGS,
         };
 
         static const struct option options[] = {
@@ -454,11 +619,13 @@ static int parse_argv(int argc, char *argv[]) {
                 { "property",              required_argument, NULL, ARG_PROPERTY          },
                 { "private-users",         optional_argument, NULL, ARG_PRIVATE_USERS     },
                 { "kill-signal",           required_argument, NULL, ARG_KILL_SIGNAL       },
+                { "settings",              required_argument, NULL, ARG_SETTINGS          },
                 {}
         };
 
         int c, r;
         uint64_t plus = 0, minus = 0;
+        bool mask_all_settings = false, mask_no_settings = false;
 
         assert(argc >= 0);
         assert(argv);
@@ -506,16 +673,20 @@ static int parse_argv(int argc, char *argv[]) {
                         if (r < 0)
                                 return log_oom();
 
+                        arg_settings_mask |= SETTING_USER;
                         break;
 
                 case ARG_NETWORK_BRIDGE:
-                        arg_network_bridge = optarg;
+                        r = free_and_strdup(&arg_network_bridge, optarg);
+                        if (r < 0)
+                                return log_oom();
 
                         /* fall through */
 
                 case 'n':
                         arg_network_veth = true;
                         arg_private_network = true;
+                        arg_settings_mask |= SETTING_NETWORK;
                         break;
 
                 case ARG_NETWORK_INTERFACE:
@@ -523,6 +694,7 @@ static int parse_argv(int argc, char *argv[]) {
                                 return log_oom();
 
                         arg_private_network = true;
+                        arg_settings_mask |= SETTING_NETWORK;
                         break;
 
                 case ARG_NETWORK_MACVLAN:
@@ -530,6 +702,7 @@ static int parse_argv(int argc, char *argv[]) {
                                 return log_oom();
 
                         arg_private_network = true;
+                        arg_settings_mask |= SETTING_NETWORK;
                         break;
 
                 case ARG_NETWORK_IPVLAN:
@@ -540,10 +713,12 @@ static int parse_argv(int argc, char *argv[]) {
 
                 case ARG_PRIVATE_NETWORK:
                         arg_private_network = true;
+                        arg_settings_mask |= SETTING_NETWORK;
                         break;
 
                 case 'b':
                         arg_boot = true;
+                        arg_settings_mask |= SETTING_BOOT;
                         break;
 
                 case ARG_UUID:
@@ -552,6 +727,8 @@ static int parse_argv(int argc, char *argv[]) {
                                 log_error("Invalid UUID: %s", optarg);
                                 return r;
                         }
+
+                        arg_settings_mask |= SETTING_MACHINE_ID;
                         break;
 
                 case 'S':
@@ -559,9 +736,9 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case 'M':
-                        if (isempty(optarg)) {
+                        if (isempty(optarg))
                                 arg_machine = mfree(arg_machine);
-                        else {
+                        else {
                                 if (!machine_name_is_valid(optarg)) {
                                         log_error("Invalid machine name: %s", optarg);
                                         return -EINVAL;
@@ -584,6 +761,7 @@ static int parse_argv(int argc, char *argv[]) {
 
                 case ARG_READ_ONLY:
                         arg_read_only = true;
+                        arg_settings_mask |= SETTING_READ_ONLY;
                         break;
 
                 case ARG_CAPABILITY:
@@ -619,6 +797,7 @@ static int parse_argv(int argc, char *argv[]) {
                                 }
                         }
 
+                        arg_settings_mask |= SETTING_CAPABILITY;
                         break;
                 }
 
@@ -654,81 +833,21 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_BIND:
-                case ARG_BIND_RO: {
-                        const char *current = optarg;
-                        _cleanup_free_ char *source = NULL, *destination = NULL;
-                        CustomMount *m;
-
-                        r = extract_many_words(&current, ":", EXTRACT_DONT_COALESCE_SEPARATORS, &source, &destination, NULL);
-                        switch (r) {
-                        case 1:
-                                destination = strdup(source);
-                        case 2:
-                                break;
-                        case -ENOMEM:
-                                return log_oom();
-                        default:
-                                log_error("Invalid bind mount specification: %s", optarg);
-                                return -EINVAL;
-                        }
-
-                        if (!source || !destination)
-                                return log_oom();
-
-                        if (!path_is_absolute(source) || !path_is_absolute(destination)) {
-                                log_error("Invalid bind mount specification: %s", optarg);
-                                return -EINVAL;
-                        }
-
-                        m = custom_mount_add(CUSTOM_MOUNT_BIND);
-                        if (!m)
-                                return log_oom();
-
-                        m->source = source;
-                        m->destination = destination;
-                        m->read_only = c == ARG_BIND_RO;
-
-                        source = destination = NULL;
+                case ARG_BIND_RO:
+                        r = bind_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg, c == ARG_BIND_RO);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse --bind(-ro)= argument %s: %m", optarg);
 
+                        arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
                         break;
-                }
-
-                case ARG_TMPFS: {
-                        const char *current = optarg;
-                        _cleanup_free_ char *path = NULL, *opts = NULL;
-                        CustomMount *m;
-
-                        r = extract_first_word(&current, &path, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
-                        if (r == -ENOMEM)
-                                return log_oom();
-                        else if (r < 0) {
-                                log_error("Invalid tmpfs specification: %s", optarg);
-                                return r;
-                        }
-                        if (r)
-                                opts = strdup(current);
-                        else
-                                opts = strdup("mode=0755");
-
-                        if (!path || !opts)
-                                return log_oom();
-
-                        if (!path_is_absolute(path)) {
-                                log_error("Invalid tmpfs specification: %s", optarg);
-                                return -EINVAL;
-                        }
-
-                        m = custom_mount_add(CUSTOM_MOUNT_TMPFS);
-                        if (!m)
-                                return log_oom();
 
-                        m->destination = path;
-                        m->options = opts;
-
-                        path = opts = NULL;
+                case ARG_TMPFS:
+                        r = tmpfs_mount_parse(&arg_custom_mounts, &arg_n_custom_mounts, optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse --tmpfs= argument %s: %m", optarg);
 
+                        arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
                         break;
-                }
 
                 case ARG_OVERLAY:
                 case ARG_OVERLAY_RO: {
@@ -779,7 +898,7 @@ static int parse_argv(int argc, char *argv[]) {
                                 lower[n - 2] = NULL;
                         }
 
-                        m = custom_mount_add(CUSTOM_MOUNT_OVERLAY);
+                        m = custom_mount_add(&arg_custom_mounts, &arg_n_custom_mounts, CUSTOM_MOUNT_OVERLAY);
                         if (!m)
                                 return log_oom();
 
@@ -791,6 +910,7 @@ static int parse_argv(int argc, char *argv[]) {
                         upper = destination = NULL;
                         lower = NULL;
 
+                        arg_settings_mask |= SETTING_CUSTOM_MOUNTS;
                         break;
                 }
 
@@ -808,6 +928,8 @@ static int parse_argv(int argc, char *argv[]) {
 
                         strv_free(arg_setenv);
                         arg_setenv = n;
+
+                        arg_settings_mask |= SETTING_ENVIRONMENT;
                         break;
                 }
 
@@ -841,85 +963,36 @@ static int parse_argv(int argc, char *argv[]) {
                                 return -EINVAL;
                         }
 
+                        arg_settings_mask |= SETTING_PERSONALITY;
                         break;
 
                 case ARG_VOLATILE:
 
                         if (!optarg)
-                                arg_volatile = VOLATILE_YES;
-                        else {
-                                r = parse_boolean(optarg);
-                                if (r < 0) {
-                                        if (streq(optarg, "state"))
-                                                arg_volatile = VOLATILE_STATE;
-                                        else {
-                                                log_error("Failed to parse --volatile= argument: %s", optarg);
-                                                return r;
-                                        }
-                                } else
-                                        arg_volatile = r ? VOLATILE_YES : VOLATILE_NO;
-                        }
-
-                        break;
-
-                case 'p': {
-                        const char *split, *e;
-                        uint16_t container_port, host_port;
-                        int protocol;
-                        ExposePort *p;
-
-                        if ((e = startswith(optarg, "tcp:")))
-                                protocol = IPPROTO_TCP;
-                        else if ((e = startswith(optarg, "udp:")))
-                                protocol = IPPROTO_UDP;
+                                arg_volatile_mode = VOLATILE_YES;
                         else {
-                                e = optarg;
-                                protocol = IPPROTO_TCP;
-                        }
-
-                        split = strchr(e, ':');
-                        if (split) {
-                                char v[split - e + 1];
-
-                                memcpy(v, e, split - e);
-                                v[split - e] = 0;
-
-                                r = safe_atou16(v, &host_port);
-                                if (r < 0 || host_port <= 0) {
-                                        log_error("Failed to parse host port: %s", optarg);
-                                        return -EINVAL;
-                                }
-
-                                r = safe_atou16(split + 1, &container_port);
-                        } else {
-                                r = safe_atou16(e, &container_port);
-                                host_port = container_port;
-                        }
-
-                        if (r < 0 || container_port <= 0) {
-                                log_error("Failed to parse host port: %s", optarg);
-                                return -EINVAL;
-                        }
+                                VolatileMode m;
 
-                        LIST_FOREACH(ports, p, arg_expose_ports) {
-                                if (p->protocol == protocol && p->host_port == host_port) {
-                                        log_error("Duplicate port specification: %s", optarg);
+                                m = volatile_mode_from_string(optarg);
+                                if (m < 0) {
+                                        log_error("Failed to parse --volatile= argument: %s", optarg);
                                         return -EINVAL;
-                                }
+                                } else
+                                        arg_volatile_mode = m;
                         }
 
-                        p = new(ExposePort, 1);
-                        if (!p)
-                                return log_oom();
-
-                        p->protocol = protocol;
-                        p->host_port = host_port;
-                        p->container_port = container_port;
+                        arg_settings_mask |= SETTING_VOLATILE_MODE;
+                        break;
 
-                        LIST_PREPEND(ports, arg_expose_ports, p);
+                case 'p':
+                        r = expose_port_parse(&arg_expose_ports, optarg);
+                        if (r == -EEXIST)
+                                return log_error_errno(r, "Duplicate port specification: %s", optarg);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to parse host port %s: %m", optarg);
 
+                        arg_settings_mask |= SETTING_EXPOSE_PORTS;
                         break;
-                }
 
                 case ARG_PROPERTY:
                         if (strv_extend(&arg_property, optarg) < 0)
@@ -963,6 +1036,42 @@ static int parse_argv(int argc, char *argv[]) {
                                 return -EINVAL;
                         }
 
+                        arg_settings_mask |= SETTING_KILL_SIGNAL;
+                        break;
+
+                case ARG_SETTINGS:
+
+                        /* no               → do not read files
+                         * yes              → read files, do not override cmdline, trust only subset
+                         * override         → read files, override cmdline, trust only subset
+                         * trusted          → read files, do not override cmdline, trust all
+                         */
+
+                        r = parse_boolean(optarg);
+                        if (r < 0) {
+                                if (streq(optarg, "trusted")) {
+                                        mask_all_settings = false;
+                                        mask_no_settings = false;
+                                        arg_settings_trusted = true;
+
+                                } else if (streq(optarg, "override")) {
+                                        mask_all_settings = false;
+                                        mask_no_settings = true;
+                                        arg_settings_trusted = -1;
+                                } else
+                                        return log_error_errno(r, "Failed to parse --settings= argument: %s", optarg);
+                        } else if (r > 0) {
+                                /* yes */
+                                mask_all_settings = false;
+                                mask_no_settings = false;
+                                arg_settings_trusted = -1;
+                        } else {
+                                /* no */
+                                mask_all_settings = true;
+                                mask_no_settings = false;
+                                arg_settings_trusted = false;
+                        }
+
                         break;
 
                 case '?':
@@ -1015,7 +1124,37 @@ static int parse_argv(int argc, char *argv[]) {
                 return -EINVAL;
         }
 
-        if (arg_volatile != VOLATILE_NO && arg_read_only) {
+        if (arg_userns && access("/proc/self/uid_map", F_OK) < 0)
+                return log_error_errno(EOPNOTSUPP, "--private-users= is not supported, kernel compiled without user namespace support.");
+
+        if (argc > optind) {
+                arg_parameters = strv_copy(argv + optind);
+                if (!arg_parameters)
+                        return log_oom();
+
+                arg_settings_mask |= SETTING_BOOT;
+        }
+
+        /* Load all settings from .nspawn files */
+        if (mask_no_settings)
+                arg_settings_mask = 0;
+
+        /* Don't load any settings from .nspawn files */
+        if (mask_all_settings)
+                arg_settings_mask = _SETTINGS_MASK_ALL;
+
+        arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
+
+        r = detect_unified_cgroup_hierarchy();
+        if (r < 0)
+                return r;
+
+        return 1;
+}
+
+static int verify_arguments(void) {
+
+        if (arg_volatile_mode != VOLATILE_NO && arg_read_only) {
                 log_error("Cannot combine --read-only with --volatile. Note that --volatile already implies a read-only base hierarchy.");
                 return -EINVAL;
         }
@@ -1025,15 +1164,10 @@ static int parse_argv(int argc, char *argv[]) {
                 return -EINVAL;
         }
 
-        if (arg_userns && access("/proc/self/uid_map", F_OK) < 0)
-                return log_error_errno(EOPNOTSUPP, "--private-users= is not supported, kernel compiled without user namespace support.");
-
-        arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
-
         if (arg_boot && arg_kill_signal <= 0)
                 arg_kill_signal = SIGRTMIN+3;
 
-        return 1;
+        return 0;
 }
 
 static int tmpfs_patch_options(const char *options, char **ret) {
@@ -1091,7 +1225,6 @@ static int mount_all(const char *dest, bool userns) {
                 { "/proc/sys", "/proc/sys",      NULL,     NULL,        MS_BIND,                                                   true,  true  },   /* Bind mount first */
                 { NULL,        "/proc/sys",      NULL,     NULL,        MS_BIND|MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_REMOUNT, true,  true  },   /* Then, make it r/o */
                 { "sysfs",     "/sys",           "sysfs",  NULL,        MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV,                    true,  false },
-                { "tmpfs",     "/sys/fs/cgroup", "tmpfs",  "mode=755",  MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,               true,  false },
                 { "tmpfs",     "/dev",           "tmpfs",  "mode=755",  MS_NOSUID|MS_STRICTATIME,                                  true,  false },
                 { "tmpfs",     "/dev/shm",       "tmpfs",  "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         true,  false },
                 { "tmpfs",     "/run",           "tmpfs",  "mode=755",  MS_NOSUID|MS_NODEV|MS_STRICTATIME,                         true,  false },
@@ -1158,13 +1291,53 @@ static int mount_all(const char *dest, bool userns) {
         return 0;
 }
 
+/* Applies a comma-separated list of bind mount options to *mount_flags.
+ * "rbind" sets MS_REC and "norbind" clears it; any other word is logged as an
+ * error and results in -EINVAL. *mount_flags is only written if the whole
+ * list was accepted. *mount_opts is always set to NULL on success, as no
+ * string options are produced yet. Returns 0 on success. */
+static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) {
+        const char *cursor = options;
+        unsigned long flags = *mount_flags;
+
+        assert(options);
+
+        for (;;) {
+                _cleanup_free_ char *token = NULL;
+                int k;
+
+                k = extract_first_word(&cursor, &token, ",", 0);
+                if (k < 0)
+                        return log_error_errno(k, "Failed to extract mount option: %m");
+                if (k == 0)
+                        break;
+
+                if (streq(token, "rbind")) {
+                        flags |= MS_REC;
+                        continue;
+                }
+
+                if (streq(token, "norbind")) {
+                        flags &= ~MS_REC;
+                        continue;
+                }
+
+                log_error("Invalid bind mount option: %s", token);
+                return -EINVAL;
+        }
+
+        *mount_flags = flags;
+        /* in the future mount_opts will hold string options for mount(2) */
+        *mount_opts = NULL;
+
+        return 0;
+}
+
 static int mount_bind(const char *dest, CustomMount *m) {
         struct stat source_st, dest_st;
         const char *where;
+        unsigned long mount_flags = MS_BIND | MS_REC;
+        _cleanup_free_ char *mount_opts = NULL;
         int r;
 
         assert(m);
 
+        if (m->options) {
+                r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts);
+                if (r < 0)
+                        return r;
+        }
+
         if (stat(m->source, &source_st) < 0)
                 return log_error_errno(errno, "Failed to stat %s: %m", m->source);
 
@@ -1201,7 +1374,7 @@ static int mount_bind(const char *dest, CustomMount *m) {
         if (r < 0 && r != -EEXIST)
                 return log_error_errno(r, "Failed to create mount point %s: %m", where);
 
-        if (mount(m->source, where, NULL, MS_BIND, NULL) < 0)
+        if (mount(m->source, where, NULL, mount_flags, mount_opts) < 0)
                 return log_error_errno(errno, "mount(%s) failed: %m", where);
 
         if (m->read_only) {
@@ -1337,7 +1510,7 @@ static int mount_custom(const char *dest) {
         return 0;
 }
 
-static int mount_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
+static int mount_legacy_cgroup_hierarchy(const char *dest, const char *controller, const char *hierarchy, bool read_only) {
         char *to;
         int r;
 
@@ -1365,11 +1538,31 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons
         return 1;
 }
 
-static int mount_cgroup(const char *dest) {
+static int mount_legacy_cgroups(const char *dest) {
         _cleanup_set_free_free_ Set *controllers = NULL;
         const char *cgroup_root;
         int r;
 
+        cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
+
+        /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */
+        r = path_is_mount_point(cgroup_root, AT_SYMLINK_FOLLOW);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m");
+        if (r == 0) {
+                _cleanup_free_ char *options = NULL;
+
+                r = tmpfs_patch_options("mode=755", &options);
+                if (r < 0)
+                        return log_oom();
+
+                if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options) < 0)
+                        return log_error_errno(errno, "Failed to mount /sys/fs/cgroup: %m");
+        }
+
+        if (cg_unified() > 0)
+                goto skip_controllers;
+
         controllers = set_new(&string_hash_ops);
         if (!controllers)
                 return log_oom();
@@ -1393,7 +1586,7 @@ static int mount_cgroup(const char *dest) {
                 if (r == -EINVAL) {
                         /* Not a symbolic link, but directly a single cgroup hierarchy */
 
-                        r = mount_cgroup_hierarchy(dest, controller, controller, true);
+                        r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true);
                         if (r < 0)
                                 return r;
 
@@ -1413,7 +1606,7 @@ static int mount_cgroup(const char *dest) {
                                 continue;
                         }
 
-                        r = mount_cgroup_hierarchy(dest, combined, combined, true);
+                        r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true);
                         if (r < 0)
                                 return r;
 
@@ -1427,17 +1620,52 @@ static int mount_cgroup(const char *dest) {
                 }
         }
 
-        r = mount_cgroup_hierarchy(dest, "name=systemd,xattr", "systemd", false);
+skip_controllers:
+        r = mount_legacy_cgroup_hierarchy(dest, "none,name=systemd,xattr", "systemd", false);
         if (r < 0)
                 return r;
 
-        cgroup_root = prefix_roota(dest, "/sys/fs/cgroup");
         if (mount(NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755") < 0)
                 return log_error_errno(errno, "Failed to remount %s read-only: %m", cgroup_root);
 
         return 0;
 }
 
+/* Mounts the unified cgroup hierarchy on <dest>/sys/fs/cgroup.
+ *
+ * If that path is already a mount point, it is accepted as-is when it
+ * contains a top-level "cgroup.procs" file (used here as the marker for
+ * the unified hierarchy) and refused with -EINVAL otherwise.
+ *
+ * Returns 0 on success and a negative error value on failure. */
+static int mount_unified_cgroups(const char *dest) {
+        const char *p, *probe;
+        int r;
+
+        assert(dest);
+
+        p = strjoina(dest, "/sys/fs/cgroup");
+
+        r = path_is_mount_point(p, AT_SYMLINK_FOLLOW);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine if %s is mounted already: %m", p);
+        if (r > 0) {
+                /* Probe through a separate variable, so that the
+                 * messages below keep naming the mount point itself
+                 * rather than the file we looked for inside of it. */
+                probe = strjoina(dest, "/sys/fs/cgroup/cgroup.procs");
+                if (access(probe, F_OK) >= 0)
+                        return 0;
+                if (errno != ENOENT)
+                        return log_error_errno(errno, "Failed to determine if mount point %s contains the unified cgroup hierarchy: %m", p);
+
+                log_error("%s is already mounted but not a unified cgroup hierarchy. Refusing.", p);
+                return -EINVAL;
+        }
+
+        if (mount("cgroup", p, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior") < 0)
+                return log_error_errno(errno, "Failed to mount unified cgroup hierarchy to %s: %m", p);
+
+        return 0;
+}
+
+/* Sets up /sys/fs/cgroup below "dest": picks the unified variant when
+ * arg_unified_cgroup_hierarchy is set and the legacy variant otherwise,
+ * and passes the chosen routine's result through unchanged. */
+static int mount_cgroups(const char *dest) {
+        return arg_unified_cgroup_hierarchy
+                ? mount_unified_cgroups(dest)
+                : mount_legacy_cgroups(dest);
+}
+
 static int mount_systemd_cgroup_writable(const char *dest) {
         _cleanup_free_ char *own_cgroup_path = NULL;
         const char *systemd_root, *systemd_own;
@@ -1449,13 +1677,23 @@ static int mount_systemd_cgroup_writable(const char *dest) {
         if (r < 0)
                 return log_error_errno(r, "Failed to determine our own cgroup path: %m");
 
+        /* If we are living in the top-level, then there's nothing to do... */
+        if (path_equal(own_cgroup_path, "/"))
+                return 0;
+
+        if (arg_unified_cgroup_hierarchy) {
+                systemd_own = strjoina(dest, "/sys/fs/cgroup", own_cgroup_path);
+                systemd_root = prefix_roota(dest, "/sys/fs/cgroup");
+        } else {
+                systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
+                systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
+        }
+
         /* Make our own cgroup a (writable) bind mount */
-        systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path);
         if (mount(systemd_own, systemd_own,  NULL, MS_BIND, NULL) < 0)
                 return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path);
 
         /* And then remount the systemd cgroup root read-only */
-        systemd_root = prefix_roota(dest, "/sys/fs/cgroup/systemd");
         if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0)
                 return log_error_errno(errno, "Failed to mount cgroup root read-only: %m");
 
@@ -1606,7 +1844,7 @@ static int setup_volatile_state(const char *directory) {
 
         assert(directory);
 
-        if (arg_volatile != VOLATILE_STATE)
+        if (arg_volatile_mode != VOLATILE_STATE)
                 return 0;
 
         /* --volatile=state means we simply overmount /var
@@ -1643,7 +1881,7 @@ static int setup_volatile(const char *directory) {
 
         assert(directory);
 
-        if (arg_volatile != VOLATILE_YES)
+        if (arg_volatile_mode != VOLATILE_YES)
                 return 0;
 
         /* --volatile=yes means we mount a tmpfs to the root dir, and
@@ -2071,6 +2309,15 @@ static int expose_ports(sd_netlink *rtnl, union in_addr_union *exposed) {
         return 0;
 }
 
+/* Releases every ExposePort entry of the list starting at "p": each
+ * entry is unlinked from the "ports" list and then freed, until no
+ * entry is left. A NULL list is a no-op. */
+void expose_port_free_all(ExposePort *p) {
+        ExposePort *head = p;
+
+        for (;;) {
+                ExposePort *victim = head;
+
+                if (!victim)
+                        return;
+
+                /* Unlink first, the head is updated by the list macro */
+                LIST_REMOVE(ports, head, victim);
+                free(victim);
+        }
+}
+
 static int on_address_change(sd_netlink *rtnl, sd_netlink_message *m, void *userdata) {
         union in_addr_union *exposed = userdata;
 
@@ -4002,6 +4249,17 @@ static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo
 static int determine_names(void) {
         int r;
 
+        if (arg_template && !arg_directory && arg_machine) {
+
+                /* If --template= was specified then we should not
+                 * search for a machine, but instead create a new one
+                 * in /var/lib/machines. */
+
+                arg_directory = strjoin("/var/lib/machines/", arg_machine, NULL);
+                if (!arg_directory)
+                        return log_oom();
+        }
+
         if (!arg_image && !arg_directory) {
                 if (arg_machine) {
                         _cleanup_(image_unrefp) Image *i = NULL;
@@ -4106,9 +4364,7 @@ static int inner_child(
                 bool secondary,
                 int kmsg_socket,
                 int rtnl_socket,
-                FDSet *fds,
-                int argc,
-                char *argv[]) {
+                FDSet *fds) {
 
         _cleanup_free_ char *home = NULL;
         unsigned n_env = 2;
@@ -4132,6 +4388,8 @@ static int inner_child(
         assert(directory);
         assert(kmsg_socket >= 0);
 
+        cg_unified_flush();
+
         if (arg_userns) {
                 /* Tell the parent, that it now can write the UID map. */
                 (void) barrier_place(barrier); /* #1 */
@@ -4262,9 +4520,12 @@ static int inner_child(
 
                 /* Automatically search for the init system */
 
-                m = 1 + argc - optind;
+                m = 1 + strv_length(arg_parameters);
                 a = newa(char*, m + 1);
-                memcpy(a + 1, argv + optind, m * sizeof(char*));
+                if (strv_isempty(arg_parameters))
+                        a[1] = NULL;
+                else
+                        memcpy(a + 1, arg_parameters, m * sizeof(char*));
 
                 a[0] = (char*) "/usr/lib/systemd/systemd";
                 execve(a[0], a, env_use);
@@ -4274,10 +4535,10 @@ static int inner_child(
 
                 a[0] = (char*) "/sbin/init";
                 execve(a[0], a, env_use);
-        } else if (argc > optind)
-                execvpe(argv[optind], argv + optind, env_use);
+        } else if (!strv_isempty(arg_parameters))
+                execvpe(arg_parameters[0], arg_parameters, env_use);
         else {
-                chdir(home ? home : "/root");
+                chdir(home ?: "/root");
                 execle("/bin/bash", "-bash", NULL, env_use);
                 execle("/bin/sh", "-sh", NULL, env_use);
         }
@@ -4299,9 +4560,7 @@ static int outer_child(
                 int kmsg_socket,
                 int rtnl_socket,
                 int uid_shift_socket,
-                FDSet *fds,
-                int argc,
-                char *argv[]) {
+                FDSet *fds) {
 
         pid_t pid;
         ssize_t l;
@@ -4313,6 +4572,8 @@ static int outer_child(
         assert(pid_socket >= 0);
         assert(kmsg_socket >= 0);
 
+        cg_unified_flush();
+
         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
                 return log_error_errno(errno, "PR_SET_PDEATHSIG failed: %m");
 
@@ -4429,7 +4690,7 @@ static int outer_child(
         if (r < 0)
                 return r;
 
-        r = mount_cgroup(directory);
+        r = mount_cgroups(directory);
         if (r < 0)
                 return r;
 
@@ -4444,7 +4705,6 @@ static int outer_child(
                         NULL);
         if (pid < 0)
                 return log_error_errno(errno, "Failed to fork inner child: %m");
-
         if (pid == 0) {
                 pid_socket = safe_close(pid_socket);
                 uid_shift_socket = safe_close(uid_shift_socket);
@@ -4453,7 +4713,7 @@ static int outer_child(
                  * requested, so that we all are owned by the user if
                  * user namespaces are turned on. */
 
-                r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds, argc, argv);
+                r = inner_child(barrier, directory, secondary, kmsg_socket, rtnl_socket, fds);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
 
@@ -4512,9 +4772,315 @@ static int chown_cgroup(pid_t pid) {
         if (fd < 0)
                 return log_error_errno(errno, "Failed to open %s: %m", fs);
 
-        FOREACH_STRING(fn, ".", "tasks", "notify_on_release", "cgroup.procs", "cgroup.clone_children")
+        FOREACH_STRING(fn,
+                       ".",
+                       "tasks",
+                       "notify_on_release",
+                       "cgroup.procs",
+                       "cgroup.clone_children",
+                       "cgroup.controllers",
+                       "cgroup.subtree_control",
+                       "cgroup.populated")
                 if (fchownat(fd, fn, arg_uid_shift, arg_uid_shift, 0) < 0)
-                        log_warning_errno(errno, "Failed to chown() cgroup file %s, ignoring: %m", fn);
+                        log_full_errno(errno == ENOENT ? LOG_DEBUG :  LOG_WARNING, errno,
+                                       "Failed to chown() cgroup file %s, ignoring: %m", fn);
+
+        return 0;
+}
+
+/* Places the process "pid" into the matching cgroup path of the
+ * hierarchy type the host is NOT using, for the case where host and
+ * container disagree on whether the unified hierarchy is used.
+ *
+ * Returns 0 right away when both agree. Otherwise the other hierarchy
+ * is mounted on a temporary directory below /tmp, the cgroup path that
+ * "pid" has in the systemd hierarchy is created there, the PID is
+ * written to its cgroup.procs file, and the temporary mount and
+ * directory are removed again.
+ *
+ * Returns 0 on success and a negative error value on failure. */
+static int sync_cgroup(pid_t pid) {
+        _cleanup_free_ char *cgroup = NULL;
+        char tree[] = "/tmp/unifiedXXXXXX", pid_string[DECIMAL_STR_MAX(pid) + 1];
+        bool undo_mount = false;
+        const char *fn;
+        int unified, r;
+
+        unified = cg_unified();
+        if (unified < 0)
+                return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
+
+        /* Host and container use the same setup, nothing to sync */
+        if ((unified > 0) == arg_unified_cgroup_hierarchy)
+                return 0;
+
+        /* When the host uses the legacy cgroup setup, but the
+         * container shall use the unified hierarchy, let's make sure
+         * we copy the path from the name=systemd hierarchy into the
+         * unified hierarchy. Similar for the reverse situation. */
+
+        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get control group of " PID_FMT ": %m", pid);
+
+        /* In order to access the other hierarchy we need to mount it */
+        if (!mkdtemp(tree))
+                return log_error_errno(errno, "Failed to generate temporary mount point for unified hierarchy: %m");
+
+        /* Host is unified: mount the legacy name=systemd hierarchy.
+         * Host is legacy: mount the unified hierarchy. */
+        if (unified)
+                r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "none,name=systemd,xattr");
+        else
+                r = mount("cgroup", tree, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, "__DEVEL__sane_behavior");
+        if (r < 0) {
+                r = log_error_errno(errno, "Failed to mount unified hierarchy: %m");
+                goto finish;
+        }
+
+        undo_mount = true;
+
+        fn = strjoina(tree, cgroup, "/cgroup.procs");
+        /* Best effort: if this fails, the write below fails and reports it */
+        (void) mkdir_parents(fn, 0755);
+
+        sprintf(pid_string, PID_FMT, pid);
+        r = write_string_file(fn, pid_string, 0);
+        if (r < 0)
+                log_error_errno(r, "Failed to move process: %m");
+
+finish:
+        /* Only unmount if the mount above actually succeeded */
+        if (undo_mount)
+                (void) umount(tree);
+
+        (void) rmdir(tree);
+        return r;
+}
+
+/* Splits our own cgroup into a "payload" subcgroup (for the process
+ * "pid") and a "supervisor" subcgroup (for the calling process).
+ *
+ * Only acts when arg_keep_unit is set, the container shall use the
+ * unified hierarchy and the host uses it too; in every other case it
+ * returns 0 without doing anything.
+ *
+ * Returns 0 on success and a negative error value on failure. Failing
+ * to enable controllers for the new subcgroups is deliberately not
+ * treated as an error. */
+static int create_subcgroup(pid_t pid) {
+        _cleanup_free_ char *cgroup = NULL;
+        const char *child;
+        int unified, r;
+        CGroupMask supported;
+
+        /* In the unified hierarchy inner nodes may only contain
+         * subgroups, but not processes. Hence, if we are running in
+         * the unified hierarchy and the container does the same, and
+         * we did not create a scope unit for the container, move us
+         * and the container into two separate subcgroups. */
+
+        if (!arg_keep_unit)
+                return 0;
+
+        if (!arg_unified_cgroup_hierarchy)
+                return 0;
+
+        unified = cg_unified();
+        if (unified < 0)
+                return log_error_errno(unified, "Failed to determine whether the unified hierarchy is used: %m");
+        if (unified == 0)
+                return 0;
+
+        r = cg_mask_supported(&supported);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+        /* PID 0 requests the cgroup of the calling process */
+        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
+        if (r < 0)
+                return log_error_errno(r, "Failed to get our control group: %m");
+
+        child = strjoina(cgroup, "/payload");
+        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, pid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+
+        child = strjoina(cgroup, "/supervisor");
+        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, child, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create %s subcgroup: %m", child);
+
+        /* Try to enable as many controllers as possible for the new payload. */
+        (void) cg_enable_everywhere(supported, supported, cgroup);
+        return 0;
+}
+
+/* Locates the "<machine>.nspawn" settings file and applies it.
+ *
+ * Lookup order: /etc/systemd/nspawn, then /run/systemd/nspawn, then a
+ * file next to the image (arg_image) or the directory (arg_directory).
+ * Unless arg_settings_trusted was already decided (>= 0), files from
+ * /etc and /run are trusted and files next to the image are not.
+ *
+ * Each setting is copied into its arg_* variable unless the matching
+ * bit is set in arg_settings_mask. Capabilities to add, the machine ID,
+ * custom mounts, network settings and exposed ports are ignored with a
+ * warning when the file is not trusted. Ownership of strings, string
+ * lists, mounts and port lists is moved out of the Settings object.
+ *
+ * Returns 0 on success, also when no settings file exists or all
+ * settings are masked, and a negative error value on failure. */
+static int load_settings(void) {
+        _cleanup_(settings_freep) Settings *settings = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        _cleanup_free_ char *p = NULL;
+        const char *fn, *i;
+        int r;
+
+        /* If all settings are masked, there's no point in looking for
+         * the settings file */
+        if ((arg_settings_mask & _SETTINGS_MASK_ALL) == _SETTINGS_MASK_ALL)
+                return 0;
+
+        fn = strjoina(arg_machine, ".nspawn");
+
+        /* We first look in the admin's directories in /etc and /run */
+        FOREACH_STRING(i, "/etc/systemd/nspawn", "/run/systemd/nspawn") {
+                _cleanup_free_ char *j = NULL;
+
+                j = strjoin(i, "/", fn, NULL);
+                if (!j)
+                        return log_oom();
+
+                f = fopen(j, "re");
+                if (f) {
+                        /* Keep the path for later log messages */
+                        p = j;
+                        j = NULL;
+
+                        /* By default we trust configuration from /etc and /run */
+                        if (arg_settings_trusted < 0)
+                                arg_settings_trusted = true;
+
+                        break;
+                }
+
+                if (errno != ENOENT)
+                        return log_error_errno(errno, "Failed to open %s: %m", j);
+        }
+
+        if (!f) {
+                /* After that, let's look for a file next to the
+                 * actual image we shall boot. */
+
+                if (arg_image) {
+                        p = file_in_same_dir(arg_image, fn);
+                        if (!p)
+                                return log_oom();
+                } else if (arg_directory) {
+                        p = file_in_same_dir(arg_directory, fn);
+                        if (!p)
+                                return log_oom();
+                }
+
+                if (p) {
+                        f = fopen(p, "re");
+                        if (!f && errno != ENOENT)
+                                return log_error_errno(errno, "Failed to open %s: %m", p);
+
+                        /* By default we do not trust configuration from /var/lib/machines */
+                        if (arg_settings_trusted < 0)
+                                arg_settings_trusted = false;
+                }
+        }
+
+        /* No settings file anywhere, that's fine */
+        if (!f)
+                return 0;
+
+        log_debug("Settings are trusted: %s", yes_no(arg_settings_trusted));
+
+        r = settings_load(f, p, &settings);
+        if (r < 0)
+                return r;
+
+        /* Copy over bits from the settings, unless they have been
+         * explicitly masked by command line switches. */
+
+        if ((arg_settings_mask & SETTING_BOOT) == 0 &&
+            settings->boot >= 0) {
+                arg_boot = settings->boot;
+
+                strv_free(arg_parameters);
+                arg_parameters = settings->parameters;
+                settings->parameters = NULL;
+        }
+
+        if ((arg_settings_mask & SETTING_ENVIRONMENT) == 0 &&
+            settings->environment) {
+                strv_free(arg_setenv);
+                arg_setenv = settings->environment;
+                settings->environment = NULL;
+        }
+
+        if ((arg_settings_mask & SETTING_USER) == 0 &&
+            settings->user) {
+                free(arg_user);
+                arg_user = settings->user;
+                settings->user = NULL;
+        }
+
+        if ((arg_settings_mask & SETTING_CAPABILITY) == 0) {
+
+                /* Adding capabilities requires trust, dropping them does not */
+                if (!arg_settings_trusted && settings->capability != 0)
+                        log_warning("Ignoring Capability= setting, file %s is not trusted.", p);
+                else
+                        arg_retain |= settings->capability;
+
+                arg_retain &= ~settings->drop_capability;
+        }
+
+        if ((arg_settings_mask & SETTING_KILL_SIGNAL) == 0 &&
+            settings->kill_signal > 0)
+                arg_kill_signal = settings->kill_signal;
+
+        if ((arg_settings_mask & SETTING_PERSONALITY) == 0 &&
+            settings->personality != PERSONALITY_INVALID)
+                arg_personality = settings->personality;
+
+        if ((arg_settings_mask & SETTING_MACHINE_ID) == 0 &&
+            !sd_id128_is_null(settings->machine_id)) {
+
+                if (!arg_settings_trusted)
+                        log_warning("Ignoring MachineID= setting, file %s is not trusted.", p);
+                else
+                        arg_uuid = settings->machine_id;
+        }
+
+        if ((arg_settings_mask & SETTING_READ_ONLY) == 0 &&
+            settings->read_only >= 0)
+                arg_read_only = settings->read_only;
+
+        if ((arg_settings_mask & SETTING_VOLATILE_MODE) == 0 &&
+            settings->volatile_mode != _VOLATILE_MODE_INVALID)
+                arg_volatile_mode = settings->volatile_mode;
+
+        if ((arg_settings_mask & SETTING_CUSTOM_MOUNTS) == 0 &&
+            settings->n_custom_mounts > 0) {
+
+                if (!arg_settings_trusted)
+                        log_warning("Ignoring TemporaryFileSystem=, Bind= and BindReadOnly= settings, file %s is not trusted.", p);
+                else {
+                        custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+                        arg_custom_mounts = settings->custom_mounts;
+                        arg_n_custom_mounts = settings->n_custom_mounts;
+
+                        settings->custom_mounts = NULL;
+                        settings->n_custom_mounts = 0;
+                }
+        }
+
+        if ((arg_settings_mask & SETTING_NETWORK) == 0 &&
+            (settings->private_network >= 0 ||
+             settings->network_veth >= 0 ||
+             settings->network_bridge ||
+             settings->network_interfaces ||
+             settings->network_macvlan ||
+             settings->network_ipvlan)) {
+
+                if (!arg_settings_trusted)
+                        log_warning("Ignoring network settings, file %s is not trusted.", p);
+                else {
+                        /* Evaluate this before ownership of the bridge
+                         * name is moved below: afterwards
+                         * settings->network_bridge is NULL and a
+                         * configured bridge would no longer imply veth. */
+                        arg_network_veth = settings->network_veth > 0 || settings->network_bridge;
+
+                        arg_private_network = true; /* all these settings imply private networking */
+
+                        strv_free(arg_network_interfaces);
+                        arg_network_interfaces = settings->network_interfaces;
+                        settings->network_interfaces = NULL;
+
+                        strv_free(arg_network_macvlan);
+                        arg_network_macvlan = settings->network_macvlan;
+                        settings->network_macvlan = NULL;
+
+                        strv_free(arg_network_ipvlan);
+                        arg_network_ipvlan = settings->network_ipvlan;
+                        settings->network_ipvlan = NULL;
+
+                        free(arg_network_bridge);
+                        arg_network_bridge = settings->network_bridge;
+                        settings->network_bridge = NULL;
+                }
+        }
+
+        if ((arg_settings_mask & SETTING_EXPOSE_PORTS) == 0 &&
+            settings->expose_ports) {
+
+                if (!arg_settings_trusted)
+                        log_warning("Ignoring Port= setting, file %s is not trusted.", p);
+                else {
+                        expose_port_free_all(arg_expose_ports);
+                        arg_expose_ports = settings->expose_ports;
+                        settings->expose_ports = NULL;
+                }
+        }
 
         return 0;
 }
@@ -4542,15 +5108,22 @@ int main(int argc, char *argv[]) {
         if (r <= 0)
                 goto finish;
 
-        r = determine_names();
-        if (r < 0)
-                goto finish;
-
         if (geteuid() != 0) {
                 log_error("Need to be root.");
                 r = -EPERM;
                 goto finish;
         }
+        r = determine_names();
+        if (r < 0)
+                goto finish;
+
+        r = load_settings();
+        if (r < 0)
+                goto finish;
+
+        r = verify_arguments();
+        if (r < 0)
+                goto finish;
 
         n_fd_passed = sd_listen_fds(false);
         if (n_fd_passed > 0) {
@@ -4831,8 +5404,7 @@ int main(int argc, char *argv[]) {
                                         kmsg_socket_pair[1],
                                         rtnl_socket_pair[1],
                                         uid_shift_socket_pair[1],
-                                        fds,
-                                        argc, argv);
+                                        fds);
                         if (r < 0)
                                 _exit(EXIT_FAILURE);
 
@@ -4921,6 +5493,14 @@ int main(int argc, char *argv[]) {
                 if (r < 0)
                         goto finish;
 
+                r = sync_cgroup(pid);
+                if (r < 0)
+                        goto finish;
+
+                r = create_subcgroup(pid);
+                if (r < 0)
+                        goto finish;
+
                 r = chown_cgroup(pid);
                 if (r < 0)
                         goto finish;
@@ -5073,24 +5653,21 @@ finish:
                 (void) rm_rf(p, REMOVE_ROOT);
         }
 
+        flush_ports(&exposed);
+
         free(arg_directory);
         free(arg_template);
         free(arg_image);
         free(arg_machine);
         free(arg_user);
         strv_free(arg_setenv);
+        free(arg_network_bridge);
         strv_free(arg_network_interfaces);
         strv_free(arg_network_macvlan);
         strv_free(arg_network_ipvlan);
-        custom_mount_free_all();
-
-        flush_ports(&exposed);
-
-        while (arg_expose_ports) {
-                ExposePort *p = arg_expose_ports;
-                LIST_REMOVE(ports, arg_expose_ports, p);
-                free(p);
-        }
+        strv_free(arg_parameters);
+        custom_mount_free_all(arg_custom_mounts, arg_n_custom_mounts);
+        expose_port_free_all(arg_expose_ports);
 
         return r < 0 ? EXIT_FAILURE : ret;
 }