1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
9 #include "alloc-util.h"
11 #include "capability-list.h"
12 #include "cgroup-util.h"
13 #include "cpu-set-util.h"
14 #include "device-util.h"
15 #include "devnum-util.h"
17 #include "hostname-util.h"
18 #include "json-util.h"
19 #include "nspawn-mount.h"
20 #include "nspawn-oci.h"
21 #include "path-util.h"
22 #include "rlimit-util.h"
23 #include "string-util.h"
25 #include "time-util.h"
28 * OCI runtime tool implementation
33 * How is RLIM_INFINITY supposed to be encoded?
34 * configured effective caps is bullshit, as execv() corrupts it anyway
35 * pipes bind mounted is *very* different from pipes newly created, comments regarding bind mount or not are bogus
36 * annotation values structured? or string?
37 * configurable file system namespace path, but then also root path? wtf?
38 * apply sysctl inside of the container? or outside?
39 * how is unlimited pids tasks limit to be encoded?
40 * what are the defaults for caps if not specified?
41 * what are the default uid/gid mappings if one is missing but the other set, or when user ns is on but no namespace configured
42 * the source field of "mounts" is really weird, as it cannot realistically be relative to the bundle, since we never know if that's what the fs wants
43 * spec contradicts itself on the mount "type" field, as the example uses "bind" as type, but it's not listed in /proc/filesystems, and is something made up by /bin/mount
44 * if type of mount is left out, what shall be assumed? "bind"?
45 * readonly mounts is entirely redundant?
46 * should escaping be applied when joining mount options with ","?
47 * devices cgroup support is bogus, "allow" and "deny" on the kernel level is about adding/removing entries, not about access
48 * spec needs to say that "rwm" devices cgroup combination can't be the empty string
49 * cgroupsv1 crap: kernel, kernelTCP, swappiness, disableOOMKiller, swap, devices, leafWeight
50 * general: it shouldn't leak lower level abstractions this obviously
51 * unmanageable cgroups stuff: realtimeRuntime/realtimePeriod
52 * needs to say what happens when some option is not specified, i.e. which defaults apply
53 * no architecture? no personality?
54 * seccomp example and logic is simply broken: there's no constant "SCMP_ACT_ERRNO".
55 * spec should say what to do with unknown props
56 * /bin/mount regarding NFS and FUSE required?
57 * what does terminal=false mean?
58 * sysctl inside or outside? allow-listing?
59 * swapiness typo -> swappiness
64 * selinuxLabel + mountLabel
69 * swappiness, disableOOMKiller, kernel, kernelTCP, leafWeight (because it's dead, cgroupsv2 can't do it and hence systemd neither)
71 * Non-slice cgroup paths
72 * Propagation that is not slave + shared
73 * more than one uid/gid mapping, mappings with a container base != 0, or non-matching uid/gid mappings
74 * device cgroups access = false items that are not catchall
75 * device cgroups matches where minor is specified, but major isn't. similar where major is specified but char/block is not. also, any match that only has a type set that has less than "rwm" set. also, any entry that has none of rwm set.
/* Constants for two cgroup attributes (cpu.shares and blkio.weight). For each attribute there is an
 * "invalid" sentinel (UINT64_MAX, i.e. outside the [MIN, MAX] range), a minimum, a maximum and a default.
 * NOTE(review): where these are consumed is not visible in this part of the file. */
79 /* Special values for the cpu.shares attribute */
80 #define CGROUP_CPU_SHARES_INVALID UINT64_MAX
81 #define CGROUP_CPU_SHARES_MIN UINT64_C(2)
82 #define CGROUP_CPU_SHARES_MAX UINT64_C(262144)
83 #define CGROUP_CPU_SHARES_DEFAULT UINT64_C(1024)
85 /* Special values for the blkio.weight attribute */
86 #define CGROUP_BLKIO_WEIGHT_INVALID UINT64_MAX
87 #define CGROUP_BLKIO_WEIGHT_MIN UINT64_C(10)
88 #define CGROUP_BLKIO_WEIGHT_MAX UINT64_C(1000)
89 #define CGROUP_BLKIO_WEIGHT_DEFAULT UINT64_C(500)
/* Dispatch callback for OCI JSON elements that are not expected: logs the element name together with the
 * JSON type of its value and returns the result of json_log() invoked with a synthetic EINVAL.
 * 'userdata' is not used in the visible code. */
91 static int oci_unexpected(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
92 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
93 "Unexpected OCI element '%s' of type '%s'.", name
, sd_json_variant_type_to_string(sd_json_variant_type(v
)));
/* Thin wrapper around sd_json_dispatch_full(): forwards 'v', 'table', 'flags' and 'userdata' unchanged,
 * passes oci_unexpected() as the callback argument and NULL for reterr_bad_field.
 * NOTE(review): presumably the callback is what gets invoked for fields not listed in 'table' — confirm
 * against the sd_json_dispatch_full() documentation. */
96 static int oci_dispatch(sd_json_variant
*v
, const sd_json_dispatch_field table
[], sd_json_dispatch_flags_t flags
, void *userdata
) {
97 return sd_json_dispatch_full(v
, table
, oci_unexpected
, flags
, userdata
, /* reterr_bad_field= */ NULL
);
/* Dispatch callback for OCI JSON elements that are recognized but not supported: logs the element name and
 * the JSON type of its value and returns the result of json_log() invoked with a synthetic EOPNOTSUPP.
 * 'userdata' is not used in the visible code. */
100 static int oci_unsupported(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
101 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
102 "Unsupported OCI element '%s' of type '%s'.", name
, sd_json_variant_type_to_string(sd_json_variant_type(v
)));
/* Dispatch callback for the "terminal" boolean. 'userdata' must point to the Settings object (asserted
 * non-NULL). Stores _CONSOLE_MODE_INVALID in console_mode when the value is true and CONSOLE_PIPE when it
 * is false. */
105 static int oci_terminal(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
106 Settings
*s
= ASSERT_PTR(userdata
);
108 /* If not specified, or set to true, we'll default to either an interactive or a read-only
109 * console. If specified as false, we'll forcibly move to "pipe" mode though. */
110 s
->console_mode
= sd_json_variant_boolean(v
) ? _CONSOLE_MODE_INVALID
: CONSOLE_PIPE
;
/* Dispatch callback for a single console size dimension. 'userdata' is treated as a pointer to an unsigned
 * (asserted non-NULL). The value is read as an unsigned integer; two range failures are reported with a
 * synthetic ERANGE: "too small" (its condition is not visible in this extract) and "too large" when the
 * value exceeds USHRT_MAX. strna() is applied to 'name' before it is used in the log messages. */
114 static int oci_console_dimension(const char *name
, sd_json_variant
*variant
, sd_json_dispatch_flags_t flags
, void *userdata
) {
115 unsigned *u
= ASSERT_PTR(userdata
);
118 k
= sd_json_variant_unsigned(variant
);
120 return json_log(variant
, flags
, SYNTHETIC_ERRNO(ERANGE
),
121 "Console size field '%s' is too small.", strna(name
));
122 if (k
> USHRT_MAX
) /* TIOCSWINSZ's struct winsize uses "unsigned short" for width and height */
123 return json_log(variant
, flags
, SYNTHETIC_ERRNO(ERANGE
),
124 "Console size field '%s' is too large.", strna(name
));
/* Dispatch callback for the "consoleSize" object. Both "height" and "width" are mandatory unsigned fields;
 * each is handled by oci_console_dimension() and targets Settings.console_height / Settings.console_width
 * respectively (via offsetof). Returns the result of oci_dispatch(). */
130 static int oci_console_size(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
131 Settings
*s
= ASSERT_PTR(userdata
);
133 static const sd_json_dispatch_field table
[] = {
134 { "height", SD_JSON_VARIANT_UNSIGNED
, oci_console_dimension
, offsetof(Settings
, console_height
), SD_JSON_MANDATORY
},
135 { "width", SD_JSON_VARIANT_UNSIGNED
, oci_console_dimension
, offsetof(Settings
, console_width
), SD_JSON_MANDATORY
},
139 return oci_dispatch(v
, table
, flags
, s
);
/* Dispatch callback for the "env" array. 'userdata' is a pointer to a string list (char ***), asserted
 * non-NULL. For every array element: it must be a JSON string and must pass env_assignment_is_valid(),
 * otherwise a synthetic EINVAL is logged and returned; valid entries are appended with strv_extend().
 * NOTE(review): the handling of strv_extend()'s result is not visible in this extract. */
142 static int oci_env(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
143 char ***l
= ASSERT_PTR(userdata
);
147 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
150 if (!sd_json_variant_is_string(e
))
151 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
152 "Environment array contains non-string.");
154 assert_se(n
= sd_json_variant_string(e
));
156 if (!env_assignment_is_valid(n
))
157 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
158 "Environment assignment not valid: %s", n
);
160 r
= strv_extend(l
, n
);
/* Dispatch callback for the "args" array. 'userdata' is a pointer to a string list (char ***), asserted
 * non-NULL. The array is converted with sd_json_variant_strv() into a temporary list that is freed
 * automatically (_cleanup_strv_free_). Three failures are reported: the conversion error itself, an empty
 * argument list (EINVAL) and an empty executable name (EINVAL); the conditions guarding them are not
 * visible in this extract. On success the temporary list replaces *value via strv_free_and_replace(). */
168 static int oci_args(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
169 _cleanup_strv_free_
char **l
= NULL
;
170 char ***value
= ASSERT_PTR(userdata
);
173 r
= sd_json_variant_strv(v
, &l
);
175 return json_log(v
, flags
, r
, "Cannot parse arguments as list of strings: %m");
178 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
179 "Argument list empty, refusing.");
182 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
183 "Executable name is empty, refusing.");
185 return strv_free_and_replace(*value
, l
);
/* Dispatch callback for the "type" field of an rlimits entry. 'userdata' is a pointer to an int (asserted
 * non-NULL). The string is expected to carry a "RLIMIT_" prefix, which is stripped with startswith(); the
 * remainder is looked up with rlimit_from_string(). A missing prefix is reported with a synthetic EINVAL,
 * an unknown name with the value returned by rlimit_from_string().
 * NOTE(review): the store into *type is not visible in this extract. */
188 static int oci_rlimit_type(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
190 int *type
= ASSERT_PTR(userdata
);
193 z
= startswith(sd_json_variant_string(v
), "RLIMIT_");
195 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
196 "rlimit entry's name does not begin with 'RLIMIT_', refusing: %s",
197 sd_json_variant_string(v
));
199 t
= rlimit_from_string(z
);
201 return json_log(v
, flags
, t
,
202 "rlimit name unknown: %s", sd_json_variant_string(v
));
/* Dispatch callback for the "soft"/"hard" fields of an rlimits entry. 'userdata' is a pointer to an rlim_t
 * (asserted non-NULL). Negative numbers get dedicated treatment (the branch body is not visible in this
 * extract). Otherwise the value must be an unsigned integer (else synthetic ERANGE) and must survive a
 * round trip through rlim_t unchanged (else synthetic EINVAL, "out of range"). */
208 static int oci_rlimit_value(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
209 rlim_t
*value
= ASSERT_PTR(userdata
);
212 if (sd_json_variant_is_negative(v
))
215 if (!sd_json_variant_is_unsigned(v
))
216 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
217 "rlimits limit not unsigned, refusing.");
219 z
= (rlim_t
) sd_json_variant_unsigned(v
);
221 if ((uint64_t) z
!= sd_json_variant_unsigned(v
))
222 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
223 "rlimits limit out of range, refusing.");
/* Dispatch callback for the "rlimits" array. For each element a temporary struct rlimit_data (fields type,
 * soft, hard; soft and hard start out as RLIM_INFINITY) is filled via oci_dispatch(); all three JSON
 * fields are mandatory. The resulting type is asserted to lie in [0, _RLIMIT_MAX). If Settings.rlimit[]
 * already has an entry for that type a synthetic EINVAL is reported (duplicate); otherwise a struct rlimit
 * is allocated and initialized with rlim_cur = soft and rlim_max = hard. */
230 static int oci_rlimits(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
231 Settings
*s
= ASSERT_PTR(userdata
);
235 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
243 .soft
= RLIM_INFINITY
,
244 .hard
= RLIM_INFINITY
,
247 static const sd_json_dispatch_field table
[] = {
248 { "soft", SD_JSON_VARIANT_NUMBER
, oci_rlimit_value
, offsetof(struct rlimit_data
, soft
), SD_JSON_MANDATORY
},
249 { "hard", SD_JSON_VARIANT_NUMBER
, oci_rlimit_value
, offsetof(struct rlimit_data
, hard
), SD_JSON_MANDATORY
},
250 { "type", SD_JSON_VARIANT_STRING
, oci_rlimit_type
, offsetof(struct rlimit_data
, type
), SD_JSON_MANDATORY
},
254 r
= oci_dispatch(e
, table
, flags
, &data
);
258 assert(data
.type
>= 0);
259 assert(data
.type
< _RLIMIT_MAX
);
261 if (s
->rlimit
[data
.type
])
262 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
263 "rlimits array contains duplicate entry, refusing.");
265 s
->rlimit
[data
.type
] = new(struct rlimit
, 1);
266 if (!s
->rlimit
[data
.type
])
269 *s
->rlimit
[data
.type
] = (struct rlimit
) {
270 .rlim_cur
= data
.soft
,
271 .rlim_max
= data
.hard
,
/* Dispatch callback for one capability name array. 'userdata' is a pointer to a uint64_t mask (asserted
 * non-NULL). Every element must be a JSON string that capability_from_name() can resolve; otherwise a
 * synthetic EINVAL is reported. Each resolved capability sets bit (1 << cap) in a local accumulator.
 * NOTE(review): how the accumulator is merged into *mask is not visible in this extract; the visible code
 * only shows a check of *mask against UINT64_MAX before that happens. */
278 static int oci_capability_array(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
279 uint64_t *mask
= ASSERT_PTR(userdata
);
283 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
287 if (!sd_json_variant_is_string(e
))
288 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
289 "Entry in capabilities array is not a string.");
291 assert_se(n
= sd_json_variant_string(e
));
293 cap
= capability_from_name(n
);
295 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
296 "Unknown capability: %s", n
);
298 m
|= UINT64_C(1) << cap
;
301 if (*mask
== UINT64_MAX
)
/* Dispatch callback for the "capabilities" object. The five optional arrays "effective", "bounding",
 * "inheritable", "permitted" and "ambient" are each parsed by oci_capability_array() into the matching
 * member of Settings.full_capabilities (a CapabilityQuintet). If the bounding set differs from UINT64_MAX
 * afterwards, it is copied to Settings.capability and its bitwise complement to Settings.drop_capability. */
309 static int oci_capabilities(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
311 static const sd_json_dispatch_field table
[] = {
312 { "effective", SD_JSON_VARIANT_ARRAY
, oci_capability_array
, offsetof(CapabilityQuintet
, effective
) },
313 { "bounding", SD_JSON_VARIANT_ARRAY
, oci_capability_array
, offsetof(CapabilityQuintet
, bounding
) },
314 { "inheritable", SD_JSON_VARIANT_ARRAY
, oci_capability_array
, offsetof(CapabilityQuintet
, inheritable
) },
315 { "permitted", SD_JSON_VARIANT_ARRAY
, oci_capability_array
, offsetof(CapabilityQuintet
, permitted
) },
316 { "ambient", SD_JSON_VARIANT_ARRAY
, oci_capability_array
, offsetof(CapabilityQuintet
, ambient
) },
320 Settings
*s
= ASSERT_PTR(userdata
);
323 r
= oci_dispatch(v
, table
, flags
, &s
->full_capabilities
);
327 if (s
->full_capabilities
.bounding
!= UINT64_MAX
) {
328 s
->capability
= s
->full_capabilities
.bounding
;
329 s
->drop_capability
= ~s
->full_capabilities
.bounding
;
/* Dispatch callback for "oomScoreAdj". Reads the value as a signed integer and rejects anything outside
 * [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX] with a synthetic EINVAL. Accepted values are stored in
 * Settings.oom_score_adjust (cast to int) and Settings.oom_score_adjust_set is set to true. */
335 static int oci_oom_score_adj(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
336 Settings
*s
= ASSERT_PTR(userdata
);
339 k
= sd_json_variant_integer(v
);
340 if (k
< OOM_SCORE_ADJ_MIN
|| k
> OOM_SCORE_ADJ_MAX
)
341 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
342 "oomScoreAdj value out of range: %" PRIi64
, k
);
344 s
->oom_score_adjust
= (int) k
;
345 s
->oom_score_adjust_set
= true;
/* Dispatch callback for the "additionalGids" array. Every element must be an unsigned JSON number
 * (otherwise synthetic EINVAL); it is then parsed with sd_json_dispatch_uid_gid() and appended to
 * Settings.supplementary_gids, which is grown one slot at a time with GREEDY_REALLOC() while
 * Settings.n_supplementary_gids is incremented.
 * NOTE(review): the error paths after sd_json_dispatch_uid_gid() and GREEDY_REALLOC() are not visible in
 * this extract. */
350 static int oci_supplementary_gids(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
351 Settings
*s
= ASSERT_PTR(userdata
);
355 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
358 if (!sd_json_variant_is_unsigned(e
))
359 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
360 "Supplementary GID entry is not a UID.");
362 r
= sd_json_dispatch_uid_gid(name
, e
, flags
, &gid
);
366 if (!GREEDY_REALLOC(s
->supplementary_gids
, s
->n_supplementary_gids
+ 1))
369 s
->supplementary_gids
[s
->n_supplementary_gids
++] = gid
;
/* Dispatch callback for the "user" object. "uid" and "gid" are mandatory unsigned fields written to
 * Settings.uid and Settings.gid via sd_json_dispatch_uid_gid(); the optional "additionalGids" array is
 * handled by oci_supplementary_gids(). 'userdata' is forwarded unchanged to oci_dispatch(). */
375 static int oci_user(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
377 static const sd_json_dispatch_field table
[] = {
378 { "uid", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uid_gid
, offsetof(Settings
, uid
), SD_JSON_MANDATORY
},
379 { "gid", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uid_gid
, offsetof(Settings
, gid
), SD_JSON_MANDATORY
},
380 { "additionalGids", SD_JSON_VARIANT_ARRAY
, oci_supplementary_gids
, 0, 0 },
384 return oci_dispatch(v
, table
, flags
, userdata
);
/* Dispatch callback for the "process" object. Maps each known field to its handler: "terminal",
 * "consoleSize", "rlimits", "capabilities", "oomScoreAdj" and "user" go to the oci_*() callbacks defined
 * in this file; "cwd", "env", "args" and "noNewPrivileges" write to Settings.working_directory,
 * .environment, .parameters and .no_new_privileges respectively. "apparmorProfile" and "selinuxLabel" are
 * routed to oci_unsupported() with SD_JSON_PERMISSIVE set. 'userdata' is forwarded unchanged. */
387 static int oci_process(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
389 static const sd_json_dispatch_field table
[] = {
390 { "terminal", SD_JSON_VARIANT_BOOLEAN
, oci_terminal
, 0, 0 },
391 { "consoleSize", SD_JSON_VARIANT_OBJECT
, oci_console_size
, 0, 0 },
392 { "cwd", SD_JSON_VARIANT_STRING
, json_dispatch_path
, offsetof(Settings
, working_directory
), 0 },
393 { "env", SD_JSON_VARIANT_ARRAY
, oci_env
, offsetof(Settings
, environment
), 0 },
394 { "args", SD_JSON_VARIANT_ARRAY
, oci_args
, offsetof(Settings
, parameters
), 0 },
395 { "rlimits", SD_JSON_VARIANT_ARRAY
, oci_rlimits
, 0, 0 },
396 { "apparmorProfile", SD_JSON_VARIANT_STRING
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
397 { "capabilities", SD_JSON_VARIANT_OBJECT
, oci_capabilities
, 0, 0 },
398 { "noNewPrivileges", SD_JSON_VARIANT_BOOLEAN
, sd_json_dispatch_tristate
, offsetof(Settings
, no_new_privileges
), 0 },
399 { "oomScoreAdj", SD_JSON_VARIANT_INTEGER
, oci_oom_score_adj
, 0, 0 },
400 { "selinuxLabel", SD_JSON_VARIANT_STRING
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
401 { "user", SD_JSON_VARIANT_OBJECT
, oci_user
, 0, 0 },
405 return oci_dispatch(v
, table
, flags
, userdata
);
/* Dispatch callback for the "root" object. "path" is stored as a string in Settings.root and "readonly" as
 * a tristate in Settings.read_only. If a root path was set and it is not absolute, it is joined onto
 * Settings.bundle with path_join() and the joined string replaces Settings.root.
 * NOTE(review): the allocation-failure handling after path_join() is not visible in this extract. */
408 static int oci_root(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
409 Settings
*s
= ASSERT_PTR(userdata
);
412 static const sd_json_dispatch_field table
[] = {
413 { "path", SD_JSON_VARIANT_STRING
, sd_json_dispatch_string
, offsetof(Settings
, root
) },
414 { "readonly", SD_JSON_VARIANT_BOOLEAN
, sd_json_dispatch_tristate
, offsetof(Settings
, read_only
) },
418 r
= oci_dispatch(v
, table
, flags
, s
);
422 if (s
->root
&& !path_is_absolute(s
->root
)) {
425 joined
= path_join(s
->bundle
, s
->root
);
429 free_and_replace(s
->root
, joined
);
/* Dispatch callback for "hostname". The string is validated with hostname_is_valid(n, 0); an invalid name
 * is reported with a synthetic EINVAL. A valid name is copied into Settings.hostname via
 * free_and_strdup_warn(), whose result is returned. */
435 static int oci_hostname(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
436 Settings
*s
= ASSERT_PTR(userdata
);
439 assert_se(n
= sd_json_variant_string(v
));
441 if (!hostname_is_valid(n
, 0))
442 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
443 "Hostname string is not a valid hostname: %s", n
);
445 return free_and_strdup_warn(&s
->hostname
, n
);
/* Predicate on a mount destination path. The path is compared against a fixed PATH_IN_SET() list (only
 * partially visible in this extract, e.g. "/proc/sysrq-trigger") and against the "/sys/fs/cgroup" prefix
 * via path_startswith(). The return statements themselves are not visible here. */
448 static bool oci_exclude_mount(const char *path
) {
450 /* Returns "true" for all mounts we insist to mount on our own, and hence ignore the OCI data. */
452 if (PATH_IN_SET(path
,
470 "/proc/sysrq-trigger",
479 /* Similar, skip the whole /sys/fs/cgroups subtree */
480 if (path_startswith(path
, "/sys/fs/cgroup"))
/* Scratch data for one entry of the "mounts" array. The member list is not visible in this extract; the
 * dispatch table in oci_mounts() references the members destination, source, options and type. */
486 typedef struct oci_mount_data
{
/* Releases the heap members of an oci_mount_data: 'destination' with free() and 'options' with strv_free().
 * Used as the _cleanup_() handler in oci_mounts().
 * NOTE(review): any further frees are not visible in this extract. */
493 static void oci_mount_data_done(oci_mount_data
*data
) {
496 free(data
->destination
);
499 strv_free(data
->options
);
/* Dispatch callback for the "mounts" array. For each element:
 *   - "destination" (mandatory), "source", "options" and "type" are parsed into an oci_mount_data that is
 *     cleaned up automatically via oci_mount_data_done();
 *   - a non-absolute destination is rejected with a synthetic EINVAL;
 *   - the destination is checked with oci_exclude_mount() (the action taken is not visible here);
 *   - the options list is joined with "," into one string;
 *   - if no type is given or the type is "bind": a relative source is joined onto Settings.bundle, the type
 *     string is released, and a CUSTOM_MOUNT_BIND entry is added; otherwise a CUSTOM_MOUNT_ARBITRARY entry
 *     is added;
 *   - ownership of destination, source, joined options and type is moved into the new entry (TAKE_PTR). */
502 static int oci_mounts(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
503 Settings
*s
= ASSERT_PTR(userdata
);
507 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
508 static const sd_json_dispatch_field table
[] = {
509 { "destination", SD_JSON_VARIANT_STRING
, json_dispatch_path
, offsetof(oci_mount_data
, destination
), SD_JSON_MANDATORY
},
510 { "source", SD_JSON_VARIANT_STRING
, sd_json_dispatch_string
, offsetof(oci_mount_data
, source
), 0 },
511 { "options", SD_JSON_VARIANT_ARRAY
, sd_json_dispatch_strv
, offsetof(oci_mount_data
, options
), 0, },
512 { "type", SD_JSON_VARIANT_STRING
, sd_json_dispatch_string
, offsetof(oci_mount_data
, type
), 0 },
516 _cleanup_free_
char *joined_options
= NULL
;
517 _cleanup_(oci_mount_data_done
) oci_mount_data data
= {};
520 r
= oci_dispatch(e
, table
, flags
, &data
);
524 if (!path_is_absolute(data
.destination
))
525 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
526 "Mount destination not an absolute path: %s", data
.destination
);
528 if (oci_exclude_mount(data
.destination
))
532 joined_options
= strv_join(data
.options
, ",");
537 if (!data
.type
|| streq(data
.type
, "bind")) {
538 if (data
.source
&& !path_is_absolute(data
.source
)) {
541 joined
= path_join(s
->bundle
, data
.source
);
545 free_and_replace(data
.source
, joined
);
548 data
.type
= mfree(data
.type
);
550 m
= custom_mount_add(&s
->custom_mounts
, &s
->n_custom_mounts
, CUSTOM_MOUNT_BIND
);
552 m
= custom_mount_add(&s
->custom_mounts
, &s
->n_custom_mounts
, CUSTOM_MOUNT_ARBITRARY
);
556 m
->destination
= TAKE_PTR(data
.destination
);
557 m
->source
= TAKE_PTR(data
.source
);
558 m
->options
= TAKE_PTR(joined_options
);
559 m
->type_argument
= TAKE_PTR(data
.type
);
/* Dispatch callback for the "type" field of a namespaces entry. 'userdata' is a pointer to an unsigned long
 * (asserted non-NULL) that receives exactly one CLONE_NEW* flag. Visible mappings: "network" ->
 * CLONE_NEWNET, "mount" -> CLONE_NEWNS, "ipc" -> CLONE_NEWIPC, "uts" -> CLONE_NEWUTS, "user" ->
 * CLONE_NEWUSER, "cgroup" -> CLONE_NEWCGROUP; the first branch assigns CLONE_NEWPID (its string comparison
 * is not visible in this extract). Any other string is reported with a synthetic EINVAL. */
565 static int oci_namespace_type(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
566 unsigned long *nsflags
= ASSERT_PTR(userdata
);
569 assert_se(n
= sd_json_variant_string(v
));
571 /* We don't use namespace_flags_from_string() here, as the OCI spec uses slightly different names than the
574 *nsflags
= CLONE_NEWPID
;
575 else if (streq(n
, "network"))
576 *nsflags
= CLONE_NEWNET
;
577 else if (streq(n
, "mount"))
578 *nsflags
= CLONE_NEWNS
;
579 else if (streq(n
, "ipc"))
580 *nsflags
= CLONE_NEWIPC
;
581 else if (streq(n
, "uts"))
582 *nsflags
= CLONE_NEWUTS
;
583 else if (streq(n
, "user"))
584 *nsflags
= CLONE_NEWUSER
;
585 else if (streq(n
, "cgroup"))
586 *nsflags
= CLONE_NEWCGROUP
;
588 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
589 "Unknown namespace type, refusing: %s", n
);
/* Scratch data for one entry of the "namespaces" array. The member list is not visible in this extract;
 * the dispatch table in oci_namespaces() references the members type and path. */
594 struct namespace_data
{
/* Cleanup handler for struct namespace_data; installed via _cleanup_() in oci_namespaces().
 * NOTE(review): the function body is not visible in this extract. */
599 static void namespace_data_done(struct namespace_data
*data
) {
/* Dispatch callback for the "namespaces" array. For each element "type" (mandatory) and "path" are parsed
 * into a struct namespace_data. Visible checks inside the loop:
 *   - a path on a type other than CLONE_NEWNET is reported with a synthetic EOPNOTSUPP;
 *   - a second network namespace path is reported with a synthetic EINVAL; otherwise the path is moved
 *     into Settings.network_namespace_path;
 *   - a type whose flag is already set in the accumulated flag word 'n' is reported as a duplicate (EINVAL).
 * After the loop, a flag set lacking CLONE_NEWNS is reported with EOPNOTSUPP. Finally the Settings are
 * derived from the flags: private_network (NEWNET), userns_mode (USER_NAMESPACE_FIXED if NEWUSER is set,
 * else USER_NAMESPACE_NO), use_cgns (NEWCGROUP) and clone_ns_flags (only the IPC, PID and UTS bits). */
605 static int oci_namespaces(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
606 Settings
*s
= ASSERT_PTR(userdata
);
611 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
612 _cleanup_(namespace_data_done
) struct namespace_data data
= {};
614 static const sd_json_dispatch_field table
[] = {
615 { "type", SD_JSON_VARIANT_STRING
, oci_namespace_type
, offsetof(struct namespace_data
, type
), SD_JSON_MANDATORY
},
616 { "path", SD_JSON_VARIANT_STRING
, json_dispatch_path
, offsetof(struct namespace_data
, path
), 0 },
620 r
= oci_dispatch(e
, table
, flags
, &data
);
625 if (data
.type
!= CLONE_NEWNET
)
626 return json_log(e
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
627 "Specifying namespace path for non-network namespace is not supported.");
629 if (s
->network_namespace_path
)
630 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
631 "Network namespace path specified more than once, refusing.");
633 free_and_replace(s
->network_namespace_path
, data
.path
);
636 if (FLAGS_SET(n
, data
.type
))
637 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
638 "Duplicate namespace specification, refusing.");
643 if (!FLAGS_SET(n
, CLONE_NEWNS
))
644 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
645 "Containers without a mount namespace aren't supported.");
647 s
->private_network
= FLAGS_SET(n
, CLONE_NEWNET
);
648 s
->userns_mode
= FLAGS_SET(n
, CLONE_NEWUSER
) ? USER_NAMESPACE_FIXED
: USER_NAMESPACE_NO
;
649 s
->use_cgns
= FLAGS_SET(n
, CLONE_NEWCGROUP
);
651 s
->clone_ns_flags
= n
& (CLONE_NEWIPC
|CLONE_NEWPID
|CLONE_NEWUTS
);
/* Dispatch callback for the "size" field of a UID/GID mapping. 'userdata' is a pointer to a uid_t (asserted
 * non-NULL); a compile-time assertion guarantees uid_t and gid_t have the same size, so the callback
 * serves both. The value is read as an unsigned integer; it is reported with a synthetic ERANGE when it
 * does not survive the conversion to the narrower local type, and likewise for the "range can't be zero"
 * case (whose condition is not visible in this extract). */
656 static int oci_uid_gid_range(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
657 uid_t
*uid
= ASSERT_PTR(userdata
);
661 assert_cc(sizeof(uid_t
) == sizeof(gid_t
));
663 /* This is very much like oci_uid_gid(), except the checks are a bit different, as this is a UID range rather
664 * than a specific UID, and hence UID_INVALID has no special significance. OTOH a range of zero makes no
667 k
= sd_json_variant_unsigned(v
);
669 if ((uint64_t) u
!= k
)
670 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
671 "UID/GID out of range: %" PRIu64
, k
);
673 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
674 "UID/GID range can't be zero.");
/* Dispatch callback for a UID or GID mappings array. Only a single mapping entry is supported:
 *   - an empty array is special-cased (the action is not visible in this extract);
 *   - more than one entry is reported with a synthetic EOPNOTSUPP;
 *   - the single entry must provide "containerID", "hostID" and "size" (all mandatory); host_id and
 *     container_id start out as UID_INVALID;
 *   - host_id + range and container_id + range must not exceed UINT32_MAX (else EINVAL);
 *   - a non-zero container base is reported with EOPNOTSUPP;
 *   - a range below 0x10000 only produces a warning (SD_JSON_WARNING);
 *   - if Settings.uid_range is already set (not UID_INVALID) and the shift or range differ from this
 *     mapping, EOPNOTSUPP is reported ("Non-matching UID and GID mappings").
 * On success the mapping is stored in Settings.uid_shift and Settings.uid_range. */
680 static int oci_uid_gid_mappings(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
681 struct mapping_data
{
686 .host_id
= UID_INVALID
,
687 .container_id
= UID_INVALID
,
691 static const sd_json_dispatch_field table
[] = {
692 { "containerID", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uid_gid
, offsetof(struct mapping_data
, container_id
), SD_JSON_MANDATORY
},
693 { "hostID", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uid_gid
, offsetof(struct mapping_data
, host_id
), SD_JSON_MANDATORY
},
694 { "size", SD_JSON_VARIANT_UNSIGNED
, oci_uid_gid_range
, offsetof(struct mapping_data
, range
), SD_JSON_MANDATORY
},
698 Settings
*s
= ASSERT_PTR(userdata
);
702 if (sd_json_variant_elements(v
) == 0)
705 if (sd_json_variant_elements(v
) > 1)
706 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
707 "UID/GID mappings with more than one entry are not supported.");
709 assert_se(e
= sd_json_variant_by_index(v
, 0));
711 r
= oci_dispatch(e
, table
, flags
, &data
);
715 if (data
.range
> UINT32_MAX
- data
.host_id
||
716 data
.range
> UINT32_MAX
- data
.container_id
)
717 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
718 "UID/GID range goes beyond UID/GID validity range, refusing.");
720 if (data
.container_id
!= 0)
721 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
722 "UID/GID mappings with a non-zero container base are not supported.");
724 if (data
.range
< 0x10000)
725 json_log(v
, flags
|SD_JSON_WARNING
, 0,
726 "UID/GID mapping with less than 65536 UID/GIDS set up, you are looking for trouble.");
728 if (s
->uid_range
!= UID_INVALID
&&
729 (s
->uid_shift
!= data
.host_id
|| s
->uid_range
!= data
.range
))
730 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
731 "Non-matching UID and GID mappings are not supported.");
733 s
->uid_shift
= data
.host_id
;
734 s
->uid_range
= data
.range
;
/* Dispatch callback for the "type" field of a devices entry. 'userdata' is a pointer to a mode_t (asserted
 * non-NULL). The file type bits (S_IFMT) of *mode are replaced while all other bits are preserved:
 * "c" or "u" -> S_IFCHR, "b" -> S_IFBLK, "p" -> S_IFIFO. Any other string is reported with a synthetic
 * EINVAL. */
739 static int oci_device_type(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
740 mode_t
*mode
= ASSERT_PTR(userdata
);
743 assert_se(t
= sd_json_variant_string(v
));
745 if (STR_IN_SET(t
, "c", "u"))
746 *mode
= (*mode
& ~S_IFMT
) | S_IFCHR
;
747 else if (streq(t
, "b"))
748 *mode
= (*mode
& ~S_IFMT
) | S_IFBLK
;
749 else if (streq(t
, "p"))
750 *mode
= (*mode
& ~S_IFMT
) | S_IFIFO
;
752 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
753 "Unknown device type: %s", t
);
/* Dispatch callback for a device "major" number. 'userdata' is a pointer to an unsigned (asserted
 * non-NULL). The value is read as an unsigned integer and rejected with a synthetic ERANGE when
 * DEVICE_MAJOR_VALID() fails. Also used by the devices cgroup table in oci_cgroup_devices().
 * NOTE(review): the store into *u is not visible in this extract. */
758 static int oci_device_major(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
759 unsigned *u
= ASSERT_PTR(userdata
);
762 k
= sd_json_variant_unsigned(v
);
763 if (!DEVICE_MAJOR_VALID(k
))
764 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
765 "Device major %" PRIu64
" out of range.", k
);
/* Dispatch callback for a device "minor" number. 'userdata' is a pointer to an unsigned (asserted
 * non-NULL). The value is read as an unsigned integer and rejected with a synthetic ERANGE when
 * DEVICE_MINOR_VALID() fails. Also used by the devices cgroup table in oci_cgroup_devices().
 * NOTE(review): the store into *u is not visible in this extract. */
771 static int oci_device_minor(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
772 unsigned *u
= ASSERT_PTR(userdata
);
775 k
= sd_json_variant_unsigned(v
);
776 if (!DEVICE_MINOR_VALID(k
))
777 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
778 "Device minor %" PRIu64
" out of range.", k
);
/* Dispatch callback for the "fileMode" field of a devices entry. 'userdata' is a pointer to a mode_t
 * (asserted non-NULL). The value is rejected with a synthetic ERANGE if it has bits outside 07777 or does
 * not survive the conversion to the local mode variable. Otherwise the low 07777 bits of *mode are
 * replaced, leaving the remaining bits (e.g. the file type set by oci_device_type()) untouched. */
784 static int oci_device_file_mode(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
785 mode_t
*mode
= ASSERT_PTR(userdata
);
789 k
= sd_json_variant_unsigned(v
);
792 if ((m
& ~07777) != 0 || (uint64_t) m
!= k
)
793 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
794 "fileMode out of range, refusing.");
796 *mode
= (*mode
& ~07777) | m
;
/* Dispatch callback for the "devices" array. For each element:
 *   - Settings.extra_nodes is grown by one slot (GREEDY_REALLOC) and the new DeviceNode is initialized
 *     (initializer contents not visible in this extract) and then filled in place by oci_dispatch();
 *     "type" and "path" are mandatory, "major", "minor", "fileMode", "uid" and "gid" are optional;
 *   - for character and block devices both major and minor must have been set (UINT_MAX marks "unset"),
 *     otherwise a synthetic EINVAL is logged;
 *   - the device node name is resolved with devname_from_devnum(); a failure is only logged at debug
 *     level (SD_JSON_DEBUG);
 *   - nodes matching a fixed PATH_IN_SET() list (not visible here) are logged as implicitly created and
 *     ignored.
 * NOTE(review): the bookkeeping of n_extra_nodes and the cleanup on error are not visible here. */
800 static int oci_devices(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
801 Settings
*s
= ASSERT_PTR(userdata
);
805 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
807 static const sd_json_dispatch_field table
[] = {
808 { "type", SD_JSON_VARIANT_STRING
, oci_device_type
, offsetof(DeviceNode
, mode
), SD_JSON_MANDATORY
},
809 { "path", SD_JSON_VARIANT_STRING
, json_dispatch_path
, offsetof(DeviceNode
, path
), SD_JSON_MANDATORY
},
810 { "major", SD_JSON_VARIANT_UNSIGNED
, oci_device_major
, offsetof(DeviceNode
, major
), 0 },
811 { "minor", SD_JSON_VARIANT_UNSIGNED
, oci_device_minor
, offsetof(DeviceNode
, minor
), 0 },
812 { "fileMode", SD_JSON_VARIANT_UNSIGNED
, oci_device_file_mode
, offsetof(DeviceNode
, mode
), 0 },
813 { "uid", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uid_gid
, offsetof(DeviceNode
, uid
), 0 },
814 { "gid", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uid_gid
, offsetof(DeviceNode
, gid
), 0 },
820 if (!GREEDY_REALLOC(s
->extra_nodes
, s
->n_extra_nodes
+ 1))
823 node
= s
->extra_nodes
+ s
->n_extra_nodes
;
824 *node
= (DeviceNode
) {
832 r
= oci_dispatch(e
, table
, flags
, node
);
836 if (S_ISCHR(node
->mode
) || S_ISBLK(node
->mode
)) {
837 _cleanup_free_
char *path
= NULL
;
839 if (node
->major
== UINT_MAX
|| node
->minor
== UINT_MAX
) {
840 r
= json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
841 "Major/minor required when device node is device node.");
845 /* Suppress a couple of implicit device nodes */
846 r
= devname_from_devnum(node
->mode
, makedev(node
->major
, node
->minor
), &path
);
848 json_log(e
, flags
|SD_JSON_DEBUG
, r
, "Failed to resolve device node %u:%u, ignoring: %m", node
->major
, node
->minor
);
850 if (PATH_IN_SET(path
,
862 json_log(e
, flags
|SD_JSON_DEBUG
, 0, "Ignoring devices item for device '%s', as it is implicitly created anyway.", path
);
/* Dispatch callback for "cgroupsPath". Only paths that correspond exactly to a slice unit are accepted:
 * the slice name is derived from the path with cg_path_get_slice(), converted back with
 * cg_slice_to_path(), and the round-tripped path must be path_equal() to the input, otherwise a synthetic
 * EINVAL is reported. Failures of the two conversions are logged with their own error codes. On success
 * the slice name is moved into Settings.slice. Both temporary strings are freed automatically. */
880 static int oci_cgroups_path(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
881 _cleanup_free_
char *slice
= NULL
, *backwards
= NULL
;
882 Settings
*s
= ASSERT_PTR(userdata
);
886 assert_se(p
= sd_json_variant_string(v
));
888 r
= cg_path_get_slice(p
, &slice
);
890 return json_log(v
, flags
, r
, "Couldn't derive slice unit name from path '%s': %m", p
);
892 r
= cg_slice_to_path(slice
, &backwards
);
894 return json_log(v
, flags
, r
, "Couldn't convert slice unit name '%s' back to path: %m", slice
);
896 if (!path_equal(backwards
, p
))
897 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
898 "Control group path '%s' does not refer to slice unit, refusing.", p
);
900 free_and_replace(s
->slice
, slice
);
/* Dispatch callback for the "type" field of a devices cgroup entry. 'userdata' is a pointer to a mode_t
 * (asserted non-NULL). The string is matched against a small set of type letters (only the "b" branch is
 * visible in this extract); unknown strings are reported with a synthetic EINVAL.
 * NOTE(review): the values assigned to *mode are not visible here. */
904 static int oci_cgroup_device_type(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
905 mode_t
*mode
= ASSERT_PTR(userdata
);
908 assert_se(n
= sd_json_variant_string(v
));
912 else if (streq(n
, "b"))
915 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
916 "Control group device type unknown: %s", n
);
/* Dispatch callback for the "access" field of a devices cgroup entry. 'userdata' is a pointer to a struct
 * device_data (asserted non-NULL). The access string is scanned character by character, tracking three
 * local booleans r, w and m (all initially false); any unrecognized character is reported with a synthetic
 * EINVAL.
 * NOTE(review): the per-character handling and the copy of the booleans into *d are not visible in this
 * extract. */
931 static int oci_cgroup_device_access(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
932 struct device_data
*d
= ASSERT_PTR(userdata
);
933 bool r
= false, w
= false, m
= false;
935 for (const char *s
= ASSERT_PTR(sd_json_variant_string(v
)); *s
; s
++)
943 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
944 "Unknown device access character '%c'.", *s
);
/* Dispatch callback for the devices cgroup array. Works in two phases.
 *
 * Phase 1 (per array element): "allow" (mandatory), "type", "major", "minor" and "access" are parsed into
 * a struct device_data. Entries are then filtered:
 *   - one visible branch warns that arbitrary entries are not supported whenever the entry is anything
 *     other than a catch-all (r, w and m all set, no type, no major, no minor); per the comments this is
 *     the 'deny' handling, and such entries are otherwise ignored;
 *   - entries with none of r/w/m set are logged as a warning and ignored;
 *   - a minor without a major, a major without a type, or a type-less entry that is not a catch-all are
 *     reported with a synthetic EOPNOTSUPP; a type-less catch-all is treated as a no-op;
 *   - everything else is appended to a temporary list (freed automatically).
 *
 * Phase 2: the collected entries are serialized into Settings.properties as a "DeviceAllow" property of
 * D-Bus type a(ss). The pattern string is "<t>-*" when neither major nor minor is set, "<t>-<major>" when
 * only the minor is missing, and the result of device_path_make_major_minor() otherwise. All sd-bus
 * failures are returned through bus_log_create_error().
 *
 * NOTE(review): several lines (declarations, 'continue' statements, the construction of 'access' and 't')
 * are not visible in this extract. */
953 static int oci_cgroup_devices(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
954 _cleanup_free_
struct device_data
*list
= NULL
;
955 Settings
*s
= ASSERT_PTR(userdata
);
961 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
963 struct device_data data
= {
968 static const sd_json_dispatch_field table
[] = {
969 { "allow", SD_JSON_VARIANT_BOOLEAN
, sd_json_dispatch_stdbool
, offsetof(struct device_data
, allow
), SD_JSON_MANDATORY
},
970 { "type", SD_JSON_VARIANT_STRING
, oci_cgroup_device_type
, offsetof(struct device_data
, type
), 0 },
971 { "major", SD_JSON_VARIANT_UNSIGNED
, oci_device_major
, offsetof(struct device_data
, major
), 0 },
972 { "minor", SD_JSON_VARIANT_UNSIGNED
, oci_device_minor
, offsetof(struct device_data
, minor
), 0 },
973 { "access", SD_JSON_VARIANT_STRING
, oci_cgroup_device_access
, 0, 0 },
977 r
= oci_dispatch(e
, table
, flags
, &data
);
982 /* The fact that OCI allows 'deny' entries makes really no sense, as 'allow'
983 * vs. 'deny' for the devices cgroup controller is really not about allow-listing and
984 * deny-listing but about adding and removing entries from the allow list. Since we
985 * always start out with an empty allow list we hence ignore the whole thing, as
986 * removing entries which don't exist make no sense. We'll log about this, since this
987 * is really borked in the spec, with one exception: the entry that's supposed to
988 * drop the kernel's default we ignore silently */
990 if (!data
.r
|| !data
.w
|| !data
.m
|| data
.type
!= 0 || data
.major
!= UINT_MAX
|| data
.minor
!= UINT_MAX
)
991 json_log(v
, flags
|SD_JSON_WARNING
, 0, "Devices cgroup allow list with arbitrary 'allow' entries not supported, ignoring.");
993 /* We ignore the 'deny' entry as for us that's implied */
997 if (!data
.r
&& !data
.w
&& !data
.m
) {
/* Use the JSON dispatch flag SD_JSON_WARNING here, like the other warnings in this file; LOG_WARNING is a
 * syslog priority and must not be OR-ed into sd_json_dispatch_flags_t. */
998 json_log(v
, flags
|SD_JSON_WARNING
, 0, "Device cgroup allow list entry with no effect found, ignoring.");
1002 if (data
.minor
!= UINT_MAX
&& data
.major
== UINT_MAX
)
1003 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
1004 "Device cgroup allow list entries with minors but no majors not supported.");
1006 if (data
.major
!= UINT_MAX
&& data
.type
== 0)
1007 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
1008 "Device cgroup allow list entries with majors but no device node type not supported.");
1010 if (data
.type
== 0) {
1011 if (data
.r
&& data
.w
&& data
.m
) /* a catchall allow list entry means we are looking at a noop */
1014 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
),
1015 "Device cgroup allow list entries with no type not supported.");
1018 if (!GREEDY_REALLOC(list
, n_list
+ 1))
1021 list
[n_list
++] = data
;
1027 r
= settings_allocate_properties(s
);
1031 r
= sd_bus_message_open_container(s
->properties
, 'r', "sv");
1033 return bus_log_create_error(r
);
1035 r
= sd_bus_message_append(s
->properties
, "s", "DeviceAllow");
1037 return bus_log_create_error(r
);
1039 r
= sd_bus_message_open_container(s
->properties
, 'v', "a(ss)");
1041 return bus_log_create_error(r
);
1043 r
= sd_bus_message_open_container(s
->properties
, 'a', "(ss)");
1045 return bus_log_create_error(r
);
1047 FOREACH_ARRAY(d
, list
, n_list
) {
1048 _cleanup_free_
char *pattern
= NULL
;
1052 if (d
->minor
== UINT_MAX
) {
1055 if (d
->type
== S_IFBLK
)
1058 assert(d
->type
== S_IFCHR
);
1062 if (d
->major
== UINT_MAX
) {
1063 pattern
= strjoin(t
, "-*");
1067 if (asprintf(&pattern
, "%s-%u", t
, d
->major
) < 0)
1072 assert(d
->major
!= UINT_MAX
); /* If a minor is specified, then a major also needs to be specified */
1074 r
= device_path_make_major_minor(d
->type
, makedev(d
->major
, d
->minor
), &pattern
);
1089 r
= sd_bus_message_append(s
->properties
, "(ss)", pattern
, access
);
1091 return bus_log_create_error(r
);
1094 r
= sd_bus_message_close_container(s
->properties
);
1096 return bus_log_create_error(r
);
1098 r
= sd_bus_message_close_container(s
->properties
);
1100 return bus_log_create_error(r
);
1102 r
= sd_bus_message_close_container(s
->properties
);
1104 return bus_log_create_error(r
);
/* JSON dispatch callback for a single memory limit value; userdata points at the uint64_t to fill in.
 *
 * Visible checks: a value that is not negative must be an unsigned integer (otherwise EINVAL), and an
 * unsigned value >= UINT64_MAX is refused with ERANGE.
 *
 * NOTE(review): the body of the "negative" branch and the final store through 'm' are not part of this
 * excerpt; presumably a negative value maps to "no limit" — confirm against the full file. */
1109 static int oci_cgroup_memory_limit(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1110 uint64_t *m
= ASSERT_PTR(userdata
);
1113 if (sd_json_variant_is_negative(v
)) {
1118 if (!sd_json_variant_is_unsigned(v
))
1119 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1120 "Memory limit is not an unsigned integer.");
1122 k
= sd_json_variant_unsigned(v
);
/* UINT64_MAX itself is refused; oci_cgroup_memory() initializes its fields to that value and tests against it */
1123 if (k
>= UINT64_MAX
)
1124 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
1125 "Memory limit too large: %" PRIu64
, k
);
1131 static int oci_cgroup_memory(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1133 struct memory_data
{
1135 uint64_t reservation
;
1138 .limit
= UINT64_MAX
,
1139 .reservation
= UINT64_MAX
,
1143 static const sd_json_dispatch_field table
[] = {
1144 { "limit", SD_JSON_VARIANT_NUMBER
, oci_cgroup_memory_limit
, offsetof(struct memory_data
, limit
), 0 },
1145 { "reservation", SD_JSON_VARIANT_NUMBER
, oci_cgroup_memory_limit
, offsetof(struct memory_data
, reservation
), 0 },
1146 { "swap", SD_JSON_VARIANT_NUMBER
, oci_cgroup_memory_limit
, offsetof(struct memory_data
, swap
), 0 },
1147 { "kernel", SD_JSON_VARIANT_NUMBER
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1148 { "kernelTCP", SD_JSON_VARIANT_NUMBER
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1149 { "swapiness", SD_JSON_VARIANT_NUMBER
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1150 { "disableOOMKiller", SD_JSON_VARIANT_BOOLEAN
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1154 Settings
*s
= ASSERT_PTR(userdata
);
1157 r
= oci_dispatch(v
, table
, flags
, &data
);
1161 if (data
.swap
!= UINT64_MAX
) {
1162 if (data
.limit
== UINT64_MAX
)
1163 json_log(v
, flags
|LOG_WARNING
, 0, "swap limit without memory limit is not supported, ignoring.");
1164 else if (data
.swap
< data
.limit
)
1165 json_log(v
, flags
|LOG_WARNING
, 0, "swap limit is below memory limit, ignoring.");
1167 r
= settings_allocate_properties(s
);
1171 r
= sd_bus_message_append(s
->properties
, "(sv)", "MemorySwapMax", "t", data
.swap
- data
.limit
);
1173 return bus_log_create_error(r
);
1177 if (data
.limit
!= UINT64_MAX
) {
1178 r
= settings_allocate_properties(s
);
1182 r
= sd_bus_message_append(s
->properties
, "(sv)", "MemoryMax", "t", data
.limit
);
1184 return bus_log_create_error(r
);
1187 if (data
.reservation
!= UINT64_MAX
) {
1188 r
= settings_allocate_properties(s
);
1192 r
= sd_bus_message_append(s
->properties
, "(sv)", "MemoryLow", "t", data
.reservation
);
1194 return bus_log_create_error(r
);
/* JSON dispatch callback for the OCI cpu "shares" field; userdata points at the uint64_t weight to set.
 *
 * Values outside CGROUP_CPU_SHARES_MIN..CGROUP_CPU_SHARES_MAX are refused with ERANGE. Accepted values are
 * rescaled by CGROUP_WEIGHT_DEFAULT / CGROUP_CPU_SHARES_DEFAULT and clamped to
 * CGROUP_WEIGHT_MIN..CGROUP_WEIGHT_MAX before being stored. */
1207 static int oci_cgroup_cpu_shares(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1208 uint64_t k
, *u
= ASSERT_PTR(userdata
);
1210 k
= sd_json_variant_unsigned(v
);
1211 if (k
< CGROUP_CPU_SHARES_MIN
|| k
> CGROUP_CPU_SHARES_MAX
)
1212 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
), "shares value out of range.");
1214 /* convert from cgroup v1 cpu.shares to v2 cpu.weight */
/* Compile-time proof that the multiplication below cannot overflow for any value that passed the range check */
1215 assert_cc(CGROUP_CPU_SHARES_MAX
<= UINT64_MAX
/ CGROUP_WEIGHT_DEFAULT
);
1216 *u
= CLAMP(k
* CGROUP_WEIGHT_DEFAULT
/ CGROUP_CPU_SHARES_DEFAULT
, CGROUP_WEIGHT_MIN
, CGROUP_WEIGHT_MAX
);
/* JSON dispatch callback shared by the OCI cpu "quota" and "period" fields (see the table in
 * oci_cgroup_cpu()); userdata points at the uint64_t to fill in.
 *
 * Refuses 0 and UINT64_MAX with ERANGE (k is unsigned, so "k <= 0" only matches 0). The store through 'u'
 * is not part of this excerpt. */
1220 static int oci_cgroup_cpu_quota(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1221 uint64_t k
, *u
= ASSERT_PTR(userdata
);
1223 k
= sd_json_variant_unsigned(v
);
1224 if (k
<= 0 || k
>= UINT64_MAX
)
1225 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
), "period/quota value out of range.");
/* JSON dispatch callback for the OCI cpu "cpus" field; userdata is the enclosing struct cpu_data.
 *
 * The string value is parsed with parse_cpu_set(); a parse failure is logged and returned. On success
 * the parsed set replaces data->cpu_set via cpu_set_done_and_replace(), whose result is returned. */
1231 static int oci_cgroup_cpu_cpus(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1232 struct cpu_data
*data
= ASSERT_PTR(userdata
);
1237 assert_se(n
= sd_json_variant_string(v
));
1239 r
= parse_cpu_set(n
, &set
);
1241 return json_log(v
, flags
, r
, "Failed to parse CPU set specification: %s", n
);
1243 return cpu_set_done_and_replace(data
->cpu_set
, set
);
1246 static int oci_cgroup_cpu(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1248 static const sd_json_dispatch_field table
[] = {
1249 { "shares", SD_JSON_VARIANT_UNSIGNED
, oci_cgroup_cpu_shares
, offsetof(struct cpu_data
, weight
), 0 },
1250 { "quota", SD_JSON_VARIANT_UNSIGNED
, oci_cgroup_cpu_quota
, offsetof(struct cpu_data
, quota
), 0 },
1251 { "period", SD_JSON_VARIANT_UNSIGNED
, oci_cgroup_cpu_quota
, offsetof(struct cpu_data
, period
), 0 },
1252 { "realtimeRuntime", SD_JSON_VARIANT_UNSIGNED
, oci_unsupported
, 0, 0 },
1253 { "realtimePeriod", SD_JSON_VARIANT_UNSIGNED
, oci_unsupported
, 0, 0 },
1254 { "cpus", SD_JSON_VARIANT_STRING
, oci_cgroup_cpu_cpus
, 0, 0 },
1255 { "mems", SD_JSON_VARIANT_STRING
, oci_unsupported
, 0, 0 },
1259 struct cpu_data data
= {
1260 .weight
= UINT64_MAX
,
1261 .quota
= UINT64_MAX
,
1262 .period
= UINT64_MAX
,
1265 Settings
*s
= ASSERT_PTR(userdata
);
1268 r
= oci_dispatch(v
, table
, flags
, &data
);
1270 cpu_set_done(&data
.cpu_set
);
1274 cpu_set_done_and_replace(s
->cpu_set
, data
.cpu_set
);
1276 if (data
.weight
!= UINT64_MAX
) {
1277 r
= settings_allocate_properties(s
);
1281 r
= sd_bus_message_append(s
->properties
, "(sv)", "CPUWeight", "t", data
.weight
);
1283 return bus_log_create_error(r
);
1286 if (data
.quota
!= UINT64_MAX
&& data
.period
!= UINT64_MAX
) {
1287 r
= settings_allocate_properties(s
);
1291 r
= sd_bus_message_append(s
->properties
, "(sv)", "CPUQuotaPerSecUSec", "t", data
.quota
* USEC_PER_SEC
/ data
.period
);
1293 return bus_log_create_error(r
);
1295 r
= sd_bus_message_append(s
->properties
, "(sv)", "CPUQuotaPeriodUSec", "t", data
.period
);
1297 return bus_log_create_error(r
);
1299 } else if ((data
.quota
!= UINT64_MAX
) != (data
.period
!= UINT64_MAX
))
1300 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1301 "CPU quota and period not used together.");
1306 static uint64_t cgroup_weight_blkio_to_io(uint64_t blkio_weight
) {
1307 /* convert from cgroup v1 blkio.weight to v2 io.weight */
1308 assert_cc(CGROUP_BLKIO_WEIGHT_MAX
<= UINT64_MAX
/ CGROUP_WEIGHT_DEFAULT
);
1309 return CLAMP(blkio_weight
* CGROUP_WEIGHT_DEFAULT
/ CGROUP_BLKIO_WEIGHT_DEFAULT
,
1310 CGROUP_WEIGHT_MIN
, CGROUP_WEIGHT_MAX
);
/* JSON dispatch callback for the OCI blockIO "weight" field; userdata is the Settings object.
 *
 * Values outside CGROUP_BLKIO_WEIGHT_MIN..CGROUP_BLKIO_WEIGHT_MAX are refused with ERANGE. Accepted values
 * are converted with cgroup_weight_blkio_to_io() and appended to s->properties as "IOWeight". */
1313 static int oci_cgroup_block_io_weight(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1314 Settings
*s
= ASSERT_PTR(userdata
);
1318 k
= sd_json_variant_unsigned(v
);
1319 if (k
< CGROUP_BLKIO_WEIGHT_MIN
|| k
> CGROUP_BLKIO_WEIGHT_MAX
)
1320 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
1321 "Block I/O weight out of range.");
1323 r
= settings_allocate_properties(s
);
1327 r
= sd_bus_message_append(s
->properties
, "(sv)", "IOWeight", "t", cgroup_weight_blkio_to_io(k
));
1329 return bus_log_create_error(r
);
/* JSON dispatch callback for the OCI blockIO "weightDevice" array; userdata is the Settings object.
 *
 * Every array element must carry "major" and "minor"; "weight" is optional and starts out as UINT64_MAX,
 * "leafWeight" is routed to oci_unsupported(). A weight outside
 * CGROUP_BLKIO_WEIGHT_MIN..CGROUP_BLKIO_WEIGHT_MAX is refused with ERANGE. For accepted entries a block
 * device path is built from major/minor and a one-element "IODeviceWeight" property is appended.
 *
 * NOTE(review): what happens when the weight is left at UINT64_MAX is not part of this excerpt;
 * presumably the entry is skipped — confirm against the full file. */
1334 static int oci_cgroup_block_io_weight_device(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1335 Settings
*s
= ASSERT_PTR(userdata
);
1339 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1340 struct device_data
{
1347 .weight
= UINT64_MAX
,
1350 static const sd_json_dispatch_field table
[] = {
1351 { "major", SD_JSON_VARIANT_UNSIGNED
, oci_device_major
, offsetof(struct device_data
, major
), SD_JSON_MANDATORY
},
1352 { "minor", SD_JSON_VARIANT_UNSIGNED
, oci_device_minor
, offsetof(struct device_data
, minor
), SD_JSON_MANDATORY
},
1353 { "weight", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uint64
, offsetof(struct device_data
, weight
), 0 },
1354 { "leafWeight", SD_JSON_VARIANT_INTEGER
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1358 _cleanup_free_
char *path
= NULL
;
1360 r
= oci_dispatch(e
, table
, flags
, &data
);
1364 if (data
.weight
== UINT64_MAX
)
1367 if (data
.weight
< CGROUP_BLKIO_WEIGHT_MIN
|| data
.weight
> CGROUP_BLKIO_WEIGHT_MAX
)
1368 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
1369 "Block I/O device weight out of range.");
1371 r
= device_path_make_major_minor(S_IFBLK
, makedev(data
.major
, data
.minor
), &path
);
1373 return json_log(v
, flags
, r
, "Failed to build device path: %m");
1375 r
= settings_allocate_properties(s
);
/* The per-device weight goes through the same v1 to v2 conversion as the global one */
1379 r
= sd_bus_message_append(s
->properties
, "(sv)", "IODeviceWeight", "a(st)", 1,
1380 path
, cgroup_weight_blkio_to_io(data
.weight
));
1382 return bus_log_create_error(r
);
/* JSON dispatch callback shared by the four OCI blockIO throttle arrays; userdata is the Settings object.
 *
 * The property name is chosen from the JSON field name. Every array element must carry "major", "minor"
 * and "rate"; a rate >= UINT64_MAX is refused with ERANGE. For each element a block device path is built
 * from major/minor and a one-element property of type a(st) is appended.
 *
 * NOTE(review): the last arm of the property name selection is not part of this excerpt; it presumably
 * covers "throttleWriteIOPSDevice", the remaining field dispatched here by oci_cgroup_block_io(). */
1388 static int oci_cgroup_block_io_throttle(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1389 Settings
*s
= ASSERT_PTR(userdata
);
1394 pname
= streq(name
, "throttleReadBpsDevice") ? "IOReadBandwidthMax" :
1395 streq(name
, "throttleWriteBpsDevice") ? "IOWriteBandwidthMax" :
1396 streq(name
, "throttleReadIOPSDevice") ? "IOReadIOPSMax" :
1399 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1400 struct device_data
{
1409 static const sd_json_dispatch_field table
[] = {
1410 { "major", SD_JSON_VARIANT_UNSIGNED
, oci_device_major
, offsetof(struct device_data
, major
), SD_JSON_MANDATORY
},
1411 { "minor", SD_JSON_VARIANT_UNSIGNED
, oci_device_minor
, offsetof(struct device_data
, minor
), SD_JSON_MANDATORY
},
1412 { "rate", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uint64
, offsetof(struct device_data
, rate
), SD_JSON_MANDATORY
},
1416 _cleanup_free_
char *path
= NULL
;
1418 r
= oci_dispatch(e
, table
, flags
, &data
);
1422 if (data
.rate
>= UINT64_MAX
)
1423 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
1424 "Block I/O device rate out of range.");
1426 r
= device_path_make_major_minor(S_IFBLK
, makedev(data
.major
, data
.minor
), &path
);
1428 return json_log(v
, flags
, r
, "Failed to build device path: %m");
1430 r
= settings_allocate_properties(s
);
1434 r
= sd_bus_message_append(s
->properties
, "(sv)", pname
, "a(st)", 1, path
, (uint64_t) data
.rate
);
1436 return bus_log_create_error(r
);
/* JSON dispatch callback for the OCI "blockIO" resources object. Pure dispatcher: each known field is
 * handed to its handler with userdata passed through unchanged; "leafWeight" goes to oci_unsupported(). */
1442 static int oci_cgroup_block_io(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1444 static const sd_json_dispatch_field table
[] = {
1445 { "weight", SD_JSON_VARIANT_UNSIGNED
, oci_cgroup_block_io_weight
, 0, 0 },
1446 { "leafWeight", SD_JSON_VARIANT_UNSIGNED
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1447 { "weightDevice", SD_JSON_VARIANT_ARRAY
, oci_cgroup_block_io_weight_device
, 0, 0 },
1448 { "throttleReadBpsDevice", SD_JSON_VARIANT_ARRAY
, oci_cgroup_block_io_throttle
, 0, 0 },
1449 { "throttleWriteBpsDevice", SD_JSON_VARIANT_ARRAY
, oci_cgroup_block_io_throttle
, 0, 0 },
1450 { "throttleReadIOPSDevice", SD_JSON_VARIANT_ARRAY
, oci_cgroup_block_io_throttle
, 0, 0 },
1451 { "throttleWriteIOPSDevice", SD_JSON_VARIANT_ARRAY
, oci_cgroup_block_io_throttle
, 0, 0 },
1455 return oci_dispatch(v
, table
, flags
, userdata
);
/* JSON dispatch callback for the OCI "pids" resources object; userdata is the Settings object.
 *
 * The mandatory "limit" field is first captured as a raw JSON variant, since it may be negative. A value
 * that is not negative must be an unsigned integer (otherwise EINVAL). The resulting number is appended
 * to s->properties as "TasksMax".
 *
 * NOTE(review): the handling of a negative limit is not part of this excerpt; presumably it means
 * "unlimited" — confirm against the full file. */
1458 static int oci_cgroup_pids(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1460 static const sd_json_dispatch_field table
[] = {
1461 { "limit", SD_JSON_VARIANT_NUMBER
, sd_json_dispatch_variant
, 0, SD_JSON_MANDATORY
},
1465 _cleanup_(sd_json_variant_unrefp
) sd_json_variant
*k
= NULL
;
1466 Settings
*s
= ASSERT_PTR(userdata
);
1470 r
= oci_dispatch(v
, table
, flags
, &k
);
1474 if (sd_json_variant_is_negative(k
))
1477 if (!sd_json_variant_is_unsigned(k
))
1478 return json_log(k
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1479 "pids limit not unsigned integer, refusing.");
1481 m
= (uint64_t) sd_json_variant_unsigned(k
);
/* NOTE(review): the declaration of 'm' is not visible here; if it is a uint64_t this round-trip
 * comparison can never fail and the range check is dead code — confirm. */
1483 if ((uint64_t) m
!= sd_json_variant_unsigned(k
))
1484 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1485 "pids limit out of range, refusing.");
1488 r
= settings_allocate_properties(s
);
1492 r
= sd_bus_message_append(s
->properties
, "(sv)", "TasksMax", "t", m
);
1494 return bus_log_create_error(r
);
/* JSON dispatch callback for the OCI "resources" object. Pure dispatcher: "devices", "memory", "cpu",
 * "blockIO" and "pids" go to their cgroup handlers with userdata passed through unchanged, while
 * "hugepageLimits", "network" and "rdma" are routed to oci_unsupported(). */
1499 static int oci_resources(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1501 static const sd_json_dispatch_field table
[] = {
1502 { "devices", SD_JSON_VARIANT_ARRAY
, oci_cgroup_devices
, 0, 0 },
1503 { "memory", SD_JSON_VARIANT_OBJECT
, oci_cgroup_memory
, 0, 0 },
1504 { "cpu", SD_JSON_VARIANT_OBJECT
, oci_cgroup_cpu
, 0, 0 },
1505 { "blockIO", SD_JSON_VARIANT_OBJECT
, oci_cgroup_block_io
, 0, 0 },
1506 { "hugepageLimits", SD_JSON_VARIANT_ARRAY
, oci_unsupported
, 0, 0 },
1507 { "network", SD_JSON_VARIANT_OBJECT
, oci_unsupported
, 0, 0 },
1508 { "pids", SD_JSON_VARIANT_OBJECT
, oci_cgroup_pids
, 0, 0 },
1509 { "rdma", SD_JSON_VARIANT_OBJECT
, oci_unsupported
, 0, 0 },
1513 return oci_dispatch(v
, table
, flags
, userdata
);
/* Validates a sysctl key taken from the OCI configuration. The visible checks look at one character at a
 * time (anything <= ' ' or >= 127 is tested for) and track via 'dot' whether the previous character was a
 * dot, so that leading, trailing and doubled dots can be detected (see the inline comments).
 *
 * NOTE(review): the loop header, the return statements and the remaining character checks are not part
 * of this excerpt. */
1516 static bool sysctl_key_valid(const char *s
) {
1519 /* Note that we are a bit stricter here than in systemd-sysctl, as that inherited semantics from the old sysctl
1520 * tool, which were really weird (as it swaps / and . in both ways) */
1527 if (*s
<= ' ' || *s
>= 127)
1533 if (dot
) /* Don't allow two dots next to each other (or at the beginning) */
1541 if (dot
) /* don't allow a dot at the end */
/* JSON dispatch callback for the OCI "sysctl" object; userdata is the Settings object.
 *
 * Walks all key/value pairs of the object: a value that is not a string, or a key rejected by
 * sysctl_key_valid(), fails with EINVAL. Accepted pairs are appended to s->sysctl as two consecutive
 * strings (key, then value). */
1547 static int oci_sysctl(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1548 Settings
*s
= ASSERT_PTR(userdata
);
1553 JSON_VARIANT_OBJECT_FOREACH(k
, w
, v
) {
1556 if (!sd_json_variant_is_string(w
))
1557 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1558 "sysctl parameter is not a string, refusing.");
1560 assert_se(m
= sd_json_variant_string(w
));
1562 if (!sysctl_key_valid(k
))
1563 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1564 "sysctl key invalid, refusing: %s", k
);
1566 r
= strv_extend_many(&s
->sysctl
, k
, m
);
/* Maps an OCI seccomp action name (e.g. "SCMP_ACT_ALLOW") to the matching libseccomp action value by
 * scanning a static name table; entries for actions that newer libseccomp versions define are guarded by
 * preprocessor checks. "SCMP_ACT_ERRNO" is mapped to SCMP_ACT_ERRNO(EPERM). The name comparison uses
 * streq_ptr().
 *
 * NOTE(review): the statements executed on a match and the return value for unknown names are not part
 * of this excerpt. */
1575 static int oci_seccomp_action_from_string(const char *name
, uint32_t *ret
) {
1577 static const struct {
1581 { "SCMP_ACT_ALLOW", SCMP_ACT_ALLOW
},
1582 { "SCMP_ACT_ERRNO", SCMP_ACT_ERRNO(EPERM
) }, /* the OCI spec doesn't document the error, but it appears EPERM is supposed to be used */
1583 { "SCMP_ACT_KILL", SCMP_ACT_KILL
},
1584 #ifdef SCMP_ACT_KILL_PROCESS
1585 { "SCMP_ACT_KILL_PROCESS", SCMP_ACT_KILL_PROCESS
},
1587 #ifdef SCMP_ACT_KILL_THREAD
1588 { "SCMP_ACT_KILL_THREAD", SCMP_ACT_KILL_THREAD
},
1591 { "SCMP_ACT_LOG", SCMP_ACT_LOG
},
1593 { "SCMP_ACT_TRAP", SCMP_ACT_TRAP
},
1595 /* We don't support SCMP_ACT_TRACE because that requires a tracer, and that doesn't really make sense
1599 FOREACH_ELEMENT(i
, table
)
1600 if (streq_ptr(name
, i
->name
)) {
/* Maps an OCI seccomp architecture name (e.g. "SCMP_ARCH_X86_64") to the matching libseccomp
 * architecture constant by scanning a static name table; architectures that only newer libseccomp
 * versions define are guarded by preprocessor checks. The name comparison uses streq_ptr().
 *
 * NOTE(review): the statements executed on a match and the return value for unknown names are not part
 * of this excerpt. */
1608 static int oci_seccomp_arch_from_string(const char *name
, uint32_t *ret
) {
1610 static const struct {
1614 { "SCMP_ARCH_AARCH64", SCMP_ARCH_AARCH64
},
1615 { "SCMP_ARCH_ARM", SCMP_ARCH_ARM
},
1616 #ifdef SCMP_ARCH_LOONGARCH64
1617 { "SCMP_ARCH_LOONGARCH64", SCMP_ARCH_LOONGARCH64
},
1619 { "SCMP_ARCH_MIPS", SCMP_ARCH_MIPS
},
1620 { "SCMP_ARCH_MIPS64", SCMP_ARCH_MIPS64
},
1621 { "SCMP_ARCH_MIPS64N32", SCMP_ARCH_MIPS64N32
},
1622 { "SCMP_ARCH_MIPSEL", SCMP_ARCH_MIPSEL
},
1623 { "SCMP_ARCH_MIPSEL64", SCMP_ARCH_MIPSEL64
},
1624 { "SCMP_ARCH_MIPSEL64N32", SCMP_ARCH_MIPSEL64N32
},
1625 { "SCMP_ARCH_NATIVE", SCMP_ARCH_NATIVE
},
1626 #ifdef SCMP_ARCH_PARISC
1627 { "SCMP_ARCH_PARISC", SCMP_ARCH_PARISC
},
1629 #ifdef SCMP_ARCH_PARISC64
1630 { "SCMP_ARCH_PARISC64", SCMP_ARCH_PARISC64
},
1632 { "SCMP_ARCH_PPC", SCMP_ARCH_PPC
},
1633 { "SCMP_ARCH_PPC64", SCMP_ARCH_PPC64
},
1634 { "SCMP_ARCH_PPC64LE", SCMP_ARCH_PPC64LE
},
1635 #ifdef SCMP_ARCH_RISCV64
1636 { "SCMP_ARCH_RISCV64", SCMP_ARCH_RISCV64
},
1638 { "SCMP_ARCH_S390", SCMP_ARCH_S390
},
1639 { "SCMP_ARCH_S390X", SCMP_ARCH_S390X
},
1640 { "SCMP_ARCH_X32", SCMP_ARCH_X32
},
1641 { "SCMP_ARCH_X86", SCMP_ARCH_X86
},
1642 { "SCMP_ARCH_X86_64", SCMP_ARCH_X86_64
},
1645 FOREACH_ELEMENT(i
, table
)
1646 if (streq_ptr(i
->name
, name
)) {
/* Maps an OCI seccomp comparison operator name (e.g. "SCMP_CMP_EQ") to the matching enum scmp_compare
 * value by scanning a static name table. The name comparison uses streq_ptr().
 *
 * NOTE(review): the statements executed on a match and the return value for unknown names are not part
 * of this excerpt. */
1654 static int oci_seccomp_compare_from_string(const char *name
, enum scmp_compare
*ret
) {
1656 static const struct {
1658 enum scmp_compare op
;
1660 { "SCMP_CMP_NE", SCMP_CMP_NE
},
1661 { "SCMP_CMP_LT", SCMP_CMP_LT
},
1662 { "SCMP_CMP_LE", SCMP_CMP_LE
},
1663 { "SCMP_CMP_EQ", SCMP_CMP_EQ
},
1664 { "SCMP_CMP_GE", SCMP_CMP_GE
},
1665 { "SCMP_CMP_GT", SCMP_CMP_GT
},
1666 { "SCMP_CMP_MASKED_EQ", SCMP_CMP_MASKED_EQ
},
1669 FOREACH_ELEMENT(i
, table
)
1670 if (streq_ptr(i
->name
, name
)) {
/* JSON dispatch callback for the OCI seccomp "architectures" array; userdata is the seccomp filter
 * context being built.
 *
 * Every element must be a string (otherwise EINVAL) naming an architecture known to
 * oci_seccomp_arch_from_string(); each one is added to the filter with seccomp_arch_add(), and a failure
 * to do so is logged and returned.
 *
 * NOTE(review): the checks between the seccomp_arch_add() call and the error return are not part of this
 * excerpt. */
1678 static int oci_seccomp_archs(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1679 scmp_filter_ctx
*sc
= ASSERT_PTR(userdata
);
1683 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1686 if (!sd_json_variant_is_string(e
))
1687 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1688 "Architecture entry is not a string.");
1690 r
= oci_seccomp_arch_from_string(sd_json_variant_string(e
), &a
);
1692 return json_log(e
, flags
, r
, "Unknown architecture: %s", sd_json_variant_string(e
));
1694 r
= seccomp_arch_add(sc
, a
);
1698 return json_log(e
, flags
, r
, "Failed to add architecture to seccomp filter: %m");
/* One parsed entry of the OCI seccomp "syscalls" array. Besides the argument comparison array declared
 * below, code in this file accesses the members 'names', 'action' and 'n_arguments'; their declarations
 * are not part of this excerpt. */
1704 struct syscall_rule
{
1707 struct scmp_arg_cmp
*arguments
;
/* Releases the heap memory owned by a struct syscall_rule: the 'names' string vector and the
 * 'arguments' array. The structure itself is not freed here. */
1711 static void syscall_rule_done(struct syscall_rule
*rule
) {
1714 strv_free(rule
->names
);
1715 free(rule
->arguments
);
/* JSON dispatch callback for the "action" field of a seccomp syscall rule; userdata points at the
 * uint32_t action to fill in. The string is translated with oci_seccomp_action_from_string(); a failed
 * translation is logged, quoting the offending string, and returned. */
1718 static int oci_seccomp_action(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1719 uint32_t *action
= ASSERT_PTR(userdata
);
1722 r
= oci_seccomp_action_from_string(sd_json_variant_string(v
), action
);
1724 return json_log(v
, flags
, r
, "Unknown system call action '%s': %m", sd_json_variant_string(v
));
/* JSON dispatch callback for the "op" field of a seccomp argument comparison; userdata points at the
 * enum scmp_compare to fill in. The string is translated with oci_seccomp_compare_from_string(); a
 * failed translation is logged, quoting the offending string, and returned. */
1729 static int oci_seccomp_op(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1730 enum scmp_compare
*op
= ASSERT_PTR(userdata
);
1733 r
= oci_seccomp_compare_from_string(sd_json_variant_string(v
), op
);
1735 return json_log(v
, flags
, r
, "Unknown seccomp operator '%s': %m", sd_json_variant_string(v
));
/* JSON dispatch callback for the "args" array of a seccomp syscall rule; userdata is the struct
 * syscall_rule being filled in.
 *
 * For every array element the rule's 'arguments' array is grown by one slot, the slot is initialized and
 * then filled by dispatching "index", "value" and "op" (all mandatory) and "valueTwo" (optional) straight
 * into the struct scmp_arg_cmp. The slot only becomes part of the rule once n_arguments is incremented
 * at the end of the iteration.
 *
 * NOTE(review): the condition guarding the "Wrong number of system call arguments" warning is not part
 * of this excerpt; 'expected' is 4 for SCMP_CMP_MASKED_EQ and 3 otherwise. */
1740 static int oci_seccomp_args(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1741 struct syscall_rule
*rule
= ASSERT_PTR(userdata
);
1745 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1746 static const sd_json_dispatch_field table
[] = {
1747 { "index", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uint32
, offsetof(struct scmp_arg_cmp
, arg
), SD_JSON_MANDATORY
},
1748 { "value", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uint64
, offsetof(struct scmp_arg_cmp
, datum_a
), SD_JSON_MANDATORY
},
1749 { "valueTwo", SD_JSON_VARIANT_UNSIGNED
, sd_json_dispatch_uint64
, offsetof(struct scmp_arg_cmp
, datum_b
), 0 },
1750 { "op", SD_JSON_VARIANT_STRING
, oci_seccomp_op
, offsetof(struct scmp_arg_cmp
, op
), SD_JSON_MANDATORY
},
1754 struct scmp_arg_cmp
*p
;
1757 if (!GREEDY_REALLOC(rule
->arguments
, rule
->n_arguments
+ 1))
1760 p
= rule
->arguments
+ rule
->n_arguments
;
1762 *p
= (struct scmp_arg_cmp
) {
1769 r
= oci_dispatch(e
, table
, flags
, p
);
1773 expected
= p
->op
== SCMP_CMP_MASKED_EQ
? 4 : 3;
1775 json_log(e
, flags
|SD_JSON_WARNING
, 0, "Wrong number of system call arguments for JSON data, ignoring.");
1777 /* Note that we are a bit sloppy here and do not insist that SCMP_CMP_MASKED_EQ gets two datum values,
1778 * and the other only one. That's because buildah for example by default calls things with
1779 * SCMP_CMP_MASKED_EQ but only one argument. We use 0 when the value is not specified. */
1781 rule
->n_arguments
++;
/* JSON dispatch callback for the OCI seccomp "syscalls" array; userdata is the seccomp filter context
 * being built.
 *
 * Every array element is parsed into a temporary struct syscall_rule ("names" and "action" are mandatory,
 * "args" is optional) which is released automatically at the end of the iteration. An empty name list
 * fails with EINVAL. Each name is resolved with seccomp_syscall_resolve_name(); names libseccomp does not
 * know are reported at debug level, and for the others a rule with the parsed action and argument
 * comparisons is added to the filter.
 *
 * NOTE(review): the handling of the seccomp_rule_add_array() result is not part of this excerpt. */
1787 static int oci_seccomp_syscalls(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1788 scmp_filter_ctx
*sc
= ASSERT_PTR(userdata
);
1792 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1793 static const sd_json_dispatch_field table
[] = {
1794 { "names", SD_JSON_VARIANT_ARRAY
, sd_json_dispatch_strv
, offsetof(struct syscall_rule
, names
), SD_JSON_MANDATORY
},
1795 { "action", SD_JSON_VARIANT_STRING
, oci_seccomp_action
, offsetof(struct syscall_rule
, action
), SD_JSON_MANDATORY
},
1796 { "args", SD_JSON_VARIANT_ARRAY
, oci_seccomp_args
, 0, 0 },
1799 _cleanup_(syscall_rule_done
) struct syscall_rule rule
= {
1800 .action
= UINT32_MAX
,
1803 r
= oci_dispatch(e
, table
, flags
, &rule
);
1807 if (strv_isempty(rule
.names
))
1808 return json_log(e
, flags
, SYNTHETIC_ERRNO(EINVAL
), "System call name list is empty.");
1810 STRV_FOREACH(i
, rule
.names
) {
1813 nr
= seccomp_syscall_resolve_name(*i
);
1814 if (nr
== __NR_SCMP_ERROR
) {
1815 log_debug("Unknown syscall %s, skipping.", *i
);
1819 r
= seccomp_rule_add_array(sc
, rule
.action
, nr
, rule
.n_arguments
, rule
.arguments
);
/* JSON dispatch callback for the OCI "seccomp" object; userdata is the Settings object.
 *
 * "defaultAction" is looked up by hand before the table is dispatched, because the filter context has to
 * be created with seccomp_init() from that action first; its table entry has no callback and only
 * enforces presence and type. "architectures" and "syscalls" are then dispatched with the new filter
 * context as userdata. On success any filter previously stored in s->seccomp is released and replaced by
 * the new one.
 *
 * NOTE(review): the final EOPNOTSUPP return ("libseccomp support not enabled") presumably belongs to a
 * build without libseccomp; the preprocessor conditionals selecting between the two variants are not
 * part of this excerpt. */
1829 static int oci_seccomp(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1832 static const sd_json_dispatch_field table
[] = {
1833 { "defaultAction", SD_JSON_VARIANT_STRING
, NULL
, 0, SD_JSON_MANDATORY
},
1834 { "architectures", SD_JSON_VARIANT_ARRAY
, oci_seccomp_archs
, 0, 0 },
1835 { "syscalls", SD_JSON_VARIANT_ARRAY
, oci_seccomp_syscalls
, 0, 0 },
1839 _cleanup_(seccomp_releasep
) scmp_filter_ctx sc
= NULL
;
1840 Settings
*s
= ASSERT_PTR(userdata
);
1841 sd_json_variant
*def
;
1845 def
= sd_json_variant_by_key(v
, "defaultAction");
1847 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
), "defaultAction element missing.");
1849 if (!sd_json_variant_is_string(def
))
1850 return json_log(def
, flags
, SYNTHETIC_ERRNO(EINVAL
), "defaultAction is not a string.");
1852 r
= oci_seccomp_action_from_string(sd_json_variant_string(def
), &d
);
1854 return json_log(def
, flags
, r
, "Unknown default action: %s", sd_json_variant_string(def
));
1856 sc
= seccomp_init(d
);
1858 return json_log(v
, flags
, SYNTHETIC_ERRNO(ENOMEM
), "Couldn't allocate seccomp object.");
1860 r
= oci_dispatch(v
, table
, flags
, sc
);
/* Hand the finished filter over to the settings object; TAKE_PTR() keeps the cleanup handler from releasing it */
1864 seccomp_release(s
->seccomp
);
1865 s
->seccomp
= TAKE_PTR(sc
);
1868 return json_log(v
, flags
, SYNTHETIC_ERRNO(EOPNOTSUPP
), "libseccomp support not enabled, can't parse seccomp object.");
/* JSON dispatch callback for the OCI "rootfsPropagation" string. The value "shared" is tested for
 * explicitly; for the setting that reaches the log call a debug message saying it is ignored is emitted.
 * Nothing is stored in userdata in the visible lines.
 *
 * NOTE(review): the statement executed for "shared" and the return values are not part of this
 * excerpt. */
1872 static int oci_rootfs_propagation(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1875 s
= sd_json_variant_string(v
);
1877 if (streq(s
, "shared"))
1880 json_log(v
, flags
|SD_JSON_DEBUG
, 0, "Ignoring rootfsPropagation setting '%s'.", s
);
/* JSON dispatch callback for the OCI "maskedPaths" array; userdata is the Settings object.
 *
 * Every element must be a string holding an absolute path (otherwise EINVAL). For each path a custom
 * mount of type CUSTOM_MOUNT_INACCESSIBLE is added to the settings, with a private copy of the path as
 * its destination.
 *
 * NOTE(review): what happens for paths matched by oci_exclude_mount(), and the statements following the
 * trailing comment, are not part of this excerpt. */
1884 static int oci_masked_paths(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1885 Settings
*s
= ASSERT_PTR(userdata
);
1888 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1889 _cleanup_free_
char *destination
= NULL
;
1893 if (!sd_json_variant_is_string(e
))
1894 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1895 "Path is not a string, refusing.");
1897 assert_se(p
= sd_json_variant_string(e
));
1899 if (!path_is_absolute(p
))
1900 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1901 "Path is not absolute, refusing: %s", p
);
1903 if (oci_exclude_mount(p
))
1906 destination
= strdup(p
);
1910 m
= custom_mount_add(&s
->custom_mounts
, &s
->n_custom_mounts
, CUSTOM_MOUNT_INACCESSIBLE
);
/* Ownership of the copied path moves into the mount entry */
1914 m
->destination
= TAKE_PTR(destination
);
1916 /* The spec doesn't say this, but apparently pre-existing implementations are lenient towards
1917 * non-existing paths to mask. Let's hence be too. */
/* JSON dispatch callback for the OCI "readonlyPaths" array; userdata is the Settings object.
 *
 * Every element must be a string holding an absolute path (otherwise EINVAL). For each path a custom
 * mount of type CUSTOM_MOUNT_BIND is added whose source is the path prefixed with "+" and whose
 * destination is the path itself, and the mount is flagged read-only.
 *
 * NOTE(review): what happens for paths matched by oci_exclude_mount() is not part of this excerpt. */
1924 static int oci_readonly_paths(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1925 Settings
*s
= ASSERT_PTR(userdata
);
1928 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
1929 _cleanup_free_
char *source
= NULL
, *destination
= NULL
;
1933 if (!sd_json_variant_is_string(e
))
1934 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1935 "Path is not a string, refusing.");
1937 assert_se(p
= sd_json_variant_string(e
));
1939 if (!path_is_absolute(p
))
1940 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
1941 "Path is not absolute, refusing: %s", p
);
1943 if (oci_exclude_mount(p
))
1946 source
= strjoin("+", p
);
1950 destination
= strdup(p
);
1954 m
= custom_mount_add(&s
->custom_mounts
, &s
->n_custom_mounts
, CUSTOM_MOUNT_BIND
);
/* Ownership of both strings moves into the mount entry */
1958 m
->source
= TAKE_PTR(source
);
1959 m
->destination
= TAKE_PTR(destination
);
1960 m
->read_only
= true;
/* JSON dispatch callback for the top-level OCI "linux" object. Pure dispatcher: every known field is
 * handed to its handler with userdata passed through unchanged. "uidMappings" and "gidMappings" share
 * one handler; "intelRdt" and "mountLabel" are routed to oci_unsupported(). */
1966 static int oci_linux(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1968 static const sd_json_dispatch_field table
[] = {
1969 { "namespaces", SD_JSON_VARIANT_ARRAY
, oci_namespaces
, 0, 0 },
1970 { "uidMappings", SD_JSON_VARIANT_ARRAY
, oci_uid_gid_mappings
, 0, 0 },
1971 { "gidMappings", SD_JSON_VARIANT_ARRAY
, oci_uid_gid_mappings
, 0, 0 },
1972 { "devices", SD_JSON_VARIANT_ARRAY
, oci_devices
, 0, 0 },
1973 { "cgroupsPath", SD_JSON_VARIANT_STRING
, oci_cgroups_path
, 0, 0 },
1974 { "resources", SD_JSON_VARIANT_OBJECT
, oci_resources
, 0, 0 },
1975 { "intelRdt", SD_JSON_VARIANT_OBJECT
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1976 { "sysctl", SD_JSON_VARIANT_OBJECT
, oci_sysctl
, 0, 0 },
1977 { "seccomp", SD_JSON_VARIANT_OBJECT
, oci_seccomp
, 0, 0 },
1978 { "rootfsPropagation", SD_JSON_VARIANT_STRING
, oci_rootfs_propagation
, 0, 0 },
1979 { "maskedPaths", SD_JSON_VARIANT_ARRAY
, oci_masked_paths
, 0, 0 },
1980 { "readonlyPaths", SD_JSON_VARIANT_ARRAY
, oci_readonly_paths
, 0, 0 },
1981 { "mountLabel", SD_JSON_VARIANT_STRING
, oci_unsupported
, 0, SD_JSON_PERMISSIVE
},
1985 return oci_dispatch(v
, table
, flags
, userdata
);
/* JSON dispatch callback for the "timeout" field of an OCI hook; userdata points at the usec_t to fill
 * in. The JSON value is multiplied by USEC_PER_SEC before it is stored. Zero, and values large enough
 * that the multiplication would reach or exceed UINT64_MAX, are refused with ERANGE. */
1988 static int oci_hook_timeout(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
1989 usec_t
*u
= ASSERT_PTR(userdata
);
1992 k
= sd_json_variant_unsigned(v
);
1993 if (k
== 0 || k
> (UINT64_MAX
-1)/USEC_PER_SEC
)
1994 return json_log(v
, flags
, SYNTHETIC_ERRNO(ERANGE
),
1995 "Hook timeout value out of range.");
1997 *u
= k
* USEC_PER_SEC
;
/* JSON dispatch callback shared by the "prestart", "poststart" and "poststop" hook arrays; userdata is
 * the Settings object.
 *
 * The JSON field name selects which hook array (and counter) of the settings is extended. For every
 * element the array is grown by one slot, the slot is initialized with an infinite timeout and then
 * filled by dispatching "path" (mandatory), "args", "env" and "timeout" into it.
 *
 * NOTE(review): the three free calls after the dispatch presumably run only when dispatching failed, and
 * the counter increment for the success case is not part of this excerpt — confirm against the full
 * file. */
2001 static int oci_hooks_array(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
2002 Settings
*s
= ASSERT_PTR(userdata
);
2006 JSON_VARIANT_ARRAY_FOREACH(e
, v
) {
2008 static const sd_json_dispatch_field table
[] = {
2009 { "path", SD_JSON_VARIANT_STRING
, json_dispatch_path
, offsetof(OciHook
, path
), SD_JSON_MANDATORY
},
2010 { "args", SD_JSON_VARIANT_ARRAY
, oci_args
, offsetof(OciHook
, args
), 0, },
2011 { "env", SD_JSON_VARIANT_ARRAY
, oci_env
, offsetof(OciHook
, env
), 0 },
2012 { "timeout", SD_JSON_VARIANT_UNSIGNED
, oci_hook_timeout
, offsetof(OciHook
, timeout
), 0 },
2016 OciHook
**array
, *new_item
;
2019 if (streq(name
, "prestart")) {
2020 array
= &s
->oci_hooks_prestart
;
2021 n_array
= &s
->n_oci_hooks_prestart
;
2022 } else if (streq(name
, "poststart")) {
2023 array
= &s
->oci_hooks_poststart
;
2024 n_array
= &s
->n_oci_hooks_poststart
;
/* Only the three names listed in the table of oci_hooks() are dispatched here, hence the assertion */
2026 assert(streq(name
, "poststop"));
2027 array
= &s
->oci_hooks_poststop
;
2028 n_array
= &s
->n_oci_hooks_poststop
;
2031 if (!GREEDY_REALLOC(*array
, *n_array
+ 1))
2034 new_item
= *array
+ *n_array
;
2036 *new_item
= (OciHook
) {
2037 .timeout
= USEC_INFINITY
,
2040 r
= oci_dispatch(e
, table
, flags
, new_item
);
2042 free(new_item
->path
);
2043 strv_free(new_item
->args
);
2044 strv_free(new_item
->env
);
/* JSON dispatch callback for the top-level OCI "hooks" object. Pure dispatcher: the "prestart",
 * "poststart" and "poststop" arrays all go to oci_hooks_array(), which tells them apart by field name;
 * userdata is passed through unchanged. */
2054 static int oci_hooks(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
2056 static const sd_json_dispatch_field table
[] = {
2057 { "prestart", SD_JSON_VARIANT_ARRAY
, oci_hooks_array
, 0, 0 },
2058 { "poststart", SD_JSON_VARIANT_ARRAY
, oci_hooks_array
, 0, 0 },
2059 { "poststop", SD_JSON_VARIANT_ARRAY
, oci_hooks_array
, 0, 0 },
2063 return oci_dispatch(v
, table
, flags
, userdata
);
/* JSON dispatch callback for the top-level OCI "annotations" object. Walks all key/value pairs: an
 * annotation with an empty key or a non-string value fails with EINVAL. Valid annotations are not stored
 * in the visible lines, they are only reported at debug level as being ignored.
 *
 * NOTE(review): the condition in front of the "empty key" error is not part of this excerpt. */
2066 static int oci_annotations(const char *name
, sd_json_variant
*v
, sd_json_dispatch_flags_t flags
, void *userdata
) {
2070 JSON_VARIANT_OBJECT_FOREACH(k
, w
, v
) {
2073 return json_log(v
, flags
, SYNTHETIC_ERRNO(EINVAL
),
2074 "Annotation with empty key, refusing.");
2076 if (!sd_json_variant_is_string(w
))
2077 return json_log(w
, flags
, SYNTHETIC_ERRNO(EINVAL
),
2078 "Annotation has non-string value, refusing.");
2080 json_log(w
, flags
|SD_JSON_DEBUG
, 0, "Ignoring annotation '%s' with value '%s'.", k
, sd_json_variant_string(w
));
2086 int oci_load(FILE *f
, const char *bundle
, Settings
**ret
) {
2088 static const sd_json_dispatch_field table
[] = {
2089 { "ociVersion", SD_JSON_VARIANT_STRING
, NULL
, 0, SD_JSON_MANDATORY
},
2090 { "process", SD_JSON_VARIANT_OBJECT
, oci_process
, 0, 0 },
2091 { "root", SD_JSON_VARIANT_OBJECT
, oci_root
, 0, 0 },
2092 { "hostname", SD_JSON_VARIANT_STRING
, oci_hostname
, 0, 0 },
2093 { "mounts", SD_JSON_VARIANT_ARRAY
, oci_mounts
, 0, 0 },
2094 { "linux", SD_JSON_VARIANT_OBJECT
, oci_linux
, 0, 0 },
2095 { "hooks", SD_JSON_VARIANT_OBJECT
, oci_hooks
, 0, 0 },
2096 { "annotations", SD_JSON_VARIANT_OBJECT
, oci_annotations
, 0, 0 },
2100 _cleanup_(sd_json_variant_unrefp
) sd_json_variant
*oci
= NULL
;
2101 _cleanup_(settings_freep
) Settings
*s
= NULL
;
2102 unsigned line
= 0, column
= 0;
2109 path
= strjoina(bundle
, "/config.json");
2111 r
= sd_json_parse_file(f
, path
, 0, &oci
, &line
, &column
);
2113 if (line
!= 0 && column
!= 0)
2114 return log_error_errno(r
, "Failed to parse '%s' at %u:%u: %m", path
, line
, column
);
2116 return log_error_errno(r
, "Failed to parse '%s': %m", path
);
2119 v
= sd_json_variant_by_key(oci
, "ociVersion");
2121 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2122 "JSON file '%s' is not an OCI bundle configuration file. Refusing.",
2124 if (!streq_ptr(sd_json_variant_string(v
), "1.0.0"))
2125 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
2126 "OCI bundle version not supported: %s",
2127 strna(sd_json_variant_string(v
)));
2130 // _cleanup_free_ char *formatted = NULL;
2131 // assert_se(json_variant_format(oci, SD_JSON_FORMAT_PRETTY|JSON_FORMAT_COLOR, &formatted) >= 0);
2132 // fputs(formatted, stdout);
2139 s
->start_mode
= START_PID1
;
2140 s
->resolv_conf
= RESOLV_CONF_OFF
;
2141 s
->link_journal
= LINK_NO
;
2142 s
->timezone
= TIMEZONE_OFF
;
2144 s
->bundle
= strdup(bundle
);
2148 r
= oci_dispatch(oci
, table
, 0, s
);
2152 if (s
->properties
) {
2153 r
= sd_bus_message_seal(s
->properties
, 0, 0);
2155 return log_error_errno(r
, "Cannot seal properties bus message: %m");