✓ SystemCallFilter=
✓ SystemCallArchitectures=
✓ SystemCallErrorNumber=
+✓ SystemCallLog=
✓ MemoryDenyWriteExecute=
✓ RestrictNamespaces=
✓ RestrictRealtime=
details.</para></listitem>
</varlistentry>
+ <varlistentry>
+ <term><varname>SystemCallLog=</varname></term>
+
+ <listitem><para>Takes a space-separated list of system call names. If this setting is used, all
+ system calls executed by the unit processes for the listed ones will be logged. If the first
+ character of the list is <literal>~</literal>, the effect is inverted: all system calls except the
+ listed system calls will be logged. If running in user mode, or in system mode, but without the
+ <constant>CAP_SYS_ADMIN</constant> capability (e.g. setting <varname>User=nobody</varname>),
+ <varname>NoNewPrivileges=yes</varname> is implied. This feature makes use of the Secure Computing
+ Mode 2 interfaces of the kernel ('seccomp filtering') and is useful for auditing or setting up a
+ minimal sandboxing environment. This option may be specified more than once, in which case the filter
+ masks are merged. If the empty string is assigned, the filter is reset, all prior assignments will
+ have no effect. This does not affect commands prefixed with <literal>+</literal>.</para></listitem>
+ </varlistentry>
+
</variablelist>
</refsect1>
return sd_bus_message_close_container(reply);
}
+static int property_get_syscall_log(
+ sd_bus *bus,
+ const char *path,
+ const char *interface,
+ const char *property,
+ sd_bus_message *reply,
+ void *userdata,
+ sd_bus_error *error) {
+
+ ExecContext *c = userdata;
+ _cleanup_strv_free_ char **l = NULL;
+ int r;
+
+#if HAVE_SECCOMP
+ void *id, *val;
+#endif
+
+ assert(bus);
+ assert(reply);
+ assert(c);
+
+ r = sd_bus_message_open_container(reply, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_append(reply, "b", c->syscall_log_allow_list);
+ if (r < 0)
+ return r;
+
+#if HAVE_SECCOMP
+ HASHMAP_FOREACH_KEY(val, id, c->syscall_log) {
+ char *name = NULL;
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ if (!name)
+ continue;
+
+ r = strv_consume(&l, name);
+ if (r < 0)
+ return r;
+ }
+#endif
+
+ strv_sort(l);
+
+ r = sd_bus_message_append_strv(reply, l);
+ if (r < 0)
+ return r;
+
+ return sd_bus_message_close_container(reply);
+}
+
static int property_get_syscall_archs(
sd_bus *bus,
const char *path,
SD_BUS_PROPERTY("SystemCallFilter", "(bas)", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("SystemCallErrorNumber", "i", bus_property_get_int, offsetof(ExecContext, syscall_errno), SD_BUS_VTABLE_PROPERTY_CONST),
+ SD_BUS_PROPERTY("SystemCallLog", "(bas)", property_get_syscall_log, 0, SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("Personality", "s", property_get_personality, offsetof(ExecContext, personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
return 1;
+ } else if (streq(name, "SystemCallLog")) {
+ int allow_list;
+ _cleanup_strv_free_ char **l = NULL;
+
+ r = sd_bus_message_enter_container(message, 'r', "bas");
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read(message, "b", &allow_list);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_read_strv(message, &l);
+ if (r < 0)
+ return r;
+
+ r = sd_bus_message_exit_container(message);
+ if (r < 0)
+ return r;
+
+ if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
+ _cleanup_free_ char *joined = NULL;
+ SeccompParseFlags invert_flag = allow_list ? 0 : SECCOMP_PARSE_INVERT;
+ char **s;
+
+ if (strv_isempty(l)) {
+ c->syscall_log_allow_list = false;
+ c->syscall_log = hashmap_free(c->syscall_log);
+
+ unit_write_settingf(u, flags, name, "SystemCallLog=");
+ return 1;
+ }
+
+ if (!c->syscall_log) {
+ c->syscall_log = hashmap_new(NULL);
+ if (!c->syscall_log)
+ return log_oom();
+
+ c->syscall_log_allow_list = allow_list;
+ }
+
+ STRV_FOREACH(s, l) {
+ _cleanup_free_ char *n = NULL;
+ int e;
+
+ r = parse_syscall_and_errno(*s, &n, &e);
+ if (r < 0)
+ return r;
+
+ r = seccomp_parse_syscall_filter(n,
+ 0, /* errno not used */
+ c->syscall_log,
+ SECCOMP_PARSE_LOG | SECCOMP_PARSE_PERMISSIVE |
+ invert_flag |
+ (c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
+ u->id,
+ NULL, 0);
+ if (r < 0)
+ return r;
+ }
+
+ joined = strv_join(l, " ");
+ if (!joined)
+ return -ENOMEM;
+
+ unit_write_settingf(u, flags, name, "SystemCallLog=%s%s", allow_list ? "" : "~", joined);
+ }
+
+ return 1;
+
} else if (streq(name, "SystemCallArchitectures")) {
_cleanup_strv_free_ char **l = NULL;
!hashmap_isempty(c->syscall_filter);
}
+static bool context_has_syscall_logs(const ExecContext *c) {
+ assert(c);
+
+ return c->syscall_log_allow_list ||
+ !hashmap_isempty(c->syscall_log);
+}
+
static bool context_has_no_new_privileges(const ExecContext *c) {
assert(c);
c->protect_kernel_logs ||
c->private_devices ||
context_has_syscall_filters(c) ||
+ context_has_syscall_logs(c) ||
!set_isempty(c->syscall_archs) ||
c->lock_personality ||
c->protect_hostname;
return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
}
+static int apply_syscall_log(const Unit* u, const ExecContext *c) {
+#ifdef SCMP_ACT_LOG
+ uint32_t default_action, action;
+#endif
+
+ assert(u);
+ assert(c);
+
+ if (!context_has_syscall_logs(c))
+ return 0;
+
+#ifdef SCMP_ACT_LOG
+ if (skip_seccomp_unavailable(u, "SystemCallLog="))
+ return 0;
+
+ if (c->syscall_log_allow_list) {
+ /* Log nothing but the ones listed */
+ default_action = SCMP_ACT_ALLOW;
+ action = SCMP_ACT_LOG;
+ } else {
+ /* Log everything but the ones listed */
+ default_action = SCMP_ACT_LOG;
+ action = SCMP_ACT_ALLOW;
+ }
+
+ return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_log, action, false);
+#else
+ /* old libseccomp */
+ log_unit_debug(u, "SECCOMP feature SCMP_ACT_LOG not available, skipping SystemCallLog=");
+ return 0;
+#endif
+}
+
static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
assert(u);
assert(c);
return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
}
+ r = apply_syscall_log(unit, context);
+ if (r < 0) {
+ *exit_status = EXIT_SECCOMP;
+ return log_unit_error_errno(unit, r, "Failed to apply system call log filters: %m");
+ }
+
/* This really should remain the last step before the execve(), to make sure our own code is unaffected
* by the filter as little as possible. */
r = apply_syscall_filter(unit, context, needs_ambient_hack);
int syscall_errno;
bool syscall_allow_list:1;
+ Hashmap *syscall_log;
+ bool syscall_log_allow_list:1; /* Log listed system calls */
+
bool address_families_allow_list:1;
Set *address_families;
`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context)
$1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs)
$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)
+$1.SystemCallLog, config_parse_syscall_log, 0, offsetof($1, exec_context)
$1.MemoryDenyWriteExecute, config_parse_bool, 0, offsetof($1, exec_context.memory_deny_write_execute)
$1.RestrictNamespaces, config_parse_restrict_namespaces, 0, offsetof($1, exec_context)
$1.RestrictRealtime, config_parse_bool, 0, offsetof($1, exec_context.restrict_realtime)
`$1.SystemCallFilter, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallArchitectures, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.SystemCallErrorNumber, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
+$1.SystemCallLog, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.MemoryDenyWriteExecute, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictNamespaces, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
$1.RestrictRealtime, config_parse_warn_compat, DISABLED_CONFIGURATION, 0
}
}
+int config_parse_syscall_log(
+ const char *unit,
+ const char *filename,
+ unsigned line,
+ const char *section,
+ unsigned section_line,
+ const char *lvalue,
+ int ltype,
+ const char *rvalue,
+ void *data,
+ void *userdata) {
+
+ ExecContext *c = data;
+ _unused_ const Unit *u = userdata;
+ bool invert = false;
+ const char *p;
+ int r;
+
+ assert(filename);
+ assert(lvalue);
+ assert(rvalue);
+ assert(u);
+
+ if (isempty(rvalue)) {
+ /* Empty assignment resets the list */
+ c->syscall_log = hashmap_free(c->syscall_log);
+ c->syscall_log_allow_list = false;
+ return 0;
+ }
+
+ if (rvalue[0] == '~') {
+ invert = true;
+ rvalue++;
+ }
+
+ if (!c->syscall_log) {
+ c->syscall_log = hashmap_new(NULL);
+ if (!c->syscall_log)
+ return log_oom();
+
+ if (invert)
+ /* Log everything but the ones listed */
+ c->syscall_log_allow_list = false;
+ else
+ /* Log nothing but the ones listed */
+ c->syscall_log_allow_list = true;
+ }
+
+ p = rvalue;
+ for (;;) {
+ _cleanup_free_ char *word = NULL, *name = NULL;
+ int num;
+
+ r = extract_first_word(&p, &word, NULL, 0);
+ if (r == 0)
+ return 0;
+ if (r == -ENOMEM)
+ return log_oom();
+ if (r < 0) {
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+ return 0;
+ }
+
+ r = parse_syscall_and_errno(word, &name, &num);
+ if (r < 0 || num >= 0) { /* errno code not allowed */
+ log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse syscall, ignoring: %s", word);
+ continue;
+ }
+
+ r = seccomp_parse_syscall_filter(
+ name, 0, c->syscall_log,
+ SECCOMP_PARSE_LOG|SECCOMP_PARSE_PERMISSIVE|
+ (invert ? SECCOMP_PARSE_INVERT : 0)|
+ (c->syscall_log_allow_list ? SECCOMP_PARSE_ALLOW_LIST : 0),
+ unit, filename, line);
+ if (r < 0)
+ return r;
+ }
+}
+
int config_parse_syscall_archs(
const char *unit,
const char *filename,
{ config_parse_syscall_filter, "SYSCALLS" },
{ config_parse_syscall_archs, "ARCHS" },
{ config_parse_syscall_errno, "ERRNO" },
+ { config_parse_syscall_log, "SYSCALLS" },
{ config_parse_address_families, "FAMILIES" },
{ config_parse_restrict_namespaces, "NAMESPACES" },
#endif
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_filter);
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_archs);
CONFIG_PARSER_PROTOTYPE(config_parse_syscall_errno);
+CONFIG_PARSER_PROTOTYPE(config_parse_syscall_log);
CONFIG_PARSER_PROTOTYPE(config_parse_environ);
CONFIG_PARSER_PROTOTYPE(config_parse_pass_environ);
CONFIG_PARSER_PROTOTYPE(config_parse_unset_environ);
}
if (STR_IN_SET(field, "RestrictAddressFamilies",
- "SystemCallFilter")) {
+ "SystemCallFilter",
+ "SystemCallLog")) {
int allow_list = 1;
const char *p = eq;
if (error == SECCOMP_ERROR_NUMBER_KILL)
a = scmp_act_kill_process();
+#ifdef SCMP_ACT_LOG
+ else if (action == SCMP_ACT_LOG)
+ a = SCMP_ACT_LOG;
+#endif
else if (action != SCMP_ACT_ALLOW && error >= 0)
a = SCMP_ACT_ERRNO(error);
return 1;
- } else if (STR_IN_SET(name, "SystemCallFilter", "RestrictAddressFamilies")) {
+ } else if (STR_IN_SET(name, "SystemCallFilter", "SystemCallLog", "RestrictAddressFamilies")) {
_cleanup_strv_free_ char **l = NULL;
int allow_list;