From: Mike Yuan Date: Thu, 25 Apr 2024 16:43:29 +0000 (+0800) Subject: core: introduce BindJournalSockets= X-Git-Tag: v257-rc1~562^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=368a3071e9124aae319bb44ba248fd4b08d6dd81;p=thirdparty%2Fsystemd.git core: introduce BindJournalSockets= Closes #32478 --- diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index a5c98d3458a..20bbae44871 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -3333,6 +3333,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -3932,6 +3934,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4642,6 +4646,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -5468,6 +5474,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -6079,6 +6087,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6763,6 +6773,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -7453,6 +7465,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -7990,6 +8004,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -8586,6 +8602,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -9399,6 +9417,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b MountAPIVFS = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b BindJournalSockets = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s KeyringMode = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s ProtectProc = '...'; @@ -9922,6 +9942,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -10504,6 +10526,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + @@ -12149,8 +12173,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ StatusBusError, StatusVarlinkError, LiveMountResult, - PrivateTmpEx, and - ImportCredentialEx were added in version 257. + PrivateTmpEx, + ImportCredentialEx, and + BindJournalSockets were added in version 257. Socket Unit Objects @@ -12187,8 +12212,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ EffectiveTasksMax, MemoryZSwapWriteback, and PassFileDescriptorsToExec were added in version 256. - PrivateTmpEx, and - ImportCredentialEx were added in version 257. + PrivateTmpEx, + ImportCredentialEx, and + BindJournalSockets were added in version 257. Mount Unit Objects @@ -12222,8 +12248,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ EffectiveMemoryMax, EffectiveTasksMax, and MemoryZSwapWriteback were added in version 256. - PrivateTmpEx, and - ImportCredentialEx were added in version 257. + PrivateTmpEx, + ImportCredentialEx, and + BindJournalSockets were added in version 257. Swap Unit Objects @@ -12257,8 +12284,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ EffectiveMemoryMax, EffectiveTasksMax, and MemoryZSwapWriteback were added in version 256. - PrivateTmpEx, and - ImportCredentialEx were added in version 257. + PrivateTmpEx, + ImportCredentialEx, and + BindJournalSockets were added in version 257. Slice Unit Objects diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index c9ae38bf1fa..03189615692 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -366,6 +366,23 @@ + + BindJournalSockets= + + Takes a boolean argument. If true, sockets from + systemd-journald.socket8 + will be bind mounted into the mount namespace. This is particularly useful when a different instance + of /run/ is employed, to make sure processes running in the namespace + can still make use of sd-journal3. + + + This option is implied when LogNamespace= is used, + when MountAPIVFS=yes, or when PrivateDevices=yes is used + in conjunction with either RootDirectory= or RootImage=. + + + + ProtectProc= diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 2954f63422e..4679f54a3e6 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -55,6 +55,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system, static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long); static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio); static BUS_DEFINE_PROPERTY_GET(property_get_mount_apivfs, "b", ExecContext, exec_context_get_effective_mount_apivfs); +static BUS_DEFINE_PROPERTY_GET(property_get_bind_journal_sockets, "b", ExecContext, exec_context_get_effective_bind_journal_sockets); static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_class, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_class); static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_data); static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL); @@ -1193,6 +1194,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountAPIVFS", "b", property_get_mount_apivfs, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("BindJournalSockets", "b", property_get_bind_journal_sockets, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1864,6 +1866,9 @@ int bus_exec_context_set_transient_property( if (streq(name, "MountAPIVFS")) return bus_set_transient_tristate(u, name, &c->mount_apivfs, message, flags, error); + if (streq(name, "BindJournalSockets")) + return bus_set_transient_tristate(u, name, &c->bind_journal_sockets, message, flags, error); + if (streq(name, "PrivateNetwork")) return bus_set_transient_bool(u, name, &c->private_network, message, flags, error); diff --git a/src/core/exec-invoke.c b/src/core/exec-invoke.c index 183bca092db..af5552379ef 100644 --- a/src/core/exec-invoke.c +++ b/src/core/exec-invoke.c @@ -3240,6 +3240,7 @@ static int apply_mount_namespace( .private_tmp = needs_sandboxing ? context->private_tmp : false, .mount_apivfs = needs_sandboxing && exec_context_get_effective_mount_apivfs(context), + .bind_journal_sockets = needs_sandboxing && exec_context_get_effective_bind_journal_sockets(context), /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */ .mount_nosuid = needs_sandboxing && context->no_new_privileges && !mac_selinux_use(), @@ -3861,6 +3862,7 @@ static bool exec_context_need_unprivileged_private_users( context->ipc_namespace_path || context->private_mounts > 0 || context->mount_apivfs > 0 || + context->bind_journal_sockets > 0 || context->n_bind_mounts > 0 || context->n_temporary_filesystems > 0 || context->root_directory || diff --git a/src/core/execute-serialize.c b/src/core/execute-serialize.c index 84628f91fb5..32d6118ab70 100644 --- a/src/core/execute-serialize.c +++ b/src/core/execute-serialize.c @@ -1854,6 +1854,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) { if (r < 0) return r; + r = serialize_item_tristate(f, "exec-context-bind-journal-sockets", c->bind_journal_sockets); + if (r < 0) + return r; + r = serialize_item_tristate(f, "exec-context-memory-ksm", c->memory_ksm); if (r < 0) return r; @@ -2726,6 +2730,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) { r = safe_atoi(val, &c->mount_apivfs); if (r < 0) return r; + } else if ((val = startswith(l, "exec-context-bind-journal-sockets="))) { + r = safe_atoi(val, &c->bind_journal_sockets); + if (r < 0) + return r; } else if ((val = startswith(l, "exec-context-memory-ksm="))) { r = safe_atoi(val, &c->memory_ksm); if (r < 0) diff --git a/src/core/execute.c b/src/core/execute.c index 5bbd1d835c0..7a365e23b91 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -284,7 +284,7 @@ bool exec_needs_mount_namespace( context->directories[EXEC_DIRECTORY_LOGS].n_items > 0)) return true; - if (context->log_namespace) + if (exec_context_get_effective_bind_journal_sockets(context)) return true; return false; @@ -539,6 +539,7 @@ void exec_context_init(ExecContext *c) { .tty_cols = UINT_MAX, .private_mounts = -1, .mount_apivfs = -1, + .bind_journal_sockets = -1, .memory_ksm = -1, .set_login_environment = -1, }; @@ -979,6 +980,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { "%sProtectHome: %s\n" "%sProtectSystem: %s\n" "%sMountAPIVFS: %s\n" + "%sBindJournalSockets: %s\n" "%sIgnoreSIGPIPE: %s\n" "%sMemoryDenyWriteExecute: %s\n" "%sRestrictRealtime: %s\n" @@ -1004,6 +1006,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, protect_home_to_string(c->protect_home), prefix, protect_system_to_string(c->protect_system), prefix, yes_no(exec_context_get_effective_mount_apivfs(c)), + prefix, yes_no(exec_context_get_effective_bind_journal_sockets(c)), prefix, yes_no(c->ignore_sigpipe), prefix, yes_no(c->memory_deny_write_execute), prefix, yes_no(c->restrict_realtime), @@ -1486,6 +1489,27 @@ bool exec_context_get_effective_mount_apivfs(const ExecContext *c) { return false; } +bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c) { + assert(c); + + /* If log namespace is specified, "/run/systemd/journal.namespace/" would be bind mounted to + * "/run/systemd/journal/", which effectively means BindJournalSockets=yes */ + if (c->log_namespace) + return true; + + if (c->bind_journal_sockets >= 0) + return c->bind_journal_sockets > 0; + + if (exec_context_get_effective_mount_apivfs(c)) + return true; + + /* When PrivateDevices=yes, /dev/log gets symlinked to /run/systemd/journal/dev-log */ + if (exec_context_with_rootfs(c) && c->private_devices) + return true; + + return false; +} + void exec_context_free_log_extra_fields(ExecContext *c) { assert(c); diff --git a/src/core/execute.h b/src/core/execute.h index a3fc52bfd04..5754273999b 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -313,6 +313,7 @@ struct ExecContext { int private_mounts; int mount_apivfs; + int bind_journal_sockets; int memory_ksm; PrivateTmp private_tmp; bool private_network; @@ -519,6 +520,7 @@ bool exec_context_maintains_privileges(const ExecContext *c); int exec_context_get_effective_ioprio(const ExecContext *c); bool exec_context_get_effective_mount_apivfs(const ExecContext *c); +bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c); void exec_context_free_log_extra_fields(ExecContext *c); diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 7441d3c759b..32ac87ce85f 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -137,6 +137,7 @@ {{type}}.ProtectHome, config_parse_protect_home, 0, offsetof({{type}}, exec_context.protect_home) {{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag) {{type}}.MountAPIVFS, config_parse_tristate, 0, offsetof({{type}}, exec_context.mount_apivfs) +{{type}}.BindJournalSockets, config_parse_tristate, 0, offsetof({{type}}, exec_context.bind_journal_sockets) {{type}}.Personality, config_parse_personality, 0, offsetof({{type}}, exec_context.personality) {{type}}.RuntimeDirectoryPreserve, config_parse_exec_preserve_mode, 0, offsetof({{type}}, exec_context.runtime_directory_preserve_mode) {{type}}.RuntimeDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode) diff --git a/src/core/namespace.c b/src/core/namespace.c index 314268e2eb4..553f3f4940a 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -93,7 +93,7 @@ typedef struct MountEntry { const char *path_const; /* Memory allocated on stack or static */ MountMode mode; bool ignore:1; /* Ignore if path does not exist? */ - bool has_prefix:1; /* Already is prefixed by the root dir? */ + bool has_prefix:1; /* Already prefixed by the root dir? */ bool read_only:1; /* Shall this mount point be read-only? */ bool nosuid:1; /* Shall set MS_NOSUID on the mount itself */ bool noexec:1; /* Shall set MS_NOEXEC on the mount itself */ @@ -120,6 +120,12 @@ typedef struct MountList { size_t n_mounts; } MountList; +static const BindMount bind_journal_sockets_table[] = { + { (char*) "/run/systemd/journal/socket", (char*) "/run/systemd/journal/socket", .read_only = true, .ignore_enoent = true }, + { (char*) "/run/systemd/journal/stdout", (char*) "/run/systemd/journal/stdout", .read_only = true, .ignore_enoent = true }, + { (char*) "/run/systemd/journal/dev-log", (char*) "/run/systemd/journal/dev-log", .read_only = true, .ignore_enoent = true }, +}; + /* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into the it, but only as a fallback if the user hasn't mounted * something there already. These mounts are hence overridden by any other explicitly configured mounts. */ static const MountEntry apivfs_table[] = { @@ -2585,6 +2591,11 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) { .read_only = true, .source_malloc = TAKE_PTR(q), }; + + } else if (p->bind_journal_sockets) { + r = append_bind_mounts(&ml, bind_journal_sockets_table, ELEMENTSOF(bind_journal_sockets_table)); + if (r < 0) + return r; } /* Will be used to add bind mounts at runtime */ diff --git a/src/core/namespace.h b/src/core/namespace.h index bff99b9daa8..538d23753c2 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -152,6 +152,7 @@ struct NamespaceParameters { bool private_ipc; bool mount_apivfs; + bool bind_journal_sockets; bool mount_nosuid; ProtectHome protect_home; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 6ce76ded431..edfe1482c81 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1076,6 +1076,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con "ProtectClock", "ProtectControlGroups", "MountAPIVFS", + "BindJournalSockets", "CPUSchedulingResetOnFork", "LockPersonality", "ProtectHostname",