git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: introduce BindJournalSockets=
author Mike Yuan <me@yhndnzj.com>
Thu, 25 Apr 2024 16:43:29 +0000 (00:43 +0800)
committer Mike Yuan <me@yhndnzj.com>
Tue, 3 Sep 2024 19:04:50 +0000 (21:04 +0200)
Closes #32478

man/org.freedesktop.systemd1.xml
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/exec-invoke.c
src/core/execute-serialize.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.in
src/core/namespace.c
src/core/namespace.h
src/shared/bus-unit-util.c

index a5c98d3458a6d335d80a2f33a9b4b204ae804e37..20bbae4487185fba982143cc30647974ed15370a 100644 (file)
@@ -3333,6 +3333,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MountAPIVFS = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly b BindJournalSockets = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s KeyringMode = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s ProtectProc = '...';
@@ -3932,6 +3934,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <!--property MountAPIVFS is not documented!-->
 
+    <!--property BindJournalSockets is not documented!-->
+
     <!--property KeyringMode is not documented!-->
 
     <!--property ProtectProc is not documented!-->
@@ -4642,6 +4646,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -5468,6 +5474,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MountAPIVFS = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly b BindJournalSockets = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s KeyringMode = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s ProtectProc = '...';
@@ -6079,6 +6087,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <!--property MountAPIVFS is not documented!-->
 
+    <!--property BindJournalSockets is not documented!-->
+
     <!--property KeyringMode is not documented!-->
 
     <!--property ProtectProc is not documented!-->
@@ -6763,6 +6773,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -7453,6 +7465,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MountAPIVFS = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly b BindJournalSockets = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s KeyringMode = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s ProtectProc = '...';
@@ -7990,6 +8004,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <!--property MountAPIVFS is not documented!-->
 
+    <!--property BindJournalSockets is not documented!-->
+
     <!--property KeyringMode is not documented!-->
 
     <!--property ProtectProc is not documented!-->
@@ -8586,6 +8602,8 @@ node /org/freedesktop/systemd1/unit/home_2emount {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -9399,6 +9417,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly b MountAPIVFS = ...;
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
+      readonly b BindJournalSockets = ...;
+      @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s KeyringMode = '...';
       @org.freedesktop.DBus.Property.EmitsChangedSignal("const")
       readonly s ProtectProc = '...';
@@ -9922,6 +9942,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <!--property MountAPIVFS is not documented!-->
 
+    <!--property BindJournalSockets is not documented!-->
+
     <!--property KeyringMode is not documented!-->
 
     <!--property ProtectProc is not documented!-->
@@ -10504,6 +10526,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap {
 
     <variablelist class="dbus-property" generated="True" extra-ref="MountAPIVFS"/>
 
+    <variablelist class="dbus-property" generated="True" extra-ref="BindJournalSockets"/>
+
     <variablelist class="dbus-property" generated="True" extra-ref="KeyringMode"/>
 
     <variablelist class="dbus-property" generated="True" extra-ref="ProtectProc"/>
@@ -12149,8 +12173,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <para><varname>StatusBusError</varname>,
       <varname>StatusVarlinkError</varname>,
       <varname>LiveMountResult</varname>,
-      <varname>PrivateTmpEx</varname>, and
-      <varname>ImportCredentialEx</varname> were added in version 257.</para>
+      <varname>PrivateTmpEx</varname>,
+      <varname>ImportCredentialEx</varname>, and
+      <varname>BindJournalSockets</varname> were added in version 257.</para>
     </refsect2>
     <refsect2>
       <title>Socket Unit Objects</title>
@@ -12187,8 +12212,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>EffectiveTasksMax</varname>,
       <varname>MemoryZSwapWriteback</varname>, and
       <varname>PassFileDescriptorsToExec</varname> were added in version 256.</para>
-      <para><varname>PrivateTmpEx</varname>, and
-      <varname>ImportCredentialEx</varname> were added in version 257.</para>
+      <para><varname>PrivateTmpEx</varname>,
+      <varname>ImportCredentialEx</varname>, and
+      <varname>BindJournalSockets</varname> were added in version 257.</para>
     </refsect2>
     <refsect2>
       <title>Mount Unit Objects</title>
@@ -12222,8 +12248,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>EffectiveMemoryMax</varname>,
       <varname>EffectiveTasksMax</varname>, and
       <varname>MemoryZSwapWriteback</varname> were added in version 256.</para>
-      <para><varname>PrivateTmpEx</varname>, and
-      <varname>ImportCredentialEx</varname> were added in version 257.</para>
+      <para><varname>PrivateTmpEx</varname>,
+      <varname>ImportCredentialEx</varname>, and
+      <varname>BindJournalSockets</varname> were added in version 257.</para>
     </refsect2>
     <refsect2>
       <title>Swap Unit Objects</title>
@@ -12257,8 +12284,9 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \
       <varname>EffectiveMemoryMax</varname>,
       <varname>EffectiveTasksMax</varname>, and
       <varname>MemoryZSwapWriteback</varname> were added in version 256.</para>
-      <para><varname>PrivateTmpEx</varname>, and
-      <varname>ImportCredentialEx</varname> were added in version 257.</para>
+      <para><varname>PrivateTmpEx</varname>,
+      <varname>ImportCredentialEx</varname>, and
+      <varname>BindJournalSockets</varname> were added in version 257.</para>
     </refsect2>
     <refsect2>
       <title>Slice Unit Objects</title>
index c9ae38bf1fa4cd1c62ddd2b7493269161b97f638..03189615692fb741330ea82740853f1afc240a0f 100644 (file)
         <xi:include href="version-info.xml" xpointer="v233"/></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term><varname>BindJournalSockets=</varname></term>
+
+        <listitem><para>Takes a boolean argument. If true, sockets from <citerefentry>
+        <refentrytitle>systemd-journald.socket</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+        will be bind mounted into the mount namespace. This is particularly useful when a different instance
+        of <filename>/run/</filename> is employed, to make sure processes running in the namespace
+        can still make use of <citerefentry><refentrytitle>sd-journal</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
+        </para>
+
+        <para>This option is implied when <varname>LogNamespace=</varname> is used,
+        when <varname>MountAPIVFS=yes</varname>, or when <varname>PrivateDevices=yes</varname> is used
+        in conjunction with either <varname>RootDirectory=</varname> or <varname>RootImage=</varname>.</para>
+
+        <xi:include href="version-info.xml" xpointer="v257"/></listitem>
+      </varlistentry>
+
       <varlistentry>
         <term><varname>ProtectProc=</varname></term>
 
index 2954f63422ef476f823cf791c466421e58a06bd9..4679f54a3e64a8b49c7da618be346b7b8aacfd05 100644 (file)
@@ -55,6 +55,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_system, protect_system,
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_personality, personality, unsigned long);
 static BUS_DEFINE_PROPERTY_GET(property_get_ioprio, "i", ExecContext, exec_context_get_effective_ioprio);
 static BUS_DEFINE_PROPERTY_GET(property_get_mount_apivfs, "b", ExecContext, exec_context_get_effective_mount_apivfs);
+static BUS_DEFINE_PROPERTY_GET(property_get_bind_journal_sockets, "b", ExecContext, exec_context_get_effective_bind_journal_sockets);
 static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_class, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_class);
 static BUS_DEFINE_PROPERTY_GET2(property_get_ioprio_priority, "i", ExecContext, exec_context_get_effective_ioprio, ioprio_prio_data);
 static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_empty_string, "s", NULL);
@@ -1193,6 +1194,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("BindReadOnlyPaths", "a(ssbt)", property_get_bind_paths, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("TemporaryFileSystem", "a(ss)", property_get_temporary_filesystems, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("MountAPIVFS", "b", property_get_mount_apivfs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("BindJournalSockets", "b", property_get_bind_journal_sockets, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ProtectProc", "s", property_get_protect_proc, offsetof(ExecContext, protect_proc), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("ProcSubset", "s", property_get_proc_subset, offsetof(ExecContext, proc_subset), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -1864,6 +1866,9 @@ int bus_exec_context_set_transient_property(
         if (streq(name, "MountAPIVFS"))
                 return bus_set_transient_tristate(u, name, &c->mount_apivfs, message, flags, error);
 
+        if (streq(name, "BindJournalSockets"))
+                return bus_set_transient_tristate(u, name, &c->bind_journal_sockets, message, flags, error);
+
         if (streq(name, "PrivateNetwork"))
                 return bus_set_transient_bool(u, name, &c->private_network, message, flags, error);
 
index 183bca092db1d6f5c4a3ba50891c023d9ecc719f..af5552379efc13ca90f600345586ba6c12dc462c 100644 (file)
@@ -3240,6 +3240,7 @@ static int apply_mount_namespace(
                 .private_tmp = needs_sandboxing ? context->private_tmp : false,
 
                 .mount_apivfs = needs_sandboxing && exec_context_get_effective_mount_apivfs(context),
+                .bind_journal_sockets = needs_sandboxing && exec_context_get_effective_bind_journal_sockets(context),
 
                 /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */
                 .mount_nosuid = needs_sandboxing && context->no_new_privileges && !mac_selinux_use(),
@@ -3861,6 +3862,7 @@ static bool exec_context_need_unprivileged_private_users(
                context->ipc_namespace_path ||
                context->private_mounts > 0 ||
                context->mount_apivfs > 0 ||
+               context->bind_journal_sockets > 0 ||
                context->n_bind_mounts > 0 ||
                context->n_temporary_filesystems > 0 ||
                context->root_directory ||
index 84628f91fb597b8f860c8b0570d6b0be3ec853f0..32d6118ab7036aa1d12a43d90741eb0964a648bd 100644 (file)
@@ -1854,6 +1854,10 @@ static int exec_context_serialize(const ExecContext *c, FILE *f) {
         if (r < 0)
                 return r;
 
+        r = serialize_item_tristate(f, "exec-context-bind-journal-sockets", c->bind_journal_sockets);
+        if (r < 0)
+                return r;
+
         r = serialize_item_tristate(f, "exec-context-memory-ksm", c->memory_ksm);
         if (r < 0)
                 return r;
@@ -2726,6 +2730,10 @@ static int exec_context_deserialize(ExecContext *c, FILE *f) {
                         r = safe_atoi(val, &c->mount_apivfs);
                         if (r < 0)
                                 return r;
+                } else if ((val = startswith(l, "exec-context-bind-journal-sockets="))) {
+                        r = safe_atoi(val, &c->bind_journal_sockets);
+                        if (r < 0)
+                                return r;
                 } else if ((val = startswith(l, "exec-context-memory-ksm="))) {
                         r = safe_atoi(val, &c->memory_ksm);
                         if (r < 0)
index 5bbd1d835c0fec8e03e289abc31e0fadc6ad8f19..7a365e23b91ea427b4d2310d442df2761e7e904c 100644 (file)
@@ -284,7 +284,7 @@ bool exec_needs_mount_namespace(
              context->directories[EXEC_DIRECTORY_LOGS].n_items > 0))
                 return true;
 
-        if (context->log_namespace)
+        if (exec_context_get_effective_bind_journal_sockets(context))
                 return true;
 
         return false;
@@ -539,6 +539,7 @@ void exec_context_init(ExecContext *c) {
                 .tty_cols = UINT_MAX,
                 .private_mounts = -1,
                 .mount_apivfs = -1,
+                .bind_journal_sockets = -1,
                 .memory_ksm = -1,
                 .set_login_environment = -1,
         };
@@ -979,6 +980,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 "%sProtectHome: %s\n"
                 "%sProtectSystem: %s\n"
                 "%sMountAPIVFS: %s\n"
+                "%sBindJournalSockets: %s\n"
                 "%sIgnoreSIGPIPE: %s\n"
                 "%sMemoryDenyWriteExecute: %s\n"
                 "%sRestrictRealtime: %s\n"
@@ -1004,6 +1006,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
                 prefix, protect_home_to_string(c->protect_home),
                 prefix, protect_system_to_string(c->protect_system),
                 prefix, yes_no(exec_context_get_effective_mount_apivfs(c)),
+                prefix, yes_no(exec_context_get_effective_bind_journal_sockets(c)),
                 prefix, yes_no(c->ignore_sigpipe),
                 prefix, yes_no(c->memory_deny_write_execute),
                 prefix, yes_no(c->restrict_realtime),
@@ -1486,6 +1489,27 @@ bool exec_context_get_effective_mount_apivfs(const ExecContext *c) {
         return false;
 }
 
+bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c) {
+        assert(c);
+
+        /* If log namespace is specified, "/run/systemd/journal.namespace/" would be bind mounted to
+         * "/run/systemd/journal/", which effectively means BindJournalSockets=yes */
+        if (c->log_namespace)
+                return true;
+
+        if (c->bind_journal_sockets >= 0)
+                return c->bind_journal_sockets > 0;
+
+        if (exec_context_get_effective_mount_apivfs(c))
+                return true;
+
+        /* When PrivateDevices=yes, /dev/log gets symlinked to /run/systemd/journal/dev-log */
+        if (exec_context_with_rootfs(c) && c->private_devices)
+                return true;
+
+        return false;
+}
+
 void exec_context_free_log_extra_fields(ExecContext *c) {
         assert(c);
 
index a3fc52bfd0418edd45311c575d3c519279a8c48f..5754273999b319958899168fb9adc697cfb8afa4 100644 (file)
@@ -313,6 +313,7 @@ struct ExecContext {
 
         int private_mounts;
         int mount_apivfs;
+        int bind_journal_sockets;
         int memory_ksm;
         PrivateTmp private_tmp;
         bool private_network;
@@ -519,6 +520,7 @@ bool exec_context_maintains_privileges(const ExecContext *c);
 
 int exec_context_get_effective_ioprio(const ExecContext *c);
 bool exec_context_get_effective_mount_apivfs(const ExecContext *c);
+bool exec_context_get_effective_bind_journal_sockets(const ExecContext *c);
 
 void exec_context_free_log_extra_fields(ExecContext *c);
 
index 7441d3c759bad401b707cd0123f70d698b4e2e73..32ac87ce85f3006eec3657101a9097345108cdec 100644 (file)
 {{type}}.ProtectHome,                      config_parse_protect_home,                   0,                                  offsetof({{type}}, exec_context.protect_home)
 {{type}}.MountFlags,                       config_parse_exec_mount_propagation_flag,    0,                                  offsetof({{type}}, exec_context.mount_propagation_flag)
 {{type}}.MountAPIVFS,                      config_parse_tristate,                       0,                                  offsetof({{type}}, exec_context.mount_apivfs)
+{{type}}.BindJournalSockets,               config_parse_tristate,                       0,                                  offsetof({{type}}, exec_context.bind_journal_sockets)
 {{type}}.Personality,                      config_parse_personality,                    0,                                  offsetof({{type}}, exec_context.personality)
 {{type}}.RuntimeDirectoryPreserve,         config_parse_exec_preserve_mode,             0,                                  offsetof({{type}}, exec_context.runtime_directory_preserve_mode)
 {{type}}.RuntimeDirectoryMode,             config_parse_mode,                           0,                                  offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode)
index 314268e2eb42d8050d5146ce63748ef515182ed1..553f3f4940a33a5419007effe83a7cc5263dd34f 100644 (file)
@@ -93,7 +93,7 @@ typedef struct MountEntry {
         const char *path_const;   /* Memory allocated on stack or static */
         MountMode mode;
         bool ignore:1;            /* Ignore if path does not exist? */
-        bool has_prefix:1;        /* Already is prefixed by the root dir? */
+        bool has_prefix:1;        /* Already prefixed by the root dir? */
         bool read_only:1;         /* Shall this mount point be read-only? */
         bool nosuid:1;            /* Shall set MS_NOSUID on the mount itself */
         bool noexec:1;            /* Shall set MS_NOEXEC on the mount itself */
@@ -120,6 +120,12 @@ typedef struct MountList {
         size_t n_mounts;
 } MountList;
 
+static const BindMount bind_journal_sockets_table[] = {
+        { (char*) "/run/systemd/journal/socket",  (char*) "/run/systemd/journal/socket",  .read_only = true, .ignore_enoent = true },
+        { (char*) "/run/systemd/journal/stdout",  (char*) "/run/systemd/journal/stdout",  .read_only = true, .ignore_enoent = true },
+        { (char*) "/run/systemd/journal/dev-log", (char*) "/run/systemd/journal/dev-log", .read_only = true, .ignore_enoent = true },
+};
+
 /* If MountAPIVFS= is used, let's mount /sys, /proc, /dev and /run into the it, but only as a fallback if the user hasn't mounted
  * something there already. These mounts are hence overridden by any other explicitly configured mounts. */
 static const MountEntry apivfs_table[] = {
@@ -2585,6 +2591,11 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
                         .read_only = true,
                         .source_malloc = TAKE_PTR(q),
                 };
+
+        } else if (p->bind_journal_sockets) {
+                r = append_bind_mounts(&ml, bind_journal_sockets_table, ELEMENTSOF(bind_journal_sockets_table));
+                if (r < 0)
+                        return r;
         }
 
         /* Will be used to add bind mounts at runtime */
index bff99b9daa87e4d24a050f7173f96fab30b4b672..538d23753c292d85f797fedaaab29b79a84e570f 100644 (file)
@@ -152,6 +152,7 @@ struct NamespaceParameters {
         bool private_ipc;
 
         bool mount_apivfs;
+        bool bind_journal_sockets;
         bool mount_nosuid;
 
         ProtectHome protect_home;
index 6ce76ded4319337776e7474cd3125e3f4ddd6c5f..edfe1482c813b4843ad6d257ab684cfe2f1144ff 100644 (file)
@@ -1076,6 +1076,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
                               "ProtectClock",
                               "ProtectControlGroups",
                               "MountAPIVFS",
+                              "BindJournalSockets",
                               "CPUSchedulingResetOnFork",
                               "LockPersonality",
                               "ProtectHostname",