core: reuse credential dir across start and start-post if populated,

author Mike Yuan <me@yhndnzj.com>

Sun, 4 Feb 2024 15:22:46 +0000 (23:22 +0800)

committer Mike Yuan <me@yhndnzj.com>

Tue, 6 Feb 2024 16:43:33 +0000 (00:43 +0800)
author Mike Yuan <me@yhndnzj.com>
Sun, 4 Feb 2024 15:22:46 +0000 (23:22 +0800)
committer Mike Yuan <me@yhndnzj.com>
Tue, 6 Feb 2024 16:43:33 +0000 (00:43 +0800)
diff --git a/src/core/exec-credential.c b/src/core/exec-credential.c

index 9b76c5a324996b361c60b25247344bc6267c3876..ad4398021649a8fbc01710ce06b6fa1361792fec 100644 (file)
--- a/src/core/exec-credential.c
+++ b/src/core/exec-credential.c
@@ -52,7 +52,7 @@ DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(
  bool exec_params_need_credentials(const ExecParameters *p) {
          assert(p);
  
-        return FLAGS_SET(p->flags, EXEC_SETUP_CREDENTIALS);
+        return p->flags & (EXEC_SETUP_CREDENTIALS|EXEC_SETUP_CREDENTIALS_FRESH);
  }
  
  bool exec_context_has_credentials(const ExecContext *c) {
@@ -787,10 +787,9 @@ static int setup_credentials_internal(
                  uid_t uid,
                  gid_t gid) {
  
+        bool final_mounted;
          int r, workspace_mounted; /* negative if we don't know yet whether we have/can mount something; true
                                     * if we mounted something; false if we definitely can't mount anything */
-        bool final_mounted;
-        const char *where;
  
          assert(context);
          assert(params);
@@ -798,6 +797,30 @@ static int setup_credentials_internal(
          assert(final);
          assert(workspace);
  
+        r = path_is_mount_point(final);
+        if (r < 0)
+                return r;
+        final_mounted = r > 0;
+
+        if (final_mounted) {
+                if (FLAGS_SET(params->flags, EXEC_SETUP_CREDENTIALS_FRESH)) {
+                        r = umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
+                        if (r < 0)
+                                return r;
+
+                        final_mounted = false;
+                } else {
+                        /* We can reuse the previous credential dir */
+                        r = dir_is_empty(final, /* ignore_hidden_or_backup = */ false);
+                        if (r < 0)
+                                return r;
+                        if (r == 0) {
+                                log_debug("Credential dir for unit '%s' already set up, skipping.", unit);
+                                return 0;
+                        }
+                }
+        }
+
          if (reuse_workspace) {
                  r = path_is_mount_point(workspace);
                  if (r < 0)
@@ -810,40 +833,19 @@ static int setup_credentials_internal(
          } else
                  workspace_mounted = -1; /* ditto */
  
-        r = path_is_mount_point(final);
-        if (r < 0)
-                return r;
-        if (r > 0) {
-                /* If the final place already has something mounted, we use that. If the workspace also has
-                 * something mounted we assume it's actually the same mount (but with MS_RDONLY
-                 * different). */
-                final_mounted = true;
-
-                if (workspace_mounted < 0) {
-                        /* If the final place is mounted, but the workspace isn't, then let's bind mount
-                         * the final version to the workspace, and make it writable, so that we can make
-                         * changes */
-
-                        r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
-                        if (r < 0)
-                                return r;
-
-                        r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
-                        if (r < 0)
-                                return r;
-
-                        workspace_mounted = true;
-                }
-        } else
-                final_mounted = false;
+        /* If both the final place and the workspace are mounted, we have no mounts to set up, based on
+         * the assumption that they're actually the same tmpfs (but the latter with MS_RDONLY different).
+         * If the workspace is not mounted, we just bind the final place over and make it writable. */
+        must_mount = must_mount || final_mounted;
  
          if (workspace_mounted < 0) {
-                /* Nothing is mounted on the workspace yet, let's try to mount something now */
-
-                r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
-                if (r < 0) {
-                        /* If that didn't work, try to make a bind mount from the final to the workspace, so
-                         * that we can make it writable there. */
+                if (!final_mounted)
+                        /* Nothing is mounted on the workspace yet, let's try to mount a new tmpfs if
+                         * not using the final place. */
+                        r = mount_credentials_fs(workspace, CREDENTIALS_TOTAL_SIZE_MAX, /* ro= */ false);
+                if (final_mounted || r < 0) {
+                        /* If using final place or failed to mount new tmpfs, make a bind mount from
+                         * the final to the workspace, so that we can make it writable there. */
                          r = mount_nofollow_verbose(LOG_DEBUG, final, workspace, NULL, MS_BIND|MS_REC, NULL);
                          if (r < 0) {
                                  if (!ERRNO_IS_PRIVILEGE(r))
@@ -856,12 +858,19 @@ static int setup_credentials_internal(
                                          return r;
  
                                  /* If we lack privileges to bind mount stuff, then let's gracefully proceed
-                                 * for compat with container envs, and just use the final dir as is. */
+                                 * for compat with container envs, and just use the final dir as is.
+                                 * Final place must not be mounted in this case (refused by must_mount
+                                 * above) */
  
                                  workspace_mounted = false;
                          } else {
                                  /* Make the new bind mount writable (i.e. drop MS_RDONLY) */
-                                r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false), NULL);
+                                r = mount_nofollow_verbose(LOG_DEBUG,
+                                                           NULL,
+                                                           workspace,
+                                                           NULL,
+                                                           MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ false),
+                                                           NULL);
                                  if (r < 0)
                                          return r;
  
@@ -871,34 +880,26 @@ static int setup_credentials_internal(
                          workspace_mounted = true;
          }
  
-        assert(!must_mount || workspace_mounted > 0);
-        where = workspace_mounted ? workspace : final;
+        assert(workspace_mounted >= 0);
+        assert(!must_mount || workspace_mounted);
+
+        const char *where = workspace_mounted ? workspace : final;
  
          (void) label_fix_full(AT_FDCWD, where, final, 0);
  
          r = acquire_credentials(context, params, unit, where, uid, gid, workspace_mounted);
-        if (r < 0)
-                return r;
-
-        if (workspace_mounted) {
-                bool install;
-
-                /* Determine if we should actually install the prepared mount in the final location by bind
-                 * mounting it there. We do so only if the mount is not established there already, and if the
-                 * mount is actually non-empty (i.e. carries at least one credential). Not that in the best
-                 * case we are doing all this in a mount namespace, thus no one else will see that we
-                 * allocated a file system we are getting rid of again here. */
+        if (r < 0) {
+                /* If we're using final place as workspace, and failed to acquire credentials, we might
+                 * have left half-written creds there. Let's get rid of the whole mount, so future
+                 * calls won't reuse it. */
                  if (final_mounted)
-                        install = false; /* already installed */
-                else {
-                        r = dir_is_empty(where, /* ignore_hidden_or_backup= */ false);
-                        if (r < 0)
-                                return r;
+                        (void) umount_verbose(LOG_DEBUG, final, MNT_DETACH|UMOUNT_NOFOLLOW);
  
-                        install = r == 0; /* install only if non-empty */
-                }
+                return r;
+        }
  
-                if (install) {
+        if (workspace_mounted) {
+                if (!final_mounted) {
                          /* Make workspace read-only now, so that any bind mount we make from it defaults to
                           * read-only too */
                          r = mount_nofollow_verbose(LOG_DEBUG, NULL, workspace, NULL, MS_BIND|MS_REMOUNT|credentials_fs_mount_flags(/* ro= */ true), NULL);
@@ -908,7 +909,7 @@ static int setup_credentials_internal(
                          /* And mount it to the final place, read-only */
                          r = mount_nofollow_verbose(LOG_DEBUG, workspace, final, NULL, MS_MOVE, NULL);
                  } else
-                        /* Otherwise get rid of it */
+                        /* Otherwise we just get rid of the bind mount of final place */
                          r = umount_verbose(LOG_DEBUG, workspace, MNT_DETACH|UMOUNT_NOFOLLOW);
                  if (r < 0)
                          return r;
diff --git a/src/core/execute.h b/src/core/execute.h

index 916e66adcdc3b345d4322264122bec4dab4ecec1..eea6b7518ae9f05c0c8814ac85b07365e653302c 100644 (file)
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -390,22 +390,23 @@ static inline bool exec_context_with_rootfs(const ExecContext *c) {
  }
  
  typedef enum ExecFlags {
-        EXEC_APPLY_SANDBOXING      = 1 << 0,
-        EXEC_APPLY_CHROOT          = 1 << 1,
-        EXEC_APPLY_TTY_STDIN       = 1 << 2,
-        EXEC_PASS_LOG_UNIT         = 1 << 3, /* Whether to pass the unit name to the service's journal stream connection */
-        EXEC_CHOWN_DIRECTORIES     = 1 << 4, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
-        EXEC_NSS_DYNAMIC_BYPASS    = 1 << 5, /* Set the SYSTEMD_NSS_DYNAMIC_BYPASS environment variable, to disable nss-systemd blocking on PID 1, for use by dbus-daemon */
-        EXEC_CGROUP_DELEGATE       = 1 << 6,
-        EXEC_IS_CONTROL            = 1 << 7,
-        EXEC_CONTROL_CGROUP        = 1 << 8, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
-        EXEC_SETUP_CREDENTIALS     = 1 << 9, /* Set up the credential store logic */
+        EXEC_APPLY_SANDBOXING        = 1 << 0,
+        EXEC_APPLY_CHROOT            = 1 << 1,
+        EXEC_APPLY_TTY_STDIN         = 1 << 2,
+        EXEC_PASS_LOG_UNIT           = 1 << 3,  /* Whether to pass the unit name to the service's journal stream connection */
+        EXEC_CHOWN_DIRECTORIES       = 1 << 4,  /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */
+        EXEC_NSS_DYNAMIC_BYPASS      = 1 << 5,  /* Set the SYSTEMD_NSS_DYNAMIC_BYPASS environment variable, to disable nss-systemd blocking on PID 1, for use by dbus-daemon */
+        EXEC_CGROUP_DELEGATE         = 1 << 6,
+        EXEC_IS_CONTROL              = 1 << 7,
+        EXEC_CONTROL_CGROUP          = 1 << 8,  /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */
+        EXEC_SETUP_CREDENTIALS       = 1 << 9,  /* Set up the credential store logic */
+        EXEC_SETUP_CREDENTIALS_FRESH = 1 << 10, /* Set up a new credential store (disable reuse) */
  
          /* The following are not used by execute.c, but by consumers internally */
-        EXEC_PASS_FDS              = 1 << 10,
-        EXEC_SETENV_RESULT         = 1 << 11,
-        EXEC_SET_WATCHDOG          = 1 << 12,
-        EXEC_SETENV_MONITOR_RESULT = 1 << 13, /* Pass exit status to OnFailure= and OnSuccess= dependencies. */
+        EXEC_PASS_FDS                = 1 << 11,
+        EXEC_SETENV_RESULT           = 1 << 12,
+        EXEC_SET_WATCHDOG            = 1 << 13,
+        EXEC_SETENV_MONITOR_RESULT   = 1 << 14, /* Pass exit status to OnFailure= and OnSuccess= dependencies. */
  } ExecFlags;
  
  /* Parameters for a specific invocation of a command. This structure is put together right before a command is
diff --git a/src/core/service.c b/src/core/service.c

index 8553e28e1da87cea4b87c063baf514a156a72db5..2b760eb7aa36d1302e4b26d2b4948f95d22b77e9 100644 (file)
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1595,27 +1595,33 @@ static Service *service_get_triggering_service(Service *s) {
          return NULL;
  }
  
-static ExecFlags service_exec_flags(ServiceExecCommand command_id) {
+static ExecFlags service_exec_flags(ServiceExecCommand command_id, ExecFlags cred_flag) {
          /* All service main/control processes honor sandboxing and namespacing options (except those
          explicitly excluded in service_spawn()) */
          ExecFlags flags = EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT;
  
          assert(command_id >= 0);
          assert(command_id < _SERVICE_EXEC_COMMAND_MAX);
+        assert((cred_flag & ~(EXEC_SETUP_CREDENTIALS_FRESH|EXEC_SETUP_CREDENTIALS)) == 0);
+        assert((cred_flag != 0) == (command_id == SERVICE_EXEC_START));
  
          /* Control processes spawned before main process also get tty access */
          if (IN_SET(command_id, SERVICE_EXEC_CONDITION, SERVICE_EXEC_START_PRE, SERVICE_EXEC_START))
                  flags |= EXEC_APPLY_TTY_STDIN;
  
-        /* All start phases get access to credentials */
-        if (IN_SET(command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_START, SERVICE_EXEC_START_POST))
+        /* All start phases get access to credentials. ExecStartPre= gets a new credential store upon
+         * every invocation, so that updating credential files through it works. When the first main process
+         * starts, passed creds become stable. Also see 'cred_flag'. */
+        if (command_id == SERVICE_EXEC_START_PRE)
+                flags |= EXEC_SETUP_CREDENTIALS_FRESH;
+        if (command_id == SERVICE_EXEC_START_POST)
                  flags |= EXEC_SETUP_CREDENTIALS;
  
          if (IN_SET(command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_START))
                  flags |= EXEC_SETENV_MONITOR_RESULT;
  
          if (command_id == SERVICE_EXEC_START)
-                return flags|EXEC_PASS_FDS|EXEC_SET_WATCHDOG;
+                return flags|cred_flag|EXEC_PASS_FDS|EXEC_SET_WATCHDOG;
  
          flags |= EXEC_IS_CONTROL;
  
@@ -1632,13 +1638,12 @@ static ExecFlags service_exec_flags(ServiceExecCommand command_id) {
  static int service_spawn_internal(
                  const char *caller,
                  Service *s,
-                ServiceExecCommand command_id,
                  ExecCommand *c,
+                ExecFlags flags,
                  usec_t timeout,
                  PidRef *ret_pid) {
  
-        _cleanup_(exec_params_shallow_clear) ExecParameters exec_params =
-                        EXEC_PARAMETERS_INIT(service_exec_flags(command_id));
+        _cleanup_(exec_params_shallow_clear) ExecParameters exec_params = EXEC_PARAMETERS_INIT(flags);
          _cleanup_(sd_event_source_unrefp) sd_event_source *exec_fd_source = NULL;
          _cleanup_strv_free_ char **final_env = NULL, **our_env = NULL;
          _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL;
@@ -2083,8 +2088,8 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
                  pidref_done(&s->control_pid);
  
                  r = service_spawn(s,
-                                  s->control_command_id,
                                    s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                    s->timeout_stop_usec,
                                    &s->control_pid);
                  if (r < 0) {
@@ -2206,8 +2211,8 @@ static void service_enter_stop(Service *s, ServiceResult f) {
                  pidref_done(&s->control_pid);
  
                  r = service_spawn(s,
-                                  s->control_command_id,
                                    s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                    s->timeout_stop_usec,
                                    &s->control_pid);
                  if (r < 0) {
@@ -2290,8 +2295,8 @@ static void service_enter_start_post(Service *s) {
                  pidref_done(&s->control_pid);
  
                  r = service_spawn(s,
-                                  s->control_command_id,
                                    s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                    s->timeout_start_usec,
                                    &s->control_pid);
                  if (r < 0) {
@@ -2400,8 +2405,8 @@ static void service_enter_start(Service *s) {
                  timeout = s->timeout_start_usec;
  
          r = service_spawn(s,
-                          SERVICE_EXEC_START,
                            c,
+                          service_exec_flags(SERVICE_EXEC_START, EXEC_SETUP_CREDENTIALS_FRESH),
                            timeout,
                            &pidref);
          if (r < 0) {
@@ -2460,8 +2465,8 @@ static void service_enter_start_pre(Service *s) {
                  s->control_command_id = SERVICE_EXEC_START_PRE;
  
                  r = service_spawn(s,
-                                  s->control_command_id,
                                    s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                    s->timeout_start_usec,
                                    &s->control_pid);
                  if (r < 0) {
@@ -2497,8 +2502,8 @@ static void service_enter_condition(Service *s) {
                  pidref_done(&s->control_pid);
  
                  r = service_spawn(s,
-                                  s->control_command_id,
                                    s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                    s->timeout_start_usec,
                                    &s->control_pid);
  
@@ -2608,8 +2613,8 @@ static void service_enter_reload(Service *s) {
                  pidref_done(&s->control_pid);
  
                  r = service_spawn(s,
-                                  s->control_command_id,
                                    s->control_command,
+                                  service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                                    s->timeout_start_usec,
                                    &s->control_pid);
                  if (r < 0) {
@@ -2664,8 +2669,8 @@ static void service_run_next_control(Service *s) {
          pidref_done(&s->control_pid);
  
          r = service_spawn(s,
-                          s->control_command_id,
                            s->control_command,
+                          service_exec_flags(s->control_command_id, /* cred_flag = */ 0),
                            timeout,
                            &s->control_pid);
          if (r < 0) {
@@ -2696,8 +2701,8 @@ static void service_run_next_main(Service *s) {
          service_unwatch_main_pid(s);
  
          r = service_spawn(s,
-                          SERVICE_EXEC_START,
                            s->main_command,
+                          service_exec_flags(SERVICE_EXEC_START, EXEC_SETUP_CREDENTIALS),
                            s->timeout_start_usec,
                            &pidref);
          if (r < 0) {
diff --git a/test/units/testsuite-54.sh b/test/units/testsuite-54.sh

index 914f9865454e46ea9435e115686175395468551a..8f20ac432868a95ea3cf600733bdc4314040e194 100755 (executable)
--- a/test/units/testsuite-54.sh
+++ b/test/units/testsuite-54.sh
@@ -3,6 +3,9 @@
  # shellcheck disable=SC2016
  set -eux
  
+# shellcheck source=test/units/util.sh
+. "$(dirname "$0")"/util.sh
+
  systemd-analyze log-level debug
  
  run_with_cred_compare() {
@@ -309,6 +312,28 @@ systemd-run -p DynamicUser=yes -p 'LoadCredential=os:/etc/os-release' \
              --service-type=oneshot --wait --pipe \
              true | cmp /etc/os-release
  
+# https://github.com/systemd/systemd/pull/24734#issuecomment-1925440546
+# Also ExecStartPre= should be able to update creds
+dd if=/dev/urandom of=/tmp/cred-huge bs=600K count=1
+chmod 777 /tmp/cred-huge
+systemd-run -p ProtectSystem=full \
+            -p 'LoadCredential=huge:/tmp/cred-huge' \
+            -p 'ExecStartPre=true' \
+            -p 'ExecStartPre=bash -c "echo fresh >/tmp/cred-huge"' \
+            --unit=test-54-huge-cred.service \
+            --wait --pipe \
+            systemd-creds cat huge | cmp - <(echo "fresh")
+rm /tmp/cred-huge
+
+echo stable >/tmp/cred-stable
+systemd-run -p 'LoadCredential=stable:/tmp/cred-stable' \
+            -p 'ExecStartPost=systemd-creds cat stable' \
+            --unit=test-54-stable.service \
+            --service-type=oneshot --wait --pipe \
+            bash -c "echo bogus >/tmp/cred-stable" | cmp - <(echo "stable")
+assert_eq "$(cat /tmp/cred-stable)" "bogus"
+rm /tmp/cred-stable
+
  if ! systemd-detect-virt -q -c ; then
      # Validate that the credential we inserted via the initrd logic arrived
      test "$(systemd-creds cat --system myinitrdcred)" = "guatemala"
author	Mike Yuan <me@yhndnzj.com>
	Sun, 4 Feb 2024 15:22:46 +0000 (23:22 +0800)
committer	Mike Yuan <me@yhndnzj.com>
	Tue, 6 Feb 2024 16:43:33 +0000 (00:43 +0800)
src/core/exec-credential.c		patch \| blob \| blame \| history
src/core/execute.h		patch \| blob \| blame \| history
src/core/service.c		patch \| blob \| blame \| history
test/units/testsuite-54.sh		patch \| blob \| blame \| history