From: Lennart Poettering Date: Wed, 29 Mar 2023 20:07:22 +0000 (+0200) Subject: service: add ability to pin fd store X-Git-Tag: v254-rc1~736^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b9c1883a9cd9b5126fe648f3e198143dc19a222d;p=thirdparty%2Fsystemd.git service: add ability to pin fd store Oftentimes it is useful to allow the per-service fd store to survive longer than for a restart. This is useful in various scenarios: 1. An fd to some security relevant object needs to be stashed somewhere, that should not be cleaned automatically, because the security enforcement would be dropped then. 2. A user namespace fd should be allocated on first invocation and be kept around until the user logs out (i.e. systemd --user ends), á la #16328 (This does not implement what #16318 asks for, but should solve the use-case discussed there.) 3. There's interest in allowing a concept of "userspace reboots" where the kernel stays running, and userspace is swapped out (i.e. all services exit, and the rootfs is transitioned into a new version of it) while keeping some select resources pinned, very similar to how we implement a switch root. Thus it is useful to allow services to exit, while leaving their fds around till the very end. This is exposed through a new FileDescriptorStorePreserve= setting that is closely modelled after RuntimeDirectoryPreserve= (in fact it reuses the same internal type), since we want similar behaviour in the end, and quite often they probably want to be used together. 
--- diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index f39893f647a..e462c606362 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2619,6 +2619,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { readonly u FileDescriptorStoreMax = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("false") readonly u NFileDescriptorStore = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("false") + readonly s FileDescriptorStorePreserve = '...'; readonly s StatusText = '...'; readonly i StatusErrno = ...; readonly s Result = '...'; @@ -3244,6 +3246,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -3830,6 +3834,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + diff --git a/man/systemd.service.xml b/man/systemd.service.xml index 1b116b8372a..7de1350a593 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -1066,7 +1066,7 @@ FDSTORE=1 messages. This is useful for implementing services that can restart after an explicit request or a crash without losing state. Any open sockets and other file descriptors which should not be closed during the restart may be stored this way. Application state - can either be serialized to a file in /run/, or better, stored in a + can either be serialized to a file in RuntimeDirectory=, or stored in a memfd_create2 memory file descriptor. Defaults to 0, i.e. no file descriptors may be stored in the service manager. All file descriptors passed to the service manager from a specific service are passed back @@ -1075,7 +1075,8 @@ details about the precise protocol used and the order in which the file descriptors are passed). Any file descriptors passed to the service manager are automatically closed when POLLHUP or POLLERR is seen on them, or when the service is - fully stopped and no job is queued or being executed for it. 
If this option is used, + fully stopped and no job is queued or being executed for it (the latter can be tweaked with + FileDescriptorStorePreserve=, see below). If this option is used, NotifyAccess= (see above) should be set to open access to the notification socket provided by systemd. If NotifyAccess= is not set, it will be implicitly set to main. @@ -1097,6 +1098,22 @@ details. + + FileDescriptorStorePreserve= + Takes one of no, yes, + restart and controls when to release the service's file descriptor store + (i.e. when to close the contained file descriptors, if any). If set to no the + file descriptor store is automatically released when the service is stopped; if + restart (the default) it is kept around as long as the unit is neither inactive + nor failed, or a job is queued for the service, or the service is expected to be restarted. If + yes the file descriptor store is kept around until the unit is removed from + memory (i.e. is not referenced anymore and inactive). The latter is useful to keep entries in the + file descriptor store pinned until the service manager exits. + + Use systemctl clean --what=fdstore … to release the file descriptor store + explicitly. 
+ + USBFunctionDescriptors= Configure the location of a file containing diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c index a0fab46a193..86b66e2be0c 100644 --- a/src/basic/unit-def.c +++ b/src/basic/unit-def.c @@ -201,6 +201,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = { [SERVICE_FAILED] = "failed", [SERVICE_DEAD_BEFORE_AUTO_RESTART] = "dead-before-auto-restart", [SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart", + [SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned", [SERVICE_AUTO_RESTART] = "auto-restart", [SERVICE_CLEANING] = "cleaning", }; diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h index 2fab42e9c72..169e1f719ec 100644 --- a/src/basic/unit-def.h +++ b/src/basic/unit-def.h @@ -146,6 +146,7 @@ typedef enum ServiceState { SERVICE_FAILED, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, + SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */ SERVICE_AUTO_RESTART, SERVICE_CLEANING, _SERVICE_STATE_MAX, diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index d77842bdabd..ce20183a707 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -49,7 +49,7 @@ BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutput); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_utmp_mode, exec_utmp_mode, ExecUtmpMode); -static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode); +BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_keyring_mode, exec_keyring_mode, ExecKeyringMode); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_protect_proc, protect_proc, ProtectProc); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_proc_subset, proc_subset, ProcSubset); @@ -1318,7 
+1318,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("LockPersonality", "b", bus_property_get_bool, offsetof(ExecContext, lock_personality), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectorySymlink", "a(sst)", bus_property_get_exec_dir_symlink, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME]), SD_BUS_VTABLE_PROPERTY_CONST), - SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RuntimeDirectoryPreserve", "s", bus_property_get_exec_preserve_mode, offsetof(ExecContext, runtime_directory_preserve_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectoryMode", "u", bus_property_get_mode, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME].mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RuntimeDirectory", "as", bus_property_get_exec_dir, offsetof(ExecContext, directories[EXEC_DIRECTORY_RUNTIME]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("StateDirectorySymlink", "a(sst)", bus_property_get_exec_dir_symlink, offsetof(ExecContext, directories[EXEC_DIRECTORY_STATE]), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1695,7 +1695,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_home, ProtectHome, protect_home_fr static BUS_DEFINE_SET_TRANSIENT_PARSE(keyring_mode, ExecKeyringMode, exec_keyring_mode_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(protect_proc, ProtectProc, protect_proc_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(proc_subset, ProcSubset, proc_subset_from_string); -static BUS_DEFINE_SET_TRANSIENT_PARSE(preserve_mode, ExecPreserveMode, exec_preserve_mode_from_string); +BUS_DEFINE_SET_TRANSIENT_PARSE(exec_preserve_mode, ExecPreserveMode, exec_preserve_mode_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE_PTR(personality, unsigned long, 
parse_personality); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(secure_bits, "i", int32_t, int, "%" PRIi32, secure_bits_to_string_alloc_with_check); static BUS_DEFINE_SET_TRANSIENT_TO_STRING_ALLOC(capability, "t", uint64_t, uint64_t, "%" PRIu64, capability_set_to_string); @@ -2047,7 +2047,7 @@ int bus_exec_context_set_transient_property( return bus_set_transient_proc_subset(u, name, &c->proc_subset, message, flags, error); if (streq(name, "RuntimeDirectoryPreserve")) - return bus_set_transient_preserve_mode(u, name, &c->runtime_directory_preserve_mode, message, flags, error); + return bus_set_transient_exec_preserve_mode(u, name, &c->runtime_directory_preserve_mode, message, flags, error); if (streq(name, "UMask")) return bus_set_transient_mode_t(u, name, &c->umask, message, flags, error); diff --git a/src/core/dbus-execute.h b/src/core/dbus-execute.h index c53834140e5..5926bdb4b13 100644 --- a/src/core/dbus-execute.h +++ b/src/core/dbus-execute.h @@ -28,6 +28,8 @@ int bus_property_get_exec_output(sd_bus *bus, const char *path, const char *inte int bus_property_get_exec_command(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error); int bus_property_get_exec_command_list(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error); int bus_property_get_exec_ex_command_list(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error); +int bus_property_get_exec_preserve_mode(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error); int bus_exec_context_set_transient_property(Unit *u, ExecContext *c, const char *name, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); int bus_set_transient_exec_command(Unit *u, const char 
*name, ExecCommand **exec_command, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); +int bus_set_transient_exec_preserve_mode(Unit *u, const char *name, ExecPreserveMode *p, sd_bus_message *message, UnitWriteFlags flags, sd_bus_error *error); diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index a563624fce9..ecab3f479d9 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -349,6 +349,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("BusName", "s", NULL, offsetof(Service, bus_name), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("FileDescriptorStoreMax", "u", bus_property_get_unsigned, offsetof(Service, n_fd_store_max), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NFileDescriptorStore", "u", property_get_size_as_uint32, offsetof(Service, n_fd_store), 0), + SD_BUS_PROPERTY("FileDescriptorStorePreserve", "s", bus_property_get_exec_preserve_mode, offsetof(Service, fd_store_preserve_mode), 0), SD_BUS_PROPERTY("StatusText", "s", NULL, offsetof(Service, status_text), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("StatusErrno", "i", bus_property_get_int, offsetof(Service, status_errno), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), @@ -599,6 +600,9 @@ static int bus_service_set_transient_property( if (streq(name, "FileDescriptorStoreMax")) return bus_set_transient_unsigned(u, name, &s->n_fd_store_max, message, flags, error); + if (streq(name, "FileDescriptorStorePreserve")) + return bus_set_transient_exec_preserve_mode(u, name, &s->fd_store_preserve_mode, message, flags, error); + if (streq(name, "NotifyAccess")) return bus_set_transient_notify_access(u, name, &s->notify_access, message, flags, error); diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index f35c7436550..ce0e2f0c5c2 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ 
b/src/core/load-fragment-gperf.gperf.in @@ -136,7 +136,7 @@ {{type}}.MountFlags, config_parse_exec_mount_propagation_flag, 0, offsetof({{type}}, exec_context.mount_propagation_flag) {{type}}.MountAPIVFS, config_parse_exec_mount_apivfs, 0, offsetof({{type}}, exec_context) {{type}}.Personality, config_parse_personality, 0, offsetof({{type}}, exec_context.personality) -{{type}}.RuntimeDirectoryPreserve, config_parse_runtime_preserve_mode, 0, offsetof({{type}}, exec_context.runtime_directory_preserve_mode) +{{type}}.RuntimeDirectoryPreserve, config_parse_exec_preserve_mode, 0, offsetof({{type}}, exec_context.runtime_directory_preserve_mode) {{type}}.RuntimeDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME].mode) {{type}}.RuntimeDirectory, config_parse_exec_directories, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_RUNTIME]) {{type}}.StateDirectoryMode, config_parse_mode, 0, offsetof({{type}}, exec_context.directories[EXEC_DIRECTORY_STATE].mode) @@ -433,6 +433,7 @@ Service.SysVStartPriority, config_parse_warn_compat, Service.NonBlocking, config_parse_bool, 0, offsetof(Service, exec_context.non_blocking) Service.BusName, config_parse_bus_name, 0, offsetof(Service, bus_name) Service.FileDescriptorStoreMax, config_parse_unsigned, 0, offsetof(Service, n_fd_store_max) +Service.FileDescriptorStorePreserve, config_parse_exec_preserve_mode, 0, offsetof(Service, fd_store_preserve_mode) Service.NotifyAccess, config_parse_notify_access, 0, offsetof(Service, notify_access) Service.Sockets, config_parse_service_sockets, 0, 0 Service.BusPolicy, config_parse_warn_compat, DISABLED_LEGACY, 0 diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 686a72402d6..cf0096263af 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -138,7 +138,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to pa DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, 
notify_access, NotifyAccess, "Failed to parse notify access specifier"); DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_home, protect_home, ProtectHome, "Failed to parse protect home value"); DEFINE_CONFIG_PARSE_ENUM(config_parse_protect_system, protect_system, ProtectSystem, "Failed to parse protect system value"); -DEFINE_CONFIG_PARSE_ENUM(config_parse_runtime_preserve_mode, exec_preserve_mode, ExecPreserveMode, "Failed to parse runtime directory preserve mode"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_preserve_mode, exec_preserve_mode, ExecPreserveMode, "Failed to parse resource preserve mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_exit_type, service_exit_type, ServiceExitType, "Failed to parse service exit type"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier"); diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index ab682ee23e7..98adf5ae056 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -101,7 +101,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_exec_selinux_context); CONFIG_PARSER_PROTOTYPE(config_parse_exec_apparmor_profile); CONFIG_PARSER_PROTOTYPE(config_parse_exec_smack_process_label); CONFIG_PARSER_PROTOTYPE(config_parse_address_families); -CONFIG_PARSER_PROTOTYPE(config_parse_runtime_preserve_mode); +CONFIG_PARSER_PROTOTYPE(config_parse_exec_preserve_mode); CONFIG_PARSER_PROTOTYPE(config_parse_exec_directories); CONFIG_PARSER_PROTOTYPE(config_parse_set_credential); CONFIG_PARSER_PROTOTYPE(config_parse_load_credential); diff --git a/src/core/service.c b/src/core/service.c index 934500abd65..7130cbf4e66 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -69,6 +69,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = { [SERVICE_FAILED] = UNIT_FAILED, 
[SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE, [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED, + [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE, [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING, [SERVICE_CLEANING] = UNIT_MAINTENANCE, }; @@ -97,6 +98,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_FAILED] = UNIT_FAILED, [SERVICE_DEAD_BEFORE_AUTO_RESTART] = UNIT_INACTIVE, [SERVICE_FAILED_BEFORE_AUTO_RESTART] = UNIT_FAILED, + [SERVICE_DEAD_RESOURCES_PINNED] = UNIT_INACTIVE, [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING, [SERVICE_CLEANING] = UNIT_MAINTENANCE, }; @@ -139,6 +141,8 @@ static void service_init(Unit *u) { s->oom_policy = _OOM_POLICY_INVALID; s->reload_begin_usec = USEC_INFINITY; s->reload_signal = SIGHUP; + + s->fd_store_preserve_mode = EXEC_PRESERVE_RESTART; } static void service_unwatch_control_pid(Service *s) { @@ -1031,8 +1035,10 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { if (s->n_fd_store_max > 0) fprintf(f, "%sFile Descriptor Store Max: %u\n" + "%sFile Descriptor Store Pin: %s\n" "%sFile Descriptor Store Current: %zu\n", prefix, s->n_fd_store_max, + prefix, exec_preserve_mode_to_string(s->fd_store_preserve_mode), prefix, s->n_fd_store); service_dump_fdstore(s, f, prefix); @@ -1244,7 +1250,8 @@ static void service_set_state(Service *s, ServiceState state) { if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, - SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART)) { + SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, + SERVICE_DEAD_RESOURCES_PINNED)) { unit_unwatch_all_pids(UNIT(s)); unit_dequeue_rewatch_pids(UNIT(s)); } @@ -1351,7 +1358,8 @@ static int service_coldplug(Unit *u) { if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART, SERVICE_AUTO_RESTART, - SERVICE_CLEANING)) { + SERVICE_CLEANING, + SERVICE_DEAD_RESOURCES_PINNED)) 
{ (void) unit_enqueue_rewatch_pids(u); (void) unit_setup_exec_runtime(u); } @@ -1939,6 +1947,12 @@ static bool service_will_restart(Unit *u) { return unit_will_restart_default(u); } +static ServiceState service_determine_dead_state(Service *s) { + assert(s); + + return s->fd_store && s->fd_store_preserve_mode == EXEC_PRESERVE_YES ? SERVICE_DEAD_RESOURCES_PINNED : SERVICE_DEAD; +} + static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) { ServiceState end_state, restart_state; int r; @@ -1955,11 +1969,11 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) if (s->result == SERVICE_SUCCESS) { unit_log_success(UNIT(s)); - end_state = SERVICE_DEAD; + end_state = service_determine_dead_state(s); restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART; } else if (s->result == SERVICE_SKIP_CONDITION) { unit_log_skip(UNIT(s), service_result_to_string(s->result)); - end_state = SERVICE_DEAD; + end_state = service_determine_dead_state(s); restart_state = SERVICE_DEAD_BEFORE_AUTO_RESTART; } else { unit_log_failure(UNIT(s), service_result_to_string(s->result)); @@ -2023,6 +2037,10 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) /* Also, remove the runtime directory */ unit_destroy_runtime_data(UNIT(s), &s->exec_context); + /* Also get rid of the fd store, if that's configured. 
*/ + if (s->fd_store_preserve_mode == EXEC_PRESERVE_NO) + service_release_fd_store(s); + /* Get rid of the IPC bits of the user */ unit_unref_uid_gid(UNIT(s), true); @@ -2701,7 +2719,7 @@ static int service_start(Unit *u) { if (IN_SET(s->state, SERVICE_AUTO_RESTART, SERVICE_DEAD_BEFORE_AUTO_RESTART, SERVICE_FAILED_BEFORE_AUTO_RESTART)) return -EAGAIN; - assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED)); + assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED)); r = unit_acquire_invocation_id(u); if (r < 0) @@ -2760,7 +2778,7 @@ static int service_stop(Unit *u) { case SERVICE_AUTO_RESTART: /* A restart will be scheduled or is in progress. */ - service_set_state(s, SERVICE_DEAD); + service_set_state(s, service_determine_dead_state(s)); return 0; case SERVICE_CONDITION: @@ -2789,6 +2807,7 @@ static int service_stop(Unit *u) { case SERVICE_FAILED_BEFORE_AUTO_RESTART: case SERVICE_DEAD: case SERVICE_FAILED: + case SERVICE_DEAD_RESOURCES_PINNED: default: /* Unknown state, or unit_stop() should already have handled these */ assert_not_reached(); @@ -3397,7 +3416,7 @@ static bool service_may_gc(Unit *u) { /* Only allow collection of actually dead services, i.e. not those that are in the transitionary * SERVICE_DEAD_BEFORE_AUTO_RESTART/SERVICE_FAILED_BEFORE_AUTO_RESTART states. 
*/ - if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED)) + if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED)) return false; return true; @@ -3624,6 +3643,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { case SERVICE_DEAD_BEFORE_AUTO_RESTART: case SERVICE_FAILED_BEFORE_AUTO_RESTART: case SERVICE_AUTO_RESTART: + case SERVICE_DEAD_RESOURCES_PINNED: unit_prune_cgroup(u); break; @@ -4712,7 +4732,7 @@ int service_set_socket_fd( assert(!s->socket_peer); - if (s->state != SERVICE_DEAD) + if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_DEAD_RESOURCES_PINNED)) return -EAGAIN; if (getpeername_pretty(fd, true, &peer_text) >= 0) { @@ -4749,7 +4769,7 @@ static void service_reset_failed(Unit *u) { assert(s); if (s->state == SERVICE_FAILED) - service_set_state(s, SERVICE_DEAD); + service_set_state(s, service_determine_dead_state(s)); s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; @@ -4920,14 +4940,19 @@ static void service_release_resources(Unit *u) { /* Don't release resources if this is a transitionary failed/dead state * (i.e. SERVICE_DEAD_BEFORE_AUTO_RESTART/SERVICE_FAILED_BEFORE_AUTO_RESTART), insist on a permanent * failure state. 
*/ - if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED)) + if (!IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_DEAD_RESOURCES_PINNED)) return; log_unit_debug(u, "Releasing resources..."); service_close_socket_fd(s); service_release_stdio_fd(s); - service_release_fd_store(s); + + if (s->fd_store_preserve_mode != EXEC_PRESERVE_YES) + service_release_fd_store(s); + + if (s->state == SERVICE_DEAD_RESOURCES_PINNED && !s->fd_store) + service_set_state(s, SERVICE_DEAD); } static const char* const service_restart_table[_SERVICE_RESTART_MAX] = { diff --git a/src/core/service.h b/src/core/service.h index 03edb18e31e..1d1aa91fc6d 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -206,6 +206,7 @@ struct Service { ServiceFDStore *fd_store; size_t n_fd_store; unsigned n_fd_store_max; + ExecPreserveMode fd_store_preserve_mode; char *usb_function_descriptors; char *usb_function_strings; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 1c991ae54f2..ebbd1f7f28e 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2198,7 +2198,8 @@ static int bus_append_service_property(sd_bus_message *m, const char *field, con "USBFunctionStrings", "OOMPolicy", "TimeoutStartFailureMode", - "TimeoutStopFailureMode")) + "TimeoutStopFailureMode", + "FileDescriptorStorePreserve")) return bus_append_string(m, field, eq); if (STR_IN_SET(field, "PermissionsStartOnly",