]> git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/core/service.c
core: reduce the number of stalled PIDs from the watched processes list when possible
[thirdparty/systemd.git] / src / core / service.c
index 76f1e160697f7d22f53c4554c9d4645f67368505..883ca7bbfebe52672bbd592eb77fc7e27236ac40 100644 (file)
@@ -143,8 +143,6 @@ static void service_unwatch_pid_file(Service *s) {
 }
 
 static int service_set_main_pid(Service *s, pid_t pid) {
-        pid_t ppid;
-
         assert(s);
 
         if (pid <= 1)
@@ -163,12 +161,10 @@ static int service_set_main_pid(Service *s, pid_t pid) {
 
         s->main_pid = pid;
         s->main_pid_known = true;
+        s->main_pid_alien = pid_is_my_child(pid) == 0;
 
-        if (get_process_ppid(pid, &ppid) >= 0 && ppid != getpid_cached()) {
+        if (s->main_pid_alien)
                 log_unit_warning(UNIT(s), "Supervising process "PID_FMT" which is not our child. We'll most likely not notice when it exits.", pid);
-                s->main_pid_alien = true;
-        } else
-                s->main_pid_alien = false;
 
         return 0;
 }
@@ -935,8 +931,8 @@ static int service_load_pid_file(Service *s, bool may_warn) {
         prio = may_warn ? LOG_INFO : LOG_DEBUG;
 
         fd = chase_symlinks(s->pid_file, NULL, CHASE_OPEN|CHASE_SAFE, NULL);
-        if (fd == -EPERM) {
-                log_unit_full(UNIT(s), LOG_DEBUG, fd, "Permission denied while opening PID file or potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file);
+        if (fd == -ENOLINK) {
+                log_unit_full(UNIT(s), LOG_DEBUG, fd, "Potentially unsafe symlink chain, will now retry with relaxed checks: %s", s->pid_file);
 
                 questionable_pid_file = true;
 
@@ -994,7 +990,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
         if (r < 0)
                 return r;
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, false);
         if (r < 0) /* FIXME: we need to do something here */
                 return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
 
@@ -1024,7 +1020,7 @@ static void service_search_main_pid(Service *s) {
         if (service_set_main_pid(s, pid) < 0)
                 return;
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, false);
         if (r < 0)
                 /* FIXME: we need to do something here */
                 log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
@@ -1158,7 +1154,7 @@ static int service_coldplug(Unit *u) {
                     SERVICE_RUNNING, SERVICE_RELOAD,
                     SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                     SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
-                r = unit_watch_pid(UNIT(s), s->main_pid);
+                r = unit_watch_pid(UNIT(s), s->main_pid, false);
                 if (r < 0)
                         return r;
         }
@@ -1170,7 +1166,7 @@ static int service_coldplug(Unit *u) {
                    SERVICE_RELOAD,
                    SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
-                r = unit_watch_pid(UNIT(s), s->control_pid);
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
                 if (r < 0)
                         return r;
         }
@@ -1458,7 +1454,7 @@ static int service_spawn(
         if (r < 0)
                 return r;
 
-        our_env = new0(char*, 9);
+        our_env = new0(char*, 10);
         if (!our_env)
                 return -ENOMEM;
 
@@ -1474,6 +1470,10 @@ static int service_spawn(
                 if (asprintf(our_env + n_env++, "MANAGERPID="PID_FMT, getpid_cached()) < 0)
                         return -ENOMEM;
 
+        if (s->pid_file)
+                if (asprintf(our_env + n_env++, "PIDFILE=%s", s->pid_file) < 0)
+                        return -ENOMEM;
+
         if (s->socket_fd >= 0) {
                 union sockaddr_union sa;
                 socklen_t salen = sizeof(sa);
@@ -1535,9 +1535,6 @@ static int service_spawn(
         if (!final_env)
                 return -ENOMEM;
 
-        /* System services should get a new keyring by default. */
-        SET_FLAG(exec_params.flags, EXEC_NEW_KEYRING, MANAGER_IS_SYSTEM(UNIT(s)->manager));
-
         /* System D-Bus needs nss-systemd disabled, so that we don't deadlock */
         SET_FLAG(exec_params.flags, EXEC_NSS_BYPASS_BUS,
                  MANAGER_IS_SYSTEM(UNIT(s)->manager) && unit_has_name(UNIT(s), SPECIAL_DBUS_SERVICE));
@@ -1569,8 +1566,8 @@ static int service_spawn(
         s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
         s->exec_fd_hot = false;
 
-        r = unit_watch_pid(UNIT(s), pid);
-        if (r < 0) /* FIXME: we need to do something here */
+        r = unit_watch_pid(UNIT(s), pid, true);
+        if (r < 0)
                 return r;
 
         *_pid = pid;
@@ -2009,6 +2006,26 @@ static void service_kill_control_process(Service *s) {
         }
 }
 
+static int service_adverse_to_leftover_processes(Service *s) {
+        assert(s);
+
+        /* KillMode=mixed and control group are used to indicate that all process should be killed off.
+         * SendSIGKILL is used for services that require a clean shutdown. These are typically database
+         * service where a SigKilled process would result in a lengthy recovery and who's shutdown or
+         * startup time is quite variable (so Timeout settings aren't of use).
+         *
+         * Here we take these two factors and refuse to start a service if there are existing processes
+         * within a control group. Databases, while generally having some protection against multiple
+         * instances running, lets not stress the rigor of these. Also ExecStartPre parts of the service
+         * aren't as rigoriously written to protect aganst against multiple use. */
+        if (unit_warn_leftover_processes(UNIT(s)) &&
+            IN_SET(s->kill_context.kill_mode, KILL_MIXED, KILL_CONTROL_GROUP) &&
+            !s->kill_context.send_sigkill) {
+               return log_unit_error_errno(UNIT(s), SYNTHETIC_ERRNO(EBUSY), "Will not start SendSIGKILL=no service of type KillMode=control-group or mixed while processes exist");
+        }
+        return 0;
+}
+
 static void service_enter_start(Service *s) {
         ExecCommand *c;
         usec_t timeout;
@@ -2020,7 +2037,9 @@ static void service_enter_start(Service *s) {
         service_unwatch_control_pid(s);
         service_unwatch_main_pid(s);
 
-        unit_warn_leftover_processes(UNIT(s));
+        r = service_adverse_to_leftover_processes(s);
+        if (r < 0)
+                goto fail;
 
         if (s->type == SERVICE_FORKING) {
                 s->control_command_id = SERVICE_EXEC_START;
@@ -2113,7 +2132,9 @@ static void service_enter_start_pre(Service *s) {
         s->control_command = s->exec_command[SERVICE_EXEC_START_PRE];
         if (s->control_command) {
 
-                unit_warn_leftover_processes(UNIT(s));
+                r = service_adverse_to_leftover_processes(s);
+                if (r < 0)
+                        goto fail;
 
                 s->control_command_id = SERVICE_EXEC_START_PRE;
 
@@ -2157,7 +2178,7 @@ static void service_enter_restart(Service *s) {
          * restarted. We use JOB_RESTART (instead of the more obvious
          * JOB_START) here so that those dependency jobs will be added
          * as well. */
-        r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_FAIL, &error, NULL);
+        r = manager_add_job(UNIT(s)->manager, JOB_RESTART, UNIT(s), JOB_REPLACE, &error, NULL);
         if (r < 0)
                 goto fail;
 
@@ -2339,7 +2360,7 @@ static int service_start(Unit *u) {
         assert(IN_SET(s->state, SERVICE_DEAD, SERVICE_FAILED));
 
         /* Make sure we don't enter a busy loop of some kind. */
-        r = unit_start_limit_test(u);
+        r = unit_test_start_limit(u);
         if (r < 0) {
                 service_enter_dead(s, SERVICE_FAILURE_START_LIMIT_HIT, false);
                 return r;
@@ -3189,11 +3210,19 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         bool notify_dbus = true;
         Service *s = SERVICE(u);
         ServiceResult f;
+        ExitClean clean_mode;
 
         assert(s);
         assert(pid >= 0);
 
-        if (is_clean_exit(code, status, s->type == SERVICE_ONESHOT ? EXIT_CLEAN_COMMAND : EXIT_CLEAN_DAEMON, &s->success_status))
+        /* Oneshot services and non-SERVICE_EXEC_START commands should not be
+         * considered daemons as they are typically not long running. */
+        if (s->type == SERVICE_ONESHOT || (s->control_pid == pid && s->control_command_id != SERVICE_EXEC_START))
+                clean_mode = EXIT_CLEAN_COMMAND;
+        else
+                clean_mode = EXIT_CLEAN_DAEMON;
+
+        if (is_clean_exit(code, status, clean_mode, &s->success_status))
                 f = SERVICE_SUCCESS;
         else if (code == CLD_EXITED)
                 f = SERVICE_FAILURE_EXIT_CODE;
@@ -3676,7 +3705,7 @@ static void service_notify_message(
                         if (r > 0) {
                                 service_set_main_pid(s, new_main_pid);
 
-                                r = unit_watch_pid(UNIT(s), new_main_pid);
+                                r = unit_watch_pid(UNIT(s), new_main_pid, false);
                                 if (r < 0)
                                         log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid);
 
@@ -3894,7 +3923,7 @@ static void service_bus_name_owner_change(
                         log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
 
                         service_set_main_pid(s, pid);
-                        unit_watch_pid(UNIT(s), pid);
+                        unit_watch_pid(UNIT(s), pid, false);
                 }
         }
 }