]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
core: reduce the number of stalled PIDs from the watched processes list when possible
authorFranck Bui <fbui@suse.com>
Mon, 18 Mar 2019 19:59:36 +0000 (20:59 +0100)
committerFranck Bui <fbui@suse.com>
Wed, 20 Mar 2019 09:51:49 +0000 (10:51 +0100)
Some PIDs can remain in the watched list even though their processes have
exited since a long time. It can easily happen if the main process of a forking
service manages to spawn a child before the control process exits for example.

However when a pid is about to be mapped to a unit by calling unit_watch_pid(),
the caller usually knows if the pid should belong to this unit exclusively: if
we just forked() off a child, then we can be sure that its PID is otherwise
unused. In this case we take this opportunity to remove any stalled PIDs from
the watched process list.

If we learnt about a PID in any other form (for example via PID file, via
searching, MAINPID= and so on), then we can't assume anything.

src/core/cgroup.c
src/core/dbus-scope.c
src/core/manager.c
src/core/manager.h
src/core/mount.c
src/core/service.c
src/core/socket.c
src/core/swap.c
src/core/unit.c
src/core/unit.h
src/test/test-watch-pid.c

index 8db044befa1ded133b2d2335ba44ff3e6ddbd158..ad67ba043892db86d64cc9442ae5a3121f9bad1a 100644 (file)
@@ -2319,7 +2319,7 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
                 pid_t pid;
 
                 while ((r = cg_read_pid(f, &pid)) > 0) {
-                        r = unit_watch_pid(u, pid);
+                        r = unit_watch_pid(u, pid, false);
                         if (r < 0 && ret >= 0)
                                 ret = r;
                 }
index bb807df2e928357a5f4cf3985d5473042fbad771..8eb915e5084b9ba3c816142ddd1b5fafa2c71875 100644 (file)
@@ -106,7 +106,7 @@ static int bus_scope_set_transient_property(
                                 return r;
 
                         if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
-                                r = unit_watch_pid(UNIT(s), pid);
+                                r = unit_watch_pid(UNIT(s), pid, false);
                                 if (r < 0 && r != -EEXIST)
                                         return r;
                         }
index 22d1fda2dcd0437476a7ec45afaeabd62c379d3f..33785d905f7357fab56b14f259327a4ad988b88b 100644 (file)
@@ -2129,6 +2129,16 @@ void manager_clear_jobs(Manager *m) {
                 job_finish_and_invalidate(j, JOB_CANCELED, false, false);
 }
 
+void manager_unwatch_pid(Manager *m, pid_t pid) {
+        assert(m);
+
+        /* First let's drop the unit keyed as "pid". */
+        (void) hashmap_remove(m->watch_pids, PID_TO_PTR(pid));
+
+        /* Then, let's also drop the array keyed by -pid. */
+        free(hashmap_remove(m->watch_pids, PID_TO_PTR(-pid)));
+}
+
 static int manager_dispatch_run_queue(sd_event_source *source, void *userdata) {
         Manager *m = userdata;
         Job *j;
index bce8020cfd4fa32db7dcdd94790cc2d4e96400fb..c43c587a79ddc295ebaedfc72434c6d28b39a719 100644 (file)
@@ -437,6 +437,8 @@ int manager_get_dump_string(Manager *m, char **ret);
 
 void manager_clear_jobs(Manager *m);
 
+void manager_unwatch_pid(Manager *m, pid_t pid);
+
 unsigned manager_dispatch_load_queue(Manager *m);
 
 int manager_default_environment(Manager *m);
index a991bf8794d418637b4eb887d5e38fa5501d8539..4fb4b0f81e389bdb34cb317dcc94ba69ecf4eabd 100644 (file)
@@ -704,7 +704,7 @@ static int mount_coldplug(Unit *u) {
             pid_is_unwaited(m->control_pid) &&
             MOUNT_STATE_WITH_PROCESS(new_state)) {
 
-                r = unit_watch_pid(UNIT(m), m->control_pid);
+                r = unit_watch_pid(UNIT(m), m->control_pid, false);
                 if (r < 0)
                         return r;
 
@@ -810,9 +810,8 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
         if (r < 0)
                 return r;
 
-        r = unit_watch_pid(UNIT(m), pid);
+        r = unit_watch_pid(UNIT(m), pid, true);
         if (r < 0)
-                /* FIXME: we need to do something here */
                 return r;
 
         *_pid = pid;
index 4682e86f8ee7aeab742b8947d0793788ca0a7091..883ca7bbfebe52672bbd592eb77fc7e27236ac40 100644 (file)
@@ -990,7 +990,7 @@ static int service_load_pid_file(Service *s, bool may_warn) {
         if (r < 0)
                 return r;
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, false);
         if (r < 0) /* FIXME: we need to do something here */
                 return log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" for service: %m", pid);
 
@@ -1020,7 +1020,7 @@ static void service_search_main_pid(Service *s) {
         if (service_set_main_pid(s, pid) < 0)
                 return;
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, false);
         if (r < 0)
                 /* FIXME: we need to do something here */
                 log_unit_warning_errno(UNIT(s), r, "Failed to watch PID "PID_FMT" from: %m", pid);
@@ -1154,7 +1154,7 @@ static int service_coldplug(Unit *u) {
                     SERVICE_RUNNING, SERVICE_RELOAD,
                     SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                     SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL))) {
-                r = unit_watch_pid(UNIT(s), s->main_pid);
+                r = unit_watch_pid(UNIT(s), s->main_pid, false);
                 if (r < 0)
                         return r;
         }
@@ -1166,7 +1166,7 @@ static int service_coldplug(Unit *u) {
                    SERVICE_RELOAD,
                    SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
-                r = unit_watch_pid(UNIT(s), s->control_pid);
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
                 if (r < 0)
                         return r;
         }
@@ -1566,8 +1566,8 @@ static int service_spawn(
         s->exec_fd_event_source = TAKE_PTR(exec_fd_source);
         s->exec_fd_hot = false;
 
-        r = unit_watch_pid(UNIT(s), pid);
-        if (r < 0) /* FIXME: we need to do something here */
+        r = unit_watch_pid(UNIT(s), pid, true);
+        if (r < 0)
                 return r;
 
         *_pid = pid;
@@ -3705,7 +3705,7 @@ static void service_notify_message(
                         if (r > 0) {
                                 service_set_main_pid(s, new_main_pid);
 
-                                r = unit_watch_pid(UNIT(s), new_main_pid);
+                                r = unit_watch_pid(UNIT(s), new_main_pid, false);
                                 if (r < 0)
                                         log_unit_warning_errno(UNIT(s), r, "Failed to watch new main PID "PID_FMT" for service: %m", new_main_pid);
 
@@ -3923,7 +3923,7 @@ static void service_bus_name_owner_change(
                         log_unit_debug(u, "D-Bus name %s is now owned by process " PID_FMT, name, pid);
 
                         service_set_main_pid(s, pid);
-                        unit_watch_pid(UNIT(s), pid);
+                        unit_watch_pid(UNIT(s), pid, false);
                 }
         }
 }
index fd550290bea5b72145b0754c7c37f20e3c017115..1396842b26926eb1444418ee8636596052ad0943 100644 (file)
@@ -1852,7 +1852,7 @@ static int socket_coldplug(Unit *u) {
                    SOCKET_FINAL_SIGTERM,
                    SOCKET_FINAL_SIGKILL)) {
 
-                r = unit_watch_pid(UNIT(s), s->control_pid);
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
                 if (r < 0)
                         return r;
 
@@ -1938,9 +1938,8 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
         if (r < 0)
                 return r;
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, true);
         if (r < 0)
-                /* FIXME: we need to do something here */
                 return r;
 
         *_pid = pid;
@@ -2009,7 +2008,7 @@ static int socket_chown(Socket *s, pid_t *_pid) {
                 _exit(EXIT_SUCCESS);
         }
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, true);
         if (r < 0)
                 goto fail;
 
index b836fdd3046764460b22c0cd2d96e652c2720deb..28acef237399b01b9780d364886732b83301bbe3 100644 (file)
@@ -550,7 +550,7 @@ static int swap_coldplug(Unit *u) {
             pid_is_unwaited(s->control_pid) &&
             SWAP_STATE_WITH_PROCESS(new_state)) {
 
-                r = unit_watch_pid(UNIT(s), s->control_pid);
+                r = unit_watch_pid(UNIT(s), s->control_pid, false);
                 if (r < 0)
                         return r;
 
@@ -657,9 +657,8 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
         if (r < 0)
                 goto fail;
 
-        r = unit_watch_pid(UNIT(s), pid);
+        r = unit_watch_pid(UNIT(s), pid, true);
         if (r < 0)
-                /* FIXME: we need to do something here */
                 goto fail;
 
         *_pid = pid;
index 445cf6695c8cdae0400272c32c60700344f8f1fc..5620fd90178734f30a2e60a978c529e6240d283a 100644 (file)
@@ -2525,7 +2525,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlag
         unit_add_to_gc_queue(u);
 }
 
-int unit_watch_pid(Unit *u, pid_t pid) {
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive) {
         int r;
 
         assert(u);
@@ -2533,6 +2533,12 @@ int unit_watch_pid(Unit *u, pid_t pid) {
 
         /* Watch a specific PID */
 
+        /* Caller might be sure that this PID belongs to this unit only. Let's take this
+         * opportunity to remove any stalled references to this PID as they can be created
+         * easily (when watching a process which is not our direct child). */
+        if (exclusive)
+                manager_unwatch_pid(u->manager, pid);
+
         r = set_ensure_allocated(&u->pids, NULL);
         if (r < 0)
                 return r;
index 35b936771c98bb4e95ccfa57e22d44f0d8242e77..28878b5f4f4e87d6ef21ff82e021770eb550757b 100644 (file)
@@ -678,7 +678,7 @@ typedef enum UnitNotifyFlags {
 
 void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, UnitNotifyFlags flags);
 
-int unit_watch_pid(Unit *u, pid_t pid);
+int unit_watch_pid(Unit *u, pid_t pid, bool exclusive);
 void unit_unwatch_pid(Unit *u, pid_t pid);
 void unit_unwatch_all_pids(Unit *u);
 
index 2c6ca0a1a292f51ca52e1cb552a79d3967af786e..04d7e1fbd850faf82cd13da79fe6fa8491d66ac7 100644 (file)
@@ -42,25 +42,25 @@ int main(int argc, char *argv[]) {
         assert_se(hashmap_isempty(m->watch_pids));
         assert_se(manager_get_unit_by_pid(m, 4711) == NULL);
 
-        assert_se(unit_watch_pid(a, 4711) >= 0);
+        assert_se(unit_watch_pid(a, 4711, false) >= 0);
         assert_se(manager_get_unit_by_pid(m, 4711) == a);
 
-        assert_se(unit_watch_pid(a, 4711) >= 0);
+        assert_se(unit_watch_pid(a, 4711, false) >= 0);
         assert_se(manager_get_unit_by_pid(m, 4711) == a);
 
-        assert_se(unit_watch_pid(b, 4711) >= 0);
+        assert_se(unit_watch_pid(b, 4711, false) >= 0);
         u = manager_get_unit_by_pid(m, 4711);
         assert_se(u == a || u == b);
 
-        assert_se(unit_watch_pid(b, 4711) >= 0);
+        assert_se(unit_watch_pid(b, 4711, false) >= 0);
         u = manager_get_unit_by_pid(m, 4711);
         assert_se(u == a || u == b);
 
-        assert_se(unit_watch_pid(c, 4711) >= 0);
+        assert_se(unit_watch_pid(c, 4711, false) >= 0);
         u = manager_get_unit_by_pid(m, 4711);
         assert_se(u == a || u == b || u == c);
 
-        assert_se(unit_watch_pid(c, 4711) >= 0);
+        assert_se(unit_watch_pid(c, 4711, false) >= 0);
         u = manager_get_unit_by_pid(m, 4711);
         assert_se(u == a || u == b || u == c);