Fix orphaned processes when startup process fails during PM_STARTUP

author Michael Paquier <michael@paquier.xyz>
Tue, 21 Apr 2026 00:39:59 +0000 (09:39 +0900)
committer Michael Paquier <michael@paquier.xyz>
Tue, 21 Apr 2026 00:39:59 +0000 (09:39 +0900)
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b6fd332f1964bfa2a1829a4db60963164e5c5e57..90c7c4528e87258ecac4313f8100f6f89c9fc771 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -304,12 +304,13 @@ static bool FatalError = false; /* T if recovering from backend crash */
   *
   * When the startup process is ready to start archive recovery, it signals the
   * postmaster, and we switch to PM_RECOVERY state. The background writer and
- * checkpointer are launched, while the startup process continues applying WAL.
- * If Hot Standby is enabled, then, after reaching a consistent point in WAL
- * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
- * state and begin accepting connections to perform read-only queries.  When
- * archive recovery is finished, the startup process exits with exit code 0
- * and we switch to PM_RUN state.
+ * checkpointer are already running (as these are launched during PM_STARTUP),
+ * and the startup process continues applying WAL.  If Hot Standby is enabled,
+ * then, after reaching a consistent point in WAL redo, the startup process
+ * signals us again, and we switch to PM_HOT_STANDBY state and begin accepting
+ * connections to perform read-only queries.  When archive recovery is
+ * finished, the startup process exits with exit code 0 and we switch to
+ * PM_RUN state.
   *
   * Normal child backends can only be launched when we are in PM_RUN or
   * PM_HOT_STANDBY state.  (connsAllowed can also restrict launching.)
@@ -2305,29 +2306,13 @@ process_pm_child_exit(void)
                         }
  
                         /*
-                        * Unexpected exit of startup process (including FATAL exit)
-                        * during PM_STARTUP is treated as catastrophic. There are no
-                        * other processes running yet, so we can just exit.
-                        */
-                       if (pmState == PM_STARTUP &&
-                               StartupStatus != STARTUP_SIGNALED &&
-                               !EXIT_STATUS_0(exitstatus))
-                       {
-                               LogChildExit(LOG, _("startup process"),
-                                                        pid, exitstatus);
-                               ereport(LOG,
-                                               (errmsg("aborting startup due to startup process failure")));
-                               ExitPostmaster(1);
-                       }
-
-                       /*
-                        * After PM_STARTUP, any unexpected exit (including FATAL exit) of
-                        * the startup process is catastrophic, so kill other children,
-                        * and set StartupStatus so we don't try to reinitialize after
-                        * they're gone.  Exception: if StartupStatus is STARTUP_SIGNALED,
-                        * then we previously sent the startup process a SIGQUIT; so
-                        * that's probably the reason it died, and we do want to try to
-                        * restart in that case.
+                        * Any unexpected exit (including FATAL exit) of the startup
+                        * process is catastrophic, so kill other children, and set
+                        * StartupStatus so we don't try to reinitialize after they're
+                        * gone.  Exception: if StartupStatus is STARTUP_SIGNALED, then we
+                        * previously sent the startup process a SIGQUIT; so that's
+                        * probably the reason it died, and we do want to try to restart
+                        * in that case.
                          *
                          * This stanza also handles the case where we sent a SIGQUIT
                          * during PM_STARTUP due to some dead-end child crashing: in that
@@ -2780,12 +2765,9 @@ HandleFatalError(QuitSignalReason reason, bool consider_sigabrt)
                         /* shouldn't have any children */
                         Assert(false);
                         break;
-               case PM_STARTUP:
-                       /* should have been handled in process_pm_child_exit */
-                       Assert(false);
-                       break;
  
                         /* wait for children to die */
+               case PM_STARTUP:
                 case PM_RECOVERY:
                 case PM_HOT_STANDBY:
                 case PM_RUN:
author	Michael Paquier <michael@paquier.xyz>
	Tue, 21 Apr 2026 00:39:59 +0000 (09:39 +0900)
committer	Michael Paquier <michael@paquier.xyz>
	Tue, 21 Apr 2026 00:39:59 +0000 (09:39 +0900)