git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Handle data_checksum state changes during launcher_exit
author Daniel Gustafsson <dgustafsson@postgresql.org>
Thu, 30 Apr 2026 11:41:46 +0000 (13:41 +0200)
committer Daniel Gustafsson <dgustafsson@postgresql.org>
Thu, 30 Apr 2026 11:41:46 +0000 (13:41 +0200)
When erroring out from the datachecksums launcher during data checksum
enabling, before the state has transitioned to "on", we revert back to
the "off" state.  Since checksums weren't enabled, there is no use in
staying in an inprogress state, as the checksum launcher currently
doesn't support restarting from where it left off.  Should
restartability get added in the future, this would need to be
revisited.  This state transition was, however, missing from the
allowed transitions in the state machine, causing an error.

Author: Daniel Gustafsson <daniel@yesql.se>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Reviewed-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com>
Reviewed-by: SATYANARAYANA NARLAPURAM <satyanarlapuram@gmail.com>
Discussion: https://postgr.es/m/9197F930-DDEB-4CAC-82A2-16FEC715CCE8@yesql.se

src/backend/access/transam/xlog.c
src/backend/postmaster/datachecksum_state.c

index e39af79c03b5e3dc23a7a2235819b586c9a069a3..f74d7a2ab1a282aa8a5402daece40698c0aeeedc 100644 (file)
@@ -4871,13 +4871,14 @@ SetDataChecksumsOff(void)
        }
 
        /*
-        * If data checksums are currently enabled we first transition to the
-        * "inprogress-off" state during which backends continue to write
-        * checksums without verifying them. When all backends are in
-        * "inprogress-off" the next transition to "off" can be performed, after
-        * which all data checksum processing is disabled.
-        */
-       if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
+        * If data checksums are currently enabled, or in the process of being
+        * enabled, we first transition to the "inprogress-off" state during which
+        * backends continue to write checksums without verifying them. When all
+        * backends are in "inprogress-off" the next transition to "off" can be
+        * performed, after which all data checksum processing is disabled.
+        */
+       if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_VERSION ||
+               XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON)
        {
                SpinLockRelease(&XLogCtl->info_lck);
 
index ea10208614472362b23f78c77249ae74111e83e3..77d0316b5cbd51d8f98253ff4c63026ca9cf89de 100644 (file)
@@ -235,7 +235,7 @@ typedef struct ChecksumBarrierCondition
        int                     to;
 } ChecksumBarrierCondition;
 
-static const ChecksumBarrierCondition checksum_barriers[6] =
+static const ChecksumBarrierCondition checksum_barriers[7] =
 {
        /*
         * Disabling checksums: If checksums are currently enabled, disabling must
@@ -261,6 +261,12 @@ static const ChecksumBarrierCondition checksum_barriers[6] =
         * checksums, we can go straight back to 'on'
         */
        {PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_VERSION},
+
+       /*
+        * If checksums are being enabled when launcher_exit is executed, state
+        * is set to 'off' since we cannot reach 'on' at that point.
+        */
+       {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_INPROGRESS_OFF},
 };
 
 /*
@@ -771,7 +777,9 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db)
        pid_t           pid;
        char            activity[NAMEDATALEN + 64];
 
+       LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE);
        DataChecksumState->success = DATACHECKSUMSWORKER_FAILED;
+       LWLockRelease(DataChecksumsWorkerLock);
 
        memset(&bgw, 0, sizeof(bgw));
        bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
@@ -881,10 +889,12 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db)
 /*
  * launcher_exit
  *
- * Internal routine for cleaning up state when the launcher process exits. We
- * need to clean up the abort flag to ensure that processing started again if
- * it was previously aborted (note: started again, *not* restarted from where
- * it left off).
+ * Internal routine for cleaning up state when a launcher process which has
+ * performed checksum operations exits. A launcher process which is exiting due
+ * to a duplicate started launcher does not need to perform any cleanup and
+ * this function should not be called. Otherwise, we need to clean up the abort
+ * flag to ensure that processing can be started again if it was previously
+ * aborted (note: started again, *not* restarted from where it left off).
  */
 static void
 launcher_exit(int code, Datum arg)
@@ -1016,7 +1026,6 @@ WaitForAllTransactionsToFinish(void)
 void
 DataChecksumsWorkerLauncherMain(Datum arg)
 {
-       on_shmem_exit(launcher_exit, 0);
 
        ereport(DEBUG1,
                        errmsg("background worker \"datachecksums launcher\" started"));
@@ -1044,6 +1053,7 @@ DataChecksumsWorkerLauncherMain(Datum arg)
                return;
        }
 
+       on_shmem_exit(launcher_exit, 0);
        launcher_running = true;
 
        /* Initialize a connection to shared catalogs only */