From: Daniel Gustafsson Date: Thu, 30 Apr 2026 11:41:46 +0000 (+0200) Subject: Handle data_checksum state changes during launcher_exit X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8fb8ded8895620809bb71188476fbc2aec0f419d;p=thirdparty%2Fpostgresql.git Handle data_checksum state changes during launcher_exit When erroring out from the datachecksums launcher during data checksum enabling, before state has transitioned to "on", we revert back to the "off" state. Since checksums weren't enabled, there is no use staying in an inprogress state since the checksum launcher currently doesn't support restarting from where it left off. Should restartability get added in the future, this would need to be revisited. This state transition was however missing from the allowed transitions in the statemachine causing an error. Author: Daniel Gustafsson Reviewed-by: Tomas Vondra Reviewed-by: Ayush Tiwari Reviewed-by: SATYANARAYANA NARLAPURAM Discussion: https://postgr.es/m/9197F930-DDEB-4CAC-82A2-16FEC715CCE8@yesql.se --- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e39af79c03b..f74d7a2ab1a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4871,13 +4871,14 @@ SetDataChecksumsOff(void) } /* - * If data checksums are currently enabled we first transition to the - * "inprogress-off" state during which backends continue to write - * checksums without verifying them. When all backends are in - * "inprogress-off" the next transition to "off" can be performed, after - * which all data checksum processing is disabled. - */ - if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_VERSION) + * If data checksums are currently enabled, or in the process of being + * enabled, we first transition to the "inprogress-off" state during which + * backends continue to write checksums without verifying them. 
When all + * backends are in "inprogress-off" the next transition to "off" can be + * performed, after which all data checksum processing is disabled. + */ + if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_VERSION || + XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON) { SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/postmaster/datachecksum_state.c b/src/backend/postmaster/datachecksum_state.c index ea102086144..77d0316b5cb 100644 --- a/src/backend/postmaster/datachecksum_state.c +++ b/src/backend/postmaster/datachecksum_state.c @@ -235,7 +235,7 @@ typedef struct ChecksumBarrierCondition int to; } ChecksumBarrierCondition; -static const ChecksumBarrierCondition checksum_barriers[6] = +static const ChecksumBarrierCondition checksum_barriers[7] = { /* * Disabling checksums: If checksums are currently enabled, disabling must @@ -261,6 +261,12 @@ static const ChecksumBarrierCondition checksum_barriers[6] = * checksums, we can go straight back to 'on' */ {PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_VERSION}, + + /* + * If checksums are being enabled when launcher_exit is executed, state + * is set to off since we cannot reach on at that point. + */ + {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_INPROGRESS_OFF}, }; /* @@ -771,7 +777,9 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) pid_t pid; char activity[NAMEDATALEN + 64]; + LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); DataChecksumState->success = DATACHECKSUMSWORKER_FAILED; + LWLockRelease(DataChecksumsWorkerLock); memset(&bgw, 0, sizeof(bgw)); bgw.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; @@ -881,10 +889,12 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) /* * launcher_exit * - * Internal routine for cleaning up state when the launcher process exits. 
We - * need to clean up the abort flag to ensure that processing started again if - * it was previously aborted (note: started again, *not* restarted from where - * it left off). + * Internal routine for cleaning up state when a launcher process which has + * performed checksum operations exits. A launcher process which is exiting due + * to a duplicate started launcher does not need to perform any cleanup and + * this function should not be called. Otherwise, we need to clean up the abort + * flag to ensure that processing is started again if it was previously aborted + * (note: started again, *not* restarted from where it left off). */ static void launcher_exit(int code, Datum arg) @@ -1016,7 +1026,6 @@ WaitForAllTransactionsToFinish(void) void DataChecksumsWorkerLauncherMain(Datum arg) { - on_shmem_exit(launcher_exit, 0); ereport(DEBUG1, errmsg("background worker \"datachecksums launcher\" started")); @@ -1044,6 +1053,7 @@ DataChecksumsWorkerLauncherMain(Datum arg) return; } + on_shmem_exit(launcher_exit, 0); launcher_running = true; /* Initialize a connection to shared catalogs only */