From: Michael Paquier Date: Tue, 10 Mar 2026 03:00:05 +0000 (+0900) Subject: Switch to FATAL error for missing checkpoint record without backup_label X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=03facc1211b0ff1550f41bcd4da09329080c30f9;p=thirdparty%2Fpostgresql.git Switch to FATAL error for missing checkpoint record without backup_label Crash recovery started without a backup_label previously crashed with a PANIC if the checkpoint record could not be found. This commit lowers the report generated to be a FATAL instead. With recovery methods being more imaginative these days, this should provide more flexibility when handling PostgreSQL recovery processing in the event of a driver error, similarly to 15f68cebdcec. An extra benefit of this change is that it becomes possible to add a test to check that a FATAL is hit with an expected error message pattern. With the recovery code becoming more complicated over the last couple of years, I suspect that this will be beneficial to cover in the long-term. The original PANIC behavior has been introduced in the early days of crash recovery, as of 4d14fe0048cf (PANIC did not exist yet, the code used STOP). Author: Nitin Jadhav Discussion: https://postgr.es/m/CAMm1aWZbQ-Acp_xAxC7mX9uZZMH8+NpfepY9w=AOxbBVT9E=uA@mail.gmail.com --- diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index d55a534b138..6d2c4a86b96 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -735,7 +735,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, * can't read the last checkpoint because this allows us to * simplify processing around checkpoints. 
*/ - ereport(PANIC, + ereport(FATAL, errmsg("could not locate a valid checkpoint record at %X/%08X", LSN_FORMAT_ARGS(CheckPointLoc))); } diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 8d20488952e..36d789720a3 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -60,6 +60,7 @@ tests += { 't/049_wait_for_lsn.pl', 't/050_redo_segment_missing.pl', 't/051_effective_wal_level.pl', + 't/052_checkpoint_segment_missing.pl', ], }, } diff --git a/src/test/recovery/t/052_checkpoint_segment_missing.pl b/src/test/recovery/t/052_checkpoint_segment_missing.pl new file mode 100644 index 00000000000..da54d141f0d --- /dev/null +++ b/src/test/recovery/t/052_checkpoint_segment_missing.pl @@ -0,0 +1,59 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group +# +# Verify crash recovery behavior when the WAL segment containing the +# checkpoint record referenced by pg_controldata is missing. This +# checks the code path where there is no backup_label file, where the +# startup process should fail with FATAL and log a message about the +# missing checkpoint record. + +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('testnode'); +$node->init; +$node->append_conf('postgresql.conf', 'log_checkpoints = on'); +$node->start; + +# Force a checkpoint so as pg_controldata points to a checkpoint record we +# can target. +$node->safe_psql('postgres', 'CHECKPOINT;'); + +# Retrieve the checkpoint LSN and derive the WAL segment name. +my $checkpoint_walfile = $node->safe_psql('postgres', + "SELECT pg_walfile_name(checkpoint_lsn) FROM pg_control_checkpoint()"); + +ok($checkpoint_walfile ne '', + "derived checkpoint WAL file name: $checkpoint_walfile"); + +# Stop the node. +$node->stop('immediate'); + +# Remove the WAL segment containing the checkpoint record. +my $walpath = $node->data_dir . 
"/pg_wal/$checkpoint_walfile"; +ok(-f $walpath, "checkpoint WAL file exists before deletion: $walpath"); + +unlink $walpath + or die "could not remove WAL file $walpath: $!"; + +ok(!-e $walpath, "checkpoint WAL file removed: $walpath"); + +# Use run_log instead of node->start because this test expects that +# the server ends with an error during recovery. +run_log( + [ + 'pg_ctl', + '--pgdata' => $node->data_dir, + '--log' => $node->logfile, + 'start', + ]); + +# Confirm that recovery has failed as expected. +my $logfile = slurp_file($node->logfile()); +ok( $logfile =~ + qr/FATAL: .* could not locate a valid checkpoint record at .*/, + "FATAL logged for missing checkpoint record (no backup_label path)"); + +done_testing();