From: Fujii Masao Date: Mon, 6 Apr 2026 02:35:03 +0000 (+0900) Subject: Add wal_sender_shutdown_timeout GUC to limit shutdown wait for replication X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=a8f45dee91768cf1447ffaf2527e499e75a194c3;p=thirdparty%2Fpostgresql.git Add wal_sender_shutdown_timeout GUC to limit shutdown wait for replication Previously, during shutdown, walsenders always waited until all pending data was replicated to receivers. This ensures sender and receiver stay in sync after shutdown, which is important for physical replication switchovers, but it can significantly delay shutdown. For example, in logical replication, if apply workers are blocked on locks, walsenders may wait until those locks are released, preventing shutdown from completing for a long time. This commit introduces a new GUC, wal_sender_shutdown_timeout, which specifies the maximum time a walsender waits during shutdown for all pending data to be replicated. When set, shutdown completes once all data is replicated or the timeout expires. A value of -1 (the default) disables the timeout. This can reduce shutdown time when replication is slow or stalled. However, if the timeout is reached, the sender and receiver may be left out of sync, which can be problematic for physical replication switchovers. 
Author: Andrey Silitskiy Author: Hayato Kuroda Reviewed-by: Ashutosh Bapat Reviewed-by: Kyotaro Horiguchi Reviewed-by: Amit Kapila Reviewed-by: Dilip Kumar Reviewed-by: Masahiko Sawada Reviewed-by: Andres Freund Reviewed-by: Takamichi Osumi Reviewed-by: Peter Smith Reviewed-by: Greg Sabino Mullane Reviewed-by: Alexander Korotkov Reviewed-by: Vitaly Davydov Reviewed-by: Ronan Dunklau Reviewed-by: Michael Paquier Reviewed-by: Japin Li Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/TYAPR01MB586668E50FC2447AD7F92491F5E89@TYAPR01MB5866.jpnprd01.prod.outlook.com --- diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d3fea738ca3..b44231a362d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4791,6 +4791,48 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + wal_sender_shutdown_timeout (integer) + + wal_sender_shutdown_timeout configuration parameter + + + + + Specifies the maximum time the server waits during shutdown for all + WAL data to be replicated to the receiver. If this value is specified + without units, it is taken as milliseconds. A value of + -1 (the default) disables the timeout mechanism. + + + When replication is in use, the sending server normally waits until + all WAL data has been transferred to the receiver before completing + shutdown. This helps keep sender and receiver in sync after shutdown, + which is especially important for physical replication switchovers, + but it can delay shutdown. + + + If this parameter is set, the server stops waiting and completes + shutdown when the timeout expires. This can shorten shutdown time, + for example, when replication is slow on high-latency networks or + when a logical replication apply worker is blocked waiting for locks. + However, in this case the sender and receiver may be out of sync after + shutdown. 
+ + + This parameter can be set in primary_conninfo and + in the CONNECTION clause of + CREATE SUBSCRIPTION (for example, include + options=-cwal_sender_shutdown_timeout=10s in the + connection string), allowing different timeouts per replication + connection. For example, when both physical and logical replication + are used, it can be disabled for physical replication (e.g., for + switchovers) while enabled for logical replication to limit shutdown + time. + + + + track_commit_timestamp (boolean) diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index c3f269e0364..be8d3a5bfea 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -1190,10 +1190,12 @@ primary_slot_name = 'node_a_slot' - Users will stop waiting if a fast shutdown is requested. However, as - when using asynchronous replication, the server will not fully - shutdown until all outstanding WAL records are transferred to the currently - connected standby servers. + Users will stop waiting if a fast shutdown is requested. However, when + using replication, the server will not fully shutdown until all + outstanding WAL records are transferred to the currently connected + standby servers, or + (if set) expires, regardless of whether replication is synchronous or + asynchronous. diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index ec39942bfc1..b4a2117a7f9 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -35,6 +35,8 @@ * checkpoint finishes, the postmaster sends us SIGUSR2. This instructs * walsender to send any outstanding WAL, including the shutdown checkpoint * record, wait for it to be replicated to the standby, and then exit. + * This waiting time can be limited by the wal_sender_shutdown_timeout + * parameter. 
* * * Portions Copyright (c) 2010-2026, PostgreSQL Global Development Group @@ -140,6 +142,11 @@ int max_wal_senders = 10; /* the maximum number of concurrent * walsenders */ int wal_sender_timeout = 60 * 1000; /* maximum time to send one WAL * data message */ + +int wal_sender_shutdown_timeout = -1; /* maximum time to wait during + * shutdown for WAL + * replication */ + bool log_replication_commands = false; /* @@ -199,6 +206,9 @@ static TimestampTz last_reply_timestamp = 0; /* Have we sent a heartbeat message asking for reply, since last reply? */ static bool waiting_for_ping_response = false; +/* Timestamp when walsender received the shutdown request */ +static TimestampTz shutdown_request_timestamp = 0; + /* * While streaming WAL in Copy mode, streamingDoneSending is set to true * after we have sent CopyDone. We should not send any more CopyData messages @@ -272,6 +282,7 @@ static void WalSndKill(int code, Datum arg); pg_noreturn static void WalSndShutdown(void); static void XLogSendPhysical(void); static void XLogSendLogical(void); +pg_noreturn static void WalSndDoneImmediate(void); static void WalSndDone(WalSndSendDataCallback send_data); static void IdentifySystem(void); static void UploadManifest(void); @@ -291,6 +302,7 @@ static void ProcessPendingWrites(void); static void WalSndKeepalive(bool requestReply, XLogRecPtr writePtr); static void WalSndKeepaliveIfNecessary(void); static void WalSndCheckTimeOut(void); +static void WalSndCheckShutdownTimeout(void); static long WalSndComputeSleeptime(TimestampTz now); static void WalSndWait(uint32 socket_events, long timeout, uint32 wait_event); static void WalSndPrepareWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, bool last_write); @@ -1669,6 +1681,13 @@ ProcessPendingWrites(void) /* die if timeout was reached */ WalSndCheckTimeOut(); + /* + * During shutdown, die if the shutdown timeout expires. 
Call this + * before WalSndComputeSleeptime() so the timeout is considered when + * computing sleep time. + */ + WalSndCheckShutdownTimeout(); + /* Send keepalive if the time has come */ WalSndKeepaliveIfNecessary(); @@ -1984,6 +2003,13 @@ WalSndWaitForWal(XLogRecPtr loc) /* die if timeout was reached */ WalSndCheckTimeOut(); + /* + * During shutdown, die if the shutdown timeout expires. Call this + * before WalSndComputeSleeptime() so the timeout is considered when + * computing sleep time. + */ + WalSndCheckShutdownTimeout(); + /* Send keepalive if the time has come */ WalSndKeepaliveIfNecessary(); @@ -2843,16 +2869,18 @@ ProcessStandbyPSRequestMessage(void) * If wal_sender_timeout is enabled we want to wake up in time to send * keepalives and to abort the connection if wal_sender_timeout has been * reached. + * + * If wal_sender_shutdown_timeout is enabled, during shutdown, we want to + * wake up in time to exit when it expires. */ static long WalSndComputeSleeptime(TimestampTz now) { + TimestampTz wakeup_time; long sleeptime = 10000; /* 10 s */ if (wal_sender_timeout > 0 && last_reply_timestamp > 0) { - TimestampTz wakeup_time; - /* * At the latest stop sleeping once wal_sender_timeout has been * reached. @@ -2873,6 +2901,20 @@ WalSndComputeSleeptime(TimestampTz now) sleeptime = TimestampDifferenceMilliseconds(now, wakeup_time); } + if (shutdown_request_timestamp != 0 && wal_sender_shutdown_timeout > 0) + { + long shutdown_sleeptime; + + wakeup_time = TimestampTzPlusMilliseconds(shutdown_request_timestamp, + wal_sender_shutdown_timeout); + + shutdown_sleeptime = TimestampDifferenceMilliseconds(now, wakeup_time); + + /* Choose the earliest wakeup. */ + if (shutdown_sleeptime < sleeptime) + sleeptime = shutdown_sleeptime; + } + return sleeptime; } @@ -2914,6 +2956,45 @@ WalSndCheckTimeOut(void) } } +/* + * Check whether the walsender process should terminate due to the expiration + * of wal_sender_shutdown_timeout after the receipt of a shutdown request. 
+ */ +static void +WalSndCheckShutdownTimeout(void) +{ + TimestampTz now; + + /* Do nothing if shutdown has not been requested yet */ + if (!(got_STOPPING || got_SIGUSR2)) + return; + + /* Terminate immediately if the timeout is set to 0 */ + if (wal_sender_shutdown_timeout == 0) + WalSndDoneImmediate(); + + /* + * Record the shutdown request timestamp even if + * wal_sender_shutdown_timeout is disabled (-1), since the setting may + * change during shutdown and the timestamp will be needed in that case. + */ + if (shutdown_request_timestamp == 0) + { + shutdown_request_timestamp = GetCurrentTimestamp(); + return; + } + + /* Do not check the timeout if it's disabled */ + if (wal_sender_shutdown_timeout == -1) + return; + + /* Terminate immediately if the timeout expires */ + now = GetCurrentTimestamp(); + if (TimestampDifferenceExceeds(shutdown_request_timestamp, now, + wal_sender_shutdown_timeout)) + WalSndDoneImmediate(); +} + /* Main loop of walsender process that streams the WAL over Copy messages. */ static void WalSndLoop(WalSndSendDataCallback send_data) @@ -3001,6 +3082,13 @@ WalSndLoop(WalSndSendDataCallback send_data) /* Check for replication timeout. */ WalSndCheckTimeOut(); + /* + * During shutdown, die if the shutdown timeout expires. Call this + * before WalSndComputeSleeptime() so the timeout is considered when + * computing sleep time. + */ + WalSndCheckShutdownTimeout(); + /* Send keepalive if the time has come */ WalSndKeepaliveIfNecessary(); @@ -3616,6 +3704,49 @@ XLogSendLogical(void) } } +/* + * Forced shutdown of walsender if wal_sender_shutdown_timeout has expired. 
+ */ +static void +WalSndDoneImmediate(void) +{ + WalSndState state = MyWalSnd->state; + + if (state == WALSNDSTATE_CATCHUP || + state == WALSNDSTATE_STREAMING || + state == WALSNDSTATE_STOPPING) + { + QueryCompletion qc; + + /* Try to inform receiver that XLOG streaming is done */ + SetQueryCompletion(&qc, CMDTAG_COPY, 0); + EndCommand(&qc, DestRemote, false); + + /* + * Note that the output buffer may be full during the forced shutdown + * of walsender. If pq_flush() is called at that time, the walsender + * process will be stuck. Therefore, call pq_flush_if_writable() + * instead. Successful reception of the done message with the + * walsender forced into a shutdown is not guaranteed. + */ + pq_flush_if_writable(); + } + + /* + * Prevent ereport from attempting to send any more messages to the + * standby. Otherwise, it can cause the process to get stuck if the output + * buffers are full. + */ + if (whereToSendOutput == DestRemote) + whereToSendOutput = DestNone; + + ereport(WARNING, + (errmsg("terminating walsender process due to replication shutdown timeout"), + errdetail("Walsender process might have been terminated before all WAL data was replicated to the receiver."))); + + proc_exit(0); +} + /* * Shutdown if the sender is caught up. 
* diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index a315c4ab8ab..7a8a5d0764c 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -3504,6 +3504,16 @@ check_hook => 'check_wal_segment_size', }, +{ name => 'wal_sender_shutdown_timeout', type => 'int', context => 'PGC_USERSET', group => 'REPLICATION_SENDING', + short_desc => 'Sets the maximum time the server waits during shutdown for all WAL data to be replicated to the receiver.', + long_desc => '-1 disables the timeout', + flags => 'GUC_UNIT_MS', + variable => 'wal_sender_shutdown_timeout', + boot_val => '-1', + min => '-1', + max => 'INT_MAX', +}, + { name => 'wal_sender_timeout', type => 'int', context => 'PGC_USERSET', group => 'REPLICATION_SENDING', short_desc => 'Sets the maximum time to wait for WAL replication.', flags => 'GUC_UNIT_MS', diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 6d0337853e0..10a281dfd4b 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -352,6 +352,7 @@ #max_slot_wal_keep_size = -1 # in megabytes; -1 disables #idle_replication_slot_timeout = 0 # in seconds; 0 disables #wal_sender_timeout = 60s # in milliseconds; 0 disables +#wal_sender_shutdown_timeout = -1 # in milliseconds; -1 disables #track_commit_timestamp = off # collect timestamp of transaction commit # (change requires restart) diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index 8952c848d19..386cedfc7aa 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -33,6 +33,7 @@ extern PGDLLIMPORT bool wake_wal_senders; /* user-settable parameters */ extern PGDLLIMPORT int max_wal_senders; extern PGDLLIMPORT int wal_sender_timeout; +extern PGDLLIMPORT int wal_sender_shutdown_timeout; extern PGDLLIMPORT bool 
log_replication_commands; extern void InitWalSender(void); diff --git a/src/test/subscription/meson.build b/src/test/subscription/meson.build index f4a9cf5057f..e71e95c6297 100644 --- a/src/test/subscription/meson.build +++ b/src/test/subscription/meson.build @@ -47,6 +47,7 @@ tests += { 't/035_conflicts.pl', 't/036_sequences.pl', 't/037_except.pl', + 't/038_walsnd_shutdown_timeout.pl', 't/100_bugs.pl', ], }, diff --git a/src/test/subscription/t/038_walsnd_shutdown_timeout.pl b/src/test/subscription/t/038_walsnd_shutdown_timeout.pl new file mode 100644 index 00000000000..f4ed5d97852 --- /dev/null +++ b/src/test/subscription/t/038_walsnd_shutdown_timeout.pl @@ -0,0 +1,201 @@ + +# Copyright (c) 2026, PostgreSQL Global Development Group + +# Checks that the publisher is able to shut down without +# waiting for sending of all pending data to the subscriber +# with wal_sender_shutdown_timeout set +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Time::HiRes qw(usleep); + +# Initialize publisher node +my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->append_conf( + 'postgresql.conf', + qq(wal_sender_timeout = 1h + wal_sender_shutdown_timeout = 10ms)); +$node_publisher->start; + +# Initialize subscriber node +my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); +$node_subscriber->init; +$node_subscriber->start; + +# Create publication for test table +$node_publisher->safe_psql( + 'postgres', qq( + CREATE TABLE test_tab (id int PRIMARY KEY); + CREATE PUBLICATION test_pub FOR TABLE test_tab; +)); + +# Create matching table and subscription on subscriber +my $publisher_connstr = $node_publisher->connstr . 
' dbname=postgres'; +$node_subscriber->safe_psql( + 'postgres', qq( + CREATE TABLE test_tab (id int PRIMARY KEY); + CREATE SUBSCRIPTION test_sub CONNECTION '$publisher_connstr' PUBLICATION test_pub WITH (failover = true); +)); + +# Wait for initial table sync to finish +$node_subscriber->wait_for_subscription_sync($node_publisher, 'test_sub'); + +# Start a background session on the subscriber to run a transaction later +# that will block the logical apply worker on a lock. +my $sub_session = $node_subscriber->background_psql('postgres'); + +# Test that when the logical apply worker is blocked on a lock and replication +# is stalled, shutting down the publisher causes the logical walsender to exit +# due to wal_sender_shutdown_timeout, allowing shutdown to complete. + +# Cause the logical apply worker to block on a lock by running conflicting +# transactions on the publisher and subscriber. +$sub_session->query_safe("BEGIN; INSERT INTO test_tab VALUES (0);"); +$node_publisher->safe_psql('postgres', "INSERT INTO test_tab VALUES (0);"); + +my $log_offset = -s $node_publisher->logfile; + +# Verify that the walsender exits due to wal_sender_shutdown_timeout. +$node_publisher->stop('fast'); +ok( $node_publisher->log_contains( + qr/WARNING: .* terminating walsender process due to replication shutdown timeout/, + $log_offset), + "walsender exits due to wal_sender_shutdown_timeout"); + +$sub_session->query_safe("ABORT;"); +$node_publisher->start; +$node_publisher->wait_for_catchup('test_sub'); + +# Test that when the logical apply worker is blocked on a lock, replication +# is stalled, and the logical walsender's output buffer is full, shutting down +# the publisher causes the walsender to exit due to +# wal_sender_shutdown_timeout, allowing shutdown to complete. +# +# This test differs from the previous one in that the walsender's output +# buffer is full (because pending data cannot be transferred). 
+ +# Run a transaction on the subscriber that blocks the logical apply worker +# on a lock. +$sub_session->query_safe("BEGIN; LOCK TABLE test_tab IN EXCLUSIVE MODE;"); + +# Generate enough data to fill the logical walsender's output buffer. +$node_publisher->safe_psql('postgres', + "INSERT INTO test_tab VALUES (generate_series(1, 20000));"); + +# Wait for the logical walsender's output buffer to fill. If the WAL send +# positions do not advance between checks, treat the buffer as full. +my $last_sent_lsn = $node_publisher->safe_psql('postgres', + "SELECT sent_lsn FROM pg_stat_replication WHERE application_name = 'test_sub';" +); + +my $max_attempts = $PostgreSQL::Test::Utils::timeout_default * 10; +while ($max_attempts-- >= 0) +{ + usleep(100_000); + + my $cur_sent_lsn = $node_publisher->safe_psql('postgres', + "SELECT sent_lsn FROM pg_stat_replication WHERE application_name = 'test_sub';" + ); + + my $diff = $node_publisher->safe_psql('postgres', + "SELECT pg_wal_lsn_diff('$cur_sent_lsn', '$last_sent_lsn');"); + last if $diff == 0; + + $last_sent_lsn = $cur_sent_lsn; +} + +$log_offset = -s $node_publisher->logfile; + +# Verify that the walsender exits due to wal_sender_shutdown_timeout. +$node_publisher->stop('fast'); +ok( $node_publisher->log_contains( + qr/WARNING: .* terminating walsender process due to replication shutdown timeout/, + $log_offset), + "walsender with full output buffer exits due to wal_sender_shutdown_timeout" +); + +$sub_session->query_safe("ABORT;"); + +# The next test depends on Perl's `kill`, which apparently is not +# portable to Windows. (It would be nice to use Test::More's `subtest`, +# but that's not in the ancient version we require.) 
+if ($PostgreSQL::Test::Utils::windows_os) +{ + $node_subscriber->stop('fast'); + done_testing(); + exit; +} + +$node_publisher->start; + +# Test that wal_sender_shutdown_timeout works correctly when both physical +# and logical replication are active, and slot synchronization is running on +# the standby. +# +# In this scenario, the logical apply worker is blocked on a lock and +# the standby's walreceiver is stopped (via SIGSTOP signal), stalling both +# replication streams. Verify that shutting down the publisher (primary) +# causes both physical and logical walsenders to exit due to +# wal_sender_shutdown_timeout, allowing shutdown to complete. +# +# Skip this test on Windows. + +# Create the standby with slot synchronization enabled. +$node_publisher->backup( + 'publisher_backup', + backup_options => [ + '--create-slot', '--slot', + 'test_slot', '-d', + 'dbname=postgres', '--write-recovery-conf' + ]); + +$node_publisher->append_conf('postgresql.conf', + "synchronized_standby_slots = 'test_slot'"); +$node_publisher->reload; + +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_publisher, 'publisher_backup'); +$node_standby->append_conf( + 'postgresql.conf', + qq(sync_replication_slots = on + hot_standby_feedback = on)); +$node_standby->start; + +# Cause the logical apply worker to block on a lock by running conflicting +# transactions on the publisher and subscriber, stalling logical replication. +$node_publisher->wait_for_catchup('test_sub'); +$sub_session->query_safe("BEGIN; LOCK TABLE test_tab IN EXCLUSIVE MODE;"); +$node_publisher->safe_psql('postgres', "INSERT INTO test_tab VALUES (-1); "); + +# Cause the standby's walreceiver to be blocked with SIGSTOP signal, +# stalling physical replication. 
+$node_standby->poll_query_until('postgres', + "SELECT EXISTS(SELECT 1 FROM pg_stat_wal_receiver)"); +my $receiverpid = $node_standby->safe_psql('postgres', + "SELECT pid FROM pg_stat_wal_receiver"); +like($receiverpid, qr/^[0-9]+$/, "have walreceiver pid $receiverpid"); +kill 'STOP', $receiverpid; + +$log_offset = -s $node_publisher->logfile; + +# Verify that the walsender exits due to wal_sender_shutdown_timeout +# even when both physical and logical replication are stalled. +$node_publisher->safe_psql('postgres', "INSERT INTO test_tab VALUES (-2);"); +$node_publisher->stop('fast'); +ok( $node_publisher->log_contains( + qr/WARNING: .* terminating walsender process due to replication shutdown timeout/, + $log_offset), + "walsender exits due to wal_sender_shutdown_timeout even when both physical and logical replication are stalled" +); + +kill 'CONT', $receiverpid; +$sub_session->quit; + +$node_subscriber->stop('fast'); +$node_standby->stop('fast'); + +done_testing();