From: Amit Kapila Date: Wed, 24 Dec 2025 03:53:42 +0000 (+0000) Subject: Don't advance origin during apply failure. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2FREL_16_STABLE;p=thirdparty%2Fpostgresql.git Don't advance origin during apply failure. The logical replication parallel apply worker could incorrectly advance the origin progress during an error or failed apply. This behavior risks transaction loss because such transactions will not be resent by the server. Commit 3f28b2fcac addressed a similar issue for both the apply worker and the table sync worker by registering a before_shmem_exit callback to reset origin information. This prevents the worker from advancing the origin during transaction abortion on shutdown. This patch applies the same fix to the parallel apply worker, ensuring consistent behavior across all worker types. As with 3f28b2fcac, we are backpatching through version 16, since parallel apply mode was introduced there and the issue only occurs when changes are applied before the transaction end record (COMMIT or ABORT) is received. Author: Hou Zhijie Reviewed-by: Chao Li Reviewed-by: Amit Kapila Backpatch-through: 16 Discussion: https://postgr.es/m/TY4PR01MB169078771FB31B395AB496A6B94B4A@TY4PR01MB16907.jpnprd01.prod.outlook.com Discussion: https://postgr.es/m/TYAPR01MB5692FAC23BE40C69DA8ED4AFF5B92@TYAPR01MB5692.jpnprd01.prod.outlook.com --- diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 9b5c641941f..d6bbffd7c8d 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -4544,6 +4544,23 @@ InitializeApplyWorker(void) MySubscription->name))); CommitTransactionCommand(); + + /* + * Register a callback to reset the origin state before aborting any + * pending transaction during shutdown (see ShutdownPostgres()). This will + * avoid origin advancement for an incomplete transaction which could + * otherwise lead to its loss as such a transaction won't be sent by the + * server again. + * + * Note that even a LOG or DEBUG statement placed after setting the origin + * state may process a shutdown signal before committing the current apply + * operation. So, it is important to register such a callback here. + * + * Register this callback here to ensure that all types of logical + * replication workers that set up origins and apply remote transactions + * are protected. + */ + before_shmem_exit(replorigin_reset, (Datum) 0); } /* Logical Replication Apply worker entry point */ @@ -4581,19 +4598,6 @@ ApplyWorkerMain(Datum main_arg) InitializeApplyWorker(); - /* - * Register a callback to reset the origin state before aborting any - * pending transaction during shutdown (see ShutdownPostgres()). This will - * avoid origin advancement for an in-complete transaction which could - * otherwise lead to its loss as such a transaction won't be sent by the - * server again. - * - * Note that even a LOG or DEBUG statement placed after setting the origin - * state may process a shutdown signal before committing the current apply - * operation. So, it is important to register such a callback here. - */ - before_shmem_exit(replorigin_reset, (Datum) 0); - InitializingApplyWorker = false; /* Connect to the origin and start the replication. */ diff --git a/src/test/subscription/t/023_twophase_stream.pl b/src/test/subscription/t/023_twophase_stream.pl index 0303807846e..39ad688a7bd 100644 --- a/src/test/subscription/t/023_twophase_stream.pl +++ b/src/test/subscription/t/023_twophase_stream.pl @@ -429,6 +429,51 @@ $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM test_tab_2"); is($result, qq(1), 'transaction is committed on subscriber'); +# Test the ability to re-apply a transaction when a parallel apply worker fails +# to prepare the transaction due to insufficient max_prepared_transactions +# setting. +$node_subscriber->append_conf( + 'postgresql.conf', qq( +max_prepared_transactions = 0 +debug_logical_replication_streaming = buffered +)); +$node_subscriber->restart; + +$node_publisher->safe_psql( + 'postgres', q{ + BEGIN; + INSERT INTO test_tab_2 values(2); + PREPARE TRANSACTION 'xact'; + COMMIT PREPARED 'xact'; + }); + +$offset = -s $node_subscriber->logfile; + +# Confirm the ERROR is reported because max_prepared_transactions is zero +$node_subscriber->wait_for_log( + qr/ERROR: ( [A-Z0-9]+:)? prepared transactions are disabled/, + $offset); + +# Confirm that the parallel apply worker has encountered an error. The check +# focuses on the worker type as a keyword, since the error message content may +# differ based on whether the leader initially detected the parallel apply +# worker's failure or received a signal from it. +$node_subscriber->wait_for_log( + qr/ERROR: .*logical replication parallel apply worker.*/, + $offset); + +# Set max_prepared_transactions to correct value to resume the replication +$node_subscriber->append_conf('postgresql.conf', + qq(max_prepared_transactions = 10)); +$node_subscriber->restart; + +$node_publisher->wait_for_catchup($appname); + +# Check that transaction is committed on subscriber +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM test_tab_2"); +is($result, qq(2), 'transaction is committed on subscriber after retrying'); + ############################### # check all the cleanup ###############################