git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix second race with timeline selection during promotion
author: Michael Paquier <michael@paquier.xyz>
Fri, 12 Jun 2026 02:44:11 +0000 (11:44 +0900)
committer: Michael Paquier <michael@paquier.xyz>
Fri, 12 Jun 2026 02:44:11 +0000 (11:44 +0900)
read_local_xlog_page_guts has the same race as logical_read_xlog_page:
RecoveryInProgress() can return true during promotion, impacting the
availability of the operations doing WAL page reads with this callback.

This problem is similar to the one fixed by eb4e7224a1c6 for logical
replication, but it impacts more areas of the code where this WAL
page callback can be used (same narrow window during promotion, same
availability issue):
- pg_walinspect.
- Slot advance (SQL function).
- Slot creation.

Repack workers (v19~) and 2PC files (since forever) can also use this
callback, but they are irrelevant as far as I know.  A test is added
with the SQL lookup functions.  This part relies on injection points,
and is backpatched down to v18, like the test added for eb4e7224a1c6.

This issue could probably be fixed as well in v14 and v15 for
pg_walinspect.  However, I also feel that there is a conservative
argument about consistency here due to the support of logical decoding
on standbys, so let's limit ourselves to v16 for now.  pg_walinspect is
used less in the field compared to the two other operations, which makes
fixing this problem less attractive in these two older branches.

Reported-by: Xuneng Zhou <xunengzhou@gmail.com>
Author: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed-by: Xuneng Zhou <xunengzhou@gmail.com>
Reviewed-by: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Discussion: https://postgr.es/m/7daef094-abf3-4672-bc23-3df4763b16a3%40gmail.com
Backpatch-through: 16

src/backend/access/transam/xlogutils.c
src/test/recovery/t/035_standby_logical_decoding.pl

index 5fbe39133b80609b8c4c24bb128cfc1fad7c87c3..fdc341d8fa48bc20aaca28ddf66581cae57b6400 100644 (file)
@@ -896,7 +896,19 @@ read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                if (!RecoveryInProgress())
                        read_upto = GetFlushRecPtr(&currTLI);
                else
+               {
+                       TimeLineID      insertTLI;
+
                        read_upto = GetXLogReplayRecPtr(&currTLI);
+
+                       /*
+                        * If the insertion timeline has already been set, use it. See
+                        * logical_read_xlog_page() for details.
+                        */
+                       insertTLI = GetWALInsertionTimeLineIfSet();
+                       if (insertTLI != 0)
+                               currTLI = insertTLI;
+               }
                tli = currTLI;
 
                /*
index b3a5bb2694c3bbbd83d3708e10d7eb6d46caf8a6..88893f71350827426f460249900a5c560d6990b1 100644 (file)
@@ -1071,6 +1071,8 @@ is($cascading_stdout, $expected,
 # Create a logical slot on the cascading standby for this test.
 $node_cascading_standby->create_logical_slot_on_standby($node_standby,
        'race_slot', 'testdb');
+$node_cascading_standby->create_logical_slot_on_standby($node_standby,
+       'race_slot_sql', 'testdb');
 
 $node_standby->safe_psql('testdb',
        qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(10,13) s;]
@@ -1087,6 +1089,10 @@ COMMIT};
 $node_standby->safe_psql('testdb', 'CREATE EXTENSION injection_points;');
 $node_standby->wait_for_replay_catchup($node_cascading_standby);
 
+# Open a background psql session BEFORE promotion for the SQL decoding
+# test.
+my $decode_session = $node_cascading_standby->background_psql('testdb');
+
 # Attach injection point to pause startup after WAL segment cleanup
 # but before RecoveryInProgress() flips to false.
 $node_cascading_standby->safe_psql('testdb',
@@ -1125,6 +1131,13 @@ is($stdout2, $expected,
        'got expected output from pg_recvlogical during promotion timeline switch'
 );
 
+# Verify SQL decoding.
+my $sql_out = $decode_session->query_safe(
+       "SELECT data FROM pg_logical_slot_peek_changes('race_slot_sql', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')"
+);
+is($sql_out, $expected,
+       'pg_logical_slot_peek_changes works during promotion timeline switch');
+
 # Resume promotion.
 $node_cascading_standby->safe_psql('testdb',
        "SELECT injection_points_wakeup('promotion-after-wal-segment-cleanup');");