git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix MVCC bug with prepared xact with subxacts on standby
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 27 Jun 2024 18:06:32 +0000 (21:06 +0300)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 27 Jun 2024 18:10:31 +0000 (21:10 +0300)
We did not recover the subtransaction IDs of prepared transactions
when starting a hot standby from a shutdown checkpoint. As a result,
such subtransactions were considered as aborted, rather than
in-progress. That would lead to hint bits being set incorrectly, and
the subtransactions suddenly becoming visible to old snapshots when
the prepared transaction was committed.

To fix, update pg_subtrans with the prepared transactions' subxids when
starting hot standby from a shutdown checkpoint. The snapshots taken
from that state need to be marked as "suboverflowed", so that
pg_subtrans is also consulted when checking visibility.

Backport to all supported versions.

Discussion: https://www.postgresql.org/message-id/6b852e98-2d49-4ca1-9e95-db419a2696e0@iki.fi

src/backend/access/transam/twophase.c
src/backend/access/transam/xlog.c
src/backend/storage/ipc/procarray.c
src/backend/storage/ipc/standby.c
src/include/storage/standby.h
src/test/recovery/t/009_twophase.pl
src/tools/pgindent/typedefs.list

index ca7037eb2f987a52f93a8dc9dc898c62bffd4951..b420aef5abc7cf82a33373bd8a7932935ac8fb20 100644 (file)
@@ -2013,9 +2013,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
  * This is never called at the end of recovery - we use
  * RecoverPreparedTransactions() at that point.
  *
- * The lack of calls to SubTransSetParent() calls here is by design;
- * those calls are made by RecoverPreparedTransactions() at the end of recovery
- * for those xacts that need this.
+ * This updates pg_subtrans, so that any subtransactions will be correctly
+ * seen as in-progress in snapshots taken during recovery.
  */
 void
 StandbyRecoverPreparedTransactions(void)
@@ -2035,7 +2034,7 @@ StandbyRecoverPreparedTransactions(void)
 
                buf = ProcessTwoPhaseBuffer(xid,
                                                                        gxact->prepare_start_lsn,
-                                                                       gxact->ondisk, false, false);
+                                                                       gxact->ondisk, true, false);
                if (buf != NULL)
                        pfree(buf);
        }
index 7b8e6206459dc9bbf3a6ca03cdcd2f0df0eb7d06..daad1ff1c3b55722367884b4cabc19ccde7d50ed 100644 (file)
@@ -5273,6 +5273,9 @@ StartupXLOG(void)
                                RunningTransactionsData running;
                                TransactionId latestCompletedXid;
 
+                               /* Update pg_subtrans entries for any prepared transactions */
+                               StandbyRecoverPreparedTransactions();
+
                                /*
                                 * Construct a RunningTransactions snapshot representing a
                                 * shut down server, with only prepared transactions still
@@ -5281,7 +5284,7 @@ StartupXLOG(void)
                                 */
                                running.xcnt = nxids;
                                running.subxcnt = 0;
-                               running.subxid_overflow = false;
+                               running.subxid_status = SUBXIDS_IN_SUBTRANS;
                                running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
                                running.oldestRunningXid = oldestActiveXID;
                                latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
@@ -5291,8 +5294,6 @@ StartupXLOG(void)
                                running.xids = xids;
 
                                ProcArrayApplyRecoveryInfo(&running);
-
-                               StandbyRecoverPreparedTransactions();
                        }
                }
 
@@ -7647,6 +7648,9 @@ xlog_redo(XLogReaderState *record)
 
                        oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
 
+                       /* Update pg_subtrans entries for any prepared transactions */
+                       StandbyRecoverPreparedTransactions();
+
                        /*
                         * Construct a RunningTransactions snapshot representing a shut
                         * down server, with only prepared transactions still alive. We're
@@ -7655,7 +7659,7 @@ xlog_redo(XLogReaderState *record)
                         */
                        running.xcnt = nxids;
                        running.subxcnt = 0;
-                       running.subxid_overflow = false;
+                       running.subxid_status = SUBXIDS_IN_SUBTRANS;
                        running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
                        running.oldestRunningXid = oldestActiveXID;
                        latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
@@ -7665,8 +7669,6 @@ xlog_redo(XLogReaderState *record)
                        running.xids = xids;
 
                        ProcArrayApplyRecoveryInfo(&running);
-
-                       StandbyRecoverPreparedTransactions();
                }
 
                /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
index 4cfb22222b1126764b4857e39327c3e1bf5645dc..7eb431ed231d6bb669806231ac3bc44f470fdb59 100644 (file)
@@ -1099,7 +1099,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
                 * If the snapshot isn't overflowed or if its empty we can reset our
                 * pending state and use this snapshot instead.
                 */
-               if (!running->subxid_overflow || running->xcnt == 0)
+               if (running->subxid_status != SUBXIDS_MISSING || running->xcnt == 0)
                {
                        /*
                         * If we have already collected known assigned xids, we need to
@@ -1251,7 +1251,7 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
         * missing, so conservatively assume the last one is latestObservedXid.
         * ----------
         */
-       if (running->subxid_overflow)
+       if (running->subxid_status == SUBXIDS_MISSING)
        {
                standbyState = STANDBY_SNAPSHOT_PENDING;
 
@@ -1263,6 +1263,18 @@ ProcArrayApplyRecoveryInfo(RunningTransactions running)
                standbyState = STANDBY_SNAPSHOT_READY;
 
                standbySnapshotPendingXmin = InvalidTransactionId;
+
+               /*
+                * If the 'xids' array didn't include all subtransactions, we have to
+                * mark any snapshots taken as overflowed.
+                */
+               if (running->subxid_status == SUBXIDS_IN_SUBTRANS)
+                       procArray->lastOverflowedXid = latestObservedXid;
+               else
+               {
+                       Assert(running->subxid_status == SUBXIDS_IN_ARRAY);
+                       procArray->lastOverflowedXid = InvalidTransactionId;
+               }
        }
 
        /*
@@ -2897,7 +2909,7 @@ GetRunningTransactionData(void)
 
        CurrentRunningXacts->xcnt = count - subcount;
        CurrentRunningXacts->subxcnt = subcount;
-       CurrentRunningXacts->subxid_overflow = suboverflowed;
+       CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY;
        CurrentRunningXacts->nextXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
        CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
        CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
index 671b00a33cf1e0783ff2d9904c6abe56aba0d03d..a36eb80e9ae5ea22a255d13f89255f7ae641308d 100644 (file)
@@ -1130,7 +1130,7 @@ standby_redo(XLogReaderState *record)
 
                running.xcnt = xlrec->xcnt;
                running.subxcnt = xlrec->subxcnt;
-               running.subxid_overflow = xlrec->subxid_overflow;
+               running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
                running.nextXid = xlrec->nextXid;
                running.latestCompletedXid = xlrec->latestCompletedXid;
                running.oldestRunningXid = xlrec->oldestRunningXid;
@@ -1286,7 +1286,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 
        xlrec.xcnt = CurrRunningXacts->xcnt;
        xlrec.subxcnt = CurrRunningXacts->subxcnt;
-       xlrec.subxid_overflow = CurrRunningXacts->subxid_overflow;
+       xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
        xlrec.nextXid = CurrRunningXacts->nextXid;
        xlrec.oldestRunningXid = CurrRunningXacts->oldestRunningXid;
        xlrec.latestCompletedXid = CurrRunningXacts->latestCompletedXid;
@@ -1303,7 +1303,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 
        recptr = XLogInsert(RM_STANDBY_ID, XLOG_RUNNING_XACTS);
 
-       if (CurrRunningXacts->subxid_overflow)
+       if (xlrec.subxid_overflow)
                elog(trace_recovery(DEBUG2),
                         "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
                         CurrRunningXacts->xcnt,
index 6a7763264b0ea4b06a687c055ae0bcea92b4d8f6..1d91e0cd6b6c69da508243f3122a70d4075fdfe0 100644 (file)
@@ -74,11 +74,19 @@ extern void StandbyReleaseOldLocks(TransactionId oldxid);
  * almost immediately see the data we need to begin executing queries.
  */
 
+typedef enum
+{
+       SUBXIDS_IN_ARRAY,                       /* xids array includes all running subxids */
+       SUBXIDS_MISSING,                        /* snapshot overflowed, subxids are missing */
+       SUBXIDS_IN_SUBTRANS,            /* subxids are not included in 'xids', but
+                                                                * pg_subtrans is fully up-to-date */
+} subxids_array_status;
+
 typedef struct RunningTransactionsData
 {
        int                     xcnt;                   /* # of xact ids in xids[] */
        int                     subxcnt;                /* # of subxact ids in xids[] */
-       bool            subxid_overflow;        /* snapshot overflowed, subxids missing */
+       subxids_array_status subxid_status;
        TransactionId nextXid;          /* xid from ShmemVariableCache->nextXid */
        TransactionId oldestRunningXid; /* *not* oldestXmin */
        TransactionId latestCompletedXid;       /* so we can set xmax */
index 3e25b8c4ebdd3d3b533fc8fcf8590f02525645b7..4e956742e3a25842bcaf713c8e12ac3a4be1869d 100644 (file)
@@ -308,6 +308,52 @@ $cur_standby->start;
 
 $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
 
+###############################################################################
+# Check visibility of prepared transactions in standby after a restart while
+# primary is down.
+###############################################################################
+
+$cur_primary->psql(
+       'postgres', "
+       CREATE TABLE t_009_tbl_standby_mvcc (id int, msg text);
+       BEGIN;
+       INSERT INTO t_009_tbl_standby_mvcc VALUES (1, 'issued to ${cur_primary_name}');
+       SAVEPOINT s1;
+       INSERT INTO t_009_tbl_standby_mvcc VALUES (2, 'issued to ${cur_primary_name}');
+       PREPARE TRANSACTION 'xact_009_standby_mvcc';
+       ");
+$cur_primary->stop;
+$cur_standby->restart;
+
+# Acquire a snapshot in standby, before we commit the prepared transaction
+my $standby_session = $cur_standby->background_psql('postgres', on_error_die => 1);
+$standby_session->query_safe("BEGIN ISOLATION LEVEL REPEATABLE READ");
+$psql_out = $standby_session->query_safe(
+       "SELECT count(*) FROM t_009_tbl_standby_mvcc");
+is($psql_out, '0',
+       "Prepared transaction not visible in standby before commit");
+
+# Commit the transaction in primary
+$cur_primary->start;
+$cur_primary->psql('postgres', "
+SET synchronous_commit='remote_apply'; -- To ensure the standby is caught up
+COMMIT PREPARED 'xact_009_standby_mvcc';
+");
+
+# Still not visible to the old snapshot
+$psql_out = $standby_session->query_safe(
+       "SELECT count(*) FROM t_009_tbl_standby_mvcc");
+is($psql_out, '0',
+       "Committed prepared transaction not visible to old snapshot in standby");
+
+# Is visible to a new snapshot
+$standby_session->query_safe("COMMIT");
+$psql_out = $standby_session->query_safe(
+       "SELECT count(*) FROM t_009_tbl_standby_mvcc");
+is($psql_out, '2',
+   "Committed prepared transaction is visible to new snapshot in standby");
+$standby_session->quit;
+
 ###############################################################################
 # Check for a lock conflict between prepared transaction with DDL inside and
 # replay of XLOG_STANDBY_LOCK wal record.
index 78f3ec0cafb07c1ee471273b1b6d9d23f07f2824..31e9533917cb9374df363aad04ef2ccd3d8f77be 100644 (file)
@@ -3698,6 +3698,7 @@ string
 substitute_actual_parameters_context
 substitute_actual_srf_parameters_context
 substitute_phv_relids_context
+subxids_array_status
 symbol
 tablespaceinfo
 teSection