By default, logical decoding assumes access to shared catalogs, so
the snapshot builder needs to consider cluster-wide XIDs during startup.
That in turn means that, if any transaction is already running (and has
an XID assigned), the snapshot builder needs to wait for its completion, as
it does not know whether that transaction performed catalog changes earlier.
A possible problem with this concept is that if REPACK (CONCURRENTLY) is
running in some database, backends running the same command in other
databases get stuck until the first one has committed. Thus only a
single backend in the cluster can run REPACK (CONCURRENTLY) at any time.
Likewise, REPACK (CONCURRENTLY) can block walsenders starting on behalf
of subscriptions throughout the cluster.
This patch adds a new output plugin option, which lets the plugin
declare that it does not use shared catalogs (i.e. catalogs that can be
changed by transactions running in other databases in the cluster). In
that case, none of the snapshots the backend uses during decoding needs
to contain information about transactions running in other databases.
Thus the snapshot builder only needs to wait for completion of
transactions in the current database.
Currently we only use this option in the REPACK background worker. It
could possibly be used in the plugin for logical replication too;
however, that would need a thorough analysis of that plugin.
Bump WAL version number, due to a new field in xl_running_xacts.
Author: Antonin Houska <ah@cybertec.at>
Reviewed-by: Álvaro Herrera <alvherre@kurilemu.de>
Discussion: https://postgr.es/m/90475.1775218118@localhost
else if (rel == NULL || rel->rd_rel->relisshared)
{
/* Shared relation: take into account all running xids */
- runningTransactions = GetRunningTransactionData();
+ runningTransactions = GetRunningTransactionData(InvalidOid);
LWLockRelease(ProcArrayLock);
LWLockRelease(XidGenLock);
return runningTransactions->oldestRunningXid;
* Normal relation: take into account xids running within the current
* database
*/
- runningTransactions = GetRunningTransactionData();
+ runningTransactions = GetRunningTransactionData(InvalidOid);
LWLockRelease(ProcArrayLock);
LWLockRelease(XidGenLock);
return runningTransactions->oldestDatabaseRunningXid;
{
OutputPluginOutputType output_type;
bool receive_rewrites;
+ bool need_shared_catalogs;
} OutputPluginOptions;
</programlisting>
<literal>output_type</literal> has to either be set to
also be called for changes made by heap rewrites during certain DDL
operations. These are of interest to plugins that handle DDL
replication, but they require special handling.
+ <literal>need_shared_catalogs</literal> can be set to false if you are
+ certain the plugin functions do not access shared system catalogs.
+ Doing so can speed up creation of replication slots that use this plugin.
</para>
<para>
SysScanDesc sysscan;
Relation irel;
+ /*
+ * If this backend promised that it won't access shared catalogs during
+ * logical decoding, this is the right place to verify it.
+ */
+ Assert(!HistoricSnapshotActive() ||
+ accessSharedCatalogsInDecoding ||
+ !heapRelation->rd_rel->relisshared);
+
if (indexOK &&
!IgnoreSystemIndexes &&
!ReindexIsProcessingIndex(indexId))
for (i = 0; i < xlrec->subxcnt; i++)
appendStringInfo(buf, " %u", xlrec->xids[xlrec->xcnt + i]);
}
+
+ appendStringInfo(buf, "; dbid: %u", xlrec->dbid);
}
void
* recovery we don't need to write running xact data.
*/
if (!shutdown && XLogStandbyInfoActive())
- LogStandbySnapshot();
+ LogStandbySnapshot(InvalidOid);
START_CRIT_SECTION();
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("pg_log_standby_snapshot() can only be used if \"wal_level\" >= \"replica\"")));
- recptr = LogStandbySnapshot();
+ recptr = LogStandbySnapshot(InvalidOid);
/*
* As a convenience, return the WAL location of the last inserted record
if (now >= timeout &&
last_snapshot_lsn <= GetLastImportantRecPtr())
{
- last_snapshot_lsn = LogStandbySnapshot();
+ last_snapshot_lsn = LogStandbySnapshot(InvalidOid);
last_snapshot_ts = now;
}
}
{
xl_running_xacts *running = (xl_running_xacts *) XLogRecGetData(r);
- SnapBuildProcessRunningXacts(builder, buf->origptr, running);
+ /*
+ * Update this decoder's idea of transactions currently
+ * running. In doing so we will determine whether we have
+ * reached consistent status.
+ *
+ * If the output plugin doesn't need access to shared
+ * catalogs, we can ignore transactions in other databases.
+ */
+ SnapBuildProcessRunningXacts(builder, buf->origptr, running,
+ !ctx->options.need_shared_catalogs);
/*
* Abort all transactions that we keep track of, that are
* all running transactions which includes prepared ones,
* while shutdown checkpoints just know that no non-prepared
* transactions are in progress.
+ *
+ * The database-specific records might work here too, but it's
+ * not their purpose.
*/
- ReorderBufferAbortOld(ctx->reorder, running->oldestRunningXid);
+ if (!OidIsValid(running->dbid))
+ ReorderBufferAbortOld(ctx->reorder, running->oldestRunningXid);
}
break;
case XLOG_STANDBY_LOCK:
ctx->write = do_write;
ctx->update_progress = update_progress;
+ /* Assume shared catalog access. The startup callback can change it. */
+ ctx->options.need_shared_catalogs = true;
+
ctx->output_plugin_options = output_plugin_options;
ctx->fast_forward = fast_forward;
static ResourceOwner SavedResourceOwnerDuringExport = NULL;
static bool ExportInProgress = false;
+/*
+ * If a backend is going to do logical decoding and the output plugin does
+ * not need to access shared catalogs, setting this variable to false can make
+ * the decoding startup faster. In particular, the backend will not need to
+ * wait for completion of already running transactions in other databases.
+ */
+bool accessSharedCatalogsInDecoding = true;
+
/* ->committed and ->catchange manipulation */
static void SnapBuildPurgeOlderTxn(SnapBuild *builder);
uint32 xinfo);
/* xlog reading helper functions for SnapBuildProcessRunningXacts */
-static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running);
+static bool SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn,
+ xl_running_xacts *running);
static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff);
/* serialization functions */
MemoryContextSwitchTo(oldcontext);
+ /* The default is that shared catalogs are used. */
+ accessSharedCatalogsInDecoding = true;
+
return builder;
}
builder->snapshot = NULL;
}
+ /* The default is that shared catalogs are used. */
+ accessSharedCatalogsInDecoding = true;
+
/* other resources are deallocated via memory context reset */
MemoryContextDelete(context);
}
* anymore.
*/
void
-SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running)
+SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *running,
+ bool db_specific)
{
ReorderBufferTXN *txn;
TransactionId xmin;
*/
if (builder->state < SNAPBUILD_CONSISTENT)
{
+ /*
+ * To reduce the potential for unnecessarily waiting for completion of
+ * unrelated transactions, the caller can declare that only
+ * transactions of the current database are relevant at this stage.
+ */
+ if (db_specific)
+ {
+ /*
+ * If we must only keep track of transactions running in the
+ * current database, we need transaction info from exactly that
+ * database.
+ */
+ if (running->dbid != MyDatabaseId)
+ {
+ LogStandbySnapshot(MyDatabaseId);
+
+ return;
+ }
+
+ /*
+ * Remember the promise, so that catalog scans can check that the
+ * plugin does not lie.
+ */
+ if (accessSharedCatalogsInDecoding)
+ accessSharedCatalogsInDecoding = false;
+ }
+
/* returns false if there's no point in performing cleanup just yet */
if (!SnapBuildFindSnapshot(builder, lsn, running))
return;
else
SnapBuildSerialize(builder, lsn);
+ /*
+ * Database-specific transaction info may exist in order to reach the
+ * CONSISTENT state faster, however the code below makes no use of it.
+ * Moreover, such a record might cause problems because the following
+ * normal (cluster-wide) record can have a lower value of oldestRunningXid.
+ * In that case, let's wait with the cleanup for the next regular
+ * cluster-wide record.
+ */
+ if (OidIsValid(running->dbid))
+ return;
+
/*
* Update range of interesting xids based on the running xacts
* information. We don't increase ->xmax using it, because once we are in
*/
if (!RecoveryInProgress())
{
- LogStandbySnapshot();
+ /*
+ * If the last transaction info was about a specific database, the
+ * next one needs to be too - at least until we're in the CONSISTENT
+ * state.
+ */
+ LogStandbySnapshot(running->dbid);
}
}
/* Probably unnecessary, as we don't use the SQL interface ... */
opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
+ /*
+ * REPACK doesn't need access to shared catalogs, so we can speed up the
+ * historic snapshot creation by setting this flag. We'll only have to
+ * wait for transactions in our database.
+ */
+ opt->need_shared_catalogs = false;
+
if (ctx->output_plugin_options != NIL)
{
ereport(ERROR,
XLogRecPtr flushptr;
/* make sure we have enough information to start */
- flushptr = LogStandbySnapshot();
+ flushptr = LogStandbySnapshot(InvalidOid);
/* and make sure it's fsynced to disk */
XLogFlush(flushptr);
*
* Note that if any transaction has overflowed its cached subtransactions
* then there is no real need to include any subtransactions.
+ *
+ * If 'dbid' is valid, only gather transactions running in that database.
*/
RunningTransactions
-GetRunningTransactionData(void)
+GetRunningTransactionData(Oid dbid)
{
/* result workspace */
static RunningTransactionsData CurrentRunningXactsData;
if (!TransactionIdIsValid(xid))
continue;
+ /*
+ * Filter by database OID if requested.
+ */
+ if (OidIsValid(dbid))
+ {
+ int pgprocno = arrayP->pgprocnos[index];
+ PGPROC *proc = &allProcs[pgprocno];
+
+ if (proc->databaseId != dbid)
+ continue;
+ }
+
/*
* Be careful not to exclude any xids before calculating the values of
* oldestRunningXid and suboverflowed, since these are used to clean
PGPROC *proc = &allProcs[pgprocno];
int nsubxids;
+ /*
+ * Filter by database OID if requested.
+ */
+ if (OidIsValid(dbid) && proc->databaseId != dbid)
+ continue;
+
/*
* Save subtransaction XIDs. Other backends can't add or remove
* entries while we're holding XidGenLock.
* increases if slots do.
*/
+ CurrentRunningXacts->dbid = dbid;
CurrentRunningXacts->xcnt = count - subcount;
CurrentRunningXacts->subxcnt = subcount;
CurrentRunningXacts->subxid_status = suboverflowed ? SUBXIDS_IN_SUBTRANS : SUBXIDS_IN_ARRAY;
xl_running_xacts *xlrec = (xl_running_xacts *) XLogRecGetData(record);
RunningTransactionsData running;
+ /*
+ * Records issued for a specific database are not suitable for physical
+ * replication because that affects the whole cluster. In particular,
+ * the list of XIDs is probably incomplete here.
+ */
+ if (OidIsValid(xlrec->dbid))
+ return;
+
running.xcnt = xlrec->xcnt;
running.subxcnt = xlrec->subxcnt;
running.subxid_status = xlrec->subxid_overflow ? SUBXIDS_MISSING : SUBXIDS_IN_ARRAY;
* as there's no independent knob to just enable logical decoding. For
* details of how this is used, check snapbuild.c's introductory comment.
*
+ * If 'dbid' is valid, only gather transactions running in that
+ * database. snapbuild.c can use such running xacts information for faster
+ * startup, but it still needs normal (cluster-wide) records during the
+ * actual decoding - see standby_decode() and SnapBuildProcessRunningXacts()
+ * for details. Other processes (e.g. checkpointer) issue the cluster-wide
+ * records whether logical decoding is active or not.
+ *
+ * Please be careful about using this argument for other purposes. In
+ * particular, physical replication *must* ignore the database-specific
+ * records, exactly because they do not cover the whole cluster - see
+ * standby_redo().
*
* Returns the RecPtr of the last inserted record.
*/
XLogRecPtr
-LogStandbySnapshot(void)
+LogStandbySnapshot(Oid dbid)
{
XLogRecPtr recptr;
RunningTransactions running;
* Log details of all in-progress transactions. This should be the last
* record we write, because standby will open up when it sees this.
*/
- running = GetRunningTransactionData();
+ running = GetRunningTransactionData(dbid);
/*
* GetRunningTransactionData() acquired ProcArrayLock, we must release it.
xl_running_xacts xlrec;
XLogRecPtr recptr;
+ xlrec.dbid = CurrRunningXacts->dbid;
xlrec.xcnt = CurrRunningXacts->xcnt;
xlrec.subxcnt = CurrRunningXacts->subxcnt;
xlrec.subxid_overflow = (CurrRunningXacts->subxid_status != SUBXIDS_IN_ARRAY);
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD11E /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD11F /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
extern void PreventCommandIfParallelMode(const char *cmdname);
extern void PreventCommandDuringRecovery(const char *cmdname);
+/* in replication/snapbuild.c */
+
+/*
+ * Keep track of whether logical decoding in this backend promised not to
+ * access shared catalogs, as a safety check. This is checked by genam.c when
+ * a catalog scan takes place to verify that no shared catalogs are accessed.
+ */
+extern bool accessSharedCatalogsInDecoding;
+
/*****************************************************************************
* pdir.h -- *
* POSTGRES directory path definitions. *
{
OutputPluginOutputType output_type;
bool receive_rewrites;
+ bool need_shared_catalogs;
} OutputPluginOptions;
/*
XLogRecPtr lsn,
xl_heap_new_cid *xlrec);
extern void SnapBuildProcessRunningXacts(SnapBuild *builder, XLogRecPtr lsn,
- xl_running_xacts *running);
+ xl_running_xacts *running,
+ bool db_specific);
extern void SnapBuildSerializationPoint(SnapBuild *builder, XLogRecPtr lsn);
extern bool SnapBuildSnapshotExists(XLogRecPtr lsn);
VirtualTransactionId *sourcevxid);
extern bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc);
-extern RunningTransactions GetRunningTransactionData(void);
+extern RunningTransactions GetRunningTransactionData(Oid dbid);
extern bool TransactionIdIsInProgress(TransactionId xid);
extern TransactionId GetOldestNonRemovableTransactionId(Relation rel);
typedef struct RunningTransactionsData
{
+ Oid dbid; /* only track xacts in this database */
int xcnt; /* # of xact ids in xids[] */
int subxcnt; /* # of subxact ids in xids[] */
subxids_array_status subxid_status;
extern void LogAccessExclusiveLock(Oid dbOid, Oid relOid);
extern void LogAccessExclusiveLockPrepare(void);
-extern XLogRecPtr LogStandbySnapshot(void);
+extern XLogRecPtr LogStandbySnapshot(Oid dbid);
extern void LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
bool relcacheInitFileInval);
*/
typedef struct xl_running_xacts
{
+ Oid dbid; /* only track xacts in this database */
int xcnt; /* # of xact ids in xids[] */
int subxcnt; /* # of subxact ids in xids[] */
bool subxid_overflow; /* snapshot overflowed, subxids missing */