git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Implement lazy XID allocation: transactions that do not modify any database
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 5 Sep 2007 18:10:48 +0000 (18:10 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 5 Sep 2007 18:10:48 +0000 (18:10 +0000)
rows will normally never obtain an XID at all.  We already did things this way
for subtransactions, but this patch extends the concept to top-level
transactions.  In applications where there are lots of short read-only
transactions, this should improve performance noticeably; not so much from
removal of the actual XID-assignments, as from reduction of overhead that's
driven by the rate of XID consumption.  We add a concept of a "virtual
transaction ID" so that active transactions can be uniquely identified even
if they don't have a regular XID.  This is a much lighter-weight concept:
uniqueness of VXIDs is only guaranteed over the short term, and no on-disk
record is made about them.

Florian Pflug, with some editorialization by Tom.

34 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/config.sgml
src/backend/access/heap/heapam.c
src/backend/access/transam/README
src/backend/access/transam/clog.c
src/backend/access/transam/multixact.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/backend/catalog/system_views.sql
src/backend/commands/indexcmds.c
src/backend/commands/sequence.c
src/backend/commands/vacuum.c
src/backend/commands/vacuumlazy.c
src/backend/storage/ipc/procarray.c
src/backend/storage/ipc/sinvaladt.c
src/backend/storage/lmgr/lmgr.c
src/backend/storage/lmgr/lock.c
src/backend/storage/lmgr/proc.c
src/backend/storage/smgr/smgr.c
src/backend/utils/adt/lockfuncs.c
src/backend/utils/error/elog.c
src/backend/utils/misc/postgresql.conf.sample
src/include/access/xact.h
src/include/access/xlog.h
src/include/c.h
src/include/catalog/catversion.h
src/include/storage/lmgr.h
src/include/storage/lock.h
src/include/storage/proc.h
src/include/storage/procarray.h
src/include/storage/sinvaladt.h
src/include/storage/smgr.h
src/test/regress/expected/rules.out

index 525d24feace3d03ae25ce08f564f4c1679d0b91f..68ff092148104d9c8b8557a113934c232d9e5fed 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.156 2007/09/03 00:39:11 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.157 2007/09/05 18:10:47 tgl Exp $ -->
 <!--
  Documentation of the system catalogs, directed toward PostgreSQL developers
  -->
    There are several distinct types of lockable objects:
    whole relations (e.g., tables), individual pages of relations,
    individual tuples of relations,
-   transaction IDs,
+   transaction IDs (both virtual and permanent IDs),
    and general database objects (identified by class OID and object OID,
    in the same way as in <structname>pg_description</structname> or
    <structname>pg_depend</structname>).  Also, the right to extend a
        <literal>page</>,
        <literal>tuple</>,
        <literal>transactionid</>,
+       <literal>virtualxid</>,
        <literal>object</>,
        <literal>userlock</>, or
        <literal>advisory</>
        Tuple number within the page, or NULL if the object is not a tuple
       </entry>
      </row>
+     <row>
+      <entry><structfield>virtualxid</structfield></entry>
+      <entry><type>text</type></entry>
+      <entry></entry>
+      <entry>
+       Virtual ID of a transaction, or NULL if the object is not a
+       virtual transaction ID
+      </entry>
+     </row>
      <row>
       <entry><structfield>transactionid</structfield></entry>
       <entry><type>xid</type></entry>
       </entry>
      </row>
      <row>
-      <entry><structfield>transaction</structfield></entry>
-      <entry><type>xid</type></entry>
+      <entry><structfield>virtualtransaction</structfield></entry>
+      <entry><type>text</type></entry>
       <entry></entry>
       <entry>
-       ID of the transaction that is holding or awaiting this lock
+       Virtual ID of the transaction that is holding or awaiting this lock
       </entry>
      </row>
      <row>
   </para>
 
   <para>
-   Every transaction holds an exclusive lock on its transaction ID for its
-   entire duration. If one transaction finds it necessary to wait specifically
+   Every transaction holds an exclusive lock on its virtual transaction ID for
+   its entire duration.  If a permanent ID is assigned to the transaction
+   (which normally happens only if the transaction changes the state of the
+   database), it also holds an exclusive lock on its permanent transaction ID
+   until it ends.  When one transaction finds it necessary to wait specifically
    for another transaction, it does so by attempting to acquire share lock on
-   the other transaction ID. That will succeed only when the other transaction
+   the other transaction ID (either virtual or permanent ID depending on the
+   situation). That will succeed only when the other transaction
    terminates and releases its locks. 
   </para>
 
    and therefore row-level locks normally do not appear in this view.
    If a transaction is waiting for a
    row-level lock, it will usually appear in the view as waiting for the
-   transaction ID of the current holder of that row lock.
+   permanent transaction ID of the current holder of that row lock.
   </para>
 
   <para>
   </para>
 
   <para>
-   If you have enabled the statistics collector, the
-   <structfield>pid</structfield> column can be joined to the
+   The <structfield>pid</structfield> column can be joined to the
    <structfield>procpid</structfield> column of the
    <structname>pg_stat_activity</structname> view to get more
-   information on the session holding or waiting to hold the lock.
+   information on the session holding or waiting to hold each lock.
    Also, if you are using prepared transactions, the
    <structfield>transaction</> column can be joined to the
    <structfield>transaction</structfield> column of the
index 5ef230a4fe3ac64b8f6da2dfc79cbd3d99485699..d770c4606f2114d7c907fc63681c2099b78fcf12 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.141 2007/08/22 04:45:20 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.142 2007/09/05 18:10:47 tgl Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -2939,10 +2939,15 @@ SELECT * FROM parent WHERE key = 2400;
              <entry>Process start time stamp</entry>
              <entry>no</entry>
             </row>
+            <row>
+             <entry><literal>%v</literal></entry>
+             <entry>Virtual transaction ID (backendID/localXID)</entry>
+             <entry>no</entry>
+            </row>
             <row>
              <entry><literal>%x</literal></entry>
-             <entry>Transaction ID</entry>
-             <entry>yes</entry>
+             <entry>Transaction ID (0 if none is assigned)</entry>
+             <entry>no</entry>
             </row>
             <row>
              <entry><literal>%q</literal></entry>
index 3f44bd7d948a31227d2f13e86e4ff9eb003c8186..3f23378b8fa0de65abe97e0e302cd8adc0a2dc3c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.237 2007/08/14 17:35:18 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.238 2007/09/05 18:10:47 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1632,12 +1632,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
        MarkBufferDirty(buffer);
 
        /* XLOG stuff */
-       if (relation->rd_istemp)
-       {
-               /* No XLOG record, but still need to flag that XID exists on disk */
-               MyXactMadeTempRelUpdate = true;
-       }
-       else if (use_wal)
+       if (use_wal && !relation->rd_istemp)
        {
                xl_heap_insert xlrec;
                xl_heap_header xlhdr;
@@ -1947,11 +1942,6 @@ l1:
                PageSetLSN(dp, recptr);
                PageSetTLI(dp, ThisTimeLineID);
        }
-       else
-       {
-               /* No XLOG record, but still need to flag that XID exists on disk */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 
@@ -2403,11 +2393,6 @@ l2:
                PageSetLSN(BufferGetPage(buffer), recptr);
                PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
        }
-       else
-       {
-               /* No XLOG record, but still need to flag that XID exists on disk */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 
@@ -2924,11 +2909,6 @@ l3:
                PageSetLSN(dp, recptr);
                PageSetTLI(dp, ThisTimeLineID);
        }
-       else
-       {
-               /* No XLOG record, but still need to flag that XID exists on disk */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 
index 6e7e132acabb1fe5dd1bcb43a09018ccd2d9eeec..87b405917021f070a43b6d67441e15b392995e53 100644 (file)
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.6 2007/08/01 22:45:07 tgl Exp $
+$PostgreSQL: pgsql/src/backend/access/transam/README,v 1.7 2007/09/05 18:10:47 tgl Exp $
 
 The Transaction System
 ----------------------
@@ -187,16 +187,29 @@ Another difference is that BeginInternalSubtransaction is allowed when no
 explicit transaction block has been established, while DefineSavepoint is not.
 
 
-Subtransaction numbering
-------------------------
+Transaction and subtransaction numbering
+----------------------------------------
 
-A top-level transaction is always given a TransactionId (XID) as soon as it is
-created.  This is necessary for a number of reasons, notably XMIN bookkeeping
-for VACUUM.  However, a subtransaction doesn't need its own XID unless it
-(or one of its child subxacts) writes tuples into the database.  Therefore,
-we postpone assigning XIDs to subxacts until and unless they call
-GetCurrentTransactionId.  The subsidiary actions of obtaining a lock on the
-XID and and entering it into pg_subtrans and PG_PROC are done at the same time.
+Transactions and subtransactions are assigned permanent XIDs only when/if
+they first do something that requires one --- typically, insert/update/delete
+a tuple, though there are a few other places that need an XID assigned.
+If a subtransaction requires an XID, we always first assign one to its
+parent.  This maintains the invariant that child transactions have XIDs later
+than their parents, which is assumed in a number of places.
+
+The subsidiary actions of obtaining a lock on the XID and entering it into
+pg_subtrans and PG_PROC are done at the time it is assigned.
+
+A transaction that has no XID still needs to be identified for various
+purposes, notably holding locks.  For this purpose we assign a "virtual
+transaction ID" or VXID to each top-level transaction.  VXIDs are formed from
+two fields, the backendID and a backend-local counter; this arrangement allows
+assignment of a new VXID at transaction start without any contention for
+shared memory.  To ensure that a VXID isn't re-used too soon after backend
+exit, we store the last local counter value into shared memory at backend
+exit, and initialize it from the previous value for the same backendID slot
+at backend start.  All these counters go back to zero at shared memory
+re-initialization, but that's OK because VXIDs never appear anywhere on-disk.
 
 Internally, a backend needs a way to identify subtransactions whether or not
 they have XIDs; but this need only lasts as long as the parent top transaction
@@ -204,7 +217,8 @@ endures.  Therefore, we have SubTransactionId, which is somewhat like
 CommandId in that it's generated from a counter that we reset at the start of
 each top transaction.  The top-level transaction itself has SubTransactionId 1,
 and subtransactions have IDs 2 and up.  (Zero is reserved for
-InvalidSubTransactionId.)
+InvalidSubTransactionId.)  Note that subtransactions do not have their
+own VXIDs; they use the parent top transaction's VXID.
 
 
 pg_clog and pg_subtrans
index 9665d1295419832151373b2e37404b79d33bed43..419c8656065255f6b253d51a0f324715b30faec8 100644 (file)
@@ -26,7 +26,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.43 2007/08/01 22:45:07 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.44 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -423,10 +423,6 @@ CLOGPagePrecedes(int page1, int page2)
 
 /*
  * Write a ZEROPAGE xlog record
- *
- * Note: xlog record is marked as outside transaction control, since we
- * want it to be redone whether the invoking transaction commits or not.
- * (Besides which, this is normally done just before entering a transaction.)
  */
 static void
 WriteZeroPageXlogRec(int pageno)
@@ -437,7 +433,7 @@ WriteZeroPageXlogRec(int pageno)
        rdata.len = sizeof(int);
        rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
-       (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata);
+       (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
 }
 
 /*
@@ -445,9 +441,6 @@ WriteZeroPageXlogRec(int pageno)
  *
  * We must flush the xlog record to disk before returning --- see notes
  * in TruncateCLOG().
- *
- * Note: xlog record is marked as outside transaction control, since we
- * want it to be redone whether the invoking transaction commits or not.
  */
 static void
 WriteTruncateXlogRec(int pageno)
@@ -459,7 +452,7 @@ WriteTruncateXlogRec(int pageno)
        rdata.len = sizeof(int);
        rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
-       recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE | XLOG_NO_TRAN, &rdata);
+       recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
        XLogFlush(recptr);
 }
 
index 3ce6f14bcf6546a91f3391bc36da7f3cfaed89b6..b34fa9be78502bb304b7a7460e922f6ef5595b6d 100644 (file)
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.24 2007/08/01 22:45:07 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.25 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1842,9 +1842,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 /*
  * Write an xlog record reflecting the zeroing of either a MEMBERs or
  * OFFSETs page (info shows which)
- *
- * Note: xlog record is marked as outside transaction control, since we
- * want it to be redone whether the invoking transaction commits or not.
  */
 static void
 WriteMZeroPageXlogRec(int pageno, uint8 info)
@@ -1855,7 +1852,7 @@ WriteMZeroPageXlogRec(int pageno, uint8 info)
        rdata.len = sizeof(int);
        rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
-       (void) XLogInsert(RM_MULTIXACT_ID, info | XLOG_NO_TRAN, &rdata);
+       (void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
 }
 
 /*
index 2ae81e823d5f788c090afd9f90dc8ca395af21f4..3e7e8435029895a7a73679580543cd4abe362630 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *             $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.32 2007/08/01 22:45:07 tgl Exp $
+ *             $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.33 2007/09/05 18:10:47 tgl Exp $
  *
  * NOTES
  *             Each global transaction is associated with a global transaction
@@ -274,9 +274,11 @@ MarkAsPreparing(TransactionId xid, const char *gid,
        MemSet(&gxact->proc, 0, sizeof(PGPROC));
        SHMQueueElemInit(&(gxact->proc.links));
        gxact->proc.waitStatus = STATUS_OK;
+       gxact->proc.lxid = InvalidLocalTransactionId;
        gxact->proc.xid = xid;
        gxact->proc.xmin = InvalidTransactionId;
        gxact->proc.pid = 0;
+       gxact->proc.backendId = InvalidBackendId;
        gxact->proc.databaseId = databaseid;
        gxact->proc.roleId = owner;
        gxact->proc.inCommit = false;
@@ -813,8 +815,8 @@ StartPrepare(GlobalTransaction gxact)
        hdr.prepared_at = gxact->prepared_at;
        hdr.owner = gxact->owner;
        hdr.nsubxacts = xactGetCommittedChildren(&children);
-       hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels);
-       hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels);
+       hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels, NULL);
+       hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels, NULL);
        StrNCpy(hdr.gid, gxact->gid, GIDSIZE);
 
        save_state_data(&hdr, sizeof(TwoPhaseFileHeader));
@@ -1702,9 +1704,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
        }
        rdata[lastrdata].next = NULL;
 
-       recptr = XLogInsert(RM_XACT_ID,
-                                               XLOG_XACT_COMMIT_PREPARED | XLOG_NO_TRAN,
-                                               rdata);
+       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
 
        /*
         * We don't currently try to sleep before flush here ... nor is there
@@ -1784,9 +1784,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
        }
        rdata[lastrdata].next = NULL;
 
-       recptr = XLogInsert(RM_XACT_ID,
-                                               XLOG_XACT_ABORT_PREPARED | XLOG_NO_TRAN,
-                                               rdata);
+       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
 
        /* Always flush, since we're about to remove the 2PC state file */
        XLogFlush(recptr);
index 18787d17770e522ea23bd72ecd81dc878a500164..2e972d56f60fb0605618e3c7304d6ecc78049ff4 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.247 2007/09/03 00:39:13 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.248 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,6 +37,7 @@
 #include "storage/fd.h"
 #include "storage/lmgr.h"
 #include "storage/procarray.h"
+#include "storage/sinvaladt.h"
 #include "storage/smgr.h"
 #include "utils/combocid.h"
 #include "utils/flatfiles.h"
@@ -216,7 +217,7 @@ static SubXactCallbackItem *SubXact_callbacks = NULL;
 
 
 /* local function prototypes */
-static void AssignSubTransactionId(TransactionState s);
+static void AssignTransactionId(TransactionState s);
 static void AbortTransaction(void);
 static void AtAbort_Memory(void);
 static void AtCleanup_Memory(void);
@@ -232,7 +233,7 @@ static void CallSubXactCallbacks(SubXactEvent event,
                                         SubTransactionId parentSubid);
 static void CleanupTransaction(void);
 static void CommitTransaction(void);
-static void RecordTransactionAbort(void);
+static void RecordTransactionAbort(bool isSubXact);
 static void StartTransaction(void);
 
 static void RecordSubTransactionCommit(void);
@@ -304,25 +305,36 @@ IsAbortedTransactionBlockState(void)
 /*
  *     GetTopTransactionId
  *
- * Get the ID of the main transaction, even if we are currently inside
- * a subtransaction.  If we are not in a transaction at all, or if we
- * are in transaction startup and haven't yet assigned an XID,
- * InvalidTransactionId is returned.
+ * This will return the XID of the main transaction, assigning one if
+ * it's not yet set.  Be careful to call this only inside a valid xact.
  */
 TransactionId
 GetTopTransactionId(void)
 {
+       if (!TransactionIdIsValid(TopTransactionStateData.transactionId))
+               AssignTransactionId(&TopTransactionStateData);
        return TopTransactionStateData.transactionId;
 }
 
+/*
+ *     GetTopTransactionIdIfAny
+ *
+ * This will return the XID of the main transaction, if one is assigned.
+ * It will return InvalidTransactionId if we are not currently inside a
+ * transaction, or inside a transaction that hasn't yet been assigned an XID.
+ */
+TransactionId
+GetTopTransactionIdIfAny(void)
+{
+       return TopTransactionStateData.transactionId;
+}
 
 /*
  *     GetCurrentTransactionId
  *
- * We do not assign XIDs to subtransactions until/unless this is called.
- * When we do assign an XID to a subtransaction, recursively make sure
- * its parent has one as well (this maintains the invariant that a child
- * transaction has an XID following its parent's).
+ * This will return the XID of the current transaction (main or sub
+ * transaction), assigning one if it's not yet set.  Be careful to call this
+ * only inside a valid xact.
  */
 TransactionId
 GetCurrentTransactionId(void)
@@ -330,20 +342,49 @@ GetCurrentTransactionId(void)
        TransactionState s = CurrentTransactionState;
 
        if (!TransactionIdIsValid(s->transactionId))
-               AssignSubTransactionId(s);
-
+               AssignTransactionId(s);
        return s->transactionId;
 }
 
+/*
+ *     GetCurrentTransactionIdIfAny
+ *
+ * This will return the XID of the current transaction (main or sub
+ * transaction), if one is assigned.
+ * It will return InvalidTransactionId if we are not currently inside a
+ * transaction, or inside a transaction that hasn't been assigned an XID yet.
+ */
+TransactionId
+GetCurrentTransactionIdIfAny(void)
+{
+       return CurrentTransactionState->transactionId;
+}
+
+
+/*
+ * AssignTransactionId
+ *
+ * Assigns a new permanent XID to the given TransactionState.
+ * We do not assign XIDs to transactions until/unless this is called.
+ * Also, any parent TransactionStates that don't yet have XIDs are assigned
+ * one; this maintains the invariant that a child transaction has an XID
+ * following its parent's.
+ */
 static void
-AssignSubTransactionId(TransactionState s)
+AssignTransactionId(TransactionState s)
 {
+       bool isSubXact = (s->parent != NULL);
        ResourceOwner currentOwner;
 
-       Assert(s->parent != NULL);
+       /* Assert that caller didn't screw up */
+       Assert(!TransactionIdIsValid(s->transactionId));
        Assert(s->state == TRANS_INPROGRESS);
-       if (!TransactionIdIsValid(s->parent->transactionId))
-               AssignSubTransactionId(s->parent);
+
+       /*
+        * Ensure parent(s) have XIDs, so that a child always has an XID later
+        * than its parent.
+        */
+       if (isSubXact && !TransactionIdIsValid(s->parent->transactionId))
+               AssignTransactionId(s->parent);
 
        /*
         * Generate a new Xid and record it in PG_PROC and pg_subtrans.
@@ -353,20 +394,20 @@ AssignSubTransactionId(TransactionState s)
         * PG_PROC, the subtrans entry is needed to ensure that other backends see
         * the Xid as "running".  See GetNewTransactionId.
         */
-       s->transactionId = GetNewTransactionId(true);
+       s->transactionId = GetNewTransactionId(isSubXact);
 
-       SubTransSetParent(s->transactionId, s->parent->transactionId);
+       if (isSubXact)
+               SubTransSetParent(s->transactionId, s->parent->transactionId);
 
        /*
-        * Acquire lock on the transaction XID.  (We assume this cannot block.) We
-        * have to be sure that the lock is assigned to the transaction's
-        * ResourceOwner.
+        * Acquire lock on the transaction XID.  (We assume this cannot block.)
+        * We have to ensure that the lock is assigned to the transaction's
+        * own ResourceOwner.
         */
        currentOwner = CurrentResourceOwner;
        PG_TRY();
        {
                CurrentResourceOwner = s->curTransactionOwner;
-
                XactLockTableInsert(s->transactionId);
        }
        PG_CATCH();
@@ -380,22 +421,6 @@ AssignSubTransactionId(TransactionState s)
 }
 
 
-/*
- *     GetCurrentTransactionIdIfAny
- *
- * Unlike GetCurrentTransactionId, this will return InvalidTransactionId
- * if we are currently not in a transaction, or in a transaction or
- * subtransaction that has not yet assigned itself an XID.
- */
-TransactionId
-GetCurrentTransactionIdIfAny(void)
-{
-       TransactionState s = CurrentTransactionState;
-
-       return s->transactionId;
-}
-
-
 /*
  *     GetCurrentSubTransactionId
  */
@@ -726,192 +751,188 @@ AtSubStart_ResourceOwner(void)
 void
 RecordTransactionCommit(void)
 {
+       TransactionId xid = GetTopTransactionIdIfAny();
+       bool            markXidCommitted = TransactionIdIsValid(xid);
        int                     nrels;
        RelFileNode *rels;
+       bool            haveNonTemp;
        int                     nchildren;
        TransactionId *children;
 
        /* Get data needed for commit record */
-       nrels = smgrGetPendingDeletes(true, &rels);
+       nrels = smgrGetPendingDeletes(true, &rels, &haveNonTemp);
        nchildren = xactGetCommittedChildren(&children);
 
        /*
-        * If we made neither any XLOG entries nor any temp-rel updates, and have
-        * no files to be deleted, we can omit recording the transaction commit at
-        * all.  (This test includes the effects of subtransactions, so the
-        * presence of committed subxacts need not alone force a write.)
+        * If we haven't been assigned an XID yet, we neither can, nor do we
+        * want to write a COMMIT record.
         */
-       if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate || nrels > 0)
+       if (!markXidCommitted)
        {
-               TransactionId xid = GetCurrentTransactionId();
-               bool            madeTCentries;
-               bool            isAsyncCommit = false;
-               XLogRecPtr      recptr;
+               /*
+                * We expect that every smgrscheduleunlink is followed by a catalog
+                * update, and hence XID assignment, so we shouldn't get here with
+                * any pending deletes.  Use a real test not just an Assert to check
+                * this, since it's a bit fragile.
+                */
+               if (nrels != 0)
+                       elog(ERROR, "cannot commit a transaction that deleted files but has no xid");
+
+               /* Can't have child XIDs either; AssignTransactionId enforces this */
+               Assert(nchildren == 0);
+               
+               /*
+                * If we didn't create XLOG entries, we're done here; otherwise we
+                * should flush those entries the same as a commit record.  (An
+                * example of a possible record that wouldn't cause an XID to be
+                * assigned is a sequence advance record due to nextval() --- we
+                * want to flush that to disk before reporting commit.)
+                */
+               if (XactLastRecEnd.xrecoff == 0)
+                       goto cleanup;
+       }
+       else
+       {
+               /*
+                * Begin commit critical section and insert the commit XLOG record.
+                */
+               XLogRecData     rdata[3];
+               int                             lastrdata = 0;
+               xl_xact_commit  xlrec;
 
                /* Tell bufmgr and smgr to prepare for commit */
                BufmgrCommit();
 
-               START_CRIT_SECTION();
-
                /*
-                * We only need to log the commit in XLOG if the transaction made any
-                * transaction-controlled XLOG entries or will delete files.
+                * Mark ourselves as within our "commit critical section".  This
+                * forces any concurrent checkpoint to wait until we've updated
+                * pg_clog.  Without this, it is possible for the checkpoint to
+                * set REDO after the XLOG record but fail to flush the pg_clog
+                * update to disk, leading to loss of the transaction commit if
+                * the system crashes a little later.
+                *
+                * Note: we could, but don't bother to, set this flag in
+                * RecordTransactionAbort.  That's because loss of a transaction
+                * abort is noncritical; the presumption would be that it aborted,
+                * anyway.
+                *
+                * It's safe to change the inCommit flag of our own backend
+                * without holding the ProcArrayLock, since we're the only one
+                * modifying it.  This makes checkpoint's determination of which
+                * xacts are inCommit a bit fuzzy, but it doesn't matter.
                 */
-               madeTCentries = (MyLastRecPtr.xrecoff != 0);
-               if (madeTCentries || nrels > 0)
+               START_CRIT_SECTION();
+               MyProc->inCommit = true;
+
+               SetCurrentTransactionStopTimestamp();
+               xlrec.xact_time = xactStopTimestamp;
+               xlrec.nrels = nrels;
+               xlrec.nsubxacts = nchildren;
+               rdata[0].data = (char *) (&xlrec);
+               rdata[0].len = MinSizeOfXactCommit;
+               rdata[0].buffer = InvalidBuffer;
+               /* dump rels to delete */
+               if (nrels > 0)
                {
-                       XLogRecData rdata[3];
-                       int                     lastrdata = 0;
-                       xl_xact_commit xlrec;
-
-                       /*
-                        * Mark ourselves as within our "commit critical section".  This
-                        * forces any concurrent checkpoint to wait until we've updated
-                        * pg_clog.  Without this, it is possible for the checkpoint to
-                        * set REDO after the XLOG record but fail to flush the pg_clog
-                        * update to disk, leading to loss of the transaction commit if
-                        * the system crashes a little later.
-                        *
-                        * Note: we could, but don't bother to, set this flag in
-                        * RecordTransactionAbort.  That's because loss of a transaction
-                        * abort is noncritical; the presumption would be that it aborted,
-                        * anyway.
-                        *
-                        * It's safe to change the inCommit flag of our own backend
-                        * without holding the ProcArrayLock, since we're the only one
-                        * modifying it.  This makes checkpoint's determination of which
-                        * xacts are inCommit a bit fuzzy, but it doesn't matter.
-                        */
-                       MyProc->inCommit = true;
-
-                       SetCurrentTransactionStopTimestamp();
-                       xlrec.xact_time = xactStopTimestamp;
-                       xlrec.nrels = nrels;
-                       xlrec.nsubxacts = nchildren;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactCommit;
-                       rdata[0].buffer = InvalidBuffer;
-                       /* dump rels to delete */
-                       if (nrels > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) rels;
-                               rdata[1].len = nrels * sizeof(RelFileNode);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
-                       /* dump committed child Xids */
-                       if (nchildren > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].data = (char *) children;
-                               rdata[2].len = nchildren * sizeof(TransactionId);
-                               rdata[2].buffer = InvalidBuffer;
-                               lastrdata = 2;
-                       }
-                       rdata[lastrdata].next = NULL;
-
-                       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+                       rdata[0].next = &(rdata[1]);
+                       rdata[1].data = (char *) rels;
+                       rdata[1].len = nrels * sizeof(RelFileNode);
+                       rdata[1].buffer = InvalidBuffer;
+                       lastrdata = 1;
                }
-               else
+               /* dump committed child Xids */
+               if (nchildren > 0)
                {
-                       /* Just flush through last record written by me */
-                       recptr = ProcLastRecEnd;
+                       rdata[lastrdata].next = &(rdata[2]);
+                       rdata[2].data = (char *) children;
+                       rdata[2].len = nchildren * sizeof(TransactionId);
+                       rdata[2].buffer = InvalidBuffer;
+                       lastrdata = 2;
                }
+               rdata[lastrdata].next = NULL;
+
+               (void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
+       }
 
+       /*
+        * Check if we want to commit asynchronously.  If the user has set
+        * synchronous_commit = off, and we're not doing cleanup of any non-temp
+        * rels nor committing any command that wanted to force sync commit, then
+        * we can defer flushing XLOG.  (We must not allow asynchronous commit if
+        * there are any non-temp tables to be deleted, because we might delete
+        * the files before the COMMIT record is flushed to disk.  We do allow
+        * asynchronous commit if all to-be-deleted tables are temporary though,
+        * since they are lost anyway if we crash.)
+        */
+       if (XactSyncCommit || forceSyncCommit || haveNonTemp)
+       {
                /*
-                * We must flush our XLOG entries to disk if we made any XLOG entries,
-                * whether in or out of transaction control.  For example, if we
-                * reported a nextval() result to the client, this ensures that any
-                * XLOG record generated by nextval will hit the disk before we report
-                * the transaction committed.
+                * Synchronous commit case.
                 *
-                * Note: if we generated a commit record above, MyXactMadeXLogEntry
-                * will certainly be set now.
+                * Sleep before flush! So we can flush more than one commit
+                * record per single fsync.  (The idea is some other backend
+                * may do the XLogFlush while we're sleeping.  This needs work
+                * still, because on most Unixen, the minimum select() delay
+                * is 10msec or more, which is way too long.)
+                *
+                * We do not sleep if enableFsync is not turned on, nor if
+                * there are fewer than CommitSiblings other backends with
+                * active transactions.
                 */
-               if (MyXactMadeXLogEntry)
-               {
-                       /*
-                        * If the user has set synchronous_commit = off, and we're
-                        * not doing cleanup of any rels nor committing any command
-                        * that wanted to force sync commit, then we can defer fsync.
-                        */
-                       if (XactSyncCommit || forceSyncCommit || nrels > 0)
-                       {
-                               /*
-                                * Synchronous commit case.
-                                *
-                                * Sleep before flush! So we can flush more than one commit
-                                * records per single fsync.  (The idea is some other backend
-                                * may do the XLogFlush while we're sleeping.  This needs work
-                                * still, because on most Unixen, the minimum select() delay
-                                * is 10msec or more, which is way too long.)
-                                *
-                                * We do not sleep if enableFsync is not turned on, nor if
-                                * there are fewer than CommitSiblings other backends with
-                                * active transactions.
-                                */
-                               if (CommitDelay > 0 && enableFsync &&
-                                       CountActiveBackends() >= CommitSiblings)
-                                       pg_usleep(CommitDelay);
+               if (CommitDelay > 0 && enableFsync &&
+                       CountActiveBackends() >= CommitSiblings)
+                       pg_usleep(CommitDelay);
 
-                               XLogFlush(recptr);
-                       }
-                       else
-                       {
-                               /*
-                                * Asynchronous commit case.
-                                */
-                               isAsyncCommit = true;
+               XLogFlush(XactLastRecEnd);
 
-                               /*
-                                * Report the latest async commit LSN, so that
-                                * the WAL writer knows to flush this commit.
-                                */
-                               XLogSetAsyncCommitLSN(recptr);
-                       }
+               /*
+                * Now we may update the CLOG, if we wrote a COMMIT record above
+                */
+               if (markXidCommitted)
+               {
+                       TransactionIdCommit(xid);
+                       /* to avoid race conditions, the parent must commit first */
+                       TransactionIdCommitTree(nchildren, children);
                }
-
+       }
+       else
+       {
                /*
-                * We must mark the transaction committed in clog if its XID appears
-                * either in permanent rels or in local temporary rels. We test this
-                * by seeing if we made transaction-controlled entries *OR* local-rel
-                * tuple updates.  Note that if we made only the latter, we have not
-                * emitted an XLOG record for our commit, and so in the event of a
-                * crash the clog update might be lost.  This is okay because no one
-                * else will ever care whether we committed.
+                * Asynchronous commit case.
                 *
-                * The recptr here refers to the last xlog entry by this transaction
-                * so is the correct value to use for setting the clog.
+                * Report the latest async commit LSN, so that
+                * the WAL writer knows to flush this commit.
                 */
-               if (madeTCentries || MyXactMadeTempRelUpdate)
+               XLogSetAsyncCommitLSN(XactLastRecEnd);
+
+               /*
+                * We must not immediately update the CLOG, since we didn't
+                * flush the XLOG. Instead, we store the LSN up to which
+                * the XLOG must be flushed before the CLOG may be updated.
+                */
+               if (markXidCommitted)
                {
-                       if (isAsyncCommit)
-                       {
-                               TransactionIdAsyncCommit(xid, recptr);
-                               /* to avoid race conditions, the parent must commit first */
-                               TransactionIdAsyncCommitTree(nchildren, children, recptr);
-                       }
-                       else
-                       {
-                               TransactionIdCommit(xid);
-                               /* to avoid race conditions, the parent must commit first */
-                               TransactionIdCommitTree(nchildren, children);
-                       }
+                       TransactionIdAsyncCommit(xid, XactLastRecEnd);
+                       /* to avoid race conditions, the parent must commit first */
+                       TransactionIdAsyncCommitTree(nchildren, children, XactLastRecEnd);
                }
+       }
 
-               /* Checkpoint can proceed now */
+       /*
+        * If we entered a commit critical section, leave it now, and
+        * let checkpoints proceed.
+        */
+       if (markXidCommitted)
+       {
                MyProc->inCommit = false;
-
                END_CRIT_SECTION();
        }
 
-       /* Break the chain of back-links in the XLOG records I output */
-       MyLastRecPtr.xrecoff = 0;
-       MyXactMadeXLogEntry = false;
-       MyXactMadeTempRelUpdate = false;
+       /* Reset XactLastRecEnd until the next transaction writes something */
+       XactLastRecEnd.xrecoff = 0;
 
-       /* And clean up local data */
+cleanup:
+       /* Clean up local data */
        if (rels)
                pfree(rels);
        if (children)
@@ -1030,23 +1051,20 @@ AtSubCommit_childXids(void)
 static void
 RecordSubTransactionCommit(void)
 {
+       TransactionId xid = GetCurrentTransactionIdIfAny();
+
        /*
         * We do not log the subcommit in XLOG; it doesn't matter until the
         * top-level transaction commits.
         *
-        * We must mark the subtransaction subcommitted in clog if its XID appears
-        * either in permanent rels or in local temporary rels. We test this by
-        * seeing if we made transaction-controlled entries *OR* local-rel tuple
-        * updates.  (The test here actually covers the entire transaction tree so
-        * far, so it may mark subtransactions that don't really need it, but it's
-        * probably not worth being tenser. Note that if a prior subtransaction
-        * dirtied these variables, then RecordTransactionCommit will have to do
-        * the full pushup anyway...)
+        * We must mark the subtransaction subcommitted in the CLOG if
+        * it had a valid XID assigned.  If it did not, nobody else will
+        * ever know about the existence of this subxact.  We don't
+        * have to deal with deletions scheduled for on-commit here, since
+        * they'll be reassigned to our parent (who might still abort).
         */
-       if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate)
+       if (TransactionIdIsValid(xid))
        {
-               TransactionId xid = GetCurrentTransactionId();
-
                /* XXX does this really need to be a critical section? */
                START_CRIT_SECTION();
 
@@ -1066,108 +1084,118 @@ RecordSubTransactionCommit(void)
  *     RecordTransactionAbort
  */
 static void
-RecordTransactionAbort(void)
+RecordTransactionAbort(bool isSubXact)
 {
+       TransactionId xid = GetCurrentTransactionIdIfAny();
        int                     nrels;
        RelFileNode *rels;
        int                     nchildren;
        TransactionId *children;
-
-       /* Get data needed for abort record */
-       nrels = smgrGetPendingDeletes(false, &rels);
-       nchildren = xactGetCommittedChildren(&children);
+       XLogRecData     rdata[3];
+       int                             lastrdata = 0;
+       xl_xact_abort   xlrec;
 
        /*
-        * If we made neither any transaction-controlled XLOG entries nor any
-        * temp-rel updates, and are not going to delete any files, we can omit
-        * recording the transaction abort at all.      No one will ever care that it
-        * aborted.  (These tests cover our whole transaction tree.)
+        * If we haven't been assigned an XID, nobody will care whether we
+        * aborted or not.  Hence, we're done in that case.  It does not matter
+        * if we have rels to delete (note that this routine is not responsible
+        * for actually deleting 'em).  We cannot have any child XIDs, either.
         */
-       if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0)
+       if (!TransactionIdIsValid(xid))
        {
-               TransactionId xid = GetCurrentTransactionId();
+               /* Reset XactLastRecEnd until the next transaction writes something */
+               if (!isSubXact)
+                       XactLastRecEnd.xrecoff = 0;
+               return;
+       }
 
-               /*
-                * Catch the scenario where we aborted partway through
-                * RecordTransactionCommit ...
-                */
-               if (TransactionIdDidCommit(xid))
-                       elog(PANIC, "cannot abort transaction %u, it was already committed", xid);
+       /*
+        * We have a valid XID, so we should write an ABORT record for it.
+        *
+        * We do not flush XLOG to disk here, since the default assumption after a
+        * crash would be that we aborted, anyway.  For the same reason, we don't
+        * need to worry about interlocking against checkpoint start.
+        */
 
-               START_CRIT_SECTION();
+       /*
+        * Check that we haven't aborted halfway through RecordTransactionCommit.
+        */
+       if (TransactionIdDidCommit(xid))
+               elog(PANIC, "cannot abort transaction %u, it was already committed",
+                        xid);
 
-               /*
-                * We only need to log the abort in XLOG if the transaction made any
-                * transaction-controlled XLOG entries or will delete files. (If it
-                * made no transaction-controlled XLOG entries, its XID appears
-                * nowhere in permanent storage, so no one else will ever care if it
-                * committed.)
-                *
-                * We do not flush XLOG to disk unless deleting files, since the
-                * default assumption after a crash would be that we aborted, anyway.
-                * For the same reason, we don't need to worry about interlocking
-                * against checkpoint start.
-                */
-               if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
-               {
-                       XLogRecData rdata[3];
-                       int                     lastrdata = 0;
-                       xl_xact_abort xlrec;
-                       XLogRecPtr      recptr;
-
-                       SetCurrentTransactionStopTimestamp();
-                       xlrec.xact_time = xactStopTimestamp;
-                       xlrec.nrels = nrels;
-                       xlrec.nsubxacts = nchildren;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactAbort;
-                       rdata[0].buffer = InvalidBuffer;
-                       /* dump rels to delete */
-                       if (nrels > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) rels;
-                               rdata[1].len = nrels * sizeof(RelFileNode);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
-                       /* dump committed child Xids */
-                       if (nchildren > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].data = (char *) children;
-                               rdata[2].len = nchildren * sizeof(TransactionId);
-                               rdata[2].buffer = InvalidBuffer;
-                               lastrdata = 2;
-                       }
-                       rdata[lastrdata].next = NULL;
+       /* Fetch the data we need for the abort record */
+       nrels = smgrGetPendingDeletes(false, &rels, NULL);
+       nchildren = xactGetCommittedChildren(&children);
 
-                       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
+       /* XXX do we really need a critical section here? */
+       START_CRIT_SECTION();
 
-                       /* Must flush if we are deleting files... */
-                       if (nrels > 0)
-                               XLogFlush(recptr);
-               }
+       /* Write the ABORT record */
+       if (isSubXact)
+               xlrec.xact_time = GetCurrentTimestamp();
+       else
+       {
+               SetCurrentTransactionStopTimestamp();
+               xlrec.xact_time = xactStopTimestamp;
+       }
+       xlrec.nrels = nrels;
+       xlrec.nsubxacts = nchildren;
+       rdata[0].data = (char *) (&xlrec);
+       rdata[0].len = MinSizeOfXactAbort;
+       rdata[0].buffer = InvalidBuffer;
+       /* dump rels to delete */
+       if (nrels > 0)
+       {
+               rdata[0].next = &(rdata[1]);
+               rdata[1].data = (char *) rels;
+               rdata[1].len = nrels * sizeof(RelFileNode);
+               rdata[1].buffer = InvalidBuffer;
+               lastrdata = 1;
+       }
+       /* dump committed child Xids */
+       if (nchildren > 0)
+       {
+               rdata[lastrdata].next = &(rdata[2]);
+               rdata[2].data = (char *) children;
+               rdata[2].len = nchildren * sizeof(TransactionId);
+               rdata[2].buffer = InvalidBuffer;
+               lastrdata = 2;
+       }
+       rdata[lastrdata].next = NULL;
 
-               /*
-                * Mark the transaction aborted in clog.  This is not absolutely
-                * necessary but we may as well do it while we are here.
-                *
-                * The ordering here isn't critical but it seems best to mark the
-                * parent first.  This assures an atomic transition of all the
-                * subtransactions to aborted state from the point of view of
-                * concurrent TransactionIdDidAbort calls.
-                */
-               TransactionIdAbort(xid);
-               TransactionIdAbortTree(nchildren, children);
+       (void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
 
-               END_CRIT_SECTION();
-       }
+       /*
+        * Mark the transaction aborted in clog.  This is not absolutely necessary
+        * but we may as well do it while we are here; also, in the subxact case
+        * it is helpful because XactLockTableWait makes use of it to avoid
+        * waiting for already-aborted subtransactions.  It is OK to do it without
+        * having flushed the ABORT record to disk, because in event of a crash
+        * we'd be assumed to have aborted anyway.
+        *
+        * The ordering here isn't critical but it seems best to mark the
+        * parent first.  This assures an atomic transition of all the
+        * subtransactions to aborted state from the point of view of
+        * concurrent TransactionIdDidAbort calls.
+        */
+       TransactionIdAbort(xid);
+       TransactionIdAbortTree(nchildren, children);
 
-       /* Break the chain of back-links in the XLOG records I output */
-       MyLastRecPtr.xrecoff = 0;
-       MyXactMadeXLogEntry = false;
-       MyXactMadeTempRelUpdate = false;
+       END_CRIT_SECTION();
+
+       /*
+        * If we're aborting a subtransaction, we can immediately remove failed
+        * XIDs from PGPROC's cache of running child XIDs.  We do that here for
+        * subxacts, because we already have the child XID array at hand.  For
+        * main xacts, the equivalent happens just after this function returns.
+        */
+       if (isSubXact)
+               XidCacheRemoveRunningXids(xid, nchildren, children);
+
+       /* Reset XactLastRecEnd until the next transaction writes something */
+       if (!isSubXact)
+               XactLastRecEnd.xrecoff = 0;
 
        /* And clean up local data */
        if (rels)
@@ -1251,108 +1279,6 @@ AtSubAbort_childXids(void)
        s->childXids = NIL;
 }
 
-/*
- * RecordSubTransactionAbort
- */
-static void
-RecordSubTransactionAbort(void)
-{
-       int                     nrels;
-       RelFileNode *rels;
-       TransactionId xid = GetCurrentTransactionId();
-       int                     nchildren;
-       TransactionId *children;
-
-       /* Get data needed for abort record */
-       nrels = smgrGetPendingDeletes(false, &rels);
-       nchildren = xactGetCommittedChildren(&children);
-
-       /*
-        * If we made neither any transaction-controlled XLOG entries nor any
-        * temp-rel updates, and are not going to delete any files, we can omit
-        * recording the transaction abort at all.      No one will ever care that it
-        * aborted.  (These tests cover our whole transaction tree, and therefore
-        * may mark subxacts that don't really need it, but it's probably not
-        * worth being tenser.)
-        *
-        * In this case we needn't worry about marking subcommitted children as
-        * aborted, because they didn't mark themselves as subcommitted in the
-        * first place; see the optimization in RecordSubTransactionCommit.
-        */
-       if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate || nrels > 0)
-       {
-               START_CRIT_SECTION();
-
-               /*
-                * We only need to log the abort in XLOG if the transaction made any
-                * transaction-controlled XLOG entries or will delete files.
-                */
-               if (MyLastRecPtr.xrecoff != 0 || nrels > 0)
-               {
-                       XLogRecData rdata[3];
-                       int                     lastrdata = 0;
-                       xl_xact_abort xlrec;
-                       XLogRecPtr      recptr;
-
-                       xlrec.xact_time = GetCurrentTimestamp();
-                       xlrec.nrels = nrels;
-                       xlrec.nsubxacts = nchildren;
-                       rdata[0].data = (char *) (&xlrec);
-                       rdata[0].len = MinSizeOfXactAbort;
-                       rdata[0].buffer = InvalidBuffer;
-                       /* dump rels to delete */
-                       if (nrels > 0)
-                       {
-                               rdata[0].next = &(rdata[1]);
-                               rdata[1].data = (char *) rels;
-                               rdata[1].len = nrels * sizeof(RelFileNode);
-                               rdata[1].buffer = InvalidBuffer;
-                               lastrdata = 1;
-                       }
-                       /* dump committed child Xids */
-                       if (nchildren > 0)
-                       {
-                               rdata[lastrdata].next = &(rdata[2]);
-                               rdata[2].data = (char *) children;
-                               rdata[2].len = nchildren * sizeof(TransactionId);
-                               rdata[2].buffer = InvalidBuffer;
-                               lastrdata = 2;
-                       }
-                       rdata[lastrdata].next = NULL;
-
-                       recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
-
-                       /* Must flush if we are deleting files... */
-                       if (nrels > 0)
-                               XLogFlush(recptr);
-               }
-
-               /*
-                * Mark the transaction aborted in clog.  This is not absolutely
-                * necessary but XactLockTableWait makes use of it to avoid waiting
-                * for already-aborted subtransactions.
-                */
-               TransactionIdAbort(xid);
-               TransactionIdAbortTree(nchildren, children);
-
-               END_CRIT_SECTION();
-       }
-
-       /*
-        * We can immediately remove failed XIDs from PGPROC's cache of running
-        * child XIDs. It's easiest to do it here while we have the child XID
-        * array at hand, even though in the main-transaction case the equivalent
-        * work happens just after return from RecordTransactionAbort.
-        */
-       XidCacheRemoveRunningXids(xid, nchildren, children);
-
-       /* And clean up local data */
-       if (rels)
-               pfree(rels);
-       if (children)
-               pfree(children);
-}
-
 /* ----------------------------------------------------------------
  *                                             CleanupTransaction stuff
  * ----------------------------------------------------------------
@@ -1436,6 +1362,7 @@ static void
 StartTransaction(void)
 {
        TransactionState s;
+       VirtualTransactionId vxid;
 
        /*
         * Let's just make sure the state stack is empty
@@ -1479,13 +1406,25 @@ StartTransaction(void)
        AtStart_ResourceOwner();
 
        /*
-        * generate a new transaction id
+        * Assign a new LocalTransactionId, and combine it with the backendId to
+        * form a virtual transaction id.
+        */
+       vxid.backendId = MyBackendId;
+       vxid.localTransactionId = GetNextLocalTransactionId();
+
+       /*
+        * Lock the virtual transaction id before we announce it in the proc array
         */
-       s->transactionId = GetNewTransactionId(false);
+       VirtualXactLockTableInsert(vxid);
 
-       XactLockTableInsert(s->transactionId);
+       /*
+        * Advertise it in the proc array.  We assume assignment of
+        * LocalTransactionId is atomic, and the backendId should be set already.
+        */
+       Assert(MyProc->backendId == vxid.backendId);
+       MyProc->lxid = vxid.localTransactionId;
 
-       PG_TRACE1(transaction__start, s->transactionId);
+       PG_TRACE1(transaction__start, vxid.localTransactionId);
 
        /*
         * set transaction_timestamp() (a/k/a now()).  We want this to be the same
@@ -1631,9 +1570,17 @@ CommitTransaction(void)
         */
        if (MyProc != NULL)
        {
-               /* Lock ProcArrayLock because that's what GetSnapshotData uses. */
+               /*
+                * Lock ProcArrayLock because that's what GetSnapshotData uses.
+                * You might assume that we can skip this step if we had no
+                * transaction id assigned, because the failure case outlined
+                * in GetSnapshotData cannot happen in that case. This is true,
+                * but we *still* need the lock guarantee that two concurrent
+                * computations of the *oldest* xmin will get the same result.
+                */
                LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
                MyProc->xid = InvalidTransactionId;
+               MyProc->lxid = InvalidLocalTransactionId;
                MyProc->xmin = InvalidTransactionId;
                MyProc->inVacuum = false;               /* must be cleared with xid/xmin */
 
@@ -1861,10 +1808,8 @@ PrepareTransaction(void)
         * Now we clean up backend-internal state and release internal resources.
         */
 
-       /* Break the chain of back-links in the XLOG records I output */
-       MyLastRecPtr.xrecoff = 0;
-       MyXactMadeXLogEntry = false;
-       MyXactMadeTempRelUpdate = false;
+       /* Reset XactLastRecEnd until the next transaction writes something */
+       XactLastRecEnd.xrecoff = 0;
 
        /*
         * Let others know about no transaction in progress by me.      This has to be
@@ -1872,9 +1817,17 @@ PrepareTransaction(void)
         * someone may think it is unlocked and recyclable.
         */
 
-       /* Lock ProcArrayLock because that's what GetSnapshotData uses. */
+       /*
+        * Lock ProcArrayLock because that's what GetSnapshotData uses.
+        * You might assume that we can skip this step if we have no
+        * transaction id assigned, because the failure case outlined
+        * in GetSnapshotData cannot happen in that case. This is true,
+        * but we *still* need the lock guarantee that two concurrent
+        * computations of the *oldest* xmin will get the same result.
+        */
        LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
        MyProc->xid = InvalidTransactionId;
+       MyProc->lxid = InvalidLocalTransactionId;
        MyProc->xmin = InvalidTransactionId;
        MyProc->inVacuum = false;       /* must be cleared with xid/xmin */
 
@@ -2032,8 +1985,7 @@ AbortTransaction(void)
         * Advertise the fact that we aborted in pg_clog (assuming that we got as
         * far as assigning an XID to advertise).
         */
-       if (TransactionIdIsValid(s->transactionId))
-               RecordTransactionAbort();
+       RecordTransactionAbort(false);
 
        /*
         * Let others know about no transaction in progress by me. Note that this
@@ -2042,9 +1994,17 @@ AbortTransaction(void)
         */
        if (MyProc != NULL)
        {
-               /* Lock ProcArrayLock because that's what GetSnapshotData uses. */
+               /*
+                * Lock ProcArrayLock because that's what GetSnapshotData uses.
+                * You might assume that we can skip this step if we have no
+                * transaction id assigned, because the failure case outlined
+                * in GetSnapshotData cannot happen in that case. This is true,
+                * but we *still* need the lock guarantee that two concurrent
+                * computations of the *oldest* xmin will get the same result.
+                */
                LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
                MyProc->xid = InvalidTransactionId;
+               MyProc->lxid = InvalidLocalTransactionId;
                MyProc->xmin = InvalidTransactionId;
                MyProc->inVacuum = false;               /* must be cleared with xid/xmin */
                MyProc->inCommit = false;               /* be sure this gets cleared */
@@ -3752,13 +3712,11 @@ CommitSubTransaction(void)
        CommandCounterIncrement();
 
        /* Mark subtransaction as subcommitted */
-       if (TransactionIdIsValid(s->transactionId))
-       {
-               RecordSubTransactionCommit();
-               AtSubCommit_childXids();
-       }
+       RecordSubTransactionCommit();
 
        /* Post-commit cleanup */
+       if (TransactionIdIsValid(s->transactionId))
+               AtSubCommit_childXids();
        AfterTriggerEndSubXact(true);
        AtSubCommit_Portals(s->subTransactionId,
                                                s->parent->subTransactionId,
@@ -3884,13 +3842,12 @@ AbortSubTransaction(void)
                                                                        s->parent->subTransactionId);
 
                /* Advertise the fact that we aborted in pg_clog. */
+               RecordTransactionAbort(true);
+
+               /* Post-abort cleanup */
                if (TransactionIdIsValid(s->transactionId))
-               {
-                       RecordSubTransactionAbort();
                        AtSubAbort_childXids();
-               }
 
-               /* Post-abort cleanup */
                CallSubXactCallbacks(SUBXACT_EVENT_ABORT_SUB, s->subTransactionId,
                                                         s->parent->subTransactionId);
 
index 1db33fb26da5a01708099a8bcfa42030bba12dd0..5474a91c247966905e55928dd871bbeb4e3b333f 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.279 2007/08/28 23:17:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.280 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -154,38 +154,16 @@ static TimeLineID recoveryTargetTLI;
 static List *expectedTLIs;
 static TimeLineID curFileTLI;
 
-/*
- * MyLastRecPtr points to the start of the last XLOG record inserted by the
- * current transaction.  If MyLastRecPtr.xrecoff == 0, then the current
- * xact hasn't yet inserted any transaction-controlled XLOG records.
- *
- * Note that XLOG records inserted outside transaction control are not
- * reflected into MyLastRecPtr.  They do, however, cause MyXactMadeXLogEntry
- * to be set true.     The latter can be used to test whether the current xact
- * made any loggable changes (including out-of-xact changes, such as
- * sequence updates).
- *
- * When we insert/update/delete a tuple in a temporary relation, we do not
- * make any XLOG record, since we don't care about recovering the state of
- * the temp rel after a crash. However, we will still need to remember
- * whether our transaction committed or aborted in that case.  So, we must
- * set MyXactMadeTempRelUpdate true to indicate that the XID will be of
- * interest later.
- */
-XLogRecPtr     MyLastRecPtr = {0, 0};
-
-bool           MyXactMadeXLogEntry = false;
-
-bool           MyXactMadeTempRelUpdate = false;
-
 /*
  * ProcLastRecPtr points to the start of the last XLOG record inserted by the
- * current backend.  It is updated for all inserts, transaction-controlled
- * or not.     ProcLastRecEnd is similar but points to end+1 of last record.
+ * current backend.  It is updated for all inserts.  XactLastRecEnd points to
+ * end+1 of the last record, and is reset when we end a top-level transaction,
+ * or start a new one; so it can be used to tell if the current transaction has
+ * created any XLOG records.
  */
 static XLogRecPtr ProcLastRecPtr = {0, 0};
 
-XLogRecPtr     ProcLastRecEnd = {0, 0};
+XLogRecPtr     XactLastRecEnd = {0, 0};
 
 /*
  * RedoRecPtr is this backend's local copy of the REDO record pointer
@@ -488,15 +466,10 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
        bool            updrqst;
        bool            doPageWrites;
        bool            isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
-       bool            no_tran = (rmid == RM_XLOG_ID);
 
+       /* info's high bits are reserved for use by me */
        if (info & XLR_INFO_MASK)
-       {
-               if ((info & XLR_INFO_MASK) != XLOG_NO_TRAN)
-                       elog(PANIC, "invalid xlog info mask %02X", (info & XLR_INFO_MASK));
-               no_tran = true;
-               info &= ~XLR_INFO_MASK;
-       }
+               elog(PANIC, "invalid xlog info mask %02X", info);
 
        /*
         * In bootstrap mode, we don't actually log anything but XLOG resources;
@@ -856,11 +829,8 @@ begin:;
 #endif
 
        /* Record begin of record in appropriate places */
-       if (!no_tran)
-               MyLastRecPtr = RecPtr;
        ProcLastRecPtr = RecPtr;
        Insert->PrevRecord = RecPtr;
-       MyXactMadeXLogEntry = true;
 
        Insert->currpos += SizeOfXLogRecord;
        freespace -= SizeOfXLogRecord;
@@ -1018,7 +988,7 @@ begin:;
                SpinLockRelease(&xlogctl->info_lck);
        }
 
-       ProcLastRecEnd = RecPtr;
+       XactLastRecEnd = RecPtr;
 
        END_CRIT_SECTION();
 
index 7473524865947158df8295d8ac824c00b64dba18..30ea87d5b7ac62b870b889b52183d86d9fa07800 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 1996-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.41 2007/08/25 17:47:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.42 2007/09/05 18:10:47 tgl Exp $
  */
 
 CREATE VIEW pg_roles AS 
@@ -145,8 +145,8 @@ CREATE VIEW pg_locks AS
     SELECT * 
     FROM pg_lock_status() AS L
     (locktype text, database oid, relation oid, page int4, tuple int2,
-     transactionid xid, classid oid, objid oid, objsubid int2,
-     transaction xid, pid int4, mode text, granted boolean);
+     virtualxid text, transactionid xid, classid oid, objid oid, objsubid int2,
+     virtualtransaction text, pid int4, mode text, granted boolean);
 
 CREATE VIEW pg_cursors AS
     SELECT C.name, C.statement, C.is_holdable, C.is_binary,
index d79e73f59d8c29c4afb5f86517d34041bb3e219f..ac56b583f17d8726986fba0975bd696e16e308b8 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.162 2007/08/25 19:08:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.163 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,7 @@
 #include "parser/parse_expr.h"
 #include "parser/parse_func.h"
 #include "parser/parsetree.h"
+#include "storage/procarray.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
@@ -126,9 +127,8 @@ DefineIndex(RangeVar *heapRelation,
        int16      *coloptions;
        IndexInfo  *indexInfo;
        int                     numberOfAttributes;
-       List       *old_xact_list;
-       ListCell   *lc;
-       uint32          ixcnt;
+       VirtualTransactionId *old_lockholders;
+       VirtualTransactionId *old_snapshots;
        LockRelId       heaprelid;
        LOCKTAG         heaplocktag;
        Snapshot        snapshot;
@@ -484,24 +484,36 @@ DefineIndex(RangeVar *heapRelation,
         * xacts that open the table for writing after this point; they will see
         * the new index when they open it.
         *
+        * Note: the reason we use actual lock acquisition here, rather than
+        * just checking the ProcArray and sleeping, is that deadlock is possible
+        * if one of the transactions in question is blocked trying to acquire
+        * an exclusive lock on our table.  The lock code will detect deadlock
+        * and error out properly.
+        *
         * Note: GetLockConflicts() never reports our own xid, hence we need not
-        * check for that.
+        * check for that.  Also, prepared xacts are not reported, which is
+        * fine since they certainly aren't going to do anything more.
         */
        SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
-       old_xact_list = GetLockConflicts(&heaplocktag, ShareLock);
+       old_lockholders = GetLockConflicts(&heaplocktag, ShareLock);
 
-       foreach(lc, old_xact_list)
+       while (VirtualTransactionIdIsValid(*old_lockholders))
        {
-               TransactionId xid = lfirst_xid(lc);
-
-               XactLockTableWait(xid);
+               VirtualXactLockTableWait(*old_lockholders);
+               old_lockholders++;
        }
 
        /*
         * Now take the "reference snapshot" that will be used by validate_index()
-        * to filter candidate tuples.  All other transactions running at this
-        * time will have to be out-waited before we can commit, because we can't
-        * guarantee that tuples deleted just before this will be in the index.
+        * to filter candidate tuples.  Beware!  There might still be snapshots
+        * in use that treat some transaction as in-progress that our reference
+        * snapshot treats as committed.  If such a recently-committed transaction
+        * deleted tuples in the table, we will not include them in the index; yet
+        * those transactions which see the deleting one as still-in-progress will
+        * expect such tuples to be there once we mark the index as valid.
+        *
+        * We solve this by waiting for all endangered transactions to exit before
+        * we mark the index as valid.
         *
         * We also set ActiveSnapshot to this snap, since functions in indexes may
         * need a snapshot.
@@ -518,14 +530,21 @@ DefineIndex(RangeVar *heapRelation,
         * The index is now valid in the sense that it contains all currently
         * interesting tuples.  But since it might not contain tuples deleted just
         * before the reference snap was taken, we have to wait out any
-        * transactions older than the reference snap.  We can do this by waiting
-        * for each xact explicitly listed in the snap.
+        * transactions that might have older snapshots.  Obtain a list of
+        * VXIDs of such transactions, and wait for them individually.
         *
-        * Note: GetSnapshotData() never stores our own xid into a snap, hence we
-        * need not check for that.
+        * We can exclude any running transactions that have xmin >= the xmax of
+        * our reference snapshot, since they are clearly not interested in any
+        * missing older tuples.  Also, GetCurrentVirtualXIDs never reports our
+        * own vxid, so we need not check for that.
         */
-       for (ixcnt = 0; ixcnt < snapshot->xcnt; ixcnt++)
-               XactLockTableWait(snapshot->xip[ixcnt]);
+       old_snapshots = GetCurrentVirtualXIDs(ActiveSnapshot->xmax);
+
+       while (VirtualTransactionIdIsValid(*old_snapshots))
+       {
+               VirtualXactLockTableWait(*old_snapshots);
+               old_snapshots++;
+       }
 
        /*
         * Index can now be marked valid -- update its pg_index entry
index bd06bfb5da3f63fef607f90ec1cb2f7a20284024..cb2a1380caf20f1e467fbb61adc4d852d64e2136 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.143 2007/02/01 19:10:26 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.144 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -25,6 +25,7 @@
 #include "commands/tablecmds.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "storage/proc.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -63,7 +64,7 @@ typedef struct SeqTableData
 {
        struct SeqTableData *next;      /* link to next SeqTable object */
        Oid                     relid;                  /* pg_class OID of this sequence */
-       TransactionId xid;                      /* xact in which we last did a seq op */
+       LocalTransactionId lxid;        /* xact in which we last did a seq op */
        int64           last;                   /* value last returned by nextval */
        int64           cached;                 /* last value already cached for nextval */
        /* if last != cached, we have not used up all the cached values */
@@ -282,7 +283,7 @@ DefineSequence(CreateSeqStmt *seq)
                rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
 
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
@@ -366,7 +367,7 @@ AlterSequence(AlterSeqStmt *stmt)
                rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
 
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
@@ -594,7 +595,7 @@ nextval_internal(Oid relid)
                rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
 
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
@@ -764,7 +765,7 @@ do_setval(Oid relid, int64 next, bool iscalled)
                rdata[1].buffer = InvalidBuffer;
                rdata[1].next = NULL;
 
-               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
+               recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG, rdata);
 
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
@@ -825,10 +826,10 @@ setval3_oid(PG_FUNCTION_ARGS)
 static Relation
 open_share_lock(SeqTable seq)
 {
-       TransactionId thisxid = GetTopTransactionId();
+       LocalTransactionId thislxid = MyProc->lxid;
 
        /* Get the lock if not already held in this xact */
-       if (seq->xid != thisxid)
+       if (seq->lxid != thislxid)
        {
                ResourceOwner currentOwner;
 
@@ -848,7 +849,7 @@ open_share_lock(SeqTable seq)
                CurrentResourceOwner = currentOwner;
 
                /* Flag that we have a lock in the current xact */
-               seq->xid = thisxid;
+               seq->lxid = thislxid;
        }
 
        /* We now know we have AccessShareLock, and can safely open the rel */
@@ -891,7 +892,7 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
                                        (errcode(ERRCODE_OUT_OF_MEMORY),
                                         errmsg("out of memory")));
                elm->relid = relid;
-               elm->xid = InvalidTransactionId;
+               elm->lxid = InvalidLocalTransactionId;
                /* increment is set to 0 until we do read_info (see currval) */
                elm->last = elm->cached = elm->increment = 0;
                elm->next = seqtab;
index 358e9a5ad99839eaedd40e5ff8a4e4f425932349..87cf57daec3e298053d9f2faa23593fe84900d25 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.355 2007/08/13 19:08:26 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.356 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2601,14 +2601,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                                PageSetLSN(page, recptr);
                                PageSetTLI(page, ThisTimeLineID);
                        }
-                       else
-                       {
-                               /*
-                                * No XLOG record, but still need to flag that XID exists on
-                                * disk
-                                */
-                               MyXactMadeTempRelUpdate = true;
-                       }
 
                        END_CRIT_SECTION();
 
@@ -2761,13 +2753,6 @@ move_chain_tuple(Relation rel,
                PageSetLSN(dst_page, recptr);
                PageSetTLI(dst_page, ThisTimeLineID);
        }
-       else
-       {
-               /*
-                * No XLOG record, but still need to flag that XID exists on disk
-                */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 
@@ -2868,13 +2853,6 @@ move_plain_tuple(Relation rel,
                PageSetLSN(dst_page, recptr);
                PageSetTLI(dst_page, ThisTimeLineID);
        }
-       else
-       {
-               /*
-                * No XLOG record, but still need to flag that XID exists on disk
-                */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 
@@ -3070,11 +3048,6 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
        }
-       else
-       {
-               /* No XLOG record, but still need to flag that XID exists on disk */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 }
index 3ac097388b2ef42674a344c1c97fbdac089448a8..ecc0ee78074cc3b863cc92796f8b606b7f2010cb 100644 (file)
@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.91 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -658,11 +658,6 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
        }
-       else
-       {
-               /* No XLOG record, but still need to flag that XID exists on disk */
-               MyXactMadeTempRelUpdate = true;
-       }
 
        END_CRIT_SECTION();
 
index 51da9679f35a7874512e4e706574e368188cfaff..577f73a31f199ec70b7c924d8b013c1eae87d784 100644 (file)
@@ -23,7 +23,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.28 2007/07/01 02:22:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.29 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -404,7 +404,7 @@ TransactionIdIsActive(TransactionId xid)
  * This is also used to determine where to truncate pg_subtrans.  allDbs
  * must be TRUE for that case, and ignoreVacuum FALSE.
  *
- * Note: we include the currently running xids in the set of considered xids.
+ * Note: we include all currently running xids in the set of considered xids.
  * This ensures that if a just-started xact has not yet set its snapshot,
  * when it does set the snapshot it cannot set xmin less than what we compute.
  */
@@ -416,15 +416,19 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
        int                     index;
 
        /*
-        * Normally we start the min() calculation with our own XID.  But if
-        * called by checkpointer, we will not be inside a transaction, so use
-        * next XID as starting point for min() calculation.  (Note that if there
-        * are no xacts running at all, that will be the subtrans truncation
-        * point!)
+        * We need to initialize the MIN() calculation with something.
+        * ReadNewTransactionId() is guaranteed to work, but is relatively
+        * expensive due to locking; so first we try a couple of shortcuts.
+        * If we have a valid xmin in our own PGPROC entry, that will do;
+        * or if we have assigned ourselves an XID, that will do.
         */
-       result = GetTopTransactionId();
+       result = MyProc ? MyProc->xmin : InvalidTransactionId;
        if (!TransactionIdIsValid(result))
-               result = ReadNewTransactionId();
+       {
+               result = GetTopTransactionIdIfAny();
+               if (!TransactionIdIsValid(result))
+                       result = ReadNewTransactionId();
+       }
 
        LWLockAcquire(ProcArrayLock, LW_SHARED);
 
@@ -440,23 +444,22 @@ GetOldestXmin(bool allDbs, bool ignoreVacuum)
                        /* Fetch xid just once - see GetNewTransactionId */
                        TransactionId xid = proc->xid;
 
-                       if (TransactionIdIsNormal(xid))
-                       {
-                               /* First consider the transaction own's Xid */
-                               if (TransactionIdPrecedes(xid, result))
-                                       result = xid;
-
-                               /*
-                                * Also consider the transaction's Xmin, if set.
-                                *
-                                * We must check both Xid and Xmin because there is a window
-                                * where an xact's Xid is set but Xmin isn't yet.
-                                */
-                               xid = proc->xmin;
-                               if (TransactionIdIsNormal(xid))
-                                       if (TransactionIdPrecedes(xid, result))
-                                               result = xid;
-                       }
+                       /* First consider the transaction's own Xid, if any */
+                       if (TransactionIdIsNormal(xid) &&
+                               TransactionIdPrecedes(xid, result))
+                               result = xid;
+
+                       /*
+                        * Also consider the transaction's Xmin, if set.
+                        *
+                        * We must check both Xid and Xmin because a transaction might
+                        * have an Xmin but not (yet) an Xid; conversely, if it has
+                        * an Xid, that could determine some not-yet-set Xmin.
+                        */
+                       xid = proc->xmin;       /* Fetch just once */
+                       if (TransactionIdIsNormal(xid) &&
+                               TransactionIdPrecedes(xid, result))
+                               result = xid;
                }
        }
 
@@ -545,8 +548,6 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
                                         errmsg("out of memory")));
        }
 
-       globalxmin = xmin = GetTopTransactionId();
-
        /*
         * It is sufficient to get shared lock on ProcArrayLock, even if we are
         * computing a serializable snapshot and therefore will be setting
@@ -557,6 +558,19 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
         * discussion just below).      So it doesn't matter whether another backend
         * concurrently doing GetSnapshotData or GetOldestXmin sees our xmin as
         * set or not; he'd compute the same xmin for himself either way.
+        * (We are assuming here that xmin can be set and read atomically,
+        * just like xid.)
+        *
+        * There is a corner case in which the above argument doesn't work: if
+        * there isn't any oldest xact, ie, all xids in the array are invalid.
+        * In that case we will compute xmin as the result of ReadNewTransactionId,
+        * and since GetNewTransactionId doesn't take the ProcArrayLock, it's not
+        * so obvious that two backends with overlapping shared locks will get
+        * the same answer.  But GetNewTransactionId is required to store the XID
+        * it assigned into the ProcArray before releasing XidGenLock.  Therefore
+        * the backend that did ReadNewTransactionId later will see that XID in
+        * the array, and will compute the same xmin as the earlier one that saw
+        * no XIDs in the array.
         */
        LWLockAcquire(ProcArrayLock, LW_SHARED);
 
@@ -589,6 +603,9 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
 
        xmax = ReadNewTransactionId();
 
+       /* initialize xmin calculation with xmax */
+       globalxmin = xmin = xmax;
+
        /*
         * Spin over procArray checking xid, xmin, and subxids.  The goal is
         * to gather all active xids, find the lowest xmin, and try to record
@@ -597,34 +614,40 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
        for (index = 0; index < arrayP->numProcs; index++)
        {
                PGPROC     *proc = arrayP->procs[index];
+               TransactionId xid;
+
+               /* Ignore procs running LAZY VACUUM */
+               if (proc->inVacuum)
+                       continue;
+
+               /* Update globalxmin to be the smallest valid xmin */
+               xid = proc->xmin;               /* fetch just once */
+               if (TransactionIdIsNormal(xid) &&
+                       TransactionIdPrecedes(xid, globalxmin))
+                       globalxmin = xid;
 
                /* Fetch xid just once - see GetNewTransactionId */
-               TransactionId xid = proc->xid;
+               xid = proc->xid;
 
                /*
-                * Ignore my own proc (dealt with my xid above), procs not running a
-                * transaction, xacts started since we read the next transaction ID,
-                * and xacts executing LAZY VACUUM. There's no need to store XIDs
-                * above what we got from ReadNewTransactionId, since we'll treat them
-                * as running anyway.  We also assume that such xacts can't compute an
-                * xmin older than ours, so they needn't be considered in computing
-                * globalxmin.
+                * If the transaction has been assigned an xid < xmax we add it to the
+                * snapshot, and update xmin if necessary.  There's no need to store
+                * XIDs above what we got from ReadNewTransactionId, since we'll treat
+                * them as running anyway.  We don't bother to examine their subxids
+                * either.
+                *
+                * We don't include our own XID (if any) in the snapshot, but we must
+                * include it in the xmin calculation.
                 */
-               if (proc == MyProc ||
-                       !TransactionIdIsNormal(xid) ||
-                       TransactionIdFollowsOrEquals(xid, xmax) ||
-                       proc->inVacuum)
-                       continue;
-
-               if (TransactionIdPrecedes(xid, xmin))
-                       xmin = xid;
-               snapshot->xip[count++] = xid;
-
-               /* Update globalxmin to be the smallest valid xmin */
-               xid = proc->xmin;
                if (TransactionIdIsNormal(xid))
-                       if (TransactionIdPrecedes(xid, globalxmin))
-                               globalxmin = xid;
+               {
+                       if (TransactionIdFollowsOrEquals(xid, xmax))
+                               continue;
+                       if (proc != MyProc)
+                               snapshot->xip[count++] = xid;
+                       if (TransactionIdPrecedes(xid, xmin))
+                               xmin = xid;
+               }
 
                /*
                 * Save subtransaction XIDs if possible (if we've already overflowed,
@@ -635,8 +658,10 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
                 * remove any.  Hence it's important to fetch nxids just once. Should
                 * be safe to use memcpy, though.  (We needn't worry about missing any
                 * xids added concurrently, because they must postdate xmax.)
+                *
+                * Again, our own XIDs are not included in the snapshot.
                 */
-               if (subcount >= 0)
+               if (subcount >= 0 && proc != MyProc)
                {
                        if (proc->subxids.overflowed)
                                subcount = -1;  /* overflowed */
@@ -818,6 +843,9 @@ BackendPidGetProc(int pid)
  *
  * Only main transaction Ids are considered.  This function is mainly
  * useful for determining what backend owns a lock.
+ *
+ * Beware that not every xact has an XID assigned.  However, as long as you
+ * only call this using an XID found on disk, you're safe.
  */
 int
 BackendXidGetPid(TransactionId xid)
@@ -856,6 +884,63 @@ IsBackendPid(int pid)
        return (BackendPidGetProc(pid) != NULL);
 }
 
+
+/*
+ * GetCurrentVirtualXIDs -- returns a palloc'd array of currently active
+ * VXIDs, terminated with an invalid VXID.
+ *
+ * If limitXmin is not InvalidTransactionId, we skip any backends with
+ * xmin >= limitXmin; a backend that has not set xmin is not skipped by that
+ * test.  Our own process, and any proc lacking a valid VXID, are omitted.
+ */
+VirtualTransactionId *
+GetCurrentVirtualXIDs(TransactionId limitXmin)
+{
+       VirtualTransactionId *vxids;
+       ProcArrayStruct *arrayP = procArray;
+       int                     count = 0;
+       int                     index;
+
+       /* allocate worst-case space (maxProcs entries) plus a terminator */
+       vxids = (VirtualTransactionId *)
+               palloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
+
+       LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+       for (index = 0; index < arrayP->numProcs; index++)
+       {
+               PGPROC     *proc = arrayP->procs[index];
+               /* Fetch xmin just once; it may change under us */
+               TransactionId pxmin = proc->xmin;
+
+               if (proc == MyProc)
+                       continue;
+
+               /*
+                * Note that InvalidTransactionId precedes all other XIDs, so a
+                * proc that hasn't set xmin yet will always be included.
+                */
+               if (!TransactionIdIsValid(limitXmin) ||
+                       TransactionIdPrecedes(pxmin, limitXmin))
+               {
+                       VirtualTransactionId vxid;
+
+                       GET_VXID_FROM_PGPROC(vxid, *proc);
+                       if (VirtualTransactionIdIsValid(vxid))
+                               vxids[count++] = vxid;
+               }
+       }
+
+       LWLockRelease(ProcArrayLock);
+
+       /* add the terminator: an invalid VXID marks the end of the array */
+       vxids[count].backendId = InvalidBackendId;
+       vxids[count].localTransactionId = InvalidLocalTransactionId;
+
+       return vxids;
+}
+
+
 /*
  * CountActiveBackends --- count backends (other than myself) that are in
  *             active transactions.  This is used as a heuristic to decide if
@@ -885,7 +970,7 @@ CountActiveBackends(void)
                if (proc->pid == 0)
                        continue;                       /* do not count prepared xacts */
                if (proc->xid == InvalidTransactionId)
-                       continue;                       /* do not count if not in a transaction */
+                       continue;                       /* do not count if no XID assigned */
                if (proc->waitLock != NULL)
                        continue;                       /* do not count if blocked on a lock */
                count++;
index 31c4a2dfad1158d0aeee123e9d744da51a6edeab..99690d8b36b23ea9be9ef2ced0f8db936d428b2b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.63 2007/01/05 22:19:38 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/ipc/sinvaladt.c,v 1.64 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "storage/pmsignal.h"
+#include "storage/proc.h"
 #include "storage/shmem.h"
 #include "storage/sinvaladt.h"
 
 
 SISeg     *shmInvalBuffer;
 
+static LocalTransactionId nextLocalTransactionId;
+
 static void CleanupInvalidationState(int status, Datum arg);
 static void SISetProcStateInvalid(SISeg *segP);
 
@@ -40,6 +43,8 @@ SInvalShmemSize(void)
        size = offsetof(SISeg, procState);
        size = add_size(size, mul_size(sizeof(ProcState), MaxBackends));
 
+       size = add_size(size, mul_size(sizeof(LocalTransactionId), MaxBackends));
+
        return size;
 }
 
@@ -51,15 +56,21 @@ void
 SIBufferInit(void)
 {
        SISeg      *segP;
+       Size            size;
        int                     i;
        bool            found;
 
        /* Allocate space in shared memory */
+       size = offsetof(SISeg, procState);
+       size = add_size(size, mul_size(sizeof(ProcState), MaxBackends));
+
        shmInvalBuffer = segP = (SISeg *)
-               ShmemInitStruct("shmInvalBuffer", SInvalShmemSize(), &found);
+               ShmemInitStruct("shmInvalBuffer", size, &found);
        if (found)
                return;
 
+       segP->nextLXID = ShmemAlloc(sizeof(LocalTransactionId) * MaxBackends);
+
        /* Clear message counters, save size of procState array */
        segP->minMsgNum = 0;
        segP->maxMsgNum = 0;
@@ -69,11 +80,12 @@ SIBufferInit(void)
 
        /* The buffer[] array is initially all unused, so we need not fill it */
 
-       /* Mark all backends inactive */
+       /* Mark all backends inactive, and initialize nextLXID */
        for (i = 0; i < segP->maxBackends; i++)
        {
                segP->procState[i].nextMsgNum = -1;             /* inactive */
                segP->procState[i].resetState = false;
+               segP->nextLXID[i] = InvalidLocalTransactionId;
        }
 }
 
@@ -128,9 +140,15 @@ SIBackendInit(SISeg *segP)
        elog(DEBUG2, "my backend id is %d", MyBackendId);
 #endif   /* INVALIDDEBUG */
 
+       /* Advertise assigned backend ID in MyProc */
+       MyProc->backendId = MyBackendId;
+
        /* Reduce free slot count */
        segP->freeBackends--;
 
+       /* Fetch next local transaction ID into local memory */
+       nextLocalTransactionId = segP->nextLXID[MyBackendId - 1];
+
        /* mark myself active, with all extant messages already read */
        stateP->nextMsgNum = segP->maxMsgNum;
        stateP->resetState = false;
@@ -160,6 +178,9 @@ CleanupInvalidationState(int status, Datum arg)
 
        LWLockAcquire(SInvalLock, LW_EXCLUSIVE);
 
+       /* Update next local transaction ID for next holder of this backendID */
+       segP->nextLXID[MyBackendId - 1] = nextLocalTransactionId;
+
        /* Mark myself inactive */
        segP->procState[MyBackendId - 1].nextMsgNum = -1;
        segP->procState[MyBackendId - 1].resetState = false;
@@ -352,3 +373,30 @@ SIDelExpiredDataEntries(SISeg *segP)
                }
        }
 }
+
+
+/*
+ * GetNextLocalTransactionId --- allocate a new LocalTransactionId
+ *
+ * Returns the next value of this backend's private counter, skipping
+ * InvalidLocalTransactionId.  A VirtualTransactionId is a BackendId
+ * (high-order part) plus a LocalTransactionId (low-order part), so a new
+ * one can be allocated without any contention for shared memory, except
+ * for a bit of additional overhead during backend startup/shutdown.
+ * To avoid the risk of a VirtualTransactionId being reused within a short
+ * interval, successive procs occupying the same backend ID slot continue
+ * one sequence of local IDs: SIBackendInit loads the counter from the
+ * slot's nextLXID[] entry and CleanupInvalidationState stores it back.
+ */
+LocalTransactionId
+GetNextLocalTransactionId(void)
+{
+       LocalTransactionId result;
+
+       /* skip InvalidLocalTransactionId: the initial value, and at wraparound */
+       do {
+               result = nextLocalTransactionId++;
+       } while (!LocalTransactionIdIsValid(result));
+
+       return result;
+}
index 1c5db363203345ffd4beb7c6113577d411718dac..f947d226fea1c318e335363c872669aecc4ce581 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.92 2007/07/25 22:16:18 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.93 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -421,8 +421,8 @@ UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
  *             XactLockTableInsert
  *
  * Insert a lock showing that the given transaction ID is running ---
- * this is done during xact startup.  The lock can then be used to wait
- * for the transaction to finish.
+ * this is done when an XID is acquired by a transaction or subtransaction.
+ * The lock can then be used to wait for the transaction to finish.
  */
 void
 XactLockTableInsert(TransactionId xid)
@@ -439,8 +439,7 @@ XactLockTableInsert(TransactionId xid)
  *
  * Delete the lock showing that the given transaction ID is running.
  * (This is never used for main transaction IDs; those locks are only
- * released implicitly at transaction end.     But we do use it for subtrans
- * IDs.)
+ * released implicitly at transaction end.     But we do use it for subtrans IDs.)
  */
 void
 XactLockTableDelete(TransactionId xid)
@@ -472,7 +471,7 @@ XactLockTableWait(TransactionId xid)
        for (;;)
        {
                Assert(TransactionIdIsValid(xid));
-               Assert(!TransactionIdEquals(xid, GetTopTransactionId()));
+               Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));
 
                SET_LOCKTAG_TRANSACTION(tag, xid);
 
@@ -500,7 +499,7 @@ ConditionalXactLockTableWait(TransactionId xid)
        for (;;)
        {
                Assert(TransactionIdIsValid(xid));
-               Assert(!TransactionIdEquals(xid, GetTopTransactionId()));
+               Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));
 
                SET_LOCKTAG_TRANSACTION(tag, xid);
 
@@ -517,6 +516,70 @@ ConditionalXactLockTableWait(TransactionId xid)
        return true;
 }
 
+
+/*
+ *             VirtualXactLockTableInsert
+ *
+ * Take ExclusiveLock on the given virtual transaction ID to show that it is
+ * running --- this is done at main transaction start when its VXID is
+ * assigned.  Others can then wait on the lock for the transaction to finish.
+ */
+void
+VirtualXactLockTableInsert(VirtualTransactionId vxid)
+{
+       LOCKTAG         tag;
+
+       Assert(VirtualTransactionIdIsValid(vxid));
+
+       SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
+
+       (void) LockAcquire(&tag, ExclusiveLock, false, false);
+}
+
+/*
+ *             VirtualXactLockTableWait
+ *
+ * Wait for the top-level transaction owning the given VXID to end, by
+ * acquiring ShareLock on the VXID and releasing it again immediately.
+ */
+void
+VirtualXactLockTableWait(VirtualTransactionId vxid)
+{
+       LOCKTAG         tag;
+
+       Assert(VirtualTransactionIdIsValid(vxid));
+
+       SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
+
+       (void) LockAcquire(&tag, ShareLock, false, false);
+
+       LockRelease(&tag, ShareLock, false);
+}
+
+/*
+ *             ConditionalVirtualXactLockTableWait
+ *
+ * As VirtualXactLockTableWait, but never blocks: returns FALSE if ShareLock
+ * is not available right away, else TRUE (the lock is released again).
+ */
+bool
+ConditionalVirtualXactLockTableWait(VirtualTransactionId vxid)
+{
+       LOCKTAG         tag;
+
+       Assert(VirtualTransactionIdIsValid(vxid));
+
+       SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
+
+       if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL)
+               return false;
+
+       LockRelease(&tag, ShareLock, false);
+
+       return true;
+}
+
+
 /*
  *             LockDatabaseObject
  *
index a4a0910d393026cc27156be489dc4e8c719a1aa6..06a4f7adae5ba21d429316783921bd73e9404b8d 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.177 2007/07/16 21:09:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.178 2007/09/05 18:10:47 tgl Exp $
  *
  * NOTES
  *       A lock table is a shared memory hash table.  When
@@ -1681,20 +1681,24 @@ LockReassignCurrentOwner(void)
 
 /*
  * GetLockConflicts
- *             Get a list of TransactionIds of xacts currently holding locks
+ *             Get an array of VirtualTransactionIds of xacts currently holding locks
  *             that would conflict with the specified lock/lockmode.
  *             xacts merely awaiting such a lock are NOT reported.
  *
+ * The result array is palloc'd and is terminated with an invalid VXID.
+ *
  * Of course, the result could be out of date by the time it's returned,
  * so use of this function has to be thought about carefully.
  *
- * Only top-level XIDs are reported.  Note we never include the current xact
- * in the result list, since an xact never blocks itself.
+ * Note we never include the current xact's vxid in the result array,
+ * since an xact never blocks itself.  Also, prepared transactions are
+ * ignored, which is a bit more debatable but is appropriate for current
+ * uses of the result.
  */
-List *
+VirtualTransactionId *
 GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
 {
-       List       *result = NIL;
+       VirtualTransactionId *vxids;
        LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
        LockMethod      lockMethodTable;
        LOCK       *lock;
@@ -1703,6 +1707,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
        PROCLOCK   *proclock;
        uint32          hashcode;
        LWLockId        partitionLock;
+       int                     count = 0;
 
        if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
                elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -1710,6 +1715,14 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
        if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes)
                elog(ERROR, "unrecognized lock mode: %d", lockmode);
 
+       /*
+        * Allocate memory to store results, and fill with InvalidVXID.  We
+        * only need enough space for MaxBackends + a terminator, since
+        * prepared xacts don't count.
+        */
+       vxids = (VirtualTransactionId *)
+               palloc0(sizeof(VirtualTransactionId) * (MaxBackends + 1));
+
        /*
         * Look up the lock object matching the tag.
         */
@@ -1730,7 +1743,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
                 * on this lockable object.
                 */
                LWLockRelease(partitionLock);
-               return NIL;
+               return vxids;
        }
 
        /*
@@ -1752,18 +1765,17 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
                        /* A backend never blocks itself */
                        if (proc != MyProc)
                        {
-                               /* Fetch xid just once - see GetNewTransactionId */
-                               TransactionId xid = proc->xid;
+                               VirtualTransactionId vxid;
+
+                               GET_VXID_FROM_PGPROC(vxid, *proc);
 
                                /*
-                                * Race condition: during xact commit/abort we zero out
-                                * PGPROC's xid before we mark its locks released.  If we see
-                                * zero in the xid field, assume the xact is in process of
-                                * shutting down and act as though the lock is already
-                                * released.
+                                * If we see an invalid VXID, then either the xact has already
+                                * committed (or aborted), or it's a prepared xact.  In
+                                * either case we may ignore it.
                                 */
-                               if (TransactionIdIsValid(xid))
-                                       result = lappend_xid(result, xid);
+                               if (VirtualTransactionIdIsValid(vxid))
+                                       vxids[count++] = vxid;
                        }
                }
 
@@ -1773,7 +1785,10 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
 
        LWLockRelease(partitionLock);
 
-       return result;
+       if (count > MaxBackends)        /* should never happen */
+               elog(PANIC, "too many conflicting locks found");
+
+       return vxids;
 }
 
 
@@ -1782,7 +1797,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
  *             Do the preparatory work for a PREPARE: make 2PC state file records
  *             for all locks currently held.
  *
- * Non-transactional locks are ignored.
+ * Non-transactional locks are ignored, as are VXID locks.
  *
  * There are some special cases that we error out on: we can't be holding
  * any session locks (should be OK since only VACUUM uses those) and we
@@ -1812,6 +1827,13 @@ AtPrepare_Locks(void)
                if (!LockMethods[LOCALLOCK_LOCKMETHOD(*locallock)]->transactional)
                        continue;
 
+               /*
+                * Ignore VXID locks.  We don't want those to be held by prepared
+                * transactions, since they aren't meaningful after a restart.
+                */
+               if (locallock->tag.lock.locktag_type == LOCKTAG_VIRTUALTRANSACTION)
+                       continue;
+
                /* Ignore it if we don't actually hold the lock */
                if (locallock->nLocks <= 0)
                        continue;
@@ -1899,6 +1921,10 @@ PostPrepare_Locks(TransactionId xid)
                if (!LockMethods[LOCALLOCK_LOCKMETHOD(*locallock)]->transactional)
                        continue;
 
+               /* Ignore VXID locks */
+               if (locallock->tag.lock.locktag_type == LOCKTAG_VIRTUALTRANSACTION)
+                       continue;
+
                /* We already checked there are no session locks */
 
                /* Mark the proclock to show we need to release this lockmode */
@@ -1944,6 +1970,10 @@ PostPrepare_Locks(TransactionId xid)
                        if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional)
                                goto next_item;
 
+                       /* Ignore VXID locks */
+                       if (lock->tag.locktag_type == LOCKTAG_VIRTUALTRANSACTION)
+                               goto next_item;
+
                        PROCLOCK_PRINT("PostPrepare_Locks", proclock);
                        LOCK_PRINT("PostPrepare_Locks", lock, 0);
                        Assert(lock->nRequested >= 0);
index 048fa31bccd9e44f86a9f7f82c33f4ece824650b..5441dd322de7925150ff8e5d13153800daedd550 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.192 2007/08/28 03:23:44 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.193 2007/09/05 18:10:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -282,10 +282,12 @@ InitProcess(void)
         */
        SHMQueueElemInit(&(MyProc->links));
        MyProc->waitStatus = STATUS_OK;
+       MyProc->lxid = InvalidLocalTransactionId;
        MyProc->xid = InvalidTransactionId;
        MyProc->xmin = InvalidTransactionId;
        MyProc->pid = MyProcPid;
-       /* databaseId and roleId will be filled in later */
+       /* backendId, databaseId and roleId will be filled in later */
+       MyProc->backendId = InvalidBackendId;
        MyProc->databaseId = InvalidOid;
        MyProc->roleId = InvalidOid;
        MyProc->inCommit = false;
@@ -359,7 +361,9 @@ InitProcessPhase2(void)
  *
  * Auxiliary processes are presently not expected to wait for real (lockmgr)
  * locks, so we need not set up the deadlock checker.  They are never added
- * to the ProcArray or the sinval messaging mechanism, either.
+ * to the ProcArray or the sinval messaging mechanism, either.  They also
+ * don't get a VXID assigned, since this is only useful when we actually
+ * hold lockmgr locks.
  */
 void
 InitAuxiliaryProcess(void)
@@ -418,8 +422,10 @@ InitAuxiliaryProcess(void)
         */
        SHMQueueElemInit(&(MyProc->links));
        MyProc->waitStatus = STATUS_OK;
+       MyProc->lxid = InvalidLocalTransactionId;
        MyProc->xid = InvalidTransactionId;
        MyProc->xmin = InvalidTransactionId;
+       MyProc->backendId = InvalidBackendId;
        MyProc->databaseId = InvalidOid;
        MyProc->roleId = InvalidOid;
        MyProc->inCommit = false;
index 7137d2dc08c854f432e4182554f26f9500a8c509..22ac13146c8fb1b00d2fd1ea53873d01f2b04200 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.105 2007/07/20 16:29:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.106 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -347,9 +347,8 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
                return;
 
        /*
-        * Make a non-transactional XLOG entry showing the file creation. It's
-        * non-transactional because we should replay it whether the transaction
-        * commits or not; if not, the file will be dropped at abort time.
+        * Make an XLOG entry showing the file creation.  If we abort, the file
+        * will be dropped at abort time.
         */
        xlrec.rnode = reln->smgr_rnode;
 
@@ -358,7 +357,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
        rdata.buffer = InvalidBuffer;
        rdata.next = NULL;
 
-       lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata);
+       lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE, &rdata);
 
        /* Add the relation to the list of stuff to delete at abort */
        pending = (PendingRelDelete *)
@@ -554,10 +553,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
        if (!isTemp)
        {
                /*
-                * Make a non-transactional XLOG entry showing the file truncation.
-                * It's non-transactional because we should replay it whether the
-                * transaction commits or not; the underlying file change is certainly
-                * not reversible.
+                * Make an XLOG entry showing the file truncation.
                 */
                XLogRecPtr      lsn;
                XLogRecData rdata;
@@ -571,8 +567,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
                rdata.buffer = InvalidBuffer;
                rdata.next = NULL;
 
-               lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN,
-                                                &rdata);
+               lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE, &rdata);
        }
 }
 
@@ -679,11 +674,14 @@ smgrDoPendingDeletes(bool isCommit)
  * *ptr is set to point to a freshly-palloc'd array of RelFileNodes.
  * If there are no relations to be deleted, *ptr is set to NULL.
  *
+ * If haveNonTemp isn't NULL, the bool it points to gets set to true if
+ * there is any non-temp table pending to be deleted; false if not.
+ *
  * Note that the list does not include anything scheduled for termination
  * by upper-level transactions.
  */
 int
-smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
+smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp)
 {
        int                     nestLevel = GetCurrentTransactionNestLevel();
        int                     nrels;
@@ -691,6 +689,8 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
        PendingRelDelete *pending;
 
        nrels = 0;
+       if (haveNonTemp)
+               *haveNonTemp = false;
        for (pending = pendingDeletes; pending != NULL; pending = pending->next)
        {
                if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
@@ -707,6 +707,8 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
        {
                if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
                        *rptr++ = pending->relnode;
+               if (haveNonTemp && !pending->isTemp)
+                       *haveNonTemp = true;
        }
        return nrels;
 }
index 2263a946039b7c3a47b211be415d23bbce36f811..e78d74f9efe0459865241c57f201ec950bf7a220 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright (c) 2002-2007, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *             $PostgreSQL: pgsql/src/backend/utils/adt/lockfuncs.c,v 1.28 2007/01/05 22:19:41 momjian Exp $
+ *             $PostgreSQL: pgsql/src/backend/utils/adt/lockfuncs.c,v 1.29 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,6 +27,7 @@ static const char *const LockTagTypeNames[] = {
        "page",
        "tuple",
        "transactionid",
+       "virtualxid",
        "object",
        "userlock",
        "advisory"
@@ -39,6 +40,27 @@ typedef struct
        int                     currIdx;                /* current PROCLOCK index */
 } PG_Lock_Status;
 
+
+/*
+ * VXIDGetDatum - Construct a text Datum representing a VXID
+ *
+ * This is currently only used in pg_lock_status, so we put it here.
+ */
+static Datum
+VXIDGetDatum(BackendId bid, LocalTransactionId lxid)
+{
+       /*
+        * The representation is "<bid>/<lxid>", signed and unsigned decimal
+        * respectively.  Keep in sync with log_line_prefix's %v in elog.c.
+        */
+       char vxidstr[32];
+
+       snprintf(vxidstr, sizeof(vxidstr), "%d/%u", bid, lxid);
+
+       return DirectFunctionCall1(textin, CStringGetDatum(vxidstr));
+}
+
+
 /*
  * pg_lock_status - produce a view with one row per held or awaited lock mode
  */
@@ -64,7 +86,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
 
                /* build tupdesc for result tuples */
                /* this had better match pg_locks view in system_views.sql */
-               tupdesc = CreateTemplateTupleDesc(13, false);
+               tupdesc = CreateTemplateTupleDesc(14, false);
                TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype",
                                                   TEXTOID, -1, 0);
                TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database",
@@ -75,21 +97,23 @@ pg_lock_status(PG_FUNCTION_ARGS)
                                                   INT4OID, -1, 0);
                TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple",
                                                   INT2OID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid",
+                                                  TEXTOID, -1, 0);
+               TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid",
                                                   XIDOID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid",
                                                   OIDOID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid",
                                                   OIDOID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid",
                                                   INT2OID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction",
-                                                  XIDOID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction",
+                                                  TEXTOID, -1, 0);
+               TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid",
                                                   INT4OID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode",
                                                   TEXTOID, -1, 0);
-               TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted",
+               TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted",
                                                   BOOLOID, -1, 0);
 
                funcctx->tuple_desc = BlessTupleDesc(tupdesc);
@@ -120,8 +144,8 @@ pg_lock_status(PG_FUNCTION_ARGS)
                LOCKMODE        mode = 0;
                const char *locktypename;
                char            tnbuf[32];
-               Datum           values[13];
-               char            nulls[13];
+               Datum           values[14];
+               char            nulls[14];
                HeapTuple       tuple;
                Datum           result;
 
@@ -193,7 +217,6 @@ pg_lock_status(PG_FUNCTION_ARGS)
                values[0] = DirectFunctionCall1(textin,
                                                                                CStringGetDatum(locktypename));
 
-
                switch (lock->tag.locktag_type)
                {
                        case LOCKTAG_RELATION:
@@ -206,6 +229,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
                                nulls[6] = 'n';
                                nulls[7] = 'n';
                                nulls[8] = 'n';
+                               nulls[9] = 'n';
                                break;
                        case LOCKTAG_PAGE:
                                values[1] = ObjectIdGetDatum(lock->tag.locktag_field1);
@@ -216,6 +240,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
                                nulls[6] = 'n';
                                nulls[7] = 'n';
                                nulls[8] = 'n';
+                               nulls[9] = 'n';
                                break;
                        case LOCKTAG_TUPLE:
                                values[1] = ObjectIdGetDatum(lock->tag.locktag_field1);
@@ -226,9 +251,22 @@ pg_lock_status(PG_FUNCTION_ARGS)
                                nulls[6] = 'n';
                                nulls[7] = 'n';
                                nulls[8] = 'n';
+                               nulls[9] = 'n';
                                break;
                        case LOCKTAG_TRANSACTION:
-                               values[5] = TransactionIdGetDatum(lock->tag.locktag_field1);
+                               values[6] = TransactionIdGetDatum(lock->tag.locktag_field1);
+                               nulls[1] = 'n';
+                               nulls[2] = 'n';
+                               nulls[3] = 'n';
+                               nulls[4] = 'n';
+                               nulls[5] = 'n';
+                               nulls[7] = 'n';
+                               nulls[8] = 'n';
+                               nulls[9] = 'n';
+                               break;
+                       case LOCKTAG_VIRTUALTRANSACTION:
+                               values[5] = VXIDGetDatum(lock->tag.locktag_field1,
+                                                                                lock->tag.locktag_field2);
                                nulls[1] = 'n';
                                nulls[2] = 'n';
                                nulls[3] = 'n';
@@ -236,31 +274,33 @@ pg_lock_status(PG_FUNCTION_ARGS)
                                nulls[6] = 'n';
                                nulls[7] = 'n';
                                nulls[8] = 'n';
+                               nulls[9] = 'n';
                                break;
                        case LOCKTAG_OBJECT:
                        case LOCKTAG_USERLOCK:
                        case LOCKTAG_ADVISORY:
                        default:                        /* treat unknown locktags like OBJECT */
                                values[1] = ObjectIdGetDatum(lock->tag.locktag_field1);
-                               values[6] = ObjectIdGetDatum(lock->tag.locktag_field2);
-                               values[7] = ObjectIdGetDatum(lock->tag.locktag_field3);
-                               values[8] = Int16GetDatum(lock->tag.locktag_field4);
+                               values[7] = ObjectIdGetDatum(lock->tag.locktag_field2);
+                               values[8] = ObjectIdGetDatum(lock->tag.locktag_field3);
+                               values[9] = Int16GetDatum(lock->tag.locktag_field4);
                                nulls[2] = 'n';
                                nulls[3] = 'n';
                                nulls[4] = 'n';
                                nulls[5] = 'n';
+                               nulls[6] = 'n';
                                break;
                }
 
-               values[9] = TransactionIdGetDatum(proc->xid);
+               values[10] = VXIDGetDatum(proc->backendId, proc->lxid);
                if (proc->pid != 0)
-                       values[10] = Int32GetDatum(proc->pid);
+                       values[11] = Int32GetDatum(proc->pid);
                else
-                       nulls[10] = 'n';
-               values[11] = DirectFunctionCall1(textin,
+                       nulls[11] = 'n';
+               values[12] = DirectFunctionCall1(textin,
                                          CStringGetDatum(GetLockmodeName(LOCK_LOCKMETHOD(*lock),
                                                                                                          mode)));
-               values[12] = BoolGetDatum(granted);
+               values[13] = BoolGetDatum(granted);
 
                tuple = heap_formtuple(funcctx->tuple_desc, values, nulls);
                result = HeapTupleGetDatum(tuple);
index d0d024e075a4078dd70c679b64272b2408d0d6d5..e8a3ed3db0e06b7a74d083fd88ff403f5f5ae617 100644 (file)
@@ -42,7 +42,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.195 2007/08/23 01:24:43 adunstan Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.196 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,6 +66,7 @@
 #include "postmaster/postmaster.h"
 #include "postmaster/syslogger.h"
 #include "storage/ipc.h"
+#include "storage/proc.h"
 #include "tcop/tcopprot.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
@@ -1592,9 +1593,14 @@ log_line_prefix(StringInfo buf)
                                if (MyProcPort == NULL)
                                        i = format_len;
                                break;
+                       case 'v':
+                               /* keep VXID format in sync with lockfuncs.c */
+                               if (MyProc != NULL)
+                                       appendStringInfo(buf, "%d/%u",
+                                                                        MyProc->backendId, MyProc->lxid);
+                               break;
                        case 'x':
-                               if (MyProcPort)
-                                       appendStringInfo(buf, "%u", GetTopTransactionId());
+                               appendStringInfo(buf, "%u", GetTopTransactionIdIfAny());
                                break;
                        case '%':
                                appendStringInfoChar(buf, '%');
@@ -1785,15 +1791,8 @@ write_csvlog(ErrorData *edata)
        appendStringInfoString(&buf, formatted_start_time);
        appendStringInfoChar(&buf, ',');
 
-
        /* Transaction id */
-       if (MyProcPort)
-       {
-               if (IsTransactionState())
-                       appendStringInfo(&buf, "%u", GetTopTransactionId());
-               else
-                       appendStringInfo(&buf, "%u", InvalidTransactionId);
-       }
+       appendStringInfo(&buf, "%u", GetTopTransactionIdIfAny());
 
        appendStringInfoChar(&buf, ',');
 
index b22099c2fd7c88bf11586a9ee73fb0791adca5fe..7de3145aa01c59093d3efa3e1aaf98d720351bf7 100644 (file)
                                        #   %c = session id
                                        #   %l = session line number
                                        #   %s = session start timestamp
-                                       #   %x = transaction id
+                                       #   %v = virtual transaction id
+                                       #   %x = transaction id (0 if none)
                                        #   %q = stop here in non-session 
                                        #        processes
                                        #   %% = '%'
index e8e2b08de42b018ddf2399127f7c6fe657a12dc6..731269af9a073dee29720812b89abb4d70367888 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.88 2007/08/01 22:45:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.89 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -139,6 +139,7 @@ typedef struct xl_xact_abort_prepared
 extern bool IsTransactionState(void);
 extern bool IsAbortedTransactionBlockState(void);
 extern TransactionId GetTopTransactionId(void);
+extern TransactionId GetTopTransactionIdIfAny(void);
 extern TransactionId GetCurrentTransactionId(void);
 extern TransactionId GetCurrentTransactionIdIfAny(void);
 extern SubTransactionId GetCurrentSubTransactionId(void);
index 2e1928dace06d5548ddcb643d484178574c02d47..372a43797a4f2f3347bd9b91c5b30a7276cd905c 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.82 2007/08/01 22:45:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.83 2007/09/05 18:10:48 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@@ -85,12 +85,6 @@ typedef struct XLogRecord
  */
 #define XLR_BKP_REMOVABLE              0x01
 
-/*
- * Sometimes we log records which are out of transaction control.
- * Rmgr may "or" XLOG_NO_TRAN into info passed to XLogInsert to indicate this.
- */
-#define XLOG_NO_TRAN                   XLR_INFO_MASK
-
 /* Sync methods */
 #define SYNC_METHOD_FSYNC              0
 #define SYNC_METHOD_FDATASYNC  1
@@ -139,10 +133,7 @@ typedef struct XLogRecData
 
 extern TimeLineID ThisTimeLineID;              /* current TLI */
 extern bool InRecovery;
-extern XLogRecPtr MyLastRecPtr;
-extern bool MyXactMadeXLogEntry;
-extern bool MyXactMadeTempRelUpdate;
-extern XLogRecPtr ProcLastRecEnd;
+extern XLogRecPtr XactLastRecEnd;
 
 /* these variables are GUC parameters related to XLOG */
 extern int     CheckPointSegments;
index 35e7bb9150fea278bb203a4df37585e30d00335e..d808609ab01786de4ddb6556a45a0ed2671affe1 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/c.h,v 1.220 2007/07/25 12:22:52 mha Exp $
+ * $PostgreSQL: pgsql/src/include/c.h,v 1.221 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -370,6 +370,8 @@ typedef regproc RegProcedure;
 
 typedef uint32 TransactionId;
 
+typedef uint32 LocalTransactionId;
+
 typedef uint32 SubTransactionId;
 
 #define InvalidSubTransactionId                ((SubTransactionId) 0)
index e229f161f945a9d68da7388cca2d1ce5af47fd7d..dcd9c90ecbbac2ed2ecb79d1934a5b680776a715 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.422 2007/09/04 16:41:42 adunstan Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.423 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200709041
+#define CATALOG_VERSION_NO     200709042
 
 #endif
index 36474cd2781fce01139276d1df6c5662c768afe8..fedf6b1fffb3f36a98f1f0c7349509c04e426400 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.58 2007/06/19 20:13:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.59 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -55,6 +55,11 @@ extern void XactLockTableDelete(TransactionId xid);
 extern void XactLockTableWait(TransactionId xid);
 extern bool ConditionalXactLockTableWait(TransactionId xid);
 
+/* Lock a VXID (used to wait for a transaction to finish) */
+extern void VirtualXactLockTableInsert(VirtualTransactionId vxid);
+extern void VirtualXactLockTableWait(VirtualTransactionId vxid);
+extern bool ConditionalVirtualXactLockTableWait(VirtualTransactionId vxid);
+
 /* Lock a general object (other than a relation) of the current database */
 extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
                                   LOCKMODE lockmode);
index e2a5bc7b6f54e4bd94c521ae11182ec53a29d413..30c8a3fa2bc208f71a59f354651772082450fa29 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.106 2007/06/19 20:13:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.107 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -15,6 +15,7 @@
 #define LOCK_H_
 
 #include "nodes/pg_list.h"
+#include "storage/backendid.h"
 #include "storage/itemptr.h"
 #include "storage/lwlock.h"
 #include "storage/shmem.h"
@@ -41,6 +42,37 @@ extern bool Debug_deadlocks;
 #endif   /* LOCK_DEBUG */
 
 
+/*
+ * Top-level transactions are identified by VirtualTransactionIDs comprising
+ * the BackendId of the backend running the xact, plus a locally-assigned
+ * LocalTransactionId.  These are guaranteed unique over the short term,
+ * but will be reused after a database restart; hence they should never
+ * be stored on disk.
+ *
+ * Note that struct VirtualTransactionId can not be assumed to be atomically
+ * assignable as a whole.  However, type LocalTransactionId is assumed to
+ * be atomically assignable, and the backend ID doesn't change often enough
+ * to be a problem, so we can fetch or assign the two fields separately.
+ * We deliberately refrain from using the struct within PGPROC, to prevent
+ * coding errors from trying to use struct assignment with it; instead use
+ * GET_VXID_FROM_PGPROC().
+ */
+typedef struct
+{
+       BackendId       backendId;              /* determined at backend startup */
+       LocalTransactionId localTransactionId;  /* backend-local transaction id */
+} VirtualTransactionId;
+
+#define InvalidLocalTransactionId              0
+#define LocalTransactionIdIsValid(lxid)        ((lxid) != InvalidLocalTransactionId)
+#define VirtualTransactionIdIsValid(vxid) \
+       (((vxid).backendId != InvalidBackendId) && \
+        LocalTransactionIdIsValid((vxid).localTransactionId))
+#define GET_VXID_FROM_PGPROC(vxid, proc) \
+       ((vxid).backendId = (proc).backendId, \
+        (vxid).localTransactionId = (proc).lxid)
+
+
 /*
  * LOCKMODE is an integer (1..N) indicating a lock type.  LOCKMASK is a bit
  * mask indicating a set of held or requested lock types (the bit 1<<mode
@@ -139,6 +171,8 @@ typedef enum LockTagType
        /* ID info for a tuple is PAGE info + OffsetNumber */
        LOCKTAG_TRANSACTION,            /* transaction (for waiting for xact done) */
        /* ID info for a transaction is its TransactionId */
+       LOCKTAG_VIRTUALTRANSACTION,     /* virtual transaction (ditto) */
+       /* ID info for a virtual transaction is its VirtualTransactionId */
        LOCKTAG_OBJECT,                         /* non-relation database object */
        /* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */
 
@@ -214,6 +248,14 @@ typedef struct LOCKTAG
         (locktag).locktag_type = LOCKTAG_TRANSACTION, \
         (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
 
+#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
+       ((locktag).locktag_field1 = (vxid).backendId, \
+        (locktag).locktag_field2 = (vxid).localTransactionId, \
+        (locktag).locktag_field3 = 0, \
+        (locktag).locktag_field4 = 0, \
+        (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
+        (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
 #define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
        ((locktag).locktag_field1 = (dboid), \
         (locktag).locktag_field2 = (classoid), \
@@ -431,7 +473,8 @@ extern bool LockRelease(const LOCKTAG *locktag,
 extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
 extern void LockReleaseCurrentOwner(void);
 extern void LockReassignCurrentOwner(void);
-extern List *GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode);
+extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
+                                                                                         LOCKMODE lockmode);
 extern void AtPrepare_Locks(void);
 extern void PostPrepare_Locks(TransactionId xid);
 extern int LockCheckConflicts(LockMethod lockMethodTable,
index 756b0ffb0e7dd60e6dc8eb60bd860ecfd473e959..9fefa0a5a93e5bd2dfe4505caca7935e8d8cc222 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.99 2007/07/25 12:22:53 mha Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.100 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,8 +62,13 @@ struct PGPROC
        PGSemaphoreData sem;            /* ONE semaphore to sleep on */
        int                     waitStatus;             /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */
 
-       TransactionId xid;                      /* transaction currently being executed by
-                                                                * this proc */
+       LocalTransactionId lxid;        /* local id of top-level transaction currently
+                                                                * being executed by this proc, if running;
+                                                                * else InvalidLocalTransactionId */
+
+       TransactionId xid;                      /* id of top-level transaction currently being
+                                                                * executed by this proc, if running and XID
+                                                                * is assigned; else InvalidTransactionId */
 
        TransactionId xmin;                     /* minimal running XID as it was when we were
                                                                 * starting our xact, excluding LAZY VACUUM:
@@ -71,6 +76,7 @@ struct PGPROC
                                                                 * xid >= xmin ! */
 
        int                     pid;                    /* This backend's process id, or 0 */
+       BackendId       backendId;              /* This backend's backend ID (if assigned) */
        Oid                     databaseId;             /* OID of database this backend is using */
        Oid                     roleId;                 /* OID of role using this backend */
 
index dafb83a9658f9503d4de39ac1543592b5ad47f81..524710506a7dfc78ad1d21f8990b54a1b3249ccd 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.14 2007/06/01 19:38:07 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.15 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,6 +33,7 @@ extern PGPROC *BackendPidGetProc(int pid);
 extern int     BackendXidGetPid(TransactionId xid);
 extern bool IsBackendPid(int pid);
 
+extern VirtualTransactionId *GetCurrentVirtualXIDs(TransactionId limitXmin);
 extern int     CountActiveBackends(void);
 extern int     CountDBBackends(Oid databaseid);
 extern int     CountUserBackends(Oid roleid);
index 778d7a4a2ba77bddae8288753027842c1a6eaf5d..ff0a68e25a2c9f052f98b0deb139d61295ca54a7 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/sinvaladt.h,v 1.42 2007/01/05 22:19:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/sinvaladt.h,v 1.43 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,6 +85,13 @@ typedef struct SISeg
        int                     maxBackends;    /* size of procState array */
        int                     freeBackends;   /* number of empty procState slots */
 
+       /*
+        * Next LocalTransactionId to use for each idle backend slot.  We keep
+        * this here because it is indexed by BackendId and it is convenient to
+        * copy the value to and from local memory when MyBackendId is set.
+        */
+       LocalTransactionId *nextLXID; /* array of maxBackends entries */
+
        /*
         * Circular buffer holding shared-inval messages
         */
@@ -114,4 +121,6 @@ extern int SIGetDataEntry(SISeg *segP, int backendId,
                           SharedInvalidationMessage *data);
 extern void SIDelExpiredDataEntries(SISeg *segP);
 
+extern LocalTransactionId GetNextLocalTransactionId(void);
+
 #endif   /* SINVALADT_H */
index 3beb14febaf13bca85a620287a921167942efa55..bc071e7ef052d85c8aed73ac8c288c89800e0fd3 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.59 2007/09/05 18:10:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -76,7 +76,8 @@ extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
                         bool isTemp);
 extern void smgrimmedsync(SMgrRelation reln);
 extern void smgrDoPendingDeletes(bool isCommit);
-extern int     smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
+extern int     smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr,
+                                                                 bool *haveNonTemp);
 extern void AtSubCommit_smgr(void);
 extern void AtSubAbort_smgr(void);
 extern void PostPrepare_smgr(void);
index 39e27a749241f29766b99f5e9978fab37608d2cd..ebbf8d1626210d9e9e86e5c8f2432e4900764fb7 100644 (file)
@@ -1282,7 +1282,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  pg_cursors               | SELECT c.name, c.statement, c.is_holdable, c.is_binary, c.is_scrollable, c.creation_time FROM pg_cursor() c(name text, statement text, is_holdable boolean, is_binary boolean, is_scrollable boolean, creation_time timestamp with time zone);
  pg_group                 | SELECT pg_authid.rolname AS groname, pg_authid.oid AS grosysid, ARRAY(SELECT pg_auth_members.member FROM pg_auth_members WHERE (pg_auth_members.roleid = pg_authid.oid)) AS grolist FROM pg_authid WHERE (NOT pg_authid.rolcanlogin);
  pg_indexes               | SELECT n.nspname AS schemaname, c.relname AS tablename, i.relname AS indexname, t.spcname AS tablespace, pg_get_indexdef(i.oid) AS indexdef FROM ((((pg_index x JOIN pg_class c ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = i.reltablespace))) WHERE ((c.relkind = 'r'::"char") AND (i.relkind = 'i'::"char"));
- pg_locks                 | SELECT l.locktype, l.database, l.relation, l.page, l.tuple, l.transactionid, l.classid, l.objid, l.objsubid, l.transaction, l.pid, l.mode, l.granted FROM pg_lock_status() l(locktype text, database oid, relation oid, page integer, tuple smallint, transactionid xid, classid oid, objid oid, objsubid smallint, transaction xid, pid integer, mode text, granted boolean);
+ pg_locks                 | SELECT l.locktype, l.database, l.relation, l.page, l.tuple, l.virtualxid, l.transactionid, l.classid, l.objid, l.objsubid, l.virtualtransaction, l.pid, l.mode, l.granted FROM pg_lock_status() l(locktype text, database oid, relation oid, page integer, tuple smallint, virtualxid text, transactionid xid, classid oid, objid oid, objsubid smallint, virtualtransaction text, pid integer, mode text, granted boolean);
  pg_prepared_statements   | SELECT p.name, p.statement, p.prepare_time, p.parameter_types, p.from_sql FROM pg_prepared_statement() p(name text, statement text, prepare_time timestamp with time zone, parameter_types regtype[], from_sql boolean);
  pg_prepared_xacts        | SELECT p.transaction, p.gid, p.prepared, u.rolname AS owner, d.datname AS database FROM ((pg_prepared_xact() p(transaction xid, gid text, prepared timestamp with time zone, ownerid oid, dbid oid) LEFT JOIN pg_authid u ON ((p.ownerid = u.oid))) LEFT JOIN pg_database d ON ((p.dbid = d.oid)));
  pg_roles                 | SELECT pg_authid.rolname, pg_authid.rolsuper, pg_authid.rolinherit, pg_authid.rolcreaterole, pg_authid.rolcreatedb, pg_authid.rolcatupdate, pg_authid.rolcanlogin, pg_authid.rolconnlimit, '********'::text AS rolpassword, pg_authid.rolvaliduntil, pg_authid.rolconfig, pg_authid.oid FROM pg_authid;