git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix failure to guarantee that a checkpoint will write out pg_clog updates
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 11 Aug 2004 04:08:02 +0000 (04:08 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 11 Aug 2004 04:08:02 +0000 (04:08 +0000)
for transaction commits that occurred just before the checkpoint.  This is
an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a
reproducible test case to prove its existence.

src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/include/storage/lwlock.h

index e6767d4c4b1829095347e5fd1c89047708d59c7f..3409de03a5642482a7ba019f187105ce6731b877 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156 2003/10/16 16:50:41 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156.2.1 2004/08/11 04:08:00 tgl Exp $
  *
  * NOTES
  *             Transaction aborts can now occur two ways:
@@ -503,6 +503,7 @@ RecordTransactionCommit(void)
        if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate)
        {
                TransactionId xid = GetCurrentTransactionId();
+               bool            madeTCentries;
                XLogRecPtr      recptr;
 
                /* Tell bufmgr and smgr to prepare for commit */
@@ -511,12 +512,29 @@ RecordTransactionCommit(void)
                START_CRIT_SECTION();
 
                /*
-                * We only need to log the commit in xlog if the transaction made
-                * any transaction-controlled XLOG entries.  (Otherwise, its XID
-                * appears nowhere in permanent storage, so no one else will ever
-                * care if it committed.)
+                * If our transaction made any transaction-controlled XLOG entries,
+                * we need to lock out checkpoint start between writing our XLOG
+                * record and updating pg_clog.  Otherwise it is possible for the
+                * checkpoint to set REDO after the XLOG record but fail to flush the
+                * pg_clog update to disk, leading to loss of the transaction commit
+                * if we crash a little later.  Slightly klugy fix for problem
+                * discovered 2004-08-10.
+                *
+                * (If it made no transaction-controlled XLOG entries, its XID
+                * appears nowhere in permanent storage, so no one else will ever care
+                * if it committed; so it doesn't matter if we lose the commit flag.)
+                *
+                * Note we only need a shared lock.
                 */
-               if (MyLastRecPtr.xrecoff != 0)
+               madeTCentries = (MyLastRecPtr.xrecoff != 0);
+               if (madeTCentries)
+                       LWLockAcquire(CheckpointStartLock, LW_SHARED);
+
+               /*
+                * We only need to log the commit in XLOG if the transaction made
+                * any transaction-controlled XLOG entries.
+                */
+               if (madeTCentries)
                {
                        /* Need to emit a commit record */
                        XLogRecData rdata;
@@ -585,6 +603,10 @@ RecordTransactionCommit(void)
                if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate)
                        TransactionIdCommit(xid);
 
+               /* Unlock checkpoint lock if we acquired it */
+               if (madeTCentries)
+                       LWLockRelease(CheckpointStartLock);
+
                END_CRIT_SECTION();
        }
 
@@ -698,6 +720,8 @@ RecordTransactionAbort(void)
                 * care if it committed.)  We do not flush XLOG to disk in any
                 * case, since the default assumption after a crash would be that
                 * we aborted, anyway.
+                * For the same reason, we don't need to worry about interlocking
+                * against checkpoint start.
                 */
                if (MyLastRecPtr.xrecoff != 0)
                {
index 8eb154f7bab5f0a0a0ffcdd23f5bb74b49e7cf10..a8acf758a8c3f73e4d71bf7122492d8d82232b3b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.1 2004/02/23 23:03:43 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.2 2004/08/11 04:08:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -3159,6 +3159,15 @@ CreateCheckPoint(bool shutdown, bool force)
        checkPoint.ThisStartUpID = ThisStartUpID;
        checkPoint.time = time(NULL);
 
+       /*
+        * We must hold CheckpointStartLock while determining the checkpoint
+        * REDO pointer.  This ensures that any concurrent transaction commits
+        * will be either not yet logged, or logged and recorded in pg_clog.
+        * See notes in RecordTransactionCommit().
+        */
+       LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE);
+
+       /* And we need WALInsertLock too */
        LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
 
        /*
@@ -3191,6 +3200,7 @@ CreateCheckPoint(bool shutdown, bool force)
                        ControlFile->checkPointCopy.redo.xrecoff)
                {
                        LWLockRelease(WALInsertLock);
+                       LWLockRelease(CheckpointStartLock);
                        LWLockRelease(CheckpointLock);
                        END_CRIT_SECTION();
                        return;
@@ -3258,11 +3268,13 @@ CreateCheckPoint(bool shutdown, bool force)
 #endif
 
        /*
-        * Now we can release insert lock, allowing other xacts to proceed
-        * even while we are flushing disk buffers.
+        * Now we can release insert lock and checkpoint start lock, allowing
+        * other xacts to proceed even while we are flushing disk buffers.
         */
        LWLockRelease(WALInsertLock);
 
+       LWLockRelease(CheckpointStartLock);
+
        /*
         * Get the other info we need for the checkpoint record.
         */
index 7b2a4c92154916e1f01f8b26696d32bb7efd840d..6f61eb20fdff316bf2012fa073be0f568f5d7091 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lwlock.h,v 1.8 2003/08/04 02:40:14 momjian Exp $
+ * $Id: lwlock.h,v 1.8.4.1 2004/08/11 04:08:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,6 +37,7 @@ typedef enum LWLockId
        WALWriteLock,
        ControlFileLock,
        CheckpointLock,
+       CheckpointStartLock,
        RelCacheInitLock,
 
        NumFixedLWLocks,                        /* must be last except for