]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Revert "WAL-log inplace update before revealing it to other sessions."
authorNoah Misch <noah@leadboat.com>
Sat, 2 Nov 2024 16:04:59 +0000 (09:04 -0700)
committerNoah Misch <noah@leadboat.com>
Sat, 2 Nov 2024 16:05:08 +0000 (09:05 -0700)
This reverts commit bfd5c6e279c8e1702eea882439dc7ebdf4d4b3a5 (v17) and
counterparts in each other non-master branch.  This unblocks reverting a
commit on which it depends.

Discussion: https://postgr.es/m/10ec0bc3-5933-1189-6bb8-5dec4114558e@gmail.com

src/backend/access/heap/README.tuplock
src/backend/access/heap/heapam.c
src/backend/access/transam/xloginsert.c

index 31c52ad28f9d6b01ecebaf3ea499c2033ddeda4c..818cd7f980601926ae98293dbcfbb6dd99a14172 100644 (file)
@@ -203,4 +203,6 @@ Inplace updates create an exception to the rule that tuple data won't change
 under a reader holding a pin.  A reader of a heap_fetch() result tuple may
 witness a torn read.  Current inplace-updated fields are aligned and are no
 wider than four bytes, and current readers don't need consistency across
-fields.  Hence, they get by with just fetching each field once.
+fields.  Hence, they get by with just fetching each field once.  XXX such a
+caller may also read a value that has not reached WAL; see
+systable_inplace_update_finish().
index 236f5e78fef4e8aa80837c12bbe1dc9de7745c77..36d1ae38fe6e7e0ad4c11f22b8be7def6d5e2992 100644 (file)
@@ -6139,8 +6139,6 @@ heap_inplace_update_and_unlock(Relation relation,
        HeapTupleHeader htup = oldtup->t_data;
        uint32          oldlen;
        uint32          newlen;
-       char       *dst;
-       char       *src;
 
        Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
        oldlen = oldtup->t_len - htup->t_hoff;
@@ -6148,9 +6146,6 @@ heap_inplace_update_and_unlock(Relation relation,
        if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
                elog(ERROR, "wrong tuple length");
 
-       dst = (char *) htup + htup->t_hoff;
-       src = (char *) tuple->t_data + tuple->t_data->t_hoff;
-
        /*
         * Construct shared cache inval if necessary.  Note that because we only
         * pass the new version of the tuple, this mustn't be used for any
@@ -6169,15 +6164,15 @@ heap_inplace_update_and_unlock(Relation relation,
         */
        PreInplace_Inval();
 
+       /* NO EREPORT(ERROR) from here till changes are logged */
+       START_CRIT_SECTION();
+
+       memcpy((char *) htup + htup->t_hoff,
+                  (char *) tuple->t_data + tuple->t_data->t_hoff,
+                  newlen);
+
        /*----------
-        * NO EREPORT(ERROR) from here till changes are complete
-        *
-        * Our buffer lock won't stop a reader having already pinned and checked
-        * visibility for this tuple.  Hence, we write WAL first, then mutate the
-        * buffer.  Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
-        * checkpoint delay makes that acceptable.  With the usual order of
-        * changes, a crash after memcpy() and before XLogInsert() could allow
-        * datfrozenxid to overtake relfrozenxid:
+        * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
         *
         * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
         * ["R" is a VACUUM tbl]
@@ -6187,28 +6182,14 @@ heap_inplace_update_and_unlock(Relation relation,
         * D: raise pg_database.datfrozenxid, XLogInsert(), finish
         * [crash]
         * [recovery restores datfrozenxid w/o relfrozenxid]
-        *
-        * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
-        * the buffer to the stack before logging.  Here, that facilitates a FPI
-        * of the post-mutation block before we accept other sessions seeing it.
         */
-       Assert(!MyPgXact->delayChkpt);
-       START_CRIT_SECTION();
-       MyPgXact->delayChkpt = true;
+
+       MarkBufferDirty(buffer);
 
        /* XLOG stuff */
        if (RelationNeedsWAL(relation))
        {
                xl_heap_inplace xlrec;
-               PGAlignedBlock copied_buffer;
-               char       *origdata = (char *) BufferGetBlock(buffer);
-               Page            page = BufferGetPage(buffer);
-               uint16          lower = ((PageHeader) page)->pd_lower;
-               uint16          upper = ((PageHeader) page)->pd_upper;
-               uintptr_t       dst_offset_in_block;
-               RelFileNode     rnode;
-               ForkNumber      forkno;
-               BlockNumber blkno;
                XLogRecPtr      recptr;
 
                xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
@@ -6216,28 +6197,16 @@ heap_inplace_update_and_unlock(Relation relation,
                XLogBeginInsert();
                XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
 
-               /* register block matching what buffer will look like after changes */
-               memcpy(copied_buffer.data, origdata, lower);
-               memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
-               dst_offset_in_block = dst - origdata;
-               memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
-               BufferGetTag(buffer, &rnode, &forkno, &blkno);
-               Assert(forkno == MAIN_FORKNUM);
-               XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data,
-                                                 REGBUF_STANDARD);
-               XLogRegisterBufData(0, src, newlen);
+               XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+               XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
 
                /* inplace updates aren't decoded atm, don't log the origin */
 
                recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
 
-               PageSetLSN(page, recptr);
+               PageSetLSN(BufferGetPage(buffer), recptr);
        }
 
-       memcpy(dst, src, newlen);
-
-       MarkBufferDirty(buffer);
-
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
        /*
@@ -6250,7 +6219,6 @@ heap_inplace_update_and_unlock(Relation relation,
         */
        AtInplace_Inval();
 
-       MyPgXact->delayChkpt = false;
        END_CRIT_SECTION();
        UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
 
index 1993088a672ff0a58f3af6531fa095d79b2bce27..78a95344f91a56bc0278385246d52bf0e2d7f8b8 100644 (file)
@@ -268,6 +268,8 @@ XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
 {
        registered_buffer *regbuf;
 
+       /* This is currently only used to WAL-log a full-page image of a page */
+       Assert(flags & REGBUF_FORCE_IMAGE);
        Assert(begininsert_called);
 
        if (block_id >= max_registered_block_id)