git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix WAL-logging of FSM and VM truncation.
author: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 19 Oct 2016 11:43:34 +0000 (14:43 +0300)
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 19 Oct 2016 12:00:06 +0000 (15:00 +0300)
When a relation is truncated, it is important that the FSM is truncated as
well. Otherwise, after recovery, the FSM can return a page that has been
truncated away, leading to errors like:

ERROR:  could not read block 28991 in file "base/16390/572026": read only 0 of 8192 bytes

We were using MarkBufferDirtyHint() to dirty the buffer holding the last
remaining page of the FSM, but during recovery, that might in fact not
dirty the page, and the FSM update might be lost.

To fix, use the stronger MarkBufferDirty() function. MarkBufferDirty()
requires us to do WAL-logging ourselves, to protect from a torn page, if
checksumming is enabled.

Also fix an oversight in visibilitymap_truncate: it also needs to WAL-log
when checksumming is enabled.

Analysis by Pavan Deolasee.

Discussion: <CABOikdNr5vKucqyZH9s1Mh0XebLs_jRhKv6eJfNnD2wxTn=_9A@mail.gmail.com>

Backpatch to 9.3, where we got data checksums.

src/backend/access/heap/visibilitymap.c
src/backend/storage/freespace/freespace.c

index 7c387720d3b5f104bc3e4ae0557b391fec4f61a7..df8fb498403151f2f5bf2b8003b4d1e50791919c 100644 (file)
@@ -474,6 +474,9 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
 
                LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
 
+               /* NO EREPORT(ERROR) from here till changes are logged */
+               START_CRIT_SECTION();
+
                /* Clear out the unwanted bytes. */
                MemSet(&map[truncByte + 1], 0, MAPSIZE - (truncByte + 1));
 
@@ -489,7 +492,20 @@ visibilitymap_truncate(Relation rel, BlockNumber nheapblocks)
                 */
                map[truncByte] &= (1 << truncBit) - 1;
 
+               /*
+                * Truncation of a relation is WAL-logged at a higher-level, and we
+                * will be called at WAL replay. But if checksums are enabled, we need
+                * to still write a WAL record to protect against a torn page, if the
+                * page is flushed to disk before the truncation WAL record. We cannot
+                * use MarkBufferDirtyHint here, because that will not dirty the page
+                * during recovery.
+                */
                MarkBufferDirty(mapBuffer);
+               if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
+                       log_newpage_buffer(mapBuffer, false);
+
+               END_CRIT_SECTION();
+
                UnlockReleaseBuffer(mapBuffer);
        }
        else
index fddb47ce85e893aaffd1b94f5ca9bcef14e106a6..55dabfec184720ede968965aaafc8962bfdfb1b5 100644 (file)
@@ -24,6 +24,7 @@
 #include "postgres.h"
 
 #include "access/htup_details.h"
+#include "access/xlog.h"
 #include "access/xlogutils.h"
 #include "miscadmin.h"
 #include "storage/freespace.h"
@@ -285,8 +286,26 @@ FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
                if (!BufferIsValid(buf))
                        return;                         /* nothing to do; the FSM was already smaller */
                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+               /* NO EREPORT(ERROR) from here till changes are logged */
+               START_CRIT_SECTION();
+
                fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
-               MarkBufferDirtyHint(buf, false);
+
+               /*
+                * Truncation of a relation is WAL-logged at a higher-level, and we
+                * will be called at WAL replay. But if checksums are enabled, we need
+                * to still write a WAL record to protect against a torn page, if the
+                * page is flushed to disk before the truncation WAL record. We cannot
+                * use MarkBufferDirtyHint here, because that will not dirty the page
+                * during recovery.
+                */
+               MarkBufferDirty(buf);
+               if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
+                       log_newpage_buffer(buf, false);
+
+               END_CRIT_SECTION();
+
                UnlockReleaseBuffer(buf);
 
                new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;