git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
At update of non-LP_NORMAL TID, fail instead of corrupting page header.
author: Noah Misch <noah@leadboat.com>
Sat, 25 Jan 2025 19:28:14 +0000 (11:28 -0800)
committer: Noah Misch <noah@leadboat.com>
Sat, 25 Jan 2025 19:28:14 +0000 (11:28 -0800)
The right mix of DDL and VACUUM could corrupt a catalog page header such
that PageIsVerified() durably fails, requiring a restore from backup.
This affects only catalogs that both have a syscache and have DDL code
that uses syscache tuples to construct updates.  One of the test
permutations shows a variant not yet fixed.

This makes !TransactionIdIsValid(TM_FailureData.xmax) possible with
TM_Deleted.  I think core and PGXN are indifferent to that.

Per bug #17821 from Alexander Lakhin.  Back-patch to v13 (all supported
versions).  The test case is v17+, since it uses INJECTION_POINT.

Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org

src/backend/access/heap/heapam.c
src/backend/utils/cache/inval.c
src/include/access/tableam.h
src/test/modules/injection_points/Makefile
src/test/modules/injection_points/expected/syscache-update-pruned.out [new file with mode: 0644]
src/test/modules/injection_points/expected/syscache-update-pruned_1.out [new file with mode: 0644]
src/test/modules/injection_points/injection_points--1.0.sql
src/test/modules/injection_points/meson.build
src/test/modules/injection_points/regress_injection.c [new file with mode: 0644]
src/test/modules/injection_points/specs/syscache-update-pruned.spec [new file with mode: 0644]

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index d0515a180d68ad8187cf3011438c7efcd56fda23..ea0a12b39af72a18f7754e0b631802e2244d1a66 100644 (file)
 #include "storage/predicate.h"
 #include "storage/procarray.h"
 #include "utils/datum.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/spccache.h"
+#include "utils/syscache.h"
 
 
 static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
@@ -3254,6 +3256,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
        interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
 
        block = ItemPointerGetBlockNumber(otid);
+       INJECTION_POINT("heap_update-before-pin");
        buffer = ReadBuffer(relation, block);
        page = BufferGetPage(buffer);
 
@@ -3269,7 +3272,51 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
        lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
-       Assert(ItemIdIsNormal(lp));
+
+       /*
+        * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
+        * we see LP_NORMAL here.  When the otid origin is a syscache, we may have
+        * neither a pin nor a snapshot.  Hence, we may see other LP_ states, each
+        * of which indicates concurrent pruning.
+        *
+        * Failing with TM_Updated would be most accurate.  However, unlike other
+        * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
+        * LP_DEAD cases.  While the distinction between TM_Updated and TM_Deleted
+        * does matter to SQL statements UPDATE and MERGE, those SQL statements
+        * hold a snapshot that ensures LP_NORMAL.  Hence, the choice between
+        * TM_Updated and TM_Deleted affects only the wording of error messages.
+        * Settle on TM_Deleted, for two reasons.  First, it avoids complicating
+        * the specification of when tmfd->ctid is valid.  Second, it creates
+        * error log evidence that we took this branch.
+        *
+        * Since it's possible to see LP_UNUSED at otid, it's also possible to see
+        * LP_NORMAL for a tuple that replaced LP_UNUSED.  If it's a tuple for an
+        * unrelated row, we'll fail with "duplicate key value violates unique".
+        * XXX if otid is the live, newer version of the newtup row, we'll discard
+        * changes originating in versions of this catalog row after the version
+        * the caller got from syscache.  See syscache-update-pruned.spec.
+        */
+       if (!ItemIdIsNormal(lp))
+       {
+               Assert(RelationSupportsSysCache(RelationGetRelid(relation)));
+
+               UnlockReleaseBuffer(buffer);
+               Assert(!have_tuple_lock);
+               if (vmbuffer != InvalidBuffer)
+                       ReleaseBuffer(vmbuffer);
+               tmfd->ctid = *otid;
+               tmfd->xmax = InvalidTransactionId;
+               tmfd->cmax = InvalidCommandId;
+               *update_indexes = TU_None;
+
+               bms_free(hot_attrs);
+               bms_free(sum_attrs);
+               bms_free(key_attrs);
+               bms_free(id_attrs);
+               /* modified_attrs not yet initialized */
+               bms_free(interesting_attrs);
+               return TM_Deleted;
+       }
 
        /*
         * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index f41d314eae3f439a88ee51985c10bce03bee1bad..32cf28bb8bc7077c7cbc5673d8eef6bf906ce5a2 100644 (file)
 #include "storage/sinval.h"
 #include "storage/smgr.h"
 #include "utils/catcache.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
@@ -1134,6 +1135,8 @@ AtEOXact_Inval(bool isCommit)
        /* Must be at top of stack */
        Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL);
 
+       INJECTION_POINT("AtEOXact_Inval-with-transInvalInfo");
+
        if (isCommit)
        {
                /*
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 09b9b394e0e2c7874f84fa89f7f519b4706d014b..131c050c15f139b9288baa578f0c238d4f2736fe 100644 (file)
@@ -136,7 +136,8 @@ typedef enum TU_UpdateIndexes
  *
  * xmax is the outdating transaction's XID.  If the caller wants to visit the
  * replacement tuple, it must check that this matches before believing the
- * replacement is really a match.
+ * replacement is really a match.  This is InvalidTransactionId if the target
+ * was !LP_NORMAL (expected only for a TID retrieved from syscache).
  *
  * cmax is the outdating command's CID, but only when the failure code is
  * TM_SelfModified (i.e., something in the current transaction outdated the
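diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile
index 0753a9df58c8ec3f0a2a9283f2bace4c0d7d841f..4f0161fd33a24692701aa47a20ca4f7d276bbf80 100644 (file)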
@@ -5,7 +5,8 @@ OBJS = \
        $(WIN32RES) \
        injection_points.o \
        injection_stats.o \
-       injection_stats_fixed.o
+       injection_stats_fixed.o \
+       regress_injection.o
 EXTENSION = injection_points
 DATA = injection_points--1.0.sql
 PGFILEDESC = "injection_points - facility for injection points"
@@ -13,7 +14,7 @@ PGFILEDESC = "injection_points - facility for injection points"
 REGRESS = injection_points reindex_conc
 REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress
 
-ISOLATION = basic inplace
+ISOLATION = basic inplace syscache-update-pruned
 
 TAP_TESTS = 1
 
diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned.out b/src/test/modules/injection_points/expected/syscache-update-pruned.out
new file mode 100644 (file)
index 0000000..5dc5a1d
--- /dev/null
@@ -0,0 +1,87 @@
+Parsed test spec with 4 sessions
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step wakegrant4: 
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+ERROR:  tuple concurrently deleted
+step wakegrant4: <... completed>
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step mkrels4: 
+       SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step wakegrant4: 
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+ERROR:  duplicate key value violates unique constraint "pg_class_oid_index"
+step wakegrant4: <... completed>
+
+starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4
+step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT;
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step mkrels4: 
+       SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step r3: ROLLBACK;
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50;
+step wakegrant4: 
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+step wakegrant4: <... completed>
+step inspect4: 
+       SELECT relhastriggers, relhassubclass FROM pg_class
+               WHERE oid = 'vactest.orig50'::regclass;
+
+relhastriggers|relhassubclass
+--------------+--------------
+f             |f             
+(1 row)
+
diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned_1.out b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out
new file mode 100644 (file)
index 0000000..b18857c
--- /dev/null
@@ -0,0 +1,86 @@
+Parsed test spec with 4 sessions
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step wakegrant4: 
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+step wakegrant4: <... completed>
+
+starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step mkrels4: 
+       SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step wakegrant4: 
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+step wakegrant4: <... completed>
+
+starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4
+step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT;
+step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50');
+step at2: 
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+ <waiting ...>
+step mkrels4: 
+       SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+
+step r3: ROLLBACK;
+step waitprunable4: CALL vactest.wait_prunable();
+step vac4: VACUUM pg_class;
+step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; <waiting ...>
+step wakeinval4: 
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+ <waiting ...>
+step at2: <... completed>
+step wakeinval4: <... completed>
+step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50;
+step wakegrant4: 
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+ <waiting ...>
+step grant1: <... completed>
+ERROR:  tuple concurrently updated
+step wakegrant4: <... completed>
+step inspect4: 
+       SELECT relhastriggers, relhassubclass FROM pg_class
+               WHERE oid = 'vactest.orig50'::regclass;
+
+relhastriggers|relhassubclass
+--------------+--------------
+t             |t             
+(1 row)
+
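diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql
index 6c81d55e0d36725093c098c3efffad662e387015..c445bf64e62387ceac8fe00696086c648a0de4b2 100644 (file)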
@@ -97,3 +97,11 @@ CREATE FUNCTION injection_points_stats_fixed(OUT numattach int8,
 RETURNS record
 AS 'MODULE_PATHNAME', 'injection_points_stats_fixed'
 LANGUAGE C STRICT;
+
+--
+-- regress_injection.c functions
+--
+CREATE FUNCTION removable_cutoff(rel regclass)
+RETURNS xid8
+AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT;  -- not STRICT: the C code checks for a NULL rel itself
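diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build
index ebe79fe06a12f1f00912fac507aa662d8d9a8658..259045e5c2d4ff4d046d5e1dc1a16d06aea6e62c 100644 (file)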
@@ -8,6 +8,7 @@ injection_points_sources = files(
   'injection_points.c',
   'injection_stats.c',
   'injection_stats_fixed.c',
+  'regress_injection.c',
 )
 
 if host_system == 'windows'
@@ -44,8 +45,9 @@ tests += {
     'specs': [
       'basic',
       'inplace',
+      'syscache-update-pruned',
     ],
-    'runningcheck': false, # align with GNU make build system
+    'runningcheck': false, # see syscache-update-pruned
   },
   'tap': {
     'env': {
diff --git a/src/test/modules/injection_points/regress_injection.c b/src/test/modules/injection_points/regress_injection.c
new file mode 100644 (file)
index 0000000..422f416
--- /dev/null
@@ -0,0 +1,71 @@
+/*--------------------------------------------------------------------------
+ *
+ * regress_injection.c
+ *             Functions supporting test-specific subject matter.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *             src/test/modules/injection_points/regress_injection.c
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/table.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "storage/procarray.h"
+#include "utils/xid8.h"
+
+/*
+ * removable_cutoff - for syscache-update-pruned.spec
+ *
+ * Wrapper around GetOldestNonRemovableTransactionId().  In general, this can
+ * move backward.  runningcheck=false isolation tests can reasonably prevent
+ * that.  For the causes of backward movement, see
+ * postgr.es/m/CAEze2Wj%2BV0kTx86xB_YbyaqTr5hnE_igdWAwuhSyjXBYscf5-Q%40mail.gmail.com
+ * and the header comment for ComputeXidHorizons().  One can assume this
+ * doesn't move backward if one arranges for concurrent activity not to reach
+ * AbortTransaction() and not to allocate an XID while connected to another
+ * database.  Non-runningcheck tests can control most concurrent activity,
+ * except autovacuum and the isolationtester control connection.  Neither
+ * allocates XIDs, and AbortTransaction() in those would justify test failure.
+ */
+PG_FUNCTION_INFO_V1(removable_cutoff);
+Datum
+removable_cutoff(PG_FUNCTION_ARGS)
+{
+       Relation        rel = NULL; /* stays NULL when the SQL argument is NULL */
+       TransactionId xid;          /* cutoff reported by the callee */
+       FullTransactionId next_fxid_before, /* nextXid before computing xid */
+                               next_fxid;  /* nextXid after computing xid */
+
+       /* could take other relkinds callee takes, but we've not yet needed it */
+       if (!PG_ARGISNULL(0))
+               rel = table_open(PG_GETARG_OID(0), AccessShareLock);
+
+       /*
+        * No lock or snapshot necessarily prevents oldestXid from advancing past
+        * "xid" while this function runs.  That concerns us only in that we must
+        * not ascribe "xid" to the wrong epoch.  (That may never arise in
+        * isolation testing, but let's set a good example.)  As a crude solution,
+        * retry until nextXid doesn't change.
+        */
+       next_fxid = ReadNextFullTransactionId();
+       do
+       {
+               CHECK_FOR_INTERRUPTS();
+               next_fxid_before = next_fxid;
+               xid = GetOldestNonRemovableTransactionId(rel);
+               next_fxid = ReadNextFullTransactionId();
+       } while (!FullTransactionIdEquals(next_fxid, next_fxid_before));
+
+       if (rel)
+               table_close(rel, AccessShareLock);
+
+       PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid,
+                                                                                                                                xid));
+}
diff --git a/src/test/modules/injection_points/specs/syscache-update-pruned.spec b/src/test/modules/injection_points/specs/syscache-update-pruned.spec
new file mode 100644 (file)
index 0000000..b48e897
--- /dev/null
@@ -0,0 +1,179 @@
+# Test race conditions involving:
+# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin
+# - s2: CREATE TRIGGER making $FROM_SYSCACHE a dead tuple
+# - s4: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED
+
+# This is a derivative work of inplace.spec, which exercises the corresponding
+# race condition for inplace updates.
+
+# Despite local injection points, this is incompatible with runningcheck.
+# First, removable_cutoff() could move backward, per its header comment.
+# Second, other activity could trigger sinval queue overflow, negating our
+# efforts to delay inval.  Third, this deadlock emerges:
+#
+# - step at2 waits at an injection point, with interrupts held
+# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE
+# - step waitprunable4 waits for the unrelated backend to release its xmin
+
+# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting
+# that thwarts testing the race conditions this spec seeks.
+
+
+# Need s2 to make a non-HOT update.  Otherwise, "VACUUM pg_class" would leave
+# an LP_REDIRECT that persists.  To get non-HOT, make rels so the pg_class row
+# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192).  Just to save
+# on filesystem syscalls, use relkind=c for every other rel.
+setup
+{
+       CREATE EXTENSION injection_points;
+       CREATE SCHEMA vactest;
+       -- Ensure a leader RELOID catcache entry.  PARALLEL RESTRICTED since a
+       -- parallel worker running pg_relation_filenode() would lack that effect.
+       CREATE FUNCTION vactest.reloid_catcache_set(regclass) RETURNS int
+               LANGUAGE sql PARALLEL RESTRICTED
+               AS 'SELECT 0 FROM pg_relation_filenode($1)';
+       CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void
+               LANGUAGE plpgsql SET search_path = vactest AS $$
+       DECLARE
+               tname text;
+       BEGIN
+               FOR i in $2 .. $3 LOOP
+                       tname := $1 || i;
+                       EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()');
+                       RAISE DEBUG '% at %', tname, ctid
+                               FROM pg_class WHERE oid = tname::regclass;
+               END LOOP;
+       END
+       $$;
+       CREATE PROCEDURE vactest.wait_prunable() LANGUAGE plpgsql AS $$
+       DECLARE
+               barrier xid8;
+               cutoff xid8;
+       BEGIN
+               barrier := pg_current_xact_id();
+               -- autovacuum worker RelationCacheInitializePhase3() or the
+               -- isolationtester control connection might hold a snapshot that
+               -- limits pruning.  Sleep until that clears.
+               LOOP
+                       ROLLBACK;  -- release MyProc->xmin, which could be the oldest
+                       cutoff := removable_cutoff('pg_class');
+                       EXIT WHEN cutoff >= barrier;
+                       RAISE LOG 'removable cutoff %; waiting for %', cutoff, barrier;
+                       PERFORM pg_sleep(.1);
+               END LOOP;
+       END
+       $$;
+}
+setup  { CALL vactest.wait_prunable();  -- maximize next two VACUUMs }
+setup  { VACUUM FULL pg_class;  -- reduce free space }
+setup  { VACUUM FREEZE pg_class;  -- populate fsm etc. }
+setup
+{
+       SELECT FROM vactest.mkrels('orig', 1, 49);
+       CREATE TABLE vactest.orig50 (c int) WITH (autovacuum_enabled = off);
+       CREATE TABLE vactest.child50 (c int) WITH (autovacuum_enabled = off);
+       SELECT FROM vactest.mkrels('orig', 51, 100);
+}
+teardown
+{
+       DROP SCHEMA vactest CASCADE;
+       DROP EXTENSION injection_points;
+}
+
+# Wait during GRANT.  Disable debug_discard_caches, since we're here to
+# exercise an outcome that happens under permissible cache staleness.
+session s1
+setup  {
+       SET debug_discard_caches = 0;
+       SELECT FROM injection_points_set_local();
+       SELECT FROM injection_points_attach('heap_update-before-pin', 'wait');
+}
+step cachefill1        { SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); }
+step grant1    { GRANT SELECT ON vactest.orig50 TO PUBLIC; }
+
+# Update of the tuple that grant1 will update.  Wait before sending invals, so
+# s1 will not get a cache miss.  Choose the commands for making such updates
+# from among those whose heavyweight locking does not conflict with GRANT's
+# heavyweight locking.  (GRANT will see our XID as committed, so observing
+# that XID in the tuple xmax also won't block GRANT.)
+session s2
+setup  {
+       SELECT FROM injection_points_set_local();
+       SELECT FROM
+               injection_points_attach('AtEOXact_Inval-with-transInvalInfo', 'wait');
+}
+step at2       {
+       CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50
+               FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger();
+}
+
+# Hold snapshot to block pruning.
+session s3
+step snap3     { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; }
+step r3                { ROLLBACK; }
+
+# Non-blocking actions.
+session s4
+step waitprunable4     { CALL vactest.wait_prunable(); }
+step vac4              { VACUUM pg_class; }
+# Reuse the lp that s1 is waiting to change.  I've observed reuse at the 1st
+# or 18th CREATE, so create excess.
+step mkrels4   {
+       SELECT FROM vactest.mkrels('intruder', 1, 100);  -- repopulate LP_UNUSED
+}
+step wakegrant4        {
+       SELECT FROM injection_points_detach('heap_update-before-pin');
+       SELECT FROM injection_points_wakeup('heap_update-before-pin');
+}
+step at4       { ALTER TABLE vactest.child50 INHERIT vactest.orig50; }
+step wakeinval4        {
+       SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo');
+       SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo');
+}
+# Witness effects of steps at2 and/or at4.
+step inspect4  {
+       SELECT relhastriggers, relhassubclass FROM pg_class
+               WHERE oid = 'vactest.orig50'::regclass;
+}
+
+# TID from syscache becomes LP_UNUSED.  Before the bug fix, this permutation
+# made s1 fail with "attempted to update invisible tuple" or an assert.
+# However, suppose a pd_lsn value such that (pd_lsn.xlogid, pd_lsn.xrecoff)
+# passed for (xmin, xmax) with xmin known-committed and xmax known-aborted.
+# Persistent page header corruption ensued.  For example, s1 overwrote
+# pd_lower, pd_upper, and pd_special as though they were t_ctid.
+permutation
+       cachefill1                      # reads pg_class tuple T0, xmax invalid
+       at2                                     # T0 dead, T1 live
+       waitprunable4           # T0 prunable
+       vac4                            # T0 becomes LP_UNUSED
+       grant1                          # pauses at heap_update(T0)
+       wakeinval4(at2)         # at2 sends inval message
+       wakegrant4(grant1)      # s1 wakes: "tuple concurrently deleted"
+
+# add mkrels4: LP_UNUSED becomes a different rel's row
+permutation
+       cachefill1                      # reads pg_class tuple T0, xmax invalid
+       at2                                     # T0 dead, T1 live
+       waitprunable4           # T0 prunable
+       vac4                            # T0 becomes LP_UNUSED
+       grant1                          # pauses at heap_update(T0)
+       wakeinval4(at2)         # at2 sends inval message
+       mkrels4                         # T0 becomes a new rel
+       wakegrant4(grant1)      # s1 wakes: "duplicate key value violates unique"
+
+# TID from syscache becomes LP_UNUSED, then becomes a newer version of the
+# original rel's row.
+permutation
+       snap3                           # sets MyProc->xmin
+       cachefill1                      # reads pg_class tuple T0, xmax invalid
+       at2                                     # T0 dead, T1 live
+       mkrels4                         # T1's page becomes full
+       r3                                      # clears MyProc->xmin
+       waitprunable4           # T0 prunable
+       vac4                            # T0 becomes LP_UNUSED
+       grant1                          # pauses at heap_update(T0)
+       wakeinval4(at2)         # at2 sends inval message
+       at4                                     # T1 dead, T0 live
+       wakegrant4(grant1)      # s1 wakes: T0 dead, T2 live
+       inspect4                        # observe loss of at2+at4 changes XXX is an extant bug