]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
amcheck: Fix snapshot usage in bt_index_parent_check
authorÁlvaro Herrera <alvherre@kurilemu.de>
Wed, 21 Jan 2026 17:55:43 +0000 (18:55 +0100)
committerÁlvaro Herrera <alvherre@kurilemu.de>
Wed, 21 Jan 2026 17:55:43 +0000 (18:55 +0100)
We were using SnapshotAny to do some index checks, but that's wrong and
causes spurious errors when used on indexes created by CREATE INDEX
CONCURRENTLY.  Fix it to use an MVCC snapshot, and add a test for it.

Backpatch of 6bd469d26aca to branches 14-16.  I previously misidentified
the bug's origin: it came in with commit 7f563c09f890 (pg11-era, not
5ae2087202af as claimed previously), so all live branches are affected.

Also take the opportunity to fix some comments that we failed to update
in the original commits and apply pgperltidy.  In branch 14, remove the
unnecessary test plan specification (which would have need to have been
changed anyway; c.f. commit 549ec201d613.)

Diagnosed-by: Donghang Lin <donghanglin@gmail.com>
Author: Mihail Nikalayeu <mihailnikalayeu@gmail.com>
Reviewed-by: Andrey Borodin <x4mmm@yandex-team.ru>
Backpatch-through: 17
Discussion: https://postgr.es/m/CANtu0ojmVd27fEhfpST7RG2KZvwkX=dMyKUqg0KM87FkOSdz8Q@mail.gmail.com

contrib/amcheck/t/002_cic.pl
contrib/amcheck/verify_nbtree.c
doc/src/sgml/amcheck.sgml

index 3587807b9577152cd351ae61857e7efd2dc73225..b23e761abe14e532234d34a1f0380eaac5c0c5d2 100644 (file)
@@ -9,7 +9,7 @@ use Config;
 use PostgresNode;
 use TestLib;
 
-use Test::More tests => 3;
+use Test::More;
 
 my ($node, $result);
 
@@ -61,5 +61,29 @@ $node->pgbench(
                  )
        });
 
+# Test bt_index_parent_check() with indexes created with
+# CREATE INDEX CONCURRENTLY.
+$node->safe_psql('postgres', q(CREATE TABLE quebec(i int primary key)));
+# Insert two rows into index
+$node->safe_psql('postgres',
+       q(INSERT INTO quebec SELECT i FROM generate_series(1, 2) s(i);));
+
+# start background transaction
+my $in_progress_h = $node->background_psql('postgres');
+$in_progress_h->query_safe(q(BEGIN; SELECT pg_current_xact_id();));
+
+# delete one row from table, while background transaction is in progress
+$node->safe_psql('postgres', q(DELETE FROM quebec WHERE i = 1;));
+# create index concurrently, which will skip the deleted row
+$node->safe_psql('postgres',
+       q(CREATE INDEX CONCURRENTLY oscar ON quebec(i);));
+
+# check index using bt_index_parent_check
+$result = $node->psql('postgres',
+       q(SELECT bt_index_parent_check('oscar', heapallindexed => true)));
+is($result, '0', 'bt_index_parent_check for CIC after removed row');
+
+$in_progress_h->quit;
+
 $node->stop;
 done_testing();
index f9afb4d7bd0738e358730aa70d7dacf901362f5c..f75667d80617954059b526e2661512d9588ced10 100644 (file)
@@ -462,11 +462,11 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                                         bool readonly, bool heapallindexed, bool rootdescend)
 {
        BtreeCheckState *state;
+       Snapshot        snapshot = InvalidSnapshot;
        Page            metapage;
        BTMetaPageData *metad;
        uint32          previouslevel;
        BtreeLevel      current;
-       Snapshot        snapshot = SnapshotAny;
 
        if (!readonly)
                elog(DEBUG1, "verifying consistency of tree structure for index \"%s\"",
@@ -515,37 +515,33 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                state->heaptuplespresent = 0;
 
                /*
-                * Register our own snapshot in !readonly case, rather than asking
+                * Register our own snapshot for heapallindexed, rather than asking
                 * table_index_build_scan() to do this for us later.  This needs to
                 * happen before index fingerprinting begins, so we can later be
                 * certain that index fingerprinting should have reached all tuples
                 * returned by table_index_build_scan().
                 */
-               if (!state->readonly)
-               {
-                       snapshot = RegisterSnapshot(GetTransactionSnapshot());
+               snapshot = RegisterSnapshot(GetTransactionSnapshot());
 
-                       /*
-                        * GetTransactionSnapshot() always acquires a new MVCC snapshot in
-                        * READ COMMITTED mode.  A new snapshot is guaranteed to have all
-                        * the entries it requires in the index.
-                        *
-                        * We must defend against the possibility that an old xact
-                        * snapshot was returned at higher isolation levels when that
-                        * snapshot is not safe for index scans of the target index.  This
-                        * is possible when the snapshot sees tuples that are before the
-                        * index's indcheckxmin horizon.  Throwing an error here should be
-                        * very rare.  It doesn't seem worth using a secondary snapshot to
-                        * avoid this.
-                        */
-                       if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
-                               !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
-                                                                          snapshot->xmin))
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-                                                errmsg("index \"%s\" cannot be verified using transaction snapshot",
-                                                               RelationGetRelationName(rel))));
-               }
+               /*
+                * GetTransactionSnapshot() always acquires a new MVCC snapshot in
+                * READ COMMITTED mode.  A new snapshot is guaranteed to have all the
+                * entries it requires in the index.
+                *
+                * We must defend against the possibility that an old xact snapshot
+                * was returned at higher isolation levels when that snapshot is not
+                * safe for index scans of the target index.  This is possible when
+                * the snapshot sees tuples that are before the index's indcheckxmin
+                * horizon.  Throwing an error here should be very rare.  It doesn't
+                * seem worth using a secondary snapshot to avoid this.
+                */
+               if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+                       !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+                                                                  snapshot->xmin))
+                       ereport(ERROR,
+                                       errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                                       errmsg("index \"%s\" cannot be verified using transaction snapshot",
+                                                  RelationGetRelationName(rel)));
        }
 
        Assert(!state->rootdescend || state->readonly);
@@ -620,8 +616,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                /*
                 * Create our own scan for table_index_build_scan(), rather than
                 * getting it to do so for us.  This is required so that we can
-                * actually use the MVCC snapshot registered earlier in !readonly
-                * case.
+                * actually use the MVCC snapshot registered earlier.
                 *
                 * Note that table_index_build_scan() calls heap_endscan() for us.
                 */
@@ -634,16 +629,15 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
 
                /*
                 * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY
-                * behaves in !readonly case.
+                * behaves.
                 *
                 * It's okay that we don't actually use the same lock strength for the
-                * heap relation as any other ii_Concurrent caller would in !readonly
-                * case.  We have no reason to care about a concurrent VACUUM
-                * operation, since there isn't going to be a second scan of the heap
-                * that needs to be sure that there was no concurrent recycling of
-                * TIDs.
+                * heap relation as any other ii_Concurrent caller would.  We have no
+                * reason to care about a concurrent VACUUM operation, since there
+                * isn't going to be a second scan of the heap that needs to be sure
+                * that there was no concurrent recycling of TIDs.
                 */
-               indexinfo->ii_Concurrent = !state->readonly;
+               indexinfo->ii_Concurrent = true;
 
                /*
                 * Don't wait for uncommitted tuple xact commit/abort when index is a
@@ -667,13 +661,12 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
                                                                 state->heaptuplespresent, RelationGetRelationName(heaprel),
                                                                 100.0 * bloom_prop_bits_set(state->filter))));
 
-               if (snapshot != SnapshotAny)
-                       UnregisterSnapshot(snapshot);
-
                bloom_free(state->filter);
        }
 
        /* Be tidy: */
+       if (snapshot != InvalidSnapshot)
+               UnregisterSnapshot(snapshot);
        MemoryContextDelete(state->targetcontext);
 }
 
index c570690b59c5783295143f83f6ea00c99e5c0b4f..bff328351edc6571c5788f2df27e8e74ecd52b81 100644 (file)
@@ -353,7 +353,7 @@ SET client_min_messages = DEBUG1;
   verification functions is <literal>true</literal>, an additional
   phase of verification is performed against the table associated with
   the target index relation.  This consists of a <quote>dummy</quote>
-  <command>CREATE INDEX</command> operation, which checks for the
+  <command>CREATE INDEX CONCURRENTLY</command> operation, which checks for the
   presence of all hypothetical new index tuples against a temporary,
   in-memory summarizing structure (this is built when needed during
   the basic first phase of verification).  The summarizing structure