bool readonly, bool heapallindexed);
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
BtreeLevel level);
+static bool bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
+ BlockNumber start,
+ BTPageOpaque start_opaque);
static void bt_target_page_check(BtreeCheckState *state);
static ScanKey bt_right_page_check_scankey(BtreeCheckState *state);
static void bt_downlink_check(BtreeCheckState *state, BlockNumber childblock,
*/
if (state->readonly)
{
- if (!P_LEFTMOST(opaque))
+ if (!bt_leftmost_ignoring_half_dead(state, current, opaque))
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("block %u is not leftmost in index \"%s\"",
}
/*
- * readonly mode can only ever land on live pages and half-dead pages,
- * so sibling pointers should always be in mutual agreement
+ * Sibling links should be in mutual agreement. There arises
+ * leftcurrent == P_NONE && btpo_prev != P_NONE when the left sibling
+ * of the parent's low-key downlink is half-dead. (A half-dead page
+ * has no downlink from its parent.) Under heavyweight locking, the
+ * last bt_leftmost_ignoring_half_dead() validated this btpo_prev.
*/
- if (state->readonly && opaque->btpo_prev != leftcurrent)
+ if (state->readonly &&
+ opaque->btpo_prev != leftcurrent && leftcurrent != P_NONE)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("left link/right link pair in index \"%s\" not in agreement",
return nextleveldown;
}
+/*
+ * Like P_LEFTMOST(start_opaque), but accept an arbitrarily-long chain of
+ * half-dead, sibling-linked pages to the left. If a half-dead page appears
+ * under state->readonly, the database exited recovery between the first-stage
+ * and second-stage WAL records of a deletion.
+ */
+static bool
+bt_leftmost_ignoring_half_dead(BtreeCheckState *state,
+ BlockNumber start,
+ BTPageOpaque start_opaque)
+{
+ BlockNumber reached = start_opaque->btpo_prev,
+ reached_from = start;
+ bool all_half_dead = true;
+
+ /*
+ * To handle the !readonly case, we'd need to accept BTP_DELETED pages and
+ * potentially observe nbtree/README "Page deletion and backwards scans".
+ */
+ Assert(state->readonly);
+
+ while (reached != P_NONE && all_half_dead)
+ {
+ Page page = palloc_btree_page(state, reached);
+ BTPageOpaque reached_opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Try to detect btpo_prev circular links. _bt_unlink_halfdead_page()
+ * writes that side-links will continue to point to the siblings.
+ * Check btpo_next for that property.
+ */
+ all_half_dead = P_ISHALFDEAD(reached_opaque) &&
+ reached != start &&
+ reached != reached_from &&
+ reached_opaque->btpo_next == reached_from;
+ if (all_half_dead)
+ {
+ XLogRecPtr pagelsn = PageGetLSN(page);
+
+ /* pagelsn should point to an XLOG_BTREE_MARK_PAGE_HALFDEAD */
+ ereport(DEBUG1,
+ (errcode(ERRCODE_NO_DATA),
+ errmsg_internal("harmless interrupted page deletion detected in index \"%s\"",
+ RelationGetRelationName(state->rel)),
+ errdetail_internal("Block=%u right block=%u page lsn=%X/%X.",
+ reached, reached_from,
+ (uint32) (pagelsn >> 32),
+ (uint32) pagelsn)));
+
+ reached_from = reached;
+ reached = reached_opaque->btpo_prev;
+ }
+
+ pfree(page);
+ }
+
+ return all_half_dead;
+}
+
/*
* Function performs the following checks on target page, or pages ancillary to
* target page: