From: Tomas Vondra
Date: Tue, 7 Apr 2026 21:25:01 +0000 (+0200)
Subject: Add EXPLAIN (IO) instrumentation for TidRangeScan
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e157fe6f76e0be4f05a725b3099cf53949a2c2af;p=thirdparty%2Fpostgresql.git

Add EXPLAIN (IO) instrumentation for TidRangeScan

Adds support for EXPLAIN (IO) instrumentation for TidRange scans. This
requires adding shared instrumentation for parallel scans, using the
separate DSM approach introduced by dd78e69cfc33.

Author: Tomas Vondra
Reviewed-by: Melanie Plageman
Reviewed-by: Lukas Fittl
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/flat/a177a6dd-240b-455a-8f25-aca0b1c08c6e%40vondra.me
---

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index af32f09b3a4..112c17b0d64 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -2149,6 +2149,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 				if (plan->qual)
 					show_instrumentation_count("Rows Removed by Filter", 1,
 											   planstate, es);
+				show_scan_io_usage((ScanState *) planstate, es);
 			}
 			break;
 		case T_ForeignScan:
@@ -4127,6 +4128,30 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es)
 					}
 				}
 
+				break;
+			}
+		case T_TidRangeScan:
+			{
+				SharedTidRangeScanInstrumentation *sinstrument
+					= ((TidRangeScanState *) planstate)->trss_sinstrument;
+
+				if (sinstrument)
+				{
+					for (int i = 0; i < sinstrument->num_workers; ++i)
+					{
+						TidRangeScanInstrumentation *winstrument = &sinstrument->sinstrument[i];
+
+						AccumulateIOStats(&stats, &winstrument->stats.io);
+
+						if (!es->workers_state)
+							continue;
+
+						ExplainOpenWorker(i, es);
+						print_io_usage(es, &winstrument->stats.io);
+						ExplainCloseWorker(i, es);
+					}
+				}
+
 				break;
 			}
 		default:
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 9690f0938ae..81b87d82fab 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -291,6 +291,9 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
 			if (planstate->plan->parallel_aware)
 				ExecTidRangeScanEstimate((TidRangeScanState *) planstate,
 										 e->pcxt);
+			/* even when not parallel-aware, for EXPLAIN ANALYZE */
+			ExecTidRangeScanInstrumentEstimate((TidRangeScanState *) planstate,
+											   e->pcxt);
 			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
@@ -536,6 +539,9 @@ ExecParallelInitializeDSM(PlanState *planstate,
 			if (planstate->plan->parallel_aware)
 				ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate,
 											  d->pcxt);
+			/* even when not parallel-aware, for EXPLAIN ANALYZE */
+			ExecTidRangeScanInstrumentInitDSM((TidRangeScanState *) planstate,
+											  d->pcxt);
 			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
@@ -1157,6 +1163,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
 		case T_SeqScanState:
 			ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate);
 			break;
+		case T_TidRangeScanState:
+			ExecTidRangeScanRetrieveInstrumentation((TidRangeScanState *) planstate);
+			break;
 		default:
 			break;
 	}
@@ -1430,6 +1439,9 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt)
 			if (planstate->plan->parallel_aware)
 				ExecTidRangeScanInitializeWorker((TidRangeScanState *) planstate,
 												 pwcxt);
+			/* even when not parallel-aware, for EXPLAIN ANALYZE */
+			ExecTidRangeScanInstrumentInitWorker((TidRangeScanState *) planstate,
+												 pwcxt);
 			break;
 		case T_AppendState:
 			if (planstate->plan->parallel_aware)
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index 4a8fe91b2b3..b387ed6c308 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -18,7 +18,9 @@
 #include "access/sysattr.h"
 #include "access/tableam.h"
 #include "catalog/pg_operator.h"
+#include "executor/execParallel.h"
 #include "executor/executor.h"
+#include "executor/instrument.h"
 #include "executor/nodeTidrangescan.h"
 #include "nodes/nodeFuncs.h"
 #include "utils/rel.h"
@@ -242,12 +244,19 @@ TidRangeNext(TidRangeScanState *node)
 
 		if (scandesc == NULL)
 		{
+			uint32		flags = SO_NONE;
+
+			if (ScanRelIsReadOnly(&node->ss))
+				flags |= SO_HINT_REL_READ_ONLY;
+
+			if (estate->es_instrument & INSTRUMENT_IO)
+				flags |= SO_SCAN_INSTRUMENT;
+
 			scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
 												estate->es_snapshot,
 												&node->trss_mintid,
 												&node->trss_maxtid,
-												ScanRelIsReadOnly(&node->ss) ?
-												SO_HINT_REL_READ_ONLY : SO_NONE);
+												flags);
 			node->ss.ss_currentScanDesc = scandesc;
 		}
 		else
@@ -342,6 +351,20 @@ ExecEndTidRangeScan(TidRangeScanState *node)
 {
 	TableScanDesc scan = node->ss.ss_currentScanDesc;
 
+	/* Collect IO stats for this process into shared instrumentation */
+	if (node->trss_sinstrument != NULL && IsParallelWorker())
+	{
+		TidRangeScanInstrumentation *si;
+
+		Assert(ParallelWorkerNumber < node->trss_sinstrument->num_workers);
+		si = &node->trss_sinstrument->sinstrument[ParallelWorkerNumber];
+
+		if (scan && scan->rs_instrument)
+		{
+			AccumulateIOStats(&si->stats.io, &scan->rs_instrument->io);
+		}
+	}
+
 	if (scan != NULL)
 		table_endscan(scan);
 }
@@ -454,6 +477,13 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
 {
 	EState	   *estate = node->ss.ps.state;
 	ParallelTableScanDesc pscan;
+	uint32		flags = SO_NONE;
+
+	if (ScanRelIsReadOnly(&node->ss))
+		flags |= SO_HINT_REL_READ_ONLY;
+
+	if (estate->es_instrument & INSTRUMENT_IO)
+		flags |= SO_SCAN_INSTRUMENT;
 
 	pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
 	table_parallelscan_initialize(node->ss.ss_currentRelation,
@@ -462,9 +492,7 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
 	node->ss.ss_currentScanDesc =
 		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-										  pscan,
-										  ScanRelIsReadOnly(&node->ss) ?
-										  SO_HINT_REL_READ_ONLY : SO_NONE);
+										  pscan, flags);
 }
 
 /* ----------------------------------------------------------------
@@ -494,11 +522,100 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
 								 ParallelWorkerContext *pwcxt)
 {
 	ParallelTableScanDesc pscan;
+	uint32		flags = SO_NONE;
+
+	if (ScanRelIsReadOnly(&node->ss))
+		flags |= SO_HINT_REL_READ_ONLY;
+
+	if (node->ss.ps.state->es_instrument & INSTRUMENT_IO)
+		flags |= SO_SCAN_INSTRUMENT;
 
 	pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
 	node->ss.ss_currentScanDesc =
 		table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-										  pscan,
-										  ScanRelIsReadOnly(&node->ss) ?
-										  SO_HINT_REL_READ_ONLY : SO_NONE);
+										  pscan, flags);
+}
+
+/*
+ * Compute the amount of space we'll need for the shared instrumentation and
+ * inform pcxt->estimator.
+ */
+void
+ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node,
+								   ParallelContext *pcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+	Size		size;
+
+	if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
+		return;
+
+	size = add_size(offsetof(SharedTidRangeScanInstrumentation, sinstrument),
+					mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+
+	shm_toc_estimate_chunk(&pcxt->estimator, size);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up parallel scan instrumentation.
+ */
+void
+ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node,
+								  ParallelContext *pcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+	SharedTidRangeScanInstrumentation *sinstrument;
+	Size		size;
+
+	if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
+		return;
+
+	size = add_size(offsetof(SharedTidRangeScanInstrumentation, sinstrument),
+					mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+	sinstrument = shm_toc_allocate(pcxt->toc, size);
+	memset(sinstrument, 0, size);
+	sinstrument->num_workers = pcxt->nworkers;
+	shm_toc_insert(pcxt->toc,
+				   node->ss.ps.plan->plan_node_id +
+				   PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
+				   sinstrument);
+	node->trss_sinstrument = sinstrument;
+}
+
+/*
+ * Look up and save the location of the shared instrumentation.
+ */
+void
+ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node,
+									 ParallelWorkerContext *pwcxt)
+{
+	EState	   *estate = node->ss.ps.state;
+
+	if ((estate->es_instrument & INSTRUMENT_IO) == 0)
+		return;
+
+	node->trss_sinstrument = shm_toc_lookup(pwcxt->toc,
+											node->ss.ps.plan->plan_node_id +
+											PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
+											false);
+}
+
+/*
+ * Transfer scan instrumentation from DSM to private memory.
+ */
+void
+ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node)
+{
+	SharedTidRangeScanInstrumentation *sinstrument = node->trss_sinstrument;
+	Size		size;
+
+	if (sinstrument == NULL)
+		return;
+
+	size = offsetof(SharedTidRangeScanInstrumentation, sinstrument)
+		+ sinstrument->num_workers * sizeof(TidRangeScanInstrumentation);
+
+	node->trss_sinstrument = palloc(size);
+	memcpy(node->trss_sinstrument, sinstrument, size);
 }
diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h
index 003dc262b5d..4076990408e 100644
--- a/src/include/executor/instrument_node.h
+++ b/src/include/executor/instrument_node.h
@@ -285,4 +285,22 @@ typedef struct SharedSeqScanInstrumentation
 	SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
 } SharedSeqScanInstrumentation;
 
+
+/*
+ * Instrumentation information for TID range scans
+ */
+typedef struct TidRangeScanInstrumentation
+{
+	TableScanInstrumentation stats;
+} TidRangeScanInstrumentation;
+
+/*
+ * Shared memory container for per-worker information
+ */
+typedef struct SharedTidRangeScanInstrumentation
+{
+	int			num_workers;
+	TidRangeScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedTidRangeScanInstrumentation;
+
 #endif							/* INSTRUMENT_NODE_H */
diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h
index 8752d1ea8c4..9e7d0a357bb 100644
--- a/src/include/executor/nodeTidrangescan.h
+++ b/src/include/executor/nodeTidrangescan.h
@@ -28,4 +28,13 @@ extern void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelConte
 extern void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt);
 extern void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt);
 
+/* instrument support */
+extern void ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node,
+											   ParallelContext *pcxt);
+extern void ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node,
+											  ParallelContext *pcxt);
+extern void ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node,
+												 ParallelWorkerContext *pwcxt);
+extern void ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node);
+
 #endif							/* NODETIDRANGESCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 56febb3204c..13359180d25 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1922,6 +1922,7 @@ typedef struct TidRangeScanState
 	ItemPointerData trss_maxtid;
 	bool		trss_inScan;
 	Size		trss_pscanlen;
+	struct SharedTidRangeScanInstrumentation *trss_sinstrument;
 } TidRangeScanState;
 
 /* ----------------
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index f323b9d758b..2dfe1b38826 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -2867,6 +2867,7 @@ SharedRecordTableKey
 SharedRecordTypmodRegistry
 SharedSeqScanInstrumentation
 SharedSortInfo
+SharedTidRangeScanInstrumentation
 SharedTuplestore
 SharedTuplestoreAccessor
 SharedTuplestoreChunk
@@ -3171,6 +3172,7 @@ TidOpExpr
 TidPath
 TidRangePath
 TidRangeScan
+TidRangeScanInstrumentation
 TidRangeScanState
 TidScan
 TidScanState