From: Tomas Vondra Date: Tue, 7 Apr 2026 21:06:43 +0000 (+0200) Subject: Add EXPLAIN (IO) instrumentation for SeqScan X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3b1117d6e2e47d86cdbd978b79434c630cb0ef52;p=thirdparty%2Fpostgresql.git Add EXPLAIN (IO) instrumentation for SeqScan Adds support for EXPLAIN (IO) instrumentation for sequential scans. This requires adding shared instrumentation, using the separate DSM approach introduced by dd78e69cfc33. Author: Tomas Vondra Reviewed-by: Melanie Plageman Reviewed-by: Lukas Fittl Reviewed-by: Andres Freund Discussion: https://postgr.es/m/flat/a177a6dd-240b-455a-8f25-aca0b1c08c6e%40vondra.me --- diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 20e0bc8f232..af32f09b3a4 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -2032,6 +2032,7 @@ ExplainNode(PlanState *planstate, List *ancestors, planstate, es); if (IsA(plan, CteScan)) show_ctescan_info(castNode(CteScanState, planstate), es); + show_scan_io_usage((ScanState *) planstate, es); break; case T_Gather: { @@ -4102,6 +4103,30 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es) } } + break; + } + case T_SeqScan: + { + SharedSeqScanInstrumentation *sinstrument + = ((SeqScanState *) planstate)->sinstrument; + + if (sinstrument) + { + for (int i = 0; i < sinstrument->num_workers; ++i) + { + SeqScanInstrumentation *winstrument = &sinstrument->sinstrument[i]; + + AccumulateIOStats(&stats, &winstrument->stats.io); + + if (!es->workers_state) + continue; + + ExplainOpenWorker(i, es); + print_io_usage(es, &winstrument->stats.io); + ExplainCloseWorker(i, es); + } + } + break; } default: diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 1a5ec0c305f..9690f0938ae 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -257,6 +257,9 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) if (planstate->plan->parallel_aware) ExecSeqScanEstimate((SeqScanState *) planstate, e->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecSeqScanInstrumentEstimate((SeqScanState *) planstate, + e->pcxt); break; case T_IndexScanState: if (planstate->plan->parallel_aware) @@ -500,6 +503,9 @@ ExecParallelInitializeDSM(PlanState *planstate, if (planstate->plan->parallel_aware) ExecSeqScanInitializeDSM((SeqScanState *) planstate, d->pcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecSeqScanInstrumentInitDSM((SeqScanState *) planstate, + d->pcxt); break; case T_IndexScanState: if (planstate->plan->parallel_aware) @@ -1148,6 +1154,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, case T_BitmapHeapScanState: ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate); break; + case T_SeqScanState: + ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate); + break; default: break; } @@ -1388,6 +1397,8 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) case T_SeqScanState: if (planstate->plan->parallel_aware) ExecSeqScanInitializeWorker((SeqScanState *) planstate, pwcxt); + /* even when not parallel-aware, for EXPLAIN ANALYZE */ + ExecSeqScanInstrumentInitWorker((SeqScanState *) planstate, pwcxt); break; case T_IndexScanState: if (planstate->plan->parallel_aware) diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 04803b0e37d..5bcb0a861d7 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -29,6 +29,7 @@ #include "access/relscan.h" #include "access/tableam.h" +#include "executor/execParallel.h" #include "executor/execScan.h" #include "executor/executor.h" #include "executor/nodeSeqscan.h" @@ -65,15 +66,21 @@ SeqNext(SeqScanState *node) if (scandesc == NULL) { + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (estate->es_instrument & INSTRUMENT_IO) + flags |= SO_SCAN_INSTRUMENT; + /* * We reach here if the scan is not parallel, or if we're serially * executing a scan that was planned to be parallel. */ scandesc = table_beginscan(node->ss.ss_currentRelation, estate->es_snapshot, - 0, NULL, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + 0, NULL, flags); node->ss.ss_currentScanDesc = scandesc; } @@ -302,6 +309,22 @@ ExecEndSeqScan(SeqScanState *node) */ scanDesc = node->ss.ss_currentScanDesc; + /* + * Collect I/O stats for this process into shared instrumentation. + */ + if (node->sinstrument != NULL && IsParallelWorker()) + { + SeqScanInstrumentation *si; + + Assert(ParallelWorkerNumber < node->sinstrument->num_workers); + si = &node->sinstrument->sinstrument[ParallelWorkerNumber]; + + if (scanDesc && scanDesc->rs_instrument) + { + AccumulateIOStats(&si->stats.io, &scanDesc->rs_instrument->io); + } + } + /* * close heap scan */ @@ -370,6 +393,13 @@ ExecSeqScanInitializeDSM(SeqScanState *node, { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (estate->es_instrument & INSTRUMENT_IO) + flags |= SO_SCAN_INSTRUMENT; pscan = shm_toc_allocate(pcxt->toc, node->pscan_len); table_parallelscan_initialize(node->ss.ss_currentRelation, @@ -378,9 +408,7 @@ ExecSeqScanInitializeDSM(SeqScanState *node, shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, flags); } /* ---------------------------------------------------------------- @@ -410,10 +438,97 @@ ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; + uint32 flags = SO_NONE; + + if (ScanRelIsReadOnly(&node->ss)) + flags |= SO_HINT_REL_READ_ONLY; + + if (node->ss.ps.state->es_instrument & INSTRUMENT_IO) + flags |= SO_SCAN_INSTRUMENT; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan, - ScanRelIsReadOnly(&node->ss) ? - SO_HINT_REL_READ_ONLY : SO_NONE); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, flags); +} + +/* + * Compute the amount of space we'll need for the shared instrumentation and + * inform pcxt->estimator. + */ +void +ExecSeqScanInstrumentEstimate(SeqScanState *node, ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + Size size; + + if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0) + return; + + size = add_size(offsetof(SharedSeqScanInstrumentation, sinstrument), + mul_size(pcxt->nworkers, sizeof(SeqScanInstrumentation))); + + shm_toc_estimate_chunk(&pcxt->estimator, size); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +/* + * Set up parallel sequential scan instrumentation. + */ +void +ExecSeqScanInstrumentInitDSM(SeqScanState *node, ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + SharedSeqScanInstrumentation *sinstrument; + Size size; + + if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0) + return; + + size = add_size(offsetof(SharedSeqScanInstrumentation, sinstrument), + mul_size(pcxt->nworkers, sizeof(SeqScanInstrumentation))); + sinstrument = shm_toc_allocate(pcxt->toc, size); + memset(sinstrument, 0, size); + sinstrument->num_workers = pcxt->nworkers; + shm_toc_insert(pcxt->toc, + node->ss.ps.plan->plan_node_id + + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, + sinstrument); + node->sinstrument = sinstrument; +} + +/* + * Look up and save the location of the shared instrumentation. + */ +void +ExecSeqScanInstrumentInitWorker(SeqScanState *node, + ParallelWorkerContext *pwcxt) +{ + EState *estate = node->ss.ps.state; + + if ((estate->es_instrument & INSTRUMENT_IO) == 0) + return; + + node->sinstrument = shm_toc_lookup(pwcxt->toc, + node->ss.ps.plan->plan_node_id + + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, + false); +} + +/* + * Transfer sequential scan instrumentation from DSM to private memory. + */ +void +ExecSeqScanRetrieveInstrumentation(SeqScanState *node) +{ + SharedSeqScanInstrumentation *sinstrument = node->sinstrument; + Size size; + + if (sinstrument == NULL) + return; + + size = offsetof(SharedSeqScanInstrumentation, sinstrument) + + sinstrument->num_workers * sizeof(SeqScanInstrumentation); + + node->sinstrument = palloc(size); + memcpy(node->sinstrument, sinstrument, size); } diff --git a/src/include/executor/instrument_node.h b/src/include/executor/instrument_node.h index 22a75ccd863..003dc262b5d 100644 --- a/src/include/executor/instrument_node.h +++ b/src/include/executor/instrument_node.h @@ -266,4 +266,23 @@ typedef struct SharedIncrementalSortInfo IncrementalSortInfo sinfo[FLEXIBLE_ARRAY_MEMBER]; } SharedIncrementalSortInfo; + +/* --------------------- + * Instrumentation information for sequential scans + * --------------------- + */ +typedef struct SeqScanInstrumentation +{ + TableScanInstrumentation stats; +} SeqScanInstrumentation; + +/* + * Shared memory container for per-worker information + */ +typedef struct SharedSeqScanInstrumentation +{ + int num_workers; + SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER]; +} SharedSeqScanInstrumentation; + #endif /* INSTRUMENT_NODE_H */ diff --git a/src/include/executor/nodeSeqscan.h b/src/include/executor/nodeSeqscan.h index 7a1490596fb..9c0ad4879d7 100644 --- a/src/include/executor/nodeSeqscan.h +++ b/src/include/executor/nodeSeqscan.h @@ -28,4 +28,13 @@ extern void ExecSeqScanReInitializeDSM(SeqScanState *node, ParallelContext *pcxt extern void ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt); +/* instrument support */ +extern void ExecSeqScanInstrumentEstimate(SeqScanState *node, + ParallelContext *pcxt); +extern void ExecSeqScanInstrumentInitDSM(SeqScanState *node, + ParallelContext *pcxt); +extern void ExecSeqScanInstrumentInitWorker(SeqScanState *node, + ParallelWorkerContext *pwcxt); +extern void ExecSeqScanRetrieveInstrumentation(SeqScanState *node); + #endif /* NODESEQSCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3ecae7552fc..56febb3204c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1670,6 +1670,7 @@ typedef struct SeqScanState { ScanState ss; /* its first field is NodeTag */ Size pscan_len; /* size of parallel heap scan descriptor */ + struct SharedSeqScanInstrumentation *sinstrument; } SeqScanState; /* ---------------- diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out index dc31c7ce9f9..74a4d87801e 100644 --- a/src/test/regress/expected/explain.out +++ b/src/test/regress/expected/explain.out @@ -100,7 +100,7 @@ select explain_filter('explain (buffers, format text) select * from int8_tbl i8' (1 row) \a -select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, buffers, io, format xml) select * from int8_tbl i8'); explain_filter @@ -119,6 +119,13 @@ explain_filter N.N N false + N.N + N + N + N + N + N.N + N.N N N N @@ -149,7 +156,7 @@ explain_filter (1 row) -select explain_filter('explain (analyze, serialize, buffers, format yaml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, serialize, buffers, io, format yaml) select * from int8_tbl i8'); explain_filter - Plan: Node Type: "Seq Scan" @@ -166,6 +173,13 @@ explain_filter Actual Rows: N.N Actual Loops: N Disabled: false + Average Prefetch Distance: N.N + Max Prefetch Distance: N + Prefetch Capacity: N + I/O Count: N + I/O Waits: N + Average I/O Size: N.N + Average I/Os In Progress: N.N Shared Hit Blocks: N Shared Read Blocks: N Shared Dirtied Blocks: N diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql index 8f10e1aff55..2f163c64bf6 100644 --- a/src/test/regress/sql/explain.sql +++ b/src/test/regress/sql/explain.sql @@ -69,8 +69,8 @@ select explain_filter('explain (analyze, buffers, format text) select * from int select explain_filter('explain (buffers, format text) select * from int8_tbl i8'); \a -select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8'); -select explain_filter('explain (analyze, serialize, buffers, format yaml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, buffers, io, format xml) select * from int8_tbl i8'); +select explain_filter('explain (analyze, serialize, buffers, io, format yaml) select * from int8_tbl i8'); select explain_filter('explain (buffers, format json) select * from int8_tbl i8'); \a diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ca54c783647..f323b9d758b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2800,6 +2800,7 @@ SelfJoinCandidate SemTPadded SemiAntiJoinFactors SeqScan +SeqScanInstrumentation SeqScanState SeqTable SeqTableData @@ -2864,6 +2865,7 @@ SharedMemoizeInfo SharedRecordTableEntry SharedRecordTableKey SharedRecordTypmodRegistry +SharedSeqScanInstrumentation SharedSortInfo SharedTuplestore SharedTuplestoreAccessor