git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Add EXPLAIN (IO) instrumentation for TidRangeScan
author Tomas Vondra <tomas.vondra@postgresql.org>
Tue, 7 Apr 2026 21:25:01 +0000 (23:25 +0200)
committer Tomas Vondra <tomas.vondra@postgresql.org>
Tue, 7 Apr 2026 21:25:05 +0000 (23:25 +0200)
Adds support for EXPLAIN (IO) instrumentation for TidRange scans. This
requires adding shared instrumentation for parallel scans, using the
separate DSM approach introduced by dd78e69cfc33.

Author: Tomas Vondra <tomas@vondra.me>
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/flat/a177a6dd-240b-455a-8f25-aca0b1c08c6e%40vondra.me

src/backend/commands/explain.c
src/backend/executor/execParallel.c
src/backend/executor/nodeTidrangescan.c
src/include/executor/instrument_node.h
src/include/executor/nodeTidrangescan.h
src/include/nodes/execnodes.h
src/tools/pgindent/typedefs.list

index af32f09b3a47ecaa5a4d0bb1cc5e399197c1bd96..112c17b0d64284682ee4ad2d86e64916b2446c14 100644 (file)
@@ -2149,6 +2149,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
                                if (plan->qual)
                                        show_instrumentation_count("Rows Removed by Filter", 1,
                                                                                           planstate, es);
+                               show_scan_io_usage((ScanState *) planstate, es);
                        }
                        break;
                case T_ForeignScan:
@@ -4127,6 +4128,30 @@ show_scan_io_usage(ScanState *planstate, ExplainState *es)
                                        }
                                }
 
+                               break;
+                       }
+               case T_TidRangeScan:
+                       {
+                               SharedTidRangeScanInstrumentation *sinstrument
+                               = ((TidRangeScanState *) planstate)->trss_sinstrument;
+
+                               if (sinstrument)
+                               {
+                                       for (int i = 0; i < sinstrument->num_workers; ++i)
+                                       {
+                                               TidRangeScanInstrumentation *winstrument = &sinstrument->sinstrument[i];
+
+                                               AccumulateIOStats(&stats, &winstrument->stats.io);
+
+                                               if (!es->workers_state)
+                                                       continue;
+
+                                               ExplainOpenWorker(i, es);
+                                               print_io_usage(es, &winstrument->stats.io);
+                                               ExplainCloseWorker(i, es);
+                                       }
+                               }
+
                                break;
                        }
                default:
index 9690f0938ae7a872ad9342d46820911cc46477c4..81b87d82fab47f077b38898fe740a073a5219732 100644 (file)
@@ -291,6 +291,9 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
                        if (planstate->plan->parallel_aware)
                                ExecTidRangeScanEstimate((TidRangeScanState *) planstate,
                                                                                 e->pcxt);
+                       /* even when not parallel-aware, for EXPLAIN ANALYZE */
+                       ExecTidRangeScanInstrumentEstimate((TidRangeScanState *) planstate,
+                                                                                          e->pcxt);
                        break;
                case T_AppendState:
                        if (planstate->plan->parallel_aware)
@@ -536,6 +539,9 @@ ExecParallelInitializeDSM(PlanState *planstate,
                        if (planstate->plan->parallel_aware)
                                ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate,
                                                                                          d->pcxt);
+                       /* even when not parallel-aware, for EXPLAIN ANALYZE */
+                       ExecTidRangeScanInstrumentInitDSM((TidRangeScanState *) planstate,
+                                                                                         d->pcxt);
                        break;
                case T_AppendState:
                        if (planstate->plan->parallel_aware)
@@ -1157,6 +1163,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
                case T_SeqScanState:
                        ExecSeqScanRetrieveInstrumentation((SeqScanState *) planstate);
                        break;
+               case T_TidRangeScanState:
+                       ExecTidRangeScanRetrieveInstrumentation((TidRangeScanState *) planstate);
+                       break;
                default:
                        break;
        }
@@ -1430,6 +1439,9 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt)
                        if (planstate->plan->parallel_aware)
                                ExecTidRangeScanInitializeWorker((TidRangeScanState *) planstate,
                                                                                                 pwcxt);
+                       /* even when not parallel-aware, for EXPLAIN ANALYZE */
+                       ExecTidRangeScanInstrumentInitWorker((TidRangeScanState *) planstate,
+                                                                                                pwcxt);
                        break;
                case T_AppendState:
                        if (planstate->plan->parallel_aware)
index 4a8fe91b2b342b4533c6495f7bf580af24c35a97..b387ed6c308369c3ccee2680357568defaf88fc3 100644 (file)
@@ -18,7 +18,9 @@
 #include "access/sysattr.h"
 #include "access/tableam.h"
 #include "catalog/pg_operator.h"
+#include "executor/execParallel.h"
 #include "executor/executor.h"
+#include "executor/instrument.h"
 #include "executor/nodeTidrangescan.h"
 #include "nodes/nodeFuncs.h"
 #include "utils/rel.h"
@@ -242,12 +244,19 @@ TidRangeNext(TidRangeScanState *node)
 
                if (scandesc == NULL)
                {
+                       uint32          flags = SO_NONE;
+
+                       if (ScanRelIsReadOnly(&node->ss))
+                               flags |= SO_HINT_REL_READ_ONLY;
+
+                       if (estate->es_instrument & INSTRUMENT_IO)
+                               flags |= SO_SCAN_INSTRUMENT;
+
                        scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
                                                                                                estate->es_snapshot,
                                                                                                &node->trss_mintid,
                                                                                                &node->trss_maxtid,
-                                                                                               ScanRelIsReadOnly(&node->ss) ?
-                                                                                               SO_HINT_REL_READ_ONLY : SO_NONE);
+                                                                                               flags);
                        node->ss.ss_currentScanDesc = scandesc;
                }
                else
@@ -342,6 +351,20 @@ ExecEndTidRangeScan(TidRangeScanState *node)
 {
        TableScanDesc scan = node->ss.ss_currentScanDesc;
 
+       /* Collect IO stats for this process into shared instrumentation */
+       if (node->trss_sinstrument != NULL && IsParallelWorker())
+       {
+               TidRangeScanInstrumentation *si;
+
+               Assert(ParallelWorkerNumber < node->trss_sinstrument->num_workers);
+               si = &node->trss_sinstrument->sinstrument[ParallelWorkerNumber];
+
+               if (scan && scan->rs_instrument)
+               {
+                       AccumulateIOStats(&si->stats.io, &scan->rs_instrument->io);
+               }
+       }
+
        if (scan != NULL)
                table_endscan(scan);
 }
@@ -454,6 +477,13 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
 {
        EState     *estate = node->ss.ps.state;
        ParallelTableScanDesc pscan;
+       uint32          flags = SO_NONE;
+
+       if (ScanRelIsReadOnly(&node->ss))
+               flags |= SO_HINT_REL_READ_ONLY;
+
+       if (estate->es_instrument & INSTRUMENT_IO)
+               flags |= SO_SCAN_INSTRUMENT;
 
        pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen);
        table_parallelscan_initialize(node->ss.ss_currentRelation,
@@ -462,9 +492,7 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt)
        shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan);
        node->ss.ss_currentScanDesc =
                table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-                                                                                 pscan,
-                                                                                 ScanRelIsReadOnly(&node->ss) ?
-                                                                                 SO_HINT_REL_READ_ONLY : SO_NONE);
+                                                                                 pscan, flags);
 }
 
 /* ----------------------------------------------------------------
@@ -494,11 +522,100 @@ ExecTidRangeScanInitializeWorker(TidRangeScanState *node,
                                                                 ParallelWorkerContext *pwcxt)
 {
        ParallelTableScanDesc pscan;
+       uint32          flags = SO_NONE;
+
+       if (ScanRelIsReadOnly(&node->ss))
+               flags |= SO_HINT_REL_READ_ONLY;
+
+       if (node->ss.ps.state->es_instrument & INSTRUMENT_IO)
+               flags |= SO_SCAN_INSTRUMENT;
 
        pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
        node->ss.ss_currentScanDesc =
                table_beginscan_parallel_tidrange(node->ss.ss_currentRelation,
-                                                                                 pscan,
-                                                                                 ScanRelIsReadOnly(&node->ss) ?
-                                                                                 SO_HINT_REL_READ_ONLY : SO_NONE);
+                                                                                 pscan, flags);
+}
+
+/*
+ * Compute the amount of space we'll need for the shared instrumentation and
+ * inform pcxt->estimator.
+ *
+ * The space is the SharedTidRangeScanInstrumentation header plus one
+ * TidRangeScanInstrumentation slot per pcxt->nworkers, reserved as a single
+ * chunk with a single TOC key.  Nothing is reserved unless INSTRUMENT_IO is
+ * set in es_instrument and pcxt->nworkers is nonzero.
+ *
+ * The same size computation is repeated in ExecTidRangeScanInstrumentInitDSM,
+ * which performs the actual allocation; the two need to stay in sync.
+ */
+void
+ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node,
+                                                                  ParallelContext *pcxt)
+{
+       EState     *estate = node->ss.ps.state;
+       Size            size;
+
+       if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
+               return;
+
+       size = add_size(offsetof(SharedTidRangeScanInstrumentation, sinstrument),
+                                       mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+
+       shm_toc_estimate_chunk(&pcxt->estimator, size);
+       shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up parallel scan instrumentation.
+ *
+ * Allocates the shared per-worker instrumentation array from pcxt->toc,
+ * zeroes it, and records the number of slots in num_workers.  The array is
+ * inserted into the TOC under the key
+ * plan_node_id + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET, which is the key
+ * ExecTidRangeScanInstrumentInitWorker looks up.  The pointer is also saved
+ * in node->trss_sinstrument.
+ *
+ * Does nothing unless INSTRUMENT_IO is set in es_instrument and
+ * pcxt->nworkers is nonzero, the same condition used by
+ * ExecTidRangeScanInstrumentEstimate when reserving the space.
+ */
+void
+ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node,
+                                                                 ParallelContext *pcxt)
+{
+       EState     *estate = node->ss.ps.state;
+       SharedTidRangeScanInstrumentation *sinstrument;
+       Size            size;
+
+       if ((estate->es_instrument & INSTRUMENT_IO) == 0 || pcxt->nworkers == 0)
+               return;
+
+       size = add_size(offsetof(SharedTidRangeScanInstrumentation, sinstrument),
+                                       mul_size(pcxt->nworkers, sizeof(TidRangeScanInstrumentation)));
+       sinstrument = shm_toc_allocate(pcxt->toc, size);
+       memset(sinstrument, 0, size);
+       sinstrument->num_workers = pcxt->nworkers;
+       shm_toc_insert(pcxt->toc,
+                                  node->ss.ps.plan->plan_node_id +
+                                  PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
+                                  sinstrument);
+       node->trss_sinstrument = sinstrument;
+}
+
+/*
+ * Look up and save the location of the shared instrumentation.
+ *
+ * Called from ExecParallelInitializeWorker, regardless of whether the plan
+ * node is parallel-aware.  The shared array is found in pwcxt->toc under the
+ * key plan_node_id + PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET and stored in
+ * node->trss_sinstrument.  ExecEndTidRangeScan later uses that pointer to add
+ * this process's IO stats to the slot indexed by ParallelWorkerNumber.
+ *
+ * Does nothing unless INSTRUMENT_IO is set in es_instrument.
+ */
+void
+ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node,
+                                                                        ParallelWorkerContext *pwcxt)
+{
+       EState     *estate = node->ss.ps.state;
+
+       if ((estate->es_instrument & INSTRUMENT_IO) == 0)
+               return;
+
+       node->trss_sinstrument = shm_toc_lookup(pwcxt->toc,
+                                                                                       node->ss.ps.plan->plan_node_id +
+                                                                                       PARALLEL_KEY_SCAN_INSTRUMENT_OFFSET,
+                                                                                       false);
+}
+
+/*
+ * Transfer scan instrumentation from DSM to private memory.
+ *
+ * Called from ExecParallelRetrieveInstrumentation.  Copies the shared header
+ * and all num_workers slots into a palloc'd buffer and repoints
+ * node->trss_sinstrument at that copy.  EXPLAIN later reads the per-worker
+ * IO stats through trss_sinstrument (see the T_TidRangeScan case in
+ * show_scan_io_usage).
+ *
+ * Does nothing if no shared instrumentation was set up for this node.
+ */
+void
+ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node)
+{
+       SharedTidRangeScanInstrumentation *sinstrument = node->trss_sinstrument;
+       Size            size;
+
+       if (sinstrument == NULL)
+               return;
+
+       size = offsetof(SharedTidRangeScanInstrumentation, sinstrument)
+               + sinstrument->num_workers * sizeof(TidRangeScanInstrumentation);
+
+       node->trss_sinstrument = palloc(size);
+       memcpy(node->trss_sinstrument, sinstrument, size);
+}
index 003dc262b5d83ef433b3eeaf029b3f39aabe0298..4076990408ef752d84ce8ddd42c52d6f73b1a5c9 100644 (file)
@@ -285,4 +285,22 @@ typedef struct SharedSeqScanInstrumentation
        SeqScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
 } SharedSeqScanInstrumentation;
 
+
+/*
+ *     Instrumentation information for TID range scans
+ *
+ *     Holds the table scan statistics of a single process.  For parallel
+ *     queries there is one of these per worker, stored in the array inside
+ *     SharedTidRangeScanInstrumentation.  The stats.io member is what the
+ *     executor accumulates at scan end and what EXPLAIN (IO) prints.
+ */
+typedef struct TidRangeScanInstrumentation
+{
+       TableScanInstrumentation stats;
+} TidRangeScanInstrumentation;
+
+/*
+ * Shared memory container for per-worker information
+ *
+ * num_workers is the number of entries in sinstrument[]; it is set from
+ * pcxt->nworkers when the container is allocated.  sinstrument[] is indexed
+ * by ParallelWorkerNumber, so each worker writes only its own slot.  Because
+ * of the flexible array member, the allocation size is
+ * offsetof(SharedTidRangeScanInstrumentation, sinstrument) plus
+ * num_workers * sizeof(TidRangeScanInstrumentation).
+ */
+typedef struct SharedTidRangeScanInstrumentation
+{
+       int                     num_workers;
+       TidRangeScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedTidRangeScanInstrumentation;
+
 #endif                                                 /* INSTRUMENT_NODE_H */
index 8752d1ea8c40d436fb5567e225b891a3ea8b4da6..9e7d0a357bbd02c967b8026d8ca1b5f4d43bf2c0 100644 (file)
@@ -28,4 +28,13 @@ extern void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelConte
 extern void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt);
 extern void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt);
 
+/* instrument support */
+extern void ExecTidRangeScanInstrumentEstimate(TidRangeScanState *node,
+                                                                                          ParallelContext *pcxt);
+extern void ExecTidRangeScanInstrumentInitDSM(TidRangeScanState *node,
+                                                                                         ParallelContext *pcxt);
+extern void ExecTidRangeScanInstrumentInitWorker(TidRangeScanState *node,
+                                                                                                ParallelWorkerContext *pwcxt);
+extern void ExecTidRangeScanRetrieveInstrumentation(TidRangeScanState *node);
+
 #endif                                                 /* NODETIDRANGESCAN_H */
index 56febb3204c96f3febebdde9b9e8194978f0be48..13359180d256a84bf1ecff91d70784fa415ca958 100644 (file)
@@ -1922,6 +1922,7 @@ typedef struct TidRangeScanState
        ItemPointerData trss_maxtid;
        bool            trss_inScan;
        Size            trss_pscanlen;
+       struct SharedTidRangeScanInstrumentation *trss_sinstrument;
 } TidRangeScanState;
 
 /* ----------------
index f323b9d758be6b7907cad0085c3b3f2d6f838736..2dfe1b38826e87cd88752051cdee3a493ec30f40 100644 (file)
@@ -2867,6 +2867,7 @@ SharedRecordTableKey
 SharedRecordTypmodRegistry
 SharedSeqScanInstrumentation
 SharedSortInfo
+SharedTidRangeScanInstrumentation
 SharedTuplestore
 SharedTuplestoreAccessor
 SharedTuplestoreChunk
@@ -3171,6 +3172,7 @@ TidOpExpr
 TidPath
 TidRangePath
 TidRangeScan
+TidRangeScanInstrumentation
 TidRangeScanState
 TidScan
 TidScanState