]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
instrumentation: Allocate query level instrumentation in ExecutorStart
authorAndres Freund <andres@anarazel.de>
Wed, 8 Apr 2026 04:02:26 +0000 (00:02 -0400)
committerAndres Freund <andres@anarazel.de>
Wed, 8 Apr 2026 04:06:45 +0000 (00:06 -0400)
Until now extensions that wanted to measure overall query execution could
create QueryDesc->totaltime, which the core executor would then start and
stop.  That's a bit odd and composes badly, e.g. extensions always had to use
INSTRUMENT_ALL, because otherwise another extension might not get what they
need.

Instead this introduces a new field, QueryDesc->query_instr_options, that
extensions can use to indicate whether they need query level instrumentation
populated, and with which instrumentation options. Extensions should take care
to only add options they need, instead of replacing the options of others.

The prior name of the field, totaltime, sounded like it would only measure
time, but these days the instrumentation infrastructure can track more
resources.  The secondary benefit is that this will make it obvious to
extensions that they may not create the Instrumentation struct themselves
anymore (often extensions build only against a postgres build without
assertions).

Adjust pg_stat_statements and auto_explain to match, and lower the
requested instrumentation level for auto_explain to INSTRUMENT_TIMER,
since the summary instrumentation it needs is only runtime.

The reason to push this now, rather in the PG 20 cycle, is that 5a79e78501f
already required extensions using query level instrumentations to adjust their
code, and it seemed undesirable to require them to do so again for 20.

Author: Lukas Fittl <lukas@fittl.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CAP53Pkyqsht+exJQYRsjhSWYKu+vFGHhPub7m6PmFD6Or0=p1g@mail.gmail.com

contrib/auto_explain/auto_explain.c
contrib/pg_stat_statements/pg_stat_statements.c
src/backend/executor/execMain.c
src/backend/tcop/pquery.c
src/include/executor/execdesc.h

index 9114ced45a7e4dd09f4ad3ac9f8273705b1af491..97ce498d0c1a055a8eaa135264c7bc4c12f225ec 100644 (file)
@@ -346,6 +346,9 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
 
        if (auto_explain_enabled())
        {
+               /* We're always interested in runtime */
+               queryDesc->query_instr_options |= INSTRUMENT_TIMER;
+
                /* Enable per-node instrumentation iff log_analyze is required. */
                if (auto_explain_log_analyze && (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
                {
@@ -366,23 +369,6 @@ explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
                prev_ExecutorStart(queryDesc, eflags);
        else
                standard_ExecutorStart(queryDesc, eflags);
-
-       if (auto_explain_enabled())
-       {
-               /*
-                * Set up to track total elapsed time in ExecutorRun.  Make sure the
-                * space is allocated in the per-query context so it will go away at
-                * ExecutorEnd.
-                */
-               if (queryDesc->totaltime == NULL)
-               {
-                       MemoryContext oldcxt;
-
-                       oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
-                       queryDesc->totaltime = InstrAlloc(INSTRUMENT_ALL);
-                       MemoryContextSwitchTo(oldcxt);
-               }
-       }
 }
 
 /*
@@ -434,7 +420,7 @@ explain_ExecutorFinish(QueryDesc *queryDesc)
 static void
 explain_ExecutorEnd(QueryDesc *queryDesc)
 {
-       if (queryDesc->totaltime && auto_explain_enabled())
+       if (queryDesc->query_instr && auto_explain_enabled())
        {
                MemoryContext oldcxt;
                double          msec;
@@ -446,7 +432,7 @@ explain_ExecutorEnd(QueryDesc *queryDesc)
                oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
 
                /* Log plan if duration is exceeded. */
-               msec = INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total);
+               msec = INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total);
                if (msec >= auto_explain_log_min_duration)
                {
                        ExplainState *es = NewExplainState();
index b5000bc14b78e965ab8899efbdfa9a6d8aae6cfa..95a5411a39d954b325352d0a32ee758a62dfb53d 100644 (file)
@@ -993,11 +993,6 @@ pgss_planner(Query *parse,
 static void
 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
-       if (prev_ExecutorStart)
-               prev_ExecutorStart(queryDesc, eflags);
-       else
-               standard_ExecutorStart(queryDesc, eflags);
-
        /*
         * If query has queryId zero, don't track it.  This prevents double
         * counting of optimizable statements that are directly contained in
@@ -1005,20 +1000,14 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
         */
        if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
        {
-               /*
-                * Set up to track total elapsed time in ExecutorRun.  Make sure the
-                * space is allocated in the per-query context so it will go away at
-                * ExecutorEnd.
-                */
-               if (queryDesc->totaltime == NULL)
-               {
-                       MemoryContext oldcxt;
-
-                       oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
-                       queryDesc->totaltime = InstrAlloc(INSTRUMENT_ALL);
-                       MemoryContextSwitchTo(oldcxt);
-               }
+               /* Request all summary instrumentation, i.e. timing, buffers and WAL */
+               queryDesc->query_instr_options |= INSTRUMENT_ALL;
        }
+
+       if (prev_ExecutorStart)
+               prev_ExecutorStart(queryDesc, eflags);
+       else
+               standard_ExecutorStart(queryDesc, eflags);
 }
 
 /*
@@ -1071,7 +1060,7 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
 {
        int64           queryId = queryDesc->plannedstmt->queryId;
 
-       if (queryId != INT64CONST(0) && queryDesc->totaltime &&
+       if (queryId != INT64CONST(0) && queryDesc->query_instr &&
                pgss_enabled(nesting_level))
        {
                pgss_store(queryDesc->sourceText,
@@ -1079,10 +1068,10 @@ pgss_ExecutorEnd(QueryDesc *queryDesc)
                                   queryDesc->plannedstmt->stmt_location,
                                   queryDesc->plannedstmt->stmt_len,
                                   PGSS_EXEC,
-                                  INSTR_TIME_GET_MILLISEC(queryDesc->totaltime->total),
+                                  INSTR_TIME_GET_MILLISEC(queryDesc->query_instr->total),
                                   queryDesc->estate->es_total_processed,
-                                  &queryDesc->totaltime->bufusage,
-                                  &queryDesc->totaltime->walusage,
+                                  &queryDesc->query_instr->bufusage,
+                                  &queryDesc->query_instr->walusage,
                                   queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
                                   NULL,
                                   queryDesc->estate->es_parallel_workers_to_launch,
index b0f636bf8b6c2c8713400f2684b473a3be94ac85..4b30f7686801a3e9ce75fe17a3bf0a7605f08e49 100644 (file)
@@ -250,6 +250,15 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
        estate->es_instrument = queryDesc->instrument_options;
        estate->es_jit_flags = queryDesc->plannedstmt->jitFlags;
 
+       /*
+        * Set up query-level instrumentation if extensions have requested it via
+        * query_instr_options. Ensure an extension has not allocated query_instr
+        * itself.
+        */
+       Assert(queryDesc->query_instr == NULL);
+       if (queryDesc->query_instr_options)
+               queryDesc->query_instr = InstrAlloc(queryDesc->query_instr_options);
+
        /*
         * Set up an AFTER-trigger statement context, unless told not to, or
         * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
@@ -332,8 +341,8 @@ standard_ExecutorRun(QueryDesc *queryDesc,
        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
        /* Allow instrumentation of Executor overall runtime */
-       if (queryDesc->totaltime)
-               InstrStart(queryDesc->totaltime);
+       if (queryDesc->query_instr)
+               InstrStart(queryDesc->query_instr);
 
        /*
         * extract information from the query descriptor and the query feature.
@@ -384,8 +393,8 @@ standard_ExecutorRun(QueryDesc *queryDesc,
        if (sendTuples)
                dest->rShutdown(dest);
 
-       if (queryDesc->totaltime)
-               InstrStop(queryDesc->totaltime);
+       if (queryDesc->query_instr)
+               InstrStop(queryDesc->query_instr);
 
        MemoryContextSwitchTo(oldcontext);
 }
@@ -434,8 +443,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
        oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
 
        /* Allow instrumentation of Executor overall runtime */
-       if (queryDesc->totaltime)
-               InstrStart(queryDesc->totaltime);
+       if (queryDesc->query_instr)
+               InstrStart(queryDesc->query_instr);
 
        /* Run ModifyTable nodes to completion */
        ExecPostprocessPlan(estate);
@@ -444,8 +453,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
        if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
                AfterTriggerEndQuery(estate);
 
-       if (queryDesc->totaltime)
-               InstrStop(queryDesc->totaltime);
+       if (queryDesc->query_instr)
+               InstrStop(queryDesc->query_instr);
 
        MemoryContextSwitchTo(oldcontext);
 
@@ -524,7 +533,7 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
        queryDesc->tupDesc = NULL;
        queryDesc->estate = NULL;
        queryDesc->planstate = NULL;
-       queryDesc->totaltime = NULL;
+       queryDesc->query_instr = NULL;
 }
 
 /* ----------------------------------------------------------------
index d8fc75d0bb99570f99655cb69b4ebb2a481ada8e..ee731000820203d5ad4ea6f620eda5293a4caec0 100644 (file)
@@ -86,12 +86,13 @@ CreateQueryDesc(PlannedStmt *plannedstmt,
        qd->params = params;            /* parameter values passed into query */
        qd->queryEnv = queryEnv;
        qd->instrument_options = instrument_options;    /* instrumentation wanted? */
+       qd->query_instr_options = 0;
 
        /* null these fields until set by ExecutorStart */
        qd->tupDesc = NULL;
        qd->estate = NULL;
        qd->planstate = NULL;
-       qd->totaltime = NULL;
+       qd->query_instr = NULL;
 
        /* not yet executed */
        qd->already_executed = false;
index d3a572428449d8a2b6ee806de5cc8f55adc2e5e1..37c2576e4bc9b0db45a7cd920f4d72d5a4de6bc2 100644 (file)
@@ -42,6 +42,8 @@ typedef struct QueryDesc
        ParamListInfo params;           /* param values being passed in */
        QueryEnvironment *queryEnv; /* query environment passed in */
        int                     instrument_options; /* OR of InstrumentOption flags */
+       int                     query_instr_options;    /* OR of InstrumentOption flags for
+                                                                                * query_instr */
 
        /* These fields are set by ExecutorStart */
        TupleDesc       tupDesc;                /* descriptor for result tuples */
@@ -51,8 +53,8 @@ typedef struct QueryDesc
        /* This field is set by ExecutePlan */
        bool            already_executed;       /* true if previously executed */
 
-       /* This is always set NULL by the core system, but plugins can change it */
-       struct Instrumentation *totaltime;      /* total time spent in ExecutorRun */
+       /* This field is allocated by ExecutorStart if needed */
+       struct Instrumentation *query_instr;    /* query level instrumentation */
 } QueryDesc;
 
 /* in pquery.c */