]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Account for optimized MinMax aggregates during SS_finalize_plan.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 18 May 2024 18:31:35 +0000 (14:31 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 18 May 2024 18:31:35 +0000 (14:31 -0400)
We are capable of optimizing MIN() and MAX() aggregates on indexed
columns into subqueries that exploit the index, rather than the normal
thing of scanning the whole table.  When we do this, we replace the
Aggref node(s) with Params referencing subquery outputs.  Such Params
really ought to be included in the per-plan-node extParam/allParam
sets computed by SS_finalize_plan.  However, we've never done so
up to now because of an ancient implementation choice to perform
that substitution during set_plan_references, which runs after
SS_finalize_plan, so that SS_finalize_plan never sees these Params.

The cleanest fix would be to perform a separate tree walk to do
these substitutions before SS_finalize_plan runs.  That seems
unattractive, first because a whole-tree mutation pass is expensive,
and second because we lack infrastructure for visiting expression
subtrees in a Plan tree, so that we'd need a new function knowing
as much as SS_finalize_plan knows about that.  I also considered
swapping the order of SS_finalize_plan and set_plan_references,
but that fell foul of various assumptions that seem tricky to fix.
So the approach adopted here is to teach SS_finalize_plan itself
to check for such Aggrefs.  I refactored things a bit in setrefs.c
to avoid having three copies of the code that does that.

Back-patch of v17 commits d0d44049d and 779ac2c74.  When d0d44049d
went in, there was no evidence that it was fixing a reachable bug,
so I refrained from back-patching.  Now we have such evidence.

Per bug #18465 from Hal Takahara.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/18465-2fae927718976b22@postgresql.org
Discussion: https://postgr.es/m/2391880.1689025003@sss.pgh.pa.us

src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/plan/subselect.c
src/include/optimizer/planmain.h
src/test/regress/expected/aggregates.out
src/test/regress/sql/aggregates.sql

index 9d912a844576cbce4b7ff741cbfdad1b3c9f0692..e165810e807f5db9f85592cad8cb840bd4bd7af0 100644 (file)
@@ -2088,22 +2088,14 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context)
        if (IsA(node, Aggref))
        {
                Aggref     *aggref = (Aggref *) node;
+               Param      *aggparam;
 
                /* See if the Aggref should be replaced by a Param */
-               if (context->root->minmax_aggs != NIL &&
-                       list_length(aggref->args) == 1)
+               aggparam = find_minmax_agg_replacement_param(context->root, aggref);
+               if (aggparam != NULL)
                {
-                       TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
-                       ListCell   *lc;
-
-                       foreach(lc, context->root->minmax_aggs)
-                       {
-                               MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
-
-                               if (mminfo->aggfnoid == aggref->aggfnoid &&
-                                       equal(mminfo->target, curTarget->expr))
-                                       return (Node *) copyObject(mminfo->param);
-                       }
+                       /* Make a copy of the Param for paranoia's sake */
+                       return (Node *) copyObject(aggparam);
                }
                /* If no match, just fall through to process it normally */
        }
@@ -3030,22 +3022,14 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
        if (IsA(node, Aggref))
        {
                Aggref     *aggref = (Aggref *) node;
+               Param      *aggparam;
 
                /* See if the Aggref should be replaced by a Param */
-               if (context->root->minmax_aggs != NIL &&
-                       list_length(aggref->args) == 1)
+               aggparam = find_minmax_agg_replacement_param(context->root, aggref);
+               if (aggparam != NULL)
                {
-                       TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
-                       ListCell   *lc;
-
-                       foreach(lc, context->root->minmax_aggs)
-                       {
-                               MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
-
-                               if (mminfo->aggfnoid == aggref->aggfnoid &&
-                                       equal(mminfo->target, curTarget->expr))
-                                       return (Node *) copyObject(mminfo->param);
-                       }
+                       /* Make a copy of the Param for paranoia's sake */
+                       return (Node *) copyObject(aggparam);
                }
                /* If no match, just fall through to process it normally */
        }
@@ -3199,6 +3183,38 @@ set_windowagg_runcondition_references(PlannerInfo *root,
        return newlist;
 }
 
+/*
+ * find_minmax_agg_replacement_param
+ *             If the given Aggref is one that we are optimizing into a subquery
+ *             (cf. planagg.c), then return the Param that should replace it.
+ *             Else return NULL.
+ *
+ * This is exported so that SS_finalize_plan can use it before setrefs.c runs.
+ * Note that it will not find anything until we have built a Plan from a
+ * MinMaxAggPath, as root->minmax_aggs will never be filled otherwise.
+ */
+Param *
+find_minmax_agg_replacement_param(PlannerInfo *root, Aggref *aggref)
+{
+       if (root->minmax_aggs != NIL &&
+               list_length(aggref->args) == 1)
+       {
+               TargetEntry *curTarget = (TargetEntry *) linitial(aggref->args);
+               ListCell   *lc;
+
+               foreach(lc, root->minmax_aggs)
+               {
+                       MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
+
+                       if (mminfo->aggfnoid == aggref->aggfnoid &&
+                               equal(mminfo->target, curTarget->expr))
+                               return mminfo->param;
+               }
+       }
+       return NULL;
+}
+
+
 /*****************************************************************************
  *                                     QUERY DEPENDENCY MANAGEMENT
  *****************************************************************************/
index a1957883baf3806f59a5e84989ec30992e8c71ee..61b1238431321c0a66997afe77c4038529c699d2 100644 (file)
@@ -2849,8 +2849,8 @@ finalize_plan(PlannerInfo *root, Plan *plan,
 }
 
 /*
- * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
- * expression tree to the result set.
+ * finalize_primnode: add IDs of all PARAM_EXEC params that appear (or will
+ * appear) in the given expression tree to the result set.
  */
 static bool
 finalize_primnode(Node *node, finalize_primnode_context *context)
@@ -2867,7 +2867,26 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
                }
                return false;                   /* no more to do here */
        }
-       if (IsA(node, SubPlan))
+       else if (IsA(node, Aggref))
+       {
+               /*
+                * Check to see if the aggregate will be replaced by a Param
+                * referencing a subquery output during setrefs.c.  If so, we must
+                * account for that Param here.  (For various reasons, it's not
+                * convenient to perform that substitution earlier than setrefs.c, nor
+                * to perform this processing after setrefs.c.  Thus we need a wart
+                * here.)
+                */
+               Aggref     *aggref = (Aggref *) node;
+               Param      *aggparam;
+
+               aggparam = find_minmax_agg_replacement_param(context->root, aggref);
+               if (aggparam != NULL)
+                       context->paramids = bms_add_member(context->paramids,
+                                                                                          aggparam->paramid);
+               /* Fall through to examine the agg's arguments */
+       }
+       else if (IsA(node, SubPlan))
        {
                SubPlan    *subplan = (SubPlan *) node;
                Plan       *plan = planner_subplan_get_plan(context->root, subplan);
index c4f61c1a09c02829e22271c052f830e5893ebe2c..6f87ee3258d646f42bd4611a9e9135cfae94b629 100644 (file)
@@ -113,6 +113,8 @@ extern bool innerrel_is_unique(PlannerInfo *root,
  */
 extern Plan *set_plan_references(PlannerInfo *root, Plan *plan);
 extern bool trivial_subqueryscan(SubqueryScan *plan);
+extern Param *find_minmax_agg_replacement_param(PlannerInfo *root,
+                                                                                               Aggref *aggref);
 extern void record_plan_function_dependency(PlannerInfo *root, Oid funcid);
 extern void record_plan_type_dependency(PlannerInfo *root, Oid typid);
 extern bool extract_query_dependencies_walker(Node *node, PlannerInfo *root);
index 26031bc780037858f2657e4a383ebe0a5c5f1e88..6b3b9f103d8fa3952de4b6a876b02f58862ffbdf 100644 (file)
@@ -1249,6 +1249,37 @@ NOTICE:  drop cascades to 3 other objects
 DETAIL:  drop cascades to table minmaxtest1
 drop cascades to table minmaxtest2
 drop cascades to table minmaxtest3
+-- DISTINCT can also trigger wrong answers with hash aggregation (bug #18465)
+begin;
+set local enable_sort = off;
+explain (costs off)
+  select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1)
+  from int4_tbl t0;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Seq Scan on int4_tbl t0
+   SubPlan 2
+     ->  HashAggregate
+           Group Key: $1
+           InitPlan 1 (returns $1)
+             ->  Limit
+                   ->  Seq Scan on int4_tbl t1
+                         Filter: ((f1 IS NOT NULL) AND (f1 = t0.f1))
+           ->  Result
+(9 rows)
+
+select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1)
+from int4_tbl t0;
+     f1      |     min     
+-------------+-------------
+           0 |           0
+      123456 |      123456
+     -123456 |     -123456
+  2147483647 |  2147483647
+ -2147483647 | -2147483647
+(5 rows)
+
+rollback;
 -- check for correct detection of nested-aggregate errors
 select max(min(unique1)) from tenk1;
 ERROR:  aggregate function calls cannot be nested
index 48bea8af5f8cda235b147400f172a04b2cb3f99f..b89cf26c5a2d22f2830ae7bd5da9b95506b59a07 100644 (file)
@@ -431,6 +431,16 @@ select distinct min(f1), max(f1) from minmaxtest;
 
 drop table minmaxtest cascade;
 
+-- DISTINCT can also trigger wrong answers with hash aggregation (bug #18465)
+begin;
+set local enable_sort = off;
+explain (costs off)
+  select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1)
+  from int4_tbl t0;
+select f1, (select distinct min(t1.f1) from int4_tbl t1 where t1.f1 = t0.f1)
+from int4_tbl t0;
+rollback;
+
 -- check for correct detection of nested-aggregate errors
 select max(min(unique1)) from tenk1;
 select (select max(min(unique1)) from int8_tbl) from tenk1;