]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Apply band-aid fix for an oversight in reparameterize_path_by_child.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 1 Feb 2024 17:34:21 +0000 (12:34 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 1 Feb 2024 17:34:21 +0000 (12:34 -0500)
The path we wish to reparameterize is not a standalone object:
in particular, it implicitly references baserestrictinfo clauses
in the associated RelOptInfo, and if it's a SampleScan path then
there is also the TableSampleClause in the RTE to worry about.
Both of those could contain lateral references to the join partner
relation, which would need to be modified to refer to its child.
Since we aren't doing that, affected queries can give wrong answers,
or odd failures such as "variable not found in subplan target list",
or executor crashes.  But we can't just summarily modify those
expressions, because they are shared with other paths for the rel.
We'd break things if we modify them and then end up using some
non-partitioned-join path.

In HEAD, we plan to fix this by postponing reparameterization
until create_plan(), when we know that those other paths are
no longer of interest, and then adjusting those expressions along
with the ones in the path itself.  That seems like too big a change
for stable branches however.  In the back branches, let's just detect
whether any troublesome lateral references actually exist in those
expressions, and fail reparameterization if so.  This will result in
not performing a partitioned join in such cases.  Given the lack of
field complaints, nobody's likely to miss the optimization.

Report and patch by Richard Guo.  Apply to 12-16 only, since
the intended fix for HEAD looks quite different.  We're not quite
ready to push the HEAD fix, but with back-branch releases coming
up soon, it seems wise to get this stopgap fix in place there.

Discussion: https://postgr.es/m/CAMbWs496+N=UAjOc=rcD3P7B6oJe4rZw08e_TZRUsWbPxZW3Tw@mail.gmail.com

src/backend/optimizer/util/pathnode.c
src/test/regress/expected/partition_join.out
src/test/regress/sql/partition_join.sql

index bbb04d239da5b814fde3b2575dc1760a449f4342..7844216794403fff971e6e37cdb9d7bd3e4d2034 100644 (file)
@@ -26,6 +26,7 @@
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "optimizer/placeholder.h"
 #include "optimizer/planmain.h"
 #include "optimizer/prep.h"
 #include "optimizer/restrictinfo.h"
@@ -57,6 +58,10 @@ static int   append_startup_cost_compare(const void *a, const void *b);
 static List *reparameterize_pathlist_by_child(PlannerInfo *root,
                                                                                          List *pathlist,
                                                                                          RelOptInfo *child_rel);
+static bool contain_references_to(PlannerInfo *root, Node *clause,
+                                                                 Relids relids);
+static bool ris_contain_references_to(PlannerInfo *root, List *rinfos,
+                                                                         Relids relids);
 
 
 /*****************************************************************************
@@ -3915,6 +3920,40 @@ do { \
        switch (nodeTag(path))
        {
                case T_Path:
+
+                       /*
+                        * If the path's restriction clauses contain lateral references to
+                        * the other relation, we can't reparameterize, because we must
+                        * not change the RelOptInfo's contents here.  (Doing so would
+                        * break things if we end up using a non-partitionwise join.)
+                        */
+                       if (ris_contain_references_to(root,
+                                                                                 path->parent->baserestrictinfo,
+                                                                                 child_rel->top_parent_relids))
+                               return NULL;
+
+                       /*
+                        * If it's a SampleScan with tablesample parameters referencing
+                        * the other relation, we can't reparameterize, because we must
+                        * not change the RTE's contents here.  (Doing so would break
+                        * things if we end up using a non-partitionwise join.)
+                        */
+                       if (path->pathtype == T_SampleScan)
+                       {
+                               Index           scan_relid = path->parent->relid;
+                               RangeTblEntry *rte;
+
+                               /* it should be a base rel with a tablesample clause... */
+                               Assert(scan_relid > 0);
+                               rte = planner_rt_fetch(scan_relid, root);
+                               Assert(rte->rtekind == RTE_RELATION);
+                               Assert(rte->tablesample != NULL);
+
+                               if (contain_references_to(root, (Node *) rte->tablesample,
+                                                                                 child_rel->top_parent_relids))
+                                       return NULL;
+                       }
+
                        FLAT_COPY_PATH(new_path, path, Path);
                        break;
 
@@ -3922,6 +3961,18 @@ do { \
                        {
                                IndexPath  *ipath;
 
+                               /*
+                                * If the path's restriction clauses contain lateral
+                                * references to the other relation, we can't reparameterize,
+                                * because we must not change the IndexOptInfo's contents
+                                * here.  (Doing so would break things if we end up using a
+                                * non-partitionwise join.)
+                                */
+                               if (ris_contain_references_to(root,
+                                                                                         path->parent->baserestrictinfo,
+                                                                                         child_rel->top_parent_relids))
+                                       return NULL;
+
                                FLAT_COPY_PATH(ipath, path, IndexPath);
                                ADJUST_CHILD_ATTRS(ipath->indexclauses);
                                new_path = (Path *) ipath;
@@ -3932,6 +3983,18 @@ do { \
                        {
                                BitmapHeapPath *bhpath;
 
+                               /*
+                                * If the path's restriction clauses contain lateral
+                                * references to the other relation, we can't reparameterize,
+                                * because we must not change the RelOptInfo's contents here.
+                                * (Doing so would break things if we end up using a
+                                * non-partitionwise join.)
+                                */
+                               if (ris_contain_references_to(root,
+                                                                                         path->parent->baserestrictinfo,
+                                                                                         child_rel->top_parent_relids))
+                                       return NULL;
+
                                FLAT_COPY_PATH(bhpath, path, BitmapHeapPath);
                                REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual);
                                new_path = (Path *) bhpath;
@@ -3963,6 +4026,18 @@ do { \
                                ForeignPath *fpath;
                                ReparameterizeForeignPathByChild_function rfpc_func;
 
+                               /*
+                                * If the path's restriction clauses contain lateral
+                                * references to the other relation, we can't reparameterize,
+                                * because we must not change the RelOptInfo's contents here.
+                                * (Doing so would break things if we end up using a
+                                * non-partitionwise join.)
+                                */
+                               if (ris_contain_references_to(root,
+                                                                                         path->parent->baserestrictinfo,
+                                                                                         child_rel->top_parent_relids))
+                                       return NULL;
+
                                FLAT_COPY_PATH(fpath, path, ForeignPath);
                                if (fpath->fdw_outerpath)
                                        REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath);
@@ -3981,6 +4056,18 @@ do { \
                        {
                                CustomPath *cpath;
 
+                               /*
+                                * If the path's restriction clauses contain lateral
+                                * references to the other relation, we can't reparameterize,
+                                * because we must not change the RelOptInfo's contents here.
+                                * (Doing so would break things if we end up using a
+                                * non-partitionwise join.)
+                                */
+                               if (ris_contain_references_to(root,
+                                                                                         path->parent->baserestrictinfo,
+                                                                                         child_rel->top_parent_relids))
+                                       return NULL;
+
                                FLAT_COPY_PATH(cpath, path, CustomPath);
                                REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths);
                                if (cpath->methods &&
@@ -4146,3 +4233,91 @@ reparameterize_pathlist_by_child(PlannerInfo *root,
 
        return result;
 }
+
+/*
+ * contain_references_to
+ *             Detect whether any Vars or PlaceHolderVars in the given clause contain
+ *             lateral references to the given 'relids'.
+ */
+static bool
+contain_references_to(PlannerInfo *root, Node *clause, Relids relids)
+{
+       bool            ret = false;
+       List       *vars;
+       ListCell   *lc;
+
+       /*
+        * Examine all Vars and PlaceHolderVars used in the clause.
+        *
+        * By omitting the relevant flags, this also gives us a cheap sanity check
+        * that no aggregates or window functions appear in the clause.  We don't
+        * expect any of those in scan-level restrictions or tablesamples.
+        */
+       vars = pull_var_clause(clause, PVC_INCLUDE_PLACEHOLDERS);
+       foreach(lc, vars)
+       {
+               Node       *node = (Node *) lfirst(lc);
+
+               if (IsA(node, Var))
+               {
+                       Var                *var = (Var *) node;
+
+                       if (bms_is_member(var->varno, relids))
+                       {
+                               ret = true;
+                               break;
+                       }
+               }
+               else if (IsA(node, PlaceHolderVar))
+               {
+                       PlaceHolderVar *phv = (PlaceHolderVar *) node;
+                       PlaceHolderInfo *phinfo = find_placeholder_info(root, phv, false);
+
+                       /*
+                        * We should check both ph_eval_at (in case the PHV is to be
+                        * computed at the other relation and then laterally referenced
+                        * here) and ph_lateral (in case the PHV is to be evaluated here
+                        * but contains lateral references to the other relation).  The
+                        * former case should not occur in baserestrictinfo clauses, but
+                        * it can occur in tablesample clauses.
+                        */
+                       if (bms_overlap(phinfo->ph_eval_at, relids) ||
+                               bms_overlap(phinfo->ph_lateral, relids))
+                       {
+                               ret = true;
+                               break;
+                       }
+               }
+               else
+                       Assert(false);
+       }
+
+       list_free(vars);
+
+       return ret;
+}
+
+/*
+ * ris_contain_references_to
+ *             Apply contain_references_to() to a list of RestrictInfos.
+ *
+ * We need extra code for this because pull_var_clause() can't descend
+ * through RestrictInfos.
+ */
+static bool
+ris_contain_references_to(PlannerInfo *root, List *rinfos, Relids relids)
+{
+       ListCell   *lc;
+
+       foreach(lc, rinfos)
+       {
+               RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+               /* Pseudoconstant clauses can't contain any Vars or PHVs */
+               if (rinfo->pseudoconstant)
+                       continue;
+               if (contain_references_to(root, (Node *) rinfo->clause, relids))
+                       return true;
+       }
+       return false;
+}
index fa063485f55715c8b0121d1e7c9a7e0385406d9b..20154eff9c9e9d90cdb13f43079405034166a979 100644 (file)
@@ -459,6 +459,99 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
  550 |     | 
 (12 rows)
 
+SET max_parallel_workers_per_gather = 0;
+-- If there are lateral references to the other relation in sample scan,
+-- we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 JOIN LATERAL
+                         (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
+                         ON t1.a = s.a;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Nested Loop
+   ->  Append
+         ->  Seq Scan on prt1_p1 t1
+         ->  Seq Scan on prt1_p2 t1_1
+         ->  Seq Scan on prt1_p3 t1_2
+   ->  Append
+         ->  Sample Scan on prt1_p1 t2
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: (t1.a = a)
+         ->  Sample Scan on prt1_p2 t2_1
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: (t1.a = a)
+         ->  Sample Scan on prt1_p3 t2_2
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: (t1.a = a)
+(15 rows)
+
+-- If there are lateral references to the other relation in scan's restriction
+-- clauses, we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.a;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         ->  Append
+               ->  Seq Scan on prt1_p1 t1
+               ->  Seq Scan on prt1_p2 t1_1
+               ->  Seq Scan on prt1_p3 t1_2
+         ->  Append
+               ->  Index Scan using iprt2_p1_b on prt2_p1 t2
+                     Index Cond: (b = t1.a)
+                     Filter: (t1.b = a)
+               ->  Index Scan using iprt2_p2_b on prt2_p2 t2_1
+                     Index Cond: (b = t1.a)
+                     Filter: (t1.b = a)
+               ->  Index Scan using iprt2_p3_b on prt2_p3 t2_2
+                     Index Cond: (b = t1.a)
+                     Filter: (t1.b = a)
+(16 rows)
+
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.a;
+ count 
+-------
+   100
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.b;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         ->  Append
+               ->  Seq Scan on prt1_p1 t1
+               ->  Seq Scan on prt1_p2 t1_1
+               ->  Seq Scan on prt1_p3 t1_2
+         ->  Append
+               ->  Index Only Scan using iprt2_p1_b on prt2_p1 t2
+                     Index Cond: (b = t1.a)
+                     Filter: (b = t1.b)
+               ->  Index Only Scan using iprt2_p2_b on prt2_p2 t2_1
+                     Index Cond: (b = t1.a)
+                     Filter: (b = t1.b)
+               ->  Index Only Scan using iprt2_p3_b on prt2_p3 t2_2
+                     Index Cond: (b = t1.a)
+                     Filter: (b = t1.b)
+(16 rows)
+
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.b;
+ count 
+-------
+     5
+(1 row)
+
+RESET max_parallel_workers_per_gather;
 -- bug with inadequate sort key representation
 SET enable_partitionwise_aggregate TO true;
 SET enable_hashjoin TO false;
@@ -1787,6 +1880,80 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
  550 | 0 | 0002 |     |      |     |     |      
 (12 rows)
 
+SET max_parallel_workers_per_gather = 0;
+-- If there are lateral references to the other relation in sample scan,
+-- we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 JOIN LATERAL
+                         (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
+                         ON t1.a = s.a AND t1.b = s.b AND t1.c = s.c;
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Nested Loop
+   ->  Append
+         ->  Seq Scan on prt1_l_p1 t1
+         ->  Seq Scan on prt1_l_p2_p1 t1_1
+         ->  Seq Scan on prt1_l_p2_p2 t1_2
+         ->  Seq Scan on prt1_l_p3_p1 t1_3
+         ->  Seq Scan on prt1_l_p3_p2 t1_4
+   ->  Append
+         ->  Sample Scan on prt1_l_p1 t2
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
+         ->  Sample Scan on prt1_l_p2_p1 t2_1
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
+         ->  Sample Scan on prt1_l_p2_p2 t2_2
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
+         ->  Sample Scan on prt1_l_p3_p1 t2_3
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
+         ->  Sample Scan on prt1_l_p3_p2 t2_4
+               Sampling: system (t1.a) REPEATABLE (t1.b)
+               Filter: ((t1.a = a) AND (t1.b = b) AND ((t1.c)::text = (c)::text))
+(23 rows)
+
+-- If there are lateral references to the other relation in scan's restriction
+-- clauses, we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
+                         ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
+                         WHERE s.t1b = s.a;
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
+ Aggregate
+   ->  Nested Loop
+         ->  Append
+               ->  Seq Scan on prt1_l_p1 t1
+               ->  Seq Scan on prt1_l_p2_p1 t1_1
+               ->  Seq Scan on prt1_l_p2_p2 t1_2
+               ->  Seq Scan on prt1_l_p3_p1 t1_3
+               ->  Seq Scan on prt1_l_p3_p2 t1_4
+         ->  Append
+               ->  Seq Scan on prt2_l_p1 t2
+                     Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
+               ->  Seq Scan on prt2_l_p2_p1 t2_1
+                     Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
+               ->  Seq Scan on prt2_l_p2_p2 t2_2
+                     Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
+               ->  Seq Scan on prt2_l_p3_p1 t2_3
+                     Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
+               ->  Seq Scan on prt2_l_p3_p2 t2_4
+                     Filter: ((a = t1.b) AND (t1.a = b) AND (t1.b = a) AND ((t1.c)::text = (c)::text))
+(19 rows)
+
+SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
+                         ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
+                         WHERE s.t1b = s.a;
+ count 
+-------
+   100
+(1 row)
+
+RESET max_parallel_workers_per_gather;
 -- join with one side empty
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
index ffbb87af8b214aa3ca4886718fa821be30ddca3a..82da1e68b355999669ed2a3b3f2fa47f7fb52000 100644 (file)
@@ -91,6 +91,33 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
                          (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
                          ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
 
+SET max_parallel_workers_per_gather = 0;
+-- If there are lateral references to the other relation in sample scan,
+-- we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 JOIN LATERAL
+                         (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
+                         ON t1.a = s.a;
+
+-- If there are lateral references to the other relation in scan's restriction
+-- clauses, we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.a;
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.a;
+
+EXPLAIN (COSTS OFF)
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.b;
+SELECT count(*) FROM prt1 t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2 t2) s
+                         ON t1.a = s.b WHERE s.t1b = s.b;
+RESET max_parallel_workers_per_gather;
+
 -- bug with inadequate sort key representation
 SET enable_partitionwise_aggregate TO true;
 SET enable_hashjoin TO false;
@@ -360,6 +387,27 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
                          (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
                          ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
 
+SET max_parallel_workers_per_gather = 0;
+-- If there are lateral references to the other relation in sample scan,
+-- we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 JOIN LATERAL
+                         (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
+                         ON t1.a = s.a AND t1.b = s.b AND t1.c = s.c;
+
+-- If there are lateral references to the other relation in scan's restriction
+-- clauses, we cannot generate a partitionwise join.
+EXPLAIN (COSTS OFF)
+SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
+                         ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
+                         WHERE s.t1b = s.a;
+SELECT COUNT(*) FROM prt1_l t1 LEFT JOIN LATERAL
+                         (SELECT t1.b AS t1b, t2.* FROM prt2_l t2) s
+                         ON t1.a = s.b AND t1.b = s.a AND t1.c = s.c
+                         WHERE s.t1b = s.a;
+RESET max_parallel_workers_per_gather;
+
 -- join with one side empty
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;