Ignore nullingrels when looking up statistics

author Richard Guo <rguo@postgresql.org>

Thu, 2 Jan 2025 09:02:02 +0000 (18:02 +0900)

committer Richard Guo <rguo@postgresql.org>

Thu, 2 Jan 2025 09:02:02 +0000 (18:02 +0900)
author Richard Guo <rguo@postgresql.org>
Thu, 2 Jan 2025 09:02:02 +0000 (18:02 +0900)
committer Richard Guo <rguo@postgresql.org>
Thu, 2 Jan 2025 09:02:02 +0000 (18:02 +0900)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index c4fcd0076eac0507d5c5d16aa8823e6cc2aa6edb..771781e28ca0ef72cbeed991b18521bed04cce35 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -120,6 +120,7 @@
  #include "optimizer/plancat.h"
  #include "parser/parse_clause.h"
  #include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
  #include "statistics/statistics.h"
  #include "storage/bufmgr.h"
  #include "utils/acl.h"
@@ -3273,6 +3274,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
  
         ndistinct = get_variable_numdistinct(vardata, &isdefault);
  
+       /*
+        * The nullingrels bits within the var could cause the same var to be
+        * counted multiple times if it's marked with different nullingrels.  They
+        * could also prevent us from matching the var to the expressions in
+        * extended statistics (see estimate_multivariate_ndistinct).  So strip
+        * them out first.
+        */
+       var = remove_nulling_relids(var, root->outer_join_rels, NULL);
+
         foreach(lc, varinfos)
         {
                 varinfo = (GroupVarInfo *) lfirst(lc);
@@ -4980,6 +4990,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
  {
         Node       *basenode;
         Relids          varnos;
+       Relids          basevarnos;
         RelOptInfo *onerel;
  
         /* Make sure we don't return dangling pointers in vardata */
@@ -5021,10 +5032,11 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
          * relation are considered "real" vars.
          */
         varnos = pull_varnos(root, basenode);
+       basevarnos = bms_difference(varnos, root->outer_join_rels);
  
         onerel = NULL;
  
-       switch (bms_membership(varnos))
+       switch (bms_membership(basevarnos))
         {
                 case BMS_EMPTY_SET:
                         /* No Vars at all ... must be pseudo-constant clause */
@@ -5033,7 +5045,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                         if (varRelid == 0 || bms_is_member(varRelid, varnos))
                         {
                                 onerel = find_base_rel(root,
-                                                                          (varRelid ? varRelid : bms_singleton_member(varnos)));
+                                                                          (varRelid ? varRelid : bms_singleton_member(basevarnos)));
                                 vardata->rel = onerel;
                                 node = basenode;        /* strip any relabeling */
                         }
@@ -5057,7 +5069,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                         break;
         }
  
-       bms_free(varnos);
+       bms_free(basevarnos);
  
         vardata->var = node;
         vardata->atttype = exprType(node);
@@ -5082,6 +5094,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                 ListCell   *slist;
                 Oid                     userid;
  
+               /*
+                * The nullingrels bits within the expression could prevent us from
+                * matching it to expressional index columns or to the expressions in
+                * extended statistics.  So strip them out first.
+                */
+               if (bms_overlap(varnos, root->outer_join_rels))
+                       node = remove_nulling_relids(node, root->outer_join_rels, NULL);
+
                 /*
                  * Determine the user ID to use for privilege checks: either
                  * onerel->userid if it's set (e.g., in case we're accessing the table
@@ -5352,6 +5372,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                         }
                 }
         }
+
+       bms_free(varnos);
  }
  
  /*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 9af8d61a732133646a8d9b3e6f96173b4e7b2c1c..84e35981ed8b4e2273b76b35b066c082f890c4d3 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2517,10 +2517,11 @@ where t1.f1 = coalesce(t2.f1, 1);
                 ->  Materialize
                       ->  Seq Scan on int4_tbl t2
                             Filter: (f1 > 1)
-         ->  Seq Scan on int4_tbl t3
+         ->  Materialize
+               ->  Seq Scan on int4_tbl t3
     ->  Materialize
           ->  Seq Scan on int4_tbl t4
-(13 rows)
+(14 rows)
  
  explain (costs off)
  select * from int4_tbl t1
@@ -7981,3 +7982,24 @@ where exists (select 1 from j3
  (13 rows)
  
  drop table j3;
+-- Test that we do not account for nullingrels when looking up statistics
+CREATE TABLE group_tbl (a INT, b INT);
+INSERT INTO group_tbl SELECT 1, 1;
+CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
+ANALYZE group_tbl;
+EXPLAIN (COSTS OFF)
+SELECT 1 FROM group_tbl t1
+    LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
+GROUP BY s.c1, s.c2;
+                 QUERY PLAN                 
+--------------------------------------------
+ Group
+   Group Key: t2.a, (COALESCE(t2.a))
+   ->  Sort
+         Sort Key: t2.a, (COALESCE(t2.a))
+         ->  Nested Loop Left Join
+               ->  Seq Scan on group_tbl t1
+               ->  Seq Scan on group_tbl t2
+(7 rows)
+
+DROP TABLE group_tbl;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 41949d41dd6328c275df59a8ae277ca9a9934a2a..d6f646a1d50e019ae0071152697fc176f2f8ecf2 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2928,3 +2928,16 @@ where exists (select 1 from j3
        and t1.unique1 < 1;
  
  drop table j3;
+
+-- Test that we do not account for nullingrels when looking up statistics
+CREATE TABLE group_tbl (a INT, b INT);
+INSERT INTO group_tbl SELECT 1, 1;
+CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl;
+ANALYZE group_tbl;
+
+EXPLAIN (COSTS OFF)
+SELECT 1 FROM group_tbl t1
+    LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
+GROUP BY s.c1, s.c2;
+
+DROP TABLE group_tbl;
author	Richard Guo <rguo@postgresql.org>
	Thu, 2 Jan 2025 09:02:02 +0000 (18:02 +0900)
committer	Richard Guo <rguo@postgresql.org>
	Thu, 2 Jan 2025 09:02:02 +0000 (18:02 +0900)
src/backend/utils/adt/selfuncs.c		patch | blob | blame | history
src/test/regress/expected/join.out		patch | blob | blame | history
src/test/regress/sql/join.sql		patch | blob | blame | history