git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Adjust estimate_num_groups() to not clamp per-relation group count
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 1 Feb 2005 23:09:00 +0000 (23:09 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 1 Feb 2005 23:09:00 +0000 (23:09 +0000)
estimate to less than the number of values estimated for any one grouping
Var, as suggested by Manfred.  This is intuitively right, and what's
more it puts the plan choices in the subselect regression test back the
way they were before ...

src/backend/utils/adt/selfuncs.c
src/test/regress/expected/subselect.out

index 2f2f2c4e5e6479cae9c2bfc99b0c3f359c9b4f7b..1621b6ee294462b723c2b38abfaa34a597de46b1 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.169.4.1 2005/01/28 20:35:10 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.169.4.2 2005/02/01 23:08:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2043,6 +2043,7 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
                GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
                RelOptInfo *rel = varinfo1->rel;
                double          reldistinct = varinfo1->ndistinct;
+               double          relmaxndistinct = reldistinct;
                int                     relvarcount = 1;
                List       *newvarinfos = NIL;
 
@@ -2057,6 +2058,8 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
                        if (varinfo2->rel == varinfo1->rel)
                        {
                                reldistinct *= varinfo2->ndistinct;
+                               if (relmaxndistinct < varinfo2->ndistinct)
+                                       relmaxndistinct = varinfo2->ndistinct;
                                relvarcount++;
                        }
                        else
@@ -2075,12 +2078,23 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
                        /*
                         * Clamp to size of rel, or size of rel / 10 if multiple Vars.
                         * The fudge factor is because the Vars are probably correlated
-                        * but we don't know by how much.
+                        * but we don't know by how much.  We should never clamp to less
+                        * than the largest ndistinct value for any of the Vars, though,
+                        * since there will surely be at least that many groups.
                         */
                        double          clamp = rel->tuples;
 
                        if (relvarcount > 1)
+                       {
                                clamp *= 0.1;
+                               if (clamp < relmaxndistinct)
+                               {
+                                       clamp = relmaxndistinct;
+                                       /* for sanity in case some ndistinct is too large: */
+                                       if (clamp > rel->tuples)
+                                               clamp = rel->tuples;
+                               }
+                       }
                        if (reldistinct > clamp)
                                reldistinct = clamp;
 
index 56bea0359815ff4598052e9f234967b711d53858..07e727de482594b5d8d2e102834c29a126a3f630 100644 (file)
@@ -134,11 +134,11 @@ SELECT '' AS five, f1 AS "Correlated Field"
                      WHERE f3 IS NOT NULL);
  five | Correlated Field 
 ------+------------------
+      |                2
       |                3
       |                1
-      |                3
-      |                2
       |                2
+      |                3
 (5 rows)
 
 --