Fix estimate_hash_bucket_stats's correction for skewed data.

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 4 Mar 2026 20:33:15 +0000 (15:33 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 4 Mar 2026 20:33:15 +0000 (15:33 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 4 Mar 2026 20:33:15 +0000 (15:33 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 4 Mar 2026 20:33:15 +0000 (15:33 -0500)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index dd7e11c0ca5f7da768f3119cf632ae15511ca588..d4da0e8dea931de719a4ed487a2d4ec84b22a1b8 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4373,8 +4373,8 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
   * exactly those that will be probed most often.  Therefore, the "average"
   * bucket size for costing purposes should really be taken as something close
   * to the "worst case" bucket size.  We try to estimate this by adjusting the
- * fraction if there are too few distinct data values, and then scaling up
- * by the ratio of the most common value's frequency to the average frequency.
+ * fraction if there are too few distinct data values, and then clamping to
+ * at least the bucket size implied by the most common value's frequency.
   *
   * If no statistics are available, use a default estimate of 0.1.  This will
   * discourage use of a hash rather strongly if the inner relation is large,
@@ -4394,9 +4394,7 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
  {
         VariableStatData vardata;
         double          estfract,
-                               ndistinct,
-                               stanullfrac,
-                               avgfreq;
+                               ndistinct;
         bool            isdefault;
         AttStatsSlot sslot;
  
@@ -4446,20 +4444,6 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
                 return;
         }
  
-       /* Get fraction that are null */
-       if (HeapTupleIsValid(vardata.statsTuple))
-       {
-               Form_pg_statistic stats;
-
-               stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
-               stanullfrac = stats->stanullfrac;
-       }
-       else
-               stanullfrac = 0.0;
-
-       /* Compute avg freq of all distinct data values in raw relation */
-       avgfreq = (1.0 - stanullfrac) / ndistinct;
-
         /*
          * Adjust ndistinct to account for restriction clauses.  Observe we are
          * assuming that the data distribution is affected uniformly by the
@@ -4485,20 +4469,11 @@ estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
                 estfract = 1.0 / ndistinct;
  
         /*
-        * Adjust estimated bucketsize upward to account for skewed distribution.
-        */
-       if (avgfreq > 0.0 && *mcv_freq > avgfreq)
-               estfract *= *mcv_freq / avgfreq;
-
-       /*
-        * Clamp bucketsize to sane range (the above adjustment could easily
-        * produce an out-of-range result).  We set the lower bound a little above
-        * zero, since zero isn't a very sane result.
+        * Clamp the bucketsize fraction to be not less than the MCV frequency,
+        * since whichever bucket the MCV values end up in will have at least that
+        * size.  This has no effect if *mcv_freq is still zero.
          */
-       if (estfract < 1.0e-6)
-               estfract = 1.0e-6;
-       else if (estfract > 1.0)
-               estfract = 1.0;
+       estfract = Max(estfract, *mcv_freq);
  
         *bucketsize_frac = (Selectivity) estfract;
  
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 41521f275c8f90f1134e2b9f47f6ef0b08c68916..ea3dabff77c35fb10e9360fb63c48b4eb6252f68 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3192,6 +3192,31 @@ ORDER BY 1;
  
  reset enable_nestloop;
  --
+-- test that estimate_hash_bucket_stats estimates correctly with skewed data
+-- (we should choose to hash the filtered table)
+--
+create temp table skewedtable (val int not null, filt int not null);
+insert into skewedtable
+select
+    case when g <= 100 then 0 else (g % 100) + 1 end,
+    g % 10
+from generate_series(1, 1000) g;
+analyze skewedtable;
+explain (costs off)
+select * from skewedtable t1 join skewedtable t2 on t1.val = t2.val
+where t1.filt = 5;
+               QUERY PLAN               
+----------------------------------------
+ Hash Join
+   Hash Cond: (t2.val = t1.val)
+   ->  Seq Scan on skewedtable t2
+   ->  Hash
+         ->  Seq Scan on skewedtable t1
+               Filter: (filt = 5)
+(6 rows)
+
+drop table skewedtable;
+--
  -- basic semijoin and antijoin recognition tests
  --
  explain (costs off)
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 4acd2512004afe28e7b40c5157eaa780b3260520..ad90c326c000ca4b31d6cf3fae4a97335e0df514 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -834,6 +834,25 @@ ORDER BY 1;
  
  reset enable_nestloop;
  
+--
+-- test that estimate_hash_bucket_stats estimates correctly with skewed data
+-- (we should choose to hash the filtered table)
+--
+
+create temp table skewedtable (val int not null, filt int not null);
+insert into skewedtable
+select
+    case when g <= 100 then 0 else (g % 100) + 1 end,
+    g % 10
+from generate_series(1, 1000) g;
+analyze skewedtable;
+
+explain (costs off)
+select * from skewedtable t1 join skewedtable t2 on t1.val = t2.val
+where t1.filt = 5;
+
+drop table skewedtable;
+
  --
  -- basic semijoin and antijoin recognition tests
  --
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 4 Mar 2026 20:33:15 +0000 (15:33 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 4 Mar 2026 20:33:15 +0000 (15:33 -0500)
src/backend/utils/adt/selfuncs.c		patch \| blob \| blame \| history
src/test/regress/expected/join.out		patch \| blob \| blame \| history
src/test/regress/sql/join.sql		patch \| blob \| blame \| history