git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Consider startup cost as a figure of merit for partial paths.
author Robert Haas <rhaas@postgresql.org>
Mon, 9 Mar 2026 12:16:30 +0000 (08:16 -0400)
committer Robert Haas <rhaas@postgresql.org>
Mon, 9 Mar 2026 12:16:30 +0000 (08:16 -0400)
Previously, the comments stated that there was no purpose to considering
startup cost for partial paths, but this is not the case: it's perfectly
reasonable to want a fast-start path for a plan that involves a LIMIT
(perhaps over an aggregate, so that there is enough data being processed
to justify parallel query, yet we don't want all the result rows).

Accordingly, rewrite add_partial_path and add_partial_path_precheck to
consider startup costs. This also fixes an independent bug in
add_partial_path_precheck: commit e22253467942fdb100087787c3e1e3a8620c54b2
failed to update it to do anything with the new disabled_nodes field.
That bug fix is formally separate from the rest of this patch and could
be committed separately, but I think it makes more sense to fix both
issues together, because then we can (as this commit does) just make
add_partial_path_precheck do the cost comparisons in the same way as
compare_path_costs_fuzzily, which hopefully reduces the chances of
ending up with something that's still incorrect.

This patch is based on earlier work on this topic by Tomas Vondra,
but I have rewritten a great deal of it.

Co-authored-by: Robert Haas <rhaas@postgresql.org>
Co-authored-by: Tomas Vondra <tomas@vondra.me>
Discussion: http://postgr.es/m/CA+TgmobRufbUSksBoxytGJS1P+mQY4rWctCk-d0iAUO6-k9Wrg@mail.gmail.com

src/backend/optimizer/path/joinpath.c
src/backend/optimizer/util/pathnode.c
src/include/optimizer/pathnode.h
src/test/regress/expected/incremental_sort.out
src/test/regress/expected/join_hash.out
src/test/regress/sql/join_hash.sql

index e0c00e26dd5d57ffd90c8846a61f32f3b372cae3..044560da7bf7a9e2e417b1a783793fc2096db8c8 100644 (file)
@@ -1048,6 +1048,7 @@ try_partial_nestloop_path(PlannerInfo *root,
        initial_cost_nestloop(root, &workspace, jointype, nestloop_subtype,
                                                  outer_path, inner_path, extra);
        if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+                                                                  workspace.startup_cost,
                                                                   workspace.total_cost, pathkeys))
                return;
 
@@ -1237,6 +1238,7 @@ try_partial_mergejoin_path(PlannerInfo *root,
                                                   extra);
 
        if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+                                                                  workspace.startup_cost,
                                                                   workspace.total_cost, pathkeys))
                return;
 
@@ -1369,6 +1371,7 @@ try_partial_hashjoin_path(PlannerInfo *root,
        initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
                                                  outer_path, inner_path, extra, parallel_hash);
        if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
+                                                                  workspace.startup_cost,
                                                                   workspace.total_cost, NIL))
                return;
 
index ef8ef6e89d377fd0619959bd3294afcee9c84dc2..c94e077000f5a21a626bb6f5bf9492809814258d 100644 (file)
@@ -778,10 +778,9 @@ add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
  *
  *       Because we don't consider parameterized paths here, we also don't
  *       need to consider the row counts as a measure of quality: every path will
- *       produce the same number of rows.  Neither do we need to consider startup
- *       costs: parallelism is only used for plans that will be run to completion.
- *       Therefore, this routine is much simpler than add_path: it needs to
- *       consider only disabled nodes, pathkeys and total cost.
+ *       produce the same number of rows.  However, we do need to consider the
+ *       startup costs: this partial path could be used beneath a Limit node,
+ *       so a fast-start plan could be correct.
  *
  *       As with add_path, we pfree paths that are found to be dominated by
  *       another partial path; this requires that there be no other references to
@@ -819,52 +818,41 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
                /* Compare pathkeys. */
                keyscmp = compare_pathkeys(new_path->pathkeys, old_path->pathkeys);
 
-               /* Unless pathkeys are incompatible, keep just one of the two paths. */
+               /*
+                * Unless pathkeys are incompatible, see if one of the paths dominates
+                * the other (both in startup and total cost). It may happen that one
+                * path has lower startup cost, the other has lower total cost.
+                */
                if (keyscmp != PATHKEYS_DIFFERENT)
                {
-                       if (unlikely(new_path->disabled_nodes != old_path->disabled_nodes))
+                       PathCostComparison costcmp;
+
+                       /*
+                        * Do a fuzzy cost comparison with standard fuzziness limit.
+                        */
+                       costcmp = compare_path_costs_fuzzily(new_path, old_path,
+                                                                                                STD_FUZZ_FACTOR);
+                       if (costcmp == COSTS_BETTER1)
                        {
-                               if (new_path->disabled_nodes > old_path->disabled_nodes)
-                                       accept_new = false;
-                               else
+                               if (keyscmp != PATHKEYS_BETTER2)
                                        remove_old = true;
                        }
-                       else if (new_path->total_cost > old_path->total_cost
-                                        * STD_FUZZ_FACTOR)
+                       else if (costcmp == COSTS_BETTER2)
                        {
-                               /* New path costs more; keep it only if pathkeys are better. */
                                if (keyscmp != PATHKEYS_BETTER1)
                                        accept_new = false;
                        }
-                       else if (old_path->total_cost > new_path->total_cost
-                                        * STD_FUZZ_FACTOR)
+                       else if (costcmp == COSTS_EQUAL)
                        {
-                               /* Old path costs more; keep it only if pathkeys are better. */
-                               if (keyscmp != PATHKEYS_BETTER2)
+                               if (keyscmp == PATHKEYS_BETTER1)
                                        remove_old = true;
-                       }
-                       else if (keyscmp == PATHKEYS_BETTER1)
-                       {
-                               /* Costs are about the same, new path has better pathkeys. */
-                               remove_old = true;
-                       }
-                       else if (keyscmp == PATHKEYS_BETTER2)
-                       {
-                               /* Costs are about the same, old path has better pathkeys. */
-                               accept_new = false;
-                       }
-                       else if (old_path->total_cost > new_path->total_cost * 1.0000000001)
-                       {
-                               /* Pathkeys are the same, and the old path costs more. */
-                               remove_old = true;
-                       }
-                       else
-                       {
-                               /*
-                                * Pathkeys are the same, and new path isn't materially
-                                * cheaper.
-                                */
-                               accept_new = false;
+                               else if (keyscmp == PATHKEYS_BETTER2)
+                                       accept_new = false;
+                               else if (compare_path_costs_fuzzily(new_path, old_path,
+                                                                                                       1.0000000001) == COSTS_BETTER1)
+                                       remove_old = true;
+                               else
+                                       accept_new = false;
                        }
                }
 
@@ -915,16 +903,16 @@ add_partial_path(RelOptInfo *parent_rel, Path *new_path)
  * add_partial_path_precheck
  *       Check whether a proposed new partial path could possibly get accepted.
  *
- * Unlike add_path_precheck, we can ignore startup cost and parameterization,
- * since they don't matter for partial paths (see add_partial_path).  But
- * we do want to make sure we don't add a partial path if there's already
- * a complete path that dominates it, since in that case the proposed path
- * is surely a loser.
+ * Unlike add_path_precheck, we can ignore parameterization, since it doesn't
+ * matter for partial paths (see add_partial_path).  But we do want to make
+ * sure we don't add a partial path if there's already a complete path that
+ * dominates it, since in that case the proposed path is surely a loser.
  */
 bool
 add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
-                                                 Cost total_cost, List *pathkeys)
+                                                 Cost startup_cost, Cost total_cost, List *pathkeys)
 {
+       bool            consider_startup = parent_rel->consider_startup;
        ListCell   *p1;
 
        /*
@@ -934,25 +922,81 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
         * is clearly superior to some existing partial path -- at least, modulo
         * final cost computations.  If so, we definitely want to consider it.
         *
-        * Unlike add_path(), we always compare pathkeys here.  This is because we
-        * expect partial_pathlist to be very short, and getting a definitive
-        * answer at this stage avoids the need to call add_path_precheck.
+        * Unlike add_path(), we never try to exit this loop early. This is
+        * because we expect partial_pathlist to be very short, and getting a
+        * definitive answer at this stage avoids the need to call
+        * add_path_precheck.
         */
        foreach(p1, parent_rel->partial_pathlist)
        {
                Path       *old_path = (Path *) lfirst(p1);
+               PathCostComparison costcmp;
                PathKeysComparison keyscmp;
 
-               keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
-               if (keyscmp != PATHKEYS_DIFFERENT)
+               /*
+                * First, compare costs and disabled nodes. This logic should be
+                * identical to compare_path_costs_fuzzily, except that one of the
+                * paths hasn't been created yet, and the fuzz factor is always
+                * STD_FUZZ_FACTOR.
+                */
+               if (unlikely(old_path->disabled_nodes != disabled_nodes))
+               {
+                       if (disabled_nodes < old_path->disabled_nodes)
+                               costcmp = COSTS_BETTER1;
+                       else
+                               costcmp = COSTS_BETTER2;
+               }
+               else if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR)
                {
-                       if (total_cost > old_path->total_cost * STD_FUZZ_FACTOR &&
-                               keyscmp != PATHKEYS_BETTER1)
-                               return false;
-                       if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR &&
-                               keyscmp != PATHKEYS_BETTER2)
-                               return true;
+                       if (consider_startup &&
+                               old_path->startup_cost > startup_cost * STD_FUZZ_FACTOR)
+                               costcmp = COSTS_DIFFERENT;
+                       else
+                               costcmp = COSTS_BETTER2;
+               }
+               else if (old_path->total_cost > total_cost * STD_FUZZ_FACTOR)
+               {
+                       if (consider_startup &&
+                               startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR)
+                               costcmp = COSTS_DIFFERENT;
+                       else
+                               costcmp = COSTS_BETTER1;
                }
+               else if (startup_cost > old_path->startup_cost * STD_FUZZ_FACTOR)
+                       costcmp = COSTS_BETTER2;
+               else if (old_path->startup_cost > startup_cost * STD_FUZZ_FACTOR)
+                       costcmp = COSTS_BETTER1;
+               else
+                       costcmp = COSTS_EQUAL;
+
+               /*
+                * If one path wins on startup cost and the other on total cost, we
+                * can't say for sure which is better.
+                */
+               if (costcmp == COSTS_DIFFERENT)
+                       continue;
+
+               /*
+                * If the two paths have different pathkeys, we can't say for sure
+                * which is better.
+                */
+               keyscmp = compare_pathkeys(pathkeys, old_path->pathkeys);
+               if (keyscmp == PATHKEYS_DIFFERENT)
+                       continue;
+
+               /*
+                * If the existing path is cheaper and the pathkeys are equal or
+                * worse, the new path is not interesting.
+                */
+               if (costcmp == COSTS_BETTER2 && keyscmp != PATHKEYS_BETTER1)
+                       return false;
+
+               /*
+                * If the new path is cheaper and the pathkeys are equal or better, it
+                * is definitely interesting.
+                */
+               if (costcmp == COSTS_BETTER1 && keyscmp != PATHKEYS_BETTER2)
+                       return true;
        }
 
        /*
@@ -960,14 +1004,9 @@ add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
         * clearly good enough that it might replace one.  Compare it to
         * non-parallel plans.  If it loses even before accounting for the cost of
         * the Gather node, we should definitely reject it.
-        *
-        * Note that we pass the total_cost to add_path_precheck twice.  This is
-        * because it's never advantageous to consider the startup cost of a
-        * partial path; the resulting plans, if run in parallel, will be run to
-        * completion.
         */
-       if (!add_path_precheck(parent_rel, disabled_nodes, total_cost, total_cost,
-                                                  pathkeys, NULL))
+       if (!add_path_precheck(parent_rel, disabled_nodes, startup_cost,
+                                                  total_cost, pathkeys, NULL))
                return false;
 
        return true;
index cf8a654fa53681dc621f99c1ed3993f97b4dbcf4..938510400cc537412ac4eb24da91a9f7b24b251a 100644 (file)
@@ -55,7 +55,7 @@ extern bool add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes,
                                                          List *pathkeys, Relids required_outer);
 extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
 extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
-                                                                         int disabled_nodes,
+                                                                         int disabled_nodes, Cost startup_cost,
                                                                          Cost total_cost, List *pathkeys);
 
 extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
index fdec5b9ba52a951378aa6dde53bae257d5aa237d..1e6e020fea836264feaa61a91d729f03b26e74bd 100644 (file)
@@ -1450,21 +1450,23 @@ explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1
 
 set enable_incremental_sort = on;
 explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                                 QUERY PLAN                                 
+----------------------------------------------------------------------------
  Limit
    ->  Incremental Sort
          Sort Key: a, b, (sum(c))
          Presorted Key: a, b
-         ->  GroupAggregate
+         ->  Finalize GroupAggregate
                Group Key: a, b
                ->  Gather Merge
                      Workers Planned: 2
-                     ->  Incremental Sort
-                           Sort Key: a, b
-                           Presorted Key: a
-                           ->  Parallel Index Scan using t_a_idx on t
-(12 rows)
+                     ->  Partial GroupAggregate
+                           Group Key: a, b
+                           ->  Incremental Sort
+                                 Sort Key: a, b
+                                 Presorted Key: a
+                                 ->  Parallel Index Scan using t_a_idx on t
+(14 rows)
 
 -- Incremental sort vs. set operations with varno 0
 set enable_hashagg to off;
@@ -1580,8 +1582,8 @@ from tenk1 t1
 join tenk1 t2 on t1.unique1 = t2.unique2
 join tenk1 t3 on t2.unique1 = t3.unique1
 order by count(*);
-                                          QUERY PLAN                                           
------------------------------------------------------------------------------------------------
+                                             QUERY PLAN                                             
+----------------------------------------------------------------------------------------------------
  Sort
    Sort Key: (count(*))
    ->  Finalize Aggregate
@@ -1591,10 +1593,10 @@ order by count(*);
                      ->  Parallel Hash Join
                            Hash Cond: (t2.unique1 = t3.unique1)
                            ->  Parallel Hash Join
-                                 Hash Cond: (t1.unique1 = t2.unique2)
-                                 ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
+                                 Hash Cond: (t2.unique2 = t1.unique1)
+                                 ->  Parallel Index Scan using tenk1_unique2 on tenk1 t2
                                  ->  Parallel Hash
-                                       ->  Parallel Index Scan using tenk1_unique2 on tenk1 t2
+                                       ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t1
                            ->  Parallel Hash
                                  ->  Parallel Index Only Scan using tenk1_unique1 on tenk1 t3
 (15 rows)
index 4749f6ed70d5f45ad3d7f879ef70aba45ff79009..bc7cc76467ffaee0931436fdbff65c2421537b8b 100644 (file)
@@ -76,8 +76,8 @@ insert into extremely_skewed
 update pg_class
   set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
   where relname = 'extremely_skewed';
--- Make a relation with a couple of enormous tuples.
-create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
+-- Make a relation with several enormous tuples.
+create table wide as select generate_series(1, 3) as id, rpad('', 320000, 'x') as t;
 alter table wide set (parallel_workers = 2);
 -- The "optimal" case: the hash table fits in memory; we plan for 1
 -- batch, we stick to that number, and peak memory usage stays within
@@ -922,7 +922,7 @@ set work_mem = '128kB';
 set hash_mem_multiplier = 1.0;
 explain (costs off)
   select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+  from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
                            QUERY PLAN                           
 ----------------------------------------------------------------
  Finalize Aggregate
@@ -934,10 +934,11 @@ explain (costs off)
                      ->  Parallel Seq Scan on wide
                      ->  Parallel Hash
                            ->  Parallel Seq Scan on wide wide_1
-(9 rows)
+                                 Filter: (id < 3)
+(10 rows)
 
 select length(max(s.t))
-from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
  length 
 --------
  320000
@@ -947,7 +948,7 @@ select final > 1 as multibatch
   from hash_join_batches(
 $$
   select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+  from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
 $$);
  multibatch 
 ------------
index 49d3fd61856294cf82d87fe01ad9578a441ea275..53db1754bb261e4903dfb99b40457fae04bf89e5 100644 (file)
@@ -83,8 +83,8 @@ update pg_class
   set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192
   where relname = 'extremely_skewed';
 
--- Make a relation with a couple of enormous tuples.
-create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t;
+-- Make a relation with several enormous tuples.
+create table wide as select generate_series(1, 3) as id, rpad('', 320000, 'x') as t;
 alter table wide set (parallel_workers = 2);
 
 -- The "optimal" case: the hash table fits in memory; we plan for 1
@@ -496,14 +496,14 @@ set work_mem = '128kB';
 set hash_mem_multiplier = 1.0;
 explain (costs off)
   select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+  from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
 select length(max(s.t))
-from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
 select final > 1 as multibatch
   from hash_join_batches(
 $$
   select length(max(s.t))
-  from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id);
+  from wide left join (select id, coalesce(t, '') || '' as t from wide where id < 3) s using (id);
 $$);
 rollback to settings;