git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Teach Append to consider tuple_fraction when accumulating subpaths.
authorAlexander Korotkov <akorotkov@postgresql.org>
Mon, 10 Mar 2025 11:38:39 +0000 (13:38 +0200)
committerAlexander Korotkov <akorotkov@postgresql.org>
Mon, 10 Mar 2025 11:38:39 +0000 (13:38 +0200)
This change is dedicated to more active usage of IndexScan and parameterized
NestLoop paths in partitioned cases under an Append node, as it already works
with plain tables.  As the newly added regression tests demonstrate, it should
make the partitionwise technique smarter.

With an indication of how many tuples are needed, it may be more meaningful
to use the 'fractional branch' subpaths of the Append path list, which are
better suited to that specific number of tuples.  Planning on a higher level,
if the optimizer needs all the tuples, it will choose non-fractional paths.
In the case when, during execution, Append needs to return fewer tuples than
declared by tuple_fraction, it is not harmful to use the 'intermediate'
variant of paths.  However, it can yield a considerable benefit if a sensible
set of tuples is selected.

The change of the existing regression test demonstrates the positive outcome
of this feature: instead of scanning the whole table, the optimizer prefers
to use a parameterized scan, being aware of the only single tuple the join
has to produce to perform the query.

Discussion: https://www.postgresql.org/message-id/flat/CAN-LCVPxnWB39CUBTgOQ9O7Dd8DrA_tpT1EY3LNVnUuvAX1NjA%40mail.gmail.com
Author: Nikita Malakhov <hukutoc@gmail.com>
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Andy Fan <zhihuifan1213@163.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
src/backend/optimizer/path/allpaths.c
src/backend/optimizer/plan/planner.c
src/test/regress/expected/partition_join.out
src/test/regress/expected/union.out
src/test/regress/sql/partition_join.sql

index b5bc9b602e21e0595fac8947964b08f1a66d87ec..df3453f99f0fb12cbf1ffa98b8a3ba3793ef24e9 100644 (file)
@@ -1371,9 +1371,23 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
                 */
                if (rel->consider_startup && childrel->cheapest_startup_path != NULL)
                {
+                       Path       *cheapest_path;
+
+                       /*
+                        * With an indication of how many tuples the query should provide,
+                        * the optimizer tries to choose the path optimal for that
+                        * specific number of tuples.
+                        */
+                       if (root->tuple_fraction > 0.0)
+                               cheapest_path =
+                                       get_cheapest_fractional_path(childrel,
+                                                                                                root->tuple_fraction);
+                       else
+                               cheapest_path = childrel->cheapest_startup_path;
+
                        /* cheapest_startup_path must not be a parameterized path. */
-                       Assert(childrel->cheapest_startup_path->param_info == NULL);
-                       accumulate_append_subpath(childrel->cheapest_startup_path,
+                       Assert(cheapest_path->param_info == NULL);
+                       accumulate_append_subpath(cheapest_path,
                                                                          &startup_subpaths,
                                                                          NULL);
                }
index 36ee6dd43de99da7fc7154e02cfa63893a3799ca..014e80c30e6abfc20d7ee05c4dd2a15e6d3c9414 100644 (file)
@@ -6414,6 +6414,11 @@ make_sort_input_target(PlannerInfo *root,
  *       Find the cheapest path for retrieving a specified fraction of all
  *       the tuples expected to be returned by the given relation.
  *
+ * Do not consider parameterized paths.  If the caller needs a path for upper
+ * rel, it can't have parameterized paths.  If the caller needs an append
+ * subpath, it could become limited by the treatment of similar
+ * parameterization of all the subpaths.
+ *
  * We interpret tuple_fraction the same way as grouping_planner.
  *
  * We assume set_cheapest() has been run on the given rel.
@@ -6436,6 +6441,9 @@ get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
        {
                Path       *path = (Path *) lfirst(l);
 
+               if (path->param_info)
+                       continue;
+
                if (path == rel->cheapest_total_path ||
                        compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
                        continue;
index 34b963ce6fece9a6714bdf3a6d1c97151eb8f30b..938cedd79adeb1d74eec1dd9e807d5af65321a6e 100644 (file)
@@ -5260,6 +5260,122 @@ SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DE
                      Index Cond: (id = x_2.id)
 (11 rows)
 
+--
+-- Test Append's fractional paths
+--
+CREATE INDEX pht1_c_idx ON pht1(c);
+-- SeqScan might be the best choice if we need one single tuple
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Limit
+   ->  Append
+         ->  Nested Loop
+               Join Filter: (p1_1.c = p2_1.c)
+               ->  Seq Scan on pht1_p1 p1_1
+               ->  Materialize
+                     ->  Seq Scan on pht1_p1 p2_1
+         ->  Nested Loop
+               Join Filter: (p1_2.c = p2_2.c)
+               ->  Seq Scan on pht1_p2 p1_2
+               ->  Materialize
+                     ->  Seq Scan on pht1_p2 p2_2
+         ->  Nested Loop
+               Join Filter: (p1_3.c = p2_3.c)
+               ->  Seq Scan on pht1_p3 p1_3
+               ->  Materialize
+                     ->  Seq Scan on pht1_p3 p2_3
+(17 rows)
+
+-- Increase number of tuples requested and an IndexScan will be chosen
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Limit
+   ->  Append
+         ->  Nested Loop
+               ->  Seq Scan on pht1_p1 p1_1
+               ->  Memoize
+                     Cache Key: p1_1.c
+                     Cache Mode: logical
+                     ->  Index Scan using pht1_p1_c_idx on pht1_p1 p2_1
+                           Index Cond: (c = p1_1.c)
+         ->  Nested Loop
+               ->  Seq Scan on pht1_p2 p1_2
+               ->  Memoize
+                     Cache Key: p1_2.c
+                     Cache Mode: logical
+                     ->  Index Scan using pht1_p2_c_idx on pht1_p2 p2_2
+                           Index Cond: (c = p1_2.c)
+         ->  Nested Loop
+               ->  Seq Scan on pht1_p3 p1_3
+               ->  Memoize
+                     Cache Key: p1_3.c
+                     Cache Mode: logical
+                     ->  Index Scan using pht1_p3_c_idx on pht1_p3 p2_3
+                           Index Cond: (c = p1_3.c)
+(23 rows)
+
+-- If almost all the data should be fetched - prefer SeqScan
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1000;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Limit
+   ->  Append
+         ->  Hash Join
+               Hash Cond: (p1_1.c = p2_1.c)
+               ->  Seq Scan on pht1_p1 p1_1
+               ->  Hash
+                     ->  Seq Scan on pht1_p1 p2_1
+         ->  Hash Join
+               Hash Cond: (p1_2.c = p2_2.c)
+               ->  Seq Scan on pht1_p2 p1_2
+               ->  Hash
+                     ->  Seq Scan on pht1_p2 p2_2
+         ->  Hash Join
+               Hash Cond: (p1_3.c = p2_3.c)
+               ->  Seq Scan on pht1_p3 p1_3
+               ->  Hash
+                     ->  Seq Scan on pht1_p3 p2_3
+(17 rows)
+
+SET max_parallel_workers_per_gather = 1;
+SET debug_parallel_query = on;
+-- Partial paths should also be smart enough to employ limits
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Gather
+   Workers Planned: 1
+   Single Copy: true
+   ->  Limit
+         ->  Append
+               ->  Nested Loop
+                     ->  Seq Scan on pht1_p1 p1_1
+                     ->  Memoize
+                           Cache Key: p1_1.c
+                           Cache Mode: logical
+                           ->  Index Scan using pht1_p1_c_idx on pht1_p1 p2_1
+                                 Index Cond: (c = p1_1.c)
+               ->  Nested Loop
+                     ->  Seq Scan on pht1_p2 p1_2
+                     ->  Memoize
+                           Cache Key: p1_2.c
+                           Cache Mode: logical
+                           ->  Index Scan using pht1_p2_c_idx on pht1_p2 p2_2
+                                 Index Cond: (c = p1_2.c)
+               ->  Nested Loop
+                     ->  Seq Scan on pht1_p3 p1_3
+                     ->  Memoize
+                           Cache Key: p1_3.c
+                           Cache Mode: logical
+                           ->  Index Scan using pht1_p3_c_idx on pht1_p3 p2_3
+                                 Index Cond: (c = p1_3.c)
+(26 rows)
+
+RESET debug_parallel_query;
+-- Remove indexes from the partitioned table and its partitions
+DROP INDEX pht1_c_idx CASCADE;
 -- cleanup
 DROP TABLE fract_t;
 RESET max_parallel_workers_per_gather;
index caa8fe70a0586d1d41cc86092793ee3bb55cbc1a..96962817ed45adc4e97c5e6880b514c15b7d5348 100644 (file)
@@ -1472,18 +1472,17 @@ select t1.unique1 from tenk1 t1
 inner join tenk2 t2 on t1.tenthous = t2.tenthous and t2.thousand = 0
    union all
 (values(1)) limit 1;
-                       QUERY PLAN                       
---------------------------------------------------------
+                             QUERY PLAN                              
+---------------------------------------------------------------------
  Limit
    ->  Append
          ->  Nested Loop
-               Join Filter: (t1.tenthous = t2.tenthous)
-               ->  Seq Scan on tenk1 t1
-               ->  Materialize
-                     ->  Seq Scan on tenk2 t2
-                           Filter: (thousand = 0)
+               ->  Seq Scan on tenk2 t2
+                     Filter: (thousand = 0)
+               ->  Index Scan using tenk1_thous_tenthous on tenk1 t1
+                     Index Cond: (tenthous = t2.tenthous)
          ->  Result
-(9 rows)
+(8 rows)
 
 -- Ensure there is no problem if cheapest_startup_path is NULL
 explain (costs off)
index 26b8e3d063ff3677dcda72eb0ba06db793676608..b76c545100126ff1554ae2b4f6d10139dd4d4011 100644 (file)
@@ -1225,6 +1225,27 @@ SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id AS
 EXPLAIN (COSTS OFF)
 SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DESC LIMIT 10;
 
+--
+-- Test Append's fractional paths
+--
+
+CREATE INDEX pht1_c_idx ON pht1(c);
+-- SeqScan might be the best choice if we need one single tuple
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1;
+-- Increase number of tuples requested and an IndexScan will be chosen
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
+-- If almost all the data should be fetched - prefer SeqScan
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1000;
+
+SET max_parallel_workers_per_gather = 1;
+SET debug_parallel_query = on;
+-- Partial paths should also be smart enough to employ limits
+EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100;
+RESET debug_parallel_query;
+
+-- Remove indexes from the partitioned table and its partitions
+DROP INDEX pht1_c_idx CASCADE;
+
 -- cleanup
 DROP TABLE fract_t;