]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
pg_plan_advice: Handle non-repeatable TABLESAMPLE scans.
authorRobert Haas <rhaas@postgresql.org>
Mon, 13 Apr 2026 12:46:25 +0000 (08:46 -0400)
committerRobert Haas <rhaas@postgresql.org>
Mon, 13 Apr 2026 12:46:25 +0000 (08:46 -0400)
When a tablesample routine says that it is not repeatable across
scans, set_tablesample_rel_pathlist will (usually) materialize it,
confusing pg_plan_advice's plan walker machinery. To fix, update that
machinery to view such Material paths as essentially an extension of
the underlying scan.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Discussion: http://postgr.es/m/CA+TgmobOOmmXSJz3e+cjTY-bA1+W0dqVDqzxUBEvGtW62whYGg@mail.gmail.com

contrib/pg_plan_advice/Makefile
contrib/pg_plan_advice/expected/scan.out
contrib/pg_plan_advice/pgpa_join.c
contrib/pg_plan_advice/pgpa_scan.c
contrib/pg_plan_advice/pgpa_walker.c
contrib/pg_plan_advice/pgpa_walker.h
contrib/pg_plan_advice/sql/scan.sql

index cd478dc1a6d7c90d4f3f8351fc549fdc4e9fabb4..cbafc50ca4cd9301f0df184be86170880cda8272 100644 (file)
@@ -22,6 +22,8 @@ PGFILEDESC = "pg_plan_advice - help the planner get the right plan"
 REGRESS = gather join_order join_strategy partitionwise prepared \
        scan semijoin syntax
 
+EXTRA_INSTALL = contrib/tsm_system_time
+
 EXTRA_CLEAN = pgpa_parser.h pgpa_parser.c pgpa_scanner.c
 
 ifdef USE_PGXS
index 44ce40f33a6f5d933aa852b680843de37f24f17f..f4036e4cbdd05cdb2440fe05496c791b61c8b8d2 100644 (file)
@@ -770,3 +770,41 @@ SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
 (7 rows)
 
 COMMIT;
+-- Test a non-repeatable tablesample method with a scan-level Materialize.
+CREATE EXTENSION tsm_system_time;
+CREATE TABLE scan_tsm (i int);
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT 1 FROM (SELECT i FROM scan_tsm TABLESAMPLE system_time (1000)),
+       LATERAL (SELECT i LIMIT 1);
+                              QUERY PLAN                              
+----------------------------------------------------------------------
+ Nested Loop
+   ->  Materialize
+         ->  Sample Scan on scan_tsm
+               Sampling: system_time ('1000'::double precision)
+   ->  Limit
+         ->  Result
+ Generated Plan Advice:
+   JOIN_ORDER(scan_tsm unnamed_subquery#2)
+   NESTED_LOOP_PLAIN(unnamed_subquery#2)
+   NO_GATHER(unnamed_subquery#2 scan_tsm "*RESULT*"@unnamed_subquery)
+(10 rows)
+
+-- Same, but with the scan-level Materialize on the inner side of a join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT 1 FROM (SELECT 1 AS x LIMIT 1),
+       LATERAL (SELECT x FROM scan_tsm TABLESAMPLE system_time (1000));
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Nested Loop
+   ->  Limit
+         ->  Result
+   ->  Materialize
+         ->  Sample Scan on scan_tsm
+               Sampling: system_time ('1000'::double precision)
+ Generated Plan Advice:
+   JOIN_ORDER(unnamed_subquery scan_tsm)
+   NESTED_LOOP_PLAIN(scan_tsm)
+   NO_GATHER(unnamed_subquery scan_tsm "*RESULT*"@unnamed_subquery)
+(10 rows)
+
index 4610d02356f539cc1522f2578c8fc18deee438e8..38e7b91ed7eb876d85208bfad2e22a8c788c2643 100644 (file)
@@ -363,9 +363,11 @@ pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan,
                        /*
                         * The planner may have chosen to place a Material node on the
                         * inner side of the MergeJoin; if this is present, we record it
-                        * as part of the join strategy.
+                        * as part of the join strategy. (However, scan-level Materialize
+                        * nodes are an exception.)
                         */
-                       if (elidedinner == NULL && IsA(innerplan, Material))
+                       if (elidedinner == NULL && IsA(innerplan, Material) &&
+                               !pgpa_is_scan_level_materialize(innerplan))
                        {
                                elidedinner = pgpa_descend_node(pstmt, &innerplan);
                                strategy = JSTRAT_MERGE_JOIN_MATERIALIZE;
@@ -390,9 +392,11 @@ pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan,
                        /*
                         * The planner may have chosen to place a Material or Memoize node
                         * on the inner side of the NestLoop; if this is present, we
-                        * record it as part of the join strategy.
+                        * record it as part of the join strategy. (However, scan-level
+                        * Materialize nodes are an exception.)
                         */
-                       if (elidedinner == NULL && IsA(innerplan, Material))
+                       if (elidedinner == NULL && IsA(innerplan, Material) &&
+                               !pgpa_is_scan_level_materialize(innerplan))
                        {
                                elidedinner = pgpa_descend_node(pstmt, &innerplan);
                                strategy = JSTRAT_NESTED_LOOP_MATERIALIZE;
index 0467f9b12ba87b38ca4e4ce2541423521bf640a7..21b58a0ac42745453029c1c8cb6a8b65e7cfed3d 100644 (file)
@@ -120,6 +120,17 @@ pgpa_build_scan(pgpa_plan_walker_context *walker, Plan *plan,
                                break;
                }
        }
+       else if (pgpa_is_scan_level_materialize(plan))
+       {
+               /*
+                * Non-repeatable tablesample methods can be wrapped in a Materialize
+                * node that must be treated as part of the scan itself. See
+                * set_tablesample_rel_pathlist().
+                */
+               rti = pgpa_scanrelid(plan->lefttree);
+               relids = bms_make_singleton(rti);
+               strategy = PGPA_SCAN_ORDINARY;
+       }
        else if ((relids = pgpa_relids(plan)) != NULL)
        {
                switch (nodeTag(plan))
index e32684d20750a6b777bbcc27526395c28d7086f4..e49361ae2661a67685cd7c54646e8e65488e5e5d 100644 (file)
@@ -16,6 +16,7 @@
 #include "pgpa_scan.h"
 #include "pgpa_walker.h"
 
+#include "access/tsmapi.h"
 #include "nodes/plannodes.h"
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
@@ -609,6 +610,34 @@ pgpa_scanrelid(Plan *plan)
        }
 }
 
+/*
+ * Check whether a plan node is a Material node that should be treated as
+ * a scan. Currently, this only happens when set_tablesample_rel_pathlist
+ * inserts a Material node to protect a SampleScan that uses a non-repeatable
+ * tablesample method.
+ *
+ * (Most Material nodes we're likely to encounter are actually part of the
+ * join strategy: nested loops and merge joins can choose to materialize the
+ * inner sides of the join. The cases identified here are the rare
+ * exceptions.)
+ */
+bool
+pgpa_is_scan_level_materialize(Plan *plan)
+{
+       Plan       *child;
+       SampleScan *sscan;
+       TsmRoutine *tsm;
+
+       if (!IsA(plan, Material))
+               return false;
+       child = plan->lefttree;
+       if (child == NULL || !IsA(child, SampleScan))
+               return false;
+       sscan = (SampleScan *) child;
+       tsm = GetTsmRoutine(sscan->tablesample->tsmhandler);
+       return !tsm->repeatable_across_scans;
+}
+
 /*
  * Construct a new Bitmapset containing non-RTE_JOIN members of 'relids'.
  */
index 47667c0337464aa0fb654da031da0b8e71006b4d..ebe850622d3f35338ab3282b250ca6cb7a8047ec 100644 (file)
@@ -114,6 +114,7 @@ extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
 extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan);
 extern Bitmapset *pgpa_relids(Plan *plan);
 extern Index pgpa_scanrelid(Plan *plan);
+extern bool pgpa_is_scan_level_materialize(Plan *plan);
 extern Bitmapset *pgpa_filter_out_join_relids(Bitmapset *relids, List *rtable);
 
 extern bool pgpa_walker_would_advise(pgpa_plan_walker_context *walker,
index 800ff7a462237b37e00866e4dba7cf6c1a340bf9..98bee88de913c83e5a93caaed2a97db37c0c82bd 100644 (file)
@@ -196,3 +196,15 @@ SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0) x;
 EXPLAIN (COSTS OFF, PLAN_ADVICE)
 SELECT * FROM (SELECT * FROM scan_table s WHERE a = 1 OFFSET 0);
 COMMIT;
+
+-- Test a non-repeatable tablesample method with a scan-level Materialize.
+CREATE EXTENSION tsm_system_time;
+CREATE TABLE scan_tsm (i int);
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT 1 FROM (SELECT i FROM scan_tsm TABLESAMPLE system_time (1000)),
+       LATERAL (SELECT i LIMIT 1);
+
+-- Same, but with the scan-level Materialize on the inner side of a join.
+EXPLAIN (COSTS OFF, PLAN_ADVICE)
+SELECT 1 FROM (SELECT 1 AS x LIMIT 1),
+       LATERAL (SELECT x FROM scan_tsm TABLESAMPLE system_time (1000));