]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
pg_plan_advice: Refactor to invent pgpa_planner_info
authorRobert Haas <rhaas@postgresql.org>
Thu, 26 Mar 2026 15:57:33 +0000 (11:57 -0400)
committerRobert Haas <rhaas@postgresql.org>
Thu, 26 Mar 2026 15:57:33 +0000 (11:57 -0400)
pg_plan_advice tracks two pieces of per-PlannerInfo data: (1) for each
RTI, the corresponding relation identifier, for purposes of
cross-checking those calculations against the final plan; and (2) the
set of semijoins seen during planning for which the strategy of making
one side unique was considered. The former is tracked using a hash
table that uses <plan_name, RTI> as the key, and the latter is
tracked using a List of <plan_name, relids>.

It seems better to track both of these things in the same way and
to try to reuse some code instead of having everything be completely
separate, so invent pgpa_planner_info; we'll create one every time we
see a new PlannerInfo and need to associate some data with it, and
we'll use the plan_name field to distinguish between PlannerInfo
objects, as it should always be unique. Then, refactor the two
systems mentioned above to use this new infrastructure.

(Note that the adjustment in pgpa_plan_walker is necessary in order
to avoid spuriously triggering the sanity check in that function,
in the case where a pgpa_planner_info is created for a purpose not
related to sj_unique_rels.)

Discussion: https://postgr.es/m/CA+TgmoaK=4w7-qknUo3QhUJ53pXZq=c=KgZmRyD+k7ytqfmgSg@mail.gmail.com
Reviewed-by: Lukas Fittl <lukas@fittl.com>
contrib/pg_plan_advice/pgpa_planner.c
contrib/pg_plan_advice/pgpa_planner.h
contrib/pg_plan_advice/pgpa_walker.c
contrib/pg_plan_advice/pgpa_walker.h
src/tools/pgindent/typedefs.list

index fee88904760c6e4783eccdc7c7608cf706d866a9..6b574da655f1d55aee5d67d01932969ca5f1ecbe 100644 (file)
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
 
-#ifdef USE_ASSERT_CHECKING
-
-/*
- * When assertions are enabled, we try generating relation identifiers during
- * planning, saving them in a hash table, and then cross-checking them against
- * the ones generated after planning is complete.
- */
-typedef struct pgpa_ri_checker_key
-{
-       char       *plan_name;
-       Index           rti;
-} pgpa_ri_checker_key;
-
-typedef struct pgpa_ri_checker
-{
-       pgpa_ri_checker_key key;
-       uint32          status;
-       const char *rid_string;
-} pgpa_ri_checker;
-
-static uint32 pgpa_ri_checker_hash_key(pgpa_ri_checker_key key);
-
-static inline bool
-pgpa_ri_checker_compare_key(pgpa_ri_checker_key a, pgpa_ri_checker_key b)
-{
-       if (a.rti != b.rti)
-               return false;
-       if (a.plan_name == NULL)
-               return (b.plan_name == NULL);
-       if (b.plan_name == NULL)
-               return false;
-       return strcmp(a.plan_name, b.plan_name) == 0;
-}
-
-#define SH_PREFIX                      pgpa_ri_check
-#define SH_ELEMENT_TYPE                pgpa_ri_checker
-#define SH_KEY_TYPE                    pgpa_ri_checker_key
-#define SH_KEY                         key
-#define SH_HASH_KEY(tb, key)   pgpa_ri_checker_hash_key(key)
-#define        SH_EQUAL(tb, a, b)      pgpa_ri_checker_compare_key(a, b)
-#define SH_SCOPE                       static inline
-#define SH_DECLARE
-#define SH_DEFINE
-#include "lib/simplehash.h"
-
-#endif
-
 typedef enum pgpa_jo_outcome
 {
        PGPA_JO_PERMITTED,                      /* permit this join order */
@@ -94,11 +47,8 @@ typedef struct pgpa_planner_state
        bool            generate_advice_feedback;
        bool            generate_advice_string;
        pgpa_trove *trove;
-       List       *sj_unique_rels;
-
-#ifdef USE_ASSERT_CHECKING
-       pgpa_ri_check_hash *ri_check_hash;
-#endif
+       List       *proots;
+       pgpa_planner_info *last_proot;
 } pgpa_planner_state;
 
 typedef struct pgpa_join_state
@@ -211,6 +161,9 @@ static List *pgpa_planner_append_feedback(List *list, pgpa_trove *trove,
                                                                                  pgpa_plan_walker_context *walker);
 static void pgpa_planner_feedback_warning(List *feedback);
 
+static pgpa_planner_info *pgpa_planner_get_proot(pgpa_planner_state *pps,
+                                                                                                PlannerInfo *root);
+
 static inline void pgpa_ri_checker_save(pgpa_planner_state *pps,
                                                                                PlannerInfo *root,
                                                                                RelOptInfo *rel);
@@ -340,10 +293,6 @@ pgpa_planner_setup(PlannerGlobal *glob, Query *parse, const char *query_string,
                pps->generate_advice_feedback = generate_advice_feedback;
                pps->generate_advice_string = generate_advice_string;
                pps->trove = trove;
-#ifdef USE_ASSERT_CHECKING
-               pps->ri_check_hash =
-                       pgpa_ri_check_create(CurrentMemoryContext, 1024, NULL);
-#endif
                SetPlannerGlobalExtensionState(glob, planner_extension_id, pps);
        }
 
@@ -384,7 +333,7 @@ pgpa_planner_shutdown(PlannerGlobal *glob, Query *parse,
         */
        if (generate_advice_string || generate_advice_feedback)
        {
-               pgpa_plan_walker(&walker, pstmt, pps->sj_unique_rels);
+               pgpa_plan_walker(&walker, pstmt, pps->proots);
                rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
        }
 
@@ -592,8 +541,7 @@ pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel,
 
        /*
         * If we're considering implementing a semijoin by making one side unique,
-        * make a note of it in the pgpa_planner_state. See comments for
-        * pgpa_sj_unique_rel for why we do this.
+        * make a note of it in the pgpa_planner_state.
         */
        if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
        {
@@ -605,36 +553,24 @@ pgpa_join_path_setup(PlannerInfo *root, RelOptInfo *joinrel,
                if (pps != NULL &&
                        (pps->generate_advice_string || pps->generate_advice_feedback))
                {
-                       bool            found = false;
-
-                       /* Avoid adding duplicates. */
-                       foreach_ptr(pgpa_sj_unique_rel, ur, pps->sj_unique_rels)
-                       {
-                               /*
-                                * We should always use the same pointer for the same plan
-                                * name, so we need not use strcmp() here.
-                                */
-                               if (root->plan_name == ur->plan_name &&
-                                       bms_equal(uniquerel->relids, ur->relids))
-                               {
-                                       found = true;
-                                       break;
-                               }
-                       }
+                       pgpa_planner_info *proot;
+                       MemoryContext oldcontext;
 
-                       /* If not a duplicate, append to the list. */
-                       if (!found)
-                       {
-                               pgpa_sj_unique_rel *ur;
-                               MemoryContext oldcontext;
-
-                               oldcontext = MemoryContextSwitchTo(pps->mcxt);
-                               ur = palloc_object(pgpa_sj_unique_rel);
-                               ur->plan_name = root->plan_name;
-                               ur->relids = bms_copy(uniquerel->relids);
-                               pps->sj_unique_rels = lappend(pps->sj_unique_rels, ur);
-                               MemoryContextSwitchTo(oldcontext);
-                       }
+                       /*
+                        * Get or create a pgpa_planner_info object, and then add the
+                        * relids from the unique side to proot->sj_unique_rels.
+                        *
+                        * We must be careful here to use a sufficiently long-lived
+                        * context, since we might have been called by GEQO. We want all
+                        * the data we store here (including the proot, if we create it)
+                        * to last for as long as the pgpa_planner_state.
+                        */
+                       oldcontext = MemoryContextSwitchTo(pps->mcxt);
+                       proot = pgpa_planner_get_proot(pps, root);
+                       if (!list_member(proot->sj_unique_rels, uniquerel->relids))
+                               proot->sj_unique_rels = lappend(proot->sj_unique_rels,
+                                                                                               bms_copy(uniquerel->relids));
+                       MemoryContextSwitchTo(oldcontext);
                }
        }
 
@@ -2017,33 +1953,62 @@ pgpa_planner_feedback_warning(List *feedback)
                                errdetail("%s", detailbuf.data));
 }
 
-#ifdef USE_ASSERT_CHECKING
-
 /*
- * Fast hash function for a key consisting of an RTI and plan name.
+ * Get or create the pgpa_planner_info for the given PlannerInfo.
  */
-static uint32
-pgpa_ri_checker_hash_key(pgpa_ri_checker_key key)
+static pgpa_planner_info *
+pgpa_planner_get_proot(pgpa_planner_state *pps, PlannerInfo *root)
 {
-       fasthash_state hs;
-       int                     sp_len;
+       pgpa_planner_info *new_proot;
 
-       fasthash_init(&hs, 0);
+       /*
+        * If pps->last_proot isn't populated, there are no pgpa_planner_info
+        * objects yet, so we can drop through and create a new one. Otherwise,
+        * search for an object with a matching name, and drop through only if
+        * none is found.
+        */
+       if (pps->last_proot != NULL)
+       {
+               if (root->plan_name == NULL)
+               {
+                       if (pps->last_proot->plan_name == NULL)
+                               return pps->last_proot;
 
-       hs.accum = key.rti;
-       fasthash_combine(&hs);
+                       foreach_ptr(pgpa_planner_info, proot, pps->proots)
+                       {
+                               if (proot->plan_name == NULL)
+                               {
+                                       pps->last_proot = proot;
+                                       return proot;
+                               }
+                       }
+               }
+               else
+               {
+                       if (pps->last_proot->plan_name != NULL &&
+                               strcmp(pps->last_proot->plan_name, root->plan_name) == 0)
+                               return pps->last_proot;
 
-       /* plan_name can be NULL */
-       if (key.plan_name == NULL)
-               sp_len = 0;
-       else
-               sp_len = fasthash_accum_cstring(&hs, key.plan_name);
+                       foreach_ptr(pgpa_planner_info, proot, pps->proots)
+                       {
+                               if (proot->plan_name != NULL &&
+                                       strcmp(proot->plan_name, root->plan_name) == 0)
+                               {
+                                       pps->last_proot = proot;
+                                       return proot;
+                               }
+                       }
+               }
+       }
 
-       /* hashfn_unstable.h recommends using string length as tweak */
-       return fasthash_final32(&hs, sp_len);
-}
+       /* Create new object, add to list, and make it most recently used. */
+       new_proot = palloc0_object(pgpa_planner_info);
+       new_proot->plan_name = root->plan_name;
+       pps->proots = lappend(pps->proots, new_proot);
+       pps->last_proot = new_proot;
 
-#endif
+       return new_proot;
+}
 
 /*
  * Save the range table identifier for one relation for future cross-checking.
@@ -2053,19 +2018,31 @@ pgpa_ri_checker_save(pgpa_planner_state *pps, PlannerInfo *root,
                                         RelOptInfo *rel)
 {
 #ifdef USE_ASSERT_CHECKING
-       pgpa_ri_checker_key key;
-       pgpa_ri_checker *check;
-       pgpa_identifier rid;
-       const char *rid_string;
-       bool            found;
-
-       key.rti = bms_singleton_member(rel->relids);
-       key.plan_name = root->plan_name;
-       pgpa_compute_identifier_by_rti(root, key.rti, &rid);
-       rid_string = pgpa_identifier_string(&rid);
-       check = pgpa_ri_check_insert(pps->ri_check_hash, key, &found);
-       Assert(!found || strcmp(check->rid_string, rid_string) == 0);
-       check->rid_string = rid_string;
+       pgpa_planner_info *proot;
+       pgpa_identifier *rid;
+
+       /* Get the pgpa_planner_info for this PlannerInfo. */
+       proot = pgpa_planner_get_proot(pps, root);
+
+       /* Allocate or extend the proot's rid_array as necessary. */
+       if (proot->rid_array_size < rel->relid)
+       {
+               int                     new_size = pg_nextpower2_32(Max(rel->relid, 8));
+
+               if (proot->rid_array_size == 0)
+                       proot->rid_array = palloc0_array(pgpa_identifier, new_size);
+               else
+                       proot->rid_array = repalloc0_array(proot->rid_array,
+                                                                                          pgpa_identifier,
+                                                                                          proot->rid_array_size,
+                                                                                          new_size);
+               proot->rid_array_size = new_size;
+       }
+
+       /* Save relation identifier details for this RTI if not already done. */
+       rid = &proot->rid_array[rel->relid - 1];
+       if (rid->alias_name == NULL)
+               pgpa_compute_identifier_by_rti(root, rel->relid, rid);
 #endif
 }
 
@@ -2078,26 +2055,22 @@ pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt)
 {
 #ifdef USE_ASSERT_CHECKING
        pgpa_identifier *rt_identifiers;
-       pgpa_ri_check_iterator it;
-       pgpa_ri_checker *check;
+       Index           rtable_length = list_length(pstmt->rtable);
 
        /* Create identifiers from the planned statement. */
        rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
 
        /* Iterate over identifiers created during planning, so we can compare. */
-       pgpa_ri_check_start_iterate(pps->ri_check_hash, &it);
-       while ((check = pgpa_ri_check_iterate(pps->ri_check_hash, &it)) != NULL)
+       foreach_ptr(pgpa_planner_info, proot, pps->proots)
        {
                int                     rtoffset = 0;
-               const char *rid_string;
-               Index           flat_rti;
 
                /*
                 * If there's no plan name associated with this entry, then the
                 * rtoffset is 0. Otherwise, we can search the SubPlanRTInfo list to
                 * find the rtoffset.
                 */
-               if (check->key.plan_name != NULL)
+               if (proot->plan_name != NULL)
                {
                        foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos)
                        {
@@ -2109,18 +2082,8 @@ pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt)
                                 * will be copied, as per add_rtes_to_flat_rtable. Therefore,
                                 * there's no fixed rtoffset that we can apply to the RTIs
                                 * used during planning to locate the corresponding relations
-                                * in the final rtable.
-                                *
-                                * With more complex logic, we could work around that problem
-                                * by remembering the whole contents of the subquery's rtable
-                                * during planning, determining which of those would have been
-                                * copied to the final rtable, and matching them up. But it
-                                * doesn't seem like a worthwhile endeavor for right now,
-                                * because RTIs from such subqueries won't appear in the plan
-                                * tree itself, just in the range table. Hence, we can neither
-                                * generate nor accept advice for them.
                                 */
-                               if (strcmp(check->key.plan_name, rtinfo->plan_name) == 0
+                               if (strcmp(proot->plan_name, rtinfo->plan_name) == 0
                                        && !rtinfo->dummy)
                                {
                                        rtoffset = rtinfo->rtoffset;
@@ -2139,17 +2102,24 @@ pgpa_ri_checker_validate(pgpa_planner_state *pps, PlannedStmt *pstmt)
                                continue;
                }
 
-               /*
-                * check->key.rti is the RTI that we saw prior to range-table
-                * flattening, so we must add the appropriate RT offset to get the
-                * final RTI.
-                */
-               flat_rti = check->key.rti + rtoffset;
-               Assert(flat_rti <= list_length(pstmt->rtable));
+               for (int rti = 1; rti <= proot->rid_array_size; ++rti)
+               {
+                       Index           flat_rti = rtoffset + rti;
+                       pgpa_identifier *rid1 = &proot->rid_array[rti - 1];
+                       pgpa_identifier *rid2;
+
+                       if (rid1->alias_name == NULL)
+                               continue;
 
-               /* Assert that the string we compute now matches the previous one. */
-               rid_string = pgpa_identifier_string(&rt_identifiers[flat_rti - 1]);
-               Assert(strcmp(rid_string, check->rid_string) == 0);
+                       Assert(flat_rti <= rtable_length);
+                       rid2 = &rt_identifiers[flat_rti - 1];
+                       Assert(strcmp(rid1->alias_name, rid2->alias_name) == 0);
+                       Assert(rid1->occurrence == rid2->occurrence);
+                       Assert(strings_equal_or_both_null(rid1->partnsp, rid2->partnsp));
+                       Assert(strings_equal_or_both_null(rid1->partrel, rid2->partrel));
+                       Assert(strings_equal_or_both_null(rid1->plan_name,
+                                                                                         rid2->plan_name));
+               }
        }
 #endif
 }
index c70e486a7f33d0231491c93f6110404dc21f7f40..e9045f69bca496eb8ba6b7254a21e1327ba272a7 100644 (file)
 #ifndef PGPA_PLANNER_H
 #define PGPA_PLANNER_H
 
+#include "pgpa_identifier.h"
+
 extern void pgpa_planner_install_hooks(void);
 
+/*
+ * Per-PlannerInfo information that we gather during query planning.
+ */
+typedef struct pgpa_planner_info
+{
+       /* Plan name taken from the corresponding PlannerInfo; NULL at top level. */
+       char       *plan_name;
+
+#ifdef USE_ASSERT_CHECKING
+       /* Relation identifiers computed for baserels at this query level. */
+       pgpa_identifier *rid_array;
+       int                     rid_array_size;
+#endif
+
+       /*
+        * List of Bitmapset objects. Each represents the relid set of a relation
+        * that the planner considers making unique during semijoin planning.
+        *
+        * When generating advice, we should emit either SEMIJOIN_UNIQUE advice or
+        * SEMIJOIN_NON_UNIQUE advice for each semijoin depending on whether we
+        * chose to implement it as a semijoin or whether we instead chose to make
+        * the nullable side unique and then perform an inner join. When the
+        * make-unique strategy is not chosen, it's not easy to tell from the
+        * final plan tree whether it was considered. That's awkward, because we
+        * don't want to emit useless SEMIJOIN_NON_UNIQUE advice when there was no
+        * decision to be made. This list lets the plan tree walker know in which
+        * cases that approach was considered, so that it doesn't have to guess.
+        */
+       List       *sj_unique_rels;
+} pgpa_planner_info;
+
+/*
+ * When set to a value greater than zero, indicates that advice should be
+ * generated during query planning even in the absence of obvious reasons to
+ * do so. See pg_plan_advice_request_advice_generation().
+ */
 extern int     pgpa_planner_generate_advice;
 
 #endif
index 7b86cc5e6f9ad4278900fd3ef70d96106c49adde..6fbc784bf54d2c798a94c1f86b969eb86c2f2dd0 100644 (file)
@@ -12,6 +12,7 @@
 #include "postgres.h"
 
 #include "pgpa_join.h"
+#include "pgpa_planner.h"
 #include "pgpa_scan.h"
 #include "pgpa_walker.h"
 
@@ -64,12 +65,12 @@ static bool pgpa_walker_contains_no_gather(pgpa_plan_walker_context *walker,
  *
  * Populates walker based on a traversal of the Plan trees in pstmt.
  *
- * sj_unique_rels is a list of pgpa_sj_unique_rel objects, one for each
- * relation we considered making unique as part of semijoin planning.
+ * proots is the list of pgpa_planner_info objects that were generated
+ * during planning.
  */
 void
 pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt,
-                                List *sj_unique_rels)
+                                List *proots)
 {
        ListCell   *lc;
        List       *sj_unique_rtis = NULL;
@@ -92,19 +93,21 @@ pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt,
        }
 
        /* Adjust RTIs from sj_unique_rels for the flattened range table. */
-       foreach_ptr(pgpa_sj_unique_rel, ur, sj_unique_rels)
+       foreach_ptr(pgpa_planner_info, proot, proots)
        {
-               int                     rtindex = -1;
                int                     rtoffset = 0;
                bool            dummy = false;
-               Bitmapset  *relids = NULL;
+
+               /* If there are no sj_unique_rels for this proot, we can skip it. */
+               if (proot->sj_unique_rels == NIL)
+                       continue;
 
                /* If this is a subplan, find the range table offset. */
-               if (ur->plan_name != NULL)
+               if (proot->plan_name != NULL)
                {
                        foreach_node(SubPlanRTInfo, rtinfo, pstmt->subrtinfos)
                        {
-                               if (strcmp(ur->plan_name, rtinfo->plan_name) == 0)
+                               if (strcmp(proot->plan_name, rtinfo->plan_name) == 0)
                                {
                                        rtoffset = rtinfo->rtoffset;
                                        dummy = rtinfo->dummy;
@@ -113,19 +116,24 @@ pgpa_plan_walker(pgpa_plan_walker_context *walker, PlannedStmt *pstmt,
                        }
 
                        if (rtoffset == 0)
-                               elog(ERROR, "no rtoffset for plan %s", ur->plan_name);
+                               elog(ERROR, "no rtoffset for plan %s", proot->plan_name);
                }
 
                /* If this entry pertains to a dummy subquery, ignore it. */
                if (dummy)
                        continue;
 
-               /* Offset each entry from the original set. */
-               while ((rtindex = bms_next_member(ur->relids, rtindex)) >= 0)
-                       relids = bms_add_member(relids, rtindex + rtoffset);
+               /* Offset each relid set by the rtoffset we just computed. */
+               foreach_node(Bitmapset, relids, proot->sj_unique_rels)
+               {
+                       int                     rtindex = -1;
+                       Bitmapset  *flat_relids = NULL;
 
-               /* Store the resulting set. */
-               sj_unique_rtis = lappend(sj_unique_rtis, relids);
+                       while ((rtindex = bms_next_member(relids, rtindex)) >= 0)
+                               flat_relids = bms_add_member(flat_relids, rtindex + rtoffset);
+
+                       sj_unique_rtis = lappend(sj_unique_rtis, flat_relids);
+               }
        }
 
        /*
index 4890d554dd3777bcc24f597b346567e92aa93780..9b74cd3ba55a13c627cbbcdcecb6805844777aa6 100644 (file)
 #include "pgpa_join.h"
 #include "pgpa_scan.h"
 
-/*
- * When generating advice, we should emit either SEMIJOIN_UNIQUE advice or
- * SEMIJOIN_NON_UNIQUE advice for each semijoin depending on whether we chose
- * to implement it as a semijoin or whether we instead chose to make the
- * nullable side unique and then perform an inner join. When the make-unique
- * strategy is not chosen, it's not easy to tell from the final plan tree
- * whether it was considered. That's awkward, because we don't want to emit
- * useless SEMIJOIN_NON_UNIQUE advice when there was no decision to be made.
- *
- * To avoid that, during planning, we create a pgpa_sj_unique_rel for each
- * relation that we considered making unique for purposes of semijoin planning.
- */
-typedef struct pgpa_sj_unique_rel
-{
-       char       *plan_name;
-       Bitmapset  *relids;
-} pgpa_sj_unique_rel;
-
 /*
  * We use the term "query feature" to refer to plan nodes that are interesting
  * in the following way: to generate advice, we'll need to know the set of
@@ -122,7 +104,7 @@ typedef struct pgpa_plan_walker_context
 
 extern void pgpa_plan_walker(pgpa_plan_walker_context *walker,
                                                         PlannedStmt *pstmt,
-                                                        List *sj_unique_rels);
+                                                        List *proots);
 
 extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
                                                                        pgpa_qf_type type,
index dbbec84b2226c43df4cdcba65f7b1514c42265df..decc9f7a5721f92765540906e43df32a6dddb953 100644 (file)
@@ -4034,14 +4034,12 @@ pgpa_join_strategy
 pgpa_join_unroller
 pgpa_output_context
 pgpa_plan_walker_context
+pgpa_planner_info
 pgpa_planner_state
 pgpa_qf_type
 pgpa_query_feature
-pgpa_ri_checker
-pgpa_ri_checker_key
 pgpa_scan
 pgpa_scan_strategy
-pgpa_sj_unique_rel
 pgpa_target_type
 pgpa_trove
 pgpa_trove_entry