From cf74558feb8f41b2bc459f59ed3f991024d04893 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Thu, 12 Feb 2026 15:30:13 +0900 Subject: [PATCH] Reduce LEFT JOIN to ANTI JOIN using NOT NULL constraints For a LEFT JOIN, if any var from the right-hand side (RHS) is forced to null by upper-level quals but is known to be non-null for any matching row, the only way the upper quals can be satisfied is if the join fails to match, producing a null-extended row. Thus, we can treat this left join as an anti-join. Previously, this transformation was limited to cases where the join's own quals were strict for the var forced to null by upper qual levels. This patch extends the logic to check table constraints, leveraging the NOT NULL attribute information already available thanks to the infrastructure introduced by e2debb643. If a forced-null var belongs to the RHS and is defined as NOT NULL in the schema (and is not nullable due to lower-level outer joins), we know that the left join can be reduced to an anti-join. Note that to ensure the var is not nullable by any lower-level outer joins within the current subtree, we collect the relids of base rels that are nullable within each subtree during the first pass of the reduce-outer-joins process. This allows us to verify in the second pass that a NOT NULL var is indeed safe to treat as non-nullable. Based on a proposal by Nicolas Adenis-Lamarre, but this is not the original patch. Suggested-by: Nicolas Adenis-Lamarre Author: Tender Wang Co-authored-by: Richard Guo Discussion: https://postgr.es/m/CACPGbctKMDP50PpRH09in+oWbHtZdahWSroRstLPOoSDKwoFsw@mail.gmail.com --- src/backend/optimizer/prep/prepjointree.c | 204 ++++++++++++++++++---- src/test/regress/expected/join.out | 62 +++++++ src/test/regress/sql/join.sql | 27 +++ 3 files changed, 260 insertions(+), 33 deletions(-) diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index c80bfc88d82..c90f4b32733 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -88,6 +88,8 @@ typedef struct reduce_outer_joins_pass1_state { Relids relids; /* base relids within this subtree */ bool contains_outer; /* does subtree contain outer join(s)? */ + Relids nullable_rels; /* base relids that are nullable within this + * subtree */ List *sub_states; /* List of states for subtree components */ } reduce_outer_joins_pass1_state; @@ -161,6 +163,8 @@ static void reduce_outer_joins_pass2(Node *jtnode, List *forced_null_vars); static void report_reduced_full_join(reduce_outer_joins_pass2_state *state2, int rtindex, Relids relids); +static bool has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars, + reduce_outer_joins_pass1_state *right_state); static Node *remove_useless_results_recurse(PlannerInfo *root, Node *jtnode, Node **parent_quals, Relids *dropped_outer_joins); @@ -3144,13 +3148,16 @@ flatten_simple_union_all(PlannerInfo *root) * to each side separately.) * * Another transformation we apply here is to recognize cases like - * SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.y IS NULL; - * If the join clause is strict for b.y, then only null-extended rows could - * pass the upper WHERE, and we can conclude that what the query is really - * specifying is an anti-semijoin. We change the join type from JOIN_LEFT - * to JOIN_ANTI. The IS NULL clause then becomes redundant, and must be - * removed to prevent bogus selectivity calculations, but we leave it to - * distribute_qual_to_rels to get rid of such clauses. + * SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.z IS NULL; + * If we can prove that b.z must be non-null for any matching row, either + * because the join clause is strict for b.z and b.z happens to be the join + * key b.y, or because b.z is defined NOT NULL by table constraints and is + * not nullable due to lower-level outer joins, then only null-extended rows + * could pass the upper WHERE, and we can conclude that what the query is + * really specifying is an anti-semijoin. We change the join type from + * JOIN_LEFT to JOIN_ANTI. The IS NULL clause then becomes redundant, and + * must be removed to prevent bogus selectivity calculations, but we leave + * it to distribute_qual_to_rels to get rid of such clauses. * * Also, we get rid of JOIN_RIGHT cases by flipping them around to become * JOIN_LEFT. This saves some code here and in some later planner routines; @@ -3174,8 +3181,9 @@ reduce_outer_joins(PlannerInfo *root) * to stop descending the jointree as soon as there are no outer joins * below our current point. This consideration forces a two-pass process. * The first pass gathers information about which base rels appear below - * each side of each join clause, and about whether there are outer - * join(s) below each side of each join clause. The second pass examines + * each side of each join clause, about whether there are outer join(s) + * below each side of each join clause, and about which base rels are from + * the nullable side of those outer join(s). The second pass examines * qual clauses and changes join types as it descends the tree. */ state1 = reduce_outer_joins_pass1((Node *) root->parse->jointree); @@ -3243,6 +3251,7 @@ reduce_outer_joins_pass1(Node *jtnode) result = palloc_object(reduce_outer_joins_pass1_state); result->relids = NULL; result->contains_outer = false; + result->nullable_rels = NULL; result->sub_states = NIL; if (jtnode == NULL) @@ -3266,29 +3275,62 @@ reduce_outer_joins_pass1(Node *jtnode) result->relids = bms_add_members(result->relids, sub_state->relids); result->contains_outer |= sub_state->contains_outer; + result->nullable_rels = bms_add_members(result->nullable_rels, + sub_state->nullable_rels); result->sub_states = lappend(result->sub_states, sub_state); } } else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; - reduce_outer_joins_pass1_state *sub_state; + reduce_outer_joins_pass1_state *left_state; + reduce_outer_joins_pass1_state *right_state; + + /* Recurse to children */ + left_state = reduce_outer_joins_pass1(j->larg); + right_state = reduce_outer_joins_pass1(j->rarg); /* join's own RT index is not wanted in result->relids */ - if (IS_OUTER_JOIN(j->jointype)) - result->contains_outer = true; - - sub_state = reduce_outer_joins_pass1(j->larg); - result->relids = bms_add_members(result->relids, - sub_state->relids); - result->contains_outer |= sub_state->contains_outer; - result->sub_states = lappend(result->sub_states, sub_state); - - sub_state = reduce_outer_joins_pass1(j->rarg); - result->relids = bms_add_members(result->relids, - sub_state->relids); - result->contains_outer |= sub_state->contains_outer; - result->sub_states = lappend(result->sub_states, sub_state); + result->relids = bms_union(left_state->relids, right_state->relids); + + /* Store children's states for pass 2 */ + result->sub_states = list_make2(left_state, right_state); + + /* Collect outer join information */ + switch (j->jointype) + { + case JOIN_INNER: + case JOIN_SEMI: + /* No new nullability; propagate state from children */ + result->contains_outer = left_state->contains_outer || + right_state->contains_outer; + result->nullable_rels = bms_union(left_state->nullable_rels, + right_state->nullable_rels); + break; + case JOIN_LEFT: + case JOIN_ANTI: + /* RHS is nullable; LHS keeps existing status */ + result->contains_outer = true; + result->nullable_rels = bms_union(left_state->nullable_rels, + right_state->relids); + break; + case JOIN_RIGHT: + /* LHS is nullable; RHS keeps existing status */ + result->contains_outer = true; + result->nullable_rels = bms_union(left_state->relids, + right_state->nullable_rels); + break; + case JOIN_FULL: + /* Both sides are nullable */ + result->contains_outer = true; + result->nullable_rels = bms_union(left_state->relids, + right_state->relids); + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) j->jointype); + break; + } } else elog(ERROR, "unrecognized node type: %d", @@ -3440,15 +3482,16 @@ reduce_outer_joins_pass2(Node *jtnode, /* * See if we can reduce JOIN_LEFT to JOIN_ANTI. This is the case if - * the join's own quals are strict for any var that was forced null by - * higher qual levels. NOTE: there are other ways that we could - * detect an anti-join, in particular if we were to check whether Vars - * coming from the RHS must be non-null because of table constraints. - * That seems complicated and expensive though (in particular, one - * would have to be wary of lower outer joins). For the moment this - * seems sufficient. + * any var from the RHS was forced null by higher qual levels, but is + * known to be non-nullable. We detect this either by seeing if the + * join's own quals are strict for the var, or by checking if the var + * is defined NOT NULL by table constraints (being careful to exclude + * vars that are nullable due to lower-level outer joins). In either + * case, the only way the higher qual clause's requirement for NULL + * can be met is if the join fails to match, producing a null-extended + * row. Thus, we can treat this as an anti-join. */ - if (jointype == JOIN_LEFT) + if (jointype == JOIN_LEFT && forced_null_vars != NIL) { List *nonnullable_vars; Bitmapset *overlap; @@ -3460,9 +3503,13 @@ reduce_outer_joins_pass2(Node *jtnode, * It's not sufficient to check whether nonnullable_vars and * forced_null_vars overlap: we need to know if the overlap * includes any RHS variables. + * + * Also check if any forced-null var is defined NOT NULL by table + * constraints. */ overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); - if (bms_overlap(overlap, right_state->relids)) + if (bms_overlap(overlap, right_state->relids) || + has_notnull_forced_var(root, forced_null_vars, right_state)) jointype = JOIN_ANTI; } @@ -3598,6 +3645,97 @@ report_reduced_full_join(reduce_outer_joins_pass2_state *state2, state2->partial_reduced = lappend(state2->partial_reduced, statep); } +/* + * has_notnull_forced_var + * Check if "forced_null_vars" contains any Vars belonging to the subtree + * indicated by "right_state" that are known to be non-nullable due to + * table constraints. + * + * Note that we must also consider the situation where a NOT NULL Var can be + * nulled by lower-level outer joins. + * + * Helper for reduce_outer_joins_pass2. + */ +static bool +has_notnull_forced_var(PlannerInfo *root, List *forced_null_vars, + reduce_outer_joins_pass1_state *right_state) +{ + int varno = -1; + + foreach_node(Bitmapset, attrs, forced_null_vars) + { + RangeTblEntry *rte; + Bitmapset *notnullattnums; + Bitmapset *forcednullattnums = NULL; + int attno; + + varno++; + + /* Skip empty bitmaps */ + if (bms_is_empty(attrs)) + continue; + + /* Skip Vars that do not belong to the target relations */ + if (!bms_is_member(varno, right_state->relids)) + continue; + + /* + * Skip Vars that can be nulled by lower-level outer joins within the + * given subtree. These Vars might be NULL even if the schema defines + * them as NOT NULL. + */ + if (bms_is_member(varno, right_state->nullable_rels)) + continue; + + /* + * Iterate over attributes and adjust the bitmap indexes by + * FirstLowInvalidHeapAttributeNumber to get the actual attribute + * numbers. + */ + attno = -1; + while ((attno = bms_next_member(attrs, attno)) >= 0) + { + AttrNumber real_attno = attno + FirstLowInvalidHeapAttributeNumber; + + /* system columns cannot be NULL */ + if (real_attno < 0) + return true; + + forcednullattnums = bms_add_member(forcednullattnums, real_attno); + } + + rte = rt_fetch(varno, root->parse->rtable); + + /* + * We must skip inheritance parent tables, as some child tables may + * have a NOT NULL constraint for a column while others may not. This + * cannot happen with partitioned tables, though. + */ + if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE) + { + bms_free(forcednullattnums); + continue; + } + + /* Get the column not-null constraint information for this relation */ + notnullattnums = find_relation_notnullatts(root, rte->relid); + + /* + * Check if any forced-null attributes are defined as NOT NULL by + * table constraints. + */ + if (bms_overlap(notnullattnums, forcednullattnums)) + { + bms_free(forcednullattnums); + return true; + } + + bms_free(forcednullattnums); + } + + return false; +} + /* * remove_useless_result_rtes diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index d05a0ca0373..63d3c5d3ac8 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3273,6 +3273,68 @@ where (hundred, thousand) in (select twothousand, twothousand from onek); reset enable_memoize; -- +-- more antijoin recognition tests using NOT NULL constraints +-- +begin; +create temp table tbl_anti(a int not null, b int, c int); +-- this is an antijoin, as t2.a is non-null for any matching row +explain (costs off) +select * from tenk1 t1 left join tbl_anti t2 on t1.unique1 = t2.b +where t2.a is null; + QUERY PLAN +---------------------------------- + Hash Right Anti Join + Hash Cond: (t2.b = t1.unique1) + -> Seq Scan on tbl_anti t2 + -> Hash + -> Seq Scan on tenk1 t1 +(5 rows) + +-- this is an antijoin, as t2.a is non-null for any matching row +explain (costs off) +select * from tenk1 t1 left join + (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b +where t2.a is null; + QUERY PLAN +------------------------------------------- + Hash Right Anti Join + Hash Cond: (t2.b = t1.unique1) + -> Merge Left Join + Merge Cond: (t2.c = t3.c) + -> Sort + Sort Key: t2.c + -> Seq Scan on tbl_anti t2 + -> Sort + Sort Key: t3.c + -> Seq Scan on tbl_anti t3 + -> Hash + -> Seq Scan on tenk1 t1 +(12 rows) + +-- this is not an antijoin, as t3.a can be nulled by t2/t3 join +explain (costs off) +select * from tenk1 t1 left join + (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b +where t3.a is null; + QUERY PLAN +------------------------------------------- + Hash Right Join + Hash Cond: (t2.b = t1.unique1) + Filter: (t3.a IS NULL) + -> Merge Left Join + Merge Cond: (t2.c = t3.c) + -> Sort + Sort Key: t2.c + -> Seq Scan on tbl_anti t2 + -> Sort + Sort Key: t3.c + -> Seq Scan on tbl_anti t3 + -> Hash + -> Seq Scan on tenk1 t1 +(13 rows) + +rollback; +-- -- regression test for bogus RTE_GROUP entries -- explain (costs off) diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index b91fb7574df..14cbec28766 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -866,6 +866,33 @@ select 1 from tenk1 where (hundred, thousand) in (select twothousand, twothousand from onek); reset enable_memoize; +-- +-- more antijoin recognition tests using NOT NULL constraints +-- + +begin; + +create temp table tbl_anti(a int not null, b int, c int); + +-- this is an antijoin, as t2.a is non-null for any matching row +explain (costs off) +select * from tenk1 t1 left join tbl_anti t2 on t1.unique1 = t2.b +where t2.a is null; + +-- this is an antijoin, as t2.a is non-null for any matching row +explain (costs off) +select * from tenk1 t1 left join + (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b +where t2.a is null; + +-- this is not an antijoin, as t3.a can be nulled by t2/t3 join +explain (costs off) +select * from tenk1 t1 left join + (tbl_anti t2 left join tbl_anti t3 on t2.c = t3.c) on t1.unique1 = t2.b +where t3.a is null; + +rollback; + -- -- regression test for bogus RTE_GROUP entries -- -- 2.47.3