From: Richard Guo
Date: Wed, 3 Jun 2026 01:12:40 +0000 (+0900)
Subject: Fix eager aggregation for semi/antijoin inner rels
X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=ffeda04259bb0b78e901c61e4b9d0ad86e786f4f;p=thirdparty%2Fpostgresql.git

Fix eager aggregation for semi/antijoin inner rels

Eager aggregation pushes a partial aggregate down to a base or join
relation, to be finalized after that relation is joined with the rest
of the query. eager_aggregation_possible_for_relation() already refuses
to do this for a relation on the nullable side of an outer join, but it
failed to also refuse it for a relation on the inner side of a semijoin
or antijoin. Such a join does not emit its inner rows, so a partial
aggregate computed on the inner side does not survive the join and
cannot be combined by the final aggregation.

This can happen only for an aggregate that references no table column,
such as count(*): it is considered computable on any relation,
including the inner one, whereas an aggregate that references a column
is anchored to the outer side and never reaches the inner relation.

The existing outer-join check did not catch this because it consults
nulling_relids, which only tracks joins that null-extend their inner
side. Semijoins and antijoins formed from EXISTS, IN, NOT EXISTS, or
NOT IN sublinks do not null-extend and carry no ojrelid, so they are
invisible to that check.

Fix by additionally rejecting any relation that includes inner-side
relations of a semijoin or antijoin but not the join's outer side.
Pushing a partial aggregate to the outer side of such a join, grouped
by the join key, remains valid and is still allowed.

Reported-by: Radim Marek Author: Richard Guo Reviewed-by: Tender Wang Discussion: https://postgr.es/m/CAJgoLk+d_P5sKrx-SZt01Acm_j0QnWn6aKJzFJ=waRu_3C8AoQ@mail.gmail.com --- diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 6c35baceedb..78a307cc523 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1588,6 +1588,17 @@ aggregation. Pushing partial aggregation in this case may result in the rows being grouped differently than expected, or produce incorrect values from the aggregate functions. +Semi joins and anti joins impose a similar restriction. Such a join +does not preserve its inner rows in the join output, so a partial +aggregate computed on the inner side would not survive the join and +could not be combined by the final aggregation. We therefore do not +push partial aggregation down to the inner side of a semi/anti join. +(An anti join reduced from an outer join null-extends its inner side, +so that inner relation is already excluded by the outer-join condition +above; the case specifically addressed here is a semi/anti join that +does not null-extend its inner side, such as one formed from an +EXISTS, IN, NOT EXISTS, or NOT IN sublink.) + During the construction of the join tree, we evaluate each base or join relation to determine if eager aggregation can be applied. If feasible, we create a separate RelOptInfo called a "grouped relation" diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 3fc2c2f71d0..687e923c46c 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -2845,6 +2845,32 @@ eager_aggregation_possible_for_relation(PlannerInfo *root, RelOptInfo *rel) return false; } + /* + * Similarly, we cannot push a partial aggregation down to a relation on + * the inner (RHS) side of a semi/anti join. 
A semi/anti join does not + * preserve its inner rows in the join output, so a partial aggregate + * computed on the inner side would not survive the join and could not be + * combined by the final aggregation. + * + * Note that an anti join reduced from an outer join null-extends its + * inner side, so that inner relation already carries nulling_relids and + * is handled by the outer-join check above. The case this check adds is + * a semi/anti join that does not null-extend its inner side, such as one + * formed from an EXISTS, IN, NOT EXISTS, or NOT IN sublink. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *sjinfo = lfirst_node(SpecialJoinInfo, lc); + + if (sjinfo->jointype != JOIN_SEMI && sjinfo->jointype != JOIN_ANTI) + continue; + + /* rel includes inner-side rels of this join but not its outer side */ + if (bms_overlap(rel->relids, sjinfo->min_righthand) && + !bms_is_subset(sjinfo->min_lefthand, rel->relids)) + return false; + } + /* * For now we don't try to support PlaceHolderVars. 
*/ diff --git a/src/test/regress/expected/eager_aggregate.out b/src/test/regress/expected/eager_aggregate.out index 456d32eb13d..091ae48a92b 100644 --- a/src/test/regress/expected/eager_aggregate.out +++ b/src/test/regress/expected/eager_aggregate.out @@ -466,6 +466,96 @@ GROUP BY t1.a ORDER BY t1.a; -> Seq Scan on eager_agg_t1 t1 (9 rows) +-- Eager aggregation must not push a partial aggregate onto the inner side of a +-- SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + QUERY PLAN +------------------------------------------------------------ + Sort + Output: t2.b, (count(*)) + Sort Key: t2.b + -> HashAggregate + Output: t2.b, count(*) + Group Key: t2.b + -> Hash Anti Join + Output: t2.b + Hash Cond: (t2.a = t3.a) + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c + -> Hash + Output: t3.a + -> Seq Scan on public.eager_agg_t3 t3 + Output: t3.a +(15 rows) + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + b | count +---+------- + 0 | 100 + 1 | 99 + 2 | 99 + 3 | 99 + 4 | 99 + 5 | 99 + 6 | 99 + 7 | 99 + 8 | 99 + 9 | 99 +(10 rows) + +-- Eager aggregation may still push a partial aggregate onto the outer side of +-- a SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Output: t2.b, count(*) + Group Key: t2.b + -> Sort + Output: t2.b, (PARTIAL count(*)) + Sort Key: t2.b + -> Hash Right Semi Join + Output: t2.b, (PARTIAL count(*)) + Hash Cond: (t1.b = t2.b) + -> Seq Scan on public.eager_agg_t1 t1 + Output: t1.a, t1.b, t1.c + -> Hash + Output: t2.b, (PARTIAL count(*)) + -> 
Partial HashAggregate + Output: t2.b, PARTIAL count(*) + Group Key: t2.b + -> Seq Scan on public.eager_agg_t2 t2 + Output: t2.a, t2.b, t2.c +(18 rows) + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + b | count +---+------- + 1 | 100 + 2 | 100 + 3 | 100 + 4 | 100 + 5 | 100 + 6 | 100 + 7 | 100 + 8 | 100 + 9 | 100 +(9 rows) + DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; DROP TABLE eager_agg_t3; diff --git a/src/test/regress/sql/eager_aggregate.sql b/src/test/regress/sql/eager_aggregate.sql index 53d9b377a64..7bca9c524da 100644 --- a/src/test/regress/sql/eager_aggregate.sql +++ b/src/test/regress/sql/eager_aggregate.sql @@ -177,6 +177,32 @@ SELECT t1.a, avg(t2.c) FILTER (WHERE random() > 0.5) JOIN eager_agg_t2 t2 ON t1.b = t2.b GROUP BY t1.a ORDER BY t1.a; +-- Eager aggregation must not push a partial aggregate onto the inner side of a +-- SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE NOT EXISTS (SELECT 1 FROM eager_agg_t3 t3 WHERE t3.a = t2.a) +GROUP BY t2.b ORDER BY t2.b; + +-- Eager aggregation may still push a partial aggregate onto the outer side of +-- a SEMI or ANTI join +EXPLAIN (VERBOSE, COSTS OFF) +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + +SELECT t2.b, count(*) + FROM eager_agg_t2 t2 + WHERE EXISTS (SELECT 1 FROM eager_agg_t1 t1 WHERE t1.b = t2.b) +GROUP BY t2.b ORDER BY t2.b; + DROP TABLE eager_agg_t1; DROP TABLE eager_agg_t2; DROP TABLE eager_agg_t3;