From: Richard Guo Date: Tue, 10 Feb 2026 01:17:45 +0000 (+0900) Subject: Optimize IS DISTINCT FROM with non-nullable inputs X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0a379612540cc51e54dc1c0cc4b9ef8797d2533c;p=thirdparty%2Fpostgresql.git Optimize IS DISTINCT FROM with non-nullable inputs The IS DISTINCT FROM construct compares values acting as though NULL were a normal data value, rather than "unknown". Semantically, "x IS DISTINCT FROM y" yields true if the values differ or if exactly one is NULL, and false if they are equal or both NULL. Unlike ordinary comparison operators, it never returns NULL. Previously, the planner only simplified this construct if all inputs were constants, folding it to a constant boolean result. This patch extends the optimization to cases where inputs are non-constant but proven to be non-nullable. Specifically, "x IS DISTINCT FROM NULL" folds to constant TRUE if "x" is known to be non-nullable. For cases where both inputs are guaranteed not to be NULL, the expression becomes semantically equivalent to "x <> y", and the DistinctExpr is converted into an inequality OpExpr. This transformation provides several benefits. It converts the comparison into a standard operator, allowing the use of partial indexes and constraint exclusion. Furthermore, if the clause is negated (i.e., "IS NOT DISTINCT FROM"), it simplifies to an equality operator. This enables the planner to generate better plans using index scans, merge joins, hash joins, and EC-based qual deduction. Author: Richard Guo Reviewed-by: Tender Wang Discussion: https://postgr.es/m/CAMbWs49BMAOWvkdSHxpUDnniqJcEcGq3_8dd_5wTR4xrQY8urA@mail.gmail.com --- diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 6066510c7c0..7cad5e67d09 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -698,12 +698,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- Op Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = (- "C 1"))) (3 rows) -EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr - QUERY PLAN --------------------------------------------------------------------------------------------------------------------------------------- +EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS DISTINCT FROM c3; -- DistinctExpr + QUERY PLAN +---------------------------------------------------------------------------------------------------------- Foreign Scan on public.ft1 t1 Output: c1, c2, c3, c4, c5, c6, c7, c8 - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (((c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL))) + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((c3 IS DISTINCT FROM c3)) (3 rows) EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 4f7ab2ed0ac..eff25bd2baa 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -340,7 +340,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NULL; -- Nu EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NOT NULL; -- NullTest EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE round(abs(c1), 0) = 1; -- FuncExpr EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- OpExpr(l) -EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr +EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS DISTINCT FROM c3; -- DistinctExpr EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- SubscriptingRef EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 32204776c45..71c79ae4aed 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2705,6 +2705,7 @@ eval_const_expressions_mutator(Node *node, bool has_null_input = false; bool all_null_input = true; bool has_nonconst_input = false; + bool has_nullable_nonconst = false; Expr *simple; DistinctExpr *newexpr; @@ -2721,7 +2722,8 @@ eval_const_expressions_mutator(Node *node, /* * We must do our own check for NULLs because DistinctExpr has * different results for NULL input than the underlying - * operator does. + * operator does. We also check if any non-constant input is + * potentially nullable. */ foreach(arg, args) { @@ -2731,12 +2733,24 @@ eval_const_expressions_mutator(Node *node, all_null_input &= ((Const *) lfirst(arg))->constisnull; } else + { has_nonconst_input = true; + all_null_input = false; + + if (!has_nullable_nonconst && + !expr_is_nonnullable(context->root, + (Expr *) lfirst(arg), false)) + has_nullable_nonconst = true; + } } - /* all constants? then can optimize this out */ if (!has_nonconst_input) { + /* + * All inputs are constants. We can optimize this out + * completely. + */ + /* all nulls? then not distinct */ if (all_null_input) return makeBoolConst(false, false); @@ -2781,6 +2795,48 @@ eval_const_expressions_mutator(Node *node, return (Node *) csimple; } } + else if (!has_nullable_nonconst) + { + /* + * There are non-constant inputs, but since all of them + * are proven non-nullable, "IS DISTINCT FROM" semantics + * are much simpler. + */ + + OpExpr *eqexpr; + + /* + * If one input is an explicit NULL constant, and the + * other is a non-nullable expression, the result is + * always TRUE. + */ + if (has_null_input) + return makeBoolConst(true, false); + + /* + * Otherwise, both inputs are known non-nullable. In this + * case, "IS DISTINCT FROM" is equivalent to the standard + * inequality operator (usually "<>"). We convert this to + * an OpExpr, which is a more efficient representation for + * the planner. It can enable the use of partial indexes + * and constraint exclusion. Furthermore, if the clause + * is negated (ie, "IS NOT DISTINCT FROM"), the resulting + * "=" operator can allow the planner to use index scans, + * merge joins, hash joins, and EC-based qual deductions. + */ + eqexpr = makeNode(OpExpr); + eqexpr->opno = expr->opno; + eqexpr->opfuncid = expr->opfuncid; + eqexpr->opresulttype = BOOLOID; + eqexpr->opretset = expr->opretset; + eqexpr->opcollid = expr->opcollid; + eqexpr->inputcollid = expr->inputcollid; + eqexpr->args = args; + eqexpr->location = expr->location; + + return eval_const_expressions_mutator(negate_clause((Node *) eqexpr), + context); + } /* * The expression cannot be simplified any further, so build diff --git a/src/test/regress/expected/predicate.out b/src/test/regress/expected/predicate.out index 8ff1172008e..a85234aebf6 100644 --- a/src/test/regress/expected/predicate.out +++ b/src/test/regress/expected/predicate.out @@ -632,3 +632,139 @@ SELECT * FROM pred_tab WHERE (a::oid) IS NULL; (3 rows) DROP TABLE pred_tab; +-- +-- Test optimization of IS [NOT] DISTINCT FROM on non-nullable inputs +-- +CREATE TYPE dist_row_t AS (a int, b int); +CREATE TABLE dist_tab (id int, val_nn int NOT NULL, val_null int, row_nn dist_row_t NOT NULL); +INSERT INTO dist_tab VALUES (1, 10, 10, ROW(1, 1)); +INSERT INTO dist_tab VALUES (2, 20, NULL, ROW(2, 2)); +INSERT INTO dist_tab VALUES (3, 30, 30, ROW(1, NULL)); +CREATE INDEX dist_tab_nn_idx ON dist_tab (val_nn); +ANALYZE dist_tab; +-- Ensure that the predicate folds to constant TRUE +EXPLAIN(COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT; + QUERY PLAN +---------------------- + Seq Scan on dist_tab +(1 row) + +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT; + id +---- + 1 + 2 + 3 +(3 rows) + +-- Ensure that the predicate folds to constant FALSE +EXPLAIN(COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT; + QUERY PLAN +------------------------------ + Result + Replaces: Scan on dist_tab + One-Time Filter: false +(3 rows) + +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT; + id +---- +(0 rows) + +-- Ensure that the predicate is converted to an inequality operator +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10; + QUERY PLAN +-------------------------- + Seq Scan on dist_tab + Filter: (val_nn <> 10) +(2 rows) + +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10; + id +---- + 2 + 3 +(2 rows) + +-- Ensure that the predicate is converted to an equality operator, and thus can +-- use index scan +SET enable_seqscan TO off; +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10; + QUERY PLAN +---------------------------------------------- + Index Scan using dist_tab_nn_idx on dist_tab + Index Cond: (val_nn = 10) +(2 rows) + +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10; + id +---- + 1 +(1 row) + +RESET enable_seqscan; +-- Ensure that the predicate is preserved as "IS DISTINCT FROM" +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20; + QUERY PLAN +------------------------------------------ + Seq Scan on dist_tab + Filter: (val_null IS DISTINCT FROM 20) +(2 rows) + +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20; + id +---- + 1 + 2 + 3 +(3 rows) + +-- Safety check for rowtypes +-- Ensure that the predicate is converted to an inequality operator +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t; + QUERY PLAN +------------------------------------------- + Seq Scan on dist_tab + Filter: (row_nn <> '(1,5)'::dist_row_t) +(2 rows) + +-- ... and that all 3 rows are returned +SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t; + id +---- + 1 + 2 + 3 +(3 rows) + +-- Ensure that the predicate is converted to an equality operator, and thus +-- mergejoinable or hashjoinable +SET enable_nestloop TO off; +EXPLAIN (COSTS OFF) +SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn; + QUERY PLAN +-------------------------------------- + Hash Join + Hash Cond: (t1.val_nn = t2.val_nn) + -> Seq Scan on dist_tab t1 + -> Hash + -> Seq Scan on dist_tab t2 +(5 rows) + +SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn; + id | val_nn | val_null | row_nn | id | val_nn | val_null | row_nn +----+--------+----------+--------+----+--------+----------+-------- + 1 | 10 | 10 | (1,1) | 1 | 10 | 10 | (1,1) + 2 | 20 | | (2,2) | 2 | 20 | | (2,2) + 3 | 30 | 30 | (1,) | 3 | 30 | 30 | (1,) +(3 rows) + +RESET enable_nestloop; +DROP TABLE dist_tab; +DROP TYPE dist_row_t; diff --git a/src/test/regress/sql/predicate.sql b/src/test/regress/sql/predicate.sql index db72b11bb22..3f61184c577 100644 --- a/src/test/regress/sql/predicate.sql +++ b/src/test/regress/sql/predicate.sql @@ -308,3 +308,64 @@ EXPLAIN (COSTS OFF) SELECT * FROM pred_tab WHERE (a::oid) IS NULL; DROP TABLE pred_tab; + +-- +-- Test optimization of IS [NOT] DISTINCT FROM on non-nullable inputs +-- + +CREATE TYPE dist_row_t AS (a int, b int); +CREATE TABLE dist_tab (id int, val_nn int NOT NULL, val_null int, row_nn dist_row_t NOT NULL); + +INSERT INTO dist_tab VALUES (1, 10, 10, ROW(1, 1)); +INSERT INTO dist_tab VALUES (2, 20, NULL, ROW(2, 2)); +INSERT INTO dist_tab VALUES (3, 30, 30, ROW(1, NULL)); + +CREATE INDEX dist_tab_nn_idx ON dist_tab (val_nn); + +ANALYZE dist_tab; + +-- Ensure that the predicate folds to constant TRUE +EXPLAIN(COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT; +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM NULL::INT; + +-- Ensure that the predicate folds to constant FALSE +EXPLAIN(COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT; +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM NULL::INT; + +-- Ensure that the predicate is converted to an inequality operator +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10; +SELECT id FROM dist_tab WHERE val_nn IS DISTINCT FROM 10; + +-- Ensure that the predicate is converted to an equality operator, and thus can +-- use index scan +SET enable_seqscan TO off; +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10; +SELECT id FROM dist_tab WHERE val_nn IS NOT DISTINCT FROM 10; +RESET enable_seqscan; + +-- Ensure that the predicate is preserved as "IS DISTINCT FROM" +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20; +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM 20; + +-- Safety check for rowtypes +-- Ensure that the predicate is converted to an inequality operator +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t; +-- ... and that all 3 rows are returned +SELECT id FROM dist_tab WHERE row_nn IS DISTINCT FROM ROW(1, 5)::dist_row_t; + +-- Ensure that the predicate is converted to an equality operator, and thus +-- mergejoinable or hashjoinable +SET enable_nestloop TO off; +EXPLAIN (COSTS OFF) +SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn; +SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn; +RESET enable_nestloop; + +DROP TABLE dist_tab; +DROP TYPE dist_row_t;