From f41ab51573a4c3d11f906b32a068182a1fd8596d Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 10 Feb 2026 10:19:25 +0900 Subject: [PATCH] Teach planner to transform "x IS [NOT] DISTINCT FROM NULL" to a NullTest In the spirit of 8d19d0e13, this patch teaches the planner about the principle that NullTest with !argisrow is fully equivalent to SQL's IS [NOT] DISTINCT FROM NULL. The parser already performs this transformation for literal NULLs. However, a DistinctExpr expression with one input evaluating to NULL during planning (e.g., via const-folding of "1 + NULL" or parameter substitution in custom plans) currently remains as a DistinctExpr node. This patch closes the gap for const-folded NULLs. It specifically targets the case where one input is a constant NULL and the other is a nullable non-constant expression. (If the other input were otherwise, the DistinctExpr node would have already been simplified to a constant TRUE or FALSE.) This transformation can be beneficial because NullTest is much more amenable to optimization than DistinctExpr, since the planner knows a good deal about the former and next to nothing about the latter. Author: Richard Guo Reviewed-by: Tender Wang Discussion: https://postgr.es/m/CAMbWs49BMAOWvkdSHxpUDnniqJcEcGq3_8dd_5wTR4xrQY8urA@mail.gmail.com --- src/backend/optimizer/util/clauses.c | 24 +++++++++ src/test/regress/expected/predicate.out | 69 ++++++++++++++++++++++++- src/test/regress/sql/predicate.sql | 27 +++++++++- 3 files changed, 118 insertions(+), 2 deletions(-) diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index f73d6a68495..504a30d8836 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -2837,6 +2837,30 @@ eval_const_expressions_mutator(Node *node, return eval_const_expressions_mutator(negate_clause((Node *) eqexpr), context); } + else if (has_null_input) + { + /* + * One input is a nullable non-constant expression, and + * the other is an explicit NULL constant. We can + * transform this to a NullTest with !argisrow, which is + * much more amenable to optimization. + */ + + NullTest *nt = makeNode(NullTest); + + nt->arg = (Expr *) (IsA(linitial(args), Const) ? + lsecond(args) : linitial(args)); + nt->nulltesttype = IS_NOT_NULL; + + /* + * argisrow = false is correct whether or not arg is + * composite + */ + nt->argisrow = false; + nt->location = expr->location; + + return eval_const_expressions_mutator((Node *) nt, context); + } /* * The expression cannot be simplified any further, so build diff --git a/src/test/regress/expected/predicate.out b/src/test/regress/expected/predicate.out index 75590edf1bc..feae77cb840 100644 --- a/src/test/regress/expected/predicate.out +++ b/src/test/regress/expected/predicate.out @@ -633,7 +633,7 @@ SELECT * FROM pred_tab WHERE (a::oid) IS NULL; DROP TABLE pred_tab; -- --- Test optimization of IS [NOT] DISTINCT FROM on non-nullable inputs +-- Test optimization of IS [NOT] DISTINCT FROM -- CREATE TYPE dist_row_t AS (a int, b int); CREATE TABLE dist_tab (id int, val_nn int NOT NULL, val_null int, row_nn dist_row_t NOT NULL); @@ -766,6 +766,73 @@ SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2. (3 rows) RESET enable_nestloop; +-- Ensure that the predicate is converted to IS NOT NULL +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT; + QUERY PLAN +---------------------------------- + Seq Scan on dist_tab + Filter: (val_null IS NOT NULL) +(2 rows) + +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT; + id +---- + 1 + 3 +(2 rows) + +-- Ensure that the predicate is converted to IS NULL +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT; + QUERY PLAN +------------------------------ + Seq Scan on dist_tab + Filter: (val_null IS NULL) +(2 rows) + +SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT; + id +---- + 2 +(1 row) + +-- Safety check for rowtypes +-- The predicate is converted to IS NOT NULL, and get_rule_expr prints it as IS +-- DISTINCT FROM because argisrow is false, indicating that we're applying a +-- scalar test +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD; + QUERY PLAN +----------------------------------------------------------- + Seq Scan on dist_tab + Filter: (ROW(val_null, val_null) IS DISTINCT FROM NULL) +(2 rows) + +SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD; + id +---- + 1 + 2 + 3 +(3 rows) + +-- The predicate is converted to IS NULL, and get_rule_expr prints it as IS NOT +-- DISTINCT FROM because argisrow is false, indicating that we're applying a +-- scalar test +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD; + QUERY PLAN +--------------------------------------------------------------- + Seq Scan on dist_tab + Filter: (ROW(val_null, val_null) IS NOT DISTINCT FROM NULL) +(2 rows) + +SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD; + id +---- +(0 rows) + DROP TABLE dist_tab; DROP TYPE dist_row_t; -- diff --git a/src/test/regress/sql/predicate.sql b/src/test/regress/sql/predicate.sql index b2811f7f3f0..0f92bb52435 100644 --- a/src/test/regress/sql/predicate.sql +++ b/src/test/regress/sql/predicate.sql @@ -310,7 +310,7 @@ SELECT * FROM pred_tab WHERE (a::oid) IS NULL; DROP TABLE pred_tab; -- --- Test optimization of IS [NOT] DISTINCT FROM on non-nullable inputs +-- Test optimization of IS [NOT] DISTINCT FROM -- CREATE TYPE dist_row_t AS (a int, b int); @@ -367,6 +367,31 @@ SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2. SELECT * FROM dist_tab t1 JOIN dist_tab t2 ON t1.val_nn IS NOT DISTINCT FROM t2.val_nn; RESET enable_nestloop; +-- Ensure that the predicate is converted to IS NOT NULL +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT; +SELECT id FROM dist_tab WHERE val_null IS DISTINCT FROM NULL::INT; + +-- Ensure that the predicate is converted to IS NULL +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT; +SELECT id FROM dist_tab WHERE val_null IS NOT DISTINCT FROM NULL::INT; + +-- Safety check for rowtypes +-- The predicate is converted to IS NOT NULL, and get_rule_expr prints it as IS +-- DISTINCT FROM because argisrow is false, indicating that we're applying a +-- scalar test +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD; +SELECT id FROM dist_tab WHERE (val_null, val_null) IS DISTINCT FROM NULL::RECORD; + +-- The predicate is converted to IS NULL, and get_rule_expr prints it as IS NOT +-- DISTINCT FROM because argisrow is false, indicating that we're applying a +-- scalar test +EXPLAIN (COSTS OFF) +SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD; +SELECT id FROM dist_tab WHERE (val_null, val_null) IS NOT DISTINCT FROM NULL::RECORD; + DROP TABLE dist_tab; DROP TYPE dist_row_t; -- 2.47.3