From: David Rowley Date: Thu, 19 Mar 2026 04:16:36 +0000 (+1300) Subject: Short-circuit row estimation in NOT IN containing NULL consts X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c95cd2991f1e3ece689adfe662082f200126d255;p=thirdparty%2Fpostgresql.git Short-circuit row estimation in NOT IN containing NULL consts ScalarArrayOpExpr used for either NOT IN or <>/= ALL, when the array contains a NULL constant, will never evaluate to true. Here we add an explicit short-circuit in scalararraysel() to account for this and return a selectivity of 0.0 when we see that a NULL exists. When the array is a constant, we can very quickly see if there are any NULL values and return early before going to much effort in scalararraysel(). For non-const arrays, we short-circuit after finding the first NULL and forgo selectivity estimations of any remaining elements. In the future, it might be better to do something for this case in constant folding. We would need to be careful to only do this for strict operators on expressions located in places that don't care about distinguishing false from NULL returns, i.e. EXPRKIND_QUAL expressions. Doing that requires a bit more thought and effort, so here we just fix some needlessly slow selectivity estimations for ScalarArrayOpExpr containing many array elements and at least one NULL. 
Author: Ilia Evdokimov Reviewed-by: David Geier Reviewed-by: Zsolt Parragi Reviewed-by: David Rowley Discussion: https://postgr.es/m/eaa2598c-5356-4e1e-9ec3-5fd6eb1cd704@tantorlabs.com --- diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d4da0e8dea9..86b55c9bb8b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2018,6 +2018,15 @@ scalararraysel(PlannerInfo *root, if (arrayisnull) /* qual can't succeed if null array */ return (Selectivity) 0.0; arrayval = DatumGetArrayTypeP(arraydatum); + + /* + * When the array contains a NULL constant, same as var_eq_const, we + * assume the operator is strict and nothing will match, thus return + * 0.0. + */ + if (!useOr && array_contains_nulls(arrayval)) + return (Selectivity) 0.0; + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), &elmlen, &elmbyval, &elmalign); deconstruct_array(arrayval, @@ -2115,6 +2124,14 @@ scalararraysel(PlannerInfo *root, List *args; Selectivity s2; + /* + * When the array contains a NULL constant, same as var_eq_const, + * we assume the operator is strict and nothing will match, thus + * return 0.0. + */ + if (!useOr && IsA(elem, Const) && ((Const *) elem)->constisnull) + return (Selectivity) 0.0; + /* * Theoretically, if elem isn't of nominal_element_type we should * insert a RelabelType, but it seems unlikely that any operator diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out index 3a47061800a..b62a47552fa 100644 --- a/src/test/regress/expected/planner_est.out +++ b/src/test/regress/expected/planner_est.out @@ -183,4 +183,31 @@ false, true, false, true); Function Scan on generate_series g (cost=N..N rows=1000 width=N) (1 row) +-- +-- Test ScalarArrayOpExpr row estimates for <> ALL for arrays with NULLs. We +-- expect the planner to estimate 1 row will match in both of the following +-- tests. 
+-- +-- Try a const array containing a NULL +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$, +false, true, false, true); + explain_mask_costs +--------------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=1 width=N) + Filter: (unique1 <> ALL ('{1,2,99,NULL}'::integer[])) +(2 rows) + +-- Try a non-const array containing a NULL +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------- + Seq Scan on tenk1 (cost=N..N rows=1 width=N) + Filter: (unique1 <> ALL (ARRAY[1, 2, 98, (InitPlan expr_1).col1, NULL::integer])) + InitPlan expr_1 + -> Result (cost=N..N rows=1 width=N) +(4 rows) + DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql index 47d5ae679c7..53210d5baad 100644 --- a/src/test/regress/sql/planner_est.sql +++ b/src/test/regress/sql/planner_est.sql @@ -131,5 +131,20 @@ SELECT explain_mask_costs($$ SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, false, true, false, true); +-- +-- Test ScalarArrayOpExpr row estimates for <> ALL for arrays with NULLs. We +-- expect the planner to estimate 1 row will match in both of the following +-- tests. +-- + +-- Try a const array containing a NULL +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 99, NULL]);$$, +false, true, false, true); + +-- Try a non-const array containing a NULL +SELECT explain_mask_costs($$ +SELECT * FROM tenk1 WHERE unique1 <> ALL (ARRAY[1, 2, 98, (SELECT 99), NULL]);$$, +false, true, false, true); DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);