From: Tom Lane Date: Sat, 20 Sep 2025 16:44:52 +0000 (-0400) Subject: Re-allow using statistics for bool-valued functions in WHERE. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1eccb93150707acfcc8f24556a15742a6313c8ac;p=thirdparty%2Fpostgresql.git Re-allow using statistics for bool-valued functions in WHERE. Commit a391ff3c3, which added the ability for a function's support function to provide a custom selectivity estimate for "WHERE f(...)", unintentionally removed the possibility of applying expression statistics after finding there's no applicable support function. That happened because we no longer fell through to boolvarsel() as before. Refactor to do so again, putting the 0.3333333 default back into boolvarsel() where it had been (cf. commit 39df0f150). I surely wouldn't have made this error if 39df0f150 had included a test case, so add one now. At the time we did not have the "extended statistics" infrastructure, but we do now, and it is also unable to work in this scenario because of this error. So make use of that for the test case. This is very clearly a bug fix, but I'm afraid to put it into released branches because of the likelihood of altering plan choices, which we avoid doing in minor releases. So, master only. Reported-by: Frédéric Yhuel Author: Tom Lane Discussion: https://postgr.es/m/a8b99dce-1bfb-4d97-af73-54a32b85c916@dalibo.com --- diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 5d51f97f219..d0f516b7645 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -874,6 +874,10 @@ clause_selectivity_ext(PlannerInfo *root, varRelid, jointype, sjinfo); + + /* If no support, fall back on boolvarsel */ + if (s1 < 0) + s1 = boolvarsel(root, clause, varRelid); } else if (IsA(clause, ScalarArrayOpExpr)) { diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index f8641204a67..da5d901ec3c 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -2143,9 +2143,8 @@ join_selectivity(PlannerInfo *root, /* * function_selectivity * - * Returns the selectivity of a specified boolean function clause. - * This code executes registered procedures stored in the - * pg_proc relation, by calling the function manager. + * Attempt to estimate the selectivity of a specified boolean function clause + * by asking its support function. If the function lacks support, return -1. * * See clause_selectivity() for the meaning of the additional parameters. */ @@ -2163,15 +2162,8 @@ function_selectivity(PlannerInfo *root, SupportRequestSelectivity req; SupportRequestSelectivity *sresult; - /* - * If no support function is provided, use our historical default - * estimate, 0.3333333. This seems a pretty unprincipled choice, but - * Postgres has been using that estimate for function calls since 1992. - * The hoariness of this behavior suggests that we should not be in too - * much hurry to use another value. - */ if (!prosupport) - return (Selectivity) 0.3333333; + return (Selectivity) -1; /* no support function */ req.type = T_SupportRequestSelectivity; req.root = root; @@ -2188,9 +2180,8 @@ function_selectivity(PlannerInfo *root, DatumGetPointer(OidFunctionCall1(prosupport, PointerGetDatum(&req))); - /* If support function fails, use default */ if (sresult != &req) - return (Selectivity) 0.3333333; + return (Selectivity) -1; /* function did not honor request */ if (req.selectivity < 0.0 || req.selectivity > 1.0) elog(ERROR, "invalid function selectivity: %f", req.selectivity); diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1c480cfaaf7..e5e066a5537 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -1528,6 +1528,17 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid) selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid, BoolGetDatum(true), false, true, false); } + else if (is_funcclause(arg)) + { + /* + * If we have no stats and it's a function call, estimate 0.3333333. + * This seems a pretty unprincipled choice, but Postgres has been + * using that estimate for function calls since 1992. The hoariness + * of this behavior suggests that we should not be in too much hurry + * to use another value. + */ + selec = 0.3333333; + } else { /* Otherwise, the default estimate is 0.5 */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index fdc0aa130bd..9f378a6abdf 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3537,4 +3537,24 @@ SELECT FROM sb_1 LEFT JOIN sb_2 RESET enable_nestloop; RESET enable_mergejoin; +-- Check that we can use statistics on a bool-valued function. +CREATE FUNCTION extstat_small(x numeric) RETURNS bool +STRICT IMMUTABLE LANGUAGE plpgsql +AS $$ BEGIN RETURN x < 1; END $$; +SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)'); + estimated | actual +-----------+-------- + 3333 | 196 +(1 row) + +CREATE STATISTICS extstat_sb_2_small ON extstat_small(y) FROM sb_2; +ANALYZE sb_2; +SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)'); + estimated | actual +-----------+-------- + 196 | 196 +(1 row) + +-- Tidy up DROP TABLE sb_1, sb_2 CASCADE; +DROP FUNCTION extstat_small(x numeric); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 823c7db9dab..d7e5c0c893a 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1811,4 +1811,18 @@ SELECT FROM sb_1 LEFT JOIN sb_2 RESET enable_nestloop; RESET enable_mergejoin; +-- Check that we can use statistics on a bool-valued function. +CREATE FUNCTION extstat_small(x numeric) RETURNS bool +STRICT IMMUTABLE LANGUAGE plpgsql +AS $$ BEGIN RETURN x < 1; END $$; + +SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)'); + +CREATE STATISTICS extstat_sb_2_small ON extstat_small(y) FROM sb_2; +ANALYZE sb_2; + +SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)'); + +-- Tidy up DROP TABLE sb_1, sb_2 CASCADE; +DROP FUNCTION extstat_small(x numeric);