From: David Rowley Date: Wed, 18 Mar 2026 04:22:05 +0000 (+1300) Subject: Move planner row-estimation tests to new planner_est.sql X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=374a6394c6ae2e6f8b70ca7897e1499ff72ae7e5;p=thirdparty%2Fpostgresql.git Move planner row-estimation tests to new planner_est.sql Move explain_mask_costs() and the associated planner row-estimation tests from misc_functions.sql to a new regression test file, planner_est.sql. Previously, there wasn't an ideal home for such tests, likely as there were very few such tests due to width and selectivity estimations being too dependent on statistics and hardware. That's not always the case, as we have SupportRequestRows support functions. More such tests are possibly on the way, so let's create a better home so that we don't have to create the explain_mask_costs() function in each file we might have added such tests to. Author: Ilia Evdokimov Reviewed-by: David Rowley Discussion: https://postgr.es/m/CAApHDvphShGABn-3AoE36dTvGHW7gUpFSw0_ZZnH84wGCW3hHw@mail.gmail.com --- diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index 6c03b1a79d7..cf55cdf3688 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -2,46 +2,6 @@ \getenv libdir PG_LIBDIR \getenv dlsuffix PG_DLSUFFIX \set regresslib :libdir '/regress' :dlsuffix --- Function to assist with verifying EXPLAIN which includes costs. 
A series --- of bool flags allows control over which portions are masked out -CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, - hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text -LANGUAGE plpgsql AS -$$ -DECLARE - ln text; - analyze_str text; -BEGIN - IF do_analyze = true THEN - analyze_str := 'on'; - ELSE - analyze_str := 'off'; - END IF; - - -- avoid jit related output by disabling it - SET LOCAL jit = 0; - - FOR ln IN - EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', - analyze_str, query) - LOOP - IF hide_costs = true THEN - ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); - END IF; - - IF hide_row_est = true THEN - -- don't use 'g' so that we leave the actual rows intact - ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); - END IF; - - IF hide_width = true THEN - ln := regexp_replace(ln, 'width=\d+', 'width=N'); - END IF; - - RETURN NEXT ln; - END LOOP; -END; -$$; -- -- num_nulls() -- @@ -671,143 +631,6 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; Index Cond: (unique1 = g.g) (4 rows) --- --- Test the SupportRequestRows support function for generate_series_timestamp() --- --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- As above but with generate_series_timestamp -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan 
on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- As above but with generate_series_timestamptz_at_zone() -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- Ensure the estimated and actual row counts match when the range isn't --- evenly divisible by the step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, -true, true, false, true); - explain_mask_costs -------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5.00 loops=1) -(1 row) - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) -(1 row) - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - explain_mask_costs -------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) -(1 row) - --- Ensure we get the default row estimate for infinity values -SELECT explain_mask_costs($$ 
-SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - --- Ensure the row estimate behaves correctly when step size is zero. --- We expect generate_series_timestamp() to throw the error rather than in --- the support function. -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); -ERROR: step size cannot equal zero --- --- Test the SupportRequestRows support function for generate_series_numeric() --- --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0) g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) -(1 row) - --- As above but with non-default step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13.00 loops=1) -(1 row) - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, -true, true, false, true); - explain_mask_costs ---------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) -(1 row) - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, -true, true, false, 
true); - explain_mask_costs -------------------------------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) -(1 row) - --- Ensure we get the default row estimate for error cases (infinity/NaN values --- and zero step size) -SELECT explain_mask_costs($$ -SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, -false, true, false, true); - explain_mask_costs -------------------------------------------------------------------- - Function Scan on generate_series g (cost=N..N rows=1000 width=N) -(1 row) - -- -- Test SupportRequestInlineInFrom request -- @@ -970,7 +793,6 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, (1 row) DROP TABLE test_chunk_id; -DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- test stratnum translation support functions SELECT gist_translate_cmptype_common(7); gist_translate_cmptype_common diff --git a/src/test/regress/expected/planner_est.out b/src/test/regress/expected/planner_est.out new file mode 100644 index 00000000000..3a47061800a --- /dev/null +++ b/src/test/regress/expected/planner_est.out @@ -0,0 +1,186 @@ +-- +-- Tests for testing query planner selectivity and width estimates +-- +-- Most selectivity and width estimations rely too heavily on statistics +-- gathered by ANALYZE, or could vary depending on hardware. 
However, there +-- are a few cases where we can have more certainty about the expected number +-- of rows, or width of rows. This is a good home for such tests. +-- +-- Function to assist with verifying EXPLAIN which includes costs. A series +-- of bool flags allows control over which portions are masked out +CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, + hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text +LANGUAGE plpgsql AS +$$ +DECLARE + ln text; + analyze_str text; +BEGIN + IF do_analyze = true THEN + analyze_str := 'on'; + ELSE + analyze_str := 'off'; + END IF; + + -- avoid jit related output by disabling it + SET LOCAL jit = 0; + + FOR ln IN + EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', + analyze_str, query) + LOOP + IF hide_costs = true THEN + ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); + END IF; + + IF hide_row_est = true THEN + -- don't use 'g' so that we leave the actual rows intact + ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); + END IF; + + IF hide_width = true THEN + ln := regexp_replace(ln, 'width=\d+', 'width=N'); + END IF; + + RETURN NEXT ln; + END LOOP; +END; +$$; +-- +-- Test the SupportRequestRows support function for generate_series_timestamp() +-- +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- As above but with generate_series_timestamp +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + explain_mask_costs 
+--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- As above but with generate_series_timestamptz_at_zone() +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- Ensure the estimated and actual row counts match when the range isn't +-- evenly divisible by the step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5.00 loops=1) +(1 row) + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30.00 loops=1) +(1 row) + +-- Ensure an empty range estimates 1 row +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) 
+(1 row) + +-- Ensure we get the default row estimate for infinity values +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +-- Ensure the row estimate behaves correctly when step size is zero. +-- We expect generate_series_timestamp() to throw the error rather than in +-- the support function. +SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); +ERROR: step size cannot equal zero +-- +-- Test the SupportRequestRows support function for generate_series_numeric() +-- +-- Ensure the row estimate matches the actual rows +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0) g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) +(1 row) + +-- As above but with non-default step +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13.00 loops=1) +(1 row) + +-- Ensure the estimates match when step is decreasing +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, +true, true, false, true); + explain_mask_costs +--------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25.00 loops=1) +(1 row) + +-- Ensure an empty range estimates 1 row +SELECT 
explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, +true, true, false, true); + explain_mask_costs +------------------------------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0.00 loops=1) +(1 row) + +-- Ensure we get the default row estimate for error cases (infinity/NaN values +-- and zero step size) +SELECT explain_mask_costs($$ +SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +SELECT explain_mask_costs($$ +SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, +false, true, false, true); + explain_mask_costs +------------------------------------------------------------------- + Function Scan on generate_series g (cost=N..N rows=1000 width=N) +(1 row) + +DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index e779ada70cb..734da057c34 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. 
# ---------- -test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls +test: partition_merge partition_split partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate graph_table_rls planner_est # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 35b7983996c..c8226652f2c 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -4,47 +4,6 @@ \set regresslib :libdir '/regress' :dlsuffix --- Function to assist with verifying EXPLAIN which includes costs. A series --- of bool flags allows control over which portions are masked out -CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, - hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text -LANGUAGE plpgsql AS -$$ -DECLARE - ln text; - analyze_str text; -BEGIN - IF do_analyze = true THEN - analyze_str := 'on'; - ELSE - analyze_str := 'off'; - END IF; - - -- avoid jit related output by disabling it - SET LOCAL jit = 0; - - FOR ln IN - EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s', - analyze_str, query) - LOOP - IF hide_costs = true THEN - ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); - END IF; - - IF hide_row_est = true THEN - -- don't use 'g' so that we leave the actual rows intact - ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); - END IF; - - IF hide_width = true THEN - ln := regexp_replace(ln, 'width=\d+', 'width=N'); - END IF; - - RETURN NEXT ln; - END LOOP; -END; -$$; - -- -- num_nulls() -- @@ -277,89 +236,6 @@ SELECT * FROM 
tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; --- --- Test the SupportRequestRows support function for generate_series_timestamp() --- - --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - --- As above but with generate_series_timestamp -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - --- As above but with generate_series_timestamptz_at_zone() -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, -true, true, false, true); - --- Ensure the estimated and actual row counts match when the range isn't --- evenly divisible by the step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, -true, true, false, true); - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, -true, true, false, true); - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, -true, true, false, true); - --- Ensure we get the default row estimate for infinity values -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, -false, true, false, true); - --- Ensure the row estimate behaves correctly when step size is zero. 
--- We expect generate_series_timestamp() to throw the error rather than in --- the support function. -SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); - --- --- Test the SupportRequestRows support function for generate_series_numeric() --- - --- Ensure the row estimate matches the actual rows -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0) g(s);$$, -true, true, false, true); - --- As above but with non-default step -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$, -true, true, false, true); - --- Ensure the estimates match when step is decreasing -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$, -true, true, false, true); - --- Ensure an empty range estimates 1 row -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$, -true, true, false, true); - --- Ensure we get the default row estimate for error cases (infinity/NaN values --- and zero step size) -SELECT explain_mask_costs($$ -SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$, -false, true, false, true); - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$, -false, true, false, true); - -SELECT explain_mask_costs($$ -SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$, -false, true, false, true); - -- -- Test SupportRequestInlineInFrom request -- @@ -443,7 +319,6 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel) FROM test_chunk_id; DROP TABLE test_chunk_id; -DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); -- test stratnum translation support functions SELECT gist_translate_cmptype_common(7); diff --git a/src/test/regress/sql/planner_est.sql b/src/test/regress/sql/planner_est.sql new file mode 100644 index 00000000000..47d5ae679c7 --- /dev/null +++ 
b/src/test/regress/sql/planner_est.sql
@@ -0,0 +1,135 @@
+--
+-- Tests for testing query planner selectivity and width estimates
+--
+-- Most selectivity and width estimations rely too heavily on statistics
+-- gathered by ANALYZE, or could vary depending on hardware. However, there
+-- are a few cases where we can have more certainty about the expected number
+-- of rows, or width of rows. This is a good home for such tests.
+--
+
+-- Function to assist with verifying EXPLAIN which includes costs. A series
+-- of bool flags allows control over which portions are masked out
+CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
+    hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
+LANGUAGE plpgsql AS
+$$
+DECLARE
+    ln text;
+    analyze_str text;
+BEGIN
+    IF do_analyze = true THEN
+        analyze_str := 'on';
+    ELSE
+        analyze_str := 'off';
+    END IF;
+
+    -- avoid jit related output by disabling it
+    SET LOCAL jit = 0;
+
+    FOR ln IN
+        EXECUTE format('explain (analyze %s, costs on, summary off, timing off, buffers off) %s',
+            analyze_str, query)
+    LOOP
+        IF hide_costs = true THEN
+            ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
+        END IF;
+
+        IF hide_row_est = true THEN
+            -- don't use 'g' so that we leave the actual rows intact
+            ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
+        END IF;
+
+        IF hide_width = true THEN
+            ln := regexp_replace(ln, 'width=\d+', 'width=N');
+        END IF;
+
+        RETURN NEXT ln;
+    END LOOP;
+END;
+$$;
+
+--
+-- Test the SupportRequestRows support function for generate_series_timestamp()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamp
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- As above but with generate_series_timestamptz_at_zone()
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimated and actual row counts match when the range isn't
+-- evenly divisible by the step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for infinity values
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
+false, true, false, true);
+
+-- Ensure the row estimate behaves correctly when step size is zero.
+-- We expect generate_series_timestamp() to throw the error rather than in
+-- the support function.
+SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+
+
+DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);