From: Richard Guo Date: Wed, 8 Oct 2025 08:01:48 +0000 (+0900) Subject: Allow negative aggtransspace to indicate unbounded state size X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=185e304263347d0979832f7a08a812872d136b18;p=thirdparty%2Fpostgresql.git Allow negative aggtransspace to indicate unbounded state size This patch reuses the existing aggtransspace in pg_aggregate to signal that an aggregate's transition state can grow unboundedly. If aggtransspace is set to a negative value, it now indicates that the transition state may consume unpredictable or large amounts of memory, such as in aggregates like array_agg or string_agg that accumulate input rows. This information can be used by the planner to avoid applying memory-sensitive optimizations (e.g., eager aggregation) when there is a risk of excessive memory usage during partial aggregation. Bump catalog version. Per idea from Robert Haas, though applied differently than originally suggested. Discussion: https://postgr.es/m/CA+TgmoYbkvYwLa+1vOP7RDY7kO2=A7rppoPusoRXe44VDOGBPg@mail.gmail.com --- diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index e9095bedf21..3acc2222a87 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -596,7 +596,10 @@ Approximate average size (in bytes) of the transition state - data, or zero to use a default estimate + data. A positive value provides an estimate; zero means to + use a default estimate. A negative value indicates the state + data can grow unboundedly in size, such as when the aggregate + accumulates input rows (e.g., array_agg, string_agg). diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml index 222e0aa5c9d..0472ac2e874 100644 --- a/doc/src/sgml/ref/create_aggregate.sgml +++ b/doc/src/sgml/ref/create_aggregate.sgml @@ -384,9 +384,13 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; The approximate average size (in bytes) of the aggregate's state value. If this parameter is omitted or is zero, a default estimate is used - based on the state_data_type. + based on the state_data_type. If set to a + negative value, it indicates the state data can grow unboundedly in + size, such as when the aggregate accumulates input rows (e.g., + array_agg, string_agg). The planner uses this value to estimate the memory required for a - grouped aggregate query. + grouped aggregate query and to avoid optimizations that may cause + excessive memory usage. @@ -568,7 +572,8 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1; The approximate average size (in bytes) of the aggregate's state value, when using moving-aggregate mode. This works the same as - state_data_size. + state_data_size, except that negative + values are not used to indicate unbounded state size. diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 00b3bce1e12..af54211f330 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202510081 +#define CATALOG_VERSION_NO 202510082 #endif diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat index d6aa1f6ec47..870769e8f14 100644 --- a/src/include/catalog/pg_aggregate.dat +++ b/src/include/catalog/pg_aggregate.dat @@ -558,26 +558,28 @@ aggfinalfn => 'array_agg_finalfn', aggcombinefn => 'array_agg_combine', aggserialfn => 'array_agg_serialize', aggdeserialfn => 'array_agg_deserialize', aggfinalextra => 't', - aggtranstype => 'internal' }, + aggtranstype => 'internal', aggtransspace => '-1' }, { aggfnoid => 'array_agg(anyarray)', aggtransfn => 'array_agg_array_transfn', aggfinalfn => 'array_agg_array_finalfn', aggcombinefn => 'array_agg_array_combine', aggserialfn => 'array_agg_array_serialize', aggdeserialfn => 'array_agg_array_deserialize', aggfinalextra => 't', - aggtranstype => 'internal' }, + aggtranstype => 'internal', aggtransspace => '-1' }, # text { aggfnoid => 'string_agg(text,text)', aggtransfn => 'string_agg_transfn', aggfinalfn => 'string_agg_finalfn', aggcombinefn => 'string_agg_combine', aggserialfn => 'string_agg_serialize', - aggdeserialfn => 'string_agg_deserialize', aggtranstype => 'internal' }, + aggdeserialfn => 'string_agg_deserialize', + aggtranstype => 'internal', aggtransspace => '-1' }, # bytea { aggfnoid => 'string_agg(bytea,bytea)', aggtransfn => 'bytea_string_agg_transfn', aggfinalfn => 'bytea_string_agg_finalfn', aggcombinefn => 'string_agg_combine', aggserialfn => 'string_agg_serialize', - aggdeserialfn => 'string_agg_deserialize', aggtranstype => 'internal' }, + aggdeserialfn => 'string_agg_deserialize', + aggtranstype => 'internal', aggtransspace => '-1' }, # range { aggfnoid => 'range_intersect_agg(anyrange)', diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 20bf9ea9cdf..a357e1d0c0e 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1470,7 +1470,7 @@ WHERE aggfnoid = 0 OR aggtransfn = 0 OR (aggkind = 'n' AND aggnumdirectargs > 0) OR aggfinalmodify NOT IN ('r', 's', 'w') OR aggmfinalmodify NOT IN ('r', 's', 'w') OR - aggtranstype = 0 OR aggtransspace < 0 OR aggmtransspace < 0; + aggtranstype = 0 OR aggmtransspace < 0; ctid | aggfnoid ------+---------- (0 rows) diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 2fb3a852878..cd674d7dbca 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -847,7 +847,7 @@ WHERE aggfnoid = 0 OR aggtransfn = 0 OR (aggkind = 'n' AND aggnumdirectargs > 0) OR aggfinalmodify NOT IN ('r', 's', 'w') OR aggmfinalmodify NOT IN ('r', 's', 'w') OR - aggtranstype = 0 OR aggtransspace < 0 OR aggmtransspace < 0; + aggtranstype = 0 OR aggmtransspace < 0; -- Make sure the matching pg_proc entry is sensible, too.