From: Tom Lane Date: Mon, 29 Sep 2025 20:55:17 +0000 (-0400) Subject: Add GROUP BY ALL. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ef38a4d9756db9ae1d20f40aa39f3cf76059b81a;p=thirdparty%2Fpostgresql.git Add GROUP BY ALL. GROUP BY ALL is a form of GROUP BY that adds any TargetExpr that does not contain an aggregate or window function into the groupClause of the query, making it exactly equivalent to specifying those same expressions in an explicit GROUP BY list. This feature is useful for certain kinds of data exploration. It's already present in some other DBMSes, and the SQL committee recently accepted it into the standard, so we can be reasonably confident in the syntax being stable. We do have to invent part of the semantics, as the standard doesn't allow for expressions in GROUP BY, so they haven't specified what to do with window functions. We assume that those should be treated like aggregates, i.e., left out of the constructed GROUP BY list. In passing, wordsmith some existing documentation about GROUP BY, and update some neglected synopsis entries in select_into.sgml. Author: David Christensen Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CAHM0NXjz0kDwtzoe-fnHAqPB1qA8_VJN0XAmCgUZ+iPnvP5LbA@mail.gmail.com --- diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml index a326960ff4d..2736868fb06 100644 --- a/doc/src/sgml/queries.sgml +++ b/doc/src/sgml/queries.sgml @@ -1145,10 +1145,36 @@ SELECT product_id, p.name, (sum(s.units) * p.price) AS sales In strict SQL, GROUP BY can only group by columns of - the source table but PostgreSQL extends + the source table, but PostgreSQL extends this to also allow GROUP BY to group by columns in the select list. Grouping by value expressions instead of simple - column names is also allowed. + column names is also allowed (but GROUP BY + expressions cannot contain aggregate functions or window functions). + + + + PostgreSQL also supports the syntax GROUP BY ALL, + which is equivalent to explicitly writing all select-list entries that + do not contain either an aggregate function or a window function. + This can greatly simplify ad-hoc exploration of data. + As an example, these queries are equivalent: + +=> SELECT a, b, a + b, sum(c) FROM test1 GROUP BY ALL; + a | b | ?column? | sum +---+---+----------+---- + 1 | 4 | 5 | 9 + 2 | 5 | 7 | 12 + 3 | 6 | 9 | 15 +(3 rows) + +=> SELECT a, b, a + b, sum(c) FROM test1 GROUP BY a, b, a + b; + a | b | ?column? | sum +---+---+----------+---- + 1 | 4 | 5 | 9 + 2 | 5 | 7 | 12 + 3 | 6 | 9 | 15 +(3 rows) + diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml index d7089eac0be..5a3bcff7607 100644 --- a/doc/src/sgml/ref/select.sgml +++ b/doc/src/sgml/ref/select.sgml @@ -37,7 +37,7 @@ SELECT [ ALL | DISTINCT [ ON ( expressionexpression [ [ AS ] output_name ] } [, ...] ] [ FROM from_item [, ...] ] [ WHERE condition ] - [ GROUP BY [ ALL | DISTINCT ] grouping_element [, ...] ] + [ GROUP BY { ALL | [ ALL | DISTINCT ] grouping_element [, ...] } ] [ HAVING condition ] [ WINDOW window_name AS ( window_definition ) [, ...] ] [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select ] @@ -796,7 +796,7 @@ WHERE condition The optional GROUP BY clause has the general form -GROUP BY [ ALL | DISTINCT ] grouping_element [, ...] +GROUP BY { ALL | [ ALL | DISTINCT ] grouping_element [, ...] } @@ -808,21 +808,31 @@ GROUP BY [ ALL | DISTINCT ] grouping_elementgrouping_element can be an input column name, or the name or ordinal number of an output column (SELECT list item), or an arbitrary - expression formed from input-column values. In case of ambiguity, + expression formed from input-column values; however, it cannot contain + an aggregate function or a window function. In case of ambiguity, a GROUP BY name will be interpreted as an input-column name rather than an output column name. + + The form GROUP BY ALL with no explicit + grouping_elements + provided is equivalent to writing GROUP BY with the + numbers of all SELECT output columns that do not + contain either an aggregate function or a window function. + + If any of GROUPING SETS, ROLLUP or CUBE are present as grouping elements, then the GROUP BY clause as a whole defines some number of independent grouping sets. The effect of this is equivalent to constructing a UNION ALL between - subqueries with the individual grouping sets as their + subqueries having the individual grouping sets as their GROUP BY clauses. The optional DISTINCT - clause removes duplicate sets before processing; it does not - transform the UNION ALL into a UNION DISTINCT. + key word removes duplicate grouping sets before processing; it does not + transform the implied UNION ALL into + a UNION DISTINCT. For further details on the handling of grouping sets see . diff --git a/doc/src/sgml/ref/select_into.sgml b/doc/src/sgml/ref/select_into.sgml index ae7e6bed24f..233f9bfa284 100644 --- a/doc/src/sgml/ref/select_into.sgml +++ b/doc/src/sgml/ref/select_into.sgml @@ -27,15 +27,15 @@ SELECT [ ALL | DISTINCT [ ON ( expressionnew_table [ FROM from_item [, ...] ] [ WHERE condition ] - [ GROUP BY expression [, ...] ] + [ GROUP BY { ALL | [ ALL | DISTINCT ] grouping_element [, ...] } ] [ HAVING condition ] [ WINDOW window_name AS ( window_definition ) [, ...] ] [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select ] [ ORDER BY expression [ ASC | DESC | USING operator ] [ NULLS { FIRST | LAST } ] [, ...] ] [ LIMIT { count | ALL } ] [ OFFSET start [ ROW | ROWS ] ] - [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } ONLY ] - [ FOR { UPDATE | SHARE } [ OF table_name [, ...] ] [ NOWAIT ] [...] ] + [ FETCH { FIRST | NEXT } [ count ] { ROW | ROWS } { ONLY | WITH TIES } ] + [ FOR { UPDATE | NO KEY UPDATE | SHARE | KEY SHARE } [ OF from_reference [, ...] ] [ NOWAIT | SKIP LOCKED ] [...] ] diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 5aeb54eb5f6..3b392b084ad 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -1467,12 +1467,14 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt, qry->groupClause = transformGroupClause(pstate, stmt->groupClause, + stmt->groupByAll, &qry->groupingSets, &qry->targetList, qry->sortClause, EXPR_KIND_GROUP_BY, false /* allow SQL92 rules */ ); qry->groupDistinct = stmt->groupDistinct; + qry->groupByAll = stmt->groupByAll; if (stmt->distinctClause == NIL) { diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 9fd48acb1f8..f1def67ac7c 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -120,6 +120,7 @@ typedef struct SelectLimit typedef struct GroupClause { bool distinct; + bool all; List *list; } GroupClause; @@ -12993,6 +12994,7 @@ simple_select: n->whereClause = $6; n->groupClause = ($7)->list; n->groupDistinct = ($7)->distinct; + n->groupByAll = ($7)->all; n->havingClause = $8; n->windowClause = $9; $$ = (Node *) n; @@ -13010,6 +13012,7 @@ simple_select: n->whereClause = $6; n->groupClause = ($7)->list; n->groupDistinct = ($7)->distinct; + n->groupByAll = ($7)->all; n->havingClause = $8; n->windowClause = $9; $$ = (Node *) n; @@ -13507,14 +13510,24 @@ group_clause: GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause)); n->distinct = $3 == SET_QUANTIFIER_DISTINCT; + n->all = false; n->list = $4; $$ = n; } + | GROUP_P BY ALL + { + GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause)); + n->distinct = false; + n->all = true; + n->list = NIL; + $$ = n; + } | /*EMPTY*/ { GroupClause *n = (GroupClause *) palloc(sizeof(GroupClause)); n->distinct = false; + n->all = false; n->list = NIL; $$ = n; } @@ -17618,6 +17631,7 @@ PLpgSQL_Expr: opt_distinct_clause opt_target_list n->whereClause = $4; n->groupClause = ($5)->list; n->groupDistinct = ($5)->distinct; + n->groupByAll = ($5)->all; n->havingClause = $6; n->windowClause = $7; n->sortClause = $8; diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 9f20a70ce13..ca26f6f61f2 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -2598,6 +2598,9 @@ transformGroupingSet(List **flatresult, * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. * + * If GROUP BY ALL is specified, the groupClause will be inferred to be all + * non-aggregate, non-window expressions in the targetlist. + * * This is also used for window PARTITION BY clauses (which act almost the * same, but are always interpreted per SQL99 rules). * @@ -2622,6 +2625,7 @@ transformGroupingSet(List **flatresult, * * pstate ParseState * grouplist clause to transform + * groupByAll is this a GROUP BY ALL statement? * groupingSets reference to list to contain the grouping set tree * targetlist reference to TargetEntry list * sortClause ORDER BY clause (SortGroupClause nodes) @@ -2629,7 +2633,8 @@ transformGroupingSet(List **flatresult, * useSQL99 SQL99 rather than SQL92 syntax */ List * -transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets, +transformGroupClause(ParseState *pstate, List *grouplist, bool groupByAll, + List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99) { @@ -2640,6 +2645,63 @@ transformGroupClause(ParseState *pstate, List *grouplist, List **groupingSets, bool hasGroupingSets = false; Bitmapset *seen_local = NULL; + /* Handle GROUP BY ALL */ + if (groupByAll) + { + /* There cannot have been any explicit grouplist items */ + Assert(grouplist == NIL); + + /* Iterate over targets, adding acceptable ones to the result list */ + foreach_ptr(TargetEntry, tle, *targetlist) + { + /* Ignore junk TLEs */ + if (tle->resjunk) + continue; + + /* + * TLEs containing aggregates are not okay to add to GROUP BY + * (compare checkTargetlistEntrySQL92). But the SQL standard + * directs us to skip them, so it's fine. + */ + if (pstate->p_hasAggs && + contain_aggs_of_level((Node *) tle->expr, 0)) + continue; + + /* + * Likewise, TLEs containing window functions are not okay to add + * to GROUP BY. At this writing, the SQL standard is silent on + * what to do with them, but by analogy to aggregates we'll just + * skip them. + */ + if (pstate->p_hasWindowFuncs && + contain_windowfuncs((Node *) tle->expr)) + continue; + + /* + * Otherwise, add the TLE to the result using default sort/group + * semantics. We specify the parse location as the TLE's + * location, despite the comment for addTargetToGroupList + * discouraging that. The only other thing we could point to is + * the ALL keyword, which seems unhelpful when there are multiple + * TLEs. + */ + result = addTargetToGroupList(pstate, tle, + result, *targetlist, + exprLocation((Node *) tle->expr)); + } + + /* If we found any acceptable targets, we're done */ + if (result != NIL) + return result; + + /* + * Otherwise, the SQL standard says to treat it like "GROUP BY ()". + * Build a representation of that, and let the rest of this function + * handle it. + */ + grouplist = list_make1(makeGroupingSet(GROUPING_SET_EMPTY, NIL, -1)); + } + /* * Recursively flatten implicit RowExprs. (Technically this is only needed * for GROUP BY, per the syntax rules for grouping sets, but we do it @@ -2818,6 +2880,7 @@ transformWindowDefinitions(ParseState *pstate, true /* force SQL99 rules */ ); partitionClause = transformGroupClause(pstate, windef->partitionClause, + false /* not GROUP BY ALL */ , NULL, targetlist, orderClause, diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index defcdaa8b34..c6d83d67b87 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -6186,7 +6186,9 @@ get_basic_select_query(Query *query, deparse_context *context) save_ingroupby = context->inGroupBy; context->inGroupBy = true; - if (query->groupingSets == NIL) + if (query->groupByAll) + appendStringInfoString(buf, "ALL"); + else if (query->groupingSets == NIL) { sep = ""; foreach(l, query->groupClause) diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 62c21d3670d..4e7d5ceb7b8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202509191 +#define CATALOG_VERSION_NO 202509291 #endif diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index f1706df58fd..ac0e02a1db7 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -214,7 +214,8 @@ typedef struct Query List *returningList; /* return-values list (of TargetEntry) */ List *groupClause; /* a list of SortGroupClause's */ - bool groupDistinct; /* is the group by clause distinct? */ + bool groupDistinct; /* was GROUP BY DISTINCT used? */ + bool groupByAll; /* was GROUP BY ALL used? */ List *groupingSets; /* a list of GroupingSet's if present */ @@ -2192,6 +2193,7 @@ typedef struct SelectStmt Node *whereClause; /* WHERE qualification */ List *groupClause; /* GROUP BY clauses */ bool groupDistinct; /* Is this GROUP BY DISTINCT? */ + bool groupByAll; /* Is this GROUP BY ALL? */ Node *havingClause; /* HAVING conditional-expression */ List *windowClause; /* WINDOW window_name AS (...), ... */ diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 3e9894926de..ede3903d1dd 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -26,6 +26,7 @@ extern Node *transformLimitClause(ParseState *pstate, Node *clause, ParseExprKind exprKind, const char *constructName, LimitOption limitOption); extern List *transformGroupClause(ParseState *pstate, List *grouplist, + bool groupByAll, List **groupingSets, List **targetlist, List *sortClause, ParseExprKind exprKind, bool useSQL99); diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1f24f6ffd1f..035f9a78206 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1557,6 +1557,129 @@ drop table t2; drop table t3; drop table p_t1; -- +-- Test GROUP BY ALL +-- +-- We don't care about the data here, just the proper transformation of the +-- GROUP BY clause, so test some queries and verify the EXPLAIN plans. +-- +CREATE TEMP TABLE t1 ( + a int, + b int, + c int +); +-- basic example +EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: b + -> Seq Scan on t1 +(3 rows) + +-- multiple columns, non-consecutive order +EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t1 +(3 rows) + +-- multi columns, no aggregate +EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: (a + b) + -> Seq Scan on t1 +(3 rows) + +-- check we detect a non-top-level aggregate +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- including grouped column is okay +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- including non-grouped column, not so much +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL; +ERROR: column "t1.c" must appear in the GROUP BY clause or be used in an aggregate function +LINE 1: EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY AL... + ^ +-- all aggregates, should reduce to GROUP BY () +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + Aggregate + Group Key: () + -> Seq Scan on t1 +(3 rows) + +-- likewise with empty target list +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL; + QUERY PLAN +----------------------- + Result + Replaces: Aggregate +(2 rows) + +-- window functions are not to be included in GROUP BY, either +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------------------- + WindowAgg + Window: w1 AS (PARTITION BY a) + -> Sort + Sort Key: a + -> HashAggregate + Group Key: a + -> Seq Scan on t1 +(7 rows) + +-- all cols +EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on t1 +(3 rows) + +-- group by all with grouping element(s) (equivalent to GROUP BY's +-- default behavior, explicit antithesis to GROUP BY DISTINCT) +EXPLAIN (COSTS OFF) SELECT a, count(*) FROM t1 GROUP BY ALL a; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a + -> Seq Scan on t1 +(3 rows) + +-- verify deparsing of GROUP BY ALL +CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL; +SELECT pg_get_viewdef('v1'::regclass); + pg_get_viewdef +----------------------- + SELECT b, + + count(*) AS count+ + FROM t1 + + GROUP BY ALL; +(1 row) + +DROP VIEW v1; +DROP TABLE t1; +-- -- Test GROUP BY matching of join columns that are type-coerced due to USING -- create temp table t1(f1 int, f2 int); diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 62540b1ffa4..908af50def3 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -549,6 +549,60 @@ drop table t2; drop table t3; drop table p_t1; +-- +-- Test GROUP BY ALL +-- +-- We don't care about the data here, just the proper transformation of the +-- GROUP BY clause, so test some queries and verify the EXPLAIN plans. +-- + +CREATE TEMP TABLE t1 ( + a int, + b int, + c int +); + +-- basic example +EXPLAIN (COSTS OFF) SELECT b, COUNT(*) FROM t1 GROUP BY ALL; + +-- multiple columns, non-consecutive order +EXPLAIN (COSTS OFF) SELECT a, SUM(b), b FROM t1 GROUP BY ALL; + +-- multi columns, no aggregate +EXPLAIN (COSTS OFF) SELECT a + b FROM t1 GROUP BY ALL; + +-- check we detect a non-top-level aggregate +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + 4 FROM t1 GROUP BY ALL; + +-- including grouped column is okay +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + a FROM t1 GROUP BY ALL; + +-- including non-grouped column, not so much +EXPLAIN (COSTS OFF) SELECT a, SUM(b) + c FROM t1 GROUP BY ALL; + +-- all aggregates, should reduce to GROUP BY () +EXPLAIN (COSTS OFF) SELECT COUNT(a), SUM(b) FROM t1 GROUP BY ALL; + +-- likewise with empty target list +EXPLAIN (COSTS OFF) SELECT FROM t1 GROUP BY ALL; + +-- window functions are not to be included in GROUP BY, either +EXPLAIN (COSTS OFF) SELECT a, COUNT(a) OVER (PARTITION BY a) FROM t1 GROUP BY ALL; + +-- all cols +EXPLAIN (COSTS OFF) SELECT *, count(*) FROM t1 GROUP BY ALL; + +-- group by all with grouping element(s) (equivalent to GROUP BY's +-- default behavior, explicit antithesis to GROUP BY DISTINCT) +EXPLAIN (COSTS OFF) SELECT a, count(*) FROM t1 GROUP BY ALL a; + +-- verify deparsing of GROUP BY ALL +CREATE TEMP VIEW v1 AS SELECT b, COUNT(*) FROM t1 GROUP BY ALL; +SELECT pg_get_viewdef('v1'::regclass); + +DROP VIEW v1; +DROP TABLE t1; + -- -- Test GROUP BY matching of join columns that are type-coerced due to USING --