From: Mike Bayer Date: Sun, 17 Jan 2021 18:35:02 +0000 (-0500) Subject: Implement support for functions as FROM with columns clause support X-Git-Tag: rel_1_4_0b2~35 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=05a31f2708590161d4b3b4c7ff65196c99b4a22b;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git Implement support for functions as FROM with columns clause support WIP Fixes: #3566 Change-Id: I5b093b72533ef695293e737eb75850b9713e5e03 --- diff --git a/doc/build/core/functions.rst b/doc/build/core/functions.rst index 6cfcbd3041..c6e3aa7c3d 100644 --- a/doc/build/core/functions.rst +++ b/doc/build/core/functions.rst @@ -19,6 +19,10 @@ unknown to SQLAlchemy, built-in or user defined. The section here only describes those functions where SQLAlchemy already knows what argument and return types are in use. +.. seealso:: + + :ref:`tutorial_functions` - in the :ref:`unified_tutorial` + .. automodule:: sqlalchemy.sql.functions :members: :undoc-members: diff --git a/doc/build/tutorial/data.rst b/doc/build/tutorial/data.rst index bc81cb0274..67252fc4f1 100644 --- a/doc/build/tutorial/data.rst +++ b/doc/build/tutorial/data.rst @@ -797,6 +797,9 @@ we call upon the name ``count()`` name:: >>> print(count_fn) {opensql}count(user_account.id) +SQL functions are described in more detail later in this tutorial at +:ref:`tutorial_functions`. + When using aggregate functions in SQL, the GROUP BY clause is essential in that it allows rows to be partitioned into groups where aggregate functions will be applied to each group individually. When requesting non-aggregated columns @@ -1278,6 +1281,286 @@ clause: [('patrick',)] {opensql}ROLLBACK{stop} +.. _tutorial_functions: + +Working with SQL Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First introduced earlier in this section at :ref:`tutorial_group_by_w_aggregates`, +the :data:`_sql.func` object serves as a factory for creating new +:class:`_functions.Function` objects, which when used in a construct +like :func:`_sql.select`, produce a SQL function display, typically +consisting of a name, some parenthesis (although not always), and possibly +some arguments. Examples of typical SQL functions include: + +* the ``count()`` function, an aggregate function which counts how many + rows are returned:: + + .. sourcecode:: pycon+sql + + >>> stmt = select(func.count()).select_from(user_table) + >>> with engine.connect() as conn: + ... result = conn.execute(stmt) + ... print(result.all()) + {opensql}BEGIN (implicit) + SELECT count(*) AS count_1 + FROM user_account + [...] () + [(3,)] + ROLLBACK + + .. + +* the ``lower()`` function, a string function that converts a string to + lower case:: + + .. sourcecode:: pycon+sql + + >>> stmt = select(func.lower("A String With Much UPPERCASE")) + >>> with engine.connect() as conn: + ... print(conn.scalar(stmt)) + {opensql}BEGIN (implicit) + SELECT lower(?) AS lower_1 + [...] ('A String With Much UPPERCASE',) + a string with much uppercase + ROLLBACK + + .. + +* the ``now()`` function, which provides for the current date + and time; as this is a common function, SQLAlhemy knows how to render this + differently for each backend, in the case of SQLite using the + CURRENT_TIMESTAMP function:: + + .. sourcecode:: pycon+sql + + >>> stmt = select(func.now()) + >>> with engine.connect() as conn: + ... result = conn.execute(stmt) + ... print(result.all()) + {opensql}BEGIN (implicit) + SELECT CURRENT_TIMESTAMP AS now_1 + [...] () + [(datetime.datetime(...),)] + ROLLBACK + + .. + +As most database backends feature dozens if not hundreds of different SQL +functions, the :data:`_sql.func` tries to be as liberal as possible in what +it accepts. Any name that is passed is automatically considered to be a +SQL function that will render in a generic way:: + + >>> print(select(func.some_crazy_function(user_table.c.name, 17))) + SELECT some_crazy_function(user_account.name, :some_crazy_function_2) AS some_crazy_function_1 + FROM user_account + +At the same time, a relatively small set of extremely common SQL functions +such as :class:`_functions.now`, :class:`_functions.max`, :class:`_functions.concat` +include pre-packaged versions of themselves which provide for proper typing +information as well as backend-specific SQL generation in some cases:: + + >>> from sqlalchemy.dialects import postgresql, oracle + >>> print(select(func.now()).compile(dialect=postgresql.dialect())) + SELECT now() AS now_1 + >>> print(select(func.now()).compile(dialect=oracle.dialect())) + SELECT CURRENT_TIMESTAMP AS now_1 FROM DUAL + +Functions Have Return Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As functions are column expressions, they also have datatypes. These return +types are significant when making use of the function expression in the +context of a larger expression; that is, math operators will work better +when the datatype of the expression is something like :class:`_types.Integer` +or :class:`_types.Numeric`, JSON accessors in order to work need to be using +a type such as :class:`_types.JSON`. + +The return type of the function may also be significant when executing a +statement and getting rows back, for those cases where SQLAlchemy has to +apply result-set processing. A prime example of this are date-related +functions on SQLite. + +For common aggregate functions like :class:`_functions.count`, +:class:`_functions.max`, :class:`_functions.min` as well as date functions like +:class:`_functions.now`, string functions like :class:`_functions.concat`, the +return type is set up appropriately, sometimes based on usage. The +:class:`_functions.max` function and similar ones will set up the return type +based on the argument given:: + + >>> m1 = func.max(Column("some_int", Integer)) + >>> m1.type + Integer() + + >>> m2 = func.max(Column("some_str", String)) + >>> m2.type + String() + +Date and time functions typically return :class:`_types.DateTime` or +:class:`_types.Date`:: + + >>> func.now().type + DateTime() + >>> func.current_date().type + Date() + +A known string function such as :class:`_ will know to return :class:`_types.String`:: + + >>> func.concat("x", "y").type + String() + +However, for the vast majority of SQL functions, SQLAlchemy does not have them +explicitly present in it's very small list of known functions. For example, +while there is typically no issue using SQL functions ``func.lower()`` +and ``func.upper()`` to convert the casing of strings, SQLAlchemy doesn't +actually know about these functions, so they have a "null" return type:: + + >>> func.upper("lowercase").type + NullType() + +For simple functions like ``upper`` and ``lower``, the issue is not significant, +but when using a SQL function where the return type is important (most typically +involving PostgreSQL JSON or ARRAY functions, SQLite date-related functions), +we can pass the type using ``type_=``, such as:: + + >>> from sqlalchemy import JSON + >>> json_expr = func.json_extract_path('{"key1":{"key2":99}}','key1', type_=JSON) + >>> stmt = select(json_expr["key2"]) + >>> print(stmt.compile(dialect=postgresql.dialect())) + SELECT json_extract_path(%(json_extract_path_1)s, %(json_extract_path_2)s) -> %(json_extract_path_3)s AS anon_1 + +Above, the ``json_extract_path()`` function object would not be able to deliver +the expression ``json_expr["key2"]`` without knowing that its return type +should be using :class:`_types.JSON`. + +Using Window Functions +~~~~~~~~~~~~~~~~~~~~~~ + +A window function is a special use of a SQL aggregate function which calculates +the aggregate value over the rows being returned in a group as the individual +result rows are processed. Whereas a function like ``MAX()`` will give you +the highest value of a column within a set of rows, using the same function +as a "window function" will given you the highest value for each row, +*as of that row*. + +In SQL, window functions allow one to specify the rows over which the +function should be applied, a "partition" value which considers the window +over different sub-sets of rows, and an "order by" expression which importantly +indicates the order in which rows should be applied to the aggregate function. + +All SQL functions include a method :meth:`_functions.Function.over` which +grants the window function, or "OVER", syntax. A common function used with +window functions is the ``row_number()`` function which simply counts +rows. We may partition this row count against user name to number the +email addresses of individual users:: + + .. sourcecode:: pycon+sql + + >>> stmt = select( + ... func.row_number().over(partition_by=user_table.c.name), + ... user_table.c.name, + ... address_table.c.email_address).select_from(user_table).join(address_table) + >>> with engine.connect() as conn: + ... result = conn.execute(stmt) + ... print(result.all()) + {opensql} BEGIN (implicit) + SELECT row_number() OVER (PARTITION BY user_account.name) AS anon_1, user_account.name, address.email_address + FROM user_account JOIN address ON user_account.id = address.user_id + [...] () + [(1, 'sandy', 'sandy@sqlalchemy.org'), (2, 'sandy', 'sandy@squirrelpower.org'), (1, 'spongebob', 'spongebob@sqlalchemy.org')] + ROLLBACK + +Within :meth:`_functions.Function.over`, the :paramref:`_functions.Function.over.order_by` +and :paramref:`_functions.Function.over.partition_by` set up the ORDER BY and +PARTITION BY options. + +Special Modifiers WITHIN GROUP, FILTER +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The "WITHIN GROUP" SQL syntax is used in conjunction with an "ordered set" +or a "hypothetical set" aggregate +function. Common "ordered set" functions include ``percentile_cont()`` +and ``rank()``. SQLAlchemy includes built in implementations +:class:`_functions.rank`, :class:`_functions.dense_rank`, +:class:`_functions.mode`, :class:`_functions.percentile_cont` and +:class:`_functions.percentile_disc` which include a :meth:`_functions.FunctionElement.within_group` +method:: + + >>> print( + ... func.unnest( + ... func.percentile_disc([0.25,0.5,0.75,1]).within_group(user_table.c.name) + ... ) + ... ) + unnest(percentile_disc(:percentile_disc_1) WITHIN GROUP (ORDER BY user_account.name)) + +"FILTER" is supported by some backends to limit the range of an aggregate function to a +particular subset of rows compared to the total range of rows returned:: + + >>> stmt = select( + ... func.count(address_table.c.email_address).filter(user_table.c.name == 'sandy'), + ... func.count(address_table.c.email_address).filter(user_table.c.name == 'spongebob') + ... ).select_from(user_table).join(address_table) + >>> with engine.connect() as conn: + ... result = conn.execute(stmt) + ... print(result.all()) + {opensql}BEGIN (implicit) + SELECT count(address.email_address) FILTER (WHERE user_account.name = ?) AS anon_1, + count(address.email_address) FILTER (WHERE user_account.name = ?) AS anon_2 + FROM user_account JOIN address ON user_account.id = address.user_id + [...] ('sandy', 'spongebob') + [(2, 1)] + ROLLBACK + + +Table-Valued Functions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Table-valued SQL functions support a scalar representation that contains +named sub-elements. Often used for JSON functions as well as functions like +``generate_series()``, the table-valued function is specified in the FROM +clause, and is then referred towards as a table, or sometimes even as a +column. Functions of this form are prominent within the PostgreSQL database +and are also supported by SQLite. + +To provide support for these, SQLAlchemy provides the +:meth:`_functions.FunctionElement.alias` method, which will convert a +:func:`_sql.func` object into a FROM clause. From there, it can then +be referenced directly in the columns or WHERE clause of a SELECT using +the ``.c`` collection normally. When using the ``.c`` collection, the +names of columns desired should be passed to the original function using +the :paramref:`_functions.Function.table_valued` parameter:: + + >>> stmt = select( + ... func.json_each( + ... '["one", "two", "three"]', + ... ).table_valued("value").alias() + ... ) + >>> with engine.connect() as conn: + ... result = conn.execute(stmt) + ... print(result.all()) + BEGIN (implicit) + SELECT anon_1.value + FROM json_each(?) AS anon_1 + [...] ('["one", "two", "three"]',) + [('one',), ('two',), ('three',)] + ROLLBACK + +A scalar-valued function such as PostgreSQL's ``json_array_elements`` may +also be referred towards as itself in the columns clause, using the special +accessor ``.column``:: + + >>> fn = func.json_array_elements( + ... '[{"c1": 1, "c2": 2}, {"c1": 5, "c2": 10}]', + ... type_=JSON).alias("result_elem") + >>> stmt = select(fn.column['c1'], fn.column['c2']) + >>> print(stmt) + SELECT result_elem[:result_elem_1] AS anon_1, result_elem[:result_elem_2] AS anon_2 + FROM json_array_elements(:json_array_elements_1) AS result_elem + +Above, when we produce the ``json_array_elements()`` function, we provide +the :class:`_types.JSON` datatype as its return type, so that we may use +Python ``__getitem__()`` access on it, e.g. ``fn.column['c1']``. + .. rst-class:: core-header, orm-addin diff --git a/lib/sqlalchemy/dialects/oracle/base.py b/lib/sqlalchemy/dialects/oracle/base.py index 4754beebeb..70fe72ec4b 100644 --- a/lib/sqlalchemy/dialects/oracle/base.py +++ b/lib/sqlalchemy/dialects/oracle/base.py @@ -924,6 +924,19 @@ class OracleCompiler(compiler.SQLCompiler): else: return "" + def visit_function(self, func, **kw): + text = super(OracleCompiler, self).visit_function(func, **kw) + if kw.get("asfrom", False): + text = "TABLE (%s)" % func + return text + + def visit_table_valued_column(self, element, **kw): + text = super(OracleCompiler, self).visit_table_valued_column( + element, **kw + ) + text = "COLUMN_VALUE " + text + return text + def default_from(self): """Called when a ``SELECT`` statement has no froms, and no ``FROM`` clause is to be appended. diff --git a/lib/sqlalchemy/sql/coercions.py b/lib/sqlalchemy/sql/coercions.py index 05e0a4fcf0..b5b5082e6e 100644 --- a/lib/sqlalchemy/sql/coercions.py +++ b/lib/sqlalchemy/sql/coercions.py @@ -431,6 +431,20 @@ class ExpressionElementImpl(_ColumnCoercions, RoleImpl): except exc.ArgumentError as err: self._raise_for_expected(element, err=err) + def _raise_for_expected(self, element, argname=None, resolved=None, **kw): + if isinstance(element, roles.AnonymizedFromClauseRole): + advice = ( + "To create a " + "column expression from a FROM clause row " + "as a whole, use the .record() method." + ) + else: + advice = None + + return super(ExpressionElementImpl, self)._raise_for_expected( + element, argname=argname, resolved=resolved, advice=advice, **kw + ) + class BinaryElementImpl(ExpressionElementImpl, RoleImpl): @@ -597,6 +611,13 @@ class ColumnArgumentOrKeyImpl(_ReturnsStringKey, RoleImpl): __slots__ = () +class StrAsPlainColumnImpl(_CoerceLiterals, RoleImpl): + __slots__ = () + + def _text_coercion(self, element, argname=None): + return elements.ColumnClause(element) + + class ByOfImpl(_CoerceLiterals, _ColumnCoercions, RoleImpl, roles.ByOfRole): __slots__ = () diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index ef7db0bbe5..db13cff012 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -1718,13 +1718,18 @@ class SQLCompiler(Compiled): extract.expr._compiler_dispatch(self, **kwargs), ) + def visit_scalar_function_column(self, element, **kw): + compiled_fn = self.visit_function(element.fn, **kw) + compiled_col = self.visit_column(element, **kw) + return "(%s).%s" % (compiled_fn, compiled_col) + def visit_function(self, func, add_to_result_map=None, **kwargs): if add_to_result_map is not None: add_to_result_map(func.name, func.name, (), func.type) disp = getattr(self, "visit_%s_func" % func.name.lower(), None) if disp: - return disp(func, **kwargs) + text = disp(func, **kwargs) else: name = FUNCTIONS.get(func.__class__, None) if name: @@ -1739,7 +1744,7 @@ class SQLCompiler(Compiled): else name ) name = name + "%(expr)s" - return ".".join( + text = ".".join( [ ( self.preparer.quote(tok) @@ -1752,6 +1757,10 @@ class SQLCompiler(Compiled): + [name] ) % {"expr": self.function_argspec(func, **kwargs)} + if func._with_ordinality: + text += " WITH ORDINALITY" + return text + def visit_next_value_func(self, next_value, **kw): return self.visit_sequence(next_value.sequence) @@ -2527,6 +2536,27 @@ class SQLCompiler(Compiled): else: return self.preparer.format_alias(cte, cte_name) + def visit_table_valued_alias(self, element, **kw): + text = self.visit_alias(element, **kw) + if kw.get("asfrom") and element.named: + text += "(%s)" % ( + ", ".join( + "%s%s" + % ( + col.name, + " %s" + % self.dialect.type_compiler.process(col.type, **kw) + if not col.type._isnull + else "", + ) + for col in element.element.c + ) + ) + return text + + def visit_table_valued_column(self, element, **kw): + return self.visit_column(element, **kw) + def visit_alias( self, alias, diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 5ea3526eaf..7dc60ce9d0 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -4317,11 +4317,82 @@ class Label(roles.LabeledColumnExprRole, ColumnElement): return self.key, e +class NamedColumn(ColumnElement): + is_literal = False + table = None + + def _compare_name_for_result(self, other): + return (hasattr(other, "name") and self.name == other.name) or ( + hasattr(other, "_label") and self._label == other._label + ) + + @util.memoized_property + def description(self): + if util.py3k: + return self.name + else: + return self.name.encode("ascii", "backslashreplace") + + @HasMemoized.memoized_attribute + def _key_label(self): + if self.key != self.name: + return self._gen_label(self.key) + else: + return self._label + + @HasMemoized.memoized_attribute + def _label(self): + return self._gen_label(self.name) + + @HasMemoized.memoized_attribute + def _render_label_in_columns_clause(self): + return True + + def _gen_label(self, name, dedupe_on_key=True): + return name + + def _bind_param(self, operator, obj, type_=None, expanding=False): + return BindParameter( + self.key, + obj, + _compared_to_operator=operator, + _compared_to_type=self.type, + type_=type_, + unique=True, + expanding=expanding, + ) + + def _make_proxy( + self, + selectable, + name=None, + name_is_truncatable=False, + disallow_is_literal=False, + **kw + ): + c = ColumnClause( + coercions.expect(roles.TruncatedLabelRole, name or self.name) + if name_is_truncatable + else (name or self.name), + type_=self.type, + _selectable=selectable, + is_literal=False, + ) + c._propagate_attrs = selectable._propagate_attrs + if name is None: + c.key = self.key + c._proxies = [self] + if selectable._is_clone_of is not None: + c._is_clone_of = selectable._is_clone_of.columns.get(c.key) + return c.key, c + + class ColumnClause( roles.DDLReferredColumnRole, roles.LabeledColumnExprRole, + roles.StrAsPlainColumnRole, Immutable, - ColumnElement, + NamedColumn, ): """Represents a column expression from any textual string. @@ -4360,6 +4431,9 @@ class ColumnClause( """ + table = None + is_literal = False + __visit_name__ = "column" _traverse_internals = [ @@ -4470,27 +4544,6 @@ class ColumnClause( self.type = type_api.to_instance(type_) self.is_literal = is_literal - def _compare_name_for_result(self, other): - if ( - self.is_literal - or self.table is None - or self.table._is_textual - or not hasattr(other, "proxy_set") - or ( - isinstance(other, ColumnClause) - and ( - other.is_literal - or other.table is None - or other.table._is_textual - ) - ) - ): - return (hasattr(other, "name") and self.name == other.name) or ( - hasattr(other, "_label") and self._label == other._label - ) - else: - return other.proxy_set.intersection(self.proxy_set) - def get_children(self, column_tables=False, **kw): # override base get_children() to not return the Table # or selectable that is parent to this column. Traversals @@ -4505,24 +4558,6 @@ class ColumnClause( else: return [] - @util.memoized_property - def description(self): - if util.py3k: - return self.name - else: - return self.name.encode("ascii", "backslashreplace") - - @HasMemoized.memoized_attribute - def _key_label(self): - if self.key != self.name: - return self._gen_label(self.key) - else: - return self._label - - @HasMemoized.memoized_attribute - def _label(self): - return self._gen_label(self.name) - @HasMemoized.memoized_attribute def _render_label_in_columns_clause(self): return self.table is not None @@ -4531,6 +4566,27 @@ class ColumnClause( def _ddl_label(self): return self._gen_label(self.name, dedupe_on_key=False) + def _compare_name_for_result(self, other): + if ( + self.is_literal + or self.table is None + or self.table._is_textual + or not hasattr(other, "proxy_set") + or ( + isinstance(other, ColumnClause) + and ( + other.is_literal + or other.table is None + or other.table._is_textual + ) + ) + ): + return (hasattr(other, "name") and self.name == other.name) or ( + hasattr(other, "_label") and self._label == other._label + ) + else: + return other.proxy_set.intersection(self.proxy_set) + def _gen_label(self, name, dedupe_on_key=True): t = self.table if self.is_literal: @@ -4575,17 +4631,6 @@ class ColumnClause( else: return name - def _bind_param(self, operator, obj, type_=None, expanding=False): - return BindParameter( - self.key, - obj, - _compared_to_operator=operator, - _compared_to_type=self.type, - type_=type_, - unique=True, - expanding=expanding, - ) - def _make_proxy( self, selectable, @@ -4627,6 +4672,46 @@ class ColumnClause( return c.key, c +class Record(NamedColumn): + _traverse_internals = [ + ("name", InternalTraversal.dp_anon_name), + ("type", InternalTraversal.dp_type), + ("fromclause", InternalTraversal.dp_clauseelement), + ] + + __visit_name__ = "column" + + @util.preload_module("sqlalchemy.sql.sqltypes") + def __init__(self, fromclause): + sqltypes = util.preloaded.sql_sqltypes + self.name = fromclause.name + self.fromclause = fromclause + self.type = sqltypes.RecordType() + + @property + def _from_objects(self): + return [self.fromclause] + + +class TableValuedColumn(NamedColumn): + __visit_name__ = "table_valued_column" + + _traverse_internals = [ + ("name", InternalTraversal.dp_anon_name), + ("type", InternalTraversal.dp_type), + ("scalar_alias", InternalTraversal.dp_clauseelement), + ] + + def __init__(self, scalar_alias): + self.scalar_alias = scalar_alias + self.key = self.name = scalar_alias.name + self.type = scalar_alias.element.type + + @property + def _from_objects(self): + return [self.scalar_alias] + + class CollationClause(ColumnElement): __visit_name__ = "collation" diff --git a/lib/sqlalchemy/sql/functions.py b/lib/sqlalchemy/sql/functions.py index a9ea98d047..f493b08db9 100644 --- a/lib/sqlalchemy/sql/functions.py +++ b/lib/sqlalchemy/sql/functions.py @@ -17,22 +17,25 @@ from . import sqltypes from . import util as sqlutil from .base import ColumnCollection from .base import Executable +from .base import Generative from .base import HasMemoized from .elements import _type_from_args from .elements import BinaryExpression from .elements import BindParameter from .elements import Cast from .elements import ClauseList +from .elements import ColumnClause from .elements import ColumnElement from .elements import Extract from .elements import FunctionFilter from .elements import Grouping from .elements import literal_column +from .elements import NamedColumn from .elements import Over from .elements import WithinGroup -from .selectable import Alias from .selectable import FromClause from .selectable import Select +from .selectable import TableValuedAlias from .visitors import InternalTraversal from .visitors import TraversibleType from .. import util @@ -63,7 +66,7 @@ def register_function(identifier, fn, package="_default"): reg[identifier] = fn -class FunctionElement(Executable, ColumnElement, FromClause): +class FunctionElement(Executable, ColumnElement, FromClause, Generative): """Base for SQL function-oriented constructs. .. seealso:: @@ -80,11 +83,19 @@ class FunctionElement(Executable, ColumnElement, FromClause): """ - _traverse_internals = [("clause_expr", InternalTraversal.dp_clauseelement)] + _traverse_internals = [ + ("clause_expr", InternalTraversal.dp_clauseelement), + ("_table_valued", InternalTraversal.dp_clauseelement_tuple), + ("_table_values_named", InternalTraversal.dp_boolean), + ("_with_ordinality", InternalTraversal.dp_boolean), + ] packagenames = () _has_args = False + _table_valued = None + _table_values_named = False + _with_ordinality = False def __init__(self, *clauses, **kwargs): r"""Construct a :class:`.FunctionElement`. @@ -123,6 +134,171 @@ class FunctionElement(Executable, ColumnElement, FromClause): self, multiparams, params, execution_options ) + def scalar_table_valued(self, name, type_=None): + """Return a column expression that's against this + :class:`_functions.FunctionElement` as a scalar + table-valued expression. + + The returned expression is similar to that returned by a single column + accessed off of a :meth:`_functions.FunctionElement.table_valued` + construct, except no FROM clause is generated; the function is rendered + in the similar way as a scalar subquery. + + E.g.:: + + >>> from sqlalchemy import func, select + >>> fn = func.jsonb_each("{'k', 'v'}").scalar_table_valued("key") + >>> print(select(fn)) + SELECT (jsonb_each(:jsonb_each_1)).key + + .. versionadded:: 1.4.0b2 + + .. seealso:: + + :meth:`_functions.FunctionElement.table_valued` + + :meth:`_functions.FunctionElement.named_table_valued` + + :meth:`_functions.FunctionElement.alias` + + :meth:`_functions.FunctionElement.column_valued` + + """ # noqa E501 + + return ScalarFunctionColumn(self, name, type_) + + def table_valued(self, *expr, **kw): + """Return a :class:`_sql.TableValuedAlias` representation of this + :class:`_functions.FunctionElement` with table-valued expressions added. + + e.g.:: + + >>> fn = ( + ... func.generate_series(1, 5). + ... table_valued("value", "start", "stop", "step") + ... ) + + >>> print(select(fn)) + SELECT anon_1.value, anon_1.start, anon_1.stop, anon_1.step + FROM generate_series(:generate_series_1, :generate_series_2) AS anon_1 + + >>> print(select(fn.c.value, fn.c.stop).where(fn.c.value > 2)) + SELECT anon_1.value, anon_1.stop + FROM generate_series(:generate_series_1, :generate_series_2) AS anon_1 + WHERE anon_1.value > :value_1 + + A WITH ORDINALITY expression may be generated by passing the keyword + argument "with_ordinality":: + + >>> fn = func.generate_series(4, 1, -1).table_valued("gen", with_ordinality="ordinality") + >>> print(select(fn)) + + .. versionadded:: 1.4.0b2 + + .. seealso:: + + :meth:`_functions.FunctionElement.table_valued` + + :meth:`_functions.FunctionElement.named_table_valued` + + :meth:`_functions.FunctionElement.alias` + + + """ # noqa 501 + + new_func = self._generate() + new_func._table_valued = [ + coercions.expect(roles.StrAsPlainColumnRole, elem) for elem in expr + ] + + with_ordinality = kw.pop("with_ordinality", None) + if with_ordinality: + new_func._table_valued += ( + coercions.expect(roles.StrAsPlainColumnRole, with_ordinality), + ) + new_func._with_ordinality = True + + return new_func.alias() + + def named_table_valued(self, *expr, **kw): + """Return a :class:`_sql.TableValuedAlias` representation of this + :class:`_functions.FunctionElement` with named table-valued + expressions added. + + E.g.:: + + + >>> fn = ( + ... func.json_to_recordset( + ... '[{"a":1,"b":"foo"},{"a":"2","c":"bar"}]' + ... ) + ... .named_table_valued(column("a", Integer), column("b", String)) + ... ) + >>> print(select(fn.c.a, fn.c.b)) + SELECT anon_1.a, anon_1.b + FROM json_to_recordset(:json_to_recordset_1) AS anon_1(a INTEGER, b VARCHAR) + + A WITH ORDINALITY expression may be generated by passing the keyword + argument "with_ordinality":: + + >>> fn = ( + ... func.json_object_keys('{"a1":"1","a2":"2","a3":"3"}') + ... .named_table_valued("keys", with_ordinality="n") + ... ) + + >>> print(select(fn)) + + .. seealso:: + + :meth:`_functions.FunctionElement.table_valued` + + :meth:`_functions.FunctionElement.named_table_valued` + + :meth:`_functions.FunctionElement.alias` + + + """ # noqa E501 + + new_func = self._generate() + new_func._table_valued = [ + coercions.expect(roles.StrAsPlainColumnRole, elem) for elem in expr + ] + with_ordinality = kw.pop("with_ordinality", None) + + if with_ordinality: + new_func._table_valued += ( + coercions.expect(roles.StrAsPlainColumnRole, with_ordinality), + ) + new_func._with_ordinality = True + + new_func._table_values_named = True + return new_func.alias() + + def column_valued(self, name=None): + """Return this :class:`_function.FunctionElement` as a column expression that + selects from itself as a FROM clause. + + E.g.:: + + >>> from sqlalchemy import select, func + >>> gs = func.generate_series(1, 5, -1).column_valued() + >>> print(select(gs)) + SELECT anon_1 + FROM generate_series(:generate_series_1, :generate_series_2, :generate_series_3) AS anon_1 + + This is shorthand for:: + + gs = func.generate_series(1, 5, -1).alias().column + + + .. seealso:: + + :meth:`_functions.FunctionElement.alias` + + """ # noqa 501 + + return self.alias(name=name).column + @property def columns(self): r"""The set of columns exported by this :class:`.FunctionElement`. @@ -142,8 +318,15 @@ class FunctionElement(Executable, ColumnElement, FromClause): """ - col = self.label(None) - return ColumnCollection(columns=[(col.key, col)]) + if self._table_valued: + cols = [ + ColumnClause(elem) if isinstance(elem, str) else elem + for elem in self._table_valued + ] + else: + cols = [self.label(None)] + + return ColumnCollection(columns=[(col.key, col) for col in cols]) @HasMemoized.memoized_attribute def clauses(self): @@ -305,37 +488,65 @@ class FunctionElement(Executable, ColumnElement, FromClause): return None - def alias(self, name=None, flat=False): + def alias(self, name=None): r"""Produce a :class:`_expression.Alias` construct against this :class:`.FunctionElement`. + .. note:: + + The :meth:`_functions.FunctionElement.alias` method is part of the + mechanism by which "table valued" SQL functions are created. + However, most use cases are covered by higher level methods on + :class:`_functions.FunctionElement` including + :meth:`_functions.FunctionElement.table_valued`, + :meth:`_functions.FunctionElement.named_table_valued`, and + :meth:`_functions.FunctionElement.column_valued`. + This construct wraps the function in a named alias which is suitable for the FROM clause, in the style accepted for example - by PostgreSQL. + by PostgreSQL. A column expression is also provided using the + special ``.column`` attribute, which may + be used to refer to the output of the function as a scalar value + in the columns or where clause, for a backend such as PostgreSQL. + + For a full table-valued expression, use the + :meth:`_function.FunctionElement.table_valued` method first to + establish named columns. e.g.:: - from sqlalchemy.sql import column + >>> from sqlalchemy import func, select, column + >>> data_view = func.unnest([1, 2, 3]).alias("data_view") + >>> print(select(data_view.column)) + SELECT data_view + FROM unnest(:unnest_1) AS data_view - stmt = select(column('data_view')).\ - select_from(SomeTable).\ - select_from(func.unnest(SomeTable.data).alias('data_view') - ) + The :meth:`_functions.FunctionElement.column_valued` method provides + a shortcut for the above pattern:: + + >>> data_view = func.unnest([1, 2, 3]).column_valued("data_view") + >>> print(select(data_view)) + SELECT data_view + FROM unnest(:unnest_1) AS data_view - Would produce: + .. versionadded:: 1.4.0b2 Added the ``.column`` accessor - .. sourcecode:: sql + .. seealso:: - SELECT data_view - FROM sometable, unnest(sometable.data) AS data_view + :meth:`_functions.FunctionElement.table_valued` + + :meth:`_functions.FunctionElement.named_table_valued` + + :meth:`_functions.FunctionElement.scalar_table_valued` + + :meth:`_functions.FunctionElement.column_valued` - .. versionadded:: 0.9.8 The :meth:`.FunctionElement.alias` method - is now supported. Previously, this method's behavior was - undefined and did not behave consistently across versions. """ - return Alias._construct(self, name) + return TableValuedAlias._construct( + self, name, named=self._table_values_named + ) def select(self): """Produce a :func:`_expression.select` construct @@ -441,6 +652,24 @@ class FunctionAsBinary(BinaryExpression): self.sql_function.clauses.clauses[self.right_index - 1] = value +class ScalarFunctionColumn(NamedColumn): + __visit_name__ = "scalar_function_column" + + _traverse_internals = [ + ("name", InternalTraversal.dp_anon_name), + ("type", InternalTraversal.dp_type), + ("fn", InternalTraversal.dp_clauseelement), + ] + + is_literal = False + table = None + + def __init__(self, fn, name, type_=None): + self.fn = fn + self.name = name + self.type = sqltypes.to_instance(type_) + + class _FunctionGenerator(object): """Generate SQL function expressions. @@ -586,10 +815,9 @@ class Function(FunctionElement): func.mypackage.some_function(col1, col2) - .. seealso:: - :ref:`coretutorial_functions` + :ref:`tutorial_functions` - in the :ref:`unified_tutorial` :data:`.func` - namespace which produces registered or ad-hoc :class:`.Function` instances. diff --git a/lib/sqlalchemy/sql/roles.py b/lib/sqlalchemy/sql/roles.py index 2c4ff75c4e..52743bd506 100644 --- a/lib/sqlalchemy/sql/roles.py +++ b/lib/sqlalchemy/sql/roles.py @@ -44,6 +44,10 @@ class ColumnArgumentOrKeyRole(ColumnArgumentRole): _role_name = "Column expression or string key" +class StrAsPlainColumnRole(ColumnArgumentRole): + _role_name = "Column expression or string key" + + class ColumnListRole(SQLRole): """Elements suitable for forming comma separated lists of expressions.""" diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 8e478583f6..c299065ae8 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -51,6 +51,8 @@ from .elements import ColumnClause from .elements import GroupedElement from .elements import Grouping from .elements import literal_column +from .elements import Record +from .elements import TableValuedColumn from .elements import UnaryExpression from .visitors import InternalTraversal from .. import exc @@ -623,6 +625,33 @@ class FromClause(roles.AnonymizedFromClauseRole, Selectable): return Alias._construct(self, name) + def record(self): + """Return a :class:`_sql.Record` object for this + :class:`_expression.FromClause`. + + A "record" is a :class:`_sql.ColumnElement` that represents a complete + row in a table. Support for this construct is backend dependent, + however databases such as PostgreSQL and Oracle have support + for "record" datatypes. + + E.g.:: + + >>> from sqlalchemy import select, column, func, table + >>> a = table("a", column("id"), column("x"), column("y")) + >>> stmt = select(func.row_to_json(a.record())) + >>> print(stmt) + SELECT row_to_json(a) AS row_to_json_1 + FROM a + + .. versionadded:: 1.4.0b2 + + .. seealso:: + + :ref:`tutorial_functions` - in the :ref:`unified_tutorial` + + """ + return Record(self) + def tablesample(self, sampling, name=None, seed=None): """Return a TABLESAMPLE alias of this :class:`_expression.FromClause`. @@ -1583,6 +1612,44 @@ class Alias(roles.DMLTableRole, AliasedReturnsRows): ).alias(name=name, flat=flat) +class TableValuedAlias(Alias): + """An alias that includes the ability to be used in a columns context. + + Provides the :attr:`_sql.ScalarAlias.column` accessor that returns + a :class:`_sql.ColumnElement` representing this object. + + The main use case for this construct is that of PostgreSQL functions + that may be used in the FROM clause of a SELECT. + + .. versionadded:: 1.4.0b2 + + """ + + __visit_name__ = "table_valued_alias" + + named = False + + _traverse_internals = [ + ("element", InternalTraversal.dp_clauseelement), + ("name", InternalTraversal.dp_anon_name), + ("named", InternalTraversal.dp_boolean), + ] + + def _init(self, selectable, name=None, named=False): + self.named = named + super(TableValuedAlias, self)._init(selectable, name=name) + + @HasMemoized.memoized_attribute + def column(self): + """Return a column expression representing this + :class:`_sql.ScalarAlias`.""" + + return TableValuedColumn(self) + + def alias(self, name=None): + return self.element.alias(name=name) + + class Lateral(AliasedReturnsRows): """Represent a LATERAL subquery. diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py index d20c8168d5..cac7d74a47 100644 --- a/lib/sqlalchemy/sql/sqltypes.py +++ b/lib/sqlalchemy/sql/sqltypes.py @@ -3110,6 +3110,10 @@ class NullType(TypeEngine): comparator_factory = Comparator +class RecordType(TypeEngine): + """Refers to a table record type.""" + + class MatchType(Boolean): """Refers to the return type of the MATCH operator. diff --git a/test/dialect/oracle/test_compiler.py b/test/dialect/oracle/test_compiler.py index f09346eb32..f12207d2b9 100644 --- a/test/dialect/oracle/test_compiler.py +++ b/test/dialect/oracle/test_compiler.py @@ -1481,3 +1481,34 @@ class RegexpTest(fixtures.TestBase, testing.AssertsCompiledSQL): "REGEXP_REPLACE(mytable.myid, :myid_1, :myid_2, mytable.name)", checkparams={"myid_1": "pattern", "myid_2": "replacement"}, ) + + +class TableValuedFunctionTest(fixtures.TestBase, testing.AssertsCompiledSQL): + __dialect__ = "oracle" + + def test_scalar_alias_column(self): + fn = func.scalar_strings(5) + stmt = select(fn.alias().column) + self.assert_compile( + stmt, + "SELECT COLUMN_VALUE anon_1 " + "FROM TABLE (scalar_strings(:scalar_strings_1)) anon_1", + ) + + def test_column_valued(self): + fn = func.scalar_strings(5) + stmt = select(fn.column_valued()) + self.assert_compile( + stmt, + "SELECT COLUMN_VALUE anon_1 " + "FROM TABLE (scalar_strings(:scalar_strings_1)) anon_1", + ) + + def test_table_valued(self): + fn = func.three_pairs().table_valued("string1", "string2") + stmt = select(fn.c.string1, fn.c.string2) + self.assert_compile( + stmt, + "SELECT anon_1.string1, anon_1.string2 " + "FROM TABLE (three_pairs()) anon_1", + ) diff --git a/test/dialect/oracle/test_dialect.py b/test/dialect/oracle/test_dialect.py index 2ec3068425..cce33d4a26 100644 --- a/test/dialect/oracle/test_dialect.py +++ b/test/dialect/oracle/test_dialect.py @@ -7,6 +7,7 @@ from sqlalchemy import Computed from sqlalchemy import create_engine from sqlalchemy import exc from sqlalchemy import Float +from sqlalchemy import func from sqlalchemy import Integer from sqlalchemy import literal_column from sqlalchemy import outparam @@ -946,3 +947,110 @@ class CXOracleConnectArgsTest(fixtures.TestBase): "auto_convert_lobs", False, ) + + +class TableValuedTest(fixtures.TestBase): + __backend__ = True + __only_on__ = "oracle" + + @testing.fixture + def scalar_strings(self, connection): + connection.exec_driver_sql( + "CREATE OR REPLACE TYPE strings_t IS TABLE OF VARCHAR2 (100)" + ) + connection.exec_driver_sql( + r""" +CREATE OR REPLACE FUNCTION scalar_strings ( + count_in IN INTEGER) + RETURN strings_t + AUTHID DEFINER +IS + l_strings strings_t := strings_t (); +BEGIN + l_strings.EXTEND (count_in); + + FOR indx IN 1 .. count_in + LOOP + l_strings (indx) := 'some string'; + END LOOP; + + RETURN l_strings; +END; + """ + ) + yield + connection.exec_driver_sql("DROP FUNCTION scalar_strings") + connection.exec_driver_sql("DROP TYPE strings_t") + + @testing.fixture + def two_strings(self, connection): + connection.exec_driver_sql( + """ +CREATE OR REPLACE TYPE two_strings_ot + AUTHID DEFINER IS OBJECT +( + string1 VARCHAR2 (10), + string2 VARCHAR2 (10) +)""" + ) + connection.exec_driver_sql( + """ + CREATE OR REPLACE TYPE two_strings_nt IS TABLE OF two_strings_ot +""" + ) + + connection.exec_driver_sql( + """ + CREATE OR REPLACE FUNCTION three_pairs + RETURN two_strings_nt + AUTHID DEFINER +IS + l_strings two_strings_nt; +BEGIN + RETURN two_strings_nt (two_strings_ot ('a', 'b'), + two_strings_ot ('c', 'd'), + two_strings_ot ('e', 'f')); +END; +""" + ) + yield + connection.exec_driver_sql("DROP FUNCTION three_pairs") + connection.exec_driver_sql("DROP TYPE two_strings_nt") + connection.exec_driver_sql("DROP TYPE two_strings_ot") + + def test_scalar_strings_control(self, scalar_strings, connection): + result = ( + connection.exec_driver_sql( + "SELECT COLUMN_VALUE my_string FROM TABLE (scalar_strings (5))" + ) + .scalars() + .all() + ) + eq_(result, ["some string"] * 5) + + def test_scalar_strings_named_control(self, scalar_strings, connection): + result = ( + connection.exec_driver_sql( + "SELECT COLUMN_VALUE anon_1 " + "FROM TABLE (scalar_strings (5)) anon_1" + ) + .scalars() + .all() + ) + eq_(result, ["some string"] * 5) + + def test_scalar_strings(self, scalar_strings, connection): + fn = func.scalar_strings(5) + result = connection.execute(select(fn.column_valued())).scalars().all() + eq_(result, ["some string"] * 5) + + def test_two_strings_control(self, two_strings, connection): + result = connection.exec_driver_sql( + "SELECT string1, string2 FROM TABLE (three_pairs ())" + ).all() + eq_(result, [("a", "b"), ("c", "d"), ("e", "f")]) + + def test_two_strings(self, two_strings, connection): + fn = func.three_pairs().table_valued("string1", "string2") + result = connection.execute(select(fn.c.string1, fn.c.string2)).all() + eq_(result, [("a", "b"), ("c", "d"), ("e", "f")]) diff --git a/test/dialect/postgresql/test_query.py b/test/dialect/postgresql/test_query.py index 47f86e791f..a2c152501d 100644 --- a/test/dialect/postgresql/test_query.py +++ b/test/dialect/postgresql/test_query.py @@ -3,11 +3,13 @@ import datetime from sqlalchemy import and_ +from sqlalchemy import cast from sqlalchemy import Column from sqlalchemy import Date from sqlalchemy import DateTime from sqlalchemy import exc from sqlalchemy import extract +from sqlalchemy import Float from sqlalchemy import ForeignKey from sqlalchemy import func from sqlalchemy import Integer @@ -22,8 +24,10 @@ from sqlalchemy import Table from sqlalchemy import testing from sqlalchemy import text from sqlalchemy import Time +from sqlalchemy import true from sqlalchemy import tuple_ from sqlalchemy.dialects import postgresql +from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.testing import assert_raises from sqlalchemy.testing import AssertsCompiledSQL from sqlalchemy.testing import AssertsExecutionResults @@ -1097,3 +1101,229 @@ class ExtractTest(fixtures.TablesTest): datetime.timedelta(days=5) + t.c.dtme, overrides={"day": 15, "epoch": 1337084125.0}, ) + + +class TableValuedRoundTripTest(fixtures.TestBase): + __backend__ = True + __only_on__ = "postgresql" + + def test_generate_series_scalar(self, connection): + x = func.generate_series(1, 2).alias("x") + y = func.generate_series(1, 2).alias("y") + + stmt = select(x.column, y.column).join_from(x, y, true()) + + eq_(connection.execute(stmt).all(), [(1, 1), (1, 2), (2, 1), (2, 2)]) + + def test_aggregate_scalar_over_table_valued(self, metadata, connection): + test = Table( + "test", metadata, Column("id", Integer), Column("data", JSONB) + ) + test.create(connection) + + connection.execute( + test.insert(), + [ + {"id": 1, "data": {"key": [23.7, 108.17, 55.98]}}, + {"id": 2, "data": {"key": [2.320, 9.55]}}, + {"id": 3, "data": {"key": [10.5, 6]}}, + ], + ) + + elem = ( + func.jsonb_array_elements_text(test.c.data["key"]) + .table_valued("value") + .alias("elem") + ) + + maxdepth = select(func.max(cast(elem.c.value, Float))).label( + "maxdepth" + ) + + stmt = select(test.c.id.label("test_id"), maxdepth).order_by( + "maxdepth" + ) + + eq_( + connection.execute(stmt).all(), [(2, 9.55), (3, 10.5), (1, 108.17)] + ) + + @testing.fixture + def assets_transactions(self, metadata, connection): + assets_transactions = Table( + "assets_transactions", + metadata, + Column("id", Integer), + Column("contents", JSONB), + ) + assets_transactions.create(connection) + connection.execute( + assets_transactions.insert(), + [ + {"id": 1, "contents": {"k1": "v1"}}, + {"id": 2, "contents": {"k2": "v2"}}, + {"id": 3, "contents": {"k3": "v3"}}, + ], + ) + return assets_transactions + + def test_scalar_table_valued(self, assets_transactions, connection): + stmt = select( + assets_transactions.c.id, + func.jsonb_each( + assets_transactions.c.contents, type_=JSONB + ).scalar_table_valued("key"), + func.jsonb_each( + assets_transactions.c.contents, type_=JSONB + ).scalar_table_valued("value"), + ) + + eq_( + connection.execute(stmt).all(), + [(1, "k1", "v1"), (2, "k2", "v2"), (3, "k3", "v3")], + ) + + def test_table_valued(self, assets_transactions, connection): + + jb = func.jsonb_each(assets_transactions.c.contents).table_valued( + "key", "value" + ) + + stmt = select(assets_transactions.c.id, jb.c.key, jb.c.value).join( + jb, true() + ) + eq_( + connection.execute(stmt).all(), + [(1, "k1", "v1"), (2, "k2", "v2"), (3, "k3", "v3")], + ) + + @testing.fixture + def axy_table(self, metadata, connection): + a = Table( + "a", + metadata, + Column("id", Integer), + Column("x", Integer), + Column("y", Integer), + ) + a.create(connection) + connection.execute( + a.insert(), + [ + {"id": 1, "x": 5, "y": 4}, + {"id": 2, "x": 15, "y": 3}, + {"id": 3, "x": 7, "y": 9}, + ], + ) + + return a + + def test_function_against_table_record(self, axy_table, connection): + """ + SELECT row_to_json(anon_1) AS row_to_json_1 + FROM (SELECT a.id AS id, a.x AS x, a.y AS y + FROM a) AS anon_1 + + """ + + stmt = select(func.row_to_json(axy_table.record())) + + eq_( + connection.execute(stmt).scalars().all(), + [ + {"id": 1, "x": 5, "y": 4}, + {"id": 2, "x": 15, "y": 3}, + {"id": 3, "x": 7, "y": 9}, + ], + ) + + def test_function_against_subq_record(self, axy_table, connection): + """ + SELECT row_to_json(anon_1) AS row_to_json_1 + FROM (SELECT a.id AS id, a.x AS x, a.y AS y + FROM a) AS anon_1 + + """ + + stmt = select(func.row_to_json(axy_table.select().subquery().record())) + + eq_( + connection.execute(stmt).scalars().all(), + [ + {"id": 1, "x": 5, "y": 4}, + {"id": 2, "x": 15, "y": 3}, + {"id": 3, "x": 7, "y": 9}, + ], + ) + + def test_function_against_row_constructor(self, connection): + + stmt = select(func.row_to_json(func.row(1, "foo"))) + + eq_(connection.scalar(stmt), {"f1": 1, "f2": "foo"}) + + def test_with_ordinality_named(self, connection): + + stmt = select( + func.generate_series(4, 1, -1).named_table_valued( + "gs", with_ordinality="ordinality" + ) + ) + + eq_(connection.execute(stmt).all(), [(4, 1), (3, 2), (2, 3), (1, 4)]) + + def test_with_ordinality_star(self, connection): + + stmt = select("*").select_from( + func.generate_series(4, 1, -1).table_valued( + with_ordinality="ordinality" + ) + ) + + eq_(connection.execute(stmt).all(), [(4, 1), (3, 2), (2, 3), (1, 4)]) + + def test_plain_old_unnest(self, connection): + fn = func.unnest( + postgresql.array(["one", "two", "three", "four"]) + ).column_valued() + + stmt = select(fn) + + eq_( + connection.execute(stmt).all(), + [("one",), ("two",), ("three",), ("four",)], + ) + + def test_unnest_with_ordinality(self, connection): + + array_val = postgresql.array( + [postgresql.array([14, 41, 7]), postgresql.array([54, 9, 49])] + ) + stmt = select("*").select_from( + func.unnest(array_val) + .named_table_valued("elts", with_ordinality="num") + .alias("t") + ) + eq_( + connection.execute(stmt).all(), + [(14, 1), (41, 2), (7, 3), (54, 4), (9, 5), (49, 6)], + ) + + def test_unnest_with_ordinality_named(self, connection): + + array_val = postgresql.array( + [postgresql.array([14, 41, 7]), postgresql.array([54, 9, 49])] + ) + + fn = ( + func.unnest(array_val) + .named_table_valued("elts", with_ordinality="num") + .alias("t") + ) + + stmt = select(fn.c.elts, fn.c.num) + + eq_( + connection.execute(stmt).all(), + [(14, 1), (41, 2), (7, 3), (54, 4), (9, 5), (49, 6)], + ) diff --git a/test/requirements.py b/test/requirements.py index 3c9b39ac71..0098283b3f 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -956,6 +956,13 @@ class DefaultRequirements(SuiteRequirements): return skip_if(["mssql", "sqlite"]) + @property + def tuple_valued_builtin_functions(self): + return only_on( + lambda config: self._sqlite_json(config) + or against(config, "postgresql") + ) + @property def array_type(self): return only_on( diff --git a/test/sql/test_compare.py b/test/sql/test_compare.py index 1ac3613f74..716ff107ab 100644 --- a/test/sql/test_compare.py +++ b/test/sql/test_compare.py @@ -272,6 +272,31 @@ class CoreFixtures(object): func.next_value(Sequence("q")), func.next_value(Sequence("p")), ), + lambda: ( + func.json_to_recordset("{foo}"), + func.json_to_recordset("{foo}").table_valued("a", "b"), + func.jsonb_to_recordset("{foo}").table_valued("a", "b"), + func.json_to_recordset("{foo}").named_table_valued("a", "b"), + func.json_to_recordset("{foo}").named_table_valued( + "a", with_ordinality="b" + ), + func.json_to_recordset("{foo}").named_table_valued( + "a", with_ordinality="c" + ), + func.json_to_recordset("{foo}").named_table_valued( + column("a", Integer), column("b", String) + ), + func.json_to_recordset("{foo}").named_table_valued("b", "c"), + func.json_to_recordset("{foo}") + .named_table_valued("a", "b") + .alias("foo"), + func.json_to_recordset("{foo}") + .table_valued("a", "b") + .alias("foo"), + func.json_to_recordset("{foo}").column_valued(), + func.json_to_recordset("{foo}").scalar_table_valued("foo"), + ), + lambda: (table_a.record(), table_b.record()), lambda: (True_(), False_()), lambda: (Null(),), lambda: (ReturnTypeFromArgs("foo"), ReturnTypeFromArgs(5)), diff --git a/test/sql/test_functions.py b/test/sql/test_functions.py index 50f50f0f0f..59accd245b 100644 --- a/test/sql/test_functions.py +++ b/test/sql/test_functions.py @@ -5,12 +5,15 @@ import decimal from sqlalchemy import ARRAY from sqlalchemy import bindparam from sqlalchemy import Boolean +from sqlalchemy import cast from sqlalchemy import Column from sqlalchemy import Date from sqlalchemy import DateTime from sqlalchemy import extract +from sqlalchemy import Float from sqlalchemy import func from sqlalchemy import Integer +from sqlalchemy import JSON from sqlalchemy import literal from sqlalchemy import literal_column from sqlalchemy import Numeric @@ -20,6 +23,7 @@ from sqlalchemy import sql from sqlalchemy import String from sqlalchemy import Table from sqlalchemy import testing +from sqlalchemy import true from sqlalchemy import types as sqltypes from sqlalchemy import util from sqlalchemy.dialects import mysql @@ -1148,3 +1152,431 @@ class RegisterTest(fixtures.TestBase, AssertsCompiledSQL): assert "not_registered_func" not in functions._registry["_default"] assert isinstance(func.not_registered_func_child().type, Integer) + + +class TableValuedCompileTest(fixtures.TestBase, AssertsCompiledSQL): + """test the full set of functions as FROM developed in [ticket:3566]""" + + __dialect__ = "default_enhanced" + + def test_aggregate_scalar_over_table_valued(self): + test = table("test", column("id"), column("data", JSON)) + + elem = ( + func.json_array_elements_text(test.c.data["key"]) + .table_valued("value") + .alias("elem") + ) + + maxdepth = select(func.max(cast(elem.c.value, Float))).label( + "maxdepth" + ) + + stmt = select(test.c.id.label("test_id"), maxdepth).order_by( + "maxdepth" + ) + + self.assert_compile( + stmt, + "SELECT test.id AS test_id, " + "(SELECT max(CAST(elem.value AS FLOAT)) AS max_1 " + "FROM json_array_elements_text(test.data[:data_1]) AS elem) " + "AS maxdepth " + "FROM test ORDER BY maxdepth", + ) + + def test_scalar_table_valued(self): + assets_transactions = table( + "assets_transactions", column("id"), column("contents", JSON) + ) + + stmt = select( + assets_transactions.c.id, + func.jsonb_each( + assets_transactions.c.contents + ).scalar_table_valued("key"), + func.jsonb_each( + assets_transactions.c.contents + ).scalar_table_valued("value"), + ) + self.assert_compile( + stmt, + "SELECT assets_transactions.id, " + "(jsonb_each(assets_transactions.contents)).key, " + "(jsonb_each(assets_transactions.contents)).value " + "FROM assets_transactions", + ) + + def test_table_valued_one(self): + assets_transactions = table( + "assets_transactions", column("id"), column("contents", JSON) + ) + + jb = func.jsonb_each(assets_transactions.c.contents).table_valued( + "key", "value" + ) + + stmt = select(assets_transactions.c.id, jb.c.key, jb.c.value).join( + jb, true() + ) + + self.assert_compile( + stmt, + "SELECT assets_transactions.id, anon_1.key, anon_1.value " + "FROM assets_transactions " + "JOIN jsonb_each(assets_transactions.contents) AS anon_1 ON true", + ) + + def test_table_valued_two(self): + """ + SELECT vi.id, vv.value + FROM value_ids() AS vi JOIN values AS vv ON vv.id = vi.id + + """ + + values = table( + "values", + column( + "id", + Integer, + ), + column("value", String), + ) + vi = func.value_ids().table_valued(column("id", Integer)).alias("vi") + vv = values.alias("vv") + + stmt = select(vi.c.id, vv.c.value).select_from( # noqa + vi.join(vv, vv.c.id == vi.c.id) + ) + self.assert_compile( + stmt, + "SELECT vi.id, vv.value FROM value_ids() AS vi " + "JOIN values AS vv ON vv.id = vi.id", + ) + + def test_table_as_record(self): + a = table( + "a", + column("id"), + column("x"), + column("y"), + ) + + stmt = select(func.row_to_json(a.record())) + + self.assert_compile( + stmt, "SELECT row_to_json(a) AS row_to_json_1 FROM a" + ) + + def test_subquery_as_record(self): + """ + SELECT row_to_json(anon_1) AS row_to_json_1 + FROM (SELECT a.id AS id, a.x AS x, a.y AS y + FROM a) AS anon_1 + + """ + + a = table( + "a", + column("id"), + column("x"), + column("y"), + ) + + stmt = select(func.row_to_json(a.select().subquery().record())) + + self.assert_compile( + stmt, + "SELECT row_to_json(anon_1) AS row_to_json_1 FROM " + "(SELECT a.id AS id, a.x AS x, a.y AS y FROM a) AS anon_1", + ) + + def test_scalar_subquery(self): + + a = table( + "a", + column("id"), + column("x"), + column("y"), + ) + + stmt = select(func.row_to_json(a.select().scalar_subquery())) + + self.assert_compile( + stmt, + "SELECT row_to_json((SELECT a.id, a.x, a.y FROM a)) " + "AS row_to_json_1", + ) + + def test_named_with_ordinality(self): + """ + SELECT a.id AS a_id, a.refs AS a_refs, + unnested.unnested AS unnested_unnested, + unnested.ordinality AS unnested_ordinality, + b.id AS b_id, b.ref AS b_ref + FROM a LEFT OUTER JOIN unnest(a.refs) + `WITH ORDINALITY AS unnested(unnested, ordinality) ON true + LEFT OUTER JOIN b ON unnested.unnested = b.ref + + """ # noqa 501 + + a = table("a", column("id"), column("refs")) + b = table("b", column("id"), column("ref")) + + unnested = ( + func.unnest(a.c.refs) + .named_table_valued("unnested", with_ordinality="ordinality") + .alias("unnested") + ) + + stmt = ( + select( + a.c.id, a.c.refs, unnested.c.unnested, unnested.c.ordinality + ) + .outerjoin(unnested, true()) + .outerjoin( + b, + unnested.c.unnested == b.c.ref, + ) + ) + self.assert_compile( + stmt, + "SELECT a.id, a.refs, unnested.unnested, unnested.ordinality " + "FROM a " + "LEFT OUTER JOIN unnest(a.refs) " + "WITH ORDINALITY AS unnested(unnested, ordinality) ON true " + "LEFT OUTER JOIN b ON unnested.unnested = b.ref", + ) + + def test_star_with_ordinality(self): + """ + SELECT * FROM generate_series(4,1,-1) WITH ORDINALITY; + """ + + stmt = select("*").select_from( # noqa + func.generate_series(4, 1, -1).table_valued( + with_ordinality="ordinality" + ) + ) + self.assert_compile( + stmt, + "SELECT * FROM generate_series" + "(:generate_series_1, :generate_series_2, :generate_series_3) " + "WITH ORDINALITY AS anon_1", + ) + + def test_json_object_keys_with_ordinality(self): + """ + SELECT * FROM json_object_keys('{"a1":"1","a2":"2","a3":"3"}') + WITH ORDINALITY AS t(keys, n); + """ + stmt = select("*").select_from( + func.json_object_keys( + literal({"a1": "1", "a2": "2", "a3": "3"}, type_=JSON) + ) + .named_table_valued("keys", with_ordinality="n") + .alias("t") + ) + + self.assert_compile( + stmt, + "SELECT * FROM json_object_keys(:param_1) " + "WITH ORDINALITY AS t(keys, n)", + ) + + def test_alias_column(self): + """ + + :: + + SELECT x, y + FROM + generate_series(:generate_series_1, :generate_series_2) AS x, + generate_series(:generate_series_3, :generate_series_4) AS y + + """ + + x = func.generate_series(1, 2).alias("x") + y = func.generate_series(3, 4).alias("y") + stmt = select(x.column, y.column) + + self.assert_compile( + stmt, + "SELECT x, y FROM " + "generate_series(:generate_series_1, :generate_series_2) AS x, " + "generate_series(:generate_series_3, :generate_series_4) AS y", + ) + + def test_column_valued_one(self): + fn = func.unnest(["one", "two", "three", "four"]).column_valued() + + stmt = select(fn) + + self.assert_compile( + stmt, "SELECT anon_1 FROM unnest(:unnest_1) AS anon_1" + ) + + def test_column_valued_two(self): + """ + + :: + + SELECT x, y + FROM + generate_series(:generate_series_1, :generate_series_2) AS x, + generate_series(:generate_series_3, :generate_series_4) AS y + + """ + + x = func.generate_series(1, 2).column_valued("x") + y = func.generate_series(3, 4).column_valued("y") + stmt = select(x, y) + + self.assert_compile( + stmt, + "SELECT x, y FROM " + "generate_series(:generate_series_1, :generate_series_2) AS x, " + "generate_series(:generate_series_3, :generate_series_4) AS y", + ) + + def test_column_valued_subquery(self): + x = func.generate_series(1, 2).column_valued("x") + y = func.generate_series(3, 4).column_valued("y") + subq = select(x, y).subquery() + stmt = select(subq).where(subq.c.x > 2) + + self.assert_compile( + stmt, + "SELECT anon_1.x, anon_1.y FROM " + "(SELECT x, y FROM " + "generate_series(:generate_series_1, :generate_series_2) AS x, " + "generate_series(:generate_series_3, :generate_series_4) AS y" + ") AS anon_1 " + "WHERE anon_1.x > :x_1", + ) + + def test_ten(self): + """ + # this is the "record" type + + SELECT + table1.user_id AS table1_user_id, + table2.name AS table2_name, + jsonb_table.name AS jsonb_table_name, + count(jsonb_table.time) AS count_1 + FROM table1 + JOIN table2 ON table1.user_id = table2.id + JOIN LATERAL jsonb_to_recordset(table1.jsonb) AS jsonb_table(name TEXT, time FLOAT) ON true + WHERE + table2.route_id = %(route_id_1)s + AND table1.list_id IN (%(list_id_1)s, %(list_id_2)s, %(list_id_3)s) + AND jsonb_table.name IN (%(name_1)s, %(name_2)s, %(name_3)s) + GROUP BY table1.user_id, table2.name, jsonb_table.name + ORDER BY table2.name + + """ # noqa + + def test_function_alias(self): + """ + :: + + SELECT result_elem -> 'Field' as field + FROM "check" AS check_, json_array_elements( + ( + SELECT check_inside.response -> 'Results' + FROM "check" as check_inside + WHERE check_inside.id = check_.id + ) + ) AS result_elem + WHERE result_elem ->> 'Name' = 'FooBar' + + """ + check = table("check", column("id"), column("response", JSON)) + + check_inside = check.alias("check_inside") + check_outside = check.alias("_check") + + subq = ( + select(check_inside.c.response["Results"]) + .where(check_inside.c.id == check_outside.c.id) + .scalar_subquery() + ) + + fn = func.json_array_elements(subq, type_=JSON).alias("result_elem") + + stmt = ( + select(fn.column["Field"].label("field")) + .where(fn.column["Name"] == "FooBar") + .select_from(check_outside) + ) + + self.assert_compile( + stmt, + "SELECT result_elem[:result_elem_1] AS field " + "FROM json_array_elements(" + "(SELECT check_inside.response[:response_1] AS anon_1 " + 'FROM "check" AS check_inside ' + "WHERE check_inside.id = _check.id)" + ') AS result_elem, "check" AS _check ' + "WHERE result_elem[:result_elem_2] = :param_1", + ) + + def test_named_table_valued(self): + + fn = func.json_to_recordset( # noqa + '[{"a":1,"b":"foo"},{"a":"2","c":"bar"}]' + ).named_table_valued(column("a", Integer), column("b", String)) + + stmt = select(fn.c.a, fn.c.b) + + self.assert_compile( + stmt, + "SELECT anon_1.a, anon_1.b " + "FROM json_to_recordset(:json_to_recordset_1) " + "AS anon_1(a INTEGER, b VARCHAR)", + ) + + def test_named_table_valued_subquery(self): + + fn = func.json_to_recordset( # noqa + '[{"a":1,"b":"foo"},{"a":"2","c":"bar"}]' + ).named_table_valued(column("a", Integer), column("b", String)) + + stmt = select(fn.c.a, fn.c.b).subquery() + + stmt = select(stmt) + + self.assert_compile( + stmt, + "SELECT anon_1.a, anon_1.b FROM " + "(SELECT anon_2.a AS a, anon_2.b AS b " + "FROM json_to_recordset(:json_to_recordset_1) " + "AS anon_2(a INTEGER, b VARCHAR)" + ") AS anon_1", + ) + + def test_named_table_valued_alias(self): + + """select * from json_to_recordset + ('[{"a":1,"b":"foo"},{"a":"2","c":"bar"}]') as x(a int, b text);""" + + fn = ( + func.json_to_recordset( # noqa + '[{"a":1,"b":"foo"},{"a":"2","c":"bar"}]' + ) + .named_table_valued(column("a", Integer), column("b", String)) + .alias("jbr") + ) + + stmt = select(fn.c.a, fn.c.b) + + self.assert_compile( + stmt, + "SELECT jbr.a, jbr.b " + "FROM json_to_recordset(:json_to_recordset_1) " + "AS jbr(a INTEGER, b VARCHAR)", + ) + + # continuing from + # https://paquier.xyz/postgresql-2/postgres-9-4-feature-highlight-with-ordinality/ + # https://github.com/sqlalchemy/sqlalchemy/issues/3566 diff --git a/test/sql/test_roles.py b/test/sql/test_roles.py index 0ef90e89e9..3e6f0ecfc3 100644 --- a/test/sql/test_roles.py +++ b/test/sql/test_roles.py @@ -162,6 +162,26 @@ class RoleTest(fixtures.TestBase): select(column("q")).alias(), ) + def test_record_advice(self): + msg = ( + r"SQL expression element expected, got %s. To create a " + r"column expression from a FROM clause row as a whole, " + r"use the .record\(\) method." + ) + assert_raises_message( + exc.ArgumentError, + msg % ("Table.*",), + expect, + roles.ExpressionElementRole, + t, + ) + + # no record() message here right now, it goes to scalar subquery + with testing.expect_warnings( + "implicitly coercing SELECT object to scalar subquery" + ): + expect(roles.ExpressionElementRole, t.select().alias()) + def test_statement_no_text_coercion(self): assert_raises_message( exc.ArgumentError,