]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
Add TABLESAMPLE clause support.
authorsaarni <saarni@gmail.com>
Thu, 26 May 2016 14:15:24 +0000 (10:15 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Wed, 15 Jun 2016 18:42:08 +0000 (14:42 -0400)
The TABLESAMPLE clause allows randomly selecting an approximate percentage
of rows from a table. At least DB2, Microsoft SQL Server and recent
Postgresql support this standard clause.

Fixes: #3718
Change-Id: I3fb8b9223e12a57100df30876b461884c58d72fa
Pull-request: https://github.com/zzzeek/sqlalchemy/pull/277

doc/build/changelog/changelog_11.rst
doc/build/changelog/migration_11.rst
doc/build/core/selectable.rst
lib/sqlalchemy/__init__.py
lib/sqlalchemy/sql/__init__.py
lib/sqlalchemy/sql/compiler.py
lib/sqlalchemy/sql/expression.py
lib/sqlalchemy/sql/selectable.py
test/sql/test_tablesample.py [new file with mode: 0644]

index 7194ac116866958bb5225c3f40287ac5dbb77783..534c3993c7cccf1b2208d59696b449009ce6b176 100644 (file)
 .. changelog::
     :version: 1.1.0b1
 
+    .. change::
+        :tags: feature, sql
+        :tickets: 3718
+
+        Added TABLESAMPLE support via the new :meth:`.FromClause.tablesample`
+        method and standalone function.  Pull request courtesy Ilja Everilä.
+
+        .. seealso::
+
+            :ref:`change_3718`
+
     .. change::
         :tags: feature, orm, ext
 
index 766b06f2e1538324920a42fea3501752442e26b0..f6e04225db09e7676d6378f0fb64109b6a534eaf 100644 (file)
@@ -1150,6 +1150,32 @@ selectable, e.g. lateral correlation::
 
 :ticket:`2857`
 
+.. _change_3718:
+
+Support for TABLESAMPLE
+-----------------------
+
+The SQL standard TABLESAMPLE can be rendered using the
+:meth:`.FromClause.tablesample` method, which returns a :class:`.TableSample`
+construct similar to an alias::
+
+    from sqlalchemy import func
+
+    selectable = people.tablesample(
+                func.bernoulli(1),
+                name='alias',
+                seed=func.random())
+    stmt = select([selectable.c.people_id])
+
+Assuming ``people`` with a column ``people_id``, the above
+statement would render as::
+
+    SELECT alias.people_id FROM
+    people AS alias TABLESAMPLE bernoulli(:bernoulli_1)
+    REPEATABLE (random())
+
+:ticket:`3718`
+
 .. _change_3216:
 
 The ``.autoincrement`` directive is no longer implicitly enabled for a composite primary key column
index 3f4d9565ec434177ee46a10fec5d205de38c24b1..f2804f9c2593bedf29fabc28e56e99f6e6f08eb2 100644 (file)
@@ -33,6 +33,8 @@ elements are themselves :class:`.ColumnElement` subclasses).
 
 .. autofunction:: sqlalchemy.sql.expression.table
 
+.. autofunction:: tablesample
+
 .. autofunction:: union
 
 .. autofunction:: union_all
@@ -91,6 +93,10 @@ elements are themselves :class:`.ColumnElement` subclasses).
    :members:
    :inherited-members:
 
+.. autoclass:: TableSample
+   :members:
+   :inherited-members:
+
 .. autoclass:: TextAsFrom
    :members:
    :inherited-members:
index b1d240edf92bf2990ac925d00feb1f9f034c8fab..eca9f9c7f32884ad3a3010108abad9b105a8ea4f 100644 (file)
@@ -45,6 +45,7 @@ from .sql import (
     select,
     subquery,
     table,
+    tablesample,
     text,
     true,
     tuple_,
index 7f7abacc2478e6dc3f428915ef258f804814b2b3..a9b842e1aa01475ab9b02aa7d08ede961b1d223d 100644 (file)
@@ -18,6 +18,7 @@ from .expression import (
     Select,
     Selectable,
     TableClause,
+    TableSample,
     Update,
     alias,
     and_,
@@ -59,6 +60,7 @@ from .expression import (
     select,
     subquery,
     table,
+    tablesample,
     text,
     true,
     True_,
index 94c7db20a9d70644a322f22a2d6ee81259b406bc..5e537dfdc3daec0faa3ec6c615d65081a8553019 100644 (file)
@@ -1351,6 +1351,17 @@ class SQLCompiler(Compiled):
         kw['lateral'] = True
         return "LATERAL %s" % self.visit_alias(lateral, **kw)
 
+    def visit_tablesample(self, tablesample, asfrom=False, **kw):
+        text = "%s TABLESAMPLE %s" % (
+            self.visit_alias(tablesample, asfrom=True, **kw),
+            tablesample._get_method()._compiler_dispatch(self, **kw))
+
+        if tablesample.seed is not None:
+            text += " REPEATABLE (%s)" % (
+                tablesample.seed._compiler_dispatch(self, **kw))
+
+        return text
+
     def get_render_as_alias_suffix(self, alias_name_text):
         return " AS " + alias_name_text
 
index 97f74d4e4be08437af7708792a10538fd4df627d..cbb123ec63967386bfed4a9fad376ac32dfa2d81 100644 (file)
@@ -26,7 +26,8 @@ __all__ = [
     'nullslast',
     'or_', 'outparam', 'outerjoin', 'over', 'select', 'subquery',
     'table', 'text',
-    'tuple_', 'type_coerce', 'union', 'union_all', 'update', 'within_group']
+    'tuple_', 'type_coerce', 'union', 'union_all', 'update', 'within_group',
+    'TableSample', 'tablesample']
 
 
 from .visitors import Visitable
@@ -49,7 +50,7 @@ from .base import ColumnCollection, Generative, Executable, \
 from .selectable import Alias, Join, Select, Selectable, TableClause, \
     CompoundSelect, CTE, FromClause, FromGrouping, Lateral, SelectBase, \
     alias, GenerativeSelect, subquery, HasCTE, HasPrefixes, HasSuffixes, \
-    lateral, Exists, ScalarSelect, TextAsFrom
+    lateral, Exists, ScalarSelect, TextAsFrom, TableSample, tablesample
 
 
 from .dml import Insert, Update, Delete, UpdateBase, ValuesBase
index ac955a60f718b70990f0866df3ebf087f9c4bcc0..e62aa1e8e9f673a66d16972d518a194193b57df4 100644 (file)
@@ -183,6 +183,51 @@ def lateral(selectable, name=None):
     return selectable.lateral(name=name)
 
 
+def tablesample(selectable, sampling, name=None, seed=None):
+    """Return a :class:`.TableSample` object.
+
+    :class:`.TableSample` is an :class:`.Alias` subclass that represents
+    a table with the TABLESAMPLE clause applied to it.
+    :func:`~.expression.tablesample`
+    is also available from the :class:`.FromClause` class via the
+    :meth:`.FromClause.tablesample` method.
+
+    The TABLESAMPLE clause allows selecting a randomly selected approximate
+    percentage of rows from a table. It supports multiple sampling methods,
+    most commonly BERNOULLI and SYSTEM.
+
+    e.g.::
+
+        from sqlalchemy import func
+
+        selectable = people.tablesample(
+                    func.bernoulli(1),
+                    name='alias',
+                    seed=func.random())
+        stmt = select([selectable.c.people_id])
+
+    Assuming ``people`` with a column ``people_id``, the above
+    statement would render as::
+
+        SELECT alias.people_id FROM
+        people AS alias TABLESAMPLE bernoulli(:bernoulli_1)
+        REPEATABLE (random())
+
+    .. versionadded:: 1.1
+
+    :param sampling: a ``float`` percentage between 0 and 100 or
+        :class:`.functions.Function`.
+
+    :param name: optional alias name
+
+    :param seed: any real-valued SQL expression.  When specified, the
+     REPEATABLE sub-clause is also rendered.
+
+    """
+    return _interpret_as_from(selectable).tablesample(
+        sampling, name=name, seed=seed)
+
+
 class Selectable(ClauseElement):
     """mark a class as being selectable"""
     __visit_name__ = 'selectable'
@@ -474,6 +519,21 @@ class FromClause(Selectable):
         """
         return Lateral(self, name)
 
+    def tablesample(self, sampling, name=None, seed=None):
+        """Return a TABLESAMPLE alias of this :class:`.FromClause`.
+
+        The return value is the :class:`.TableSample` construct also
+        provided by the top-level :func:`~.expression.tablesample` function.
+
+        .. versionadded:: 1.1
+
+        .. seealso::
+
+            :func:`~.expression.tablesample` - usage guidelines and parameters
+
+        """
+        return TableSample(self, sampling, name, seed)
+
     def is_derived_from(self, fromclause):
         """Return True if this FromClause is 'derived' from the given
         FromClause.
@@ -1268,6 +1328,38 @@ class Lateral(Alias):
     __visit_name__ = 'lateral'
 
 
+class TableSample(Alias):
+    """Represent a TABLESAMPLE clause.
+
+    This object is constructed from the :func:`~.expression.tablesample` module
+    level function as well as the :meth:`.FromClause.tablesample` method available
+    on all :class:`.FromClause` subclasses.
+
+    .. versionadded:: 1.1
+
+    .. seealso::
+
+        :func:`~.expression.tablesample`
+
+    """
+
+    __visit_name__ = 'tablesample'
+
+    def __init__(self, selectable, sampling,
+                 name=None,
+                 seed=None):
+        self.sampling = sampling
+        self.seed = seed
+        super(TableSample, self).__init__(selectable, name=name)
+
+    @util.dependencies("sqlalchemy.sql.functions")
+    def _get_method(self, functions):
+        if isinstance(self.sampling, functions.Function):
+            return self.sampling
+        else:
+            return functions.func.system(self.sampling)
+
+
 class CTE(Generative, HasSuffixes, Alias):
     """Represent a Common Table Expression.
 
diff --git a/test/sql/test_tablesample.py b/test/sql/test_tablesample.py
new file mode 100644 (file)
index 0000000..b2dddaf
--- /dev/null
@@ -0,0 +1,54 @@
+from sqlalchemy.testing import fixtures
+from sqlalchemy.testing import AssertsCompiledSQL, assert_raises_message
+from sqlalchemy.sql import select, func, text
+from sqlalchemy.engine import default
+from sqlalchemy import exc
+from sqlalchemy import Table, Integer, String, Column
+from sqlalchemy import tablesample
+
+
+class TableSampleTest(fixtures.TablesTest, AssertsCompiledSQL):
+    __dialect__ = default.DefaultDialect(supports_native_boolean=True)
+
+    run_setup_bind = None
+
+    run_create_tables = None
+
+    @classmethod
+    def define_tables(cls, metadata):
+        Table('people', metadata,
+              Column('people_id', Integer, primary_key=True),
+              Column('age', Integer),
+              Column('name', String(30)))
+
+    def test_standalone(self):
+        table1 = self.tables.people
+
+        # no special alias handling even though clause is not in the
+        # context of a FROM clause
+        self.assert_compile(
+            tablesample(table1, 1, name='alias'),
+            'people AS alias TABLESAMPLE system(:system_1)'
+        )
+
+        self.assert_compile(
+            table1.tablesample(1, name='alias'),
+            'people AS alias TABLESAMPLE system(:system_1)'
+        )
+
+        self.assert_compile(
+            tablesample(table1, func.bernoulli(1), name='alias',
+                        seed=func.random()),
+            'people AS alias TABLESAMPLE bernoulli(:bernoulli_1) '
+            'REPEATABLE (random())'
+        )
+
+    def test_select_from(self):
+        table1 = self.tables.people
+
+        self.assert_compile(
+            select([table1.tablesample(text('1'), name='alias').c.people_id]),
+            'SELECT alias.people_id FROM '
+            'people AS alias TABLESAMPLE system(1)'
+        )
+