From: Mike Bayer Date: Sun, 4 Jan 2015 01:43:45 +0000 (-0500) Subject: - The :class:`.mysql.SET` type has been overhauled to no longer X-Git-Tag: rel_1_0_0b1~137 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=93742b3d5c16d3f1e27ff0d10c8e65e1b54971a3;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git - The :class:`.mysql.SET` type has been overhauled to no longer assume that the empty string, or a set with a single empty string value, is in fact a set with a single empty string; instead, this is by default treated as the empty set. In order to handle persistence of a :class:`.mysql.SET` that actually wants to include the blank value ``''`` as a legitimate value, a new bitwise operational mode is added which is enabled by the :paramref:`.mysql.SET.retrieve_as_bitwise` flag, which will persist and retrieve values unambiguously using their bitflag positioning. Storage and retrieval of unicode values for driver configurations that aren't converting unicode natively is also repaired. fixes #3283 --- diff --git a/doc/build/changelog/changelog_10.rst b/doc/build/changelog/changelog_10.rst index 95eaff0f1e..bfe2ebbc6f 100644 --- a/doc/build/changelog/changelog_10.rst +++ b/doc/build/changelog/changelog_10.rst @@ -22,6 +22,27 @@ series as well. For changes that are specific to 1.0 with an emphasis on compatibility concerns, see :doc:`/changelog/migration_10`. + .. change:: + :tags: bug, mysql + :tickets: 3283 + + The :class:`.mysql.SET` type has been overhauled to no longer + assume that the empty string, or a set with a single empty string + value, is in fact a set with a single empty string; instead, this + is by default treated as the empty set. In order to handle persistence + of a :class:`.mysql.SET` that actually wants to include the blank + value ``''`` as a legitimate value, a new bitwise operational mode + is added which is enabled by the + :paramref:`.mysql.SET.retrieve_as_bitwise` flag, which will persist + and retrieve values unambiguously using their bitflag positioning. + Storage and retrieval of unicode values for driver configurations + that aren't converting unicode natively is also repaired. + + .. seealso:: + + :ref:`change_3283` + + .. change:: :tags: feature, schema :tickets: 3282 diff --git a/doc/build/changelog/migration_10.rst b/doc/build/changelog/migration_10.rst index f9c26017ca..79756ec176 100644 --- a/doc/build/changelog/migration_10.rst +++ b/doc/build/changelog/migration_10.rst @@ -1606,6 +1606,52 @@ by Postgresql as of 9.4. SQLAlchemy allows this using Dialect Improvements and Changes - MySQL ============================================= +.. _change_3283: + +MySQL SET Type Overhauled to support empty sets, unicode, blank value handling +------------------------------------------------------------------------------- + +The :class:`.mysql.SET` type historically not included a system of handling +blank sets and empty values separately; as different drivers had different +behaviors for treatment of empty strings and empty-string-set representations, +the SET type tried only to hedge between these behaviors, opting to treat the +empty set as ``set([''])`` as is still the current behavior for the +MySQL-Connector-Python DBAPI. +Part of the rationale here was that it was otherwise impossible to actually +store a blank string within a MySQL SET, as the driver gives us back strings +with no way to discern between ``set([''])`` and ``set()``. It was left +to the user to determine if ``set([''])`` actually meant "empty set" or not. + +The new behavior moves the use case for the blank string, which is an unusual +case that isn't even documented in MySQL's documentation, into a special +case, and the default behavior of :class:`.mysql.SET` is now: + +* to treat the empty string ``''`` as returned by MySQL-python into the empty + set ``set()``; + +* to convert the single-blank value set ``set([''])`` returned by + MySQL-Connector-Python into the empty set ``set()``; + +* To handle the case of a set type that actually wishes includes the blank + value ``''`` in its list of possible values, + a new feature (required in this use case) is implemented whereby the set + value is persisted and loaded as a bitwise integer value; the + flag :paramref:`.mysql.SET.retrieve_as_bitwise` is added in order to + enable this. + +Using the :paramref:`.mysql.SET.retrieve_as_bitwise` flag allows the set +to be persisted and retrieved with no ambiguity of values. Theoretically +this flag can be turned on in all cases, as long as the given list of +values to the type matches the ordering exactly as declared in the +database; it only makes the SQL echo output a bit more unusual. + +The default behavior of :class:`.mysql.SET` otherwise remains the same, +roundtripping values using strings. The string-based behavior now +supports unicode fully including MySQL-python with use_unicode=0. + +:ticket:`3283` + + MySQL internal "no such table" exceptions not passed to event handlers ---------------------------------------------------------------------- diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index 5f990ea4e7..7836e95489 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -1428,32 +1428,28 @@ class SET(_EnumeratedValues): Column('myset', SET("foo", "bar", "baz")) - :param values: The range of valid values for this SET. Values will be - quoted when generating the schema according to the quoting flag (see - below). - .. versionchanged:: 0.9.0 quoting is applied automatically to - :class:`.mysql.SET` in the same way as for :class:`.mysql.ENUM`. + The list of potential values is required in the case that this + set will be used to generate DDL for a table, or if the + :paramref:`.SET.retrieve_as_bitwise` flag is set to True. - :param charset: Optional, a column-level character set for this string - value. Takes precedence to 'ascii' or 'unicode' short-hand. + :param values: The range of valid values for this SET. - :param collation: Optional, a column-level collation for this string - value. Takes precedence to 'binary' short-hand. + :param convert_unicode: Same flag as that of + :paramref:`.String.convert_unicode`. - :param ascii: Defaults to False: short-hand for the ``latin1`` - character set, generates ASCII in schema. + :param collation: same as that of :paramref:`.String.collation` - :param unicode: Defaults to False: short-hand for the ``ucs2`` - character set, generates UNICODE in schema. + :param charset: same as that of :paramref:`.VARCHAR.charset`. - :param binary: Defaults to False: short-hand, pick the binary - collation type that matches the column's character set. Generates - BINARY in schema. This does not affect the type of data stored, - only the collation of character data. + :param ascii: same as that of :paramref:`.VARCHAR.ascii`. - :param quoting: Defaults to 'auto': automatically determine enum value - quoting. If all enum values are surrounded by the same quoting + :param unicode: same as that of :paramref:`.VARCHAR.unicode`. + + :param binary: same as that of :paramref:`.VARCHAR.binary`. + + :param quoting: Defaults to 'auto': automatically determine set value + quoting. If all values are surrounded by the same quoting character, then use 'quoted' mode. Otherwise, use 'unquoted' mode. 'quoted': values in enums are already quoted, they will be used @@ -1468,50 +1464,116 @@ class SET(_EnumeratedValues): .. versionadded:: 0.9.0 + :param retrieve_as_bitwise: if True, the data for the set type will be + persisted and selected using an integer value, where a set is coerced + into a bitwise mask for persistence. MySQL allows this mode which + has the advantage of being able to store values unambiguously, + such as the blank string ``''``. The datatype will appear + as the expression ``col + 0`` in a SELECT statement, so that the + value is coerced into an integer value in result sets. + This flag is required if one wishes + to persist a set that can store the blank string ``''`` as a value. + + .. warning:: + + When using :paramref:`.mysql.SET.retrieve_as_bitwise`, it is + essential that the list of set values is expressed in the + **exact same order** as exists on the MySQL database. + + .. versionadded:: 1.0.0 + + """ + self.retrieve_as_bitwise = kw.pop('retrieve_as_bitwise', False) values, length = self._init_values(values, kw) self.values = tuple(values) - + if not self.retrieve_as_bitwise and '' in values: + raise exc.ArgumentError( + "Can't use the blank value '' in a SET without " + "setting retrieve_as_bitwise=True") + if self.retrieve_as_bitwise: + self._bitmap = dict( + (value, 2 ** idx) + for idx, value in enumerate(self.values) + ) kw.setdefault('length', length) super(SET, self).__init__(**kw) + def column_expression(self, colexpr): + if self.retrieve_as_bitwise: + return colexpr + 0 + else: + return colexpr + def result_processor(self, dialect, coltype): - def process(value): - # The good news: - # No ',' quoting issues- commas aren't allowed in SET values - # The bad news: - # Plenty of driver inconsistencies here. - if isinstance(value, set): - # ..some versions convert '' to an empty set - if not value: - value.add('') - return value - # ...and some versions return strings - if value is not None: - return set(value.split(',')) - else: - return value + if self.retrieve_as_bitwise: + def process(value): + if value is not None: + value = int(value) + return set( + [ + elem + for idx, elem in enumerate(self.values) + if value & (2 ** idx) + ] + ) + else: + return None + else: + super_convert = super(SET, self).result_processor(dialect, coltype) + + def process(value): + if isinstance(value, util.string_types): + # MySQLdb returns a string, let's parse + if super_convert: + value = super_convert(value) + return set(re.findall(r'[^,]+', value)) + else: + # mysql-connector-python does a naive + # split(",") which throws in an empty string + if value is not None: + value.discard('') + return value return process def bind_processor(self, dialect): super_convert = super(SET, self).bind_processor(dialect) + if self.retrieve_as_bitwise: + def process(value): + if value is None: + return None + elif isinstance(value, util.int_types + util.string_types): + if super_convert: + return super_convert(value) + else: + return value + else: + int_value = 0 + for v in value: + int_value |= self._bitmap[v] + return int_value + else: - def process(value): - if value is None or isinstance( - value, util.int_types + util.string_types): - pass - else: - if None in value: - value = set(value) - value.remove(None) - value.add('') - value = ','.join(value) - if super_convert: - return super_convert(value) - else: - return value + def process(value): + # accept strings and int (actually bitflag) values directly + if value is not None and not isinstance( + value, util.int_types + util.string_types): + value = ",".join(value) + + if super_convert: + return super_convert(value) + else: + return value return process + def adapt(self, impltype, **kw): + kw['retrieve_as_bitwise'] = self.retrieve_as_bitwise + return util.constructor_copy( + self, impltype, + *self.values, + **kw + ) + # old names MSTime = TIME MSSet = SET @@ -2972,6 +3034,9 @@ class MySQLTableDefinitionParser(object): if issubclass(col_type, _EnumeratedValues): type_args = _EnumeratedValues._strip_values(type_args) + if issubclass(col_type, SET) and '' in type_args: + type_kw['retrieve_as_bitwise'] = True + type_instance = col_type(*type_args, **type_kw) col_args, col_kw = [], {} diff --git a/lib/sqlalchemy/util/langhelpers.py b/lib/sqlalchemy/util/langhelpers.py index ac6b50de2e..7f57e501ac 100644 --- a/lib/sqlalchemy/util/langhelpers.py +++ b/lib/sqlalchemy/util/langhelpers.py @@ -969,7 +969,7 @@ def coerce_kw_type(kw, key, type_, flexi_bool=True): kw[key] = type_(kw[key]) -def constructor_copy(obj, cls, **kw): +def constructor_copy(obj, cls, *args, **kw): """Instantiate cls using the __dict__ of obj as constructor arguments. Uses inspect to match the named arguments of ``cls``. @@ -978,7 +978,7 @@ def constructor_copy(obj, cls, **kw): names = get_cls_kwargs(cls) kw.update((k, obj.__dict__[k]) for k in names if k in obj.__dict__) - return cls(**kw) + return cls(*args, **kw) def counter(): diff --git a/test/dialect/mysql/test_types.py b/test/dialect/mysql/test_types.py index 546b642724..4e530e6b6c 100644 --- a/test/dialect/mysql/test_types.py +++ b/test/dialect/mysql/test_types.py @@ -1,6 +1,6 @@ # coding: utf-8 -from sqlalchemy.testing import eq_, assert_raises +from sqlalchemy.testing import eq_, assert_raises, assert_raises_message from sqlalchemy import * from sqlalchemy import sql, exc, schema from sqlalchemy.util import u @@ -681,6 +681,67 @@ class EnumSetTest( exc.StatementError, set_table.insert().execute, e1='c', e2='c', e3='c', e4='c') + @testing.fails_on("+oursql", "oursql raises on the truncate warning") + @testing.provide_metadata + def test_empty_set_no_empty_string(self): + t = Table( + 't', self.metadata, + Column('id', Integer), + Column('data', mysql.SET("a", "b")) + ) + t.create() + with testing.db.begin() as conn: + conn.execute( + t.insert(), + {'id': 1, 'data': set()}, + {'id': 2, 'data': set([''])}, + {'id': 3, 'data': set(['a', ''])}, + {'id': 4, 'data': set(['b'])}, + ) + eq_( + conn.execute(t.select().order_by(t.c.id)).fetchall(), + [ + (1, set()), + (2, set()), + (3, set(['a'])), + (4, set(['b'])), + ] + ) + + def test_bitwise_required_for_empty(self): + assert_raises_message( + exc.ArgumentError, + "Can't use the blank value '' in a SET without setting " + "retrieve_as_bitwise=True", + mysql.SET, "a", "b", '' + ) + + @testing.provide_metadata + def test_empty_set_empty_string(self): + t = Table( + 't', self.metadata, + Column('id', Integer), + Column('data', mysql.SET("a", "b", '', retrieve_as_bitwise=True)) + ) + t.create() + with testing.db.begin() as conn: + conn.execute( + t.insert(), + {'id': 1, 'data': set()}, + {'id': 2, 'data': set([''])}, + {'id': 3, 'data': set(['a', ''])}, + {'id': 4, 'data': set(['b'])}, + ) + eq_( + conn.execute(t.select().order_by(t.c.id)).fetchall(), + [ + (1, set()), + (2, set([''])), + (3, set(['a', ''])), + (4, set(['b'])), + ] + ) + @testing.provide_metadata def test_string_roundtrip(self): set_table = self._set_fixture_one() @@ -705,6 +766,47 @@ class EnumSetTest( eq_(res, expected) + @testing.provide_metadata + def test_unicode_roundtrip(self): + set_table = Table( + 't', self.metadata, + Column('id', Integer, primary_key=True), + Column('data', mysql.SET( + u('réveillé'), u('drôle'), u('S’il'), convert_unicode=True)), + ) + + set_table.create() + with testing.db.begin() as conn: + conn.execute( + set_table.insert(), + {"data": set([u('réveillé'), u('drôle')])}) + + row = conn.execute( + set_table.select() + ).first() + + eq_( + row, + (1, set([u('réveillé'), u('drôle')])) + ) + + @testing.provide_metadata + def test_int_roundtrip(self): + set_table = self._set_fixture_one() + set_table.create() + with testing.db.begin() as conn: + conn.execute( + set_table.insert(), + dict(e1=1, e2=2, e3=3, e4=3, e5=0) + ) + res = conn.execute(set_table.select()).first() + eq_( + res, + ( + set(['a']), set(['b']), set(['a', 'b']), + set(["'a'", 'b']), set([])) + ) + @testing.provide_metadata def test_set_roundtrip_plus_reflection(self): set_table = Table( @@ -725,11 +827,11 @@ class EnumSetTest( expected = expected or store table.insert(store).execute() row = table.select().execute().first() - self.assert_(list(row) == expected) + eq_(row, tuple(expected)) table.delete().execute() roundtrip([None, None, None], [None] * 3) - roundtrip(['', '', ''], [set([''])] * 3) + roundtrip(['', '', ''], [set([])] * 3) roundtrip([set(['dq']), set(['a']), set(['5'])]) roundtrip(['dq', 'a', '5'], [set(['dq']), set(['a']), set(['5'])]) @@ -836,12 +938,14 @@ class EnumSetTest( set_table = Table( 'mysql_set', self.metadata, Column('e1', mysql.SET("'a'")), - Column('e2', mysql.SET("''")), + Column('e2', mysql.SET("''", retrieve_as_bitwise=True)), Column('e3', mysql.SET('a')), - Column('e4', mysql.SET('')), - Column('e5', mysql.SET("'a'", "''")), - Column('e6', mysql.SET("''", "'a'")), - Column('e7', mysql.SET("''", "'''a'''", "'b''b'", "''''"))) + Column('e4', mysql.SET('', retrieve_as_bitwise=True)), + Column('e5', mysql.SET("'a'", "''", retrieve_as_bitwise=True)), + Column('e6', mysql.SET("''", "'a'", retrieve_as_bitwise=True)), + Column('e7', mysql.SET( + "''", "'''a'''", "'b''b'", "''''", + retrieve_as_bitwise=True))) for col in set_table.c: self.assert_(repr(col))