From 5119ce78b5c47f89a9dfca2a2781cec60551a0e7 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Mon, 26 Oct 2009 00:32:39 +0000 Subject: [PATCH] - The psycopg2 dialect now uses psycopg2's "unicode extension" on all new connections, which allows all String/Text/etc. types to skip the need to post-process bytestrings into unicode (an expensive step due to its volume). Other dialects which return unicode natively (pg8000, zxjdbc) also skip unicode post-processing. - String/Text/Unicode types now skip the unicode() check on each result column value if the dialect has detected the DBAPI as returning Python unicode objects natively. This check is issued on first connect using "SELECT CAST 'some text' AS VARCHAR(10)" or equivalent, then checking if the returned object is a Python unicode. This allows vast performance increases for native-unicode DBAPIs, including pysqlite/sqlite3, psycopg2, and pg8000. --- CHANGES | 17 +++++++ lib/sqlalchemy/dialects/oracle/cx_oracle.py | 14 ++++-- lib/sqlalchemy/dialects/postgresql/base.py | 14 ++++-- .../dialects/postgresql/psycopg2.py | 45 ++++++++++++++++--- lib/sqlalchemy/engine/default.py | 21 +++++++++ lib/sqlalchemy/engine/strategies.py | 15 +++---- lib/sqlalchemy/test/engines.py | 8 ++++ lib/sqlalchemy/types.py | 13 ++++-- test/aaa_profiling/test_resultset.py | 4 +- test/aaa_profiling/test_zoomark.py | 12 +++-- test/aaa_profiling/test_zoomark_orm.py | 4 +- test/dialect/test_oracle.py | 12 ++++- test/dialect/test_postgresql.py | 4 +- test/engine/test_pool.py | 23 +++++++--- test/orm/test_query.py | 4 +- test/sql/test_types.py | 20 ++++++--- 16 files changed, 187 insertions(+), 43 deletions(-) diff --git a/CHANGES b/CHANGES index 3c9c52a3a2..2c4c92be81 100644 --- a/CHANGES +++ b/CHANGES @@ -434,6 +434,13 @@ CHANGES "postgres_where" names still work with a deprecation warning. + - The psycopg2 dialect now uses psycopg2's "unicode extension" + on all new connections, which allows all String/Text/etc. 
+ types to skip the need to post-process bytestrings into + unicode (an expensive step due to its volume). Other + dialects which return unicode natively (pg8000, zxjdbc) + also skip unicode post-processing. + - Added new ENUM type, which exists as a schema-level construct and extends the generic Enum type. Automatically associates itself with tables and their parent metadata @@ -576,6 +583,16 @@ CHANGES parameters. In particular, Numeric, Float, NUMERIC, FLOAT, DECIMAL don't generate any length or scale unless specified. + + - String/Text/Unicode types now skip the unicode() check + on each result column value if the dialect has + detected the DBAPI as returning Python unicode objects + natively. This check is issued on first connect + using "SELECT CAST('some text' AS VARCHAR(10))" or + equivalent, then checking if the returned object + is a Python unicode. This allows vast performance + increases for native-unicode DBAPIs, including + pysqlite/sqlite3, psycopg2, and pg8000. - Reflection of types now returns the exact UPPERCASE type within types.py, or the UPPERCASE type within diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py index e4d3b312b3..348c5b6b47 100644 --- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py +++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py @@ -145,7 +145,14 @@ class _LOBMixin(object): class _OracleChar(sqltypes.CHAR): def get_dbapi_type(self, dbapi): return dbapi.FIXED_CHAR - + +class _OracleNVarChar(sqltypes.NVARCHAR): + def result_processor(self, dialect): + if dialect._cx_oracle_native_nvarchar: + return None + else: + return sqltypes.NVARCHAR.result_processor(self, dialect) + class _OracleText(_LOBMixin, sqltypes.Text): def get_dbapi_type(self, dbapi): return dbapi.CLOB @@ -187,7 +194,7 @@ colspecs = { # it would be nice if we could not use it otherwise. 
oracle.NUMBER : oracle.NUMBER, # don't let this get converted oracle.RAW: _OracleRaw, - + sqltypes.NVARCHAR : _OracleNVarChar, } class Oracle_cx_oracleCompiler(OracleCompiler): @@ -313,7 +320,8 @@ class Oracle_cx_oracle(OracleDialect): if hasattr(self.dbapi, 'version'): cx_oracle_ver = vers(self.dbapi.version) self.supports_unicode_binds = cx_oracle_ver >= (5, 0) - + self._cx_oracle_native_nvarchar = cx_oracle_ver >= (5, 0) + if self.dbapi is None or not self.auto_convert_lobs or not 'CLOB' in self.dbapi.__dict__: self.dbapi_type_map = {} self.ORACLE_BINARY_TYPES = [] diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 26c4a8a971..9570dae57c 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -139,11 +139,19 @@ class ARRAY(sqltypes.MutableType, sqltypes.Concatenable, sqltypes.TypeEngine): return self.mutable def dialect_impl(self, dialect, **kwargs): - impl = self.__class__.__new__(self.__class__) - impl.__dict__.update(self.__dict__) + impl = super(ARRAY, self).dialect_impl(dialect, **kwargs) + if impl is self: + impl = self.__class__.__new__(self.__class__) + impl.__dict__.update(self.__dict__) impl.item_type = self.item_type.dialect_impl(dialect) return impl - + + def adapt(self, impltype): + return impltype( + self.item_type, + mutable=self.mutable + ) + def bind_processor(self, dialect): item_proc = self.item_type.bind_processor(dialect) def process(value): diff --git a/lib/sqlalchemy/dialects/postgresql/psycopg2.py b/lib/sqlalchemy/dialects/postgresql/psycopg2.py index 50e4bec3b0..f83baee7fe 100644 --- a/lib/sqlalchemy/dialects/postgresql/psycopg2.py +++ b/lib/sqlalchemy/dialects/postgresql/psycopg2.py @@ -23,7 +23,8 @@ psycopg2-specific keyword arguments which are accepted by :func:`~sqlalchemy.cre on the server and only retrieved as needed. 
SQLAlchemy's :class:`~sqlalchemy.engine.base.ResultProxy` uses special row-buffering behavior when this feature is enabled, such that groups of 100 rows at a time are fetched over the wire to reduce conversational overhead. - +* *use_native_unicode* - Enable the usage of Psycopg2 "native unicode" mode per connection. True + by default. * *isolation_level* - Sets the transaction isolation level for each transaction within the engine. Valid isolation levels are `READ_COMMITTED`, `READ_UNCOMMITTED`, `REPEATABLE_READ`, and `SERIALIZABLE`. @@ -42,7 +43,9 @@ from sqlalchemy.engine import base, default from sqlalchemy.sql import expression from sqlalchemy.sql import operators as sql_operators from sqlalchemy import types as sqltypes -from sqlalchemy.dialects.postgresql.base import PGDialect, PGCompiler, PGIdentifierPreparer, PGExecutionContext +from sqlalchemy.dialects.postgresql.base import PGDialect, PGCompiler, \ + PGIdentifierPreparer, PGExecutionContext, \ + ENUM, ARRAY class _PGNumeric(sqltypes.Numeric): def bind_processor(self, dialect): @@ -60,6 +63,21 @@ class _PGNumeric(sqltypes.Numeric): return process +class _PGEnum(ENUM): + def __init__(self, *arg, **kw): + super(_PGEnum, self).__init__(*arg, **kw) + if self.convert_unicode: + self.convert_unicode = "force" + +class _PGArray(ARRAY): + def __init__(self, *arg, **kw): + super(_PGArray, self).__init__(*arg, **kw) + # FIXME: this check won't work for setups that + # have convert_unicode only on their create_engine(). 
+ if isinstance(self.item_type, sqltypes.String) and \ + self.item_type.convert_unicode: + self.item_type.convert_unicode = "force" + # TODO: filter out 'FOR UPDATE' statements SERVER_SIDE_CURSOR_RE = re.compile( r'\s*SELECT', @@ -108,7 +126,6 @@ class PostgreSQL_psycopg2IdentifierPreparer(PGIdentifierPreparer): value = value.replace(self.escape_quote, self.escape_to_quote) return value.replace('%', '%%') - class PostgreSQL_psycopg2(PGDialect): driver = 'psycopg2' supports_unicode_statements = False @@ -123,18 +140,36 @@ class PostgreSQL_psycopg2(PGDialect): { sqltypes.Numeric : _PGNumeric, sqltypes.Float: sqltypes.Float, # prevents _PGNumeric from being used + ENUM : _PGEnum, # needs force_unicode + sqltypes.Enum : _PGEnum, # needs force_unicode + ARRAY : _PGArray, # needs force_unicode } ) - def __init__(self, server_side_cursors=False, **kwargs): + def __init__(self, server_side_cursors=False, use_native_unicode=True, **kwargs): PGDialect.__init__(self, **kwargs) self.server_side_cursors = server_side_cursors + self.use_native_unicode = use_native_unicode @classmethod def dbapi(cls): psycopg = __import__('psycopg2') return psycopg - + + _unwrap_connection = None + + def visit_pool(self, pool): + if self.dbapi and self.use_native_unicode: + extensions = __import__('psycopg2.extensions').extensions + def connect(conn, rec): + if self._unwrap_connection: + conn = self._unwrap_connection(conn) + if conn is None: + return + extensions.register_type(extensions.UNICODE, conn) + pool.add_listener({'first_connect': connect, 'connect':connect}) + super(PostgreSQL_psycopg2, self).visit_pool(pool) + def create_connect_args(self, url): opts = url.translate_connect_args(username='user') if 'port' in opts: diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index 12391c0057..37afa84ec1 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -42,6 +42,7 @@ class DefaultDialect(base.Dialect): # Py2K 
supports_unicode_statements = False supports_unicode_binds = False + returns_unicode_strings = False # end Py2K name = 'default' @@ -97,12 +98,32 @@ class DefaultDialect(base.Dialect): ## work around dialects that might change these values #self.supports_unicode_statements = True #self.supports_unicode_binds = True + #self.returns_unicode_strings = True def initialize(self, connection): if hasattr(self, '_get_server_version_info'): self.server_version_info = self._get_server_version_info(connection) if hasattr(self, '_get_default_schema_name'): self.default_schema_name = self._get_default_schema_name(connection) + # Py2K + self.returns_unicode_strings = self._check_unicode_returns(connection) + # end Py2K + + def _check_unicode_returns(self, connection): + cursor = connection.connection.cursor() + cursor.execute( + str( + expression.select( + [expression.cast( + expression.literal_column("'test unicode returns'"),sqltypes.VARCHAR(60)) + ]).compile(dialect=self) + ) + ) + + row = cursor.fetchone() + result = isinstance(row[0], unicode) + cursor.close() + return result @classmethod def type_descriptor(cls, typeobj): diff --git a/lib/sqlalchemy/engine/strategies.py b/lib/sqlalchemy/engine/strategies.py index ff62b265ba..7a8856ba85 100644 --- a/lib/sqlalchemy/engine/strategies.py +++ b/lib/sqlalchemy/engine/strategies.py @@ -129,15 +129,12 @@ class DefaultEngineStrategy(EngineStrategy): engine = engineclass(pool, dialect, u, **engine_args) if _initialize: - # some unit tests pass through _initialize=False - # to help mock engines work - class OnInit(object): - def first_connect(self, conn, rec): - c = base.Connection(engine, connection=conn) - dialect.initialize(c) - pool._on_first_connect.insert(0, OnInit()) - - dialect.visit_pool(pool) + dialect.visit_pool(pool) + + def first_connect(conn, rec): + c = base.Connection(engine, connection=conn) + dialect.initialize(c) + pool.add_listener({'first_connect':first_connect}) return engine diff --git 
a/lib/sqlalchemy/test/engines.py b/lib/sqlalchemy/test/engines.py index 187ad2ff03..31d1658af9 100644 --- a/lib/sqlalchemy/test/engines.py +++ b/lib/sqlalchemy/test/engines.py @@ -287,3 +287,11 @@ class ReplayableSession(object): raise AttributeError(key) else: return result + +def unwrap_connection(conn): + if conn.__class__.__name__ == 'Recorder': + return conn._subject + elif conn.__class__.__name__ == 'Player': + return None + else: + return conn diff --git a/lib/sqlalchemy/types.py b/lib/sqlalchemy/types.py index 27918e15c7..409a266fd4 100644 --- a/lib/sqlalchemy/types.py +++ b/lib/sqlalchemy/types.py @@ -435,7 +435,7 @@ class String(Concatenable, TypeEngine): :param convert_unicode: defaults to False. If True, convert ``unicode`` data sent to the database to a ``str`` bytestring, and convert bytestrings coming back from the - database into ``unicode``. + database into ``unicode``. Bytestrings are encoded using the dialect's :attr:`~sqlalchemy.engine.base.Dialect.encoding`, which @@ -443,6 +443,12 @@ class String(Concatenable, TypeEngine): If False, may be overridden by :attr:`sqlalchemy.engine.base.Dialect.convert_unicode`. + + If the dialect has been detected as returning unicode + strings from a VARCHAR result column, no unicode translation + will be done on results. To force unicode translation + for individual types regardless of dialect setting, + set convert_unicode='force'. 
:param assert_unicode: @@ -458,7 +464,7 @@ class String(Concatenable, TypeEngine): self.length = length self.convert_unicode = convert_unicode self.assert_unicode = assert_unicode - + def adapt(self, impltype): return impltype( length=self.length, @@ -503,7 +509,8 @@ class String(Concatenable, TypeEngine): return None def result_processor(self, dialect): - if self.convert_unicode or dialect.convert_unicode: + if (not dialect.returns_unicode_strings or self.convert_unicode == 'force') \ + and (self.convert_unicode or dialect.convert_unicode): def process(value): if value is not None and not isinstance(value, unicode): return value.decode(dialect.encoding) diff --git a/test/aaa_profiling/test_resultset.py b/test/aaa_profiling/test_resultset.py index 4a675ce0ec..83901b7f7a 100644 --- a/test/aaa_profiling/test_resultset.py +++ b/test/aaa_profiling/test_resultset.py @@ -33,7 +33,9 @@ class ResultSetTest(TestBase, AssertsExecutionResults): def test_string(self): [tuple(row) for row in t.select().execute().fetchall()] - @profiling.function_call_count(44406, versions={'2.4':33224}) + # sqlite3 returns native unicode. so shouldn't be an + # increase here. + @profiling.function_call_count(14396, versions={'2.4':13214}) def test_unicode(self): [tuple(row) for row in t2.select().execute().fetchall()] diff --git a/test/aaa_profiling/test_zoomark.py b/test/aaa_profiling/test_zoomark.py index ff5801c898..2b110a40f4 100644 --- a/test/aaa_profiling/test_zoomark.py +++ b/test/aaa_profiling/test_zoomark.py @@ -14,6 +14,7 @@ ITERATIONS = 1 dbapi_session = engines.ReplayableSession() metadata = None + class ZooMarkTest(TestBase): """Runs the ZooMark and squawks if method counts vary from the norm. 
@@ -33,8 +34,10 @@ class ZooMarkTest(TestBase): global metadata creator = testing.db.pool._creator + recorder = lambda: dbapi_session.recorder(creator()) engine = engines.testing_engine(options={'creator':recorder}) + engine.dialect._unwrap_connection = engines.unwrap_connection metadata = MetaData(engine) def test_baseline_1_create_tables(self): @@ -317,9 +320,10 @@ class ZooMarkTest(TestBase): player = lambda: dbapi_session.player() engine = create_engine('postgresql:///', creator=player) + engine.dialect._unwrap_connection = engines.unwrap_connection metadata = MetaData(engine) - @profiling.function_call_count(2991, {'2.4': 1796}) + @profiling.function_call_count(3178, {'2.4': 1796}) def test_profile_1_create_tables(self): self.test_baseline_1_create_tables() @@ -335,11 +339,11 @@ class ZooMarkTest(TestBase): def test_profile_3_properties(self): self.test_baseline_3_properties() - @profiling.function_call_count(14752, {'2.4': 8434}) + @profiling.function_call_count(13341, {'2.4': 8434}) def test_profile_4_expressions(self): self.test_baseline_4_expressions() - @profiling.function_call_count(1347, {'2.4': 901}) + @profiling.function_call_count(1241, {'2.4': 901}) def test_profile_5_aggregates(self): self.test_baseline_5_aggregates() @@ -347,7 +351,7 @@ class ZooMarkTest(TestBase): def test_profile_6_editing(self): self.test_baseline_6_editing() - @profiling.function_call_count(2994, {'2.4': 1844}) + @profiling.function_call_count(2641, {'2.4': 1844}) def test_profile_7_multiview(self): self.test_baseline_7_multiview() diff --git a/test/aaa_profiling/test_zoomark_orm.py b/test/aaa_profiling/test_zoomark_orm.py index 660f478110..1d34a4083b 100644 --- a/test/aaa_profiling/test_zoomark_orm.py +++ b/test/aaa_profiling/test_zoomark_orm.py @@ -36,6 +36,7 @@ class ZooMarkTest(TestBase): creator = testing.db.pool._creator recorder = lambda: dbapi_session.recorder(creator()) engine = engines.testing_engine(options={'creator':recorder}) + engine.dialect._unwrap_connection = 
engines.unwrap_connection metadata = MetaData(engine) session = sessionmaker()() @@ -282,6 +283,7 @@ class ZooMarkTest(TestBase): player = lambda: dbapi_session.player() engine = create_engine('postgresql:///', creator=player) + engine.dialect._unwrap_connection = engines.unwrap_connection metadata = MetaData(engine) session = sessionmaker()() @@ -305,7 +307,7 @@ class ZooMarkTest(TestBase): def test_profile_4_expressions(self): self.test_baseline_4_expressions() - @profiling.function_call_count(1331) + @profiling.function_call_count(1240) def test_profile_5_aggregates(self): self.test_baseline_5_aggregates() diff --git a/test/dialect/test_oracle.py b/test/dialect/test_oracle.py index 3296f47fce..2153c322fb 100644 --- a/test/dialect/test_oracle.py +++ b/test/dialect/test_oracle.py @@ -480,9 +480,19 @@ class TypesTest(TestBase, AssertsCompiledSQL): m2 = MetaData(testing.db) t2 = Table('t', m2, autoload=True) assert isinstance(t2.c.data.type, sqltypes.NVARCHAR) + + # nvarchar returns unicode natively. cx_oracle + # _OracleNVarChar type should be at play + # here. + assert isinstance( + t2.c.data.type.dialect_impl(testing.db.dialect), + cx_oracle._OracleNVarChar) + data = u'm’a réveillé.' 
t2.insert().execute(data=data) - eq_(t2.select().execute().first()['data'], data) + res = t2.select().execute().first()['data'] + eq_(res, data) + assert isinstance(res, unicode) finally: metadata.drop_all() diff --git a/test/dialect/test_postgresql.py b/test/dialect/test_postgresql.py index 626d546770..e4a5ffa558 100644 --- a/test/dialect/test_postgresql.py +++ b/test/dialect/test_postgresql.py @@ -1158,10 +1158,10 @@ class ArrayTest(TestBase, AssertsExecutionResults): arrtable = Table('arrtable', metadata, Column('id', Integer, primary_key=True), Column('intarr', postgresql.PGArray(Integer)), - Column('strarr', postgresql.PGArray(String(convert_unicode=True)), nullable=False) + Column('strarr', postgresql.PGArray(Unicode(assert_unicode=False)), nullable=False) ) metadata.create_all() - + def teardown(self): arrtable.delete().execute() diff --git a/test/engine/test_pool.py b/test/engine/test_pool.py index 3f38cda1a2..1aeab2bda6 100644 --- a/test/engine/test_pool.py +++ b/test/engine/test_pool.py @@ -3,7 +3,7 @@ from sqlalchemy import pool, interfaces, create_engine, select import sqlalchemy as tsa from sqlalchemy.test import TestBase, testing from sqlalchemy.test.util import gc_collect, lazy_gc - +from sqlalchemy.test.testing import eq_ mcid = 1 class MockDBAPI(object): @@ -179,10 +179,10 @@ class PoolTest(PoolTestBase): self.checked_out = [] self.checked_in = [] def assert_total(innerself, conn, fconn, cout, cin): - self.assert_(len(innerself.connected) == conn) - self.assert_(len(innerself.first_connected) == fconn) - self.assert_(len(innerself.checked_out) == cout) - self.assert_(len(innerself.checked_in) == cin) + eq_(len(innerself.connected), conn) + eq_(len(innerself.first_connected), fconn) + eq_(len(innerself.checked_out), cout) + eq_(len(innerself.checked_in), cin) def assert_in(innerself, item, in_conn, in_fconn, in_cout, in_cin): self.assert_((item in innerself.connected) == in_conn) self.assert_((item in innerself.first_connected) == in_fconn) @@ 
-326,6 +326,19 @@ class PoolTest(PoolTestBase): del c snoop.assert_total(2, 0, 2, 1) + # recreated + p = p.recreate() + snoop.clear() + + c = p.connect() + snoop.assert_total(1, 1, 1, 0) + c.close() + snoop.assert_total(1, 1, 1, 1) + c = p.connect() + snoop.assert_total(1, 1, 2, 1) + c.close() + snoop.assert_total(1, 1, 2, 2) + def test_listeners_callables(self): dbapi = MockDBAPI() diff --git a/test/orm/test_query.py b/test/orm/test_query.py index 41a5972676..79dd2a58a1 100644 --- a/test/orm/test_query.py +++ b/test/orm/test_query.py @@ -2490,6 +2490,7 @@ class MixedEntitiesTest(QueryTest, AssertsCompiledSQL): eq_(results, [(User(name='jack'), 'jack')]) self.assert_sql_count(testing.db, go, 1) + @testing.fails_on('postgresql+pg8000', "'type oid 705 not mapped to py type' (due to literal)") def test_self_referential(self): sess = create_session() @@ -2521,7 +2522,8 @@ class MixedEntitiesTest(QueryTest, AssertsCompiledSQL): # ensure column expressions are taken from inside the subquery, not restated at the top - q = sess.query(Order.id, Order.description, literal_column("'q'").label('foo')).filter(Order.description == u'order 3').from_self() + q = sess.query(Order.id, Order.description, literal_column("'q'").label('foo')).\ + filter(Order.description == u'order 3').from_self() self.assert_compile(q, "SELECT anon_1.orders_id AS anon_1_orders_id, anon_1.orders_description AS anon_1_orders_description, " "anon_1.foo AS anon_1_foo FROM (SELECT orders.id AS orders_id, orders.description AS orders_description, " diff --git a/test/sql/test_types.py b/test/sql/test_types.py index 51dd4c12b2..62c579d715 100644 --- a/test/sql/test_types.py +++ b/test/sql/test_types.py @@ -253,7 +253,20 @@ class UnicodeTest(TestBase, AssertsExecutionResults): @engines.close_first def teardown(self): unicode_table.delete().execute() - + + def test_native_unicode(self): + """assert expected values for 'native unicode' mode""" + + assert testing.db.dialect.returns_unicode_strings == \ + 
((testing.db.name, testing.db.driver) in \ + ( + ('postgresql','psycopg2'), + ('postgresql','pg8000'), + ('postgresql','zxjdbc'), + ('mysql','zxjdbc'), + ('sqlite','pysqlite'), + )) + def test_round_trip(self): unicodedata = u"Alors vous imaginez ma surprise, au lever du jour, quand une drôle de petit voix m’a réveillé. Elle disait: « S’il vous plaît… dessine-moi un mouton! »" @@ -393,10 +406,7 @@ class EnumTest(TestBase): ] ) - @testing.fails_on('postgresql+zxjdbc', - 'zxjdbc fails on ENUM: column "XXX" is of type XXX ' - 'but expression is of type character varying') - @testing.fails_on('mysql', "MySQL seems to issue a 'data truncated' warning.") + @testing.fails_on('mysql+mysqldb', "MySQL seems to issue a 'data truncated' warning.") def test_constraint(self): assert_raises(exc.DBAPIError, enum_table.insert().execute, -- 2.47.2