From bd2a6e9b161251606b64d299faec583d55c2e802 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Sun, 7 Nov 2021 15:47:15 -0500 Subject: [PATCH] removals: all unicode encoding / decoding Items removed here include: * convert_unicode parameters * encoding create_engine() parameter * description encoding support * "non-unicode fallback" modes under Python 2 * String symbols regarding Python 2 non-unicode fallbacks * any concept of DBAPIs that don't accept unicode statements, unicode bound parameters, or that return bytes for strings anywhere except an explicit Binary / BLOB type * unicode processors in Python / C Risk factors: * Whether all DBAPIs do in fact return Unicode objects for all entries in cursor.description now * There was logic for mysql-connector trying to determine description encoding. A quick test shows Unicode coming back, but it's not clear if there are still edge cases where they return bytes. If so, these are bugs in that driver, and at most we would only work around it in the mysql-connector DBAPI itself (but we won't do that either). * It seems like Oracle 8 was not expecting unicode bound parameters. I'm assuming this was all Python 2 stuff and does not apply to modern cx_Oracle under Python 3. 
* third party dialects relying upon built in unicode encoding/decoding but it's hard to imagine any non-SQLAlchemy database driver not dealing exclusively in Python unicode strings in Python 3 Change-Id: I97d762ef6d4dd836487b714d57d8136d0310f28a References: #7257 --- doc/build/changelog/unreleased_20/7257.rst | 25 ++ lib/sqlalchemy/cextension/processors.c | 188 ----------- lib/sqlalchemy/connectors/pyodbc.py | 9 +- lib/sqlalchemy/dialects/mssql/base.py | 9 +- .../dialects/mysql/mysqlconnector.py | 26 -- lib/sqlalchemy/dialects/mysql/pymysql.py | 6 - lib/sqlalchemy/dialects/oracle/base.py | 28 +- lib/sqlalchemy/dialects/oracle/cx_oracle.py | 72 ++--- lib/sqlalchemy/dialects/postgresql/asyncpg.py | 3 - .../dialects/postgresql/psycopg2.py | 40 +-- lib/sqlalchemy/dialects/sqlite/base.py | 2 - lib/sqlalchemy/engine/create.py | 72 ----- lib/sqlalchemy/engine/default.py | 103 +----- lib/sqlalchemy/processors.py | 44 --- lib/sqlalchemy/sql/schema.py | 7 - lib/sqlalchemy/sql/sqltypes.py | 294 ++---------------- lib/sqlalchemy/testing/profiling.py | 6 +- test/aaa_profiling/test_memusage.py | 9 - test/dialect/mssql/test_reflection.py | 7 - test/dialect/oracle/test_dialect.py | 31 +- test/dialect/oracle/test_types.py | 12 - test/dialect/postgresql/test_dialect.py | 8 - test/dialect/postgresql/test_types.py | 5 +- test/sql/test_defaults.py | 26 -- test/sql/test_deprecations.py | 76 ----- test/sql/test_types.py | 107 ------- 26 files changed, 101 insertions(+), 1114 deletions(-) create mode 100644 doc/build/changelog/unreleased_20/7257.rst diff --git a/doc/build/changelog/unreleased_20/7257.rst b/doc/build/changelog/unreleased_20/7257.rst new file mode 100644 index 0000000000..1db74e4f9c --- /dev/null +++ b/doc/build/changelog/unreleased_20/7257.rst @@ -0,0 +1,25 @@ +.. 
change:: + :tags: general, changed + :tickets: 7257 + + Migrated the codebase to remove all pre-2.0 behaviors and architectures + that were previously noted as deprecated for removal in 2.0, including: + + * removal of all Python 2 code, minimum version is now Python 3.7 + + * :class:`_engine.Engine` and :class:`_engine.Connection` now use the + new 2.0 style of working, which includes "autobegin", library level + autocommit removed, subtransactions and "branched" connections + removed + + * Result objects use 2.0-style behaviors; :class:`_result.Row` is fully + a named tuple without "mapping" behavior, use :class:`_result.RowMapping` + for "mapping" behavior + + * All Unicode encoding/decoding architecture has been removed from + SQLAlchemy. All modern DBAPI implementations support Unicode + transparently thanks to Python 3, so the ``convert_unicode`` feature + as well as related mechanisms to look for bytestrings in + DBAPI ``cursor.description`` etc. have been removed. + + * More are in progress as development continues diff --git a/lib/sqlalchemy/cextension/processors.c b/lib/sqlalchemy/cextension/processors.c index f6f203e749..8c031b70aa 100644 --- a/lib/sqlalchemy/cextension/processors.c +++ b/lib/sqlalchemy/cextension/processors.c @@ -310,12 +310,6 @@ str_to_date(PyObject *self, PyObject *arg) * Structs * ***********/ -typedef struct { - PyObject_HEAD - PyObject *encoding; - PyObject *errors; -} UnicodeResultProcessor; - typedef struct { PyObject_HEAD PyObject *type; @@ -324,180 +318,6 @@ typedef struct { -/************************** - * UnicodeResultProcessor * - **************************/ - -static int -UnicodeResultProcessor_init(UnicodeResultProcessor *self, PyObject *args, - PyObject *kwds) -{ - PyObject *encoding, *errors = NULL; - static char *kwlist[] = {"encoding", "errors", NULL}; - -#if PY_MAJOR_VERSION >= 3 - if (!PyArg_ParseTupleAndKeywords(args, kwds, "U|U:__init__", kwlist, - &encoding, &errors)) - return -1; -#else - if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "S|S:__init__", kwlist, - &encoding, &errors)) - return -1; -#endif - -#if PY_MAJOR_VERSION >= 3 - encoding = PyUnicode_AsASCIIString(encoding); -#else - Py_INCREF(encoding); -#endif - self->encoding = encoding; - - if (errors) { -#if PY_MAJOR_VERSION >= 3 - errors = PyUnicode_AsASCIIString(errors); -#else - Py_INCREF(errors); -#endif - } else { -#if PY_MAJOR_VERSION >= 3 - errors = PyBytes_FromString("strict"); -#else - errors = PyString_FromString("strict"); -#endif - if (errors == NULL) - return -1; - } - self->errors = errors; - - return 0; -} - -static PyObject * -UnicodeResultProcessor_process(UnicodeResultProcessor *self, PyObject *value) -{ - const char *encoding, *errors; - char *str; - Py_ssize_t len; - - if (value == Py_None) - Py_RETURN_NONE; - -#if PY_MAJOR_VERSION >= 3 - if (PyBytes_AsStringAndSize(value, &str, &len)) - return NULL; - - encoding = PyBytes_AS_STRING(self->encoding); - errors = PyBytes_AS_STRING(self->errors); -#else - if (PyString_AsStringAndSize(value, &str, &len)) - return NULL; - - encoding = PyString_AS_STRING(self->encoding); - errors = PyString_AS_STRING(self->errors); -#endif - - return PyUnicode_Decode(str, len, encoding, errors); -} - -static PyObject * -UnicodeResultProcessor_conditional_process(UnicodeResultProcessor *self, PyObject *value) -{ - const char *encoding, *errors; - char *str; - Py_ssize_t len; - - if (value == Py_None) - Py_RETURN_NONE; - -#if PY_MAJOR_VERSION >= 3 - if (PyUnicode_Check(value) == 1) { - Py_INCREF(value); - return value; - } - - if (PyBytes_AsStringAndSize(value, &str, &len)) - return NULL; - - encoding = PyBytes_AS_STRING(self->encoding); - errors = PyBytes_AS_STRING(self->errors); -#else - - if (PyUnicode_Check(value) == 1) { - Py_INCREF(value); - return value; - } - - if (PyString_AsStringAndSize(value, &str, &len)) - return NULL; - - - encoding = PyString_AS_STRING(self->encoding); - errors = PyString_AS_STRING(self->errors); -#endif - - return 
PyUnicode_Decode(str, len, encoding, errors); -} - -static void -UnicodeResultProcessor_dealloc(UnicodeResultProcessor *self) -{ - Py_XDECREF(self->encoding); - Py_XDECREF(self->errors); -#if PY_MAJOR_VERSION >= 3 - Py_TYPE(self)->tp_free((PyObject*)self); -#else - self->ob_type->tp_free((PyObject*)self); -#endif -} - -static PyMethodDef UnicodeResultProcessor_methods[] = { - {"process", (PyCFunction)UnicodeResultProcessor_process, METH_O, - "The value processor itself."}, - {"conditional_process", (PyCFunction)UnicodeResultProcessor_conditional_process, METH_O, - "Conditional version of the value processor."}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject UnicodeResultProcessorType = { - PyVarObject_HEAD_INIT(NULL, 0) - "sqlalchemy.cprocessors.UnicodeResultProcessor", /* tp_name */ - sizeof(UnicodeResultProcessor), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)UnicodeResultProcessor_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - "UnicodeResultProcessor objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - UnicodeResultProcessor_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)UnicodeResultProcessor_init, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ -}; /************************** * DecimalResultProcessor * @@ -664,10 +484,6 @@ initcprocessors(void) { PyObject *m; - UnicodeResultProcessorType.tp_new = PyType_GenericNew; - if 
(PyType_Ready(&UnicodeResultProcessorType) < 0) - INITERROR; - DecimalResultProcessorType.tp_new = PyType_GenericNew; if (PyType_Ready(&DecimalResultProcessorType) < 0) INITERROR; @@ -682,10 +498,6 @@ initcprocessors(void) PyDateTime_IMPORT; - Py_INCREF(&UnicodeResultProcessorType); - PyModule_AddObject(m, "UnicodeResultProcessor", - (PyObject *)&UnicodeResultProcessorType); - Py_INCREF(&DecimalResultProcessorType); PyModule_AddObject(m, "DecimalResultProcessor", (PyObject *)&DecimalResultProcessorType); diff --git a/lib/sqlalchemy/connectors/pyodbc.py b/lib/sqlalchemy/connectors/pyodbc.py index c2bbdf7ce9..9661015ad3 100644 --- a/lib/sqlalchemy/connectors/pyodbc.py +++ b/lib/sqlalchemy/connectors/pyodbc.py @@ -18,9 +18,6 @@ class PyODBCConnector(Connector): supports_sane_rowcount_returning = True supports_sane_multi_rowcount = False - supports_unicode_statements = True - supports_unicode_binds = True - supports_native_decimal = True default_paramstyle = "named" @@ -30,12 +27,8 @@ class PyODBCConnector(Connector): # hold the desired driver name pyodbc_driver_name = None - def __init__( - self, supports_unicode_binds=None, use_setinputsizes=False, **kw - ): + def __init__(self, use_setinputsizes=False, **kw): super(PyODBCConnector, self).__init__(**kw) - if supports_unicode_binds is not None: - self.supports_unicode_binds = supports_unicode_binds self.use_setinputsizes = use_setinputsizes @classmethod diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py index 8c8260f3bb..f8ca1ffbf7 100644 --- a/lib/sqlalchemy/dialects/mssql/base.py +++ b/lib/sqlalchemy/dialects/mssql/base.py @@ -1603,17 +1603,12 @@ class MSExecutionContext(default.DefaultExecutionContext): def _opt_encode(self, statement): - if not self.dialect.supports_unicode_statements: - encoded = self.dialect._encoder(statement)[0] - else: - encoded = statement - if self.compiled and self.compiled.schema_translate_map: rst = self.compiled.preparer._render_schema_translates 
- encoded = rst(encoded, self.compiled.schema_translate_map) + statement = rst(statement, self.compiled.schema_translate_map) - return encoded + return statement def pre_exec(self): """Activate IDENTITY_INSERT if needed.""" diff --git a/lib/sqlalchemy/dialects/mysql/mysqlconnector.py b/lib/sqlalchemy/dialects/mysql/mysqlconnector.py index e17da31745..fef4f14ca0 100644 --- a/lib/sqlalchemy/dialects/mysql/mysqlconnector.py +++ b/lib/sqlalchemy/dialects/mysql/mysqlconnector.py @@ -27,7 +27,6 @@ from .base import BIT from .base import MySQLCompiler from .base import MySQLDialect from .base import MySQLIdentifierPreparer -from ... import processors from ... import util @@ -87,8 +86,6 @@ class MySQLDialect_mysqlconnector(MySQLDialect): driver = "mysqlconnector" supports_statement_cache = True - supports_unicode_binds = True - supports_sane_rowcount = True supports_sane_multi_rowcount = True @@ -101,29 +98,6 @@ class MySQLDialect_mysqlconnector(MySQLDialect): colspecs = util.update_copy(MySQLDialect.colspecs, {BIT: _myconnpyBIT}) - def __init__(self, *arg, **kw): - super(MySQLDialect_mysqlconnector, self).__init__(*arg, **kw) - - # hack description encoding since mysqlconnector randomly - # returns bytes or not - self._description_decoder = ( - processors.to_conditional_unicode_processor_factory - )(self.description_encoding) - - def _check_unicode_description(self, connection): - # hack description encoding since mysqlconnector randomly - # returns bytes or not - return False - - @property - def description_encoding(self): - # total guess - return "latin-1" - - @util.memoized_property - def supports_unicode_statements(self): - return util.py3k or self._mysqlconnector_version_info > (2, 0) - @classmethod def dbapi(cls): from mysql import connector diff --git a/lib/sqlalchemy/dialects/mysql/pymysql.py b/lib/sqlalchemy/dialects/mysql/pymysql.py index 1d2c3be2d7..3c30fb9ea1 100644 --- a/lib/sqlalchemy/dialects/mysql/pymysql.py +++ b/lib/sqlalchemy/dialects/mysql/pymysql.py 
@@ -48,12 +48,6 @@ class MySQLDialect_pymysql(MySQLDialect_mysqldb): description_encoding = None - # generally, these two values should be both True - # or both False. PyMySQL unicode tests pass all the way back - # to 0.4 either way. See [ticket:3337] - supports_unicode_statements = True - supports_unicode_binds = True - @langhelpers.memoized_property def supports_server_side_cursors(self): try: diff --git a/lib/sqlalchemy/dialects/oracle/base.py b/lib/sqlalchemy/dialects/oracle/base.py index 5a43205dff..229a54b955 100644 --- a/lib/sqlalchemy/dialects/oracle/base.py +++ b/lib/sqlalchemy/dialects/oracle/base.py @@ -10,7 +10,7 @@ r""" :name: Oracle :full_support: 11.2, 18c :normal_support: 11+ - :best_effort: 8+ + :best_effort: 9+ Auto Increment Behavior @@ -341,6 +341,9 @@ and specify "passive_updates=False" on each relationship(). Oracle 8 Compatibility ---------------------- +.. warning:: The status of Oracle 8 compatibility is not known for SQLAlchemy + 2.0. + When Oracle 8 is detected, the dialect internally configures itself to the following behaviors: @@ -349,16 +352,12 @@ following behaviors: makes use of Oracle's (+) operator. * the NVARCHAR2 and NCLOB datatypes are no longer generated as DDL when - the :class:`~sqlalchemy.types.Unicode` is used - VARCHAR2 and CLOB are - issued instead. This because these types don't seem to work correctly on - Oracle 8 even though they are available. The - :class:`~sqlalchemy.types.NVARCHAR` and + the :class:`~sqlalchemy.types.Unicode` is used - VARCHAR2 and CLOB are issued + instead. This because these types don't seem to work correctly on Oracle 8 + even though they are available. The :class:`~sqlalchemy.types.NVARCHAR` and :class:`~sqlalchemy.dialects.oracle.NCLOB` types will always generate NVARCHAR2 and NCLOB. -* the "native unicode" mode is disabled when using cx_oracle, i.e. SQLAlchemy - encodes all Python unicode objects to "string" before passing in as bind - parameters. 
Synonym/DBLINK Reflection ------------------------- @@ -1439,8 +1438,6 @@ class OracleDialect(default.DefaultDialect): name = "oracle" supports_statement_cache = True supports_alter = True - supports_unicode_statements = False - supports_unicode_binds = False max_identifier_length = 128 supports_simple_order_by_label = False @@ -1576,17 +1573,6 @@ class OracleDialect(default.DefaultDialect): # use the default return None - def _check_unicode_returns(self, connection): - additional_tests = [ - expression.cast( - expression.literal_column("'test nvarchar2 returns'"), - sqltypes.NVARCHAR(60), - ) - ] - return super(OracleDialect, self)._check_unicode_returns( - connection, additional_tests - ) - _isolation_lookup = ["READ COMMITTED", "SERIALIZABLE"] def get_isolation_level(self, connection): diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py index 590c9d47c6..23f619a125 100644 --- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py +++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py @@ -119,7 +119,7 @@ itself. These options are always passed directly to :func:`_sa.create_engine` , such as:: e = create_engine( - "oracle+cx_oracle://user:pass@dsn", coerce_to_unicode=False) + "oracle+cx_oracle://user:pass@dsn", coerce_to_decimal=False) The parameters accepted by the cx_oracle dialect are as follows: @@ -130,8 +130,6 @@ The parameters accepted by the cx_oracle dialect are as follows: * ``auto_convert_lobs`` - defaults to True; See :ref:`cx_oracle_lob`. -* ``coerce_to_unicode`` - see :ref:`cx_oracle_unicode` for detail. - * ``coerce_to_decimal`` - see :ref:`cx_oracle_numeric` for detail. * ``encoding_errors`` - see :ref:`cx_oracle_unicode_encoding_errors` for detail. @@ -210,8 +208,7 @@ Unicode ------- As is the case for all DBAPIs under Python 3, all strings are inherently -Unicode strings. Under Python 2, cx_Oracle also supports Python Unicode -objects directly. 
In all cases however, the driver requires an explicit +Unicode strings. In all cases however, the driver requires an explicit encoding configuration. Ensuring the Correct Client Encoding @@ -264,25 +261,6 @@ SQLAlchemy dialect to use NCHAR/NCLOB for the :class:`.Unicode` / unless the ``use_nchar_for_unicode=True`` is passed to the dialect when :func:`_sa.create_engine` is called. -Unicode Coercion of result rows under Python 2 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When result sets are fetched that include strings, under Python 3 the cx_Oracle -DBAPI returns all strings as Python Unicode objects, since Python 3 only has a -Unicode string type. This occurs for data fetched from datatypes such as -VARCHAR2, CHAR, CLOB, NCHAR, NCLOB, etc. In order to provide cross- -compatibility under Python 2, the SQLAlchemy cx_Oracle dialect will add -Unicode-conversion to string data under Python 2 as well. Historically, this -made use of converters that were supplied by cx_Oracle but were found to be -non-performant; SQLAlchemy's own converters are used for the string to Unicode -conversion under Python 2. To disable the Python 2 Unicode conversion for -VARCHAR2, CHAR, and CLOB, the flag ``coerce_to_unicode=False`` can be passed to -:func:`_sa.create_engine`. - -.. versionchanged:: 1.3 Unicode conversion is applied to all string values - by default under python 2. The ``coerce_to_unicode`` now defaults to True - and can be set to False to disable the Unicode coercion of strings that are - delivered as VARCHAR2/CHAR/CLOB data. .. 
_cx_oracle_unicode_encoding_errors: @@ -855,9 +833,6 @@ class OracleDialect_cx_oracle(OracleDialect): supports_sane_rowcount = True supports_sane_multi_rowcount = True - supports_unicode_statements = True - supports_unicode_binds = True - use_setinputsizes = True driver = "cx_oracle" @@ -892,6 +867,8 @@ class OracleDialect_cx_oracle(OracleDialect): _cx_oracle_threaded = None + _cursor_var_unicode_kwargs = util.immutabledict() + @util.deprecated_params( threaded=( "1.3", @@ -906,7 +883,6 @@ class OracleDialect_cx_oracle(OracleDialect): def __init__( self, auto_convert_lobs=True, - coerce_to_unicode=True, coerce_to_decimal=True, arraysize=50, encoding_errors=None, @@ -917,10 +893,13 @@ class OracleDialect_cx_oracle(OracleDialect): OracleDialect.__init__(self, **kwargs) self.arraysize = arraysize self.encoding_errors = encoding_errors + if encoding_errors: + self._cursor_var_unicode_kwargs = { + "encodingErrors": encoding_errors + } if threaded is not None: self._cx_oracle_threaded = threaded self.auto_convert_lobs = auto_convert_lobs - self.coerce_to_unicode = coerce_to_unicode self.coerce_to_decimal = coerce_to_decimal if self._use_nchar_for_unicode: self.colspecs = self.colspecs.copy() @@ -939,6 +918,13 @@ class OracleDialect_cx_oracle(OracleDialect): "cx_Oracle version 5.2 and above are supported" ) + if encoding_errors and self.cx_oracle_ver < (6, 4): + util.warn( + "cx_oracle version %r does not support encodingErrors" + % (self.cx_oracle_ver,) + ) + self._cursor_var_unicode_kwargs = util.immutabledict() + self._include_setinputsizes = { cx_Oracle.DATETIME, cx_Oracle.NCLOB, @@ -974,19 +960,6 @@ class OracleDialect_cx_oracle(OracleDialect): self._is_cx_oracle_6 = self.cx_oracle_ver >= (6,) - @property - def _cursor_var_unicode_kwargs(self): - if self.encoding_errors: - if self.cx_oracle_ver >= (6, 4): - return {"encodingErrors": self.encoding_errors} - else: - util.warn( - "cx_oracle version %r does not support encodingErrors" - % (self.cx_oracle_ver,) - ) - - 
return {} - def _parse_cx_oracle_ver(self, version): m = re.match(r"(\d+)\.(\d+)(?:\.(\d+))?", version) if m: @@ -1002,9 +975,6 @@ class OracleDialect_cx_oracle(OracleDialect): def initialize(self, connection): super(OracleDialect_cx_oracle, self).initialize(connection) - if self._is_oracle_8: - self.supports_unicode_binds = False - self._detect_decimal_char(connection) def get_isolation_level(self, connection): @@ -1141,9 +1111,10 @@ class OracleDialect_cx_oracle(OracleDialect): cursor, name, default_type, size, precision, scale ) - # allow all strings to come back natively as Unicode + # if unicode options were specified, add a decoder, otherwise + # cx_Oracle should return Unicode elif ( - dialect.coerce_to_unicode + dialect._cursor_var_unicode_kwargs and default_type in ( cx_Oracle.STRING, @@ -1338,13 +1309,6 @@ class OracleDialect_cx_oracle(OracleDialect): if dbtype ) - if not self.supports_unicode_binds: - # oracle 8 only - collection = ( - (self.dialect._encoder(key)[0], dbtype) - for key, dbtype in collection - ) - cursor.setinputsizes(**{key: dbtype for key, dbtype in collection}) def do_recover_twophase(self, connection): diff --git a/lib/sqlalchemy/dialects/postgresql/asyncpg.py b/lib/sqlalchemy/dialects/postgresql/asyncpg.py index fedc0b495b..28374ed60d 100644 --- a/lib/sqlalchemy/dialects/postgresql/asyncpg.py +++ b/lib/sqlalchemy/dialects/postgresql/asyncpg.py @@ -863,11 +863,8 @@ class PGDialect_asyncpg(PGDialect): driver = "asyncpg" supports_statement_cache = True - supports_unicode_statements = True supports_server_side_cursors = True - supports_unicode_binds = True - default_paramstyle = "format" supports_sane_multi_rowcount = False execution_ctx_cls = PGExecutionContext_asyncpg diff --git a/lib/sqlalchemy/dialects/postgresql/psycopg2.py b/lib/sqlalchemy/dialects/postgresql/psycopg2.py index 162ddde949..aadd110598 100644 --- a/lib/sqlalchemy/dialects/postgresql/psycopg2.py +++ b/lib/sqlalchemy/dialects/postgresql/psycopg2.py @@ -40,13 +40,6 @@ may 
be passed to :func:`_sa.create_engine()`, and include the following: :ref:`psycopg2_unicode` -* ``use_native_unicode``: Under Python 2 only, this can be set to False to - disable the use of psycopg2's native Unicode support. - - .. seealso:: - - :ref:`psycopg2_disable_native_unicode` - * ``executemany_mode``, ``executemany_batch_page_size``, ``executemany_values_page_size``: Allows use of psycopg2 @@ -295,10 +288,7 @@ size defaults to 100. These can be affected by passing new values to Unicode with Psycopg2 ---------------------- -The psycopg2 DBAPI driver supports Unicode data transparently. Under Python 2 -only, the SQLAlchemy psycopg2 dialect will enable the -``psycopg2.extensions.UNICODE`` extension by default to ensure Unicode is -handled properly; under Python 3, this is psycopg2's default behavior. +The psycopg2 DBAPI driver supports Unicode data transparently. The client character encoding can be controlled for the psycopg2 dialect in the following ways: @@ -347,21 +337,6 @@ in the following ways: # encoding client_encoding = utf8 -.. _psycopg2_disable_native_unicode: - -Disabling Native Unicode -^^^^^^^^^^^^^^^^^^^^^^^^ - -Under Python 2 only, SQLAlchemy can also be instructed to skip the usage of the -psycopg2 ``UNICODE`` extension and to instead utilize its own unicode -encode/decode services, which are normally reserved only for those DBAPIs that -don't fully support unicode directly. Passing ``use_native_unicode=False`` to -:func:`_sa.create_engine` will disable usage of ``psycopg2.extensions. -UNICODE``. SQLAlchemy will instead encode data itself into Python bytestrings -on the way in and coerce from bytes on the way back, using the value of the -:func:`_sa.create_engine` ``encoding`` parameter, which defaults to ``utf-8``. -SQLAlchemy's own unicode encode/decode functionality is steadily becoming -obsolete as most DBAPIs now support unicode fully. 
Transactions @@ -659,10 +634,6 @@ class PGDialect_psycopg2(PGDialect): _has_native_hstore = True - engine_config_types = PGDialect.engine_config_types.union( - {"use_native_unicode": util.asbool} - ) - colspecs = util.update_copy( PGDialect.colspecs, { @@ -678,7 +649,6 @@ class PGDialect_psycopg2(PGDialect): def __init__( self, - use_native_unicode=True, client_encoding=None, use_native_hstore=True, use_native_uuid=True, @@ -688,16 +658,10 @@ class PGDialect_psycopg2(PGDialect): **kwargs ): PGDialect.__init__(self, **kwargs) - self.use_native_unicode = use_native_unicode - if not use_native_unicode: - raise exc.ArgumentError( - "psycopg2 native_unicode mode is required under Python 3" - ) if not use_native_hstore: self._has_native_hstore = False self.use_native_hstore = use_native_hstore self.use_native_uuid = use_native_uuid - self.supports_unicode_binds = use_native_unicode self.client_encoding = client_encoding # Parse executemany_mode argument, allowing it to be only one of the @@ -892,8 +856,6 @@ class PGDialect_psycopg2(PGDialect): executemany_values = ( "(%s)" % context.compiled.insert_single_values_expr ) - if not self.supports_unicode_statements: - executemany_values = executemany_values.encode(self.encoding) # guard for statement that was altered via event hook or similar if executemany_values not in statement: diff --git a/lib/sqlalchemy/dialects/sqlite/base.py b/lib/sqlalchemy/dialects/sqlite/base.py index e936c9080a..dc8425859e 100644 --- a/lib/sqlalchemy/dialects/sqlite/base.py +++ b/lib/sqlalchemy/dialects/sqlite/base.py @@ -1795,8 +1795,6 @@ class SQLiteExecutionContext(default.DefaultExecutionContext): class SQLiteDialect(default.DefaultDialect): name = "sqlite" supports_alter = False - supports_unicode_statements = True - supports_unicode_binds = True # SQlite supports "DEFAULT VALUES" but *does not* support # "VALUES (DEFAULT)" diff --git a/lib/sqlalchemy/engine/create.py b/lib/sqlalchemy/engine/create.py index bb657202ff..e6da1d8e64 100644 --- 
a/lib/sqlalchemy/engine/create.py +++ b/lib/sqlalchemy/engine/create.py @@ -95,21 +95,6 @@ def create_engine(url, **kwargs): additional keyword arguments. See the example at :ref:`custom_dbapi_args`. - :param convert_unicode=False: if set to True, causes - all :class:`.String` datatypes to act as though the - :paramref:`.String.convert_unicode` flag has been set to ``True``, - regardless of a setting of ``False`` on an individual :class:`.String` - type. This has the effect of causing all :class:`.String` -based - columns to accommodate Python Unicode objects directly as though the - datatype were the :class:`.Unicode` type. - - .. deprecated:: 1.3 - - The :paramref:`_sa.create_engine.convert_unicode` parameter - is deprecated and will be removed in a future release. - All modern DBAPIs now support Python Unicode directly and this - parameter is unnecessary. - :param creator: a callable which returns a DBAPI connection. This creation function will be passed to the underlying connection pool and will be used to create all new database @@ -169,63 +154,6 @@ def create_engine(url, **kwargs): :ref:`change_4737` - :param encoding: **legacy Python 2 value only, where it only applies to - specific DBAPIs, not used in Python 3 for any modern DBAPI driver. - Please refer to individual dialect documentation for client encoding - behaviors.** Defaults to the string value ``utf-8``. This value - refers **only** to the character encoding that is used when SQLAlchemy - sends or receives data from a :term:`DBAPI` that does not support - Python Unicode and **is only used under Python 2**, only for certain - DBAPI drivers, and only in certain circumstances. **Python 3 users - please DISREGARD this parameter and refer to the documentation for the - specific dialect in use in order to configure character encoding - behavior.** - - .. note:: The ``encoding`` parameter deals only with in-Python - encoding issues that were prevalent with **some DBAPIS only** - under **Python 2 only**. 
Under Python 3 it is not used by - any modern dialect. For DBAPIs that require - client encoding configurations, which are most of those outside - of SQLite, please consult specific :ref:`dialect documentation - ` for details. - - All modern DBAPIs that work in Python 3 necessarily feature direct - support for Python unicode strings. Under Python 2, this was not - always the case. For those scenarios where the DBAPI is detected as - not supporting a Python ``unicode`` object under Python 2, this - encoding is used to determine the source/destination encoding. It is - **not used** for those cases where the DBAPI handles unicode directly. - - To properly configure a system to accommodate Python ``unicode`` - objects, the DBAPI should be configured to handle unicode to the - greatest degree as is appropriate - see the notes on unicode pertaining - to the specific target database in use at :ref:`dialect_toplevel`. - - Areas where string encoding may need to be accommodated - outside of the DBAPI, nearly always under **Python 2 only**, - include zero or more of: - - * the values passed to bound parameters, corresponding to - the :class:`.Unicode` type or the :class:`.String` type - when ``convert_unicode`` is ``True``; - * the values returned in result set columns corresponding - to the :class:`.Unicode` type or the :class:`.String` - type when ``convert_unicode`` is ``True``; - * the string SQL statement passed to the DBAPI's - ``cursor.execute()`` method; - * the string names of the keys in the bound parameter - dictionary passed to the DBAPI's ``cursor.execute()`` - as well as ``cursor.setinputsizes()`` methods; - * the string column names retrieved from the DBAPI's - ``cursor.description`` attribute. - - When using Python 3, the DBAPI is required to support all of the above - values as Python ``unicode`` objects, which in Python 3 are just known - as ``str``. 
In Python 2, the DBAPI does not specify unicode behavior - at all, so SQLAlchemy must make decisions for each of the above values - on a per-DBAPI basis - implementations are completely inconsistent in - their behavior. - :param execution_options: Dictionary execution options which will be applied to all connections. See :meth:`~sqlalchemy.engine.Connection.execution_options` diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index 9a59250e91..d670cf2311 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -13,7 +13,6 @@ as the base class for their own corresponding classes. """ -import codecs import functools import random import re @@ -26,7 +25,6 @@ from .base import Connection from .. import event from .. import exc from .. import pool -from .. import processors from .. import types as sqltypes from .. import util from ..sql import compiler @@ -92,7 +90,6 @@ class DefaultDialect(interfaces.Dialect): engine_config_types = util.immutabledict( [ - ("convert_unicode", util.bool_or_str("force")), ("pool_timeout", util.asint), ("echo", util.bool_or_str("debug")), ("echo_pool", util.bool_or_str("debug")), @@ -108,9 +105,6 @@ class DefaultDialect(interfaces.Dialect): # *not* the FLOAT type however. supports_native_decimal = False - supports_unicode_statements = True - supports_unicode_binds = True - returns_unicode_strings = sqltypes.String.RETURNS_UNICODE description_encoding = None name = "default" @@ -223,13 +217,6 @@ class DefaultDialect(interfaces.Dialect): NO_DIALECT_SUPPORT = NO_DIALECT_SUPPORT @util.deprecated_params( - convert_unicode=( - "1.3", - "The :paramref:`_sa.create_engine.convert_unicode` parameter " - "and corresponding dialect-level parameters are deprecated, " - "and will be removed in a future release. 
Modern DBAPIs support " - "Python Unicode natively and this parameter is unnecessary.", - ), empty_in_strategy=( "1.4", "The :paramref:`_sa.create_engine.empty_in_strategy` keyword is " @@ -250,7 +237,6 @@ class DefaultDialect(interfaces.Dialect): ) def __init__( self, - convert_unicode=False, encoding="utf-8", paramstyle=None, dbapi=None, @@ -279,7 +265,6 @@ class DefaultDialect(interfaces.Dialect): else: self.server_side_cursors = True - self.convert_unicode = convert_unicode self.encoding = encoding self.positional = False self._ischema = None @@ -305,16 +290,6 @@ class DefaultDialect(interfaces.Dialect): ) self.label_length = label_length self.compiler_linting = compiler_linting - if self.description_encoding == "use_encoding": - self._description_decoder = ( - processors.to_unicode_processor_factory - )(encoding) - elif self.description_encoding is not None: - self._description_decoder = ( - processors.to_unicode_processor_factory - )(self.description_encoding) - self._encoder = codecs.getencoder(self.encoding) - self._decoder = processors.to_unicode_processor_factory(self.encoding) def _ensure_has_table_connection(self, arg): @@ -391,12 +366,6 @@ class DefaultDialect(interfaces.Dialect): except NotImplementedError: self.default_isolation_level = None - if ( - self.description_encoding is not None - and self._check_unicode_description(connection) - ): - self._description_decoder = self.description_encoding = None - if not self._user_defined_max_identifier_length: max_ident_length = self._check_max_identifier_length(connection) if max_ident_length: @@ -444,22 +413,6 @@ class DefaultDialect(interfaces.Dialect): """ return self.get_isolation_level(dbapi_conn) - def _check_unicode_description(self, connection): - cast_to = util.text_type - - cursor = connection.connection.cursor() - try: - cursor.execute( - cast_to( - expression.select( - expression.literal_column("'x'").label("some_label") - ).compile(dialect=self) - ) - ) - return 
isinstance(cursor.description[0][0], util.text_type) - finally: - cursor.close() - def type_descriptor(self, typeobj): """Provide a database-specific :class:`.TypeEngine` object, given the generic object which comes from the types module. @@ -790,10 +743,7 @@ class DefaultExecutionContext(interfaces.ExecutionContext): self.unicode_statement, schema_translate_map ) - if not dialect.supports_unicode_statements: - self.statement = dialect._encoder(self.unicode_statement)[0] - else: - self.statement = self.unicode_statement + self.statement = self.unicode_statement self.cursor = self.create_cursor() self.compiled_parameters = [] @@ -913,12 +863,7 @@ class DefaultExecutionContext(interfaces.ExecutionContext): # final self.unicode_statement is now assigned, encode if needed # by dialect - if not dialect.supports_unicode_statements: - self.statement = self.unicode_statement.encode( - self.dialect.encoding - ) - else: - self.statement = self.unicode_statement + self.statement = self.unicode_statement # Convert the dictionary of bind parameter values # into a dict or list to be sent to the DBAPI's @@ -934,25 +879,14 @@ class DefaultExecutionContext(interfaces.ExecutionContext): ] parameters.append(dialect.execute_sequence_format(param)) else: - encode = not dialect.supports_unicode_statements - if encode: - encoder = dialect._encoder for compiled_params in self.compiled_parameters: - if encode: - param = { - encoder(key)[0]: processors[key](compiled_params[key]) - if key in processors - else compiled_params[key] - for key in compiled_params - } - else: - param = { - key: processors[key](compiled_params[key]) - if key in processors - else compiled_params[key] - for key in compiled_params - } + param = { + key: processors[key](compiled_params[key]) + if key in processors + else compiled_params[key] + for key in compiled_params + } parameters.append(param) @@ -988,13 +922,7 @@ class DefaultExecutionContext(interfaces.ExecutionContext): elif isinstance(parameters[0], 
dialect.execute_sequence_format): self.parameters = parameters elif isinstance(parameters[0], dict): - if dialect.supports_unicode_statements: - self.parameters = parameters - else: - self.parameters = [ - {dialect._encoder(k)[0]: d[k] for k in d} - for d in parameters - ] or [{}] + self.parameters = parameters else: self.parameters = [ dialect.execute_sequence_format(p) for p in parameters @@ -1002,13 +930,7 @@ class DefaultExecutionContext(interfaces.ExecutionContext): self.executemany = len(parameters) > 1 - if not dialect.supports_unicode_statements and isinstance( - statement, util.text_type - ): - self.unicode_statement = statement - self.statement = dialect._encoder(statement)[0] - else: - self.statement = self.unicode_statement = statement + self.statement = self.unicode_statement = statement self.cursor = self.create_cursor() return self @@ -1101,11 +1023,6 @@ class DefaultExecutionContext(interfaces.ExecutionContext): """ conn = self.root_connection - if ( - isinstance(stmt, util.text_type) - and not self.dialect.supports_unicode_statements - ): - stmt = self.dialect._encoder(stmt)[0] if "schema_translate_map" in self.execution_options: schema_translate_map = self.execution_options.get( diff --git a/lib/sqlalchemy/processors.py b/lib/sqlalchemy/processors.py index 0c0aa1bd6c..156005c6a9 100644 --- a/lib/sqlalchemy/processors.py +++ b/lib/sqlalchemy/processors.py @@ -13,7 +13,6 @@ They all share one common characteristic: None is passed through unchanged. """ -import codecs import datetime import re @@ -64,36 +63,6 @@ def str_to_datetime_processor_factory(regexp, type_): def py_fallback(): - def to_unicode_processor_factory(encoding, errors=None): - decoder = codecs.getdecoder(encoding) - - def process(value): - if value is None: - return None - else: - # decoder returns a tuple: (value, len). Simply dropping the - # len part is safe: it is done that way in the normal - # 'xx'.decode(encoding) code path. 
- return decoder(value, errors)[0] - - return process - - def to_conditional_unicode_processor_factory(encoding, errors=None): - decoder = codecs.getdecoder(encoding) - - def process(value): - if value is None: - return None - elif isinstance(value, util.text_type): - return value - else: - # decoder returns a tuple: (value, len). Simply dropping the - # len part is safe: it is done that way in the normal - # 'xx'.decode(encoding) code path. - return decoder(value, errors)[0] - - return process - def to_decimal_processor_factory(target_class, scale): fstring = "%%.%df" % scale @@ -149,19 +118,6 @@ try: from sqlalchemy.cprocessors import str_to_time # noqa from sqlalchemy.cprocessors import to_float # noqa from sqlalchemy.cprocessors import to_str # noqa - from sqlalchemy.cprocessors import UnicodeResultProcessor # noqa - - def to_unicode_processor_factory(encoding, errors=None): - if errors is not None: - return UnicodeResultProcessor(encoding, errors).process - else: - return UnicodeResultProcessor(encoding).process - - def to_conditional_unicode_processor_factory(encoding, errors=None): - if errors is not None: - return UnicodeResultProcessor(encoding, errors).conditional_process - else: - return UnicodeResultProcessor(encoding).conditional_process def to_decimal_processor_factory(target_class, scale): # Note that the scale argument is not taken into account for integer diff --git a/lib/sqlalchemy/sql/schema.py b/lib/sqlalchemy/sql/schema.py index 89d3168a62..641e62be3e 100644 --- a/lib/sqlalchemy/sql/schema.py +++ b/lib/sqlalchemy/sql/schema.py @@ -1639,13 +1639,6 @@ class Column(DialectKWArgs, SchemaItem, ColumnClause): if isinstance(self.default, (ColumnDefault, Sequence)): args.append(self.default) else: - if getattr(self.type, "_warn_on_bytestring", False): - if isinstance(self.default, util.binary_type): - util.warn( - "Unicode column '%s' has non-unicode " - "default value %r specified." 
- % (self.key, self.default) - ) args.append(ColumnDefault(self.default)) if self.server_default is not None: diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py index 0d7a06e313..5599460728 100644 --- a/lib/sqlalchemy/sql/sqltypes.py +++ b/lib/sqlalchemy/sql/sqltypes.py @@ -9,7 +9,6 @@ """ -import codecs import datetime as dt import decimal import json @@ -127,9 +126,7 @@ class String(Concatenable, TypeEngine): """The base for all string and character types. - In SQL, corresponds to VARCHAR. Can also take Python unicode objects - and encode to the database's encoding in bind params (and the reverse for - result sets.) + In SQL, corresponds to VARCHAR. The `length` field is usually required when the `String` type is used within a CREATE TABLE statement, as VARCHAR requires a length @@ -139,91 +136,10 @@ class String(Concatenable, TypeEngine): __visit_name__ = "string" - RETURNS_UNICODE = util.symbol( - "RETURNS_UNICODE", - """Indicates that the DBAPI returns Python Unicode for VARCHAR, - NVARCHAR, and other character-based datatypes in all cases. - - This is the default value for - :attr:`.DefaultDialect.returns_unicode_strings` under Python 3. - - .. versionadded:: 1.4 - - """, - ) - - RETURNS_BYTES = util.symbol( - "RETURNS_BYTES", - """Indicates that the DBAPI returns byte objects under Python 3 - or non-Unicode string objects under Python 2 for VARCHAR, NVARCHAR, - and other character-based datatypes in all cases. - - This may be applied to the - :attr:`.DefaultDialect.returns_unicode_strings` attribute. - - .. versionadded:: 1.4 - - """, - ) - - RETURNS_CONDITIONAL = util.symbol( - "RETURNS_CONDITIONAL", - """Indicates that the DBAPI may return Unicode or bytestrings for - VARCHAR, NVARCHAR, and other character-based datatypes, and that - SQLAlchemy's default String datatype will need to test on a per-row - basis for Unicode or bytes. - - This may be applied to the - :attr:`.DefaultDialect.returns_unicode_strings` attribute. - - .. 
versionadded:: 1.4 - - """, - ) - - RETURNS_UNKNOWN = util.symbol( - "RETURNS_UNKNOWN", - """Indicates that the dialect should test on first connect what the - string-returning behavior of character-based datatypes is. - - This is the default value for DefaultDialect.unicode_returns under - Python 2. - - This may be applied to the - :attr:`.DefaultDialect.returns_unicode_strings` attribute under - Python 2 only. The value is disallowed under Python 3. - - .. versionadded:: 1.4 - - .. deprecated:: 1.4 This value will be removed in SQLAlchemy 2.0. - - """, - ) - - @util.deprecated_params( - convert_unicode=( - "1.3", - "The :paramref:`.String.convert_unicode` parameter is deprecated " - "and will be removed in a future release. All modern DBAPIs " - "now support Python Unicode directly and this parameter is " - "unnecessary.", - ), - unicode_error=( - "1.3", - "The :paramref:`.String.unicode_errors` parameter is deprecated " - "and will be removed in a future release. This parameter is " - "unnecessary for modern Python DBAPIs and degrades performance " - "significantly.", - ), - ) def __init__( self, length=None, collation=None, - convert_unicode=False, - unicode_error=None, - _warn_on_bytestring=False, - _expect_unicode=False, ): """ Create a string-holding type. @@ -245,65 +161,17 @@ class String(Concatenable, TypeEngine): >>> print(select(cast('some string', String(collation='utf8')))) SELECT CAST(:param_1 AS VARCHAR COLLATE utf8) AS anon_1 - :param convert_unicode: When set to ``True``, the - :class:`.String` type will assume that - input is to be passed as Python Unicode objects under Python 2, - and results returned as Python Unicode objects. - In the rare circumstance that the DBAPI does not support - Python unicode under Python 2, SQLAlchemy will use its own - encoder/decoder functionality on strings, referring to the - value of the :paramref:`_sa.create_engine.encoding` parameter - parameter passed to :func:`_sa.create_engine` as the encoding. 
- - For the extremely rare case that Python Unicode - is to be encoded/decoded by SQLAlchemy on a backend - that *does* natively support Python Unicode, - the string value ``"force"`` can be passed here which will - cause SQLAlchemy's encode/decode services to be - used unconditionally. - - .. note:: - - SQLAlchemy's unicode-conversion flags and features only apply - to Python 2; in Python 3, all string objects are Unicode objects. - For this reason, as well as the fact that virtually all modern - DBAPIs now support Unicode natively even under Python 2, - the :paramref:`.String.convert_unicode` flag is inherently a - legacy feature. - .. note:: - In the vast majority of cases, the :class:`.Unicode` or - :class:`.UnicodeText` datatypes should be used for a - :class:`_schema.Column` that expects to store non-ascii data. - These - datatypes will ensure that the correct types are used on the - database side as well as set up the correct Unicode behaviors - under Python 2. - - .. seealso:: - - :paramref:`_sa.create_engine.convert_unicode` - - :class:`_engine.Engine`-wide parameter - - :param unicode_error: Optional, a method to use to handle Unicode - conversion errors. Behaves like the ``errors`` keyword argument to - the standard library's ``string.decode()`` functions, requires - that :paramref:`.String.convert_unicode` is set to - ``"force"`` + In most cases, the :class:`.Unicode` or :class:`.UnicodeText` + datatypes should be used for a :class:`_schema.Column` that expects + to store non-ascii data. These datatypes will ensure that the + correct types are used on the database. """ - if unicode_error is not None and convert_unicode != "force": - raise exc.ArgumentError( - "convert_unicode must be 'force' " "when unicode_error is set." 
- ) self.length = length self.collation = collation - self._expect_unicode = convert_unicode or _expect_unicode - self._expect_unicode_error = unicode_error - - self._warn_on_bytestring = _warn_on_bytestring def literal_processor(self, dialect): def process(value): @@ -317,100 +185,24 @@ class String(Concatenable, TypeEngine): return process def bind_processor(self, dialect): - if self._expect_unicode or dialect.convert_unicode: - if ( - dialect.supports_unicode_binds - and self._expect_unicode != "force" - ): - if self._warn_on_bytestring: - - def process(value): - if isinstance(value, util.binary_type): - util.warn_limited( - "Unicode type received non-unicode " - "bind param value %r.", - (util.ellipses_string(value),), - ) - return value - - return process - else: - return None - else: - encoder = codecs.getencoder(dialect.encoding) - warn_on_bytestring = self._warn_on_bytestring - - def process(value): - if isinstance(value, util.text_type): - return encoder(value, self._expect_unicode_error)[0] - elif warn_on_bytestring and value is not None: - util.warn_limited( - "Unicode type received non-unicode bind " - "param value %r.", - (util.ellipses_string(value),), - ) - return value - - return process - else: - return None + return None def result_processor(self, dialect, coltype): - wants_unicode = self._expect_unicode or dialect.convert_unicode - needs_convert = wants_unicode and ( - dialect.returns_unicode_strings is not String.RETURNS_UNICODE - or self._expect_unicode in ("force", "force_nocheck") - ) - needs_isinstance = ( - needs_convert - and dialect.returns_unicode_strings - in ( - String.RETURNS_CONDITIONAL, - String.RETURNS_UNICODE, - ) - and self._expect_unicode != "force_nocheck" - ) - if needs_convert: - if needs_isinstance: - return processors.to_conditional_unicode_processor_factory( - dialect.encoding, self._expect_unicode_error - ) - else: - return processors.to_unicode_processor_factory( - dialect.encoding, self._expect_unicode_error - ) - else: 
- return None + return None @property def python_type(self): - if self._expect_unicode: - return util.text_type - else: - return str + return util.text_type def get_dbapi_type(self, dbapi): return dbapi.STRING - @classmethod - def _warn_deprecated_unicode(cls): - util.warn_deprecated( - "The convert_unicode on Engine and String as well as the " - "unicode_error flag on String are deprecated. All modern " - "DBAPIs now support Python Unicode natively under Python 2, and " - "under Python 3 all strings are inherently Unicode. These flags " - "will be removed in a future release.", - version="1.3", - ) - class Text(String): """A variably sized string type. - In SQL, usually corresponds to CLOB or TEXT. Can also take Python - unicode objects and encode to the database's encoding in bind - params (and the reverse for result sets.) In general, TEXT objects + In SQL, usually corresponds to CLOB or TEXT. In general, TEXT objects do not have a length; while some databases will accept a length argument here, it will be rejected by others. @@ -428,9 +220,7 @@ class Unicode(String): some backends implies an underlying column type that is explicitly supporting of non-ASCII data, such as ``NVARCHAR`` on Oracle and SQL Server. This will impact the output of ``CREATE TABLE`` statements and - ``CAST`` functions at the dialect level, and also in some cases will - indicate different behavior in the DBAPI itself in how it handles bound - parameters. + ``CAST`` functions at the dialect level. The character encoding used by the :class:`.Unicode` type that is used to transmit and receive data to the database is usually determined by the @@ -440,18 +230,10 @@ class Unicode(String): in the :ref:`dialect_toplevel` section. In modern SQLAlchemy, use of the :class:`.Unicode` datatype does not - typically imply any encoding/decoding behavior within SQLAlchemy itself. 
- Historically, when DBAPIs did not support Python ``unicode`` objects under - Python 2, SQLAlchemy handled unicode encoding/decoding services itself - which would be controlled by the flag :paramref:`.String.convert_unicode`; - this flag is deprecated as it is no longer needed for Python 3. - - When using Python 2, data that is passed to columns that use the - :class:`.Unicode` datatype must be of type ``unicode``, and not ``str`` - which in Python 2 is equivalent to ``bytes``. In Python 3, all data - passed to columns that use the :class:`.Unicode` datatype should be - of type ``str``. See the flag :paramref:`.String.convert_unicode` for - more discussion of unicode encode/decode behavior under Python 2. + imply any encoding/decoding behavior within SQLAlchemy itself. In Python + 3, all string objects are inherently Unicode capable, and SQLAlchemy + does not produce bytestring objects nor does it accommodate a DBAPI that + does not return Python Unicode objects in result sets for string values. .. warning:: Some database backends, particularly SQL Server with pyodbc, are known to have undesirable behaviors regarding data that is noted @@ -466,8 +248,6 @@ class Unicode(String): :class:`.UnicodeText` - unlengthed textual counterpart to :class:`.Unicode`. - :paramref:`.String.convert_unicode` - :meth:`.DialectEvents.do_setinputsizes` @@ -479,13 +259,9 @@ class Unicode(String): """ Create a :class:`.Unicode` object. - Parameters are the same as that of :class:`.String`, - with the exception that ``convert_unicode`` - defaults to ``True``. + Parameters are the same as that of :class:`.String`. """ - kwargs.setdefault("_expect_unicode", True) - kwargs.setdefault("_warn_on_bytestring", True) super(Unicode, self).__init__(length=length, **kwargs) @@ -508,18 +284,11 @@ class UnicodeText(Text): """ Create a Unicode-converting Text type. 
- Parameters are the same as that of :class:`_expression.TextClause`, - with the exception that ``convert_unicode`` - defaults to ``True``. + Parameters are the same as that of :class:`_expression.TextClause`. """ - kwargs.setdefault("_expect_unicode", True) - kwargs.setdefault("_warn_on_bytestring", True) super(UnicodeText, self).__init__(length=length, **kwargs) - def _warn_deprecated_unicode(self): - pass - class Integer(_LookupExpressionAdapter, TypeEngine): @@ -1306,15 +1075,6 @@ class Enum(Emulated, String, SchemaType): __visit_name__ = "enum" - @util.deprecated_params( - convert_unicode=( - "1.3", - "The :paramref:`.Enum.convert_unicode` parameter is deprecated " - "and will be removed in a future release. All modern DBAPIs " - "now support Python Unicode directly and this parameter is " - "unnecessary.", - ) - ) def __init__(self, *enums, **kw): r"""Construct an enum. @@ -1327,11 +1087,6 @@ class Enum(Emulated, String, SchemaType): .. versionadded:: 1.1 a PEP-435 style enumerated class may be passed. - :param convert_unicode: Enable unicode-aware bind parameter and - result-set processing for this Enum's data under Python 2 only. - Under Python 2, this is set automatically based on the presence of - unicode label strings. This flag will be removed in SQLAlchemy 2.0. - :param create_constraint: defaults to False. When creating a non-native enumerated type, also build a CHECK constraint on the database against the valid values. 
@@ -1481,14 +1236,8 @@ class Enum(Emulated, String, SchemaType): values, objects = self._parse_into_values(enums, kw) self._setup_for_values(values, objects, kw) - convert_unicode = kw.pop("convert_unicode", None) self.validate_strings = kw.pop("validate_strings", False) - if convert_unicode is None: - _expect_unicode = True - else: - _expect_unicode = convert_unicode - if self.enums: length = max(len(x) for x in self.enums) else: @@ -1504,9 +1253,7 @@ class Enum(Emulated, String, SchemaType): self._valid_lookup[None] = self._object_lookup[None] = None - super(Enum, self).__init__( - length=length, _expect_unicode=_expect_unicode - ) + super(Enum, self).__init__(length=length) if self.enum_class: kw.setdefault("name", self.enum_class.__name__.lower()) @@ -1615,9 +1362,7 @@ class Enum(Emulated, String, SchemaType): op, other_comparator ) if op is operators.concat_op: - typ = String( - self.type.length, _expect_unicode=self.type._expect_unicode - ) + typ = String(self.type.length) return op, typ comparator_factory = Comparator @@ -1659,7 +1404,6 @@ class Enum(Emulated, String, SchemaType): return util.constructor_copy(self, self._generic_type_affinity, *args) def adapt_to_emulated(self, impltype, **kw): - kw.setdefault("_expect_unicode", self._expect_unicode) kw.setdefault("validate_strings", self.validate_strings) kw.setdefault("name", self.name) kw.setdefault("schema", self.schema) @@ -2605,7 +2349,7 @@ class JSON(Indexable, TypeEngine): @util.memoized_property def _str_impl(self): - return String(_expect_unicode=True) + return String() def bind_processor(self, dialect): string_process = self._str_impl.bind_processor(dialect) diff --git a/lib/sqlalchemy/testing/profiling.py b/lib/sqlalchemy/testing/profiling.py index dd50402059..10344c8d69 100644 --- a/lib/sqlalchemy/testing/profiling.py +++ b/lib/sqlalchemy/testing/profiling.py @@ -105,11 +105,7 @@ class ProfileStatsFile(object): dbapi_key, ] - platform_tokens.append( - "nativeunicode" - if 
config.db.dialect.convert_unicode - else "dbapiunicode" - ) + platform_tokens.append("dbapiunicode") _has_cext = has_compiled_ext() platform_tokens.append(_has_cext and "cextensions" or "nocextensions") return "_".join(platform_tokens) diff --git a/test/aaa_profiling/test_memusage.py b/test/aaa_profiling/test_memusage.py index 895cd9e0cd..518b215dd6 100644 --- a/test/aaa_profiling/test_memusage.py +++ b/test/aaa_profiling/test_memusage.py @@ -29,7 +29,6 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import subqueryload from sqlalchemy.orm.session import _sessions from sqlalchemy.processors import to_decimal_processor_factory -from sqlalchemy.processors import to_unicode_processor_factory from sqlalchemy.sql import column from sqlalchemy.sql import util as sql_util from sqlalchemy.sql.visitors import cloned_traverse @@ -285,14 +284,6 @@ class MemUsageTest(EnsureZeroed): go() - @testing.requires.cextensions - def test_UnicodeResultProcessor_init(self): - @profile_memory() - def go(): - to_unicode_processor_factory("utf8") - - go() - @testing.requires.cextensions def test_cycles_in_row(self): diff --git a/test/dialect/mssql/test_reflection.py b/test/dialect/mssql/test_reflection.py index 01c5e845ea..1789166ee1 100644 --- a/test/dialect/mssql/test_reflection.py +++ b/test/dialect/mssql/test_reflection.py @@ -22,7 +22,6 @@ from sqlalchemy import types as sqltypes from sqlalchemy import util from sqlalchemy.dialects import mssql from sqlalchemy.dialects.mssql import base -from sqlalchemy.dialects.mssql.information_schema import CoerceUnicode from sqlalchemy.dialects.mssql.information_schema import tables from sqlalchemy.schema import CreateIndex from sqlalchemy.testing import AssertsCompiledSQL @@ -550,12 +549,6 @@ class ReflectionTest(fixtures.TestBase, ComparesTables, AssertsCompiledSQL): class InfoCoerceUnicodeTest(fixtures.TestBase, AssertsCompiledSQL): - def test_info_unicode_coercion(self): - - dialect = mssql.dialect() - value = 
CoerceUnicode().bind_processor(dialect)("a string") - assert isinstance(value, util.text_type) - def test_info_unicode_cast_no_2000(self): dialect = mssql.dialect() dialect.server_version_info = base.MS_2000_VERSION diff --git a/test/dialect/oracle/test_dialect.py b/test/dialect/oracle/test_dialect.py index f287a9a0b8..ccf771f816 100644 --- a/test/dialect/oracle/test_dialect.py +++ b/test/dialect/oracle/test_dialect.py @@ -259,22 +259,15 @@ class EncodingErrorsTest(fixtures.TestBase): def test_older_cx_oracle_warning(self, cx_Oracle, cx_oracle_type): cx_Oracle.version = "6.3" - ignore_dialect = cx_oracle.dialect( - dbapi=cx_Oracle, encoding_errors="ignore" - ) - ignore_outputhandler = ( - ignore_dialect._generate_connection_outputtype_handler() - ) - - cursor = mock.Mock() - with testing.expect_warnings( r"cx_oracle version \(6, 3\) does not support encodingErrors" ): - ignore_outputhandler( - cursor, "foo", cx_oracle_type, None, None, None + dialect = cx_oracle.dialect( + dbapi=cx_Oracle, encoding_errors="ignore" ) + eq_(dialect._cursor_var_unicode_kwargs, {}) + @_oracle_char_combinations def test_encoding_errors_cx_oracle( self, @@ -319,10 +312,18 @@ class EncodingErrorsTest(fixtures.TestBase): cursor = mock.Mock() plain_outputhandler(cursor, "foo", cx_oracle_type, None, None, None) - eq_( - cursor.mock_calls, - [mock.call.var(mock.ANY, None, cursor.arraysize)], - ) + if cx_oracle_type in (cx_Oracle.FIXED_CHAR, cx_Oracle.STRING): + # no calls; without encodingErrors, use cx_Oracle's default unicode + # handling + eq_( + cursor.mock_calls, + [], + ) + else: + eq_( + cursor.mock_calls, + [mock.call.var(mock.ANY, None, cursor.arraysize)], + ) class ComputedReturningTest(fixtures.TablesTest): diff --git a/test/dialect/oracle/test_types.py b/test/dialect/oracle/test_types.py index 2b54f2b56c..cbbb7be7c9 100644 --- a/test/dialect/oracle/test_types.py +++ b/test/dialect/oracle/test_types.py @@ -714,18 +714,6 @@ class TypesTest(fixtures.TestBase): eq_(sqla_result, 
cx_oracle_result) - def test_coerce_to_unicode(self, connection): - engine = testing_engine(options=dict(coerce_to_unicode=False)) - with engine.connect() as conn_no_coerce: - value = exec_sql( - conn_no_coerce, "SELECT 'hello' FROM DUAL" - ).scalar() - assert not isinstance(value, util.binary_type) - assert isinstance(value, util.text_type) - - value = exec_sql(connection, "SELECT 'hello' FROM DUAL").scalar() - assert isinstance(value, util.text_type) - def test_reflect_dates(self, metadata, connection): Table( "date_types", diff --git a/test/dialect/postgresql/test_dialect.py b/test/dialect/postgresql/test_dialect.py index fe3700bbbc..c12f4a50ad 100644 --- a/test/dialect/postgresql/test_dialect.py +++ b/test/dialect/postgresql/test_dialect.py @@ -161,14 +161,6 @@ $$ LANGUAGE plpgsql;""" future_connection.dialect.server_version_info, ) - @testing.requires.psycopg2_compatibility - def test_pg_dialect_no_native_unicode_in(self, testing_engine): - with testing.expect_raises_message( - exc.ArgumentError, - "psycopg2 native_unicode mode is required under Python 3", - ): - testing_engine(options=dict(use_native_unicode=False)) - def test_psycopg2_empty_connection_string(self): dialect = psycopg2_dialect.dialect() u = url.make_url("postgresql+psycopg2://") diff --git a/test/dialect/postgresql/test_types.py b/test/dialect/postgresql/test_types.py index ebb4d4b125..96601d6ad2 100644 --- a/test/dialect/postgresql/test_types.py +++ b/test/dialect/postgresql/test_types.py @@ -3826,10 +3826,7 @@ class JSONRoundTripTest(fixtures.TablesTest): result = connection.execute( select(data_table.c.data["k1"].astext) ).first() - if connection.dialect.returns_unicode_strings: - assert isinstance(result[0], util.text_type) - else: - assert isinstance(result[0], util.string_types) + assert isinstance(result[0], util.text_type) def test_query_returned_as_int(self, connection): self._fixture_data(connection) diff --git a/test/sql/test_defaults.py b/test/sql/test_defaults.py index 
31cc151559..92e59d9a83 100644 --- a/test/sql/test_defaults.py +++ b/test/sql/test_defaults.py @@ -13,7 +13,6 @@ from sqlalchemy import MetaData from sqlalchemy import Sequence from sqlalchemy import String from sqlalchemy import testing -from sqlalchemy import Unicode from sqlalchemy.schema import CreateTable from sqlalchemy.sql import literal_column from sqlalchemy.sql import select @@ -29,8 +28,6 @@ from sqlalchemy.testing.schema import Column from sqlalchemy.testing.schema import Table from sqlalchemy.types import TypeDecorator from sqlalchemy.types import TypeEngine -from sqlalchemy.util import b -from sqlalchemy.util import u class DDLTest(fixtures.TestBase, AssertsCompiledSQL): @@ -1428,29 +1425,6 @@ class ServerDefaultsOnPKTest(fixtures.TestBase): eq_(list(connection.execute(t.select())), [(5, "data")]) -class UnicodeDefaultsTest(fixtures.TestBase): - __backend__ = True - - def test_no_default(self): - Column(Unicode(32)) - - def test_unicode_default(self): - default = u("foo") - Column(Unicode(32), default=default) - - def test_nonunicode_default(self): - default = b("foo") - assert_raises_message( - sa.exc.SAWarning, - "Unicode column 'foobar' has non-unicode " - "default value b?'foo' specified.", - Column, - "foobar", - Unicode(32), - default=default, - ) - - class InsertFromSelectTest(fixtures.TablesTest): __backend__ = True diff --git a/test/sql/test_deprecations.py b/test/sql/test_deprecations.py index 93e280d4e1..2faae8b874 100644 --- a/test/sql/test_deprecations.py +++ b/test/sql/test_deprecations.py @@ -9,7 +9,6 @@ from sqlalchemy import bindparam from sqlalchemy import case from sqlalchemy import CHAR from sqlalchemy import column -from sqlalchemy import create_engine from sqlalchemy import exc from sqlalchemy import exists from sqlalchemy import ForeignKey @@ -29,7 +28,6 @@ from sqlalchemy import String from sqlalchemy import table from sqlalchemy import testing from sqlalchemy import text -from sqlalchemy import util from sqlalchemy.engine 
import default from sqlalchemy.sql import coercions from sqlalchemy.sql import LABEL_STYLE_TABLENAME_PLUS_COL @@ -159,29 +157,6 @@ class DeprecationWarningsTest(fixtures.TestBase, AssertsCompiledSQL): ): preparer.quote_schema("hi", True) - def test_string_convert_unicode(self): - with testing.expect_deprecated( - "The String.convert_unicode parameter is deprecated and " - "will be removed in a future release." - ): - String(convert_unicode=True) - - def test_string_convert_unicode_force(self): - with testing.expect_deprecated( - "The String.convert_unicode parameter is deprecated and " - "will be removed in a future release." - ): - String(convert_unicode="force") - - def test_engine_convert_unicode(self): - with testing.expect_deprecated( - "The create_engine.convert_unicode parameter and " - "corresponding dialect-level" - ): - create_engine( - "mysql+mysqldb://", convert_unicode=True, module=mock.Mock() - ) - def test_empty_and_or(self): with testing.expect_deprecated( r"Invoking and_\(\) without arguments is deprecated, and " @@ -213,57 +188,6 @@ class DeprecationWarningsTest(fixtures.TestBase, AssertsCompiledSQL): ) -class ConvertUnicodeDeprecationTest(fixtures.TestBase): - - __backend__ = True - - data = util.u( - "Alors vous imaginez ma surprise, au lever du jour, quand " - "une drôle de petite voix m’a réveillé. " - "Elle disait: « S’il vous plaît… dessine-moi un mouton! 
»" - ) - - def test_unicode_warnings_dialectlevel(self): - - unicodedata = self.data - - with testing.expect_deprecated( - "The create_engine.convert_unicode parameter and " - "corresponding dialect-level" - ): - dialect = default.DefaultDialect(convert_unicode=True) - dialect.supports_unicode_binds = False - - s = String() - uni = s.dialect_impl(dialect).bind_processor(dialect) - - uni(util.b("x")) - assert isinstance(uni(unicodedata), util.binary_type) - - eq_(uni(unicodedata), unicodedata.encode("utf-8")) - - def test_ignoring_unicode_error(self): - """checks String(unicode_error='ignore') is passed to - underlying codec.""" - - unicodedata = self.data - - with testing.expect_deprecated( - "The String.convert_unicode parameter is deprecated and " - "will be removed in a future release.", - "The String.unicode_errors parameter is deprecated and " - "will be removed in a future release.", - ): - type_ = String( - 248, convert_unicode="force", unicode_error="ignore" - ) - dialect = default.DefaultDialect(encoding="ascii") - proc = type_.result_processor(dialect, 10) - - utfdata = unicodedata.encode("utf8") - eq_(proc(utfdata), unicodedata.encode("ascii", "ignore").decode()) - - class SubqueryCoercionsTest(fixtures.TestBase, AssertsCompiledSQL): __dialect__ = "default" diff --git a/test/sql/test_types.py b/test/sql/test_types.py index 5acc5f0767..a15d163e04 100644 --- a/test/sql/test_types.py +++ b/test/sql/test_types.py @@ -81,7 +81,6 @@ from sqlalchemy.testing import engines from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_deprecated_20 from sqlalchemy.testing import expect_raises -from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures from sqlalchemy.testing import is_ from sqlalchemy.testing import is_not @@ -92,7 +91,6 @@ from sqlalchemy.testing.schema import pep435_enum from sqlalchemy.testing.schema import Table from sqlalchemy.testing.util import picklers from sqlalchemy.testing.util import 
round_decimal -from sqlalchemy.util import u def _all_dialect_modules(): @@ -338,12 +336,6 @@ class AdaptTest(fixtures.TestBase): t2 = t1.adapt(Text) eq_(t2.length, 50) - def test_convert_unicode_text_type(self): - with testing.expect_deprecated( - "The String.convert_unicode parameter is deprecated" - ): - eq_(types.String(convert_unicode=True).python_type, util.text_type) - class TypeAffinityTest(fixtures.TestBase): @testing.combinations( @@ -1026,50 +1018,6 @@ class UserDefinedTest( eq_(a.dialect_specific_args["bar"], "bar") -class StringConvertUnicodeTest(fixtures.TestBase): - @testing.combinations((Unicode,), (String,), argnames="datatype") - @testing.combinations((True,), (False,), argnames="convert_unicode") - @testing.combinations( - (String.RETURNS_CONDITIONAL,), - (String.RETURNS_BYTES,), - (String.RETURNS_UNICODE), - argnames="returns_unicode_strings", - ) - def test_convert_unicode( - self, datatype, convert_unicode, returns_unicode_strings - ): - s1 = datatype() - dialect = mock.Mock( - returns_unicode_strings=returns_unicode_strings, - encoding="utf-8", - convert_unicode=convert_unicode, - ) - - proc = s1.result_processor(dialect, None) - - string = u("méil") - bytestring = string.encode("utf-8") - - if ( - datatype is Unicode or convert_unicode - ) and returns_unicode_strings in ( - String.RETURNS_CONDITIONAL, - String.RETURNS_BYTES, - ): - eq_(proc(bytestring), string) - - if returns_unicode_strings is String.RETURNS_CONDITIONAL: - eq_(proc(string), string) - else: - if util.py3k: - # trying to decode a unicode - assert_raises(TypeError, proc, string) - else: - assert_raises(UnicodeEncodeError, proc, string) - else: - is_(proc, None) - - class TypeCoerceCastTest(fixtures.TablesTest): __backend__ = True @@ -1668,59 +1616,6 @@ class VariantTest(fixtures.TestBase, AssertsCompiledSQL): ) -class UnicodeTest(fixtures.TestBase): - - """Exercise the Unicode and related types. 
- - Note: unicode round trip tests are now in - sqlalchemy/testing/suite/test_types.py. - - """ - - __backend__ = True - - data = util.u( - "Alors vous imaginez ma surprise, au lever du jour, quand " - "une drôle de petite voix m’a réveillé. " - "Elle disait: « S’il vous plaît… dessine-moi un mouton! »" - ) - - def test_unicode_warnings_typelevel_native_unicode(self): - - unicodedata = self.data - u = Unicode() - dialect = default.DefaultDialect() - dialect.supports_unicode_binds = True - uni = u.dialect_impl(dialect).bind_processor(dialect) - if util.py3k: - assert_raises(exc.SAWarning, uni, b"x") - assert isinstance(uni(unicodedata), str) - else: - assert_raises(exc.SAWarning, uni, "x") - assert isinstance(uni(unicodedata), unicode) # noqa - - def test_unicode_warnings_typelevel_sqla_unicode(self): - unicodedata = self.data - u = Unicode() - dialect = default.DefaultDialect() - dialect.supports_unicode_binds = False - uni = u.dialect_impl(dialect).bind_processor(dialect) - assert_raises(exc.SAWarning, uni, util.b("x")) - assert isinstance(uni(unicodedata), util.binary_type) - - eq_(uni(unicodedata), unicodedata.encode("utf-8")) - - def test_unicode_warnings_totally_wrong_type(self): - u = Unicode() - dialect = default.DefaultDialect() - dialect.supports_unicode_binds = False - uni = u.dialect_impl(dialect).bind_processor(dialect) - with expect_warnings( - "Unicode type received non-unicode bind param value 5." - ): - eq_(uni(5), 5) - - class EnumTest(AssertsCompiledSQL, fixtures.TablesTest): __backend__ = True @@ -2479,13 +2374,11 @@ class EnumTest(AssertsCompiledSQL, fixtures.TablesTest): # depending on backend. assert "('x'," in e.print_sql() - @testing.uses_deprecated(".*convert_unicode") def test_repr(self): e = Enum( "x", "y", name="somename", - convert_unicode=True, quote=True, inherit_schema=True, native_enum=False, -- 2.47.2