From: Mike Bayer Date: Fri, 20 Mar 2015 18:57:28 +0000 (-0400) Subject: - merge recent commits from master which reorganize and X-Git-Tag: rel_0_9_10~56 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3c8ac32d8bbc80c2f8c11d2f9fbfd3fd8339fda6;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git - merge recent commits from master which reorganize and clarify MySQL unicode documentation, bringing it up-to-date with current DBAPI support --- diff --git a/doc/build/dialects/mysql.rst b/doc/build/dialects/mysql.rst index de71a99ac4..33a0d783b1 100644 --- a/doc/build/dialects/mysql.rst +++ b/doc/build/dialects/mysql.rst @@ -25,146 +25,141 @@ construction arguments, are as follows: .. autoclass:: BIGINT :members: __init__ - + .. autoclass:: BINARY :members: __init__ - + .. autoclass:: BIT :members: __init__ - + .. autoclass:: BLOB :members: __init__ - + .. autoclass:: BOOLEAN :members: __init__ - + .. autoclass:: CHAR :members: __init__ - + .. autoclass:: DATE :members: __init__ - + .. autoclass:: DATETIME :members: __init__ - + .. autoclass:: DECIMAL :members: __init__ - + .. autoclass:: DOUBLE :members: __init__ - + .. autoclass:: ENUM :members: __init__ - + .. autoclass:: FLOAT :members: __init__ - + .. autoclass:: INTEGER :members: __init__ - + .. autoclass:: LONGBLOB :members: __init__ - + .. autoclass:: LONGTEXT :members: __init__ - + .. autoclass:: MEDIUMBLOB :members: __init__ - + .. autoclass:: MEDIUMINT :members: __init__ - + .. autoclass:: MEDIUMTEXT :members: __init__ - + .. autoclass:: NCHAR :members: __init__ - + .. autoclass:: NUMERIC :members: __init__ - + .. autoclass:: NVARCHAR :members: __init__ - + .. autoclass:: REAL :members: __init__ - + .. autoclass:: SET :members: __init__ - + .. autoclass:: SMALLINT :members: __init__ - + .. autoclass:: TEXT :members: __init__ - + .. autoclass:: TIME :members: __init__ - + .. autoclass:: TIMESTAMP :members: __init__ - + .. autoclass:: TINYBLOB :members: __init__ - + .. 
autoclass:: TINYINT :members: __init__ - + .. autoclass:: TINYTEXT :members: __init__ - + .. autoclass:: VARBINARY :members: __init__ - + .. autoclass:: VARCHAR :members: __init__ - + .. autoclass:: YEAR :members: __init__ - + MySQL-Python -------------------- .. automodule:: sqlalchemy.dialects.mysql.mysqldb -OurSQL --------------- - -.. automodule:: sqlalchemy.dialects.mysql.oursql - pymysql ------------- @@ -180,6 +175,11 @@ cymysql .. automodule:: sqlalchemy.dialects.mysql.cymysql +OurSQL +-------------- + +.. automodule:: sqlalchemy.dialects.mysql.oursql + Google App Engine ----------------------- diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index a52b75f1ab..43d7889eba 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -146,6 +146,90 @@ multi-column key for some storage engines:: Column('id', Integer, primary_key=True) ) +.. _mysql_unicode: + +Unicode +------- + +Charset Selection +~~~~~~~~~~~~~~~~~ + +Most MySQL DBAPIs offer the option to set the client character set for +a connection. This is typically delivered using the ``charset`` parameter +in the URL, such as:: + + e = create_engine("mysql+pymysql://scott:tiger@localhost/\ +test?charset=utf8") + +This charset is the **client character set** for the connection. Some +MySQL DBAPIs will default this to a value such as ``latin1``, and some +will make use of the ``default-character-set`` setting in the ``my.cnf`` +file as well. Documentation for the DBAPI in use should be consulted +for specific behavior. + +The encoding used for Unicode has traditionally been ``'utf8'``. However, +for MySQL versions 5.5.3 on forward, a new MySQL-specific encoding +``'utf8mb4'`` has been introduced. The rationale for this new encoding +is due to the fact that MySQL's utf-8 encoding only supports +codepoints up to three bytes instead of four. 
Therefore, +when communicating with a MySQL database +that includes codepoints more than three bytes in size, +this new charset is preferred, if supported by both the database as well +as the client DBAPI, as in:: + + e = create_engine("mysql+pymysql://scott:tiger@localhost/\ +test?charset=utf8mb4") + +At the moment, up-to-date versions of MySQLdb and PyMySQL support the +``utf8mb4`` charset. Other DBAPIs such as MySQL-Connector and OurSQL +may **not** support it as of yet. + +In order to use ``utf8mb4`` encoding, changes to +the MySQL schema and/or server configuration may be required. + +.. seealso:: + + `The utf8mb4 Character Set \ +<http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html>`_ - \ +in the MySQL documentation + +Unicode Encoding / Decoding +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All modern MySQL DBAPIs offer the service of handling the encoding and +decoding of unicode data between the Python application space and the database. +As this was not always the case, SQLAlchemy also includes a comprehensive system +of performing the encode/decode task as well. As only one of these systems +should be in use at a time, SQLAlchemy has long included functionality +to automatically detect upon first connection whether or not the DBAPI is +automatically handling unicode. + +Whether or not the MySQL DBAPI will handle encoding can usually be configured +using a DBAPI flag ``use_unicode``, which is known to be supported at least +by MySQLdb, PyMySQL, and MySQL-Connector. 
Setting this value to ``0`` +in the "connect args" or query string will have the effect of disabling the +DBAPI's handling of unicode, such that it instead will return data of the +``str`` type or ``bytes`` type, with data in the configured charset:: + + # connect while disabling the DBAPI's unicode encoding/decoding + e = create_engine("mysql+mysqldb://scott:tiger@localhost/test?charset=utf8&use_unicode=0") + +Current recommendations for modern DBAPIs are as follows: + +* It is generally safe to leave the ``use_unicode`` flag set at + its default; that is, don't use it at all. +* Under Python 3, the ``use_unicode=0`` flag should **never be used**. + SQLAlchemy under Python 3 generally assumes the DBAPI receives and returns + string values as Python 3 strings, which are inherently unicode objects. +* Under Python 2 with MySQLdb, the ``use_unicode=0`` flag will **offer + superior performance**, as MySQLdb's unicode converters have been observed, + under Python 2 only, to have unusually slow performance compared to SQLAlchemy's + fast C-based encoders/decoders. + +In short: don't specify ``use_unicode`` *at all*, with the possible +exception of ``use_unicode=0`` on MySQLdb with Python 2 **only** for a +potential performance gain. 
+ Ansi Quoting Style ------------------ diff --git a/lib/sqlalchemy/dialects/mysql/gaerdbms.py b/lib/sqlalchemy/dialects/mysql/gaerdbms.py index bbb6a9868f..58b70737fb 100644 --- a/lib/sqlalchemy/dialects/mysql/gaerdbms.py +++ b/lib/sqlalchemy/dialects/mysql/gaerdbms.py @@ -22,7 +22,7 @@ developers-guide Cloud SQL now recommends creating connections via the mysql dialect using the URL format - `mysql+mysqldb://root@/<dbname>?unix_socket=/cloudsql/<projectid>:<instancename>` + ``mysql+mysqldb://root@/<dbname>?unix_socket=/cloudsql/<projectid>:<instancename>`` Pooling diff --git a/lib/sqlalchemy/dialects/mysql/mysqlconnector.py b/lib/sqlalchemy/dialects/mysql/mysqlconnector.py index c85a47f372..2c7e6de234 100644 --- a/lib/sqlalchemy/dialects/mysql/mysqlconnector.py +++ b/lib/sqlalchemy/dialects/mysql/mysqlconnector.py @@ -14,6 +14,12 @@ :url: http://dev.mysql.com/downloads/connector/python/ +Unicode +------- + +Please see :ref:`mysql_unicode` for current recommendations on unicode +handling. + """ from .base import (MySQLDialect, MySQLExecutionContext, diff --git a/lib/sqlalchemy/dialects/mysql/mysqldb.py b/lib/sqlalchemy/dialects/mysql/mysqldb.py index efbec2f293..fdfca32f5b 100644 --- a/lib/sqlalchemy/dialects/mysql/mysqldb.py +++ b/lib/sqlalchemy/dialects/mysql/mysqldb.py @@ -13,31 +13,22 @@ :connectstring: mysql+mysqldb://<user>:<password>@<host>[:<port>]/<dbname> :url: http://sourceforge.net/projects/mysql-python +.. _mysqldb_unicode: Unicode ------- -MySQLdb requires a "charset" parameter to be passed in order for it -to handle non-ASCII characters correctly. When this parameter is passed, -MySQLdb will also implicitly set the "use_unicode" flag to true, which means -that it will return Python unicode objects instead of bytestrings. -However, SQLAlchemy's decode process, when C extensions are enabled, -is orders of magnitude faster than that of MySQLdb as it does not call into -Python functions to do so. 
Therefore, the **recommended URL to use for -unicode** will include both charset and use_unicode=0:: +Please see :ref:`mysql_unicode` for current recommendations on unicode +handling. - create_engine("mysql+mysqldb://user:pass@host/dbname?charset=utf8&use_unicode=0") +Py3K Support +------------ -As of this writing, MySQLdb only runs on Python 2. It is not known how -MySQLdb behaves on Python 3 as far as unicode decoding. +Currently, MySQLdb only runs on Python 2 and development has been stopped. +`mysqlclient`_ is a fork of MySQLdb and provides Python 3 support as well +as some bugfixes. - -Known Issues -------------- - -MySQL-python version 1.2.2 has a serious memory leak related -to unicode conversion, a feature which is disabled via ``use_unicode=0``. -It is strongly advised to use the latest version of MySQL-Python. +.. _mysqlclient: https://github.com/PyMySQL/mysqlclient-python Using MySQLdb with Google Cloud SQL ----------------------------------- diff --git a/lib/sqlalchemy/dialects/mysql/oursql.py b/lib/sqlalchemy/dialects/mysql/oursql.py index ab6585abd0..ae8abc321a 100644 --- a/lib/sqlalchemy/dialects/mysql/oursql.py +++ b/lib/sqlalchemy/dialects/mysql/oursql.py @@ -16,22 +16,10 @@ Unicode ------- -oursql defaults to using ``utf8`` as the connection charset, but other -encodings may be used instead. Like the MySQL-Python driver, unicode support -can be completely disabled:: +Please see :ref:`mysql_unicode` for current recommendations on unicode +handling. 
- # oursql sets the connection charset to utf8 automatically; all strings come - # back as utf8 str - create_engine('mysql+oursql:///mydb?use_unicode=0') -To not automatically use ``utf8`` and instead use whatever the connection -defaults to, there is a separate parameter:: - - # use the default connection charset; all strings come back as unicode - create_engine('mysql+oursql:///mydb?default_charset=1') - - # use latin1 as the connection charset; all strings come back as unicode - create_engine('mysql+oursql:///mydb?charset=latin1') """ import re diff --git a/lib/sqlalchemy/dialects/mysql/pymysql.py b/lib/sqlalchemy/dialects/mysql/pymysql.py index 96650f9355..87159b5613 100644 --- a/lib/sqlalchemy/dialects/mysql/pymysql.py +++ b/lib/sqlalchemy/dialects/mysql/pymysql.py @@ -12,7 +12,13 @@ :dbapi: pymysql :connectstring: mysql+pymysql://<username>:<password>@<host>/<dbname>\ [?<options>] - :url: http://code.google.com/p/pymysql/ + :url: http://www.pymysql.org/ + +Unicode +------- + +Please see :ref:`mysql_unicode` for current recommendations on unicode +handling. MySQL-Python Compatibility -------------------------- diff --git a/lib/sqlalchemy/dialects/mysql/pyodbc.py b/lib/sqlalchemy/dialects/mysql/pyodbc.py index 08b3392995..b544f0584e 100644 --- a/lib/sqlalchemy/dialects/mysql/pyodbc.py +++ b/lib/sqlalchemy/dialects/mysql/pyodbc.py @@ -14,14 +14,11 @@ :connectstring: mysql+pyodbc://<username>:<password>@<dsnname> :url: http://pypi.python.org/pypi/pyodbc/ - -Limitations ------------ - -The mysql-pyodbc dialect is subject to unresolved character encoding issues -which exist within the current ODBC drivers available. -(see http://code.google.com/p/pyodbc/issues/detail?id=25). Consider usage -of OurSQL, MySQLdb, or MySQL-connector/Python. + .. note:: The PyODBC for MySQL dialect is not well supported, and + is subject to unresolved character encoding issues + which exist within the current ODBC drivers available. + (see http://code.google.com/p/pyodbc/issues/detail?id=25). + Other dialects for MySQL are recommended. 
""" diff --git a/lib/sqlalchemy/dialects/mysql/zxjdbc.py b/lib/sqlalchemy/dialects/mysql/zxjdbc.py index 9db9691604..37b0b63096 100644 --- a/lib/sqlalchemy/dialects/mysql/zxjdbc.py +++ b/lib/sqlalchemy/dialects/mysql/zxjdbc.py @@ -14,6 +14,9 @@ :driverurl: http://dev.mysql.com/downloads/connector/j/ + .. note:: Jython is not supported by current versions of SQLAlchemy. The + zxjdbc dialect should be considered as experimental. + Character Sets --------------