From: Mike Bayer Date: Mon, 1 Aug 2022 14:29:13 +0000 (-0400) Subject: repair psycopg2 (and psycopg) multiple hosts format X-Git-Tag: rel_1_4_40~12^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=011e5f87138a29c2b4555bf494cee16c804e1e45;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git repair psycopg2 (and psycopg) multiple hosts format Fixed issue in psycopg2 dialect where the "multiple hosts" feature implemented for :ticket:`4392`, where multiple ``host:port`` pairs could be passed in the query string as ``?host=host1:port1&host=host2:port2&host=host3:port3`` was not implemented correctly, as it did not propagate the "port" parameter appropriately. Connections that didn't use a different "port" likely worked without issue, and connections that had "port" for some of the entries may have incorrectly passed on that hostname. The format is now corrected to pass hosts/ports appropriately. As part of this change, maintained support for another multihost style that worked unintentionally, which is comma-separated ``?host=h1,h2,h3&port=p1,p2,p3``. This format is more consistent with libpq's query-string format, whereas the previous format is inspired by a different aspect of libpq's URI format but is not quite the same thing. If the two styles are mixed together, an error is raised as this is ambiguous. Fixes: #4392 Change-Id: Ic9cc0b0e6e90725e158d9efe73e042853dd1263f (cherry picked from commit 93e6f4f05ba885b16accf0ad811160dd7d0eec70) --- diff --git a/doc/build/changelog/unreleased_14/4392.rst b/doc/build/changelog/unreleased_14/4392.rst new file mode 100644 index 0000000000..9b83b09cc5 --- /dev/null +++ b/doc/build/changelog/unreleased_14/4392.rst @@ -0,0 +1,22 @@ +.. change:: + :tags: bug, postgresql + :tickets: 4392 + + Fixed issue in psycopg2 dialect where the "multiple hosts" feature + implemented for :ticket:`4392`, where multiple ``host:port`` pairs could be + passed in the query string as + ``?host=host1:port1&host=host2:port2&host=host3:port3`` was not implemented + correctly, as it did not propagate the "port" parameter appropriately. + Connections that didn't use a different "port" likely worked without issue, + and connections that had "port" for some of the entries may have + incorrectly passed on that hostname. The format is now corrected to pass + hosts/ports appropriately. + + As part of this change, maintained support for another multihost style that + worked unintentionally, which is comma-separated + ``?host=h1,h2,h3&port=p1,p2,p3``. This format is more consistent with + libpq's query-string format, whereas the previous format is inspired by a + different aspect of libpq's URI format but is not quite the same thing. + + If the two styles are mixed together, an error is raised as this is + ambiguous. \ No newline at end of file diff --git a/lib/sqlalchemy/dialects/postgresql/psycopg2.py b/lib/sqlalchemy/dialects/postgresql/psycopg2.py index bacd60bbef..67474271e8 100644 --- a/lib/sqlalchemy/dialects/postgresql/psycopg2.py +++ b/lib/sqlalchemy/dialects/postgresql/psycopg2.py @@ -120,22 +120,51 @@ Specifying multiple fallback hosts psycopg2 supports multiple connection points in the connection string. When the ``host`` parameter is used multiple times in the query section of the URL, SQLAlchemy will create a single string of the host and port -information provided to make the connections:: +information provided to make the connections. Tokens may consist of +``host::port`` or just ``host``; in the latter case, the default port +is selected by libpq. In the example below, three host connections +are specified, for ``HostA::PortA``, ``HostB`` connecting to the default port, +and ``HostC::PortC``:: create_engine( - "postgresql+psycopg2://user:password@/dbname?host=HostA:port1&host=HostB&host=HostC" + "postgresql+psycopg2://user:password@/dbname?host=HostA:PortA&host=HostB&host=HostC:PortC" ) -A connection to each host is then attempted until either a connection is successful -or all connections are unsuccessful in which case an error is raised. +As an alternative, libpq query string format also may be used; this specifies +``host`` and ``port`` as single query string arguments with comma-separated +lists - the default port can be chosen by indicating an empty value +in the comma separated list:: + + create_engine( + "postgresql+psycopg2://user:password@/dbname?host=HostA,HostB,HostC&port=PortA,,PortC" + ) + +With either URL style, connections to each host is attempted based on a +configurable strategy, which may be configured using the libpq +``target_session_attrs`` parameter. Per libpq this defaults to ``any`` +which indicates a connection to each host is then attempted until a connection is successful. +Other strategies include ``primary``, ``prefer-standby``, etc. The complete +list is documented by PostgreSQL at +`libpq connection strings `_. + +For example, to indicate two hosts using the ``primary`` strategy:: + + create_engine( + "postgresql+psycopg2://user:password@/dbname?host=HostA:PortA&host=HostB&host=HostC:PortC&target_session_attrs=primary" + ) + +.. versionchanged:: 1.4.40 Port specification in psycopg2 multiple host format + is repaired, previously ports were not correctly interpreted in this context. + libpq comma-separated format is also now supported. .. versionadded:: 1.3.20 Support for multiple hosts in PostgreSQL connection string. .. seealso:: - `PQConnString \ - `_ + `libpq connection strings `_ - please refer + to this section in the libpq documentation for complete background on multiple host support. + Empty DSN Connections / Environment Variable Connections --------------------------------------------------------- @@ -988,20 +1017,27 @@ class PGDialect_psycopg2(PGDialect): if "host" in url.query: is_multihost = isinstance(url.query["host"], (list, tuple)) - if opts: + if opts or url.query: + if not opts: + opts = {} if "port" in opts: opts["port"] = int(opts["port"]) opts.update(url.query) if is_multihost: - opts["host"] = ",".join(url.query["host"]) - # send individual dbname, user, password, host, port - # parameters to psycopg2.connect() - return ([], opts) - elif url.query: - # any other connection arguments, pass directly - opts.update(url.query) - if is_multihost: - opts["host"] = ",".join(url.query["host"]) + hosts, ports = zip( + *[ + token.split(":") if ":" in token else (token, "") + for token in url.query["host"] + ] + ) + opts["host"] = ",".join(hosts) + if "port" in opts: + raise exc.ArgumentError( + "Can't mix 'multihost' formats together; use " + '"host=h1,h2,h3&port=p1,p2,p3" or ' + '"host=h1:p1&host=h2:p2&host=h3:p3" separately' + ) + opts["port"] = ",".join(ports) return ([], opts) else: # no connection arguments whatsoever; psycopg2.connect() diff --git a/test/dialect/postgresql/test_dialect.py b/test/dialect/postgresql/test_dialect.py index 8aa9036495..fa470a18ce 100644 --- a/test/dialect/postgresql/test_dialect.py +++ b/test/dialect/postgresql/test_dialect.py @@ -8,6 +8,7 @@ from sqlalchemy import BigInteger from sqlalchemy import bindparam from sqlalchemy import cast from sqlalchemy import Column +from sqlalchemy import create_engine from sqlalchemy import DateTime from sqlalchemy import DDL from sqlalchemy import event @@ -41,6 +42,7 @@ from sqlalchemy.engine import url from sqlalchemy.sql.selectable import LABEL_STYLE_TABLENAME_PLUS_COL from sqlalchemy.testing import config from sqlalchemy.testing import engines +from sqlalchemy.testing import expect_raises_message from sqlalchemy.testing import fixtures from sqlalchemy.testing import is_ from sqlalchemy.testing import is_false @@ -237,24 +239,85 @@ $$ LANGUAGE plpgsql;""" eq_(cargs, []) eq_(cparams, {"host": "somehost", "any_random_thing": "yes"}) - def test_psycopg2_nonempty_connection_string_w_query_two(self): + @testing.combinations( + ( + "postgresql+psycopg2://USER:PASS@/DB?host=hostA", + { + "database": "DB", + "user": "USER", + "password": "PASS", + "host": "hostA", + }, + ), + ( + "postgresql+psycopg2://USER:PASS@/DB" + "?host=hostA&host=hostB&host=hostC", + { + "database": "DB", + "user": "USER", + "password": "PASS", + "host": "hostA,hostB,hostC", + "port": ",,", + }, + ), + ( + "postgresql+psycopg2://USER:PASS@/DB" + "?host=hostA&host=hostB:portB&host=hostC:portC", + { + "database": "DB", + "user": "USER", + "password": "PASS", + "host": "hostA,hostB,hostC", + "port": ",portB,portC", + }, + ), + ( + "postgresql+psycopg2://USER:PASS@/DB?" + "host=hostA:portA&host=hostB:portB&host=hostC:portC", + { + "database": "DB", + "user": "USER", + "password": "PASS", + "host": "hostA,hostB,hostC", + "port": "portA,portB,portC", + }, + ), + ( + "postgresql+psycopg2:///" + "?host=hostA:portA&host=hostB:portB&host=hostC:portC", + {"host": "hostA,hostB,hostC", "port": "portA,portB,portC"}, + ), + ( + "postgresql+psycopg2:///" + "?host=hostA:portA&host=hostB:portB&host=hostC:portC", + {"host": "hostA,hostB,hostC", "port": "portA,portB,portC"}, + ), + ( + "postgresql+psycopg2:///" + "?host=hostA,hostB,hostC&port=portA,portB,portC", + {"host": "hostA,hostB,hostC", "port": "portA,portB,portC"}, + ), + argnames="url_string,expected", + ) + def test_psycopg_multi_hosts(self, url_string, expected): dialect = psycopg2_dialect.dialect() - url_string = "postgresql://USER:PASS@/DB?host=hostA" u = url.make_url(url_string) cargs, cparams = dialect.create_connect_args(u) eq_(cargs, []) - eq_(cparams["host"], "hostA") + eq_(cparams, expected) - def test_psycopg2_nonempty_connection_string_w_query_three(self): + @testing.combinations( + "postgresql+psycopg2:///?host=H&host=H&port=5432,5432", + "postgresql+psycopg2://user:pass@/dbname?host=H&host=H&port=5432,5432", + argnames="url_string", + ) + def test_psycopg_no_mix_hosts(self, url_string): dialect = psycopg2_dialect.dialect() - url_string = ( - "postgresql://USER:PASS@/DB" - "?host=hostA:portA&host=hostB&host=hostC" - ) - u = url.make_url(url_string) - cargs, cparams = dialect.create_connect_args(u) - eq_(cargs, []) - eq_(cparams["host"], "hostA:portA,hostB,hostC") + with expect_raises_message( + exc.ArgumentError, "Can't mix 'multihost' formats together" + ): + u = url.make_url(url_string) + dialect.create_connect_args(u) def test_psycopg2_disconnect(self): class Error(Exception): @@ -293,6 +356,41 @@ $$ LANGUAGE plpgsql;""" eq_(dialect.is_disconnect("not an error", None, None), False) +class BackendDialectTest(fixtures.TestBase): + __backend__ = True + + @testing.only_on(["+psycopg", "+psycopg2"]) + @testing.combinations( + "host=H:P&host=H:P&host=H:P", + "host=H:P&host=H&host=H", + "host=H:P&host=H&host=H:P", + "host=H&host=H:P&host=H", + "host=H,H,H&port=P,P,P", + ) + def test_connect_psycopg_multiple_hosts(self, pattern): + """test the fix for #4392""" + + tdb_url = testing.db.url + + host = tdb_url.host + if host == "127.0.0.1": + host = "localhost" + port = str(tdb_url.port) if tdb_url.port else "5432" + + query_str = pattern.replace("H", host).replace("P", port) + url_string = "%s://%s:" "%s@/%s?%s" % ( + tdb_url.drivername, + tdb_url.username, + tdb_url.password, + tdb_url.database, + query_str, + ) + + e = create_engine(url_string) + with e.connect() as conn: + eq_(conn.exec_driver_sql("select 1").scalar(), 1) + + class PGCodeTest(fixtures.TestBase): __only_on__ = "postgresql"