From: Mike Bayer Date: Thu, 6 Jul 2023 14:06:14 +0000 (-0400) Subject: match on single host/port only for integer port X-Git-Tag: rel_2_0_19~18 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a2c06a2a0acf769060f11bb34c1b55cecae5f5fe;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git match on single host/port only for integer port Fixed regression caused by improvements to PostgreSQL URL parsing in :ticket:`10004` where "host" query string arguments that had colons in them, to support various third party proxy servers and/or dialects, would not parse correctly as these were evaluated as ``host:port`` combinations. Parsing has been updated to consider a colon as indicating a ``host:port`` value only if the hostname contains only alphanumeric characters with dots or dashes only (e.g. no slashes), followed by exactly one colon followed by an all-integer token of zero or more digits. In all other cases, the full string is taken as a host. Fixes: #10069 Change-Id: I77beb27e44abc0a66aa0810de855daa4186dacfd --- diff --git a/doc/build/changelog/unreleased_20/10069.rst b/doc/build/changelog/unreleased_20/10069.rst new file mode 100644 index 0000000000..f04854ebc6 --- /dev/null +++ b/doc/build/changelog/unreleased_20/10069.rst @@ -0,0 +1,13 @@ +.. change:: + :tags: bug, postgresql + :tickets: 10069 + + Fixed regression caused by improvements to PostgreSQL URL parsing in + :ticket:`10004` where "host" query string arguments that had colons in + them, to support various third party proxy servers and/or dialects, would + not parse correctly as these were evaluated as ``host:port`` combinations. + Parsing has been updated to consider a colon as indicating a ``host:port`` + value only if the hostname contains only alphanumeric characters with dots + or dashes only (e.g. no slashes), followed by exactly one colon followed by + an all-integer token of zero or more digits. In all other cases, the + full string is taken as a host. 
diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 2d5f5c5ac9..5e0dee0ea3 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -3120,10 +3120,18 @@ class PGDialect(default.DefaultDialect): and len(hosts) == 1 and ":" in hosts[0] ): - integrated_multihost = True - h, p = hosts[0].split(":") - hosts = (h,) - ports = (p,) if p else (None,) + # internet host is alphanumeric plus dots or hyphens. + # this is essentially rfc1123, which refers to rfc952. + # https://stackoverflow.com/questions/3523028/ + # valid-characters-of-a-hostname + host_port_match = re.match( + r"^([a-zA-Z0-9\-\.]*)(?:\:(\d*))?$", hosts[0] + ) + if host_port_match: + integrated_multihost = True + h, p = host_port_match.group(1, 2) + hosts = (h,) + ports = (p,) if p else (None,) if "port" in url.query: if integrated_multihost: diff --git a/test/dialect/postgresql/test_dialect.py b/test/dialect/postgresql/test_dialect.py index 771ffea625..31335a84c0 100644 --- a/test/dialect/postgresql/test_dialect.py +++ b/test/dialect/postgresql/test_dialect.py @@ -268,6 +268,57 @@ class MultiHostConnectTest(fixtures.TestBase): "host": "hostA", }, ), + ( + # issue #10069 -if there is just one host as x:y with no + # integers, treat it as a hostname, to accommodate as many + # third party scenarios as possible + "postgresql+psycopg2://USER:PASS@/DB?host=hostA:xyz", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "hostA:xyz", + }, + ), + ( + # also issue #10069 - this parsing is not "defined" right now + # but err on the side of single host + "postgresql+psycopg2://USER:PASS@/DB?host=hostA:123.456", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "hostA:123.456", + }, + ), + ( + "postgresql+psycopg2://USER:PASS@/DB?host=192.168.1.50", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "192.168.1.50", + }, + ), + ( + 
"postgresql+psycopg2://USER:PASS@/DB?host=192.168.1.50:", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "192.168.1.50", + }, + ), + ( + "postgresql+psycopg2://USER:PASS@/DB?host=192.168.1.50:5678", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "192.168.1.50", + "port": "5678", + }, + ), ( "postgresql+psycopg2://USER:PASS@/DB?host=hostA:", { @@ -277,6 +328,54 @@ class MultiHostConnectTest(fixtures.TestBase): "host": "hostA", }, ), + ( + "postgresql+psycopg2://USER:PASS@/DB?host=HOSTNAME", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "HOSTNAME", + }, + ), + ( + "postgresql+psycopg2://USER:PASS@/DB?host=HOSTNAME:1234", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "HOSTNAME", + "port": "1234", + }, + ), + ( + # issue #10069 + "postgresql+psycopg2://USER:PASS@/DB?" + "host=/cloudsql/my-gcp-project:us-central1:mydbisnstance", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + "host": "/cloudsql/my-gcp-project:" + "us-central1:mydbisnstance", + }, + ), + ( + # issue #10069 + "postgresql+psycopg2://USER:PASS@/DB?" + "host=/cloudsql/my-gcp-project:4567", + { + "dbname": "DB", + "user": "USER", + "password": "PASS", + # full host,because the "hostname" contains slashes. 
+ # this corresponds to PG's "host" mechanics + # at https://www.postgresql.org/docs/current + # /libpq-connect.html#LIBPQ-PARAMKEYWORDS + # "If a host name looks like an absolute path name, it + # specifies Unix-domain communication " + "host": "/cloudsql/my-gcp-project:4567", + }, + ), ( "postgresql+psycopg2://USER:PASS@/DB?host=hostA:1234", { @@ -425,8 +524,9 @@ class MultiHostConnectTest(fixtures.TestBase): "postgresql+psycopg2://USER:PASS@/DB" "?host=hostA:xyz&host=hostB:123", ), - ("postgresql+psycopg2://USER:PASS@/DB?host=hostA:xyz",), ("postgresql+psycopg2://USER:PASS@/DB?host=hostA&port=xyz",), + # for single host with :xyz, as of #10069 this is treated as a + # hostname by itself, w/ colon plus digits argnames="url_string", ) @testing.combinations(