From c88bb2167b1c4b39c7f9378b621bb8d429269d90 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Sat, 17 Sep 2022 10:33:55 -0400 Subject: [PATCH] change verbiage stating exact compliance with RFC-1738 As long as we aren't using urlparse() to parse URLs, we are not RFC-1738 compliant. As we accept underscores in the scheme and not dashes or dots, we are not RFC-1738 compliant, so emulate language like that of PostgreSQL [1] that we "generally follow" this scheme but include some exceptions. [1] https://www.postgresql.org/docs/current/libpq-connect.html#id-1.7.3.8.3.6 Fixes: #8519 Change-Id: I2d7e55d9df17aed122cebb2c4c315f56c06a3da5 --- doc/build/changelog/changelog_02.rst | 6 ++++- doc/build/changelog/changelog_09.rst | 10 ++++----- doc/build/core/engines.rst | 14 +++++++----- lib/sqlalchemy/engine/url.py | 33 ++++++++++++++++------------ 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/doc/build/changelog/changelog_02.rst b/doc/build/changelog/changelog_02.rst index 69805d6098..3d40a79a32 100644 --- a/doc/build/changelog/changelog_02.rst +++ b/doc/build/changelog/changelog_02.rst @@ -1057,7 +1057,11 @@ :tickets: create_engine now takes only RFC-1738-style strings: - driver://user:password@host:port/database + ``driver://user:password@host:port/database`` + + **update** this format is generally but not exactly RFC-1738, + including that underscores, not dashes or periods, are accepted in the + "scheme" portion. .. change:: :tags: diff --git a/doc/build/changelog/changelog_09.rst b/doc/build/changelog/changelog_09.rst index acf1ede923..c9ec5f3a49 100644 --- a/doc/build/changelog/changelog_09.rst +++ b/doc/build/changelog/changelog_09.rst @@ -2647,11 +2647,11 @@ :tags: bug, engine :tickets: 2873 - The :func:`_sa.create_engine` routine and the related - :func:`.make_url` function no longer considers the ``+`` sign - to be a space within the password field. 
The parsing has been - adjusted to match RFC 1738 exactly, in that both ``username`` - and ``password`` expect only ``:``, ``@``, and ``/`` to be + The :func:`_sa.create_engine` routine and the related :func:`.make_url` + function no longer considers the ``+`` sign to be a space within the + password field. The parsing in this area has been adjusted to match + more closely to how RFC 1738 handles these tokens, in that both + ``username`` and ``password`` expect only ``:``, ``@``, and ``/`` to be encoded. .. seealso:: diff --git a/doc/build/core/engines.rst b/doc/build/core/engines.rst index 91f6b1cabf..60895ba966 100644 --- a/doc/build/core/engines.rst +++ b/doc/build/core/engines.rst @@ -55,12 +55,14 @@ See the section :ref:`dialect_toplevel` for information on the various backends Database URLs ============= -The :func:`_sa.create_engine` function produces an :class:`_engine.Engine` object based -on a URL. These URLs follow `RFC-1738 -`_, and usually can include username, password, -hostname, database name as well as optional keyword arguments for additional configuration. -In some cases a file path is accepted, and in others a "data source name" replaces -the "host" and "database" portions. The typical form of a database URL is: +The :func:`_sa.create_engine` function produces an :class:`_engine.Engine` +object based on a URL. The format of the URL generally follows `RFC-1738 +<https://www.ietf.org/rfc/rfc1738.txt>`_, with some exceptions, including that +underscores, not dashes or periods, are accepted within the "scheme" portion. +URLs typically include username, password, hostname, database name fields, as +well as optional keyword arguments for additional configuration. In some cases +a file path is accepted, and in others a "data source name" replaces the "host" +and "database" portions. The typical form of a database URL is: ..
sourcecode:: none diff --git a/lib/sqlalchemy/engine/url.py b/lib/sqlalchemy/engine/url.py index 6dea3677e9..8d80cfd1c8 100644 --- a/lib/sqlalchemy/engine/url.py +++ b/lib/sqlalchemy/engine/url.py @@ -47,9 +47,10 @@ class URL(NamedTuple): Represent the components of a URL used to connect to a database. This object is suitable to be passed directly to a - :func:`_sa.create_engine` call. The fields of the URL are parsed - from a string by the :func:`.make_url` function. The string - format of the URL is an RFC-1738-style string. + :func:`_sa.create_engine` call. The fields of the URL are parsed from a + string by the :func:`.make_url` function. The string format of the URL + generally follows `RFC-1738 <https://www.ietf.org/rfc/rfc1738.txt>`_, with + some exceptions. To create a new :class:`_engine.URL` object, use the :func:`.make_url` function. To construct a :class:`_engine.URL` @@ -614,12 +615,12 @@ class URL(NamedTuple): """ s = self.drivername + "://" if self.username is not None: - s += _rfc_1738_quote(self.username) + s += _sqla_url_quote(self.username) if self.password is not None: s += ":" + ( "***" if hide_password - else _rfc_1738_quote(str(self.password)) + else _sqla_url_quote(str(self.password)) ) s += "@" if self.host is not None: @@ -817,8 +818,12 @@ class URL(NamedTuple): def make_url(name_or_url: Union[str, URL]) -> URL: """Given a string, produce a new URL instance. - The given string is parsed according to the RFC 1738 spec. If an - existing URL object is passed, just returns the object. + The format of the URL generally follows `RFC-1738 + <https://www.ietf.org/rfc/rfc1738.txt>`_, with some exceptions, including + that underscores, and not dashes or periods, are accepted within the + "scheme" portion. + + If a :class:`.URL` object is passed, it is returned as is. ..
seealso:: @@ -827,12 +832,12 @@ def make_url(name_or_url: Union[str, URL]) -> URL: """ if isinstance(name_or_url, str): - return _parse_rfc1738_args(name_or_url) + return _parse_url(name_or_url) else: return name_or_url -def _parse_rfc1738_args(name: str) -> URL: +def _parse_url(name: str) -> URL: pattern = re.compile( r""" (?P[\w\+]+):// @@ -871,10 +876,10 @@ def _parse_rfc1738_args(name: str) -> URL: components["query"] = query if components["username"] is not None: - components["username"] = _rfc_1738_unquote(components["username"]) + components["username"] = _sqla_url_unquote(components["username"]) if components["password"] is not None: - components["password"] = _rfc_1738_unquote(components["password"]) + components["password"] = _sqla_url_unquote(components["password"]) ipv4host = components.pop("ipv4host") ipv6host = components.pop("ipv6host") @@ -888,12 +893,12 @@ def _parse_rfc1738_args(name: str) -> URL: else: raise exc.ArgumentError( - "Could not parse rfc1738 URL from string '%s'" % name + "Could not parse SQLAlchemy URL from string '%s'" % name ) -def _rfc_1738_quote(text: str) -> str: +def _sqla_url_quote(text: str) -> str: return re.sub(r"[:@/]", lambda m: "%%%X" % ord(m.group(0)), text) -_rfc_1738_unquote = unquote +_sqla_url_unquote = unquote -- 2.47.2