From 296c84313ab29bf9599634f38caaf7dd092e4e23 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Sat, 17 Oct 2020 11:39:56 -0400 Subject: [PATCH] Ensure escaping of percent signs in columns, parameters Improved support for column names that contain percent signs in the string, including repaired issues involving anoymous labels that also embedded a column name with a percent sign in it, as well as re-established support for bound parameter names with percent signs embedded on the psycopg2 dialect, using a late-escaping process similar to that used by the cx_Oracle dialect. * Added new constructor for _anonymous_label() that ensures incoming string tokens based on column or table names will have percent signs escaped; abstracts away the format of the label. * generalized cx_Oracle's quoted_bind_names facility into the compiler itself, and leveraged this for the psycopg2 dialect's issue with percent signs in names as well. the parameter substitution is now integrated with compiler.construct_parameters() as well as the recently reworked set_input_sizes(), reducing verbosity in the cx_Oracle dialect. Fixes: #5653 Change-Id: Ia2ad13ea68b4b0558d410026e5a33f5cb3fbab2c --- doc/build/changelog/unreleased_14/5653.rst | 11 +++ lib/sqlalchemy/dialects/oracle/base.py | 1 - lib/sqlalchemy/dialects/oracle/cx_oracle.py | 27 ++------ .../dialects/postgresql/psycopg2.py | 9 ++- lib/sqlalchemy/engine/default.py | 31 ++++++++- lib/sqlalchemy/sql/compiler.py | 38 ++++++++-- lib/sqlalchemy/sql/elements.py | 69 ++++++++++++------- lib/sqlalchemy/sql/selectable.py | 2 +- test/requirements.py | 21 +----- test/sql/test_compiler.py | 18 +++++ test/sql/test_selectable.py | 2 +- 11 files changed, 149 insertions(+), 80 deletions(-) create mode 100644 doc/build/changelog/unreleased_14/5653.rst diff --git a/doc/build/changelog/unreleased_14/5653.rst b/doc/build/changelog/unreleased_14/5653.rst new file mode 100644 index 0000000000..0722843b9d --- /dev/null +++ b/doc/build/changelog/unreleased_14/5653.rst @@ -0,0 +1,11 @@ +.. change:: + :tags: bug, sql, postgresql + :tickets: 5653 + + Improved support for column names that contain percent signs in the string, + including repaired issues involving anoymous labels that also embedded a + column name with a percent sign in it, as well as re-established support + for bound parameter names with percent signs embedded on the psycopg2 + dialect, using a late-escaping process similar to that used by the + cx_Oracle dialect. + diff --git a/lib/sqlalchemy/dialects/oracle/base.py b/lib/sqlalchemy/dialects/oracle/base.py index 655614769b..1f1f7501bd 100644 --- a/lib/sqlalchemy/dialects/oracle/base.py +++ b/lib/sqlalchemy/dialects/oracle/base.py @@ -849,7 +849,6 @@ class OracleCompiler(compiler.SQLCompiler): def __init__(self, *args, **kwargs): self.__wheres = {} - self._quoted_bind_names = {} super(OracleCompiler, self).__init__(*args, **kwargs) def visit_mod_binary(self, binary, operator, **kw): diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py index 7bde19090d..8eb9f8b3cf 100644 --- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py +++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py @@ -598,28 +598,20 @@ class OracleCompiler_cx_oracle(OracleCompiler): # need quoting :). names that include illegal characters # won't work however. quoted_name = '"%s"' % name - self._quoted_bind_names[name] = quoted_name - return OracleCompiler.bindparam_string(self, quoted_name, **kw) - else: - return OracleCompiler.bindparam_string(self, name, **kw) + kw["escaped_from"] = name + name = quoted_name + + return OracleCompiler.bindparam_string(self, name, **kw) class OracleExecutionContext_cx_oracle(OracleExecutionContext): out_parameters = None - def _setup_quoted_bind_names(self): - quoted_bind_names = self.compiled._quoted_bind_names - if quoted_bind_names: - for param in self.parameters: - for fromname, toname in quoted_bind_names.items(): - param[toname] = param[fromname] - del param[fromname] - def _generate_out_parameter_vars(self): # check for has_out_parameters or RETURNING, create cx_Oracle.var # objects if so if self.compiled.returning or self.compiled.has_out_parameters: - quoted_bind_names = self.compiled._quoted_bind_names + quoted_bind_names = self.compiled.escaped_bind_names for bindparam in self.compiled.binds.values(): if bindparam.isoutparam: name = self.compiled.bind_names[bindparam] @@ -684,9 +676,6 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext): self.out_parameters = {} - if self.compiled._quoted_bind_names: - self._setup_quoted_bind_names() - self._generate_out_parameter_vars() self._generate_cursor_outputtype_handler() @@ -1184,12 +1173,6 @@ class OracleDialect_cx_oracle(OracleDialect): for key, dbtype, sqltype in list_of_tuples if dbtype ) - if context and context.compiled: - quoted_bind_names = context.compiled._quoted_bind_names - collection = ( - (quoted_bind_names.get(key, key), dbtype) - for key, dbtype in collection - ) if not self.supports_unicode_binds: # oracle 8 only diff --git a/lib/sqlalchemy/dialects/postgresql/psycopg2.py b/lib/sqlalchemy/dialects/postgresql/psycopg2.py index 2446604ba5..72c36b4a86 100644 --- a/lib/sqlalchemy/dialects/postgresql/psycopg2.py +++ b/lib/sqlalchemy/dialects/postgresql/psycopg2.py @@ -644,7 +644,14 @@ class PGExecutionContext_psycopg2(PGExecutionContext): class PGCompiler_psycopg2(PGCompiler): - pass + def bindparam_string(self, name, **kw): + if "%" in name and not kw.get("post_compile", False): + # psycopg2 will not allow a percent sign in a + # pyformat parameter name even if it is doubled + kw["escaped_from"] = name + name = name.replace("%", "P") + + return PGCompiler.bindparam_string(self, name, **kw) class PGIdentifierPreparer_psycopg2(PGIdentifierPreparer): diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index d63cb4addd..7f92271e97 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -1507,6 +1507,10 @@ class DefaultExecutionContext(interfaces.ExecutionContext): inputsizes, self.cursor, self.statement, self.parameters, self ) + has_escaped_names = bool(self.compiled.escaped_bind_names) + if has_escaped_names: + escaped_bind_names = self.compiled.escaped_bind_names + if self.dialect.positional: items = [ (key, self.compiled.binds[key]) @@ -1529,7 +1533,11 @@ class DefaultExecutionContext(interfaces.ExecutionContext): dbtypes = inputsizes[bindparam] generic_inputsizes.extend( ( - paramname, + ( + escaped_bind_names.get(paramname, paramname) + if has_escaped_names + else paramname + ), dbtypes[idx % num], bindparam.type.types[idx % num], ) @@ -1540,12 +1548,29 @@ class DefaultExecutionContext(interfaces.ExecutionContext): else: dbtype = inputsizes.get(bindparam, None) generic_inputsizes.extend( - (paramname, dbtype, bindparam.type) + ( + ( + escaped_bind_names.get(paramname, paramname) + if has_escaped_names + else paramname + ), + dbtype, + bindparam.type, + ) for paramname in self._expanded_parameters[key] ) else: dbtype = inputsizes.get(bindparam, None) - generic_inputsizes.append((key, dbtype, bindparam.type)) + + escaped_name = ( + escaped_bind_names.get(key, key) + if has_escaped_names + else key + ) + + generic_inputsizes.append( + (escaped_name, dbtype, bindparam.type) + ) try: self.dialect.do_set_input_sizes( self.cursor, generic_inputsizes, self diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 23cd778d04..8718e15ea9 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -663,6 +663,12 @@ class SQLCompiler(Compiled): """ + escaped_bind_names = util.EMPTY_DICT + """Late escaping of bound parameter names that has to be converted + to the original name when looking in the parameter dictionary. + + """ + has_out_parameters = False """if True, there are bindparam() objects that have the isoutparam flag set.""" @@ -879,6 +885,8 @@ class SQLCompiler(Compiled): ): """return a dictionary of bind parameter keys and values""" + has_escaped_names = bool(self.escaped_bind_names) + if extracted_parameters: # related the bound parameters collected in the original cache key # to those collected in the incoming cache key. They will not have @@ -908,10 +916,16 @@ class SQLCompiler(Compiled): if params: pd = {} for bindparam, name in self.bind_names.items(): + escaped_name = ( + self.escaped_bind_names.get(name, name) + if has_escaped_names + else name + ) + if bindparam.key in params: - pd[name] = params[bindparam.key] + pd[escaped_name] = params[bindparam.key] elif name in params: - pd[name] = params[name] + pd[escaped_name] = params[name] elif _check and bindparam.required: if _group_number: @@ -936,13 +950,19 @@ class SQLCompiler(Compiled): value_param = bindparam if bindparam.callable: - pd[name] = value_param.effective_value + pd[escaped_name] = value_param.effective_value else: - pd[name] = value_param.value + pd[escaped_name] = value_param.value return pd else: pd = {} for bindparam, name in self.bind_names.items(): + escaped_name = ( + self.escaped_bind_names.get(name, name) + if has_escaped_names + else name + ) + if _check and bindparam.required: if _group_number: raise exc.InvalidRequestError( @@ -964,9 +984,9 @@ class SQLCompiler(Compiled): value_param = bindparam if bindparam.callable: - pd[name] = value_param.effective_value + pd[escaped_name] = value_param.effective_value else: - pd[name] = value_param.value + pd[escaped_name] = value_param.value return pd @util.memoized_instancemethod @@ -2316,6 +2336,7 @@ class SQLCompiler(Compiled): positional_names=None, post_compile=False, expanding=False, + escaped_from=None, **kw ): if self.positional: @@ -2323,6 +2344,11 @@ class SQLCompiler(Compiled): positional_names.append(name) else: self.positiontup.append(name) + + if escaped_from: + if not self.escaped_bind_names: + self.escaped_bind_names = {} + self.escaped_bind_names[escaped_from] = name if post_compile: return "[POSTCOMPILE_%s]" % name else: diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py index 5fb28f1d1a..00e28ac207 100644 --- a/lib/sqlalchemy/sql/elements.py +++ b/lib/sqlalchemy/sql/elements.py @@ -957,9 +957,11 @@ class ColumnElement( # as the identifier, because a column and its annotated version are # the same thing in a SQL statement if isinstance(seed, _anonymous_label): - return _anonymous_label("%s%%(%d %s)s" % (seed, hash(self), "")) + return _anonymous_label.safe_construct( + hash(self), "", enclosing_label=seed + ) - return _anonymous_label("%%(%d %s)s" % (hash(self), seed or "anon")) + return _anonymous_label.safe_construct(hash(self), seed or "anon") @util.memoized_property def anon_label(self): @@ -1324,21 +1326,17 @@ class BindParameter(roles.InElementRole, ColumnElement): key = quoted_name(key, quote) if unique: - self.key = _anonymous_label( - "%%(%d %s)s" - % ( - id(self), - re.sub(r"[%\(\) \$]+", "_", key).strip("_") - if key is not None - and not isinstance(key, _anonymous_label) - else "param", - ) + self.key = _anonymous_label.safe_construct( + id(self), + re.sub(r"[%\(\) \$]+", "_", key).strip("_") + if key is not None and not isinstance(key, _anonymous_label) + else "param", ) self._key_is_anon = True elif key: self.key = key else: - self.key = _anonymous_label("%%(%d param)s" % id(self)) + self.key = _anonymous_label.safe_construct(id(self), "param") self._key_is_anon = True # identifying key that won't change across @@ -1407,8 +1405,8 @@ class BindParameter(roles.InElementRole, ColumnElement): def _clone(self, maintain_key=False): c = ClauseElement._clone(self) if not maintain_key and self.unique: - c.key = _anonymous_label( - "%%(%d %s)s" % (id(c), c._orig_key or "param") + c.key = _anonymous_label.safe_construct( + id(c), c._orig_key or "param" ) return c @@ -1442,8 +1440,8 @@ class BindParameter(roles.InElementRole, ColumnElement): def _convert_to_unique(self): if not self.unique: self.unique = True - self.key = _anonymous_label( - "%%(%d %s)s" % (id(self), self._orig_key or "param") + self.key = _anonymous_label.safe_construct( + id(self), self._orig_key or "param" ) def __getstate__(self): @@ -1459,8 +1457,8 @@ class BindParameter(roles.InElementRole, ColumnElement): def __setstate__(self, state): if state.get("unique", False): - state["key"] = _anonymous_label( - "%%(%d %s)s" % (id(self), state.get("_orig_key", "param")) + state["key"] = _anonymous_label.safe_construct( + id(self), state.get("_orig_key", "param") ) self.__dict__.update(state) @@ -4188,8 +4186,8 @@ class Label(roles.LabeledColumnExprRole, ColumnElement): self.name = name self._resolve_label = self.name else: - self.name = _anonymous_label( - "%%(%d %s)s" % (id(self), getattr(element, "name", "anon")) + self.name = _anonymous_label.safe_construct( + id(self), getattr(element, "name", "anon") ) self.key = self._label = self._key_label = self.name @@ -4247,9 +4245,8 @@ class Label(roles.LabeledColumnExprRole, ColumnElement): def _copy_internals(self, clone=_clone, anonymize_labels=False, **kw): self._element = clone(self._element, **kw) if anonymize_labels: - self.name = self._resolve_label = _anonymous_label( - "%%(%d %s)s" - % (id(self), getattr(self.element, "name", "anon")) + self.name = self._resolve_label = _anonymous_label.safe_construct( + id(self), getattr(self.element, "name", "anon") ) self.key = self._label = self._key_label = self.name @@ -4890,17 +4887,39 @@ class _anonymous_label(_truncated_label): __slots__ = () + @classmethod + def safe_construct(cls, seed, body, enclosing_label=None): + # type: (int, str, Optional[_anonymous_label]) -> _anonymous_label + + label = "%%(%d %s)s" % (seed, body.replace("%", "%%")) + if enclosing_label: + label = "%s%s" % (enclosing_label, label) + + return _anonymous_label(label) + def __add__(self, other): + if "%" in other and not isinstance(other, _anonymous_label): + other = util.text_type(other).replace("%", "%%") + else: + other = util.text_type(other) + return _anonymous_label( quoted_name( - util.text_type.__add__(self, util.text_type(other)), self.quote + util.text_type.__add__(self, other), + self.quote, ) ) def __radd__(self, other): + if "%" in other and not isinstance(other, _anonymous_label): + other = util.text_type(other).replace("%", "%%") + else: + other = util.text_type(other) + return _anonymous_label( quoted_name( - util.text_type.__add__(util.text_type(other), self), self.quote + util.text_type.__add__(other, self), + self.quote, ) ) diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py index 0e88a89991..fd88324007 100644 --- a/lib/sqlalchemy/sql/selectable.py +++ b/lib/sqlalchemy/sql/selectable.py @@ -1432,7 +1432,7 @@ class AliasedReturnsRows(NoInit, FromClause): name = getattr(selectable, "name", None) if isinstance(name, _anonymous_label): name = None - name = _anonymous_label("%%(%d %s)s" % (id(self), name or "anon")) + name = _anonymous_label.safe_construct(id(self), name or "anon") self.name = name def _refresh_for_new_column(self, column): diff --git a/test/requirements.py b/test/requirements.py index 355f8910e3..4486973f61 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -1343,26 +1343,7 @@ class DefaultRequirements(SuiteRequirements): @property def percent_schema_names(self): - return skip_if( - [ - ( - "+psycopg2", - None, - None, - "psycopg2 2.4 no longer accepts percent " - "sign in bind placeholders", - ), - ( - "+psycopg2cffi", - None, - None, - "psycopg2cffi does not accept percent signs in " - "bind placeholders", - ), - ("mysql", None, None, "executemany() doesn't work here"), - ("mariadb", None, None, "executemany() doesn't work here"), - ] - ) + return exclusions.open() @property def order_by_label_with_expression(self): diff --git a/test/sql/test_compiler.py b/test/sql/test_compiler.py index ffabf9379d..ef2f75b2d6 100644 --- a/test/sql/test_compiler.py +++ b/test/sql/test_compiler.py @@ -941,6 +941,24 @@ class SelectTest(fixtures.TestBase, AssertsCompiledSQL): "AS z FROM keyed) AS anon_2) AS anon_1", ) + @testing.combinations("per cent", "per % cent", "%percent") + def test_percent_names_collide_with_anonymizing(self, name): + table1 = table("t1", column(name)) + + jj = select(table1.c[name]).subquery() + jjj = join(table1, jj, table1.c[name] == jj.c[name]) + + j2 = jjj.select().apply_labels().subquery("foo") + + self.assert_compile( + j2.select(), + 'SELECT foo."t1_%(name)s", foo."anon_1_%(name)s" FROM ' + '(SELECT t1."%(name)s" AS "t1_%(name)s", anon_1."%(name)s" ' + 'AS "anon_1_%(name)s" FROM t1 JOIN (SELECT t1."%(name)s" AS ' + '"%(name)s" FROM t1) AS anon_1 ON t1."%(name)s" = ' + 'anon_1."%(name)s") AS foo' % {"name": name}, + ) + def test_exists(self): s = select(table1.c.myid).where(table1.c.myid == 5) diff --git a/test/sql/test_selectable.py b/test/sql/test_selectable.py index b98fbd3d07..c75e8886de 100644 --- a/test/sql/test_selectable.py +++ b/test/sql/test_selectable.py @@ -740,7 +740,7 @@ class SelectableTest( assert u2.corresponding_column(s1.selected_columns.col1) is u2.c.col1 assert u2.corresponding_column(s2.selected_columns.col1) is u2.c.col1 - def test_foo(self): + def test_union_alias_misc(self): s1 = select(table1.c.col1, table1.c.col2) s2 = select(table1.c.col2, table1.c.col1) -- 2.47.2