From: Mike Bayer Date: Wed, 5 Nov 2008 20:50:48 +0000 (+0000) Subject: - Dialects can now generate label names of adjustable length. X-Git-Tag: rel_0_5rc3~10 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9f894d2f265bb5fd03ab0b3aa3fd164108c99259;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git - Dialects can now generate label names of adjustable length. Pass in the argument "label_length=" to create_engine() to adjust how many characters max will be present in dynamically generated column labels, i.e. "somecolumn AS somelabel". Any value less than 6 will result in a label of minimal size, consisting of an underscore and a numeric counter. The compiler uses the value of dialect.max_identifier_length as a default. [ticket:1211] - removed ANON_NAME regular expression, using string patterns now - _generated_label() unicode subclass is used to indicate generated names which are subject to truncation --- diff --git a/CHANGES b/CHANGES index 2b61127a51..d43ec9c177 100644 --- a/CHANGES +++ b/CHANGES @@ -62,7 +62,16 @@ CHANGES to be based solely on presence of cursor.description. All the regexp-based guessing about statements returning rows has been removed [ticket:1212]. - + + - Dialects can now generate label names of adjustable length. + Pass in the argument "label_length=" to create_engine() + to adjust how many characters max will be present in dynamically + generated column labels, i.e. "somecolumn AS somelabel". Any + value less than 6 will result in a label of minimal size, + consisting of an underscore and a numeric counter. + The compiler uses the value of dialect.max_identifier_length + as a default. [ticket:1211] + - Further simplified SELECT compilation and its relationship to result row processing. 
diff --git a/doc/build/content/dbengine.txt b/doc/build/content/dbengine.txt index 492e6df4bc..c790685307 100644 --- a/doc/build/content/dbengine.txt +++ b/doc/build/content/dbengine.txt @@ -139,6 +139,7 @@ A list of all standard options, as well as several that are used by particular d * **echo=False** - if True, the Engine will log all statements as well as a repr() of their parameter lists to the engines logger, which defaults to sys.stdout. The `echo` attribute of `Engine` can be modified at any time to turn logging on and off. If set to the string `"debug"`, result rows will be printed to the standard output as well. This flag ultimately controls a Python logger; see [dbengine_logging](rel:dbengine_logging) at the end of this chapter for information on how to configure logging directly. * **echo_pool=False** - if True, the connection pool will log all checkouts/checkins to the logging stream, which defaults to sys.stdout. This flag ultimately controls a Python logger; see [dbengine_logging](rel:dbengine_logging) for information on how to configure logging directly. * **encoding='utf-8'** - the encoding to use for all Unicode translations, both by engine-wide unicode conversion as well as the `Unicode` type object. +* **label_length=None** - optional integer value which limits the size of dynamically generated column labels to that many characters. If less than 6, labels are generated as "_(counter)". If `None`, the value of `dialect.max_identifier_length` is used instead. * **module=None** - used by database implementations which support multiple DBAPI modules, this is a reference to a DBAPI2 module to be used instead of the engine's default module. For Postgres, the default is psycopg2. For Oracle, it's cx_Oracle. * **pool=None** - an already-constructed instance of `sqlalchemy.pool.Pool`, such as a `QueuePool` instance. 
If non-None, this pool will be used directly as the underlying connection pool for the engine, bypassing whatever connection parameters are present in the URL argument. For information on constructing connection pools manually, see [pooling](rel:pooling). * **poolclass=None** - a `sqlalchemy.pool.Pool` subclass, which will be used to create a connection pool instance using the connection parameters given in the URL. Note this differs from `pool` in that you don't actually instantiate the pool in this case, you just indicate what type of pool to be used. diff --git a/lib/sqlalchemy/engine/default.py b/lib/sqlalchemy/engine/default.py index f99cac4659..ec15313e42 100644 --- a/lib/sqlalchemy/engine/default.py +++ b/lib/sqlalchemy/engine/default.py @@ -40,7 +40,7 @@ class DefaultDialect(base.Dialect): supports_default_values = False supports_empty_insert = True - def __init__(self, convert_unicode=False, assert_unicode=False, encoding='utf-8', paramstyle=None, dbapi=None, **kwargs): + def __init__(self, convert_unicode=False, assert_unicode=False, encoding='utf-8', paramstyle=None, dbapi=None, label_length=None, **kwargs): self.convert_unicode = convert_unicode self.assert_unicode = assert_unicode self.encoding = encoding @@ -55,6 +55,9 @@ class DefaultDialect(base.Dialect): self.paramstyle = self.default_paramstyle self.positional = self.paramstyle in ('qmark', 'format', 'numeric') self.identifier_preparer = self.preparer(self) + if label_length and label_length > self.max_identifier_length: + raise exc.ArgumentError("Label length of %d is greater than this dialect's maximum identifier length of %d" % (label_length, self.max_identifier_length)) + self.label_length = label_length def create_execution_context(self, connection, **kwargs): return DefaultExecutionContext(self, connection, **kwargs) diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index cad35972d3..ec378d9c6d 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ 
-1210,7 +1210,8 @@ class Query(object): try: params[_get_params[primary_key].key] = ident[i] except IndexError: - raise sa_exc.InvalidRequestError("Could not find enough values to formulate primary key for query.get(); primary key columns are %s" % ', '.join("'%s'" % str(c) for c in q.mapper.primary_key)) + raise sa_exc.InvalidRequestError("Could not find enough values to formulate primary key for " + "query.get(); primary key columns are %s" % ', '.join("'%s'" % c for c in q.mapper.primary_key)) q._params = params if lockmode is not None: diff --git a/lib/sqlalchemy/orm/shard.py b/lib/sqlalchemy/orm/shard.py index b4525d8fbf..395d87dbfe 100644 --- a/lib/sqlalchemy/orm/shard.py +++ b/lib/sqlalchemy/orm/shard.py @@ -93,7 +93,7 @@ class ShardedQuery(Query): def _execute_and_instances(self, context): if self._shard_id is not None: - result = self.session.connection(mapper=self._mapper_zero(), shard_id=self._shard_id).execute(context.statement, **self._params) + result = self.session.connection(mapper=self._mapper_zero(), shard_id=self._shard_id).execute(context.statement, self._params) try: return iter(self.instances(result, context)) finally: @@ -101,7 +101,7 @@ class ShardedQuery(Query): else: partial = [] for shard_id in self.query_chooser(self): - result = self.session.connection(mapper=self._mapper_zero(), shard_id=shard_id).execute(context.statement, **self._params) + result = self.session.connection(mapper=self._mapper_zero(), shard_id=shard_id).execute(context.statement, self._params) try: partial = partial + list(self.instances(result, context)) finally: diff --git a/lib/sqlalchemy/sql/compiler.py b/lib/sqlalchemy/sql/compiler.py index 63557f24b5..1c7c66f477 100644 --- a/lib/sqlalchemy/sql/compiler.py +++ b/lib/sqlalchemy/sql/compiler.py @@ -47,7 +47,6 @@ ILLEGAL_INITIAL_CHARACTERS = re.compile(r'[0-9$]') BIND_PARAMS = re.compile(r'(? 
self.dialect.max_identifier_length: - counter = self.generated_ids.get(ident_class, 1) - truncname = anonname[0:self.dialect.max_identifier_length - 6] + "_" + hex(counter)[2:] - self.generated_ids[ident_class] = counter + 1 + if len(anonname) > self.label_length: + counter = self.truncated_names.get(ident_class, 1) + truncname = anonname[0:max(self.label_length - 6, 0)] + "_" + hex(counter)[2:] + self.truncated_names[ident_class] = counter + 1 else: truncname = anonname - self.generated_ids[(ident_class, name)] = truncname + self.truncated_names[(ident_class, name)] = truncname return truncname - def _process_anon(self, match): - (ident, derived) = match.group(1, 2) - - key = ('anonymous', ident) - if key in self.generated_ids: - return self.generated_ids[key] - else: - anonymous_counter = self.generated_ids.get(('anon_counter', derived), 1) - newname = derived + "_" + str(anonymous_counter) - self.generated_ids[('anon_counter', derived)] = anonymous_counter + 1 - self.generated_ids[key] = newname - return newname - def _anonymize(self, name): - return ANONYMOUS_LABEL.sub(self._process_anon, name) + return name % self.anon_map + + def _process_anon(self, key): + (ident, derived) = key.split(' ') + + anonymous_counter = self.anon_map.get(derived, 1) + self.anon_map[derived] = anonymous_counter + 1 + return derived + "_" + str(anonymous_counter) def bindparam_string(self, name): if self.positional: @@ -438,7 +439,7 @@ class DefaultCompiler(engine.Compiled): def visit_alias(self, alias, asfrom=False, **kwargs): if asfrom: - return self.process(alias.original, asfrom=True, **kwargs) + " AS " + self.preparer.format_alias(alias, self._anonymize(alias.name)) + return self.process(alias.original, asfrom=True, **kwargs) + " AS " + self.preparer.format_alias(alias, alias.name % self.anon_map) else: return self.process(alias.original, **kwargs) @@ -457,7 +458,7 @@ class DefaultCompiler(engine.Compiled): not column.is_literal and \ column.table is not None and \ not 
isinstance(column.table, sql.Select): - return _CompileLabel(column, column.name) + return _CompileLabel(column, sql._generated_label(column.name)) elif not isinstance(column, (sql._UnaryExpression, sql._TextClause, sql._BindParamClause)) and (not hasattr(column, 'name') or isinstance(column, sql._Function)): return _CompileLabel(column, column.anon_label) else: diff --git a/lib/sqlalchemy/sql/expression.py b/lib/sqlalchemy/sql/expression.py index 3b996d6cba..85f229ba0d 100644 --- a/lib/sqlalchemy/sql/expression.py +++ b/lib/sqlalchemy/sql/expression.py @@ -869,6 +869,9 @@ func = _FunctionGenerator() # TODO: use UnaryExpression for this instead ? modifier = _FunctionGenerator(group=False) +class _generated_label(unicode): + """A unicode subclass used to identify dynamically generated names.""" + def _clone(element): return element._clone() @@ -1607,7 +1610,7 @@ class ColumnElement(ClauseElement, _CompareMixin): expressions and function calls. """ - return "{ANON %d %s}" % (id(self), getattr(self, 'name', 'anon')) + return _generated_label("%%(%d %s)s" % (id(self), getattr(self, 'name', 'anon'))) class ColumnCollection(util.OrderedProperties): """An ordered dictionary that stores a list of ColumnElement @@ -1908,9 +1911,9 @@ class _BindParamClause(ColumnElement): """ if unique: - self.key = "{ANON %d %s}" % (id(self), key or 'param') + self.key = _generated_label("%%(%d %s)s" % (id(self), key or 'param')) else: - self.key = key or "{ANON %d param}" % id(self) + self.key = key or _generated_label("%%(%d param)s" % id(self)) self._orig_key = key or 'param' self.unique = unique self.value = value @@ -1927,13 +1930,13 @@ class _BindParamClause(ColumnElement): def _clone(self): c = ClauseElement._clone(self) if self.unique: - c.key = "{ANON %d %s}" % (id(c), c._orig_key or 'param') + c.key = _generated_label("%%(%d %s)s" % (id(c), c._orig_key or 'param')) return c def _convert_to_unique(self): if not self.unique: self.unique = True - self.key = "{ANON %d %s}" % 
(id(self), self._orig_key or 'param') + self.key = _generated_label("%%(%d %s)s" % (id(self), self._orig_key or 'param')) def _get_from_objects(self, **modifiers): return [] @@ -2518,7 +2521,7 @@ class Alias(FromClause): if alias is None: if self.original.named_with_column: alias = getattr(self.original, 'name', None) - alias = '{ANON %d %s}' % (id(self), alias or 'anon') + alias = _generated_label('%%(%d %s)s' % (id(self), alias or 'anon')) self.name = alias @property @@ -2637,7 +2640,7 @@ class _Label(ColumnElement): def __init__(self, name, element, type_=None): while isinstance(element, _Label): element = element.element - self.name = self.key = self._label = name or "{ANON %d %s}" % (id(self), getattr(element, 'name', 'anon')) + self.name = self.key = self._label = name or _generated_label("%%(%d %s)s" % (id(self), getattr(element, 'name', 'anon'))) self._element = element self._type = type_ self.quote = element.quote @@ -2736,7 +2739,7 @@ class _ColumnClause(_Immutable, ColumnElement): _label = label + "_" + str(counter) counter += 1 label = _label - return label + return _generated_label(label) else: return self.name diff --git a/test/profiling/compiler.py b/test/profiling/compiler.py index 278d328276..5be8c0f8e2 100644 --- a/test/profiling/compiler.py +++ b/test/profiling/compiler.py @@ -15,15 +15,15 @@ class CompileTest(TestBase, AssertsExecutionResults): Column('c1', Integer, primary_key=True), Column('c2', String(30))) - @profiling.function_call_count(72, {'2.4': 42}) + @profiling.function_call_count(68, {'2.4': 42}) def test_insert(self): t1.insert().compile() - @profiling.function_call_count(70, {'2.4': 45}) + @profiling.function_call_count(68, {'2.4': 45}) def test_update(self): t1.update().compile() - @profiling.function_call_count(202, versions={'2.4':133}) + @profiling.function_call_count(195, versions={'2.4':133}) def test_select(self): s = select([t1], t1.c.c2==t2.c.c1) s.compile() diff --git a/test/profiling/zoomark.py 
b/test/profiling/zoomark.py index eb21c141e7..08215454a9 100644 --- a/test/profiling/zoomark.py +++ b/test/profiling/zoomark.py @@ -332,11 +332,11 @@ class ZooMarkTest(TestBase): def test_profile_2_insert(self): self.test_baseline_2_insert() - @profiling.function_call_count(4178, {'2.4': 2557}) + @profiling.function_call_count(3858, {'2.4': 2557}) def test_profile_3_properties(self): self.test_baseline_3_properties() - @profiling.function_call_count(15869, {'2.4': 10549}) + @profiling.function_call_count(14752, {'2.4': 10549}) def test_profile_4_expressions(self): self.test_baseline_4_expressions() @@ -344,11 +344,11 @@ class ZooMarkTest(TestBase): def test_profile_5_aggregates(self): self.test_baseline_5_aggregates() - @profiling.function_call_count(2054, {'2.4': 1256}) + @profiling.function_call_count(1904, {'2.4': 1256}) def test_profile_6_editing(self): self.test_baseline_6_editing() - @profiling.function_call_count(3276, {'2.4': 2198}) + @profiling.function_call_count(3110, {'2.4': 2198}) def test_profile_7_multiview(self): self.test_baseline_7_multiview() diff --git a/test/profiling/zoomark_orm.py b/test/profiling/zoomark_orm.py index 784bb35a28..3fff96e1ab 100644 --- a/test/profiling/zoomark_orm.py +++ b/test/profiling/zoomark_orm.py @@ -298,11 +298,11 @@ class ZooMarkTest(TestBase): def test_profile_2_insert(self): self.test_baseline_2_insert() - @profiling.function_call_count(7305) + @profiling.function_call_count(6765) def test_profile_3_properties(self): self.test_baseline_3_properties() - @profiling.function_call_count(25760) + @profiling.function_call_count(23957) def test_profile_4_expressions(self): self.test_baseline_4_expressions() diff --git a/test/sql/labels.py b/test/sql/labels.py index a0f49f3f12..5a620be8c8 100644 --- a/test/sql/labels.py +++ b/test/sql/labels.py @@ -110,14 +110,6 @@ class LongLabelsTest(TestBase, AssertsCompiledSQL): @testing.requires.subqueries def test_subquery(self): - # this is the test that fails if the "max identifier 
length" is - # shorter than the length of the actual columns created, because the - # column names get truncated. if you try to separate "physical - # columns" from "labels", and only truncate the labels, the - # compiler.DefaultCompiler.visit_select() logic which auto-labels - # columns in a subquery (for the purposes of sqlite compat) breaks the - # code, since it is creating "labels" on the fly but not affecting - # derived columns, which think they are still "physical" q = table1.select(table1.c.this_is_the_primarykey_column == 4).alias('foo') x = select([q]) print x.execute().fetchall() @@ -137,5 +129,34 @@ class LongLabelsTest(TestBase, AssertsCompiledSQL): print x.execute().fetchall() + def test_adjustable(self): + + q = table1.select(table1.c.this_is_the_primarykey_column == 4).alias('foo') + x = select([q]) + + compile_dialect = default.DefaultDialect(label_length=10) + self.assert_compile(x, "SELECT foo.this_is_the_primarykey_column, foo.this_is_the_data_column FROM " + "(SELECT some_large_named_table.this_is_the_primarykey_column AS this_1, some_large_named_table.this_is_the_data_column " + "AS this_2 FROM some_large_named_table WHERE some_large_named_table.this_is_the_primarykey_column = :this_1) AS foo", dialect=compile_dialect) + + compile_dialect = default.DefaultDialect(label_length=4) + self.assert_compile(x, "SELECT foo.this_is_the_primarykey_column, foo.this_is_the_data_column FROM " + "(SELECT some_large_named_table.this_is_the_primarykey_column AS _1, some_large_named_table.this_is_the_data_column AS _2 " + "FROM some_large_named_table WHERE some_large_named_table.this_is_the_primarykey_column = :_1) AS foo", dialect=compile_dialect) + + q = table1.select(table1.c.this_is_the_primarykey_column == 4).alias() + x = select([q], use_labels=True) + + compile_dialect = default.DefaultDialect(label_length=10) + self.assert_compile(x, "SELECT anon_1.this_is_the_primarykey_column AS anon_1, anon_1.this_is_the_data_column AS anon_2 FROM " + "(SELECT 
some_large_named_table.this_is_the_primarykey_column AS this_3, some_large_named_table.this_is_the_data_column AS this_4 " + "FROM some_large_named_table WHERE some_large_named_table.this_is_the_primarykey_column = :this_1) AS anon_1", dialect=compile_dialect) + + compile_dialect = default.DefaultDialect(label_length=4) + self.assert_compile(x, "SELECT anon_1.this_is_the_primarykey_column AS _1, anon_1.this_is_the_data_column AS _2 FROM " + "(SELECT some_large_named_table.this_is_the_primarykey_column AS _3, some_large_named_table.this_is_the_data_column AS _4 " + "FROM some_large_named_table WHERE some_large_named_table.this_is_the_primarykey_column = :_1) AS anon_1", dialect=compile_dialect) + + if __name__ == '__main__': testenv.main()