From: Mike Bayer Date: Wed, 17 Dec 2014 22:24:23 +0000 (-0500) Subject: - squash-merge the improve_toc branch, which moves all the Sphinx styling X-Git-Tag: rel_0_9_9~53 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5b056d41c06484c16dc7ac50fea63a7a8d192ada;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git - squash-merge the improve_toc branch, which moves all the Sphinx styling and extensions into an external library, and also reorganizes most large documentation pages into many small areas to reduce scrolling and better present the context into a more fine-grained hierarchy. (cherry picked from commit 9561321d0328df270c4ff0360dc7a035db627949) - manually port previous differences between 0.9 and 1.0 docs to the cherry-pick. --- diff --git a/doc/build/builder/__init__.py b/doc/build/builder/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/doc/build/builder/autodoc_mods.py b/doc/build/builder/autodoc_mods.py deleted file mode 100644 index 5a6e991bd5..0000000000 --- a/doc/build/builder/autodoc_mods.py +++ /dev/null @@ -1,102 +0,0 @@ -import re - -def autodoc_skip_member(app, what, name, obj, skip, options): - if what == 'class' and skip and \ - name in ('__init__', '__eq__', '__ne__', '__lt__', - '__le__', '__call__') and \ - obj.__doc__: - return False - else: - return skip - - -_convert_modname = { - "sqlalchemy.sql.sqltypes": "sqlalchemy.types", - "sqlalchemy.sql.type_api": "sqlalchemy.types", - "sqlalchemy.sql.schema": "sqlalchemy.schema", - "sqlalchemy.sql.elements": "sqlalchemy.sql.expression", - "sqlalchemy.sql.selectable": "sqlalchemy.sql.expression", - "sqlalchemy.sql.dml": "sqlalchemy.sql.expression", - "sqlalchemy.sql.ddl": "sqlalchemy.schema", - "sqlalchemy.sql.base": "sqlalchemy.sql.expression" -} - -_convert_modname_w_class = { - ("sqlalchemy.engine.interfaces", "Connectable"): "sqlalchemy.engine", - ("sqlalchemy.sql.base", "DialectKWArgs"): "sqlalchemy.sql.base", -} - -def _adjust_rendered_mod_name(modname, objname): - if (modname, objname) in _convert_modname_w_class: - return _convert_modname_w_class[(modname, objname)] - elif modname in _convert_modname: - return _convert_modname[modname] - else: - return modname - -# im sure this is in the app somewhere, but I don't really -# know where, so we're doing it here. -_track_autodoced = {} -_inherited_names = set() -def autodoc_process_docstring(app, what, name, obj, options, lines): - if what == "class": - _track_autodoced[name] = obj - - # need to translate module names for bases, others - # as we document lots of symbols in namespace modules - # outside of their source - bases = [] - for base in obj.__bases__: - if base is not object: - bases.append(":class:`%s.%s`" % ( - _adjust_rendered_mod_name(base.__module__, base.__name__), - base.__name__)) - - if bases: - lines[:0] = [ - "Bases: %s" % (", ".join(bases)), - "" - ] - - - elif what in ("attribute", "method") and \ - options.get("inherited-members"): - m = re.match(r'(.*?)\.([\w_]+)$', name) - if m: - clsname, attrname = m.group(1, 2) - if clsname in _track_autodoced: - cls = _track_autodoced[clsname] - for supercls in cls.__mro__: - if attrname in supercls.__dict__: - break - if supercls is not cls: - _inherited_names.add("%s.%s" % (supercls.__module__, supercls.__name__)) - _inherited_names.add("%s.%s.%s" % (supercls.__module__, supercls.__name__, attrname)) - lines[:0] = [ - ".. 
container:: inherited_member", - "", - " *inherited from the* :%s:`~%s.%s.%s` *%s of* :class:`~%s.%s`" % ( - "attr" if what == "attribute" - else "meth", - _adjust_rendered_mod_name(supercls.__module__, supercls.__name__), - supercls.__name__, - attrname, - what, - _adjust_rendered_mod_name(supercls.__module__, supercls.__name__), - supercls.__name__ - ), - "" - ] - -def missing_reference(app, env, node, contnode): - if node.attributes['reftarget'] in _inherited_names: - return node.children[0] - else: - return None - - -def setup(app): - app.connect('autodoc-skip-member', autodoc_skip_member) - app.connect('autodoc-process-docstring', autodoc_process_docstring) - - app.connect('missing-reference', missing_reference) diff --git a/doc/build/builder/dialect_info.py b/doc/build/builder/dialect_info.py deleted file mode 100644 index 48626393d2..0000000000 --- a/doc/build/builder/dialect_info.py +++ /dev/null @@ -1,175 +0,0 @@ -import re -from sphinx.util.compat import Directive -from docutils import nodes - -class DialectDirective(Directive): - has_content = True - - _dialects = {} - - def _parse_content(self): - d = {} - d['default'] = self.content[0] - d['text'] = [] - idx = 0 - for line in self.content[1:]: - idx += 1 - m = re.match(r'\:(.+?)\: +(.+)', line) - if m: - attrname, value = m.group(1, 2) - d[attrname] = value - else: - break - d["text"] = self.content[idx + 1:] - return d - - def _dbapi_node(self): - - dialect_name, dbapi_name = self.dialect_name.split("+") - - try: - dialect_directive = self._dialects[dialect_name] - except KeyError: - raise Exception("No .. dialect:: %s directive has been established" - % dialect_name) - - output = [] - - content = self._parse_content() - - parent_section_ref = self.state.parent.children[0]['ids'][0] - self._append_dbapi_bullet(dialect_name, dbapi_name, - content['name'], parent_section_ref) - - p = nodes.paragraph('', '', - nodes.Text( - "Support for the %s database via the %s driver." % ( - dialect_directive.database_name, - content['name'] - ), - "Support for the %s database via the %s driver." 
% ( - dialect_directive.database_name, - content['name'] - ) - ), - ) - - self.state.nested_parse(content['text'], 0, p) - output.append(p) - - if "url" in content or "driverurl" in content: - sec = nodes.section( - '', - nodes.title("DBAPI", "DBAPI"), - ids=["dialect-%s-%s-url" % (dialect_name, dbapi_name)] - ) - if "url" in content: - text = "Documentation and download information (if applicable) "\ - "for %s is available at:\n" % content["name"] - uri = content['url'] - sec.append( - nodes.paragraph('', '', - nodes.Text(text, text), - nodes.reference('', '', - nodes.Text(uri, uri), - refuri=uri, - ) - ) - ) - if "driverurl" in content: - text = "Drivers for this database are available at:\n" - sec.append( - nodes.paragraph('', '', - nodes.Text(text, text), - nodes.reference('', '', - nodes.Text(content['driverurl'], content['driverurl']), - refuri=content['driverurl'] - ) - ) - ) - output.append(sec) - - - if "connectstring" in content: - sec = nodes.section( - '', - nodes.title("Connecting", "Connecting"), - nodes.paragraph('', '', - nodes.Text("Connect String:", "Connect String:"), - nodes.literal_block(content['connectstring'], - content['connectstring']) - ), - ids=["dialect-%s-%s-connect" % (dialect_name, dbapi_name)] - ) - output.append(sec) - - return output - - def _dialect_node(self): - self._dialects[self.dialect_name] = self - - content = self._parse_content() - self.database_name = content['name'] - - self.bullets = nodes.bullet_list() - text = "The following dialect/DBAPI options are available. "\ - "Please refer to individual DBAPI sections for connect information." - sec = nodes.section('', - nodes.paragraph('', '', - nodes.Text( - "Support for the %s database." % content['name'], - "Support for the %s database." % content['name'] - ), - ), - nodes.title("DBAPI Support", "DBAPI Support"), - nodes.paragraph('', '', - nodes.Text(text, text), - self.bullets - ), - ids=["dialect-%s" % self.dialect_name] - ) - - return [sec] - - def _append_dbapi_bullet(self, dialect_name, dbapi_name, name, idname): - env = self.state.document.settings.env - dialect_directive = self._dialects[dialect_name] - try: - relative_uri = env.app.builder.get_relative_uri(dialect_directive.docname, self.docname) - except: - relative_uri = "" - list_node = nodes.list_item('', - nodes.paragraph('', '', - nodes.reference('', '', - nodes.Text(name, name), - refdocname=self.docname, - refuri= relative_uri + "#" + idname - ), - #nodes.Text(" ", " "), - #nodes.reference('', '', - # nodes.Text("(connectstring)", "(connectstring)"), - # refdocname=self.docname, - # refuri=env.app.builder.get_relative_uri( - # dialect_directive.docname, self.docname) + - ## "#" + ("dialect-%s-%s-connect" % - # (dialect_name, dbapi_name)) - # ) - ) - ) - dialect_directive.bullets.append(list_node) - - def run(self): - env = self.state.document.settings.env - self.docname = env.docname - - self.dialect_name = dialect_name = self.content[0] - - has_dbapi = "+" in dialect_name - if has_dbapi: - return self._dbapi_node() - else: - return self._dialect_node() - -def setup(app): - app.add_directive('dialect', DialectDirective) - diff --git a/doc/build/builder/mako.py b/doc/build/builder/mako.py deleted file mode 100644 index 0367bf0186..0000000000 --- a/doc/build/builder/mako.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import - -from sphinx.application import TemplateBridge -from sphinx.jinja2glue import BuiltinTemplateLoader -from mako.lookup import TemplateLookup -import os - -rtd = os.environ.get('READTHEDOCS', 
None) == 'True' - -class MakoBridge(TemplateBridge): - def init(self, builder, *args, **kw): - self.jinja2_fallback = BuiltinTemplateLoader() - self.jinja2_fallback.init(builder, *args, **kw) - - builder.config.html_context['release_date'] = builder.config['release_date'] - builder.config.html_context['site_base'] = builder.config['site_base'] - - self.lookup = TemplateLookup(directories=builder.config.templates_path, - #format_exceptions=True, - imports=[ - "from builder import util" - ] - ) - - if rtd: - # RTD layout, imported from sqlalchemy.org - import urllib2 - template = urllib2.urlopen(builder.config['site_base'] + "/docs_adapter.mako").read() - self.lookup.put_string("docs_adapter.mako", template) - - setup_ctx = urllib2.urlopen(builder.config['site_base'] + "/docs_adapter.py").read() - lcls = {} - exec(setup_ctx, lcls) - self.setup_ctx = lcls['setup_context'] - - def setup_ctx(self, context): - pass - - def render(self, template, context): - template = template.replace(".html", ".mako") - context['prevtopic'] = context.pop('prev', None) - context['nexttopic'] = context.pop('next', None) - - # local docs layout - context['rtd'] = False - context['toolbar'] = False - context['base'] = "static_base.mako" - - # override context attributes - self.setup_ctx(context) - - context.setdefault('_', lambda x: x) - return self.lookup.get_template(template).render_unicode(**context) - - def render_string(self, template, context): - # this is used for .js, .css etc. and we don't have - # local copies of that stuff here so use the jinja render. - return self.jinja2_fallback.render_string(template, context) - -def setup(app): - app.config['template_bridge'] = "builder.mako.MakoBridge" - app.add_config_value('release_date', "", 'env') - app.add_config_value('site_base', "", 'env') - app.add_config_value('build_number', "", 'env') - diff --git a/doc/build/builder/sqlformatter.py b/doc/build/builder/sqlformatter.py deleted file mode 100644 index 2d80749000..0000000000 --- a/doc/build/builder/sqlformatter.py +++ /dev/null @@ -1,132 +0,0 @@ -from pygments.lexer import RegexLexer, bygroups, using -from pygments.token import Token -from pygments.filter import Filter -from pygments.filter import apply_filters -from pygments.lexers import PythonLexer, PythonConsoleLexer -from sphinx.highlighting import PygmentsBridge -from pygments.formatters import HtmlFormatter, LatexFormatter - -import re - - -def _strip_trailing_whitespace(iter_): - buf = list(iter_) - if buf: - buf[-1] = (buf[-1][0], buf[-1][1].rstrip()) - for t, v in buf: - yield t, v - - -class StripDocTestFilter(Filter): - def filter(self, lexer, stream): - for ttype, value in stream: - if ttype is Token.Comment and re.match(r'#\s*doctest:', value): - continue - yield ttype, value - -class PyConWithSQLLexer(RegexLexer): - name = 'PyCon+SQL' - aliases = ['pycon+sql'] - - flags = re.IGNORECASE | re.DOTALL - - tokens = { - 'root': [ - (r'{sql}', Token.Sql.Link, 'sqlpopup'), - (r'{opensql}', Token.Sql.Open, 'opensqlpopup'), - (r'.*?\n', using(PythonConsoleLexer)) - ], - 'sqlpopup': [ - ( - r'(.*?\n)((?:PRAGMA|BEGIN|SELECT|INSERT|DELETE|ROLLBACK|' - 'COMMIT|ALTER|UPDATE|CREATE|DROP|PRAGMA' - '|DESCRIBE).*?(?:{stop}\n?|$))', - bygroups(using(PythonConsoleLexer), Token.Sql.Popup), - "#pop" - ) - ], - 'opensqlpopup': [ - ( - r'.*?(?:{stop}\n*|$)', - Token.Sql, - "#pop" - ) - ] - } - - -class PythonWithSQLLexer(RegexLexer): - name = 'Python+SQL' - aliases = ['pycon+sql'] - - flags = re.IGNORECASE | re.DOTALL - - tokens = { - 'root': [ - (r'{sql}', 
Token.Sql.Link, 'sqlpopup'), - (r'{opensql}', Token.Sql.Open, 'opensqlpopup'), - (r'.*?\n', using(PythonLexer)) - ], - 'sqlpopup': [ - ( - r'(.*?\n)((?:PRAGMA|BEGIN|SELECT|INSERT|DELETE|ROLLBACK' - '|COMMIT|ALTER|UPDATE|CREATE|DROP' - '|PRAGMA|DESCRIBE).*?(?:{stop}\n?|$))', - bygroups(using(PythonLexer), Token.Sql.Popup), - "#pop" - ) - ], - 'opensqlpopup': [ - ( - r'.*?(?:{stop}\n*|$)', - Token.Sql, - "#pop" - ) - ] - } - -class PopupSQLFormatter(HtmlFormatter): - def _format_lines(self, tokensource): - buf = [] - for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]): - if ttype in Token.Sql: - for t, v in HtmlFormatter._format_lines(self, iter(buf)): - yield t, v - buf = [] - - if ttype is Token.Sql: - yield 1, "
<div class='show_sql'>%s</div>
" % \ - re.sub(r'(?:[{stop}|\n]*)$', '', value) - elif ttype is Token.Sql.Link: - yield 1, "sql" - elif ttype is Token.Sql.Popup: - yield 1, "" % \ - re.sub(r'(?:[{stop}|\n]*)$', '', value) - else: - buf.append((ttype, value)) - - for t, v in _strip_trailing_whitespace( - HtmlFormatter._format_lines(self, iter(buf))): - yield t, v - -class PopupLatexFormatter(LatexFormatter): - def _filter_tokens(self, tokensource): - for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]): - if ttype in Token.Sql: - if ttype is not Token.Sql.Link and ttype is not Token.Sql.Open: - yield Token.Literal, re.sub(r'{stop}', '', value) - else: - continue - else: - yield ttype, value - - def format(self, tokensource, outfile): - LatexFormatter.format(self, self._filter_tokens(tokensource), outfile) - -def setup(app): - app.add_lexer('pycon+sql', PyConWithSQLLexer()) - app.add_lexer('python+sql', PythonWithSQLLexer()) - - PygmentsBridge.html_formatter = PopupSQLFormatter - PygmentsBridge.latex_formatter = PopupLatexFormatter - diff --git a/doc/build/builder/util.py b/doc/build/builder/util.py deleted file mode 100644 index a9dcff001a..0000000000 --- a/doc/build/builder/util.py +++ /dev/null @@ -1,12 +0,0 @@ -import re - -def striptags(text): - return re.compile(r'<[^>]*>').sub('', text) - -def go(m): - # .html with no anchor if present, otherwise "#" for top of page - return m.group(1) or '#' - -def strip_toplevel_anchors(text): - return re.compile(r'(\.html)?#[-\w]+-toplevel').sub(go, text) - diff --git a/doc/build/builder/viewsource.py b/doc/build/builder/viewsource.py deleted file mode 100644 index 088cef2c22..0000000000 --- a/doc/build/builder/viewsource.py +++ /dev/null @@ -1,209 +0,0 @@ -from docutils import nodes -from sphinx.ext.viewcode import collect_pages -from sphinx.pycode import ModuleAnalyzer -import imp -from sphinx import addnodes -import re -from sphinx.util.compat import Directive -import os -from docutils.statemachine import StringList -from sphinx.environment import NoUri - -import sys - -py2k = sys.version_info < (3, 0) -if py2k: - text_type = unicode -else: - text_type = str - -def view_source(name, rawtext, text, lineno, inliner, - options={}, content=[]): - - env = inliner.document.settings.env - - node = _view_source_node(env, text, None) - return [node], [] - -def _view_source_node(env, text, state): - # pretend we're using viewcode fully, - # install the context it looks for - if not hasattr(env, '_viewcode_modules'): - env._viewcode_modules = {} - - modname = text - text = modname.split(".")[-1] + ".py" - - # imitate sphinx . syntax - if modname.startswith("."): - # see if the modname needs to be corrected in terms - # of current module context - base_module = env.temp_data.get('autodoc:module') - if base_module is None: - base_module = env.temp_data.get('py:module') - - if base_module: - modname = base_module + modname - - urito = env.app.builder.get_relative_uri - - # we're showing code examples which may have dependencies - # which we really don't want to have required so load the - # module by file, not import (though we are importing) - # the top level module here... - pathname = None - for token in modname.split("."): - file_, pathname, desc = imp.find_module(token, [pathname] if pathname else None) - if file_: - file_.close() - - # unlike viewcode which silently traps exceptions, - # I want this to totally barf if the file can't be loaded. 
- # a failed build better than a complete build missing - # key content - analyzer = ModuleAnalyzer.for_file(pathname, modname) - # copied from viewcode - analyzer.find_tags() - if not isinstance(analyzer.code, text_type): - code = analyzer.code.decode(analyzer.encoding) - else: - code = analyzer.code - - if state is not None: - docstring = _find_mod_docstring(analyzer) - if docstring: - # get rid of "foo.py" at the top - docstring = re.sub(r"^[a-zA-Z_0-9]+\.py", "", docstring) - - # strip - docstring = docstring.strip() - - # yank only first paragraph - docstring = docstring.split("\n\n")[0].strip() - else: - docstring = None - - entry = code, analyzer.tags, {} - env._viewcode_modules[modname] = entry - pagename = '_modules/' + modname.replace('.', '/') - - try: - refuri = urito(env.docname, pagename) - except NoUri: - # if we're in the latex builder etc., this seems - # to be what we get - refuri = None - - - if docstring: - # embed the ref with the doc text so that it isn't - # a separate paragraph - if refuri: - docstring = "`%s <%s>`_ - %s" % (text, refuri, docstring) - else: - docstring = "``%s`` - %s" % (text, docstring) - para = nodes.paragraph('', '') - state.nested_parse(StringList([docstring]), 0, para) - return_node = para - else: - if refuri: - refnode = nodes.reference('', '', - nodes.Text(text, text), - refuri=urito(env.docname, pagename) - ) - else: - refnode = nodes.Text(text, text) - - if state: - return_node = nodes.paragraph('', '', refnode) - else: - return_node = refnode - - return return_node - -from sphinx.pycode.pgen2 import token - -def _find_mod_docstring(analyzer): - """attempt to locate the module-level docstring. - - Note that sphinx autodoc just uses ``__doc__``. But we don't want - to import the module, so we need to parse for it. - - """ - analyzer.tokenize() - for type_, parsed_line, start_pos, end_pos, raw_line in analyzer.tokens: - if type_ == token.COMMENT: - continue - elif type_ == token.STRING: - return eval(parsed_line) - else: - return None - -def _parse_content(content): - d = {} - d['text'] = [] - idx = 0 - for line in content: - idx += 1 - m = re.match(r' *\:(.+?)\:(?: +(.+))?', line) - if m: - attrname, value = m.group(1, 2) - d[attrname] = value or '' - else: - break - d["text"] = content[idx:] - return d - -def _comma_list(text): - return re.split(r"\s*,\s*", text.strip()) - -class AutoSourceDirective(Directive): - has_content = True - - def run(self): - content = _parse_content(self.content) - - - env = self.state.document.settings.env - self.docname = env.docname - - sourcefile = self.state.document.current_source.split(os.pathsep)[0] - dir_ = os.path.dirname(sourcefile) - files = [ - f for f in os.listdir(dir_) if f.endswith(".py") - and f != "__init__.py" - ] - - if "files" in content: - # ordered listing of files to include - files = [fname for fname in _comma_list(content["files"]) - if fname in set(files)] - - node = nodes.paragraph('', '', - nodes.Text("Listing of files:", "Listing of files:") - ) - - bullets = nodes.bullet_list() - for fname in files: - modname, ext = os.path.splitext(fname) - # relative lookup - modname = "." 
+ modname - - link = _view_source_node(env, modname, self.state) - - list_node = nodes.list_item('', - link - ) - bullets += list_node - - node += bullets - - return [node] - -def setup(app): - app.add_role('viewsource', view_source) - - app.add_directive('autosource', AutoSourceDirective) - - # from sphinx.ext.viewcode - app.connect('html-collect-pages', collect_pages) diff --git a/doc/build/conf.py b/doc/build/conf.py index 5c8cf495bf..3c8f014be8 100644 --- a/doc/build/conf.py +++ b/doc/build/conf.py @@ -34,13 +34,9 @@ import sqlalchemy extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.intersphinx', - 'builder.autodoc_mods', + 'zzzeeksphinx', 'changelog', 'sphinx_paramlinks', - 'builder.dialect_info', - 'builder.mako', - 'builder.sqlformatter', - 'builder.viewsource', ] # Add any paths that contain templates here, relative to this directory. @@ -74,6 +70,21 @@ changelog_render_pullreq = { changelog_render_changeset = "http://www.sqlalchemy.org/trac/changeset/%s" +autodocmods_convert_modname = { + "sqlalchemy.sql.sqltypes": "sqlalchemy.types", + "sqlalchemy.sql.type_api": "sqlalchemy.types", + "sqlalchemy.sql.schema": "sqlalchemy.schema", + "sqlalchemy.sql.elements": "sqlalchemy.sql.expression", + "sqlalchemy.sql.selectable": "sqlalchemy.sql.expression", + "sqlalchemy.sql.dml": "sqlalchemy.sql.expression", + "sqlalchemy.sql.ddl": "sqlalchemy.schema", + "sqlalchemy.sql.base": "sqlalchemy.sql.expression" +} + +autodocmods_convert_modname_w_class = { + ("sqlalchemy.engine.interfaces", "Connectable"): "sqlalchemy.engine", + ("sqlalchemy.sql.base", "DialectKWArgs"): "sqlalchemy.sql.base", +} # The encoding of source files. #source_encoding = 'utf-8-sig' @@ -97,6 +108,8 @@ release = "0.9.8" release_date = "October 13, 2014" site_base = os.environ.get("RTD_SITE_BASE", "http://www.sqlalchemy.org") +site_adapter_template = "docs_adapter.mako" +site_adapter_py = "docs_adapter.py" # arbitrary number recognized by builders.py, incrementing this # will force a rebuild @@ -144,7 +157,7 @@ gettext_compact = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'default' +html_theme = 'zzzeeksphinx' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/doc/build/contents.rst b/doc/build/contents.rst index df80e9b79f..95b5e9a19a 100644 --- a/doc/build/contents.rst +++ b/doc/build/contents.rst @@ -13,6 +13,7 @@ documentation, see :ref:`index_toplevel`. orm/index core/index dialects/index + faq/index changelog/index Indices and tables diff --git a/doc/build/core/api_basics.rst b/doc/build/core/api_basics.rst new file mode 100644 index 0000000000..e56a1117b3 --- /dev/null +++ b/doc/build/core/api_basics.rst @@ -0,0 +1,12 @@ +================= +Core API Basics +================= + +.. toctree:: + :maxdepth: 2 + + event + inspection + interfaces + exceptions + internals diff --git a/doc/build/core/custom_types.rst b/doc/build/core/custom_types.rst new file mode 100644 index 0000000000..92c5ca6cfe --- /dev/null +++ b/doc/build/core/custom_types.rst @@ -0,0 +1,498 @@ +.. _types_custom: + +Custom Types +------------ + +A variety of methods exist to redefine the behavior of existing types +as well as to provide new ones. + +Overriding Type Compilation +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A frequent need is to force the "string" version of a type, that is +the one rendered in a CREATE TABLE statement or other SQL function +like CAST, to be changed. 
For example, an application may want
+to force the rendering of ``BINARY`` for all platforms
+except for one, in which it wants ``BLOB`` to be rendered. Usage
+of an existing generic type, in this case :class:`.LargeBinary`, is
+preferred for most use cases. But to control
+types more accurately, a compilation directive that is per-dialect
+can be associated with any type::
+
+    from sqlalchemy.ext.compiler import compiles
+    from sqlalchemy.types import BINARY
+
+    @compiles(BINARY, "sqlite")
+    def compile_binary_sqlite(type_, compiler, **kw):
+        return "BLOB"
+
+The above code allows the usage of :class:`.types.BINARY`, which
+will produce the string ``BINARY`` against all backends except SQLite,
+in which case it will produce ``BLOB``.
+
+See the section :ref:`type_compilation_extension`, a subsection of
+:ref:`sqlalchemy.ext.compiler_toplevel`, for additional examples.
+
+.. _types_typedecorator:
+
+Augmenting Existing Types
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The :class:`.TypeDecorator` allows the creation of custom types which
+add bind-parameter and result-processing behavior to an existing
+type object. It is used when additional in-Python marshaling of data
+to and from the database is required.
+
+.. note::
+
+  The bind- and result-processing of :class:`.TypeDecorator`
+  is *in addition* to the processing already performed by the hosted
+  type, which is customized by SQLAlchemy on a per-DBAPI basis to perform
+  processing specific to that DBAPI. To change the DBAPI-level processing
+  for an existing type, see the section :ref:`replacing_processors`.
+
+.. autoclass:: TypeDecorator
+   :members:
+   :inherited-members:
+
+
+TypeDecorator Recipes
+~~~~~~~~~~~~~~~~~~~~~
+A few key :class:`.TypeDecorator` recipes follow.
+
+.. _coerce_to_unicode:
+
+Coercing Encoded Strings to Unicode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A common source of confusion regarding the :class:`.Unicode` type
+is that it is intended to deal *only* with Python ``unicode`` objects
+on the Python side, meaning values passed to it as bind parameters
+must be of the form ``u'some string'`` if using Python 2 and not 3.
+The encoding/decoding functions it performs are only to suit what the
+DBAPI in use requires, and are primarily a private implementation detail.
+
+The use case of a type that can safely receive Python bytestrings,
+that is, strings that contain non-ASCII characters and are not ``u''``
+objects in Python 2, can be achieved using a :class:`.TypeDecorator`
+which coerces as needed::
+
+    from sqlalchemy.types import TypeDecorator, Unicode
+
+    class CoerceUTF8(TypeDecorator):
+        """Safely coerce Python bytestrings to Unicode
+        before passing off to the database."""
+
+        impl = Unicode
+
+        def process_bind_param(self, value, dialect):
+            if isinstance(value, str):
+                value = value.decode('utf-8')
+            return value
+
+Rounding Numerics
+^^^^^^^^^^^^^^^^^
+
+Some database connectors like those of SQL Server choke if a Decimal is passed with too
+many decimal places.
Here's a recipe that rounds them down:: + + from sqlalchemy.types import TypeDecorator, Numeric + from decimal import Decimal + + class SafeNumeric(TypeDecorator): + """Adds quantization to Numeric.""" + + impl = Numeric + + def __init__(self, *arg, **kw): + TypeDecorator.__init__(self, *arg, **kw) + self.quantize_int = -(self.impl.precision - self.impl.scale) + self.quantize = Decimal(10) ** self.quantize_int + + def process_bind_param(self, value, dialect): + if isinstance(value, Decimal) and \ + value.as_tuple()[2] < self.quantize_int: + value = value.quantize(self.quantize) + return value + +.. _custom_guid_type: + +Backend-agnostic GUID Type +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Receives and returns Python uuid() objects. Uses the PG UUID type +when using Postgresql, CHAR(32) on other backends, storing them +in stringified hex format. Can be modified to store +binary in CHAR(16) if desired:: + + from sqlalchemy.types import TypeDecorator, CHAR + from sqlalchemy.dialects.postgresql import UUID + import uuid + + class GUID(TypeDecorator): + """Platform-independent GUID type. + + Uses Postgresql's UUID type, otherwise uses + CHAR(32), storing as stringified hex values. + + """ + impl = CHAR + + def load_dialect_impl(self, dialect): + if dialect.name == 'postgresql': + return dialect.type_descriptor(UUID()) + else: + return dialect.type_descriptor(CHAR(32)) + + def process_bind_param(self, value, dialect): + if value is None: + return value + elif dialect.name == 'postgresql': + return str(value) + else: + if not isinstance(value, uuid.UUID): + return "%.32x" % uuid.UUID(value) + else: + # hexstring + return "%.32x" % value + + def process_result_value(self, value, dialect): + if value is None: + return value + else: + return uuid.UUID(value) + +Marshal JSON Strings +^^^^^^^^^^^^^^^^^^^^^ + +This type uses ``simplejson`` to marshal Python data structures +to/from JSON. Can be modified to use Python's builtin json encoder:: + + from sqlalchemy.types import TypeDecorator, VARCHAR + import json + + class JSONEncodedDict(TypeDecorator): + """Represents an immutable structure as a json-encoded string. + + Usage:: + + JSONEncodedDict(255) + + """ + + impl = VARCHAR + + def process_bind_param(self, value, dialect): + if value is not None: + value = json.dumps(value) + + return value + + def process_result_value(self, value, dialect): + if value is not None: + value = json.loads(value) + return value + +Note that the ORM by default will not detect "mutability" on such a type - +meaning, in-place changes to values will not be detected and will not be +flushed. Without further steps, you instead would need to replace the existing +value with a new one on each parent object to detect changes. Note that +there's nothing wrong with this, as many applications may not require that the +values are ever mutated once created. For those which do have this requirement, +support for mutability is best applied using the ``sqlalchemy.ext.mutable`` +extension - see the example in :ref:`mutable_toplevel`. + +.. _replacing_processors: + +Replacing the Bind/Result Processing of Existing Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Most augmentation of type behavior at the bind/result level +is achieved using :class:`.TypeDecorator`. For the rare scenario +where the specific processing applied by SQLAlchemy at the DBAPI +level needs to be replaced, the SQLAlchemy type can be subclassed +directly, and the ``bind_processor()`` or ``result_processor()`` +methods can be overridden. 
Doing so requires that the +``adapt()`` method also be overridden. This method is the mechanism +by which SQLAlchemy produces DBAPI-specific type behavior during +statement execution. Overriding it allows a copy of the custom +type to be used in lieu of a DBAPI-specific type. Below we subclass +the :class:`.types.TIME` type to have custom result processing behavior. +The ``process()`` function will receive ``value`` from the DBAPI +cursor directly:: + + class MySpecialTime(TIME): + def __init__(self, special_argument): + super(MySpecialTime, self).__init__() + self.special_argument = special_argument + + def result_processor(self, dialect, coltype): + import datetime + time = datetime.time + def process(value): + if value is not None: + microseconds = value.microseconds + seconds = value.seconds + minutes = seconds / 60 + return time( + minutes / 60, + minutes % 60, + seconds - minutes * 60, + microseconds) + else: + return None + return process + + def adapt(self, impltype): + return MySpecialTime(self.special_argument) + +.. _types_sql_value_processing: + +Applying SQL-level Bind/Result Processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As seen in the sections :ref:`types_typedecorator` and :ref:`replacing_processors`, +SQLAlchemy allows Python functions to be invoked both when parameters are sent +to a statement, as well as when result rows are loaded from the database, to apply +transformations to the values as they are sent to or from the database. It is also +possible to define SQL-level transformations as well. The rationale here is when +only the relational database contains a particular series of functions that are necessary +to coerce incoming and outgoing data between an application and persistence format. +Examples include using database-defined encryption/decryption functions, as well +as stored procedures that handle geographic data. The Postgis extension to Postgresql +includes an extensive array of SQL functions that are necessary for coercing +data into particular formats. + +Any :class:`.TypeEngine`, :class:`.UserDefinedType` or :class:`.TypeDecorator` subclass +can include implementations of +:meth:`.TypeEngine.bind_expression` and/or :meth:`.TypeEngine.column_expression`, which +when defined to return a non-``None`` value should return a :class:`.ColumnElement` +expression to be injected into the SQL statement, either surrounding +bound parameters or a column expression. For example, to build a ``Geometry`` +type which will apply the Postgis function ``ST_GeomFromText`` to all outgoing +values and the function ``ST_AsText`` to all incoming data, we can create +our own subclass of :class:`.UserDefinedType` which provides these methods +in conjunction with :data:`~.sqlalchemy.sql.expression.func`:: + + from sqlalchemy import func + from sqlalchemy.types import UserDefinedType + + class Geometry(UserDefinedType): + def get_col_spec(self): + return "GEOMETRY" + + def bind_expression(self, bindvalue): + return func.ST_GeomFromText(bindvalue, type_=self) + + def column_expression(self, col): + return func.ST_AsText(col, type_=self) + +We can apply the ``Geometry`` type into :class:`.Table` metadata +and use it in a :func:`.select` construct:: + + geometry = Table('geometry', metadata, + Column('geom_id', Integer, primary_key=True), + Column('geom_data', Geometry) + ) + + print select([geometry]).where( + geometry.c.geom_data == 'LINESTRING(189412 252431,189631 259122)') + +The resulting SQL embeds both functions as appropriate. 
``ST_AsText``
+is applied to the columns clause so that the return value is run through
+the function before passing into a result set, and ``ST_GeomFromText``
+is run on the bound parameter so that the passed-in value is converted::
+
+  SELECT geometry.geom_id, ST_AsText(geometry.geom_data) AS geom_data_1
+  FROM geometry
+  WHERE geometry.geom_data = ST_GeomFromText(:geom_data_2)
+
+The :meth:`.TypeEngine.column_expression` method interacts with the
+mechanics of the compiler such that the SQL expression does not interfere
+with the labeling of the wrapped expression. For example, if we render
+a :func:`.select` against a :func:`.label` of our expression, the string
+label is moved to the outside of the wrapped expression::
+
+  print select([geometry.c.geom_data.label('my_data')])
+
+Output::
+
+  SELECT ST_AsText(geometry.geom_data) AS my_data
+  FROM geometry
+
+For an example of subclassing a built in type directly, we subclass
+:class:`.postgresql.BYTEA` to provide a ``PGPString``, which will make use of the
+Postgresql ``pgcrypto`` extension to encrypt/decrypt values
+transparently::
+
+    from sqlalchemy import create_engine, String, select, func, \
+        MetaData, Table, Column, type_coerce
+
+    from sqlalchemy.dialects.postgresql import BYTEA
+
+    class PGPString(BYTEA):
+        def __init__(self, passphrase, length=None):
+            super(PGPString, self).__init__(length)
+            self.passphrase = passphrase
+
+        def bind_expression(self, bindvalue):
+            # convert the bind's type from PGPString to
+            # String, so that it's passed to psycopg2 as is without
+            # a dbapi.Binary wrapper
+            bindvalue = type_coerce(bindvalue, String)
+            return func.pgp_sym_encrypt(bindvalue, self.passphrase)
+
+        def column_expression(self, col):
+            return func.pgp_sym_decrypt(col, self.passphrase)
+
+    metadata = MetaData()
+    message = Table('message', metadata,
+                    Column('username', String(50)),
+                    Column('message',
+                           PGPString("this is my passphrase", length=1000)),
+                    )
+
+    engine = create_engine("postgresql://scott:tiger@localhost/test", echo=True)
+    with engine.begin() as conn:
+        metadata.create_all(conn)
+
+        conn.execute(message.insert(), username="some user",
+                     message="this is my message")
+
+        print conn.scalar(
+            select([message.c.message]).\
+            where(message.c.username == "some user")
+        )
+
+The ``pgp_sym_encrypt`` and ``pgp_sym_decrypt`` functions are applied
+to the INSERT and SELECT statements::
+
+  INSERT INTO message (username, message)
+    VALUES (%(username)s, pgp_sym_encrypt(%(message)s, %(pgp_sym_encrypt_1)s))
+    {'username': 'some user', 'message': 'this is my message',
+      'pgp_sym_encrypt_1': 'this is my passphrase'}
+
+  SELECT pgp_sym_decrypt(message.message, %(pgp_sym_decrypt_1)s) AS message_1
+    FROM message
+    WHERE message.username = %(username_1)s
+    {'pgp_sym_decrypt_1': 'this is my passphrase', 'username_1': 'some user'}
+
+
+.. versionadded:: 0.8 Added the :meth:`.TypeEngine.bind_expression` and
+   :meth:`.TypeEngine.column_expression` methods.
+
+See also:
+
+:ref:`examples_postgis`
+
+.. _types_operators:
+
+Redefining and Creating New Operators
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+SQLAlchemy Core defines a fixed set of expression operators available to all column expressions.
+Some of these operations have the effect of overloading Python's built in operators;
+examples of such operators include
+:meth:`.ColumnOperators.__eq__` (``table.c.somecolumn == 'foo'``),
+:meth:`.ColumnOperators.__invert__` (``~table.c.flag``),
+and :meth:`.ColumnOperators.__add__` (``table.c.x + table.c.y``).
Other operators are exposed as +explicit methods on column expressions, such as +:meth:`.ColumnOperators.in_` (``table.c.value.in_(['x', 'y'])``) and :meth:`.ColumnOperators.like` +(``table.c.value.like('%ed%')``). + +The Core expression constructs in all cases consult the type of the expression in order to determine +the behavior of existing operators, as well as to locate additional operators that aren't part of +the built in set. The :class:`.TypeEngine` base class defines a root "comparison" implementation +:class:`.TypeEngine.Comparator`, and many specific types provide their own sub-implementations of this +class. User-defined :class:`.TypeEngine.Comparator` implementations can be built directly into a +simple subclass of a particular type in order to override or define new operations. Below, +we create a :class:`.Integer` subclass which overrides the :meth:`.ColumnOperators.__add__` operator:: + + from sqlalchemy import Integer + + class MyInt(Integer): + class comparator_factory(Integer.Comparator): + def __add__(self, other): + return self.op("goofy")(other) + +The above configuration creates a new class ``MyInt``, which +establishes the :attr:`.TypeEngine.comparator_factory` attribute as +referring to a new class, subclassing the :class:`.TypeEngine.Comparator` class +associated with the :class:`.Integer` type. + +Usage:: + + >>> sometable = Table("sometable", metadata, Column("data", MyInt)) + >>> print sometable.c.data + 5 + sometable.data goofy :data_1 + +The implementation for :meth:`.ColumnOperators.__add__` is consulted +by an owning SQL expression, by instantiating the :class:`.TypeEngine.Comparator` with +itself as the ``expr`` attribute. The mechanics of the expression +system are such that operations continue recursively until an +expression object produces a new SQL expression construct. Above, we +could just as well have said ``self.expr.op("goofy")(other)`` instead +of ``self.op("goofy")(other)``. + +New methods added to a :class:`.TypeEngine.Comparator` are exposed on an +owning SQL expression +using a ``__getattr__`` scheme, which exposes methods added to +:class:`.TypeEngine.Comparator` onto the owning :class:`.ColumnElement`. +For example, to add a ``log()`` function +to integers:: + + from sqlalchemy import Integer, func + + class MyInt(Integer): + class comparator_factory(Integer.Comparator): + def log(self, other): + return func.log(self.expr, other) + +Using the above type:: + + >>> print sometable.c.data.log(5) + log(:log_1, :log_2) + + +Unary operations +are also possible. For example, to add an implementation of the +Postgresql factorial operator, we combine the :class:`.UnaryExpression` construct +along with a :class:`.custom_op` to produce the factorial expression:: + + from sqlalchemy import Integer + from sqlalchemy.sql.expression import UnaryExpression + from sqlalchemy.sql import operators + + class MyInteger(Integer): + class comparator_factory(Integer.Comparator): + def factorial(self): + return UnaryExpression(self.expr, + modifier=operators.custom_op("!"), + type_=MyInteger) + +Using the above type:: + + >>> from sqlalchemy.sql import column + >>> print column('x', MyInteger).factorial() + x ! + +See also: + +:attr:`.TypeEngine.comparator_factory` + +.. versionadded:: 0.8 The expression system was enhanced to support + customization of operators on a per-type level. + + +Creating New Types +~~~~~~~~~~~~~~~~~~ + +The :class:`.UserDefinedType` class is provided as a simple base class +for defining entirely new database types. 
Use this to represent native +database types not known by SQLAlchemy. If only Python translation behavior +is needed, use :class:`.TypeDecorator` instead. + +.. autoclass:: UserDefinedType + :members: + + diff --git a/doc/build/core/engines_connections.rst b/doc/build/core/engines_connections.rst new file mode 100644 index 0000000000..f163a7629d --- /dev/null +++ b/doc/build/core/engines_connections.rst @@ -0,0 +1,11 @@ +========================= +Engine and Connection Use +========================= + +.. toctree:: + :maxdepth: 2 + + engines + connections + pooling + events diff --git a/doc/build/core/expression_api.rst b/doc/build/core/expression_api.rst index 99bb988811..b32fa0e239 100644 --- a/doc/build/core/expression_api.rst +++ b/doc/build/core/expression_api.rst @@ -16,5 +16,5 @@ see :ref:`sqlexpression_toplevel`. selectable dml functions - types - + compiler + serializer diff --git a/doc/build/core/index.rst b/doc/build/core/index.rst index 210f284123..26c26af07a 100644 --- a/doc/build/core/index.rst +++ b/doc/build/core/index.rst @@ -9,19 +9,11 @@ In contrast to the ORM’s domain-centric mode of usage, the SQL Expression Language provides a schema-centric usage paradigm. .. toctree:: - :maxdepth: 3 + :maxdepth: 2 tutorial expression_api schema - engines - connections - pooling - event - events - compiler - inspection - serializer - interfaces - exceptions - internals + types + engines_connections + api_basics diff --git a/doc/build/core/schema.rst b/doc/build/core/schema.rst index aeb04be18c..8553ebcbf6 100644 --- a/doc/build/core/schema.rst +++ b/doc/build/core/schema.rst @@ -33,7 +33,7 @@ real DDL. They are therefore most intuitive to those who have some background in creating real schema generation scripts. .. toctree:: - :maxdepth: 1 + :maxdepth: 2 metadata reflection @@ -41,5 +41,3 @@ in creating real schema generation scripts. constraints ddl - - diff --git a/doc/build/core/type_api.rst b/doc/build/core/type_api.rst new file mode 100644 index 0000000000..88da4939e8 --- /dev/null +++ b/doc/build/core/type_api.rst @@ -0,0 +1,22 @@ +.. module:: sqlalchemy.types + +.. _types_api: + +Base Type API +-------------- + +.. autoclass:: TypeEngine + :members: + + +.. autoclass:: Concatenable + :members: + :inherited-members: + + +.. autoclass:: NullType + + +.. autoclass:: Variant + + :members: with_variant, __init__ diff --git a/doc/build/core/type_basics.rst b/doc/build/core/type_basics.rst new file mode 100644 index 0000000000..e36da251a8 --- /dev/null +++ b/doc/build/core/type_basics.rst @@ -0,0 +1,226 @@ +Column and Data Types +===================== + +.. module:: sqlalchemy.types + +SQLAlchemy provides abstractions for most common database data types, +and a mechanism for specifying your own custom data types. + +The methods and attributes of type objects are rarely used directly. +Type objects are supplied to :class:`~sqlalchemy.schema.Table` definitions +and can be supplied as type hints to `functions` for occasions where +the database driver returns an incorrect type. + +.. code-block:: pycon + + >>> users = Table('users', metadata, + ... Column('id', Integer, primary_key=True) + ... Column('login', String(32)) + ... ) + + +SQLAlchemy will use the ``Integer`` and ``String(32)`` type +information when issuing a ``CREATE TABLE`` statement and will use it +again when reading back rows ``SELECTed`` from the database. 
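+
+For instance, emitting the ``metadata`` collection above to a database
+renders both types into DDL (a minimal sketch; the in-memory SQLite
+engine here is an assumption, created purely for illustration)::
+
+    from sqlalchemy import create_engine
+
+    engine = create_engine('sqlite://')
+
+    # emits a CREATE TABLE statement rendering INTEGER and VARCHAR(32)
+    metadata.create_all(engine)
+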
+Functions that accept a type (such as :func:`~sqlalchemy.schema.Column`) will +typically accept a type class or instance; ``Integer`` is equivalent +to ``Integer()`` with no construction arguments in this case. + +.. _types_generic: + +Generic Types +------------- + +Generic types specify a column that can read, write and store a +particular type of Python data. SQLAlchemy will choose the best +database column type available on the target database when issuing a +``CREATE TABLE`` statement. For complete control over which column +type is emitted in ``CREATE TABLE``, such as ``VARCHAR`` see `SQL +Standard Types`_ and the other sections of this chapter. + +.. autoclass:: BigInteger + :members: + +.. autoclass:: Boolean + :members: + +.. autoclass:: Date + :members: + +.. autoclass:: DateTime + :members: + +.. autoclass:: Enum + :members: __init__, create, drop + +.. autoclass:: Float + :members: + +.. autoclass:: Integer + :members: + +.. autoclass:: Interval + :members: + +.. autoclass:: LargeBinary + :members: + +.. autoclass:: Numeric + :members: + +.. autoclass:: PickleType + :members: + +.. autoclass:: SchemaType + :members: + :undoc-members: + +.. autoclass:: SmallInteger + :members: + +.. autoclass:: String + :members: + +.. autoclass:: Text + :members: + +.. autoclass:: Time + :members: + +.. autoclass:: Unicode + :members: + +.. autoclass:: UnicodeText + :members: + +.. _types_sqlstandard: + +SQL Standard Types +------------------ + +The SQL standard types always create database column types of the same +name when ``CREATE TABLE`` is issued. Some types may not be supported +on all databases. + +.. autoclass:: BIGINT + + +.. autoclass:: BINARY + + +.. autoclass:: BLOB + + +.. autoclass:: BOOLEAN + + +.. autoclass:: CHAR + + +.. autoclass:: CLOB + + +.. autoclass:: DATE + + +.. autoclass:: DATETIME + + +.. autoclass:: DECIMAL + + +.. autoclass:: FLOAT + + +.. autoclass:: INT + + +.. autoclass:: sqlalchemy.types.INTEGER + + +.. autoclass:: NCHAR + + +.. autoclass:: NVARCHAR + + +.. autoclass:: NUMERIC + + +.. autoclass:: REAL + + +.. autoclass:: SMALLINT + + +.. autoclass:: TEXT + + +.. autoclass:: TIME + + +.. autoclass:: TIMESTAMP + + +.. autoclass:: VARBINARY + + +.. autoclass:: VARCHAR + + +.. _types_vendor: + +Vendor-Specific Types +--------------------- + +Database-specific types are also available for import from each +database's dialect module. See the :ref:`dialect_toplevel` +reference for the database you're interested in. + +For example, MySQL has a ``BIGINT`` type and PostgreSQL has an +``INET`` type. To use these, import them from the module explicitly:: + + from sqlalchemy.dialects import mysql + + table = Table('foo', metadata, + Column('id', mysql.BIGINT), + Column('enumerates', mysql.ENUM('a', 'b', 'c')) + ) + +Or some PostgreSQL types:: + + from sqlalchemy.dialects import postgresql + + table = Table('foo', metadata, + Column('ipaddress', postgresql.INET), + Column('elements', postgresql.ARRAY(String)) + ) + +Each dialect provides the full set of typenames supported by +that backend within its `__all__` collection, so that a simple +`import *` or similar will import all supported types as +implemented for that backend:: + + from sqlalchemy.dialects.postgresql import * + + t = Table('mytable', metadata, + Column('id', INTEGER, primary_key=True), + Column('name', VARCHAR(300)), + Column('inetaddr', INET) + ) + +Where above, the INTEGER and VARCHAR types are ultimately from +sqlalchemy.types, and INET is specific to the Postgresql dialect. 
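+
+To see exactly what a given backend will render for these columns, the
+:class:`.CreateTable` construct can be compiled against a dialect
+directly (a minimal sketch, re-using the ``t`` table from the example
+above)::
+
+    from sqlalchemy.schema import CreateTable
+    from sqlalchemy.dialects import postgresql
+
+    # renders the CREATE TABLE statement as Postgresql would emit it,
+    # including the INET type for the 'inetaddr' column
+    print CreateTable(t).compile(dialect=postgresql.dialect())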
+ +Some dialect level types have the same name as the SQL standard type, +but also provide additional arguments. For example, MySQL implements +the full range of character and string types including additional arguments +such as `collation` and `charset`:: + + from sqlalchemy.dialects.mysql import VARCHAR, TEXT + + table = Table('foo', meta, + Column('col1', VARCHAR(200, collation='binary')), + Column('col2', TEXT(charset='latin1')) + ) + diff --git a/doc/build/core/types.rst b/doc/build/core/types.rst index 14e30e46d1..ab761a1cb0 100644 --- a/doc/build/core/types.rst +++ b/doc/build/core/types.rst @@ -3,744 +3,9 @@ Column and Data Types ===================== -.. module:: sqlalchemy.types +.. toctree:: + :maxdepth: 2 -SQLAlchemy provides abstractions for most common database data types, -and a mechanism for specifying your own custom data types. - -The methods and attributes of type objects are rarely used directly. -Type objects are supplied to :class:`~sqlalchemy.schema.Table` definitions -and can be supplied as type hints to `functions` for occasions where -the database driver returns an incorrect type. - -.. code-block:: pycon - - >>> users = Table('users', metadata, - ... Column('id', Integer, primary_key=True) - ... Column('login', String(32)) - ... ) - - -SQLAlchemy will use the ``Integer`` and ``String(32)`` type -information when issuing a ``CREATE TABLE`` statement and will use it -again when reading back rows ``SELECTed`` from the database. -Functions that accept a type (such as :func:`~sqlalchemy.schema.Column`) will -typically accept a type class or instance; ``Integer`` is equivalent -to ``Integer()`` with no construction arguments in this case. - -.. _types_generic: - -Generic Types -------------- - -Generic types specify a column that can read, write and store a -particular type of Python data. SQLAlchemy will choose the best -database column type available on the target database when issuing a -``CREATE TABLE`` statement. For complete control over which column -type is emitted in ``CREATE TABLE``, such as ``VARCHAR`` see `SQL -Standard Types`_ and the other sections of this chapter. - -.. autoclass:: BigInteger - :members: - -.. autoclass:: Boolean - :members: - -.. autoclass:: Date - :members: - -.. autoclass:: DateTime - :members: - -.. autoclass:: Enum - :members: __init__, create, drop - -.. autoclass:: Float - :members: - -.. autoclass:: Integer - :members: - -.. autoclass:: Interval - :members: - -.. autoclass:: LargeBinary - :members: - -.. autoclass:: Numeric - :members: - -.. autoclass:: PickleType - :members: - -.. autoclass:: SchemaType - :members: - :undoc-members: - -.. autoclass:: SmallInteger - :members: - -.. autoclass:: String - :members: - -.. autoclass:: Text - :members: - -.. autoclass:: Time - :members: - -.. autoclass:: Unicode - :members: - -.. autoclass:: UnicodeText - :members: - -.. _types_sqlstandard: - -SQL Standard Types ------------------- - -The SQL standard types always create database column types of the same -name when ``CREATE TABLE`` is issued. Some types may not be supported -on all databases. - -.. autoclass:: BIGINT - - -.. autoclass:: BINARY - - -.. autoclass:: BLOB - - -.. autoclass:: BOOLEAN - - -.. autoclass:: CHAR - - -.. autoclass:: CLOB - - -.. autoclass:: DATE - - -.. autoclass:: DATETIME - - -.. autoclass:: DECIMAL - - -.. autoclass:: FLOAT - - -.. autoclass:: INT - - -.. autoclass:: sqlalchemy.types.INTEGER - - -.. autoclass:: NCHAR - - -.. autoclass:: NVARCHAR - - -.. autoclass:: NUMERIC - - -.. autoclass:: REAL - - -.. 
autoclass:: SMALLINT - - -.. autoclass:: TEXT - - -.. autoclass:: TIME - - -.. autoclass:: TIMESTAMP - - -.. autoclass:: VARBINARY - - -.. autoclass:: VARCHAR - - -.. _types_vendor: - -Vendor-Specific Types ---------------------- - -Database-specific types are also available for import from each -database's dialect module. See the :ref:`dialect_toplevel` -reference for the database you're interested in. - -For example, MySQL has a ``BIGINT`` type and PostgreSQL has an -``INET`` type. To use these, import them from the module explicitly:: - - from sqlalchemy.dialects import mysql - - table = Table('foo', metadata, - Column('id', mysql.BIGINT), - Column('enumerates', mysql.ENUM('a', 'b', 'c')) - ) - -Or some PostgreSQL types:: - - from sqlalchemy.dialects import postgresql - - table = Table('foo', metadata, - Column('ipaddress', postgresql.INET), - Column('elements', postgresql.ARRAY(String)) - ) - -Each dialect provides the full set of typenames supported by -that backend within its `__all__` collection, so that a simple -`import *` or similar will import all supported types as -implemented for that backend:: - - from sqlalchemy.dialects.postgresql import * - - t = Table('mytable', metadata, - Column('id', INTEGER, primary_key=True), - Column('name', VARCHAR(300)), - Column('inetaddr', INET) - ) - -Where above, the INTEGER and VARCHAR types are ultimately from -sqlalchemy.types, and INET is specific to the Postgresql dialect. - -Some dialect level types have the same name as the SQL standard type, -but also provide additional arguments. For example, MySQL implements -the full range of character and string types including additional arguments -such as `collation` and `charset`:: - - from sqlalchemy.dialects.mysql import VARCHAR, TEXT - - table = Table('foo', meta, - Column('col1', VARCHAR(200, collation='binary')), - Column('col2', TEXT(charset='latin1')) - ) - -.. _types_custom: - -Custom Types ------------- - -A variety of methods exist to redefine the behavior of existing types -as well as to provide new ones. - -Overriding Type Compilation -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A frequent need is to force the "string" version of a type, that is -the one rendered in a CREATE TABLE statement or other SQL function -like CAST, to be changed. For example, an application may want -to force the rendering of ``BINARY`` for all platforms -except for one, in which is wants ``BLOB`` to be rendered. Usage -of an existing generic type, in this case :class:`.LargeBinary`, is -preferred for most use cases. But to control -types more accurately, a compilation directive that is per-dialect -can be associated with any type:: - - from sqlalchemy.ext.compiler import compiles - from sqlalchemy.types import BINARY - - @compiles(BINARY, "sqlite") - def compile_binary_sqlite(type_, compiler, **kw): - return "BLOB" - -The above code allows the usage of :class:`.types.BINARY`, which -will produce the string ``BINARY`` against all backends except SQLite, -in which case it will produce ``BLOB``. - -See the section :ref:`type_compilation_extension`, a subsection of -:ref:`sqlalchemy.ext.compiler_toplevel`, for additional examples. - -.. _types_typedecorator: - -Augmenting Existing Types -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :class:`.TypeDecorator` allows the creation of custom types which -add bind-parameter and result-processing behavior to an existing -type object. It is used when additional in-Python marshaling of data -to and from the database is required. - -.. 
note:: - - The bind- and result-processing of :class:`.TypeDecorator` - is *in addition* to the processing already performed by the hosted - type, which is customized by SQLAlchemy on a per-DBAPI basis to perform - processing specific to that DBAPI. To change the DBAPI-level processing - for an existing type, see the section :ref:`replacing_processors`. - -.. autoclass:: TypeDecorator - :members: - :inherited-members: - - -TypeDecorator Recipes -~~~~~~~~~~~~~~~~~~~~~ -A few key :class:`.TypeDecorator` recipes follow. - -.. _coerce_to_unicode: - -Coercing Encoded Strings to Unicode -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A common source of confusion regarding the :class:`.Unicode` type -is that it is intended to deal *only* with Python ``unicode`` objects -on the Python side, meaning values passed to it as bind parameters -must be of the form ``u'some string'`` if using Python 2 and not 3. -The encoding/decoding functions it performs are only to suit what the -DBAPI in use requires, and are primarily a private implementation detail. - -The use case of a type that can safely receive Python bytestrings, -that is strings that contain non-ASCII characters and are not ``u''`` -objects in Python 2, can be achieved using a :class:`.TypeDecorator` -which coerces as needed:: - - from sqlalchemy.types import TypeDecorator, Unicode - - class CoerceUTF8(TypeDecorator): - """Safely coerce Python bytestrings to Unicode - before passing off to the database.""" - - impl = Unicode - - def process_bind_param(self, value, dialect): - if isinstance(value, str): - value = value.decode('utf-8') - return value - -Rounding Numerics -^^^^^^^^^^^^^^^^^ - -Some database connectors like those of SQL Server choke if a Decimal is passed with too -many decimal places. Here's a recipe that rounds them down:: - - from sqlalchemy.types import TypeDecorator, Numeric - from decimal import Decimal - - class SafeNumeric(TypeDecorator): - """Adds quantization to Numeric.""" - - impl = Numeric - - def __init__(self, *arg, **kw): - TypeDecorator.__init__(self, *arg, **kw) - self.quantize_int = -(self.impl.precision - self.impl.scale) - self.quantize = Decimal(10) ** self.quantize_int - - def process_bind_param(self, value, dialect): - if isinstance(value, Decimal) and \ - value.as_tuple()[2] < self.quantize_int: - value = value.quantize(self.quantize) - return value - -.. _custom_guid_type: - -Backend-agnostic GUID Type -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Receives and returns Python uuid() objects. Uses the PG UUID type -when using Postgresql, CHAR(32) on other backends, storing them -in stringified hex format. Can be modified to store -binary in CHAR(16) if desired:: - - from sqlalchemy.types import TypeDecorator, CHAR - from sqlalchemy.dialects.postgresql import UUID - import uuid - - class GUID(TypeDecorator): - """Platform-independent GUID type. - - Uses Postgresql's UUID type, otherwise uses - CHAR(32), storing as stringified hex values. 
-
-        """
-        impl = CHAR
-
-        def load_dialect_impl(self, dialect):
-            if dialect.name == 'postgresql':
-                return dialect.type_descriptor(UUID())
-            else:
-                return dialect.type_descriptor(CHAR(32))
-
-        def process_bind_param(self, value, dialect):
-            if value is None:
-                return value
-            elif dialect.name == 'postgresql':
-                return str(value)
-            else:
-                if not isinstance(value, uuid.UUID):
-                    # hexstring of the UUID's integer value
-                    return "%.32x" % uuid.UUID(value).int
-                else:
-                    # hexstring
-                    return "%.32x" % value.int
-
-        def process_result_value(self, value, dialect):
-            if value is None:
-                return value
-            else:
-                return uuid.UUID(value)
-
-Marshal JSON Strings
-^^^^^^^^^^^^^^^^^^^^
-
-This type uses the ``json`` module to marshal Python data structures
-to/from JSON.  Can be modified to use ``simplejson`` or another encoder::
-
-    from sqlalchemy.types import TypeDecorator, VARCHAR
-    import json
-
-    class JSONEncodedDict(TypeDecorator):
-        """Represents an immutable structure as a json-encoded string.
-
-        Usage::
-
-            JSONEncodedDict(255)
-
-        """
-
-        impl = VARCHAR
-
-        def process_bind_param(self, value, dialect):
-            if value is not None:
-                value = json.dumps(value)
-
-            return value
-
-        def process_result_value(self, value, dialect):
-            if value is not None:
-                value = json.loads(value)
-            return value
-
-Note that the ORM by default will not detect "mutability" on such a type -
-meaning, in-place changes to values will not be detected and will not be
-flushed.  Without further steps, you instead would need to replace the existing
-value with a new one on each parent object to detect changes.  Note that
-there's nothing wrong with this, as many applications may not require that the
-values are ever mutated once created.  For those which do have this requirement,
-support for mutability is best applied using the ``sqlalchemy.ext.mutable``
-extension - see the example in :ref:`mutable_toplevel`.
-
-.. _replacing_processors:
-
-Replacing the Bind/Result Processing of Existing Types
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Most augmentation of type behavior at the bind/result level
-is achieved using :class:`.TypeDecorator`.  For the rare scenario
-where the specific processing applied by SQLAlchemy at the DBAPI
-level needs to be replaced, the SQLAlchemy type can be subclassed
-directly, and the ``bind_processor()`` or ``result_processor()``
-methods can be overridden.  Doing so requires that the
-``adapt()`` method also be overridden.  This method is the mechanism
-by which SQLAlchemy produces DBAPI-specific type behavior during
-statement execution.  Overriding it allows a copy of the custom
-type to be used in lieu of a DBAPI-specific type.  Below we subclass
-the :class:`.types.TIME` type to have custom result processing behavior.
-The ``process()`` function will receive ``value`` from the DBAPI
-cursor directly::
-
-    class MySpecialTime(TIME):
-        def __init__(self, special_argument):
-            super(MySpecialTime, self).__init__()
-            self.special_argument = special_argument
-
-        def result_processor(self, dialect, coltype):
-            import datetime
-            time = datetime.time
-            def process(value):
-                if value is not None:
-                    microseconds = value.microseconds
-                    seconds = value.seconds
-                    minutes = seconds / 60
-                    return time(
-                        minutes / 60,
-                        minutes % 60,
-                        seconds - minutes * 60,
-                        microseconds)
-                else:
-                    return None
-            return process
-
-        def adapt(self, impltype):
-            return MySpecialTime(self.special_argument)
-
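-As a quick illustration, a sketch of applying the custom type - the
-``events`` table and ``start`` column here are hypothetical, and we assume
-a DBAPI (such as MySQLdb) that returns a ``timedelta``-like value with
-``.seconds`` and ``.microseconds`` attributes for TIME columns::
-
-    from sqlalchemy import Table, Column, Integer, MetaData
-
-    metadata = MetaData()
-
-    # rows fetched from "start" pass through the process() function
-    # defined in result_processor() above before being returned
-    events = Table('events', metadata,
-        Column('id', Integer, primary_key=True),
-        Column('start', MySpecialTime(special_argument=True))
-    )
-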
-.. _types_sql_value_processing:
-
-Applying SQL-level Bind/Result Processing
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-As seen in the sections :ref:`types_typedecorator` and :ref:`replacing_processors`,
-SQLAlchemy allows Python functions to be invoked both when parameters are sent
-to a statement, as well as when result rows are loaded from the database, to apply
-transformations to the values as they are sent to or from the database.  It is also
-possible to define SQL-level transformations.  The rationale here is that sometimes
-only the relational database itself contains the functions that are necessary
-to coerce incoming and outgoing data between an application and persistence format.
-Examples include using database-defined encryption/decryption functions, as well
-as stored procedures that handle geographic data.  The Postgis extension to Postgresql
-includes an extensive array of SQL functions that are necessary for coercing
-data into particular formats.
-
-Any :class:`.TypeEngine`, :class:`.UserDefinedType` or :class:`.TypeDecorator` subclass
-can include implementations of
-:meth:`.TypeEngine.bind_expression` and/or :meth:`.TypeEngine.column_expression`, which
-when defined to return a non-``None`` value should return a :class:`.ColumnElement`
-expression to be injected into the SQL statement, either surrounding
-bound parameters or a column expression.  For example, to build a ``Geometry``
-type which will apply the Postgis function ``ST_GeomFromText`` to all outgoing
-values and the function ``ST_AsText`` to all incoming data, we can create
-our own subclass of :class:`.UserDefinedType` which provides these methods
-in conjunction with :data:`~.sqlalchemy.sql.expression.func`::
-
-    from sqlalchemy import func
-    from sqlalchemy.types import UserDefinedType
-
-    class Geometry(UserDefinedType):
-        def get_col_spec(self):
-            return "GEOMETRY"
-
-        def bind_expression(self, bindvalue):
-            return func.ST_GeomFromText(bindvalue, type_=self)
-
-        def column_expression(self, col):
-            return func.ST_AsText(col, type_=self)
-
-We can apply the ``Geometry`` type into :class:`.Table` metadata
-and use it in a :func:`.select` construct::
-
-    geometry = Table('geometry', metadata,
-        Column('geom_id', Integer, primary_key=True),
-        Column('geom_data', Geometry)
-    )
-
-    print select([geometry]).where(
-        geometry.c.geom_data == 'LINESTRING(189412 252431,189631 259122)')
-
-The resulting SQL embeds both functions as appropriate.  ``ST_AsText``
-is applied to the columns clause so that the return value is run through
-the function before passing into a result set, and ``ST_GeomFromText``
-is run on the bound parameter so that the passed-in value is converted::
-
-    SELECT geometry.geom_id, ST_AsText(geometry.geom_data) AS geom_data_1
-    FROM geometry
-    WHERE geometry.geom_data = ST_GeomFromText(:geom_data_2)
-
-The :meth:`.TypeEngine.column_expression` method interacts with the
-mechanics of the compiler such that the SQL expression does not interfere
-with the labeling of the wrapped expression.
-For example, if we render
-a :func:`.select` against a :func:`.label` of our expression, the string
-label is moved to the outside of the wrapped expression::
-
-    print select([geometry.c.geom_data.label('my_data')])
-
-Output::
-
-    SELECT ST_AsText(geometry.geom_data) AS my_data
-    FROM geometry
-
-For an example of subclassing a built-in type directly, we subclass
-:class:`.postgresql.BYTEA` to provide a ``PGPString``, which will make use of the
-Postgresql ``pgcrypto`` extension to encrypt/decrypt values
-transparently::
-
-    from sqlalchemy import create_engine, String, select, func, \
-        MetaData, Table, Column, type_coerce
-
-    from sqlalchemy.dialects.postgresql import BYTEA
-
-    class PGPString(BYTEA):
-        def __init__(self, passphrase, length=None):
-            super(PGPString, self).__init__(length)
-            self.passphrase = passphrase
-
-        def bind_expression(self, bindvalue):
-            # convert the bind's type from PGPString to
-            # String, so that it's passed to psycopg2 as is without
-            # a dbapi.Binary wrapper
-            bindvalue = type_coerce(bindvalue, String)
-            return func.pgp_sym_encrypt(bindvalue, self.passphrase)
-
-        def column_expression(self, col):
-            return func.pgp_sym_decrypt(col, self.passphrase)
-
-    metadata = MetaData()
-    message = Table('message', metadata,
-        Column('username', String(50)),
-        Column('message',
-            PGPString("this is my passphrase", length=1000)),
-    )
-
-    engine = create_engine("postgresql://scott:tiger@localhost/test", echo=True)
-    with engine.begin() as conn:
-        metadata.create_all(conn)
-
-        conn.execute(message.insert(), username="some user",
-                message="this is my message")
-
-        print conn.scalar(
-            select([message.c.message]).\
-                where(message.c.username == "some user")
-        )
-
-The ``pgp_sym_encrypt`` and ``pgp_sym_decrypt`` functions are applied
-to the INSERT and SELECT statements::
-
-    INSERT INTO message (username, message)
-      VALUES (%(username)s, pgp_sym_encrypt(%(message)s, %(pgp_sym_encrypt_1)s))
-      {'username': 'some user', 'message': 'this is my message',
-        'pgp_sym_encrypt_1': 'this is my passphrase'}
-
-    SELECT pgp_sym_decrypt(message.message, %(pgp_sym_decrypt_1)s) AS message_1
-      FROM message
-      WHERE message.username = %(username_1)s
-      {'pgp_sym_decrypt_1': 'this is my passphrase', 'username_1': 'some user'}
-
-
-.. versionadded:: 0.8  Added the :meth:`.TypeEngine.bind_expression` and
-   :meth:`.TypeEngine.column_expression` methods.
-
-See also:
-
-:ref:`examples_postgis`
-
-.. _types_operators:
-
-Redefining and Creating New Operators
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-SQLAlchemy Core defines a fixed set of expression operators available to all column expressions.
-Some of these operations have the effect of overloading Python's built-in operators;
-examples of such operators include
-:meth:`.ColumnOperators.__eq__` (``table.c.somecolumn == 'foo'``),
-:meth:`.ColumnOperators.__invert__` (``~table.c.flag``),
-and :meth:`.ColumnOperators.__add__` (``table.c.x + table.c.y``).  Other operators are exposed as
-explicit methods on column expressions, such as
-:meth:`.ColumnOperators.in_` (``table.c.value.in_(['x', 'y'])``) and :meth:`.ColumnOperators.like`
-(``table.c.value.like('%ed%')``).
-
-The Core expression constructs in all cases consult the type of the expression in order to determine
-the behavior of existing operators, as well as to locate additional operators that aren't part of
-the built-in set.
The :class:`.TypeEngine` base class defines a root "comparison" implementation -:class:`.TypeEngine.Comparator`, and many specific types provide their own sub-implementations of this -class. User-defined :class:`.TypeEngine.Comparator` implementations can be built directly into a -simple subclass of a particular type in order to override or define new operations. Below, -we create a :class:`.Integer` subclass which overrides the :meth:`.ColumnOperators.__add__` operator:: - - from sqlalchemy import Integer - - class MyInt(Integer): - class comparator_factory(Integer.Comparator): - def __add__(self, other): - return self.op("goofy")(other) - -The above configuration creates a new class ``MyInt``, which -establishes the :attr:`.TypeEngine.comparator_factory` attribute as -referring to a new class, subclassing the :class:`.TypeEngine.Comparator` class -associated with the :class:`.Integer` type. - -Usage:: - - >>> sometable = Table("sometable", metadata, Column("data", MyInt)) - >>> print sometable.c.data + 5 - sometable.data goofy :data_1 - -The implementation for :meth:`.ColumnOperators.__add__` is consulted -by an owning SQL expression, by instantiating the :class:`.TypeEngine.Comparator` with -itself as the ``expr`` attribute. The mechanics of the expression -system are such that operations continue recursively until an -expression object produces a new SQL expression construct. Above, we -could just as well have said ``self.expr.op("goofy")(other)`` instead -of ``self.op("goofy")(other)``. - -New methods added to a :class:`.TypeEngine.Comparator` are exposed on an -owning SQL expression -using a ``__getattr__`` scheme, which exposes methods added to -:class:`.TypeEngine.Comparator` onto the owning :class:`.ColumnElement`. -For example, to add a ``log()`` function -to integers:: - - from sqlalchemy import Integer, func - - class MyInt(Integer): - class comparator_factory(Integer.Comparator): - def log(self, other): - return func.log(self.expr, other) - -Using the above type:: - - >>> print sometable.c.data.log(5) - log(:log_1, :log_2) - - -Unary operations -are also possible. For example, to add an implementation of the -Postgresql factorial operator, we combine the :class:`.UnaryExpression` construct -along with a :class:`.custom_op` to produce the factorial expression:: - - from sqlalchemy import Integer - from sqlalchemy.sql.expression import UnaryExpression - from sqlalchemy.sql import operators - - class MyInteger(Integer): - class comparator_factory(Integer.Comparator): - def factorial(self): - return UnaryExpression(self.expr, - modifier=operators.custom_op("!"), - type_=MyInteger) - -Using the above type:: - - >>> from sqlalchemy.sql import column - >>> print column('x', MyInteger).factorial() - x ! - -See also: - -:attr:`.TypeEngine.comparator_factory` - -.. versionadded:: 0.8 The expression system was enhanced to support - customization of operators on a per-type level. - - -Creating New Types -~~~~~~~~~~~~~~~~~~ - -The :class:`.UserDefinedType` class is provided as a simple base class -for defining entirely new database types. Use this to represent native -database types not known by SQLAlchemy. If only Python translation behavior -is needed, use :class:`.TypeDecorator` instead. - -.. autoclass:: UserDefinedType - :members: - - -.. _types_api: - -Base Type API --------------- - -.. autoclass:: TypeEngine - :members: - - -.. autoclass:: Concatenable - :members: - :inherited-members: - - -.. autoclass:: NullType - - -.. 
autoclass:: Variant - - :members: with_variant, __init__ + type_basics + custom_types + type_api diff --git a/doc/build/faq.rst b/doc/build/faq.rst deleted file mode 100644 index 12d8e0acc6..0000000000 --- a/doc/build/faq.rst +++ /dev/null @@ -1,1471 +0,0 @@ -:orphan: - -.. _faq_toplevel: - -============================ -Frequently Asked Questions -============================ - -.. contents:: - :local: - :class: faq - :backlinks: none - - -Connections / Engines -===================== - -How do I configure logging? ---------------------------- - -See :ref:`dbengine_logging`. - -How do I pool database connections? Are my connections pooled? ----------------------------------------------------------------- - -SQLAlchemy performs application-level connection pooling automatically -in most cases. With the exception of SQLite, a :class:`.Engine` object -refers to a :class:`.QueuePool` as a source of connectivity. - -For more detail, see :ref:`engines_toplevel` and :ref:`pooling_toplevel`. - -How do I pass custom connect arguments to my database API? ------------------------------------------------------------ - -The :func:`.create_engine` call accepts additional arguments either -directly via the ``connect_args`` keyword argument:: - - e = create_engine("mysql://scott:tiger@localhost/test", - connect_args={"encoding": "utf8"}) - -Or for basic string and integer arguments, they can usually be specified -in the query string of the URL:: - - e = create_engine("mysql://scott:tiger@localhost/test?encoding=utf8") - -.. seealso:: - - :ref:`custom_dbapi_args` - -"MySQL Server has gone away" ----------------------------- - -There are two major causes for this error: - -1. The MySQL client closes connections which have been idle for a set period -of time, defaulting to eight hours. This can be avoided by using the ``pool_recycle`` -setting with :func:`.create_engine`, described at :ref:`mysql_connection_timeouts`. - -2. Usage of the MySQLdb :term:`DBAPI`, or a similar DBAPI, in a non-threadsafe manner, or in an otherwise -inappropriate way. The MySQLdb connection object is not threadsafe - this expands -out to any SQLAlchemy system that links to a single connection, which includes the ORM -:class:`.Session`. For background -on how :class:`.Session` should be used in a multithreaded environment, -see :ref:`session_faq_threadsafe`. - -Why does SQLAlchemy issue so many ROLLBACKs? ---------------------------------------------- - -SQLAlchemy currently assumes DBAPI connections are in "non-autocommit" mode - -this is the default behavior of the Python database API, meaning it -must be assumed that a transaction is always in progress. The -connection pool issues ``connection.rollback()`` when a connection is returned. -This is so that any transactional resources remaining on the connection are -released. On a database like Postgresql or MSSQL where table resources are -aggressively locked, this is critical so that rows and tables don't remain -locked within connections that are no longer in use. An application can -otherwise hang. It's not just for locks, however, and is equally critical on -any database that has any kind of transaction isolation, including MySQL with -InnoDB. Any connection that is still inside an old transaction will return -stale data, if that data was already queried on that connection within -isolation. For background on why you might see stale data even on MySQL, see -http://dev.mysql.com/doc/refman/5.1/en/innodb-transaction-model.html - -I'm on MyISAM - how do I turn it off? 
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The connection pool's connection return behavior can be
-configured using ``reset_on_return``::
-
-    from sqlalchemy import create_engine
-    from sqlalchemy.pool import QueuePool
-
-    engine = create_engine('mysql://scott:tiger@localhost/myisam_database', pool=QueuePool(reset_on_return=False))
-
-I'm on SQL Server - how do I turn those ROLLBACKs into COMMITs?
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-``reset_on_return`` accepts the values ``commit``, ``rollback`` in addition
-to ``True``, ``False``, and ``None``.   Setting to ``commit`` will cause
-a COMMIT as any connection is returned to the pool::
-
-    engine = create_engine('mssql://scott:tiger@mydsn', pool=QueuePool(reset_on_return='commit'))
-
-
-I am using multiple connections with a SQLite database (typically to test transaction operation), and my test program is not working!
-----------------------------------------------------------------------------------------------------------------------------------------------------------
-
-If using a SQLite ``:memory:`` database, or a version of SQLAlchemy prior
-to version 0.7, the default connection pool is the :class:`.SingletonThreadPool`,
-which maintains exactly one SQLite connection per thread.  So two
-connections in use in the same thread will actually be the same SQLite
-connection.  Make sure you're not using a ``:memory:`` database and
-use :class:`.NullPool`, which is the default for non-memory databases in
-current SQLAlchemy versions.
-
-.. seealso::
-
-    :ref:`pysqlite_threading_pooling` - info on PySQLite's behavior.
-
-How do I get at the raw DBAPI connection when using an Engine?
---------------------------------------------------------------
-
-With a regular SA engine-level Connection, you can get at a pool-proxied
-version of the DBAPI connection via the :attr:`.Connection.connection` attribute on
-:class:`.Connection`, and for the really-real DBAPI connection you can call the
-:attr:`.ConnectionFairy.connection` attribute on that - but there should never be any need to access
-the non-pool-proxied DBAPI connection, as all methods are proxied through::
-
-    engine = create_engine(...)
-    conn = engine.connect()
-    conn.connection.<do DBAPI things>
-    cursor = conn.connection.cursor(<DBAPI specific arguments..>)
-
-You must ensure that you revert any isolation level settings or other
-operation-specific settings on the connection back to normal before returning
-it to the pool.
-
-As an alternative to reverting settings, you can call the :meth:`.Connection.detach` method on
-either :class:`.Connection` or the proxied connection, which will de-associate
-the connection from the pool such that it will be closed and discarded
-when :meth:`.Connection.close` is called::
-
-    conn = engine.connect()
-    conn.detach()  # detaches the DBAPI connection from the connection pool
-    conn.connection.<go nuts>
-    conn.close()  # connection is closed for real, the pool replaces it with a new connection
-
-MetaData / Schema
-==================
-
-My program is hanging when I say ``table.drop()`` / ``metadata.drop_all()``
-----------------------------------------------------------------------------
-
-This usually corresponds to two conditions: 1. using PostgreSQL, which is really
-strict about table locks, and 2. you have a connection still open which
-contains locks on the table and is distinct from the connection being used for
-the DROP statement.
-Here's the most minimal version of the pattern::
-
-    connection = engine.connect()
-    result = connection.execute(mytable.select())
-
-    mytable.drop(engine)
-
-Above, a connection pool connection is still checked out; furthermore, the
-result object above also maintains a link to this connection.  If
-"implicit execution" is used, the result will hold this connection opened until
-the result object is closed or all rows are exhausted.
-
-The call to ``mytable.drop(engine)`` attempts to emit DROP TABLE on a second
-connection procured from the :class:`.Engine` which will lock.
-
-The solution is to close out all connections before emitting DROP TABLE::
-
-    connection = engine.connect()
-    result = connection.execute(mytable.select())
-
-    # fully read result sets
-    result.fetchall()
-
-    # close connections
-    connection.close()
-
-    # now locks are removed
-    mytable.drop(engine)
-
-Does SQLAlchemy support ALTER TABLE, CREATE VIEW, CREATE TRIGGER, Schema Upgrade Functionality?
------------------------------------------------------------------------------------------------
-
-General ALTER support isn't present in SQLAlchemy directly.  For special DDL
-on an ad-hoc basis, the :class:`.DDL` and related constructs can be used.
-See :doc:`core/ddl` for a discussion on this subject.
-
-A more comprehensive option is to use schema migration tools, such as Alembic
-or SQLAlchemy-Migrate; see :ref:`schema_migrations` for discussion on this.
-
-How can I sort Table objects in order of their dependency?
------------------------------------------------------------
-
-This is available via the :attr:`.MetaData.sorted_tables` attribute::
-
-    metadata = MetaData()
-    # ... add Table objects to metadata
-    ti = metadata.sorted_tables
-    for t in ti:
-        print t
-
-How can I get the CREATE TABLE/ DROP TABLE output as a string?
---------------------------------------------------------------
-
-Modern SQLAlchemy has clause constructs which represent DDL operations.  These
-can be rendered to strings like any other SQL expression::
-
-    from sqlalchemy.schema import CreateTable
-
-    print CreateTable(mytable)
-
-To get the string specific to a certain engine::
-
-    print CreateTable(mytable).compile(engine)
-
-There's also a special form of :class:`.Engine` that can let you dump an entire
-metadata creation sequence, using this recipe::
-
-    def dump(sql, *multiparams, **params):
-        print sql.compile(dialect=engine.dialect)
-    engine = create_engine('postgresql://', strategy='mock', executor=dump)
-    metadata.create_all(engine, checkfirst=False)
-
-The Alembic tool also supports
-an "offline" SQL generation mode that renders database migrations as SQL scripts.
-
-How can I subclass Table/Column to provide certain behaviors/configurations?
------------------------------------------------------------------------------
-
-:class:`.Table` and :class:`.Column` are not good targets for direct subclassing.
-However, there are simple ways to get on-construction behaviors using creation
-functions, and behaviors related to the linkages between schema objects such as
-constraint conventions or naming conventions using attachment events.
-An example of many of these
-techniques can be seen at the Naming Conventions recipe.
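-
-As a brief illustration of the creation-function approach - this is a
-sketch, not an established API; the helper names here are hypothetical::
-
-    from sqlalchemy import Column, Integer, String, Table, MetaData
-
-    # creation functions return pre-configured Column objects,
-    # giving on-construction behavior without subclassing Column
-    def id_column():
-        return Column('id', Integer, primary_key=True)
-
-    def name_column(length=50):
-        return Column('name', String(length), nullable=False)
-
-    metadata = MetaData()
-    user = Table('user', metadata, id_column(), name_column(100))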
-
-
-SQL Expressions
-=================
-
-.. _faq_sql_expression_string:
-
-How do I render SQL expressions as strings, possibly with bound parameters inlined?
-------------------------------------------------------------------------------------
-
-The "stringification" of a SQLAlchemy statement or Query in the vast majority
-of cases is as simple as::
-
-    print(str(statement))
-
-This applies both to an ORM :class:`~.orm.query.Query` as well as any :func:`.select` or other
-statement.  Additionally, to get the statement as compiled to a
-specific dialect or engine, if the statement itself is not already
-bound to one you can pass this in to :meth:`.ClauseElement.compile`::
-
-    print(statement.compile(someengine))
-
-or without an :class:`.Engine`::
-
-    from sqlalchemy.dialects import postgresql
-    print(statement.compile(dialect=postgresql.dialect()))
-
-When given an ORM :class:`~.orm.query.Query` object, in order to get at the
-:meth:`.ClauseElement.compile`
-method we only need to access the :attr:`~.orm.query.Query.statement`
-accessor first::
-
-    statement = query.statement
-    print(statement.compile(someengine))
-
-The above forms will render the SQL statement as it is passed to the Python
-:term:`DBAPI`, which includes that bound parameters are not rendered inline.
-SQLAlchemy normally does not stringify bound parameters, as this is handled
-appropriately by the Python DBAPI, not to mention bypassing bound
-parameters is probably the most widely exploited security hole in
-modern web applications.  SQLAlchemy has limited ability to do this
-stringification in certain circumstances such as that of emitting DDL.
-In order to access this functionality one can use the ``literal_binds``
-flag, passed to ``compile_kwargs``::
-
-    from sqlalchemy.sql import table, column, select
-
-    t = table('t', column('x'))
-
-    s = select([t]).where(t.c.x == 5)
-
-    print(s.compile(compile_kwargs={"literal_binds": True}))
-
-The above approach has the caveat that it is only supported for basic
-types, such as ints and strings, and furthermore if a :func:`.bindparam`
-without a pre-set value is used directly, it won't be able to
-stringify that either.
-
-To support inline literal rendering for types not supported, implement
-a :class:`.TypeDecorator` for the target type which includes a
-:meth:`.TypeDecorator.process_literal_param` method::
-
-    from sqlalchemy import TypeDecorator, Integer
-
-
-    class MyFancyType(TypeDecorator):
-        impl = Integer
-
-        def process_literal_param(self, value, dialect):
-            return "my_fancy_formatting(%s)" % value
-
-    from sqlalchemy import Table, Column, MetaData
-
-    tab = Table('mytable', MetaData(), Column('x', MyFancyType()))
-
-    print(
-        tab.select().where(tab.c.x > 5).compile(
-            compile_kwargs={"literal_binds": True})
-    )
-
-producing output like::
-
-    SELECT mytable.x
-    FROM mytable
-    WHERE mytable.x > my_fancy_formatting(5)
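-
-Putting the pieces together, a sketch of rendering an ORM Query against a
-specific dialect with literal values inlined - ``query`` here is assumed to
-be an existing :class:`~.orm.query.Query` using basic types only::
-
-    from sqlalchemy.dialects import postgresql
-
-    # .statement gives the Core statement behind the Query;
-    # literal_binds inlines simple bound values (basic types only)
-    print(query.statement.compile(
-        dialect=postgresql.dialect(),
-        compile_kwargs={"literal_binds": True}))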
-
-
-Why does ``.col.in_([])`` Produce ``col != col``? Why not ``1=0``?
--------------------------------------------------------------------
-
-A little introduction to the issue.  The IN operator in SQL, given a list of
-elements to compare against a column, generally does not accept an empty list,
-that is, while it is valid to say::
-
-    column IN (1, 2, 3)
-
-it's not valid to say::
-
-    column IN ()
-
-SQLAlchemy's :meth:`.Operators.in_` operator, when given an empty list, produces this
-expression::
-
-    column != column
-
-As of version 0.6, it also produces a warning stating that a less efficient
-comparison operation will be rendered.  This expression is the only one that is
-both database agnostic and produces correct results.
-
-For example, the naive approach of "just evaluate to false, by comparing 1=0
-or 1!=1", does not handle nulls properly.  An expression like::
-
-    NOT column != column
-
-will not return a row when "column" is null, but an expression which does not
-take the column into account::
-
-    NOT 1=0
-
-will.
-
-Closer to the mark is the following CASE expression::
-
-    CASE WHEN column IS NOT NULL THEN 1=0 ELSE NULL END
-
-We don't use this expression due to its verbosity, and it's also not
-typically accepted by Oracle within a WHERE clause - depending
-on how you phrase it, you'll either get "ORA-00905: missing keyword" or
-"ORA-00920: invalid relational operator".  It's also still less efficient than
-just rendering SQL without the clause altogether (or not issuing the SQL at
-all, if the statement is just a simple search).
-
-The best approach therefore is to avoid the usage of IN given an argument list
-of zero length.  Instead, don't emit the Query in the first place, if no rows
-should be returned.  The warning is best promoted to a full error condition
-using the Python warnings filter (see http://docs.python.org/library/warnings.html).
-
-ORM Configuration
-==================
-
-.. _faq_mapper_primary_key:
-
-How do I map a table that has no primary key?
----------------------------------------------
-
-The SQLAlchemy ORM, in order to map to a particular table, needs there to be
-at least one column denoted as a primary key column; multiple-column,
-i.e. composite, primary keys are of course entirely feasible as well.  These
-columns do **not** need to be actually known to the database as primary key
-columns, though it's a good idea that they are.  It's only necessary that the columns
-*behave* as a primary key does, e.g. as a unique and not nullable identifier
-for a row.
-
-Most ORMs require that objects have some kind of primary key defined
-because the object in memory must correspond to a uniquely identifiable
-row in the database table; at the very least, this allows the
-object to be targeted for UPDATE and DELETE statements which will affect only
-that object's row and no other.  However, the importance of the primary key
-goes far beyond that.  In SQLAlchemy, all ORM-mapped objects are at all times
-linked uniquely within a :class:`.Session`
-to their specific database row using a pattern called the :term:`identity map`,
-a pattern that's central to the unit of work system employed by SQLAlchemy,
-and is also key to the most common (and not-so-common) patterns of ORM usage.
-
-
-.. note::
-
-    It's important to note that we're only talking about the SQLAlchemy ORM; an
-    application which builds on Core and deals only with :class:`.Table` objects,
-    :func:`.select` constructs and the like, **does not** need any primary key
-    to be present on or associated with a table in any way (though again, in SQL, all tables
-    should really have some kind of primary key, lest you need to actually
-    update or delete specific rows).
-
-In almost all cases, a table does have a so-called :term:`candidate key`, which is a column or series
-of columns that uniquely identify a row.  If a table truly doesn't have this, and has actual
-fully duplicate rows, the table does not correspond to first normal form and cannot be mapped.
-Otherwise, whatever columns comprise the best candidate key can be
-applied directly to the mapper::
-
-    class SomeClass(Base):
-        __table__ = some_table_with_no_pk
-        __mapper_args__ = {
-            'primary_key':[some_table_with_no_pk.c.uid, some_table_with_no_pk.c.bar]
-        }
-
-Better yet, when using fully declared table metadata, use the ``primary_key=True``
-flag on those columns::
-
-    class SomeClass(Base):
-        __tablename__ = "some_table_with_no_pk"
-
-        uid = Column(Integer, primary_key=True)
-        bar = Column(String, primary_key=True)
-
-All tables in a relational database should have primary keys.  Even a many-to-many
-association table - the primary key would be the composite of the two association
-columns::
-
-    CREATE TABLE my_association (
-      user_id INTEGER REFERENCES user(id),
-      account_id INTEGER REFERENCES account(id),
-      PRIMARY KEY (user_id, account_id)
-    )
-
-
-How do I configure a Column that is a Python reserved word or similar?
-----------------------------------------------------------------------------
-
-Column-based attributes can be given any name desired in the mapping.  See
-:ref:`mapper_column_distinct_names`.
-
-How do I get a list of all columns, relationships, mapped attributes, etc. given a mapped class?
--------------------------------------------------------------------------------------------------
-
-This information is all available from the :class:`.Mapper` object.
-
-To get at the :class:`.Mapper` for a particular mapped class, call the
-:func:`.inspect` function on it::
-
-    from sqlalchemy import inspect
-
-    mapper = inspect(MyClass)
-
-From there, all information about the class can be acquired using such methods as:
-
-* :attr:`.Mapper.attrs` - a namespace of all mapped attributes.  The attributes
-  themselves are instances of :class:`.MapperProperty`, which contain additional
-  attributes that can lead to the mapped SQL expression or column, if applicable.
-
-* :attr:`.Mapper.column_attrs` - the mapped attribute namespace
-  limited to column and SQL expression attributes.  You might want to use
-  :attr:`.Mapper.columns` to get at the :class:`.Column` objects directly.
-
-* :attr:`.Mapper.relationships` - namespace of all :class:`.RelationshipProperty` attributes.
-
-* :attr:`.Mapper.all_orm_descriptors` - namespace of all mapped attributes, plus user-defined
-  attributes defined using systems such as :class:`.hybrid_property`, :class:`.AssociationProxy` and others.
-
-* :attr:`.Mapper.columns` - A namespace of :class:`.Column` objects and other named
-  SQL expressions associated with the mapping.
-
-* :attr:`.Mapper.mapped_table` - The :class:`.Table` or other selectable to which
-  this mapper is mapped.
-
-* :attr:`.Mapper.local_table` - The :class:`.Table` that is "local" to this mapper;
-  this differs from :attr:`.Mapper.mapped_table` in the case of a mapper mapped
-  using inheritance to a composed selectable.
-
-.. _faq_combining_columns:
-
-I'm getting a warning or error about "Implicitly combining column X under attribute Y"
---------------------------------------------------------------------------------------
-
-This condition refers to when a mapping contains two columns that are being
-mapped under the same attribute name due to their name, but there's no indication
-that this is intentional.
A mapped class needs to have explicit names for -every attribute that is to store an independent value; when two columns have the -same name and aren't disambiguated, they fall under the same attribute and -the effect is that the value from one column is **copied** into the other, based -on which column was assigned to the attribute first. - -This behavior is often desirable and is allowed without warning in the case -where the two columns are linked together via a foreign key relationship -within an inheritance mapping. When the warning or exception occurs, the -issue can be resolved by either assigning the columns to differently-named -attributes, or if combining them together is desired, by using -:func:`.column_property` to make this explicit. - -Given the example as follows:: - - from sqlalchemy import Integer, Column, ForeignKey - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - - class B(A): - __tablename__ = 'b' - - id = Column(Integer, primary_key=True) - a_id = Column(Integer, ForeignKey('a.id')) - -As of SQLAlchemy version 0.9.5, the above condition is detected, and will -warn that the ``id`` column of ``A`` and ``B`` is being combined under -the same-named attribute ``id``, which above is a serious issue since it means -that a ``B`` object's primary key will always mirror that of its ``A``. - -A mapping which resolves this is as follows:: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - - class B(A): - __tablename__ = 'b' - - b_id = Column('id', Integer, primary_key=True) - a_id = Column(Integer, ForeignKey('a.id')) - -Suppose we did want ``A.id`` and ``B.id`` to be mirrors of each other, despite -the fact that ``B.a_id`` is where ``A.id`` is related. We could combine -them together using :func:`.column_property`:: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - - class B(A): - __tablename__ = 'b' - - # probably not what you want, but this is a demonstration - id = column_property(Column(Integer, primary_key=True), A.id) - a_id = Column(Integer, ForeignKey('a.id')) - - - -I'm using Declarative and setting primaryjoin/secondaryjoin using an ``and_()`` or ``or_()``, and I am getting an error message about foreign keys. ------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Are you doing this?:: - - class MyClass(Base): - # .... - - foo = relationship("Dest", primaryjoin=and_("MyClass.id==Dest.foo_id", "MyClass.foo==Dest.bar")) - -That's an ``and_()`` of two string expressions, which SQLAlchemy cannot apply any mapping towards. Declarative allows :func:`.relationship` arguments to be specified as strings, which are converted into expression objects using ``eval()``. But this doesn't occur inside of an ``and_()`` expression - it's a special operation declarative applies only to the *entirety* of what's passed to primaryjoin or other arguments as a string:: - - class MyClass(Base): - # .... - - foo = relationship("Dest", primaryjoin="and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar)") - -Or if the objects you need are already available, skip the strings:: - - class MyClass(Base): - # .... - - foo = relationship(Dest, primaryjoin=and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar)) - -The same idea applies to all the other arguments, such as ``foreign_keys``:: - - # wrong ! 
-    foo = relationship(Dest, foreign_keys=["Dest.foo_id", "Dest.bar_id"])
-
-    # correct !
-    foo = relationship(Dest, foreign_keys="[Dest.foo_id, Dest.bar_id]")
-
-    # also correct !
-    foo = relationship(Dest, foreign_keys=[Dest.foo_id, Dest.bar_id])
-
-    # if you're using columns from the class that you're inside of, just use the column objects !
-    class MyClass(Base):
-        foo_id = Column(...)
-        bar_id = Column(...)
-        # ...
-
-        foo = relationship(Dest, foreign_keys=[foo_id, bar_id])
-
-.. _faq_subqueryload_limit_sort:
-
-Why is ``ORDER BY`` required with ``LIMIT`` (especially with ``subqueryload()``)?
----------------------------------------------------------------------------------
-
-A relational database can return rows in any
-arbitrary order when an explicit ordering is not set.
-While this ordering very often corresponds to the natural
-order of rows within a table, this is not the case for all databases and
-all queries.  The consequence of this is that any query that limits rows
-using ``LIMIT`` or ``OFFSET`` should **always** specify an ``ORDER BY``.
-Otherwise, it is not deterministic which rows will actually be returned.
-
-When we use a SQLAlchemy method like :meth:`.Query.first`, we are in fact
-applying a ``LIMIT`` of one to the query, so without an explicit ordering
-it is not deterministic what row we actually get back.
-While we may not notice this for simple queries on databases that usually
-return rows in their natural
-order, it becomes much more of an issue if we also use :func:`.orm.subqueryload`
-to load related collections, and we may not be loading the collections
-as intended.
-
-SQLAlchemy implements :func:`.orm.subqueryload` by issuing a separate query,
-the results of which are matched up to the results from the first query.
-We see two queries emitted like this:
-
-.. sourcecode:: python+sql
-
-    >>> session.query(User).options(subqueryload(User.addresses)).all()
-    {opensql}-- the "main" query
-    SELECT users.id AS users_id
-    FROM users
-    {stop}
-    {opensql}-- the "load" query issued by subqueryload
-    SELECT addresses.id AS addresses_id,
-           addresses.user_id AS addresses_user_id,
-           anon_1.users_id AS anon_1_users_id
-    FROM (SELECT users.id AS users_id FROM users) AS anon_1
-    JOIN addresses ON anon_1.users_id = addresses.user_id
-    ORDER BY anon_1.users_id
-
-The second query embeds the first query as a source of rows.
-When the inner query uses ``OFFSET`` and/or ``LIMIT`` without ordering,
-the two queries may not see the same results:
-
-.. sourcecode:: python+sql
-
-    >>> user = session.query(User).options(subqueryload(User.addresses)).first()
-    {opensql}-- the "main" query
-    SELECT users.id AS users_id
-    FROM users
-    LIMIT 1
-    {stop}
-    {opensql}-- the "load" query issued by subqueryload
-    SELECT addresses.id AS addresses_id,
-           addresses.user_id AS addresses_user_id,
-           anon_1.users_id AS anon_1_users_id
-    FROM (SELECT users.id AS users_id FROM users LIMIT 1) AS anon_1
-    JOIN addresses ON anon_1.users_id = addresses.user_id
-    ORDER BY anon_1.users_id
-
-Depending on database specifics, there is
-a chance we may get a result like the following for the two queries::
-
-    -- query #1
-    +--------+
-    |users_id|
-    +--------+
-    |       1|
-    +--------+
-
-    -- query #2
-    +------------+-----------------+---------------+
-    |addresses_id|addresses_user_id|anon_1_users_id|
-    +------------+-----------------+---------------+
-    |           3|                2|              2|
-    +------------+-----------------+---------------+
-    |           4|                2|              2|
-    +------------+-----------------+---------------+
-
-Above, we receive two ``addresses`` rows for ``user.id`` of 2, and none for
-1.  We've wasted two rows and failed to actually load the collection.  This
-is an insidious error because without looking at the SQL and the results, the
-ORM will not show that there's any issue; if we access the ``addresses``
-for the ``User`` we have, it will emit a lazy load for the collection and we
-won't see that anything actually went wrong.
-
-The solution to this problem is to always specify a deterministic sort order,
-so that the main query always returns the same set of rows.  This generally
-means that you should :meth:`.Query.order_by` on a unique column on the table.
-The primary key is a good choice for this::
-
-    session.query(User).options(subqueryload(User.addresses)).order_by(User.id).first()
-
-Note that :func:`.joinedload` does not suffer from the same problem because
-only one query is ever issued, so the load query cannot be different from the
-main query.
-
-.. seealso::
-
-    :ref:`subqueryload_ordering`
-
-Performance
-===========
-
-How can I profile a SQLAlchemy powered application?
----------------------------------------------------
-
-Looking for performance issues typically involves two strategies.  One
-is query profiling, and the other is code profiling.
-
-Query Profiling
-^^^^^^^^^^^^^^^^
-
-Sometimes just plain SQL logging (enabled via python's logging module
-or via the ``echo=True`` argument on :func:`.create_engine`) can give an
-idea how long things are taking.  For example, if you log something
-right after a SQL operation, you'd see something like this in your
-log::
-
-    17:37:48,325 INFO  [sqlalchemy.engine.base.Engine.0x...048c] SELECT ...
-    17:37:48,326 INFO  [sqlalchemy.engine.base.Engine.0x...048c] {}
-    17:37:48,660 DEBUG [myapp.somemessage]
-
-if you logged ``myapp.somemessage`` right after the operation, you know
-it took 334ms to complete the SQL part of things.
-
-Logging SQL will also illustrate if dozens/hundreds of queries are
-being issued which could be better organized into much fewer queries.
-When using the SQLAlchemy ORM, the "eager loading"
-feature is provided to partially (:func:`.contains_eager()`) or fully
-(:func:`.joinedload()`, :func:`.subqueryload()`)
-automate this activity, but without
-the ORM "eager loading" typically means to use joins so that results across multiple
-tables can be loaded in one result set instead of multiplying numbers
-of queries as more depth is added (i.e. ``r + r*r2 + r*r2*r3`` ...), as
-sketched below.
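-
-As a brief sketch of collapsing such an N+1 query pattern with eager loading -
-assuming a ``User``/``Address`` mapping where ``User.addresses`` is a
-:func:`.relationship`, as elsewhere in this document::
-
-    from sqlalchemy.orm import joinedload
-
-    # one query with a JOIN, rather than one query per User row
-    users = session.query(User).options(joinedload(User.addresses)).all()
-
-    # the collections are already loaded; no further SQL is emitted
-    for user in users:
-        print user.addresses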
- -For more long-term profiling of queries, or to implement an application-side -"slow query" monitor, events can be used to intercept cursor executions, -using a recipe like the following:: - - from sqlalchemy import event - from sqlalchemy.engine import Engine - import time - import logging - - logging.basicConfig() - logger = logging.getLogger("myapp.sqltime") - logger.setLevel(logging.DEBUG) - - @event.listens_for(Engine, "before_cursor_execute") - def before_cursor_execute(conn, cursor, statement, - parameters, context, executemany): - conn.info.setdefault('query_start_time', []).append(time.time()) - logger.debug("Start Query: %s" % statement) - - @event.listens_for(Engine, "after_cursor_execute") - def after_cursor_execute(conn, cursor, statement, - parameters, context, executemany): - total = time.time() - conn.info['query_start_time'].pop(-1) - logger.debug("Query Complete!") - logger.debug("Total Time: %f" % total) - -Above, we use the :meth:`.ConnectionEvents.before_cursor_execute` and -:meth:`.ConnectionEvents.after_cursor_execute` events to establish an interception -point around when a statement is executed. We attach a timer onto the -connection using the :class:`._ConnectionRecord.info` dictionary; we use a -stack here for the occasional case where the cursor execute events may be nested. - -Code Profiling -^^^^^^^^^^^^^^ - -If logging reveals that individual queries are taking too long, you'd -need a breakdown of how much time was spent within the database -processing the query, sending results over the network, being handled -by the :term:`DBAPI`, and finally being received by SQLAlchemy's result set -and/or ORM layer. Each of these stages can present their own -individual bottlenecks, depending on specifics. - -For that you need to use the -`Python Profiling Module `_. -Below is a simple recipe which works profiling into a context manager:: - - import cProfile - import StringIO - import pstats - import contextlib - - @contextlib.contextmanager - def profiled(): - pr = cProfile.Profile() - pr.enable() - yield - pr.disable() - s = StringIO.StringIO() - ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') - ps.print_stats() - # uncomment this to see who's calling what - # ps.print_callers() - print s.getvalue() - -To profile a section of code:: - - with profiled(): - Session.query(FooClass).filter(FooClass.somevalue==8).all() - -The output of profiling can be used to give an idea where time is -being spent. 
A section of profiling output looks like this:: - - 13726 function calls (13042 primitive calls) in 0.014 seconds - - Ordered by: cumulative time - - ncalls tottime percall cumtime percall filename:lineno(function) - 222/21 0.001 0.000 0.011 0.001 lib/sqlalchemy/orm/loading.py:26(instances) - 220/20 0.002 0.000 0.010 0.001 lib/sqlalchemy/orm/loading.py:327(_instance) - 220/20 0.000 0.000 0.010 0.000 lib/sqlalchemy/orm/loading.py:284(populate_state) - 20 0.000 0.000 0.010 0.000 lib/sqlalchemy/orm/strategies.py:987(load_collection_from_subq) - 20 0.000 0.000 0.009 0.000 lib/sqlalchemy/orm/strategies.py:935(get) - 1 0.000 0.000 0.009 0.009 lib/sqlalchemy/orm/strategies.py:940(_load) - 21 0.000 0.000 0.008 0.000 lib/sqlalchemy/orm/strategies.py:942() - 2 0.000 0.000 0.004 0.002 lib/sqlalchemy/orm/query.py:2400(__iter__) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/orm/query.py:2414(_execute_and_instances) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/engine/base.py:659(execute) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/sql/elements.py:321(_execute_on_connection) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/engine/base.py:788(_execute_clauseelement) - - ... - -Above, we can see that the ``instances()`` SQLAlchemy function was called 222 -times (recursively, and 21 times from the outside), taking a total of .011 -seconds for all calls combined. - -Execution Slowness -^^^^^^^^^^^^^^^^^^ - -The specifics of these calls can tell us where the time is being spent. -If for example, you see time being spent within ``cursor.execute()``, -e.g. against the DBAPI:: - - 2 0.102 0.102 0.204 0.102 {method 'execute' of 'sqlite3.Cursor' objects} - -this would indicate that the database is taking a long time to start returning -results, and it means your query should be optimized, either by adding indexes -or restructuring the query and/or underlying schema. For that task, -analysis of the query plan is warranted, using a system such as EXPLAIN, -SHOW PLAN, etc. as is provided by the database backend. - -Result Fetching Slowness - Core -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If on the other hand you see many thousands of calls related to fetching rows, -or very long calls to ``fetchall()``, it may -mean your query is returning more rows than expected, or that the fetching -of rows itself is slow. The ORM itself typically uses ``fetchall()`` to fetch -rows (or ``fetchmany()`` if the :meth:`.Query.yield_per` option is used). - -An inordinately large number of rows would be indicated -by a very slow call to ``fetchall()`` at the DBAPI level:: - - 2 0.300 0.600 0.300 0.600 {method 'fetchall' of 'sqlite3.Cursor' objects} - -An unexpectedly large number of rows, even if the ultimate result doesn't seem -to have many rows, can be the result of a cartesian product - when multiple -sets of rows are combined together without appropriately joining the tables -together. It's often easy to produce this behavior with SQLAlchemy Core or -ORM query if the wrong :class:`.Column` objects are used in a complex query, -pulling in additional FROM clauses that are unexpected. - -On the other hand, a fast call to ``fetchall()`` at the DBAPI level, but then -slowness when SQLAlchemy's :class:`.ResultProxy` is asked to do a ``fetchall()``, -may indicate slowness in processing of datatypes, such as unicode conversions -and similar:: - - # the DBAPI cursor is fast... - 2 0.020 0.040 0.020 0.040 {method 'fetchall' of 'sqlite3.Cursor' objects} - - ... 
- - # but SQLAlchemy's result proxy is slow, this is type-level processing - 2 0.100 0.200 0.100 0.200 lib/sqlalchemy/engine/result.py:778(fetchall) - -In some cases, a backend might be doing type-level processing that isn't -needed. More specifically, seeing calls within the type API that are slow -are better indicators - below is what it looks like when we use a type like -this:: - - from sqlalchemy import TypeDecorator - import time - - class Foo(TypeDecorator): - impl = String - - def process_result_value(self, value, thing): - # intentionally add slowness for illustration purposes - time.sleep(.001) - return value - -the profiling output of this intentionally slow operation can be seen like this:: - - 200 0.001 0.000 0.237 0.001 lib/sqlalchemy/sql/type_api.py:911(process) - 200 0.001 0.000 0.236 0.001 test.py:28(process_result_value) - 200 0.235 0.001 0.235 0.001 {time.sleep} - -that is, we see many expensive calls within the ``type_api`` system, and the actual -time consuming thing is the ``time.sleep()`` call. - -Make sure to check the :doc:`Dialect documentation ` -for notes on known performance tuning suggestions at this level, especially for -databases like Oracle. There may be systems related to ensuring numeric accuracy -or string processing that may not be needed in all cases. - -There also may be even more low-level points at which row-fetching performance is suffering; -for example, if time spent seems to focus on a call like ``socket.receive()``, -that could indicate that everything is fast except for the actual network connection, -and too much time is spent with data moving over the network. - -Result Fetching Slowness - ORM -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To detect slowness in ORM fetching of rows (which is the most common area -of performance concern), calls like ``populate_state()`` and ``_instance()`` will -illustrate individual ORM object populations:: - - # the ORM calls _instance for each ORM-loaded row it sees, and - # populate_state for each ORM-loaded row that results in the population - # of an object's attributes - 220/20 0.001 0.000 0.010 0.000 lib/sqlalchemy/orm/loading.py:327(_instance) - 220/20 0.000 0.000 0.009 0.000 lib/sqlalchemy/orm/loading.py:284(populate_state) - -The ORM's slowness in turning rows into ORM-mapped objects is a product -of the complexity of this operation combined with the overhead of cPython. -Common strategies to mitigate this include: - -* fetch individual columns instead of full entities, that is:: - - session.query(User.id, User.name) - - instead of:: - - session.query(User) - -* Use :class:`.Bundle` objects to organize column-based results:: - - u_b = Bundle('user', User.id, User.name) - a_b = Bundle('address', Address.id, Address.email) - - for user, address in session.query(u_b, a_b).join(User.addresses): - # ... - -* Use result caching - see :ref:`examples_caching` for an in-depth example - of this. - -* Consider a faster interpreter like that of Pypy. - -The output of a profile can be a little daunting but after some -practice they are very easy to read. - -If you're feeling ambitious, there's also a more involved example of -SQLAlchemy profiling within the SQLAlchemy unit tests in the -``tests/aaa_profiling`` section. 
Tests in this area
-use decorators that assert a
-maximum number of method calls being used for particular operations,
-so that if something inefficient gets checked in, the tests will
-reveal it (it is important to note that in cPython, function calls have
-the highest overhead of any operation, and the count of calls is more
-often than not nearly proportional to time spent).  Of note are the
-"zoomark" tests which use a fancy "SQL capturing" scheme which
-cuts out the overhead of the DBAPI from the equation - although that
-technique isn't really necessary for garden-variety profiling.
-
-I'm inserting 400,000 rows with the ORM and it's really slow!
---------------------------------------------------------------
-
-The SQLAlchemy ORM uses the :term:`unit of work` pattern when synchronizing
-changes to the database.  This pattern goes far beyond simple "inserts"
-of data.  It includes that attributes which are assigned on objects are
-received using an attribute instrumentation system which tracks
-changes on objects as they are made, that all rows inserted
-are tracked in an identity map, which has the effect that for each row
-SQLAlchemy must retrieve its "last inserted id" if not already given,
-and that rows to be inserted are scanned and sorted for
-dependencies as needed.  Objects are also subject to a fair degree of
-bookkeeping in order to keep all of this running, which for a very
-large number of rows at once can create an inordinate amount of time
-spent with large data structures, hence it's best to chunk these.
-
-Basically, unit of work is a large degree of automation in order to
-automate the task of persisting a complex object graph into a
-relational database with no explicit persistence code, and this
-automation has a price.
-
-ORMs are basically not intended for high-performance bulk inserts -
-this is the whole reason SQLAlchemy offers the Core in addition to the
-ORM as a first-class component.
-
-For the use case of fast bulk inserts, the
-SQL generation and execution system that the ORM builds on top of
-is part of the Core.  Using this system directly, we can produce an INSERT that
-is competitive with using the raw database API directly.
-
-The example below illustrates time-based tests for four different
-methods of inserting rows, going from the most automated to the least.
-With cPython 2.7, runtimes observed:: - - classics-MacBook-Pro:sqlalchemy classic$ python test.py - SQLAlchemy ORM: Total time for 100000 records 14.3528850079 secs - SQLAlchemy ORM pk given: Total time for 100000 records 10.0164160728 secs - SQLAlchemy Core: Total time for 100000 records 0.775382995605 secs - sqlite3: Total time for 100000 records 0.676795005798 sec - -We can reduce the time by a factor of three using recent versions of `Pypy `_:: - - classics-MacBook-Pro:sqlalchemy classic$ /usr/local/src/pypy-2.1-beta2-osx64/bin/pypy test.py - SQLAlchemy ORM: Total time for 100000 records 5.88369488716 secs - SQLAlchemy ORM pk given: Total time for 100000 records 3.52294301987 secs - SQLAlchemy Core: Total time for 100000 records 0.613556146622 secs - sqlite3: Total time for 100000 records 0.442467927933 sec - -Script:: - - import time - import sqlite3 - - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy import Column, Integer, String, create_engine - from sqlalchemy.orm import scoped_session, sessionmaker - - Base = declarative_base() - DBSession = scoped_session(sessionmaker()) - engine = None - - class Customer(Base): - __tablename__ = "customer" - id = Column(Integer, primary_key=True) - name = Column(String(255)) - - def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'): - global engine - engine = create_engine(dbname, echo=False) - DBSession.remove() - DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False) - Base.metadata.drop_all(engine) - Base.metadata.create_all(engine) - - def test_sqlalchemy_orm(n=100000): - init_sqlalchemy() - t0 = time.time() - for i in range(n): - customer = Customer() - customer.name = 'NAME ' + str(i) - DBSession.add(customer) - if i % 1000 == 0: - DBSession.flush() - DBSession.commit() - print("SQLAlchemy ORM: Total time for " + str(n) + - " records " + str(time.time() - t0) + " secs") - - def test_sqlalchemy_orm_pk_given(n=100000): - init_sqlalchemy() - t0 = time.time() - for i in range(n): - customer = Customer(id=i+1, name="NAME " + str(i)) - DBSession.add(customer) - if i % 1000 == 0: - DBSession.flush() - DBSession.commit() - print("SQLAlchemy ORM pk given: Total time for " + str(n) + - " records " + str(time.time() - t0) + " secs") - - def test_sqlalchemy_core(n=100000): - init_sqlalchemy() - t0 = time.time() - engine.execute( - Customer.__table__.insert(), - [{"name": 'NAME ' + str(i)} for i in range(n)] - ) - print("SQLAlchemy Core: Total time for " + str(n) + - " records " + str(time.time() - t0) + " secs") - - def init_sqlite3(dbname): - conn = sqlite3.connect(dbname) - c = conn.cursor() - c.execute("DROP TABLE IF EXISTS customer") - c.execute("CREATE TABLE customer (id INTEGER NOT NULL, " - "name VARCHAR(255), PRIMARY KEY(id))") - conn.commit() - return conn - - def test_sqlite3(n=100000, dbname='sqlite3.db'): - conn = init_sqlite3(dbname) - c = conn.cursor() - t0 = time.time() - for i in range(n): - row = ('NAME ' + str(i),) - c.execute("INSERT INTO customer (name) VALUES (?)", row) - conn.commit() - print("sqlite3: Total time for " + str(n) + - " records " + str(time.time() - t0) + " sec") - - if __name__ == '__main__': - test_sqlalchemy_orm(100000) - test_sqlalchemy_orm_pk_given(100000) - test_sqlalchemy_core(100000) - test_sqlite3(100000) - - - -Sessions / Queries -=================== - - -"This Session's transaction has been rolled back due to a previous exception during flush." 
(or similar) ---------------------------------------------------------------------------------------------------------- - -This is an error that occurs when a :meth:`.Session.flush` raises an exception, rolls back -the transaction, but further commands upon the `Session` are called without an -explicit call to :meth:`.Session.rollback` or :meth:`.Session.close`. - -It usually corresponds to an application that catches an exception -upon :meth:`.Session.flush` or :meth:`.Session.commit` and -does not properly handle the exception. For example:: - - from sqlalchemy import create_engine, Column, Integer - from sqlalchemy.orm import sessionmaker - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base(create_engine('sqlite://')) - - class Foo(Base): - __tablename__ = 'foo' - id = Column(Integer, primary_key=True) - - Base.metadata.create_all() - - session = sessionmaker()() - - # constraint violation - session.add_all([Foo(id=1), Foo(id=1)]) - - try: - session.commit() - except: - # ignore error - pass - - # continue using session without rolling back - session.commit() - - -The usage of the :class:`.Session` should fit within a structure similar to this:: - - try: - - session.commit() - except: - session.rollback() - raise - finally: - session.close() # optional, depends on use case - -Many things can cause a failure within the try/except besides flushes. You -should always have some kind of "framing" of your session operations so that -connection and transaction resources have a definitive boundary, otherwise -your application doesn't really have its usage of resources under control. -This is not to say that you need to put try/except blocks all throughout your -application - on the contrary, this would be a terrible idea. You should -architect your application such that there is one (or few) point(s) of -"framing" around session operations. - -For a detailed discussion on how to organize usage of the :class:`.Session`, -please see :ref:`session_faq_whentocreate`. - -But why does flush() insist on issuing a ROLLBACK? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It would be great if :meth:`.Session.flush` could partially complete and then not roll -back, however this is beyond its current capabilities since its internal -bookkeeping would have to be modified such that it can be halted at any time -and be exactly consistent with what's been flushed to the database. While this -is theoretically possible, the usefulness of the enhancement is greatly -decreased by the fact that many database operations require a ROLLBACK in any -case. Postgres in particular has operations which, once failed, the -transaction is not allowed to continue:: - - test=> create table foo(id integer primary key); - NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "foo_pkey" for table "foo" - CREATE TABLE - test=> begin; - BEGIN - test=> insert into foo values(1); - INSERT 0 1 - test=> commit; - COMMIT - test=> begin; - BEGIN - test=> insert into foo values(1); - ERROR: duplicate key value violates unique constraint "foo_pkey" - test=> insert into foo values(2); - ERROR: current transaction is aborted, commands ignored until end of transaction block - -What SQLAlchemy offers that solves both issues is support of SAVEPOINT, via -:meth:`.Session.begin_nested`. Using :meth:`.Session.begin_nested`, you can frame an operation that may -potentially fail within a transaction, and then "roll back" to the point -before its failure while maintaining the enclosing transaction. 
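-
-A minimal sketch of the SAVEPOINT pattern follows, reusing the ``session``
-and ``Foo`` mapping from the example above (the duplicate primary key makes
-the inner flush fail; the names and values here are illustrative only).  Per
-the discussion below, ``commit()`` and ``rollback()`` each resolve the most
-recent ``begin_nested()``::
-
-    session.add(Foo(id=1))
-
-    session.begin_nested()        # establish a SAVEPOINT
-    try:
-        session.add(Foo(id=1))    # duplicate primary key
-        session.commit()          # resolves the SAVEPOINT; the flush fails here
-    except:
-        session.rollback()        # rolls back to the SAVEPOINT only
-
-    session.commit()              # the enclosing transaction commits; the first Foo is saved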
-
-But why isn't the one automatic call to ROLLBACK enough?  Why must I ROLLBACK again?
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-This is again a matter of the :class:`.Session` providing a consistent interface and
-refusing to guess about what context it's being used in.  For example, the
-:class:`.Session` supports "framing" above within multiple levels.  Suppose, for instance,
-you had a decorator ``@with_session()``, which did this::
-
-    def with_session(fn):
-        def go(*args, **kw):
-            session.begin(subtransactions=True)
-            try:
-                ret = fn(*args, **kw)
-                session.commit()
-                return ret
-            except:
-                session.rollback()
-                raise
-        return go
-
-The above decorator begins a transaction if one does not exist already, and
-then commits it, if it was the creator.  The "subtransactions" flag means that
-if :meth:`.Session.begin` were already called by an enclosing function, nothing happens
-except a counter is incremented - this counter is decremented when :meth:`.Session.commit`
-is called and only when it goes back to zero does the actual COMMIT happen.  It
-allows this usage pattern::
-
-    @with_session
-    def one():
-        # do stuff
-        two()
-
-
-    @with_session
-    def two():
-        # etc.
-
-    one()
-
-    two()
-
-``one()`` can call ``two()``, or ``two()`` can be called by itself, and the
-``@with_session`` decorator ensures the appropriate "framing" - the transaction
-boundaries stay on the outermost call level.  As you can see, if ``two()`` calls
-``flush()`` which throws an exception and then issues a ``rollback()``, there will
-*always* be a second ``rollback()`` performed by the decorator, and possibly a
-third corresponding to two levels of decorator.  If the ``flush()`` pushed the
-``rollback()`` all the way out to the top of the stack, and then we said that
-all remaining ``rollback()`` calls are moot, there is some silent behavior going
-on there.  A poorly written enclosing method might suppress the exception, and
-then call ``commit()`` assuming nothing is wrong, and then you have a silent
-failure condition.  The main reason people get this error in fact is because
-they didn't write clean "framing" code and they would have had other problems
-down the road.
-
-If you think the above use case is a little exotic, the same kind of thing
-comes into play if you want to SAVEPOINT - you might call ``begin_nested()``
-several times, and the ``commit()``/``rollback()`` calls each resolve the most
-recent ``begin_nested()``.  The meaning of ``rollback()`` or ``commit()`` is
-dependent upon the enclosing block in which it is called, and you might have any
-sequence of ``rollback()``/``commit()`` in any order, and it's the level of nesting
-that determines their behavior.
-
-In both of the above cases, if ``flush()`` broke the nesting of transaction
-blocks, the behavior is, depending on scenario, anywhere from "magic" to
-silent failure to blatant interruption of code flow.
-
-``flush()`` makes its own "subtransaction", so that a transaction is started up
-regardless of the external transactional state, and when complete it calls
-``commit()``, or ``rollback()`` upon failure - but that ``rollback()`` corresponds
-to its own subtransaction - it doesn't want to guess how you'd like to handle
-the external "framing" of the transaction, which could be nested many levels
-with any combination of subtransactions and real SAVEPOINTs.
The job of
-starting/ending the "frame" is kept consistently with the code external to the
-``flush()``, and we made a decision that this was the most consistent approach.
-
-
-
-How do I make a Query that always adds a certain filter to every query?
-------------------------------------------------------------------------------------------------
-
-See the recipe at `PreFilteredQuery `_.
-
-I've created a mapping against an Outer Join, and while the query returns rows, no objects are returned.  Why not?
--------------------------------------------------------------------------------------------------------------------
-
-Rows returned by an outer join may contain NULL for part of the primary key,
-as the primary key is the composite of both tables.  The :class:`.Query` object ignores incoming rows
-that don't have an acceptable primary key.  Based on the setting of the ``allow_partial_pks``
-flag on :func:`.mapper`, a primary key is accepted if the value has at least one non-NULL
-value, or alternatively if the value has no NULL values.  See ``allow_partial_pks``
-at :func:`.mapper`.
-
-
-I'm using ``joinedload()`` or ``lazy=False`` to create a JOIN/OUTER JOIN and SQLAlchemy is not constructing the correct query when I try to add a WHERE, ORDER BY, LIMIT, etc. (which relies upon the (OUTER) JOIN)
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-The joins generated by joined eager loading are only used to fully load related
-collections, and are designed to have no impact on the primary results of the query.
-Since they are anonymously aliased, they cannot be referenced directly.
-
-For detail on this behavior, see :doc:`orm/loading`.
-
-Query has no ``__len__()``, why not?
-------------------------------------
-
-The Python ``__len__()`` magic method applied to an object allows the ``len()``
-builtin to be used to determine the length of the collection.  It's intuitive
-that a SQL query object would link ``__len__()`` to the :meth:`.Query.count`
-method, which emits a ``SELECT COUNT``.  The reason this is not possible is
-because evaluating the query as a list would incur two SQL calls instead of
-one::
-
-    class Iterates(object):
-        def __len__(self):
-            print "LEN!"
-            return 5
-
-        def __iter__(self):
-            print "ITER!"
-            return iter([1, 2, 3, 4, 5])
-
-    list(Iterates())
-
-output::
-
-    ITER!
-    LEN!
-
-How do I use Textual SQL with ORM Queries?
--------------------------------------------
-
-See:
-
-* :ref:`orm_tutorial_literal_sql` - Ad-hoc textual blocks with :class:`.Query`
-
-* :ref:`session_sql_expressions` - Using :class:`.Session` with textual SQL directly.
-
-I'm calling ``Session.delete(myobject)`` and it isn't removed from the parent collection!
-------------------------------------------------------------------------------------------
-
-See :ref:`session_deleting_from_collections` for a description of this behavior.
-
-Why isn't my ``__init__()`` called when I load objects?
--------------------------------------------------------
-
-See :ref:`mapping_constructors` for a description of this behavior.
-
-How do I use ON DELETE CASCADE with SA's ORM?
-----------------------------------------------
-
-SQLAlchemy will always issue UPDATE or DELETE statements for dependent
-rows which are currently loaded in the :class:`.Session`.
For rows which
-are not loaded, it will by default issue SELECT statements to load
-those rows and update/delete those as well; in other words it assumes
-there is no ON DELETE CASCADE configured.
-To configure SQLAlchemy to cooperate with ON DELETE CASCADE, see
-:ref:`passive_deletes`.
-
-I set the "foo_id" attribute on my instance to "7", but the "foo" attribute is still ``None`` - shouldn't it have loaded Foo with id #7?
-----------------------------------------------------------------------------------------------------------------------------------------------------
-
-The ORM is not constructed in such a way as to support
-immediate population of relationships driven from foreign
-key attribute changes - instead, it is designed to work the
-other way around - foreign key attributes are handled by the
-ORM behind the scenes, the end user sets up object
-relationships naturally.  Therefore, the recommended way to
-set ``o.foo`` is to do just that - set it!::
-
-    foo = Session.query(Foo).get(7)
-    o.foo = foo
-    Session.commit()
-
-Manipulation of foreign key attributes is of course entirely legal.  However,
-setting a foreign-key attribute to a new value currently does not trigger
-an "expire" event of the :func:`.relationship` in which it's involved.  This means
-that for the following sequence::
-
-    o = Session.query(SomeClass).first()
-    assert o.foo is None  # accessing an un-set attribute sets it to None
-    o.foo_id = 7
-
-``o.foo`` was initialized to ``None`` when we first accessed it.  Setting
-``o.foo_id = 7`` will have the value of "7" as pending, but no flush
-has occurred - so ``o.foo`` is still ``None``::
-
-    # attribute is already set to None, has not been
-    # reconciled with o.foo_id = 7 yet
-    assert o.foo is None
-
-Loading of ``o.foo`` based on the foreign key mutation is usually achieved
-naturally after the commit, which both flushes the new foreign key value
-and expires all state::
-
-    Session.commit()  # expires all attributes
-
-    foo_7 = Session.query(Foo).get(7)
-
-    assert o.foo is foo_7  # o.foo lazyloads on access
-
-A more minimal operation is to expire the attribute individually - this can
-be performed for any :term:`persistent` object using :meth:`.Session.expire`::
-
-    o = Session.query(SomeClass).first()
-    o.foo_id = 7
-    Session.expire(o, ['foo'])  # object must be persistent for this
-
-    foo_7 = Session.query(Foo).get(7)
-
-    assert o.foo is foo_7  # o.foo lazyloads on access
-
-Note that if the object is not persistent but present in the :class:`.Session`,
-it's known as :term:`pending`.  This means the row for the object has not been
-INSERTed into the database yet.  For such an object, setting ``foo_id`` does not
-have meaning until the row is inserted; otherwise there is no row yet::
-
-    new_obj = SomeClass()
-    new_obj.foo_id = 7
-
-    Session.add(new_obj)
-
-    # accessing an un-set attribute sets it to None
-    assert new_obj.foo is None
-
-    Session.flush()  # emits INSERT
-
-    # expire new_obj.foo because we already set it to None above
-    Session.expire(new_obj, ['foo'])
-
-    foo_7 = Session.query(Foo).get(7)
-
-    assert new_obj.foo is foo_7  # now it loads
-
-
-.. topic:: Attribute loading for non-persistent objects
-
-    One variant on the "pending" behavior above is if we use the flag
-    ``load_on_pending`` on :func:`.relationship`.
When this flag is set, the
-    lazy loader will emit for ``new_obj.foo`` before the INSERT proceeds; another
-    variant of this is to use the :meth:`.Session.enable_relationship_loading`
-    method, which can "attach" an object to a :class:`.Session` in such a way that
-    many-to-one relationships load according to foreign key attributes
-    regardless of the object being in any particular state.
-    Both techniques are **not recommended for general use**; they were added to suit
-    specific programming scenarios encountered by users which involve the repurposing
-    of the ORM's usual object states.
-
-The recipe `ExpireRelationshipOnFKChange `_ features an example using SQLAlchemy events
-in order to coordinate the setting of foreign key attributes with many-to-one
-relationships.
-
-Is there a way to automagically have only unique keywords (or other kinds of objects) without doing a query for the keyword and getting a reference to the row containing that keyword?
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-When people read the many-to-many example in the docs, they get hit with the
-fact that if you create the same ``Keyword`` twice, it gets put in the DB twice,
-which is somewhat inconvenient.
-
-This `UniqueObject `_ recipe was created to address this issue.
-
-
diff --git a/doc/build/faq/connections.rst b/doc/build/faq/connections.rst
new file mode 100644
index 0000000000..81a8678b49
--- /dev/null
+++ b/doc/build/faq/connections.rst
@@ -0,0 +1,138 @@
+Connections / Engines
+=====================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+
+How do I configure logging?
+---------------------------
+
+See :ref:`dbengine_logging`.
+
+How do I pool database connections?  Are my connections pooled?
+----------------------------------------------------------------
+
+SQLAlchemy performs application-level connection pooling automatically
+in most cases.  With the exception of SQLite, an :class:`.Engine` object
+refers to a :class:`.QueuePool` as a source of connectivity.
+
+For more detail, see :ref:`engines_toplevel` and :ref:`pooling_toplevel`.
+
+How do I pass custom connect arguments to my database API?
+-----------------------------------------------------------
+
+The :func:`.create_engine` call accepts additional arguments either
+directly via the ``connect_args`` keyword argument::
+
+    e = create_engine("mysql://scott:tiger@localhost/test",
+                      connect_args={"encoding": "utf8"})
+
+Or for basic string and integer arguments, they can usually be specified
+in the query string of the URL::
+
+    e = create_engine("mysql://scott:tiger@localhost/test?encoding=utf8")
+
+.. seealso::
+
+    :ref:`custom_dbapi_args`
+
+"MySQL Server has gone away"
+----------------------------
+
+There are two major causes for this error:
+
+1. The MySQL client closes connections which have been idle for a set period
+of time, defaulting to eight hours.  This can be avoided by using the ``pool_recycle``
+setting with :func:`.create_engine`, described at :ref:`mysql_connection_timeouts`.
+
+2. Usage of the MySQLdb :term:`DBAPI`, or a similar DBAPI, in a non-threadsafe manner, or in an otherwise
+inappropriate way.  The MySQLdb connection object is not threadsafe - this expands
+out to any SQLAlchemy system that links to a single connection, which includes the ORM
+:class:`.Session`.
For background
+on how :class:`.Session` should be used in a multithreaded environment,
+see :ref:`session_faq_threadsafe`.
+
+Why does SQLAlchemy issue so many ROLLBACKs?
+---------------------------------------------
+
+SQLAlchemy currently assumes DBAPI connections are in "non-autocommit" mode -
+this is the default behavior of the Python database API, meaning it
+must be assumed that a transaction is always in progress. The
+connection pool issues ``connection.rollback()`` when a connection is returned.
+This is so that any transactional resources remaining on the connection are
+released. On a database like Postgresql or MSSQL where table resources are
+aggressively locked, this is critical so that rows and tables don't remain
+locked within connections that are no longer in use. An application can
+otherwise hang. It's not just for locks, however, and is equally critical on
+any database that has any kind of transaction isolation, including MySQL with
+InnoDB. Any connection that is still inside an old transaction will return
+stale data, if that data was already queried on that connection within
+isolation. For background on why you might see stale data even on MySQL, see
+http://dev.mysql.com/doc/refman/5.1/en/innodb-transaction-model.html
+
+I'm on MyISAM - how do I turn it off?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The connection pool's connection return behavior can be configured using
+``reset_on_return``, most directly via the ``pool_reset_on_return``
+argument of :func:`.create_engine`::
+
+    from sqlalchemy import create_engine
+
+    engine = create_engine('mysql://scott:tiger@localhost/myisam_database', pool_reset_on_return=False)
+
+I'm on SQL Server - how do I turn those ROLLBACKs into COMMITs?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``reset_on_return`` accepts the values ``commit``, ``rollback`` in addition
+to ``True``, ``False``, and ``None``.   Setting to ``commit`` will cause
+a COMMIT as any connection is returned to the pool::
+
+    engine = create_engine('mssql://scott:tiger@mydsn', pool_reset_on_return='commit')
+
+
+I am using multiple connections with a SQLite database (typically to test transaction operation), and my test program is not working!
+----------------------------------------------------------------------------------------------------------------------------------------------------------
+
+If using a SQLite ``:memory:`` database, or a version of SQLAlchemy prior
+to version 0.7, the default connection pool is the :class:`.SingletonThreadPool`,
+which maintains exactly one SQLite connection per thread.  So two
+connections in use in the same thread will actually be the same SQLite
+connection.   Make sure you're not using a ``:memory:`` database and
+use :class:`.NullPool`, which is the default for non-memory databases in
+current SQLAlchemy versions.
+
+.. seealso::
+
+    :ref:`pysqlite_threading_pooling` - info on PySQLite's behavior.
+
+How do I get at the raw DBAPI connection when using an Engine?
+--------------------------------------------------------------
+
+With a regular SA engine-level Connection, you can get at a pool-proxied
+version of the DBAPI connection via the :attr:`.Connection.connection` attribute on
+:class:`.Connection`, and for the really-real DBAPI connection you can call the
+:attr:`.ConnectionFairy.connection` attribute on that - but there should never be any need to access
+the non-pool-proxied DBAPI connection, as all methods are proxied through::
+
+    engine = create_engine(...)
+    conn = engine.connect()
+    conn.connection.<do DBAPI things>
+    cursor = conn.connection.cursor()
+
+You must ensure that you revert any isolation level settings or other
+operation-specific settings on the connection back to normal before returning
+it to the pool.
+
+As an alternative to reverting settings, you can call the :meth:`.Connection.detach` method on
+either :class:`.Connection` or the proxied connection, which will de-associate
+the connection from the pool such that it will be closed and discarded
+when :meth:`.Connection.close` is called::
+
+    conn = engine.connect()
+    conn.detach()  # detaches the DBAPI connection from the connection pool
+    conn.connection.<go nuts>
+    conn.close()  # connection is closed for real, the pool replaces it with a new connection
diff --git a/doc/build/faq/index.rst b/doc/build/faq/index.rst
new file mode 100644
index 0000000000..120e0ba3a2
--- /dev/null
+++ b/doc/build/faq/index.rst
@@ -0,0 +1,19 @@
+.. _faq_toplevel:
+
+============================
+Frequently Asked Questions
+============================
+
+The Frequently Asked Questions section is a growing collection of commonly
+observed questions about well-known issues.
+
+.. toctree::
+    :maxdepth: 1
+
+    connections
+    metadata_schema
+    sqlexpressions
+    ormconfiguration
+    performance
+    sessions
+
diff --git a/doc/build/faq/metadata_schema.rst b/doc/build/faq/metadata_schema.rst
new file mode 100644
index 0000000000..9697399dcf
--- /dev/null
+++ b/doc/build/faq/metadata_schema.rst
@@ -0,0 +1,102 @@
+==================
+MetaData / Schema
+==================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+
+
+My program is hanging when I say ``table.drop()`` / ``metadata.drop_all()``
+===========================================================================
+
+This usually corresponds to two conditions: 1. using PostgreSQL, which is really
+strict about table locks, and 2. having a connection still open which
+contains locks on the table and is distinct from the connection being used for
+the DROP statement.  Here's the most minimal version of the pattern::
+
+    connection = engine.connect()
+    result = connection.execute(mytable.select())
+
+    mytable.drop(engine)
+
+Above, a connection pool connection is still checked out; furthermore, the
+result object above also maintains a link to this connection.  If
+"implicit execution" is used, the result will hold this connection open until
+the result object is closed or all rows are exhausted.
+
+The call to ``mytable.drop(engine)`` attempts to emit DROP TABLE on a second
+connection procured from the :class:`.Engine` which will lock.
+
+The solution is to close out all connections before emitting DROP TABLE::
+
+    connection = engine.connect()
+    result = connection.execute(mytable.select())
+
+    # fully read result sets
+    result.fetchall()
+
+    # close connections
+    connection.close()
+
+    # now locks are removed
+    mytable.drop(engine)
+
+Does SQLAlchemy support ALTER TABLE, CREATE VIEW, CREATE TRIGGER, Schema Upgrade Functionality?
+===============================================================================================
+
+
+General ALTER support isn't present in SQLAlchemy directly.  For special DDL
+on an ad-hoc basis, the :class:`.DDL` and related constructs can be used.
+See :doc:`core/ddl` for a discussion on this subject.
+
+A more comprehensive option is to use schema migration tools, such as Alembic
+or SQLAlchemy-Migrate; see :ref:`schema_migrations` for discussion on this.
+
+How can I sort Table objects in order of their dependency?
+===========================================================================
+
+This is available via the :attr:`.MetaData.sorted_tables` attribute::
+
+    metadata = MetaData()
+    # ... add Table objects to metadata
+    ti = metadata.sorted_tables
+    for t in ti:
+        print t
+
+How can I get the CREATE TABLE / DROP TABLE output as a string?
+===========================================================================
+
+Modern SQLAlchemy has clause constructs which represent DDL operations. These
+can be rendered to strings like any other SQL expression::
+
+    from sqlalchemy.schema import CreateTable
+
+    print CreateTable(mytable)
+
+To get the string specific to a certain engine::
+
+    print CreateTable(mytable).compile(engine)
+
+There's also a special form of :class:`.Engine` that can let you dump an entire
+metadata creation sequence, using this recipe::
+
+    def dump(sql, *multiparams, **params):
+        print sql.compile(dialect=engine.dialect)
+    engine = create_engine('postgresql://', strategy='mock', executor=dump)
+    metadata.create_all(engine, checkfirst=False)
+
+The `Alembic `_ tool also supports
+an "offline" SQL generation mode that renders database migrations as SQL scripts.
+
+How can I subclass Table/Column to provide certain behaviors/configurations?
+=============================================================================
+
+:class:`.Table` and :class:`.Column` are not good targets for direct subclassing.
+However, there are simple ways to get on-construction behaviors using creation
+functions, and behaviors related to the linkages between schema objects such as
+constraint conventions or naming conventions using attachment events.
+An example of many of these
+techniques can be seen at `Naming Conventions `_.
diff --git a/doc/build/faq/ormconfiguration.rst b/doc/build/faq/ormconfiguration.rst
new file mode 100644
index 0000000000..3a2ea29a67
--- /dev/null
+++ b/doc/build/faq/ormconfiguration.rst
@@ -0,0 +1,334 @@
+ORM Configuration
+==================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_mapper_primary_key:
+
+How do I map a table that has no primary key?
+---------------------------------------------
+
+The SQLAlchemy ORM, in order to map to a particular table, needs there to be
+at least one column denoted as a primary key column; multiple-column,
+i.e. composite, primary keys are of course entirely feasible as well.  These
+columns do **not** need to be actually known to the database as primary key
+columns, though it's a good idea that they are.  It's only necessary that the columns
+*behave* as a primary key does, e.g. as a unique and not nullable identifier
+for a row.
+
+Most ORMs require that objects have some kind of primary key defined
+because the object in memory must correspond to a uniquely identifiable
+row in the database table; at the very least, this allows the
+object to be targeted for UPDATE and DELETE statements which will affect only
+that object's row and no other.   However, the importance of the primary key
+goes far beyond that.  In SQLAlchemy, all ORM-mapped objects are at all times
+linked uniquely within a :class:`.Session`
+to their specific database row using a pattern called the :term:`identity map`,
+a pattern that's central to the unit of work system employed by SQLAlchemy,
+and is also key to the most common (and not-so-common) patterns of ORM usage.
+
+.. note::
+
+    It's important to note that we're only talking about the SQLAlchemy ORM; an
+    application which builds on Core and deals only with :class:`.Table` objects,
+    :func:`.select` constructs and the like, **does not** need any primary key
+    to be present on or associated with a table in any way (though again, in SQL, all tables
+    should really have some kind of primary key, lest you need to actually
+    update or delete specific rows).
+
+In almost all cases, a table does have a so-called :term:`candidate key`, which is a column or series
+of columns that uniquely identify a row.  If a table truly doesn't have this, and has actual
+fully duplicate rows, the table does not correspond to `first normal form `_ and cannot be mapped.  Otherwise, whatever columns comprise the best candidate key can be
+applied directly to the mapper::
+
+    class SomeClass(Base):
+        __table__ = some_table_with_no_pk
+        __mapper_args__ = {
+            'primary_key':[some_table_with_no_pk.c.uid, some_table_with_no_pk.c.bar]
+        }
+
+Better yet, when using fully declared table metadata, use the ``primary_key=True``
+flag on those columns::
+
+    class SomeClass(Base):
+        __tablename__ = "some_table_with_no_pk"
+
+        uid = Column(Integer, primary_key=True)
+        bar = Column(String, primary_key=True)
+
+All tables in a relational database should have primary keys.   Even a many-to-many
+association table - the primary key would be the composite of the two association
+columns::
+
+    CREATE TABLE my_association (
+      user_id INTEGER REFERENCES user(id),
+      account_id INTEGER REFERENCES account(id),
+      PRIMARY KEY (user_id, account_id)
+    )
+
+
+How do I configure a Column that is a Python reserved word or similar?
+----------------------------------------------------------------------------
+
+Column-based attributes can be given any name desired in the mapping. See
+:ref:`mapper_column_distinct_names`.
+
+How do I get a list of all columns, relationships, mapped attributes, etc. given a mapped class?
+-------------------------------------------------------------------------------------------------
+
+This information is all available from the :class:`.Mapper` object.
+
+To get at the :class:`.Mapper` for a particular mapped class, call the
+:func:`.inspect` function on it::
+
+    from sqlalchemy import inspect
+
+    mapper = inspect(MyClass)
+
+From there, all information about the class can be acquired using such methods as:
+
+* :attr:`.Mapper.attrs` - a namespace of all mapped attributes.  The attributes
+  themselves are instances of :class:`.MapperProperty`, which contain additional
+  attributes that can lead to the mapped SQL expression or column, if applicable.
+
+* :attr:`.Mapper.column_attrs` - the mapped attribute namespace
+  limited to column and SQL expression attributes.   You might want to use
+  :attr:`.Mapper.columns` to get at the :class:`.Column` objects directly.
+
+* :attr:`.Mapper.relationships` - namespace of all :class:`.RelationshipProperty` attributes.
+
+* :attr:`.Mapper.all_orm_descriptors` - namespace of all mapped attributes, plus user-defined
+  attributes defined using systems such as :class:`.hybrid_property`, :class:`.AssociationProxy` and others.
+
+* :attr:`.Mapper.columns` - A namespace of :class:`.Column` objects and other named
+  SQL expressions associated with the mapping.
+
+* :attr:`.Mapper.mapped_table` - The :class:`.Table` or other selectable to which
+  this mapper is mapped.
+ +* :attr:`.Mapper.local_table` - The :class:`.Table` that is "local" to this mapper; + this differs from :attr:`.Mapper.mapped_table` in the case of a mapper mapped + using inheritance to a composed selectable. + +.. _faq_combining_columns: + +I'm getting a warning or error about "Implicitly combining column X under attribute Y" +-------------------------------------------------------------------------------------- + +This condition refers to when a mapping contains two columns that are being +mapped under the same attribute name due to their name, but there's no indication +that this is intentional. A mapped class needs to have explicit names for +every attribute that is to store an independent value; when two columns have the +same name and aren't disambiguated, they fall under the same attribute and +the effect is that the value from one column is **copied** into the other, based +on which column was assigned to the attribute first. + +This behavior is often desirable and is allowed without warning in the case +where the two columns are linked together via a foreign key relationship +within an inheritance mapping. When the warning or exception occurs, the +issue can be resolved by either assigning the columns to differently-named +attributes, or if combining them together is desired, by using +:func:`.column_property` to make this explicit. + +Given the example as follows:: + + from sqlalchemy import Integer, Column, ForeignKey + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class A(Base): + __tablename__ = 'a' + + id = Column(Integer, primary_key=True) + + class B(A): + __tablename__ = 'b' + + id = Column(Integer, primary_key=True) + a_id = Column(Integer, ForeignKey('a.id')) + +As of SQLAlchemy version 0.9.5, the above condition is detected, and will +warn that the ``id`` column of ``A`` and ``B`` is being combined under +the same-named attribute ``id``, which above is a serious issue since it means +that a ``B`` object's primary key will always mirror that of its ``A``. + +A mapping which resolves this is as follows:: + + class A(Base): + __tablename__ = 'a' + + id = Column(Integer, primary_key=True) + + class B(A): + __tablename__ = 'b' + + b_id = Column('id', Integer, primary_key=True) + a_id = Column(Integer, ForeignKey('a.id')) + +Suppose we did want ``A.id`` and ``B.id`` to be mirrors of each other, despite +the fact that ``B.a_id`` is where ``A.id`` is related. We could combine +them together using :func:`.column_property`:: + + class A(Base): + __tablename__ = 'a' + + id = Column(Integer, primary_key=True) + + class B(A): + __tablename__ = 'b' + + # probably not what you want, but this is a demonstration + id = column_property(Column(Integer, primary_key=True), A.id) + a_id = Column(Integer, ForeignKey('a.id')) + + + +I'm using Declarative and setting primaryjoin/secondaryjoin using an ``and_()`` or ``or_()``, and I am getting an error message about foreign keys. +------------------------------------------------------------------------------------------------------------------------------------------------------------------ + +Are you doing this?:: + + class MyClass(Base): + # .... + + foo = relationship("Dest", primaryjoin=and_("MyClass.id==Dest.foo_id", "MyClass.foo==Dest.bar")) + +That's an ``and_()`` of two string expressions, which SQLAlchemy cannot apply any mapping towards. Declarative allows :func:`.relationship` arguments to be specified as strings, which are converted into expression objects using ``eval()``. 
But this doesn't occur inside of an ``and_()`` expression - it's a special operation declarative applies only to the *entirety* of what's passed to primaryjoin or other arguments as a string::
+
+    class MyClass(Base):
+        # ....
+
+        foo = relationship("Dest", primaryjoin="and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar)")
+
+Or if the objects you need are already available, skip the strings::
+
+    class MyClass(Base):
+        # ....
+
+        foo = relationship(Dest, primaryjoin=and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar))
+
+The same idea applies to all the other arguments, such as ``foreign_keys``::
+
+    # wrong !
+    foo = relationship(Dest, foreign_keys=["Dest.foo_id", "Dest.bar_id"])
+
+    # correct !
+    foo = relationship(Dest, foreign_keys="[Dest.foo_id, Dest.bar_id]")
+
+    # also correct !
+    foo = relationship(Dest, foreign_keys=[Dest.foo_id, Dest.bar_id])
+
+    # if you're using columns from the class that you're inside of, just use the column objects !
+    class MyClass(Base):
+        foo_id = Column(...)
+        bar_id = Column(...)
+        # ...
+
+        foo = relationship(Dest, foreign_keys=[foo_id, bar_id])
+
+.. _faq_subqueryload_limit_sort:
+
+Why is ``ORDER BY`` required with ``LIMIT`` (especially with ``subqueryload()``)?
+---------------------------------------------------------------------------------
+
+A relational database can return rows in any
+arbitrary order when an explicit ordering is not set.
+While this ordering very often corresponds to the natural
+order of rows within a table, this is not the case for all databases and
+all queries.   The consequence of this is that any query that limits rows
+using ``LIMIT`` or ``OFFSET`` should **always** specify an ``ORDER BY``.
+Otherwise, it is not deterministic which rows will actually be returned.
+
+When we use a SQLAlchemy method like :meth:`.Query.first`, we are in fact
+applying a ``LIMIT`` of one to the query, so without an explicit ordering
+it is not deterministic what row we actually get back.
+While we may not notice this for simple queries on databases that usually
+return rows in their natural
+order, it becomes much more of an issue if we also use :func:`.orm.subqueryload`
+to load related collections, and we may not be loading the collections
+as intended.
+
+SQLAlchemy implements :func:`.orm.subqueryload` by issuing a separate query,
+the results of which are matched up to the results from the first query.
+We see two queries emitted like this:
+
+.. sourcecode:: python+sql
+
+    >>> session.query(User).options(subqueryload(User.addresses)).all()
+    {opensql}-- the "main" query
+    SELECT users.id AS users_id
+    FROM users
+    {stop}
+    {opensql}-- the "load" query issued by subqueryload
+    SELECT addresses.id AS addresses_id,
+           addresses.user_id AS addresses_user_id,
+           anon_1.users_id AS anon_1_users_id
+    FROM (SELECT users.id AS users_id FROM users) AS anon_1
+    JOIN addresses ON anon_1.users_id = addresses.user_id
+    ORDER BY anon_1.users_id
+
+The second query embeds the first query as a source of rows.
+When the inner query uses ``OFFSET`` and/or ``LIMIT`` without ordering,
+the two queries may not see the same results:
+
+.. sourcecode:: python+sql
+
+    >>> user = session.query(User).options(subqueryload(User.addresses)).first()
+    {opensql}-- the "main" query
+    SELECT users.id AS users_id
+    FROM users
+    LIMIT 1
+    {stop}
+    {opensql}-- the "load" query issued by subqueryload
+    SELECT addresses.id AS addresses_id,
+           addresses.user_id AS addresses_user_id,
+           anon_1.users_id AS anon_1_users_id
+    FROM (SELECT users.id AS users_id FROM users LIMIT 1) AS anon_1
+    JOIN addresses ON anon_1.users_id = addresses.user_id
+    ORDER BY anon_1.users_id
+
+Depending on database specifics, there is
+a chance we may get a result like the following for the two queries::
+
+    -- query #1
+    +--------+
+    |users_id|
+    +--------+
+    |       1|
+    +--------+
+
+    -- query #2
+    +------------+-----------------+---------------+
+    |addresses_id|addresses_user_id|anon_1_users_id|
+    +------------+-----------------+---------------+
+    |           3|                2|              2|
+    +------------+-----------------+---------------+
+    |           4|                2|              2|
+    +------------+-----------------+---------------+
+
+Above, we receive two ``addresses`` rows for ``user.id`` of 2, and none for
+1.  We've wasted two rows and failed to actually load the collection.  This
+is an insidious error because without looking at the SQL and the results, the
+ORM will not show that there's any issue; if we access the ``addresses``
+for the ``User`` we have, it will emit a lazy load for the collection and we
+won't see that anything actually went wrong.
+
+The solution to this problem is to always specify a deterministic sort order,
+so that the main query always returns the same set of rows.  This generally
+means that you should apply :meth:`.Query.order_by` to a unique column on the table.
+The primary key is a good choice for this::
+
+    session.query(User).options(subqueryload(User.addresses)).order_by(User.id).first()
+
+Note that :func:`.joinedload` does not suffer from the same problem because
+only one query is ever issued, so the load query cannot be different from the
+main query.
+
+.. seealso::
+
+    :ref:`subqueryload_ordering`
diff --git a/doc/build/faq/performance.rst b/doc/build/faq/performance.rst
new file mode 100644
index 0000000000..21cc88f685
--- /dev/null
+++ b/doc/build/faq/performance.rst
@@ -0,0 +1,413 @@
+.. _faq_performance:
+
+Performance
+===========
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_how_to_profile:
+
+How can I profile a SQLAlchemy-powered application?
+---------------------------------------------------
+
+Looking for performance issues typically involves two strategies.  One
+is query profiling, and the other is code profiling.
+
+Query Profiling
+^^^^^^^^^^^^^^^^
+
+Sometimes just plain SQL logging (enabled via Python's logging module
+or via the ``echo=True`` argument on :func:`.create_engine`) can give an
+idea how long things are taking.  For example, if you log something
+right after a SQL operation, you'd see something like this in your
+log::
+
+    17:37:48,325 INFO  [sqlalchemy.engine.base.Engine.0x...048c] SELECT ...
+    17:37:48,326 INFO  [sqlalchemy.engine.base.Engine.0x...048c] {}
+    17:37:48,660 DEBUG [myapp.somemessage]
+
+If you logged ``myapp.somemessage`` right after the operation, you know
+it took 334ms to complete the SQL part of things.
+
+Logging SQL will also illustrate if dozens/hundreds of queries are
+being issued which could be better organized into far fewer queries.
+When using the SQLAlchemy ORM, the "eager loading" +feature is provided to partially (:func:`.contains_eager()`) or fully +(:func:`.joinedload()`, :func:`.subqueryload()`) +automate this activity, but without +the ORM "eager loading" typically means to use joins so that results across multiple +tables can be loaded in one result set instead of multiplying numbers +of queries as more depth is added (i.e. ``r + r*r2 + r*r2*r3`` ...) + +For more long-term profiling of queries, or to implement an application-side +"slow query" monitor, events can be used to intercept cursor executions, +using a recipe like the following:: + + from sqlalchemy import event + from sqlalchemy.engine import Engine + import time + import logging + + logging.basicConfig() + logger = logging.getLogger("myapp.sqltime") + logger.setLevel(logging.DEBUG) + + @event.listens_for(Engine, "before_cursor_execute") + def before_cursor_execute(conn, cursor, statement, + parameters, context, executemany): + conn.info.setdefault('query_start_time', []).append(time.time()) + logger.debug("Start Query: %s", statement) + + @event.listens_for(Engine, "after_cursor_execute") + def after_cursor_execute(conn, cursor, statement, + parameters, context, executemany): + total = time.time() - conn.info['query_start_time'].pop(-1) + logger.debug("Query Complete!") + logger.debug("Total Time: %f", total) + +Above, we use the :meth:`.ConnectionEvents.before_cursor_execute` and +:meth:`.ConnectionEvents.after_cursor_execute` events to establish an interception +point around when a statement is executed. We attach a timer onto the +connection using the :class:`._ConnectionRecord.info` dictionary; we use a +stack here for the occasional case where the cursor execute events may be nested. + +Code Profiling +^^^^^^^^^^^^^^ + +If logging reveals that individual queries are taking too long, you'd +need a breakdown of how much time was spent within the database +processing the query, sending results over the network, being handled +by the :term:`DBAPI`, and finally being received by SQLAlchemy's result set +and/or ORM layer. Each of these stages can present their own +individual bottlenecks, depending on specifics. + +For that you need to use the +`Python Profiling Module `_. +Below is a simple recipe which works profiling into a context manager:: + + import cProfile + import StringIO + import pstats + import contextlib + + @contextlib.contextmanager + def profiled(): + pr = cProfile.Profile() + pr.enable() + yield + pr.disable() + s = StringIO.StringIO() + ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') + ps.print_stats() + # uncomment this to see who's calling what + # ps.print_callers() + print s.getvalue() + +To profile a section of code:: + + with profiled(): + Session.query(FooClass).filter(FooClass.somevalue==8).all() + +The output of profiling can be used to give an idea where time is +being spent. 
A section of profiling output looks like this:: + + 13726 function calls (13042 primitive calls) in 0.014 seconds + + Ordered by: cumulative time + + ncalls tottime percall cumtime percall filename:lineno(function) + 222/21 0.001 0.000 0.011 0.001 lib/sqlalchemy/orm/loading.py:26(instances) + 220/20 0.002 0.000 0.010 0.001 lib/sqlalchemy/orm/loading.py:327(_instance) + 220/20 0.000 0.000 0.010 0.000 lib/sqlalchemy/orm/loading.py:284(populate_state) + 20 0.000 0.000 0.010 0.000 lib/sqlalchemy/orm/strategies.py:987(load_collection_from_subq) + 20 0.000 0.000 0.009 0.000 lib/sqlalchemy/orm/strategies.py:935(get) + 1 0.000 0.000 0.009 0.009 lib/sqlalchemy/orm/strategies.py:940(_load) + 21 0.000 0.000 0.008 0.000 lib/sqlalchemy/orm/strategies.py:942() + 2 0.000 0.000 0.004 0.002 lib/sqlalchemy/orm/query.py:2400(__iter__) + 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/orm/query.py:2414(_execute_and_instances) + 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/engine/base.py:659(execute) + 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/sql/elements.py:321(_execute_on_connection) + 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/engine/base.py:788(_execute_clauseelement) + + ... + +Above, we can see that the ``instances()`` SQLAlchemy function was called 222 +times (recursively, and 21 times from the outside), taking a total of .011 +seconds for all calls combined. + +Execution Slowness +^^^^^^^^^^^^^^^^^^ + +The specifics of these calls can tell us where the time is being spent. +If for example, you see time being spent within ``cursor.execute()``, +e.g. against the DBAPI:: + + 2 0.102 0.102 0.204 0.102 {method 'execute' of 'sqlite3.Cursor' objects} + +this would indicate that the database is taking a long time to start returning +results, and it means your query should be optimized, either by adding indexes +or restructuring the query and/or underlying schema. For that task, +analysis of the query plan is warranted, using a system such as EXPLAIN, +SHOW PLAN, etc. as is provided by the database backend. + +Result Fetching Slowness - Core +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If on the other hand you see many thousands of calls related to fetching rows, +or very long calls to ``fetchall()``, it may +mean your query is returning more rows than expected, or that the fetching +of rows itself is slow. The ORM itself typically uses ``fetchall()`` to fetch +rows (or ``fetchmany()`` if the :meth:`.Query.yield_per` option is used). + +An inordinately large number of rows would be indicated +by a very slow call to ``fetchall()`` at the DBAPI level:: + + 2 0.300 0.600 0.300 0.600 {method 'fetchall' of 'sqlite3.Cursor' objects} + +An unexpectedly large number of rows, even if the ultimate result doesn't seem +to have many rows, can be the result of a cartesian product - when multiple +sets of rows are combined together without appropriately joining the tables +together. It's often easy to produce this behavior with SQLAlchemy Core or +ORM query if the wrong :class:`.Column` objects are used in a complex query, +pulling in additional FROM clauses that are unexpected. + +On the other hand, a fast call to ``fetchall()`` at the DBAPI level, but then +slowness when SQLAlchemy's :class:`.ResultProxy` is asked to do a ``fetchall()``, +may indicate slowness in processing of datatypes, such as unicode conversions +and similar:: + + # the DBAPI cursor is fast... + 2 0.020 0.040 0.020 0.040 {method 'fetchall' of 'sqlite3.Cursor' objects} + + ... 
+
+    # but SQLAlchemy's result proxy is slow, this is type-level processing
+    2    0.100    0.200    0.100    0.200 lib/sqlalchemy/engine/result.py:778(fetchall)
+
+In some cases, a backend might be doing type-level processing that isn't
+needed.  More specifically, slow calls within the type API are a better
+indicator - below is what it looks like when we use a type like this::
+
+    from sqlalchemy import TypeDecorator, String
+    import time
+
+    class Foo(TypeDecorator):
+        impl = String
+
+        def process_result_value(self, value, dialect):
+            # intentionally add slowness for illustration purposes
+            time.sleep(.001)
+            return value
+
+The profiling output of this intentionally slow operation can be seen like this::
+
+    200    0.001    0.000    0.237    0.001 lib/sqlalchemy/sql/type_api.py:911(process)
+    200    0.001    0.000    0.236    0.001 test.py:28(process_result_value)
+    200    0.235    0.001    0.235    0.001 {time.sleep}
+
+That is, we see many expensive calls within the ``type_api`` system, and the actual
+time-consuming part is the ``time.sleep()`` call.
+
+Make sure to check the :doc:`Dialect documentation `
+for notes on known performance tuning suggestions at this level, especially for
+databases like Oracle.  There may be systems related to ensuring numeric accuracy
+or string processing that may not be needed in all cases.
+
+There may also be even lower-level points at which row-fetching performance is suffering;
+for example, if time spent seems to focus on a call like ``socket.receive()``,
+that could indicate that everything is fast except for the actual network connection,
+and too much time is spent with data moving over the network.
+
+Result Fetching Slowness - ORM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To detect slowness in ORM fetching of rows (which is the most common area
+of performance concern), calls like ``populate_state()`` and ``_instance()`` will
+illustrate individual ORM object populations::
+
+    # the ORM calls _instance for each ORM-loaded row it sees, and
+    # populate_state for each ORM-loaded row that results in the population
+    # of an object's attributes
+    220/20    0.001    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:327(_instance)
+    220/20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
+
+The ORM's slowness in turning rows into ORM-mapped objects is a product
+of the complexity of this operation combined with the overhead of cPython.
+Common strategies to mitigate this include:
+
+* Fetch individual columns instead of full entities, that is::
+
+      session.query(User.id, User.name)
+
+  instead of::
+
+      session.query(User)
+
+* Use :class:`.Bundle` objects to organize column-based results::
+
+      u_b = Bundle('user', User.id, User.name)
+      a_b = Bundle('address', Address.id, Address.email)
+
+      for user, address in session.query(u_b, a_b).join(User.addresses):
+          # ...
+
+* Use result caching - see :ref:`examples_caching` for an in-depth example
+  of this.
+
+* Consider a faster interpreter like that of Pypy.
+
+The output of a profile can be a little daunting but after some
+practice it is very easy to read.
+
+
+I'm inserting 400,000 rows with the ORM and it's really slow!
+--------------------------------------------------------------
+
+The SQLAlchemy ORM uses the :term:`unit of work` pattern when synchronizing
+changes to the database. This pattern goes far beyond simple "inserts"
+of data.
It includes that attributes which are assigned on objects are +received using an attribute instrumentation system which tracks +changes on objects as they are made, includes that all rows inserted +are tracked in an identity map which has the effect that for each row +SQLAlchemy must retrieve its "last inserted id" if not already given, +and also involves that rows to be inserted are scanned and sorted for +dependencies as needed. Objects are also subject to a fair degree of +bookkeeping in order to keep all of this running, which for a very +large number of rows at once can create an inordinate amount of time +spent with large data structures, hence it's best to chunk these. + +Basically, unit of work is a large degree of automation in order to +automate the task of persisting a complex object graph into a +relational database with no explicit persistence code, and this +automation has a price. + +ORMs are basically not intended for high-performance bulk inserts - +this is the whole reason SQLAlchemy offers the Core in addition to the +ORM as a first-class component. + +For the use case of fast bulk inserts, the +SQL generation and execution system that the ORM builds on top of +is part of the :doc:`Core `. Using this system directly, we can produce an INSERT that +is competitive with using the raw database API directly. + +The example below illustrates time-based tests for several different +methods of inserting rows, going from the most automated to the least. +With cPython 2.7, runtimes observed:: + + classics-MacBook-Pro:sqlalchemy classic$ python test.py + SQLAlchemy ORM: Total time for 100000 records 14.3528850079 secs + SQLAlchemy ORM pk given: Total time for 100000 records 10.0164160728 secs + SQLAlchemy Core: Total time for 100000 records 0.775382995605 secs + sqlite3: Total time for 100000 records 0.676795005798 sec + +We can reduce the time by a factor of three using recent versions of `Pypy `_:: + + classics-MacBook-Pro:sqlalchemy classic$ /usr/local/src/pypy-2.1-beta2-osx64/bin/pypy test.py + SQLAlchemy ORM: Total time for 100000 records 5.88369488716 secs + SQLAlchemy ORM pk given: Total time for 100000 records 3.52294301987 secs + SQLAlchemy Core: Total time for 100000 records 0.613556146622 secs + sqlite3: Total time for 100000 records 0.442467927933 sec + +Script:: + + import time + import sqlite3 + + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy import Column, Integer, String, create_engine + from sqlalchemy.orm import scoped_session, sessionmaker + + Base = declarative_base() + DBSession = scoped_session(sessionmaker()) + engine = None + + + class Customer(Base): + __tablename__ = "customer" + id = Column(Integer, primary_key=True) + name = Column(String(255)) + + + def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'): + global engine + engine = create_engine(dbname, echo=False) + DBSession.remove() + DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False) + Base.metadata.drop_all(engine) + Base.metadata.create_all(engine) + + + def test_sqlalchemy_orm(n=100000): + init_sqlalchemy() + t0 = time.time() + for i in xrange(n): + customer = Customer() + customer.name = 'NAME ' + str(i) + DBSession.add(customer) + if i % 1000 == 0: + DBSession.flush() + DBSession.commit() + print( + "SQLAlchemy ORM: Total time for " + str(n) + + " records " + str(time.time() - t0) + " secs") + + + def test_sqlalchemy_orm_pk_given(n=100000): + init_sqlalchemy() + t0 = time.time() + for i in xrange(n): + customer = Customer(id=i+1, name="NAME " + 
str(i)) + DBSession.add(customer) + if i % 1000 == 0: + DBSession.flush() + DBSession.commit() + print( + "SQLAlchemy ORM pk given: Total time for " + str(n) + + " records " + str(time.time() - t0) + " secs") + + + def test_sqlalchemy_core(n=100000): + init_sqlalchemy() + t0 = time.time() + engine.execute( + Customer.__table__.insert(), + [{"name": 'NAME ' + str(i)} for i in xrange(n)] + ) + print( + "SQLAlchemy Core: Total time for " + str(n) + + " records " + str(time.time() - t0) + " secs") + + + def init_sqlite3(dbname): + conn = sqlite3.connect(dbname) + c = conn.cursor() + c.execute("DROP TABLE IF EXISTS customer") + c.execute( + "CREATE TABLE customer (id INTEGER NOT NULL, " + "name VARCHAR(255), PRIMARY KEY(id))") + conn.commit() + return conn + + + def test_sqlite3(n=100000, dbname='sqlite3.db'): + conn = init_sqlite3(dbname) + c = conn.cursor() + t0 = time.time() + for i in xrange(n): + row = ('NAME ' + str(i),) + c.execute("INSERT INTO customer (name) VALUES (?)", row) + conn.commit() + print( + "sqlite3: Total time for " + str(n) + + " records " + str(time.time() - t0) + " sec") + + if __name__ == '__main__': + test_sqlalchemy_orm(100000) + test_sqlalchemy_orm_pk_given(100000) + test_sqlalchemy_core(100000) + test_sqlite3(100000) + diff --git a/doc/build/faq/sessions.rst b/doc/build/faq/sessions.rst new file mode 100644 index 0000000000..300b4bdbcc --- /dev/null +++ b/doc/build/faq/sessions.rst @@ -0,0 +1,363 @@ +Sessions / Queries +=================== + +.. contents:: + :local: + :class: faq + :backlinks: none + + +"This Session's transaction has been rolled back due to a previous exception during flush." (or similar) +--------------------------------------------------------------------------------------------------------- + +This is an error that occurs when a :meth:`.Session.flush` raises an exception, rolls back +the transaction, but further commands upon the `Session` are called without an +explicit call to :meth:`.Session.rollback` or :meth:`.Session.close`. + +It usually corresponds to an application that catches an exception +upon :meth:`.Session.flush` or :meth:`.Session.commit` and +does not properly handle the exception. For example:: + + from sqlalchemy import create_engine, Column, Integer + from sqlalchemy.orm import sessionmaker + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base(create_engine('sqlite://')) + + class Foo(Base): + __tablename__ = 'foo' + id = Column(Integer, primary_key=True) + + Base.metadata.create_all() + + session = sessionmaker()() + + # constraint violation + session.add_all([Foo(id=1), Foo(id=1)]) + + try: + session.commit() + except: + # ignore error + pass + + # continue using session without rolling back + session.commit() + + +The usage of the :class:`.Session` should fit within a structure similar to this:: + + try: + + session.commit() + except: + session.rollback() + raise + finally: + session.close() # optional, depends on use case + +Many things can cause a failure within the try/except besides flushes. You +should always have some kind of "framing" of your session operations so that +connection and transaction resources have a definitive boundary, otherwise +your application doesn't really have its usage of resources under control. +This is not to say that you need to put try/except blocks all throughout your +application - on the contrary, this would be a terrible idea. You should +architect your application such that there is one (or few) point(s) of +"framing" around session operations. 
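+
+One common way to build that single point of framing is a context manager;
+the sketch below assumes ``Session`` is a :class:`.sessionmaker` factory
+bound to an engine, and uses the ``Foo`` mapping from above (the
+``session_scope`` name is illustrative, not part of any API)::
+
+    from contextlib import contextmanager
+
+    @contextmanager
+    def session_scope():
+        """Provide a transactional scope around a series of operations."""
+        session = Session()
+        try:
+            yield session
+            session.commit()
+        except:
+            session.rollback()
+            raise
+        finally:
+            session.close()
+
+    # application code touches the session only within this one frame
+    with session_scope() as session:
+        session.add(Foo(id=1))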
+
+For a detailed discussion on how to organize usage of the :class:`.Session`,
+please see :ref:`session_faq_whentocreate`.
+
+But why does flush() insist on issuing a ROLLBACK?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+It would be great if :meth:`.Session.flush` could partially complete and then not roll
+back, however this is beyond its current capabilities since its internal
+bookkeeping would have to be modified such that it can be halted at any time
+and be exactly consistent with what's been flushed to the database. While this
+is theoretically possible, the usefulness of the enhancement is greatly
+decreased by the fact that many database operations require a ROLLBACK in any
+case. Postgres in particular has operations which, once they fail, do not
+allow the transaction to continue::
+
+    test=> create table foo(id integer primary key);
+    NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "foo_pkey" for table "foo"
+    CREATE TABLE
+    test=> begin;
+    BEGIN
+    test=> insert into foo values(1);
+    INSERT 0 1
+    test=> commit;
+    COMMIT
+    test=> begin;
+    BEGIN
+    test=> insert into foo values(1);
+    ERROR: duplicate key value violates unique constraint "foo_pkey"
+    test=> insert into foo values(2);
+    ERROR: current transaction is aborted, commands ignored until end of transaction block
+
+What SQLAlchemy offers that solves both issues is support of SAVEPOINT, via
+:meth:`.Session.begin_nested`. Using :meth:`.Session.begin_nested`, you can frame an operation that may
+potentially fail within a transaction, and then "roll back" to the point
+before its failure while maintaining the enclosing transaction.
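+
+As a brief sketch of that pattern - assuming a ``session`` already in a
+transaction and an iterable ``records`` whose items may fail to merge,
+both names being illustrative only - each risky operation gets its own
+SAVEPOINT, while the enclosing transaction continues::
+
+    for record in records:
+        try:
+            # SAVEPOINT is released on success, rolled back on error
+            with session.begin_nested():
+                session.merge(record)
+        except:
+            print "Skipped record %s" % record
+    session.commit()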
+
+But why isn't the one automatic call to ROLLBACK enough? Why must I ROLLBACK again?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is again a matter of the :class:`.Session` providing a consistent interface and
+refusing to guess about the context in which it's being used. For example, the
+:class:`.Session` supports the "framing" illustrated above at multiple levels. Suppose
+you had a decorator ``@with_session()``, which did this::
+
+    def with_session(fn):
+        def go(*args, **kw):
+            session.begin(subtransactions=True)
+            try:
+                ret = fn(*args, **kw)
+                session.commit()
+                return ret
+            except:
+                session.rollback()
+                raise
+        return go
+
+The above decorator begins a transaction if one does not exist already, and
+then commits it, if it was the creator. The "subtransactions" flag means that
+if :meth:`.Session.begin` were already called by an enclosing function, nothing happens
+except a counter is incremented - this counter is decremented when :meth:`.Session.commit`
+is called and only when it goes back to zero does the actual COMMIT happen. It
+allows this usage pattern::
+
+    @with_session
+    def one():
+        # do stuff
+        two()
+
+
+    @with_session
+    def two():
+        # etc.
+
+    one()
+
+    two()
+
+``one()`` can call ``two()``, or ``two()`` can be called by itself, and the
+``@with_session`` decorator ensures the appropriate "framing" - the transaction
+boundaries stay on the outermost call level. As you can see, if ``two()`` calls
+``flush()`` which throws an exception and then issues a ``rollback()``, there will
+*always* be a second ``rollback()`` performed by the decorator, and possibly a
+third corresponding to two levels of decorator. If the ``flush()`` pushed the
+``rollback()`` all the way out to the top of the stack, and then we said that
+all remaining ``rollback()`` calls are moot, there would be some silent behavior
+going on there. A poorly written enclosing method might suppress the exception,
+and then call ``commit()`` assuming nothing is wrong, and then you have a silent
+failure condition. In fact, the main reason people get this error is that
+they didn't write clean "framing" code, and would have had other problems
+down the road.
+
+If you think the above use case is a little exotic, the same kind of thing
+comes into play if you want to use SAVEPOINT - you might call ``begin_nested()``
+several times, and the ``commit()``/``rollback()`` calls each resolve the most
+recent ``begin_nested()``. The meaning of ``rollback()`` or ``commit()`` is
+dependent upon the enclosing block in which it is called, and you might have any
+sequence of ``rollback()``/``commit()`` in any order, and it's the level of nesting
+that determines their behavior.
+
+In both of the above cases, if ``flush()`` broke the nesting of transaction
+blocks, the behavior would be, depending on scenario, anywhere from "magic" to
+silent failure to blatant interruption of code flow.
+
+``flush()`` makes its own "subtransaction", so that a transaction is started up
+regardless of the external transactional state, and when complete it calls
+``commit()``, or ``rollback()`` upon failure - but that ``rollback()`` corresponds
+to its own subtransaction - it doesn't want to guess how you'd like to handle
+the external "framing" of the transaction, which could be nested many levels
+with any combination of subtransactions and real SAVEPOINTs. The job of
+starting/ending the "frame" is kept consistently with the code external to the
+``flush()``, and we made a decision that this was the most consistent approach.
+
+
+
+How do I make a Query that always adds a certain filter to every query?
+------------------------------------------------------------------------------------------------
+
+See the recipe at `PreFilteredQuery `_.
+
+I've created a mapping against an Outer Join, and while the query returns rows, no objects are returned. Why not?
+------------------------------------------------------------------------------------------------------------------
+
+Rows returned by an outer join may contain NULL for part of the primary key,
+as the primary key is the composite of both tables. The :class:`.Query` object ignores incoming rows
+that don't have an acceptable primary key. Based on the setting of the ``allow_partial_pks``
+flag on :func:`.mapper`, a primary key is accepted if the value has at least one non-NULL
+value, or alternatively if the value has no NULL values. See ``allow_partial_pks``
+at :func:`.mapper`.
+
+
+I'm using ``joinedload()`` or ``lazy=False`` to create a JOIN/OUTER JOIN and SQLAlchemy is not constructing the correct query when I try to add a WHERE, ORDER BY, LIMIT, etc. (which relies upon the (OUTER) JOIN)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+The joins generated by joined eager loading are only used to fully load related
+collections, and are designed to have no impact on the primary results of the query.
+Since they are anonymously aliased, they cannot be referenced directly.
+
+For detail on this behavior, see :doc:`orm/loading`.
+
+Query has no ``__len__()``, why not?
+------------------------------------
+
+The Python ``__len__()`` magic method applied to an object allows the ``len()``
+builtin to be used to determine the length of the collection.
+It's intuitive
+that a SQL query object would link ``__len__()`` to the :meth:`.Query.count`
+method, which emits a ``SELECT COUNT``. The reason this is not possible is
+that evaluating the query as a list would incur two SQL calls instead of
+one::
+
+    class Iterates(object):
+        def __len__(self):
+            print "LEN!"
+            return 5
+
+        def __iter__(self):
+            print "ITER!"
+            return iter([1, 2, 3, 4, 5])
+
+    list(Iterates())
+
+output::
+
+    ITER!
+    LEN!
+
+How Do I use Textual SQL with ORM Queries?
+-------------------------------------------
+
+See:
+
+* :ref:`orm_tutorial_literal_sql` - Ad-hoc textual blocks with :class:`.Query`
+
+* :ref:`session_sql_expressions` - Using :class:`.Session` with textual SQL directly.
+
+I'm calling ``Session.delete(myobject)`` and it isn't removed from the parent collection!
+------------------------------------------------------------------------------------------
+
+See :ref:`session_deleting_from_collections` for a description of this behavior.
+
+why isn't my ``__init__()`` called when I load objects?
+-------------------------------------------------------
+
+See :ref:`mapping_constructors` for a description of this behavior.
+
+how do I use ON DELETE CASCADE with SA's ORM?
+----------------------------------------------
+
+SQLAlchemy will always issue UPDATE or DELETE statements for dependent
+rows which are currently loaded in the :class:`.Session`. For rows which
+are not loaded, it will by default issue SELECT statements to load
+those rows and update/delete those as well; in other words it assumes
+there is no ON DELETE CASCADE configured.
+To configure SQLAlchemy to cooperate with ON DELETE CASCADE, see
+:ref:`passive_deletes`.
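+
+As a minimal sketch of the configuration that section describes - the
+``Parent``/``Child`` declarative classes here are illustrative only -
+the foreign key carries ``ON DELETE CASCADE``, and the relationship is
+told not to load unloaded children just to delete them::
+
+    from sqlalchemy import Column, ForeignKey, Integer
+    from sqlalchemy.orm import relationship
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+
+        # let the database cascade the delete to unloaded rows
+        children = relationship("Child", cascade="all, delete-orphan",
+                                passive_deletes=True)
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer,
+                            ForeignKey('parent.id', ondelete='CASCADE'))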
+
+I set the "foo_id" attribute on my instance to "7", but the "foo" attribute is still ``None`` - shouldn't it have loaded Foo with id #7?
+----------------------------------------------------------------------------------------------------------------------------------------------------
+
+The ORM is not constructed in such a way as to support
+immediate population of relationships driven from foreign
+key attribute changes - instead, it is designed to work the
+other way around - foreign key attributes are handled by the
+ORM behind the scenes, while the end user sets up object
+relationships naturally. Therefore, the recommended way to
+set ``o.foo`` is to do just that - set it!::
+
+    foo = Session.query(Foo).get(7)
+    o.foo = foo
+    Session.commit()
+
+Manipulation of foreign key attributes is of course entirely legal. However,
+setting a foreign-key attribute to a new value currently does not trigger
+an "expire" event of the :func:`.relationship` in which it's involved. This means
+that for the following sequence::
+
+    o = Session.query(SomeClass).first()
+    assert o.foo is None  # accessing an un-set attribute sets it to None
+    o.foo_id = 7
+
+``o.foo`` is initialized to ``None`` when we first accessed it. Setting
+``o.foo_id = 7`` will have the value of "7" as pending, but no flush
+has occurred - so ``o.foo`` is still ``None``::
+
+    # attribute is already set to None, has not been
+    # reconciled with o.foo_id = 7 yet
+    assert o.foo is None
+
+Loading of ``o.foo`` based on the foreign key mutation is usually achieved
+naturally after the commit, which both flushes the new foreign key value
+and expires all state::
+
+    Session.commit()  # expires all attributes
+
+    foo_7 = Session.query(Foo).get(7)
+
+    assert o.foo is foo_7  # o.foo lazyloads on access
+
+A more minimal operation is to expire the attribute individually - this can
+be performed for any :term:`persistent` object using :meth:`.Session.expire`::
+
+    o = Session.query(SomeClass).first()
+    o.foo_id = 7
+    Session.expire(o, ['foo'])  # object must be persistent for this
+
+    foo_7 = Session.query(Foo).get(7)
+
+    assert o.foo is foo_7  # o.foo lazyloads on access
+
+Note that if the object is not persistent but present in the :class:`.Session`,
+it's known as :term:`pending`. This means the row for the object has not been
+INSERTed into the database yet. For such an object, setting ``foo_id`` does not
+have meaning until the row is inserted; otherwise there is no row yet::
+
+    new_obj = SomeClass()
+    new_obj.foo_id = 7
+
+    Session.add(new_obj)
+
+    # accessing an un-set attribute sets it to None
+    assert new_obj.foo is None
+
+    Session.flush()  # emits INSERT
+
+    # expire this because we already set .foo to None
+    Session.expire(new_obj, ['foo'])
+
+    assert new_obj.foo is foo_7  # now it loads
+
+
+.. topic:: Attribute loading for non-persistent objects
+
+    One variant on the "pending" behavior above is if we use the flag
+    ``load_on_pending`` on :func:`.relationship`. When this flag is set, the
+    lazy loader will emit for ``new_obj.foo`` before the INSERT proceeds; another
+    variant of this is to use the :meth:`.Session.enable_relationship_loading`
+    method, which can "attach" an object to a :class:`.Session` in such a way that
+    many-to-one relationships load according to foreign key attributes
+    regardless of the object being in any particular state.
+    Both techniques are **not recommended for general use**; they were added to suit
+    specific programming scenarios encountered by users which involve the repurposing
+    of the ORM's usual object states.
+
+The recipe `ExpireRelationshipOnFKChange `_ features an example using SQLAlchemy events
+in order to coordinate the setting of foreign key attributes with many-to-one
+relationships.
+
+Is there a way to automagically have only unique keywords (or other kinds of objects) without doing a query for the keyword and getting a reference to the row containing that keyword?
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+When people read the many-to-many example in the docs, they get hit with the
+fact that if you create the same ``Keyword`` twice, it gets put in the DB twice,
+which is somewhat inconvenient.
+
+This `UniqueObject `_ recipe was created to address this issue.
+
+
diff --git a/doc/build/faq/sqlexpressions.rst b/doc/build/faq/sqlexpressions.rst
new file mode 100644
index 0000000000..c3504218b1
--- /dev/null
+++ b/doc/build/faq/sqlexpressions.rst
@@ -0,0 +1,140 @@
+SQL Expressions
+=================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_sql_expression_string:
+
+How do I render SQL expressions as strings, possibly with bound parameters inlined?
+------------------------------------------------------------------------------------
+
+The "stringification" of a SQLAlchemy statement or Query in the vast majority
+of cases is as simple as::
+
+    print(str(statement))
+
+this applies both to an ORM :class:`~.orm.query.Query` as well as to any :func:`.select` or other
+statement. Additionally, to get the statement as compiled to a
+specific dialect or engine, if the statement itself is not already
+bound to one, you can pass the dialect or engine to :meth:`.ClauseElement.compile`::
+
+    print(statement.compile(someengine))
+
+or without an :class:`.Engine`::
+
+    from sqlalchemy.dialects import postgresql
+    print(statement.compile(dialect=postgresql.dialect()))
+
+When given an ORM :class:`~.orm.query.Query` object, in order to get at the
+:meth:`.ClauseElement.compile`
+method we only need to access the :attr:`~.orm.query.Query.statement`
+accessor first::
+
+    statement = query.statement
+    print(statement.compile(someengine))
+
+The above forms will render the SQL statement as it is passed to the Python
+:term:`DBAPI`, which means that bound parameters are not rendered inline.
+SQLAlchemy normally does not stringify bound parameters, as this is handled
+appropriately by the Python DBAPI, not to mention that bypassing bound
+parameters is probably the most widely exploited security hole in
+modern web applications. SQLAlchemy has limited ability to do this
+stringification in certain circumstances, such as that of emitting DDL.
+In order to access this functionality one can use the ``literal_binds``
+flag, passed to ``compile_kwargs``::
+
+    from sqlalchemy.sql import table, column, select
+
+    t = table('t', column('x'))
+
+    s = select([t]).where(t.c.x == 5)
+
+    print(s.compile(compile_kwargs={"literal_binds": True}))
+
+the above approach has the caveats that it is only supported for basic
+types, such as ints and strings, and furthermore if a :func:`.bindparam`
+without a pre-set value is used directly, it won't be able to
+stringify that either.
+
+To support inline literal rendering for types not supported, implement
+a :class:`.TypeDecorator` for the target type which includes a
+:meth:`.TypeDecorator.process_literal_param` method::
+
+    from sqlalchemy import TypeDecorator, Integer
+
+
+    class MyFancyType(TypeDecorator):
+        impl = Integer
+
+        def process_literal_param(self, value, dialect):
+            return "my_fancy_formatting(%s)" % value
+
+    from sqlalchemy import Table, Column, MetaData
+
+    tab = Table('mytable', MetaData(), Column('x', MyFancyType()))
+
+    print(
+        tab.select().where(tab.c.x > 5).compile(
+            compile_kwargs={"literal_binds": True})
+    )
+
+producing output like::
+
+    SELECT mytable.x
+    FROM mytable
+    WHERE mytable.x > my_fancy_formatting(5)
+
+
+Why does ``.col.in_([])`` Produce ``col != col``? Why not ``1=0``?
+-------------------------------------------------------------------
+
+A little introduction to the issue. The IN operator in SQL, given a list of
+elements to compare against a column, generally does not accept an empty list,
+that is, while it is valid to say::
+
+    column IN (1, 2, 3)
+
+it's not valid to say::
+
+    column IN ()
+
+SQLAlchemy's :meth:`.Operators.in_` operator, when given an empty list, produces this
+expression::
+
+    column != column
+
+As of version 0.6, it also produces a warning stating that a less efficient
+comparison operation will be rendered.
+This expression is the only one that is
+both database agnostic and produces correct results.
+
+For example, the naive approach of "just evaluate to false, by comparing 1=0
+or 1!=1", does not handle nulls properly. An expression like::
+
+    NOT column != column
+
+will not return a row when "column" is null, but an expression which does not
+take the column into account::
+
+    NOT 1=0
+
+will.
+
+Closer to the mark is the following CASE expression::
+
+    CASE WHEN column IS NOT NULL THEN 1=0 ELSE NULL END
+
+We don't use this expression due to its verbosity, and it's also not
+typically accepted by Oracle within a WHERE clause - depending
+on how you phrase it, you'll either get "ORA-00905: missing keyword" or
+"ORA-00920: invalid relational operator". It's also still less efficient than
+just rendering SQL without the clause altogether (or not issuing the SQL at
+all, if the statement is just a simple search).
+
+The best approach therefore is to avoid the usage of IN given an argument list
+of zero length. Instead, don't emit the Query in the first place, if no rows
+should be returned. The warning is best promoted to a full error condition
+using the Python warnings filter (see http://docs.python.org/library/warnings.html).
+
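+As a small sketch of that guard - the ``session``, ``User`` mapping and
+``ids`` list here are illustrative assumptions - check for the empty
+list before the query is ever emitted::
+
+    if ids:
+        result = session.query(User).filter(User.id.in_(ids)).all()
+    else:
+        # no candidate ids - don't emit any SQL at all
+        result = []
+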
diff --git a/doc/build/index.rst b/doc/build/index.rst
index 5c9a62f3a3..497fa7857a 100644
--- a/doc/build/index.rst
+++ b/doc/build/index.rst
@@ -13,7 +13,7 @@ A high level view and getting set up.
 
 :doc:`Overview ` |
 :ref:`Installation Guide ` |
-:doc:`Frequently Asked Questions ` |
+:doc:`Frequently Asked Questions ` |
 :doc:`Migration from 0.8 ` |
 :doc:`Glossary ` |
 :doc:`Changelog catalog `
@@ -32,32 +32,23 @@ of Python objects, proceed first to the tutorial.
 * **ORM Configuration:**
   :doc:`Mapper Configuration ` |
   :doc:`Relationship Configuration ` |
-  :doc:`Inheritance Mapping ` |
-  :doc:`Advanced Collection Configuration `
 
 * **Configuration Extensions:**
-  :doc:`Declarative Extension ` |
+  :doc:`Declarative Extension ` |
   :doc:`Association Proxy ` |
   :doc:`Hybrid Attributes ` |
   :doc:`Automap ` |
-  :doc:`Mutable Scalars ` |
-  :doc:`Ordered List `
+  :doc:`Mutable Scalars `
 
 * **ORM Usage:**
   :doc:`Session Usage and Guidelines ` |
-  :doc:`Query API reference ` |
-  :doc:`Relationship Loading Techniques `
+  :doc:`Loading Objects `
 
 * **Extending the ORM:**
-  :doc:`ORM Event Interfaces ` |
-  :doc:`Internals API `
+  :doc:`ORM Events and Internals `
 
 * **Other:**
-  :doc:`Introduction to Examples ` |
-  :doc:`Deprecated Event Interfaces ` |
-  :doc:`ORM Exceptions ` |
-  :doc:`Horizontal Sharding ` |
-  :doc:`Alternate Instrumentation `
+  :doc:`Introduction to Examples `
 
 SQLAlchemy Core
 ===============
@@ -78,6 +69,7 @@ are documented here. In contrast to the ORM's domain-centric mode of usage, the
   :doc:`Connection Pooling `
 
 * **Schema Definition:**
+  :doc:`Overview ` |
   :ref:`Tables and Columns ` |
   :ref:`Database Introspection (Reflection) ` |
   :ref:`Insert/Update Defaults ` |
@@ -86,23 +78,15 @@ are documented here. In contrast to the ORM's domain-centric mode of usage, the
 
 * **Datatypes:**
   :ref:`Overview ` |
-  :ref:`Generic Types ` |
-  :ref:`SQL Standard Types ` |
-  :ref:`Vendor Specific Types ` |
   :ref:`Building Custom Types ` |
-  :ref:`Defining New Operators ` |
   :ref:`API `
 
-* **Extending the Core:**
-  :doc:`SQLAlchemy Events ` |
+* **Core Basics:**
+  :doc:`Overview ` |
+  :doc:`Runtime Inspection API ` |
+  :doc:`Event System ` |
   :doc:`Core Event Interfaces ` |
   :doc:`Creating Custom SQL Constructs ` |
-  :doc:`Internals API `
-
-* **Other:**
-  :doc:`Runtime Inspection API ` |
-  :doc:`core/interfaces` |
-  :doc:`core/exceptions`
 
 
 Dialect Documentation
diff --git a/doc/build/orm/backref.rst b/doc/build/orm/backref.rst
new file mode 100644
index 0000000000..16cfe5606a
--- /dev/null
+++ b/doc/build/orm/backref.rst
@@ -0,0 +1,273 @@
+.. _relationships_backref:
+
+Linking Relationships with Backref
+----------------------------------
+
+The :paramref:`~.relationship.backref` keyword argument was first introduced in :ref:`ormtutorial_toplevel`, and has been
+mentioned throughout many of the examples here. What does it actually do? Let's start
+with the canonical ``User`` and ``Address`` scenario::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address", backref="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+The above configuration establishes a collection of ``Address`` objects on ``User`` called
+``User.addresses``. It also establishes a ``.user`` attribute on ``Address`` which will
+refer to the parent ``User`` object.
+
+In fact, the :paramref:`~.relationship.backref` keyword is only a common shortcut for placing a second
+:func:`.relationship` onto the ``Address`` mapping, including the establishment
+of an event listener on both sides which will mirror attribute operations
+in both directions. The above configuration is equivalent to::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address", back_populates="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+        user = relationship("User", back_populates="addresses")
+
+Above, we add a ``.user`` relationship to ``Address`` explicitly. On
+both relationships, the :paramref:`~.relationship.back_populates` directive tells each relationship
+about the other one, indicating that they should establish "bidirectional"
+behavior between each other. The primary effect of this configuration
+is that the relationship adds event handlers to both attributes
+which have the behavior of "when an append or set event occurs here, set ourselves
+onto the incoming attribute using this particular attribute name".
+The behavior is illustrated as follows. Start with a ``User`` and an ``Address``
+instance.
+The ``.addresses`` collection is empty, and the ``.user`` attribute
+is ``None``::
+
+    >>> u1 = User()
+    >>> a1 = Address()
+    >>> u1.addresses
+    []
+    >>> print a1.user
+    None
+
+However, once the ``Address`` is appended to the ``u1.addresses`` collection,
+both the collection and the scalar attribute have been populated::
+
+    >>> u1.addresses.append(a1)
+    >>> u1.addresses
+    [<__main__.Address object at 0x12a6ed0>]
+    >>> a1.user
+    <__main__.User object at 0x12a6590>
+
+This behavior of course works in reverse for removal operations, as well
+as for equivalent operations on both sides. For example, when ``.user`` is set
+again to ``None``, the ``Address`` object is removed
+from the reverse collection::
+
+    >>> a1.user = None
+    >>> u1.addresses
+    []
+
+The manipulation of the ``.addresses`` collection and the ``.user`` attribute
+occurs entirely in Python without any interaction with the SQL database.
+Without this behavior, the proper state would still be apparent on both sides once the
+data has been flushed to the database, and later reloaded after a commit or
+expiration operation occurs. The :paramref:`~.relationship.backref`/:paramref:`~.relationship.back_populates` behavior has the advantage
+that common bidirectional operations can reflect the correct state without requiring
+a database round trip.
+
+Remember, when the :paramref:`~.relationship.backref` keyword is used on a single relationship, it's
+exactly the same as if the above two relationships were created individually
+using :paramref:`~.relationship.back_populates` on each.
+
+Backref Arguments
+~~~~~~~~~~~~~~~~~~
+
+We've established that the :paramref:`~.relationship.backref` keyword is merely a shortcut for building
+two individual :func:`.relationship` constructs that refer to each other. Part of
+the behavior of this shortcut is that certain configurational arguments applied to
+the :func:`.relationship`
+will also be applied to the other direction - namely those arguments that describe
+the relationship at a schema level, and are unlikely to be different in the reverse
+direction. The usual case
+here is a many-to-many :func:`.relationship` that has a :paramref:`~.relationship.secondary` argument,
+or a one-to-many or many-to-one which has a :paramref:`~.relationship.primaryjoin` argument (the
+:paramref:`~.relationship.primaryjoin` argument is discussed in :ref:`relationship_primaryjoin`).
+For example,
+suppose we limited the list of ``Address`` objects to those which start with "tony"::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address",
+                        primaryjoin="and_(User.id==Address.user_id, "
+                            "Address.email.startswith('tony'))",
+                        backref="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+We can observe, by inspecting the resulting property, that both sides
+of the relationship have this join condition applied::
+
+    >>> print User.addresses.property.primaryjoin
+    "user".id = address.user_id AND address.email LIKE :email_1 || '%%'
+    >>>
+    >>> print Address.user.property.primaryjoin
+    "user".id = address.user_id AND address.email LIKE :email_1 || '%%'
+    >>>
+
+This reuse of arguments should pretty much do the "right thing" - it
+uses only arguments that are applicable, and in the case of a
+many-to-many relationship, will reverse the usage of
+:paramref:`~.relationship.primaryjoin` and
+:paramref:`~.relationship.secondaryjoin` to correspond to the other
+direction (see the example in :ref:`self_referential_many_to_many` for
+this).
+
+It's very often the case however that we'd like to specify arguments
+that are specific to just the side where we happened to place the
+"backref". This includes :func:`.relationship` arguments like
+:paramref:`~.relationship.lazy`,
+:paramref:`~.relationship.remote_side`,
+:paramref:`~.relationship.cascade` and
+:paramref:`~.relationship.cascade_backrefs`. For this case we use
+the :func:`.backref` function in place of a string::
+
+    # <other imports>
+    from sqlalchemy.orm import backref
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address",
+                        backref=backref("user", lazy="joined"))
+
+Where above, we placed a ``lazy="joined"`` directive only on the ``Address.user``
+side, indicating that when a query against ``Address`` is made, a join to the ``User``
+entity should be made automatically which will populate the ``.user`` attribute of each
+returned ``Address``. The :func:`.backref` function formatted the arguments we gave
+it into a form that is interpreted by the receiving :func:`.relationship` as additional
+arguments to be applied to the new relationship it creates.
+
+One Way Backrefs
+~~~~~~~~~~~~~~~~~
+
+An unusual case is that of the "one way backref". This is where the
+"back-populating" behavior of the backref is only desirable in one
+direction. An example of this is a collection which contains a
+filtering :paramref:`~.relationship.primaryjoin` condition. We'd
+like to append items to this collection as needed, and have them
+populate the "parent" object on the incoming object. However, we'd
+also like to have items that are not part of the collection, but still
+have the same "parent" association - these items should never be in
+the collection.
+
+Taking our previous example, where we established a
+:paramref:`~.relationship.primaryjoin` that limited the collection
+only to ``Address`` objects whose email address started with the word
+``tony``, the usual backref behavior is that all items populate in
+both directions.
+We wouldn't want this behavior for a case like the
+following::
+
+    >>> u1 = User()
+    >>> a1 = Address(email='mary')
+    >>> a1.user = u1
+    >>> u1.addresses
+    [<__main__.Address object at 0x1411910>]
+
+Above, the ``Address`` object that doesn't match the criterion of "starts with 'tony'"
+is present in the ``addresses`` collection of ``u1``. After these objects are flushed,
+the transaction committed and their attributes expired for a re-load, the ``addresses``
+collection will hit the database on next access and no longer have this ``Address`` object
+present, due to the filtering condition. But we can do away with this unwanted side
+of the "backref" behavior on the Python side by using two separate :func:`.relationship` constructs,
+placing :paramref:`~.relationship.back_populates` only on one side::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+        addresses = relationship("Address",
+                        primaryjoin="and_(User.id==Address.user_id, "
+                            "Address.email.startswith('tony'))",
+                        back_populates="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+        user = relationship("User")
+
+With the above scenario, appending an ``Address`` object to the ``.addresses``
+collection of a ``User`` will always establish the ``.user`` attribute on that
+``Address``::
+
+    >>> u1 = User()
+    >>> a1 = Address(email='tony')
+    >>> u1.addresses.append(a1)
+    >>> a1.user
+    <__main__.User object at 0x1411850>
+
+However, applying a ``User`` to the ``.user`` attribute of an ``Address``
+will not append the ``Address`` object to the collection::
+
+    >>> a2 = Address(email='mary')
+    >>> a2.user = u1
+    >>> a2 in u1.addresses
+    False
+
+Of course, we've disabled some of the usefulness of
+:paramref:`~.relationship.backref` here, in that when we do append an
+``Address`` that corresponds to the criteria of
+``email.startswith('tony')``, it won't show up in the
+``User.addresses`` collection until the session is flushed, and the
+attributes reloaded after a commit or expire operation. While we
+could consider an attribute event that checks this criterion in
+Python, this starts to cross the line of duplicating too much SQL
+behavior in Python. The backref behavior itself is only a slight
+transgression of this philosophy - SQLAlchemy tries to keep these to a
+minimum overall.
diff --git a/doc/build/orm/basic_relationships.rst b/doc/build/orm/basic_relationships.rst
new file mode 100644
index 0000000000..9a7ad4fa22
--- /dev/null
+++ b/doc/build/orm/basic_relationships.rst
@@ -0,0 +1,313 @@
+.. _relationship_patterns:
+
+Basic Relationship Patterns
+----------------------------
+
+A quick walkthrough of the basic relational patterns.
+
+The imports used for each of the following sections are as follows::
+
+    from sqlalchemy import Table, Column, Integer, ForeignKey
+    from sqlalchemy.orm import relationship, backref
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+
+One To Many
+~~~~~~~~~~~~
+
+A one to many relationship places a foreign key on the child table referencing
+the parent.
+:func:`.relationship` is then specified on the parent, as referencing
+a collection of items represented by the child::
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child")
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('parent.id'))
+
+To establish a bidirectional relationship in one-to-many, where the "reverse"
+side is a many to one, specify the :paramref:`~.relationship.backref` option::
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child", backref="parent")
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('parent.id'))
+
+``Child`` will get a ``parent`` attribute with many-to-one semantics.
+
+Many To One
+~~~~~~~~~~~~
+
+Many to one places a foreign key in the parent table referencing the child.
+:func:`.relationship` is declared on the parent, where a new scalar-holding
+attribute will be created::
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        child_id = Column(Integer, ForeignKey('child.id'))
+        child = relationship("Child")
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+
+Bidirectional behavior is achieved by setting
+:paramref:`~.relationship.backref` to the value ``"parents"``, which
+will place a one-to-many collection on the ``Child`` class::
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        child_id = Column(Integer, ForeignKey('child.id'))
+        child = relationship("Child", backref="parents")
+
+.. _relationships_one_to_one:
+
+One To One
+~~~~~~~~~~~
+
+One To One is essentially a bidirectional relationship with a scalar
+attribute on both sides. To achieve this, the :paramref:`~.relationship.uselist` flag indicates
+the placement of a scalar attribute instead of a collection on the "many" side
+of the relationship. To convert one-to-many into one-to-one::
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        child = relationship("Child", uselist=False, backref="parent")
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('parent.id'))
+
+Or to turn a one-to-many backref into one-to-one, use the :func:`.backref` function
+to provide arguments for the reverse side::
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        child_id = Column(Integer, ForeignKey('child.id'))
+        child = relationship("Child", backref=backref("parent", uselist=False))
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+
+.. _relationships_many_to_many:
+
+Many To Many
+~~~~~~~~~~~~~
+
+Many to Many adds an association table between two classes. The association
+table is indicated by the :paramref:`~.relationship.secondary` argument to
+:func:`.relationship`.
+Usually, the :class:`.Table` uses the :class:`.MetaData`
+object associated with the declarative base class, so that the :class:`.ForeignKey`
+directives can locate the remote tables with which to link::
+
+    association_table = Table('association', Base.metadata,
+        Column('left_id', Integer, ForeignKey('left.id')),
+        Column('right_id', Integer, ForeignKey('right.id'))
+    )
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child",
+                        secondary=association_table)
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)
+
+For a bidirectional relationship, both sides of the relationship contain a
+collection. The :paramref:`~.relationship.backref` keyword will automatically use
+the same :paramref:`~.relationship.secondary` argument for the reverse relationship::
+
+    association_table = Table('association', Base.metadata,
+        Column('left_id', Integer, ForeignKey('left.id')),
+        Column('right_id', Integer, ForeignKey('right.id'))
+    )
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child",
+                        secondary=association_table,
+                        backref="parents")
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)
+
+The :paramref:`~.relationship.secondary` argument of :func:`.relationship` also accepts a callable
+that returns the ultimate argument, which is evaluated only when mappers are
+first used. Using this, we can define the ``association_table`` at a later
+point, as long as it's available to the callable after all module initialization
+is complete::
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child",
+                        secondary=lambda: association_table,
+                        backref="parents")
+
+With the declarative extension in use, the traditional "string name of the table"
+is accepted as well, matching the name of the table as stored in ``Base.metadata.tables``::
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child",
+                        secondary="association",
+                        backref="parents")
+
+.. _relationships_many_to_many_deletion:
+
+Deleting Rows from the Many to Many Table
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A behavior which is unique to the :paramref:`~.relationship.secondary` argument to :func:`.relationship`
+is that the :class:`.Table` which is specified here is automatically subject
+to INSERT and DELETE statements, as objects are added or removed from the collection.
+There is **no need to delete from this table manually**. The act of removing a
+record from the collection will have the effect of the row being deleted on flush::
+
+    # row will be deleted from the "secondary" table
+    # automatically
+    myparent.children.remove(somechild)
+
+A question which often arises is how the row in the "secondary" table can be deleted
+when the child object is handed directly to :meth:`.Session.delete`::
+
+    session.delete(somechild)
+
+There are several possibilities here:
+
+* If there is a :func:`.relationship` from ``Parent`` to ``Child``, but there is
+  **not** a reverse-relationship that links a particular ``Child`` to each ``Parent``,
+  SQLAlchemy will not have any awareness that when deleting this particular
+  ``Child`` object, it needs to maintain the "secondary" table that links it to
+  the ``Parent``. No delete of the "secondary" table will occur.
+* If there is a relationship that links a particular ``Child`` to each ``Parent``,
+  suppose it's called ``Child.parents``, SQLAlchemy by default will load in
+  the ``Child.parents`` collection to locate all ``Parent`` objects, and remove
+  each row from the "secondary" table which establishes this link. Note that
+  this relationship does not need to be bidirectional; SQLAlchemy is strictly
+  looking at every :func:`.relationship` associated with the ``Child`` object
+  being deleted.
+* A higher performing option here is to use ON DELETE CASCADE directives
+  with the foreign keys used by the database. Assuming the database supports
+  this feature, the database itself can be made to automatically delete rows in the
+  "secondary" table as referencing rows in "child" are deleted. SQLAlchemy
+  can be instructed to forego actively loading in the ``Child.parents``
+  collection in this case using the :paramref:`~.relationship.passive_deletes`
+  directive on :func:`.relationship`; see :ref:`passive_deletes` for more details
+  on this.
+
+Note again, these behaviors are *only* relevant to the :paramref:`~.relationship.secondary` option
+used with :func:`.relationship`. If dealing with association tables that
+are mapped explicitly and are *not* present in the :paramref:`~.relationship.secondary` option
+of a relevant :func:`.relationship`, cascade rules can be used instead
+to automatically delete entities in reaction to a related entity being
+deleted - see :ref:`unitofwork_cascades` for information on this feature.
+
+
+.. _association_pattern:
+
+Association Object
+~~~~~~~~~~~~~~~~~~
+
+The association object pattern is a variant on many-to-many: it's used
+when your association table contains additional columns beyond those
+which are foreign keys to the left and right tables. Instead of using
+the :paramref:`~.relationship.secondary` argument, you map a new class
+directly to the association table. The left side of the relationship
+references the association object via one-to-many, and the association
+class references the right side via many-to-one.
+Below we illustrate
+an association table mapped to the ``Association`` class which
+includes a column called ``extra_data``, which is a string value that
+is stored along with each association between ``Parent`` and
+``Child``::
+
+    class Association(Base):
+        __tablename__ = 'association'
+        left_id = Column(Integer, ForeignKey('left.id'), primary_key=True)
+        right_id = Column(Integer, ForeignKey('right.id'), primary_key=True)
+        extra_data = Column(String(50))
+        child = relationship("Child")
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Association")
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)
+
+The bidirectional version adds backrefs to both relationships::
+
+    class Association(Base):
+        __tablename__ = 'association'
+        left_id = Column(Integer, ForeignKey('left.id'), primary_key=True)
+        right_id = Column(Integer, ForeignKey('right.id'), primary_key=True)
+        extra_data = Column(String(50))
+        child = relationship("Child", backref="parent_assocs")
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Association", backref="parent")
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)
+
+Working with the association pattern in its direct form requires that child
+objects are associated with an association instance before being appended to
+the parent; similarly, access from parent to child goes through the
+association object::
+
+    # create parent, append a child via association
+    p = Parent()
+    a = Association(extra_data="some data")
+    a.child = Child()
+    p.children.append(a)
+
+    # iterate through child objects via association, including association
+    # attributes
+    for assoc in p.children:
+        print assoc.extra_data
+        print assoc.child
+
+To enhance the association object pattern such that direct
+access to the ``Association`` object is optional, SQLAlchemy
+provides the :ref:`associationproxy_toplevel` extension. This
+extension allows the configuration of attributes which will
+access two "hops" with a single access, one "hop" to the
+associated object, and a second to a target attribute.
+
+.. note::
+
+    When using the association object pattern, it is advisable that the
+    association-mapped table not be used as the
+    :paramref:`~.relationship.secondary` argument on a
+    :func:`.relationship` elsewhere, unless that :func:`.relationship`
+    contains the option :paramref:`~.relationship.viewonly` set to
+    ``True``. SQLAlchemy otherwise may attempt to emit redundant INSERT
+    and DELETE statements on the same table, if similar state is
+    detected on the related attribute as well as the associated object.
diff --git a/doc/build/orm/cascades.rst b/doc/build/orm/cascades.rst
new file mode 100644
index 0000000000..f645e6dae0
--- /dev/null
+++ b/doc/build/orm/cascades.rst
@@ -0,0 +1,372 @@
+.. _unitofwork_cascades:
+
+Cascades
+========
+
+Mappers support the concept of configurable :term:`cascade` behavior on
+:func:`~sqlalchemy.orm.relationship` constructs. This refers
+to how operations performed on a "parent" object relative to a
+particular :class:`.Session` should be propagated to items
+referred to by that relationship (e.g. "child" objects), and is
+affected by the :paramref:`.relationship.cascade` option.
+
+The default behavior of cascade is limited to cascades of the
+so-called :ref:`cascade_save_update` and :ref:`cascade_merge` settings.
+
+The typical "alternative" setting for cascade is to add
+the :ref:`cascade_delete` and :ref:`cascade_delete_orphan` options;
+these settings are appropriate for related objects which only exist as
+long as they are attached to their parent, and are otherwise deleted.
+
+Cascade behavior is configured by changing the
+:paramref:`~.relationship.cascade` option on
+:func:`~sqlalchemy.orm.relationship`::
+
+    class Order(Base):
+        __tablename__ = 'order'
+
+        items = relationship("Item", cascade="all, delete-orphan")
+        customer = relationship("User", cascade="save-update")
+
+To set cascades on a backref, the same flag can be used with the
+:func:`~.sqlalchemy.orm.backref` function, which ultimately feeds
+its arguments back into :func:`~sqlalchemy.orm.relationship`::
+
+    class Item(Base):
+        __tablename__ = 'item'
+
+        order = relationship("Order",
+                        backref=backref("items", cascade="all, delete-orphan")
+                    )
+
+.. sidebar:: The Origins of Cascade
+
+    SQLAlchemy's notion of cascading behavior on relationships,
+    as well as the options to configure them, are primarily derived
+    from the similar feature in the Hibernate ORM; Hibernate refers
+    to "cascade" in a few places such as in
+    `Example: Parent/Child `_.
+    If cascades are confusing, we'll refer to their conclusion,
+    stating "The sections we have just covered can be a bit confusing.
+    However, in practice, it all works out nicely."
+
+The default value of :paramref:`~.relationship.cascade` is ``save-update, merge``.
+The typical alternative setting for this parameter is either
+``all`` or more commonly ``all, delete-orphan``. The ``all`` symbol
+is a synonym for ``save-update, merge, refresh-expire, expunge, delete``,
+and using it in conjunction with ``delete-orphan`` indicates that the child
+object should follow along with its parent in all cases, and be deleted once
+it is no longer associated with that parent.
+
+The list of available values which can be specified for
+the :paramref:`~.relationship.cascade` parameter is described in the following subsections.
+
+.. _cascade_save_update:
+
+save-update
+-----------
+
+``save-update`` cascade indicates that when an object is placed into a
+:class:`.Session` via :meth:`.Session.add`, all the objects associated
+with it via this :func:`.relationship` should also be added to that
+same :class:`.Session`. Suppose we have an object ``user1`` with two
+related objects ``address1``, ``address2``::
+
+    >>> user1 = User()
+    >>> address1, address2 = Address(), Address()
+    >>> user1.addresses = [address1, address2]
+
+If we add ``user1`` to a :class:`.Session`, it will also add
+``address1``, ``address2`` implicitly::
+
+    >>> sess = Session()
+    >>> sess.add(user1)
+    >>> address1 in sess
+    True
+
+``save-update`` cascade also affects attribute operations for objects
+that are already present in a :class:`.Session`. If we add a third
+object, ``address3``, to the ``user1.addresses`` collection, it
+becomes part of the state of that :class:`.Session`::
+
+    >>> address3 = Address()
+    >>> user1.addresses.append(address3)
+    >>> address3 in sess
+    True
+
+``save-update`` has the possibly surprising behavior that
+persistent objects which were *removed* from a collection,
+or in some cases a scalar attribute,
+may also be pulled into the :class:`.Session` of a parent object; this is
+so that the flush process may handle that related object appropriately.
+
+This case can usually only arise if an object is removed from one :class:`.Session`
+and added to another::
+
+    >>> user1 = sess1.query(User).filter_by(id=1).first()
+    >>> address1 = user1.addresses[0]
+    >>> sess1.close()  # user1, address1 no longer associated with sess1
+    >>> user1.addresses.remove(address1)  # address1 no longer associated with user1
+    >>> sess2 = Session()
+    >>> sess2.add(user1)  # ... but it still gets added to the new session,
+    >>> address1 in sess2  # because it's still "pending" for flush
+    True
+
+The ``save-update`` cascade is on by default, and is typically taken
+for granted; it simplifies code by allowing a single call to
+:meth:`.Session.add` to register an entire structure of objects within
+that :class:`.Session` at once. While it can be disabled, there
+is usually not a need to do so.
+
+One case where ``save-update`` cascade does sometimes get in the way is that
+it takes place in both directions for bi-directional relationships, e.g.
+backrefs, meaning that the association of a child object with a particular parent
+can have the effect of the parent object being implicitly associated with that
+child object's :class:`.Session`; this pattern, as well as how to modify its
+behavior using the :paramref:`~.relationship.cascade_backrefs` flag,
+is discussed in the section :ref:`backref_cascade`.
+
+.. _cascade_delete:
+
+delete
+------
+
+The ``delete`` cascade indicates that when a "parent" object
+is marked for deletion, its related "child" objects should also be marked
+for deletion. If, for example, we have a relationship ``User.addresses``
+with ``delete`` cascade configured::
+
+    class User(Base):
+        # ...
+
+        addresses = relationship("Address", cascade="save-update, merge, delete")
+
+If using the above mapping, we have a ``User`` object and two
+related ``Address`` objects::
+
+    >>> user1 = sess.query(User).filter_by(id=1).first()
+    >>> address1, address2 = user1.addresses
+
+If we mark ``user1`` for deletion, after the flush operation proceeds,
+``address1`` and ``address2`` will also be deleted:
+
+.. sourcecode:: python+sql
+
+    >>> sess.delete(user1)
+    >>> sess.commit()
+    {opensql}DELETE FROM address WHERE address.id = ?
+    ((1,), (2,))
+    DELETE FROM user WHERE user.id = ?
+    (1,)
+    COMMIT
+
+Alternatively, if our ``User.addresses`` relationship does *not* have
+``delete`` cascade, SQLAlchemy's default behavior is to instead de-associate
+``address1`` and ``address2`` from ``user1`` by setting their foreign key
+reference to ``NULL``. Using a mapping as follows::
+
+    class User(Base):
+        # ...
+
+        addresses = relationship("Address")
+
+Upon deletion of a parent ``User`` object, the rows in ``address`` are not
+deleted, but are instead de-associated:
+
+.. sourcecode:: python+sql
+
+    >>> sess.delete(user1)
+    >>> sess.commit()
+    {opensql}UPDATE address SET user_id=? WHERE address.id = ?
+    (None, 1)
+    UPDATE address SET user_id=? WHERE address.id = ?
+    (None, 2)
+    DELETE FROM user WHERE user.id = ?
+    (1,)
+    COMMIT
+
+``delete`` cascade is more often than not used in conjunction with
+:ref:`cascade_delete_orphan` cascade, which will emit a DELETE for the related
+row if the "child" object is deassociated from the parent. The combination
+of ``delete`` and ``delete-orphan`` cascade covers both situations where
+SQLAlchemy has to decide between setting a foreign key column to NULL versus
+deleting the row entirely.
+
+.. topic:: ORM-level "delete" cascade vs. FOREIGN KEY level "ON DELETE" cascade
+
+    The behavior of SQLAlchemy's "delete" cascade has a lot of overlap with the
+    ``ON DELETE CASCADE`` feature of a database foreign key, as well
+    as with that of the ``ON DELETE SET NULL`` foreign key setting when "delete"
+    cascade is not specified. Database level "ON DELETE" cascades are specific to the
+    "FOREIGN KEY" construct of the relational database; SQLAlchemy allows
+    configuration of these schema-level constructs at the :term:`DDL` level
+    using options on :class:`.ForeignKeyConstraint` which are described
+    at :ref:`on_update_on_delete`.
+
+    It is important to note the differences between the ORM and the relational
+    database's notion of "cascade" as well as how they integrate:
+
+    * A database level ``ON DELETE`` cascade is configured effectively
+      on the **many-to-one** side of the relationship; that is, we configure
+      it relative to the ``FOREIGN KEY`` constraint that is the "many" side
+      of a relationship. At the ORM level, **this direction is reversed**.
+      SQLAlchemy handles the deletion of "child" objects relative to a
+      "parent" from the "parent" side, which means that ``delete`` and
+      ``delete-orphan`` cascade are configured on the **one-to-many**
+      side.
+
+    * Database level foreign keys with no ``ON DELETE`` setting
+      are often used to **prevent** a parent
+      row from being removed, as it would necessarily leave an unhandled
+      related row present. If this behavior is desired in a one-to-many
+      relationship, SQLAlchemy's default behavior of setting a foreign key
+      to ``NULL`` can be caught in one of two ways:
+
+        * The easiest and most common is just to set the
+          foreign-key-holding column to ``NOT NULL`` at the database schema
+          level. An attempt by SQLAlchemy to set the column to NULL will
+          fail with a simple NOT NULL constraint exception.
+
+        * The other, more special-case way is to set the :paramref:`~.relationship.passive_deletes`
+          flag to the string ``"all"``. This has the effect of entirely
+          disabling SQLAlchemy's behavior of setting the foreign key column
+          to NULL, and a DELETE will be emitted for the parent row without
+          any effect on the child row, even if the child row is present
+          in memory. This may be desirable in the case when
+          database-level foreign key triggers, either special ``ON DELETE`` settings
+          or otherwise, need to be activated in all cases when a parent row is deleted.
+
+    * Database level ``ON DELETE`` cascade is **vastly more efficient**
+      than that of SQLAlchemy. The database can chain a series of cascade
+      operations across many relationships at once; e.g. if row A is deleted,
+      all the related rows in table B can be deleted, and all the C rows related
+      to each of those B rows, and on and on, all within the scope of a single
+      DELETE statement. SQLAlchemy on the other hand, in order to support
+      the cascading delete operation fully, has to individually load each
+      related collection in order to target all rows that then may have further
+      related collections. That is, SQLAlchemy isn't sophisticated enough
+      to emit a DELETE for all those related rows at once within this context.
+
+    * SQLAlchemy doesn't **need** to be this sophisticated, as we instead provide
+      smooth integration with the database's own ``ON DELETE`` functionality,
+      by using the :paramref:`~.relationship.passive_deletes` option in conjunction
+      with properly configured foreign key constraints.
+      Under this behavior,
+      SQLAlchemy only emits DELETE for those rows that are already locally
+      present in the :class:`.Session`; for any collections that are unloaded,
+      it leaves them to the database to handle, rather than emitting a SELECT
+      for them. The section :ref:`passive_deletes` provides an example of this use.
+
+    * While database-level ``ON DELETE`` functionality works only on the "many"
+      side of a relationship, SQLAlchemy's "delete" cascade
+      has **limited** ability to operate in the *reverse* direction as well,
+      meaning it can be configured on the "many" side to delete an object
+      on the "one" side when the reference on the "many" side is deleted. However
+      this can easily result in constraint violations if there are other objects
+      referring to this "one" side from the "many", so it typically is only
+      useful when a relationship is in fact a "one to one". The
+      :paramref:`~.relationship.single_parent` flag should be used to establish
+      an in-Python assertion for this case.
+
+
+When using a :func:`.relationship` that also includes a many-to-many
+table using the :paramref:`~.relationship.secondary` option, SQLAlchemy's
+delete cascade handles the rows in this many-to-many table automatically.
+Just as the addition or removal of an object from a many-to-many collection
+results in the INSERT or DELETE of a row in the many-to-many table, as
+described in :ref:`relationships_many_to_many_deletion`, the ``delete``
+cascade, when activated as the result of a parent object
+delete operation, will DELETE not just the row in the "child" table but also
+the corresponding row in the many-to-many table.
+
+.. _cascade_delete_orphan:
+
+delete-orphan
+-------------
+
+``delete-orphan`` cascade adds behavior to the ``delete`` cascade,
+such that a child object will be marked for deletion when it is
+de-associated from the parent, not just when the parent is marked
+for deletion. This is a common feature when dealing with a related
+object that is "owned" by its parent, with a NOT NULL foreign key,
+so that removal of the item from the parent collection results
+in its deletion.
+
+``delete-orphan`` cascade implies that each child object can only
+have one parent at a time, so is configured in the vast majority of cases
+on a one-to-many relationship. Setting it on a many-to-one or
+many-to-many relationship is more awkward; for this use case,
+SQLAlchemy requires that the :func:`~sqlalchemy.orm.relationship`
+be configured with the :paramref:`~.relationship.single_parent` argument,
+which establishes Python-side validation that ensures the object
+is associated with only one parent at a time.
+
+.. _cascade_merge:
+
+merge
+-----
+
+``merge`` cascade indicates that the :meth:`.Session.merge`
+operation should be propagated from a parent that's the subject
+of the :meth:`.Session.merge` call down to referred objects.
+This cascade is also on by default.
+
+.. _cascade_refresh_expire:
+
+refresh-expire
+--------------
+
+``refresh-expire`` is an uncommon option, indicating that the
+:meth:`.Session.expire` operation should be propagated from a parent
+down to referred objects. When using :meth:`.Session.refresh`,
+the referred objects are expired only, but not actually refreshed.
+
+.. _cascade_expunge:
+
+expunge
+-------
+
+``expunge`` cascade indicates that when the parent object is removed
+from the :class:`.Session` using :meth:`.Session.expunge`, the
+operation should be propagated down to referred objects.
+
+.. _backref_cascade:
_backref_cascade: + +Controlling Cascade on Backrefs +------------------------------- + +The :ref:`cascade_save_update` cascade by default takes place on attribute change events +emitted from backrefs. This is probably a confusing statement that is more +easily described through demonstration; it means that, given a mapping such as this:: + + mapper(Order, order_table, properties={ + 'items' : relationship(Item, backref='order') + }) + +If an ``Order`` is already in the session, and is assigned to the ``order`` +attribute of an ``Item``, the backref appends the ``Item`` to the ``items`` +collection of that ``Order``, resulting in the ``save-update`` cascade taking +place:: + + >>> o1 = Order() + >>> session.add(o1) + >>> o1 in session + True + + >>> i1 = Item() + >>> i1.order = o1 + >>> i1 in o1.items + True + >>> i1 in session + True + +This behavior can be disabled using the :paramref:`~.relationship.cascade_backrefs` flag:: + + mapper(Order, order_table, properties={ + 'items' : relationship(Item, backref='order', + cascade_backrefs=False) + }) + +So above, the assignment of ``i1.order = o1`` will append ``i1`` to the ``items`` +collection of ``o1``, but will not add ``i1`` to the session. You can, of +course, :meth:`~.Session.add` ``i1`` to the session at a later point. This +option may be helpful for situations where an object needs to be kept out of a +session until its construction is completed, but still needs to be given +associations to objects which are already persistent in the target session. diff --git a/doc/build/orm/classical.rst b/doc/build/orm/classical.rst new file mode 100644 index 0000000000..0f04586c73 --- /dev/null +++ b/doc/build/orm/classical.rst @@ -0,0 +1,68 @@ +.. _classical_mapping: + +Classical Mappings +================== + +A *Classical Mapping* refers to the configuration of a mapped class using the +:func:`.mapper` function, without using the Declarative system. As an example, +start with the declarative mapping introduced in :ref:`ormtutorial_toplevel`:: + + class User(Base): + __tablename__ = 'users' + + id = Column(Integer, primary_key=True) + name = Column(String) + fullname = Column(String) + password = Column(String) + +In "classical" form, the table metadata is created separately with the :class:`.Table` +construct, then associated with the ``User`` class via the :func:`.mapper` function:: + + from sqlalchemy import Table, MetaData, Column, ForeignKey, Integer, String + from sqlalchemy.orm import mapper + + metadata = MetaData() + + user = Table('user', metadata, + Column('id', Integer, primary_key=True), + Column('name', String(50)), + Column('fullname', String(50)), + Column('password', String(12)) + ) + + class User(object): + def __init__(self, name, fullname, password): + self.name = name + self.fullname = fullname + self.password = password + + mapper(User, user) + +Information about mapped attributes, such as relationships to other classes, is provided +via the ``properties`` dictionary. 
The example below illustrates a second :class:`.Table` +object, mapped to a class called ``Address``, then linked to ``User`` via :func:`.relationship`:: + + address = Table('address', metadata, + Column('id', Integer, primary_key=True), + Column('user_id', Integer, ForeignKey('user.id')), + Column('email_address', String(50)) + ) + + mapper(User, user, properties={ + 'addresses' : relationship(Address, backref='user', order_by=address.c.id) + }) + + mapper(Address, address) + +When using classical mappings, classes must be provided directly without the benefit +of the "string lookup" system provided by Declarative. SQL expressions are typically +specified in terms of the :class:`.Table` objects, i.e. ``address.c.id`` above +for the ``Address`` relationship, and not ``Address.id``, as ``Address`` may not +yet be linked to table metadata, nor can we specify a string here. + +Some examples in the documentation still use the classical approach, but note that +the classical as well as Declarative approaches are **fully interchangeable**. Both +systems ultimately create the same configuration, consisting of a :class:`.Table`, +user-defined class, linked together with a :func:`.mapper`. When we talk about +"the behavior of :func:`.mapper`", this includes when using the Declarative system +as well - it's still used, just behind the scenes. diff --git a/doc/build/orm/composites.rst b/doc/build/orm/composites.rst new file mode 100644 index 0000000000..1c42564b1c --- /dev/null +++ b/doc/build/orm/composites.rst @@ -0,0 +1,160 @@ +.. module:: sqlalchemy.orm + +.. _mapper_composite: + +Composite Column Types +======================= + +Sets of columns can be associated with a single user-defined datatype. The ORM +provides a single attribute which represents the group of columns using the +class you provide. + +.. versionchanged:: 0.7 + Composites have been simplified such that + they no longer "conceal" the underlying column based attributes. Additionally, + in-place mutation is no longer automatic; see the section below on + enabling mutability to support tracking of in-place changes. + +.. versionchanged:: 0.9 + Composites will return their object-form, rather than as individual columns, + when used in a column-oriented :class:`.Query` construct. See :ref:`migration_2824`. + +A simple example represents pairs of columns as a ``Point`` object. +``Point`` represents such a pair as ``.x`` and ``.y``:: + + class Point(object): + def __init__(self, x, y): + self.x = x + self.y = y + + def __composite_values__(self): + return self.x, self.y + + def __repr__(self): + return "Point(x=%r, y=%r)" % (self.x, self.y) + + def __eq__(self, other): + return isinstance(other, Point) and \ + other.x == self.x and \ + other.y == self.y + + def __ne__(self, other): + return not self.__eq__(other) + +The requirements for the custom datatype class are that it have a constructor +which accepts positional arguments corresponding to its column format, and +also provides a method ``__composite_values__()`` which returns the state of +the object as a list or tuple, in order of its column-based attributes. It +also should supply adequate ``__eq__()`` and ``__ne__()`` methods which test +the equality of two instances. + +We will create a mapping to a table ``vertice``, which represents two points +as ``x1/y1`` and ``x2/y2``. These are created normally as :class:`.Column` +objects. 
Then, the :func:`.composite` function is used to assign new +attributes that will represent sets of columns via the ``Point`` class:: + + from sqlalchemy import Column, Integer + from sqlalchemy.orm import composite + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class Vertex(Base): + __tablename__ = 'vertice' + + id = Column(Integer, primary_key=True) + x1 = Column(Integer) + y1 = Column(Integer) + x2 = Column(Integer) + y2 = Column(Integer) + + start = composite(Point, x1, y1) + end = composite(Point, x2, y2) + +A classical mapping above would define each :func:`.composite` +against the existing table:: + + mapper(Vertex, vertice_table, properties={ + 'start':composite(Point, vertice_table.c.x1, vertice_table.c.y1), + 'end':composite(Point, vertice_table.c.x2, vertice_table.c.y2), + }) + +We can now persist and use ``Vertex`` instances, as well as query for them, +using the ``.start`` and ``.end`` attributes against ad-hoc ``Point`` instances: + +.. sourcecode:: python+sql + + >>> v = Vertex(start=Point(3, 4), end=Point(5, 6)) + >>> session.add(v) + >>> q = session.query(Vertex).filter(Vertex.start == Point(3, 4)) + {sql}>>> print q.first().start + BEGIN (implicit) + INSERT INTO vertice (x1, y1, x2, y2) VALUES (?, ?, ?, ?) + (3, 4, 5, 6) + SELECT vertice.id AS vertice_id, + vertice.x1 AS vertice_x1, + vertice.y1 AS vertice_y1, + vertice.x2 AS vertice_x2, + vertice.y2 AS vertice_y2 + FROM vertice + WHERE vertice.x1 = ? AND vertice.y1 = ? + LIMIT ? OFFSET ? + (3, 4, 1, 0) + {stop}Point(x=3, y=4) + +.. autofunction:: composite + + +Tracking In-Place Mutations on Composites +----------------------------------------- + +In-place changes to an existing composite value are +not tracked automatically. Instead, the composite class needs to provide +events to its parent object explicitly. This task is largely automated +via the usage of the :class:`.MutableComposite` mixin, which uses events +to associate each user-defined composite object with all parent associations. +Please see the example in :ref:`mutable_composites`. + +.. versionchanged:: 0.7 + In-place changes to an existing composite value are no longer + tracked automatically; the functionality is superseded by the + :class:`.MutableComposite` class. + +.. _composite_operations: + +Redefining Comparison Operations for Composites +----------------------------------------------- + +The "equals" comparison operation by default produces an AND of all +corresponding columns equated to one another. This can be changed using +the ``comparator_factory`` argument to :func:`.composite`, where we +specify a custom :class:`.CompositeProperty.Comparator` class +to define existing or new operations. 
+Below we illustrate the "greater than" operator, implementing +the same expression that the base "greater than" does:: + + from sqlalchemy.orm.properties import CompositeProperty + from sqlalchemy import sql + + class PointComparator(CompositeProperty.Comparator): + def __gt__(self, other): + """redefine the 'greater than' operation""" + + return sql.and_(*[a > b for a, b in + zip(self.__clause_element__().clauses, + other.__composite_values__())]) + + class Vertex(Base): + __tablename__ = 'vertice' + + id = Column(Integer, primary_key=True) + x1 = Column(Integer) + y1 = Column(Integer) + x2 = Column(Integer) + y2 = Column(Integer) + + start = composite(Point, x1, y1, + comparator_factory=PointComparator) + end = composite(Point, x2, y2, + comparator_factory=PointComparator) + diff --git a/doc/build/orm/constructors.rst b/doc/build/orm/constructors.rst new file mode 100644 index 0000000000..ab66915530 --- /dev/null +++ b/doc/build/orm/constructors.rst @@ -0,0 +1,56 @@ +.. _mapping_constructors: + +Constructors and Object Initialization +======================================= + +Mapping imposes no restrictions or requirements on the constructor +(``__init__``) method for the class. You are free to require any arguments for +the function that you wish, assign attributes to the instance that are unknown +to the ORM, and generally do anything else you would normally do when writing +a constructor for a Python class. + +The SQLAlchemy ORM does not call ``__init__`` when recreating objects from +database rows. The ORM's process is somewhat akin to the Python standard +library's ``pickle`` module, invoking the low level ``__new__`` method and +then quietly restoring attributes directly on the instance rather than calling +``__init__``. + +If you need to do some setup on database-loaded instances before they're ready +to use, you can use the ``@reconstructor`` decorator to tag a method as the +ORM counterpart to ``__init__``. SQLAlchemy will call this method with no +arguments every time it loads or reconstructs one of your instances. This is +useful for recreating transient properties that are normally assigned in your +``__init__``:: + + from sqlalchemy import orm + + class MyMappedClass(object): + def __init__(self, data): + self.data = data + # we need stuff on all instances, but not in the database. + self.stuff = [] + + @orm.reconstructor + def init_on_load(self): + self.stuff = [] + +When ``obj = MyMappedClass()`` is executed, Python calls the ``__init__`` +method as normal and the ``data`` argument is required. When instances are +loaded during a :class:`~sqlalchemy.orm.query.Query` operation as in +``query(MyMappedClass).one()``, ``init_on_load`` is called. + +Any method may be tagged as the :func:`~sqlalchemy.orm.reconstructor`, even +the ``__init__`` method. SQLAlchemy will call the reconstructor method with no +arguments. Scalar (non-collection) database-mapped attributes of the instance +will be available for use within the function. Eagerly-loaded collections are +generally not yet available and will usually only contain the first element. +ORM state changes made to objects at this stage will not be recorded for the +next flush() operation, so the activity within a reconstructor should be +conservative. + +:func:`~sqlalchemy.orm.reconstructor` is a shortcut into a larger system +of "instance level" events, which can be subscribed to using the +event API - see :class:`.InstanceEvents` for the full API description +of these events. + +.. 
autofunction:: reconstructor diff --git a/doc/build/orm/contextual.rst b/doc/build/orm/contextual.rst new file mode 100644 index 0000000000..cc7016f801 --- /dev/null +++ b/doc/build/orm/contextual.rst @@ -0,0 +1,260 @@ +.. _unitofwork_contextual: + +Contextual/Thread-local Sessions +================================= + +Recall from the section :ref:`session_faq_whentocreate` that the concept of +"session scopes" was introduced, with an emphasis on web applications +and the practice of linking the scope of a :class:`.Session` with that +of a web request. Most modern web frameworks include integration tools +so that the scope of the :class:`.Session` can be managed automatically, +and these tools should be used as they are available. + +SQLAlchemy includes its own helper object, which helps with the establishment +of user-defined :class:`.Session` scopes. It is also used by third-party +integration systems to help construct their integration schemes. + +The object is the :class:`.scoped_session` object, and it represents a +**registry** of :class:`.Session` objects. If you're not familiar with the +registry pattern, a good introduction can be found in `Patterns of Enterprise +Application Architecture `_. + +.. note:: + + The :class:`.scoped_session` object is a very popular and useful object + used by many SQLAlchemy applications. However, it is important to note + that it presents **only one approach** to the issue of :class:`.Session` + management. If you're new to SQLAlchemy, and especially if the + term "thread-local variable" seems strange to you, we recommend that, + if possible, you first familiarize yourself with an off-the-shelf integration + system such as `Flask-SQLAlchemy `_ + or `zope.sqlalchemy `_. + +A :class:`.scoped_session` is constructed by calling it, passing it a +**factory** which can create new :class:`.Session` objects. A factory +is just something that produces a new object when called, and in the +case of :class:`.Session`, the most common factory is the :class:`.sessionmaker`, +introduced earlier in this section. Below we illustrate this usage:: + + >>> from sqlalchemy.orm import scoped_session + >>> from sqlalchemy.orm import sessionmaker + + >>> session_factory = sessionmaker(bind=some_engine) + >>> Session = scoped_session(session_factory) + +The :class:`.scoped_session` object we've created will now call upon the +:class:`.sessionmaker` when we "call" the registry:: + + >>> some_session = Session() + +Above, ``some_session`` is an instance of :class:`.Session`, which we +can now use to talk to the database. This same :class:`.Session` is also +present within the :class:`.scoped_session` registry we've created. If +we call upon the registry a second time, we get back the **same** :class:`.Session`:: + + >>> some_other_session = Session() + >>> some_session is some_other_session + True + +This pattern allows disparate sections of the application to call upon a global +:class:`.scoped_session`, so that all those areas may share the same session +without the need to pass it explicitly. The :class:`.Session` we've established +in our registry will remain, until we explicitly tell our registry to dispose of it, +by calling :meth:`.scoped_session.remove`:: + + >>> Session.remove() + +The :meth:`.scoped_session.remove` method first calls :meth:`.Session.close` on +the current :class:`.Session`, which has the effect of releasing any connection/transactional +resources owned by the :class:`.Session` first, then discarding the :class:`.Session` +itself. 
"Releasing" here means that connections are returned to their connection pool and any transactional state is rolled back, ultimately using the ``rollback()`` method of the underlying DBAPI connection. + +At this point, the :class:`.scoped_session` object is "empty", and will create +a **new** :class:`.Session` when called again. As illustrated below, this +is not the same :class:`.Session` we had before:: + + >>> new_session = Session() + >>> new_session is some_session + False + +The above series of steps illustrates the idea of the "registry" pattern in a +nutshell. With that basic idea in hand, we can discuss some of the details +of how this pattern proceeds. + +Implicit Method Access +---------------------- + +The job of the :class:`.scoped_session` is simple; hold onto a :class:`.Session` +for all who ask for it. As a means of producing more transparent access to this +:class:`.Session`, the :class:`.scoped_session` also includes **proxy behavior**, +meaning that the registry itself can be treated just like a :class:`.Session` +directly; when methods are called on this object, they are **proxied** to the +underlying :class:`.Session` being maintained by the registry:: + + Session = scoped_session(some_factory) + + # equivalent to: + # + # session = Session() + # print session.query(MyClass).all() + # + print Session.query(MyClass).all() + +The above code accomplishes the same task as that of acquiring the current +:class:`.Session` by calling upon the registry, then using that :class:`.Session`. + +Thread-Local Scope +------------------ + +Users who are familiar with multithreaded programming will note that representing +anything as a global variable is usually a bad idea, as it implies that the +global object will be accessed by many threads concurrently. The :class:`.Session` +object is entirely designed to be used in a **non-concurrent** fashion, which +in terms of multithreading means "only in one thread at a time". So our +above example of :class:`.scoped_session` usage, where the same :class:`.Session` +object is maintained across multiple calls, suggests that some process needs +to be in place such that mutltiple calls across many threads don't actually get +a handle to the same session. We call this notion **thread local storage**, +which means, a special object is used that will maintain a distinct object +per each application thread. Python provides this via the +`threading.local() `_ +construct. The :class:`.scoped_session` object by default uses this object +as storage, so that a single :class:`.Session` is maintained for all who call +upon the :class:`.scoped_session` registry, but only within the scope of a single +thread. Callers who call upon the registry in a different thread get a +:class:`.Session` instance that is local to that other thread. + +Using this technique, the :class:`.scoped_session` provides a quick and relatively +simple (if one is familiar with thread-local storage) way of providing +a single, global object in an application that is safe to be called upon +from multiple threads. + +The :meth:`.scoped_session.remove` method, as always, removes the current +:class:`.Session` associated with the thread, if any. However, one advantage of the +``threading.local()`` object is that if the application thread itself ends, the +"storage" for that thread is also garbage collected. So it is in fact "safe" to +use thread local scope with an application that spawns and tears down threads, +without the need to call :meth:`.scoped_session.remove`. 
However, the scope +of transactions themselves, i.e. ending them via :meth:`.Session.commit` or +:meth:`.Session.rollback`, will usually still be something that must be explicitly +arranged for at the appropriate time, unless the application actually ties the +lifespan of a thread to the lifespan of a transaction. + +.. _session_lifespan: + +Using Thread-Local Scope with Web Applications +---------------------------------------------- + +As discussed in the section :ref:`session_faq_whentocreate`, a web application +is architected around the concept of a **web request**, and integrating +such an application with the :class:`.Session` usually implies that the :class:`.Session` +will be associated with that request. As it turns out, most Python web frameworks, +with notable exceptions such as the asynchronous frameworks Twisted and +Tornado, use threads in a simple way, such that a particular web request is received, +processed, and completed within the scope of a single *worker thread*. When +the request ends, the worker thread is released to a pool of workers where it +is available to handle another request. + +This simple correspondence of web request and thread means that to associate a +:class:`.Session` with a thread implies it is also associated with the web request +running within that thread, and vice versa, provided that the :class:`.Session` is +created only after the web request begins and torn down just before the web request ends. +So it is a common practice to use :class:`.scoped_session` as a quick way +to integrate the :class:`.Session` with a web application. The sequence +diagram below illustrates this flow:: + + Web Server Web Framework SQLAlchemy ORM Code + -------------- -------------- ------------------------------ + startup -> Web framework # Session registry is established + initializes Session = scoped_session(sessionmaker()) + + incoming + web request -> web request -> # The registry is *optionally* + starts # called upon explicitly to create + # a Session local to the thread and/or request + Session() + + # the Session registry can otherwise + # be used at any time, creating the + # request-local Session() if not present, + # or returning the existing one + Session.query(MyClass) # ... + + Session.add(some_object) # ... + + # if data was modified, commit the + # transaction + Session.commit() + + web request ends -> # the registry is instructed to + # remove the Session + Session.remove() + + sends output <- + outgoing web <- + response + +Using the above flow, the process of integrating the :class:`.Session` with the +web application has exactly two requirements: + +1. Create a single :class:`.scoped_session` registry when the web application + first starts, ensuring that this object is accessible by the rest of the + application. +2. Ensure that :meth:`.scoped_session.remove` is called when the web request ends, + usually by integrating with the web framework's event system to establish + an "on request end" event. + +As noted earlier, the above pattern is **just one potential way** to integrate a :class:`.Session` +with a web framework, one which in particular makes the significant assumption +that the **web framework associates web requests with application threads**. It is +however **strongly recommended that the integration tools provided with the web framework +itself be used, if available**, instead of :class:`.scoped_session`. 
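+ +As a rough illustration only, a minimal WSGI middleware meeting these two requirements might look like the following sketch; the ``SessionRemovalMiddleware`` name here is hypothetical and not part of SQLAlchemy, and a real application should prefer its web framework's own integration hooks where they exist:: + + from sqlalchemy import create_engine + from sqlalchemy.orm import scoped_session, sessionmaker + + # requirement #1 - establish the registry once, at application startup + engine = create_engine('sqlite://') + Session = scoped_session(sessionmaker(bind=engine)) + + class SessionRemovalMiddleware(object): + # hypothetical WSGI middleware which calls Session.remove() + # as each request ends + def __init__(self, app): + self.app = app + + def __call__(self, environ, start_response): + try: + return self.app(environ, start_response) + finally: + # requirement #2 - discard the request-local Session + # when the web request is complete + Session.remove() + 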
+ +In particular, while using a thread local can be convenient, it is preferable that the :class:`.Session` be +associated **directly with the request**, rather than with +the current thread. The next section on custom scopes details a more advanced configuration +which can combine the usage of :class:`.scoped_session` with direct request based scope, or +any kind of scope. + +Using Custom Created Scopes +--------------------------- + +The :class:`.scoped_session` object's default behavior of "thread local" scope is only +one of many options on how to "scope" a :class:`.Session`. A custom scope can be defined +based on any existing system of getting at "the current thing we are working with". + +Suppose a web framework defines a library function ``get_current_request()``. An application +built using this framework can call this function at any time, and the result will be +some kind of ``Request`` object that represents the current request being processed. +If the ``Request`` object is hashable, then this function can be easily integrated with +:class:`.scoped_session` to associate the :class:`.Session` with the request. Below we illustrate +this in conjunction with a hypothetical event marker provided by the web framework +``on_request_end``, which allows code to be invoked whenever a request ends:: + + from my_web_framework import get_current_request, on_request_end + from sqlalchemy.orm import scoped_session, sessionmaker + + Session = scoped_session(sessionmaker(bind=some_engine), scopefunc=get_current_request) + + @on_request_end + def remove_session(req): + Session.remove() + +Above, we instantiate :class:`.scoped_session` in the usual way, except that we pass +our request-returning function as the "scopefunc". This instructs :class:`.scoped_session` +to use this function to generate a dictionary key whenever the registry is called upon +to return the current :class:`.Session`. In this case it is particularly important +that we ensure a reliable "remove" system is implemented, as this dictionary is not +otherwise self-managed. + + +Contextual Session API +---------------------- + +.. autoclass:: sqlalchemy.orm.scoping.scoped_session + :members: + +.. autoclass:: sqlalchemy.util.ScopedRegistry + :members: + +.. autoclass:: sqlalchemy.util.ThreadLocalRegistry diff --git a/doc/build/orm/extending.rst b/doc/build/orm/extending.rst new file mode 100644 index 0000000000..4b2b86f621 --- /dev/null +++ b/doc/build/orm/extending.rst @@ -0,0 +1,12 @@ +==================== +Events and Internals +==================== + +.. toctree:: + :maxdepth: 2 + + events + internals + exceptions + deprecated + diff --git a/doc/build/orm/extensions/declarative.rst b/doc/build/orm/extensions/declarative.rst deleted file mode 100644 index 636bb451b8..0000000000 --- a/doc/build/orm/extensions/declarative.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. _declarative_toplevel: - -Declarative -=========== - -.. automodule:: sqlalchemy.ext.declarative - -API Reference -------------- - -.. autofunction:: declarative_base - -.. autofunction:: as_declarative - -.. autoclass:: declared_attr - -.. autofunction:: sqlalchemy.ext.declarative.api._declarative_constructor - -.. autofunction:: has_inherited_table - -.. autofunction:: synonym_for - -.. autofunction:: comparable_using - -.. autofunction:: instrument_declarative - -.. autoclass:: AbstractConcreteBase - -.. autoclass:: ConcreteBase - -.. 
autoclass:: DeferredReflection - :members: diff --git a/doc/build/orm/extensions/declarative/api.rst b/doc/build/orm/extensions/declarative/api.rst new file mode 100644 index 0000000000..6daf39ef72 --- /dev/null +++ b/doc/build/orm/extensions/declarative/api.rst @@ -0,0 +1,112 @@ +.. automodule:: sqlalchemy.ext.declarative + +=============== +Declarative API +=============== + +API Reference +============= + +.. autofunction:: declarative_base + +.. autoclass:: declared_attr + :members: + +.. autofunction:: sqlalchemy.ext.declarative.api._declarative_constructor + +.. autofunction:: has_inherited_table + +.. autofunction:: synonym_for + +.. autofunction:: comparable_using + +.. autofunction:: instrument_declarative + +.. autoclass:: AbstractConcreteBase + +.. autoclass:: ConcreteBase + +.. autoclass:: DeferredReflection + :members: + + +Special Directives +------------------ + +``__declare_last__()`` +~~~~~~~~~~~~~~~~~~~~~~ + +The ``__declare_last__()`` hook allows definition of +a class level function that is automatically called by the +:meth:`.MapperEvents.after_configured` event, which occurs after mappings are +assumed to be completed and the 'configure' step has finished:: + + class MyClass(Base): + @classmethod + def __declare_last__(cls): + "" + # do something with mappings + +.. versionadded:: 0.7.3 + +``__declare_first__()`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Like ``__declare_last__()``, but is called at the beginning of mapper +configuration via the :meth:`.MapperEvents.before_configured` event:: + + class MyClass(Base): + @classmethod + def __declare_first__(cls): + "" + # do something before mappings are configured + +.. versionadded:: 0.9.3 + +.. _declarative_abstract: + +``__abstract__`` +~~~~~~~~~~~~~~~~~~~ + +``__abstract__`` causes declarative to skip the production +of a table or mapper for the class entirely. A class can be added within a +hierarchy in the same way as a mixin (see :ref:`declarative_mixins`), allowing +subclasses to extend just from the special class:: + + class SomeAbstractBase(Base): + __abstract__ = True + + def some_helpful_method(self): + "" + + @declared_attr + def __mapper_args__(cls): + return {"helpful mapper arguments":True} + + class MyMappedClass(SomeAbstractBase): + "" + +One possible use of ``__abstract__`` is to use a distinct +:class:`.MetaData` for different bases:: + + Base = declarative_base() + + class DefaultBase(Base): + __abstract__ = True + metadata = MetaData() + + class OtherBase(Base): + __abstract__ = True + metadata = MetaData() + +Above, classes which inherit from ``DefaultBase`` will use one +:class:`.MetaData` as the registry of tables, and those which inherit from +``OtherBase`` will use a different one. The tables themselves can then be +created perhaps within distinct databases:: + + DefaultBase.metadata.create_all(some_engine) + OtherBase.metadata.create_all(some_other_engine) + +.. versionadded:: 0.7.3 + + diff --git a/doc/build/orm/extensions/declarative/basic_use.rst b/doc/build/orm/extensions/declarative/basic_use.rst new file mode 100644 index 0000000000..10b79e5a6f --- /dev/null +++ b/doc/build/orm/extensions/declarative/basic_use.rst @@ -0,0 +1,133 @@ +========= +Basic Use +========= + +SQLAlchemy object-relational configuration involves the +combination of :class:`.Table`, :func:`.mapper`, and class +objects to define a mapped class. +:mod:`~sqlalchemy.ext.declarative` allows all three to be +expressed at once within the class declaration. 
As much as +possible, regular SQLAlchemy schema and ORM constructs are +used directly, so that configuration between "classical" ORM +usage and declarative remains highly similar. + +As a simple example:: + + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class SomeClass(Base): + __tablename__ = 'some_table' + id = Column(Integer, primary_key=True) + name = Column(String(50)) + +Above, the :func:`declarative_base` callable returns a new base class from +which all mapped classes should inherit. When the class definition is +completed, a new :class:`.Table` and :func:`.mapper` will have been generated. + +The resulting table and mapper are accessible via +``__table__`` and ``__mapper__`` attributes on the +``SomeClass`` class:: + + # access the mapped Table + SomeClass.__table__ + + # access the Mapper + SomeClass.__mapper__ + +Defining Attributes +=================== + +In the previous example, the :class:`.Column` objects are +automatically named with the name of the attribute to which they are +assigned. + +To name columns explicitly with a name distinct from their mapped attribute, +just give the column a name. Below, column "some_table_id" is mapped to the +"id" attribute of `SomeClass`, but in SQL will be represented as +"some_table_id":: + + class SomeClass(Base): + __tablename__ = 'some_table' + id = Column("some_table_id", Integer, primary_key=True) + +Attributes may be added to the class after its construction, and they will be +added to the underlying :class:`.Table` and +:func:`.mapper` definitions as appropriate:: + + SomeClass.data = Column('data', Unicode) + SomeClass.related = relationship(RelatedInfo) + +Classes which are constructed using declarative can interact freely +with classes that are mapped explicitly with :func:`.mapper`. + +It is recommended, though not required, that all tables +share the same underlying :class:`~sqlalchemy.schema.MetaData` object, +so that string-configured :class:`~sqlalchemy.schema.ForeignKey` +references can be resolved without issue. + +Accessing the MetaData +======================= + +The :func:`declarative_base` base class contains a +:class:`.MetaData` object where newly defined +:class:`.Table` objects are collected. This object is +intended to be accessed directly for +:class:`.MetaData`-specific operations, such as issuing +CREATE statements for all tables:: + + engine = create_engine('sqlite://') + Base.metadata.create_all(engine) + +:func:`declarative_base` can also receive a pre-existing +:class:`.MetaData` object, which allows a +declarative setup to be associated with an already +existing traditional collection of :class:`~sqlalchemy.schema.Table` +objects:: + + mymetadata = MetaData() + Base = declarative_base(metadata=mymetadata) + + +Class Constructor +================= + +As a convenience feature, the :func:`declarative_base` sets a default +constructor on classes which takes keyword arguments, and assigns them +to the named attributes:: + + e = Engineer(primary_language='python') + +Mapper Configuration +==================== + +Declarative makes use of the :func:`~.orm.mapper` function internally +when it creates the mapping to the declared table. The options +for :func:`~.orm.mapper` are passed directly through via the +``__mapper_args__`` class attribute. 
As always, arguments which reference +locally mapped columns can reference them directly from within the +class declaration:: + + from datetime import datetime + + class Widget(Base): + __tablename__ = 'widgets' + + id = Column(Integer, primary_key=True) + timestamp = Column(DateTime, nullable=False) + + __mapper_args__ = { + 'version_id_col': timestamp, + 'version_id_generator': lambda v:datetime.now() + } + + +.. _declarative_sql_expressions: + +Defining SQL Expressions +======================== + +See :ref:`mapper_sql_expressions` for examples on declaratively +mapping attributes to SQL expressions. + diff --git a/doc/build/orm/extensions/declarative/index.rst b/doc/build/orm/extensions/declarative/index.rst new file mode 100644 index 0000000000..dc4f392f39 --- /dev/null +++ b/doc/build/orm/extensions/declarative/index.rst @@ -0,0 +1,32 @@ +.. _declarative_toplevel: + +=========== +Declarative +=========== + +The Declarative system is the system typically used with the SQLAlchemy +ORM to define classes mapped to relational database tables. However, +as noted in :ref:`classical_mapping`, Declarative is in fact a series of +extensions that ride on top of the SQLAlchemy :func:`.mapper` construct. + +While the documentation typically refers to Declarative for most examples, +the following sections will provide detailed information on how the +Declarative API interacts with the basic :func:`.mapper` and Core :class:`.Table` +systems, as well as how sophisticated patterns can be built using systems +such as mixins. + + +.. toctree:: + :maxdepth: 2 + + basic_use + relationships + table_config + inheritance + mixins + api + + + + + diff --git a/doc/build/orm/extensions/declarative/inheritance.rst b/doc/build/orm/extensions/declarative/inheritance.rst new file mode 100644 index 0000000000..684b07bfd0 --- /dev/null +++ b/doc/build/orm/extensions/declarative/inheritance.rst @@ -0,0 +1,318 @@ +.. _declarative_inheritance: + +Inheritance Configuration +========================= + +Declarative supports all three forms of inheritance as intuitively +as possible. The ``inherits`` mapper keyword argument is not needed +as declarative will determine this from the class itself. The various +"polymorphic" keyword arguments are specified using ``__mapper_args__``. + +Joined Table Inheritance +~~~~~~~~~~~~~~~~~~~~~~~~ + +Joined table inheritance is defined as a subclass that defines its own +table:: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class Engineer(Person): + __tablename__ = 'engineers' + __mapper_args__ = {'polymorphic_identity': 'engineer'} + id = Column(Integer, ForeignKey('people.id'), primary_key=True) + primary_language = Column(String(50)) + +Note that above, the ``Engineer.id`` attribute, since it shares the +same attribute name as the ``Person.id`` attribute, will in fact +represent the ``people.id`` and ``engineers.id`` columns together, +with the "Engineer.id" column taking precedence if queried directly. +To provide the ``Engineer`` class with an attribute that represents +only the ``engineers.id`` column, give it a different attribute name:: + + class Engineer(Person): + __tablename__ = 'engineers' + __mapper_args__ = {'polymorphic_identity': 'engineer'} + engineer_id = Column('id', Integer, ForeignKey('people.id'), + primary_key=True) + primary_language = Column(String(50)) + + +.. 
versionchanged:: 0.7 joined table inheritance favors the subclass + column over that of the superclass, such as querying above + for ``Engineer.id``. Prior to 0.7 this was the reverse. + +.. _declarative_single_table: + +Single Table Inheritance +~~~~~~~~~~~~~~~~~~~~~~~~ + +Single table inheritance is defined as a subclass that does not have +its own table; you just leave out the ``__table__`` and ``__tablename__`` +attributes:: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class Engineer(Person): + __mapper_args__ = {'polymorphic_identity': 'engineer'} + primary_language = Column(String(50)) + +When the above mappers are configured, the ``Person`` class is mapped +to the ``people`` table *before* the ``primary_language`` column is +defined, and this column will not be included in its own mapping. +When ``Engineer`` then defines the ``primary_language`` column, the +column is added to the ``people`` table so that it is included in the +mapping for ``Engineer`` and is also part of the table's full set of +columns. Columns which are not mapped to ``Person`` are also excluded +from any other single or joined inheriting classes using the +``exclude_properties`` mapper argument. Below, ``Manager`` will have +all the attributes of ``Person`` and ``Manager`` but *not* the +``primary_language`` attribute of ``Engineer``:: + + class Manager(Person): + __mapper_args__ = {'polymorphic_identity': 'manager'} + golf_swing = Column(String(50)) + +The attribute exclusion logic is provided by the +``exclude_properties`` mapper argument, and declarative's default +behavior can be disabled by passing an explicit ``exclude_properties`` +collection (empty or otherwise) to the ``__mapper_args__``. + +Resolving Column Conflicts +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note above that the ``primary_language`` and ``golf_swing`` columns +are "moved up" to be applied to ``Person.__table__``, as a result of their +declaration on a subclass that has no table of its own. A tricky case +comes up when two subclasses want to specify *the same* column, as below:: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class Engineer(Person): + __mapper_args__ = {'polymorphic_identity': 'engineer'} + start_date = Column(DateTime) + + class Manager(Person): + __mapper_args__ = {'polymorphic_identity': 'manager'} + start_date = Column(DateTime) + +Above, the ``start_date`` column declared on both ``Engineer`` and ``Manager`` +will result in an error:: + + sqlalchemy.exc.ArgumentError: Column 'start_date' on class + conflicts with existing + column 'people.start_date' + +In a situation like this, Declarative can't be sure +of the intent, especially if the ``start_date`` columns had, for example, +different types. 
A situation like this can be resolved by using +:class:`.declared_attr` to define the :class:`.Column` conditionally, taking +care to return the **existing column** via the parent ``__table__`` if it +already exists:: + + from sqlalchemy.ext.declarative import declared_attr + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class Engineer(Person): + __mapper_args__ = {'polymorphic_identity': 'engineer'} + + @declared_attr + def start_date(cls): + "Start date column, if not present already." + return Person.__table__.c.get('start_date', Column(DateTime)) + + class Manager(Person): + __mapper_args__ = {'polymorphic_identity': 'manager'} + + @declared_attr + def start_date(cls): + "Start date column, if not present already." + return Person.__table__.c.get('start_date', Column(DateTime)) + +Above, when ``Manager`` is mapped, the ``start_date`` column is +already present on the ``Person`` class. Declarative lets us return +that :class:`.Column` as a result in this case, where it knows to skip +re-assigning the same column. If the mapping is mis-configured such +that the ``start_date`` column is accidentally re-assigned to a +different table (such as, if we changed ``Manager`` to be joined +inheritance without fixing ``start_date``), an error is raised which +indicates an existing :class:`.Column` is being re-assigned to +a different owning :class:`.Table`. + +.. versionadded:: 0.8 :class:`.declared_attr` can be used on a non-mixin + class, and the returned :class:`.Column` or other mapped attribute + will be applied to the mapping as any other attribute. Previously, + the resulting attribute would be ignored, and also result in a warning + being emitted when a subclass was created. + +.. versionadded:: 0.8 :class:`.declared_attr`, when used either with a + mixin or non-mixin declarative class, can return an existing + :class:`.Column` already assigned to the parent :class:`.Table`, + to indicate that the re-assignment of the :class:`.Column` should be + skipped, however should still be mapped on the target class, + in order to resolve duplicate column conflicts. + +The same concept can be used with mixin classes (see +:ref:`declarative_mixins`):: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class HasStartDate(object): + @declared_attr + def start_date(cls): + return cls.__table__.c.get('start_date', Column(DateTime)) + + class Engineer(HasStartDate, Person): + __mapper_args__ = {'polymorphic_identity': 'engineer'} + + class Manager(HasStartDate, Person): + __mapper_args__ = {'polymorphic_identity': 'manager'} + +The above mixin checks the local ``__table__`` attribute for the column. +Because we're using single table inheritance, we're sure that in this case, +``cls.__table__`` refers to ``Person.__table__``. If we were mixing joined- +and single-table inheritance, we might want our mixin to check more carefully +if ``cls.__table__`` is really the :class:`.Table` we're looking for. 
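+ +For instance, a sketch of such a more defensive mixin (an illustration not present in the original documentation) might consult :func:`.has_inherited_table` and only re-use an inherited column when the class has no table of its own:: + + from sqlalchemy import Column, DateTime + from sqlalchemy.ext.declarative import declared_attr, has_inherited_table + + class HasStartDate(object): + @declared_attr + def start_date(cls): + # single-table inheritance: the class has no table of its own, + # so re-use the inherited table's column if already present + if has_inherited_table(cls) and '__tablename__' not in cls.__dict__: + return cls.__table__.c.get('start_date', Column(DateTime)) + # base class, or joined/concrete inheritance: brand new column + return Column(DateTime)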
+ +Concrete Table Inheritance +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Concrete table inheritance is defined as a subclass which has its own table and sets the +``concrete`` keyword argument to ``True``:: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + name = Column(String(50)) + + class Engineer(Person): + __tablename__ = 'engineers' + __mapper_args__ = {'concrete':True} + id = Column(Integer, primary_key=True) + primary_language = Column(String(50)) + name = Column(String(50)) + +Usage of an abstract base class is a little less straightforward as it +requires usage of :func:`~sqlalchemy.orm.util.polymorphic_union`, +which needs to be created with the :class:`.Table` objects +before the class is built:: + + engineers = Table('engineers', Base.metadata, + Column('id', Integer, primary_key=True), + Column('name', String(50)), + Column('primary_language', String(50)) + ) + managers = Table('managers', Base.metadata, + Column('id', Integer, primary_key=True), + Column('name', String(50)), + Column('golf_swing', String(50)) + ) + + punion = polymorphic_union({ + 'engineer':engineers, + 'manager':managers + }, 'type', 'punion') + + class Person(Base): + __table__ = punion + __mapper_args__ = {'polymorphic_on':punion.c.type} + + class Engineer(Person): + __table__ = engineers + __mapper_args__ = {'polymorphic_identity':'engineer', 'concrete':True} + + class Manager(Person): + __table__ = managers + __mapper_args__ = {'polymorphic_identity':'manager', 'concrete':True} + +.. _declarative_concrete_helpers: + +Using the Concrete Helpers +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Helper classes provide a simpler pattern for concrete inheritance. +With these objects, the ``__declare_first__`` helper is used to configure the +"polymorphic" loader for the mapper after all subclasses have been declared. + +.. versionadded:: 0.7.3 + +An abstract base can be declared using the +:class:`.AbstractConcreteBase` class:: + + from sqlalchemy.ext.declarative import AbstractConcreteBase + + class Employee(AbstractConcreteBase, Base): + pass + +To have a concrete ``employee`` table, use :class:`.ConcreteBase` instead:: + + from sqlalchemy.ext.declarative import ConcreteBase + + class Employee(ConcreteBase, Base): + __tablename__ = 'employee' + employee_id = Column(Integer, primary_key=True) + name = Column(String(50)) + __mapper_args__ = { + 'polymorphic_identity':'employee', + 'concrete':True} + + +Either ``Employee`` base can be used in the normal fashion:: + + class Manager(Employee): + __tablename__ = 'manager' + employee_id = Column(Integer, primary_key=True) + name = Column(String(50)) + manager_data = Column(String(40)) + __mapper_args__ = { + 'polymorphic_identity':'manager', + 'concrete':True} + + class Engineer(Employee): + __tablename__ = 'engineer' + employee_id = Column(Integer, primary_key=True) + name = Column(String(50)) + engineer_info = Column(String(40)) + __mapper_args__ = {'polymorphic_identity':'engineer', + 'concrete':True} + + +The :class:`.AbstractConcreteBase` class is itself mapped, and can be +used as a target of relationships:: + + class Company(Base): + __tablename__ = 'company' + + id = Column(Integer, primary_key=True) + employees = relationship("Employee", + primaryjoin="Company.id == Employee.company_id") + + +.. versionchanged:: 0.9.3 Support for use of :class:`.AbstractConcreteBase` + as the target of a :func:`.relationship` has been improved. 
+ +It can also be queried directly:: + + for employee in session.query(Employee).filter(Employee.name == 'qbert'): + print(employee) + diff --git a/doc/build/orm/extensions/declarative/mixins.rst b/doc/build/orm/extensions/declarative/mixins.rst new file mode 100644 index 0000000000..7ad8dcdc90 --- /dev/null +++ b/doc/build/orm/extensions/declarative/mixins.rst @@ -0,0 +1,464 @@ +.. _declarative_mixins: + +Mixin and Custom Base Classes +============================== + +A common need when using :mod:`~sqlalchemy.ext.declarative` is to +share some functionality, such as a set of common columns, some common +table options, or other mapped properties, across many +classes. The standard Python idiom for this is to have the classes +inherit from a base which includes these common features. + +When using :mod:`~sqlalchemy.ext.declarative`, this idiom is supported +via the use of a custom declarative base class, as well as a "mixin" class +which is inherited from in addition to the primary base. Declarative +includes several helper features to make this work in terms of how +mappings are declared. An example of some commonly mixed-in +idioms is below:: + + from sqlalchemy.ext.declarative import declared_attr + + class MyMixin(object): + + @declared_attr + def __tablename__(cls): + return cls.__name__.lower() + + __table_args__ = {'mysql_engine': 'InnoDB'} + __mapper_args__= {'always_refresh': True} + + id = Column(Integer, primary_key=True) + + class MyModel(MyMixin, Base): + name = Column(String(1000)) + +Where above, the class ``MyModel`` will contain an "id" column +as the primary key, a ``__tablename__`` attribute that derives +from the name of the class itself, as well as ``__table_args__`` +and ``__mapper_args__`` defined by the ``MyMixin`` mixin class. + +There's no fixed convention over whether ``MyMixin`` precedes +``Base`` or not. Normal Python method resolution rules apply, and +the above example would work just as well with:: + + class MyModel(Base, MyMixin): + name = Column(String(1000)) + +This works because ``Base`` here doesn't define any of the +variables that ``MyMixin`` defines, i.e. ``__tablename__``, +``__table_args__``, ``id``, etc. If the ``Base`` did define +an attribute of the same name, the class placed first in the +inherits list would determine which attribute is used on the +newly defined class. + +Augmenting the Base +~~~~~~~~~~~~~~~~~~~ + +In addition to using a pure mixin, most of the techniques in this +section can also be applied to the base class itself, for patterns that +should apply to all classes derived from a particular base. This is achieved +using the ``cls`` argument of the :func:`.declarative_base` function:: + + from sqlalchemy.ext.declarative import declared_attr + + class Base(object): + @declared_attr + def __tablename__(cls): + return cls.__name__.lower() + + __table_args__ = {'mysql_engine': 'InnoDB'} + + id = Column(Integer, primary_key=True) + + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base(cls=Base) + + class MyModel(Base): + name = Column(String(1000)) + +Where above, ``MyModel`` and all other classes that derive from ``Base`` will +have a table name derived from the class name, an ``id`` primary key column, +as well as the "InnoDB" engine for MySQL. 
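+ +As a quick check of the above (a hypothetical interactive session, not from the original text), the generated :class:`.Table` reflects all of these defaults:: + + >>> MyModel.__table__.name + 'mymodel' + >>> 'id' in MyModel.__table__.c + True + >>> MyModel.__table__.kwargs['mysql_engine'] + 'InnoDB'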
+ +Mixing in Columns +~~~~~~~~~~~~~~~~~ + +The most basic way to specify a column on a mixin is by simple +declaration:: + + class TimestampMixin(object): + created_at = Column(DateTime, default=func.now()) + + class MyModel(TimestampMixin, Base): + __tablename__ = 'test' + + id = Column(Integer, primary_key=True) + name = Column(String(1000)) + +Where above, all declarative classes that include ``TimestampMixin`` +will also have a column ``created_at`` that applies a timestamp to +all row insertions. + +Those familiar with the SQLAlchemy expression language know that +the object identity of clause elements defines their role in a schema. +Two ``Table`` objects ``a`` and ``b`` may both have a column called +``id``, but the way these are differentiated is that ``a.c.id`` +and ``b.c.id`` are two distinct Python objects, referencing their +parent tables ``a`` and ``b`` respectively. + +In the case of the mixin column, it seems that only one +:class:`.Column` object is explicitly created, yet the ultimate +``created_at`` column above must exist as a distinct Python object +for each separate destination class. To accomplish this, the declarative +extension creates a **copy** of each :class:`.Column` object encountered on +a class that is detected as a mixin. + +This copy mechanism is limited to simple columns that have no foreign +keys, as a :class:`.ForeignKey` itself contains references to columns +which can't be properly recreated at this level. For columns that +have foreign keys, as well as for the variety of mapper-level constructs +that require destination-explicit context, the +:class:`~.declared_attr` decorator is provided so that +patterns common to many classes can be defined as callables:: + + from sqlalchemy.ext.declarative import declared_attr + + class ReferenceAddressMixin(object): + @declared_attr + def address_id(cls): + return Column(Integer, ForeignKey('address.id')) + + class User(ReferenceAddressMixin, Base): + __tablename__ = 'user' + id = Column(Integer, primary_key=True) + +Where above, the ``address_id`` class-level callable is executed at the +point at which the ``User`` class is constructed, and the declarative +extension can use the resulting :class:`.Column` object as returned by +the method without the need to copy it. + +.. versionchanged:: > 0.6.5 + Rename 0.6.5 ``sqlalchemy.util.classproperty`` + into :class:`~.declared_attr`. + +Columns generated by :class:`~.declared_attr` can also be +referenced by ``__mapper_args__`` to a limited degree, currently +by ``polymorphic_on`` and ``version_id_col``; the declarative extension +will resolve them at class construction time:: + + class MyMixin: + @declared_attr + def type_(cls): + return Column(String(50)) + + __mapper_args__= {'polymorphic_on':type_} + + class MyModel(MyMixin, Base): + __tablename__='test' + id = Column(Integer, primary_key=True) + + +Mixing in Relationships +~~~~~~~~~~~~~~~~~~~~~~~ + +Relationships created by :func:`~sqlalchemy.orm.relationship` are provided +with declarative mixin classes exclusively using the +:class:`.declared_attr` approach, eliminating any ambiguity +which could arise when copying a relationship and its possibly column-bound +contents. 
Below is an example which combines a foreign key column and a +relationship so that two classes ``Foo`` and ``Bar`` can both be configured to +reference a common target class via many-to-one:: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship("Target") + + class Foo(RefTargetMixin, Base): + __tablename__ = 'foo' + id = Column(Integer, primary_key=True) + + class Bar(RefTargetMixin, Base): + __tablename__ = 'bar' + id = Column(Integer, primary_key=True) + + class Target(Base): + __tablename__ = 'target' + id = Column(Integer, primary_key=True) + + +Using Advanced Relationship Arguments (e.g. ``primaryjoin``, etc.) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`~sqlalchemy.orm.relationship` definitions which require explicit +primaryjoin, order_by etc. expressions should in all but the most +simplistic cases use **late bound** forms +for these arguments, meaning, using either the string form or a lambda. +The reason for this is that the related :class:`.Column` objects which are to +be configured using ``@declared_attr`` are not available to another +``@declared_attr`` attribute; while the methods will work and return new +:class:`.Column` objects, those are not the :class:`.Column` objects that +Declarative will be using as it calls the methods on its own, thus using +*different* :class:`.Column` objects. + +The canonical example is the primaryjoin condition that depends upon +another mixed-in column:: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship(Target, + primaryjoin=Target.id==cls.target_id # this is *incorrect* + ) + +Mapping a class using the above mixin, we will get an error like:: + + sqlalchemy.exc.InvalidRequestError: this ForeignKey's parent column is not + yet associated with a Table. + +This is because the ``target_id`` :class:`.Column` we've called upon in our +``target()`` method is not the same :class:`.Column` that declarative is +actually going to map to our table. + +The condition above is resolved using a lambda:: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship(Target, + primaryjoin=lambda: Target.id==cls.target_id + ) + +or alternatively, the string form (which ultimately generates a lambda):: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship("Target", + primaryjoin="Target.id==%s.target_id" % cls.__name__ + ) + +Mixing in deferred(), column_property(), and other MapperProperty classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Like :func:`~sqlalchemy.orm.relationship`, all +:class:`~sqlalchemy.orm.interfaces.MapperProperty` subclasses such as +:func:`~sqlalchemy.orm.deferred`, :func:`~sqlalchemy.orm.column_property`, +etc. 
ultimately involve references to columns, and therefore, when +used with declarative mixins, have the :class:`.declared_attr` +requirement so that no reliance on copying is needed:: + + class SomethingMixin(object): + + @declared_attr + def dprop(cls): + return deferred(Column(Integer)) + + class Something(SomethingMixin, Base): + __tablename__ = "something" + +Mixing in Association Proxy and Other Attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mixins can specify user-defined attributes as well as other extension +units such as :func:`.association_proxy`. The usage of +:class:`.declared_attr` is required in those cases where the attribute must +be tailored specifically to the target subclass. An example is when +constructing multiple :func:`.association_proxy` attributes which each +target a different type of child object. Below is an +:func:`.association_proxy` / mixin example which provides a scalar list of +string values to an implementing class:: + + from sqlalchemy import Column, Integer, ForeignKey, String + from sqlalchemy.orm import relationship + from sqlalchemy.ext.associationproxy import association_proxy + from sqlalchemy.ext.declarative import declarative_base, declared_attr + + Base = declarative_base() + + class HasStringCollection(object): + @declared_attr + def _strings(cls): + class StringAttribute(Base): + __tablename__ = cls.string_table_name + id = Column(Integer, primary_key=True) + value = Column(String(50), nullable=False) + parent_id = Column(Integer, + ForeignKey('%s.id' % cls.__tablename__), + nullable=False) + def __init__(self, value): + self.value = value + + return relationship(StringAttribute) + + @declared_attr + def strings(cls): + return association_proxy('_strings', 'value') + + class TypeA(HasStringCollection, Base): + __tablename__ = 'type_a' + string_table_name = 'type_a_strings' + id = Column(Integer(), primary_key=True) + + class TypeB(HasStringCollection, Base): + __tablename__ = 'type_b' + string_table_name = 'type_b_strings' + id = Column(Integer(), primary_key=True) + +Above, the ``HasStringCollection`` mixin produces a :func:`.relationship` +which refers to a newly generated class called ``StringAttribute``. The +``StringAttribute`` class is generated with its own :class:`.Table` +definition which is local to the parent class making usage of the +``HasStringCollection`` mixin. It also produces an :func:`.association_proxy` +object which proxies references to the ``strings`` attribute onto the ``value`` +attribute of each ``StringAttribute`` instance. + +``TypeA`` or ``TypeB`` can be instantiated given the constructor +argument ``strings``, a list of strings:: + + ta = TypeA(strings=['foo', 'bar']) + tb = TypeB(strings=['bat', 'bar']) + +This list will generate a collection +of ``StringAttribute`` objects, which are persisted into the table local +to the class in use, either ``type_a_strings`` or ``type_b_strings``:: + + >>> print ta._strings + [<__main__.StringAttribute object at 0x10151cd90>, + <__main__.StringAttribute object at 0x10151ce10>] + +When constructing the :func:`.association_proxy`, the +:class:`.declared_attr` decorator must be used so that a distinct +:func:`.association_proxy` object is created for each of the ``TypeA`` +and ``TypeB`` classes. + +.. versionadded:: 0.8 :class:`.declared_attr` is usable with non-mapped + attributes, including user-defined attributes as well as + :func:`.association_proxy`. 
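+ +To round out the example, a brief sketch of an interactive session (hypothetical, using the classes above) shows the proxy reading and writing through to the ``value`` attribute of the underlying ``StringAttribute`` objects:: + + >>> ta = TypeA(strings=['foo', 'bar']) + >>> list(ta.strings) + ['foo', 'bar'] + >>> ta.strings.append('bat') + >>> [s.value for s in ta._strings] + ['foo', 'bar', 'bat']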
+
+.. versionadded:: 0.8 :class:`.declared_attr` is usable with non-mapped
+   attributes, including user-defined attributes as well as
+   :func:`.association_proxy`.
+
+
+Controlling table inheritance with mixins
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``__tablename__`` attribute may be used to provide a function that
+will determine the name of the table used for each class in an inheritance
+hierarchy, as well as whether a class has its own distinct table.
+
+This is achieved using the :class:`.declared_attr` indicator in conjunction
+with a method named ``__tablename__()``.   Declarative will always
+invoke :class:`.declared_attr` for the special names
+``__tablename__``, ``__mapper_args__`` and ``__table_args__``
+**for each mapped class in the hierarchy**.  The function therefore
+needs to expect to receive each class individually and to provide the
+correct answer for each.
+
+For example, to create a mixin that gives every class a simple table
+name based on class name::
+
+    from sqlalchemy.ext.declarative import declared_attr
+
+    class Tablename:
+        @declared_attr
+        def __tablename__(cls):
+            return cls.__name__.lower()
+
+    class Person(Tablename, Base):
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        __tablename__ = None
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+        primary_language = Column(String(50))
+
+Alternatively, we can modify our ``__tablename__`` function to return
+``None`` for subclasses, using :func:`.has_inherited_table`.  This has
+the effect of those subclasses being mapped with single table inheritance
+against the parent::
+
+    from sqlalchemy.ext.declarative import declared_attr
+    from sqlalchemy.ext.declarative import has_inherited_table
+
+    class Tablename(object):
+        @declared_attr
+        def __tablename__(cls):
+            if has_inherited_table(cls):
+                return None
+            return cls.__name__.lower()
+
+    class Person(Tablename, Base):
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        primary_language = Column(String(50))
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+
+Combining Table/Mapper Arguments from Multiple Mixins
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the case of ``__table_args__`` or ``__mapper_args__``
+specified with declarative mixins, you may want to combine
+some parameters from several mixins with those you wish to
+define on the class itself.
The +:class:`.declared_attr` decorator can be used +here to create user-defined collation routines that pull +from multiple collections:: + + from sqlalchemy.ext.declarative import declared_attr + + class MySQLSettings(object): + __table_args__ = {'mysql_engine':'InnoDB'} + + class MyOtherMixin(object): + __table_args__ = {'info':'foo'} + + class MyModel(MySQLSettings, MyOtherMixin, Base): + __tablename__='my_model' + + @declared_attr + def __table_args__(cls): + args = dict() + args.update(MySQLSettings.__table_args__) + args.update(MyOtherMixin.__table_args__) + return args + + id = Column(Integer, primary_key=True) + +Creating Indexes with Mixins +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To define a named, potentially multicolumn :class:`.Index` that applies to all +tables derived from a mixin, use the "inline" form of :class:`.Index` and +establish it as part of ``__table_args__``:: + + class MyMixin(object): + a = Column(Integer) + b = Column(Integer) + + @declared_attr + def __table_args__(cls): + return (Index('test_idx_%s' % cls.__tablename__, 'a', 'b'),) + + class MyModel(MyMixin, Base): + __tablename__ = 'atable' + c = Column(Integer,primary_key=True) diff --git a/doc/build/orm/extensions/declarative/relationships.rst b/doc/build/orm/extensions/declarative/relationships.rst new file mode 100644 index 0000000000..fb53c28bb8 --- /dev/null +++ b/doc/build/orm/extensions/declarative/relationships.rst @@ -0,0 +1,138 @@ +.. _declarative_configuring_relationships: + +========================= +Configuring Relationships +========================= + +Relationships to other classes are done in the usual way, with the added +feature that the class specified to :func:`~sqlalchemy.orm.relationship` +may be a string name. The "class registry" associated with ``Base`` +is used at mapper compilation time to resolve the name into the actual +class object, which is expected to have been defined once the mapper +configuration is used:: + + class User(Base): + __tablename__ = 'users' + + id = Column(Integer, primary_key=True) + name = Column(String(50)) + addresses = relationship("Address", backref="user") + + class Address(Base): + __tablename__ = 'addresses' + + id = Column(Integer, primary_key=True) + email = Column(String(50)) + user_id = Column(Integer, ForeignKey('users.id')) + +Column constructs, since they are just that, are immediately usable, +as below where we define a primary join condition on the ``Address`` +class using them:: + + class Address(Base): + __tablename__ = 'addresses' + + id = Column(Integer, primary_key=True) + email = Column(String(50)) + user_id = Column(Integer, ForeignKey('users.id')) + user = relationship(User, primaryjoin=user_id == User.id) + +In addition to the main argument for :func:`~sqlalchemy.orm.relationship`, +other arguments which depend upon the columns present on an as-yet +undefined class may also be specified as strings. These strings are +evaluated as Python expressions. The full namespace available within +this evaluation includes all classes mapped for this declarative base, +as well as the contents of the ``sqlalchemy`` package, including +expression functions like :func:`~sqlalchemy.sql.expression.desc` and +:attr:`~sqlalchemy.sql.expression.func`:: + + class User(Base): + # .... 
+ addresses = relationship("Address", + order_by="desc(Address.email)", + primaryjoin="Address.user_id==User.id") + +For the case where more than one module contains a class of the same name, +string class names can also be specified as module-qualified paths +within any of these string expressions:: + + class User(Base): + # .... + addresses = relationship("myapp.model.address.Address", + order_by="desc(myapp.model.address.Address.email)", + primaryjoin="myapp.model.address.Address.user_id==" + "myapp.model.user.User.id") + +The qualified path can be any partial path that removes ambiguity between +the names. For example, to disambiguate between +``myapp.model.address.Address`` and ``myapp.model.lookup.Address``, +we can specify ``address.Address`` or ``lookup.Address``:: + + class User(Base): + # .... + addresses = relationship("address.Address", + order_by="desc(address.Address.email)", + primaryjoin="address.Address.user_id==" + "User.id") + +.. versionadded:: 0.8 + module-qualified paths can be used when specifying string arguments + with Declarative, in order to specify specific modules. + +Two alternatives also exist to using string-based attributes. A lambda +can also be used, which will be evaluated after all mappers have been +configured:: + + class User(Base): + # ... + addresses = relationship(lambda: Address, + order_by=lambda: desc(Address.email), + primaryjoin=lambda: Address.user_id==User.id) + +Or, the relationship can be added to the class explicitly after the classes +are available:: + + User.addresses = relationship(Address, + primaryjoin=Address.user_id==User.id) + + + +.. _declarative_many_to_many: + +Configuring Many-to-Many Relationships +====================================== + +Many-to-many relationships are also declared in the same way +with declarative as with traditional mappings. The +``secondary`` argument to +:func:`.relationship` is as usual passed a +:class:`.Table` object, which is typically declared in the +traditional way. The :class:`.Table` usually shares +the :class:`.MetaData` object used by the declarative base:: + + keywords = Table( + 'keywords', Base.metadata, + Column('author_id', Integer, ForeignKey('authors.id')), + Column('keyword_id', Integer, ForeignKey('keywords.id')) + ) + + class Author(Base): + __tablename__ = 'authors' + id = Column(Integer, primary_key=True) + keywords = relationship("Keyword", secondary=keywords) + +Like other :func:`~sqlalchemy.orm.relationship` arguments, a string is accepted +as well, passing the string name of the table as defined in the +``Base.metadata.tables`` collection:: + + class Author(Base): + __tablename__ = 'authors' + id = Column(Integer, primary_key=True) + keywords = relationship("Keyword", secondary="keywords") + +As with traditional mapping, its generally not a good idea to use +a :class:`.Table` as the "secondary" argument which is also mapped to +a class, unless the :func:`.relationship` is declared with ``viewonly=True``. +Otherwise, the unit-of-work system may attempt duplicate INSERT and +DELETE statements against the underlying table. + diff --git a/doc/build/orm/extensions/declarative/table_config.rst b/doc/build/orm/extensions/declarative/table_config.rst new file mode 100644 index 0000000000..9a621e6dd9 --- /dev/null +++ b/doc/build/orm/extensions/declarative/table_config.rst @@ -0,0 +1,143 @@ +.. 
_declarative_table_args:
+
+===================
+Table Configuration
+===================
+
+Table arguments other than the name, metadata, and mapped Column
+arguments are specified using the ``__table_args__`` class attribute.
+This attribute accommodates both positional as well as keyword
+arguments that are normally sent to the
+:class:`~sqlalchemy.schema.Table` constructor.
+The attribute can be specified in one of two forms.  One is as a
+dictionary::
+
+    class MyClass(Base):
+        __tablename__ = 'sometable'
+        __table_args__ = {'mysql_engine':'InnoDB'}
+
+The other, a tuple, where each argument is positional
+(usually constraints)::
+
+    class MyClass(Base):
+        __tablename__ = 'sometable'
+        __table_args__ = (
+                ForeignKeyConstraint(['id'], ['remote_table.id']),
+                UniqueConstraint('foo'),
+                )
+
+Keyword arguments can be specified with the above form by
+specifying the last argument as a dictionary::
+
+    class MyClass(Base):
+        __tablename__ = 'sometable'
+        __table_args__ = (
+                ForeignKeyConstraint(['id'], ['remote_table.id']),
+                UniqueConstraint('foo'),
+                {'autoload':True}
+                )
+
+Using a Hybrid Approach with __table__
+=======================================
+
+As an alternative to ``__tablename__``, a direct
+:class:`~sqlalchemy.schema.Table` construct may be used.  The
+:class:`~sqlalchemy.schema.Column` objects, which in this case require
+their names, will be added to the mapping just like a regular mapping
+to a table::
+
+    class MyClass(Base):
+        __table__ = Table('my_table', Base.metadata,
+            Column('id', Integer, primary_key=True),
+            Column('name', String(50))
+        )
+
+``__table__`` provides a more focused point of control for establishing
+table metadata, while still getting most of the benefits of using declarative.
+An application that uses reflection might want to load table metadata elsewhere
+and pass it to declarative classes::
+
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+    Base.metadata.reflect(some_engine)
+
+    class User(Base):
+        __table__ = Base.metadata.tables['user']
+
+    class Address(Base):
+        __table__ = Base.metadata.tables['address']
+
+Some configuration schemes may find it more appropriate to use ``__table__``,
+such as those which already take advantage of the data-driven nature of
+:class:`.Table` to customize and/or automate schema definition.
+
+Note that when the ``__table__`` approach is used, the object is immediately
+usable as a plain :class:`.Table` within the class declaration body itself,
+as a Python class is only another syntactical block.
Below, this is illustrated
+by using the ``id`` column in the ``primaryjoin`` condition of a
+:func:`.relationship`::
+
+    class MyClass(Base):
+        __table__ = Table('my_table', Base.metadata,
+            Column('id', Integer, primary_key=True),
+            Column('name', String(50))
+        )
+
+        widgets = relationship(Widget,
+                    primaryjoin=Widget.myclass_id==__table__.c.id)
+
+Similarly, mapped attributes which refer to ``__table__`` can be placed inline,
+as below where we assign the ``name`` column to the attribute ``_name``,
+generating a synonym for ``name``::
+
+    from sqlalchemy.ext.declarative import synonym_for
+
+    class MyClass(Base):
+        __table__ = Table('my_table', Base.metadata,
+            Column('id', Integer, primary_key=True),
+            Column('name', String(50))
+        )
+
+        _name = __table__.c.name
+
+        @synonym_for("_name")
+        def name(self):
+            return "Name: %s" % self._name
+
+Using Reflection with Declarative
+=================================
+
+It's easy to set up a :class:`.Table` that uses ``autoload=True``
+in conjunction with a mapped class::
+
+    class MyClass(Base):
+        __table__ = Table('mytable', Base.metadata,
+                        autoload=True, autoload_with=some_engine)
+
+However, one improvement that can be made here is to not
+require the :class:`.Engine` to be available when classes are
+being first declared.   To achieve this, use the
+:class:`.DeferredReflection` mixin, which sets up mappings
+only after a special ``prepare(engine)`` step is called::
+
+    from sqlalchemy.ext.declarative import declarative_base, DeferredReflection
+
+    Base = declarative_base(cls=DeferredReflection)
+
+    class Foo(Base):
+        __tablename__ = 'foo'
+        bars = relationship("Bar")
+
+    class Bar(Base):
+        __tablename__ = 'bar'
+
+        # illustrate overriding of "bar.foo_id" to have
+        # a foreign key constraint otherwise not
+        # reflected, such as when using MySQL
+        foo_id = Column(Integer, ForeignKey('foo.id'))
+
+    Base.prepare(some_engine)
+
+.. versionadded:: 0.8
+   Added :class:`.DeferredReflection`.
diff --git a/doc/build/orm/extensions/index.rst b/doc/build/orm/extensions/index.rst
index 65836f13a2..f7f58e3814 100644
--- a/doc/build/orm/extensions/index.rst
+++ b/doc/build/orm/extensions/index.rst
@@ -17,7 +17,7 @@ behavior. In particular the "Horizontal Sharding", "Hybrid Attributes", and
     associationproxy
     automap
-    declarative
+    declarative/index
     mutable
     orderinglist
     horizontal_shard
diff --git a/doc/build/orm/index.rst b/doc/build/orm/index.rst
index 6c12ebd388..b7683a8ad0 100644
--- a/doc/build/orm/index.rst
+++ b/doc/build/orm/index.rst
@@ -9,18 +9,13 @@ as well as automated persistence of Python objects, proceed first to the
 tutorial.
 
 .. toctree::
-    :maxdepth: 3
+    :maxdepth: 2
 
     tutorial
     mapper_config
     relationships
-    collections
-    inheritance
+    loading_objects
     session
-    query
-    loading
-    events
+    extending
     extensions/index
     examples
-    exceptions
-    internals
diff --git a/doc/build/orm/join_conditions.rst b/doc/build/orm/join_conditions.rst
new file mode 100644
index 0000000000..4c0ada5ff0
--- /dev/null
+++ b/doc/build/orm/join_conditions.rst
@@ -0,0 +1,617 @@
+.. _relationship_configure_joins:
+
+Configuring how Relationship Joins
+------------------------------------
+
+:func:`.relationship` will normally create a join between two tables
+by examining the foreign key relationship between the two tables
+to determine which columns should be compared.  There are a variety
+of situations where this behavior needs to be customized.
+
+.. _relationship_foreign_keys:
+
+Handling Multiple Join Paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One of the most common situations to deal with is when
+there is more than one foreign key path between two tables.
+
+Consider a ``Customer`` class that contains two foreign keys to an ``Address``
+class::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class Customer(Base):
+        __tablename__ = 'customer'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        billing_address_id = Column(Integer, ForeignKey("address.id"))
+        shipping_address_id = Column(Integer, ForeignKey("address.id"))
+
+        billing_address = relationship("Address")
+        shipping_address = relationship("Address")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        street = Column(String)
+        city = Column(String)
+        state = Column(String)
+        zip = Column(String)
+
+The above mapping, when we attempt to use it, will produce the error::
+
+    sqlalchemy.exc.AmbiguousForeignKeysError: Could not determine join
+    condition between parent/child tables on relationship
+    Customer.billing_address - there are multiple foreign key
+    paths linking the tables.  Specify the 'foreign_keys' argument,
+    providing a list of those columns which should be
+    counted as containing a foreign key reference to the parent table.
+
+The above message is pretty long.  There are many potential messages
+that :func:`.relationship` can return, which have been carefully tailored
+to detect a variety of common configurational issues; most will suggest
+the additional configuration that's needed to resolve the ambiguity
+or other missing information.
+
+In this case, the message wants us to qualify each :func:`.relationship`
+by instructing each one as to which foreign key column it should
+consider, and the appropriate form is as follows::
+
+    class Customer(Base):
+        __tablename__ = 'customer'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        billing_address_id = Column(Integer, ForeignKey("address.id"))
+        shipping_address_id = Column(Integer, ForeignKey("address.id"))
+
+        billing_address = relationship("Address", foreign_keys=[billing_address_id])
+        shipping_address = relationship("Address", foreign_keys=[shipping_address_id])
+
+Above, we specify the ``foreign_keys`` argument, which is a :class:`.Column` or list
+of :class:`.Column` objects which indicate those columns to be considered "foreign",
+or in other words, the columns that contain a value referring to a parent table.
+Loading the ``Customer.billing_address`` relationship from a ``Customer``
+object will use the value present in ``billing_address_id`` in order to
+identify the row in ``Address`` to be loaded; similarly, ``shipping_address_id``
+is used for the ``shipping_address`` relationship.   The linkage of the two
+columns also plays a role during persistence; the newly generated primary key
+of a just-inserted ``Address`` object will be copied into the appropriate
+foreign key column of an associated ``Customer`` object during a flush.
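+
+As a brief usage sketch (``session`` here is assumed to be an
+already-configured :class:`.Session`), the two relationships now persist
+independently of one another::
+
+    customer = Customer(
+        name="some customer",
+        billing_address=Address(street="123 anywhere street"),
+        shipping_address=Address(street="347 anywhere street"),
+    )
+    session.add(customer)
+
+    # upon flush, each newly generated Address.id is copied into
+    # billing_address_id / shipping_address_id respectively, as
+    # directed by the foreign_keys setting on each relationship()
+    session.commit()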
+ +When specifying ``foreign_keys`` with Declarative, we can also use string +names to specify, however it is important that if using a list, the **list +is part of the string**:: + + billing_address = relationship("Address", foreign_keys="[Customer.billing_address_id]") + +In this specific example, the list is not necessary in any case as there's only +one :class:`.Column` we need:: + + billing_address = relationship("Address", foreign_keys="Customer.billing_address_id") + +.. versionchanged:: 0.8 + :func:`.relationship` can resolve ambiguity between foreign key targets on the + basis of the ``foreign_keys`` argument alone; the :paramref:`~.relationship.primaryjoin` + argument is no longer needed in this situation. + +.. _relationship_primaryjoin: + +Specifying Alternate Join Conditions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The default behavior of :func:`.relationship` when constructing a join +is that it equates the value of primary key columns +on one side to that of foreign-key-referring columns on the other. +We can change this criterion to be anything we'd like using the +:paramref:`~.relationship.primaryjoin` +argument, as well as the :paramref:`~.relationship.secondaryjoin` +argument in the case when a "secondary" table is used. + +In the example below, using the ``User`` class +as well as an ``Address`` class which stores a street address, we +create a relationship ``boston_addresses`` which will only +load those ``Address`` objects which specify a city of "Boston":: + + from sqlalchemy import Integer, ForeignKey, String, Column + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy.orm import relationship + + Base = declarative_base() + + class User(Base): + __tablename__ = 'user' + id = Column(Integer, primary_key=True) + name = Column(String) + boston_addresses = relationship("Address", + primaryjoin="and_(User.id==Address.user_id, " + "Address.city=='Boston')") + + class Address(Base): + __tablename__ = 'address' + id = Column(Integer, primary_key=True) + user_id = Column(Integer, ForeignKey('user.id')) + + street = Column(String) + city = Column(String) + state = Column(String) + zip = Column(String) + +Within this string SQL expression, we made use of the :func:`.and_` conjunction construct to establish +two distinct predicates for the join condition - joining both the ``User.id`` and +``Address.user_id`` columns to each other, as well as limiting rows in ``Address`` +to just ``city='Boston'``. When using Declarative, rudimentary SQL functions like +:func:`.and_` are automatically available in the evaluated namespace of a string +:func:`.relationship` argument. + +The custom criteria we use in a :paramref:`~.relationship.primaryjoin` +is generally only significant when SQLAlchemy is rendering SQL in +order to load or represent this relationship. That is, it's used in +the SQL statement that's emitted in order to perform a per-attribute +lazy load, or when a join is constructed at query time, such as via +:meth:`.Query.join`, or via the eager "joined" or "subquery" styles of +loading. When in-memory objects are being manipulated, we can place +any ``Address`` object we'd like into the ``boston_addresses`` +collection, regardless of what the value of the ``.city`` attribute +is. The objects will remain present in the collection until the +attribute is expired and re-loaded from the database where the +criterion is applied. 
When a flush occurs, the objects inside of +``boston_addresses`` will be flushed unconditionally, assigning value +of the primary key ``user.id`` column onto the foreign-key-holding +``address.user_id`` column for each row. The ``city`` criteria has no +effect here, as the flush process only cares about synchronizing +primary key values into referencing foreign key values. + +.. _relationship_custom_foreign: + +Creating Custom Foreign Conditions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Another element of the primary join condition is how those columns +considered "foreign" are determined. Usually, some subset +of :class:`.Column` objects will specify :class:`.ForeignKey`, or otherwise +be part of a :class:`.ForeignKeyConstraint` that's relevant to the join condition. +:func:`.relationship` looks to this foreign key status as it decides +how it should load and persist data for this relationship. However, the +:paramref:`~.relationship.primaryjoin` argument can be used to create a join condition that +doesn't involve any "schema" level foreign keys. We can combine :paramref:`~.relationship.primaryjoin` +along with :paramref:`~.relationship.foreign_keys` and :paramref:`~.relationship.remote_side` explicitly in order to +establish such a join. + +Below, a class ``HostEntry`` joins to itself, equating the string ``content`` +column to the ``ip_address`` column, which is a Postgresql type called ``INET``. +We need to use :func:`.cast` in order to cast one side of the join to the +type of the other:: + + from sqlalchemy import cast, String, Column, Integer + from sqlalchemy.orm import relationship + from sqlalchemy.dialects.postgresql import INET + + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class HostEntry(Base): + __tablename__ = 'host_entry' + + id = Column(Integer, primary_key=True) + ip_address = Column(INET) + content = Column(String(50)) + + # relationship() using explicit foreign_keys, remote_side + parent_host = relationship("HostEntry", + primaryjoin=ip_address == cast(content, INET), + foreign_keys=content, + remote_side=ip_address + ) + +The above relationship will produce a join like:: + + SELECT host_entry.id, host_entry.ip_address, host_entry.content + FROM host_entry JOIN host_entry AS host_entry_1 + ON host_entry_1.ip_address = CAST(host_entry.content AS INET) + +An alternative syntax to the above is to use the :func:`.foreign` and +:func:`.remote` :term:`annotations`, +inline within the :paramref:`~.relationship.primaryjoin` expression. +This syntax represents the annotations that :func:`.relationship` normally +applies by itself to the join condition given the :paramref:`~.relationship.foreign_keys` and +:paramref:`~.relationship.remote_side` arguments. These functions may +be more succinct when an explicit join condition is present, and additionally +serve to mark exactly the column that is "foreign" or "remote" independent +of whether that column is stated multiple times or within complex +SQL expressions:: + + from sqlalchemy.orm import foreign, remote + + class HostEntry(Base): + __tablename__ = 'host_entry' + + id = Column(Integer, primary_key=True) + ip_address = Column(INET) + content = Column(String(50)) + + # relationship() using explicit foreign() and remote() annotations + # in lieu of separate arguments + parent_host = relationship("HostEntry", + primaryjoin=remote(ip_address) == \ + cast(foreign(content), INET), + ) + + +.. 
_relationship_custom_operator: + +Using custom operators in join conditions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Another use case for relationships is the use of custom operators, such +as Postgresql's "is contained within" ``<<`` operator when joining with +types such as :class:`.postgresql.INET` and :class:`.postgresql.CIDR`. +For custom operators we use the :meth:`.Operators.op` function:: + + inet_column.op("<<")(cidr_column) + +However, if we construct a :paramref:`~.relationship.primaryjoin` using this +operator, :func:`.relationship` will still need more information. This is because +when it examines our primaryjoin condition, it specifically looks for operators +used for **comparisons**, and this is typically a fixed list containing known +comparison operators such as ``==``, ``<``, etc. So for our custom operator +to participate in this system, we need it to register as a comparison operator +using the :paramref:`~.Operators.op.is_comparison` parameter:: + + inet_column.op("<<", is_comparison=True)(cidr_column) + +A complete example:: + + class IPA(Base): + __tablename__ = 'ip_address' + + id = Column(Integer, primary_key=True) + v4address = Column(INET) + + network = relationship("Network", + primaryjoin="IPA.v4address.op('<<', is_comparison=True)" + "(foreign(Network.v4representation))", + viewonly=True + ) + class Network(Base): + __tablename__ = 'network' + + id = Column(Integer, primary_key=True) + v4representation = Column(CIDR) + +Above, a query such as:: + + session.query(IPA).join(IPA.network) + +Will render as:: + + SELECT ip_address.id AS ip_address_id, ip_address.v4address AS ip_address_v4address + FROM ip_address JOIN network ON ip_address.v4address << network.v4representation + +.. versionadded:: 0.9.2 - Added the :paramref:`.Operators.op.is_comparison` + flag to assist in the creation of :func:`.relationship` constructs using + custom operators. + + +Non-relational Comparisons / Materialized Path +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: this section details an experimental feature. + +Using custom expressions means we can produce unorthodox join conditions that +don't obey the usual primary/foreign key model. One such example is the +materialized path pattern, where we compare strings for overlapping path tokens +in order to produce a tree structure. + +Through careful use of :func:`.foreign` and :func:`.remote`, we can build +a relationship that effectively produces a rudimentary materialized path +system. Essentially, when :func:`.foreign` and :func:`.remote` are +on the *same* side of the comparison expression, the relationship is considered +to be "one to many"; when they are on *different* sides, the relationship +is considered to be "many to one". For the comparison we'll use here, +we'll be dealing with collections so we keep things configured as "one to many":: + + class Element(Base): + __tablename__ = 'element' + + path = Column(String, primary_key=True) + + descendants = relationship('Element', + primaryjoin= + remote(foreign(path)).like( + path.concat('/%')), + viewonly=True, + order_by=path) + +Above, if given an ``Element`` object with a path attribute of ``"/foo/bar2"``, +we seek for a load of ``Element.descendants`` to look like:: + + SELECT element.path AS element_path + FROM element + WHERE element.path LIKE ('/foo/bar2' || '/%') ORDER BY element.path + +.. 
versionadded:: 0.9.5 Support has been added to allow a single-column + comparison to itself within a primaryjoin condition, as well as for + primaryjoin conditions that use :meth:`.Operators.like` as the comparison + operator. + +.. _self_referential_many_to_many: + +Self-Referential Many-to-Many Relationship +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many to many relationships can be customized by one or both of :paramref:`~.relationship.primaryjoin` +and :paramref:`~.relationship.secondaryjoin` - the latter is significant for a relationship that +specifies a many-to-many reference using the :paramref:`~.relationship.secondary` argument. +A common situation which involves the usage of :paramref:`~.relationship.primaryjoin` and :paramref:`~.relationship.secondaryjoin` +is when establishing a many-to-many relationship from a class to itself, as shown below:: + + from sqlalchemy import Integer, ForeignKey, String, Column, Table + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy.orm import relationship + + Base = declarative_base() + + node_to_node = Table("node_to_node", Base.metadata, + Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), + Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) + ) + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + label = Column(String) + right_nodes = relationship("Node", + secondary=node_to_node, + primaryjoin=id==node_to_node.c.left_node_id, + secondaryjoin=id==node_to_node.c.right_node_id, + backref="left_nodes" + ) + +Where above, SQLAlchemy can't know automatically which columns should connect +to which for the ``right_nodes`` and ``left_nodes`` relationships. The :paramref:`~.relationship.primaryjoin` +and :paramref:`~.relationship.secondaryjoin` arguments establish how we'd like to join to the association table. +In the Declarative form above, as we are declaring these conditions within the Python +block that corresponds to the ``Node`` class, the ``id`` variable is available directly +as the :class:`.Column` object we wish to join with. + +Alternatively, we can define the :paramref:`~.relationship.primaryjoin` +and :paramref:`~.relationship.secondaryjoin` arguments using strings, which is suitable +in the case that our configuration does not have either the ``Node.id`` column +object available yet or the ``node_to_node`` table perhaps isn't yet available. 
+When referring to a plain :class:`.Table` object in a declarative string, we +use the string name of the table as it is present in the :class:`.MetaData`:: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + label = Column(String) + right_nodes = relationship("Node", + secondary="node_to_node", + primaryjoin="Node.id==node_to_node.c.left_node_id", + secondaryjoin="Node.id==node_to_node.c.right_node_id", + backref="left_nodes" + ) + +A classical mapping situation here is similar, where ``node_to_node`` can be joined +to ``node.c.id``:: + + from sqlalchemy import Integer, ForeignKey, String, Column, Table, MetaData + from sqlalchemy.orm import relationship, mapper + + metadata = MetaData() + + node_to_node = Table("node_to_node", metadata, + Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), + Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) + ) + + node = Table("node", metadata, + Column('id', Integer, primary_key=True), + Column('label', String) + ) + class Node(object): + pass + + mapper(Node, node, properties={ + 'right_nodes':relationship(Node, + secondary=node_to_node, + primaryjoin=node.c.id==node_to_node.c.left_node_id, + secondaryjoin=node.c.id==node_to_node.c.right_node_id, + backref="left_nodes" + )}) + + +Note that in both examples, the :paramref:`~.relationship.backref` +keyword specifies a ``left_nodes`` backref - when +:func:`.relationship` creates the second relationship in the reverse +direction, it's smart enough to reverse the +:paramref:`~.relationship.primaryjoin` and +:paramref:`~.relationship.secondaryjoin` arguments. + +.. _composite_secondary_join: + +Composite "Secondary" Joins +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This section features some new and experimental features of SQLAlchemy. + +Sometimes, when one seeks to build a :func:`.relationship` between two tables +there is a need for more than just two or three tables to be involved in +order to join them. This is an area of :func:`.relationship` where one seeks +to push the boundaries of what's possible, and often the ultimate solution to +many of these exotic use cases needs to be hammered out on the SQLAlchemy mailing +list. + +In more recent versions of SQLAlchemy, the :paramref:`~.relationship.secondary` +parameter can be used in some of these cases in order to provide a composite +target consisting of multiple tables. Below is an example of such a +join condition (requires version 0.9.2 at least to function as is):: + + class A(Base): + __tablename__ = 'a' + + id = Column(Integer, primary_key=True) + b_id = Column(ForeignKey('b.id')) + + d = relationship("D", + secondary="join(B, D, B.d_id == D.id)." + "join(C, C.d_id == D.id)", + primaryjoin="and_(A.b_id == B.id, A.id == C.a_id)", + secondaryjoin="D.id == B.d_id", + uselist=False + ) + + class B(Base): + __tablename__ = 'b' + + id = Column(Integer, primary_key=True) + d_id = Column(ForeignKey('d.id')) + + class C(Base): + __tablename__ = 'c' + + id = Column(Integer, primary_key=True) + a_id = Column(ForeignKey('a.id')) + d_id = Column(ForeignKey('d.id')) + + class D(Base): + __tablename__ = 'd' + + id = Column(Integer, primary_key=True) + +In the above example, we provide all three of :paramref:`~.relationship.secondary`, +:paramref:`~.relationship.primaryjoin`, and :paramref:`~.relationship.secondaryjoin`, +in the declarative style referring to the named tables ``a``, ``b``, ``c``, ``d`` +directly. A query from ``A`` to ``D`` looks like: + +.. 
sourcecode:: python+sql + + sess.query(A).join(A.d).all() + + {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id + FROM a JOIN ( + b AS b_1 JOIN d AS d_1 ON b_1.d_id = d_1.id + JOIN c AS c_1 ON c_1.d_id = d_1.id) + ON a.b_id = b_1.id AND a.id = c_1.a_id JOIN d ON d.id = b_1.d_id + +In the above example, we take advantage of being able to stuff multiple +tables into a "secondary" container, so that we can join across many +tables while still keeping things "simple" for :func:`.relationship`, in that +there's just "one" table on both the "left" and the "right" side; the +complexity is kept within the middle. + +.. versionadded:: 0.9.2 Support is improved for allowing a :func:`.join()` + construct to be used directly as the target of the :paramref:`~.relationship.secondary` + argument, including support for joins, eager joins and lazy loading, + as well as support within declarative to specify complex conditions such + as joins involving class names as targets. + +.. _relationship_non_primary_mapper: + +Relationship to Non Primary Mapper +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the previous section, we illustrated a technique where we used +:paramref:`~.relationship.secondary` in order to place additional +tables within a join condition. There is one complex join case where +even this technique is not sufficient; when we seek to join from ``A`` +to ``B``, making use of any number of ``C``, ``D``, etc. in between, +however there are also join conditions between ``A`` and ``B`` +*directly*. In this case, the join from ``A`` to ``B`` may be +difficult to express with just a complex +:paramref:`~.relationship.primaryjoin` condition, as the intermediary +tables may need special handling, and it is also not expressable with +a :paramref:`~.relationship.secondary` object, since the +``A->secondary->B`` pattern does not support any references between +``A`` and ``B`` directly. When this **extremely advanced** case +arises, we can resort to creating a second mapping as a target for the +relationship. This is where we use :func:`.mapper` in order to make a +mapping to a class that includes all the additional tables we need for +this join. In order to produce this mapper as an "alternative" mapping +for our class, we use the :paramref:`~.mapper.non_primary` flag. + +Below illustrates a :func:`.relationship` with a simple join from ``A`` to +``B``, however the primaryjoin condition is augmented with two additional +entities ``C`` and ``D``, which also must have rows that line up with +the rows in both ``A`` and ``B`` simultaneously:: + + class A(Base): + __tablename__ = 'a' + + id = Column(Integer, primary_key=True) + b_id = Column(ForeignKey('b.id')) + + class B(Base): + __tablename__ = 'b' + + id = Column(Integer, primary_key=True) + + class C(Base): + __tablename__ = 'c' + + id = Column(Integer, primary_key=True) + a_id = Column(ForeignKey('a.id')) + + class D(Base): + __tablename__ = 'd' + + id = Column(Integer, primary_key=True) + c_id = Column(ForeignKey('c.id')) + b_id = Column(ForeignKey('b.id')) + + # 1. set up the join() as a variable, so we can refer + # to it in the mapping multiple times. + j = join(B, D, D.b_id == B.id).join(C, C.id == D.c_id) + + # 2. Create a new mapper() to B, with non_primary=True. + # Columns in the join with the same name must be + # disambiguated within the mapping, using named properties. 
+    B_viacd = mapper(B, j, non_primary=True, properties={
+        "b_id": [j.c.b_id, j.c.d_b_id],
+        "d_id": j.c.d_id
+    })
+
+    A.b = relationship(B_viacd, primaryjoin=A.b_id == B_viacd.c.b_id)
+
+In the above case, our non-primary mapper for ``B`` will emit
+additional columns when we query; these can be ignored:
+
+.. sourcecode:: python+sql
+
+    sess.query(A).join(A.b).all()
+
+    {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id
+    FROM a JOIN (b JOIN d ON d.b_id = b.id JOIN c ON c.id = d.c_id) ON a.b_id = b.id
+
+
+Building Query-Enabled Properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Very ambitious custom join conditions may fail to be directly persistable, and
+in some cases may not even load correctly.  To remove the persistence part of
+the equation, use the flag :paramref:`~.relationship.viewonly` on the
+:func:`~sqlalchemy.orm.relationship`, which establishes it as a read-only
+attribute (data written to the collection will be ignored on flush()).
+However, in extreme cases, consider using a regular Python property in
+conjunction with :class:`.Query` as follows:
+
+.. sourcecode:: python+sql
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+
+        def _get_addresses(self):
+            return object_session(self).query(Address).with_parent(self).filter(...).all()
+        addresses = property(_get_addresses)
+
diff --git a/doc/build/orm/loading_columns.rst b/doc/build/orm/loading_columns.rst
new file mode 100644
index 0000000000..0f69ad2aa7
--- /dev/null
+++ b/doc/build/orm/loading_columns.rst
@@ -0,0 +1,189 @@
+.. module:: sqlalchemy.orm
+
+===============
+Loading Columns
+===============
+
+This section presents additional options regarding the loading of columns.
+
+.. _deferred:
+
+Deferred Column Loading
+========================
+
+This feature allows particular columns of a table to be loaded only
+upon direct access, instead of when the entity is queried using
+:class:`.Query`.  This feature is useful when one wants to avoid
+loading a large text or binary field into memory when it's not needed.
+Individual columns can be lazy loaded by themselves or placed into groups that
+lazy-load together, using the :func:`.orm.deferred` function to
+mark them as "deferred".  In the example below, we define a mapping that will load each of
+``.excerpt`` and ``.photo`` in separate, individual-row SELECT statements when each
+attribute is first referenced on the individual object instance::
+
+    from sqlalchemy.orm import deferred
+    from sqlalchemy import Integer, String, Text, Binary, Column
+
+    class Book(Base):
+        __tablename__ = 'book'
+
+        book_id = Column(Integer, primary_key=True)
+        title = Column(String(200), nullable=False)
+        summary = Column(String(2000))
+        excerpt = deferred(Column(Text))
+        photo = deferred(Column(Binary))
+
+Classical mappings as always place the usage of :func:`.orm.deferred` in the
+``properties`` dictionary against the table-bound :class:`.Column`::
+
+    mapper(Book, book_table, properties={
+        'photo':deferred(book_table.c.photo)
+    })
+
+Deferred columns can be associated with a "group" name, so that they load
+together when any of them are first accessed.  The example below defines a
+mapping with a ``photos`` deferred group.  When one ``.photo`` is accessed, all three
+photos will be loaded in one SELECT statement.
The ``.excerpt`` will be loaded +separately when it is accessed:: + + class Book(Base): + __tablename__ = 'book' + + book_id = Column(Integer, primary_key=True) + title = Column(String(200), nullable=False) + summary = Column(String(2000)) + excerpt = deferred(Column(Text)) + photo1 = deferred(Column(Binary), group='photos') + photo2 = deferred(Column(Binary), group='photos') + photo3 = deferred(Column(Binary), group='photos') + +You can defer or undefer columns at the :class:`~sqlalchemy.orm.query.Query` +level using options, including :func:`.orm.defer` and :func:`.orm.undefer`:: + + from sqlalchemy.orm import defer, undefer + + query = session.query(Book) + query = query.options(defer('summary')) + query = query.options(undefer('excerpt')) + query.all() + +:func:`.orm.deferred` attributes which are marked with a "group" can be undeferred +using :func:`.orm.undefer_group`, sending in the group name:: + + from sqlalchemy.orm import undefer_group + + query = session.query(Book) + query.options(undefer_group('photos')).all() + +Load Only Cols +--------------- + +An arbitrary set of columns can be selected as "load only" columns, which will +be loaded while deferring all other columns on a given entity, using :func:`.orm.load_only`:: + + from sqlalchemy.orm import load_only + + session.query(Book).options(load_only("summary", "excerpt")) + +.. versionadded:: 0.9.0 + +Deferred Loading with Multiple Entities +--------------------------------------- + +To specify column deferral options within a :class:`.Query` that loads multiple types +of entity, the :class:`.Load` object can specify which parent entity to start with:: + + from sqlalchemy.orm import Load + + query = session.query(Book, Author).join(Book.author) + query = query.options( + Load(Book).load_only("summary", "excerpt"), + Load(Author).defer("bio") + ) + +To specify column deferral options along the path of various relationships, +the options support chaining, where the loading style of each relationship +is specified first, then is chained to the deferral options. Such as, to load +``Book`` instances, then joined-eager-load the ``Author``, then apply deferral +options to the ``Author`` entity:: + + from sqlalchemy.orm import joinedload + + query = session.query(Book) + query = query.options( + joinedload(Book.author).load_only("summary", "excerpt"), + ) + +In the case where the loading style of parent relationships should be left +unchanged, use :func:`.orm.defaultload`:: + + from sqlalchemy.orm import defaultload + + query = session.query(Book) + query = query.options( + defaultload(Book.author).load_only("summary", "excerpt"), + ) + +.. versionadded:: 0.9.0 support for :class:`.Load` and other options which + allow for better targeting of deferral options. + +Column Deferral API +------------------- + +.. autofunction:: deferred + +.. autofunction:: defer + +.. autofunction:: load_only + +.. autofunction:: undefer + +.. autofunction:: undefer_group + +.. _bundles: + +Column Bundles +=============== + +The :class:`.Bundle` may be used to query for groups of columns under one +namespace. + +.. versionadded:: 0.9.0 + +The bundle allows columns to be grouped together:: + + from sqlalchemy.orm import Bundle + + bn = Bundle('mybundle', MyClass.data1, MyClass.data2) + for row in session.query(bn).filter(bn.c.data1 == 'd1'): + print row.mybundle.data1, row.mybundle.data2 + +The bundle can be subclassed to provide custom behaviors when results +are fetched. 
The method :meth:`.Bundle.create_row_processor` is given +the :class:`.Query` and a set of "row processor" functions at query execution +time; these processor functions when given a result row will return the +individual attribute value, which can then be adapted into any kind of +return data structure. Below illustrates replacing the usual :class:`.KeyedTuple` +return structure with a straight Python dictionary:: + + from sqlalchemy.orm import Bundle + + class DictBundle(Bundle): + def create_row_processor(self, query, procs, labels): + """Override create_row_processor to return values as dictionaries""" + def proc(row, result): + return dict( + zip(labels, (proc(row, result) for proc in procs)) + ) + return proc + +A result from the above bundle will return dictionary values:: + + bn = DictBundle('mybundle', MyClass.data1, MyClass.data2) + for row in session.query(bn).filter(bn.c.data1 == 'd1'): + print row.mybundle['data1'], row.mybundle['data2'] + +The :class:`.Bundle` construct is also integrated into the behavior +of :func:`.composite`, where it is used to return composite attributes as objects +when queried as individual attributes. + diff --git a/doc/build/orm/loading_objects.rst b/doc/build/orm/loading_objects.rst new file mode 100644 index 0000000000..e7eb95a3ff --- /dev/null +++ b/doc/build/orm/loading_objects.rst @@ -0,0 +1,15 @@ +======================= +Loading Objects +======================= + +Notes and features regarding the general loading of mapped objects. + +For an in-depth introduction to querying with the SQLAlchemy ORM, please see the :ref:`ormtutorial_toplevel`. + +.. toctree:: + :maxdepth: 2 + + loading_columns + loading_relationships + constructors + query diff --git a/doc/build/orm/loading.rst b/doc/build/orm/loading_relationships.rst similarity index 100% rename from doc/build/orm/loading.rst rename to doc/build/orm/loading_relationships.rst diff --git a/doc/build/orm/mapped_attributes.rst b/doc/build/orm/mapped_attributes.rst new file mode 100644 index 0000000000..1a43fd41e0 --- /dev/null +++ b/doc/build/orm/mapped_attributes.rst @@ -0,0 +1,334 @@ +.. module:: sqlalchemy.orm + +Changing Attribute Behavior +============================ + +.. _simple_validators: + +Simple Validators +----------------- + +A quick way to add a "validation" routine to an attribute is to use the +:func:`~sqlalchemy.orm.validates` decorator. An attribute validator can raise +an exception, halting the process of mutating the attribute's value, or can +change the given value into something different. Validators, like all +attribute extensions, are only called by normal userland code; they are not +issued when the ORM is populating the object:: + + from sqlalchemy.orm import validates + + class EmailAddress(Base): + __tablename__ = 'address' + + id = Column(Integer, primary_key=True) + email = Column(String) + + @validates('email') + def validate_email(self, key, address): + assert '@' in address + return address + +Validators also receive collection append events, when items are added to a +collection:: + + from sqlalchemy.orm import validates + + class User(Base): + # ... + + addresses = relationship("Address") + + @validates('addresses') + def validate_address(self, key, address): + assert '@' in address.email + return address + + +The validation function by default does not get emitted for collection +remove events, as the typical expectation is that a value being discarded +doesn't require validation. 
However, :func:`.validates` supports reception +of these events by specifying ``include_removes=True`` to the decorator. When +this flag is set, the validation function must receive an additional boolean +argument which if ``True`` indicates that the operation is a removal:: + + from sqlalchemy.orm import validates + + class User(Base): + # ... + + addresses = relationship("Address") + + @validates('addresses', include_removes=True) + def validate_address(self, key, address, is_remove): + if is_remove: + raise ValueError( + "not allowed to remove items from the collection") + else: + assert '@' in address.email + return address + +The case where mutually dependent validators are linked via a backref +can also be tailored, using the ``include_backrefs=False`` option; this option, +when set to ``False``, prevents a validation function from emitting if the +event occurs as a result of a backref:: + + from sqlalchemy.orm import validates + + class User(Base): + # ... + + addresses = relationship("Address", backref='user') + + @validates('addresses', include_backrefs=False) + def validate_address(self, key, address): + assert '@' in address.email + return address + +Above, if we were to assign to ``Address.user`` as in ``some_address.user = some_user``, +the ``validate_address()`` function would *not* be emitted, even though an append +occurs to ``some_user.addresses`` - the event is caused by a backref. + +Note that the :func:`~.validates` decorator is a convenience function built on +top of attribute events. An application that requires more control over +configuration of attribute change behavior can make use of this system, +described at :class:`~.AttributeEvents`. + +.. autofunction:: validates + +.. _mapper_hybrids: + +Using Descriptors and Hybrids +----------------------------- + +A more comprehensive way to produce modified behavior for an attribute is to +use :term:`descriptors`. These are commonly used in Python using the ``property()`` +function. The standard SQLAlchemy technique for descriptors is to create a +plain descriptor, and to have it read/write from a mapped attribute with a +different name. Below we illustrate this using Python 2.6-style properties:: + + class EmailAddress(Base): + __tablename__ = 'email_address' + + id = Column(Integer, primary_key=True) + + # name the attribute with an underscore, + # different from the column name + _email = Column("email", String) + + # then create an ".email" attribute + # to get/set "._email" + @property + def email(self): + return self._email + + @email.setter + def email(self, email): + self._email = email + +The approach above will work, but there's more we can add. While our +``EmailAddress`` object will shuttle the value through the ``email`` +descriptor and into the ``_email`` mapped attribute, the class level +``EmailAddress.email`` attribute does not have the usual expression semantics +usable with :class:`.Query`. 
To provide these, we instead use the +:mod:`~sqlalchemy.ext.hybrid` extension as follows:: + + from sqlalchemy.ext.hybrid import hybrid_property + + class EmailAddress(Base): + __tablename__ = 'email_address' + + id = Column(Integer, primary_key=True) + + _email = Column("email", String) + + @hybrid_property + def email(self): + return self._email + + @email.setter + def email(self, email): + self._email = email + +The ``.email`` attribute, in addition to providing getter/setter behavior when we have an +instance of ``EmailAddress``, also provides a SQL expression when used at the class level, +that is, from the ``EmailAddress`` class directly: + +.. sourcecode:: python+sql + + from sqlalchemy.orm import Session + session = Session() + + {sql}address = session.query(EmailAddress).\ + filter(EmailAddress.email == 'address@example.com').\ + one() + SELECT address.email AS address_email, address.id AS address_id + FROM address + WHERE address.email = ? + ('address@example.com',) + {stop} + + address.email = 'otheraddress@example.com' + {sql}session.commit() + UPDATE address SET email=? WHERE address.id = ? + ('otheraddress@example.com', 1) + COMMIT + {stop} + +The :class:`~.hybrid_property` also allows us to change the behavior of the +attribute, including defining separate behaviors when the attribute is +accessed at the instance level versus at the class/expression level, using the +:meth:`.hybrid_property.expression` modifier. Such as, if we wanted to add a +host name automatically, we might define two sets of string manipulation +logic:: + + class EmailAddress(Base): + __tablename__ = 'email_address' + + id = Column(Integer, primary_key=True) + + _email = Column("email", String) + + @hybrid_property + def email(self): + """Return the value of _email up until the last twelve + characters.""" + + return self._email[:-12] + + @email.setter + def email(self, email): + """Set the value of _email, tacking on the twelve character + value @example.com.""" + + self._email = email + "@example.com" + + @email.expression + def email(cls): + """Produce a SQL expression that represents the value + of the _email column, minus the last twelve characters.""" + + return func.substr(cls._email, 0, func.length(cls._email) - 12) + +Above, accessing the ``email`` property of an instance of ``EmailAddress`` +will return the value of the ``_email`` attribute, removing or adding the +hostname ``@example.com`` from the value. When we query against the ``email`` +attribute, a SQL function is rendered which produces the same effect: + +.. sourcecode:: python+sql + + {sql}address = session.query(EmailAddress).filter(EmailAddress.email == 'address').one() + SELECT address.email AS address_email, address.id AS address_id + FROM address + WHERE substr(address.email, ?, length(address.email) - ?) = ? + (0, 12, 'address') + {stop} + +Read more about Hybrids at :ref:`hybrids_toplevel`. + +.. _synonyms: + +Synonyms +-------- + +Synonyms are a mapper-level construct that allow any attribute on a class +to "mirror" another attribute that is mapped. 
+ +In the most basic sense, the synonym is an easy way to make a certain +attribute available by an additional name:: + + class MyClass(Base): + __tablename__ = 'my_table' + + id = Column(Integer, primary_key=True) + job_status = Column(String(50)) + + status = synonym("job_status") + +The above class ``MyClass`` has two attributes, ``.job_status`` and +``.status`` that will behave as one attribute, both at the expression +level:: + + >>> print MyClass.job_status == 'some_status' + my_table.job_status = :job_status_1 + + >>> print MyClass.status == 'some_status' + my_table.job_status = :job_status_1 + +and at the instance level:: + + >>> m1 = MyClass(status='x') + >>> m1.status, m1.job_status + ('x', 'x') + + >>> m1.job_status = 'y' + >>> m1.status, m1.job_status + ('y', 'y') + +The :func:`.synonym` can be used for any kind of mapped attribute that +subclasses :class:`.MapperProperty`, including mapped columns and relationships, +as well as synonyms themselves. + +Beyond a simple mirror, :func:`.synonym` can also be made to reference +a user-defined :term:`descriptor`. We can supply our +``status`` synonym with a ``@property``:: + + class MyClass(Base): + __tablename__ = 'my_table' + + id = Column(Integer, primary_key=True) + status = Column(String(50)) + + @property + def job_status(self): + return "Status: " + self.status + + job_status = synonym("status", descriptor=job_status) + +When using Declarative, the above pattern can be expressed more succinctly +using the :func:`.synonym_for` decorator:: + + from sqlalchemy.ext.declarative import synonym_for + + class MyClass(Base): + __tablename__ = 'my_table' + + id = Column(Integer, primary_key=True) + status = Column(String(50)) + + @synonym_for("status") + @property + def job_status(self): + return "Status: " + self.status + +While the :func:`.synonym` is useful for simple mirroring, the use case +of augmenting attribute behavior with descriptors is better handled in modern +usage using the :ref:`hybrid attribute ` feature, which +is more oriented towards Python descriptors. Technically, a :func:`.synonym` +can do everything that a :class:`.hybrid_property` can do, as it also supports +injection of custom SQL capabilities, but the hybrid is more straightforward +to use in more complex situations. + +.. autofunction:: synonym + +.. _custom_comparators: + +Operator Customization +---------------------- + +The "operators" used by the SQLAlchemy ORM and Core expression language +are fully customizable. For example, the comparison expression +``User.name == 'ed'`` makes usage of an operator built into Python +itself called ``operator.eq`` - the actual SQL construct which SQLAlchemy +associates with such an operator can be modified. New +operations can be associated with column expressions as well. The operators +which take place for column expressions are most directly redefined at the +type level - see the +section :ref:`types_operators` for a description. + +ORM level functions like :func:`.column_property`, :func:`.relationship`, +and :func:`.composite` also provide for operator redefinition at the ORM +level, by passing a :class:`.PropComparator` subclass to the ``comparator_factory`` +argument of each function. Customization of operators at this level is a +rare use case. See the documentation at :class:`.PropComparator` +for an overview. 
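+
+As a brief illustration of the ``comparator_factory`` hook, the following
+sketch (the class and column names here are hypothetical, not part of any
+SQLAlchemy API) redefines ``==`` for a single mapped column so that it
+produces a case-insensitive comparison::
+
+    from sqlalchemy import Column, Integer, String, func
+    from sqlalchemy.orm import column_property
+    from sqlalchemy.orm.properties import ColumnProperty
+
+    class CaseInsensitiveComparator(ColumnProperty.Comparator):
+        def __eq__(self, other):
+            # lower-case both sides of the comparison in SQL
+            return func.lower(self.__clause_element__()) == func.lower(other)
+
+    class SearchWord(Base):
+        __tablename__ = 'search_word'
+
+        id = Column(Integer, primary_key=True)
+        word = column_property(
+            Column(String(50)),
+            comparator_factory=CaseInsensitiveComparator
+        )
+
+With the above mapping, a criterion such as ``SearchWord.word == 'SomeWord'``
+renders the lowercased comparison, e.g. ``lower(search_word.word) = lower(:lower_1)``.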
+
diff --git a/doc/build/orm/mapped_sql_expr.rst b/doc/build/orm/mapped_sql_expr.rst
new file mode 100644
index 0000000000..1ae5b12852
--- /dev/null
+++ b/doc/build/orm/mapped_sql_expr.rst
@@ -0,0 +1,208 @@
+.. module:: sqlalchemy.orm
+
+.. _mapper_sql_expressions:
+
+SQL Expressions as Mapped Attributes
+=====================================
+
+Attributes on a mapped class can be linked to SQL expressions, which can
+be used in queries.
+
+Using a Hybrid
+--------------
+
+The easiest and most flexible way to link relatively simple SQL expressions to a class is to use a so-called
+"hybrid attribute",
+described in the section :ref:`hybrids_toplevel`. The hybrid provides
+for an expression that works at both the Python level as well as at the
+SQL expression level. For example, below we map a class ``User``,
+containing attributes ``firstname`` and ``lastname``, and include a hybrid that
+will provide for us the ``fullname``, which is the string concatenation of the two::
+
+    from sqlalchemy.ext.hybrid import hybrid_property
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+
+        @hybrid_property
+        def fullname(self):
+            return self.firstname + " " + self.lastname
+
+Above, the ``fullname`` attribute is interpreted at both the instance and
+class level, so that it is available from an instance::
+
+    some_user = session.query(User).first()
+    print some_user.fullname
+
+as well as usable within queries::
+
+    some_user = session.query(User).filter(User.fullname == "John Smith").first()
+
+The string concatenation example is a simple one, where the Python expression
+can be dual-purposed at the instance and class level. Often, the SQL expression
+must be distinguished from the Python expression, which can be achieved using
+:meth:`.hybrid_property.expression`. Below we illustrate the case where a conditional
+needs to be present inside the hybrid, using the ``if`` statement in Python and the
+:func:`.sql.expression.case` construct for SQL expressions::
+
+    from sqlalchemy.ext.hybrid import hybrid_property
+    from sqlalchemy.sql import case
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+
+        @hybrid_property
+        def fullname(self):
+            if self.firstname is not None:
+                return self.firstname + " " + self.lastname
+            else:
+                return self.lastname
+
+        @fullname.expression
+        def fullname(cls):
+            return case([
+                (cls.firstname != None, cls.firstname + " " + cls.lastname),
+            ], else_ = cls.lastname)
+
+.. _mapper_column_property_sql_expressions:
+
+Using column_property
+---------------------
+
+The :func:`.orm.column_property` function can be used to map a SQL
+expression in a manner similar to a regularly mapped :class:`.Column`.
+With this technique, the attribute is loaded
+along with all other column-mapped attributes at load time. This is in some
+cases an advantage over the usage of hybrids, as the value can be loaded
+up front at the same time as the parent row of the object, particularly if
+the expression is one which links to other tables (typically as a correlated
+subquery) to access data that wouldn't normally be
+available on an already loaded object.
+
+Disadvantages to using :func:`.orm.column_property` for SQL expressions include that
+the expression must be compatible with the SELECT statement emitted for the class
+as a whole, and there are also some configurational quirks which can occur
+when using :func:`.orm.column_property` from declarative mixins.
+
+Our "fullname" example can be expressed using :func:`.orm.column_property` as
+follows::
+
+    from sqlalchemy.orm import column_property
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+        fullname = column_property(firstname + " " + lastname)
+
+Correlated subqueries may be used as well. Below we use the :func:`.select`
+construct to create a SELECT that links together the count of ``Address``
+objects available for a particular ``User``::
+
+    from sqlalchemy.orm import column_property
+    from sqlalchemy import select, func
+    from sqlalchemy import Column, Integer, String, ForeignKey
+
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        address_count = column_property(
+            select([func.count(Address.id)]).\
+                where(Address.user_id==id).\
+                correlate_except(Address)
+        )
+
+In the above example, we define a :func:`.select` construct like the following::
+
+    select([func.count(Address.id)]).\
+        where(Address.user_id==id).\
+        correlate_except(Address)
+
+The meaning of the above statement is: select the count of ``Address.id`` rows
+where the ``Address.user_id`` column is equated to ``id``, which in the context
+of the ``User`` class is the :class:`.Column` named ``id`` (note that ``id`` is
+also the name of a Python built-in function, which is not what we want to use
+here - if we were outside of the ``User`` class definition, we'd use ``User.id``).
+
+The :meth:`.select.correlate_except` directive indicates that each element in the
+FROM clause of this :func:`.select` may be omitted from the FROM list (that is, correlated
+to the enclosing SELECT statement against ``User``) except for the one corresponding
+to ``Address``. This isn't strictly necessary, but prevents ``Address`` from
+being inadvertently omitted from the FROM list in the case of a long string
+of joins between ``User`` and ``Address`` tables where SELECT statements against
+``Address`` are nested.
+
+If import issues prevent the :func:`.column_property` from being defined
+inline with the class, it can be assigned to the class after both
+are configured.
In Declarative this has the effect of calling :meth:`.Mapper.add_property` +to add an additional property after the fact:: + + User.address_count = column_property( + select([func.count(Address.id)]).\ + where(Address.user_id==User.id) + ) + +For many-to-many relationships, use :func:`.and_` to join the fields of the +association table to both tables in a relation, illustrated +here with a classical mapping:: + + from sqlalchemy import and_ + + mapper(Author, authors, properties={ + 'book_count': column_property( + select([func.count(books.c.id)], + and_( + book_authors.c.author_id==authors.c.id, + book_authors.c.book_id==books.c.id + ))) + }) + +Using a plain descriptor +------------------------- + +In cases where a SQL query more elaborate than what :func:`.orm.column_property` +or :class:`.hybrid_property` can provide must be emitted, a regular Python +function accessed as an attribute can be used, assuming the expression +only needs to be available on an already-loaded instance. The function +is decorated with Python's own ``@property`` decorator to mark it as a read-only +attribute. Within the function, :func:`.object_session` +is used to locate the :class:`.Session` corresponding to the current object, +which is then used to emit a query:: + + from sqlalchemy.orm import object_session + from sqlalchemy import select, func + + class User(Base): + __tablename__ = 'user' + id = Column(Integer, primary_key=True) + firstname = Column(String(50)) + lastname = Column(String(50)) + + @property + def address_count(self): + return object_session(self).\ + scalar( + select([func.count(Address.id)]).\ + where(Address.user_id==self.id) + ) + +The plain descriptor approach is useful as a last resort, but is less performant +in the usual case than both the hybrid and column property approaches, in that +it needs to emit a SQL query upon each access. + diff --git a/doc/build/orm/mapper_config.rst b/doc/build/orm/mapper_config.rst index 9139b53f0f..b986d4cab6 100644 --- a/doc/build/orm/mapper_config.rst +++ b/doc/build/orm/mapper_config.rst @@ -1,4 +1,3 @@ -.. module:: sqlalchemy.orm .. _mapper_config_toplevel: @@ -10,1651 +9,13 @@ This section describes a variety of configurational patterns that are usable with mappers. It assumes you've worked through :ref:`ormtutorial_toplevel` and know how to construct and use rudimentary mappers and relationships. -.. _classical_mapping: -Classical Mappings -================== - -A *Classical Mapping* refers to the configuration of a mapped class using the -:func:`.mapper` function, without using the Declarative system. 
As an example, -start with the declarative mapping introduced in :ref:`ormtutorial_toplevel`:: - - class User(Base): - __tablename__ = 'users' - - id = Column(Integer, primary_key=True) - name = Column(String) - fullname = Column(String) - password = Column(String) - -In "classical" form, the table metadata is created separately with the :class:`.Table` -construct, then associated with the ``User`` class via the :func:`.mapper` function:: - - from sqlalchemy import Table, MetaData, Column, ForeignKey, Integer, String - from sqlalchemy.orm import mapper - - metadata = MetaData() - - user = Table('user', metadata, - Column('id', Integer, primary_key=True), - Column('name', String(50)), - Column('fullname', String(50)), - Column('password', String(12)) - ) - - class User(object): - def __init__(self, name, fullname, password): - self.name = name - self.fullname = fullname - self.password = password - - mapper(User, user) - -Information about mapped attributes, such as relationships to other classes, are provided -via the ``properties`` dictionary. The example below illustrates a second :class:`.Table` -object, mapped to a class called ``Address``, then linked to ``User`` via :func:`.relationship`:: - - address = Table('address', metadata, - Column('id', Integer, primary_key=True), - Column('user_id', Integer, ForeignKey('user.id')), - Column('email_address', String(50)) - ) - - mapper(User, user, properties={ - 'addresses' : relationship(Address, backref='user', order_by=address.c.id) - }) - - mapper(Address, address) - -When using classical mappings, classes must be provided directly without the benefit -of the "string lookup" system provided by Declarative. SQL expressions are typically -specified in terms of the :class:`.Table` objects, i.e. ``address.c.id`` above -for the ``Address`` relationship, and not ``Address.id``, as ``Address`` may not -yet be linked to table metadata, nor can we specify a string here. - -Some examples in the documentation still use the classical approach, but note that -the classical as well as Declarative approaches are **fully interchangeable**. Both -systems ultimately create the same configuration, consisting of a :class:`.Table`, -user-defined class, linked together with a :func:`.mapper`. When we talk about -"the behavior of :func:`.mapper`", this includes when using the Declarative system -as well - it's still used, just behind the scenes. - -Customizing Column Properties -============================== - -The default behavior of :func:`~.orm.mapper` is to assemble all the columns in -the mapped :class:`.Table` into mapped object attributes, each of which are -named according to the name of the column itself (specifically, the ``key`` -attribute of :class:`.Column`). This behavior can be -modified in several ways. - -.. _mapper_column_distinct_names: - -Naming Columns Distinctly from Attribute Names ----------------------------------------------- - -A mapping by default shares the same name for a -:class:`.Column` as that of the mapped attribute - specifically -it matches the :attr:`.Column.key` attribute on :class:`.Column`, which -by default is the same as the :attr:`.Column.name`. 
-
-The name assigned to the Python attribute which maps to
-:class:`.Column` can be different from either :attr:`.Column.name` or :attr:`.Column.key`
-just by assigning it that way, as we illustrate here in a Declarative mapping::
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column('user_id', Integer, primary_key=True)
-        name = Column('user_name', String(50))
-
-Where above ``User.id`` resolves to a column named ``user_id``
-and ``User.name`` resolves to a column named ``user_name``.
-
-When mapping to an existing table, the :class:`.Column` object
-can be referenced directly::
-
-    class User(Base):
-        __table__ = user_table
-        id = user_table.c.user_id
-        name = user_table.c.user_name
-
-Or in a classical mapping, placed in the ``properties`` dictionary
-with the desired key::
-
-    mapper(User, user_table, properties={
-       'id': user_table.c.user_id,
-       'name': user_table.c.user_name,
-    })
-
-In the next section we'll examine the usage of ``.key`` more closely.
-
-.. _mapper_automated_reflection_schemes:
-
-Automating Column Naming Schemes from Reflected Tables
-------------------------------------------------------
-
-In the previous section :ref:`mapper_column_distinct_names`, we showed how
-a :class:`.Column` explicitly mapped to a class can have a different attribute
-name than the column. But what if we aren't listing out :class:`.Column`
-objects explicitly, and instead are automating the production of :class:`.Table`
-objects using reflection (e.g. as described in :ref:`metadata_reflection_toplevel`)?
-In this case we can make use of the :meth:`.DDLEvents.column_reflect` event
-to intercept the production of :class:`.Column` objects and provide them
-with the :attr:`.Column.key` of our choice::
-
-    @event.listens_for(Table, "column_reflect")
-    def column_reflect(inspector, table, column_info):
-        # set column.key = "attr_<lower_case_name>"
-        column_info['key'] = "attr_%s" % column_info['name'].lower()
-
-With the above event, the reflection of :class:`.Column` objects will be intercepted
-with our event that adds a new ".key" element, such as in a mapping as below::
-
-    class MyClass(Base):
-        __table__ = Table("some_table", Base.metadata,
-                    autoload=True, autoload_with=some_engine)
-
-If we want to qualify our event to only react for the specific :class:`.MetaData`
-object above, we can check for it in our event::
-
-    @event.listens_for(Table, "column_reflect")
-    def column_reflect(inspector, table, column_info):
-        if table.metadata is Base.metadata:
-            # set column.key = "attr_<lower_case_name>"
-            column_info['key'] = "attr_%s" % column_info['name'].lower()
-
-.. _column_prefix:
-
-Naming All Columns with a Prefix
---------------------------------
-
-A quick approach to prefix column names, typically when mapping
-to an existing :class:`.Table` object, is to use ``column_prefix``::
-
-    class User(Base):
-        __table__ = user_table
-        __mapper_args__ = {'column_prefix':'_'}
-
-The above will place attribute names such as ``_user_id``, ``_user_name``,
-``_password`` etc. on the mapped ``User`` class.
-
-This approach is uncommon in modern usage. For dealing with reflected
-tables, a more flexible approach is to use that described in
-:ref:`mapper_automated_reflection_schemes`.
-
-
-Using column_property for column level options
------------------------------------------------
-
-Options can be specified when mapping a :class:`.Column` using the
-:func:`.column_property` function.
This function -explicitly creates the :class:`.ColumnProperty` used by the -:func:`.mapper` to keep track of the :class:`.Column`; normally, the -:func:`.mapper` creates this automatically. Using :func:`.column_property`, -we can pass additional arguments about how we'd like the :class:`.Column` -to be mapped. Below, we pass an option ``active_history``, -which specifies that a change to this column's value should -result in the former value being loaded first:: - - from sqlalchemy.orm import column_property - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - name = column_property(Column(String(50)), active_history=True) - -:func:`.column_property` is also used to map a single attribute to -multiple columns. This use case arises when mapping to a :func:`~.expression.join` -which has attributes which are equated to each other:: - - class User(Base): - __table__ = user.join(address) - - # assign "user.id", "address.user_id" to the - # "id" attribute - id = column_property(user_table.c.id, address_table.c.user_id) - -For more examples featuring this usage, see :ref:`maptojoin`. - -Another place where :func:`.column_property` is needed is to specify SQL expressions as -mapped attributes, such as below where we create an attribute ``fullname`` -that is the string concatenation of the ``firstname`` and ``lastname`` -columns:: - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - fullname = column_property(firstname + " " + lastname) - -See examples of this usage at :ref:`mapper_sql_expressions`. - -.. autofunction:: column_property - -.. _include_exclude_cols: - -Mapping a Subset of Table Columns ---------------------------------- - -Sometimes, a :class:`.Table` object was made available using the -reflection process described at :ref:`metadata_reflection` to load -the table's structure from the database. -For such a table that has lots of columns that don't need to be referenced -in the application, the ``include_properties`` or ``exclude_properties`` -arguments can specify that only a subset of columns should be mapped. -For example:: - - class User(Base): - __table__ = user_table - __mapper_args__ = { - 'include_properties' :['user_id', 'user_name'] - } - -...will map the ``User`` class to the ``user_table`` table, only including -the ``user_id`` and ``user_name`` columns - the rest are not referenced. -Similarly:: - - class Address(Base): - __table__ = address_table - __mapper_args__ = { - 'exclude_properties' : ['street', 'city', 'state', 'zip'] - } - -...will map the ``Address`` class to the ``address_table`` table, including -all columns present except ``street``, ``city``, ``state``, and ``zip``. - -When this mapping is used, the columns that are not included will not be -referenced in any SELECT statements emitted by :class:`.Query`, nor will there -be any mapped attribute on the mapped class which represents the column; -assigning an attribute of that name will have no effect beyond that of -a normal Python attribute assignment. - -In some cases, multiple columns may have the same name, such as when -mapping to a join of two or more tables that share some column name. 
-``include_properties`` and ``exclude_properties`` can also accommodate -:class:`.Column` objects to more accurately describe which columns -should be included or excluded:: - - class UserAddress(Base): - __table__ = user_table.join(addresses_table) - __mapper_args__ = { - 'exclude_properties' :[address_table.c.id], - 'primary_key' : [user_table.c.id] - } - -.. note:: - - insert and update defaults configured on individual - :class:`.Column` objects, i.e. those described at :ref:`metadata_defaults` - including those configured by the ``default``, ``update``, - ``server_default`` and ``server_onupdate`` arguments, will continue to - function normally even if those :class:`.Column` objects are not mapped. - This is because in the case of ``default`` and ``update``, the - :class:`.Column` object is still present on the underlying - :class:`.Table`, thus allowing the default functions to take place when - the ORM emits an INSERT or UPDATE, and in the case of ``server_default`` - and ``server_onupdate``, the relational database itself maintains these - functions. - - -.. _deferred: - -Deferred Column Loading -======================== - -This feature allows particular columns of a table be loaded only -upon direct access, instead of when the entity is queried using -:class:`.Query`. This feature is useful when one wants to avoid -loading a large text or binary field into memory when it's not needed. -Individual columns can be lazy loaded by themselves or placed into groups that -lazy-load together, using the :func:`.orm.deferred` function to -mark them as "deferred". In the example below, we define a mapping that will load each of -``.excerpt`` and ``.photo`` in separate, individual-row SELECT statements when each -attribute is first referenced on the individual object instance:: - - from sqlalchemy.orm import deferred - from sqlalchemy import Integer, String, Text, Binary, Column - - class Book(Base): - __tablename__ = 'book' - - book_id = Column(Integer, primary_key=True) - title = Column(String(200), nullable=False) - summary = Column(String(2000)) - excerpt = deferred(Column(Text)) - photo = deferred(Column(Binary)) - -Classical mappings as always place the usage of :func:`.orm.deferred` in the -``properties`` dictionary against the table-bound :class:`.Column`:: - - mapper(Book, book_table, properties={ - 'photo':deferred(book_table.c.photo) - }) - -Deferred columns can be associated with a "group" name, so that they load -together when any of them are first accessed. The example below defines a -mapping with a ``photos`` deferred group. When one ``.photo`` is accessed, all three -photos will be loaded in one SELECT statement. 
The ``.excerpt`` will be loaded -separately when it is accessed:: - - class Book(Base): - __tablename__ = 'book' - - book_id = Column(Integer, primary_key=True) - title = Column(String(200), nullable=False) - summary = Column(String(2000)) - excerpt = deferred(Column(Text)) - photo1 = deferred(Column(Binary), group='photos') - photo2 = deferred(Column(Binary), group='photos') - photo3 = deferred(Column(Binary), group='photos') - -You can defer or undefer columns at the :class:`~sqlalchemy.orm.query.Query` -level using options, including :func:`.orm.defer` and :func:`.orm.undefer`:: - - from sqlalchemy.orm import defer, undefer - - query = session.query(Book) - query = query.options(defer('summary')) - query = query.options(undefer('excerpt')) - query.all() - -:func:`.orm.deferred` attributes which are marked with a "group" can be undeferred -using :func:`.orm.undefer_group`, sending in the group name:: - - from sqlalchemy.orm import undefer_group - - query = session.query(Book) - query.options(undefer_group('photos')).all() - -Load Only Cols ---------------- - -An arbitrary set of columns can be selected as "load only" columns, which will -be loaded while deferring all other columns on a given entity, using :func:`.orm.load_only`:: - - from sqlalchemy.orm import load_only - - session.query(Book).options(load_only("summary", "excerpt")) - -.. versionadded:: 0.9.0 - -Deferred Loading with Multiple Entities ---------------------------------------- - -To specify column deferral options within a :class:`.Query` that loads multiple types -of entity, the :class:`.Load` object can specify which parent entity to start with:: - - from sqlalchemy.orm import Load - - query = session.query(Book, Author).join(Book.author) - query = query.options( - Load(Book).load_only("summary", "excerpt"), - Load(Author).defer("bio") - ) - -To specify column deferral options along the path of various relationships, -the options support chaining, where the loading style of each relationship -is specified first, then is chained to the deferral options. Such as, to load -``Book`` instances, then joined-eager-load the ``Author``, then apply deferral -options to the ``Author`` entity:: - - from sqlalchemy.orm import joinedload - - query = session.query(Book) - query = query.options( - joinedload(Book.author).load_only("summary", "excerpt"), - ) - -In the case where the loading style of parent relationships should be left -unchanged, use :func:`.orm.defaultload`:: - - from sqlalchemy.orm import defaultload - - query = session.query(Book) - query = query.options( - defaultload(Book.author).load_only("summary", "excerpt"), - ) - -.. versionadded:: 0.9.0 support for :class:`.Load` and other options which - allow for better targeting of deferral options. - -Column Deferral API -------------------- - -.. autofunction:: deferred - -.. autofunction:: defer - -.. autofunction:: load_only - -.. autofunction:: undefer - -.. autofunction:: undefer_group - -.. _mapper_sql_expressions: - -SQL Expressions as Mapped Attributes -===================================== - -Attributes on a mapped class can be linked to SQL expressions, which can -be used in queries. - -Using a Hybrid --------------- - -The easiest and most flexible way to link relatively simple SQL expressions to a class is to use a so-called -"hybrid attribute", -described in the section :ref:`hybrids_toplevel`. The hybrid provides -for an expression that works at both the Python level as well as at the -SQL expression level. 
For example, below we map a class ``User``, -containing attributes ``firstname`` and ``lastname``, and include a hybrid that -will provide for us the ``fullname``, which is the string concatenation of the two:: - - from sqlalchemy.ext.hybrid import hybrid_property - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - - @hybrid_property - def fullname(self): - return self.firstname + " " + self.lastname - -Above, the ``fullname`` attribute is interpreted at both the instance and -class level, so that it is available from an instance:: - - some_user = session.query(User).first() - print some_user.fullname - -as well as usable wtihin queries:: - - some_user = session.query(User).filter(User.fullname == "John Smith").first() - -The string concatenation example is a simple one, where the Python expression -can be dual purposed at the instance and class level. Often, the SQL expression -must be distinguished from the Python expression, which can be achieved using -:meth:`.hybrid_property.expression`. Below we illustrate the case where a conditional -needs to be present inside the hybrid, using the ``if`` statement in Python and the -:func:`.sql.expression.case` construct for SQL expressions:: - - from sqlalchemy.ext.hybrid import hybrid_property - from sqlalchemy.sql import case - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - - @hybrid_property - def fullname(self): - if self.firstname is not None: - return self.firstname + " " + self.lastname - else: - return self.lastname - - @fullname.expression - def fullname(cls): - return case([ - (cls.firstname != None, cls.firstname + " " + cls.lastname), - ], else_ = cls.lastname) - -.. _mapper_column_property_sql_expressions: - -Using column_property ---------------------- - -The :func:`.orm.column_property` function can be used to map a SQL -expression in a manner similar to a regularly mapped :class:`.Column`. -With this technique, the attribute is loaded -along with all other column-mapped attributes at load time. This is in some -cases an advantage over the usage of hybrids, as the value can be loaded -up front at the same time as the parent row of the object, particularly if -the expression is one which links to other tables (typically as a correlated -subquery) to access data that wouldn't normally be -available on an already loaded object. - -Disadvantages to using :func:`.orm.column_property` for SQL expressions include that -the expression must be compatible with the SELECT statement emitted for the class -as a whole, and there are also some configurational quirks which can occur -when using :func:`.orm.column_property` from declarative mixins. - -Our "fullname" example can be expressed using :func:`.orm.column_property` as -follows:: - - from sqlalchemy.orm import column_property - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - fullname = column_property(firstname + " " + lastname) - -Correlated subqueries may be used as well. 
Below we use the :func:`.select` -construct to create a SELECT that links together the count of ``Address`` -objects available for a particular ``User``:: - - from sqlalchemy.orm import column_property - from sqlalchemy import select, func - from sqlalchemy import Column, Integer, String, ForeignKey - - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - user_id = Column(Integer, ForeignKey('user.id')) - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - address_count = column_property( - select([func.count(Address.id)]).\ - where(Address.user_id==id).\ - correlate_except(Address) - ) - -In the above example, we define a :func:`.select` construct like the following:: - - select([func.count(Address.id)]).\ - where(Address.user_id==id).\ - correlate_except(Address) - -The meaning of the above statement is, select the count of ``Address.id`` rows -where the ``Address.user_id`` column is equated to ``id``, which in the context -of the ``User`` class is the :class:`.Column` named ``id`` (note that ``id`` is -also the name of a Python built in function, which is not what we want to use -here - if we were outside of the ``User`` class definition, we'd use ``User.id``). - -The :meth:`.select.correlate_except` directive indicates that each element in the -FROM clause of this :func:`.select` may be omitted from the FROM list (that is, correlated -to the enclosing SELECT statement against ``User``) except for the one corresponding -to ``Address``. This isn't strictly necessary, but prevents ``Address`` from -being inadvertently omitted from the FROM list in the case of a long string -of joins between ``User`` and ``Address`` tables where SELECT statements against -``Address`` are nested. - -If import issues prevent the :func:`.column_property` from being defined -inline with the class, it can be assigned to the class after both -are configured. In Declarative this has the effect of calling :meth:`.Mapper.add_property` -to add an additional property after the fact:: - - User.address_count = column_property( - select([func.count(Address.id)]).\ - where(Address.user_id==User.id) - ) - -For many-to-many relationships, use :func:`.and_` to join the fields of the -association table to both tables in a relation, illustrated -here with a classical mapping:: - - from sqlalchemy import and_ - - mapper(Author, authors, properties={ - 'book_count': column_property( - select([func.count(books.c.id)], - and_( - book_authors.c.author_id==authors.c.id, - book_authors.c.book_id==books.c.id - ))) - }) - -Using a plain descriptor -------------------------- - -In cases where a SQL query more elaborate than what :func:`.orm.column_property` -or :class:`.hybrid_property` can provide must be emitted, a regular Python -function accessed as an attribute can be used, assuming the expression -only needs to be available on an already-loaded instance. The function -is decorated with Python's own ``@property`` decorator to mark it as a read-only -attribute. 
Within the function, :func:`.object_session` -is used to locate the :class:`.Session` corresponding to the current object, -which is then used to emit a query:: - - from sqlalchemy.orm import object_session - from sqlalchemy import select, func - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - - @property - def address_count(self): - return object_session(self).\ - scalar( - select([func.count(Address.id)]).\ - where(Address.user_id==self.id) - ) - -The plain descriptor approach is useful as a last resort, but is less performant -in the usual case than both the hybrid and column property approaches, in that -it needs to emit a SQL query upon each access. - -Changing Attribute Behavior -============================ - -.. _simple_validators: - -Simple Validators ------------------ - -A quick way to add a "validation" routine to an attribute is to use the -:func:`~sqlalchemy.orm.validates` decorator. An attribute validator can raise -an exception, halting the process of mutating the attribute's value, or can -change the given value into something different. Validators, like all -attribute extensions, are only called by normal userland code; they are not -issued when the ORM is populating the object:: - - from sqlalchemy.orm import validates - - class EmailAddress(Base): - __tablename__ = 'address' - - id = Column(Integer, primary_key=True) - email = Column(String) - - @validates('email') - def validate_email(self, key, address): - assert '@' in address - return address - -Validators also receive collection append events, when items are added to a -collection:: - - from sqlalchemy.orm import validates - - class User(Base): - # ... - - addresses = relationship("Address") - - @validates('addresses') - def validate_address(self, key, address): - assert '@' in address.email - return address - - -The validation function by default does not get emitted for collection -remove events, as the typical expectation is that a value being discarded -doesn't require validation. However, :func:`.validates` supports reception -of these events by specifying ``include_removes=True`` to the decorator. When -this flag is set, the validation function must receive an additional boolean -argument which if ``True`` indicates that the operation is a removal:: - - from sqlalchemy.orm import validates - - class User(Base): - # ... - - addresses = relationship("Address") - - @validates('addresses', include_removes=True) - def validate_address(self, key, address, is_remove): - if is_remove: - raise ValueError( - "not allowed to remove items from the collection") - else: - assert '@' in address.email - return address - -The case where mutually dependent validators are linked via a backref -can also be tailored, using the ``include_backrefs=False`` option; this option, -when set to ``False``, prevents a validation function from emitting if the -event occurs as a result of a backref:: - - from sqlalchemy.orm import validates - - class User(Base): - # ... - - addresses = relationship("Address", backref='user') - - @validates('addresses', include_backrefs=False) - def validate_address(self, key, address): - assert '@' in address.email - return address - -Above, if we were to assign to ``Address.user`` as in ``some_address.user = some_user``, -the ``validate_address()`` function would *not* be emitted, even though an append -occurs to ``some_user.addresses`` - the event is caused by a backref. 
- -Note that the :func:`~.validates` decorator is a convenience function built on -top of attribute events. An application that requires more control over -configuration of attribute change behavior can make use of this system, -described at :class:`~.AttributeEvents`. - -.. autofunction:: validates - -.. _mapper_hybrids: - -Using Descriptors and Hybrids ------------------------------ - -A more comprehensive way to produce modified behavior for an attribute is to -use :term:`descriptors`. These are commonly used in Python using the ``property()`` -function. The standard SQLAlchemy technique for descriptors is to create a -plain descriptor, and to have it read/write from a mapped attribute with a -different name. Below we illustrate this using Python 2.6-style properties:: - - class EmailAddress(Base): - __tablename__ = 'email_address' - - id = Column(Integer, primary_key=True) - - # name the attribute with an underscore, - # different from the column name - _email = Column("email", String) - - # then create an ".email" attribute - # to get/set "._email" - @property - def email(self): - return self._email - - @email.setter - def email(self, email): - self._email = email - -The approach above will work, but there's more we can add. While our -``EmailAddress`` object will shuttle the value through the ``email`` -descriptor and into the ``_email`` mapped attribute, the class level -``EmailAddress.email`` attribute does not have the usual expression semantics -usable with :class:`.Query`. To provide these, we instead use the -:mod:`~sqlalchemy.ext.hybrid` extension as follows:: - - from sqlalchemy.ext.hybrid import hybrid_property - - class EmailAddress(Base): - __tablename__ = 'email_address' - - id = Column(Integer, primary_key=True) - - _email = Column("email", String) - - @hybrid_property - def email(self): - return self._email - - @email.setter - def email(self, email): - self._email = email - -The ``.email`` attribute, in addition to providing getter/setter behavior when we have an -instance of ``EmailAddress``, also provides a SQL expression when used at the class level, -that is, from the ``EmailAddress`` class directly: - -.. sourcecode:: python+sql - - from sqlalchemy.orm import Session - session = Session() - - {sql}address = session.query(EmailAddress).\ - filter(EmailAddress.email == 'address@example.com').\ - one() - SELECT address.email AS address_email, address.id AS address_id - FROM address - WHERE address.email = ? - ('address@example.com',) - {stop} - - address.email = 'otheraddress@example.com' - {sql}session.commit() - UPDATE address SET email=? WHERE address.id = ? - ('otheraddress@example.com', 1) - COMMIT - {stop} - -The :class:`~.hybrid_property` also allows us to change the behavior of the -attribute, including defining separate behaviors when the attribute is -accessed at the instance level versus at the class/expression level, using the -:meth:`.hybrid_property.expression` modifier. 
Such as, if we wanted to add a -host name automatically, we might define two sets of string manipulation -logic:: - - class EmailAddress(Base): - __tablename__ = 'email_address' - - id = Column(Integer, primary_key=True) - - _email = Column("email", String) - - @hybrid_property - def email(self): - """Return the value of _email up until the last twelve - characters.""" - - return self._email[:-12] - - @email.setter - def email(self, email): - """Set the value of _email, tacking on the twelve character - value @example.com.""" - - self._email = email + "@example.com" - - @email.expression - def email(cls): - """Produce a SQL expression that represents the value - of the _email column, minus the last twelve characters.""" - - return func.substr(cls._email, 0, func.length(cls._email) - 12) - -Above, accessing the ``email`` property of an instance of ``EmailAddress`` -will return the value of the ``_email`` attribute, removing or adding the -hostname ``@example.com`` from the value. When we query against the ``email`` -attribute, a SQL function is rendered which produces the same effect: - -.. sourcecode:: python+sql - - {sql}address = session.query(EmailAddress).filter(EmailAddress.email == 'address').one() - SELECT address.email AS address_email, address.id AS address_id - FROM address - WHERE substr(address.email, ?, length(address.email) - ?) = ? - (0, 12, 'address') - {stop} - -Read more about Hybrids at :ref:`hybrids_toplevel`. - -.. _synonyms: - -Synonyms --------- - -Synonyms are a mapper-level construct that allow any attribute on a class -to "mirror" another attribute that is mapped. - -In the most basic sense, the synonym is an easy way to make a certain -attribute available by an additional name:: - - class MyClass(Base): - __tablename__ = 'my_table' - - id = Column(Integer, primary_key=True) - job_status = Column(String(50)) - - status = synonym("job_status") - -The above class ``MyClass`` has two attributes, ``.job_status`` and -``.status`` that will behave as one attribute, both at the expression -level:: - - >>> print MyClass.job_status == 'some_status' - my_table.job_status = :job_status_1 - - >>> print MyClass.status == 'some_status' - my_table.job_status = :job_status_1 - -and at the instance level:: - - >>> m1 = MyClass(status='x') - >>> m1.status, m1.job_status - ('x', 'x') - - >>> m1.job_status = 'y' - >>> m1.status, m1.job_status - ('y', 'y') - -The :func:`.synonym` can be used for any kind of mapped attribute that -subclasses :class:`.MapperProperty`, including mapped columns and relationships, -as well as synonyms themselves. - -Beyond a simple mirror, :func:`.synonym` can also be made to reference -a user-defined :term:`descriptor`. 
We can supply our
-``status`` synonym with a ``@property``::
-
-    class MyClass(Base):
-        __tablename__ = 'my_table'
-
-        id = Column(Integer, primary_key=True)
-        status = Column(String(50))
-
-        @property
-        def job_status(self):
-            return "Status: " + self.status
-
-        job_status = synonym("status", descriptor=job_status)
-
-When using Declarative, the above pattern can be expressed more succinctly
-using the :func:`.synonym_for` decorator::
-
-    from sqlalchemy.ext.declarative import synonym_for
-
-    class MyClass(Base):
-        __tablename__ = 'my_table'
-
-        id = Column(Integer, primary_key=True)
-        status = Column(String(50))
-
-        @synonym_for("status")
-        @property
-        def job_status(self):
-            return "Status: " + self.status
-
-While the :func:`.synonym` is useful for simple mirroring, the use case
-of augmenting attribute behavior with descriptors is better handled in modern
-usage using the :ref:`hybrid attribute <mapper_hybrids>` feature, which
-is more oriented towards Python descriptors. Technically, a :func:`.synonym`
-can do everything that a :class:`.hybrid_property` can do, as it also supports
-injection of custom SQL capabilities, but the hybrid is more straightforward
-to use in more complex situations.
-
-.. autofunction:: synonym
-
-.. _custom_comparators:
-
-Operator Customization
-----------------------
-
-The "operators" used by the SQLAlchemy ORM and Core expression language
-are fully customizable. For example, the comparison expression
-``User.name == 'ed'`` makes usage of an operator built into Python
-itself called ``operator.eq`` - the actual SQL construct which SQLAlchemy
-associates with such an operator can be modified. New
-operations can be associated with column expressions as well. The operators
-which take place for column expressions are most directly redefined at the
-type level - see the
-section :ref:`types_operators` for a description.
-
-ORM level functions like :func:`.column_property`, :func:`.relationship`,
-and :func:`.composite` also provide for operator redefinition at the ORM
-level, by passing a :class:`.PropComparator` subclass to the ``comparator_factory``
-argument of each function. Customization of operators at this level is a
-rare use case. See the documentation at :class:`.PropComparator`
-for an overview.
-
-.. _mapper_composite:
-
-Composite Column Types
-=======================
-
-Sets of columns can be associated with a single user-defined datatype. The ORM
-provides a single attribute which represents the group of columns using the
-class you provide.
-
-.. versionchanged:: 0.7
-    Composites have been simplified such that
-    they no longer "conceal" the underlying column based attributes. Additionally,
-    in-place mutation is no longer automatic; see the section below on
-    enabling mutability to support tracking of in-place changes.
-
-.. versionchanged:: 0.9
-    Composites will return their object-form, rather than as individual columns,
-    when used in a column-oriented :class:`.Query` construct. See :ref:`migration_2824`.
-
-A simple example represents pairs of columns as a ``Point`` object.
-``Point`` represents such a pair as ``.x`` and ``.y``:: - - class Point(object): - def __init__(self, x, y): - self.x = x - self.y = y - - def __composite_values__(self): - return self.x, self.y - - def __repr__(self): - return "Point(x=%r, y=%r)" % (self.x, self.y) - - def __eq__(self, other): - return isinstance(other, Point) and \ - other.x == self.x and \ - other.y == self.y - - def __ne__(self, other): - return not self.__eq__(other) - -The requirements for the custom datatype class are that it have a constructor -which accepts positional arguments corresponding to its column format, and -also provides a method ``__composite_values__()`` which returns the state of -the object as a list or tuple, in order of its column-based attributes. It -also should supply adequate ``__eq__()`` and ``__ne__()`` methods which test -the equality of two instances. - -We will create a mapping to a table ``vertice``, which represents two points -as ``x1/y1`` and ``x2/y2``. These are created normally as :class:`.Column` -objects. Then, the :func:`.composite` function is used to assign new -attributes that will represent sets of columns via the ``Point`` class:: - - from sqlalchemy import Column, Integer - from sqlalchemy.orm import composite - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class Vertex(Base): - __tablename__ = 'vertice' - - id = Column(Integer, primary_key=True) - x1 = Column(Integer) - y1 = Column(Integer) - x2 = Column(Integer) - y2 = Column(Integer) - - start = composite(Point, x1, y1) - end = composite(Point, x2, y2) - -A classical mapping above would define each :func:`.composite` -against the existing table:: - - mapper(Vertex, vertice_table, properties={ - 'start':composite(Point, vertice_table.c.x1, vertice_table.c.y1), - 'end':composite(Point, vertice_table.c.x2, vertice_table.c.y2), - }) - -We can now persist and use ``Vertex`` instances, as well as query for them, -using the ``.start`` and ``.end`` attributes against ad-hoc ``Point`` instances: - -.. sourcecode:: python+sql - - >>> v = Vertex(start=Point(3, 4), end=Point(5, 6)) - >>> session.add(v) - >>> q = session.query(Vertex).filter(Vertex.start == Point(3, 4)) - {sql}>>> print q.first().start - BEGIN (implicit) - INSERT INTO vertice (x1, y1, x2, y2) VALUES (?, ?, ?, ?) - (3, 4, 5, 6) - SELECT vertice.id AS vertice_id, - vertice.x1 AS vertice_x1, - vertice.y1 AS vertice_y1, - vertice.x2 AS vertice_x2, - vertice.y2 AS vertice_y2 - FROM vertice - WHERE vertice.x1 = ? AND vertice.y1 = ? - LIMIT ? OFFSET ? - (3, 4, 1, 0) - {stop}Point(x=3, y=4) - -.. autofunction:: composite - - -Tracking In-Place Mutations on Composites ------------------------------------------ - -In-place changes to an existing composite value are -not tracked automatically. Instead, the composite class needs to provide -events to its parent object explicitly. This task is largely automated -via the usage of the :class:`.MutableComposite` mixin, which uses events -to associate each user-defined composite object with all parent associations. -Please see the example in :ref:`mutable_composites`. - -.. versionchanged:: 0.7 - In-place changes to an existing composite value are no longer - tracked automatically; the functionality is superseded by the - :class:`.MutableComposite` class. - -.. 
_composite_operations: - -Redefining Comparison Operations for Composites ------------------------------------------------ - -The "equals" comparison operation by default produces an AND of all -corresponding columns equated to one another. This can be changed using -the ``comparator_factory`` argument to :func:`.composite`, where we -specify a custom :class:`.CompositeProperty.Comparator` class -to define existing or new operations. -Below we illustrate the "greater than" operator, implementing -the same expression that the base "greater than" does:: - - from sqlalchemy.orm.properties import CompositeProperty - from sqlalchemy import sql - - class PointComparator(CompositeProperty.Comparator): - def __gt__(self, other): - """redefine the 'greater than' operation""" - - return sql.and_(*[a>b for a, b in - zip(self.__clause_element__().clauses, - other.__composite_values__())]) - - class Vertex(Base): - ___tablename__ = 'vertice' - - id = Column(Integer, primary_key=True) - x1 = Column(Integer) - y1 = Column(Integer) - x2 = Column(Integer) - y2 = Column(Integer) - - start = composite(Point, x1, y1, - comparator_factory=PointComparator) - end = composite(Point, x2, y2, - comparator_factory=PointComparator) - -.. _bundles: - -Column Bundles -=============== - -The :class:`.Bundle` may be used to query for groups of columns under one -namespace. - -.. versionadded:: 0.9.0 - -The bundle allows columns to be grouped together:: - - from sqlalchemy.orm import Bundle - - bn = Bundle('mybundle', MyClass.data1, MyClass.data2) - for row in session.query(bn).filter(bn.c.data1 == 'd1'): - print row.mybundle.data1, row.mybundle.data2 - -The bundle can be subclassed to provide custom behaviors when results -are fetched. The method :meth:`.Bundle.create_row_processor` is given -the :class:`.Query` and a set of "row processor" functions at query execution -time; these processor functions when given a result row will return the -individual attribute value, which can then be adapted into any kind of -return data structure. Below illustrates replacing the usual :class:`.KeyedTuple` -return structure with a straight Python dictionary:: - - from sqlalchemy.orm import Bundle - - class DictBundle(Bundle): - def create_row_processor(self, query, procs, labels): - """Override create_row_processor to return values as dictionaries""" - def proc(row, result): - return dict( - zip(labels, (proc(row, result) for proc in procs)) - ) - return proc - -A result from the above bundle will return dictionary values:: - - bn = DictBundle('mybundle', MyClass.data1, MyClass.data2) - for row in session.query(bn).filter(bn.c.data1 == 'd1'): - print row.mybundle['data1'], row.mybundle['data2'] - -The :class:`.Bundle` construct is also integrated into the behavior -of :func:`.composite`, where it is used to return composite attributes as objects -when queried as individual attributes. - - -.. _maptojoin: - -Mapping a Class against Multiple Tables -======================================== - -Mappers can be constructed against arbitrary relational units (called -*selectables*) in addition to plain tables. 
For example, the :func:`~.expression.join` -function creates a selectable unit comprised of -multiple tables, complete with its own composite primary key, which can be -mapped in the same way as a :class:`.Table`:: - - from sqlalchemy import Table, Column, Integer, \ - String, MetaData, join, ForeignKey - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import column_property - - metadata = MetaData() - - # define two Table objects - user_table = Table('user', metadata, - Column('id', Integer, primary_key=True), - Column('name', String), - ) - - address_table = Table('address', metadata, - Column('id', Integer, primary_key=True), - Column('user_id', Integer, ForeignKey('user.id')), - Column('email_address', String) - ) - - # define a join between them. This - # takes place across the user.id and address.user_id - # columns. - user_address_join = join(user_table, address_table) - - Base = declarative_base() - - # map to it - class AddressUser(Base): - __table__ = user_address_join - - id = column_property(user_table.c.id, address_table.c.user_id) - address_id = address_table.c.id - -In the example above, the join expresses columns for both the -``user`` and the ``address`` table. The ``user.id`` and ``address.user_id`` -columns are equated by foreign key, so in the mapping they are defined -as one attribute, ``AddressUser.id``, using :func:`.column_property` to -indicate a specialized column mapping. Based on this part of the -configuration, the mapping will copy -new primary key values from ``user.id`` into the ``address.user_id`` column -when a flush occurs. - -Additionally, the ``address.id`` column is mapped explicitly to -an attribute named ``address_id``. This is to **disambiguate** the -mapping of the ``address.id`` column from the same-named ``AddressUser.id`` -attribute, which here has been assigned to refer to the ``user`` table -combined with the ``address.user_id`` foreign key. - -The natural primary key of the above mapping is the composite of -``(user.id, address.id)``, as these are the primary key columns of the -``user`` and ``address`` table combined together. The identity of an -``AddressUser`` object will be in terms of these two values, and -is represented from an ``AddressUser`` object as -``(AddressUser.id, AddressUser.address_id)``. - - -Mapping a Class against Arbitrary Selects -========================================= - -Similar to mapping against a join, a plain :func:`~.expression.select` object can be used with a -mapper as well. The example fragment below illustrates mapping a class -called ``Customer`` to a :func:`~.expression.select` which includes a join to a -subquery:: - - from sqlalchemy import select, func - - subq = select([ - func.count(orders.c.id).label('order_count'), - func.max(orders.c.price).label('highest_order'), - orders.c.customer_id - ]).group_by(orders.c.customer_id).alias() - - customer_select = select([customers, subq]).\ - select_from( - join(customers, subq, - customers.c.id == subq.c.customer_id) - ).alias() - - class Customer(Base): - __table__ = customer_select - -Above, the full row represented by ``customer_select`` will be all the -columns of the ``customers`` table, in addition to those columns -exposed by the ``subq`` subquery, which are ``order_count``, -``highest_order``, and ``customer_id``. Mapping the ``Customer`` -class to this selectable then creates a class which will contain -those attributes. 
- -When the ORM persists new instances of ``Customer``, only the -``customers`` table will actually receive an INSERT. This is because the -primary key of the ``orders`` table is not represented in the mapping; the ORM -will only emit an INSERT into a table for which it has mapped the primary -key. - -.. note:: - - The practice of mapping to arbitrary SELECT statements, especially - complex ones as above, is - almost never needed; it necessarily tends to produce complex queries - which are often less efficient than that which would be produced - by direct query construction. The practice is to some degree - based on the very early history of SQLAlchemy where the :func:`.mapper` - construct was meant to represent the primary querying interface; - in modern usage, the :class:`.Query` object can be used to construct - virtually any SELECT statement, including complex composites, and should - be favored over the "map-to-selectable" approach. - -Multiple Mappers for One Class -============================== - -In modern SQLAlchemy, a particular class is only mapped by one :func:`.mapper` -at a time. The rationale here is that the :func:`.mapper` modifies the class itself, not only -persisting it towards a particular :class:`.Table`, but also *instrumenting* -attributes upon the class which are structured specifically according to the -table metadata. - -One potential use case for another mapper to exist at the same time is if we -wanted to load instances of our class not just from the immediate :class:`.Table` -to which it is mapped, but from another selectable that is a derivation of that -:class:`.Table`. To create a second mapper that only handles querying -when used explicitly, we can use the :paramref:`.mapper.non_primary` argument. -In practice, this approach is usually not needed, as we -can do this sort of thing at query time using methods such as -:meth:`.Query.select_from`, however it is useful in the rare case that we -wish to build a :func:`.relationship` to such a mapper. An example of this is -at :ref:`relationship_non_primary_mapper`. - -Another potential use is if we genuinely want instances of our class to -be persisted into different tables at different times; certain kinds of -data sharding configurations may persist a particular class into tables -that are identical in structure except for their name. For this kind of -pattern, Python offers a better approach than the complexity of mapping -the same class multiple times, which is to instead create new mapped classes -for each target table. SQLAlchemy refers to this as the "entity name" -pattern, which is described as a recipe at `Entity Name -`_. - - -.. _mapping_constructors: - -Constructors and Object Initialization -======================================= - -Mapping imposes no restrictions or requirements on the constructor -(``__init__``) method for the class. You are free to require any arguments for -the function that you wish, assign attributes to the instance that are unknown -to the ORM, and generally do anything else you would normally do when writing -a constructor for a Python class. - -The SQLAlchemy ORM does not call ``__init__`` when recreating objects from -database rows. The ORM's process is somewhat akin to the Python standard -library's ``pickle`` module, invoking the low level ``__new__`` method and -then quietly restoring attributes directly on the instance rather than calling -``__init__``. 
- -If you need to do some setup on database-loaded instances before they're ready -to use, you can use the ``@reconstructor`` decorator to tag a method as the -ORM counterpart to ``__init__``. SQLAlchemy will call this method with no -arguments every time it loads or reconstructs one of your instances. This is -useful for recreating transient properties that are normally assigned in your -``__init__``:: - - from sqlalchemy import orm - - class MyMappedClass(object): - def __init__(self, data): - self.data = data - # we need stuff on all instances, but not in the database. - self.stuff = [] - - @orm.reconstructor - def init_on_load(self): - self.stuff = [] - -When ``obj = MyMappedClass()`` is executed, Python calls the ``__init__`` -method as normal and the ``data`` argument is required. When instances are -loaded during a :class:`~sqlalchemy.orm.query.Query` operation as in -``query(MyMappedClass).one()``, ``init_on_load`` is called. - -Any method may be tagged as the :func:`~sqlalchemy.orm.reconstructor`, even -the ``__init__`` method. SQLAlchemy will call the reconstructor method with no -arguments. Scalar (non-collection) database-mapped attributes of the instance -will be available for use within the function. Eagerly-loaded collections are -generally not yet available and will usually only contain the first element. -ORM state changes made to objects at this stage will not be recorded for the -next flush() operation, so the activity within a reconstructor should be -conservative. - -:func:`~sqlalchemy.orm.reconstructor` is a shortcut into a larger system -of "instance level" events, which can be subscribed to using the -event API - see :class:`.InstanceEvents` for the full API description -of these events. - -.. autofunction:: reconstructor - - -.. _mapper_version_counter: - -Configuring a Version Counter -============================= - -The :class:`.Mapper` supports management of a :term:`version id column`, which -is a single table column that increments or otherwise updates its value -each time an ``UPDATE`` to the mapped table occurs. This value is checked each -time the ORM emits an ``UPDATE`` or ``DELETE`` against the row to ensure that -the value held in memory matches the database value. - -.. warning:: - - Because the versioning feature relies upon comparison of the **in memory** - record of an object, the feature only applies to the :meth:`.Session.flush` - process, where the ORM flushes individual in-memory rows to the database. - It does **not** take effect when performing - a multirow UPDATE or DELETE using :meth:`.Query.update` or :meth:`.Query.delete` - methods, as these methods only emit an UPDATE or DELETE statement but otherwise - do not have direct access to the contents of those rows being affected. - -The purpose of this feature is to detect when two concurrent transactions -are modifying the same row at roughly the same time, or alternatively to provide -a guard against the usage of a "stale" row in a system that might be re-using -data from a previous transaction without refreshing (e.g. if one sets ``expire_on_commit=False`` -with a :class:`.Session`, it is possible to re-use the data from a previous -transaction). - -.. 
topic:: Concurrent transaction updates - - When detecting concurrent updates within transactions, it is typically the - case that the database's transaction isolation level is below the level of - :term:`repeatable read`; otherwise, the transaction will not be exposed - to a new row value created by a concurrent update which conflicts with - the locally updated value. In this case, the SQLAlchemy versioning - feature will typically not be useful for in-transaction conflict detection, - though it still can be used for cross-transaction staleness detection. - - The database that enforces repeatable reads will typically either have locked the - target row against a concurrent update, or is employing some form - of multi version concurrency control such that it will emit an error - when the transaction is committed. SQLAlchemy's version_id_col is an alternative - which allows version tracking to occur for specific tables within a transaction - that otherwise might not have this isolation level set. - - .. seealso:: - - `Repeatable Read Isolation Level `_ - Postgresql's implementation of repeatable read, including a description of the error condition. - -Simple Version Counting ------------------------ - -The most straightforward way to track versions is to add an integer column -to the mapped table, then establish it as the ``version_id_col`` within the -mapper options:: - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - version_id = Column(Integer, nullable=False) - name = Column(String(50), nullable=False) - - __mapper_args__ = { - "version_id_col": version_id - } - -Above, the ``User`` mapping tracks integer versions using the column -``version_id``. When an object of type ``User`` is first flushed, the -``version_id`` column will be given a value of "1". Then, an UPDATE -of the table later on will always be emitted in a manner similar to the -following:: - - UPDATE user SET version_id=:version_id, name=:name - WHERE user.id = :user_id AND user.version_id = :user_version_id - {"name": "new name", "version_id": 2, "user_id": 1, "user_version_id": 1} - -The above UPDATE statement is updating the row that not only matches -``user.id = 1``, it also is requiring that ``user.version_id = 1``, where "1" -is the last version identifier we've been known to use on this object. -If a transaction elsewhere has modified the row independently, this version id -will no longer match, and the UPDATE statement will report that no rows matched; -this is the condition that SQLAlchemy tests, that exactly one row matched our -UPDATE (or DELETE) statement. If zero rows match, that indicates our version -of the data is stale, and a :exc:`.StaleDataError` is raised. - -.. _custom_version_counter: - -Custom Version Counters / Types -------------------------------- - -Other kinds of values or counters can be used for versioning. Common types include -dates and GUIDs. When using an alternate type or counter scheme, SQLAlchemy -provides a hook for this scheme using the ``version_id_generator`` argument, -which accepts a version generation callable. This callable is passed the value of the current -known version, and is expected to return the subsequent version. 
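
The default integer scheme is equivalent to supplying such a callable
explicitly; a minimal sketch, assuming the integer ``User`` mapping from
the previous section (class body elided)::

    __mapper_args__ = {
        "version_id_col": version_id,

        # the callable receives the current known version (None
        # on a pending INSERT) and returns the next one; this
        # mirrors the default integer counting scheme
        "version_id_generator": lambda version: (version or 0) + 1
    }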
- 

For example, if we wanted to track the versioning of our ``User`` class
using a randomly generated GUID, we could do this (note that some backends
support a native GUID type, but we illustrate here using a simple string)::

    import uuid

    class User(Base):
        __tablename__ = 'user'

        id = Column(Integer, primary_key=True)
        version_uuid = Column(String(32))
        name = Column(String(50), nullable=False)

        __mapper_args__ = {
            'version_id_col': version_uuid,
            'version_id_generator': lambda version: uuid.uuid4().hex
        }

The persistence engine will call upon ``uuid.uuid4()`` each time a
``User`` object is subject to an INSERT or an UPDATE. In this case, our
version generation function can disregard the incoming value of ``version``,
as the ``uuid4()`` function generates identifiers without any prerequisite
value. If we were using a sequential versioning scheme such as numeric or
a special character system, we could make use of the given ``version`` in
order to help determine the subsequent value.

.. seealso::

    :ref:`custom_guid_type`

.. _server_side_version_counter:

Server Side Version Counters
----------------------------

The ``version_id_generator`` can also be configured to rely upon a value
that is generated by the database. In this case, the database would need
some means of generating new identifiers when a row is subject to an INSERT
as well as with an UPDATE. For the UPDATE case, typically an update trigger
is needed, unless the database in question supports some other native
version identifier. The Postgresql database in particular supports a system
column called `xmin `_
which provides UPDATE versioning. We can make use
of the Postgresql ``xmin`` column to version our ``User``
class as follows::

    class User(Base):
        __tablename__ = 'user'

        id = Column(Integer, primary_key=True)
        name = Column(String(50), nullable=False)
        xmin = Column("xmin", Integer, system=True)

        __mapper_args__ = {
            'version_id_col': xmin,
            'version_id_generator': False
        }

With the above mapping, the ORM will rely upon the ``xmin`` column for
automatically providing the new value of the version id counter.

.. topic:: creating tables that refer to system columns

    In the above scenario, as ``xmin`` is a system column provided by Postgresql,
    we use the ``system=True`` argument to mark it as a system-provided
    column, omitted from the ``CREATE TABLE`` statement.


The ORM typically does not actively fetch the values of database-generated
columns when it emits an INSERT or UPDATE, instead leaving those columns as
"expired" and to be fetched when they are next accessed, unless the ``eager_defaults``
:func:`.mapper` flag is set. However, when a
server side version column is used, the ORM needs to actively fetch the newly
generated value. This is so that the version counter is set up *before*
any concurrent transaction may update it again. This fetching is also
best done simultaneously within the INSERT or UPDATE statement using :term:`RETURNING`;
otherwise, if a SELECT statement is emitted afterwards, there is still a potential
race condition where the version counter may change before it can be fetched.
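
The ``eager_defaults`` flag mentioned above is itself a :func:`.mapper`
option; a minimal sketch reusing the ``xmin`` mapping (illustrative only,
as a server side version column is in any case fetched eagerly; the flag
extends this behavior to other server-generated columns)::

    __mapper_args__ = {
        'version_id_col': xmin,
        'version_id_generator': False,

        # also fetch other server-generated defaults at flush time,
        # rather than leaving them expired until next access
        'eager_defaults': True
    }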
- 

When the target database supports RETURNING, an INSERT statement for our
``User`` class will look like this::

    INSERT INTO "user" (name) VALUES (%(name)s) RETURNING "user".id, "user".xmin
    {'name': 'ed'}

Where above, the ORM can acquire any newly generated primary key values along
with server-generated version identifiers in one statement. When the backend
does not support RETURNING, an additional SELECT must be emitted for **every**
INSERT and UPDATE, which is much less efficient, and also introduces the possibility of
missed version counters::

    INSERT INTO "user" (name) VALUES (%(name)s)
    {'name': 'ed'}

    SELECT "user".version_id AS user_version_id FROM "user" WHERE
    "user".id = :param_1
    {"param_1": 1}

It is *strongly recommended* that server side version counters only be used
when absolutely necessary and only on backends that support :term:`RETURNING`,
e.g. Postgresql, Oracle, SQL Server (though SQL Server has
`major caveats `_ when triggers are used), Firebird.

.. versionadded:: 0.9.0

    Support for server side version identifier tracking.

Programmatic or Conditional Version Counters
---------------------------------------------

When ``version_id_generator`` is set to ``False``, we can also programmatically
(and conditionally) set the version identifier on our object in the same way
we assign any other mapped attribute. For example, if we used our UUID example
above but set ``version_id_generator`` to ``False``, we can set the version
identifier as we choose::

    import uuid

    class User(Base):
        __tablename__ = 'user'

        id = Column(Integer, primary_key=True)
        version_uuid = Column(String(32))
        name = Column(String(50), nullable=False)

        __mapper_args__ = {
            'version_id_col': version_uuid,
            'version_id_generator': False
        }

    u1 = User(name='u1', version_uuid=uuid.uuid4().hex)

    session.add(u1)

    session.commit()

    u1.name = 'u2'
    u1.version_uuid = uuid.uuid4().hex

    session.commit()

We can update our ``User`` object without incrementing the version counter
as well; the value of the counter will remain unchanged, and the UPDATE
statement will still check against the previous value. This may be useful
for schemes where only certain classes of UPDATE are sensitive to concurrency
issues::

    # will leave version_uuid unchanged
    u1.name = 'u3'
    session.commit()

.. versionadded:: 0.9.0

    Support for programmatic and conditional version identifier tracking.


Class Mapping API
=================

.. autofunction:: mapper

.. autofunction:: object_mapper

.. autofunction:: class_mapper

.. autofunction:: configure_mappers

.. autofunction:: clear_mappers

.. autofunction:: sqlalchemy.orm.util.identity_key

.. autofunction:: sqlalchemy.orm.util.polymorphic_union

.. autoclass:: sqlalchemy.orm.mapper.Mapper
    :members:
+.. toctree::
+    :maxdepth: 2
+
+    classical
+    scalar_mapping
+    inheritance
+    nonstandard_mappings
+    versioning
+    mapping_api
diff --git a/doc/build/orm/mapping_api.rst b/doc/build/orm/mapping_api.rst
new file mode 100644
index 0000000000..cd7c379cd2
--- /dev/null
+++ b/doc/build/orm/mapping_api.rst
@@ -0,0 +1,22 @@
+.. module:: sqlalchemy.orm
+
+Class Mapping API
+=================
+
+.. autofunction:: mapper
+
+.. autofunction:: object_mapper
+
+.. autofunction:: class_mapper
+
+.. autofunction:: configure_mappers
+
+.. autofunction:: clear_mappers
+
+.. autofunction:: sqlalchemy.orm.util.identity_key
+
+.. autofunction:: sqlalchemy.orm.util.polymorphic_union
+
+.. 
autoclass:: sqlalchemy.orm.mapper.Mapper + :members: + diff --git a/doc/build/orm/mapping_columns.rst b/doc/build/orm/mapping_columns.rst new file mode 100644 index 0000000000..b36bfd2f10 --- /dev/null +++ b/doc/build/orm/mapping_columns.rst @@ -0,0 +1,222 @@ +.. module:: sqlalchemy.orm + +Mapping Table Columns +===================== + +The default behavior of :func:`~.orm.mapper` is to assemble all the columns in +the mapped :class:`.Table` into mapped object attributes, each of which are +named according to the name of the column itself (specifically, the ``key`` +attribute of :class:`.Column`). This behavior can be +modified in several ways. + +.. _mapper_column_distinct_names: + +Naming Columns Distinctly from Attribute Names +---------------------------------------------- + +A mapping by default shares the same name for a +:class:`.Column` as that of the mapped attribute - specifically +it matches the :attr:`.Column.key` attribute on :class:`.Column`, which +by default is the same as the :attr:`.Column.name`. + +The name assigned to the Python attribute which maps to +:class:`.Column` can be different from either :attr:`.Column.name` or :attr:`.Column.key` +just by assigning it that way, as we illustrate here in a Declarative mapping:: + + class User(Base): + __tablename__ = 'user' + id = Column('user_id', Integer, primary_key=True) + name = Column('user_name', String(50)) + +Where above ``User.id`` resolves to a column named ``user_id`` +and ``User.name`` resolves to a column named ``user_name``. + +When mapping to an existing table, the :class:`.Column` object +can be referenced directly:: + + class User(Base): + __table__ = user_table + id = user_table.c.user_id + name = user_table.c.user_name + +Or in a classical mapping, placed in the ``properties`` dictionary +with the desired key:: + + mapper(User, user_table, properties={ + 'id': user_table.c.user_id, + 'name': user_table.c.user_name, + }) + +In the next section we'll examine the usage of ``.key`` more closely. + +.. _mapper_automated_reflection_schemes: + +Automating Column Naming Schemes from Reflected Tables +------------------------------------------------------ + +In the previous section :ref:`mapper_column_distinct_names`, we showed how +a :class:`.Column` explicitly mapped to a class can have a different attribute +name than the column. But what if we aren't listing out :class:`.Column` +objects explicitly, and instead are automating the production of :class:`.Table` +objects using reflection (e.g. as described in :ref:`metadata_reflection_toplevel`)? 
+In this case we can make use of the :meth:`.DDLEvents.column_reflect` event
+to intercept the production of :class:`.Column` objects and provide them
+with the :attr:`.Column.key` of our choice::
+
+    @event.listens_for(Table, "column_reflect")
+    def column_reflect(inspector, table, column_info):
+        # set column.key = "attr_<lower case name>"
+        column_info['key'] = "attr_%s" % column_info['name'].lower()
+
+With the above event, the reflection of :class:`.Column` objects will be
+intercepted by our event, which adds a new ``.key`` element, such as in a
+mapping as below::
+
+    class MyClass(Base):
+        __table__ = Table("some_table", Base.metadata,
+                    autoload=True, autoload_with=some_engine)
+
+If we want to qualify our event to only react for the specific :class:`.MetaData`
+object above, we can check for it in our event::
+
+    @event.listens_for(Table, "column_reflect")
+    def column_reflect(inspector, table, column_info):
+        if table.metadata is Base.metadata:
+            # set column.key = "attr_<lower case name>"
+            column_info['key'] = "attr_%s" % column_info['name'].lower()
+
+.. _column_prefix:
+
+Naming All Columns with a Prefix
+--------------------------------
+
+A quick approach to prefix column names, typically when mapping
+to an existing :class:`.Table` object, is to use ``column_prefix``::
+
+    class User(Base):
+        __table__ = user_table
+        __mapper_args__ = {'column_prefix': '_'}
+
+The above will place attribute names such as ``_user_id``, ``_user_name``,
+``_password`` etc. on the mapped ``User`` class.
+
+This approach is uncommon in modern usage. For dealing with reflected
+tables, a more flexible approach is to use that described in
+:ref:`mapper_automated_reflection_schemes`.
+
+
+Using column_property for column level options
+-----------------------------------------------
+
+Options can be specified when mapping a :class:`.Column` using the
+:func:`.column_property` function. This function
+explicitly creates the :class:`.ColumnProperty` used by the
+:func:`.mapper` to keep track of the :class:`.Column`; normally, the
+:func:`.mapper` creates this automatically. Using :func:`.column_property`,
+we can pass additional arguments about how we'd like the :class:`.Column`
+to be mapped. Below, we pass an option ``active_history``,
+which specifies that a change to this column's value should
+result in the former value being loaded first::
+
+    from sqlalchemy.orm import column_property
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        name = column_property(Column(String(50)), active_history=True)
+
+:func:`.column_property` is also used to map a single attribute to
+multiple columns. This use case arises when mapping to a :func:`~.expression.join`
+which has attributes which are equated to each other::
+
+    class User(Base):
+        __table__ = user_table.join(address_table)
+
+        # assign "user.id", "address.user_id" to the
+        # "id" attribute
+        id = column_property(user_table.c.id, address_table.c.user_id)
+
+For more examples featuring this usage, see :ref:`maptojoin`.
+
+Another place where :func:`.column_property` is needed is to specify SQL expressions as
+mapped attributes, such as below where we create an attribute ``fullname``
+that is the string concatenation of the ``firstname`` and ``lastname``
+columns::
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+        fullname = column_property(firstname + " " + lastname)
+
+See examples of this usage at :ref:`mapper_sql_expressions`.
+
+.. 
autofunction:: column_property
+
+.. _include_exclude_cols:
+
+Mapping a Subset of Table Columns
+---------------------------------
+
+Sometimes, a :class:`.Table` object is made available using the
+reflection process described at :ref:`metadata_reflection` to load
+the table's structure from the database.
+For such a table that has lots of columns that don't need to be referenced
+in the application, the ``include_properties`` or ``exclude_properties``
+arguments can specify that only a subset of columns should be mapped.
+For example::
+
+    class User(Base):
+        __table__ = user_table
+        __mapper_args__ = {
+            'include_properties': ['user_id', 'user_name']
+        }
+
+...will map the ``User`` class to the ``user_table`` table, only including
+the ``user_id`` and ``user_name`` columns - the rest are not referenced.
+Similarly::
+
+    class Address(Base):
+        __table__ = address_table
+        __mapper_args__ = {
+            'exclude_properties': ['street', 'city', 'state', 'zip']
+        }
+
+...will map the ``Address`` class to the ``address_table`` table, including
+all columns present except ``street``, ``city``, ``state``, and ``zip``.
+
+When this mapping is used, the columns that are not included will not be
+referenced in any SELECT statements emitted by :class:`.Query`, nor will there
+be any mapped attribute on the mapped class which represents the column;
+assigning an attribute of that name will have no effect beyond that of
+a normal Python attribute assignment.
+
+In some cases, multiple columns may have the same name, such as when
+mapping to a join of two or more tables that share some column name.
+``include_properties`` and ``exclude_properties`` can also accommodate
+:class:`.Column` objects to more accurately describe which columns
+should be included or excluded::
+
+    class UserAddress(Base):
+        __table__ = user_table.join(address_table)
+        __mapper_args__ = {
+            'exclude_properties': [address_table.c.id],
+            'primary_key': [user_table.c.id]
+        }
+
+.. note::
+
+    insert and update defaults configured on individual
+    :class:`.Column` objects, i.e. those described at :ref:`metadata_defaults`
+    including those configured by the ``default``, ``onupdate``,
+    ``server_default`` and ``server_onupdate`` arguments, will continue to
+    function normally even if those :class:`.Column` objects are not mapped.
+    This is because in the case of ``default`` and ``onupdate``, the
+    :class:`.Column` object is still present on the underlying
+    :class:`.Table`, thus allowing the default functions to take place when
+    the ORM emits an INSERT or UPDATE, and in the case of ``server_default``
+    and ``server_onupdate``, the relational database itself maintains these
+    functions.
+
+
diff --git a/doc/build/orm/nonstandard_mappings.rst b/doc/build/orm/nonstandard_mappings.rst
new file mode 100644
index 0000000000..b3733a1b98
--- /dev/null
+++ b/doc/build/orm/nonstandard_mappings.rst
@@ -0,0 +1,152 @@
+========================
+Non-Traditional Mappings
+========================
+
+.. _maptojoin:
+
+Mapping a Class against Multiple Tables
+========================================
+
+Mappers can be constructed against arbitrary relational units (called
+*selectables*) in addition to plain tables. 
For example, the :func:`~.expression.join` +function creates a selectable unit comprised of +multiple tables, complete with its own composite primary key, which can be +mapped in the same way as a :class:`.Table`:: + + from sqlalchemy import Table, Column, Integer, \ + String, MetaData, join, ForeignKey + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy.orm import column_property + + metadata = MetaData() + + # define two Table objects + user_table = Table('user', metadata, + Column('id', Integer, primary_key=True), + Column('name', String), + ) + + address_table = Table('address', metadata, + Column('id', Integer, primary_key=True), + Column('user_id', Integer, ForeignKey('user.id')), + Column('email_address', String) + ) + + # define a join between them. This + # takes place across the user.id and address.user_id + # columns. + user_address_join = join(user_table, address_table) + + Base = declarative_base() + + # map to it + class AddressUser(Base): + __table__ = user_address_join + + id = column_property(user_table.c.id, address_table.c.user_id) + address_id = address_table.c.id + +In the example above, the join expresses columns for both the +``user`` and the ``address`` table. The ``user.id`` and ``address.user_id`` +columns are equated by foreign key, so in the mapping they are defined +as one attribute, ``AddressUser.id``, using :func:`.column_property` to +indicate a specialized column mapping. Based on this part of the +configuration, the mapping will copy +new primary key values from ``user.id`` into the ``address.user_id`` column +when a flush occurs. + +Additionally, the ``address.id`` column is mapped explicitly to +an attribute named ``address_id``. This is to **disambiguate** the +mapping of the ``address.id`` column from the same-named ``AddressUser.id`` +attribute, which here has been assigned to refer to the ``user`` table +combined with the ``address.user_id`` foreign key. + +The natural primary key of the above mapping is the composite of +``(user.id, address.id)``, as these are the primary key columns of the +``user`` and ``address`` table combined together. The identity of an +``AddressUser`` object will be in terms of these two values, and +is represented from an ``AddressUser`` object as +``(AddressUser.id, AddressUser.address_id)``. + + +Mapping a Class against Arbitrary Selects +========================================= + +Similar to mapping against a join, a plain :func:`~.expression.select` object can be used with a +mapper as well. The example fragment below illustrates mapping a class +called ``Customer`` to a :func:`~.expression.select` which includes a join to a +subquery:: + + from sqlalchemy import select, func + + subq = select([ + func.count(orders.c.id).label('order_count'), + func.max(orders.c.price).label('highest_order'), + orders.c.customer_id + ]).group_by(orders.c.customer_id).alias() + + customer_select = select([customers, subq]).\ + select_from( + join(customers, subq, + customers.c.id == subq.c.customer_id) + ).alias() + + class Customer(Base): + __table__ = customer_select + +Above, the full row represented by ``customer_select`` will be all the +columns of the ``customers`` table, in addition to those columns +exposed by the ``subq`` subquery, which are ``order_count``, +``highest_order``, and ``customer_id``. Mapping the ``Customer`` +class to this selectable then creates a class which will contain +those attributes. 
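+
+Once mapped, those attributes can be queried like any other; a brief,
+hypothetical usage sketch (assuming a :class:`.Session` configured
+elsewhere)::
+
+    # hypothetical usage; the attributes below are those mapped
+    # from the SELECT statement above
+    for customer in session.query(Customer).\
+            filter(Customer.order_count > 2):
+        print customer.name, customer.order_count, customer.highest_order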
+ +When the ORM persists new instances of ``Customer``, only the +``customers`` table will actually receive an INSERT. This is because the +primary key of the ``orders`` table is not represented in the mapping; the ORM +will only emit an INSERT into a table for which it has mapped the primary +key. + +.. note:: + + The practice of mapping to arbitrary SELECT statements, especially + complex ones as above, is + almost never needed; it necessarily tends to produce complex queries + which are often less efficient than that which would be produced + by direct query construction. The practice is to some degree + based on the very early history of SQLAlchemy where the :func:`.mapper` + construct was meant to represent the primary querying interface; + in modern usage, the :class:`.Query` object can be used to construct + virtually any SELECT statement, including complex composites, and should + be favored over the "map-to-selectable" approach. + +Multiple Mappers for One Class +============================== + +In modern SQLAlchemy, a particular class is only mapped by one :func:`.mapper` +at a time. The rationale here is that the :func:`.mapper` modifies the class itself, not only +persisting it towards a particular :class:`.Table`, but also *instrumenting* +attributes upon the class which are structured specifically according to the +table metadata. + +One potential use case for another mapper to exist at the same time is if we +wanted to load instances of our class not just from the immediate :class:`.Table` +to which it is mapped, but from another selectable that is a derivation of that +:class:`.Table`. To create a second mapper that only handles querying +when used explicitly, we can use the :paramref:`.mapper.non_primary` argument. +In practice, this approach is usually not needed, as we +can do this sort of thing at query time using methods such as +:meth:`.Query.select_from`, however it is useful in the rare case that we +wish to build a :func:`.relationship` to such a mapper. An example of this is +at :ref:`relationship_non_primary_mapper`. + +Another potential use is if we genuinely want instances of our class to +be persisted into different tables at different times; certain kinds of +data sharding configurations may persist a particular class into tables +that are identical in structure except for their name. For this kind of +pattern, Python offers a better approach than the complexity of mapping +the same class multiple times, which is to instead create new mapped classes +for each target table. SQLAlchemy refers to this as the "entity name" +pattern, which is described as a recipe at `Entity Name +`_. + diff --git a/doc/build/orm/persistence_techniques.rst b/doc/build/orm/persistence_techniques.rst new file mode 100644 index 0000000000..28bce6705a --- /dev/null +++ b/doc/build/orm/persistence_techniques.rst @@ -0,0 +1,164 @@ +================================= +Additional Persistence Techniques +================================= + +.. _flush_embedded_sql_expressions: + +Embedding SQL Insert/Update Expressions into a Flush +===================================================== + +This feature allows the value of a database column to be set to a SQL +expression instead of a literal value. It's especially useful for atomic +updates, calling stored procedures, etc. 
All you do is assign an expression to +an attribute:: + + class SomeClass(object): + pass + mapper(SomeClass, some_table) + + someobject = session.query(SomeClass).get(5) + + # set 'value' attribute to a SQL expression adding one + someobject.value = some_table.c.value + 1 + + # issues "UPDATE some_table SET value=value+1" + session.commit() + +This technique works both for INSERT and UPDATE statements. After the +flush/commit operation, the ``value`` attribute on ``someobject`` above is +expired, so that when next accessed the newly generated value will be loaded +from the database. + +.. _session_sql_expressions: + +Using SQL Expressions with Sessions +==================================== + +SQL expressions and strings can be executed via the +:class:`~sqlalchemy.orm.session.Session` within its transactional context. +This is most easily accomplished using the +:meth:`~.Session.execute` method, which returns a +:class:`~sqlalchemy.engine.ResultProxy` in the same manner as an +:class:`~sqlalchemy.engine.Engine` or +:class:`~sqlalchemy.engine.Connection`:: + + Session = sessionmaker(bind=engine) + session = Session() + + # execute a string statement + result = session.execute("select * from table where id=:id", {'id':7}) + + # execute a SQL expression construct + result = session.execute(select([mytable]).where(mytable.c.id==7)) + +The current :class:`~sqlalchemy.engine.Connection` held by the +:class:`~sqlalchemy.orm.session.Session` is accessible using the +:meth:`~.Session.connection` method:: + + connection = session.connection() + +The examples above deal with a :class:`~sqlalchemy.orm.session.Session` that's +bound to a single :class:`~sqlalchemy.engine.Engine` or +:class:`~sqlalchemy.engine.Connection`. To execute statements using a +:class:`~sqlalchemy.orm.session.Session` which is bound either to multiple +engines, or none at all (i.e. relies upon bound metadata), both +:meth:`~.Session.execute` and +:meth:`~.Session.connection` accept a ``mapper`` keyword +argument, which is passed a mapped class or +:class:`~sqlalchemy.orm.mapper.Mapper` instance, which is used to locate the +proper context for the desired engine:: + + Session = sessionmaker() + session = Session() + + # need to specify mapper or class when executing + result = session.execute("select * from table where id=:id", {'id':7}, mapper=MyMappedClass) + + result = session.execute(select([mytable], mytable.c.id==7), mapper=MyMappedClass) + + connection = session.connection(MyMappedClass) + +.. _session_partitioning: + +Partitioning Strategies +======================= + +Simple Vertical Partitioning +---------------------------- + +Vertical partitioning places different kinds of objects, or different tables, +across multiple databases:: + + engine1 = create_engine('postgresql://db1') + engine2 = create_engine('postgresql://db2') + + Session = sessionmaker(twophase=True) + + # bind User operations to engine 1, Account operations to engine 2 + Session.configure(binds={User:engine1, Account:engine2}) + + session = Session() + +Above, operations against either class will make usage of the :class:`.Engine` +linked to that class. Upon a flush operation, similar rules take place +to ensure each class is written to the right database. + +The transactions among the multiple databases can optionally be coordinated +via two phase commit, if the underlying backend supports it. See +:ref:`session_twophase` for an example. 
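+
+For example, a brief, hypothetical usage sketch; the ``User`` and
+``Account`` mappings themselves (with ``name`` and ``number`` columns)
+are assumed rather than defined here::
+
+    session = Session()
+
+    session.add(User(name='ed'))        # routed to engine1 at flush time
+    session.add(Account(number='42'))   # routed to engine2 at flush time
+
+    # with twophase=True above, the commit is coordinated across
+    # both databases using two-phase commit
+    session.commit()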
+ +Custom Vertical Partitioning +---------------------------- + +More comprehensive rule-based class-level partitioning can be built by +overriding the :meth:`.Session.get_bind` method. Below we illustrate +a custom :class:`.Session` which delivers the following rules: + +1. Flush operations are delivered to the engine named ``master``. + +2. Operations on objects that subclass ``MyOtherClass`` all + occur on the ``other`` engine. + +3. Read operations for all other classes occur on a random + choice of the ``slave1`` or ``slave2`` database. + +:: + + engines = { + 'master':create_engine("sqlite:///master.db"), + 'other':create_engine("sqlite:///other.db"), + 'slave1':create_engine("sqlite:///slave1.db"), + 'slave2':create_engine("sqlite:///slave2.db"), + } + + from sqlalchemy.orm import Session, sessionmaker + import random + + class RoutingSession(Session): + def get_bind(self, mapper=None, clause=None): + if mapper and issubclass(mapper.class_, MyOtherClass): + return engines['other'] + elif self._flushing: + return engines['master'] + else: + return engines[ + random.choice(['slave1','slave2']) + ] + +The above :class:`.Session` class is plugged in using the ``class_`` +argument to :class:`.sessionmaker`:: + + Session = sessionmaker(class_=RoutingSession) + +This approach can be combined with multiple :class:`.MetaData` objects, +using an approach such as that of using the declarative ``__abstract__`` +keyword, described at :ref:`declarative_abstract`. + +Horizontal Partitioning +----------------------- + +Horizontal partitioning partitions the rows of a single table (or a set of +tables) across multiple databases. + +See the "sharding" example: :ref:`examples_sharding`. + diff --git a/doc/build/orm/query.rst b/doc/build/orm/query.rst index 5e31d710f7..1517cb997a 100644 --- a/doc/build/orm/query.rst +++ b/doc/build/orm/query.rst @@ -1,15 +1,9 @@ .. _query_api_toplevel: - -Querying -======== - -This section provides API documentation for the :class:`.Query` object and related constructs. - -For an in-depth introduction to querying with the SQLAlchemy ORM, please see the :ref:`ormtutorial_toplevel`. - - .. module:: sqlalchemy.orm +Query API +========= + The Query Object ---------------- diff --git a/doc/build/orm/relationship_api.rst b/doc/build/orm/relationship_api.rst new file mode 100644 index 0000000000..03045f6980 --- /dev/null +++ b/doc/build/orm/relationship_api.rst @@ -0,0 +1,19 @@ +.. automodule:: sqlalchemy.orm + +Relationships API +----------------- + +.. autofunction:: relationship + +.. autofunction:: backref + +.. autofunction:: relation + +.. autofunction:: dynamic_loader + +.. autofunction:: foreign + +.. autofunction:: remote + + + diff --git a/doc/build/orm/relationship_persistence.rst b/doc/build/orm/relationship_persistence.rst new file mode 100644 index 0000000000..6d2ba78823 --- /dev/null +++ b/doc/build/orm/relationship_persistence.rst @@ -0,0 +1,229 @@ +Special Relationship Persistence Patterns +========================================= + +.. _post_update: + +Rows that point to themselves / Mutually Dependent Rows +------------------------------------------------------- + +This is a very specific case where relationship() must perform an INSERT and a +second UPDATE in order to properly populate a row (and vice versa an UPDATE +and DELETE in order to delete without violating foreign key constraints). The +two use cases are: + +* A table contains a foreign key to itself, and a single row will + have a foreign key value pointing to its own primary key. 
+* Two tables each contain a foreign key referencing the other
+  table, with a row in each table referencing the other.
+
+For example::
+
+              user
+    ---------------------------------
+    user_id    name   related_user_id
+       1       'ed'          1
+
+Or::
+
+                 widget                                     entry
+    -------------------------------------------  ---------------------------------
+    widget_id     name        favorite_entry_id  entry_id  name        widget_id
+       1       'somewidget'          5              5      'someentry'     1
+
+In the first case, a row points to itself. Technically, a database that uses
+sequences such as PostgreSQL or Oracle can INSERT the row at once using a
+previously generated value, but databases which rely upon autoincrement-style
+primary key identifiers cannot. The :func:`~sqlalchemy.orm.relationship`
+always assumes a "parent/child" model of row population during flush, so
+unless you are populating the primary key/foreign key columns directly,
+:func:`~sqlalchemy.orm.relationship` needs to use two statements.
+
+In the second case, the "widget" row must be inserted before any referring
+"entry" rows, but then the "favorite_entry_id" column of that "widget" row
+cannot be set until the "entry" rows have been generated. In this case, it's
+typically impossible to insert the "widget" and "entry" rows using just two
+INSERT statements; an UPDATE must be performed in order to keep foreign key
+constraints fulfilled. The exception is if the foreign keys are configured as
+"deferred until commit" (a feature some databases support) and if the
+identifiers were populated manually (again essentially bypassing
+:func:`~sqlalchemy.orm.relationship`).
+
+To enable the usage of a supplementary UPDATE statement,
+we use the :paramref:`~.relationship.post_update` option
+of :func:`.relationship`. This specifies that the linkage between the
+two rows should be created using an UPDATE statement after both rows
+have been INSERTed; it also causes the rows to be de-associated with
+each other via UPDATE before a DELETE is emitted. The flag should
+be placed on just *one* of the relationships, preferably the
+many-to-one side. Below we illustrate
+a complete example, including two :class:`.ForeignKey` constructs, one which
+specifies :paramref:`~.ForeignKey.use_alter` to help with emitting CREATE TABLE statements::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class Entry(Base):
+        __tablename__ = 'entry'
+        entry_id = Column(Integer, primary_key=True)
+        widget_id = Column(Integer, ForeignKey('widget.widget_id'))
+        name = Column(String(50))
+
+    class Widget(Base):
+        __tablename__ = 'widget'
+
+        widget_id = Column(Integer, primary_key=True)
+        favorite_entry_id = Column(Integer,
+                                ForeignKey('entry.entry_id',
+                                use_alter=True,
+                                name="fk_favorite_entry"))
+        name = Column(String(50))
+
+        entries = relationship(Entry, primaryjoin=
+                                        widget_id==Entry.widget_id)
+        favorite_entry = relationship(Entry,
+                                    primaryjoin=
+                                        favorite_entry_id==Entry.entry_id,
+                                    post_update=True)
+
+When a structure using the above configuration is flushed, the "widget" row will be
+INSERTed minus the "favorite_entry_id" value, then all the "entry" rows will
+be INSERTed referencing the parent "widget" row, and then an UPDATE statement
+will populate the "favorite_entry_id" column of the "widget" table (it's one
+row at a time for the time being):
+
+.. 
sourcecode:: pycon+sql + + >>> w1 = Widget(name='somewidget') + >>> e1 = Entry(name='someentry') + >>> w1.favorite_entry = e1 + >>> w1.entries = [e1] + >>> session.add_all([w1, e1]) + {sql}>>> session.commit() + BEGIN (implicit) + INSERT INTO widget (favorite_entry_id, name) VALUES (?, ?) + (None, 'somewidget') + INSERT INTO entry (widget_id, name) VALUES (?, ?) + (1, 'someentry') + UPDATE widget SET favorite_entry_id=? WHERE widget.widget_id = ? + (1, 1) + COMMIT + +An additional configuration we can specify is to supply a more +comprehensive foreign key constraint on ``Widget``, such that +it's guaranteed that ``favorite_entry_id`` refers to an ``Entry`` +that also refers to this ``Widget``. We can use a composite foreign key, +as illustrated below:: + + from sqlalchemy import Integer, ForeignKey, String, \ + Column, UniqueConstraint, ForeignKeyConstraint + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy.orm import relationship + + Base = declarative_base() + + class Entry(Base): + __tablename__ = 'entry' + entry_id = Column(Integer, primary_key=True) + widget_id = Column(Integer, ForeignKey('widget.widget_id')) + name = Column(String(50)) + __table_args__ = ( + UniqueConstraint("entry_id", "widget_id"), + ) + + class Widget(Base): + __tablename__ = 'widget' + + widget_id = Column(Integer, autoincrement='ignore_fk', primary_key=True) + favorite_entry_id = Column(Integer) + + name = Column(String(50)) + + __table_args__ = ( + ForeignKeyConstraint( + ["widget_id", "favorite_entry_id"], + ["entry.widget_id", "entry.entry_id"], + name="fk_favorite_entry", use_alter=True + ), + ) + + entries = relationship(Entry, primaryjoin= + widget_id==Entry.widget_id, + foreign_keys=Entry.widget_id) + favorite_entry = relationship(Entry, + primaryjoin= + favorite_entry_id==Entry.entry_id, + foreign_keys=favorite_entry_id, + post_update=True) + +The above mapping features a composite :class:`.ForeignKeyConstraint` +bridging the ``widget_id`` and ``favorite_entry_id`` columns. To ensure +that ``Widget.widget_id`` remains an "autoincrementing" column we specify +:paramref:`~.Column.autoincrement` to the value ``"ignore_fk"`` +on :class:`.Column`, and additionally on each +:func:`.relationship` we must limit those columns considered as part of +the foreign key for the purposes of joining and cross-population. + +.. _passive_updates: + +Mutable Primary Keys / Update Cascades +--------------------------------------- + +When the primary key of an entity changes, related items +which reference the primary key must also be updated as +well. For databases which enforce referential integrity, +it's required to use the database's ON UPDATE CASCADE +functionality in order to propagate primary key changes +to referenced foreign keys - the values cannot be out +of sync for any moment. + +For databases that don't support this, such as SQLite and +MySQL without their referential integrity options turned +on, the :paramref:`~.relationship.passive_updates` flag can +be set to ``False``, most preferably on a one-to-many or +many-to-many :func:`.relationship`, which instructs +SQLAlchemy to issue UPDATE statements individually for +objects referenced in the collection, loading them into +memory if not already locally present. The +:paramref:`~.relationship.passive_updates` flag can also be ``False`` in +conjunction with ON UPDATE CASCADE functionality, +although in that case the unit of work will be issuing +extra SELECT and UPDATE statements unnecessarily. 
+ +A typical mutable primary key setup might look like:: + + class User(Base): + __tablename__ = 'user' + + username = Column(String(50), primary_key=True) + fullname = Column(String(100)) + + # passive_updates=False *only* needed if the database + # does not implement ON UPDATE CASCADE + addresses = relationship("Address", passive_updates=False) + + class Address(Base): + __tablename__ = 'address' + + email = Column(String(50), primary_key=True) + username = Column(String(50), + ForeignKey('user.username', onupdate="cascade") + ) + +:paramref:`~.relationship.passive_updates` is set to ``True`` by default, +indicating that ON UPDATE CASCADE is expected to be in +place in the usual case for foreign keys that expect +to have a mutating parent key. + +A :paramref:`~.relationship.passive_updates` setting of False may be configured on any +direction of relationship, i.e. one-to-many, many-to-one, +and many-to-many, although it is much more effective when +placed just on the one-to-many or many-to-many side. +Configuring the :paramref:`~.relationship.passive_updates` +to False only on the +many-to-one side will have only a partial effect, as the +unit of work searches only through the current identity +map for objects that may be referencing the one with a +mutating primary key, not throughout the database. diff --git a/doc/build/orm/relationships.rst b/doc/build/orm/relationships.rst index c65f06cbc9..6fea107a7a 100644 --- a/doc/build/orm/relationships.rst +++ b/doc/build/orm/relationships.rst @@ -10,1710 +10,14 @@ of its usage. The reference material here continues into the next section, :ref:`collections_toplevel`, which has additional detail on configuration of collections via :func:`relationship`. -.. _relationship_patterns: - -Basic Relational Patterns --------------------------- - -A quick walkthrough of the basic relational patterns. - -The imports used for each of the following sections is as follows:: - - from sqlalchemy import Table, Column, Integer, ForeignKey - from sqlalchemy.orm import relationship, backref - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - -One To Many -~~~~~~~~~~~~ - -A one to many relationship places a foreign key on the child table referencing -the parent. :func:`.relationship` is then specified on the parent, as referencing -a collection of items represented by the child:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - children = relationship("Child") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('parent.id')) - -To establish a bidirectional relationship in one-to-many, where the "reverse" -side is a many to one, specify the :paramref:`~.relationship.backref` option:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - children = relationship("Child", backref="parent") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('parent.id')) - -``Child`` will get a ``parent`` attribute with many-to-one semantics. - -Many To One -~~~~~~~~~~~~ - -Many to one places a foreign key in the parent table referencing the child. 
-:func:`.relationship` is declared on the parent, where a new scalar-holding -attribute will be created:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child_id = Column(Integer, ForeignKey('child.id')) - child = relationship("Child") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - -Bidirectional behavior is achieved by setting -:paramref:`~.relationship.backref` to the value ``"parents"``, which -will place a one-to-many collection on the ``Child`` class:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child_id = Column(Integer, ForeignKey('child.id')) - child = relationship("Child", backref="parents") - -.. _relationships_one_to_one: - -One To One -~~~~~~~~~~~ - -One To One is essentially a bidirectional relationship with a scalar -attribute on both sides. To achieve this, the :paramref:`~.relationship.uselist` flag indicates -the placement of a scalar attribute instead of a collection on the "many" side -of the relationship. To convert one-to-many into one-to-one:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child = relationship("Child", uselist=False, backref="parent") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('parent.id')) - -Or to turn a one-to-many backref into one-to-one, use the :func:`.backref` function -to provide arguments for the reverse side:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child_id = Column(Integer, ForeignKey('child.id')) - child = relationship("Child", backref=backref("parent", uselist=False)) - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - -.. _relationships_many_to_many: - -Many To Many -~~~~~~~~~~~~~ - -Many to Many adds an association table between two classes. The association -table is indicated by the :paramref:`~.relationship.secondary` argument to -:func:`.relationship`. Usually, the :class:`.Table` uses the :class:`.MetaData` -object associated with the declarative base class, so that the :class:`.ForeignKey` -directives can locate the remote tables with which to link:: - - association_table = Table('association', Base.metadata, - Column('left_id', Integer, ForeignKey('left.id')), - Column('right_id', Integer, ForeignKey('right.id')) - ) - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary=association_table) - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -For a bidirectional relationship, both sides of the relationship contain a -collection. 
The :paramref:`~.relationship.backref` keyword will automatically use -the same :paramref:`~.relationship.secondary` argument for the reverse relationship:: - - association_table = Table('association', Base.metadata, - Column('left_id', Integer, ForeignKey('left.id')), - Column('right_id', Integer, ForeignKey('right.id')) - ) - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary=association_table, - backref="parents") - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -The :paramref:`~.relationship.secondary` argument of :func:`.relationship` also accepts a callable -that returns the ultimate argument, which is evaluated only when mappers are -first used. Using this, we can define the ``association_table`` at a later -point, as long as it's available to the callable after all module initialization -is complete:: - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary=lambda: association_table, - backref="parents") - -With the declarative extension in use, the traditional "string name of the table" -is accepted as well, matching the name of the table as stored in ``Base.metadata.tables``:: - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary="association", - backref="parents") - -.. _relationships_many_to_many_deletion: - -Deleting Rows from the Many to Many Table -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A behavior which is unique to the :paramref:`~.relationship.secondary` argument to :func:`.relationship` -is that the :class:`.Table` which is specified here is automatically subject -to INSERT and DELETE statements, as objects are added or removed from the collection. -There is **no need to delete from this table manually**. The act of removing a -record from the collection will have the effect of the row being deleted on flush:: - - # row will be deleted from the "secondary" table - # automatically - myparent.children.remove(somechild) - -A question which often arises is how the row in the "secondary" table can be deleted -when the child object is handed directly to :meth:`.Session.delete`:: - - session.delete(somechild) - -There are several possibilities here: - -* If there is a :func:`.relationship` from ``Parent`` to ``Child``, but there is - **not** a reverse-relationship that links a particular ``Child`` to each ``Parent``, - SQLAlchemy will not have any awareness that when deleting this particular - ``Child`` object, it needs to maintain the "secondary" table that links it to - the ``Parent``. No delete of the "secondary" table will occur. -* If there is a relationship that links a particular ``Child`` to each ``Parent``, - suppose it's called ``Child.parents``, SQLAlchemy by default will load in - the ``Child.parents`` collection to locate all ``Parent`` objects, and remove - each row from the "secondary" table which establishes this link. Note that - this relationship does not need to be bidrectional; SQLAlchemy is strictly - looking at every :func:`.relationship` associated with the ``Child`` object - being deleted. -* A higher performing option here is to use ON DELETE CASCADE directives - with the foreign keys used by the database. 
Assuming the database supports - this feature, the database itself can be made to automatically delete rows in the - "secondary" table as referencing rows in "child" are deleted. SQLAlchemy - can be instructed to forego actively loading in the ``Child.parents`` - collection in this case using the :paramref:`~.relationship.passive_deletes` - directive on :func:`.relationship`; see :ref:`passive_deletes` for more details - on this. - -Note again, these behaviors are *only* relevant to the :paramref:`~.relationship.secondary` option -used with :func:`.relationship`. If dealing with association tables that -are mapped explicitly and are *not* present in the :paramref:`~.relationship.secondary` option -of a relevant :func:`.relationship`, cascade rules can be used instead -to automatically delete entities in reaction to a related entity being -deleted - see :ref:`unitofwork_cascades` for information on this feature. - - -.. _association_pattern: - -Association Object -~~~~~~~~~~~~~~~~~~ - -The association object pattern is a variant on many-to-many: it's used -when your association table contains additional columns beyond those -which are foreign keys to the left and right tables. Instead of using -the :paramref:`~.relationship.secondary` argument, you map a new class -directly to the association table. The left side of the relationship -references the association object via one-to-many, and the association -class references the right side via many-to-one. Below we illustrate -an association table mapped to the ``Association`` class which -includes a column called ``extra_data``, which is a string value that -is stored along with each association between ``Parent`` and -``Child``:: - - class Association(Base): - __tablename__ = 'association' - left_id = Column(Integer, ForeignKey('left.id'), primary_key=True) - right_id = Column(Integer, ForeignKey('right.id'), primary_key=True) - extra_data = Column(String(50)) - child = relationship("Child") - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Association") - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -The bidirectional version adds backrefs to both relationships:: - - class Association(Base): - __tablename__ = 'association' - left_id = Column(Integer, ForeignKey('left.id'), primary_key=True) - right_id = Column(Integer, ForeignKey('right.id'), primary_key=True) - extra_data = Column(String(50)) - child = relationship("Child", backref="parent_assocs") - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Association", backref="parent") - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -Working with the association pattern in its direct form requires that child -objects are associated with an association instance before being appended to -the parent; similarly, access from parent to child goes through the -association object:: - - # create parent, append a child via association - p = Parent() - a = Association(extra_data="some data") - a.child = Child() - p.children.append(a) - - # iterate through child objects via association, including association - # attributes - for assoc in p.children: - print assoc.extra_data - print assoc.child - -To enhance the association object pattern such that direct -access to the ``Association`` object is optional, SQLAlchemy -provides the :ref:`associationproxy_toplevel` extension. 
This -extension allows the configuration of attributes which will -access two "hops" with a single access, one "hop" to the -associated object, and a second to a target attribute. - -.. note:: - - When using the association object pattern, it is advisable that the - association-mapped table not be used as the - :paramref:`~.relationship.secondary` argument on a - :func:`.relationship` elsewhere, unless that :func:`.relationship` - contains the option :paramref:`~.relationship.viewonly` set to - ``True``. SQLAlchemy otherwise may attempt to emit redundant INSERT - and DELETE statements on the same table, if similar state is - detected on the related attribute as well as the associated object. - -.. _self_referential: - -Adjacency List Relationships ------------------------------ - -The **adjacency list** pattern is a common relational pattern whereby a table -contains a foreign key reference to itself. This is the most common -way to represent hierarchical data in flat tables. Other methods -include **nested sets**, sometimes called "modified preorder", -as well as **materialized path**. Despite the appeal that modified preorder -has when evaluated for its fluency within SQL queries, the adjacency list model is -probably the most appropriate pattern for the large majority of hierarchical -storage needs, for reasons of concurrency, reduced complexity, and that -modified preorder has little advantage over an application which can fully -load subtrees into the application space. - -In this example, we'll work with a single mapped -class called ``Node``, representing a tree structure:: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - children = relationship("Node") - -With this structure, a graph such as the following:: - - root --+---> child1 - +---> child2 --+--> subchild1 - | +--> subchild2 - +---> child3 - -Would be represented with data such as:: - - id parent_id data - --- ------- ---- - 1 NULL root - 2 1 child1 - 3 1 child2 - 4 3 subchild1 - 5 3 subchild2 - 6 1 child3 - -The :func:`.relationship` configuration here works in the -same way as a "normal" one-to-many relationship, with the -exception that the "direction", i.e. whether the relationship -is one-to-many or many-to-one, is assumed by default to -be one-to-many. To establish the relationship as many-to-one, -an extra directive is added known as :paramref:`~.relationship.remote_side`, which -is a :class:`.Column` or collection of :class:`.Column` objects -that indicate those which should be considered to be "remote":: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - parent = relationship("Node", remote_side=[id]) - -Where above, the ``id`` column is applied as the :paramref:`~.relationship.remote_side` -of the ``parent`` :func:`.relationship`, thus establishing -``parent_id`` as the "local" side, and the relationship -then behaves as a many-to-one. 
- -As always, both directions can be combined into a bidirectional -relationship using the :func:`.backref` function:: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - children = relationship("Node", - backref=backref('parent', remote_side=[id]) - ) - -There are several examples included with SQLAlchemy illustrating -self-referential strategies; these include :ref:`examples_adjacencylist` and -:ref:`examples_xmlpersistence`. - -Composite Adjacency Lists -~~~~~~~~~~~~~~~~~~~~~~~~~ - -A sub-category of the adjacency list relationship is the rare -case where a particular column is present on both the "local" and -"remote" side of the join condition. An example is the ``Folder`` -class below; using a composite primary key, the ``account_id`` -column refers to itself, to indicate sub folders which are within -the same account as that of the parent; while ``folder_id`` refers -to a specific folder within that account:: - - class Folder(Base): - __tablename__ = 'folder' - __table_args__ = ( - ForeignKeyConstraint( - ['account_id', 'parent_id'], - ['folder.account_id', 'folder.folder_id']), - ) - - account_id = Column(Integer, primary_key=True) - folder_id = Column(Integer, primary_key=True) - parent_id = Column(Integer) - name = Column(String) - - parent_folder = relationship("Folder", - backref="child_folders", - remote_side=[account_id, folder_id] - ) - -Above, we pass ``account_id`` into the :paramref:`~.relationship.remote_side` list. -:func:`.relationship` recognizes that the ``account_id`` column here -is on both sides, and aligns the "remote" column along with the -``folder_id`` column, which it recognizes as uniquely present on -the "remote" side. - -.. versionadded:: 0.8 - Support for self-referential composite keys in :func:`.relationship` - where a column points to itself. - -Self-Referential Query Strategies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Querying of self-referential structures works like any other query:: - - # get all nodes named 'child2' - session.query(Node).filter(Node.data=='child2') - -However extra care is needed when attempting to join along -the foreign key from one level of the tree to the next. In SQL, -a join from a table to itself requires that at least one side of the -expression be "aliased" so that it can be unambiguously referred to. - -Recall from :ref:`ormtutorial_aliases` in the ORM tutorial that the -:func:`.orm.aliased` construct is normally used to provide an "alias" of -an ORM entity. Joining from ``Node`` to itself using this technique -looks like: - -.. sourcecode:: python+sql - - from sqlalchemy.orm import aliased - - nodealias = aliased(Node) - {sql}session.query(Node).filter(Node.data=='subchild1').\ - join(nodealias, Node.parent).\ - filter(nodealias.data=="child2").\ - all() - SELECT node.id AS node_id, - node.parent_id AS node_parent_id, - node.data AS node_data - FROM node JOIN node AS node_1 - ON node.parent_id = node_1.id - WHERE node.data = ? - AND node_1.data = ? - ['subchild1', 'child2'] - -:meth:`.Query.join` also includes a feature known as -:paramref:`.Query.join.aliased` that can shorten the verbosity self- -referential joins, at the expense of query flexibility. This feature -performs a similar "aliasing" step to that above, without the need for -an explicit entity. Calls to :meth:`.Query.filter` and similar -subsequent to the aliased join will **adapt** the ``Node`` entity to -be that of the alias: - -.. 
-
-    {sql}session.query(Node).filter(Node.data=='subchild1').\
-            join(Node.parent, aliased=True).\
-            filter(Node.data=='child2').\
-            all()
-    SELECT node.id AS node_id,
-            node.parent_id AS node_parent_id,
-            node.data AS node_data
-    FROM node
-        JOIN node AS node_1 ON node_1.id = node.parent_id
-    WHERE node.data = ? AND node_1.data = ?
-    ['subchild1', 'child2']
-
-To add criteria to multiple points along a longer join, add
-:paramref:`.Query.join.from_joinpoint` to the additional
-:meth:`~.Query.join` calls:
-
-.. sourcecode:: python+sql
-
-    # get all nodes named 'subchild1' with a
-    # parent named 'child2' and a grandparent 'root'
-    {sql}session.query(Node).\
-            filter(Node.data=='subchild1').\
-            join(Node.parent, aliased=True).\
-            filter(Node.data=='child2').\
-            join(Node.parent, aliased=True, from_joinpoint=True).\
-            filter(Node.data=='root').\
-            all()
-    SELECT node.id AS node_id,
-            node.parent_id AS node_parent_id,
-            node.data AS node_data
-    FROM node
-        JOIN node AS node_1 ON node_1.id = node.parent_id
-        JOIN node AS node_2 ON node_2.id = node_1.parent_id
-    WHERE node.data = ?
-        AND node_1.data = ?
-        AND node_2.data = ?
-    ['subchild1', 'child2', 'root']
-
-:meth:`.Query.reset_joinpoint` will also remove the "aliasing" from filtering
-calls::
-
-    session.query(Node).\
-        join(Node.children, aliased=True).\
-        filter(Node.data == 'foo').\
-        reset_joinpoint().\
-        filter(Node.data == 'bar')
-
-For an example of using :paramref:`.Query.join.aliased` to
-arbitrarily join along a chain of self-referential nodes, see
-:ref:`examples_xmlpersistence`.
-
-.. _self_referential_eager_loading:
-
-Configuring Self-Referential Eager Loading
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Eager loading of relationships occurs using joins or outerjoins from parent to
-child table during a normal query operation, such that the parent and its
-immediate child collection or reference can be populated from a single SQL
-statement, or a second statement for all immediate child collections.
-SQLAlchemy's joined and subquery eager loading use aliased tables in all cases
-when joining to related items, so are compatible with self-referential
-joining.  However, to use eager loading with a self-referential relationship,
-SQLAlchemy needs to be told how many levels deep it should join and/or query;
-otherwise the eager load will not take place at all.  This depth setting is
-configured via :paramref:`~.relationship.join_depth`:
-
-.. sourcecode:: python+sql
-
-    class Node(Base):
-        __tablename__ = 'node'
-        id = Column(Integer, primary_key=True)
-        parent_id = Column(Integer, ForeignKey('node.id'))
-        data = Column(String(50))
-        children = relationship("Node",
-                        lazy="joined",
-                        join_depth=2)
-
-    {sql}session.query(Node).all()
-    SELECT node_1.id AS node_1_id,
-            node_1.parent_id AS node_1_parent_id,
-            node_1.data AS node_1_data,
-            node_2.id AS node_2_id,
-            node_2.parent_id AS node_2_parent_id,
-            node_2.data AS node_2_data,
-            node.id AS node_id,
-            node.parent_id AS node_parent_id,
-            node.data AS node_data
-    FROM node
-        LEFT OUTER JOIN node AS node_2
-            ON node.id = node_2.parent_id
-        LEFT OUTER JOIN node AS node_1
-            ON node_2.id = node_1.parent_id
-    []
-
-.. _relationships_backref:
-
-Linking Relationships with Backref
-----------------------------------
-
-The :paramref:`~.relationship.backref` keyword argument was first introduced in :ref:`ormtutorial_toplevel`, and has been
-mentioned throughout many of the examples here.  What does it actually do?
Let's start with the canonical ``User`` and ``Address`` scenario::
-
-    from sqlalchemy import Integer, ForeignKey, String, Column
-    from sqlalchemy.ext.declarative import declarative_base
-    from sqlalchemy.orm import relationship
-
-    Base = declarative_base()
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        name = Column(String)
-
-        addresses = relationship("Address", backref="user")
-
-    class Address(Base):
-        __tablename__ = 'address'
-        id = Column(Integer, primary_key=True)
-        email = Column(String)
-        user_id = Column(Integer, ForeignKey('user.id'))
-
-The above configuration establishes a collection of ``Address`` objects on ``User`` called
-``User.addresses``.  It also establishes a ``.user`` attribute on ``Address`` which will
-refer to the parent ``User`` object.
-
-In fact, the :paramref:`~.relationship.backref` keyword is only a common shortcut for placing a second
-:func:`.relationship` onto the ``Address`` mapping, including the establishment
-of an event listener on both sides which will mirror attribute operations
-in both directions.  The above configuration is equivalent to::
-
-    from sqlalchemy import Integer, ForeignKey, String, Column
-    from sqlalchemy.ext.declarative import declarative_base
-    from sqlalchemy.orm import relationship
-
-    Base = declarative_base()
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        name = Column(String)
-
-        addresses = relationship("Address", back_populates="user")
-
-    class Address(Base):
-        __tablename__ = 'address'
-        id = Column(Integer, primary_key=True)
-        email = Column(String)
-        user_id = Column(Integer, ForeignKey('user.id'))
-
-        user = relationship("User", back_populates="addresses")
-
-Above, we add a ``.user`` relationship to ``Address`` explicitly.  On
-both relationships, the :paramref:`~.relationship.back_populates` directive tells each relationship
-about the other one, indicating that they should establish "bidirectional"
-behavior between each other.  The primary effect of this configuration
-is that the relationship adds event handlers to both attributes
-which have the behavior of "when an append or set event occurs here, set ourselves
-onto the incoming attribute using this particular attribute name".
-The behavior is illustrated as follows.  Start with a ``User`` and an ``Address``
-instance.  The ``.addresses`` collection is empty, and the ``.user`` attribute
-is ``None``::
-
-    >>> u1 = User()
-    >>> a1 = Address()
-    >>> u1.addresses
-    []
-    >>> print a1.user
-    None
-
-However, once the ``Address`` is appended to the ``u1.addresses`` collection,
-both the collection and the scalar attribute have been populated::
-
-    >>> u1.addresses.append(a1)
-    >>> u1.addresses
-    [<__main__.Address object at 0x12a6ed0>]
-    >>> a1.user
-    <__main__.User object at 0x12a6590>
-
-This behavior of course works in reverse for removal operations, as well
-as for equivalent operations on both sides.  For example, when ``.user``
-is set again to ``None``, the ``Address`` object is removed
-from the reverse collection::
-
-    >>> a1.user = None
-    >>> u1.addresses
-    []
-
-The manipulation of the ``.addresses`` collection and the ``.user`` attribute
-occurs entirely in Python without any interaction with the SQL database.
-Without this behavior, the proper state would be apparent on both sides once the
-data has been flushed to the database, and later reloaded after a commit or
-expiration operation occurs.
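-
-To make the point concrete, here is a brief hypothetical check (assuming the
-``User``/``Address`` mapping above); no :class:`.Engine` is involved at all::
-
-    u1 = User()
-    a1 = Address()
-
-    # setting the scalar side fires the attribute event...
-    a1.user = u1
-
-    # ...which mirrors the operation onto the collection side,
-    # entirely in memory
-    assert a1 in u1.addresses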
The :paramref:`~.relationship.backref`/:paramref:`~.relationship.back_populates` behavior has the advantage -that common bidirectional operations can reflect the correct state without requiring -a database round trip. - -Remember, when the :paramref:`~.relationship.backref` keyword is used on a single relationship, it's -exactly the same as if the above two relationships were created individually -using :paramref:`~.relationship.back_populates` on each. - -Backref Arguments -~~~~~~~~~~~~~~~~~~ - -We've established that the :paramref:`~.relationship.backref` keyword is merely a shortcut for building -two individual :func:`.relationship` constructs that refer to each other. Part of -the behavior of this shortcut is that certain configurational arguments applied to -the :func:`.relationship` -will also be applied to the other direction - namely those arguments that describe -the relationship at a schema level, and are unlikely to be different in the reverse -direction. The usual case -here is a many-to-many :func:`.relationship` that has a :paramref:`~.relationship.secondary` argument, -or a one-to-many or many-to-one which has a :paramref:`~.relationship.primaryjoin` argument (the -:paramref:`~.relationship.primaryjoin` argument is discussed in :ref:`relationship_primaryjoin`). Such -as if we limited the list of ``Address`` objects to those which start with "tony":: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - - addresses = relationship("Address", - primaryjoin="and_(User.id==Address.user_id, " - "Address.email.startswith('tony'))", - backref="user") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - email = Column(String) - user_id = Column(Integer, ForeignKey('user.id')) - -We can observe, by inspecting the resulting property, that both sides -of the relationship have this join condition applied:: - - >>> print User.addresses.property.primaryjoin - "user".id = address.user_id AND address.email LIKE :email_1 || '%%' - >>> - >>> print Address.user.property.primaryjoin - "user".id = address.user_id AND address.email LIKE :email_1 || '%%' - >>> - -This reuse of arguments should pretty much do the "right thing" - it -uses only arguments that are applicable, and in the case of a many-to- -many relationship, will reverse the usage of -:paramref:`~.relationship.primaryjoin` and -:paramref:`~.relationship.secondaryjoin` to correspond to the other -direction (see the example in :ref:`self_referential_many_to_many` for -this). - -It's very often the case however that we'd like to specify arguments -that are specific to just the side where we happened to place the -"backref". This includes :func:`.relationship` arguments like -:paramref:`~.relationship.lazy`, -:paramref:`~.relationship.remote_side`, -:paramref:`~.relationship.cascade` and -:paramref:`~.relationship.cascade_backrefs`. 
For this case we use -the :func:`.backref` function in place of a string:: - - # - from sqlalchemy.orm import backref - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - - addresses = relationship("Address", - backref=backref("user", lazy="joined")) - -Where above, we placed a ``lazy="joined"`` directive only on the ``Address.user`` -side, indicating that when a query against ``Address`` is made, a join to the ``User`` -entity should be made automatically which will populate the ``.user`` attribute of each -returned ``Address``. The :func:`.backref` function formatted the arguments we gave -it into a form that is interpreted by the receiving :func:`.relationship` as additional -arguments to be applied to the new relationship it creates. - -One Way Backrefs -~~~~~~~~~~~~~~~~~ - -An unusual case is that of the "one way backref". This is where the -"back-populating" behavior of the backref is only desirable in one -direction. An example of this is a collection which contains a -filtering :paramref:`~.relationship.primaryjoin` condition. We'd -like to append items to this collection as needed, and have them -populate the "parent" object on the incoming object. However, we'd -also like to have items that are not part of the collection, but still -have the same "parent" association - these items should never be in -the collection. - -Taking our previous example, where we established a -:paramref:`~.relationship.primaryjoin` that limited the collection -only to ``Address`` objects whose email address started with the word -``tony``, the usual backref behavior is that all items populate in -both directions. We wouldn't want this behavior for a case like the -following:: - - >>> u1 = User() - >>> a1 = Address(email='mary') - >>> a1.user = u1 - >>> u1.addresses - [<__main__.Address object at 0x1411910>] - -Above, the ``Address`` object that doesn't match the criterion of "starts with 'tony'" -is present in the ``addresses`` collection of ``u1``. After these objects are flushed, -the transaction committed and their attributes expired for a re-load, the ``addresses`` -collection will hit the database on next access and no longer have this ``Address`` object -present, due to the filtering condition. 
But we can do away with this unwanted side
-of the "backref" behavior on the Python side by using two separate :func:`.relationship` constructs,
-placing :paramref:`~.relationship.back_populates` only on one side::
-
-    from sqlalchemy import Integer, ForeignKey, String, Column
-    from sqlalchemy.ext.declarative import declarative_base
-    from sqlalchemy.orm import relationship
-
-    Base = declarative_base()
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        name = Column(String)
-        addresses = relationship("Address",
-                        primaryjoin="and_(User.id==Address.user_id, "
-                            "Address.email.startswith('tony'))",
-                        back_populates="user")
-
-    class Address(Base):
-        __tablename__ = 'address'
-        id = Column(Integer, primary_key=True)
-        email = Column(String)
-        user_id = Column(Integer, ForeignKey('user.id'))
-        user = relationship("User")
-
-With the above scenario, appending an ``Address`` object to the ``.addresses``
-collection of a ``User`` will always establish the ``.user`` attribute on that
-``Address``::
-
-    >>> u1 = User()
-    >>> a1 = Address(email='tony')
-    >>> u1.addresses.append(a1)
-    >>> a1.user
-    <__main__.User object at 0x1411850>
-
-However, applying a ``User`` to the ``.user`` attribute of an ``Address``
-will not append the ``Address`` object to the collection::
-
-    >>> a2 = Address(email='mary')
-    >>> a2.user = u1
-    >>> a2 in u1.addresses
-    False
-
-Of course, we've disabled some of the usefulness of
-:paramref:`~.relationship.backref` here, in that when we do append an
-``Address`` that corresponds to the criteria of
-``email.startswith('tony')``, it won't show up in the
-``User.addresses`` collection until the session is flushed, and the
-attributes reloaded after a commit or expire operation.  While we
-could consider an attribute event that checks this criterion in
-Python, this starts to cross the line of duplicating too much SQL
-behavior in Python.  The backref behavior itself is only a slight
-transgression of this philosophy - SQLAlchemy tries to keep these to a
-minimum overall.
-
-.. _relationship_configure_joins:
-
-Configuring how Relationship Joins
-------------------------------------
-
-:func:`.relationship` will normally create a join between two tables
-by examining the foreign key relationship between the two tables
-to determine which columns should be compared.  There are a variety
-of situations where this behavior needs to be customized.
-
-.. _relationship_foreign_keys:
-
-Handling Multiple Join Paths
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-One of the most common situations to deal with is when
-there is more than one foreign key path between two tables.
- -Consider a ``Customer`` class that contains two foreign keys to an ``Address`` -class:: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class Customer(Base): - __tablename__ = 'customer' - id = Column(Integer, primary_key=True) - name = Column(String) - - billing_address_id = Column(Integer, ForeignKey("address.id")) - shipping_address_id = Column(Integer, ForeignKey("address.id")) - - billing_address = relationship("Address") - shipping_address = relationship("Address") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - street = Column(String) - city = Column(String) - state = Column(String) - zip = Column(String) - -The above mapping, when we attempt to use it, will produce the error:: - - sqlalchemy.exc.AmbiguousForeignKeysError: Could not determine join - condition between parent/child tables on relationship - Customer.billing_address - there are multiple foreign key - paths linking the tables. Specify the 'foreign_keys' argument, - providing a list of those columns which should be - counted as containing a foreign key reference to the parent table. - -The above message is pretty long. There are many potential messages -that :func:`.relationship` can return, which have been carefully tailored -to detect a variety of common configurational issues; most will suggest -the additional configuration that's needed to resolve the ambiguity -or other missing information. - -In this case, the message wants us to qualify each :func:`.relationship` -by instructing for each one which foreign key column should be considered, and -the appropriate form is as follows:: - - class Customer(Base): - __tablename__ = 'customer' - id = Column(Integer, primary_key=True) - name = Column(String) - - billing_address_id = Column(Integer, ForeignKey("address.id")) - shipping_address_id = Column(Integer, ForeignKey("address.id")) - - billing_address = relationship("Address", foreign_keys=[billing_address_id]) - shipping_address = relationship("Address", foreign_keys=[shipping_address_id]) - -Above, we specify the ``foreign_keys`` argument, which is a :class:`.Column` or list -of :class:`.Column` objects which indicate those columns to be considered "foreign", -or in other words, the columns that contain a value referring to a parent table. -Loading the ``Customer.billing_address`` relationship from a ``Customer`` -object will use the value present in ``billing_address_id`` in order to -identify the row in ``Address`` to be loaded; similarly, ``shipping_address_id`` -is used for the ``shipping_address`` relationship. The linkage of the two -columns also plays a role during persistence; the newly generated primary key -of a just-inserted ``Address`` object will be copied into the appropriate -foreign key column of an associated ``Customer`` object during a flush. - -When specifying ``foreign_keys`` with Declarative, we can also use string -names to specify, however it is important that if using a list, the **list -is part of the string**:: - - billing_address = relationship("Address", foreign_keys="[Customer.billing_address_id]") - -In this specific example, the list is not necessary in any case as there's only -one :class:`.Column` we need:: - - billing_address = relationship("Address", foreign_keys="Customer.billing_address_id") - -.. 
versionchanged:: 0.8 - :func:`.relationship` can resolve ambiguity between foreign key targets on the - basis of the ``foreign_keys`` argument alone; the :paramref:`~.relationship.primaryjoin` - argument is no longer needed in this situation. - -.. _relationship_primaryjoin: - -Specifying Alternate Join Conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The default behavior of :func:`.relationship` when constructing a join -is that it equates the value of primary key columns -on one side to that of foreign-key-referring columns on the other. -We can change this criterion to be anything we'd like using the -:paramref:`~.relationship.primaryjoin` -argument, as well as the :paramref:`~.relationship.secondaryjoin` -argument in the case when a "secondary" table is used. - -In the example below, using the ``User`` class -as well as an ``Address`` class which stores a street address, we -create a relationship ``boston_addresses`` which will only -load those ``Address`` objects which specify a city of "Boston":: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - boston_addresses = relationship("Address", - primaryjoin="and_(User.id==Address.user_id, " - "Address.city=='Boston')") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - user_id = Column(Integer, ForeignKey('user.id')) - - street = Column(String) - city = Column(String) - state = Column(String) - zip = Column(String) - -Within this string SQL expression, we made use of the :func:`.and_` conjunction construct to establish -two distinct predicates for the join condition - joining both the ``User.id`` and -``Address.user_id`` columns to each other, as well as limiting rows in ``Address`` -to just ``city='Boston'``. When using Declarative, rudimentary SQL functions like -:func:`.and_` are automatically available in the evaluated namespace of a string -:func:`.relationship` argument. - -The custom criteria we use in a :paramref:`~.relationship.primaryjoin` -is generally only significant when SQLAlchemy is rendering SQL in -order to load or represent this relationship. That is, it's used in -the SQL statement that's emitted in order to perform a per-attribute -lazy load, or when a join is constructed at query time, such as via -:meth:`.Query.join`, or via the eager "joined" or "subquery" styles of -loading. When in-memory objects are being manipulated, we can place -any ``Address`` object we'd like into the ``boston_addresses`` -collection, regardless of what the value of the ``.city`` attribute -is. The objects will remain present in the collection until the -attribute is expired and re-loaded from the database where the -criterion is applied. When a flush occurs, the objects inside of -``boston_addresses`` will be flushed unconditionally, assigning value -of the primary key ``user.id`` column onto the foreign-key-holding -``address.user_id`` column for each row. The ``city`` criteria has no -effect here, as the flush process only cares about synchronizing -primary key values into referencing foreign key values. - -.. _relationship_custom_foreign: - -Creating Custom Foreign Conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Another element of the primary join condition is how those columns -considered "foreign" are determined. 
Usually, some subset -of :class:`.Column` objects will specify :class:`.ForeignKey`, or otherwise -be part of a :class:`.ForeignKeyConstraint` that's relevant to the join condition. -:func:`.relationship` looks to this foreign key status as it decides -how it should load and persist data for this relationship. However, the -:paramref:`~.relationship.primaryjoin` argument can be used to create a join condition that -doesn't involve any "schema" level foreign keys. We can combine :paramref:`~.relationship.primaryjoin` -along with :paramref:`~.relationship.foreign_keys` and :paramref:`~.relationship.remote_side` explicitly in order to -establish such a join. - -Below, a class ``HostEntry`` joins to itself, equating the string ``content`` -column to the ``ip_address`` column, which is a Postgresql type called ``INET``. -We need to use :func:`.cast` in order to cast one side of the join to the -type of the other:: - - from sqlalchemy import cast, String, Column, Integer - from sqlalchemy.orm import relationship - from sqlalchemy.dialects.postgresql import INET - - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class HostEntry(Base): - __tablename__ = 'host_entry' - - id = Column(Integer, primary_key=True) - ip_address = Column(INET) - content = Column(String(50)) - - # relationship() using explicit foreign_keys, remote_side - parent_host = relationship("HostEntry", - primaryjoin=ip_address == cast(content, INET), - foreign_keys=content, - remote_side=ip_address - ) - -The above relationship will produce a join like:: - - SELECT host_entry.id, host_entry.ip_address, host_entry.content - FROM host_entry JOIN host_entry AS host_entry_1 - ON host_entry_1.ip_address = CAST(host_entry.content AS INET) - -An alternative syntax to the above is to use the :func:`.foreign` and -:func:`.remote` :term:`annotations`, inline within the :paramref:`~.relationship.primaryjoin` expression. -This syntax represents the annotations that :func:`.relationship` normally -applies by itself to the join condition given the :paramref:`~.relationship.foreign_keys` and -:paramref:`~.relationship.remote_side` arguments; the functions are provided in the API in the -rare case that :func:`.relationship` can't determine the exact location -of these features on its own:: - - from sqlalchemy.orm import foreign, remote - - class HostEntry(Base): - __tablename__ = 'host_entry' - - id = Column(Integer, primary_key=True) - ip_address = Column(INET) - content = Column(String(50)) - - # relationship() using explicit foreign() and remote() annotations - # in lieu of separate arguments - parent_host = relationship("HostEntry", - primaryjoin=remote(ip_address) == \ - cast(foreign(content), INET), - ) - - -.. _relationship_custom_operator: - -Using custom operators in join conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Another use case for relationships is the use of custom operators, such -as Postgresql's "is contained within" ``<<`` operator when joining with -types such as :class:`.postgresql.INET` and :class:`.postgresql.CIDR`. -For custom operators we use the :meth:`.Operators.op` function:: - - inet_column.op("<<")(cidr_column) - -However, if we construct a :paramref:`~.relationship.primaryjoin` using this -operator, :func:`.relationship` will still need more information. 
This is because -when it examines our primaryjoin condition, it specifically looks for operators -used for **comparisons**, and this is typically a fixed list containing known -comparison operators such as ``==``, ``<``, etc. So for our custom operator -to participate in this system, we need it to register as a comparison operator -using the :paramref:`~.Operators.op.is_comparison` parameter:: - - inet_column.op("<<", is_comparison=True)(cidr_column) - -A complete example:: - - class IPA(Base): - __tablename__ = 'ip_address' - - id = Column(Integer, primary_key=True) - v4address = Column(INET) - - network = relationship("Network", - primaryjoin="IPA.v4address.op('<<', is_comparison=True)" - "(foreign(Network.v4representation))", - viewonly=True - ) - class Network(Base): - __tablename__ = 'network' - - id = Column(Integer, primary_key=True) - v4representation = Column(CIDR) - -Above, a query such as:: - - session.query(IPA).join(IPA.network) - -Will render as:: - - SELECT ip_address.id AS ip_address_id, ip_address.v4address AS ip_address_v4address - FROM ip_address JOIN network ON ip_address.v4address << network.v4representation - -.. versionadded:: 0.9.2 - Added the :paramref:`.Operators.op.is_comparison` - flag to assist in the creation of :func:`.relationship` constructs using - custom operators. - -Non-relational Comparisons / Materialized Path -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. warning:: this section details an experimental feature. - -Using custom expressions means we can produce unorthodox join conditions that -don't obey the usual primary/foreign key model. One such example is the -materialized path pattern, where we compare strings for overlapping path tokens -in order to produce a tree structure. - -Through careful use of :func:`.foreign` and :func:`.remote`, we can build -a relationship that effectively produces a rudimentary materialized path -system. Essentially, when :func:`.foreign` and :func:`.remote` are -on the *same* side of the comparison expression, the relationship is considered -to be "one to many"; when they are on *different* sides, the relationship -is considered to be "many to one". For the comparison we'll use here, -we'll be dealing with collections so we keep things configured as "one to many":: - - class Element(Base): - __tablename__ = 'element' - - path = Column(String, primary_key=True) - - descendants = relationship('Element', - primaryjoin= - remote(foreign(path)).like( - path.concat('/%')), - viewonly=True, - order_by=path) - -Above, if given an ``Element`` object with a path attribute of ``"/foo/bar2"``, -we seek for a load of ``Element.descendants`` to look like:: - - SELECT element.path AS element_path - FROM element - WHERE element.path LIKE ('/foo/bar2' || '/%') ORDER BY element.path - -.. versionadded:: 0.9.5 Support has been added to allow a single-column - comparison to itself within a primaryjoin condition, as well as for - primaryjoin conditions that use :meth:`.Operators.like` as the comparison - operator. - -.. _self_referential_many_to_many: - -Self-Referential Many-to-Many Relationship -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Many to many relationships can be customized by one or both of :paramref:`~.relationship.primaryjoin` -and :paramref:`~.relationship.secondaryjoin` - the latter is significant for a relationship that -specifies a many-to-many reference using the :paramref:`~.relationship.secondary` argument. 
-A common situation which involves the usage of :paramref:`~.relationship.primaryjoin` and :paramref:`~.relationship.secondaryjoin` -is when establishing a many-to-many relationship from a class to itself, as shown below:: - - from sqlalchemy import Integer, ForeignKey, String, Column, Table - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - node_to_node = Table("node_to_node", Base.metadata, - Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), - Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) - ) - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - label = Column(String) - right_nodes = relationship("Node", - secondary=node_to_node, - primaryjoin=id==node_to_node.c.left_node_id, - secondaryjoin=id==node_to_node.c.right_node_id, - backref="left_nodes" - ) - -Where above, SQLAlchemy can't know automatically which columns should connect -to which for the ``right_nodes`` and ``left_nodes`` relationships. The :paramref:`~.relationship.primaryjoin` -and :paramref:`~.relationship.secondaryjoin` arguments establish how we'd like to join to the association table. -In the Declarative form above, as we are declaring these conditions within the Python -block that corresponds to the ``Node`` class, the ``id`` variable is available directly -as the :class:`.Column` object we wish to join with. - -Alternatively, we can define the :paramref:`~.relationship.primaryjoin` -and :paramref:`~.relationship.secondaryjoin` arguments using strings, which is suitable -in the case that our configuration does not have either the ``Node.id`` column -object available yet or the ``node_to_node`` table perhaps isn't yet available. -When referring to a plain :class:`.Table` object in a declarative string, we -use the string name of the table as it is present in the :class:`.MetaData`:: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - label = Column(String) - right_nodes = relationship("Node", - secondary="node_to_node", - primaryjoin="Node.id==node_to_node.c.left_node_id", - secondaryjoin="Node.id==node_to_node.c.right_node_id", - backref="left_nodes" - ) - -A classical mapping situation here is similar, where ``node_to_node`` can be joined -to ``node.c.id``:: - - from sqlalchemy import Integer, ForeignKey, String, Column, Table, MetaData - from sqlalchemy.orm import relationship, mapper - - metadata = MetaData() - - node_to_node = Table("node_to_node", metadata, - Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), - Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) - ) - - node = Table("node", metadata, - Column('id', Integer, primary_key=True), - Column('label', String) - ) - class Node(object): - pass - - mapper(Node, node, properties={ - 'right_nodes':relationship(Node, - secondary=node_to_node, - primaryjoin=node.c.id==node_to_node.c.left_node_id, - secondaryjoin=node.c.id==node_to_node.c.right_node_id, - backref="left_nodes" - )}) - - -Note that in both examples, the :paramref:`~.relationship.backref` -keyword specifies a ``left_nodes`` backref - when -:func:`.relationship` creates the second relationship in the reverse -direction, it's smart enough to reverse the -:paramref:`~.relationship.primaryjoin` and -:paramref:`~.relationship.secondaryjoin` arguments. - -.. _composite_secondary_join: - -Composite "Secondary" Joins -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
note:: - - This section features some new and experimental features of SQLAlchemy. - -Sometimes, when one seeks to build a :func:`.relationship` between two tables -there is a need for more than just two or three tables to be involved in -order to join them. This is an area of :func:`.relationship` where one seeks -to push the boundaries of what's possible, and often the ultimate solution to -many of these exotic use cases needs to be hammered out on the SQLAlchemy mailing -list. - -In more recent versions of SQLAlchemy, the :paramref:`~.relationship.secondary` -parameter can be used in some of these cases in order to provide a composite -target consisting of multiple tables. Below is an example of such a -join condition (requires version 0.9.2 at least to function as is):: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - b_id = Column(ForeignKey('b.id')) - - d = relationship("D", - secondary="join(B, D, B.d_id == D.id)." - "join(C, C.d_id == D.id)", - primaryjoin="and_(A.b_id == B.id, A.id == C.a_id)", - secondaryjoin="D.id == B.d_id", - uselist=False - ) - - class B(Base): - __tablename__ = 'b' - - id = Column(Integer, primary_key=True) - d_id = Column(ForeignKey('d.id')) - - class C(Base): - __tablename__ = 'c' - - id = Column(Integer, primary_key=True) - a_id = Column(ForeignKey('a.id')) - d_id = Column(ForeignKey('d.id')) - - class D(Base): - __tablename__ = 'd' - - id = Column(Integer, primary_key=True) - -In the above example, we provide all three of :paramref:`~.relationship.secondary`, -:paramref:`~.relationship.primaryjoin`, and :paramref:`~.relationship.secondaryjoin`, -in the declarative style referring to the named tables ``a``, ``b``, ``c``, ``d`` -directly. A query from ``A`` to ``D`` looks like: - -.. sourcecode:: python+sql - - sess.query(A).join(A.d).all() - - {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id - FROM a JOIN ( - b AS b_1 JOIN d AS d_1 ON b_1.d_id = d_1.id - JOIN c AS c_1 ON c_1.d_id = d_1.id) - ON a.b_id = b_1.id AND a.id = c_1.a_id JOIN d ON d.id = b_1.d_id - -In the above example, we take advantage of being able to stuff multiple -tables into a "secondary" container, so that we can join across many -tables while still keeping things "simple" for :func:`.relationship`, in that -there's just "one" table on both the "left" and the "right" side; the -complexity is kept within the middle. - -.. versionadded:: 0.9.2 Support is improved for allowing a :func:`.join()` - construct to be used directly as the target of the :paramref:`~.relationship.secondary` - argument, including support for joins, eager joins and lazy loading, - as well as support within declarative to specify complex conditions such - as joins involving class names as targets. - -.. _relationship_non_primary_mapper: - -Relationship to Non Primary Mapper -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In the previous section, we illustrated a technique where we used -:paramref:`~.relationship.secondary` in order to place additional -tables within a join condition. There is one complex join case where -even this technique is not sufficient; when we seek to join from ``A`` -to ``B``, making use of any number of ``C``, ``D``, etc. in between, -however there are also join conditions between ``A`` and ``B`` -*directly*. 
In this case, the join from ``A`` to ``B`` may be
-difficult to express with just a complex
-:paramref:`~.relationship.primaryjoin` condition, as the intermediary
-tables may need special handling, and it is also not expressible with
-a :paramref:`~.relationship.secondary` object, since the
-``A->secondary->B`` pattern does not support any references between
-``A`` and ``B`` directly.  When this **extremely advanced** case
-arises, we can resort to creating a second mapping as a target for the
-relationship.  This is where we use :func:`.mapper` in order to make a
-mapping to a class that includes all the additional tables we need for
-this join.  In order to produce this mapper as an "alternative" mapping
-for our class, we use the :paramref:`~.mapper.non_primary` flag.
-
-Below illustrates a :func:`.relationship` with a simple join from ``A`` to
-``B``, however the primaryjoin condition is augmented with two additional
-entities ``C`` and ``D``, which also must have rows that line up with
-the rows in both ``A`` and ``B`` simultaneously::
-
-    class A(Base):
-        __tablename__ = 'a'
-
-        id = Column(Integer, primary_key=True)
-        b_id = Column(ForeignKey('b.id'))
-
-    class B(Base):
-        __tablename__ = 'b'
-
-        id = Column(Integer, primary_key=True)
-
-    class C(Base):
-        __tablename__ = 'c'
-
-        id = Column(Integer, primary_key=True)
-        a_id = Column(ForeignKey('a.id'))
-
-    class D(Base):
-        __tablename__ = 'd'
-
-        id = Column(Integer, primary_key=True)
-        c_id = Column(ForeignKey('c.id'))
-        b_id = Column(ForeignKey('b.id'))
-
-    # 1. set up the join() as a variable, so we can refer
-    # to it in the mapping multiple times.
-    j = join(B, D, D.b_id == B.id).join(C, C.id == D.c_id)
-
-    # 2. Create a new mapper() to B, with non_primary=True.
-    # Columns in the join with the same name must be
-    # disambiguated within the mapping, using named properties.
-    B_viacd = mapper(B, j, non_primary=True, properties={
-        "b_id": [j.c.b_id, j.c.d_b_id],
-        "d_id": j.c.d_id
-        })
-
-    A.b = relationship(B_viacd, primaryjoin=A.b_id == B_viacd.c.b_id)
-
-In the above case, our non-primary mapper for ``B`` will include
-additional columns in the query; these can be ignored:
-
-.. sourcecode:: python+sql
-
-    sess.query(A).join(A.b).all()
-
-    {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id
-    FROM a JOIN (b JOIN d ON d.b_id = b.id JOIN c ON c.id = d.c_id) ON a.b_id = b.id
-
-
-Building Query-Enabled Properties
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Very ambitious custom join conditions may fail to be directly persistable, and
-in some cases may not even load correctly.  To remove the persistence part of
-the equation, use the flag :paramref:`~.relationship.viewonly` on the
-:func:`~sqlalchemy.orm.relationship`, which establishes it as a read-only
-attribute (data written to the collection will be ignored on flush()).
-However, in extreme cases, consider using a regular Python property in
-conjunction with :class:`.Query` as follows:
-
-.. sourcecode:: python+sql
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-
-        def _get_addresses(self):
-            return object_session(self).query(Address).with_parent(self).filter(...).all()
-        addresses = property(_get_addresses)
-
-
-.. _post_update:
-
-Rows that point to themselves / Mutually Dependent Rows
--------------------------------------------------------
-
-This is a very specific case where relationship() must perform an INSERT and a
-second UPDATE in order to properly populate a row (and vice versa an UPDATE
-and DELETE in order to delete without violating foreign key constraints). The
-two use cases are:
-
-* A table contains a foreign key to itself, and a single row will
-  have a foreign key value pointing to its own primary key.
-* Two tables each contain a foreign key referencing the other
-  table, with a row in each table referencing the other.
-
-For example::
-
-              user
-    ---------------------------------
-    user_id    name   related_user_id
-       1       'ed'          1
-
-Or::
-
-                 widget                                                  entry
-    -------------------------------------------             ---------------------------------
-    widget_id     name        favorite_entry_id             entry_id      name      widget_id
-       1       'somewidget'          5                         5       'someentry'     1
-
-In the first case, a row points to itself. Technically, a database that uses
-sequences such as PostgreSQL or Oracle can INSERT the row at once using a
-previously generated value, but databases which rely upon autoincrement-style
-primary key identifiers cannot. The :func:`~sqlalchemy.orm.relationship`
-always assumes a "parent/child" model of row population during flush, so
-unless you are populating the primary key/foreign key columns directly,
-:func:`~sqlalchemy.orm.relationship` needs to use two statements.
-
-In the second case, the "widget" row must be inserted before any referring
-"entry" rows, but then the "favorite_entry_id" column of that "widget" row
-cannot be set until the "entry" rows have been generated. In this case, it's
-typically impossible to insert the "widget" and "entry" rows using just two
-INSERT statements; an UPDATE must be performed in order to keep foreign key
-constraints fulfilled. The exception is if the foreign keys are configured as
-"deferred until commit" (a feature some databases support) and if the
-identifiers were populated manually (again essentially bypassing
-:func:`~sqlalchemy.orm.relationship`).
-
-To enable the usage of a supplementary UPDATE statement,
-we use the :paramref:`~.relationship.post_update` option
-of :func:`.relationship`.  This specifies that the linkage between the
-two rows should be created using an UPDATE statement after both rows
-have been INSERTed; it also causes the rows to be de-associated with
-each other via UPDATE before a DELETE is emitted.  The flag should
-be placed on just *one* of the relationships, preferably the
-many-to-one side.
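-
-Schematically (an abbreviated sketch only; the full mapping follows below),
-the flag rides along with the many-to-one :func:`.relationship`::
-
-    # on the Widget mapping, the many-to-one reference to its
-    # "favorite" Entry is populated with a second UPDATE statement
-    favorite_entry = relationship(Entry,
-                        primaryjoin=favorite_entry_id==Entry.entry_id,
-                        post_update=True)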
Below we illustrate a complete example, including two :class:`.ForeignKey` constructs, one which
-specifies :paramref:`~.ForeignKey.use_alter` to help with emitting CREATE TABLE statements::
-
-    from sqlalchemy import Integer, ForeignKey, String, Column
-    from sqlalchemy.ext.declarative import declarative_base
-    from sqlalchemy.orm import relationship
-
-    Base = declarative_base()
-
-    class Entry(Base):
-        __tablename__ = 'entry'
-        entry_id = Column(Integer, primary_key=True)
-        widget_id = Column(Integer, ForeignKey('widget.widget_id'))
-        name = Column(String(50))
-
-    class Widget(Base):
-        __tablename__ = 'widget'
-
-        widget_id = Column(Integer, primary_key=True)
-        favorite_entry_id = Column(Integer,
-                                ForeignKey('entry.entry_id',
-                                use_alter=True,
-                                name="fk_favorite_entry"))
-        name = Column(String(50))
-
-        entries = relationship(Entry, primaryjoin=
-                                        widget_id==Entry.widget_id)
-        favorite_entry = relationship(Entry,
-                                    primaryjoin=
-                                        favorite_entry_id==Entry.entry_id,
-                                    post_update=True)
-
-When a structure against the above configuration is flushed, the "widget" row will be
-INSERTed minus the "favorite_entry_id" value, then all the "entry" rows will
-be INSERTed referencing the parent "widget" row, and then an UPDATE statement
-will populate the "favorite_entry_id" column of the "widget" table (it's one
-row at a time for the time being):
-
-.. sourcecode:: pycon+sql
-
-    >>> w1 = Widget(name='somewidget')
-    >>> e1 = Entry(name='someentry')
-    >>> w1.favorite_entry = e1
-    >>> w1.entries = [e1]
-    >>> session.add_all([w1, e1])
-    {sql}>>> session.commit()
-    BEGIN (implicit)
-    INSERT INTO widget (favorite_entry_id, name) VALUES (?, ?)
-    (None, 'somewidget')
-    INSERT INTO entry (widget_id, name) VALUES (?, ?)
-    (1, 'someentry')
-    UPDATE widget SET favorite_entry_id=? WHERE widget.widget_id = ?
-    (1, 1)
-    COMMIT
-
-An additional configuration we can specify is to supply a more
-comprehensive foreign key constraint on ``Widget``, such that
-it's guaranteed that ``favorite_entry_id`` refers to an ``Entry``
-that also refers to this ``Widget``.  We can use a composite foreign key,
-as illustrated below::
-
-    from sqlalchemy import Integer, ForeignKey, String, \
-            Column, UniqueConstraint, ForeignKeyConstraint
-    from sqlalchemy.ext.declarative import declarative_base
-    from sqlalchemy.orm import relationship
-
-    Base = declarative_base()
-
-    class Entry(Base):
-        __tablename__ = 'entry'
-        entry_id = Column(Integer, primary_key=True)
-        widget_id = Column(Integer, ForeignKey('widget.widget_id'))
-        name = Column(String(50))
-        __table_args__ = (
-            UniqueConstraint("entry_id", "widget_id"),
-        )
-
-    class Widget(Base):
-        __tablename__ = 'widget'
-
-        widget_id = Column(Integer, autoincrement='ignore_fk', primary_key=True)
-        favorite_entry_id = Column(Integer)
-
-        name = Column(String(50))
-
-        __table_args__ = (
-            ForeignKeyConstraint(
-                ["widget_id", "favorite_entry_id"],
-                ["entry.widget_id", "entry.entry_id"],
-                name="fk_favorite_entry", use_alter=True
-            ),
-        )
-
-        entries = relationship(Entry, primaryjoin=
-                                        widget_id==Entry.widget_id,
-                                        foreign_keys=Entry.widget_id)
-        favorite_entry = relationship(Entry,
-                                    primaryjoin=
-                                        favorite_entry_id==Entry.entry_id,
-                                    foreign_keys=favorite_entry_id,
-                                    post_update=True)
-
-The above mapping features a composite :class:`.ForeignKeyConstraint`
-bridging the ``widget_id`` and ``favorite_entry_id`` columns.
To ensure -that ``Widget.widget_id`` remains an "autoincrementing" column we specify -:paramref:`~.Column.autoincrement` to the value ``"ignore_fk"`` -on :class:`.Column`, and additionally on each -:func:`.relationship` we must limit those columns considered as part of -the foreign key for the purposes of joining and cross-population. - -.. _passive_updates: - -Mutable Primary Keys / Update Cascades ---------------------------------------- - -When the primary key of an entity changes, related items -which reference the primary key must also be updated as -well. For databases which enforce referential integrity, -it's required to use the database's ON UPDATE CASCADE -functionality in order to propagate primary key changes -to referenced foreign keys - the values cannot be out -of sync for any moment. - -For databases that don't support this, such as SQLite and -MySQL without their referential integrity options turned -on, the :paramref:`~.relationship.passive_updates` flag can -be set to ``False``, most preferably on a one-to-many or -many-to-many :func:`.relationship`, which instructs -SQLAlchemy to issue UPDATE statements individually for -objects referenced in the collection, loading them into -memory if not already locally present. The -:paramref:`~.relationship.passive_updates` flag can also be ``False`` in -conjunction with ON UPDATE CASCADE functionality, -although in that case the unit of work will be issuing -extra SELECT and UPDATE statements unnecessarily. - -A typical mutable primary key setup might look like:: - - class User(Base): - __tablename__ = 'user' - - username = Column(String(50), primary_key=True) - fullname = Column(String(100)) - - # passive_updates=False *only* needed if the database - # does not implement ON UPDATE CASCADE - addresses = relationship("Address", passive_updates=False) - - class Address(Base): - __tablename__ = 'address' - - email = Column(String(50), primary_key=True) - username = Column(String(50), - ForeignKey('user.username', onupdate="cascade") - ) - -:paramref:`~.relationship.passive_updates` is set to ``True`` by default, -indicating that ON UPDATE CASCADE is expected to be in -place in the usual case for foreign keys that expect -to have a mutating parent key. - -A :paramref:`~.relationship.passive_updates` setting of False may be configured on any -direction of relationship, i.e. one-to-many, many-to-one, -and many-to-many, although it is much more effective when -placed just on the one-to-many or many-to-many side. -Configuring the :paramref:`~.relationship.passive_updates` -to False only on the -many-to-one side will have only a partial effect, as the -unit of work searches only through the current identity -map for objects that may be referencing the one with a -mutating primary key, not throughout the database. - -Relationships API ------------------ - -.. autofunction:: relationship - -.. autofunction:: backref - -.. autofunction:: relation - -.. autofunction:: dynamic_loader - -.. autofunction:: foreign - -.. autofunction:: remote - - +.. toctree:: + :maxdepth: 2 + + basic_relationships + self_referential + backref + join_conditions + collections + relationship_persistence + relationship_api diff --git a/doc/build/orm/scalar_mapping.rst b/doc/build/orm/scalar_mapping.rst new file mode 100644 index 0000000000..65efd5dbd9 --- /dev/null +++ b/doc/build/orm/scalar_mapping.rst @@ -0,0 +1,18 @@ +.. 
module:: sqlalchemy.orm + +=============================== +Mapping Columns and Expressions +=============================== + +The following sections discuss how table columns and SQL expressions are +mapped to individual object attributes. + +.. toctree:: + :maxdepth: 2 + + mapping_columns + mapped_sql_expr + mapped_attributes + composites + + diff --git a/doc/build/orm/self_referential.rst b/doc/build/orm/self_referential.rst new file mode 100644 index 0000000000..f6ed35fd6e --- /dev/null +++ b/doc/build/orm/self_referential.rst @@ -0,0 +1,261 @@ +.. _self_referential: + +Adjacency List Relationships +----------------------------- + +The **adjacency list** pattern is a common relational pattern whereby a table +contains a foreign key reference to itself. This is the most common +way to represent hierarchical data in flat tables. Other methods +include **nested sets**, sometimes called "modified preorder", +as well as **materialized path**. Despite the appeal that modified preorder +has when evaluated for its fluency within SQL queries, the adjacency list model is +probably the most appropriate pattern for the large majority of hierarchical +storage needs, for reasons of concurrency, reduced complexity, and that +modified preorder has little advantage over an application which can fully +load subtrees into the application space. + +In this example, we'll work with a single mapped +class called ``Node``, representing a tree structure:: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + children = relationship("Node") + +With this structure, a graph such as the following:: + + root --+---> child1 + +---> child2 --+--> subchild1 + | +--> subchild2 + +---> child3 + +Would be represented with data such as:: + + id parent_id data + --- ------- ---- + 1 NULL root + 2 1 child1 + 3 1 child2 + 4 3 subchild1 + 5 3 subchild2 + 6 1 child3 + +The :func:`.relationship` configuration here works in the +same way as a "normal" one-to-many relationship, with the +exception that the "direction", i.e. whether the relationship +is one-to-many or many-to-one, is assumed by default to +be one-to-many. To establish the relationship as many-to-one, +an extra directive is added known as :paramref:`~.relationship.remote_side`, which +is a :class:`.Column` or collection of :class:`.Column` objects +that indicate those which should be considered to be "remote":: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + parent = relationship("Node", remote_side=[id]) + +Where above, the ``id`` column is applied as the :paramref:`~.relationship.remote_side` +of the ``parent`` :func:`.relationship`, thus establishing +``parent_id`` as the "local" side, and the relationship +then behaves as a many-to-one. + +As always, both directions can be combined into a bidirectional +relationship using the :func:`.backref` function:: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + children = relationship("Node", + backref=backref('parent', remote_side=[id]) + ) + +There are several examples included with SQLAlchemy illustrating +self-referential strategies; these include :ref:`examples_adjacencylist` and +:ref:`examples_xmlpersistence`. 
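+
+As an illustrative aside (a hypothetical snippet; ``root`` and ``child`` are
+not part of the mapping above), the bidirectional version keeps both sides of
+the relationship in sync purely in memory::
+
+    root = Node(data='root')
+    child = Node(data='child1')
+
+    # appending to the one-to-many collection also sets the
+    # many-to-one 'parent' attribute created by the backref
+    root.children.append(child)
+    assert child.parent is root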
+
+Composite Adjacency Lists
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A sub-category of the adjacency list relationship is the rare
+case where a particular column is present on both the "local" and
+"remote" side of the join condition.  An example is the ``Folder``
+class below; using a composite primary key, the ``account_id``
+column refers to itself, to indicate sub folders which are within
+the same account as that of the parent; while ``folder_id`` refers
+to a specific folder within that account::
+
+    class Folder(Base):
+        __tablename__ = 'folder'
+        __table_args__ = (
+            ForeignKeyConstraint(
+                ['account_id', 'parent_id'],
+                ['folder.account_id', 'folder.folder_id']),
+        )
+
+        account_id = Column(Integer, primary_key=True)
+        folder_id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer)
+        name = Column(String)
+
+        parent_folder = relationship("Folder",
+                            backref="child_folders",
+                            remote_side=[account_id, folder_id]
+                    )
+
+Above, we pass ``account_id`` into the :paramref:`~.relationship.remote_side` list.
+:func:`.relationship` recognizes that the ``account_id`` column here
+is on both sides, and aligns the "remote" column along with the
+``folder_id`` column, which it recognizes as uniquely present on
+the "remote" side.
+
+.. versionadded:: 0.8
+    Support for self-referential composite keys in :func:`.relationship`
+    where a column points to itself.
+
+Self-Referential Query Strategies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Querying of self-referential structures works like any other query::
+
+    # get all nodes named 'child2'
+    session.query(Node).filter(Node.data=='child2')
+
+However, extra care is needed when attempting to join along
+the foreign key from one level of the tree to the next.  In SQL,
+a join from a table to itself requires that at least one side of the
+expression be "aliased" so that it can be unambiguously referred to.
+
+Recall from :ref:`ormtutorial_aliases` in the ORM tutorial that the
+:func:`.orm.aliased` construct is normally used to provide an "alias" of
+an ORM entity.  Joining from ``Node`` to itself using this technique
+looks like:
+
+.. sourcecode:: python+sql
+
+    from sqlalchemy.orm import aliased
+
+    nodealias = aliased(Node)
+    {sql}session.query(Node).filter(Node.data=='subchild1').\
+                join(nodealias, Node.parent).\
+                filter(nodealias.data=="child2").\
+                all()
+    SELECT node.id AS node_id,
+            node.parent_id AS node_parent_id,
+            node.data AS node_data
+    FROM node JOIN node AS node_1
+        ON node.parent_id = node_1.id
+    WHERE node.data = ?
+        AND node_1.data = ?
+    ['subchild1', 'child2']
+
+:meth:`.Query.join` also includes a feature known as
+:paramref:`.Query.join.aliased` that can shorten the verbosity of
+self-referential joins, at the expense of query flexibility.  This feature
+performs a similar "aliasing" step to that above, without the need for
+an explicit entity.  Calls to :meth:`.Query.filter` and similar
+subsequent to the aliased join will **adapt** the ``Node`` entity to
+be that of the alias:
+
+.. sourcecode:: python+sql
+
+    {sql}session.query(Node).filter(Node.data=='subchild1').\
+            join(Node.parent, aliased=True).\
+            filter(Node.data=='child2').\
+            all()
+    SELECT node.id AS node_id,
+            node.parent_id AS node_parent_id,
+            node.data AS node_data
+    FROM node
+        JOIN node AS node_1 ON node_1.id = node.parent_id
+    WHERE node.data = ? AND node_1.data = ?
+    ['subchild1', 'child2']
+
+To add criteria to multiple points along a longer join, add
+:paramref:`.Query.join.from_joinpoint` to the additional
+:meth:`~.Query.join` calls:
+
+.. sourcecode:: python+sql
+
+    # get all nodes named 'subchild1' with a
+    # parent named 'child2' and a grandparent 'root'
+    {sql}session.query(Node).\
+            filter(Node.data=='subchild1').\
+            join(Node.parent, aliased=True).\
+            filter(Node.data=='child2').\
+            join(Node.parent, aliased=True, from_joinpoint=True).\
+            filter(Node.data=='root').\
+            all()
+    SELECT node.id AS node_id,
+            node.parent_id AS node_parent_id,
+            node.data AS node_data
+    FROM node
+        JOIN node AS node_1 ON node_1.id = node.parent_id
+        JOIN node AS node_2 ON node_2.id = node_1.parent_id
+    WHERE node.data = ?
+        AND node_1.data = ?
+        AND node_2.data = ?
+    ['subchild1', 'child2', 'root']
+
+:meth:`.Query.reset_joinpoint` will also remove the "aliasing" from filtering
+calls::
+
+    session.query(Node).\
+        join(Node.children, aliased=True).\
+        filter(Node.data == 'foo').\
+        reset_joinpoint().\
+        filter(Node.data == 'bar')
+
+For an example of using :paramref:`.Query.join.aliased` to
+arbitrarily join along a chain of self-referential nodes, see
+:ref:`examples_xmlpersistence`.
+
+.. _self_referential_eager_loading:
+
+Configuring Self-Referential Eager Loading
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Eager loading of relationships occurs using joins or outerjoins from parent to
+child table during a normal query operation, such that the parent and its
+immediate child collection or reference can be populated from a single SQL
+statement, or a second statement for all immediate child collections.
+SQLAlchemy's joined and subquery eager loading use aliased tables in all cases
+when joining to related items, so are compatible with self-referential
+joining.  However, to use eager loading with a self-referential relationship,
+SQLAlchemy needs to be told how many levels deep it should join and/or query;
+otherwise the eager load will not take place at all.  This depth setting is
+configured via :paramref:`~.relationship.join_depth`:
+
+.. sourcecode:: python+sql
+
+    class Node(Base):
+        __tablename__ = 'node'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('node.id'))
+        data = Column(String(50))
+        children = relationship("Node",
+                        lazy="joined",
+                        join_depth=2)
+
+    {sql}session.query(Node).all()
+    SELECT node_1.id AS node_1_id,
+            node_1.parent_id AS node_1_parent_id,
+            node_1.data AS node_1_data,
+            node_2.id AS node_2_id,
+            node_2.parent_id AS node_2_parent_id,
+            node_2.data AS node_2_data,
+            node.id AS node_id,
+            node.parent_id AS node_parent_id,
+            node.data AS node_data
+    FROM node
+        LEFT OUTER JOIN node AS node_2
+            ON node.id = node_2.parent_id
+        LEFT OUTER JOIN node AS node_1
+            ON node_2.id = node_1.parent_id
+    []
+
diff --git a/doc/build/orm/session.rst b/doc/build/orm/session.rst
index 78ae1ba814..a6daed14da 100644
--- a/doc/build/orm/session.rst
+++ b/doc/build/orm/session.rst
@@ -11,2522 +11,14 @@ are the primary configurational interface for the ORM.  Once mappings are
 configured, the primary usage interface for persistence operations is the
 :class:`.Session`.
-What does the Session do ?
-==========================
-
-In the most general sense, the :class:`~.Session` establishes all
-conversations with the database and represents a "holding zone" for all the
-objects which you've loaded or associated with it during its lifespan.
It -provides the entrypoint to acquire a :class:`.Query` object, which sends -queries to the database using the :class:`~.Session` object's current database -connection, populating result rows into objects that are then stored in the -:class:`.Session`, inside a structure called the `Identity Map -`_ - a data structure -that maintains unique copies of each object, where "unique" means "only one -object with a particular primary key". - -The :class:`.Session` begins in an essentially stateless form. Once queries -are issued or other objects are persisted with it, it requests a connection -resource from an :class:`.Engine` that is associated either with the -:class:`.Session` itself or with the mapped :class:`.Table` objects being -operated upon. This connection represents an ongoing transaction, which -remains in effect until the :class:`.Session` is instructed to commit or roll -back its pending state. - -All changes to objects maintained by a :class:`.Session` are tracked - before -the database is queried again or before the current transaction is committed, -it **flushes** all pending changes to the database. This is known as the `Unit -of Work `_ pattern. - -When using a :class:`.Session`, it's important to note that the objects -which are associated with it are **proxy objects** to the transaction being -held by the :class:`.Session` - there are a variety of events that will cause -objects to re-access the database in order to keep synchronized. It is -possible to "detach" objects from a :class:`.Session`, and to continue using -them, though this practice has its caveats. It's intended that -usually, you'd re-associate detached objects with another :class:`.Session` when you -want to work with them again, so that they can resume their normal task of -representing database state. - -.. _session_getting: - -Getting a Session -================= - -:class:`.Session` is a regular Python class which can -be directly instantiated. However, to standardize how sessions are configured -and acquired, the :class:`.sessionmaker` class is normally -used to create a top level :class:`.Session` -configuration which can then be used throughout an application without the -need to repeat the configurational arguments. - -The usage of :class:`.sessionmaker` is illustrated below: - -.. sourcecode:: python+sql - - from sqlalchemy import create_engine - from sqlalchemy.orm import sessionmaker - - # an Engine, which the Session will use for connection - # resources - some_engine = create_engine('postgresql://scott:tiger@localhost/') - - # create a configured "Session" class - Session = sessionmaker(bind=some_engine) - - # create a Session - session = Session() - - # work with sess - myobject = MyObject('foo', 'bar') - session.add(myobject) - session.commit() - -Above, the :class:`.sessionmaker` call creates a factory for us, -which we assign to the name ``Session``. This factory, when -called, will create a new :class:`.Session` object using the configurational -arguments we've given the factory. In this case, as is typical, -we've configured the factory to specify a particular :class:`.Engine` for -connection resources. - -A typical setup will associate the :class:`.sessionmaker` with an :class:`.Engine`, -so that each :class:`.Session` generated will use this :class:`.Engine` -to acquire connection resources. This association can -be set up as in the example above, using the ``bind`` argument. - -When you write your application, place the -:class:`.sessionmaker` factory at the global level. 
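-
-For example, the factory might live in its own module (a sketch only; the
-``mypackage`` layout and module names here are hypothetical)::
-
-    # mypackage/database.py
-    from sqlalchemy import create_engine
-    from sqlalchemy.orm import sessionmaker
-
-    engine = create_engine('postgresql://scott:tiger@localhost/')
-    Session = sessionmaker(bind=engine)
-
-    # elsewhere in the application
-    from mypackage.database import Session
-
-    session = Session()
-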
This -factory can then -be used by the rest of the application as the source of new :class:`.Session` -instances, keeping the configuration for how :class:`.Session` objects -are constructed in one place. - -The :class:`.sessionmaker` factory can also be used in conjunction with -other helpers, which are passed a user-defined :class:`.sessionmaker` that -is then maintained by the helper. Some of these helpers are discussed in the -section :ref:`session_faq_whentocreate`. - -Adding Additional Configuration to an Existing sessionmaker() -------------------------------------------------------------- - -A common scenario is where the :class:`.sessionmaker` is invoked -at module import time; however, the generation of one or more :class:`.Engine` -instances to be associated with the :class:`.sessionmaker` has not yet proceeded. -For this use case, the :class:`.sessionmaker` construct offers the -:meth:`.sessionmaker.configure` method, which will place additional configuration -directives into an existing :class:`.sessionmaker` that will take place -when the construct is invoked:: - - - from sqlalchemy.orm import sessionmaker - from sqlalchemy import create_engine - - # configure Session class with desired options - Session = sessionmaker() - - # later, we create the engine - engine = create_engine('postgresql://...') - - # associate it with our custom Session class - Session.configure(bind=engine) - - # work with the session - session = Session() - -Creating Ad-Hoc Session Objects with Alternate Arguments ---------------------------------------------------------- - -For the use case where an application needs to create a new :class:`.Session` with -special arguments that deviate from what is normally used throughout the application, -such as a :class:`.Session` that binds to an alternate -source of connectivity, or a :class:`.Session` that should -have other arguments such as ``expire_on_commit`` established differently from -what most of the application wants, specific arguments can be passed to the -:class:`.sessionmaker` factory's :meth:`.sessionmaker.__call__` method. -These arguments will override whatever -configurations have already been placed, such as below, where a new :class:`.Session` -is constructed against a specific :class:`.Connection`:: - - # at the module level, the global sessionmaker, - # bound to a specific Engine - Session = sessionmaker(bind=engine) - - # later, some unit of code wants to create a - # Session that is bound to a specific Connection - conn = engine.connect() - session = Session(bind=conn) - -The typical rationale for the association of a :class:`.Session` with a specific -:class:`.Connection` is that of a test fixture that maintains an external -transaction - see :ref:`session_external_transaction` for an example of this. - -Using the Session -================== - -.. _session_object_states: - -Quickie Intro to Object States ------------------------------- - -It's helpful to know the states which an instance can have within a session: - -* **Transient** - an instance that's not in a session, and is not saved to the - database; i.e. it has no database identity. The only relationship such an - object has to the ORM is that its class has a ``mapper()`` associated with - it. - -* **Pending** - when you :meth:`~.Session.add` a transient - instance, it becomes pending. It hasn't actually been flushed to the - database yet, but it will be when the next flush occurs.
- -* **Persistent** - An instance which is present in the session and has a record - in the database. You get persistent instances by either flushing so that the - pending instances become persistent, or by querying the database for - existing instances (or moving persistent instances from other sessions into - your local session). - -* **Detached** - an instance which has a record in the database, but is not in - any session. There's nothing wrong with this, and you can use objects - normally when they're detached, **except** they will not be able to issue - any SQL in order to load collections or attributes which are not yet loaded, - or were marked as "expired". - -Knowing these states is important, since the -:class:`.Session` tries to be strict about ambiguous -operations (such as trying to save the same object to two different sessions -at the same time). - -Getting the Current State of an Object -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The actual state of any mapped object can be viewed at any time using -the :func:`.inspect` system:: - - >>> from sqlalchemy import inspect - >>> insp = inspect(my_object) - >>> insp.persistent - True - -.. seealso:: - - :attr:`.InstanceState.transient` - - :attr:`.InstanceState.pending` - - :attr:`.InstanceState.persistent` - - :attr:`.InstanceState.detached` - - -.. _session_faq: - -Session Frequently Asked Questions ------------------------------------ - - -When do I make a :class:`.sessionmaker`? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Just one time, somewhere in your application's global scope. It should be -looked upon as part of your application's configuration. If your -application has three .py files in a package, you could, for example, -place the :class:`.sessionmaker` line in your ``__init__.py`` file; from -that point on your other modules say "from mypackage import Session". That -way, everyone else just uses :class:`.Session()`, -and the configuration of that session is controlled by that central point. - -If your application starts up, does imports, but does not know what -database it's going to be connecting to, you can bind the -:class:`.Session` at the "class" level to the -engine later on, using :meth:`.sessionmaker.configure`. - -In the examples in this section, we will frequently show the -:class:`.sessionmaker` being created right above the line where we actually -invoke :class:`.Session`. But that's just for -example's sake! In reality, the :class:`.sessionmaker` would be somewhere -at the module level. The calls to instantiate :class:`.Session` -would then be placed at the point in the application where database -conversations begin. - -.. _session_faq_whentocreate: - -When do I construct a :class:`.Session`, when do I commit it, and when do I close it? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. topic:: tl;dr; - - As a general rule, keep the lifecycle of the session **separate and - external** from functions and objects that access and/or manipulate - database data. - -A :class:`.Session` is typically constructed at the beginning of a logical -operation where database access is potentially anticipated. - -The :class:`.Session`, whenever it is used to talk to the database, -begins a database transaction as soon as it starts communicating. -Assuming the ``autocommit`` flag is left at its recommended default -of ``False``, this transaction remains in progress until the :class:`.Session` -is rolled back, committed, or closed. 
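-
-A minimal sketch of that transaction scope, assuming a configured
-``Session`` factory and a mapped ``User`` class::
-
-    session = Session()                  # no connection or transaction yet
-    user = session.query(User).first()   # connection acquired; transaction begins
-    user.name = 'ed'                     # change tracked in memory, not yet flushed
-    session.commit()                     # flush occurs, transaction ends
-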
The :class:`.Session` will -begin a new transaction if it is used again, subsequent to the previous -transaction ending; from this it follows that the :class:`.Session` -is capable of having a lifespan across many transactions, though only -one at a time. We refer to these two concepts as **transaction scope** -and **session scope**. - -The implication here is that the SQLAlchemy ORM is encouraging the -developer to establish these two scopes in their application, -including not only when the scopes begin and end, but also the -expanse of those scopes; for example, should a single -:class:`.Session` instance be local to the execution flow within a -function or method, should it be a global object used by the -entire application, or something in between? - -The burden placed on the developer to determine this scope is one -area where the SQLAlchemy ORM necessarily has a strong opinion -about how the database should be used. The :term:`unit of work` pattern -is specifically one of accumulating changes over time and flushing -them periodically, keeping in-memory state in sync with what's -known to be present in a local transaction. This pattern is only -effective when meaningful transaction scopes are in place. - -It's usually not very hard to determine the best points at which -to begin and end the scope of a :class:`.Session`, though the wide -variety of application architectures possible can introduce -challenging situations. - -A common choice is to tear down the :class:`.Session` at the same -time the transaction ends, meaning the transaction and session scopes -are the same. This is a great choice to start out with as it -removes the need to consider session scope as separate from transaction -scope. - -While there's no one-size-fits-all recommendation for how transaction -scope should be determined, there are common patterns. Especially -if one is writing a web application, the choice is pretty much established. - -A web application is the easiest case because such an application is already -constructed around a single, consistent scope - this is the **request**, -which represents an incoming request from a browser, the processing -of that request to formulate a response, and finally the delivery of that -response back to the client. Integrating web applications with the -:class:`.Session` is then the straightforward task of linking the -scope of the :class:`.Session` to that of the request. The :class:`.Session` -can be established as the request begins, or using a :term:`lazy initialization` -pattern which establishes one as soon as it is needed. The request -then proceeds, with some system in place where application logic can access -the current :class:`.Session` in a manner associated with how the actual -request object is accessed. As the request ends, the :class:`.Session` -is torn down as well, usually through the usage of event hooks provided -by the web framework. The transaction used by the :class:`.Session` -may also be committed at this point, or alternatively the application may -opt for an explicit commit pattern, only committing for those requests -where one is warranted, but still always tearing down the :class:`.Session` -unconditionally at the end. - -Some web frameworks include infrastructure to assist in the task -of aligning the lifespan of a :class:`.Session` with that of a web request.
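-
-Stripped of any particular framework, the request-scoped pattern looks
-roughly like the following sketch, where ``handle_request`` and
-``render_response`` are hypothetical stand-ins for real framework hooks::
-
-    def handle_request(request):
-        session = Session()
-        try:
-            response = render_response(request, session)
-            session.commit()
-            return response
-        except:
-            session.rollback()
-            raise
-        finally:
-            session.close()
-
-Framework integration layers typically package this lifecycle up as
-ready-made infrastructure.
-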
-This includes products such as `Flask-SQLAlchemy `_, -for usage in conjunction with the Flask web framework, -and `Zope-SQLAlchemy `_, -typically used with the Pyramid framework. -SQLAlchemy recommends that these products be used as available. - -In those situations where the integration libraries are not -provided or are insufficient, SQLAlchemy includes its own "helper" class known as -:class:`.scoped_session`. A tutorial on the usage of this object -is at :ref:`unitofwork_contextual`. It provides both a quick way -to associate a :class:`.Session` with the current thread, as well as -patterns to associate :class:`.Session` objects with other kinds of -scopes. - -As mentioned before, for non-web applications there is no one clear -pattern, as applications themselves don't have just one pattern -of architecture. The best strategy is to attempt to demarcate -"operations", points at which a particular thread begins to perform -a series of operations for some period of time, which can be committed -at the end. Some examples: - -* A background daemon which spawns off child forks - would want to create a :class:`.Session` local to each child - process, work with that :class:`.Session` through the life of the "job" - that the fork is handling, then tear it down when the job is completed. - -* For a command-line script, the application would create a single, global - :class:`.Session` that is established when the program begins to do its - work, and commits it right as the program is completing its task. - -* For a GUI interface-driven application, the scope of the :class:`.Session` - may best be within the scope of a user-generated event, such as a button - push. Or, the scope may correspond to explicit user interaction, such as - the user "opening" a series of records, then "saving" them. - -As a general rule, the application should manage the lifecycle of the -session *externally* to functions that deal with specific data. This is a -fundamental separation of concerns which keeps data-specific operations -agnostic of the context in which they access and manipulate that data. - -E.g. **don't do this**:: - - ### this is the **wrong way to do it** ### - - class ThingOne(object): - def go(self): - session = Session() - try: - session.query(FooBar).update({"x": 5}) - session.commit() - except: - session.rollback() - raise - - class ThingTwo(object): - def go(self): - session = Session() - try: - session.query(Widget).update({"q": 18}) - session.commit() - except: - session.rollback() - raise - - def run_my_program(): - ThingOne().go() - ThingTwo().go() - -Keep the lifecycle of the session (and usually the transaction) -**separate and external**:: - - ### this is a **better** (but not the only) way to do it ### - - class ThingOne(object): - def go(self, session): - session.query(FooBar).update({"x": 5}) - - class ThingTwo(object): - def go(self, session): - session.query(Widget).update({"q": 18}) - - def run_my_program(): - session = Session() - try: - ThingOne().go(session) - ThingTwo().go(session) - - session.commit() - except: - session.rollback() - raise - finally: - session.close() - -The advanced developer will try to keep the details of session, transaction -and exception management as far as possible from the details of the program -doing its work. 
For example, we can further separate concerns using a `context manager `_:: - - ### another way (but again *not the only way*) to do it ### - - from contextlib import contextmanager - - @contextmanager - def session_scope(): - """Provide a transactional scope around a series of operations.""" - session = Session() - try: - yield session - session.commit() - except: - session.rollback() - raise - finally: - session.close() - - - def run_my_program(): - with session_scope() as session: - ThingOne().go(session) - ThingTwo().go(session) - - -Is the Session a cache? -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Yeee...no. It's somewhat used as a cache, in that it implements the -:term:`identity map` pattern, and stores objects keyed to their primary key. -However, it doesn't do any kind of query caching. This means, if you say -``session.query(Foo).filter_by(name='bar')``, even if ``Foo(name='bar')`` -is right there, in the identity map, the session has no idea about that. -It has to issue SQL to the database, get the rows back, and then when it -sees the primary key in the row, *then* it can look in the local identity -map and see that the object is already there. It's only when you say -``query.get({some primary key})`` that the -:class:`~sqlalchemy.orm.session.Session` doesn't have to issue a query. - -Additionally, the Session stores object instances using a weak reference -by default. This also defeats the purpose of using the Session as a cache. - -The :class:`.Session` is not designed to be a -global object from which everyone consults as a "registry" of objects. -That's more the job of a **second level cache**. SQLAlchemy provides -a pattern for implementing second level caching using `dogpile.cache `_, -via the :ref:`examples_caching` example. - -How can I get the :class:`~sqlalchemy.orm.session.Session` for a certain object? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Use the :meth:`~.Session.object_session` classmethod -available on :class:`~sqlalchemy.orm.session.Session`:: - - session = Session.object_session(someobject) - -The newer :ref:`core_inspection_toplevel` system can also be used:: - - from sqlalchemy import inspect - session = inspect(someobject).session - -.. _session_faq_threadsafe: - -Is the session thread-safe? -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :class:`.Session` is very much intended to be used in a -**non-concurrent** fashion, which usually means in only one thread at a -time. - -The :class:`.Session` should be used in such a way that one -instance exists for a single series of operations within a single -transaction. One expedient way to get this effect is by associating -a :class:`.Session` with the current thread (see :ref:`unitofwork_contextual` -for background). Another is to use a pattern -where the :class:`.Session` is passed between functions and is otherwise -not shared with other threads. - -The bigger point is that you should not *want* to use the session -with multiple concurrent threads. That would be like having everyone at a -restaurant all eat from the same plate. The session is a local "workspace" -that you use for a specific set of tasks; you don't want to, or need to, -share that session with other threads who are doing some other task. - -Making sure the :class:`.Session` is only used in a single concurrent thread at a time -is called a "share nothing" approach to concurrency. 
But actually, not -sharing the :class:`.Session` implies a more significant pattern; it -means not just the :class:`.Session` object itself, but -also **all objects that are associated with that Session**, must be kept within -the scope of a single concurrent thread. The set of mapped -objects associated with a :class:`.Session` are essentially proxies for data -within database rows accessed over a database connection, and so just like -the :class:`.Session` itself, the whole -set of objects is really just a large-scale proxy for a database connection -(or connections). Ultimately, it's mostly the DBAPI connection itself that -we're keeping away from concurrent access; but since the :class:`.Session` -and all the objects associated with it are all proxies for that DBAPI connection, -the entire graph is essentially not safe for concurrent access. - -If there are in fact multiple threads participating -in the same task, then you may consider sharing the session and its objects between -those threads; however, in this extremely unusual scenario the application would -need to ensure that a proper locking scheme is implemented so that there isn't -*concurrent* access to the :class:`.Session` or its state. A more common approach -to this situation is to maintain a single :class:`.Session` per concurrent thread, -but to instead *copy* objects from one :class:`.Session` to another, often -using the :meth:`.Session.merge` method to copy the state of an object into -a new object local to a different :class:`.Session`. - -Querying --------- - -The :meth:`~.Session.query` function takes one or more -*entities* and returns a new :class:`~sqlalchemy.orm.query.Query` object which -will issue mapper queries within the context of this Session. An entity is -defined as a mapped class, a :class:`~sqlalchemy.orm.mapper.Mapper` object, an -orm-enabled *descriptor*, or an ``AliasedClass`` object:: - - # query from a class - session.query(User).filter_by(name='ed').all() - - # query with multiple classes, returns tuples - session.query(User, Address).join('addresses').filter_by(name='ed').all() - - # query using orm-enabled descriptors - session.query(User.name, User.fullname).all() - - # query from a mapper - user_mapper = class_mapper(User) - session.query(user_mapper) - -When :class:`~sqlalchemy.orm.query.Query` returns results, each object -instantiated is stored within the identity map. When a row matches an object -which is already present, the same object is returned. In the latter case, -whether or not the row is populated onto an existing object depends upon -whether the attributes of the instance have been *expired* or not. A -default-configured :class:`~sqlalchemy.orm.session.Session` automatically -expires all instances along transaction boundaries, so that with a normally -isolated transaction, there shouldn't be any issue of instances representing -data which is stale with regards to the current transaction. - -The :class:`.Query` object is introduced in great detail in -:ref:`ormtutorial_toplevel`, and further documented in -:ref:`query_api_toplevel`. - -Adding New or Existing Items ----------------------------- - -:meth:`~.Session.add` is used to place instances in the -session. For *transient* (i.e. brand new) instances, this will have the effect -of an INSERT taking place for those instances upon the next flush. For -instances which are *persistent* (i.e. were loaded by this session), they are -already present and do not need to be added. Instances which are *detached* -(i.e. 
have been removed from a session) may be re-associated with a session -using this method:: - - user1 = User(name='user1') - user2 = User(name='user2') - session.add(user1) - session.add(user2) - - session.commit() # write changes to the database - -To add a list of items to the session at once, use -:meth:`~.Session.add_all`:: - - session.add_all([item1, item2, item3]) - -The :meth:`~.Session.add` operation **cascades** along -the ``save-update`` cascade. For more details see the section -:ref:`unitofwork_cascades`. - -.. _unitofwork_merging: - -Merging -------- - -:meth:`~.Session.merge` transfers state from an -outside object into a new or already existing instance within a session. It -also reconciles the incoming data against the state of the -database, producing a history stream which will be applied towards the next -flush, or alternatively can be made to produce a simple "transfer" of -state without producing change history or accessing the database. Usage is as follows:: - - merged_object = session.merge(existing_object) - -When given an instance, it follows these steps: - -* It examines the primary key of the instance. If it's present, it attempts - to locate that instance in the local identity map. If the ``load=True`` - flag is left at its default, it also checks the database for this primary - key if not located locally. -* If the given instance has no primary key, or if no instance can be found - with the primary key given, a new instance is created. -* The state of the given instance is then copied onto the located/newly - created instance. For attributes which are present on the source - instance, the value is transferred to the target instance. For mapped - attributes which aren't present on the source, the attribute is - expired on the target instance, discarding its existing value. - - If the ``load=True`` flag is left at its default, - this copy process emits events and will load the target object's - unloaded collections for each attribute present on the source object, - so that the incoming state can be reconciled against what's - present in the database. If ``load`` - is passed as ``False``, the incoming data is "stamped" directly without - producing any history. -* The operation is cascaded to related objects and collections, as - indicated by the ``merge`` cascade (see :ref:`unitofwork_cascades`). -* The new instance is returned. - -With :meth:`~.Session.merge`, the given "source" -instance is not modified nor is it associated with the target :class:`.Session`, -and remains available to be merged with any number of other :class:`.Session` -objects. :meth:`~.Session.merge` is useful for -taking the state of any kind of object structure without regard for its -origins or current session associations and copying its state into a -new session. Here's some examples: - -* An application which reads an object structure from a file and wishes to - save it to the database might parse the file, build up the - structure, and then use - :meth:`~.Session.merge` to save it - to the database, ensuring that the data within the file is - used to formulate the primary key of each element of the - structure. 
Later, when the file has changed, the same - process can be re-run, producing a slightly different - object structure, which can then be ``merged`` in again, - and the :class:`~sqlalchemy.orm.session.Session` will - automatically update the database to reflect those - changes, loading each object from the database by primary key and - then updating its state with the new state given. - -* An application is storing objects in an in-memory cache, shared by - many :class:`.Session` objects simultaneously. :meth:`~.Session.merge` - is used each time an object is retrieved from the cache to create - a local copy of it in each :class:`.Session` which requests it. - The cached object remains detached; only its state is moved into - copies of itself that are local to individual :class:`~.Session` - objects. - - In the caching use case, it's common to use the ``load=False`` - flag to remove the overhead of reconciling the object's state - with the database. There's also a "bulk" version of - :meth:`~.Session.merge` called :meth:`~.Query.merge_result` - that was designed to work with cache-extended :class:`.Query` - objects - see the section :ref:`examples_caching`. - -* An application wants to transfer the state of a series of objects - into a :class:`.Session` maintained by a worker thread or other - concurrent system. :meth:`~.Session.merge` makes a copy of each object - to be placed into this new :class:`.Session`. At the end of the operation, - the parent thread/process maintains the objects it started with, - and the thread/worker can proceed with local copies of those objects. - - In the "transfer between threads/processes" use case, the application - may want to use the ``load=False`` flag as well to avoid overhead and - redundant SQL queries as the data is transferred. - -Merge Tips -~~~~~~~~~~ - -:meth:`~.Session.merge` is an extremely useful method for many purposes. However, -it deals with the intricate border between objects that are transient/detached and -those that are persistent, as well as the automated transference of state. -The wide variety of scenarios that can present themselves here often require a -more careful approach to the state of objects. Common problems with merge usually involve -some unexpected state regarding the object being passed to :meth:`~.Session.merge`. - -Let's use the canonical example of the User and Address objects:: - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - name = Column(String(50), nullable=False) - addresses = relationship("Address", backref="user") - - class Address(Base): - __tablename__ = 'address' - - id = Column(Integer, primary_key=True) - email_address = Column(String(50), nullable=False) - user_id = Column(Integer, ForeignKey('user.id'), nullable=False) - -Assume a ``User`` object with one ``Address``, already persistent:: - - >>> u1 = User(name='ed', addresses=[Address(email_address='ed@ed.com')]) - >>> session.add(u1) - >>> session.commit() - -We now create ``a1``, an object outside the session, which we'd like -to merge on top of the existing ``Address``:: - - >>> existing_a1 = u1.addresses[0] - >>> a1 = Address(id=existing_a1.id) - -A surprise would occur if we said this:: - - >>> a1.user = u1 - >>> a1 = session.merge(a1) - >>> session.commit() - sqlalchemy.orm.exc.FlushError: New instance <Address at 0x...>
- with identity key (<class '__main__.Address'>, (1,)) conflicts with - persistent instance <Address at 0x...>
- -Why is that ? We weren't careful with our cascades. The assignment -of ``a1.user`` to a persistent object cascaded to the backref of ``User.addresses`` -and made our ``a1`` object pending, as though we had added it. Now we have -*two* ``Address`` objects in the session:: - - >>> a1 = Address() - >>> a1.user = u1 - >>> a1 in session - True - >>> existing_a1 in session - True - >>> a1 is existing_a1 - False - -Above, our ``a1`` is already pending in the session. The -subsequent :meth:`~.Session.merge` operation essentially -does nothing. Cascade can be configured via the :paramref:`~.relationship.cascade` -option on :func:`.relationship`, although in this case it -would mean removing the ``save-update`` cascade from the -``User.addresses`` relationship - and usually, that behavior -is extremely convenient. The solution here would usually be to not assign -``a1.user`` to an object already persistent in the target -session. - -The ``cascade_backrefs=False`` option of :func:`.relationship` -will also prevent the ``Address`` from -being added to the session via the ``a1.user = u1`` assignment. - -Further detail on cascade operation is at :ref:`unitofwork_cascades`. - -Another example of unexpected state:: - - >>> a1 = Address(id=existing_a1.id, user_id=u1.id) - >>> assert a1.user is None - >>> a1 = session.merge(a1) - >>> session.commit() - sqlalchemy.exc.IntegrityError: (IntegrityError) address.user_id - may not be NULL - -Here, we accessed ``a1.user``, which returned its default value -of ``None``, which, as a result of this access, has been placed in the ``__dict__`` of -our object ``a1``. Normally, this operation creates no change event, -so the ``user_id`` attribute takes precedence during a -flush. But when we merge the ``Address`` object into the session, the operation -is equivalent to:: - - >>> existing_a1.id = existing_a1.id - >>> existing_a1.user_id = u1.id - >>> existing_a1.user = None - -Where above, both ``user_id`` and ``user`` are assigned to, and change events -are emitted for both. The ``user`` association -takes precedence, and None is applied to ``user_id``, causing a failure. - -Most :meth:`~.Session.merge` issues can be examined by first checking - -is the object prematurely in the session ? - -.. sourcecode:: python+sql - - >>> a1 = Address(id=existing_a1.id, user_id=u1.id) - >>> assert a1 not in session - >>> a1 = session.merge(a1) - -Or is there state on the object that we don't want ? Examining ``__dict__`` -is a quick way to check:: - - >>> a1 = Address(id=existing_a1.id, user_id=u1.id) - >>> a1.user - >>> a1.__dict__ - {'_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x...>, - 'user_id': 1, - 'id': 1, - 'user': None} - >>> # we don't want user=None merged, remove it - >>> del a1.user - >>> a1 = session.merge(a1) - >>> # success - >>> session.commit() - -Deleting --------- - -The :meth:`~.Session.delete` method places an instance -into the Session's list of objects to be marked as deleted:: - - # mark two objects to be deleted - session.delete(obj1) - session.delete(obj2) - - # commit (or flush) - session.commit() - -.. _session_deleting_from_collections: - -Deleting from Collections -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A common confusion that arises regarding :meth:`~.Session.delete` is when -objects which are members of a collection are being deleted. While the -collection member is marked for deletion from the database, this does not -impact the collection itself in memory until the collection is expired.
-Below, we illustrate that even after an ``Address`` object is marked -for deletion, it's still present in the collection associated with the -parent ``User``, even after a flush:: - - >>> address = user.addresses[1] - >>> session.delete(address) - >>> session.flush() - >>> address in user.addresses - True - -When the above session is committed, all attributes are expired. The next -access of ``user.addresses`` will re-load the collection, revealing the -desired state:: - - >>> session.commit() - >>> address in user.addresses - False - -The usual practice of deleting items within collections is to forego the usage -of :meth:`~.Session.delete` directly, and instead use cascade behavior to -automatically invoke the deletion as a result of removing the object from -the parent collection. The ``delete-orphan`` cascade accomplishes this, -as illustrated in the example below:: - - mapper(User, users_table, properties={ - 'addresses':relationship(Address, cascade="all, delete, delete-orphan") - }) - del user.addresses[1] - session.flush() - -Where above, upon removing the ``Address`` object from the ``User.addresses`` -collection, the ``delete-orphan`` cascade has the effect of marking the ``Address`` -object for deletion in the same way as passing it to :meth:`~.Session.delete`. - -See also :ref:`unitofwork_cascades` for detail on cascades. - -Deleting based on Filter Criterion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The caveat with ``Session.delete()`` is that you need to have an object handy -already in order to delete. The Query includes a -:func:`~sqlalchemy.orm.query.Query.delete` method which deletes based on -filtering criteria:: - - session.query(User).filter(User.id==7).delete() - -The ``Query.delete()`` method includes functionality to "expire" objects -already in the session which match the criteria. However it does have some -caveats, including that "delete" and "delete-orphan" cascades won't be fully -expressed for collections which are already loaded. See the API docs for -:meth:`~sqlalchemy.orm.query.Query.delete` for more details. - -.. _session_flushing: - -Flushing --------- - -When the :class:`~sqlalchemy.orm.session.Session` is used with its default -configuration, the flush step is nearly always done transparently. -Specifically, the flush occurs before any individual -:class:`~sqlalchemy.orm.query.Query` is issued, as well as within the -:meth:`~.Session.commit` call before the transaction is -committed. It also occurs before a SAVEPOINT is issued when -:meth:`~.Session.begin_nested` is used. - -Regardless of the autoflush setting, a flush can always be forced by issuing -:meth:`~.Session.flush`:: - - session.flush() - -The "flush-on-Query" aspect of the behavior can be disabled by constructing -:class:`.sessionmaker` with the flag ``autoflush=False``:: - - Session = sessionmaker(autoflush=False) - -Additionally, autoflush can be temporarily disabled by setting the -``autoflush`` flag at any time:: - - mysession = Session() - mysession.autoflush = False - -Some autoflush-disable recipes are available at `DisableAutoFlush -`_. - -The flush process *always* occurs within a transaction, even if the -:class:`~sqlalchemy.orm.session.Session` has been configured with -``autocommit=True``, a setting that disables the session's persistent -transactional state. If no transaction is present, -:meth:`~.Session.flush` creates its own transaction and -commits it. Any failures during flush will always result in a rollback of -whatever transaction is present. 
If the Session is not in ``autocommit=True`` -mode, an explicit call to :meth:`~.Session.rollback` is -required after a flush fails, even though the underlying transaction will have -been rolled back already - this is so that the overall nesting pattern of -so-called "subtransactions" is consistently maintained. - -.. _session_committing: - -Committing ----------- - -:meth:`~.Session.commit` is used to commit the current -transaction. It always issues :meth:`~.Session.flush` -beforehand to flush any remaining state to the database; this is independent -of the "autoflush" setting. If no transaction is present, it raises an error. -Note that the default behavior of the :class:`~sqlalchemy.orm.session.Session` -is that a "transaction" is always present; this behavior can be disabled by -setting ``autocommit=True``. In autocommit mode, a transaction can be -initiated by calling the :meth:`~.Session.begin` method. - -.. note:: - - The term "transaction" here refers to a transactional - construct within the :class:`.Session` itself which may be - maintaining zero or more actual database (DBAPI) transactions. An individual - DBAPI connection begins participation in the "transaction" as it is first - used to execute a SQL statement, then remains present until the session-level - "transaction" is completed. See :ref:`unitofwork_transaction` for - further detail. - -Another behavior of :meth:`~.Session.commit` is that by -default it expires the state of all instances present after the commit is -complete. This is so that when the instances are next accessed, either through -attribute access or by them being present in a -:class:`~sqlalchemy.orm.query.Query` result set, they receive the most recent -state. To disable this behavior, configure -:class:`.sessionmaker` with ``expire_on_commit=False``. - -Normally, instances loaded into the :class:`~sqlalchemy.orm.session.Session` -are never changed by subsequent queries; the assumption is that the current -transaction is isolated so the state most recently loaded is correct as long -as the transaction continues. Setting ``autocommit=True`` works against this -model to some degree since the :class:`~sqlalchemy.orm.session.Session` -behaves in exactly the same way with regard to attribute state, except no -transaction is present. - -.. _session_rollback: - -Rolling Back ------------- - -:meth:`~.Session.rollback` rolls back the current -transaction. With a default configured session, the post-rollback state of the -session is as follows: - - * All transactions are rolled back and all connections returned to the - connection pool, unless the Session was bound directly to a Connection, in - which case the connection is still maintained (but still rolled back). - * Objects which were initially in the *pending* state when they were added - to the :class:`~sqlalchemy.orm.session.Session` within the lifespan of the - transaction are expunged, corresponding to their INSERT statement being - rolled back. The state of their attributes remains unchanged. - * Objects which were marked as *deleted* within the lifespan of the - transaction are promoted back to the *persistent* state, corresponding to - their DELETE statement being rolled back. Note that if those objects were - first *pending* within the transaction, that operation takes precedence - instead. - * All objects not expunged are fully expired. - -With that state understood, the :class:`~sqlalchemy.orm.session.Session` may -safely continue usage after a rollback occurs. 
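-
-For example, a sketch assuming a configured ``Session`` factory and a
-mapped ``User`` class with a unique ``name`` column::
-
-    from sqlalchemy.exc import IntegrityError
-
-    session = Session()
-    session.add(User(name='ed'))
-    try:
-        session.commit()     # fails if the name 'ed' already exists
-    except IntegrityError:
-        session.rollback()   # the pending User is expunged
-
-    # the same Session may continue; a new transaction begins on next use
-    session.add(User(name='ed2'))
-    session.commit()
-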
- -When a :meth:`~.Session.flush` fails, typically for -reasons like primary key, foreign key, or "not nullable" constraint -violations, a :meth:`~.Session.rollback` is issued -automatically (it's currently not possible for a flush to continue after a -partial failure). However, the flush process always uses its own transactional -demarcator called a *subtransaction*, which is described more fully in the -docstrings for :class:`~sqlalchemy.orm.session.Session`. What it means here is -that even though the database transaction has been rolled back, the end user -must still issue :meth:`~.Session.rollback` to fully -reset the state of the :class:`~sqlalchemy.orm.session.Session`. - -Expunging ---------- - -Expunge removes an object from the Session, sending persistent instances to -the detached state, and pending instances to the transient state: - -.. sourcecode:: python+sql - - session.expunge(obj1) - -To remove all items, call :meth:`~.Session.expunge_all` -(this method was formerly known as ``clear()``). - -Closing -------- - -The :meth:`~.Session.close` method issues a -:meth:`~.Session.expunge_all`, and :term:`releases` any -transactional/connection resources. When connections are returned to the -connection pool, transactional state is rolled back as well. - -.. _session_expire: - -Refreshing / Expiring ---------------------- - -:term:`Expiring` means that the database-persisted data held inside a series -of object attributes is erased, in such a way that when those attributes -are next accessed, a SQL query is emitted which will refresh that data from -the database. - -When we talk about expiration of data we are usually talking about an object -that is in the :term:`persistent` state. For example, if we load an object -as follows:: - - user = session.query(User).filter_by(name='user1').first() - -The above ``User`` object is persistent, and has a series of attributes -present; if we were to look inside its ``__dict__``, we'd see that state -loaded:: - - >>> user.__dict__ - { - 'id': 1, 'name': u'user1', - '_sa_instance_state': <...>, - } - -where ``id`` and ``name`` refer to those columns in the database. -``_sa_instance_state`` is a non-database-persisted value used by SQLAlchemy -internally (it refers to the :class:`.InstanceState` for the instance. -While not directly relevant to this section, if we want to get at it, -we should use the :func:`.inspect` function to access it). - -At this point, the state in our ``User`` object matches that of the loaded -database row. But upon expiring the object using a method such as -:meth:`.Session.expire`, we see that the state is removed:: - - >>> session.expire(user) - >>> user.__dict__ - {'_sa_instance_state': <...>} - -We see that while the internal "state" still hangs around, the values which -correspond to the ``id`` and ``name`` columns are gone. If we were to access -one of these columns and are watching SQL, we'd see this: - -.. sourcecode:: python+sql - - >>> print(user.name) - {opensql}SELECT user.id AS user_id, user.name AS user_name - FROM user - WHERE user.id = ? - (1,) - {stop}user1 - -Above, upon accessing the expired attribute ``user.name``, the ORM initiated -a :term:`lazy load` to retrieve the most recent state from the database, -by emitting a SELECT for the user row to which this user refers. Afterwards, -the ``__dict__`` is again populated:: - - >>> user.__dict__ - { - 'id': 1, 'name': u'user1', - '_sa_instance_state': <...>, - } - -.. 
note:: While we are peeking inside of ``__dict__`` in order to see a bit - of what SQLAlchemy does with object attributes, we **should not modify** - the contents of ``__dict__`` directly, at least as far as those attributes - which the SQLAlchemy ORM is maintaining (other attributes outside of SQLA's - realm are fine). This is because SQLAlchemy uses :term:`descriptors` in - order to track the changes we make to an object, and when we modify ``__dict__`` - directly, the ORM won't be able to track that we changed something. - -Another key behavior of both :meth:`~.Session.expire` and :meth:`~.Session.refresh` -is that all un-flushed changes on an object are discarded. That is, -if we were to modify an attribute on our ``User``:: - - >>> user.name = 'user2' - -but then we call :meth:`~.Session.expire` without first calling :meth:`~.Session.flush`, -our pending value of ``'user2'`` is discarded:: - - >>> session.expire(user) - >>> user.name - 'user1' - -The :meth:`~.Session.expire` method can be used to mark as "expired" all ORM-mapped -attributes for an instance:: - - # expire all ORM-mapped attributes on obj1 - session.expire(obj1) - -it can also be passed a list of string attribute names, referring to specific -attributes to be marked as expired:: - - # expire only attributes obj1.attr1, obj1.attr2 - session.expire(obj1, ['attr1', 'attr2']) - -The :meth:`~.Session.refresh` method has a similar interface, but instead -of expiring, it emits an immediate SELECT for the object's row:: - - # reload all attributes on obj1 - session.refresh(obj1) - -:meth:`~.Session.refresh` also accepts a list of string attribute names, -but unlike :meth:`~.Session.expire`, expects at least one name to -be that of a column-mapped attribute:: - - # reload obj1.attr1, obj1.attr2 - session.refresh(obj1, ['attr1', 'attr2']) - -The :meth:`.Session.expire_all` method allows us to essentially call -:meth:`.Session.expire` on all objects contained within the :class:`.Session` -at once:: - - session.expire_all() - -What Actually Loads -~~~~~~~~~~~~~~~~~~~ - -The SELECT statement that's emitted when an object marked with :meth:`~.Session.expire` -is loaded, or when an object is refreshed with :meth:`~.Session.refresh`, varies -based on several factors, including: - -* The load of expired attributes is triggered from **column-mapped attributes only**. - While any kind of attribute can be marked as expired, including a - :func:`.relationship`-mapped attribute, accessing an expired :func:`.relationship` - attribute will emit a load only for that attribute, using standard - relationship-oriented lazy loading. Column-oriented attributes, even if - expired, will not load as part of this operation, and instead will load when - any column-oriented attribute is accessed. - -* :func:`.relationship`-mapped attributes will not load in response to - expired column-based attributes being accessed. - -* Regarding relationships, :meth:`~.Session.refresh` is more restrictive than - :meth:`~.Session.expire` with regards to attributes that aren't column-mapped. - Calling :meth:`.refresh` and passing a list of names that only includes - relationship-mapped attributes will actually raise an error. - In any case, non-eager-loading :func:`.relationship` attributes will not be - included in any refresh operation.
- -* :func:`.relationship` attributes configured as "eager loading" via the - :paramref:`~.relationship.lazy` parameter will load in the case of - :meth:`~.Session.refresh`, if either no attribute names are specified, or - if their names are included in the list of attributes to be - refreshed. - -* Attributes that are configured as :func:`.deferred` will not normally load - during either the expired-attribute load or during a refresh. - An unloaded attribute that's :func:`.deferred` instead loads on its own when directly - accessed, or if part of a "group" of deferred attributes where an unloaded - attribute in that group is accessed. - -* For expired attributes that are loaded on access, a joined-inheritance table - mapping will emit a SELECT that typically only includes those tables for which - unloaded attributes are present. The action here is sophisticated enough - to load only the parent or child table, for example, if the subset of columns - that were originally expired encompasses only one or the other of those tables. - -* When :meth:`~.Session.refresh` is used on a joined-inheritance table mapping, - the SELECT emitted will resemble that of when :meth:`.Session.query` is - used on the target object's class. This is typically all those tables that - are set up as part of the mapping. - - -When to Expire or Refresh -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :class:`.Session` uses the expiration feature automatically whenever -the transaction referred to by the session ends. Meaning, whenever :meth:`.Session.commit` -or :meth:`.Session.rollback` is called, all objects within the :class:`.Session` -are expired, using a feature equivalent to that of the :meth:`.Session.expire_all` -method. The rationale is that the end of a transaction is a -demarcating point at which there is no more context available in order to know -what the current state of the database is, as any number of other transactions -may be affecting it. Only when a new transaction starts can we again have access -to the current state of the database, at which point any number of changes -may have occurred. - -.. sidebar:: Transaction Isolation - - Of course, most databases are capable of handling - multiple transactions at once, even involving the same rows of data. When - a relational database handles multiple transactions involving the same - tables or rows, this is when the :term:`isolation` aspect of the database comes - into play. The isolation behavior of different databases varies considerably - and even on a single database can be configured to behave in different ways - (via the so-called :term:`isolation level` setting). In that sense, the :class:`.Session` - can't fully predict when the same SELECT statement, emitted a second time, - will definitely return the data we already have, or will return new data. - So as a best guess, it assumes that within the scope of a transaction, unless - it is known that a SQL expression has been emitted to modify a particular row, - there's no need to refresh a row unless explicitly told to do so. - -The :meth:`.Session.expire` and :meth:`.Session.refresh` methods are used in -those cases when one wants to force an object to re-load its data from the -database, because it is known that the current state of data -is possibly stale.
Reasons for this might include: - -* some SQL has been emitted within the transaction outside of the - scope of the ORM's object handling, such as if a :meth:`.Table.update` construct - were emitted using the :meth:`.Session.execute` method; - -* if the application - is attempting to acquire data that is known to have been modified in a - concurrent transaction, and it is also known that the isolation rules in effect - allow this data to be visible. - -The second bullet has the important caveat that "it is also known that the isolation rules in effect -allow this data to be visible." This means that it cannot be assumed that an -UPDATE that happened on another database connection will yet be visible here -locally; in many cases, it will not. This is why if one wishes to use -:meth:`.expire` or :meth:`.refresh` in order to view data between ongoing -transactions, an understanding of the isolation behavior in effect is essential. - -.. seealso:: - - :meth:`.Session.expire` - - :meth:`.Session.expire_all` - - :meth:`.Session.refresh` - - :term:`isolation` - glossary explanation of isolation which includes links - to Wikipedia. - - `The SQLAlchemy Session In-Depth `_ - a video + slides with an in-depth discussion of the object - lifecycle including the role of data expiration. - - -Session Attributes ------------------- - -The :class:`~sqlalchemy.orm.session.Session` itself acts somewhat like a -set-like collection. All items present may be accessed using the iterator -interface:: - - for obj in session: - print obj - -And presence may be tested for using regular "contains" semantics:: - - if obj in session: - print "Object is present" - -The session is also keeping track of all newly created (i.e. pending) objects, -all objects which have had changes since they were last loaded or saved (i.e. -"dirty"), and everything that's been marked as deleted:: - - # pending objects recently added to the Session - session.new - - # persistent objects which currently have changes detected - # (this collection is now created on the fly each time the property is called) - session.dirty - - # persistent objects that have been marked as deleted via session.delete(obj) - session.deleted - - # dictionary of all persistent objects, keyed on their - # identity key - session.identity_map - -(Documentation: :attr:`.Session.new`, :attr:`.Session.dirty`, -:attr:`.Session.deleted`, :attr:`.Session.identity_map`). - -Note that objects within the session are by default *weakly referenced*. This -means that when they are dereferenced in the outside application, they fall -out of scope from within the :class:`~sqlalchemy.orm.session.Session` as well -and are subject to garbage collection by the Python interpreter. The -exceptions to this include objects which are pending, objects which are marked -as deleted, or persistent objects which have pending changes on them. After a -full flush, these collections are all empty, and all objects are again weakly -referenced. To disable the weak referencing behavior and force all objects -within the session to remain until explicitly expunged, configure -:class:`.sessionmaker` with the ``weak_identity_map=False`` -setting. - -.. _unitofwork_cascades: - -Cascades -======== - -Mappers support the concept of configurable :term:`cascade` behavior on -:func:`~sqlalchemy.orm.relationship` constructs. This refers -to how operations performed on a "parent" object relative to a -particular :class:`.Session` should be propagated to items -referred to by that relationship (e.g. "child" objects), and is -affected by the :paramref:`.relationship.cascade` option. - -The default behavior of cascade is limited to cascades of the -so-called :ref:`cascade_save_update` and :ref:`cascade_merge` settings. -The typical "alternative" setting for cascade is to add -the :ref:`cascade_delete` and :ref:`cascade_delete_orphan` options; -these settings are appropriate for related objects which only exist as -long as they are attached to their parent, and are otherwise deleted. - -Cascade behavior is configured by changing the -:paramref:`~.relationship.cascade` option on -:func:`~sqlalchemy.orm.relationship`:: - - class Order(Base): - __tablename__ = 'order' - - items = relationship("Item", cascade="all, delete-orphan") - customer = relationship("User", cascade="save-update") - -To set cascades on a backref, the same flag can be used with the -:func:`~sqlalchemy.orm.backref` function, which ultimately feeds -its arguments back into :func:`~sqlalchemy.orm.relationship`:: - - class Item(Base): - __tablename__ = 'item' - - order = relationship("Order", - backref=backref("items", cascade="all, delete-orphan") - ) - -.. sidebar:: The Origins of Cascade - - SQLAlchemy's notion of cascading behavior on relationships, - as well as the options to configure them, are primarily derived - from the similar feature in the Hibernate ORM; Hibernate refers - to "cascade" in a few places such as in - `Example: Parent/Child `_. - If cascades are confusing, we'll refer to their conclusion, - stating "The sections we have just covered can be a bit confusing. - However, in practice, it all works out nicely." - -The default value of :paramref:`~.relationship.cascade` is ``save-update, merge``. -The typical alternative setting for this parameter is either -``all`` or more commonly ``all, delete-orphan``. The ``all`` symbol -is a synonym for ``save-update, merge, refresh-expire, expunge, delete``, -and using it in conjunction with ``delete-orphan`` indicates that the child -object should follow along with its parent in all cases, and be deleted once -it is no longer associated with that parent. - -The list of available values which can be specified for -the :paramref:`~.relationship.cascade` parameter is described in the following subsections. - -.. _cascade_save_update: - -save-update ------------ - -``save-update`` cascade indicates that when an object is placed into a -:class:`.Session` via :meth:`.Session.add`, all the objects associated -with it via this :func:`.relationship` should also be added to that -same :class:`.Session`. Suppose we have an object ``user1`` with two -related objects ``address1``, ``address2``:: - - >>> user1 = User() - >>> address1, address2 = Address(), Address() - >>> user1.addresses = [address1, address2] - -If we add ``user1`` to a :class:`.Session`, it will also add -``address1``, ``address2`` implicitly:: - - >>> sess = Session() - >>> sess.add(user1) - >>> address1 in sess - True - -``save-update`` cascade also affects attribute operations for objects -that are already present in a :class:`.Session`.
If we add a third
-object, ``address3``, to the ``user1.addresses`` collection, it
-becomes part of the state of that :class:`.Session`::
-
-    >>> address3 = Address()
-    >>> user1.addresses.append(address3)
-    >>> address3 in sess
-    True
-
-``save-update`` has the possibly surprising behavior that
-persistent objects which were *removed* from a collection
-or in some cases a scalar attribute
-may also be pulled into the :class:`.Session` of a parent object; this is
-so that the flush process may handle that related object appropriately.
-This case can usually only arise if an object is removed from one :class:`.Session`
-and added to another::
-
-    >>> user1 = sess1.query(User).filter_by(id=1).first()
-    >>> address1 = user1.addresses[0]
-    >>> sess1.close()   # user1, address1 no longer associated with sess1
-    >>> user1.addresses.remove(address1)  # address1 no longer associated with user1
-    >>> sess2 = Session()
-    >>> sess2.add(user1)   # ... but it still gets added to the new session,
-    >>> address1 in sess2  # because it's still "pending" for flush
-    True
-
-The ``save-update`` cascade is on by default, and is typically taken
-for granted; it simplifies code by allowing a single call to
-:meth:`.Session.add` to register an entire structure of objects within
-that :class:`.Session` at once.   While it can be disabled, there
-is usually not a need to do so.
-
-One case where ``save-update`` cascade does sometimes get in the way is in that
-it takes place in both directions for bi-directional relationships, e.g.
-backrefs, meaning that the association of a child object with a particular parent
-can have the effect of the parent object being implicitly associated with that
-child object's :class:`.Session`; this pattern, as well as how to modify its
-behavior using the :paramref:`~.relationship.cascade_backrefs` flag,
-is discussed in the section :ref:`backref_cascade`.
-
-.. _cascade_delete:
-
-delete
--------
-
-The ``delete`` cascade indicates that when a "parent" object
-is marked for deletion, its related "child" objects should also be marked
-for deletion.   If for example we have a relationship ``User.addresses``
-with ``delete`` cascade configured::
-
-    class User(Base):
-        # ...
-
-        addresses = relationship("Address", cascade="save-update, merge, delete")
-
-If using the above mapping, we have a ``User`` object and two
-related ``Address`` objects::
-
-    >>> user1 = sess.query(User).filter_by(id=1).first()
-    >>> address1, address2 = user1.addresses
-
-If we mark ``user1`` for deletion, after the flush operation proceeds,
-``address1`` and ``address2`` will also be deleted:
-
-.. sourcecode:: python+sql
-
-    >>> sess.delete(user1)
-    >>> sess.commit()
-    {opensql}DELETE FROM address WHERE address.id = ?
-    ((1,), (2,))
-    DELETE FROM user WHERE user.id = ?
-    (1,)
-    COMMIT
-
-Alternatively, if our ``User.addresses`` relationship does *not* have
-``delete`` cascade, SQLAlchemy's default behavior is to instead de-associate
-``address1`` and ``address2`` from ``user1`` by setting their foreign key
-reference to ``NULL``.  Using a mapping as follows::
-
-    class User(Base):
-        # ...
-
-        addresses = relationship("Address")
-
-Upon deletion of a parent ``User`` object, the rows in ``address`` are not
-deleted, but are instead de-associated:
-
-.. sourcecode:: python+sql
-
-    >>> sess.delete(user1)
-    >>> sess.commit()
-    {opensql}UPDATE address SET user_id=? WHERE address.id = ?
-    (None, 1)
-    UPDATE address SET user_id=? WHERE address.id = ?
-    (None, 2)
-    DELETE FROM user WHERE user.id = ?
-    (1,)
-    COMMIT
-
-``delete`` cascade is more often than not used in conjunction with
-:ref:`cascade_delete_orphan` cascade, which will emit a DELETE for the related
-row if the "child" object is deassociated from the parent.  The combination
-of ``delete`` and ``delete-orphan`` cascade covers both situations where
-SQLAlchemy has to decide between setting a foreign key column to NULL versus
-deleting the row entirely.
-
-.. topic:: ORM-level "delete" cascade vs. FOREIGN KEY level "ON DELETE" cascade
-
-    The behavior of SQLAlchemy's "delete" cascade has a lot of overlap with the
-    ``ON DELETE CASCADE`` feature of a database foreign key, as well
-    as with that of the ``ON DELETE SET NULL`` foreign key setting when "delete"
-    cascade is not specified.   Database level "ON DELETE" cascades are specific to the
-    "FOREIGN KEY" construct of the relational database; SQLAlchemy allows
-    configuration of these schema-level constructs at the :term:`DDL` level
-    using options on :class:`.ForeignKeyConstraint` which are described
-    at :ref:`on_update_on_delete`.
-
-    It is important to note the differences between the ORM and the relational
-    database's notion of "cascade" as well as how they integrate:
-
-    * A database level ``ON DELETE`` cascade is configured effectively
-      on the **many-to-one** side of the relationship; that is, we configure
-      it relative to the ``FOREIGN KEY`` constraint that is the "many" side
-      of a relationship.  At the ORM level, **this direction is reversed**.
-      SQLAlchemy handles the deletion of "child" objects relative to a
-      "parent" from the "parent" side, which means that ``delete`` and
-      ``delete-orphan`` cascade are configured on the **one-to-many**
-      side.
-
-    * Database level foreign keys with no ``ON DELETE`` setting
-      are often used to **prevent** a parent
-      row from being removed, as it would necessarily leave an unhandled
-      related row present.  If this behavior is desired in a one-to-many
-      relationship, SQLAlchemy's default behavior of setting a foreign key
-      to ``NULL`` can be caught in one of two ways:
-
-        * The easiest and most common is just to set the
-          foreign-key-holding column to ``NOT NULL`` at the database schema
-          level.  An attempt by SQLAlchemy to set the column to NULL will
-          fail with a simple NOT NULL constraint exception.
-
-        * The other, more special case way is to set the :paramref:`~.relationship.passive_deletes`
-          flag to the string ``"all"``.  This has the effect of entirely
-          disabling SQLAlchemy's behavior of setting the foreign key column
-          to NULL, and a DELETE will be emitted for the parent row without
-          any effect on the child row, even if the child row is present
-          in memory.  This may be desirable in the case when
-          database-level foreign key triggers, either special ``ON DELETE`` settings
-          or otherwise, need to be activated in all cases when a parent row is deleted.
-
-    * Database level ``ON DELETE`` cascade is **vastly more efficient**
-      than that of SQLAlchemy.  The database can chain a series of cascade
-      operations across many relationships at once; e.g. if row A is deleted,
-      all the related rows in table B can be deleted, and all the C rows related
-      to each of those B rows, and on and on, all within the scope of a single
-      DELETE statement.  SQLAlchemy on the other hand, in order to support
-      the cascading delete operation fully, has to individually load each
-      related collection in order to target all rows that then may have further
-      related collections.  That is, SQLAlchemy isn't sophisticated enough
-      to emit a DELETE for all those related rows at once within this context.
-
-    * SQLAlchemy doesn't **need** to be this sophisticated, as we instead provide
-      smooth integration with the database's own ``ON DELETE`` functionality,
-      by using the :paramref:`~.relationship.passive_deletes` option in conjunction
-      with properly configured foreign key constraints.  Under this behavior,
-      SQLAlchemy only emits DELETE for those rows that are already locally
-      present in the :class:`.Session`; for any collections that are unloaded,
-      it leaves them to the database to handle, rather than emitting a SELECT
-      for them.  The section :ref:`passive_deletes` provides an example of this
-      use, and a brief sketch follows this note.
-
-    * While database-level ``ON DELETE`` functionality works only on the "many"
-      side of a relationship, SQLAlchemy's "delete" cascade
-      has **limited** ability to operate in the *reverse* direction as well,
-      meaning it can be configured on the "many" side to delete an object
-      on the "one" side when the reference on the "many" side is deleted.  However
-      this can easily result in constraint violations if there are other objects
-      referring to this "one" side from the "many", so it typically is only
-      useful when a relationship is in fact a "one to one".  The
-      :paramref:`~.relationship.single_parent` flag should be used to establish
-      an in-Python assertion for this case.
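-
-A minimal sketch of this ``ON DELETE CASCADE`` integration, assuming a
-typical declarative ``Base`` (this mapping is hypothetical and not one of
-the examples above)::
-
-    from sqlalchemy import Column, ForeignKey, Integer
-    from sqlalchemy.ext.declarative import declarative_base
-    from sqlalchemy.orm import relationship
-
-    Base = declarative_base()
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-
-        # with passive_deletes=True, unloaded Address rows are left
-        # to the database's ON DELETE CASCADE when a User is deleted
-        addresses = relationship(
-            "Address",
-            cascade="all, delete-orphan",
-            passive_deletes=True)
-
-    class Address(Base):
-        __tablename__ = 'address'
-        id = Column(Integer, primary_key=True)
-
-        # ON DELETE CASCADE is rendered in the DDL for this constraint
-        user_id = Column(
-            Integer,
-            ForeignKey('user.id', ondelete='CASCADE'),
-            nullable=False)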
-
-When using a :func:`.relationship` that also includes a many-to-many
-table using the :paramref:`~.relationship.secondary` option, SQLAlchemy's
-delete cascade handles the rows in this many-to-many table automatically.
-Just as the addition or removal of an object from a many-to-many collection
-results in the INSERT or DELETE of a row in the many-to-many table, as
-described in :ref:`relationships_many_to_many_deletion`, so too will the
-``delete`` cascade, when activated as the result of a parent object
-delete operation, DELETE not just the row in the "child" table but also
-in the many-to-many table.
-
-.. _cascade_delete_orphan:
-
-delete-orphan
---------------
-
-``delete-orphan`` cascade adds behavior to the ``delete`` cascade,
-such that a child object will be marked for deletion when it is
-de-associated from the parent, not just when the parent is marked
-for deletion.  This is a common feature when dealing with a related
-object that is "owned" by its parent, with a NOT NULL foreign key,
-so that removal of the item from the parent collection results
-in its deletion.
-
-``delete-orphan`` cascade implies that each child object can only
-have one parent at a time, so is configured in the vast majority of cases
-on a one-to-many relationship.   Setting it on a many-to-one or
-many-to-many relationship is more awkward; for this use case,
-SQLAlchemy requires that the :func:`~sqlalchemy.orm.relationship`
-be configured with the :paramref:`~.relationship.single_parent` argument,
-which establishes Python-side validation that ensures the object
-is associated with only one parent at a time.
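-
-As a brief illustration, here is a sketch which assumes the ``User.addresses``
-mapping shown earlier, reconfigured with ``cascade="all, delete-orphan"``::
-
-    >>> user1 = sess.query(User).filter_by(id=1).first()
-    >>> address1 = user1.addresses[0]
-    >>> user1.addresses.remove(address1)   # address1 is now an "orphan"
-    >>> sess.commit()   # the flush emits a DELETE for address1's row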
-
-.. _cascade_merge:
-
-merge
------
-
-``merge`` cascade indicates that the :meth:`.Session.merge`
-operation should be propagated from a parent that's the subject
-of the :meth:`.Session.merge` call down to referred objects.
-This cascade is also on by default.
-
-.. _cascade_refresh_expire:
-
-refresh-expire
---------------
-
-``refresh-expire`` is an uncommon option, indicating that the
-:meth:`.Session.expire` operation should be propagated from a parent
-down to referred objects.  When using :meth:`.Session.refresh`,
-the referred objects are expired only, but not actually refreshed.
-
-.. _cascade_expunge:
-
-expunge
--------
-
-``expunge`` cascade indicates that when the parent object is removed
-from the :class:`.Session` using :meth:`.Session.expunge`, the
-operation should be propagated down to referred objects.
-
-.. _backref_cascade:
-
-Controlling Cascade on Backrefs
--------------------------------
-
-The :ref:`cascade_save_update` cascade by default takes place on attribute change events
-emitted from backrefs.  This is probably a confusing statement more
-easily described through demonstration; it means that, given a mapping such as this::
-
-    mapper(Order, order_table, properties={
-        'items' : relationship(Item, backref='order')
-    })
-
-If an ``Order`` is already in the session, and is assigned to the ``order``
-attribute of an ``Item``, the backref appends the ``Order`` to the ``items``
-collection of that ``Order``, resulting in the ``save-update`` cascade taking
-place::
-
-    >>> o1 = Order()
-    >>> session.add(o1)
-    >>> o1 in session
-    True
-
-    >>> i1 = Item()
-    >>> i1.order = o1
-    >>> i1 in o1.items
-    True
-    >>> i1 in session
-    True
-
-This behavior can be disabled using the :paramref:`~.relationship.cascade_backrefs` flag::
-
-    mapper(Order, order_table, properties={
-        'items' : relationship(Item, backref='order',
-                                    cascade_backrefs=False)
-    })
-
-So above, the assignment of ``i1.order = o1`` will append ``i1`` to the ``items``
-collection of ``o1``, but will not add ``i1`` to the session.  You can, of
-course, :meth:`~.Session.add` ``i1`` to the session at a later point.  This
-option may be helpful for situations where an object needs to be kept out of a
-session until its construction is completed, but still needs to be given
-associations to objects which are already persistent in the target session.
-
-
-.. _unitofwork_transaction:
-
-Managing Transactions
-=====================
-
-A newly constructed :class:`.Session` may be said to be in the "begin" state.
-In this state, the :class:`.Session` has not established any connection or
-transactional state with any of the :class:`.Engine` objects that may be associated
-with it.
-
-The :class:`.Session` then receives requests to operate upon a database connection.
-Typically, this means it is called upon to execute SQL statements using a particular
-:class:`.Engine`, which may be via :meth:`.Session.query`, :meth:`.Session.execute`,
-or within a flush operation of pending data, which occurs when such state exists
-and :meth:`.Session.commit` or :meth:`.Session.flush` is called.
-
-As these requests are received, each new :class:`.Engine` encountered is associated
-with an ongoing transactional state maintained by the :class:`.Session`.
-When the first :class:`.Engine` is operated upon, the :class:`.Session` can be said
-to have left the "begin" state and entered "transactional" state.   For each
-:class:`.Engine` encountered, a :class:`.Connection` is associated with it,
-which is acquired via the :meth:`.Engine.contextual_connect` method.  If a
-:class:`.Connection` was directly associated with the :class:`.Session`
-(see :ref:`session_external_transaction` for an example of this), it is
-added to the transactional state directly.
- -For each :class:`.Connection`, the :class:`.Session` also maintains a :class:`.Transaction` object, -which is acquired by calling :meth:`.Connection.begin` on each :class:`.Connection`, -or if the :class:`.Session` -object has been established using the flag ``twophase=True``, a :class:`.TwoPhaseTransaction` -object acquired via :meth:`.Connection.begin_twophase`. These transactions are all committed or -rolled back corresponding to the invocation of the -:meth:`.Session.commit` and :meth:`.Session.rollback` methods. A commit operation will -also call the :meth:`.TwoPhaseTransaction.prepare` method on all transactions if applicable. - -When the transactional state is completed after a rollback or commit, the :class:`.Session` -:term:`releases` all :class:`.Transaction` and :class:`.Connection` resources, -and goes back to the "begin" state, which -will again invoke new :class:`.Connection` and :class:`.Transaction` objects as new -requests to emit SQL statements are received. - -The example below illustrates this lifecycle:: - - engine = create_engine("...") - Session = sessionmaker(bind=engine) - - # new session. no connections are in use. - session = Session() - try: - # first query. a Connection is acquired - # from the Engine, and a Transaction - # started. - item1 = session.query(Item).get(1) - - # second query. the same Connection/Transaction - # are used. - item2 = session.query(Item).get(2) - - # pending changes are created. - item1.foo = 'bar' - item2.bar = 'foo' - - # commit. The pending changes above - # are flushed via flush(), the Transaction - # is committed, the Connection object closed - # and discarded, the underlying DBAPI connection - # returned to the connection pool. - session.commit() - except: - # on rollback, the same closure of state - # as that of commit proceeds. - session.rollback() - raise - -.. _session_begin_nested: - -Using SAVEPOINT ---------------- - -SAVEPOINT transactions, if supported by the underlying engine, may be -delineated using the :meth:`~.Session.begin_nested` -method:: - - Session = sessionmaker() - session = Session() - session.add(u1) - session.add(u2) - - session.begin_nested() # establish a savepoint - session.add(u3) - session.rollback() # rolls back u3, keeps u1 and u2 - - session.commit() # commits u1 and u2 - -:meth:`~.Session.begin_nested` may be called any number -of times, which will issue a new SAVEPOINT with a unique identifier for each -call. For each :meth:`~.Session.begin_nested` call, a -corresponding :meth:`~.Session.rollback` or -:meth:`~.Session.commit` must be issued. (But note that if the return value is -used as a context manager, i.e. in a with-statement, then this rollback/commit -is issued by the context manager upon exiting the context, and so should not be -added explicitly.) - -When :meth:`~.Session.begin_nested` is called, a -:meth:`~.Session.flush` is unconditionally issued -(regardless of the ``autoflush`` setting). This is so that when a -:meth:`~.Session.rollback` occurs, the full state of the -session is expired, thus causing all subsequent attribute/instance access to -reference the full state of the :class:`~sqlalchemy.orm.session.Session` right -before :meth:`~.Session.begin_nested` was called. - -:meth:`~.Session.begin_nested`, in the same manner as the less often -used :meth:`~.Session.begin` method, returns a transactional object -which also works as a context manager. 
-It can be succinctly used around individual record inserts in order to catch -things like unique constraint exceptions:: - - for record in records: - try: - with session.begin_nested(): - session.merge(record) - except: - print "Skipped record %s" % record - session.commit() - -.. _session_autocommit: - -Autocommit Mode ---------------- - -The example of :class:`.Session` transaction lifecycle illustrated at -the start of :ref:`unitofwork_transaction` applies to a :class:`.Session` configured in the -default mode of ``autocommit=False``. Constructing a :class:`.Session` -with ``autocommit=True`` produces a :class:`.Session` placed into "autocommit" mode, where each SQL statement -invoked by a :meth:`.Session.query` or :meth:`.Session.execute` occurs -using a new connection from the connection pool, discarding it after -results have been iterated. The :meth:`.Session.flush` operation -still occurs within the scope of a single transaction, though this transaction -is closed out after the :meth:`.Session.flush` operation completes. - -.. warning:: - - "autocommit" mode should **not be considered for general use**. - If used, it should always be combined with the usage of - :meth:`.Session.begin` and :meth:`.Session.commit`, to ensure - a transaction demarcation. - - Executing queries outside of a demarcated transaction is a legacy mode - of usage, and can in some cases lead to concurrent connection - checkouts. - - In the absence of a demarcated transaction, the :class:`.Session` - cannot make appropriate decisions as to when autoflush should - occur nor when auto-expiration should occur, so these features - should be disabled with ``autoflush=False, expire_on_commit=False``. - -Modern usage of "autocommit" is for framework integrations that need to control -specifically when the "begin" state occurs. A session which is configured with -``autocommit=True`` may be placed into the "begin" state using the -:meth:`.Session.begin` method. -After the cycle completes upon :meth:`.Session.commit` or :meth:`.Session.rollback`, -connection and transaction resources are :term:`released` and the :class:`.Session` -goes back into "autocommit" mode, until :meth:`.Session.begin` is called again:: - - Session = sessionmaker(bind=engine, autocommit=True) - session = Session() - session.begin() - try: - item1 = session.query(Item).get(1) - item2 = session.query(Item).get(2) - item1.foo = 'bar' - item2.bar = 'foo' - session.commit() - except: - session.rollback() - raise - -The :meth:`.Session.begin` method also returns a transactional token which is -compatible with the Python 2.6 ``with`` statement:: - - Session = sessionmaker(bind=engine, autocommit=True) - session = Session() - with session.begin(): - item1 = session.query(Item).get(1) - item2 = session.query(Item).get(2) - item1.foo = 'bar' - item2.bar = 'foo' - -.. _session_subtransactions: - -Using Subtransactions with Autocommit -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A subtransaction indicates usage of the :meth:`.Session.begin` method in conjunction with -the ``subtransactions=True`` flag. This produces a non-transactional, delimiting construct that -allows nesting of calls to :meth:`~.Session.begin` and :meth:`~.Session.commit`. -Its purpose is to allow the construction of code that can function within a transaction -both independently of any external code that starts a transaction, -as well as within a block that has already demarcated a transaction. 
-
-``subtransactions=True`` is generally only useful in conjunction with
-autocommit, and is equivalent to the pattern described at :ref:`connections_nested_transactions`,
-where any number of functions can call :meth:`.Connection.begin` and :meth:`.Transaction.commit`
-as though they are the initiator of the transaction, but in fact may be participating
-in an already ongoing transaction::
-
-    # method_a starts a transaction and calls method_b
-    def method_a(session):
-        session.begin(subtransactions=True)
-        try:
-            method_b(session)
-            session.commit()  # transaction is committed here
-        except:
-            session.rollback() # rolls back the transaction
-            raise
-
-    # method_b also starts a transaction, but when
-    # called from method_a participates in the ongoing
-    # transaction.
-    def method_b(session):
-        session.begin(subtransactions=True)
-        try:
-            session.add(SomeObject('bat', 'lala'))
-            session.commit()  # transaction is not committed yet
-        except:
-            session.rollback() # rolls back the transaction, in this case
-                               # the one that was initiated in method_a().
-            raise
-
-    # create a Session and call method_a
-    session = Session(autocommit=True)
-    method_a(session)
-    session.close()
-
-Subtransactions are used by the :meth:`.Session.flush` process to ensure that the
-flush operation takes place within a transaction, regardless of autocommit.   When
-autocommit is disabled, it is still useful in that it forces the :class:`.Session`
-into a "pending rollback" state, as a failed flush cannot be resumed in mid-operation,
-where the end user still maintains the "scope" of the transaction overall.
-
-.. _session_twophase:
-
-Enabling Two-Phase Commit
--------------------------
-
-For backends which support two-phase operation (currently MySQL and
-PostgreSQL), the session can be instructed to use two-phase commit semantics.
-This will coordinate the committing of transactions across databases so that
-the transaction is either committed or rolled back in all databases.  You can
-also :meth:`~.Session.prepare` the session for
-interacting with transactions not managed by SQLAlchemy.  To use two-phase
-transactions, set the flag ``twophase=True`` on the session::
-
-    engine1 = create_engine('postgresql://db1')
-    engine2 = create_engine('postgresql://db2')
-
-    Session = sessionmaker(twophase=True)
-
-    # bind User operations to engine 1, Account operations to engine 2
-    Session.configure(binds={User:engine1, Account:engine2})
-
-    session = Session()
-
-    # .... work with accounts and users
-
-    # commit.  session will issue a flush to all DBs, and a prepare step to all DBs,
-    # before committing both transactions
-    session.commit()
-
-Embedding SQL Insert/Update Expressions into a Flush
-=====================================================
-
-This feature allows the value of a database column to be set to a SQL
-expression instead of a literal value.  It's especially useful for atomic
-updates, calling stored procedures, etc.  All you do is assign an expression to
-an attribute::
-
-    class SomeClass(object):
-        pass
-    mapper(SomeClass, some_table)
-
-    someobject = session.query(SomeClass).get(5)
-
-    # set 'value' attribute to a SQL expression adding one
-    someobject.value = some_table.c.value + 1
-
-    # issues "UPDATE some_table SET value=value+1"
-    session.commit()
-
-This technique works both for INSERT and UPDATE statements.  After the
-flush/commit operation, the ``value`` attribute on ``someobject`` above is
-expired, so that when next accessed the newly generated value will be loaded
-from the database.
-
-.. 
_session_sql_expressions: - -Using SQL Expressions with Sessions -==================================== - -SQL expressions and strings can be executed via the -:class:`~sqlalchemy.orm.session.Session` within its transactional context. -This is most easily accomplished using the -:meth:`~.Session.execute` method, which returns a -:class:`~sqlalchemy.engine.ResultProxy` in the same manner as an -:class:`~sqlalchemy.engine.Engine` or -:class:`~sqlalchemy.engine.Connection`:: - - Session = sessionmaker(bind=engine) - session = Session() - - # execute a string statement - result = session.execute("select * from table where id=:id", {'id':7}) - - # execute a SQL expression construct - result = session.execute(select([mytable]).where(mytable.c.id==7)) - -The current :class:`~sqlalchemy.engine.Connection` held by the -:class:`~sqlalchemy.orm.session.Session` is accessible using the -:meth:`~.Session.connection` method:: - - connection = session.connection() - -The examples above deal with a :class:`~sqlalchemy.orm.session.Session` that's -bound to a single :class:`~sqlalchemy.engine.Engine` or -:class:`~sqlalchemy.engine.Connection`. To execute statements using a -:class:`~sqlalchemy.orm.session.Session` which is bound either to multiple -engines, or none at all (i.e. relies upon bound metadata), both -:meth:`~.Session.execute` and -:meth:`~.Session.connection` accept a ``mapper`` keyword -argument, which is passed a mapped class or -:class:`~sqlalchemy.orm.mapper.Mapper` instance, which is used to locate the -proper context for the desired engine:: - - Session = sessionmaker() - session = Session() - - # need to specify mapper or class when executing - result = session.execute("select * from table where id=:id", {'id':7}, mapper=MyMappedClass) - - result = session.execute(select([mytable], mytable.c.id==7), mapper=MyMappedClass) - - connection = session.connection(MyMappedClass) - -.. _session_external_transaction: - -Joining a Session into an External Transaction (such as for test suites) -======================================================================== - -If a :class:`.Connection` is being used which is already in a transactional -state (i.e. has a :class:`.Transaction` established), a :class:`.Session` can -be made to participate within that transaction by just binding the -:class:`.Session` to that :class:`.Connection`. The usual rationale for this -is a test suite that allows ORM code to work freely with a :class:`.Session`, -including the ability to call :meth:`.Session.commit`, where afterwards the -entire database interaction is rolled back:: - - from sqlalchemy.orm import sessionmaker - from sqlalchemy import create_engine - from unittest import TestCase - - # global application scope. create Session class, engine - Session = sessionmaker() - - engine = create_engine('postgresql://...') - - class SomeTest(TestCase): - def setUp(self): - # connect to the database - self.connection = engine.connect() - - # begin a non-ORM transaction - self.trans = self.connection.begin() - - # bind an individual Session to the connection - self.session = Session(bind=self.connection) - - def test_something(self): - # use the session in tests. - - self.session.add(Foo()) - self.session.commit() - - def tearDown(self): - self.session.close() - - # rollback - everything that happened with the - # Session above (including calls to commit()) - # is rolled back. 
-        self.trans.rollback()
-
-        # return connection to the Engine
-        self.connection.close()
-
-Above, we issue :meth:`.Session.commit` as well as
-:meth:`.Transaction.rollback`.  This is an example of where we take advantage
-of the :class:`.Connection` object's ability to maintain *subtransactions*, or
-nested begin/commit-or-rollback pairs where only the outermost begin/commit
-pair actually commits the transaction, or if the outermost block rolls back,
-everything is rolled back.
-
-.. topic:: Supporting Tests with Rollbacks
-
-   The above recipe works well for any kind of database-enabled test, except
-   for a test that needs to actually invoke :meth:`.Session.rollback` within
-   the scope of the test itself.   The above recipe can be expanded, such
-   that the :class:`.Session` always runs all operations within the scope
-   of a SAVEPOINT, which is established at the start of each transaction,
-   so that tests can also rollback the "transaction" as well while still
-   remaining in the scope of a larger "transaction" that's never committed,
-   using two extra events::
-
-      from sqlalchemy import event
-
-      class SomeTest(TestCase):
-          def setUp(self):
-              # connect to the database
-              self.connection = engine.connect()
-
-              # begin a non-ORM transaction
-              self.trans = self.connection.begin()
-
-              # bind an individual Session to the connection
-              self.session = Session(bind=self.connection)
-
-              # start the session in a SAVEPOINT...
-              self.session.begin_nested()
-
-              # then each time that SAVEPOINT ends, reopen it
-              @event.listens_for(self.session, "after_transaction_end")
-              def restart_savepoint(session, transaction):
-                  if transaction.nested and not transaction._parent.nested:
-                      session.begin_nested()
-
-
-          # ... the tearDown() method stays the same
-
-.. _unitofwork_contextual:
-
-Contextual/Thread-local Sessions
-=================================
-
-Recall from the section :ref:`session_faq_whentocreate` that the concept of
-"session scopes" was introduced, with an emphasis on web applications
-and the practice of linking the scope of a :class:`.Session` with that
-of a web request.   Most modern web frameworks include integration tools
-so that the scope of the :class:`.Session` can be managed automatically,
-and these tools should be used as they are available.
-
-SQLAlchemy includes its own helper object, which helps with the establishment
-of user-defined :class:`.Session` scopes.  It is also used by third-party
-integration systems to help construct their integration schemes.
-
-The object is the :class:`.scoped_session` object, and it represents a
-**registry** of :class:`.Session` objects.  If you're not familiar with the
-registry pattern, a good introduction can be found in `Patterns of Enterprise
-Architecture `_.
-
-.. note::
-
-   The :class:`.scoped_session` object is a very popular and useful object
-   used by many SQLAlchemy applications.  However, it is important to note
-   that it presents **only one approach** to the issue of :class:`.Session`
-   management.  If you're new to SQLAlchemy, and especially if the
-   term "thread-local variable" seems strange to you, we recommend that
-   if possible you first familiarize yourself with an off-the-shelf integration
-   system such as `Flask-SQLAlchemy `_
-   or `zope.sqlalchemy `_.
-
-A :class:`.scoped_session` is constructed by calling it, passing it a
-**factory** which can create new :class:`.Session` objects. 
A factory -is just something that produces a new object when called, and in the -case of :class:`.Session`, the most common factory is the :class:`.sessionmaker`, -introduced earlier in this section. Below we illustrate this usage:: - - >>> from sqlalchemy.orm import scoped_session - >>> from sqlalchemy.orm import sessionmaker - - >>> session_factory = sessionmaker(bind=some_engine) - >>> Session = scoped_session(session_factory) - -The :class:`.scoped_session` object we've created will now call upon the -:class:`.sessionmaker` when we "call" the registry:: - - >>> some_session = Session() - -Above, ``some_session`` is an instance of :class:`.Session`, which we -can now use to talk to the database. This same :class:`.Session` is also -present within the :class:`.scoped_session` registry we've created. If -we call upon the registry a second time, we get back the **same** :class:`.Session`:: - - >>> some_other_session = Session() - >>> some_session is some_other_session - True - -This pattern allows disparate sections of the application to call upon a global -:class:`.scoped_session`, so that all those areas may share the same session -without the need to pass it explicitly. The :class:`.Session` we've established -in our registry will remain, until we explicitly tell our registry to dispose of it, -by calling :meth:`.scoped_session.remove`:: - - >>> Session.remove() - -The :meth:`.scoped_session.remove` method first calls :meth:`.Session.close` on -the current :class:`.Session`, which has the effect of releasing any connection/transactional -resources owned by the :class:`.Session` first, then discarding the :class:`.Session` -itself. "Releasing" here means that connections are returned to their connection pool and any transactional state is rolled back, ultimately using the ``rollback()`` method of the underlying DBAPI connection. - -At this point, the :class:`.scoped_session` object is "empty", and will create -a **new** :class:`.Session` when called again. As illustrated below, this -is not the same :class:`.Session` we had before:: - - >>> new_session = Session() - >>> new_session is some_session - False - -The above series of steps illustrates the idea of the "registry" pattern in a -nutshell. With that basic idea in hand, we can discuss some of the details -of how this pattern proceeds. - -Implicit Method Access ----------------------- - -The job of the :class:`.scoped_session` is simple; hold onto a :class:`.Session` -for all who ask for it. As a means of producing more transparent access to this -:class:`.Session`, the :class:`.scoped_session` also includes **proxy behavior**, -meaning that the registry itself can be treated just like a :class:`.Session` -directly; when methods are called on this object, they are **proxied** to the -underlying :class:`.Session` being maintained by the registry:: - - Session = scoped_session(some_factory) - - # equivalent to: - # - # session = Session() - # print session.query(MyClass).all() - # - print Session.query(MyClass).all() - -The above code accomplishes the same task as that of acquiring the current -:class:`.Session` by calling upon the registry, then using that :class:`.Session`. - -Thread-Local Scope ------------------- - -Users who are familiar with multithreaded programming will note that representing -anything as a global variable is usually a bad idea, as it implies that the -global object will be accessed by many threads concurrently. 
The :class:`.Session`
-object is entirely designed to be used in a **non-concurrent** fashion, which
-in terms of multithreading means "only in one thread at a time".   So our
-above example of :class:`.scoped_session` usage, where the same :class:`.Session`
-object is maintained across multiple calls, suggests that some process needs
-to be in place such that multiple calls across many threads don't actually get
-a handle to the same session.   We call this notion **thread local storage**,
-which means that a special object is used that will maintain a distinct object
-for each application thread.   Python provides this via the
-`threading.local() `_
-construct.  The :class:`.scoped_session` object by default uses this object
-as storage, so that a single :class:`.Session` is maintained for all who call
-upon the :class:`.scoped_session` registry, but only within the scope of a single
-thread.   Callers who call upon the registry in a different thread get a
-:class:`.Session` instance that is local to that other thread.
-
-Using this technique, the :class:`.scoped_session` provides a quick and relatively
-simple (if one is familiar with thread-local storage) way of providing
-a single, global object in an application that is safe to be called upon
-from multiple threads.
-
-The :meth:`.scoped_session.remove` method, as always, removes the current
-:class:`.Session` associated with the thread, if any.  However, one advantage of the
-``threading.local()`` object is that if the application thread itself ends, the
-"storage" for that thread is also garbage collected.  So it is in fact "safe" to
-use thread local scope with an application that spawns and tears down threads,
-without the need to call :meth:`.scoped_session.remove`.  However, the scope
-of transactions themselves, i.e. ending them via :meth:`.Session.commit` or
-:meth:`.Session.rollback`, will usually still be something that must be explicitly
-arranged for at the appropriate time, unless the application actually ties the
-lifespan of a thread to the lifespan of a transaction.
-
-.. _session_lifespan:
-
-Using Thread-Local Scope with Web Applications
-----------------------------------------------
-
-As discussed in the section :ref:`session_faq_whentocreate`, a web application
-is architected around the concept of a **web request**, and integrating
-such an application with the :class:`.Session` usually implies that the :class:`.Session`
-will be associated with that request.  As it turns out, most Python web frameworks,
-with notable exceptions such as the asynchronous frameworks Twisted and
-Tornado, use threads in a simple way, such that a particular web request is received,
-processed, and completed within the scope of a single *worker thread*.  When
-the request ends, the worker thread is released to a pool of workers where it
-is available to handle another request.
-
-This simple correspondence of web request and thread means that to associate a
-:class:`.Session` with a thread implies it is also associated with the web request
-running within that thread, and vice versa, provided that the :class:`.Session` is
-created only after the web request begins and torn down just before the web request ends.
-So it is a common practice to use :class:`.scoped_session` as a quick way
-to integrate the :class:`.Session` with a web application. 
The sequence -diagram below illustrates this flow:: - - Web Server Web Framework SQLAlchemy ORM Code - -------------- -------------- ------------------------------ - startup -> Web framework # Session registry is established - initializes Session = scoped_session(sessionmaker()) - - incoming - web request -> web request -> # The registry is *optionally* - starts # called upon explicitly to create - # a Session local to the thread and/or request - Session() - - # the Session registry can otherwise - # be used at any time, creating the - # request-local Session() if not present, - # or returning the existing one - Session.query(MyClass) # ... - - Session.add(some_object) # ... - - # if data was modified, commit the - # transaction - Session.commit() - - web request ends -> # the registry is instructed to - # remove the Session - Session.remove() - - sends output <- - outgoing web <- - response - -Using the above flow, the process of integrating the :class:`.Session` with the -web application has exactly two requirements: - -1. Create a single :class:`.scoped_session` registry when the web application - first starts, ensuring that this object is accessible by the rest of the - application. -2. Ensure that :meth:`.scoped_session.remove` is called when the web request ends, - usually by integrating with the web framework's event system to establish - an "on request end" event. - -As noted earlier, the above pattern is **just one potential way** to integrate a :class:`.Session` -with a web framework, one which in particular makes the significant assumption -that the **web framework associates web requests with application threads**. It is -however **strongly recommended that the integration tools provided with the web framework -itself be used, if available**, instead of :class:`.scoped_session`. - -In particular, while using a thread local can be convenient, it is preferable that the :class:`.Session` be -associated **directly with the request**, rather than with -the current thread. The next section on custom scopes details a more advanced configuration -which can combine the usage of :class:`.scoped_session` with direct request based scope, or -any kind of scope. - -Using Custom Created Scopes ---------------------------- - -The :class:`.scoped_session` object's default behavior of "thread local" scope is only -one of many options on how to "scope" a :class:`.Session`. A custom scope can be defined -based on any existing system of getting at "the current thing we are working with". - -Suppose a web framework defines a library function ``get_current_request()``. An application -built using this framework can call this function at any time, and the result will be -some kind of ``Request`` object that represents the current request being processed. -If the ``Request`` object is hashable, then this function can be easily integrated with -:class:`.scoped_session` to associate the :class:`.Session` with the request. 
Below we illustrate -this in conjunction with a hypothetical event marker provided by the web framework -``on_request_end``, which allows code to be invoked whenever a request ends:: - - from my_web_framework import get_current_request, on_request_end - from sqlalchemy.orm import scoped_session, sessionmaker - - Session = scoped_session(sessionmaker(bind=some_engine), scopefunc=get_current_request) - - @on_request_end - def remove_session(req): - Session.remove() - -Above, we instantiate :class:`.scoped_session` in the usual way, except that we pass -our request-returning function as the "scopefunc". This instructs :class:`.scoped_session` -to use this function to generate a dictionary key whenever the registry is called upon -to return the current :class:`.Session`. In this case it is particularly important -that we ensure a reliable "remove" system is implemented, as this dictionary is not -otherwise self-managed. - - -Contextual Session API ----------------------- - -.. autoclass:: sqlalchemy.orm.scoping.scoped_session - :members: - -.. autoclass:: sqlalchemy.util.ScopedRegistry - :members: - -.. autoclass:: sqlalchemy.util.ThreadLocalRegistry - -.. _session_partitioning: - -Partitioning Strategies -======================= - -Simple Vertical Partitioning ----------------------------- - -Vertical partitioning places different kinds of objects, or different tables, -across multiple databases:: - - engine1 = create_engine('postgresql://db1') - engine2 = create_engine('postgresql://db2') - - Session = sessionmaker(twophase=True) - - # bind User operations to engine 1, Account operations to engine 2 - Session.configure(binds={User:engine1, Account:engine2}) - - session = Session() - -Above, operations against either class will make usage of the :class:`.Engine` -linked to that class. Upon a flush operation, similar rules take place -to ensure each class is written to the right database. - -The transactions among the multiple databases can optionally be coordinated -via two phase commit, if the underlying backend supports it. See -:ref:`session_twophase` for an example. - -Custom Vertical Partitioning ----------------------------- - -More comprehensive rule-based class-level partitioning can be built by -overriding the :meth:`.Session.get_bind` method. Below we illustrate -a custom :class:`.Session` which delivers the following rules: - -1. Flush operations are delivered to the engine named ``master``. - -2. Operations on objects that subclass ``MyOtherClass`` all - occur on the ``other`` engine. - -3. Read operations for all other classes occur on a random - choice of the ``slave1`` or ``slave2`` database. 
-
-::
-
-    engines = {
-        'master':create_engine("sqlite:///master.db"),
-        'other':create_engine("sqlite:///other.db"),
-        'slave1':create_engine("sqlite:///slave1.db"),
-        'slave2':create_engine("sqlite:///slave2.db"),
-    }
-
-    from sqlalchemy.orm import Session, sessionmaker
-    import random
-
-    class RoutingSession(Session):
-        def get_bind(self, mapper=None, clause=None):
-            if mapper and issubclass(mapper.class_, MyOtherClass):
-                return engines['other']
-            elif self._flushing:
-                return engines['master']
-            else:
-                return engines[
-                    random.choice(['slave1','slave2'])
-                ]
-
-The above :class:`.Session` class is plugged in using the ``class_``
-argument to :class:`.sessionmaker`::
-
-    Session = sessionmaker(class_=RoutingSession)
-
-This approach can be combined with multiple :class:`.MetaData` objects,
-using an approach such as that of using the declarative ``__abstract__``
-keyword, described at :ref:`declarative_abstract`.
-
-Horizontal Partitioning
------------------------
-
-Horizontal partitioning partitions the rows of a single table (or a set of
-tables) across multiple databases.
-
-See the "sharding" example: :ref:`examples_sharding`.
-
-Sessions API
-============
-
-Session and sessionmaker()
----------------------------
-
-.. autoclass:: sessionmaker
-    :members:
-    :inherited-members:
-
-.. autoclass:: sqlalchemy.orm.session.Session
-   :members:
-   :inherited-members:
-
-.. autoclass:: sqlalchemy.orm.session.SessionTransaction
-   :members:
-
-Session Utilities
------------------
-
-.. autofunction:: make_transient
-
-.. autofunction:: make_transient_to_detached
-
-.. autofunction:: object_session
-
-.. autofunction:: sqlalchemy.orm.util.was_deleted
-
-Attribute and State Management Utilities
------------------------------------------
-
-These functions are provided by the SQLAlchemy attribute
-instrumentation API to provide a detailed interface for dealing
-with instances, attribute values, and history.  Some of them
-are useful when constructing event listener functions, such as
-those described in :doc:`/orm/events`.
-
-.. currentmodule:: sqlalchemy.orm.util
-
-.. autofunction:: object_state
-
-.. currentmodule:: sqlalchemy.orm.attributes
-
-.. autofunction:: del_attribute
-
-.. autofunction:: get_attribute
-
-.. autofunction:: get_history
-
-.. autofunction:: init_collection
-
-.. autofunction:: flag_modified
-
-.. function:: instance_state
-
-    Return the :class:`.InstanceState` for a given
-    mapped object.
-
-    This function is the internal version
-    of :func:`.object_state`.   The
-    :func:`.object_state` and/or the
-    :func:`.inspect` function is preferred here
-    as they each emit an informative exception
-    if the given object is not mapped.
-
-.. autofunction:: sqlalchemy.orm.instrumentation.is_instrumented
-
-.. autofunction:: set_attribute
-
-.. autofunction:: set_committed_value
-
-.. autoclass:: History
-    :members:
+.. toctree::
+    :maxdepth: 2
+
+    session_basics
+    session_state_management
+    cascades
+    session_transaction
+    persistence_techniques
+    contextual
+    session_api
diff --git a/doc/build/orm/session_api.rst b/doc/build/orm/session_api.rst
new file mode 100644
index 0000000000..64ac8c0867
--- /dev/null
+++ b/doc/build/orm/session_api.rst
@@ -0,0 +1,74 @@
+Session API
+============
+
+Session and sessionmaker()
+---------------------------
+
+.. autoclass:: sessionmaker
+    :members:
+    :inherited-members:
+
+.. autoclass:: sqlalchemy.orm.session.Session
+   :members:
+   :inherited-members:
+
+.. autoclass:: sqlalchemy.orm.session.SessionTransaction
+   :members:
+
+Session Utilities
+-----------------
+
+.. autofunction:: make_transient
+
+.. autofunction:: make_transient_to_detached
+
+.. autofunction:: object_session
+
+.. autofunction:: sqlalchemy.orm.util.was_deleted
+
+Attribute and State Management Utilities
+-----------------------------------------
+
+These functions are provided by the SQLAlchemy attribute
+instrumentation API to provide a detailed interface for dealing
+with instances, attribute values, and history.  Some of them
+are useful when constructing event listener functions, such as
+those described in :doc:`/orm/events`.
+
+.. currentmodule:: sqlalchemy.orm.util
+
+.. autofunction:: object_state
+
+.. currentmodule:: sqlalchemy.orm.attributes
+
+.. autofunction:: del_attribute
+
+.. autofunction:: get_attribute
+
+.. autofunction:: get_history
+
+.. autofunction:: init_collection
+
+.. autofunction:: flag_modified
+
+.. function:: instance_state
+
+    Return the :class:`.InstanceState` for a given
+    mapped object.
+
+    This function is the internal version
+    of :func:`.object_state`.   The
+    :func:`.object_state` and/or the
+    :func:`.inspect` function is preferred here
+    as they each emit an informative exception
+    if the given object is not mapped.
+
+.. autofunction:: sqlalchemy.orm.instrumentation.is_instrumented
+
+.. autofunction:: set_attribute
+
+.. autofunction:: set_committed_value
+
+.. autoclass:: History
+    :members:
+
diff --git a/doc/build/orm/session_basics.rst b/doc/build/orm/session_basics.rst
new file mode 100644
index 0000000000..8919864caf
--- /dev/null
+++ b/doc/build/orm/session_basics.rst
@@ -0,0 +1,744 @@
+==========================
+Session Basics
+==========================
+
+What does the Session do?
+=========================
+
+In the most general sense, the :class:`~.Session` establishes all
+conversations with the database and represents a "holding zone" for all the
+objects which you've loaded or associated with it during its lifespan.  It
+provides the entrypoint to acquire a :class:`.Query` object, which sends
+queries to the database using the :class:`~.Session` object's current database
+connection, populating result rows into objects that are then stored in the
+:class:`.Session`, inside a structure called the `Identity Map
+`_ - a data structure
+that maintains unique copies of each object, where "unique" means "only one
+object with a particular primary key".
+
+The :class:`.Session` begins in an essentially stateless form.  Once queries
+are issued or other objects are persisted with it, it requests a connection
+resource from an :class:`.Engine` that is associated either with the
+:class:`.Session` itself or with the mapped :class:`.Table` objects being
+operated upon.  This connection represents an ongoing transaction, which
+remains in effect until the :class:`.Session` is instructed to commit or roll
+back its pending state.
+
+All changes to objects maintained by a :class:`.Session` are tracked - before
+the database is queried again or before the current transaction is committed,
+it **flushes** all pending changes to the database.  This is known as the `Unit
+of Work `_ pattern.
+
+When using a :class:`.Session`, it's important to note that the objects
+which are associated with it are **proxy objects** to the transaction being
+held by the :class:`.Session` - there are a variety of events that will cause
+objects to re-access the database in order to keep synchronized.  It is
+possible to "detach" objects from a :class:`.Session`, and to continue using
+them, though this practice has its caveats.  It's intended that
+usually, you'd re-associate detached objects with another :class:`.Session` when you
+want to work with them again, so that they can resume their normal task of
+representing database state.
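+
+As a brief illustration of the identity map described above, and assuming a
+hypothetical mapped class ``User``, loading the same primary key twice
+returns the same object::
+
+    u1 = session.query(User).get(5)
+    u2 = session.query(User).get(5)
+
+    # the identity map maintains one object per primary key, per Session
+    assert u1 is u2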
+
+.. _session_getting:
+
+Getting a Session
+=================
+
+:class:`.Session` is a regular Python class which can
+be directly instantiated.  However, to standardize how sessions are configured
+and acquired, the :class:`.sessionmaker` class is normally
+used to create a top level :class:`.Session`
+configuration which can then be used throughout an application without the
+need to repeat the configurational arguments.
+
+The usage of :class:`.sessionmaker` is illustrated below:
+
+.. sourcecode:: python+sql
+
+    from sqlalchemy import create_engine
+    from sqlalchemy.orm import sessionmaker
+
+    # an Engine, which the Session will use for connection
+    # resources
+    some_engine = create_engine('postgresql://scott:tiger@localhost/')
+
+    # create a configured "Session" class
+    Session = sessionmaker(bind=some_engine)
+
+    # create a Session
+    session = Session()
+
+    # work with the session
+    myobject = MyObject('foo', 'bar')
+    session.add(myobject)
+    session.commit()
+
+Above, the :class:`.sessionmaker` call creates a factory for us,
+which we assign to the name ``Session``.  This factory, when
+called, will create a new :class:`.Session` object using the configurational
+arguments we've given the factory.  In this case, as is typical,
+we've configured the factory to specify a particular :class:`.Engine` for
+connection resources.
+
+A typical setup will associate the :class:`.sessionmaker` with an :class:`.Engine`,
+so that each :class:`.Session` generated will use this :class:`.Engine`
+to acquire connection resources.   This association can
+be set up as in the example above, using the ``bind`` argument.
+
+When you write your application, place the
+:class:`.sessionmaker` factory at the global level.   This
+factory can then
+be used by the rest of the application as the source of new :class:`.Session`
+instances, keeping the configuration for how :class:`.Session` objects
+are constructed in one place.
+
+The :class:`.sessionmaker` factory can also be used in conjunction with
+other helpers, which are passed a user-defined :class:`.sessionmaker` that
+is then maintained by the helper.  Some of these helpers are discussed in the
+section :ref:`session_faq_whentocreate`.
+
+Adding Additional Configuration to an Existing sessionmaker()
+--------------------------------------------------------------
+
+A common scenario is where the :class:`.sessionmaker` is invoked
+at module import time, however the generation of one or more :class:`.Engine`
+instances to be associated with the :class:`.sessionmaker` has not yet proceeded.
+For this use case, the :class:`.sessionmaker` construct offers the
+:meth:`.sessionmaker.configure` method, which will place additional configuration
+directives into an existing :class:`.sessionmaker` that will take place
+when the construct is invoked::
+
+
+    from sqlalchemy.orm import sessionmaker
+    from sqlalchemy import create_engine
+
+    # configure Session class with desired options
+    Session = sessionmaker()
+
+    # later, we create the engine
+    engine = create_engine('postgresql://...')
+
+    # associate it with our custom Session class
+    Session.configure(bind=engine)
+
+    # work with the session
+    session = Session()
+
+Creating Ad-Hoc Session Objects with Alternate Arguments
+---------------------------------------------------------
+
+For the use case where an application needs to create a new :class:`.Session` with
+special arguments that deviate from what is normally used throughout the application,
+such as a :class:`.Session` that binds to an alternate
+source of connectivity, or a :class:`.Session` that should
+have other arguments such as ``expire_on_commit`` established differently from
+what most of the application wants, specific arguments can be passed to the
+:class:`.sessionmaker` factory's :meth:`.sessionmaker.__call__` method.
+These arguments will override whatever
+configurations have already been placed, such as below, where a new :class:`.Session`
+is constructed against a specific :class:`.Connection`::
+
+    # at the module level, the global sessionmaker,
+    # bound to a specific Engine
+    Session = sessionmaker(bind=engine)
+
+    # later, some unit of code wants to create a
+    # Session that is bound to a specific Connection
+    conn = engine.connect()
+    session = Session(bind=conn)
+
+The typical rationale for the association of a :class:`.Session` with a specific
+:class:`.Connection` is that of a test fixture that maintains an external
+transaction - see :ref:`session_external_transaction` for an example of this.
+
+
+.. _session_faq:
+
+Session Frequently Asked Questions
+===================================
+
+By this point, many users already have questions about sessions.
+This section presents a mini-FAQ (note that we also have a `real FAQ `_)
+of the most basic issues one is presented with when using a :class:`.Session`.
+
+When do I make a :class:`.sessionmaker`?
+------------------------------------------
+
+Just one time, somewhere in your application's global scope.  It should be
+looked upon as part of your application's configuration.  If your
+application has three .py files in a package, you could, for example,
+place the :class:`.sessionmaker` line in your ``__init__.py`` file; from
+that point on your other modules say "from mypackage import Session".  That
+way, everyone else just uses :class:`.Session()`,
+and the configuration of that session is controlled by that central point.
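+
+As a sketch of that layout (the package and module names here are
+hypothetical)::
+
+    # mypackage/__init__.py
+    from sqlalchemy.orm import sessionmaker
+
+    Session = sessionmaker()
+
+    # mypackage/some_module.py
+    from mypackage import Session
+
+    def do_some_work():
+        session = Session()
+        # ... work with the session ...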
+
+If your application starts up, does imports, but does not know what
+database it's going to be connecting to, you can bind the
+:class:`.Session` at the "class" level to the
+engine later on, using :meth:`.sessionmaker.configure`.
+
+In the examples in this section, we will frequently show the
+:class:`.sessionmaker` being created right above the line where we actually
+invoke :class:`.Session`.  But that's just for
+example's sake!  In reality, the :class:`.sessionmaker` would be somewhere
+at the module level.   The calls to instantiate :class:`.Session`
+would then be placed at the point in the application where database
+conversations begin.
+
+.. _session_faq_whentocreate:
+
+When do I construct a :class:`.Session`, when do I commit it, and when do I close it?
+-------------------------------------------------------------------------------------
+
+.. topic:: tl;dr;
+
+    As a general rule, keep the lifecycle of the session **separate and
+    external** from functions and objects that access and/or manipulate
+    database data.
+
+A :class:`.Session` is typically constructed at the beginning of a logical
+operation where database access is potentially anticipated.
+
+The :class:`.Session`, whenever it is used to talk to the database,
+begins a database transaction as soon as it starts communicating.
+Assuming the ``autocommit`` flag is left at its recommended default
+of ``False``, this transaction remains in progress until the :class:`.Session`
+is rolled back, committed, or closed.   The :class:`.Session` will
+begin a new transaction if it is used again, subsequent to the previous
+transaction ending; from this it follows that the :class:`.Session`
+is capable of having a lifespan across many transactions, though only
+one at a time.   We refer to these two concepts as **transaction scope**
+and **session scope**.
+
+The implication here is that the SQLAlchemy ORM is encouraging the
+developer to establish these two scopes in their application,
+including not only when the scopes begin and end, but also the
+expanse of those scopes, for example should a single
+:class:`.Session` instance be local to the execution flow within a
+function or method, should it be a global object used by the
+entire application, or somewhere in between these two.
+
+The burden placed on the developer to determine this scope is one
+area where the SQLAlchemy ORM necessarily has a strong opinion
+about how the database should be used.  The :term:`unit of work` pattern
+is specifically one of accumulating changes over time and flushing
+them periodically, keeping in-memory state in sync with what's
+known to be present in a local transaction.  This pattern is only
+effective when meaningful transaction scopes are in place.
+
+It's usually not very hard to determine the best points at which
+to begin and end the scope of a :class:`.Session`, though the wide
+variety of application architectures possible can introduce
+challenging situations.
+
+A common choice is to tear down the :class:`.Session` at the same
+time the transaction ends, meaning the transaction and session scopes
+are the same.  This is a great choice to start out with as it
+removes the need to consider session scope as separate from transaction
+scope.
+
+While there's no one-size-fits-all recommendation for how transaction
+scope should be determined, there are common patterns.   Especially
+if one is writing a web application, the choice is pretty much established.
+
+A web application is the easiest case because such an application is already
+constructed around a single, consistent scope - this is the **request**,
+which represents an incoming request from a browser, the processing
+of that request to formulate a response, and finally the delivery of that
+response back to the client.    Integrating web applications with the
+:class:`.Session` is then the straightforward task of linking the
+scope of the :class:`.Session` to that of the request.  The :class:`.Session`
+can be established as the request begins, or using a :term:`lazy initialization`
+pattern which establishes one as soon as it is needed. 
The request +then proceeds, with some system in place where application logic can access +the current :class:`.Session` in a manner associated with how the actual +request object is accessed. As the request ends, the :class:`.Session` +is torn down as well, usually through the usage of event hooks provided +by the web framework. The transaction used by the :class:`.Session` +may also be committed at this point, or alternatively the application may +opt for an explicit commit pattern, only committing for those requests +where one is warranted, but still always tearing down the :class:`.Session` +unconditionally at the end. + +Some web frameworks include infrastructure to assist in the task +of aligning the lifespan of a :class:`.Session` with that of a web request. +This includes products such as `Flask-SQLAlchemy `_, +for usage in conjunction with the Flask web framework, +and `Zope-SQLAlchemy `_, +typically used with the Pyramid framework. +SQLAlchemy recommends that these products be used as available. + +In those situations where the integration libraries are not +provided or are insufficient, SQLAlchemy includes its own "helper" class known as +:class:`.scoped_session`. A tutorial on the usage of this object +is at :ref:`unitofwork_contextual`. It provides both a quick way +to associate a :class:`.Session` with the current thread, as well as +patterns to associate :class:`.Session` objects with other kinds of +scopes. + +As mentioned before, for non-web applications there is no one clear +pattern, as applications themselves don't have just one pattern +of architecture. The best strategy is to attempt to demarcate +"operations", points at which a particular thread begins to perform +a series of operations for some period of time, which can be committed +at the end. Some examples: + +* A background daemon which spawns off child forks + would want to create a :class:`.Session` local to each child + process, work with that :class:`.Session` through the life of the "job" + that the fork is handling, then tear it down when the job is completed. + +* For a command-line script, the application would create a single, global + :class:`.Session` that is established when the program begins to do its + work, and commits it right as the program is completing its task. + +* For a GUI interface-driven application, the scope of the :class:`.Session` + may best be within the scope of a user-generated event, such as a button + push. Or, the scope may correspond to explicit user interaction, such as + the user "opening" a series of records, then "saving" them. + +As a general rule, the application should manage the lifecycle of the +session *externally* to functions that deal with specific data. This is a +fundamental separation of concerns which keeps data-specific operations +agnostic of the context in which they access and manipulate that data. + +E.g. 
**don't do this**:: + + ### this is the **wrong way to do it** ### + + class ThingOne(object): + def go(self): + session = Session() + try: + session.query(FooBar).update({"x": 5}) + session.commit() + except: + session.rollback() + raise + + class ThingTwo(object): + def go(self): + session = Session() + try: + session.query(Widget).update({"q": 18}) + session.commit() + except: + session.rollback() + raise + + def run_my_program(): + ThingOne().go() + ThingTwo().go() + +Keep the lifecycle of the session (and usually the transaction) +**separate and external**:: + + ### this is a **better** (but not the only) way to do it ### + + class ThingOne(object): + def go(self, session): + session.query(FooBar).update({"x": 5}) + + class ThingTwo(object): + def go(self, session): + session.query(Widget).update({"q": 18}) + + def run_my_program(): + session = Session() + try: + ThingOne().go(session) + ThingTwo().go(session) + + session.commit() + except: + session.rollback() + raise + finally: + session.close() + +The advanced developer will try to keep the details of session, transaction +and exception management as far as possible from the details of the program +doing its work. For example, we can further separate concerns using a `context manager `_:: + + ### another way (but again *not the only way*) to do it ### + + from contextlib import contextmanager + + @contextmanager + def session_scope(): + """Provide a transactional scope around a series of operations.""" + session = Session() + try: + yield session + session.commit() + except: + session.rollback() + raise + finally: + session.close() + + + def run_my_program(): + with session_scope() as session: + ThingOne().go(session) + ThingTwo().go(session) + + +Is the Session a cache? +---------------------------------- + +Yeee...no. It's somewhat used as a cache, in that it implements the +:term:`identity map` pattern, and stores objects keyed to their primary key. +However, it doesn't do any kind of query caching. This means, if you say +``session.query(Foo).filter_by(name='bar')``, even if ``Foo(name='bar')`` +is right there, in the identity map, the session has no idea about that. +It has to issue SQL to the database, get the rows back, and then when it +sees the primary key in the row, *then* it can look in the local identity +map and see that the object is already there. It's only when you say +``query.get({some primary key})`` that the +:class:`~sqlalchemy.orm.session.Session` doesn't have to issue a query. + +Additionally, the Session stores object instances using a weak reference +by default. This also defeats the purpose of using the Session as a cache. + +The :class:`.Session` is not designed to be a +global object from which everyone consults as a "registry" of objects. +That's more the job of a **second level cache**. SQLAlchemy provides +a pattern for implementing second level caching using `dogpile.cache `_, +via the :ref:`examples_caching` example. + +How can I get the :class:`~sqlalchemy.orm.session.Session` for a certain object? +------------------------------------------------------------------------------------ + +Use the :meth:`~.Session.object_session` classmethod +available on :class:`~sqlalchemy.orm.session.Session`:: + + session = Session.object_session(someobject) + +The newer :ref:`core_inspection_toplevel` system can also be used:: + + from sqlalchemy import inspect + session = inspect(someobject).session + +.. _session_faq_threadsafe: + +Is the session thread-safe? 
+------------------------------ + +The :class:`.Session` is very much intended to be used in a +**non-concurrent** fashion, which usually means in only one thread at a +time. + +The :class:`.Session` should be used in such a way that one +instance exists for a single series of operations within a single +transaction. One expedient way to get this effect is by associating +a :class:`.Session` with the current thread (see :ref:`unitofwork_contextual` +for background). Another is to use a pattern +where the :class:`.Session` is passed between functions and is otherwise +not shared with other threads. + +The bigger point is that you should not *want* to use the session +with multiple concurrent threads. That would be like having everyone at a +restaurant all eat from the same plate. The session is a local "workspace" +that you use for a specific set of tasks; you don't want to, or need to, +share that session with other threads who are doing some other task. + +Making sure the :class:`.Session` is only used in a single concurrent thread at a time +is called a "share nothing" approach to concurrency. But actually, not +sharing the :class:`.Session` implies a more significant pattern; it +means not just the :class:`.Session` object itself, but +also **all objects that are associated with that Session**, must be kept within +the scope of a single concurrent thread. The set of mapped +objects associated with a :class:`.Session` are essentially proxies for data +within database rows accessed over a database connection, and so just like +the :class:`.Session` itself, the whole +set of objects is really just a large-scale proxy for a database connection +(or connections). Ultimately, it's mostly the DBAPI connection itself that +we're keeping away from concurrent access; but since the :class:`.Session` +and all the objects associated with it are all proxies for that DBAPI connection, +the entire graph is essentially not safe for concurrent access. + +If there are in fact multiple threads participating +in the same task, then you may consider sharing the session and its objects between +those threads; however, in this extremely unusual scenario the application would +need to ensure that a proper locking scheme is implemented so that there isn't +*concurrent* access to the :class:`.Session` or its state. A more common approach +to this situation is to maintain a single :class:`.Session` per concurrent thread, +but to instead *copy* objects from one :class:`.Session` to another, often +using the :meth:`.Session.merge` method to copy the state of an object into +a new object local to a different :class:`.Session`. + +Basics of Using a Session +=========================== + +The most basic :class:`.Session` use patterns are presented here. + +Querying +-------- + +The :meth:`~.Session.query` function takes one or more +*entities* and returns a new :class:`~sqlalchemy.orm.query.Query` object which +will issue mapper queries within the context of this Session. 
+An entity is
+defined as a mapped class, a :class:`~sqlalchemy.orm.mapper.Mapper` object, an
+orm-enabled *descriptor*, or an ``AliasedClass`` object::
+
+    # query from a class
+    session.query(User).filter_by(name='ed').all()
+
+    # query with multiple classes, returns tuples
+    session.query(User, Address).join('addresses').filter_by(name='ed').all()
+
+    # query using orm-enabled descriptors
+    session.query(User.name, User.fullname).all()
+
+    # query from a mapper
+    user_mapper = class_mapper(User)
+    session.query(user_mapper)
+
+When :class:`~sqlalchemy.orm.query.Query` returns results, each object
+instantiated is stored within the identity map.  When a row matches an object
+which is already present, the same object is returned.  In the latter case,
+whether the row is populated onto an existing object depends upon
+whether the attributes of the instance have been *expired*.  A
+default-configured :class:`~sqlalchemy.orm.session.Session` automatically
+expires all instances along transaction boundaries, so that with a normally
+isolated transaction, there shouldn't be any issue of instances representing
+data which is stale with regard to the current transaction.
+
+The :class:`.Query` object is introduced in great detail in
+:ref:`ormtutorial_toplevel`, and further documented in
+:ref:`query_api_toplevel`.
+
+Adding New or Existing Items
+----------------------------
+
+:meth:`~.Session.add` is used to place instances in the
+session.  For *transient* (i.e. brand new) instances, this will have the effect
+of an INSERT taking place for those instances upon the next flush.  For
+instances which are *persistent* (i.e. were loaded by this session), they are
+already present and do not need to be added.  Instances which are *detached*
+(i.e. have been removed from a session) may be re-associated with a session
+using this method::
+
+    user1 = User(name='user1')
+    user2 = User(name='user2')
+    session.add(user1)
+    session.add(user2)
+
+    session.commit()     # write changes to the database
+
+To add a list of items to the session at once, use
+:meth:`~.Session.add_all`::
+
+    session.add_all([item1, item2, item3])
+
+The :meth:`~.Session.add` operation **cascades** along
+the ``save-update`` cascade. For more details see the section
+:ref:`unitofwork_cascades`.
+
+
+Deleting
+--------
+
+The :meth:`~.Session.delete` method places an instance
+into the Session's list of objects to be marked as deleted::
+
+    # mark two objects to be deleted
+    session.delete(obj1)
+    session.delete(obj2)
+
+    # commit (or flush)
+    session.commit()
+
+.. _session_deleting_from_collections:
+
+Deleting from Collections
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A common confusion that arises regarding :meth:`~.Session.delete` is when
+objects which are members of a collection are being deleted.  While the
+collection member is marked for deletion from the database, this does not
+impact the collection itself in memory until the collection is expired.
+Below, we illustrate that even after an ``Address`` object is marked
+for deletion, it's still present in the collection associated with the
+parent ``User``, even after a flush::
+
+    >>> address = user.addresses[1]
+    >>> session.delete(address)
+    >>> session.flush()
+    >>> address in user.addresses
+    True
+
+When the above session is committed, all attributes are expired.
+The next
+access of ``user.addresses`` will re-load the collection, revealing the
+desired state::
+
+    >>> session.commit()
+    >>> address in user.addresses
+    False
+
+The usual practice of deleting items within collections is to forego the usage
+of :meth:`~.Session.delete` directly, and instead use cascade behavior to
+automatically invoke the deletion as a result of removing the object from
+the parent collection.  The ``delete-orphan`` cascade accomplishes this,
+as illustrated in the example below::
+
+    mapper(User, users_table, properties={
+        'addresses':relationship(Address, cascade="all, delete, delete-orphan")
+    })
+    del user.addresses[1]
+    session.flush()
+
+Where above, upon removing the ``Address`` object from the ``User.addresses``
+collection, the ``delete-orphan`` cascade has the effect of marking the ``Address``
+object for deletion in the same way as passing it to :meth:`~.Session.delete`.
+
+See also :ref:`unitofwork_cascades` for detail on cascades.
+
+Deleting based on Filter Criterion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The caveat with ``Session.delete()`` is that you need to have an object
+in hand already in order to delete it.  The Query includes a
+:func:`~sqlalchemy.orm.query.Query.delete` method which deletes based on
+filtering criteria::
+
+    session.query(User).filter(User.id==7).delete()
+
+The ``Query.delete()`` method includes functionality to "expire" objects
+already in the session which match the criteria.  However it does have some
+caveats, including that "delete" and "delete-orphan" cascades won't be fully
+expressed for collections which are already loaded.  See the API docs for
+:meth:`~sqlalchemy.orm.query.Query.delete` for more details.
+
+.. _session_flushing:
+
+Flushing
+--------
+
+When the :class:`~sqlalchemy.orm.session.Session` is used with its default
+configuration, the flush step is nearly always done transparently.
+Specifically, the flush occurs before any individual
+:class:`~sqlalchemy.orm.query.Query` is issued, as well as within the
+:meth:`~.Session.commit` call before the transaction is
+committed.  It also occurs before a SAVEPOINT is issued when
+:meth:`~.Session.begin_nested` is used.
+
+Regardless of the autoflush setting, a flush can always be forced by issuing
+:meth:`~.Session.flush`::
+
+    session.flush()
+
+The "flush-on-Query" aspect of the behavior can be disabled by constructing
+:class:`.sessionmaker` with the flag ``autoflush=False``::
+
+    Session = sessionmaker(autoflush=False)
+
+Additionally, autoflush can be temporarily disabled by setting the
+``autoflush`` flag at any time::
+
+    mysession = Session()
+    mysession.autoflush = False
+
+Some autoflush-disable recipes are available at `DisableAutoFlush
+`_.
+
+The flush process *always* occurs within a transaction, even if the
+:class:`~sqlalchemy.orm.session.Session` has been configured with
+``autocommit=True``, a setting that disables the session's persistent
+transactional state.  If no transaction is present,
+:meth:`~.Session.flush` creates its own transaction and
+commits it.  Any failures during flush will always result in a rollback of
+whatever transaction is present.  If the Session is not in ``autocommit=True``
+mode, an explicit call to :meth:`~.Session.rollback` is
+required after a flush fails, even though the underlying transaction will have
+been rolled back already - this is so that the overall nesting pattern of
+so-called "subtransactions" is consistently maintained.
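+
+As a brief sketch of that last point - assuming a mapped class ``User``
+with a unique constraint on its ``name`` column (a hypothetical model, for
+illustration only) - a failed flush is followed by an explicit
+:meth:`~.Session.rollback` before the :class:`.Session` is used further::
+
+    from sqlalchemy.exc import IntegrityError
+
+    session.add(User(name='ed'))
+    session.add(User(name='ed'))  # duplicate; will violate the constraint
+
+    try:
+        session.flush()
+    except IntegrityError:
+        # the underlying transaction was rolled back by the failed flush;
+        # rollback() must still be called to reset the Session's own state
+        session.rollback()
+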
+.. _session_committing:
+
+Committing
+----------
+
+:meth:`~.Session.commit` is used to commit the current
+transaction.  It always issues :meth:`~.Session.flush`
+beforehand to flush any remaining state to the database; this is independent
+of the "autoflush" setting.  If no transaction is present, it raises an error.
+Note that the default behavior of the :class:`~sqlalchemy.orm.session.Session`
+is that a "transaction" is always present; this behavior can be disabled by
+setting ``autocommit=True``.  In autocommit mode, a transaction can be
+initiated by calling the :meth:`~.Session.begin` method.
+
+.. note::
+
+   The term "transaction" here refers to a transactional
+   construct within the :class:`.Session` itself which may be
+   maintaining zero or more actual database (DBAPI) transactions.  An individual
+   DBAPI connection begins participation in the "transaction" as it is first
+   used to execute a SQL statement, then remains present until the session-level
+   "transaction" is completed.  See :ref:`unitofwork_transaction` for
+   further detail.
+
+Another behavior of :meth:`~.Session.commit` is that by
+default it expires the state of all instances present after the commit is
+complete.  This is so that when the instances are next accessed, either through
+attribute access or by them being present in a
+:class:`~sqlalchemy.orm.query.Query` result set, they receive the most recent
+state.  To disable this behavior, configure
+:class:`.sessionmaker` with ``expire_on_commit=False``.
+
+Normally, instances loaded into the :class:`~sqlalchemy.orm.session.Session`
+are never changed by subsequent queries; the assumption is that the current
+transaction is isolated so the state most recently loaded is correct as long
+as the transaction continues.  Setting ``autocommit=True`` works against this
+model to some degree since the :class:`~sqlalchemy.orm.session.Session`
+behaves in exactly the same way with regard to attribute state, except no
+transaction is present.
+
+.. _session_rollback:
+
+Rolling Back
+------------
+
+:meth:`~.Session.rollback` rolls back the current
+transaction.  With a default configured session, the post-rollback state of the
+session is as follows:
+
+  * All transactions are rolled back and all connections returned to the
+    connection pool, unless the Session was bound directly to a Connection, in
+    which case the connection is still maintained (but still rolled back).
+  * Objects which were initially in the *pending* state when they were added
+    to the :class:`~sqlalchemy.orm.session.Session` within the lifespan of the
+    transaction are expunged, corresponding to their INSERT statement being
+    rolled back.  The state of their attributes remains unchanged.
+  * Objects which were marked as *deleted* within the lifespan of the
+    transaction are promoted back to the *persistent* state, corresponding to
+    their DELETE statement being rolled back.  Note that if those objects were
+    first *pending* within the transaction, that operation takes precedence
+    instead.
+  * All objects not expunged are fully expired.
+
+With that state understood, the :class:`~sqlalchemy.orm.session.Session` may
+safely continue usage after a rollback occurs.
+
+When a :meth:`~.Session.flush` fails, typically for
+reasons like primary key, foreign key, or "not nullable" constraint
+violations, a :meth:`~.Session.rollback` is issued
+automatically (it's currently not possible for a flush to continue after a
+partial failure).  However, the flush process always uses its own transactional
+demarcator called a *subtransaction*, which is described more fully in the
+docstrings for :class:`~sqlalchemy.orm.session.Session`.  What it means here is
+that even though the database transaction has been rolled back, the end user
+must still issue :meth:`~.Session.rollback` to fully
+reset the state of the :class:`~sqlalchemy.orm.session.Session`.
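+
+As a short sketch of the post-rollback state described above - with ``User``
+as a hypothetical mapped class - an object that was *pending* within the
+transaction is expunged when the transaction is rolled back, while its
+attribute state remains unchanged::
+
+    session = Session()
+
+    u1 = User(name='ed')
+    session.add(u1)       # u1 is pending
+    session.flush()       # INSERT is emitted for u1
+
+    session.rollback()    # INSERT is rolled back; u1 is expunged
+    assert u1 not in session
+    assert u1.name == 'ed'   # attribute state is unchanged
+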
+
+Closing
+-------
+
+The :meth:`~.Session.close` method issues a
+:meth:`~.Session.expunge_all`, and :term:`releases` any
+transactional/connection resources.  When connections are returned to the
+connection pool, transactional state is rolled back as well.
+
+
diff --git a/doc/build/orm/session_state_management.rst b/doc/build/orm/session_state_management.rst
new file mode 100644
index 0000000000..1ca7ca2e43
--- /dev/null
+++ b/doc/build/orm/session_state_management.rst
@@ -0,0 +1,560 @@
+State Management
+================
+
+.. _session_object_states:
+
+Quickie Intro to Object States
+------------------------------
+
+It's helpful to know the states which an instance can have within a session:
+
+* **Transient** - an instance that's not in a session, and is not saved to the
+  database; i.e. it has no database identity.  The only relationship such an
+  object has to the ORM is that its class has a ``mapper()`` associated with
+  it.
+
+* **Pending** - when you :meth:`~.Session.add` a transient
+  instance, it becomes pending.  It has not actually been flushed to the
+  database yet, but it will be when the next flush occurs.
+
+* **Persistent** - an instance which is present in the session and has a record
+  in the database.  You get persistent instances by either flushing so that the
+  pending instances become persistent, or by querying the database for
+  existing instances (or moving persistent instances from other sessions into
+  your local session).
+
+* **Detached** - an instance which has a record in the database, but is not in
+  any session.  There's nothing wrong with this, and you can use objects
+  normally when they're detached, **except** they will not be able to issue
+  any SQL in order to load collections or attributes which are not yet loaded,
+  or were marked as "expired".
+
+Knowing these states is important, since the
+:class:`.Session` tries to be strict about ambiguous
+operations (such as trying to save the same object to two different sessions
+at the same time).
+
+Getting the Current State of an Object
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The actual state of any mapped object can be viewed at any time using
+the :func:`.inspect` system::
+
+    >>> from sqlalchemy import inspect
+    >>> insp = inspect(my_object)
+    >>> insp.persistent
+    True
+
+.. seealso::
+
+    :attr:`.InstanceState.transient`
+
+    :attr:`.InstanceState.pending`
+
+    :attr:`.InstanceState.persistent`
+
+    :attr:`.InstanceState.detached`
+
+
+Session Attributes
+------------------
+
+The :class:`~sqlalchemy.orm.session.Session` itself acts somewhat like a
+set-like collection.  All items present may be accessed using the iterator
+interface::
+
+    for obj in session:
+        print obj
+
+And presence may be tested for using regular "contains" semantics::
+
+    if obj in session:
+        print "Object is present"
+
+The session is also keeping track of all newly created (i.e. pending) objects,
+all objects which have had changes since they were last loaded or saved (i.e.
+"dirty"), and everything that's been marked as deleted:: + + # pending objects recently added to the Session + session.new + + # persistent objects which currently have changes detected + # (this collection is now created on the fly each time the property is called) + session.dirty + + # persistent objects that have been marked as deleted via session.delete(obj) + session.deleted + + # dictionary of all persistent objects, keyed on their + # identity key + session.identity_map + +(Documentation: :attr:`.Session.new`, :attr:`.Session.dirty`, +:attr:`.Session.deleted`, :attr:`.Session.identity_map`). + +Note that objects within the session are by default *weakly referenced*. This +means that when they are dereferenced in the outside application, they fall +out of scope from within the :class:`~sqlalchemy.orm.session.Session` as well +and are subject to garbage collection by the Python interpreter. The +exceptions to this include objects which are pending, objects which are marked +as deleted, or persistent objects which have pending changes on them. After a +full flush, these collections are all empty, and all objects are again weakly +referenced. To disable the weak referencing behavior and force all objects +within the session to remain until explicitly expunged, configure +:class:`.sessionmaker` with the ``weak_identity_map=False`` +setting. + +.. _unitofwork_merging: + +Merging +------- + +:meth:`~.Session.merge` transfers state from an +outside object into a new or already existing instance within a session. It +also reconciles the incoming data against the state of the +database, producing a history stream which will be applied towards the next +flush, or alternatively can be made to produce a simple "transfer" of +state without producing change history or accessing the database. Usage is as follows:: + + merged_object = session.merge(existing_object) + +When given an instance, it follows these steps: + +* It examines the primary key of the instance. If it's present, it attempts + to locate that instance in the local identity map. If the ``load=True`` + flag is left at its default, it also checks the database for this primary + key if not located locally. +* If the given instance has no primary key, or if no instance can be found + with the primary key given, a new instance is created. +* The state of the given instance is then copied onto the located/newly + created instance. For attributes which are present on the source + instance, the value is transferred to the target instance. For mapped + attributes which aren't present on the source, the attribute is + expired on the target instance, discarding its existing value. + + If the ``load=True`` flag is left at its default, + this copy process emits events and will load the target object's + unloaded collections for each attribute present on the source object, + so that the incoming state can be reconciled against what's + present in the database. If ``load`` + is passed as ``False``, the incoming data is "stamped" directly without + producing any history. +* The operation is cascaded to related objects and collections, as + indicated by the ``merge`` cascade (see :ref:`unitofwork_cascades`). +* The new instance is returned. + +With :meth:`~.Session.merge`, the given "source" +instance is not modified nor is it associated with the target :class:`.Session`, +and remains available to be merged with any number of other :class:`.Session` +objects. 
+:meth:`~.Session.merge` is useful for
+taking the state of any kind of object structure without regard for its
+origins or current session associations and copying its state into a
+new session.  Here are some examples:
+
+* An application which reads an object structure from a file and wishes to
+  save it to the database might parse the file, build up the
+  structure, and then use
+  :meth:`~.Session.merge` to save it
+  to the database, ensuring that the data within the file is
+  used to formulate the primary key of each element of the
+  structure.  Later, when the file has changed, the same
+  process can be re-run, producing a slightly different
+  object structure, which can then be ``merged`` in again,
+  and the :class:`~sqlalchemy.orm.session.Session` will
+  automatically update the database to reflect those
+  changes, loading each object from the database by primary key and
+  then updating its state with the new state given.
+
+* An application is storing objects in an in-memory cache, shared by
+  many :class:`.Session` objects simultaneously.  :meth:`~.Session.merge`
+  is used each time an object is retrieved from the cache to create
+  a local copy of it in each :class:`.Session` which requests it.
+  The cached object remains detached; only its state is moved into
+  copies of itself that are local to individual :class:`~.Session`
+  objects.
+
+  In the caching use case, it's common to use the ``load=False``
+  flag to remove the overhead of reconciling the object's state
+  with the database.  There's also a "bulk" version of
+  :meth:`~.Session.merge` called :meth:`~.Query.merge_result`
+  that was designed to work with cache-extended :class:`.Query`
+  objects - see the section :ref:`examples_caching`.
+
+* An application wants to transfer the state of a series of objects
+  into a :class:`.Session` maintained by a worker thread or other
+  concurrent system.  :meth:`~.Session.merge` makes a copy of each object
+  to be placed into this new :class:`.Session`.  At the end of the operation,
+  the parent thread/process maintains the objects it started with,
+  and the thread/worker can proceed with local copies of those objects.
+
+  In the "transfer between threads/processes" use case, the application
+  may want to use the ``load=False`` flag as well to avoid overhead and
+  redundant SQL queries as the data is transferred.
+
+Merge Tips
+~~~~~~~~~~
+
+:meth:`~.Session.merge` is an extremely useful method for many purposes.  However,
+it deals with the intricate border between objects that are transient/detached and
+those that are persistent, as well as the automated transference of state.
+The wide variety of scenarios that can present themselves here often require a
+more careful approach to the state of objects.  Common problems with merge usually involve
+some unexpected state regarding the object being passed to :meth:`~.Session.merge`.
+
+Let's use the canonical example of the User and Address objects::
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50), nullable=False)
+        addresses = relationship("Address", backref="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+
+        id = Column(Integer, primary_key=True)
+        email_address = Column(String(50), nullable=False)
+        user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
+
+Assume a ``User`` object with one ``Address``, already persistent::
+
+    >>> u1 = User(name='ed', addresses=[Address(email_address='ed@ed.com')])
+    >>> session.add(u1)
+    >>> session.commit()
+
+We now create ``a1``, an object outside the session, which we'd like
+to merge on top of the existing ``Address``::
+
+    >>> existing_a1 = u1.addresses[0]
+    >>> a1 = Address(id=existing_a1.id)
+
+A surprise would occur if we said this::
+
+    >>> a1.user = u1
+    >>> a1 = session.merge(a1)
+    >>> session.commit()
+    sqlalchemy.orm.exc.FlushError: New instance <Address at 0x...>
+    with identity key (<class '__main__.Address'>, (1,)) conflicts with
+    persistent instance <Address at 0x...>
+
+Why is that?  We weren't careful with our cascades.  The assignment
+of ``a1.user`` to a persistent object cascaded to the backref of ``User.addresses``
+and made our ``a1`` object pending, as though we had added it.  Now we have
+*two* ``Address`` objects in the session::
+
+    >>> a1 = Address()
+    >>> a1.user = u1
+    >>> a1 in session
+    True
+    >>> existing_a1 in session
+    True
+    >>> a1 is existing_a1
+    False
+
+Above, our ``a1`` is already pending in the session. The
+subsequent :meth:`~.Session.merge` operation essentially
+does nothing. Cascade can be configured via the :paramref:`~.relationship.cascade`
+option on :func:`.relationship`, although in this case it
+would mean removing the ``save-update`` cascade from the
+``User.addresses`` relationship - and usually, that behavior
+is extremely convenient.  The solution here would usually be to not assign
+``a1.user`` to an object already persistent in the target
+session.
+
+The ``cascade_backrefs=False`` option of :func:`.relationship`
+will also prevent the ``Address`` from
+being added to the session via the ``a1.user = u1`` assignment.
+
+Further detail on cascade operation is at :ref:`unitofwork_cascades`.
+
+Another example of unexpected state::
+
+    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
+    >>> assert a1.user is None
+    >>> a1 = session.merge(a1)
+    >>> session.commit()
+    sqlalchemy.exc.IntegrityError: (IntegrityError) address.user_id
+    may not be NULL
+
+Here, we accessed ``a1.user``, which returned its default value
+of ``None``, which, as a result of this access, has been placed in the ``__dict__`` of
+our object ``a1``.  Normally, this operation creates no change event,
+so the ``user_id`` attribute takes precedence during a
+flush.  But when we merge the ``Address`` object into the session, the operation
+is equivalent to::
+
+    >>> existing_a1.id = existing_a1.id
+    >>> existing_a1.user_id = u1.id
+    >>> existing_a1.user = None
+
+Where above, both ``user_id`` and ``user`` are assigned to, and change events
+are emitted for both.  The ``user`` association
+takes precedence, and None is applied to ``user_id``, causing a failure.
+
+Most :meth:`~.Session.merge` issues can be examined by first checking -
+is the object prematurely in the session?
+
+.. sourcecode:: python+sql
+
+    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
+    >>> assert a1 not in session
+    >>> a1 = session.merge(a1)
+
+Or is there state on the object that we don't want?  Examining ``__dict__``
+is a quick way to check::
+
+    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
+    >>> a1.user
+    >>> a1.__dict__
+    {'_sa_instance_state': <...>,
+     'user_id': 1,
+     'id': 1,
+     'user': None}
+    >>> # we don't want user=None merged, remove it
+    >>> del a1.user
+    >>> a1 = session.merge(a1)
+    >>> # success
+    >>> session.commit()
+
+Expunging
+---------
+
+Expunge removes an object from the Session, sending persistent instances to
+the detached state, and pending instances to the transient state:
+
+.. sourcecode:: python+sql
+
+    session.expunge(obj1)
+
+To remove all items, call :meth:`~.Session.expunge_all`
+(this method was formerly known as ``clear()``).
+
+.. _session_expire:
+
+Refreshing / Expiring
+---------------------
+
+:term:`Expiring` means that the database-persisted data held inside a series
+of object attributes is erased, in such a way that when those attributes
+are next accessed, a SQL query is emitted which will refresh that data from
+the database.
+
+When we talk about expiration of data we are usually talking about an object
+that is in the :term:`persistent` state.  For example, if we load an object
+as follows::
+
+    user = session.query(User).filter_by(name='user1').first()
+
+The above ``User`` object is persistent, and has a series of attributes
+present; if we were to look inside its ``__dict__``, we'd see that state
+loaded::
+
+    >>> user.__dict__
+    {
+      'id': 1, 'name': u'user1',
+      '_sa_instance_state': <...>,
+    }
+
+where ``id`` and ``name`` refer to those columns in the database.
+``_sa_instance_state`` is a non-database-persisted value used by SQLAlchemy
+internally (it refers to the :class:`.InstanceState` for the instance;
+while not directly relevant to this section, we can get at it
+using the :func:`.inspect` function).
+
+At this point, the state in our ``User`` object matches that of the loaded
+database row.  But upon expiring the object using a method such as
+:meth:`.Session.expire`, we see that the state is removed::
+
+    >>> session.expire(user)
+    >>> user.__dict__
+    {'_sa_instance_state': <...>}
+
+We see that while the internal "state" still hangs around, the values which
+correspond to the ``id`` and ``name`` columns are gone.  If we were to access
+one of these columns while watching SQL, we'd see this:
+
+.. sourcecode:: python+sql
+
+    >>> print(user.name)
+    {opensql}SELECT user.id AS user_id, user.name AS user_name
+    FROM user
+    WHERE user.id = ?
+    (1,)
+    {stop}user1
+
+Above, upon accessing the expired attribute ``user.name``, the ORM initiated
+a :term:`lazy load` to retrieve the most recent state from the database,
+by emitting a SELECT for the user row to which this user refers.  Afterwards,
+the ``__dict__`` is again populated::
+
+    >>> user.__dict__
+    {
+      'id': 1, 'name': u'user1',
+      '_sa_instance_state': <...>,
+    }
+
+.. note:: While we are peeking inside of ``__dict__`` in order to see a bit
+   of what SQLAlchemy does with object attributes, we **should not modify**
+   the contents of ``__dict__`` directly, at least as far as those attributes
+   which the SQLAlchemy ORM is maintaining (other attributes outside of SQLA's
+   realm are fine).  This is because SQLAlchemy uses :term:`descriptors` in
+   order to track the changes we make to an object, and when we modify ``__dict__``
+   directly, the ORM won't be able to track that we changed something.
+
+Another key behavior of both :meth:`~.Session.expire` and :meth:`~.Session.refresh`
+is that all un-flushed changes on an object are discarded.
+That is,
+if we were to modify an attribute on our ``User``::
+
+    >>> user.name = 'user2'
+
+but then we call :meth:`~.Session.expire` without first calling :meth:`~.Session.flush`,
+our pending value of ``'user2'`` is discarded::
+
+    >>> session.expire(user)
+    >>> user.name
+    'user1'
+
+The :meth:`~.Session.expire` method can be used to mark as "expired" all ORM-mapped
+attributes for an instance::
+
+    # expire all ORM-mapped attributes on obj1
+    session.expire(obj1)
+
+It can also be passed a list of string attribute names, referring to specific
+attributes to be marked as expired::
+
+    # expire only attributes obj1.attr1, obj1.attr2
+    session.expire(obj1, ['attr1', 'attr2'])
+
+The :meth:`~.Session.refresh` method has a similar interface, but instead
+of expiring, it emits an immediate SELECT for the object's row::
+
+    # reload all attributes on obj1
+    session.refresh(obj1)
+
+:meth:`~.Session.refresh` also accepts a list of string attribute names,
+but unlike :meth:`~.Session.expire`, expects at least one name to
+be that of a column-mapped attribute::
+
+    # reload obj1.attr1, obj1.attr2
+    session.refresh(obj1, ['attr1', 'attr2'])
+
+The :meth:`.Session.expire_all` method allows us to essentially call
+:meth:`.Session.expire` on all objects contained within the :class:`.Session`
+at once::
+
+    session.expire_all()
+
+What Actually Loads
+~~~~~~~~~~~~~~~~~~~
+
+The SELECT statement that's emitted when an object marked with :meth:`~.Session.expire`
+is accessed, or when an object is loaded with :meth:`~.Session.refresh`, varies
+based on several factors, including:
+
+* The load of expired attributes is triggered from **column-mapped attributes only**.
+  While any kind of attribute can be marked as expired, including a
+  :func:`.relationship`-mapped attribute, accessing an expired :func:`.relationship`
+  attribute will emit a load only for that attribute, using standard
+  relationship-oriented lazy loading.  Column-oriented attributes, even if
+  expired, will not load as part of this operation, and instead will load when
+  any column-oriented attribute is accessed.
+
+* :func:`.relationship`-mapped attributes will not load in response to
+  expired column-based attributes being accessed.
+
+* Regarding relationships, :meth:`~.Session.refresh` is more restrictive than
+  :meth:`~.Session.expire` with regard to attributes that aren't column-mapped.
+  Calling :meth:`.refresh` and passing a list of names that only includes
+  relationship-mapped attributes will actually raise an error.
+  In any case, non-eager-loading :func:`.relationship` attributes will not be
+  included in any refresh operation.
+
+* :func:`.relationship` attributes configured as "eager loading" via the
+  :paramref:`~.relationship.lazy` parameter will load in the case of
+  :meth:`~.Session.refresh`, if either no attribute names are specified, or
+  if their names are included in the list of attributes to be
+  refreshed.
+
+* Attributes that are configured as :func:`.deferred` will not normally load,
+  during either the expired-attribute load or during a refresh.
+  An unloaded attribute that's :func:`.deferred` instead loads on its own when directly
+  accessed, or if part of a "group" of deferred attributes where an unloaded
+  attribute in that group is accessed.
+
+* For expired attributes that are loaded on access, a joined-inheritance table
+  mapping will emit a SELECT that typically only includes those tables for which
+  unloaded attributes are present.
+  The action here is sophisticated enough
+  to load only the parent or child table, for example, if the subset of columns
+  that were originally expired encompasses only one or the other of those tables.
+
+* When :meth:`~.Session.refresh` is used on a joined-inheritance table mapping,
+  the SELECT emitted will resemble that of when :meth:`.Session.query` is
+  used on the target object's class.  This is typically all those tables that
+  are set up as part of the mapping.
+
+
+When to Expire or Refresh
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The :class:`.Session` uses the expiration feature automatically whenever
+the transaction referred to by the session ends.  Meaning, whenever :meth:`.Session.commit`
+or :meth:`.Session.rollback` is called, all objects within the :class:`.Session`
+are expired, using a feature equivalent to that of the :meth:`.Session.expire_all`
+method.  The rationale is that the end of a transaction is a
+demarcating point at which there is no more context available in order to know
+what the current state of the database is, as any number of other transactions
+may be affecting it.  Only when a new transaction starts can we again have access
+to the current state of the database, at which point any number of changes
+may have occurred.
+
+.. sidebar:: Transaction Isolation
+
+    Of course, most databases are capable of handling
+    multiple transactions at once, even involving the same rows of data.  When
+    a relational database handles multiple transactions involving the same
+    tables or rows, this is when the :term:`isolation` aspect of the database comes
+    into play.  The isolation behavior of different databases varies considerably
+    and even on a single database can be configured to behave in different ways
+    (via the so-called :term:`isolation level` setting).  In that sense, the :class:`.Session`
+    can't fully predict when the same SELECT statement, emitted a second time,
+    will definitely return the data we already have, or will return new data.
+    So as a best guess, it assumes that within the scope of a transaction, unless
+    it is known that a SQL expression has been emitted to modify a particular row,
+    there's no need to refresh a row unless explicitly told to do so.
+
+The :meth:`.Session.expire` and :meth:`.Session.refresh` methods are used
+when one wants to force an object to re-load its data from the
+database, in those cases where it is known that the current state of data
+is possibly stale.  Reasons for this might include:
+
+* some SQL has been emitted within the transaction outside of the
+  scope of the ORM's object handling, such as if a :meth:`.Table.update` construct
+  were emitted using the :meth:`.Session.execute` method;
+
+* if the application
+  is attempting to acquire data that is known to have been modified in a
+  concurrent transaction, and it is also known that the isolation rules in effect
+  allow this data to be visible.
+
+The second bullet has the important caveat that "it is also known that the isolation rules in effect
+allow this data to be visible."  This means that it cannot be assumed that an
+UPDATE that happened on another database connection will yet be visible here
+locally; in many cases, it will not.  This is why if one wishes to use
+:meth:`.expire` or :meth:`.refresh` in order to view data between ongoing
+transactions, an understanding of the isolation behavior in effect is essential.
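+
+As a rough sketch of the first of those reasons, assuming a hypothetical
+``user_table`` :class:`.Table` and an already-loaded ``u1`` object, SQL
+emitted with :meth:`.Session.execute` is not coordinated with the state of
+``u1``, so :meth:`.Session.refresh` can be used afterwards to re-load its row::
+
+    # a Core UPDATE affecting u1's row; the ORM's object handling
+    # is not involved, so u1 still holds the old value
+    session.execute(
+        user_table.update().
+        values(name='newname').
+        where(user_table.c.id == u1.id)
+    )
+
+    # emit a SELECT to re-load u1 from the current transaction
+    session.refresh(u1)
+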
+.. seealso::
+
+    :meth:`.Session.expire`
+
+    :meth:`.Session.expire_all`
+
+    :meth:`.Session.refresh`
+
+    :term:`isolation` - glossary explanation of isolation which includes links
+    to Wikipedia.
+
+    `The SQLAlchemy Session In-Depth `_ - a video + slides with an in-depth discussion of the object
+    lifecycle including the role of data expiration.
diff --git a/doc/build/orm/session_transaction.rst b/doc/build/orm/session_transaction.rst
new file mode 100644
index 0000000000..ce5757dd0b
--- /dev/null
+++ b/doc/build/orm/session_transaction.rst
@@ -0,0 +1,365 @@
+=======================================
+Transactions and Connection Management
+=======================================
+
+.. _unitofwork_transaction:
+
+Managing Transactions
+=====================
+
+A newly constructed :class:`.Session` may be said to be in the "begin" state.
+In this state, the :class:`.Session` has not established any connection or
+transactional state with any of the :class:`.Engine` objects that may be associated
+with it.
+
+The :class:`.Session` then receives requests to operate upon a database connection.
+Typically, this means it is called upon to execute SQL statements using a particular
+:class:`.Engine`, which may be via :meth:`.Session.query`, :meth:`.Session.execute`,
+or within a flush operation of pending data, which occurs when such state exists
+and :meth:`.Session.commit` or :meth:`.Session.flush` is called.
+
+As these requests are received, each new :class:`.Engine` encountered is associated
+with an ongoing transactional state maintained by the :class:`.Session`.
+When the first :class:`.Engine` is operated upon, the :class:`.Session` can be said
+to have left the "begin" state and entered "transactional" state.  For each
+:class:`.Engine` encountered, a :class:`.Connection` is associated with it,
+which is acquired via the :meth:`.Engine.contextual_connect` method.  If a
+:class:`.Connection` was directly associated with the :class:`.Session` (see :ref:`session_external_transaction`
+for an example of this), it is
+added to the transactional state directly.
+
+For each :class:`.Connection`, the :class:`.Session` also maintains a :class:`.Transaction` object,
+which is acquired by calling :meth:`.Connection.begin` on each :class:`.Connection`,
+or if the :class:`.Session`
+object has been established using the flag ``twophase=True``, a :class:`.TwoPhaseTransaction`
+object acquired via :meth:`.Connection.begin_twophase`.  These transactions are all committed or
+rolled back corresponding to the invocation of the
+:meth:`.Session.commit` and :meth:`.Session.rollback` methods.  A commit operation will
+also call the :meth:`.TwoPhaseTransaction.prepare` method on all transactions if applicable.
+
+When the transactional state is completed after a rollback or commit, the :class:`.Session`
+:term:`releases` all :class:`.Transaction` and :class:`.Connection` resources,
+and goes back to the "begin" state, which
+will again produce new :class:`.Connection` and :class:`.Transaction` objects as new
+requests to emit SQL statements are received.
+
+The example below illustrates this lifecycle::
+
+    engine = create_engine("...")
+    Session = sessionmaker(bind=engine)
+
+    # new session.  no connections are in use.
+    session = Session()
+    try:
+        # first query.  a Connection is acquired
+        # from the Engine, and a Transaction
+        # started.
+        item1 = session.query(Item).get(1)
+
+        # second query.  the same Connection/Transaction
+        # are used.
+        item2 = session.query(Item).get(2)
+
+        # pending changes are created.
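+        # no SQL is emitted for them until the flush occurs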
+ item1.foo = 'bar' + item2.bar = 'foo' + + # commit. The pending changes above + # are flushed via flush(), the Transaction + # is committed, the Connection object closed + # and discarded, the underlying DBAPI connection + # returned to the connection pool. + session.commit() + except: + # on rollback, the same closure of state + # as that of commit proceeds. + session.rollback() + raise + +.. _session_begin_nested: + +Using SAVEPOINT +--------------- + +SAVEPOINT transactions, if supported by the underlying engine, may be +delineated using the :meth:`~.Session.begin_nested` +method:: + + Session = sessionmaker() + session = Session() + session.add(u1) + session.add(u2) + + session.begin_nested() # establish a savepoint + session.add(u3) + session.rollback() # rolls back u3, keeps u1 and u2 + + session.commit() # commits u1 and u2 + +:meth:`~.Session.begin_nested` may be called any number +of times, which will issue a new SAVEPOINT with a unique identifier for each +call. For each :meth:`~.Session.begin_nested` call, a +corresponding :meth:`~.Session.rollback` or +:meth:`~.Session.commit` must be issued. (But note that if the return value is +used as a context manager, i.e. in a with-statement, then this rollback/commit +is issued by the context manager upon exiting the context, and so should not be +added explicitly.) + +When :meth:`~.Session.begin_nested` is called, a +:meth:`~.Session.flush` is unconditionally issued +(regardless of the ``autoflush`` setting). This is so that when a +:meth:`~.Session.rollback` occurs, the full state of the +session is expired, thus causing all subsequent attribute/instance access to +reference the full state of the :class:`~sqlalchemy.orm.session.Session` right +before :meth:`~.Session.begin_nested` was called. + +:meth:`~.Session.begin_nested`, in the same manner as the less often +used :meth:`~.Session.begin` method, returns a transactional object +which also works as a context manager. +It can be succinctly used around individual record inserts in order to catch +things like unique constraint exceptions:: + + for record in records: + try: + with session.begin_nested(): + session.merge(record) + except: + print "Skipped record %s" % record + session.commit() + +.. _session_autocommit: + +Autocommit Mode +--------------- + +The example of :class:`.Session` transaction lifecycle illustrated at +the start of :ref:`unitofwork_transaction` applies to a :class:`.Session` configured in the +default mode of ``autocommit=False``. Constructing a :class:`.Session` +with ``autocommit=True`` produces a :class:`.Session` placed into "autocommit" mode, where each SQL statement +invoked by a :meth:`.Session.query` or :meth:`.Session.execute` occurs +using a new connection from the connection pool, discarding it after +results have been iterated. The :meth:`.Session.flush` operation +still occurs within the scope of a single transaction, though this transaction +is closed out after the :meth:`.Session.flush` operation completes. + +.. warning:: + + "autocommit" mode should **not be considered for general use**. + If used, it should always be combined with the usage of + :meth:`.Session.begin` and :meth:`.Session.commit`, to ensure + a transaction demarcation. + + Executing queries outside of a demarcated transaction is a legacy mode + of usage, and can in some cases lead to concurrent connection + checkouts. 
+ + In the absence of a demarcated transaction, the :class:`.Session` + cannot make appropriate decisions as to when autoflush should + occur nor when auto-expiration should occur, so these features + should be disabled with ``autoflush=False, expire_on_commit=False``. + +Modern usage of "autocommit" is for framework integrations that need to control +specifically when the "begin" state occurs. A session which is configured with +``autocommit=True`` may be placed into the "begin" state using the +:meth:`.Session.begin` method. +After the cycle completes upon :meth:`.Session.commit` or :meth:`.Session.rollback`, +connection and transaction resources are :term:`released` and the :class:`.Session` +goes back into "autocommit" mode, until :meth:`.Session.begin` is called again:: + + Session = sessionmaker(bind=engine, autocommit=True) + session = Session() + session.begin() + try: + item1 = session.query(Item).get(1) + item2 = session.query(Item).get(2) + item1.foo = 'bar' + item2.bar = 'foo' + session.commit() + except: + session.rollback() + raise + +The :meth:`.Session.begin` method also returns a transactional token which is +compatible with the Python 2.6 ``with`` statement:: + + Session = sessionmaker(bind=engine, autocommit=True) + session = Session() + with session.begin(): + item1 = session.query(Item).get(1) + item2 = session.query(Item).get(2) + item1.foo = 'bar' + item2.bar = 'foo' + +.. _session_subtransactions: + +Using Subtransactions with Autocommit +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A subtransaction indicates usage of the :meth:`.Session.begin` method in conjunction with +the ``subtransactions=True`` flag. This produces a non-transactional, delimiting construct that +allows nesting of calls to :meth:`~.Session.begin` and :meth:`~.Session.commit`. +Its purpose is to allow the construction of code that can function within a transaction +both independently of any external code that starts a transaction, +as well as within a block that has already demarcated a transaction. + +``subtransactions=True`` is generally only useful in conjunction with +autocommit, and is equivalent to the pattern described at :ref:`connections_nested_transactions`, +where any number of functions can call :meth:`.Connection.begin` and :meth:`.Transaction.commit` +as though they are the initiator of the transaction, but in fact may be participating +in an already ongoing transaction:: + + # method_a starts a transaction and calls method_b + def method_a(session): + session.begin(subtransactions=True) + try: + method_b(session) + session.commit() # transaction is committed here + except: + session.rollback() # rolls back the transaction + raise + + # method_b also starts a transaction, but when + # called from method_a participates in the ongoing + # transaction. + def method_b(session): + session.begin(subtransactions=True) + try: + session.add(SomeObject('bat', 'lala')) + session.commit() # transaction is not committed yet + except: + session.rollback() # rolls back the transaction, in this case + # the one that was initiated in method_a(). + raise + + # create a Session and call method_a + session = Session(autocommit=True) + method_a(session) + session.close() + +Subtransactions are used by the :meth:`.Session.flush` process to ensure that the +flush operation takes place within a transaction, regardless of autocommit. 
+When autocommit is disabled, it is still useful in that it forces the
+:class:`.Session` into a "pending rollback" state, as a failed flush cannot be
+resumed in mid-operation, where the end user still maintains the "scope" of the
+transaction overall.
+
+.. _session_twophase:
+
+Enabling Two-Phase Commit
+-------------------------
+
+For backends which support two-phase operation (currently MySQL and
+PostgreSQL), the session can be instructed to use two-phase commit semantics.
+This will coordinate the committing of transactions across databases so that
+the transaction is either committed or rolled back in all databases.  You can
+also :meth:`~.Session.prepare` the session for
+interacting with transactions not managed by SQLAlchemy.  To use two-phase
+transactions, set the flag ``twophase=True`` on the session::
+
+    engine1 = create_engine('postgresql://db1')
+    engine2 = create_engine('postgresql://db2')
+
+    Session = sessionmaker(twophase=True)
+
+    # bind User operations to engine 1, Account operations to engine 2
+    Session.configure(binds={User:engine1, Account:engine2})
+
+    session = Session()
+
+    # .... work with accounts and users
+
+    # commit.  session will issue a flush to all DBs, and a prepare step to all DBs,
+    # before committing both transactions
+    session.commit()
+
+.. _session_external_transaction:
+
+Joining a Session into an External Transaction (such as for test suites)
+=========================================================================
+
+If a :class:`.Connection` is being used which is already in a transactional
+state (i.e. has a :class:`.Transaction` established), a :class:`.Session` can
+be made to participate within that transaction by just binding the
+:class:`.Session` to that :class:`.Connection`.  The usual rationale for this
+is a test suite that allows ORM code to work freely with a :class:`.Session`,
+including the ability to call :meth:`.Session.commit`, where afterwards the
+entire database interaction is rolled back::
+
+    from sqlalchemy.orm import sessionmaker
+    from sqlalchemy import create_engine
+    from unittest import TestCase
+
+    # global application scope.  create Session class, engine
+    Session = sessionmaker()
+
+    engine = create_engine('postgresql://...')
+
+    class SomeTest(TestCase):
+        def setUp(self):
+            # connect to the database
+            self.connection = engine.connect()
+
+            # begin a non-ORM transaction
+            self.trans = self.connection.begin()
+
+            # bind an individual Session to the connection
+            self.session = Session(bind=self.connection)
+
+        def test_something(self):
+            # use the session in tests.
+
+            self.session.add(Foo())
+            self.session.commit()
+
+        def tearDown(self):
+            self.session.close()
+
+            # rollback - everything that happened with the
+            # Session above (including calls to commit())
+            # is rolled back.
+            self.trans.rollback()
+
+            # return connection to the Engine
+            self.connection.close()
+
+Above, we issue :meth:`.Session.commit` as well as
+:meth:`.Transaction.rollback`.  This is an example of where we take advantage
+of the :class:`.Connection` object's ability to maintain *subtransactions*, or
+nested begin/commit-or-rollback pairs where only the outermost begin/commit
+pair actually commits the transaction, or if the outermost block rolls back,
+everything is rolled back.
+
+.. topic:: Supporting Tests with Rollbacks
+
+   The above recipe works well for any kind of database-enabled test, except
+   for a test that needs to actually invoke :meth:`.Session.rollback` within
+   the scope of the test itself.
The above recipe can be expanded, such
+   that the :class:`.Session` always runs all operations within the scope
+   of a SAVEPOINT, which is established at the start of each transaction,
+   so that tests can roll back the "transaction" as well while still
+   remaining in the scope of a larger "transaction" that's never committed,
+   using two extra events::
+
+      from sqlalchemy import event
+
+      class SomeTest(TestCase):
+          def setUp(self):
+              # connect to the database
+              self.connection = engine.connect()
+
+              # begin a non-ORM transaction
+              self.trans = self.connection.begin()
+
+              # bind an individual Session to the connection
+              self.session = Session(bind=self.connection)
+
+              # start the session in a SAVEPOINT...
+              self.session.begin_nested()
+
+              # then each time that SAVEPOINT ends, reopen it
+              @event.listens_for(self.session, "after_transaction_end")
+              def restart_savepoint(session, transaction):
+                  if transaction.nested and not transaction._parent.nested:
+                      session.begin_nested()
+
+
+      # ... the tearDown() method stays the same
diff --git a/doc/build/orm/versioning.rst b/doc/build/orm/versioning.rst
new file mode 100644
index 0000000000..35304086d0
--- /dev/null
+++ b/doc/build/orm/versioning.rst
@@ -0,0 +1,253 @@
+.. _mapper_version_counter:
+
+Configuring a Version Counter
+=============================
+
+The :class:`.Mapper` supports management of a :term:`version id column`, which
+is a single table column that increments or otherwise updates its value
+each time an ``UPDATE`` to the mapped table occurs.  This value is checked each
+time the ORM emits an ``UPDATE`` or ``DELETE`` against the row to ensure that
+the value held in memory matches the database value.
+
+.. warning::
+
+   Because the versioning feature relies upon comparison of the **in memory**
+   record of an object, the feature only applies to the :meth:`.Session.flush`
+   process, where the ORM flushes individual in-memory rows to the database.
+   It does **not** take effect when performing
+   a multi-row UPDATE or DELETE using the :meth:`.Query.update` or :meth:`.Query.delete`
+   methods, as these methods only emit an UPDATE or DELETE statement but otherwise
+   do not have direct access to the contents of those rows being affected.
+
+The purpose of this feature is to detect when two concurrent transactions
+are modifying the same row at roughly the same time, or alternatively to provide
+a guard against the usage of a "stale" row in a system that might be re-using
+data from a previous transaction without refreshing (e.g. if one sets ``expire_on_commit=False``
+with a :class:`.Session`, it is possible to re-use the data from a previous
+transaction).
+
+.. topic:: Concurrent transaction updates
+
+   When detecting concurrent updates within transactions, it is typically the
+   case that the database's transaction isolation level is below the level of
+   :term:`repeatable read`; otherwise, the transaction will not be exposed
+   to a new row value created by a concurrent update which conflicts with
+   the locally updated value.  In this case, the SQLAlchemy versioning
+   feature will typically not be useful for in-transaction conflict detection,
+   though it still can be used for cross-transaction staleness detection.
+
+   A database that enforces repeatable reads will typically either have locked the
+   target row against a concurrent update, or be employing some form
+   of multi-version concurrency control such that it will emit an error
+   when the transaction is committed.
SQLAlchemy's ``version_id_col`` is an alternative
+   which allows version tracking to occur for specific tables within a transaction
+   that otherwise might not have this isolation level set.
+
+   .. seealso::
+
+      `Repeatable Read Isolation Level `_ - Postgresql's implementation of repeatable read, including a description of the error condition.
+
+Simple Version Counting
+-----------------------
+
+The most straightforward way to track versions is to add an integer column
+to the mapped table, then establish it as the ``version_id_col`` within the
+mapper options::
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        version_id = Column(Integer, nullable=False)
+        name = Column(String(50), nullable=False)
+
+        __mapper_args__ = {
+            "version_id_col": version_id
+        }
+
+Above, the ``User`` mapping tracks integer versions using the column
+``version_id``.   When an object of type ``User`` is first flushed, the
+``version_id`` column will be given a value of "1".   Then, an UPDATE
+of the table later on will always be emitted in a manner similar to the
+following::
+
+    UPDATE user SET version_id=:version_id, name=:name
+    WHERE user.id = :user_id AND user.version_id = :user_version_id
+    {"name": "new name", "version_id": 2, "user_id": 1, "user_version_id": 1}
+
+The above UPDATE statement not only matches the row on ``user.id = 1``, it
+also requires that ``user.version_id = 1``, where "1"
+is the last version identifier known to have been used on this object.
+If a transaction elsewhere has modified the row independently, this version id
+will no longer match, and the UPDATE statement will report that no rows matched;
+this is the condition that SQLAlchemy tests for: that exactly one row matched our
+UPDATE (or DELETE) statement.  If zero rows match, that indicates our version
+of the data is stale, and a :exc:`.StaleDataError` is raised.
+
+.. _custom_version_counter:
+
+Custom Version Counters / Types
+-------------------------------
+
+Other kinds of values or counters can be used for versioning.  Common types include
+dates and GUIDs.  When using an alternate type or counter scheme, SQLAlchemy
+provides a hook for this scheme using the ``version_id_generator`` argument,
+which accepts a version generation callable.  This callable is passed the value of the current
+known version, and is expected to return the subsequent version.
+
+For example, if we wanted to track the versioning of our ``User`` class
+using a randomly generated GUID, we could do this (note that some backends
+support a native GUID type, but we illustrate here using a simple string)::
+
+    import uuid
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        version_uuid = Column(String(32))
+        name = Column(String(50), nullable=False)
+
+        __mapper_args__ = {
+            'version_id_col':version_uuid,
+            'version_id_generator':lambda version: uuid.uuid4().hex
+        }
+
+The persistence engine will call upon ``uuid.uuid4()`` each time a
+``User`` object is subject to an INSERT or an UPDATE.  In this case, our
+version generation function can disregard the incoming value of ``version``,
+as the ``uuid4()`` function
+generates identifiers without any prerequisite value.  If we were using
+a sequential versioning scheme such as numeric or a special character system,
+we could make use of the given ``version`` in order to help determine the
+subsequent value.
+
+.. seealso::
+
+    :ref:`custom_guid_type`
+
+.. _server_side_version_counter:
+
+Server Side Version Counters
+----------------------------
+
+The ``version_id_generator`` can also be configured to rely upon a value
+that is generated by the database.  In this case, the database would need
+some means of generating new identifiers when a row is subject to an INSERT
+as well as with an UPDATE.   For the UPDATE case, typically an update trigger
+is needed, unless the database in question supports some other native
+version identifier.  The Postgresql database in particular supports a system
+column called `xmin `_
+which provides UPDATE versioning.  We can make use
+of the Postgresql ``xmin`` column to version our ``User``
+class as follows::
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50), nullable=False)
+        xmin = Column("xmin", Integer, system=True)
+
+        __mapper_args__ = {
+            'version_id_col': xmin,
+            'version_id_generator': False
+        }
+
+With the above mapping, the ORM will rely upon the ``xmin`` column for
+automatically providing the new value of the version id counter.
+
+.. topic:: creating tables that refer to system columns
+
+   In the above scenario, as ``xmin`` is a system column provided by Postgresql,
+   we use the ``system=True`` argument to mark it as a system-provided
+   column, omitted from the ``CREATE TABLE`` statement.
+
+
+The ORM typically does not actively fetch the values of database-generated
+columns when it emits an INSERT or UPDATE, instead leaving these columns as
+"expired" and to be fetched when they are next accessed, unless the ``eager_defaults``
+:func:`.mapper` flag is set.  However, when a
+server side version column is used, the ORM needs to actively fetch the newly
+generated value.  This is so that the version counter is set up *before*
+any concurrent transaction may update it again.   This fetching is also
+best done simultaneously within the INSERT or UPDATE statement using :term:`RETURNING`,
+otherwise if emitting a SELECT statement afterwards, there is still a potential
+race condition where the version counter may change before it can be fetched.
+
+When the target database supports RETURNING, an INSERT statement for our ``User`` class will look
+like this::
+
+    INSERT INTO "user" (name) VALUES (%(name)s) RETURNING "user".id, "user".xmin
+    {'name': 'ed'}
+
+Where above, the ORM can acquire any newly generated primary key values along
+with server-generated version identifiers in one statement.   When the backend
+does not support RETURNING, an additional SELECT must be emitted for **every**
+INSERT and UPDATE, which is much less efficient, and also introduces the possibility of
+missed version counters::
+
+    INSERT INTO "user" (name) VALUES (%(name)s)
+    {'name': 'ed'}
+
+    SELECT "user".version_id AS user_version_id FROM "user" WHERE
+    "user".id = :param_1
+    {"param_1": 1}
+
+It is *strongly recommended* that server side version counters only be used
+when absolutely necessary and only on backends that support :term:`RETURNING`,
+e.g. Postgresql, Oracle, SQL Server (though SQL Server has
+`major caveats `_ when triggers are used), Firebird.
+
+.. versionadded:: 0.9.0
+
+    Support for server side version identifier tracking.
+
+Programmatic or Conditional Version Counters
+---------------------------------------------
+
+When ``version_id_generator`` is set to False, we can also programmatically
+(and conditionally) set the version identifier on our object in the same way
+we assign any other mapped attribute.
Such as if we used our UUID example, but +set ``version_id_generator`` to ``False``, we can set the version identifier +at our choosing:: + + import uuid + + class User(Base): + __tablename__ = 'user' + + id = Column(Integer, primary_key=True) + version_uuid = Column(String(32)) + name = Column(String(50), nullable=False) + + __mapper_args__ = { + 'version_id_col':version_uuid, + 'version_id_generator': False + } + + u1 = User(name='u1', version_uuid=uuid.uuid4()) + + session.add(u1) + + session.commit() + + u1.name = 'u2' + u1.version_uuid = uuid.uuid4() + + session.commit() + +We can update our ``User`` object without incrementing the version counter +as well; the value of the counter will remain unchanged, and the UPDATE +statement will still check against the previous value. This may be useful +for schemes where only certain classes of UPDATE are sensitive to concurrency +issues:: + + # will leave version_uuid unchanged + u1.name = 'u3' + session.commit() + +.. versionadded:: 0.9.0 + + Support for programmatic and conditional version identifier tracking. + diff --git a/doc/build/requirements.txt b/doc/build/requirements.txt index 34f031b0b6..3c26bea708 100644 --- a/doc/build/requirements.txt +++ b/doc/build/requirements.txt @@ -1,3 +1,3 @@ -mako changelog>=0.3.4 sphinx-paramlinks>=0.2.2 +zzzeeksphinx>=1.0.1 diff --git a/doc/build/static/detectmobile.js b/doc/build/static/detectmobile.js deleted file mode 100644 index f86b2d6507..0000000000 --- a/doc/build/static/detectmobile.js +++ /dev/null @@ -1,7 +0,0 @@ -/** - * jQuery.browser.mobile (http://detectmobilebrowser.com/) - * - * jQuery.browser.mobile will be true if the browser is a mobile device - * - **/ -(function(a){(jQuery.browser=jQuery.browser||{}).mobile=/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i.test(a)||/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v 
)|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i.test(a.substr(0,4))})(navigator.userAgent||navigator.vendor||window.opera); \ No newline at end of file diff --git a/doc/build/static/docs.css b/doc/build/static/docs.css deleted file mode 100644 index a3ec2cef55..0000000000 --- a/doc/build/static/docs.css +++ /dev/null @@ -1,660 +0,0 @@ -/* global */ - -.body-background { - background-color: #FDFBFC; -} - -body { - background-color: #FDFBFC; - margin:0 38px; - color:#333333; -} - -a { - font-weight:normal; - text-decoration:none; -} - -form { - display:inline; -} - -/* hyperlinks */ - -a:link, a:visited, a:active { - /*color:#0000FF;*/ - color: #990000; -} -a:hover { - color: #FF0000; - /*color:#700000;*/ - text-decoration:underline; -} - -/* paragraph links after sections. - These aren't visible until hovering - over the tag, then have a - "reverse video" effect over the actual - link - */ - -a.headerlink { - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; - visibility: hidden; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - visibility: visible; -} - -a.headerlink:hover { - background-color: #990000; - color: white; -} - - -/* Container setup */ - -#docs-container { - max-width:1000px; - margin: 0 auto; - position: relative; -} - - -/* header/footer elements */ - -#docs-header h1 { - font-size:20px; - color: #222222; - margin: 0; - padding: 0; -} - -#docs-header { - font-family:Verdana,sans-serif; - - font-size:.9em; - position: relative; -} - -#docs-sidebar-popout, -#docs-bottom-navigation, -#index-nav { - font-family: Verdana, sans-serif; - background-color: #FBFBEE; - border: solid 1px #CCC; - font-size:.8em; -} - -#docs-bottom-navigation, -#index-nav { - padding:10px; -} - -#docs-sidebar-popout { - font-size:.75em; -} - -#docs-sidebar-popout p, -#docs-sidebar-popout form { - margin:5px 0 5px 0px; -} - -#docs-sidebar-popout h3 { - margin:0 0 10px 0; -} - - -#docs-version-header { - position: absolute; - right: 0; - bottom: 0; -} - -.docs-navigation-links { - font-family:Verdana,sans-serif; -} - -#docs-bottom-navigation { - float:right; - margin: 1em 0 1em 5px; -} - -#docs-copyright { - font-size:.85em; - padding:5px 0px; -} - -#docs-header h1, -#docs-top-navigation h1, -#docs-top-navigation h2 { - font-family:Tahoma,Geneva,sans-serif; - font-weight:normal; -} - -#docs-top-navigation h2 { - margin:16px 4px 7px 5px; - font-size:1.6em; -} - -#docs-top-page-control { - position: absolute; - right: 20px; - bottom: 14px; -} - -#docs-top-page-control ul { - padding:0; - margin:0; -} - -#docs-top-page-control li { - font-size:.9em; - list-style-type:none; - padding:1px 8px; -} - - -#docs-container .version-num { - font-weight: bold; -} - - -/* content container, sidebar */ - -#docs-body-container { -} - -#docs-body, -#docs-sidebar, -#index-nav - { - /*font-family: helvetica, arial, sans-serif; - font-size:.9em;*/ - - font-family: Verdana, sans-serif; - font-size:.85em; - line-height:1.5em; - -} - -#docs-body { - min-height: 700px; -} - -#docs-sidebar > ul { - font-size:.85em; -} - -#fixed-sidebar { - position: relative; -} - -#fixed-sidebar.withsidebar { - float: left; - width:224px; -} - 
-#fixed-sidebar.preautomated { - position: fixed; - float: none; - top:0; - bottom: 0; -} - -#fixed-sidebar.automated { - position: fixed; - float: none; - top: 120px; - min-height: 0; -} - - -#docs-sidebar { - font-size:.85em; - - border: solid 1px #CCC; - - z-index: 3; - background-color: #EFEFEF; -} - -#index-nav { - position: relative; - margin-top:10px; - padding:0 10px; -} - -#index-nav form { - padding-top:10px; - float:right; -} - -#sidebar-paginate { - position: absolute; - bottom: 4.5em; - left: 10px; -} - -#sidebar-topnav { - position: absolute; - bottom: 3em; - left: 10px; -} - -#sidebar-search { - position: absolute; - bottom: 1em; - left: 10px; -} - -#docs-sidebar { - top: 132px; - bottom: 0; - min-height: 0; - overflow-y: auto; - margin-top:5px; - width:212px; - padding-left:10px; -} - -#docs-sidebar-popout { - height:120px; - max-height: 120px; - width:212px; - padding-left:10px; - padding-top:10px; - position: relative; -} - - -#fixed-sidebar.preautomated #docs-sidebar, -#fixed-sidebar.preautomated #docs-sidebar-popout { - position:absolute; -} - -#fixed-sidebar.preautomated #docs-sidebar:after { - content: " "; - display:block; - height: 150px; -} - -#docs-sidebar h3, #docs-sidebar h4 { - background-color: #DDDDDD; - color: #222222; - font-family: Verdana,sans-serif; - font-size: 1.1em; - font-weight: normal; - margin: 10px 0 0 -15px; - padding: 5px 10px 5px 15px; - text-shadow: 1px 1px 0 white; - /*width:210px;*/ -} - -#docs-sidebar h3:first-child { - margin-top: 0px; -} - -#docs-sidebar h3 a, #docs-sidebar h4 a { - color: #222222; -} -#docs-sidebar ul { - margin: 10px 10px 10px 0px; - padding: 0; - list-style: none outside none; -} - - -#docs-sidebar ul ul { - margin-bottom: 0; - margin-top: 0; - list-style: square outside none; - margin-left: 20px; -} - - - - -#docs-body { - background-color:#FFFFFF; - padding:1px 10px 10px 10px; - - border: solid 1px #CCC; - margin-top:10px; -} - -#docs-body.withsidebar { - margin-left: 230px; -} - - -#docs-body h1, -#docs-body h2, -#docs-body h3, -#docs-body h4 { - font-family:Helvetica, Arial, sans-serif; -} - -#docs-body #sqlalchemy-documentation h1 { - /* hide the
<h1>
for each content section. */ - display:none; - font-size:2.0em; -} - - -#docs-body h2 { - font-size:1.8em; - border-top:1px solid; - /*border-bottom:1px solid;*/ - padding-top:20px; -} - -#sqlalchemy-documentation h2 { - border-top:none; - padding-top:0; -} -#docs-body h3 { - font-size:1.4em; -} - -/* SQL popup, code styles */ - -.highlight { - background:none; -} - -#docs-container pre { - font-size:1.2em; -} - -#docs-container .pre { - font-size:1.1em; -} - -#docs-container pre { - background-color: #f0f0f0; - border: solid 1px #ccc; - box-shadow: 2px 2px 3px #DFDFDF; - padding:10px; - margin: 5px 0px 5px 0px; - overflow:auto; - line-height:1.3em; -} - -.popup_sql, .show_sql -{ - background-color: #FBFBEE; - padding:5px 10px; - margin:10px -5px; - border:1px dashed; -} - -/* the [SQL] links used to display SQL */ -#docs-container .sql_link -{ - font-weight:normal; - font-family: arial, sans-serif; - font-size:.9em; - text-transform: uppercase; - color:#990000; - border:1px solid; - padding:1px 2px 1px 2px; - margin:0px 10px 0px 15px; - float:right; - line-height:1.2em; -} - -#docs-container a.sql_link, -#docs-container .sql_link -{ - text-decoration: none; - padding:1px 2px; -} - -#docs-container a.sql_link:hover { - text-decoration: none; - color:#fff; - border:1px solid #900; - background-color: #900; -} - -/* changeset stuff */ - -#docs-container a.changeset-link { - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; -} - -/* docutils-specific elements */ - -th.field-name { - text-align:right; -} - -div.section { -} - -div.note, div.warning, p.deprecated, div.topic, div.admonition { - background-color:#EEFFEF; -} - -.footnote { - font-size: .95em; -} - -div.faq { - background-color: #EFEFEF; -} - -div.faq ul { - list-style: square outside none; -} - -div.admonition, div.topic, .deprecated, .versionadded, .versionchanged { - border:1px solid #CCCCCC; - padding:5px 10px; - font-size:.9em; - margin-top:5px; - box-shadow: 2px 2px 3px #DFDFDF; -} - -div.sidebar { - background-color: #FFFFEE; - border: 1px solid #DDDDBB; - float: right; - margin: 10px 0 10px 1em; - padding: 7px 7px 0; - width: 40%; - font-size:.9em; -} - -p.sidebar-title { - font-weight: bold; -} - -/* grrr sphinx changing your document structures, removing classes.... 
*/ - -.versionadded .versionmodified, -.versionchanged .versionmodified, -.deprecated .versionmodified, -.versionadded > p:first-child > span:first-child, -.versionchanged > p:first-child > span:first-child, -.deprecated > p:first-child > span:first-child -{ - background-color: #ECF0F3; - color: #990000; - font-style: italic; -} - - -div.inherited-member { - border:1px solid #CCCCCC; - padding:5px 5px; - font-size:.9em; - box-shadow: 2px 2px 3px #DFDFDF; -} - -div.warning .admonition-title { - color:#FF0000; -} - -div.admonition .admonition-title, div.topic .topic-title { - font-weight:bold; -} - -.viewcode-back, .viewcode-link { - float:right; -} - -dl.function > dt, -dl.attribute > dt, -dl.classmethod > dt, -dl.method > dt, -dl.class > dt, -dl.exception > dt -{ - background-color: #EFEFEF; - margin:25px -10px 10px 10px; - padding: 0px 10px; -} - - -dl.glossary > dt { - font-weight:bold; - font-size:1.1em; - padding-top:10px; -} - - -dt:target, span.highlight { - background-color:#FBE54E; -} - -a.headerlink { - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; - visibility: hidden; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - visibility: visible; -} - -a.headerlink:hover { - background-color: #00f; - color: white; -} - -.clearboth { - clear:both; -} - -tt.descname { - background-color:transparent; - font-size:1.2em; - font-weight:bold; -} - -tt.descclassname { - background-color:transparent; -} - -tt { - background-color:#ECF0F3; - padding:0 1px; -} - -/* syntax highlighting overrides */ -.k, .kn {color:#0908CE;} -.o {color:#BF0005;} -.go {color:#804049;} - - -/* special "index page" sections - with specific formatting -*/ - -div#sqlalchemy-documentation { - font-size:.95em; -} -div#sqlalchemy-documentation em { - font-style:normal; -} -div#sqlalchemy-documentation .rubric{ - font-size:14px; - background-color:#EEFFEF; - padding:5px; - border:1px solid #BFBFBF; -} -div#sqlalchemy-documentation a, div#sqlalchemy-documentation li { - padding:5px 0px; -} - -div#getting-started { - border-bottom:1px solid; -} - -div#sqlalchemy-documentation div#sqlalchemy-orm { - float:left; - width:48%; -} - -div#sqlalchemy-documentation div#sqlalchemy-core { - float:left; - width:48%; - margin:0; - padding-left:10px; - border-left:1px solid; -} - -div#dialect-documentation { - border-top:1px solid; - /*clear:left;*/ -} - -div .versionwarning, -div .version-warning { - font-size:12px; - font-color:red; - border:1px solid; - padding:4px 4px; - margin:8px 0px 2px 0px; - background:#FFBBBB; -} - -/*div .event-signatures { - background-color:#F0F0FD; - padding:0 10px; - border:1px solid #BFBFBF; -}*/ - -/*dl div.floatything { - display:none; - position:fixed; - top:25px; - left:40px; - font-size:.95em; - font-weight: bold; - border:1px solid; - background-color: #FFF; -} -dl:hover div.floatything { - display:block; -}*/ diff --git a/doc/build/static/init.js b/doc/build/static/init.js deleted file mode 100644 index 4bcb4411da..0000000000 --- a/doc/build/static/init.js +++ /dev/null @@ -1,44 +0,0 @@ - -function initSQLPopups() { - $('div.popup_sql').hide(); - $('a.sql_link').click(function() { - $(this).nextAll('div.popup_sql:first').toggle(); - return false; - }); -} - -var automatedBreakpoint = -1; - -function initFloatyThings() { - - automatedBreakpoint = $("#docs-container").position().top + $("#docs-top-navigation-container").height(); - - 
$("#fixed-sidebar.withsidebar").addClass("preautomated"); - - - function setScroll() { - - var scrolltop = $(window).scrollTop(); - if (scrolltop >= automatedBreakpoint) { - $("#fixed-sidebar.withsidebar").css("top", 5); - } - else { - $("#fixed-sidebar.withsidebar").css( - "top", $("#docs-body").offset().top - Math.max(scrolltop, 0)); - } - - - } - $(window).scroll(setScroll) - - setScroll(); -} - - -$(document).ready(function() { - initSQLPopups(); - if (!$.browser.mobile) { - initFloatyThings(); - } -}); - diff --git a/doc/build/templates/genindex.mako b/doc/build/templates/genindex.mako deleted file mode 100644 index 9ea6795bc5..0000000000 --- a/doc/build/templates/genindex.mako +++ /dev/null @@ -1,77 +0,0 @@ -<%inherit file="layout.mako"/> - -<%block name="show_title" filter="util.striptags"> - ${_('Index')} - - -

${_('Index')}

- - % for i, (key, dummy) in enumerate(genindexentries): - ${i != 0 and '| ' or ''}${key} - % endfor - -
- - % for i, (key, entries) in enumerate(genindexentries): -

${key}

-
-
- <% - breakat = genindexcounts[i] // 2 - numcols = 1 - numitems = 0 - %> -% for entryname, (links, subitems) in entries: - -
- % if links: - ${entryname|h} - % for unknown, link in links[1:]: - , [${i}] - % endfor - % else: - ${entryname|h} - % endif -
- - % if subitems: -
- % for subentryname, subentrylinks in subitems: -
${subentryname|h} - % for j, (unknown, link) in enumerate(subentrylinks[1:]): - [${j}] - % endfor -
- % endfor -
- % endif - - <% - numitems = numitems + 1 + len(subitems) - %> - % if numcols <2 and numitems > breakat: - <% - numcols = numcols + 1 - %> -
- % endif - -% endfor -
-
-% endfor - -<%def name="sidebarrel()"> -% if split_index: -

${_('Index')}

-

- % for i, (key, dummy) in enumerate(genindexentries): - ${i > 0 and '| ' or ''} - ${key} - % endfor -

- -

${_('Full index on one page')}

-% endif - ${parent.sidebarrel()} - diff --git a/doc/build/templates/layout.mako b/doc/build/templates/layout.mako deleted file mode 100644 index 23e57129b5..0000000000 --- a/doc/build/templates/layout.mako +++ /dev/null @@ -1,243 +0,0 @@ -## coding: utf-8 - -<%! - local_script_files = [] - - default_css_files = [ - '_static/pygments.css', - '_static/docs.css', - ] -%> - - -<%doc> - Structural elements are all prefixed with "docs-" - to prevent conflicts when the structure is integrated into the - main site. - - docs-container -> - docs-top-navigation-container -> - docs-header -> - docs-version-header - docs-top-navigation - docs-top-page-control - docs-navigation-banner - docs-body-container -> - docs-sidebar - docs-body - docs-bottom-navigation - docs-copyright - - -<%inherit file="${context['base']}"/> - -<% - if builder == 'epub': - next.body() - return -%> - - -<% -withsidebar = bool(toc) and current_page_name != 'index' -%> - -<%block name="head_title"> - % if current_page_name != 'index': - ${capture(self.show_title) | util.striptags} — - % endif - ${docstitle|h} - - - -
- - -<%block name="headers"> - - ${parent.headers()} - - - - - - - % for scriptfile in script_files + self.attr.local_script_files: - - % endfor - - - - - % if hasdoc('about'): - - % endif - - - % if hasdoc('copyright'): - - % endif - - % if parents: - - % endif - % if nexttopic: - - % endif - % if prevtopic: - - % endif - - - - - -
-
-
- Release: ${release} | Release Date: ${release_date} -
- -

${docstitle|h}

- -
-
- -
- -
- - % if not withsidebar: -
- - -

- Contents | - Index - % if pdf_url: - | Download as PDF - % endif -

- -
- % endif - - % if withsidebar: -
-

${docstitle|h}

- - - - - - - -
- -
- -

\ - <%block name="show_title"> - ${title} - -

- ${toc} - - % if rtd: -

Project Versions

-
    -
- % endif - - -
- % endif - -
- - <%doc> -
- ${docstitle|h} - % if parents: - % for parent in parents: - » ${parent['title']} - % endfor - % endif - % if current_page_name != 'index': - » ${self.show_title()} - % endif - -

- <%block name="show_title"> - ${title} - -

- -
- - -
- ${next.body()} -
- -
- - - -
diff --git a/doc/build/templates/page.mako b/doc/build/templates/page.mako deleted file mode 100644 index e0f98cf648..0000000000 --- a/doc/build/templates/page.mako +++ /dev/null @@ -1,2 +0,0 @@ -<%inherit file="layout.mako"/> -${body| util.strip_toplevel_anchors} \ No newline at end of file diff --git a/doc/build/templates/search.mako b/doc/build/templates/search.mako deleted file mode 100644 index d0aa3d8259..0000000000 --- a/doc/build/templates/search.mako +++ /dev/null @@ -1,21 +0,0 @@ -<%inherit file="layout.mako"/> - -<%! - local_script_files = ['_static/searchtools.js'] -%> -<%block name="show_title"> - ${_('Search')} - - -<%block name="headers"> - ${parent.headers()} - - - -
- -<%block name="footer"> - ${parent.footer()} - diff --git a/doc/build/templates/static_base.mako b/doc/build/templates/static_base.mako deleted file mode 100644 index 9eb5ec0463..0000000000 --- a/doc/build/templates/static_base.mako +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - ${metatags and metatags or ''} - - <%block name="head_title"> - </%block> - - - <%block name="css"> - - % for cssfile in self.attr.default_css_files + css_files: - - % endfor - - - - <%block name="headers"/> - - - ${next.body()} - <%block name="footer"/> - - - - diff --git a/lib/sqlalchemy/dialects/sqlite/base.py b/lib/sqlalchemy/dialects/sqlite/base.py index cf02991d27..666b9ed128 100644 --- a/lib/sqlalchemy/dialects/sqlite/base.py +++ b/lib/sqlalchemy/dialects/sqlite/base.py @@ -93,7 +93,7 @@ file-based architecture and additionally will usually require workarounds to work when using the pysqlite driver. Transaction Isolation Level -=========================== +---------------------------- SQLite supports "transaction isolation" in a non-standard way, along two axes. One is that of the `PRAGMA read_uncommitted `_ @@ -126,7 +126,7 @@ by *not even emitting BEGIN* until the first write operation. for techniques to work around this behavior. SAVEPOINT Support -================= +---------------------------- SQLite supports SAVEPOINTs, which only function once a transaction is begun. SQLAlchemy's SAVEPOINT support is available using the @@ -142,7 +142,7 @@ won't work at all with pysqlite unless workarounds are taken. for techniques to work around this behavior. Transactional DDL -================= +---------------------------- The SQLite database supports transactional :term:`DDL` as well. In this case, the pysqlite driver is not only failing to start transactions, diff --git a/lib/sqlalchemy/ext/declarative/__init__.py b/lib/sqlalchemy/ext/declarative/__init__.py index 3cbc85c0c0..c42f252469 100644 --- a/lib/sqlalchemy/ext/declarative/__init__.py +++ b/lib/sqlalchemy/ext/declarative/__init__.py @@ -5,1310 +5,10 @@ # This module is part of SQLAlchemy and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php -""" -Synopsis -======== - -SQLAlchemy object-relational configuration involves the -combination of :class:`.Table`, :func:`.mapper`, and class -objects to define a mapped class. -:mod:`~sqlalchemy.ext.declarative` allows all three to be -expressed at once within the class declaration. As much as -possible, regular SQLAlchemy schema and ORM constructs are -used directly, so that configuration between "classical" ORM -usage and declarative remain highly similar. - -As a simple example:: - - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class SomeClass(Base): - __tablename__ = 'some_table' - id = Column(Integer, primary_key=True) - name = Column(String(50)) - -Above, the :func:`declarative_base` callable returns a new base class from -which all mapped classes should inherit. When the class definition is -completed, a new :class:`.Table` and :func:`.mapper` will have been generated. - -The resulting table and mapper are accessible via -``__table__`` and ``__mapper__`` attributes on the -``SomeClass`` class:: - - # access the mapped Table - SomeClass.__table__ - - # access the Mapper - SomeClass.__mapper__ - -Defining Attributes -=================== - -In the previous example, the :class:`.Column` objects are -automatically named with the name of the attribute to which they are -assigned. 
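For instance, with the ``SomeClass`` mapping above, each generated :class:`.Column` receives the attribute name as its database column name; a minimal sketch of inspecting this (output shown is illustrative)::

    # the column keys were taken from the attribute names
    print(SomeClass.__table__.c.keys())   # ['id', 'name']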
- -To name columns explicitly with a name distinct from their mapped attribute, -just give the column a name. Below, column "some_table_id" is mapped to the -"id" attribute of `SomeClass`, but in SQL will be represented as -"some_table_id":: - - class SomeClass(Base): - __tablename__ = 'some_table' - id = Column("some_table_id", Integer, primary_key=True) - -Attributes may be added to the class after its construction, and they will be -added to the underlying :class:`.Table` and -:func:`.mapper` definitions as appropriate:: - - SomeClass.data = Column('data', Unicode) - SomeClass.related = relationship(RelatedInfo) - -Classes which are constructed using declarative can interact freely -with classes that are mapped explicitly with :func:`.mapper`. - -It is recommended, though not required, that all tables -share the same underlying :class:`~sqlalchemy.schema.MetaData` object, -so that string-configured :class:`~sqlalchemy.schema.ForeignKey` -references can be resolved without issue. - -Accessing the MetaData -======================= - -The :func:`declarative_base` base class contains a -:class:`.MetaData` object where newly defined -:class:`.Table` objects are collected. This object is -intended to be accessed directly for -:class:`.MetaData`-specific operations. Such as, to issue -CREATE statements for all tables:: - - engine = create_engine('sqlite://') - Base.metadata.create_all(engine) - -:func:`declarative_base` can also receive a pre-existing -:class:`.MetaData` object, which allows a -declarative setup to be associated with an already -existing traditional collection of :class:`~sqlalchemy.schema.Table` -objects:: - - mymetadata = MetaData() - Base = declarative_base(metadata=mymetadata) - - -.. _declarative_configuring_relationships: - -Configuring Relationships -========================= - -Relationships to other classes are done in the usual way, with the added -feature that the class specified to :func:`~sqlalchemy.orm.relationship` -may be a string name. The "class registry" associated with ``Base`` -is used at mapper compilation time to resolve the name into the actual -class object, which is expected to have been defined once the mapper -configuration is used:: - - class User(Base): - __tablename__ = 'users' - - id = Column(Integer, primary_key=True) - name = Column(String(50)) - addresses = relationship("Address", backref="user") - - class Address(Base): - __tablename__ = 'addresses' - - id = Column(Integer, primary_key=True) - email = Column(String(50)) - user_id = Column(Integer, ForeignKey('users.id')) - -Column constructs, since they are just that, are immediately usable, -as below where we define a primary join condition on the ``Address`` -class using them:: - - class Address(Base): - __tablename__ = 'addresses' - - id = Column(Integer, primary_key=True) - email = Column(String(50)) - user_id = Column(Integer, ForeignKey('users.id')) - user = relationship(User, primaryjoin=user_id == User.id) - -In addition to the main argument for :func:`~sqlalchemy.orm.relationship`, -other arguments which depend upon the columns present on an as-yet -undefined class may also be specified as strings. These strings are -evaluated as Python expressions. The full namespace available within -this evaluation includes all classes mapped for this declarative base, -as well as the contents of the ``sqlalchemy`` package, including -expression functions like :func:`~sqlalchemy.sql.expression.desc` and -:attr:`~sqlalchemy.sql.expression.func`:: - - class User(Base): - # .... 
-            addresses = relationship("Address",
-                        order_by="desc(Address.email)",
-                        primaryjoin="Address.user_id==User.id")
-
-For the case where more than one module contains a class of the same name,
-string class names can also be specified as module-qualified paths
-within any of these string expressions::
-
-    class User(Base):
-        # ....
-        addresses = relationship("myapp.model.address.Address",
-                            order_by="desc(myapp.model.address.Address.email)",
-                            primaryjoin="myapp.model.address.Address.user_id=="
-                                        "myapp.model.user.User.id")
-
-The qualified path can be any partial path that removes ambiguity between
-the names.  For example, to disambiguate between
-``myapp.model.address.Address`` and ``myapp.model.lookup.Address``,
-we can specify ``address.Address`` or ``lookup.Address``::
-
-    class User(Base):
-        # ....
-        addresses = relationship("address.Address",
-                            order_by="desc(address.Address.email)",
-                            primaryjoin="address.Address.user_id=="
-                                        "User.id")
-
-.. versionadded:: 0.8
-   module-qualified paths can be used when specifying string arguments
-   with Declarative, in order to specify specific modules.
-
-Two alternatives also exist to using string-based attributes.  A lambda
-can also be used, which will be evaluated after all mappers have been
-configured::
-
-    class User(Base):
-        # ...
-        addresses = relationship(lambda: Address,
-                            order_by=lambda: desc(Address.email),
-                            primaryjoin=lambda: Address.user_id==User.id)
-
-Or, the relationship can be added to the class explicitly after the classes
-are available::
-
-    User.addresses = relationship(Address,
-                          primaryjoin=Address.user_id==User.id)
-
-
-
-.. _declarative_many_to_many:
-
-Configuring Many-to-Many Relationships
-======================================
-
-Many-to-many relationships are also declared in the same way
-with declarative as with traditional mappings. The
-``secondary`` argument to
-:func:`.relationship` is as usual passed a
-:class:`.Table` object, which is typically declared in the
-traditional way.  The :class:`.Table` usually shares
-the :class:`.MetaData` object used by the declarative base::
-
-    keywords = Table(
-        'keywords', Base.metadata,
-        Column('author_id', Integer, ForeignKey('authors.id')),
-        Column('keyword_id', Integer, ForeignKey('keywords.id'))
-    )
-
-    class Author(Base):
-        __tablename__ = 'authors'
-        id = Column(Integer, primary_key=True)
-        keywords = relationship("Keyword", secondary=keywords)
-
-Like other :func:`~sqlalchemy.orm.relationship` arguments, a string is accepted
-as well, passing the string name of the table as defined in the
-``Base.metadata.tables`` collection::
-
-    class Author(Base):
-        __tablename__ = 'authors'
-        id = Column(Integer, primary_key=True)
-        keywords = relationship("Keyword", secondary="keywords")
-
-As with traditional mapping, it's generally not a good idea to use
-a :class:`.Table` as the "secondary" argument which is also mapped to
-a class, unless the :func:`.relationship` is declared with ``viewonly=True``.
-Otherwise, the unit-of-work system may attempt duplicate INSERT and
-DELETE statements against the underlying table.
-
-.. _declarative_sql_expressions:
-
-Defining SQL Expressions
-========================
-
-See :ref:`mapper_sql_expressions` for examples on declaratively
-mapping attributes to SQL expressions.
-
-.. _declarative_table_args:
-
-Table Configuration
-===================
-
-Table arguments other than the name, metadata, and mapped Column
-arguments are specified using the ``__table_args__`` class attribute.
-This attribute accommodates both positional as well as keyword
-arguments that are normally sent to the
-:class:`~sqlalchemy.schema.Table` constructor.
-The attribute can be specified in one of two forms.  One is as a
-dictionary::
-
-    class MyClass(Base):
-        __tablename__ = 'sometable'
-        __table_args__ = {'mysql_engine':'InnoDB'}
-
-The other is a tuple, where each argument is positional
-(usually constraints)::
-
-    class MyClass(Base):
-        __tablename__ = 'sometable'
-        __table_args__ = (
-                ForeignKeyConstraint(['id'], ['remote_table.id']),
-                UniqueConstraint('foo'),
-                )
-
-Keyword arguments can be specified with the above form by
-specifying the last argument as a dictionary::
-
-    class MyClass(Base):
-        __tablename__ = 'sometable'
-        __table_args__ = (
-                ForeignKeyConstraint(['id'], ['remote_table.id']),
-                UniqueConstraint('foo'),
-                {'autoload':True}
-                )
-
-Using a Hybrid Approach with __table__
-=======================================
-
-As an alternative to ``__tablename__``, a direct
-:class:`~sqlalchemy.schema.Table` construct may be used.  The
-:class:`~sqlalchemy.schema.Column` objects, which in this case require
-their names, will be added to the mapping just like a regular mapping
-to a table::
-
-    class MyClass(Base):
-        __table__ = Table('my_table', Base.metadata,
-            Column('id', Integer, primary_key=True),
-            Column('name', String(50))
-        )
-
-``__table__`` provides a more focused point of control for establishing
-table metadata, while still getting most of the benefits of using declarative.
-An application that uses reflection might want to load table metadata elsewhere
-and pass it to declarative classes::
-
-    from sqlalchemy.ext.declarative import declarative_base
-
-    Base = declarative_base()
-    Base.metadata.reflect(some_engine)
-
-    class User(Base):
-        __table__ = Base.metadata.tables['user']
-
-    class Address(Base):
-        __table__ = Base.metadata.tables['address']
-
-Some configuration schemes may find it more appropriate to use ``__table__``,
-such as those which already take advantage of the data-driven nature of
-:class:`.Table` to customize and/or automate schema definition.
-
-Note that when the ``__table__`` approach is used, the object is immediately
-usable as a plain :class:`.Table` within the class declaration body itself,
-as a Python class is only another syntactical block.
Below, this is illustrated
-by using the ``id`` column in the ``primaryjoin`` condition of a
-:func:`.relationship`::
-
-    class MyClass(Base):
-        __table__ = Table('my_table', Base.metadata,
-            Column('id', Integer, primary_key=True),
-            Column('name', String(50))
-        )
-
-        widgets = relationship(Widget,
-                primaryjoin=Widget.myclass_id==__table__.c.id)
-
-Similarly, mapped attributes which refer to ``__table__`` can be placed inline,
-as below where we assign the ``name`` column to the attribute ``_name``,
-generating a synonym for ``name``::
-
-    from sqlalchemy.ext.declarative import synonym_for
-
-    class MyClass(Base):
-        __table__ = Table('my_table', Base.metadata,
-            Column('id', Integer, primary_key=True),
-            Column('name', String(50))
-        )
-
-        _name = __table__.c.name
-
-        @synonym_for("_name")
-        def name(self):
-            return "Name: %s" % self._name
-
-Using Reflection with Declarative
-=================================
-
-It's easy to set up a :class:`.Table` that uses ``autoload=True``
-in conjunction with a mapped class::
-
-    class MyClass(Base):
-        __table__ = Table('mytable', Base.metadata,
-                        autoload=True, autoload_with=some_engine)
-
-However, one improvement that can be made here is to not
-require the :class:`.Engine` to be available when classes are
-being first declared.   To achieve this, use the
-:class:`.DeferredReflection` mixin, which sets up mappings
-only after a special ``prepare(engine)`` step is called::
-
-    from sqlalchemy.ext.declarative import declarative_base, DeferredReflection
-
-    Base = declarative_base(cls=DeferredReflection)
-
-    class Foo(Base):
-        __tablename__ = 'foo'
-        bars = relationship("Bar")
-
-    class Bar(Base):
-        __tablename__ = 'bar'
-
-        # illustrate overriding of "bar.foo_id" to have
-        # a foreign key constraint otherwise not
-        # reflected, such as when using MySQL
-        foo_id = Column(Integer, ForeignKey('foo.id'))
-
-    Base.prepare(engine)
-
-.. versionadded:: 0.8
-   Added :class:`.DeferredReflection`.
-
-Mapper Configuration
-====================
-
-Declarative makes use of the :func:`~.orm.mapper` function internally
-when it creates the mapping to the declared table.   The options
-for :func:`~.orm.mapper` are passed directly through via the
-``__mapper_args__`` class attribute.  As always, arguments which reference
-locally mapped columns can reference them directly from within the
-class declaration::
-
-    from datetime import datetime
-
-    class Widget(Base):
-        __tablename__ = 'widgets'
-
-        id = Column(Integer, primary_key=True)
-        timestamp = Column(DateTime, nullable=False)
-
-        __mapper_args__ = {
-                'version_id_col': timestamp,
-                'version_id_generator': lambda v:datetime.now()
-            }
-
-.. _declarative_inheritance:
-
-Inheritance Configuration
-=========================
-
-Declarative supports all three forms of inheritance as intuitively
-as possible.  The ``inherits`` mapper keyword argument is not needed
-as declarative will determine this from the class itself.   The various
-"polymorphic" keyword arguments are specified using ``__mapper_args__``.
- -Joined Table Inheritance -~~~~~~~~~~~~~~~~~~~~~~~~ - -Joined table inheritance is defined as a subclass that defines its own -table:: - - class Person(Base): - __tablename__ = 'people' - id = Column(Integer, primary_key=True) - discriminator = Column('type', String(50)) - __mapper_args__ = {'polymorphic_on': discriminator} - - class Engineer(Person): - __tablename__ = 'engineers' - __mapper_args__ = {'polymorphic_identity': 'engineer'} - id = Column(Integer, ForeignKey('people.id'), primary_key=True) - primary_language = Column(String(50)) - -Note that above, the ``Engineer.id`` attribute, since it shares the -same attribute name as the ``Person.id`` attribute, will in fact -represent the ``people.id`` and ``engineers.id`` columns together, -with the "Engineer.id" column taking precedence if queried directly. -To provide the ``Engineer`` class with an attribute that represents -only the ``engineers.id`` column, give it a different attribute name:: - - class Engineer(Person): - __tablename__ = 'engineers' - __mapper_args__ = {'polymorphic_identity': 'engineer'} - engineer_id = Column('id', Integer, ForeignKey('people.id'), - primary_key=True) - primary_language = Column(String(50)) - - -.. versionchanged:: 0.7 joined table inheritance favors the subclass - column over that of the superclass, such as querying above - for ``Engineer.id``. Prior to 0.7 this was the reverse. - -.. _declarative_single_table: - -Single Table Inheritance -~~~~~~~~~~~~~~~~~~~~~~~~ - -Single table inheritance is defined as a subclass that does not have -its own table; you just leave out the ``__table__`` and ``__tablename__`` -attributes:: - - class Person(Base): - __tablename__ = 'people' - id = Column(Integer, primary_key=True) - discriminator = Column('type', String(50)) - __mapper_args__ = {'polymorphic_on': discriminator} - - class Engineer(Person): - __mapper_args__ = {'polymorphic_identity': 'engineer'} - primary_language = Column(String(50)) - -When the above mappers are configured, the ``Person`` class is mapped -to the ``people`` table *before* the ``primary_language`` column is -defined, and this column will not be included in its own mapping. -When ``Engineer`` then defines the ``primary_language`` column, the -column is added to the ``people`` table so that it is included in the -mapping for ``Engineer`` and is also part of the table's full set of -columns. Columns which are not mapped to ``Person`` are also excluded -from any other single or joined inheriting classes using the -``exclude_properties`` mapper argument. Below, ``Manager`` will have -all the attributes of ``Person`` and ``Manager`` but *not* the -``primary_language`` attribute of ``Engineer``:: - - class Manager(Person): - __mapper_args__ = {'polymorphic_identity': 'manager'} - golf_swing = Column(String(50)) - -The attribute exclusion logic is provided by the -``exclude_properties`` mapper argument, and declarative's default -behavior can be disabled by passing an explicit ``exclude_properties`` -collection (empty or otherwise) to the ``__mapper_args__``. - -Resolving Column Conflicts -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Note above that the ``primary_language`` and ``golf_swing`` columns -are "moved up" to be applied to ``Person.__table__``, as a result of their -declaration on a subclass that has no table of its own. 
A tricky case -comes up when two subclasses want to specify *the same* column, as below:: - - class Person(Base): - __tablename__ = 'people' - id = Column(Integer, primary_key=True) - discriminator = Column('type', String(50)) - __mapper_args__ = {'polymorphic_on': discriminator} - - class Engineer(Person): - __mapper_args__ = {'polymorphic_identity': 'engineer'} - start_date = Column(DateTime) - - class Manager(Person): - __mapper_args__ = {'polymorphic_identity': 'manager'} - start_date = Column(DateTime) - -Above, the ``start_date`` column declared on both ``Engineer`` and ``Manager`` -will result in an error:: - - sqlalchemy.exc.ArgumentError: Column 'start_date' on class - conflicts with existing - column 'people.start_date' - -In a situation like this, Declarative can't be sure -of the intent, especially if the ``start_date`` columns had, for example, -different types. A situation like this can be resolved by using -:class:`.declared_attr` to define the :class:`.Column` conditionally, taking -care to return the **existing column** via the parent ``__table__`` if it -already exists:: - - from sqlalchemy.ext.declarative import declared_attr - - class Person(Base): - __tablename__ = 'people' - id = Column(Integer, primary_key=True) - discriminator = Column('type', String(50)) - __mapper_args__ = {'polymorphic_on': discriminator} - - class Engineer(Person): - __mapper_args__ = {'polymorphic_identity': 'engineer'} - - @declared_attr - def start_date(cls): - "Start date column, if not present already." - return Person.__table__.c.get('start_date', Column(DateTime)) - - class Manager(Person): - __mapper_args__ = {'polymorphic_identity': 'manager'} - - @declared_attr - def start_date(cls): - "Start date column, if not present already." - return Person.__table__.c.get('start_date', Column(DateTime)) - -Above, when ``Manager`` is mapped, the ``start_date`` column is -already present on the ``Person`` class. Declarative lets us return -that :class:`.Column` as a result in this case, where it knows to skip -re-assigning the same column. If the mapping is mis-configured such -that the ``start_date`` column is accidentally re-assigned to a -different table (such as, if we changed ``Manager`` to be joined -inheritance without fixing ``start_date``), an error is raised which -indicates an existing :class:`.Column` is trying to be re-assigned to -a different owning :class:`.Table`. - -.. versionadded:: 0.8 :class:`.declared_attr` can be used on a non-mixin - class, and the returned :class:`.Column` or other mapped attribute - will be applied to the mapping as any other attribute. Previously, - the resulting attribute would be ignored, and also result in a warning - being emitted when a subclass was created. - -.. versionadded:: 0.8 :class:`.declared_attr`, when used either with a - mixin or non-mixin declarative class, can return an existing - :class:`.Column` already assigned to the parent :class:`.Table`, - to indicate that the re-assignment of the :class:`.Column` should be - skipped, however should still be mapped on the target class, - in order to resolve duplicate column conflicts. 
-
-The same concept can be used with mixin classes (see
-:ref:`declarative_mixins`)::
-
-    class Person(Base):
-        __tablename__ = 'people'
-        id = Column(Integer, primary_key=True)
-        discriminator = Column('type', String(50))
-        __mapper_args__ = {'polymorphic_on': discriminator}
-
-    class HasStartDate(object):
-        @declared_attr
-        def start_date(cls):
-            return cls.__table__.c.get('start_date', Column(DateTime))
-
-    class Engineer(HasStartDate, Person):
-        __mapper_args__ = {'polymorphic_identity': 'engineer'}
-
-    class Manager(HasStartDate, Person):
-        __mapper_args__ = {'polymorphic_identity': 'manager'}
-
-The above mixin checks the local ``__table__`` attribute for the column.
-Because we're using single table inheritance, we're sure that in this case,
-``cls.__table__`` refers to ``People.__table__``.  If we were mixing joined-
-and single-table inheritance, we might want our mixin to check more carefully
-if ``cls.__table__`` is really the :class:`.Table` we're looking for.
-
-Concrete Table Inheritance
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Concrete table inheritance is defined as a subclass which has its own table
-and sets the ``concrete`` keyword argument to ``True``::
-
-    class Person(Base):
-        __tablename__ = 'people'
-        id = Column(Integer, primary_key=True)
-        name = Column(String(50))
-
-    class Engineer(Person):
-        __tablename__ = 'engineers'
-        __mapper_args__ = {'concrete':True}
-        id = Column(Integer, primary_key=True)
-        primary_language = Column(String(50))
-        name = Column(String(50))
-
-Usage of an abstract base class is a little less straightforward as it
-requires usage of :func:`~sqlalchemy.orm.util.polymorphic_union`,
-which needs to be created with the :class:`.Table` objects
-before the class is built::
-
-    engineers = Table('engineers', Base.metadata,
-                    Column('id', Integer, primary_key=True),
-                    Column('name', String(50)),
-                    Column('primary_language', String(50))
-                )
-    managers = Table('managers', Base.metadata,
-                    Column('id', Integer, primary_key=True),
-                    Column('name', String(50)),
-                    Column('golf_swing', String(50))
-                )
-
-    punion = polymorphic_union({
-        'engineer':engineers,
-        'manager':managers
-    }, 'type', 'punion')
-
-    class Person(Base):
-        __table__ = punion
-        __mapper_args__ = {'polymorphic_on':punion.c.type}
-
-    class Engineer(Person):
-        __table__ = engineers
-        __mapper_args__ = {'polymorphic_identity':'engineer', 'concrete':True}
-
-    class Manager(Person):
-        __table__ = managers
-        __mapper_args__ = {'polymorphic_identity':'manager', 'concrete':True}
-
-.. _declarative_concrete_helpers:
-
-Using the Concrete Helpers
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Helper classes provide a simpler pattern for concrete inheritance.
-With these objects, the ``__declare_first__`` helper is used to configure the
-"polymorphic" loader for the mapper after all subclasses have been declared.
-
-.. versionadded:: 0.7.3
-
-An abstract base can be declared using the
-:class:`.AbstractConcreteBase` class::
-
-    from sqlalchemy.ext.declarative import AbstractConcreteBase
-
-    class Employee(AbstractConcreteBase, Base):
-        pass
-
-To have a concrete ``employee`` table, use :class:`.ConcreteBase` instead::
-
-    from sqlalchemy.ext.declarative import ConcreteBase
-
-    class Employee(ConcreteBase, Base):
-        __tablename__ = 'employee'
-        employee_id = Column(Integer, primary_key=True)
-        name = Column(String(50))
-        __mapper_args__ = {
-                        'polymorphic_identity':'employee',
-                        'concrete':True}
-
-
-Either ``Employee`` base can be used in the normal fashion::
-
-    class Manager(Employee):
-        __tablename__ = 'manager'
-        employee_id = Column(Integer, primary_key=True)
-        name = Column(String(50))
-        manager_data = Column(String(40))
-        __mapper_args__ = {
-                        'polymorphic_identity':'manager',
-                        'concrete':True}
-
-    class Engineer(Employee):
-        __tablename__ = 'engineer'
-        employee_id = Column(Integer, primary_key=True)
-        name = Column(String(50))
-        engineer_info = Column(String(40))
-        __mapper_args__ = {'polymorphic_identity':'engineer',
-                        'concrete':True}
-
-
-The :class:`.AbstractConcreteBase` class is itself mapped, and can be
-used as a target of relationships::
-
-    class Company(Base):
-        __tablename__ = 'company'
-
-        id = Column(Integer, primary_key=True)
-        employees = relationship("Employee",
-                        primaryjoin="Company.id == Employee.company_id")
-
-
-.. versionchanged:: 0.9.3 Support for use of :class:`.AbstractConcreteBase`
-   as the target of a :func:`.relationship` has been improved.
-
-It can also be queried directly::
-
-    for employee in session.query(Employee).filter(Employee.name == 'qbert'):
-        print(employee)
-
-
-.. _declarative_mixins:
-
-Mixin and Custom Base Classes
-==============================
-
-A common need when using :mod:`~sqlalchemy.ext.declarative` is to
-share some functionality, such as a set of common columns, some common
-table options, or other mapped properties, across many
-classes.  The standard Python idiom for this is to have the classes
-inherit from a base which includes these common features.
-
-When using :mod:`~sqlalchemy.ext.declarative`, this idiom is allowed
-via the usage of a custom declarative base class, as well as a "mixin" class
-which is inherited from in addition to the primary base.  Declarative
-includes several helper features to make this work in terms of how
-mappings are declared.  An example of some commonly mixed-in
-idioms is below::
-
-    from sqlalchemy.ext.declarative import declared_attr
-
-    class MyMixin(object):
-
-        @declared_attr
-        def __tablename__(cls):
-            return cls.__name__.lower()
-
-        __table_args__ = {'mysql_engine': 'InnoDB'}
-        __mapper_args__= {'always_refresh': True}
-
-        id = Column(Integer, primary_key=True)
-
-    class MyModel(MyMixin, Base):
-        name = Column(String(1000))
-
-Where above, the class ``MyModel`` will contain an "id" column
-as the primary key, a ``__tablename__`` attribute that derives
-from the name of the class itself, as well as ``__table_args__``
-and ``__mapper_args__`` defined by the ``MyMixin`` mixin class.
-
-There's no fixed convention over whether ``MyMixin`` precedes
-``Base`` or not.  Normal Python method resolution rules apply, and
-the above example would work just as well with::
-
-    class MyModel(Base, MyMixin):
-        name = Column(String(1000))
-
-This works because ``Base`` here doesn't define any of the
-variables that ``MyMixin`` defines, i.e. ``__tablename__``,
-``__table_args__``, ``id``, etc.
Augmenting the Base
~~~~~~~~~~~~~~~~~~~

In addition to using a pure mixin, most of the techniques in this
section can also be applied to the base class itself, for patterns that
should apply to all classes derived from a particular base.  This is achieved
using the ``cls`` argument of the :func:`.declarative_base` function::

    from sqlalchemy.ext.declarative import declared_attr

    class Base(object):
        @declared_attr
        def __tablename__(cls):
            return cls.__name__.lower()

        __table_args__ = {'mysql_engine': 'InnoDB'}

        id = Column(Integer, primary_key=True)

    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base(cls=Base)

    class MyModel(Base):
        name = Column(String(1000))

Where above, ``MyModel`` and all other classes that derive from ``Base`` will
have a table name derived from the class name, an ``id`` primary key column,
as well as the "InnoDB" engine for MySQL.

Mixing in Columns
~~~~~~~~~~~~~~~~~

The most basic way to specify a column on a mixin is by simple
declaration::

    class TimestampMixin(object):
        created_at = Column(DateTime, default=func.now())

    class MyModel(TimestampMixin, Base):
        __tablename__ = 'test'

        id = Column(Integer, primary_key=True)
        name = Column(String(1000))

Where above, all declarative classes that include ``TimestampMixin``
will also have a column ``created_at`` that applies a timestamp to
all row insertions.

Those familiar with the SQLAlchemy expression language know that
the object identity of clause elements defines their role in a schema.
Two ``Table`` objects ``a`` and ``b`` may both have a column called
``id``, but the way these are differentiated is that ``a.c.id``
and ``b.c.id`` are two distinct Python objects, referencing their
parent tables ``a`` and ``b`` respectively.

In the case of the mixin column, it seems that only one
:class:`.Column` object is explicitly created, yet the ultimate
``created_at`` column above must exist as a distinct Python object
for each separate destination class.  To accomplish this, the declarative
extension creates a **copy** of each :class:`.Column` object encountered on
a class that is detected as a mixin, as the sketch below illustrates.

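Continuing the ``TimestampMixin`` example, a hypothetical second class
receives its own, distinct ``created_at`` object (the ``MyOtherModel`` name
is invented here for illustration)::

    class MyOtherModel(TimestampMixin, Base):
        __tablename__ = 'test2'

        id = Column(Integer, primary_key=True)

    # each mapped class received its own copy of the mixin's Column
    assert MyModel.__table__.c.created_at is not \
        MyOtherModel.__table__.c.created_at

    # neither copy is the object that sits on the mixin itself
    assert TimestampMixin.created_at is not MyModel.__table__.c.created_at
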
This copy mechanism is limited to simple columns that have no foreign
keys, as a :class:`.ForeignKey` itself contains references to columns
which can't be properly recreated at this level.  For columns that
have foreign keys, as well as for the variety of mapper-level constructs
that require destination-explicit context, the
:class:`~.declared_attr` decorator is provided so that
patterns common to many classes can be defined as callables::

    from sqlalchemy.ext.declarative import declared_attr

    class ReferenceAddressMixin(object):
        @declared_attr
        def address_id(cls):
            return Column(Integer, ForeignKey('address.id'))

    class User(ReferenceAddressMixin, Base):
        __tablename__ = 'user'
        id = Column(Integer, primary_key=True)

Where above, the ``address_id`` class-level callable is executed at the
point at which the ``User`` class is constructed, and the declarative
extension can use the resulting :class:`.Column` object as returned by
the method without the need to copy it.

.. versionchanged:: 0.6.5
    ``sqlalchemy.util.classproperty`` was renamed to
    :class:`~.declared_attr`.

Columns generated by :class:`~.declared_attr` can also be
referenced by ``__mapper_args__`` to a limited degree, currently
by ``polymorphic_on`` and ``version_id_col``, by specifying the
:class:`~.declared_attr` object itself within the dictionary - the
declarative extension will resolve it at class construction time::

    class MyMixin(object):
        @declared_attr
        def type_(cls):
            return Column(String(50))

        __mapper_args__ = {'polymorphic_on': type_}

    class MyModel(MyMixin, Base):
        __tablename__ = 'test'
        id = Column(Integer, primary_key=True)



Mixing in Relationships
~~~~~~~~~~~~~~~~~~~~~~~

Relationships created by :func:`~sqlalchemy.orm.relationship` are provided
with declarative mixin classes exclusively using the
:class:`.declared_attr` approach, eliminating any ambiguity
which could arise when copying a relationship and its possibly column-bound
contents.  Below is an example which combines a foreign key column and a
relationship so that two classes ``Foo`` and ``Bar`` can both be configured to
reference a common target class via many-to-one::

    class RefTargetMixin(object):
        @declared_attr
        def target_id(cls):
            return Column('target_id', ForeignKey('target.id'))

        @declared_attr
        def target(cls):
            return relationship("Target")

    class Foo(RefTargetMixin, Base):
        __tablename__ = 'foo'
        id = Column(Integer, primary_key=True)

    class Bar(RefTargetMixin, Base):
        __tablename__ = 'bar'
        id = Column(Integer, primary_key=True)

    class Target(Base):
        __tablename__ = 'target'
        id = Column(Integer, primary_key=True)

Using Advanced Relationship Arguments (e.g. ``primaryjoin``, etc.)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

:func:`~sqlalchemy.orm.relationship` definitions which require explicit
``primaryjoin``, ``order_by`` etc. expressions should in all but the most
simplistic cases use **late bound** forms
for these arguments, meaning, using either the string form or a lambda.
The reason for this is that the related :class:`.Column` objects which are to
be configured using ``@declared_attr`` are not available to another
``@declared_attr`` attribute; while the methods will work and return new
:class:`.Column` objects, those are not the :class:`.Column` objects that
Declarative will be using as it calls the methods on its own, thus using
*different* :class:`.Column` objects.

The canonical example is the primaryjoin condition that depends upon
another mixed-in column::

    class RefTargetMixin(object):
        @declared_attr
        def target_id(cls):
            return Column('target_id', ForeignKey('target.id'))

        @declared_attr
        def target(cls):
            return relationship(Target,
                primaryjoin=Target.id == cls.target_id  # this is *incorrect*
            )

Mapping a class using the above mixin, we will get an error like::

    sqlalchemy.exc.InvalidRequestError: this ForeignKey's parent column is not
    yet associated with a Table.

This is because the ``target_id`` :class:`.Column` we've called upon in our
``target()`` method is not the same :class:`.Column` that declarative is
actually going to map to our table.

The condition above is resolved using a lambda::

    class RefTargetMixin(object):
        @declared_attr
        def target_id(cls):
            return Column('target_id', ForeignKey('target.id'))

        @declared_attr
        def target(cls):
            return relationship(Target,
                primaryjoin=lambda: Target.id == cls.target_id
            )

or alternatively, the string form (which ultimately generates a lambda)::

    class RefTargetMixin(object):
        @declared_attr
        def target_id(cls):
            return Column('target_id', ForeignKey('target.id'))

        @declared_attr
        def target(cls):
            return relationship("Target",
                primaryjoin="Target.id==%s.target_id" % cls.__name__
            )

Mixing in deferred(), column_property(), and other MapperProperty classes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Like :func:`~sqlalchemy.orm.relationship`, all
:class:`~sqlalchemy.orm.interfaces.MapperProperty` subclasses such as
:func:`~sqlalchemy.orm.deferred`, :func:`~sqlalchemy.orm.column_property`,
etc. ultimately involve references to columns, and therefore, when
used with declarative mixins, have the :class:`.declared_attr`
requirement so that no reliance on copying is needed::

    class SomethingMixin(object):

        @declared_attr
        def dprop(cls):
            return deferred(Column(Integer))

    class Something(SomethingMixin, Base):
        __tablename__ = "something"

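The same requirement applies when a :func:`~sqlalchemy.orm.column_property`
needs to refer to other columns established by the mixin; using ``cls``
inside the :class:`.declared_attr` callable refers to the per-class copies
rather than the mixin-level objects.  A sketch, where the ``x``, ``y`` and
``x_plus_y`` names are purely illustrative::

    from sqlalchemy.orm import column_property

    class NumericsMixin(object):
        x = Column(Integer)
        y = Column(Integer)

        @declared_attr
        def x_plus_y(cls):
            # cls.x and cls.y are the copies placed on the target
            # class, not the Column objects on the mixin itself
            return column_property(cls.x + cls.y)

    class Numbers(NumericsMixin, Base):
        __tablename__ = 'numbers'
        id = Column(Integer, primary_key=True)
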
Mixing in Association Proxy and Other Attributes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Mixins can specify user-defined attributes as well as other extension
units such as :func:`.association_proxy`.  The usage of
:class:`.declared_attr` is required in those cases where the attribute must
be tailored specifically to the target subclass.  An example is when
constructing multiple :func:`.association_proxy` attributes which each
target a different type of child object.  Below is an
:func:`.association_proxy` / mixin example which provides a scalar list of
string values to an implementing class::

    from sqlalchemy import Column, Integer, ForeignKey, String
    from sqlalchemy.orm import relationship
    from sqlalchemy.ext.associationproxy import association_proxy
    from sqlalchemy.ext.declarative import declarative_base, declared_attr

    Base = declarative_base()

    class HasStringCollection(object):
        @declared_attr
        def _strings(cls):
            class StringAttribute(Base):
                __tablename__ = cls.string_table_name
                id = Column(Integer, primary_key=True)
                value = Column(String(50), nullable=False)
                parent_id = Column(Integer,
                                ForeignKey('%s.id' % cls.__tablename__),
                                nullable=False)
                def __init__(self, value):
                    self.value = value

            return relationship(StringAttribute)

        @declared_attr
        def strings(cls):
            return association_proxy('_strings', 'value')

    class TypeA(HasStringCollection, Base):
        __tablename__ = 'type_a'
        string_table_name = 'type_a_strings'
        id = Column(Integer(), primary_key=True)

    class TypeB(HasStringCollection, Base):
        __tablename__ = 'type_b'
        string_table_name = 'type_b_strings'
        id = Column(Integer(), primary_key=True)

Above, the ``HasStringCollection`` mixin produces a :func:`.relationship`
which refers to a newly generated class called ``StringAttribute``.  The
``StringAttribute`` class is generated with its own :class:`.Table`
definition which is local to the parent class making usage of the
``HasStringCollection`` mixin.  It also produces an :func:`.association_proxy`
object which proxies references to the ``strings`` attribute onto the ``value``
attribute of each ``StringAttribute`` instance.

``TypeA`` or ``TypeB`` can be instantiated given the constructor
argument ``strings``, a list of strings::

    ta = TypeA(strings=['foo', 'bar'])
    tb = TypeB(strings=['bat', 'bar'])

This list will generate a collection of ``StringAttribute`` objects, which
are persisted into a table that's local to each class - the
``type_a_strings`` table for ``TypeA`` and the ``type_b_strings`` table
for ``TypeB``::

    >>> print(ta._strings)
    [<__main__.StringAttribute object at 0x10151cd90>,
        <__main__.StringAttribute object at 0x10151ce10>]

When constructing the :func:`.association_proxy`, the
:class:`.declared_attr` decorator must be used so that a distinct
:func:`.association_proxy` object is created for each of the ``TypeA``
and ``TypeB`` classes.

.. versionadded:: 0.8 :class:`.declared_attr` is usable with non-mapped
   attributes, including user-defined attributes as well as
   :func:`.association_proxy`.


Controlling table inheritance with mixins
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ``__tablename__`` attribute in conjunction with the hierarchy of
classes involved in a declarative mixin scenario controls what type of
table inheritance, if any,
is configured by the declarative extension.

If the ``__tablename__`` is computed by a mixin, you may need to
control which classes get the computed attribute in order to get the
type of table inheritance you require.

For example, if you had a mixin that computes ``__tablename__`` but
where you wanted to use that mixin in a single table inheritance
hierarchy, you can explicitly specify ``__tablename__`` as ``None`` to
indicate that the class should not have a table mapped::

    from sqlalchemy.ext.declarative import declared_attr

    class Tablename(object):
        @declared_attr
        def __tablename__(cls):
            return cls.__name__.lower()

    class Person(Tablename, Base):
        id = Column(Integer, primary_key=True)
        discriminator = Column('type', String(50))
        __mapper_args__ = {'polymorphic_on': discriminator}

    class Engineer(Person):
        __tablename__ = None
        __mapper_args__ = {'polymorphic_identity': 'engineer'}
        primary_language = Column(String(50))

Alternatively, you can make the mixin intelligent enough to only
return a ``__tablename__`` in the event that no table is already
mapped in the inheritance hierarchy.  To help with this, a
:func:`~sqlalchemy.ext.declarative.has_inherited_table` helper
function is provided that returns ``True`` if a parent class already
has a mapped table.

As an example, here's a mixin that will only allow single table
inheritance::

    from sqlalchemy.ext.declarative import declared_attr
    from sqlalchemy.ext.declarative import has_inherited_table

    class Tablename(object):
        @declared_attr
        def __tablename__(cls):
            if has_inherited_table(cls):
                return None
            return cls.__name__.lower()

    class Person(Tablename, Base):
        id = Column(Integer, primary_key=True)
        discriminator = Column('type', String(50))
        __mapper_args__ = {'polymorphic_on': discriminator}

    class Engineer(Person):
        primary_language = Column(String(50))
        __mapper_args__ = {'polymorphic_identity': 'engineer'}


Combining Table/Mapper Arguments from Multiple Mixins
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In the case of ``__table_args__`` or ``__mapper_args__``
specified with declarative mixins, you may want to combine
some parameters from several mixins with those you wish to
define on the class itself.  The
:class:`.declared_attr` decorator can be used
here to create user-defined routines that combine arguments
from multiple collections::

    from sqlalchemy.ext.declarative import declared_attr

    class MySQLSettings(object):
        __table_args__ = {'mysql_engine': 'InnoDB'}

    class MyOtherMixin(object):
        __table_args__ = {'info': 'foo'}

    class MyModel(MySQLSettings, MyOtherMixin, Base):
        __tablename__ = 'my_model'

        @declared_attr
        def __table_args__(cls):
            args = dict()
            args.update(MySQLSettings.__table_args__)
            args.update(MyOtherMixin.__table_args__)
            return args

        id = Column(Integer, primary_key=True)

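``__mapper_args__`` can be combined in exactly the same way; a sketch
following the same pattern, where the mixin and the argument values are
only for illustration::

    from sqlalchemy.ext.declarative import declared_attr

    class RefreshSettings(object):
        __mapper_args__ = {'always_refresh': True}

    class MyRefreshModel(RefreshSettings, Base):
        __tablename__ = 'my_refresh_model'

        @declared_attr
        def __mapper_args__(cls):
            # merge the mixin's arguments with locally-specified ones
            args = dict()
            args.update(RefreshSettings.__mapper_args__)
            args.update({'batch': False})
            return args

        id = Column(Integer, primary_key=True)
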
Creating Indexes with Mixins
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

To define a named, potentially multicolumn :class:`.Index` that applies to all
tables derived from a mixin, use the "inline" form of :class:`.Index` and
establish it as part of ``__table_args__``::

    class MyMixin(object):
        a = Column(Integer)
        b = Column(Integer)

        @declared_attr
        def __table_args__(cls):
            return (Index('test_idx_%s' % cls.__tablename__, 'a', 'b'),)

    class MyModel(MyMixin, Base):
        __tablename__ = 'atable'
        c = Column(Integer, primary_key=True)

Special Directives
==================

``__declare_last__()``
~~~~~~~~~~~~~~~~~~~~~~

The ``__declare_last__()`` hook allows definition of
a class level function that is automatically called by the
:meth:`.MapperEvents.after_configured` event, which occurs after mappings are
assumed to be completed and the 'configure' step has finished::

    class MyClass(Base):
        @classmethod
        def __declare_last__(cls):
            ""
            # do something with mappings

.. versionadded:: 0.7.3

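As a purely hypothetical illustration of what such a hook might do, the
fully-configured mapping can be examined once the event fires; the class
and the assertion below are invented for this example::

    from sqlalchemy import inspect

    class MyAuditedClass(Base):
        __tablename__ = 'my_audited'
        id = Column(Integer, primary_key=True)

        @classmethod
        def __declare_last__(cls):
            # mappings are complete at this point, so the mapper and
            # its configured attributes can be inspected safely
            assert 'id' in inspect(cls).columns
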
``__declare_first__()``
~~~~~~~~~~~~~~~~~~~~~~~

Like ``__declare_last__()``, but is called at the beginning of mapper
configuration via the :meth:`.MapperEvents.before_configured` event::

    class MyClass(Base):
        @classmethod
        def __declare_first__(cls):
            ""
            # do something before mappings are configured

.. versionadded:: 0.9.3

.. _declarative_abstract:

``__abstract__``
~~~~~~~~~~~~~~~~~~~

``__abstract__`` causes declarative to skip the production
of a table or mapper for the class entirely.  A class can be added within a
hierarchy in the same way as a mixin (see :ref:`declarative_mixins`), allowing
subclasses to extend just from the special class::

    class SomeAbstractBase(Base):
        __abstract__ = True

        def some_helpful_method(self):
            ""

        @declared_attr
        def __mapper_args__(cls):
            return {"helpful mapper arguments":True}

    class MyMappedClass(SomeAbstractBase):
        ""

One possible use of ``__abstract__`` is to use a distinct
:class:`.MetaData` for different bases::

    Base = declarative_base()

    class DefaultBase(Base):
        __abstract__ = True
        metadata = MetaData()

    class OtherBase(Base):
        __abstract__ = True
        metadata = MetaData()

Above, classes which inherit from ``DefaultBase`` will use one
:class:`.MetaData` as the registry of tables, and those which inherit from
``OtherBase`` will use a different one.  The tables themselves can then be
created perhaps within distinct databases::

    DefaultBase.metadata.create_all(some_engine)
    OtherBase.metadata.create_all(some_other_engine)

.. versionadded:: 0.7.3

Class Constructor
=================

As a convenience feature, the :func:`declarative_base` function sets a
default constructor on classes which takes keyword arguments, and assigns
them to the named attributes::

    e = Engineer(primary_language='python')

Sessions
========

Note that ``declarative`` does nothing special with sessions, and is
only intended as an easier way to configure mappers and
:class:`~sqlalchemy.schema.Table` objects.  A typical application
setup using :class:`~sqlalchemy.orm.scoping.scoped_session` might look like::

    engine = create_engine('postgresql://scott:tiger@localhost/test')
    Session = scoped_session(sessionmaker(autocommit=False,
                                          autoflush=False,
                                          bind=engine))
    Base = declarative_base()

Mapped instances then make use of
:class:`~sqlalchemy.orm.session.Session` in the usual way.

"""

 from .api import declarative_base, synonym_for, comparable_using, \
     instrument_declarative, ConcreteBase, AbstractConcreteBase, \
     DeclarativeMeta, DeferredReflection, has_inherited_table,\
-    declared_attr, as_declarative
+    declared_attr

 __all__ = ['declarative_base', 'synonym_for', 'has_inherited_table',