"""
-Illustrates three strategies for persisting and querying XML documents as represented by
-ElementTree in a relational database. The techniques do not apply any mappings to the ElementTree objects directly, so are compatible with the native cElementTree as well as lxml, and can be adapted to suit any kind of DOM representation system. Querying along xpath-like strings is illustrated as well.
+Illustrates three strategies for persisting and querying XML
+documents as represented by ElementTree in a relational
+database. The techniques do not apply any mappings to the
+ElementTree objects directly, so are compatible with the
+native cElementTree as well as lxml, and can be adapted to
+suit any kind of DOM representation system. Querying along
+xpath-like strings is illustrated as well.
In order of complexity:
represented in a separate table. The nodes are associated in a hierarchy using an adjacency list
structure. A query function is introduced which can search for nodes along any path with a given
structure of attributes, basically a (very narrow) subset of xpath.
-* ``optimized_al.py`` - Uses the same strategy as ``adjacency_list.py``, but adds a
- :class:`~sqlalchemy.orm.interfaces.MapperExtension` which optimizes how the hierarchical structure
- is loaded, such that the full set of DOM nodes are loaded within a single table result set, and
- are organized hierarchically as they are received during a load.
+* ``optimized_al.py`` - Uses the same strategy as ``adjacency_list.py``, but associates each
+ DOM row with its owning document row, so that a full document of DOM nodes can be
+ loaded using O(1) queries - the construction of the "hierarchy" is performed after
+ the load in a non-recursive fashion and is much more efficient.
E.g.::
################################# PART I - Imports/Configuration ####################################
from sqlalchemy import (MetaData, Table, Column, Integer, String, ForeignKey,
- Unicode, and_)
-from sqlalchemy.orm import mapper, relationship, create_session, lazyload
+ Unicode, and_, create_engine)
+from sqlalchemy.orm import mapper, relationship, Session, lazyload
import sys, os, StringIO, re
from xml.etree import ElementTree
+e = create_engine('sqlite://')
meta = MetaData()
-meta.bind = 'sqlite://'
################################# PART II - Table Metadata #########################################
Column('name', Unicode(100), nullable=False, primary_key=True),
Column('value', Unicode(255)))
-meta.create_all()
+meta.create_all(e)
#################################### PART III - Model #############################################
line = "\n--------------------------------------------------------"
# save to DB
-session = create_session()
+session = Session(e)
# get ElementTree documents
for file in ('test.xml', 'test2.xml', 'test3.xml'):
session.add(Document(file, doc))
print "\nSaving three documents...", line
-session.flush()
+session.commit()
print "Done."
-# clear session (to illustrate a full load), restore
-session.expunge_all()
-
print "\nFull text of document 'test.xml':", line
document = session.query(Document).filter_by(filename="test.xml").first()
"""
-################################# PART I - Imports/Configuration ###########################################
+##################### PART I - Imports/Configuration #########################
from sqlalchemy import (MetaData, Table, Column, Integer, String, ForeignKey,
- Unicode, and_)
-from sqlalchemy.orm import mapper, relationship, create_session, lazyload
+ Unicode, and_, create_engine)
+from sqlalchemy.orm import mapper, relationship, Session, lazyload
import sys, os, StringIO, re
from xml.etree import ElementTree
+e = create_engine('sqlite://', echo=True)
meta = MetaData()
-meta.bind = 'sqlite://'
-################################# PART II - Table Metadata ###########################################
+####################### PART II - Table Metadata #############################
# stores a top level record of an XML document.
documents = Table('documents', meta,
Column('name', Unicode(100), nullable=False, primary_key=True),
Column('value', Unicode(255)))
-meta.create_all()
+meta.create_all(e)
-#################################### PART III - Model #############################################
+########################### PART III - Model #################################
# our document class. contains a string name,
# and the ElementTree root element.
self.element.write(buf)
return buf.getvalue()
-#################################### PART IV - Persistence Mapping ###################################
+########################## PART IV - Persistence Mapping #####################
# Node class. a non-public class which will represent
# the DB-persisted Element/SubElement object. We cannot create mappers for
# override Document's "element" attribute with the marshaller.
Document.element = ElementTreeMarshal()
-########################################### PART V - Basic Persistence Example ############################
+###################### PART V - Basic Persistence Example ####################
line = "\n--------------------------------------------------------"
# save to DB
-session = create_session()
+session = Session(e)
# get ElementTree documents
for file in ('test.xml', 'test2.xml', 'test3.xml'):
session.add(Document(file, doc))
print "\nSaving three documents...", line
-session.flush()
+session.commit()
print "Done."
-# clear session (to illustrate a full load), restore
-session.expunge_all()
-
print "\nFull text of document 'text.xml':", line
document = session.query(Document).filter_by(filename="test.xml").first()
print document
-############################################ PART VI - Searching for Paths #######################################
+######################## PART VI - Searching for Paths #######################
# manually search for a document which contains "/somefile/header/field1:hi"
print "\nManual search for /somefile/header/field1=='hi':", line
-d = session.query(Document).join('_nodes', aliased=True).filter(and_(_Node.parent_id==None, _Node.tag==u'somefile')).\
- join('children', aliased=True, from_joinpoint=True).filter(_Node.tag==u'header').\
- join('children', aliased=True, from_joinpoint=True).filter(and_(_Node.tag==u'field1', _Node.text==u'hi')).\
- one()
+d = session.query(Document).join('_nodes', aliased=True).\
+ filter(and_(_Node.parent_id==None, _Node.tag==u'somefile')).\
+ join('children', aliased=True, from_joinpoint=True).\
+ filter(_Node.tag==u'header').\
+ join('children', aliased=True, from_joinpoint=True).\
+ filter(and_(_Node.tag==u'field1', _Node.text==u'hi')).\
+ one()
print d
# generalize the above approach into an extremely impoverished xpath function:
from sqlalchemy import (create_engine, MetaData, Table, Column, Integer, String,
PickleType)
-from sqlalchemy.orm import mapper, create_session
+from sqlalchemy.orm import mapper, Session
import sys, os
from xml.etree import ElementTree
-engine = create_engine('sqlite://')
-meta = MetaData(engine)
+e = create_engine('sqlite://')
+meta = MetaData()
# set up a comparator for the PickleType since it's a mutable
# element.
Column('element', PickleType(comparator=are_elements_equal))
)
-meta.create_all()
+meta.create_all(e)
# our document class. contains a string name,
# and the ElementTree root element.
doc = ElementTree.parse(filename)
# save to DB
-session = create_session()
+session = Session(e)
session.add(Document("test.xml", doc))
-session.flush()
+session.commit()
-# clear session (to illustrate a full load), restore
-session.expunge_all()
+# restore the document from the database
document = session.query(Document).filter_by(filename="test.xml").first()
# print