From fa7c8f88113d2e769274dee4aa4247b9c9aadec8 Mon Sep 17 00:00:00 2001
From: Mike Bayer
Date: Sat, 6 Sep 2014 13:01:21 -0400
Subject: [PATCH] - try to finish up the performance example for now

---
 doc/build/orm/examples.rst               |   7 +
 examples/performance/__init__.py         | 299 ++++++++++++++++++++---
 examples/performance/__main__.py         |   5 +
 examples/performance/bulk_inserts.py     |   6 +-
 examples/performance/large_resultsets.py |   6 +-
 examples/performance/single_inserts.py   |  12 +-
 6 files changed, 291 insertions(+), 44 deletions(-)
 create mode 100644 examples/performance/__main__.py

diff --git a/doc/build/orm/examples.rst b/doc/build/orm/examples.rst
index b820dba9f0..93478381a4 100644
--- a/doc/build/orm/examples.rst
+++ b/doc/build/orm/examples.rst
@@ -62,6 +62,13 @@ Nested Sets
 
 .. automodule:: examples.nested_sets
 
+.. _examples_performance:
+
+Performance
+-----------
+
+.. automodule:: examples.performance
+
 .. _examples_relationships:
 
 Relationship Join Conditions
diff --git a/examples/performance/__init__.py b/examples/performance/__init__.py
index b57f25b947..6e2e1fc898 100644
--- a/examples/performance/__init__.py
+++ b/examples/performance/__init__.py
@@ -1,55 +1,232 @@
 """A performance profiling suite for a variety of SQLAlchemy use cases.
 
-The suites here each focus on some specific type of use case, one which
-has a particular performance profile:
+Each suite focuses on a specific use case with a particular performance
+profile and associated implications:
 
 * bulk inserts
 * individual inserts, with or without transactions
 * fetching large numbers of rows
-* running lots of small queries
+* running lots of small queries (TODO)
 
-All suites include a variety of use patterns with both the Core and
-ORM, and are sorted in order of performance from worst to greatest,
-inversely based on amount of functionality provided by SQLAlchemy,
+All suites include a variety of use patterns illustrating both Core
+and ORM use, and are generally sorted in order of performance from worst
+to greatest, inversely based on amount of functionality provided by SQLAlchemy,
 greatest to least (these two things generally correspond perfectly).
 
-Each suite is run as a module, and provides a consistent command line
-interface::
+A command line tool is presented at the package level which allows
+individual suites to be run::
 
-    $ python -m examples.performance.bulk_inserts --profile --num 1000
+    $ python -m examples.performance --help
+    usage: python -m examples.performance [-h] [--test TEST] [--dburl DBURL]
+                                          [--num NUM] [--profile] [--dump]
+                                          [--runsnake] [--echo]
 
-Using ``--help`` will allow all options::
+                                          {bulk_inserts,large_resultsets,single_inserts}
 
-    $ python -m examples.performance.bulk_inserts --help
-usage: bulk_inserts.py [-h] [--test TEST] [--dburl DBURL] [--num NUM]
-                       [--profile] [--dump] [--runsnake] [--echo]
+    positional arguments:
+      {bulk_inserts,large_resultsets,single_inserts}
+                            suite to run
 
-optional arguments:
-  -h, --help     show this help message and exit
-  --test TEST    run specific test name
-  --dburl DBURL  database URL, default sqlite:///profile.db
-  --num NUM      Number of iterations/items/etc for tests, default 100000
-  --profile      run profiling and dump call counts
-  --dump         dump full call profile (implies --profile)
-  --runsnake     invoke runsnakerun (implies --profile)
-  --echo         Echo SQL output
+    optional arguments:
+      -h, --help            show this help message and exit
+      --test TEST           run specific test name
+      --dburl DBURL         database URL, default sqlite:///profile.db
+      --num NUM             Number of iterations/items/etc for tests; default is 0
+                            module-specific
+      --profile             run profiling and dump call counts
+      --dump                dump full call profile (implies --profile)
+      --runsnake            invoke runsnakerun (implies --profile)
+      --echo                Echo SQL output
 
+An example run looks like::
+
+    $ python -m examples.performance bulk_inserts
+
+Or with options::
+
+    $ python -m examples.performance bulk_inserts \\
+        --dburl mysql+mysqldb://scott:tiger@localhost/test \\
+        --profile --num 1000
+
+
+Running all tests with time
+---------------------------
+
+This is the default form of run::
+
+    $ python -m examples.performance single_inserts
+    Tests to run: test_orm_commit, test_bulk_save,
+                  test_bulk_insert_dictionaries, test_core,
+                  test_core_query_caching, test_dbapi_raw_w_connect,
+                  test_dbapi_raw_w_pool
+
+    test_orm_commit : Individual INSERT/COMMIT pairs via the
+        ORM (10000 iterations); total time 13.690218 sec
+    test_bulk_save : Individual INSERT/COMMIT pairs using
+        the "bulk" API (10000 iterations); total time 11.290371 sec
+    test_bulk_insert_dictionaries : Individual INSERT/COMMIT pairs using
+        the "bulk" API with dictionaries (10000 iterations);
+        total time 10.814626 sec
+    test_core : Individual INSERT/COMMIT pairs using Core.
+        (10000 iterations); total time 9.665620 sec
+    test_core_query_caching : Individual INSERT/COMMIT pairs using Core
+        with query caching (10000 iterations); total time 9.209010 sec
+    test_dbapi_raw_w_connect : Individual INSERT/COMMIT pairs w/ DBAPI +
+        connection each time (10000 iterations); total time 9.551103 sec
+    test_dbapi_raw_w_pool : Individual INSERT/COMMIT pairs w/ DBAPI +
+        connection pool (10000 iterations); total time 8.001813 sec
+
+Dumping Profiles for Individual Tests
+--------------------------------------
+
+A Python profile output can be dumped for all tests, or more commonly
+individual tests::
+
+    $ python -m examples.performance single_inserts --test test_core --num 1000 --dump
+    Tests to run: test_core
+    test_core : Individual INSERT/COMMIT pairs using Core. (1000 iterations); total fn calls 186109
+
+    186109 function calls (186102 primitive calls) in 1.089 seconds
+
+    Ordered by: internal time, call count
+
+    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+    1000    0.634    0.001    0.634    0.001 {method 'commit' of 'sqlite3.Connection' objects}
+    1000    0.154    0.000    0.154    0.000 {method 'execute' of 'sqlite3.Cursor' objects}
+    1000    0.021    0.000    0.074    0.000 /Users/classic/dev/sqlalchemy/lib/sqlalchemy/sql/compiler.py:1950(_get_colparams)
+    1000    0.015    0.000    0.034    0.000 /Users/classic/dev/sqlalchemy/lib/sqlalchemy/engine/default.py:503(_init_compiled)
+    1       0.012    0.012    1.091    1.091 examples/performance/single_inserts.py:79(test_core)
+
+    ...
+
+Using RunSnake
+--------------
+
+This option requires the `RunSnake `_
+command line tool be installed::
+
+    $ python -m examples.performance single_inserts --test test_core --num 1000 --runsnake
+
+A graphical RunSnake output will be displayed.
+
+.. _examples_profiling_writeyourown:
+
+Writing your Own Suites
+-----------------------
+
+The profiler suite system is extensible, and can be applied to your own set
+of tests. This is a valuable technique to use in deciding upon the proper
+approach for some performance-critical set of routines. For example,
+if we wanted to profile the difference between several kinds of loading,
+we can create a file ``test_loads.py``, with the following content::
+
+    from examples.performance import Profiler
+    from sqlalchemy import Integer, Column, create_engine, ForeignKey
+    from sqlalchemy.orm import relationship, joinedload, subqueryload, Session
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+    engine = None
+    session = None
+
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child")
+
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('parent.id'))
+
+
+    # Init with name of file, default number of items
+    Profiler.init("test_loads", 1000)
+
+
+    @Profiler.setup_once
+    def setup_once(dburl, echo, num):
+        "setup once. create an engine, insert fixture data"
+        global engine
+        engine = create_engine(dburl, echo=echo)
+        Base.metadata.drop_all(engine)
+        Base.metadata.create_all(engine)
+        sess = Session(engine)
+        sess.add_all([
+            Parent(children=[Child() for j in range(100)])
+            for i in range(num)
+        ])
+        sess.commit()
+
+
+    @Profiler.setup
+    def setup(dburl, echo, num):
+        "setup per test. create a new Session."
+        global session
+        session = Session(engine)
+        # pre-connect so this part isn't profiled (if we choose)
+        session.connection()
+
+
+    @Profiler.profile
+    def test_lazyload(n):
+        "load everything, no eager loading."
+
+        for parent in session.query(Parent):
+            parent.children
+
+
+    @Profiler.profile
+    def test_joinedload(n):
+        "load everything, joined eager loading."
+
+        for parent in session.query(Parent).options(joinedload("children")):
+            parent.children
+
+
+    @Profiler.profile
+    def test_subqueryload(n):
+        "load everything, subquery eager loading."
+
+        for parent in session.query(Parent).options(subqueryload("children")):
+            parent.children
+
+    if __name__ == '__main__':
+        Profiler.main()
+
+We can run our new script directly::
+
+    $ python test_loads.py --dburl postgresql+psycopg2://scott:tiger@localhost/test
+    Running setup once...
+    Tests to run: test_lazyload, test_joinedload, test_subqueryload
+    test_lazyload : load everything, no eager loading. (1000 iterations); total time 11.971159 sec
+    test_joinedload : load everything, joined eager loading. (1000 iterations); total time 2.754592 sec
+    test_subqueryload : load everything, subquery eager loading. (1000 iterations); total time 2.977696 sec
+
+As well as see RunSnake output for an individual test::
+
+    $ python test_loads.py --num 100 --runsnake --test test_joinedload
 """
 import argparse
 import cProfile
-import StringIO
 import pstats
 import os
 import time
+import re
+import sys
 
 
 class Profiler(object):
     tests = []
 
-    def __init__(self, options, setup=None, setup_once=None):
-        self.setup = setup
-        self.setup_once = setup_once
+    _setup = None
+    _setup_once = None
+    name = None
+    num = 0
+
+    def __init__(self, options):
         self.test = options.test
         self.dburl = options.dburl
         self.runsnake = options.runsnake
@@ -59,11 +236,34 @@ class Profiler(object):
         self.echo = options.echo
         self.stats = []
 
+    @classmethod
+    def init(cls, name, num):
+        cls.name = name
+        cls.num = num
+
     @classmethod
     def profile(cls, fn):
+        if cls.name is None:
+            raise ValueError(
+                "Need to call Profile.init(<name>, <num>) first.")
         cls.tests.append(fn)
         return fn
 
+    @classmethod
+    def setup(cls, fn):
+        if cls._setup is not None:
+            raise ValueError("setup function already set to %s" % cls._setup)
+        cls._setup = staticmethod(fn)
+        return fn
+
+    @classmethod
+    def setup_once(cls, fn):
+        if cls._setup_once is not None:
+            raise ValueError(
+                "setup_once function already set to %s" % cls._setup_once)
+        cls._setup_once = staticmethod(fn)
+        return fn
+
     def run(self):
         if self.test:
             tests = [fn for fn in self.tests if fn.__name__ == self.test]
@@ -72,9 +272,9 @@
         else:
             tests = self.tests
 
-        if self.setup_once:
+        if self._setup_once:
             print("Running setup once...")
-            self.setup_once(self.dburl, self.echo, self.num)
+            self._setup_once(self.dburl, self.echo, self.num)
         print("Tests to run: %s" % ", ".join([t.__name__ for t in tests]))
         for test in tests:
             self._run_test(test)
@@ -88,8 +288,7 @@
         finally:
             pr.disable()
 
-        output = StringIO.StringIO()
-        stats = pstats.Stats(pr, stream=output).sort_stats('cumulative')
+        stats = pstats.Stats(pr).sort_stats('cumulative')
 
         self.stats.append(TestResult(self, fn, stats=stats))
         return result
@@ -103,29 +302,42 @@
         self.stats.append(TestResult(self, fn, total_time=total))
 
     def _run_test(self, fn):
-        if self.setup:
-            self.setup(self.dburl, self.echo, self.num)
+        if self._setup:
+            self._setup(self.dburl, self.echo, self.num)
         if self.profile or self.runsnake or self.dump:
             self._run_with_profile(fn)
         else:
             self._run_with_time(fn)
 
     @classmethod
-    def main(cls, num, setup=None, setup_once=None):
-        parser = argparse.ArgumentParser()
+    def main(cls):
+
+        parser = argparse.ArgumentParser("python -m examples.performance")
+
+        if cls.name is None:
+            parser.add_argument(
+                "name", choices=cls._suite_names(), help="suite to run")
+
+            if len(sys.argv) > 1:
+                potential_name = sys.argv[1]
+                try:
+                    suite = __import__(__name__ + "." + potential_name)
+                except ImportError:
+                    pass
+
         parser.add_argument(
             "--test", type=str,
             help="run specific test name"
         )
+
         parser.add_argument(
             '--dburl', type=str, default="sqlite:///profile.db",
             help="database URL, default sqlite:///profile.db"
         )
         parser.add_argument(
-            '--num', type=int, default=num,
+            '--num', type=int, default=cls.num,
             help="Number of iterations/items/etc for tests; "
-            "default is %d module-specific" % num
+            "default is %d module-specific" % cls.num
         )
         parser.add_argument(
             '--profile', action='store_true',
@@ -143,7 +355,19 @@
 
         args.profile = args.profile or args.dump or args.runsnake
 
-        Profiler(args, setup=setup, setup_once=setup_once).run()
+        if cls.name is None:
+            suite = __import__(__name__ + "." + args.name)
+
+        Profiler(args).run()
+
+    @classmethod
+    def _suite_names(cls):
+        suites = []
+        for file_ in os.listdir(os.path.dirname(__file__)):
+            match = re.match(r'^([a-z].*).py$', file_)
+            if match:
+                suites.append(match.group(1))
+        return suites
 
 
 class TestResult(object):
@@ -185,3 +409,4 @@
         finally:
             os.remove(filename)
 
+
diff --git a/examples/performance/__main__.py b/examples/performance/__main__.py
new file mode 100644
index 0000000000..957d6c699d
--- /dev/null
+++ b/examples/performance/__main__.py
@@ -0,0 +1,5 @@
+from . import Profiler
+
+if __name__ == '__main__':
+    Profiler.main()
+
diff --git a/examples/performance/bulk_inserts.py b/examples/performance/bulk_inserts.py
index 531003aa66..9c3cff5b22 100644
--- a/examples/performance/bulk_inserts.py
+++ b/examples/performance/bulk_inserts.py
@@ -20,6 +20,10 @@ class Customer(Base):
     description = Column(String(255))
 
 
+Profiler.init("bulk_inserts", num=100000)
+
+
+@Profiler.setup
 def setup_database(dburl, echo, num):
     global engine
     engine = create_engine(dburl, echo=echo)
@@ -147,4 +151,4 @@ def test_dbapi_raw(n):
         conn.close()
 
 if __name__ == '__main__':
-    Profiler.main(setup=setup_database, num=100000)
+    Profiler.main()
diff --git a/examples/performance/large_resultsets.py b/examples/performance/large_resultsets.py
index 77c0246fc4..0a5857b75b 100644
--- a/examples/performance/large_resultsets.py
+++ b/examples/performance/large_resultsets.py
@@ -30,6 +30,10 @@ class Customer(Base):
     description = Column(String(255))
 
 
+Profiler.init("large_resultsets", num=500000)
+
+
+@Profiler.setup_once
 def setup_database(dburl, echo, num):
     global engine
     engine = create_engine(dburl, echo=echo)
@@ -168,4 +172,4 @@ def _test_dbapi_raw(n, make_objects):
         conn.close()
 
 if __name__ == '__main__':
-    Profiler.main(setup_once=setup_database, num=500000)
+    Profiler.main()
diff --git a/examples/performance/single_inserts.py b/examples/performance/single_inserts.py
index 4178ccea83..cfce903004 100644
--- a/examples/performance/single_inserts.py
+++ b/examples/performance/single_inserts.py
@@ -21,6 +21,10 @@ class Customer(Base):
     description = Column(String(255))
 
 
+Profiler.init("single_inserts", num=10000)
+
+
+@Profiler.setup
 def setup_database(dburl, echo, num):
     global engine
     engine = create_engine(dburl, echo=echo)
@@ -106,16 +110,14 @@ def test_core_query_caching(n):
 
 
 @Profiler.profile
 def test_dbapi_raw_w_connect(n):
-    """Individual INSERT/COMMIT pairs using a pure DBAPI connection,
-    connect each time."""
+    """Individual INSERT/COMMIT pairs w/ DBAPI + connection each time"""
     _test_dbapi_raw(n, True)
 
 
 @Profiler.profile
 def test_dbapi_raw_w_pool(n):
-    """Individual INSERT/COMMIT pairs using a pure DBAPI connection,
-    using a connection pool."""
+    """Individual INSERT/COMMIT pairs w/ DBAPI + connection pool"""
     _test_dbapi_raw(n, False)
 
 
@@ -161,4 +163,4 @@
 
 
 if __name__ == '__main__':
-    Profiler.main(setup=setup_database, num=10000)
+    Profiler.main()
-- 
2.47.3