From: Ben Darnell Date: Tue, 22 Aug 2023 02:20:34 +0000 (-0400) Subject: test: Refactor circlerefs script into a test X-Git-Tag: v6.4.0b1~12^2~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ddb7e88afd4501e34c5c3bcebd10265f89aa0cf9;p=thirdparty%2Ftornado.git test: Refactor circlerefs script into a test This script was only ever run irregularly on its own; bring it in to the test suite so it can be run automatically. --- diff --git a/maint/circlerefs/circlerefs.py b/maint/circlerefs/circlerefs.py deleted file mode 100755 index bd8214aa8..000000000 --- a/maint/circlerefs/circlerefs.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -"""Test script to find circular references. - -Circular references are not leaks per se, because they will eventually -be GC'd. However, on CPython, they prevent the reference-counting fast -path from being used and instead rely on the slower full GC. This -increases memory footprint and CPU overhead, so we try to eliminate -circular references created by normal operation. -""" - -import gc -import traceback -import types -from tornado import web, ioloop, gen, httpclient - - -def find_circular_references(garbage=None): - def inner(level): - for item in level: - item_id = id(item) - if item_id not in garbage_ids: - continue - if item_id in visited_ids: - continue - if item_id in stack_ids: - candidate = stack[stack.index(item):] - candidate.append(item) - found.append(candidate) - continue - - stack.append(item) - stack_ids.add(item_id) - inner(gc.get_referents(item)) - stack.pop() - stack_ids.remove(item_id) - visited_ids.add(item_id) - - garbage = garbage or gc.garbage - found = [] - stack = [] - stack_ids = set() - garbage_ids = set(map(id, garbage)) - visited_ids = set() - - inner(garbage) - inner = None - return found - - -class CollectHandler(web.RequestHandler): - @gen.coroutine - def get(self): - self.write("Collected: {}\n".format(gc.collect())) - self.write("Garbage: {}\n".format(len(gc.garbage))) - for circular in find_circular_references(): - print('\n==========\n Circular \n==========') - for item in circular: - print(' ', repr(item)) - for item in circular: - if isinstance(item, types.FrameType): - print('\nLocals:', item.f_locals) - print('\nTraceback:', repr(item)) - traceback.print_stack(item) - - -class DummyHandler(web.RequestHandler): - @gen.coroutine - def get(self): - self.write('ok\n') - - -class DummyAsyncHandler(web.RequestHandler): - @gen.coroutine - def get(self): - raise web.Finish('ok\n') - - -application = web.Application([ - (r'/dummy/', DummyHandler), - (r'/dummyasync/', DummyAsyncHandler), - (r'/collect/', CollectHandler), -], debug=True) - - -@gen.coroutine -def main(): - gc.disable() - gc.collect() - gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_SAVEALL) - print('GC disabled') - - print("Start on 8888") - application.listen(8888, '127.0.0.1') - - # Do a little work. Alternately, could leave this script running and - # poke at it with a browser. - client = httpclient.AsyncHTTPClient() - yield client.fetch('http://127.0.0.1:8888/dummy/') - yield client.fetch('http://127.0.0.1:8888/dummyasync/', raise_error=False) - - # Now report on the results. - resp = yield client.fetch('http://127.0.0.1:8888/collect/') - print(resp.body) - - -if __name__ == "__main__": - ioloop.IOLoop.current().run_sync(main) diff --git a/tornado/test/circlerefs_test.py b/tornado/test/circlerefs_test.py new file mode 100644 index 000000000..2502c5fba --- /dev/null +++ b/tornado/test/circlerefs_test.py @@ -0,0 +1,183 @@ +"""Test script to find circular references. + +Circular references are not leaks per se, because they will eventually +be GC'd. However, on CPython, they prevent the reference-counting fast +path from being used and instead rely on the slower full GC. This +increases memory footprint and CPU overhead, so we try to eliminate +circular references created by normal operation. +""" + +import asyncio +import contextlib +import gc +import io +import sys +import traceback +import types +import typing +import unittest + +import tornado +from tornado import web, gen, httpclient + + +def find_circular_references(garbage): + """Find circular references in a list of objects. + + The garbage list contains objects that participate in a cycle, + but also the larger set of objects kept alive by that cycle. + This function finds subsets of those objects that make up + the cycle(s). + """ + + def inner(level): + for item in level: + item_id = id(item) + if item_id not in garbage_ids: + continue + if item_id in visited_ids: + continue + if item_id in stack_ids: + candidate = stack[stack.index(item) :] + candidate.append(item) + found.append(candidate) + continue + + stack.append(item) + stack_ids.add(item_id) + inner(gc.get_referents(item)) + stack.pop() + stack_ids.remove(item_id) + visited_ids.add(item_id) + + found: typing.List[object] = [] + stack = [] + stack_ids = set() + garbage_ids = set(map(id, garbage)) + visited_ids = set() + + inner(garbage) + return found + + +@contextlib.contextmanager +def assert_no_cycle_garbage(): + """Raise AssertionError if the wrapped code creates garbage with cycles.""" + gc.disable() + gc.collect() + gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_SAVEALL) + yield + try: + # We have DEBUG_STATS on which causes gc.collect to write to stderr. + # Capture the output instead of spamming the logs on passing runs. + f = io.StringIO() + old_stderr = sys.stderr + sys.stderr = f + try: + gc.collect() + finally: + sys.stderr = old_stderr + garbage = gc.garbage[:] + # Must clear gc.garbage (the same object, not just replacing it with a + # new list) to avoid warnings at shutdown. + gc.garbage[:] = [] + if len(garbage) == 0: + return + for circular in find_circular_references(garbage): + f.write("\n==========\n Circular \n==========") + for item in circular: + f.write(f"\n {repr(item)}") + for item in circular: + if isinstance(item, types.FrameType): + f.write(f"\nLocals: {item.f_locals}") + f.write(f"\nTraceback: {repr(item)}") + traceback.print_stack(item) + del garbage + raise AssertionError(f.getvalue()) + finally: + gc.set_debug(0) + gc.enable() + + +class CircleRefsTest(unittest.TestCase): + def test_known_leak(self): + # Construct a known leak scenario to make sure the test harness works. + class C(object): + def __init__(self, name): + self.name = name + self.a: typing.Optional[C] = None + self.b: typing.Optional[C] = None + self.c: typing.Optional[C] = None + + def __repr__(self): + return f"name={self.name}" + + with self.assertRaises(AssertionError) as cm: + with assert_no_cycle_garbage(): + # a and b form a reference cycle. c is not part of the cycle, + # but it cannot be GC'd while a and b are alive. + a = C("a") + b = C("b") + c = C("c") + a.b = b + a.c = c + b.a = a + b.c = c + del a, b + self.assertIn("Circular", str(cm.exception)) + self.assertIn("name=a", str(cm.exception)) + self.assertIn("name=b", str(cm.exception)) + self.assertNotIn("name=c", str(cm.exception)) + + async def run_handler(self, handler_class): + app = web.Application( + [ + (r"/", handler_class), + ] + ) + socket, port = tornado.testing.bind_unused_port() + server = tornado.httpserver.HTTPServer(app) + server.add_socket(socket) + + client = httpclient.AsyncHTTPClient() + with assert_no_cycle_garbage(): + # Only the fetch (and the corresponding server-side handler) + # are being tested for cycles. In particular, the Application + # object has internal cycles (as of this writing) which we don't + # care to fix since in real world usage the Application object + # is effectively a global singleton. + await client.fetch(f"http://127.0.0.1:{port}/") + client.close() + server.stop() + socket.close() + + def test_sync_handler(self): + class Handler(web.RequestHandler): + def get(self): + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_finish_exception_handler(self): + class Handler(web.RequestHandler): + def get(self): + raise web.Finish("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_coro_handler(self): + class Handler(web.RequestHandler): + @gen.coroutine + def get(self): + yield asyncio.sleep(0.01) + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) + + def test_async_handler(self): + class Handler(web.RequestHandler): + async def get(self): + await asyncio.sleep(0.01) + self.write("ok\n") + + asyncio.run(self.run_handler(Handler)) diff --git a/tornado/test/runtests.py b/tornado/test/runtests.py index 58cecd383..f35b37254 100644 --- a/tornado/test/runtests.py +++ b/tornado/test/runtests.py @@ -22,6 +22,7 @@ TEST_MODULES = [ "tornado.test.asyncio_test", "tornado.test.auth_test", "tornado.test.autoreload_test", + "tornado.test.circlerefs_test", "tornado.test.concurrent_test", "tornado.test.curl_httpclient_test", "tornado.test.escape_test",