]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-46576: Speed up test_peg_generator by using a static library for shared sources...
authorJeremy Kloth <jeremy.kloth@gmail.com>
Wed, 6 Apr 2022 21:55:58 +0000 (15:55 -0600)
committerGitHub <noreply@github.com>
Wed, 6 Apr 2022 21:55:58 +0000 (14:55 -0700)
Speed up test_peg_generator by using a static library for shared sources to avoid recompiling as much code.

Lib/test/test_peg_generator/test_c_parser.py
Tools/peg_generator/pegen/build.py
Tools/peg_generator/pegen/testutil.py

index 51a4f7d7c07a081b37ca49159a402529b3f2e064..13b83a9db9eb37eb31f3ae77e3ebc578b9c35f4e 100644 (file)
@@ -72,13 +72,30 @@ unittest.main()
 
 @support.requires_subprocess()
 class TestCParser(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        # When running under regtest, a separate tempdir is used
+        # as the current directory and watched for left-overs.
+        # Reusing that as the base for temporary directories
+        # ensures everything is cleaned up properly and
+        # cleans up afterwards if not (with warnings).
+        cls.tmp_base = os.getcwd()
+        if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
+            cls.tmp_base = None
+        # Create a directory for the reusable static library part of
+        # the pegen extension build process.  This greatly reduces the
+        # runtime overhead of spawning compiler processes.
+        cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
+        cls.addClassCleanup(shutil.rmtree, cls.library_dir)
+
     def setUp(self):
         self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
         cmd = support.missing_compiler_executable()
         if cmd is not None:
             self.skipTest("The %r command is not found" % cmd)
         self.old_cwd = os.getcwd()
-        self.tmp_path = tempfile.mkdtemp()
+        self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
         change_cwd = os_helper.change_cwd(self.tmp_path)
         change_cwd.__enter__()
         self.addCleanup(change_cwd.__exit__, None, None, None)
@@ -91,7 +108,10 @@ class TestCParser(unittest.TestCase):
 
     def build_extension(self, grammar_source):
         grammar = parse_string(grammar_source, GrammarParser)
-        generate_parser_c_extension(grammar, Path(self.tmp_path))
+        # Because setUp() already changes the current directory to the
+        # temporary path, use a relative path here to prevent excessive
+        # path lengths when compiling.
+        generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)
 
     def run_test(self, grammar_source, test_source):
         self.build_extension(grammar_source)
index 78789b94df2e4ffc90c536fdce19d7a6a49fb836..5805ff637174404bef6f5d00dae2b184fc46c3b9 100644 (file)
@@ -1,6 +1,5 @@
 import itertools
 import pathlib
-import shutil
 import sys
 import sysconfig
 import tempfile
@@ -33,7 +32,8 @@ def compile_c_extension(
     build_dir: Optional[str] = None,
     verbose: bool = False,
     keep_asserts: bool = True,
-    disable_optimization: bool = True,  # Significant test_peg_generator speedup.
+    disable_optimization: bool = False,
+    library_dir: Optional[str] = None,
 ) -> str:
     """Compile the generated source for a parser generator into an extension module.
 
@@ -44,15 +44,21 @@ def compile_c_extension(
 
     If *build_dir* is provided, that path will be used as the temporary build directory
     of distutils (this is useful in case you want to use a temporary directory).
+
+    If *library_dir* is provided, that path will be used as the directory for a
+    static library of the common parser sources (this is useful in case you are
+    creating multiple extensions).
     """
     import distutils.log
-    from distutils.command.build_ext import build_ext  # type: ignore
-    from distutils.command.clean import clean  # type: ignore
     from distutils.core import Distribution, Extension
     from distutils.tests.support import fixup_build_ext  # type: ignore
 
+    from distutils.ccompiler import new_compiler
+    from distutils.dep_util import newer_group
+    from distutils.sysconfig import customize_compiler
+
     if verbose:
-        distutils.log.set_verbosity(distutils.log.DEBUG)
+        distutils.log.set_threshold(distutils.log.DEBUG)
 
     source_file_path = pathlib.Path(generated_source_path)
     extension_name = source_file_path.stem
@@ -71,46 +77,92 @@ def compile_c_extension(
             extra_compile_args.append("-O0")
             if sysconfig.get_config_var("GNULD") == "yes":
                 extra_link_args.append("-fno-lto")
-    extension = [
-        Extension(
-            extension_name,
-            sources=[
-                str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
-                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
-                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
-                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
-                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
-                str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
-                str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
-                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
-                generated_source_path,
-            ],
-            include_dirs=[
-                str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
-                str(MOD_DIR.parent.parent.parent / "Parser"),
-            ],
-            extra_compile_args=extra_compile_args,
-            extra_link_args=extra_link_args,
-        )
+
+    common_sources = [
+        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
+        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
+        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
+        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
+    ]
+    include_dirs = [
+        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
+        str(MOD_DIR.parent.parent.parent / "Parser"),
     ]
-    dist = Distribution({"name": extension_name, "ext_modules": extension})
-    cmd = build_ext(dist)
+    extension = Extension(
+        extension_name,
+        sources=[generated_source_path],
+        extra_compile_args=extra_compile_args,
+        extra_link_args=extra_link_args,
+    )
+    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
+    cmd = dist.get_command_obj("build_ext")
     fixup_build_ext(cmd)
-    cmd.inplace = True
+    cmd.build_lib = str(source_file_path.parent)
+    cmd.include_dirs = include_dirs
     if build_dir:
         cmd.build_temp = build_dir
-        cmd.build_lib = build_dir
     cmd.ensure_finalized()
-    cmd.run()
-
-    extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
-    shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)
-
-    cmd = clean(dist)
-    cmd.finalize_options()
-    cmd.run()
 
-    return extension_path
+    compiler = new_compiler()
+    customize_compiler(compiler)
+    compiler.set_include_dirs(cmd.include_dirs)
+    compiler.set_library_dirs(cmd.library_dirs)
+    # build static lib
+    if library_dir:
+        library_filename = compiler.library_filename(extension_name,
+                                                     output_dir=library_dir)
+        if newer_group(common_sources, library_filename, 'newer'):
+            if sys.platform == 'win32':
+                pdb = compiler.static_lib_format % (extension_name, '.pdb')
+                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
+                compile_opts.extend(extra_compile_args)
+            else:
+                compile_opts = extra_compile_args
+            objects = compiler.compile(common_sources,
+                                       output_dir=library_dir,
+                                       debug=cmd.debug,
+                                       extra_postargs=compile_opts)
+            compiler.create_static_lib(objects, extension_name,
+                                       output_dir=library_dir,
+                                       debug=cmd.debug)
+        if sys.platform == 'win32':
+            compiler.add_library_dir(library_dir)
+            extension.libraries = [extension_name]
+        elif sys.platform == 'darwin':
+            compiler.set_link_objects([
+                '-Wl,-force_load', library_filename,
+            ])
+        else:
+            compiler.set_link_objects([
+                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
+            ])
+    else:
+        extension.sources[0:0] = common_sources
+
+    # Compile the source code to object files.
+    ext_path = cmd.get_ext_fullpath(extension_name)
+    if newer_group(extension.sources, ext_path, 'newer'):
+        objects = compiler.compile(extension.sources,
+                                    output_dir=cmd.build_temp,
+                                    debug=cmd.debug,
+                                    extra_postargs=extra_compile_args)
+    else:
+        objects = compiler.object_filenames(extension.sources,
+                                            output_dir=cmd.build_temp)
+    # Now link the object files together into a "shared object"
+    compiler.link_shared_object(
+        objects, ext_path,
+        libraries=cmd.get_libraries(extension),
+        extra_postargs=extra_link_args,
+        export_symbols=cmd.get_export_symbols(extension),
+        debug=cmd.debug,
+        build_temp=cmd.build_temp)
+
+    return pathlib.Path(ext_path)
 
 
 def build_parser(
index 8e5dbc5cdbb33999f928a1d3896f0556a7836380..473d208a786717568838a7a0928c070294eeacbe 100644 (file)
@@ -6,7 +6,7 @@ import sys
 import textwrap
 import token
 import tokenize
-from typing import IO, Any, Dict, Final, Type, cast
+from typing import IO, Any, Dict, Final, Optional, Type, cast
 
 from pegen.build import compile_c_extension
 from pegen.c_generator import CParserGenerator
@@ -83,7 +83,8 @@ def generate_c_parser_source(grammar: Grammar) -> str:
 
 
 def generate_parser_c_extension(
-    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
+    grammar: Grammar, path: pathlib.PurePath, debug: bool = False,
+    library_dir: Optional[str] = None,
 ) -> Any:
     """Generate a parser c extension for the given grammar in the given path
 
@@ -101,7 +102,13 @@ def generate_parser_c_extension(
             grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
         )
         genr.generate("parse.c")
-    compile_c_extension(str(source), build_dir=str(path))
+    compile_c_extension(
+        str(source),
+        build_dir=str(path),
+        # Significant test_peg_generator speedups
+        disable_optimization=True,
+        library_dir=library_dir,
+    )
 
 
 def print_memstats() -> bool: