Attempt to speed up deepfreeze.py (#107887)

author Guido van Rossum <guido@python.org>

Mon, 14 Aug 2023 21:41:27 +0000 (14:41 -0700)

committer GitHub <noreply@github.com>

Mon, 14 Aug 2023 21:41:27 +0000 (14:41 -0700)
author Guido van Rossum <guido@python.org>
Mon, 14 Aug 2023 21:41:27 +0000 (14:41 -0700)
committer GitHub <noreply@github.com>
Mon, 14 Aug 2023 21:41:27 +0000 (14:41 -0700)
diff --git a/Makefile.pre.in b/Makefile.pre.in

index 52236f7924503d7dabc072b8fee386f0f6fb1dd0..3a628bf49e97c19c1eeb966c1979f151dcc7c4dc 100644 (file)
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1253,7 +1253,7 @@ regen-frozen: Tools/build/freeze_modules.py $(FROZEN_FILES_IN)
  .PHONY: regen-deepfreeze
  regen-deepfreeze: $(DEEPFREEZE_OBJS)
  
-DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT)
+DEEPFREEZE_DEPS=$(srcdir)/Tools/build/deepfreeze.py Include/internal/pycore_global_strings.h $(FREEZE_MODULE_DEPS) $(FROZEN_FILES_OUT)
  
  # BEGIN: deepfreeze modules
  Python/deepfreeze/deepfreeze.c: $(DEEPFREEZE_DEPS)
diff --git a/Tools/build/deepfreeze.py b/Tools/build/deepfreeze.py

index a11fe6a62811ab01e4e09fd25c90d8b0597389d2..ce609bd0898741033af400b4747767394856cd1c 100644 (file)
--- a/Tools/build/deepfreeze.py
+++ b/Tools/build/deepfreeze.py
@@ -6,7 +6,6 @@ On Windows, and in cross-compilation cases, it is executed
  by Python 3.10, and 3.11 features are not available.
  """
  import argparse
-import ast
  import builtins
  import collections
  import contextlib
@@ -17,10 +16,10 @@ import types
  from typing import Dict, FrozenSet, TextIO, Tuple
  
  import umarshal
-from generate_global_objects import get_identifiers_and_strings
+
+ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
  
  verbose = False
-identifiers, strings = get_identifiers_and_strings()
  
  # This must be kept in sync with opcode.py
  RESUME = 151
@@ -114,6 +113,7 @@ class Printer:
          self.hits, self.misses = 0, 0
          self.finis: list[str] = []
          self.inits: list[str] = []
+        self.identifiers, self.strings = self.get_identifiers_and_strings()
          self.write('#include "Python.h"')
          self.write('#include "internal/pycore_gc.h"')
          self.write('#include "internal/pycore_code.h"')
@@ -121,6 +121,19 @@ class Printer:
          self.write('#include "internal/pycore_long.h"')
          self.write("")
  
+    def get_identifiers_and_strings(self) -> tuple[set[str], dict[str, str]]:
+        filename = os.path.join(ROOT, "Include", "internal", "pycore_global_strings.h")
+        with open(filename) as fp:
+            lines = fp.readlines()
+        identifiers: set[str] = set()
+        strings: dict[str, str] = {}
+        for line in lines:
+            if m := re.search(r"STRUCT_FOR_ID\((\w+)\)", line):
+                identifiers.add(m.group(1))
+            if m := re.search(r'STRUCT_FOR_STR\((\w+), "(.*?)"\)', line):
+                strings[m.group(2)] = m.group(1)
+        return identifiers, strings
+
      @contextlib.contextmanager
      def indent(self) -> None:
          save_level = self.level
@@ -171,9 +184,9 @@ class Printer:
          return f"& {name}.ob_base.ob_base"
  
      def generate_unicode(self, name: str, s: str) -> str:
-        if s in strings:
-            return f"&_Py_STR({strings[s]})"
-        if s in identifiers:
+        if s in self.strings:
+            return f"&_Py_STR({self.strings[s]})"
+        if s in self.identifiers:
              return f"&_Py_ID({s})"
          if len(s) == 1:
              c = ord(s)
@@ -441,12 +454,10 @@ def is_frozen_header(source: str) -> bool:
  
  
  def decode_frozen_data(source: str) -> types.CodeType:
-    lines = source.splitlines()
-    while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
-        del lines[0]
-    while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
-        del lines[-1]
-    values: Tuple[int, ...] = ast.literal_eval("".join(lines).strip())
+    values: list[int] = []
+    for line in source.splitlines():
+        if re.match(FROZEN_DATA_LINE, line):
+            values.extend([int(x) for x in line.split(",") if x.strip()])
      data = bytes(values)
      return umarshal.loads(data)
author	Guido van Rossum <guido@python.org>
	Mon, 14 Aug 2023 21:41:27 +0000 (14:41 -0700)
committer	GitHub <noreply@github.com>
	Mon, 14 Aug 2023 21:41:27 +0000 (14:41 -0700)
Makefile.pre.in		patch | blob | blame | history
Tools/build/deepfreeze.py		patch | blob | blame | history