gh-112962: in dis module, put cache information in the Instruction instead of creatin...

author Irit Katriel <1055913+iritkatriel@users.noreply.github.com>

Wed, 13 Dec 2023 12:00:21 +0000 (12:00 +0000)

committer GitHub <noreply@github.com>

Wed, 13 Dec 2023 12:00:21 +0000 (12:00 +0000)
author Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
Wed, 13 Dec 2023 12:00:21 +0000 (12:00 +0000)
committer GitHub <noreply@github.com>
Wed, 13 Dec 2023 12:00:21 +0000 (12:00 +0000)
diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst

index 0d93bc9f5da774c40fa76fa5865f2091151e6653..5647021d6a9ba6b7b1bd99cd63e31896f6efac4a 100644 (file)
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -328,13 +328,17 @@ operation is being performed, so the intermediate analysis object isn't useful:
     source line information (if any) is taken directly from the disassembled code
     object.
  
-   The *show_caches* and *adaptive* parameters work as they do in :func:`dis`.
+   The *adaptive* parameter works as it does in :func:`dis`.
  
     .. versionadded:: 3.4
  
     .. versionchanged:: 3.11
        Added the *show_caches* and *adaptive* parameters.
  
+   .. versionchanged:: 3.13
+      The *show_caches* parameter is deprecated and has no effect. The *cache_info*
+      field of each instruction is populated regardless of its value.
+
  
  .. function:: findlinestarts(code)
  
@@ -482,6 +486,14 @@ details of bytecode instructions as :class:`Instruction` instances:
        :class:`dis.Positions` object holding the
        start and end locations that are covered by this instruction.
  
+   .. data::cache_info
+
+      Information about the cache entries of this instruction, as
+      triplets of the form ``(name, size, data)``, where the ``name``
+      and ``size`` describe the cache format and data is the contents
+      of the cache. ``cache_info`` is ``None`` if the instruction does not have
+      caches.
+
     .. versionadded:: 3.4
  
     .. versionchanged:: 3.11
@@ -493,8 +505,8 @@ details of bytecode instructions as :class:`Instruction` instances:
        Changed field ``starts_line``.
  
        Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
-      ``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, and
-      ``line_number``.
+      ``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``,
+      ``line_number`` and ``cache_info``.
  
  
  .. class:: Positions
diff --git a/Lib/dis.py b/Lib/dis.py

index efa935c5a6a0b6fc1b79d059d14c205434e5d61e..183091cb0d609882865a0c3e55929afe926fe5d1 100644 (file)
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -267,9 +267,10 @@ _Instruction = collections.namedtuple(
          'starts_line',
          'line_number',
          'label',
-        'positions'
+        'positions',
+        'cache_info',
      ],
-    defaults=[None, None]
+    defaults=[None, None, None]
  )
  
  _Instruction.opname.__doc__ = "Human readable name for operation"
@@ -286,6 +287,7 @@ _Instruction.starts_line.__doc__ = "True if this opcode starts a source line, ot
  _Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None"
  _Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None"
  _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
+_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction"
  
  _ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase",
      "start end target depth lasti")
@@ -334,6 +336,8 @@ class Instruction(_Instruction):
           label - A label if this instruction is a jump target, otherwise None
           positions - Optional dis.Positions object holding the span of source code
                       covered by this instruction
+         cache_info - information about the format and content of the instruction's cache
+                        entries (if any)
      """
  
      @property
@@ -570,7 +574,6 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
                                     linestarts=linestarts,
                                     line_offset=line_offset,
                                     co_positions=co.co_positions(),
-                                   show_caches=show_caches,
                                     original_code=original_code,
                                     arg_resolver=arg_resolver)
  
@@ -645,8 +648,7 @@ def _is_backward_jump(op):
                            'ENTER_EXECUTOR')
  
  def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
-                            show_caches=False, original_code=None, labels_map=None,
-                            arg_resolver=None):
+                            original_code=None, labels_map=None, arg_resolver=None):
      """Iterate over the instructions in a bytecode string.
  
      Generates a sequence of Instruction namedtuples giving the details of each
@@ -682,32 +684,28 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
          else:
              argval, argrepr = arg, repr(arg)
  
+        instr = Instruction(_all_opname[op], op, arg, argval, argrepr,
+                            offset, start_offset, starts_line, line_number,
+                            labels_map.get(offset, None), positions)
+
+        caches = _get_cache_size(_all_opname[deop])
+        # Advance the co_positions iterator:
+        for _ in range(caches):
+            next(co_positions, ())
+
+        if caches:
+            cache_info = []
+            for name, size in _cache_format[opname[deop]].items():
+                data = code[offset + 2: offset + 2 + 2 * size]
+                cache_info.append((name, size, data))
+        else:
+            cache_info = None
+
          yield Instruction(_all_opname[op], op, arg, argval, argrepr,
                            offset, start_offset, starts_line, line_number,
-                          labels_map.get(offset, None), positions)
+                          labels_map.get(offset, None), positions, cache_info)
+
  
-        caches = _get_cache_size(_all_opname[deop])
-        if not caches:
-            continue
-        if not show_caches:
-            # We still need to advance the co_positions iterator:
-            for _ in range(caches):
-                next(co_positions, ())
-            continue
-        for name, size in _cache_format[opname[deop]].items():
-            for i in range(size):
-                offset += 2
-                # Only show the fancy argrepr for a CACHE instruction when it's
-                # the first entry for a particular cache value:
-                if i == 0:
-                    data = code[offset: offset + 2 * size]
-                    argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
-                else:
-                    argrepr = ""
-                yield Instruction(
-                    "CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None,
-                    Positions(*next(co_positions, ()))
-                )
  
  def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
                  show_offsets=False):
@@ -787,7 +785,6 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
      instrs = _get_instructions_bytes(code, linestarts=linestarts,
                                             line_offset=line_offset,
                                             co_positions=co_positions,
-                                           show_caches=show_caches,
                                             original_code=original_code,
                                             labels_map=labels_map,
                                             arg_resolver=arg_resolver)
@@ -805,6 +802,23 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False,
              is_current_instr = instr.offset <= lasti \
                  <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
          formatter.print_instruction(instr, is_current_instr)
+        deop = _deoptop(instr.opcode)
+        if show_caches and instr.cache_info:
+            offset = instr.offset
+            for name, size, data in instr.cache_info:
+                for i in range(size):
+                    offset += 2
+                    # Only show the fancy argrepr for a CACHE instruction when it's
+                    # the first entry for a particular cache value:
+                    if i == 0:
+                        argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
+                    else:
+                        argrepr = ""
+                    formatter.print_instruction(
+                        Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
+                                    False, None, None, instr.positions),
+                        is_current_instr)
+
      formatter.print_exception_table(exception_entries)
  
  def _disassemble_str(source, **kwargs):
@@ -952,7 +966,6 @@ class Bytecode:
                                         linestarts=self._linestarts,
                                         line_offset=self._line_offset,
                                         co_positions=co.co_positions(),
-                                       show_caches=self.show_caches,
                                         original_code=original_code,
                                         labels_map=labels_map,
                                         arg_resolver=arg_resolver)
diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py

index 388d1266773c8a5aa316847beb3c019e9713084e..a4845065a5322e37b686fbb427f6d507d3df2a89 100644 (file)
--- a/Lib/test/support/bytecode_helper.py
+++ b/Lib/test/support/bytecode_helper.py
@@ -7,6 +7,18 @@ from _testinternalcapi import compiler_codegen, optimize_cfg, assemble_code_obje
  
  _UNSPECIFIED = object()
  
+def instructions_with_positions(instrs, co_positions):
+    # Return (instr, positions) pairs from the instrs list and co_positions
+    # iterator. The latter contains items for cache lines and the former
+    # doesn't, so those need to be skipped.
+
+    co_positions = co_positions or iter(())
+    for instr in instrs:
+        yield instr, next(co_positions, ())
+        for _, size, _ in (instr.cache_info or ()):
+            for i in range(size):
+                next(co_positions, ())
+
  class BytecodeTestCase(unittest.TestCase):
      """Custom assertion methods for inspecting bytecode."""
  
diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py

index a961ddbe17a3d3207e875243541fe246e1cb8c85..d8fb826edeb681667b0a60dbe0c5d3ea12bf2244 100644 (file)
--- a/Lib/test/test_code.py
+++ b/Lib/test/test_code.py
@@ -144,6 +144,8 @@ from test.support import (cpython_only,
                            gc_collect)
  from test.support.script_helper import assert_python_ok
  from test.support import threading_helper
+from test.support.bytecode_helper import (BytecodeTestCase,
+                                          instructions_with_positions)
  from opcode import opmap, opname
  COPY_FREE_VARS = opmap['COPY_FREE_VARS']
  
@@ -384,10 +386,8 @@ class CodeTest(unittest.TestCase):
          code = traceback.tb_frame.f_code
  
          artificial_instructions = []
-        for instr, positions in zip(
-            dis.get_instructions(code, show_caches=True),
-            code.co_positions(),
-            strict=True
+        for instr, positions in instructions_with_positions(
+            dis.get_instructions(code), code.co_positions()
          ):
              # If any of the positions is None, then all have to
              # be None as well for the case above. There are still
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py

index df6e5e4b55f728125b1c8224cb7c494c1c9887c9..f681d125db7d7a1bb98cdaf6fde3f89e42fa6cff 100644 (file)
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -12,6 +12,7 @@ import warnings
  from test import support
  from test.support import (script_helper, requires_debug_ranges,
                            requires_specialization, Py_C_RECURSION_LIMIT)
+from test.support.bytecode_helper import instructions_with_positions
  from test.support.os_helper import FakePath
  
  class TestSpecifics(unittest.TestCase):
@@ -1346,8 +1347,8 @@ class TestSourcePositions(unittest.TestCase):
      def assertOpcodeSourcePositionIs(self, code, opcode,
              line, end_line, column, end_column, occurrence=1):
  
-        for instr, position in zip(
-            dis.Bytecode(code, show_caches=True), code.co_positions(), strict=True
+        for instr, position in instructions_with_positions(
+            dis.Bytecode(code), code.co_positions()
          ):
              if instr.opname == opcode:
                  occurrence -= 1
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py

index 0ea4dc4566a4a4555d7a08d393a9668d076c4149..12e2c57e50b0ba5efe3935a4b72ab9b737ef614b 100644 (file)
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -13,6 +13,7 @@ from test.support.bytecode_helper import BytecodeTestCase
  
  import opcode
  
+CACHE = dis.opmap["CACHE"]
  
  def get_tb():
      def _error():
@@ -1227,9 +1228,9 @@ class DisTests(DisTestBase):
          else:
              # "copy" the code to un-quicken it:
              f.__code__ = f.__code__.replace()
-        for instruction in dis.get_instructions(
+        for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
              f, show_caches=True, adaptive=adaptive
-        ):
+        ), show_caches=True):
              if instruction.opname == "CACHE":
                  yield instruction.argrepr
  
@@ -1262,7 +1263,8 @@ class DisTests(DisTestBase):
          # However, this might change in the future. So we explicitly try to find
          # a CACHE entry in the instructions. If we can't do that, fail the test
  
-        for inst in dis.get_instructions(f, show_caches=True):
+        for inst in _unroll_caches_as_Instructions(
+                dis.get_instructions(f, show_caches=True), show_caches=True):
              if inst.opname == "CACHE":
                  op_offset = inst.offset - 2
                  cache_offset = inst.offset
@@ -1775,8 +1777,8 @@ expected_opinfo_simple = [
  class InstructionTestCase(BytecodeTestCase):
  
      def assertInstructionsEqual(self, instrs_1, instrs_2, /):
-        instrs_1 = [instr_1._replace(positions=None) for instr_1 in instrs_1]
-        instrs_2 = [instr_2._replace(positions=None) for instr_2 in instrs_2]
+        instrs_1 = [instr_1._replace(positions=None, cache_info=None) for instr_1 in instrs_1]
+        instrs_2 = [instr_2._replace(positions=None, cache_info=None) for instr_2 in instrs_2]
          self.assertEqual(instrs_1, instrs_2)
  
  class InstructionTests(InstructionTestCase):
@@ -1890,9 +1892,9 @@ class InstructionTests(InstructionTestCase):
                              instruction.positions.col_offset,
                              instruction.positions.end_col_offset,
                          )
-                        for instruction in dis.get_instructions(
+                        for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
                              code, adaptive=adaptive, show_caches=show_caches
-                        )
+                        ), show_caches=show_caches)
                      ]
                      self.assertEqual(co_positions, dis_positions)
  
@@ -2233,6 +2235,31 @@ class TestDisTracebackWithFile(TestDisTraceback):
              dis.distb(tb, file=output)
          return output.getvalue()
  
+def _unroll_caches_as_Instructions(instrs, show_caches=False):
+    # Cache entries are no longer reported by dis as fake instructions,
+    # but some tests assume that do. We should rewrite the tests to assume
+    # the new API, but it will be clearer to keep the tests working as
+    # before and do that in a separate PR.
+
+    for instr in instrs:
+        yield instr
+        if not show_caches:
+            continue
+
+        offset = instr.offset
+        for name, size, data in (instr.cache_info or ()):
+            for i in range(size):
+                offset += 2
+                # Only show the fancy argrepr for a CACHE instruction when it's
+                # the first entry for a particular cache value:
+                if i == 0:
+                    argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
+                else:
+                    argrepr = ""
+
+                yield Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
+                                  False, None, None, instr.positions)
+
  
  if __name__ == "__main__":
      unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst b/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst

new file mode 100644 (file)

index 0000000..b99e6bc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst
@@ -0,0 +1,3 @@
+:mod:`dis` module functions add cache information to the
+:class:`~dis.Instruction` instance rather than creating fake
+:class:`~dis.Instruction` instances to represent the cache entries.
author	Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
	Wed, 13 Dec 2023 12:00:21 +0000 (12:00 +0000)
committer	GitHub <noreply@github.com>
	Wed, 13 Dec 2023 12:00:21 +0000 (12:00 +0000)
Doc/library/dis.rst		patch \| blob \| blame \| history
Lib/dis.py		patch \| blob \| blame \| history
Lib/test/support/bytecode_helper.py		patch \| blob \| blame \| history
Lib/test/test_code.py		patch \| blob \| blame \| history
Lib/test/test_compile.py		patch \| blob \| blame \| history
Lib/test/test_dis.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2023-12-12-16-32-55.gh-issue-112962.ZZWXZn.rst	[new file with mode: 0644]	patch \| blob