git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #26881: The modulefinder module now supports extended opcode arguments.
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 8 May 2016 20:43:50 +0000 (23:43 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 8 May 2016 20:43:50 +0000 (23:43 +0300)
Lib/dis.py
Lib/modulefinder.py
Lib/test/test_modulefinder.py
Misc/NEWS

index 841208ffa147831c9b3a1b18d00541f3e01251d0..09776fea0271f117eee97a63b351ee02735bf772 100644 (file)
@@ -275,31 +275,17 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
 
     """
     labels = findlabels(code)
-    extended_arg = 0
     starts_line = None
     free = None
-    # enumerate() is not an option, since we sometimes process
-    # multiple elements on a single pass through the loop
-    n = len(code)
-    i = 0
-    while i < n:
-        op = code[i]
-        offset = i
+    for offset, op, arg in _unpack_opargs(code):
         if linestarts is not None:
-            starts_line = linestarts.get(i, None)
+            starts_line = linestarts.get(offset, None)
             if starts_line is not None:
                 starts_line += line_offset
-        is_jump_target = i in labels
-        i = i+1
-        arg = None
+        is_jump_target = offset in labels
         argval = None
         argrepr = ''
-        if op >= HAVE_ARGUMENT:
-            arg = code[i] + code[i+1]*256 + extended_arg
-            extended_arg = 0
-            i = i+2
-            if op == EXTENDED_ARG:
-                extended_arg = arg*65536
+        if arg is not None:
             #  Set argval to the dereferenced value of the argument when
             #  available, and argrepr to the string representation of argval.
             #    _disassemble_bytes needs the string repr of the
@@ -310,7 +296,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
             elif op in hasname:
                 argval, argrepr = _get_name_info(arg, names)
             elif op in hasjrel:
-                argval = i + arg
+                argval = offset + 3 + arg
                 argrepr = "to " + repr(argval)
             elif op in haslocal:
                 argval, argrepr = _get_name_info(arg, varnames)
@@ -320,7 +306,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
             elif op in hasfree:
                 argval, argrepr = _get_name_info(arg, cells)
             elif op in hasnargs:
-                argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
+                argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
         yield Instruction(opname[op], op,
                           arg, argval, argrepr,
                           offset, starts_line, is_jump_target)
@@ -356,26 +342,37 @@ def _disassemble_str(source, *, file=None):
 
 disco = disassemble                     # XXX For backwards compatibility
 
-def findlabels(code):
-    """Detect all offsets in a byte code which are jump targets.
-
-    Return the list of offsets.
-
-    """
-    labels = []
+def _unpack_opargs(code):
     # enumerate() is not an option, since we sometimes process
     # multiple elements on a single pass through the loop
+    extended_arg = 0
     n = len(code)
     i = 0
     while i < n:
         op = code[i]
+        offset = i
         i = i+1
+        arg = None
         if op >= HAVE_ARGUMENT:
-            arg = code[i] + code[i+1]*256
+            arg = code[i] + code[i+1]*256 + extended_arg
+            extended_arg = 0
             i = i+2
+            if op == EXTENDED_ARG:
+                extended_arg = arg*65536
+        yield (offset, op, arg)
+
+def findlabels(code):
+    """Detect all offsets in a byte code which are jump targets.
+
+    Return the list of offsets.
+
+    """
+    labels = []
+    for offset, op, arg in _unpack_opargs(code):
+        if arg is not None:
             label = -1
             if op in hasjrel:
-                label = i+arg
+                label = offset + 3 + arg
             elif op in hasjabs:
                 label = arg
             if label >= 0:
index 50f2462da01fa7819eb99f8ff3fcfddf9dc0d613..b8cce1f766b711d651cdb9a04313afcb31162b8b 100644 (file)
@@ -13,13 +13,12 @@ with warnings.catch_warnings():
     warnings.simplefilter('ignore', PendingDeprecationWarning)
     import imp
 
-# XXX Clean up once str8's cstor matches bytes.
-LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')])
-IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')])
-STORE_NAME = bytes([dis.opname.index('STORE_NAME')])
-STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')])
-STORE_OPS = [STORE_NAME, STORE_GLOBAL]
-HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
+LOAD_CONST = dis.opmap['LOAD_CONST']
+IMPORT_NAME = dis.opmap['IMPORT_NAME']
+STORE_NAME = dis.opmap['STORE_NAME']
+STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
+STORE_OPS = STORE_NAME, STORE_GLOBAL
+EXTENDED_ARG = dis.EXTENDED_ARG
 
 # Modulefinder does a good job at simulating Python's, but it can not
 # handle __path__ modifications packages make at runtime.  Therefore there
@@ -337,38 +336,30 @@ class ModuleFinder:
                         fullname = name + "." + sub
                         self._add_badmodule(fullname, caller)
 
-    def scan_opcodes_25(self, co,
-                     unpack = struct.unpack):
+    def scan_opcodes(self, co):
         # Scan the code, and yield 'interesting' opcode combinations
-        # Python 2.5 version (has absolute and relative imports)
         code = co.co_code
         names = co.co_names
         consts = co.co_consts
-        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
-        while code:
-            c = bytes([code[0]])
-            if c in STORE_OPS:
-                oparg, = unpack('<H', code[1:3])
+        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
+                  if op != EXTENDED_ARG]
+        for i, (op, oparg) in enumerate(opargs):
+            if op in STORE_OPS:
                 yield "store", (names[oparg],)
-                code = code[3:]
                 continue
-            if code[:9:3] == LOAD_LOAD_AND_IMPORT:
-                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
-                level = consts[oparg_1]
+            if (op == IMPORT_NAME and i >= 2
+                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
+                level = consts[opargs[i-2][1]]
+                fromlist = consts[opargs[i-1][1]]
                 if level == 0: # absolute import
-                    yield "absolute_import", (consts[oparg_2], names[oparg_3])
+                    yield "absolute_import", (fromlist, names[oparg])
                 else: # relative import
-                    yield "relative_import", (level, consts[oparg_2], names[oparg_3])
-                code = code[9:]
+                    yield "relative_import", (level, fromlist, names[oparg])
                 continue
-            if c >= HAVE_ARGUMENT:
-                code = code[3:]
-            else:
-                code = code[1:]
 
     def scan_code(self, co, m):
         code = co.co_code
-        scanner = self.scan_opcodes_25
+        scanner = self.scan_opcodes
         for what, args in scanner(co):
             if what == "store":
                 name, = args
index 4c49e9aeafcd91ee3661fdb1e28e4bcb338485f3..e4df2a90d4a4d0ddc01ff60882cb87c3948d380a 100644 (file)
@@ -319,6 +319,19 @@ class ModuleFinderTest(unittest.TestCase):
         expected = "co_filename %r changed to %r" % (old_path, new_path)
         self.assertIn(expected, output)
 
+    def test_extended_opargs(self):
+        extended_opargs_test = [
+            "a",
+            ["a", "b"],
+            [], [],
+            """\
+a.py
+                                %r
+                                import b
+b.py
+""" % list(range(2**16))]  # 2**16 constants
+        self._do_test(extended_opargs_test)
+
 
 if __name__ == "__main__":
     unittest.main()
index 870f5a3e82f9651d90ce5f51753c92d07de50b0e..f9bb8efbe0831b3cb767e795bb0a267dc89b0db5 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -116,6 +116,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #26881: The modulefinder module now supports extended opcode arguments.
+
 - Issue #23815: Fixed crashes related to directly created instances of types in
   _tkinter and curses.panel modules.
 
@@ -125,6 +127,8 @@ Library
 - Issue #26873: xmlrpc now raises ResponseError on unsupported type tags
   instead of silently return incorrect result.
 
+- Issue #26881: modulefinder now works with bytecode with extended args.
+
 - Issue #26711: Fixed the comparison of plistlib.Data with other types.
 
 - Issue #24114: Fix an uninitialized variable in `ctypes.util`.