git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-98831: Use opcode metadata for stack_effect() (#101704)
author Guido van Rossum <guido@python.org>
Thu, 9 Feb 2023 00:23:19 +0000 (16:23 -0800)
committer GitHub <noreply@github.com>
Thu, 9 Feb 2023 00:23:19 +0000 (16:23 -0800)
* Write output and metadata in a single run
  This halves the time to run the cases generator
  (most of the time goes into parsing the input).
* Declare or define opcode metadata based on NEED_OPCODE_TABLES
* Use generated metadata for stack_effect()
* compile.o depends on opcode_metadata.h
* Return -1 from _PyOpcode_num_popped/pushed for unknown opcode

Makefile.pre.in
Python/compile.c
Python/opcode_metadata.h
Tools/cases_generator/generate_cases.py

index 2559df8e74952cc9f7d0cd32aa8d6cf5e32e4ef8..7a84b953d97962393c8bffacad59e64959e124b2 100644 (file)
@@ -1445,24 +1445,21 @@ regen-opcode-targets:
 
 .PHONY: regen-cases
 regen-cases:
-       # Regenerate Python/generated_cases.c.h from Python/bytecodes.c
+       # Regenerate Python/generated_cases.c.h
+       # and Python/opcode_metadata.h
+       # from Python/bytecodes.c
        # using Tools/cases_generator/generate_cases.py
        PYTHONPATH=$(srcdir)/Tools/cases_generator \
        $(PYTHON_FOR_REGEN) \
            $(srcdir)/Tools/cases_generator/generate_cases.py \
                -i $(srcdir)/Python/bytecodes.c \
-               -o $(srcdir)/Python/generated_cases.c.h.new
+               -o $(srcdir)/Python/generated_cases.c.h.new \
+               -m $(srcdir)/Python/opcode_metadata.h.new
        $(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new
-       # Regenerate Python/opcode_metadata.h from Python/bytecodes.c
-       # using Tools/cases_generator/generate_cases.py --metadata
-       PYTHONPATH=$(srcdir)/Tools/cases_generator \
-       $(PYTHON_FOR_REGEN) \
-           $(srcdir)/Tools/cases_generator/generate_cases.py \
-               --metadata \
-               -i $(srcdir)/Python/bytecodes.c \
-               -o $(srcdir)/Python/opcode_metadata.h.new
        $(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new
 
+Python/compile.o: $(srcdir)/Python/opcode_metadata.h
+
 Python/ceval.o: \
                $(srcdir)/Python/ceval_macros.h \
                $(srcdir)/Python/condvar.h \
index df2dffb95bbd7eed9bcae202141d01a65d4c719e..a3c915c3c14a968964fedf4c3b08dbf276c79d3c 100644 (file)
@@ -1074,135 +1074,49 @@ basicblock_next_instr(basicblock *b)
 static int
 stack_effect(int opcode, int oparg, int jump)
 {
-    switch (opcode) {
-        case NOP:
-        case EXTENDED_ARG:
-        case RESUME:
-        case CACHE:
-            return 0;
-
-        /* Stack manipulation */
-        case POP_TOP:
-            return -1;
-        case SWAP:
-            return 0;
-        case END_FOR:
-            return -2;
-
-        /* Unary operators */
-        case UNARY_NEGATIVE:
-        case UNARY_NOT:
-        case UNARY_INVERT:
-            return 0;
-
-        case SET_ADD:
-        case LIST_APPEND:
-            return -1;
-        case MAP_ADD:
-            return -2;
-
-        case BINARY_SUBSCR:
-            return -1;
-        case BINARY_SLICE:
-            return -2;
-        case STORE_SUBSCR:
-            return -3;
-        case STORE_SLICE:
-            return -4;
-        case DELETE_SUBSCR:
-            return -2;
-
-        case GET_ITER:
-            return 0;
-
-        case LOAD_BUILD_CLASS:
-            return 1;
+    if (0 <= opcode && opcode <= MAX_REAL_OPCODE) {
+        if (_PyOpcode_Deopt[opcode] != opcode) {
+            // Specialized instructions are not supported.
+            return PY_INVALID_STACK_EFFECT;
+        }
+        int popped, pushed;
+        if (jump > 0) {
+            popped = _PyOpcode_num_popped(opcode, oparg, true);
+            pushed = _PyOpcode_num_pushed(opcode, oparg, true);
+        }
+        else {
+            popped = _PyOpcode_num_popped(opcode, oparg, false);
+            pushed = _PyOpcode_num_pushed(opcode, oparg, false);
+        }
+        if (popped < 0 || pushed < 0) {
+            return PY_INVALID_STACK_EFFECT;
+        }
+        if (jump >= 0) {
+            return pushed - popped;
+        }
+        if (jump < 0) {
+            // Compute max(pushed - popped, alt_pushed - alt_popped)
+            int alt_popped = _PyOpcode_num_popped(opcode, oparg, true);
+            int alt_pushed = _PyOpcode_num_pushed(opcode, oparg, true);
+            if (alt_popped < 0 || alt_pushed < 0) {
+                return PY_INVALID_STACK_EFFECT;
+            }
+            int diff = pushed - popped;
+            int alt_diff = alt_pushed - alt_popped;
+            if (alt_diff > diff) {
+                return alt_diff;
+            }
+            return diff;
+        }
+    }
 
-        case RETURN_VALUE:
-            return -1;
-        case RETURN_CONST:
-            return 0;
-        case SETUP_ANNOTATIONS:
-            return 0;
-        case YIELD_VALUE:
-            return 0;
+    // Pseudo ops
+    switch (opcode) {
         case POP_BLOCK:
-            return 0;
-        case POP_EXCEPT:
-            return -1;
-
-        case STORE_NAME:
-            return -1;
-        case DELETE_NAME:
-            return 0;
-        case UNPACK_SEQUENCE:
-            return oparg-1;
-        case UNPACK_EX:
-            return (oparg&0xFF) + (oparg>>8);
-        case FOR_ITER:
-            return 1;
-        case SEND:
-            return jump > 0 ? -1 : 0;
-        case STORE_ATTR:
-            return -2;
-        case DELETE_ATTR:
-            return -1;
-        case STORE_GLOBAL:
-            return -1;
-        case DELETE_GLOBAL:
-            return 0;
-        case LOAD_CONST:
-            return 1;
-        case LOAD_NAME:
-            return 1;
-        case BUILD_TUPLE:
-        case BUILD_LIST:
-        case BUILD_SET:
-        case BUILD_STRING:
-            return 1-oparg;
-        case BUILD_MAP:
-            return 1 - 2*oparg;
-        case BUILD_CONST_KEY_MAP:
-            return -oparg;
-        case LOAD_ATTR:
-            return (oparg & 1);
-        case COMPARE_OP:
-        case IS_OP:
-        case CONTAINS_OP:
-            return -1;
-        case CHECK_EXC_MATCH:
-            return 0;
-        case CHECK_EG_MATCH:
-            return 0;
-        case IMPORT_NAME:
-            return -1;
-        case IMPORT_FROM:
-            return 1;
-
-        /* Jumps */
-        case JUMP_FORWARD:
-        case JUMP_BACKWARD:
         case JUMP:
-        case JUMP_BACKWARD_NO_INTERRUPT:
         case JUMP_NO_INTERRUPT:
             return 0;
 
-        case JUMP_IF_TRUE_OR_POP:
-        case JUMP_IF_FALSE_OR_POP:
-            return jump ? 0 : -1;
-
-        case POP_JUMP_IF_NONE:
-        case POP_JUMP_IF_NOT_NONE:
-        case POP_JUMP_IF_FALSE:
-        case POP_JUMP_IF_TRUE:
-            return -1;
-
-        case COMPARE_AND_BRANCH:
-            return -2;
-
-        case LOAD_GLOBAL:
-            return (oparg & 1) + 1;
-
         /* Exception handling pseudo-instructions */
         case SETUP_FINALLY:
             /* 0 in the normal flow.
@@ -1218,109 +1132,13 @@ stack_effect(int opcode, int oparg, int jump)
              * of __(a)enter__ and push 2 values before jumping to the handler
              * if an exception be raised. */
             return jump ? 1 : 0;
-        case PREP_RERAISE_STAR:
-             return -1;
-        case RERAISE:
-            return -1;
-        case PUSH_EXC_INFO:
-            return 1;
-
-        case WITH_EXCEPT_START:
-            return 1;
-
-        case LOAD_FAST:
-        case LOAD_FAST_CHECK:
-            return 1;
-        case STORE_FAST:
-            return -1;
-        case DELETE_FAST:
-            return 0;
-
-        case RETURN_GENERATOR:
-            return 0;
 
-        case RAISE_VARARGS:
-            return -oparg;
-
-        /* Functions and calls */
-        case KW_NAMES:
-            return 0;
-        case CALL:
-            return -1-oparg;
-        case CALL_INTRINSIC_1:
-            return 0;
-        case CALL_FUNCTION_EX:
-            return -2 - ((oparg & 0x01) != 0);
-        case MAKE_FUNCTION:
-            return 0 - ((oparg & 0x01) != 0) - ((oparg & 0x02) != 0) -
-                ((oparg & 0x04) != 0) - ((oparg & 0x08) != 0);
-        case BUILD_SLICE:
-            if (oparg == 3)
-                return -2;
-            else
-                return -1;
-
-        /* Closures */
-        case MAKE_CELL:
-        case COPY_FREE_VARS:
-            return 0;
-        case LOAD_CLOSURE:
-            return 1;
-        case LOAD_DEREF:
-        case LOAD_CLASSDEREF:
-            return 1;
-        case STORE_DEREF:
-            return -1;
-        case DELETE_DEREF:
-            return 0;
-
-        /* Iterators and generators */
-        case GET_AWAITABLE:
-            return 0;
-
-        case BEFORE_ASYNC_WITH:
-        case BEFORE_WITH:
-            return 1;
-        case GET_AITER:
-            return 0;
-        case GET_ANEXT:
-            return 1;
-        case GET_YIELD_FROM_ITER:
-            return 0;
-        case END_ASYNC_FOR:
-            return -2;
-        case CLEANUP_THROW:
-            return -2;
-        case FORMAT_VALUE:
-            /* If there's a fmt_spec on the stack, we go from 2->1,
-               else 1->1. */
-            return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0;
         case LOAD_METHOD:
             return 1;
-        case LOAD_ASSERTION_ERROR:
-            return 1;
-        case LIST_EXTEND:
-        case SET_UPDATE:
-        case DICT_MERGE:
-        case DICT_UPDATE:
-            return -1;
-        case MATCH_CLASS:
-            return -2;
-        case GET_LEN:
-        case MATCH_MAPPING:
-        case MATCH_SEQUENCE:
-        case MATCH_KEYS:
-            return 1;
-        case COPY:
-        case PUSH_NULL:
-            return 1;
-        case BINARY_OP:
-            return -1;
-        case INTERPRETER_EXIT:
-            return -1;
         default:
             return PY_INVALID_STACK_EFFECT;
     }
+
     return PY_INVALID_STACK_EFFECT; /* not reachable */
 }
 
index 98791043f55271867f95a2cdb7af02a287e052ba..db1dfd37a901329873747ef2e4b5fa51fc1bbcdf 100644 (file)
@@ -2,8 +2,10 @@
 // from Python/bytecodes.c
 // Do not edit!
 
-#ifndef NDEBUG
-static int
+#ifndef NEED_OPCODE_TABLES
+extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
+#else
+int
 _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
     switch(opcode) {
         case NOP:
@@ -345,13 +347,15 @@ _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
         case CACHE:
             return 0;
         default:
-            Py_UNREACHABLE();
+            return -1;
     }
 }
 #endif
 
-#ifndef NDEBUG
-static int
+#ifndef NEED_OPCODE_TABLES
+extern int _PyOpcode_num_pushed(int opcode, int oparg, bool jump);
+#else
+int
 _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
     switch(opcode) {
         case NOP:
@@ -693,10 +697,11 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
         case CACHE:
             return 0;
         default:
-            Py_UNREACHABLE();
+            return -1;
     }
 }
 #endif
+
 enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };
 enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC0, INSTR_FMT_IBC000, INSTR_FMT_IBC0000, INSTR_FMT_IBC00000000, INSTR_FMT_IBIB, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC000 };
 struct opcode_metadata {
@@ -705,7 +710,12 @@ struct opcode_metadata {
     enum Direction dir_op3;
     bool valid_entry;
     enum InstructionFormat instr_format;
-} _PyOpcode_opcode_metadata[256] = {
+};
+
+#ifndef NEED_OPCODE_TABLES
+extern const struct opcode_metadata _PyOpcode_opcode_metadata[256];
+#else
+const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {
     [NOP] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
     [RESUME] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [LOAD_CLOSURE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
@@ -876,3 +886,4 @@ struct opcode_metadata {
     [EXTENDED_ARG] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IB },
     [CACHE] = { DIR_NONE, DIR_NONE, DIR_NONE, true, INSTR_FMT_IX },
 };
+#endif
index 1fcfbb6770902930d0160f6a778673546c4612c6..aa8e14075c87387ee862c3bc68b24507b19fd61c 100644 (file)
@@ -43,10 +43,7 @@ arg_parser.add_argument(
     "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
 )
 arg_parser.add_argument(
-    "-m",
-    "--metadata",
-    action="store_true",
-    help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
+    "-m", "--metadata", type=str, help="Generated metadata", default=DEFAULT_METADATA_OUTPUT
 )
 
 
@@ -498,13 +495,15 @@ class Analyzer:
 
     filename: str
     output_filename: str
+    metadata_filename: str
     src: str
     errors: int = 0
 
-    def __init__(self, filename: str, output_filename: str):
+    def __init__(self, filename: str, output_filename: str, metadata_filename: str):
         """Read the input file."""
         self.filename = filename
         self.output_filename = output_filename
+        self.metadata_filename = metadata_filename
         with open(filename) as f:
             self.src = f.read()
 
@@ -889,21 +888,25 @@ class Analyzer:
         def write_function(
             direction: str, data: list[tuple[AnyInstruction, str]]
         ) -> None:
-            self.out.emit("\n#ifndef NDEBUG")
-            self.out.emit("static int")
+            self.out.emit("")
+            self.out.emit("#ifndef NEED_OPCODE_TABLES")
+            self.out.emit(f"extern int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump);")
+            self.out.emit("#else")
+            self.out.emit("int")
             self.out.emit(f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{")
             self.out.emit("    switch(opcode) {")
             for instr, effect in data:
                 self.out.emit(f"        case {instr.name}:")
                 self.out.emit(f"            return {effect};")
             self.out.emit("        default:")
-            self.out.emit("            Py_UNREACHABLE();")
+            self.out.emit("            return -1;")
             self.out.emit("    }")
             self.out.emit("}")
             self.out.emit("#endif")
 
         write_function("popped", popped_data)
         write_function("pushed", pushed_data)
+        self.out.emit("")
 
     def write_metadata(self) -> None:
         """Write instruction metadata to output file."""
@@ -924,7 +927,7 @@ class Analyzer:
         # Turn it into a list of enum definitions.
         format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]
 
-        with open(self.output_filename, "w") as f:
+        with open(self.metadata_filename, "w") as f:
             # Write provenance header
             f.write(f"// This file is generated by {THIS} --metadata\n")
             f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
@@ -935,7 +938,7 @@ class Analyzer:
 
             self.write_stack_effect_functions()
 
-            # Write variable definition
+            # Write type definitions
             self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
             self.out.emit(f"enum InstructionFormat {{ {', '.join(format_enums)} }};")
             self.out.emit("struct opcode_metadata {")
@@ -945,7 +948,14 @@ class Analyzer:
                 self.out.emit("enum Direction dir_op3;")
                 self.out.emit("bool valid_entry;")
                 self.out.emit("enum InstructionFormat instr_format;")
-            self.out.emit("} _PyOpcode_opcode_metadata[256] = {")
+            self.out.emit("};")
+            self.out.emit("")
+
+            # Write metadata array declaration
+            self.out.emit("#ifndef NEED_OPCODE_TABLES")
+            self.out.emit("extern const struct opcode_metadata _PyOpcode_opcode_metadata[256];")
+            self.out.emit("#else")
+            self.out.emit("const struct opcode_metadata _PyOpcode_opcode_metadata[256] = {")
 
             # Write metadata for each instruction
             for thing in self.everything:
@@ -962,6 +972,7 @@ class Analyzer:
 
             # Write end of array
             self.out.emit("};")
+            self.out.emit("#endif")
 
     def write_metadata_for_inst(self, instr: Instruction) -> None:
         """Write metadata for a single instruction."""
@@ -1184,18 +1195,13 @@ def variable_used(node: parser.Node, name: str) -> bool:
 def main():
     """Parse command line, parse input, analyze, write output."""
     args = arg_parser.parse_args()  # Prints message and sys.exit(2) on error
-    if args.metadata:
-        if args.output == DEFAULT_OUTPUT:
-            args.output = DEFAULT_METADATA_OUTPUT
-    a = Analyzer(args.input, args.output)  # Raises OSError if input unreadable
+    a = Analyzer(args.input, args.output, args.metadata)  # Raises OSError if input unreadable
     a.parse()  # Raises SyntaxError on failure
     a.analyze()  # Prints messages and sets a.errors on failure
     if a.errors:
         sys.exit(f"Found {a.errors} errors")
-    if args.metadata:
-        a.write_metadata()
-    else:
-        a.write_instructions()  # Raises OSError if output can't be written
+    a.write_instructions()  # Raises OSError if output can't be written
+    a.write_metadata()
 
 
 if __name__ == "__main__":