gh-93678: add _testinternalcapi.optimize_cfg() and test utils for compiler optimizati...
author     Irit Katriel <1055913+iritkatriel@users.noreply.github.com>
           Wed, 24 Aug 2022 10:02:53 +0000 (11:02 +0100)
committer  GitHub <noreply@github.com>
           Wed, 24 Aug 2022 10:02:53 +0000 (11:02 +0100)
Include/internal/pycore_compile.h
Include/internal/pycore_global_strings.h
Include/internal/pycore_runtime_init_generated.h
Lib/test/support/bytecode_helper.py
Lib/test/test_peepholer.py
Misc/NEWS.d/next/Core and Builtins/2022-08-15-20-52-41.gh-issue-93678.X7GuIJ.rst [new file with mode: 0644]
Modules/_testinternalcapi.c
Modules/clinic/_testinternalcapi.c.h [new file with mode: 0644]
Python/compile.c

diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h
index 06a6082cddae6a3e01836988de9baa0463ed28ca..1a628a08ca4ebf4ba48fe9b412b53f9a701baf46 100644 (file)
@@ -38,6 +38,11 @@ extern int _PyAST_Optimize(
     struct _arena *arena,
     _PyASTOptimizeState *state);
 
+/* Access compiler internals for unit testing */
+PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg(
+        PyObject *instructions,
+        PyObject *consts);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index aada220395023de85f28be3cea7580e8bfa7b991..c736bfecd077fd46253101a88ebde52d0abfe514 100644 (file)
@@ -298,6 +298,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(code)
         STRUCT_FOR_ID(command)
         STRUCT_FOR_ID(comment_factory)
+        STRUCT_FOR_ID(consts)
         STRUCT_FOR_ID(context)
         STRUCT_FOR_ID(cookie)
         STRUCT_FOR_ID(copy)
@@ -407,6 +408,7 @@ struct _Py_global_strings {
         STRUCT_FOR_ID(input)
         STRUCT_FOR_ID(insert_comments)
         STRUCT_FOR_ID(insert_pis)
+        STRUCT_FOR_ID(instructions)
         STRUCT_FOR_ID(intern)
         STRUCT_FOR_ID(intersection)
         STRUCT_FOR_ID(isatty)
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 09890cd812015b454b00692ff434619dd0aec521..58d9e934b96c19d87a2339caf096583e5a0cc4a4 100644 (file)
@@ -807,6 +807,7 @@ extern "C" {
                 INIT_ID(code), \
                 INIT_ID(command), \
                 INIT_ID(comment_factory), \
+                INIT_ID(consts), \
                 INIT_ID(context), \
                 INIT_ID(cookie), \
                 INIT_ID(copy), \
@@ -916,6 +917,7 @@ extern "C" {
                 INIT_ID(input), \
                 INIT_ID(insert_comments), \
                 INIT_ID(insert_pis), \
+                INIT_ID(instructions), \
                 INIT_ID(intern), \
                 INIT_ID(intersection), \
                 INIT_ID(isatty), \
@@ -1916,6 +1918,8 @@ _PyUnicode_InitStaticStrings(void) {
     PyUnicode_InternInPlace(&string);
     string = &_Py_ID(comment_factory);
     PyUnicode_InternInPlace(&string);
+    string = &_Py_ID(consts);
+    PyUnicode_InternInPlace(&string);
     string = &_Py_ID(context);
     PyUnicode_InternInPlace(&string);
     string = &_Py_ID(cookie);
@@ -2134,6 +2138,8 @@ _PyUnicode_InitStaticStrings(void) {
     PyUnicode_InternInPlace(&string);
     string = &_Py_ID(insert_pis);
     PyUnicode_InternInPlace(&string);
+    string = &_Py_ID(instructions);
+    PyUnicode_InternInPlace(&string);
     string = &_Py_ID(intern);
     PyUnicode_InternInPlace(&string);
     string = &_Py_ID(intersection);
@@ -5755,6 +5761,10 @@ _PyStaticObjects_CheckRefcnt(void) {
         _PyObject_Dump((PyObject *)&_Py_ID(comment_factory));
         Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
     };
+    if (Py_REFCNT((PyObject *)&_Py_ID(consts)) < _PyObject_IMMORTAL_REFCNT) {
+        _PyObject_Dump((PyObject *)&_Py_ID(consts));
+        Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
+    };
     if (Py_REFCNT((PyObject *)&_Py_ID(context)) < _PyObject_IMMORTAL_REFCNT) {
         _PyObject_Dump((PyObject *)&_Py_ID(context));
         Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
@@ -6191,6 +6201,10 @@ _PyStaticObjects_CheckRefcnt(void) {
         _PyObject_Dump((PyObject *)&_Py_ID(insert_pis));
         Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
     };
+    if (Py_REFCNT((PyObject *)&_Py_ID(instructions)) < _PyObject_IMMORTAL_REFCNT) {
+        _PyObject_Dump((PyObject *)&_Py_ID(instructions));
+        Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
+    };
     if (Py_REFCNT((PyObject *)&_Py_ID(intern)) < _PyObject_IMMORTAL_REFCNT) {
         _PyObject_Dump((PyObject *)&_Py_ID(intern));
         Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
diff --git a/Lib/test/support/bytecode_helper.py b/Lib/test/support/bytecode_helper.py
index 471d4a68f915aab082591921ebdc3fcfb0996426..05b54911e3f25a93f31525f4837ec63b4dfb62e9 100644 (file)
@@ -3,6 +3,7 @@
 import unittest
 import dis
 import io
+from _testinternalcapi import optimize_cfg
 
 _UNSPECIFIED = object()
 
@@ -40,3 +41,95 @@ class BytecodeTestCase(unittest.TestCase):
                     msg = '(%s,%r) occurs in bytecode:\n%s'
                     msg = msg % (opname, argval, disassembly)
                     self.fail(msg)
+
+
+class CfgOptimizationTestCase(unittest.TestCase):
+
+    HAS_ARG = set(dis.hasarg)
+    HAS_TARGET = set(dis.hasjrel + dis.hasjabs + dis.hasexc)
+    HAS_ARG_OR_TARGET = HAS_ARG.union(HAS_TARGET)
+
+    def setUp(self):
+        self.last_label = 0
+
+    def Label(self):
+        self.last_label += 1
+        return self.last_label
+
+    def complete_insts_info(self, insts):
+        # fill in omitted fields in location, and oparg 0 for ops with no arg.
+        instructions = []
+        for item in insts:
+            if isinstance(item, int):
+                instructions.append(item)
+            else:
+                assert isinstance(item, tuple)
+                inst = list(reversed(item))
+                opcode = dis.opmap[inst.pop()]
+                oparg = inst.pop() if opcode in self.HAS_ARG_OR_TARGET else 0
+                loc = inst + [-1] * (4 - len(inst))
+                instructions.append((opcode, oparg, *loc))
+        return instructions
+
+    def normalize_insts(self, insts):
+        """ Map labels to instruction index.
+            Remove labels which are not used as jump targets.
+        """
+        labels_map = {}
+        targets = set()
+        idx = 1
+        for item in insts:
+            assert isinstance(item, (int, tuple))
+            if isinstance(item, tuple):
+                opcode, oparg, *_ = item
+                if dis.opmap.get(opcode, opcode) in self.HAS_TARGET:
+                    targets.add(oparg)
+                idx += 1
+            elif isinstance(item, int):
+                assert item not in labels_map, "label reused"
+                labels_map[item] = idx
+
+        res = []
+        for item in insts:
+            if isinstance(item, int) and item in targets:
+                if not res or labels_map[item] != res[-1]:
+                    res.append(labels_map[item])
+            elif isinstance(item, tuple):
+                opcode, oparg, *loc = item
+                opcode = dis.opmap.get(opcode, opcode)
+                if opcode in self.HAS_TARGET:
+                    arg = labels_map[oparg]
+                else:
+                    arg = oparg if opcode in self.HAS_TARGET else None
+                opcode = dis.opname[opcode]
+                res.append((opcode, arg, *loc))
+        return res
+
+    def get_optimized(self, insts, consts):
+        insts = self.complete_insts_info(insts)
+        insts = optimize_cfg(insts, consts)
+        return insts, consts
+
+    def compareInstructions(self, actual_, expected_):
+        # get two lists where each entry is a label or
+        # an instruction tuple. Compare them, while mapping
+        # each actual label to a corresponding expected label
+        # based on their locations.
+
+        self.assertIsInstance(actual_, list)
+        self.assertIsInstance(expected_, list)
+
+        actual = self.normalize_insts(actual_)
+        expected = self.normalize_insts(expected_)
+        self.assertEqual(len(actual), len(expected))
+
+        # compare instructions
+        for act, exp in zip(actual, expected):
+            if isinstance(act, int):
+                self.assertEqual(exp, act)
+                continue
+            self.assertIsInstance(exp, tuple)
+            self.assertIsInstance(act, tuple)
+            # pad exp with -1's (if location info is incomplete)
+            exp += (-1,) * (len(act) - len(exp))
+            self.assertEqual(exp, act)
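
A note on the helpers above: tests write instructions in a compact form, where each entry is either an integer label (as returned by Label()) or a tuple starting with an opcode name and oparg, with trailing location fields optional. The following is a minimal sketch of what complete_insts_info() expands that into; it assumes a CPython build that includes this change, and the _Demo subclass exists only to get an instance to call the helper on.

from test.support.bytecode_helper import CfgOptimizationTestCase

class _Demo(CfgOptimizationTestCase):
    def runTest(self):              # dummy test method so the class can be instantiated
        pass

tc = _Demo()
tc.setUp()
lbl = tc.Label()                    # first call returns 1
shorthand = [
    ('LOAD_NAME', 1, 11),           # (opname, oparg, lineno); rest of the location omitted
    ('POP_JUMP_IF_TRUE', lbl, 12),
    lbl,                            # jump target label
    ('RETURN_VALUE', 13),           # no oparg: complete_insts_info() fills in 0
]
print(tc.complete_insts_info(shorthand))
# Expected shape (numeric opcodes come from dis.opmap on that build):
# [(<LOAD_NAME>, 1, 11, -1, -1, -1),
#  (<POP_JUMP_IF_TRUE>, 1, 12, -1, -1, -1),
#  1,
#  (<RETURN_VALUE>, 0, 13, -1, -1, -1)]
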
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index e03c42c2f823dc8241be1ba3e95b895b633b4888..7ece468363be5882acd33e90955aa579ada6fabd 100644 (file)
@@ -4,7 +4,7 @@ import sys
 import textwrap
 import unittest
 
-from test.support.bytecode_helper import BytecodeTestCase
+from test.support.bytecode_helper import BytecodeTestCase, CfgOptimizationTestCase
 
 
 def compile_pattern_with_fast_locals(pattern):
@@ -864,5 +864,81 @@ class TestMarkingVariablesAsUnKnown(BytecodeTestCase):
         self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
 
 
+class DirectiCfgOptimizerTests(CfgOptimizationTestCase):
+
+    def cfg_optimization_test(self, insts, expected_insts,
+                              consts=None, expected_consts=None):
+        if expected_consts is None:
+            expected_consts = consts
+        opt_insts, opt_consts = self.get_optimized(insts, consts)
+        self.compareInstructions(opt_insts, expected_insts)
+        self.assertEqual(opt_consts, expected_consts)
+
+    def test_conditional_jump_forward_non_const_condition(self):
+        insts = [
+            ('LOAD_NAME', 1, 11),
+            ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
+            ('LOAD_CONST', 2, 13),
+            lbl,
+            ('LOAD_CONST', 3, 14),
+        ]
+        expected = [
+            ('LOAD_NAME', '1', 11),
+            ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
+            ('LOAD_CONST', '2', 13),
+            lbl,
+            ('LOAD_CONST', '3', 14)
+        ]
+        self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+    def test_conditional_jump_forward_const_condition(self):
+        # The unreachable branch of the jump is removed
+
+        insts = [
+            ('LOAD_CONST', 3, 11),
+            ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
+            ('LOAD_CONST', 2, 13),
+            lbl,
+            ('LOAD_CONST', 3, 14),
+        ]
+        expected = [
+            ('NOP', None, 11),
+            ('JUMP', lbl := self.Label(), 12),
+            lbl,
+            ('LOAD_CONST', '3', 14)
+        ]
+        self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+    def test_conditional_jump_backward_non_const_condition(self):
+        insts = [
+            lbl1 := self.Label(),
+            ('LOAD_NAME', 1, 11),
+            ('POP_JUMP_IF_TRUE', lbl1, 12),
+            ('LOAD_CONST', 2, 13),
+        ]
+        expected = [
+            lbl := self.Label(),
+            ('LOAD_NAME', '1', 11),
+            ('POP_JUMP_IF_TRUE', lbl, 12),
+            ('LOAD_CONST', '2', 13)
+        ]
+        self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+    def test_conditional_jump_backward_const_condition(self):
+        # The unreachable branch of the jump is removed
+        insts = [
+            lbl1 := self.Label(),
+            ('LOAD_CONST', 1, 11),
+            ('POP_JUMP_IF_TRUE', lbl1, 12),
+            ('LOAD_CONST', 2, 13),
+        ]
+        expected = [
+            lbl := self.Label(),
+            ('NOP', None, 11),
+            ('JUMP', lbl, 12)
+        ]
+        self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+
 if __name__ == "__main__":
     unittest.main()
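
For comparison, the dead-branch elimination that test_conditional_jump_forward_const_condition checks at the CFG level can also be seen end to end with plain dis, without _testinternalcapi. This is only an illustrative sketch (f is a made-up function and the exact bytecode varies between CPython versions):

import dis

def f():
    if True:          # constant condition: the compiler folds the test away
        return 1
    return 2          # unreachable branch

dis.dis(f)            # the dead branch should not show up in the disassembly
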
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-08-15-20-52-41.gh-issue-93678.X7GuIJ.rst b/Misc/NEWS.d/next/Core and Builtins/2022-08-15-20-52-41.gh-issue-93678.X7GuIJ.rst
new file mode 100644 (file)
index 0000000..9e2b90b
--- /dev/null
@@ -0,0 +1 @@
+Added a test harness for direct unit tests of the compiler's optimization stage. The ``_testinternalcapi.optimize_cfg()`` function runs the optimizer on a sequence of instructions. The ``CfgOptimizationTestCase`` class in ``test.support`` has utilities for invoking the optimizer and checking the output.
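
A minimal sketch of calling the new entry point directly, outside the test harness. It assumes a CPython build that includes this change: _testinternalcapi is an internal test module, the instruction format is the label/6-tuple form described in Python/compile.c below, and LBL is just an illustrative label id. The expected outcome mirrors test_conditional_jump_forward_const_condition above.

import dis
from _testinternalcapi import optimize_cfg

LBL = 1                                                   # label ids are small positive ints
insts = [
    (dis.opmap['LOAD_CONST'], 3, 11, -1, -1, -1),         # constant (truthy) condition
    (dis.opmap['POP_JUMP_IF_TRUE'], LBL, 12, -1, -1, -1),
    (dis.opmap['LOAD_CONST'], 2, 13, -1, -1, -1),         # unreachable branch
    LBL,
    (dis.opmap['LOAD_CONST'], 3, 14, -1, -1, -1),
]
consts = list(range(5))
for item in optimize_cfg(insts, consts):
    print(item)
# Per the test expectations above, the jump on a constant condition comes back
# as NOP + JUMP and the unreachable LOAD_CONST is gone.
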
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 238de749fffc5d23f3abd6426f17b10931cebfcc..9d92b076387f6133aa2ad563cbfeeb1e7f17d3f8 100644 (file)
@@ -14,6 +14,7 @@
 #include "Python.h"
 #include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
 #include "pycore_bitutils.h"     // _Py_bswap32()
+#include "pycore_compile.h"      // _PyCompile_OptimizeCfg()
 #include "pycore_fileutils.h"    // _Py_normpath
 #include "pycore_frame.h"        // _PyInterpreterFrame
 #include "pycore_gc.h"           // PyGC_Head
 #include "pycore_pystate.h"      // _PyThreadState_GET()
 #include "osdefs.h"              // MAXPATHLEN
 
+#include "clinic/_testinternalcapi.c.h"
 
+/*[clinic input]
+module _testinternalcapi
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7bb583d8c9eb9a78]*/
 static PyObject *
 get_configs(PyObject *self, PyObject *Py_UNUSED(args))
 {
@@ -525,6 +531,25 @@ set_eval_frame_record(PyObject *self, PyObject *list)
 }
 
 
+/*[clinic input]
+
+_testinternalcapi.optimize_cfg -> object
+
+  instructions: object
+  consts: object
+
+Apply compiler optimizations to an instruction list.
+[clinic start generated code]*/
+
+static PyObject *
+_testinternalcapi_optimize_cfg_impl(PyObject *module, PyObject *instructions,
+                                    PyObject *consts)
+/*[clinic end generated code: output=5412aeafca683c8b input=7e8a3de86ebdd0f9]*/
+{
+    return _PyCompile_OptimizeCfg(instructions, consts);
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -543,6 +568,7 @@ static PyMethodDef TestMethods[] = {
     {"DecodeLocaleEx", decode_locale_ex, METH_VARARGS},
     {"set_eval_frame_default", set_eval_frame_default, METH_NOARGS, NULL},
     {"set_eval_frame_record", set_eval_frame_record, METH_O, NULL},
+    _TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Modules/clinic/_testinternalcapi.c.h b/Modules/clinic/_testinternalcapi.c.h
new file mode 100644 (file)
index 0000000..8113fff
--- /dev/null
@@ -0,0 +1,68 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+#  include "pycore_gc.h"            // PyGC_Head
+#  include "pycore_runtime.h"       // _Py_ID()
+#endif
+
+
+PyDoc_STRVAR(_testinternalcapi_optimize_cfg__doc__,
+"optimize_cfg($module, /, instructions, consts)\n"
+"--\n"
+"\n"
+"Apply compiler optimizations to an instruction list.");
+
+#define _TESTINTERNALCAPI_OPTIMIZE_CFG_METHODDEF    \
+    {"optimize_cfg", _PyCFunction_CAST(_testinternalcapi_optimize_cfg), METH_FASTCALL|METH_KEYWORDS, _testinternalcapi_optimize_cfg__doc__},
+
+static PyObject *
+_testinternalcapi_optimize_cfg_impl(PyObject *module, PyObject *instructions,
+                                    PyObject *consts);
+
+static PyObject *
+_testinternalcapi_optimize_cfg(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 2
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(instructions), &_Py_ID(consts), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"instructions", "consts", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "optimize_cfg",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[2];
+    PyObject *instructions;
+    PyObject *consts;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 2, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    instructions = args[0];
+    consts = args[1];
+    return_value = _testinternalcapi_optimize_cfg_impl(module, instructions, consts);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=3b1fd713290f68a9 input=a9049054013a1b77]*/
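
The generated parser above accepts the two arguments positionally or by keyword, matching the signature optimize_cfg($module, /, instructions, consts) in the docstring. A small sketch, again assuming a build that ships this module:

from _testinternalcapi import optimize_cfg

# Both arguments are required; the generated argument parser raises TypeError otherwise.
try:
    optimize_cfg([])                  # 'consts' not supplied
except TypeError as err:
    print(err)

# Keyword form is equivalent to the positional calls used in the tests:
# optimize_cfg(instructions=insts, consts=consts)
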
diff --git a/Python/compile.c b/Python/compile.c
index 339e0e792be4169a097b9dbf5c21a41300e02c9c..e5ac162ccc0a5b333f4d212bf212aec46d801fd0 100644 (file)
@@ -457,6 +457,7 @@ typedef struct {
 
 static int basicblock_next_instr(basicblock *);
 
+static basicblock *cfg_builder_new_block(cfg_builder *g);
 static int cfg_builder_maybe_start_new_block(cfg_builder *g);
 static int cfg_builder_addop_i(cfg_builder *g, int opcode, Py_ssize_t oparg, struct location loc);
 
@@ -767,8 +768,20 @@ cfg_builder_check(cfg_builder *g)
     }
 }
 
+static int
+cfg_builder_init(cfg_builder *g)
+{
+    g->g_block_list = NULL;
+    basicblock *block = cfg_builder_new_block(g);
+    if (block == NULL)
+        return 0;
+    g->g_curblock = g->g_entryblock = block;
+    g->g_current_label = NO_LABEL;
+    return 1;
+}
+
 static void
-cfg_builder_free(cfg_builder* g)
+cfg_builder_fini(cfg_builder* g)
 {
     cfg_builder_check(g);
     basicblock *b = g->g_block_list;
@@ -785,7 +798,7 @@ cfg_builder_free(cfg_builder* g)
 static void
 compiler_unit_free(struct compiler_unit *u)
 {
-    cfg_builder_free(&u->u_cfg_builder);
+    cfg_builder_fini(&u->u_cfg_builder);
     Py_CLEAR(u->u_ste);
     Py_CLEAR(u->u_name);
     Py_CLEAR(u->u_qualname);
@@ -1708,7 +1721,6 @@ compiler_enter_scope(struct compiler *c, identifier name,
                      int scope_type, void *key, int lineno)
 {
     struct compiler_unit *u;
-    basicblock *block;
 
     u = (struct compiler_unit *)PyObject_Calloc(1, sizeof(
                                             struct compiler_unit));
@@ -1786,12 +1798,9 @@ compiler_enter_scope(struct compiler *c, identifier name,
     c->c_nestlevel++;
 
     cfg_builder *g = CFG_BUILDER(c);
-    g->g_block_list = NULL;
-    block = cfg_builder_new_block(g);
-    if (block == NULL)
+    if (!cfg_builder_init(g)) {
         return 0;
-    g->g_curblock = g->g_entryblock = block;
-    g->g_current_label = NO_LABEL;
+    }
 
     if (u->u_scope_type == COMPILER_SCOPE_MODULE) {
         c->u->u_loc.lineno = 0;
@@ -8220,7 +8229,7 @@ dump_instr(struct instr *i)
         sprintf(arg, "arg: %d ", i->i_oparg);
     }
     if (HAS_TARGET(i->i_opcode)) {
-        sprintf(arg, "target: %p ", i->i_target);
+        sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
     }
     fprintf(stderr, "line: %d, opcode: %d %s%s%s\n",
                     i->i_loc.lineno, i->i_opcode, arg, jabs, jrel);
@@ -8251,7 +8260,7 @@ static int
 calculate_jump_targets(basicblock *entryblock);
 
 static int
-optimize_cfg(basicblock *entryblock, PyObject *consts, PyObject *const_cache);
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);
 
 static int
 trim_unused_consts(basicblock *entryblock, PyObject *consts);
@@ -8465,18 +8474,18 @@ static void
 propagate_line_numbers(basicblock *entryblock);
 
 static void
-eliminate_empty_basic_blocks(basicblock *entryblock);
+eliminate_empty_basic_blocks(cfg_builder *g);
 
 
 static int
-remove_redundant_jumps(basicblock *entryblock) {
+remove_redundant_jumps(cfg_builder *g) {
     /* If a non-empty block ends with a jump instruction, check if the next
      * non-empty block reached through normal flow control is the target
      * of that jump. If it is, then the jump instruction is redundant and
      * can be deleted.
      */
     int removed = 0;
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         if (b->b_iused > 0) {
             struct instr *b_last_instr = &b->b_instr[b->b_iused - 1];
             assert(!IS_ASSEMBLER_OPCODE(b_last_instr->i_opcode));
@@ -8495,7 +8504,7 @@ remove_redundant_jumps(basicblock *entryblock) {
         }
     }
     if (removed) {
-        eliminate_empty_basic_blocks(entryblock);
+        eliminate_empty_basic_blocks(g);
     }
     return 0;
 }
@@ -8545,13 +8554,12 @@ assemble(struct compiler *c, int addNone)
     }
 
     cfg_builder *g = CFG_BUILDER(c);
-    basicblock *entryblock = g->g_entryblock;
-    assert(entryblock != NULL);
+    assert(g->g_entryblock != NULL);
 
     /* Set firstlineno if it wasn't explicitly set. */
     if (!c->u->u_firstlineno) {
-        if (entryblock->b_instr && entryblock->b_instr->i_loc.lineno) {
-            c->u->u_firstlineno = entryblock->b_instr->i_loc.lineno;
+        if (g->g_entryblock->b_instr && g->g_entryblock->b_instr->i_loc.lineno) {
+            c->u->u_firstlineno = g->g_entryblock->b_instr->i_loc.lineno;
         }
         else {
             c->u->u_firstlineno = 1;
@@ -8559,11 +8567,11 @@ assemble(struct compiler *c, int addNone)
     }
 
     // This must be called before fix_cell_offsets().
-    if (insert_prefix_instructions(c, entryblock, cellfixedoffsets, nfreevars, code_flags)) {
+    if (insert_prefix_instructions(c, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
         goto error;
     }
 
-    int numdropped = fix_cell_offsets(c, entryblock, cellfixedoffsets);
+    int numdropped = fix_cell_offsets(c, g->g_entryblock, cellfixedoffsets);
     PyMem_Free(cellfixedoffsets);  // At this point we're done with it.
     cellfixedoffsets = NULL;
     if (numdropped < 0) {
@@ -8575,52 +8583,52 @@ assemble(struct compiler *c, int addNone)
     if (consts == NULL) {
         goto error;
     }
-    if (calculate_jump_targets(entryblock)) {
+    if (calculate_jump_targets(g->g_entryblock)) {
         goto error;
     }
-    if (optimize_cfg(entryblock, consts, c->c_const_cache)) {
+    if (optimize_cfg(g, consts, c->c_const_cache)) {
         goto error;
     }
-    if (trim_unused_consts(entryblock, consts)) {
+    if (trim_unused_consts(g->g_entryblock, consts)) {
         goto error;
     }
     if (duplicate_exits_without_lineno(g)) {
         return NULL;
     }
-    propagate_line_numbers(entryblock);
-    guarantee_lineno_for_exits(entryblock, c->u->u_firstlineno);
+    propagate_line_numbers(g->g_entryblock);
+    guarantee_lineno_for_exits(g->g_entryblock, c->u->u_firstlineno);
 
-    int maxdepth = stackdepth(entryblock, code_flags);
+    int maxdepth = stackdepth(g->g_entryblock, code_flags);
     if (maxdepth < 0) {
         goto error;
     }
     /* TO DO -- For 3.12, make sure that `maxdepth <= MAX_ALLOWED_STACK_USE` */
 
-    if (label_exception_targets(entryblock)) {
+    if (label_exception_targets(g->g_entryblock)) {
         goto error;
     }
-    convert_exception_handlers_to_nops(entryblock);
+    convert_exception_handlers_to_nops(g->g_entryblock);
 
     if (push_cold_blocks_to_end(g, code_flags) < 0) {
         goto error;
     }
 
-    if (remove_redundant_jumps(entryblock) < 0) {
+    if (remove_redundant_jumps(g) < 0) {
         goto error;
     }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         clean_basic_block(b);
     }
 
     /* Order of basic blocks must have been determined by now */
-    normalize_jumps(entryblock);
+    normalize_jumps(g->g_entryblock);
 
-    if (add_checks_for_loads_of_unknown_variables(entryblock, c) < 0) {
+    if (add_checks_for_loads_of_unknown_variables(g->g_entryblock, c) < 0) {
         goto error;
     }
 
     /* Can't modify the bytecode after computing jump offsets. */
-    assemble_jump_offsets(entryblock);
+    assemble_jump_offsets(g->g_entryblock);
 
 
     /* Create assembler */
@@ -8628,7 +8636,7 @@ assemble(struct compiler *c, int addNone)
         goto error;
 
     /* Emit code. */
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         for (int j = 0; j < b->b_iused; j++)
             if (!assemble_emit(&a, &b->b_instr[j]))
                 goto error;
@@ -8636,13 +8644,13 @@ assemble(struct compiler *c, int addNone)
 
     /* Emit location info */
     a.a_lineno = c->u->u_firstlineno;
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         for (int j = 0; j < b->b_iused; j++)
             if (!assemble_emit_location(&a, &b->b_instr[j]))
                 goto error;
     }
 
-    if (!assemble_exception_table(&a, entryblock)) {
+    if (!assemble_exception_table(&a, g->g_entryblock)) {
         goto error;
     }
     if (_PyBytes_Resize(&a.a_except_table, a.a_except_table_off) < 0) {
@@ -9352,16 +9360,19 @@ mark_reachable(basicblock *entryblock) {
 }
 
 static void
-eliminate_empty_basic_blocks(basicblock *entryblock) {
+eliminate_empty_basic_blocks(cfg_builder *g) {
     /* Eliminate empty blocks */
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         basicblock *next = b->b_next;
         while (next && next->b_iused == 0) {
             next = next->b_next;
         }
         b->b_next = next;
     }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    while(g->g_entryblock && g->g_entryblock->b_iused == 0) {
+        g->g_entryblock = g->g_entryblock->b_next;
+    }
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         assert(b->b_iused > 0);
         for (int i = 0; i < b->b_iused; i++) {
             struct instr *instr = &b->b_instr[i];
@@ -9467,42 +9478,42 @@ calculate_jump_targets(basicblock *entryblock)
 */
 
 static int
-optimize_cfg(basicblock *entryblock, PyObject *consts, PyObject *const_cache)
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
 {
     assert(PyDict_CheckExact(const_cache));
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         if (normalize_basic_block(b)) {
             return -1;
         }
     }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         if (extend_block(b)) {
             return -1;
         }
     }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         if (optimize_basic_block(const_cache, b, consts)) {
             return -1;
         }
         clean_basic_block(b);
         assert(b->b_predecessors == 0);
     }
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         if (extend_block(b)) {
             return -1;
         }
     }
-    if (mark_reachable(entryblock)) {
+    if (mark_reachable(g->g_entryblock)) {
         return -1;
     }
     /* Delete unreachable instructions */
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
        if (b->b_predecessors == 0) {
             b->b_iused = 0;
        }
     }
-    eliminate_empty_basic_blocks(entryblock);
-    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+    eliminate_empty_basic_blocks(g);
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
         clean_basic_block(b);
     }
     return 0;
@@ -9601,6 +9612,157 @@ duplicate_exits_without_lineno(cfg_builder *g)
 }
 
 
+/* Access to compiler optimizations for unit tests.
+ *
+ * _PyCompile_OptimizeCfg takes an instruction list, constructs
+ * a CFG, optimizes it and converts back to an instruction list.
+ *
+ * An instruction list is a PyList where each item is either
+ * a tuple describing a single instruction:
+ * (opcode, oparg, lineno, end_lineno, col, end_col), or
+ * a jump target label marking the beginning of a basic block.
+ */
+
+static int
+instructions_to_cfg(PyObject *instructions, cfg_builder *g)
+{
+    assert(PyList_Check(instructions));
+
+    Py_ssize_t instr_size = PyList_GET_SIZE(instructions);
+    for (Py_ssize_t i = 0; i < instr_size; i++) {
+        PyObject *item = PyList_GET_ITEM(instructions, i);
+        if (PyLong_Check(item)) {
+            int lbl_id = PyLong_AsLong(item);
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            if (lbl_id <= 0 || lbl_id > instr_size) {
+                /* expect label in a reasonable range */
+                PyErr_SetString(PyExc_ValueError, "label out of range");
+                return -1;
+            }
+            jump_target_label lbl = {lbl_id};
+            if (cfg_builder_use_label(g, lbl) < 0) {
+                return -1;
+            }
+        }
+        else {
+            if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 6) {
+                PyErr_SetString(PyExc_ValueError, "expected a 6-tuple");
+                return -1;
+            }
+            int opcode = PyLong_AsLong(PyTuple_GET_ITEM(item, 0));
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            int oparg = PyLong_AsLong(PyTuple_GET_ITEM(item, 1));
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            struct location loc;
+            loc.lineno = PyLong_AsLong(PyTuple_GET_ITEM(item, 2));
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            loc.end_lineno = PyLong_AsLong(PyTuple_GET_ITEM(item, 3));
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            loc.col_offset = PyLong_AsLong(PyTuple_GET_ITEM(item, 4));
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            loc.end_col_offset = PyLong_AsLong(PyTuple_GET_ITEM(item, 5));
+            if (PyErr_Occurred()) {
+                return -1;
+            }
+            if (!cfg_builder_addop(g, opcode, oparg, loc)) {
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+static PyObject *
+cfg_to_instructions(cfg_builder *g)
+{
+    PyObject *instructions = PyList_New(0);
+    if (instructions == NULL) {
+        return NULL;
+    }
+    int lbl = 1;
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        b->b_label = lbl++;
+    }
+    for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        PyObject *lbl = PyLong_FromLong(b->b_label);
+        if (lbl == NULL) {
+            goto error;
+        }
+        if (PyList_Append(instructions, lbl) != 0) {
+            Py_DECREF(lbl);
+            goto error;
+        }
+        Py_DECREF(lbl);
+        for (int i = 0; i < b->b_iused; i++) {
+            struct instr *instr = &b->b_instr[i];
+            struct location loc = instr->i_loc;
+            int arg = HAS_TARGET(instr->i_opcode) ? instr->i_target->b_label : instr->i_oparg;
+            PyObject *inst_tuple = Py_BuildValue(
+                "(iiiiii)", instr->i_opcode, arg,
+                loc.lineno, loc.end_lineno,
+                loc.col_offset, loc.end_col_offset);
+            if (inst_tuple == NULL) {
+                goto error;
+            }
+
+            if (PyList_Append(instructions, inst_tuple) != 0) {
+                Py_DECREF(inst_tuple);
+                goto error;
+            }
+            Py_DECREF(inst_tuple);
+        }
+    }
+
+    return instructions;
+error:
+    Py_DECREF(instructions);
+    return NULL;
+}
+
+
+PyObject *
+_PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts)
+{
+    PyObject *res = NULL;
+    PyObject *const_cache = NULL;
+    cfg_builder g;
+    memset(&g, 0, sizeof(cfg_builder));
+    if (cfg_builder_init(&g) < 0) {
+        goto error;
+    }
+    if (instructions_to_cfg(instructions, &g) < 0) {
+        goto error;
+    }
+    const_cache = PyDict_New();
+    if (const_cache == NULL) {
+        goto error;
+    }
+    if (calculate_jump_targets(g.g_entryblock)) {
+        goto error;
+    }
+    if (optimize_cfg(&g, consts, const_cache) < 0) {
+        goto error;
+    }
+    res = cfg_to_instructions(&g);
+error:
+    Py_XDECREF(const_cache);
+    cfg_builder_fini(&g);
+    return res;
+}
+
+
 /* Retained for API compatibility.
  * Optimization is now done in optimize_cfg */
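
A closing sketch of the round trip described in the comment above instructions_to_cfg(): an instruction list goes in, a CFG is built and optimized, and an instruction list comes back out. It assumes a build with this change; LBL is an illustrative label id. Note that cfg_to_instructions() emits a label for every basic block, which is why the test helper normalize_insts() drops labels that are never used as jump targets.

import dis
from _testinternalcapi import optimize_cfg

LBL = 1
insts = [
    LBL,                                                   # loop head
    (dis.opmap['LOAD_NAME'], 1, 11, -1, -1, -1),
    (dis.opmap['POP_JUMP_IF_TRUE'], LBL, 12, -1, -1, -1),  # backward jump to LBL
    (dis.opmap['LOAD_CONST'], 2, 13, -1, -1, -1),
]
for item in optimize_cfg(insts, list(range(5))):
    # Each item is an int block label or a 6-tuple
    # (opcode, oparg, lineno, end_lineno, col_offset, end_col_offset);
    # jump opargs refer to the renumbered block labels.
    print(item)
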