import unittest
import dis
import io
+from _testinternalcapi import optimize_cfg
_UNSPECIFIED = object()
msg = '(%s,%r) occurs in bytecode:\n%s'
msg = msg % (opname, argval, disassembly)
self.fail(msg)
+
+
+class CfgOptimizationTestCase(unittest.TestCase):
+
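+    # Opcodes that take an argument, and opcodes whose argument is a
+    # jump or exception target label.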
+ HAS_ARG = set(dis.hasarg)
+ HAS_TARGET = set(dis.hasjrel + dis.hasjabs + dis.hasexc)
+ HAS_ARG_OR_TARGET = HAS_ARG.union(HAS_TARGET)
+
+ def setUp(self):
+ self.last_label = 0
+
+ def Label(self):
+ self.last_label += 1
+ return self.last_label
+
+ def complete_insts_info(self, insts):
+ # fill in omitted fields in location, and oparg 0 for ops with no arg.
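+        # e.g. ('LOAD_CONST', 2, 13) -> (dis.opmap['LOAD_CONST'], 2, 13, -1, -1, -1)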
+ instructions = []
+ for item in insts:
+ if isinstance(item, int):
+ instructions.append(item)
+ else:
+ assert isinstance(item, tuple)
+ inst = list(reversed(item))
+ opcode = dis.opmap[inst.pop()]
+ oparg = inst.pop() if opcode in self.HAS_ARG_OR_TARGET else 0
+                loc = list(reversed(inst)) + [-1] * (4 - len(inst))
+ instructions.append((opcode, oparg, *loc))
+ return instructions
+
+ def normalize_insts(self, insts):
+ """ Map labels to instruction index.
+ Remove labels which are not used as jump targets.
+ """
+ labels_map = {}
+ targets = set()
+ idx = 1
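+        # First pass: map each label to the 1-based index of the
+        # instruction that follows it, and record which labels are
+        # actually used as jump targets.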
+ for item in insts:
+ assert isinstance(item, (int, tuple))
+ if isinstance(item, tuple):
+ opcode, oparg, *_ = item
+ if dis.opmap.get(opcode, opcode) in self.HAS_TARGET:
+ targets.add(oparg)
+ idx += 1
+ elif isinstance(item, int):
+ assert item not in labels_map, "label reused"
+ labels_map[item] = idx
+
+ res = []
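+        # Second pass: replace used labels with their instruction index,
+        # resolve jump opargs through labels_map, and map opcodes back
+        # to names; labels that are not jump targets are dropped.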
+ for item in insts:
+ if isinstance(item, int) and item in targets:
+ if not res or labels_map[item] != res[-1]:
+ res.append(labels_map[item])
+ elif isinstance(item, tuple):
+ opcode, oparg, *loc = item
+ opcode = dis.opmap.get(opcode, opcode)
+ if opcode in self.HAS_TARGET:
+ arg = labels_map[oparg]
+ else:
+                    arg = oparg if opcode in self.HAS_ARG else None
+ opcode = dis.opname[opcode]
+ res.append((opcode, arg, *loc))
+ return res
+
+ def get_optimized(self, insts, consts):
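+        # Expand to the numeric 6-tuple form, then run the compiler's
+        # CFG optimizer on it (exposed as _testinternalcapi.optimize_cfg).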
+ insts = self.complete_insts_info(insts)
+ insts = optimize_cfg(insts, consts)
+ return insts, consts
+
+ def compareInstructions(self, actual_, expected_):
+        # actual_ and expected_ are lists whose entries are either labels
+        # or instruction tuples. Normalize both, then compare them,
+        # mapping each actual label to the corresponding expected label
+        # by the position where it occurs.
+
+ self.assertIsInstance(actual_, list)
+ self.assertIsInstance(expected_, list)
+
+ actual = self.normalize_insts(actual_)
+ expected = self.normalize_insts(expected_)
+ self.assertEqual(len(actual), len(expected))
+
+ # compare instructions
+ for act, exp in zip(actual, expected):
+ if isinstance(act, int):
+ self.assertEqual(exp, act)
+ continue
+ self.assertIsInstance(exp, tuple)
+ self.assertIsInstance(act, tuple)
+ # pad exp with -1's (if location info is incomplete)
+ exp += (-1,) * (len(act) - len(exp))
+ self.assertEqual(exp, act)
import textwrap
import unittest
-from test.support.bytecode_helper import BytecodeTestCase
+from test.support.bytecode_helper import BytecodeTestCase, CfgOptimizationTestCase
def compile_pattern_with_fast_locals(pattern):
self.assertNotInBytecode(f, "LOAD_FAST_CHECK")
+class DirectCfgOptimizerTests(CfgOptimizationTestCase):
+
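+    # These tests drive the compiler's CFG optimizer directly via
+    # _testinternalcapi.optimize_cfg: build a symbolic instruction list,
+    # optimize it, and compare against the expected instructions/consts.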
+ def cfg_optimization_test(self, insts, expected_insts,
+ consts=None, expected_consts=None):
+ if expected_consts is None:
+ expected_consts = consts
+ opt_insts, opt_consts = self.get_optimized(insts, consts)
+ self.compareInstructions(opt_insts, expected_insts)
+ self.assertEqual(opt_consts, expected_consts)
+
+ def test_conditional_jump_forward_non_const_condition(self):
+ insts = [
+ ('LOAD_NAME', 1, 11),
+ ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
+ ('LOAD_CONST', 2, 13),
+ lbl,
+ ('LOAD_CONST', 3, 14),
+ ]
+ expected = [
+            ('LOAD_NAME', 1, 11),
+            ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
+            ('LOAD_CONST', 2, 13),
+            lbl,
+            ('LOAD_CONST', 3, 14)
+ ]
+ self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+ def test_conditional_jump_forward_const_condition(self):
+ # The unreachable branch of the jump is removed
+
+ insts = [
+ ('LOAD_CONST', 3, 11),
+ ('POP_JUMP_IF_TRUE', lbl := self.Label(), 12),
+ ('LOAD_CONST', 2, 13),
+ lbl,
+ ('LOAD_CONST', 3, 14),
+ ]
+ expected = [
+ ('NOP', None, 11),
+ ('JUMP', lbl := self.Label(), 12),
+ lbl,
+            ('LOAD_CONST', 3, 14)
+ ]
+ self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+ def test_conditional_jump_backward_non_const_condition(self):
+ insts = [
+ lbl1 := self.Label(),
+ ('LOAD_NAME', 1, 11),
+ ('POP_JUMP_IF_TRUE', lbl1, 12),
+ ('LOAD_CONST', 2, 13),
+ ]
+ expected = [
+ lbl := self.Label(),
+            ('LOAD_NAME', 1, 11),
+            ('POP_JUMP_IF_TRUE', lbl, 12),
+            ('LOAD_CONST', 2, 13)
+ ]
+ self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+ def test_conditional_jump_backward_const_condition(self):
+ # The unreachable branch of the jump is removed
+ insts = [
+ lbl1 := self.Label(),
+ ('LOAD_CONST', 1, 11),
+ ('POP_JUMP_IF_TRUE', lbl1, 12),
+ ('LOAD_CONST', 2, 13),
+ ]
+ expected = [
+ lbl := self.Label(),
+ ('NOP', None, 11),
+ ('JUMP', lbl, 12)
+ ]
+ self.cfg_optimization_test(insts, expected, consts=list(range(5)))
+
+
if __name__ == "__main__":
unittest.main()
static int basicblock_next_instr(basicblock *);
+static basicblock *cfg_builder_new_block(cfg_builder *g);
static int cfg_builder_maybe_start_new_block(cfg_builder *g);
static int cfg_builder_addop_i(cfg_builder *g, int opcode, Py_ssize_t oparg, struct location loc);
}
}
+static int
+cfg_builder_init(cfg_builder *g)
+{
+ g->g_block_list = NULL;
+ basicblock *block = cfg_builder_new_block(g);
+    if (block == NULL) {
+        return 0;
+    }
+ g->g_curblock = g->g_entryblock = block;
+ g->g_current_label = NO_LABEL;
+ return 1;
+}
+
static void
-cfg_builder_free(cfg_builder* g)
+cfg_builder_fini(cfg_builder* g)
{
cfg_builder_check(g);
basicblock *b = g->g_block_list;
static void
compiler_unit_free(struct compiler_unit *u)
{
- cfg_builder_free(&u->u_cfg_builder);
+ cfg_builder_fini(&u->u_cfg_builder);
Py_CLEAR(u->u_ste);
Py_CLEAR(u->u_name);
Py_CLEAR(u->u_qualname);
int scope_type, void *key, int lineno)
{
struct compiler_unit *u;
- basicblock *block;
u = (struct compiler_unit *)PyObject_Calloc(1, sizeof(
struct compiler_unit));
c->c_nestlevel++;
cfg_builder *g = CFG_BUILDER(c);
- g->g_block_list = NULL;
- block = cfg_builder_new_block(g);
- if (block == NULL)
+ if (!cfg_builder_init(g)) {
return 0;
- g->g_curblock = g->g_entryblock = block;
- g->g_current_label = NO_LABEL;
+ }
if (u->u_scope_type == COMPILER_SCOPE_MODULE) {
c->u->u_loc.lineno = 0;
sprintf(arg, "arg: %d ", i->i_oparg);
}
if (HAS_TARGET(i->i_opcode)) {
- sprintf(arg, "target: %p ", i->i_target);
+ sprintf(arg, "target: %p [%d] ", i->i_target, i->i_oparg);
}
fprintf(stderr, "line: %d, opcode: %d %s%s%s\n",
i->i_loc.lineno, i->i_opcode, arg, jabs, jrel);
calculate_jump_targets(basicblock *entryblock);
static int
-optimize_cfg(basicblock *entryblock, PyObject *consts, PyObject *const_cache);
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache);
static int
trim_unused_consts(basicblock *entryblock, PyObject *consts);
propagate_line_numbers(basicblock *entryblock);
static void
-eliminate_empty_basic_blocks(basicblock *entryblock);
+eliminate_empty_basic_blocks(cfg_builder *g);
static int
-remove_redundant_jumps(basicblock *entryblock) {
+remove_redundant_jumps(cfg_builder *g) {
/* If a non-empty block ends with a jump instruction, check if the next
* non-empty block reached through normal flow control is the target
* of that jump. If it is, then the jump instruction is redundant and
* can be deleted.
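+ * For example, a block that ends with "JUMP L", where label L starts the
+ * next non-empty block: control reaches L either way, so the JUMP can go.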
*/
int removed = 0;
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
if (b->b_iused > 0) {
struct instr *b_last_instr = &b->b_instr[b->b_iused - 1];
assert(!IS_ASSEMBLER_OPCODE(b_last_instr->i_opcode));
}
}
if (removed) {
- eliminate_empty_basic_blocks(entryblock);
+ eliminate_empty_basic_blocks(g);
}
return 0;
}
}
cfg_builder *g = CFG_BUILDER(c);
- basicblock *entryblock = g->g_entryblock;
- assert(entryblock != NULL);
+ assert(g->g_entryblock != NULL);
/* Set firstlineno if it wasn't explicitly set. */
if (!c->u->u_firstlineno) {
- if (entryblock->b_instr && entryblock->b_instr->i_loc.lineno) {
- c->u->u_firstlineno = entryblock->b_instr->i_loc.lineno;
+ if (g->g_entryblock->b_instr && g->g_entryblock->b_instr->i_loc.lineno) {
+ c->u->u_firstlineno = g->g_entryblock->b_instr->i_loc.lineno;
}
else {
c->u->u_firstlineno = 1;
}
// This must be called before fix_cell_offsets().
- if (insert_prefix_instructions(c, entryblock, cellfixedoffsets, nfreevars, code_flags)) {
+ if (insert_prefix_instructions(c, g->g_entryblock, cellfixedoffsets, nfreevars, code_flags)) {
goto error;
}
- int numdropped = fix_cell_offsets(c, entryblock, cellfixedoffsets);
+ int numdropped = fix_cell_offsets(c, g->g_entryblock, cellfixedoffsets);
PyMem_Free(cellfixedoffsets); // At this point we're done with it.
cellfixedoffsets = NULL;
if (numdropped < 0) {
if (consts == NULL) {
goto error;
}
- if (calculate_jump_targets(entryblock)) {
+ if (calculate_jump_targets(g->g_entryblock)) {
goto error;
}
- if (optimize_cfg(entryblock, consts, c->c_const_cache)) {
+ if (optimize_cfg(g, consts, c->c_const_cache)) {
goto error;
}
- if (trim_unused_consts(entryblock, consts)) {
+ if (trim_unused_consts(g->g_entryblock, consts)) {
goto error;
}
if (duplicate_exits_without_lineno(g)) {
return NULL;
}
- propagate_line_numbers(entryblock);
- guarantee_lineno_for_exits(entryblock, c->u->u_firstlineno);
+ propagate_line_numbers(g->g_entryblock);
+ guarantee_lineno_for_exits(g->g_entryblock, c->u->u_firstlineno);
- int maxdepth = stackdepth(entryblock, code_flags);
+ int maxdepth = stackdepth(g->g_entryblock, code_flags);
if (maxdepth < 0) {
goto error;
}
/* TO DO -- For 3.12, make sure that `maxdepth <= MAX_ALLOWED_STACK_USE` */
- if (label_exception_targets(entryblock)) {
+ if (label_exception_targets(g->g_entryblock)) {
goto error;
}
- convert_exception_handlers_to_nops(entryblock);
+ convert_exception_handlers_to_nops(g->g_entryblock);
if (push_cold_blocks_to_end(g, code_flags) < 0) {
goto error;
}
- if (remove_redundant_jumps(entryblock) < 0) {
+ if (remove_redundant_jumps(g) < 0) {
goto error;
}
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
clean_basic_block(b);
}
/* Order of basic blocks must have been determined by now */
- normalize_jumps(entryblock);
+ normalize_jumps(g->g_entryblock);
- if (add_checks_for_loads_of_unknown_variables(entryblock, c) < 0) {
+ if (add_checks_for_loads_of_unknown_variables(g->g_entryblock, c) < 0) {
goto error;
}
/* Can't modify the bytecode after computing jump offsets. */
- assemble_jump_offsets(entryblock);
+ assemble_jump_offsets(g->g_entryblock);
/* Create assembler */
goto error;
/* Emit code. */
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
for (int j = 0; j < b->b_iused; j++)
if (!assemble_emit(&a, &b->b_instr[j]))
goto error;
/* Emit location info */
a.a_lineno = c->u->u_firstlineno;
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
for (int j = 0; j < b->b_iused; j++)
if (!assemble_emit_location(&a, &b->b_instr[j]))
goto error;
}
- if (!assemble_exception_table(&a, entryblock)) {
+ if (!assemble_exception_table(&a, g->g_entryblock)) {
goto error;
}
if (_PyBytes_Resize(&a.a_except_table, a.a_except_table_off) < 0) {
}
static void
-eliminate_empty_basic_blocks(basicblock *entryblock) {
+eliminate_empty_basic_blocks(cfg_builder *g) {
/* Eliminate empty blocks */
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
basicblock *next = b->b_next;
while (next && next->b_iused == 0) {
next = next->b_next;
}
b->b_next = next;
}
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
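+    /* The entry block itself may now be empty: advance g_entryblock past
+       any leading empty blocks. */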
+    while (g->g_entryblock && g->g_entryblock->b_iused == 0) {
+ g->g_entryblock = g->g_entryblock->b_next;
+ }
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
assert(b->b_iused > 0);
for (int i = 0; i < b->b_iused; i++) {
struct instr *instr = &b->b_instr[i];
*/
static int
-optimize_cfg(basicblock *entryblock, PyObject *consts, PyObject *const_cache)
+optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache)
{
assert(PyDict_CheckExact(const_cache));
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
if (normalize_basic_block(b)) {
return -1;
}
}
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
if (extend_block(b)) {
return -1;
}
}
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
if (optimize_basic_block(const_cache, b, consts)) {
return -1;
}
clean_basic_block(b);
assert(b->b_predecessors == 0);
}
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
if (extend_block(b)) {
return -1;
}
}
- if (mark_reachable(entryblock)) {
+ if (mark_reachable(g->g_entryblock)) {
return -1;
}
/* Delete unreachable instructions */
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
if (b->b_predecessors == 0) {
b->b_iused = 0;
}
}
- eliminate_empty_basic_blocks(entryblock);
- for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+ eliminate_empty_basic_blocks(g);
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
clean_basic_block(b);
}
return 0;
}
+/* Access to compiler optimizations for unit tests.
+ *
+ * _PyCompile_OptimizeCfg takes an instruction list, constructs
+ * a CFG, optimizes it and converts back to an instruction list.
+ *
+ * An instruction list is a PyList where each item is either
+ * a tuple describing a single instruction:
+ * (opcode, oparg, lineno, end_lineno, col, end_col), or
+ * a jump target label marking the beginning of a basic block.
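+ *
+ * For example (opcode names shown for readability; the list actually
+ * holds numeric opcodes), a conditional jump to label 4 could look like:
+ *
+ *   [(LOAD_NAME, 1, 11, 11, -1, -1),
+ *    (POP_JUMP_IF_TRUE, 4, 12, 12, -1, -1),
+ *    (LOAD_CONST, 2, 13, 13, -1, -1),
+ *    4,
+ *    (LOAD_CONST, 3, 14, 14, -1, -1)]
+ *
+ * where the bare 4 is a label and the jump's oparg refers to it.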
+ */
+
+static int
+instructions_to_cfg(PyObject *instructions, cfg_builder *g)
+{
+ assert(PyList_Check(instructions));
+
+ Py_ssize_t instr_size = PyList_GET_SIZE(instructions);
+ for (Py_ssize_t i = 0; i < instr_size; i++) {
+ PyObject *item = PyList_GET_ITEM(instructions, i);
+ if (PyLong_Check(item)) {
+ int lbl_id = PyLong_AsLong(item);
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ if (lbl_id <= 0 || lbl_id > instr_size) {
+ /* expect label in a reasonable range */
+ PyErr_SetString(PyExc_ValueError, "label out of range");
+ return -1;
+ }
+ jump_target_label lbl = {lbl_id};
+ if (cfg_builder_use_label(g, lbl) < 0) {
+ return -1;
+ }
+ }
+ else {
+ if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 6) {
+ PyErr_SetString(PyExc_ValueError, "expected a 6-tuple");
+ return -1;
+ }
+ int opcode = PyLong_AsLong(PyTuple_GET_ITEM(item, 0));
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ int oparg = PyLong_AsLong(PyTuple_GET_ITEM(item, 1));
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ struct location loc;
+ loc.lineno = PyLong_AsLong(PyTuple_GET_ITEM(item, 2));
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ loc.end_lineno = PyLong_AsLong(PyTuple_GET_ITEM(item, 3));
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ loc.col_offset = PyLong_AsLong(PyTuple_GET_ITEM(item, 4));
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ loc.end_col_offset = PyLong_AsLong(PyTuple_GET_ITEM(item, 5));
+ if (PyErr_Occurred()) {
+ return -1;
+ }
+ if (!cfg_builder_addop(g, opcode, oparg, loc)) {
+ return -1;
+ }
+ }
+ }
+ return 0;
+}
+
+static PyObject *
+cfg_to_instructions(cfg_builder *g)
+{
+ PyObject *instructions = PyList_New(0);
+ if (instructions == NULL) {
+ return NULL;
+ }
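+    /* Number the blocks sequentially so that jump targets can be
+       expressed as label ids in the output list. */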
+ int lbl = 1;
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+ b->b_label = lbl++;
+ }
+ for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
+        PyObject *lbl_obj = PyLong_FromLong(b->b_label);
+        if (lbl_obj == NULL) {
+            goto error;
+        }
+        if (PyList_Append(instructions, lbl_obj) != 0) {
+            Py_DECREF(lbl_obj);
+            goto error;
+        }
+        Py_DECREF(lbl_obj);
+ for (int i = 0; i < b->b_iused; i++) {
+ struct instr *instr = &b->b_instr[i];
+ struct location loc = instr->i_loc;
+ int arg = HAS_TARGET(instr->i_opcode) ? instr->i_target->b_label : instr->i_oparg;
+ PyObject *inst_tuple = Py_BuildValue(
+ "(iiiiii)", instr->i_opcode, arg,
+ loc.lineno, loc.end_lineno,
+ loc.col_offset, loc.end_col_offset);
+ if (inst_tuple == NULL) {
+ goto error;
+ }
+
+ if (PyList_Append(instructions, inst_tuple) != 0) {
+ Py_DECREF(inst_tuple);
+ goto error;
+ }
+ Py_DECREF(inst_tuple);
+ }
+ }
+
+ return instructions;
+error:
+ Py_DECREF(instructions);
+ return NULL;
+}
+
+
+PyObject *
+_PyCompile_OptimizeCfg(PyObject *instructions, PyObject *consts)
+{
+ PyObject *res = NULL;
+ PyObject *const_cache = NULL;
+ cfg_builder g;
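+    /* Zero-init so cfg_builder_fini() on the error path is safe to call
+       even if cfg_builder_init() fails. */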
+ memset(&g, 0, sizeof(cfg_builder));
+    if (!cfg_builder_init(&g)) {
+ goto error;
+ }
+ if (instructions_to_cfg(instructions, &g) < 0) {
+ goto error;
+ }
+ const_cache = PyDict_New();
+ if (const_cache == NULL) {
+ goto error;
+ }
+ if (calculate_jump_targets(g.g_entryblock)) {
+ goto error;
+ }
+ if (optimize_cfg(&g, consts, const_cache) < 0) {
+ goto error;
+ }
+ res = cfg_to_instructions(&g);
+error:
+ Py_XDECREF(const_cache);
+ cfg_builder_fini(&g);
+ return res;
+}
+
+
/* Retained for API compatibility.
* Optimization is now done in optimize_cfg */