git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-115685: Optimize `TO_BOOL` and variants based on truthiness of input. (GH-116311)
author: Mark Shannon <mark@hotpy.org>
Tue, 5 Mar 2024 11:23:46 +0000 (11:23 +0000)
committer: GitHub <noreply@github.com>
Tue, 5 Mar 2024 11:23:46 +0000 (11:23 +0000)
Include/internal/pycore_optimizer.h
Python/optimizer_analysis.c
Python/optimizer_bytecodes.c
Python/optimizer_cases.c.h
Python/optimizer_symbols.c

index d32e6c0174f68084d078f7892db380840756e63c..7c977728a95024ba5985aaa49d4a9f89d04f17b5 100644 (file)
@@ -96,6 +96,7 @@ extern bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym);
 extern bool _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ);
 extern bool _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val);
 extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym);
+extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym);
 
 
 extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx);
index a326e2249bb4dec5965d4292cca16028423af6e1..1e1d5529ee17d7993727466f2500294867652c76 100644 (file)
@@ -298,9 +298,31 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer,
 #define sym_set_type _Py_uop_sym_set_type
 #define sym_set_const _Py_uop_sym_set_const
 #define sym_is_bottom _Py_uop_sym_is_bottom
+#define sym_truthiness _Py_uop_sym_truthiness
 #define frame_new _Py_uop_frame_new
 #define frame_pop _Py_uop_frame_pop
 
+static int
+optimize_to_bool(
+    _PyUOpInstruction *this_instr,
+    _Py_UOpsContext *ctx,
+    _Py_UopsSymbol *value,
+    _Py_UopsSymbol **result_ptr)
+{
+    if (sym_matches_type(value, &PyBool_Type)) {
+        REPLACE_OP(this_instr, _NOP, 0, 0);
+        *result_ptr = value;
+        return 1;
+    }
+    int truthiness = sym_truthiness(value);
+    if (truthiness >= 0) {
+        PyObject *load = truthiness ? Py_True : Py_False;
+        REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
+        *result_ptr = sym_new_const(ctx, load);
+        return 1;
+    }
+    return 0;
+}
 
 /* 1 for success, 0 for not ready, cannot error at the moment. */
 static int
index 786d884fc5a1a8356eb4fa6c4fa3c25d42b3e4da..2cf54270e4ad3533b32cf36794ad0bb2347b794d 100644 (file)
@@ -29,6 +29,14 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 #define frame_new _Py_uop_frame_new
 #define frame_pop _Py_uop_frame_pop
 
+extern int
+optimize_to_bool(
+    _PyUOpInstruction *this_instr,
+    _Py_UOpsContext *ctx,
+    _Py_UopsSymbol *value,
+    _Py_UopsSymbol **result_ptr);
+
+
 static int
 dummy_func(void) {
 
@@ -271,63 +279,72 @@ dummy_func(void) {
     }
 
     op(_TO_BOOL, (value -- res)) {
-        (void)value;
-        res = sym_new_type(ctx, &PyBool_Type);
-        OUT_OF_SPACE_IF_NULL(res);
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
+        }
+        else {
+            res = sym_new_type(ctx, &PyBool_Type);
+            OUT_OF_SPACE_IF_NULL(res);
+        }
     }
 
-    op(_TO_BOOL_BOOL, (value -- value)) {
-        if (sym_matches_type(value, &PyBool_Type)) {
-            REPLACE_OP(this_instr, _NOP, 0, 0);
+    op(_TO_BOOL_BOOL, (value -- res)) {
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
         }
         else {
             if(!sym_set_type(value, &PyBool_Type)) {
                 goto hit_bottom;
             }
+            res = value;
         }
     }
 
     op(_TO_BOOL_INT, (value -- res)) {
-        if (sym_is_const(value) && sym_matches_type(value, &PyLong_Type)) {
-            PyObject *load = _PyLong_IsZero((PyLongObject *)sym_get_const(value))
-                             ? Py_False : Py_True;
-            REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
         }
         else {
+            if(!sym_set_type(value, &PyLong_Type)) {
+                goto hit_bottom;
+            }
             OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
         }
-        if(!sym_set_type(value, &PyLong_Type)) {
-            goto hit_bottom;
-        }
     }
 
     op(_TO_BOOL_LIST, (value -- res)) {
-        if(!sym_set_type(value, &PyList_Type)) {
-            goto hit_bottom;
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
+        }
+        else {
+            if(!sym_set_type(value, &PyList_Type)) {
+                goto hit_bottom;
+            }
+            OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
         }
-        OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
     }
 
     op(_TO_BOOL_NONE, (value -- res)) {
-        if (sym_get_const(value) == Py_None) {
-            REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_False);
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
+        }
+        else {
+            if (!sym_set_const(value, Py_None)) {
+                goto hit_bottom;
+            }
+            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
         }
-        sym_set_const(value, Py_None);
-        OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
     }
 
     op(_TO_BOOL_STR, (value -- res)) {
-        if (sym_is_const(value) && sym_matches_type(value, &PyUnicode_Type)) {
-            PyObject *load = sym_get_const(value) == &_Py_STR(empty) ? Py_False : Py_True;
-            REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+        if (optimize_to_bool(this_instr, ctx, value, &res)) {
+            OUT_OF_SPACE_IF_NULL(res);
         }
         else {
             OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
-        }
-        if(!sym_set_type(value, &PyUnicode_Type)) {
-            goto hit_bottom;
+            if(!sym_set_type(value, &PyUnicode_Type)) {
+                goto hit_bottom;
+            }
         }
     }
 
index 6d3488f2118589ea5bc27cf0772d56261def7e85..f2c186a0ae138098dc9c3d2e8f5530da6f811dad 100644 (file)
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            (void)value;
-            res = sym_new_type(ctx, &PyBool_Type);
-            OUT_OF_SPACE_IF_NULL(res);
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
+            }
+            else {
+                res = sym_new_type(ctx, &PyBool_Type);
+                OUT_OF_SPACE_IF_NULL(res);
+            }
             stack_pointer[-1] = res;
             break;
         }
 
         case _TO_BOOL_BOOL: {
             _Py_UopsSymbol *value;
+            _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_matches_type(value, &PyBool_Type)) {
-                REPLACE_OP(this_instr, _NOP, 0, 0);
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
             }
             else {
                 if(!sym_set_type(value, &PyBool_Type)) {
                     goto hit_bottom;
                 }
+                res = value;
             }
+            stack_pointer[-1] = res;
             break;
         }
 
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_is_const(value) && sym_matches_type(value, &PyLong_Type)) {
-                PyObject *load = _PyLong_IsZero((PyLongObject *)sym_get_const(value))
-                ? Py_False : Py_True;
-                REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-                OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
             }
             else {
+                if(!sym_set_type(value, &PyLong_Type)) {
+                    goto hit_bottom;
+                }
                 OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
             }
-            if(!sym_set_type(value, &PyLong_Type)) {
-                goto hit_bottom;
-            }
             stack_pointer[-1] = res;
             break;
         }
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if(!sym_set_type(value, &PyList_Type)) {
-                goto hit_bottom;
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
+            }
+            else {
+                if(!sym_set_type(value, &PyList_Type)) {
+                    goto hit_bottom;
+                }
+                OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
             }
-            OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
             stack_pointer[-1] = res;
             break;
         }
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_get_const(value) == Py_None) {
-                REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)Py_False);
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
+            }
+            else {
+                if (!sym_set_const(value, Py_None)) {
+                    goto hit_bottom;
+                }
+                OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
             }
-            sym_set_const(value, Py_None);
-            OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, Py_False));
             stack_pointer[-1] = res;
             break;
         }
             _Py_UopsSymbol *value;
             _Py_UopsSymbol *res;
             value = stack_pointer[-1];
-            if (sym_is_const(value) && sym_matches_type(value, &PyUnicode_Type)) {
-                PyObject *load = sym_get_const(value) == &_Py_STR(empty) ? Py_False : Py_True;
-                REPLACE_OP(this_instr, _POP_TOP_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)load);
-                OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, load));
+            if (optimize_to_bool(this_instr, ctx, value, &res)) {
+                OUT_OF_SPACE_IF_NULL(res);
             }
             else {
                 OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyBool_Type));
-            }
-            if(!sym_set_type(value, &PyUnicode_Type)) {
-                goto hit_bottom;
+                if(!sym_set_type(value, &PyUnicode_Type)) {
+                    goto hit_bottom;
+                }
             }
             stack_pointer[-1] = res;
             break;
index 5c3ec2b5ed1a4c4657168d3c15e231483409dd7e..29fe31a0e9b94c602cea9969c5743713f34c7a9d 100644 (file)
@@ -4,6 +4,7 @@
 #include "cpython/optimizer.h"
 #include "pycore_code.h"
 #include "pycore_frame.h"
+#include "pycore_long.h"
 #include "pycore_optimizer.h"
 
 #include <stdbool.h>
@@ -240,6 +241,40 @@ _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ)
     return sym->typ == typ;
 }
 
+int
+_Py_uop_sym_truthiness(_Py_UopsSymbol *sym)
+{
+    /* There are some non-constant values for
+     * which `bool(val)` always evaluates to
+     * True or False, such as tuples with known
+     * length, but unknown contents, or bound-methods.
+     * This function will need updating
+     * should we support those values.
+     */
+    if (_Py_uop_sym_is_bottom(sym)) {
+        return -1;
+    }
+    if (!_Py_uop_sym_is_const(sym)) {
+        return -1;
+    }
+    PyObject *value = _Py_uop_sym_get_const(sym);
+    if (value == Py_None) {
+        return 0;
+    }
+    /* Only handle a few known safe types */
+    PyTypeObject *tp = Py_TYPE(value);
+    if (tp == &PyLong_Type) {
+        return !_PyLong_IsZero((PyLongObject *)value);
+    }
+    if (tp == &PyUnicode_Type) {
+        return value != &_Py_STR(empty);
+    }
+    if (tp == &PyBool_Type) {
+        return value == Py_True;
+    }
+    return -1;
+}
+
 // 0 on success, -1 on error.
 _Py_UOpsAbstractFrame *
 _Py_uop_frame_new(
@@ -413,6 +448,7 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
         goto fail;
     }
     _Py_uop_sym_set_const(sym, val_42);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 1, "bool(42) is not True");
     TEST_PREDICATE(!_Py_uop_sym_is_null(sym), "42 is NULL");
     TEST_PREDICATE(_Py_uop_sym_is_not_null(sym), "42 isn't not NULL");
     TEST_PREDICATE(_Py_uop_sym_matches_type(sym, &PyLong_Type), "42 isn't an int");
@@ -436,6 +472,14 @@ _Py_uop_symbols_test(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(ignored))
     _Py_uop_sym_set_const(sym, val_43);  // Should make it bottom
     TEST_PREDICATE(_Py_uop_sym_is_bottom(sym), "(42 and 43) isn't bottom");
 
+
+    sym = _Py_uop_sym_new_const(ctx, Py_None);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 0, "bool(None) is not False");
+    sym = _Py_uop_sym_new_const(ctx, Py_False);
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 0, "bool(False) is not False");
+    sym = _Py_uop_sym_new_const(ctx, PyLong_FromLong(0));
+    TEST_PREDICATE(_Py_uop_sym_truthiness(sym) == 0, "bool(0) is not False");
+
     _Py_uop_abstractcontext_fini(ctx);
     Py_DECREF(val_42);
     Py_DECREF(val_43);