#define _PY_GC_SCHEDULED_BIT (1U << 4)
#define _PY_EVAL_PLEASE_STOP_BIT (1U << 5)
#define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6)
+#define _PY_EVAL_JIT_INVALIDATE_COLD_BIT (1U << 7)
/* Reserve a few bits for future use */
#define _PY_EVAL_EVENTS_BITS 8
struct callable_cache callable_cache;
_PyOptimizerObject *optimizer;
_PyExecutorObject *executor_list_head;
-
+ size_t trace_run_counter;
_rare_events rare_events;
PyDict_WatchCallback builtins_dict_watcher;
typedef struct {
uint8_t opcode;
uint8_t oparg;
- uint16_t valid:1;
- uint16_t linked:1;
- uint16_t chain_depth:14; // Must be big engough for MAX_CHAIN_DEPTH - 1.
+ uint8_t valid:1;
+ uint8_t linked:1;
+ uint8_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1.
+ bool warm;
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
_PyBloomFilter bloom;
_PyExecutorLinkListNode links;
#ifdef _Py_TIER2
PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
+PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
+
#else
# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
# define _Py_Executors_InvalidateAll(A, B) ((void)0)
+# define _Py_Executors_InvalidateCold(A) ((void)0)
+
#endif
+// Initial value for trace_run_counter: the counter is decremented once per
+// executor run, and cold-executor invalidation is triggered when it reaches
+// zero (after which it is reset to this value).
+#define JIT_CLEANUP_THRESHOLD 100000
// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 800
#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
#define _MAKE_CELL MAKE_CELL
#define _MAKE_FUNCTION MAKE_FUNCTION
+#define _MAKE_WARM 439
#define _MAP_ADD MAP_ADD
#define _MATCH_CLASS MATCH_CLASS
#define _MATCH_KEYS MATCH_KEYS
#define _MATCH_MAPPING MATCH_MAPPING
#define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 439
-#define _MONITOR_CALL 440
-#define _MONITOR_JUMP_BACKWARD 441
-#define _MONITOR_RESUME 442
+#define _MAYBE_EXPAND_METHOD 440
+#define _MONITOR_CALL 441
+#define _MONITOR_JUMP_BACKWARD 442
+#define _MONITOR_RESUME 443
#define _NOP NOP
#define _POP_EXCEPT POP_EXCEPT
-#define _POP_JUMP_IF_FALSE 443
-#define _POP_JUMP_IF_TRUE 444
+#define _POP_JUMP_IF_FALSE 444
+#define _POP_JUMP_IF_TRUE 445
#define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 445
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 446
#define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 446
+#define _PUSH_FRAME 447
#define _PUSH_NULL PUSH_NULL
-#define _PY_FRAME_GENERAL 447
-#define _PY_FRAME_KW 448
-#define _QUICKEN_RESUME 449
-#define _REPLACE_WITH_TRUE 450
+#define _PY_FRAME_GENERAL 448
+#define _PY_FRAME_KW 449
+#define _QUICKEN_RESUME 450
+#define _REPLACE_WITH_TRUE 451
#define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 451
-#define _SEND 452
-#define _SEND_GEN_FRAME 453
+#define _SAVE_RETURN_OFFSET 452
+#define _SEND 453
+#define _SEND_GEN_FRAME 454
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 454
-#define _STORE_ATTR 455
-#define _STORE_ATTR_INSTANCE_VALUE 456
-#define _STORE_ATTR_SLOT 457
-#define _STORE_ATTR_WITH_HINT 458
+#define _START_EXECUTOR 455
+#define _STORE_ATTR 456
+#define _STORE_ATTR_INSTANCE_VALUE 457
+#define _STORE_ATTR_SLOT 458
+#define _STORE_ATTR_WITH_HINT 459
#define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 459
-#define _STORE_FAST_0 460
-#define _STORE_FAST_1 461
-#define _STORE_FAST_2 462
-#define _STORE_FAST_3 463
-#define _STORE_FAST_4 464
-#define _STORE_FAST_5 465
-#define _STORE_FAST_6 466
-#define _STORE_FAST_7 467
+#define _STORE_FAST 460
+#define _STORE_FAST_0 461
+#define _STORE_FAST_1 462
+#define _STORE_FAST_2 463
+#define _STORE_FAST_3 464
+#define _STORE_FAST_4 465
+#define _STORE_FAST_5 466
+#define _STORE_FAST_6 467
+#define _STORE_FAST_7 468
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 468
-#define _STORE_SUBSCR 469
+#define _STORE_SLICE 469
+#define _STORE_SUBSCR 470
#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
#define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
#define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 470
-#define _TO_BOOL 471
+#define _TIER2_RESUME_CHECK 471
+#define _TO_BOOL 472
#define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT
#define _TO_BOOL_LIST TO_BOOL_LIST
#define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 472
+#define _UNPACK_SEQUENCE 473
#define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
#define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
#define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
#define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE
#define __DO_CALL_FUNCTION_EX _DO_CALL_FUNCTION_EX
-#define MAX_UOP_ID 472
+#define MAX_UOP_ID 473
#ifdef __cplusplus
}
[_INTERNAL_INCREMENT_OPT_COUNTER] = 0,
[_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG,
[_START_EXECUTOR] = 0,
+ [_MAKE_WARM] = 0,
[_FATAL_ERROR] = 0,
[_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG,
[_DEOPT] = 0,
[_LOAD_SUPER_ATTR_METHOD] = "_LOAD_SUPER_ATTR_METHOD",
[_MAKE_CELL] = "_MAKE_CELL",
[_MAKE_FUNCTION] = "_MAKE_FUNCTION",
+ [_MAKE_WARM] = "_MAKE_WARM",
[_MAP_ADD] = "_MAP_ADD",
[_MATCH_CLASS] = "_MATCH_CLASS",
[_MATCH_KEYS] = "_MATCH_KEYS",
return 0;
case _START_EXECUTOR:
return 0;
+ case _MAKE_WARM:
+ return 0;
case _FATAL_ERROR:
return 0;
case _CHECK_VALIDITY_AND_SET_IP:
--- /dev/null
+Improve JIT memory consumption by periodically freeing memory used by infrequently executed code.
+This change is especially likely to improve the memory footprint of long-running programs.
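
To make the hunks below easier to follow, here is a minimal standalone sketch of the warm/cold scheme this change implements. Everything in it (`toy_executor`, `toy_run`, `sweep_cold`) is an illustrative stand-in, not CPython API: each executor carries a `warm` bit that is set whenever it runs, and a periodic sweep frees every executor whose bit is still clear, then clears the bit on the survivors so they must run again to survive the next sweep.

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for CPython's executor object. */
typedef struct toy_executor {
    struct toy_executor *next;
    bool warm;                /* set on every run, cleared by each sweep */
} toy_executor;

/* Plays the role of _MAKE_WARM: mark the executor as recently run. */
static void toy_run(toy_executor *e)
{
    e->warm = true;
}

/* Plays the role of _Py_Executors_InvalidateCold: free executors that
 * have not run since the previous sweep; survivors start the next
 * window cold. */
static void sweep_cold(toy_executor **head)
{
    toy_executor **p = head;
    while (*p != NULL) {
        toy_executor *e = *p;
        if (!e->warm) {
            *p = e->next;     /* unlink and free the cold executor */
            free(e);
        }
        else {
            e->warm = false;
            p = &e->next;
        }
    }
}

int main(void)
{
    toy_executor *head = NULL;
    for (int i = 0; i < 3; i++) {
        toy_executor *e = calloc(1, sizeof(*e));
        if (e == NULL) {
            return 1;
        }
        e->next = head;
        head = e;
    }
    toy_run(head);            /* only one of the three executors runs */
    sweep_cold(&head);        /* the other two are freed as cold */
    int survivors = 0;
    for (toy_executor *e = head; e != NULL; e = e->next) {
        survivors++;
    }
    printf("%d executor(s) survived the sweep\n", survivors);  /* 1 */
    return 0;
}
```

One deliberate difference from the real `_Py_Executors_InvalidateCold` below: CPython first collects cold executors into a list and only then clears them, because clearing one executor can deallocate others mid-traversal; the toy nodes are independent, so the sketch frees them inline.
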
assert(((_PyExecutorObject *)executor)->vm_data.valid);
}
+ tier2 op(_MAKE_WARM, (--)) {
+ current_executor->vm_data.warm = true;
+        // It's okay if this wraps around: the counter is unsigned, and only
+        // the single pass through zero matters.
+ if (--tstate->interp->trace_run_counter == 0) {
+ _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
+ }
+ }
+
tier2 op(_FATAL_ERROR, (--)) {
assert(0);
Py_FatalError("Fatal error uop executed.");
_Py_RunGC(tstate);
}
+ if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) {
+ _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
+ _Py_Executors_InvalidateCold(tstate->interp);
+ tstate->interp->trace_run_counter = JIT_CLEANUP_THRESHOLD;
+ }
+
/* GIL drop request */
if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) {
/* Give another thread a chance */
break;
}
+ case _MAKE_WARM: {
+ current_executor->vm_data.warm = true;
+            // It's okay if this wraps around: the counter is unsigned, and only
+            // the single pass through zero matters.
+ if (--tstate->interp->trace_run_counter == 0) {
+ _Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
+ }
+ break;
+ }
+
case _FATAL_ERROR: {
assert(0);
Py_FatalError("Fatal error uop executed.");
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
+ ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0);
uint32_t target = 0;
for (;;) {
executor->jit_code = NULL;
executor->jit_side_entry = NULL;
executor->jit_size = 0;
+    // This is initialized to true to prevent the executor from being
+    // detected as cold and invalidated before it has had a chance to run.
+ executor->vm_data.warm = true;
if (_PyJIT_Compile(executor, executor->trace, length)) {
Py_DECREF(executor);
return NULL;
}
}
+void
+_Py_Executors_InvalidateCold(PyInterpreterState *interp)
+{
+ /* Walk the list of executors */
+    /* TODO: Use a tree to avoid traversing as many objects. */
+ PyObject *invalidate = PyList_New(0);
+ if (invalidate == NULL) {
+ goto error;
+ }
+
+ /* Clearing an executor can deallocate others, so we need to make a list of
+ * executors to invalidate first */
+ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
+ assert(exec->vm_data.valid);
+ _PyExecutorObject *next = exec->vm_data.links.next;
+
+ if (!exec->vm_data.warm && PyList_Append(invalidate, (PyObject *)exec) < 0) {
+ goto error;
+ }
+ else {
+ exec->vm_data.warm = false;
+ }
+
+ exec = next;
+ }
+ for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
+ _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i);
+ executor_clear(exec);
+ }
+ Py_DECREF(invalidate);
+ return;
+error:
+ PyErr_Clear();
+ Py_XDECREF(invalidate);
+ // If we're truly out of memory, wiping out everything is a fine fallback
+ _Py_Executors_InvalidateAll(interp, 0);
+}
+
#endif /* _Py_TIER2 */
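
Taken together, `_MAKE_WARM` and `_Py_Executors_InvalidateCold` amount to a second-chance (clock-style) eviction policy: each sweep clears every warm bit, so an executor is freed only if it fails to run for a full window of `JIT_CLEANUP_THRESHOLD` trace executions between two consecutive sweeps. Newly created executors start out warm (see the initialization above), so each one is guaranteed to survive at least one sweep. The error path is deliberately blunt: if even the scratch list cannot be allocated, `_Py_Executors_InvalidateAll` reclaims everything, trading lost traces for memory at exactly the moment memory is scarce.
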
break;
}
+ case _MAKE_WARM: {
+ break;
+ }
+
case _FATAL_ERROR: {
break;
}
#ifdef _Py_TIER2
(void)_Py_SetOptimizer(interp, NULL);
interp->executor_list_head = NULL;
+ interp->trace_run_counter = JIT_CLEANUP_THRESHOLD;
#endif
if (interp != &runtime->_main_interpreter) {
/* Fix the self-referential, statically initialized fields. */
"_PyList_FromStackRefSteal",
"_PyTuple_FromArraySteal",
"_PyTuple_FromStackRefSteal",
+    "_Py_set_eval_breaker_bit",
)
ESCAPING_FUNCTIONS = (
"-fno-plt",
# Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector",
+# On aarch64 Linux, out-of-line atomic helper calls were being emitted,
+# and this flag was required to disable them (see the example below):
+ "-mno-outline-atomics",
"-std=c11",
*self.args,
]
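
For reference on the `-mno-outline-atomics` flag above: on aarch64, toolchains that default to `-moutline-atomics` lower C11 atomics into calls to runtime helper functions so that the same binary can use LSE instructions where available, and those calls appear as external symbols, presumably what the JIT's compiled templates were tripping over here. A minimal illustration follows; the helper name is an assumption about typical libgcc/compiler-rt naming, not something this change pins down.

```c
#include <stdatomic.h>

/* Built with -moutline-atomics (the default on many aarch64 toolchains),
 * this lowers to a call to a helper such as __aarch64_ldadd4_relax, which
 * dispatches at runtime based on LSE support. With -mno-outline-atomics,
 * the compiler inlines the atomic sequence, leaving no external call. */
int fetch_add_relaxed(_Atomic int *p)
{
    return atomic_fetch_add_explicit(p, 1, memory_order_relaxed);
}

int main(void)
{
    _Atomic int x = 0;
    return fetch_add_relaxed(&x);  /* returns the previous value, 0 */
}
```
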