Make _PyUnicode_FromId() function compatible with subinterpreters.
Each interpreter now has an array of identifier objects (interned
strings decoded from UTF-8).
* Add PyInterpreterState.unicode.identifiers: array of identifiers
objects.
* Add _PyRuntimeState.unicode_ids used to allocate unique indexes
to _Py_Identifier.
* Rewrite the _Py_Identifier structure.
Microbenchmark on _PyUnicode_FromId(&PyId_a) with _Py_IDENTIFIER(a):
[ref] 2.42 ns +- 0.00 ns -> [atomic] 3.39 ns +- 0.00 ns: 1.40x slower
This change adds 1 ns per _PyUnicode_FromId() call in average.
_PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
*/
typedef struct _Py_Identifier {
- struct _Py_Identifier *next;
const char* string;
- PyObject *object;
+ // Index in PyInterpreterState.unicode.ids.array. It is process-wide
+ // unique and must be initialized to -1.
+ Py_ssize_t index;
} _Py_Identifier;
-#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
+#define _Py_static_string_init(value) { .string = value, .index = -1 }
#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
PyBytesObject *characters[256];
};
+struct _Py_unicode_ids {
+ Py_ssize_t size;
+ PyObject **array;
+};
+
struct _Py_unicode_state {
// The empty Unicode object is a singleton to improve performance.
PyObject *empty_string;
shared as well. */
PyObject *latin1[256];
struct _Py_unicode_fs_codec fs_codec;
+ // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
+ struct _Py_unicode_ids ids;
};
struct _Py_float_state {
void *userData;
} _Py_AuditHookEntry;
+struct _Py_unicode_runtime_ids {
+ PyThread_type_lock lock;
+ Py_ssize_t next_index;
+};
+
/* Full Python runtime state */
typedef struct pyruntimestate {
void *open_code_userdata;
_Py_AuditHookEntry *audit_hook_head;
+ struct _Py_unicode_runtime_ids unicode_ids;
+
// XXX Consolidate globals found via the check-c-globals script.
} _PyRuntimeState;
--- /dev/null
+Make :c:func:`_PyUnicode_FromId` function compatible with subinterpreters.
+Each interpreter now has an array of identifier objects (interned strings
+decoded from UTF-8). Patch by Victor Stinner.
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
+#include "pycore_atomic_funcs.h" // _Py_atomic_size_get()
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
#include "pycore_format.h" // F_LJUST
#include "pycore_initconfig.h" // _PyStatus_OK()
_Py_error_handler error_handler, const char *errors,
Py_ssize_t *consumed);
-/* List of static strings. */
-static _Py_Identifier *static_strings = NULL;
-
/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
0, 0, 0, 0, 0, 0, 0, 0,
return PyUnicode_DecodeUTF8Stateful(u, (Py_ssize_t)size, NULL, NULL);
}
+
PyObject *
_PyUnicode_FromId(_Py_Identifier *id)
{
- if (id->object) {
- return id->object;
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ struct _Py_unicode_ids *ids = &interp->unicode.ids;
+
+ int index = _Py_atomic_size_get(&id->index);
+ if (index < 0) {
+ struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids;
+
+ PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK);
+ // Check again to detect concurrent access. Another thread can have
+ // initialized the index while this thread waited for the lock.
+ index = _Py_atomic_size_get(&id->index);
+ if (index < 0) {
+ assert(rt_ids->next_index < PY_SSIZE_T_MAX);
+ index = rt_ids->next_index;
+ rt_ids->next_index++;
+ _Py_atomic_size_set(&id->index, index);
+ }
+ PyThread_release_lock(rt_ids->lock);
}
+ assert(index >= 0);
PyObject *obj;
- obj = PyUnicode_DecodeUTF8Stateful(id->string,
- strlen(id->string),
+ if (index < ids->size) {
+ obj = ids->array[index];
+ if (obj) {
+ // Return a borrowed reference
+ return obj;
+ }
+ }
+
+ obj = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string),
NULL, NULL);
if (!obj) {
return NULL;
}
PyUnicode_InternInPlace(&obj);
- assert(!id->next);
- id->object = obj;
- id->next = static_strings;
- static_strings = id;
- return id->object;
+ if (index >= ids->size) {
+ // Overallocate to reduce the number of realloc
+ Py_ssize_t new_size = Py_MAX(index * 2, 16);
+ Py_ssize_t item_size = sizeof(ids->array[0]);
+ PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size);
+ if (new_array == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size);
+ ids->array = new_array;
+ ids->size = new_size;
+ }
+
+ // The array stores a strong reference
+ ids->array[index] = obj;
+
+ // Return a borrowed reference
+ return obj;
}
+
static void
-unicode_clear_static_strings(void)
+unicode_clear_identifiers(PyThreadState *tstate)
{
- _Py_Identifier *tmp, *s = static_strings;
- while (s) {
- Py_CLEAR(s->object);
- tmp = s->next;
- s->next = NULL;
- s = tmp;
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ struct _Py_unicode_ids *ids = &interp->unicode.ids;
+ for (Py_ssize_t i=0; i < ids->size; i++) {
+ Py_XDECREF(ids->array[i]);
}
- static_strings = NULL;
+ ids->size = 0;
+ PyMem_Free(ids->array);
+ ids->array = NULL;
+ // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid
+ // after Py_Finalize().
}
+
/* Internal function, doesn't check maximum character */
PyObject*
Py_CLEAR(state->latin1[i]);
}
- if (_Py_IsMainInterpreter(tstate)) {
- unicode_clear_static_strings();
- }
+ unicode_clear_identifiers(tstate);
_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
}
runtime->interpreters.mutex = PyThread_allocate_lock();
if (runtime->interpreters.mutex == NULL) {
- return _PyStatus_ERR("Can't initialize threads for interpreter");
+ return _PyStatus_NO_MEMORY();
}
runtime->interpreters.next_id = -1;
runtime->xidregistry.mutex = PyThread_allocate_lock();
if (runtime->xidregistry.mutex == NULL) {
- return _PyStatus_ERR("Can't initialize threads for cross-interpreter data registry");
+ return _PyStatus_NO_MEMORY();
}
// Set it to the ID of the main thread of the main interpreter.
runtime->main_thread = PyThread_get_thread_ident();
+ runtime->unicode_ids.lock = PyThread_allocate_lock();
+ if (runtime->unicode_ids.lock == NULL) {
+ return _PyStatus_NO_MEMORY();
+ }
+ runtime->unicode_ids.next_index = 0;
+
return _PyStatus_OK();
}
/* Force the allocator used by _PyRuntimeState_Init(). */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
-
- if (runtime->interpreters.mutex != NULL) {
- PyThread_free_lock(runtime->interpreters.mutex);
- runtime->interpreters.mutex = NULL;
+#define FREE_LOCK(LOCK) \
+ if (LOCK != NULL) { \
+ PyThread_free_lock(LOCK); \
+ LOCK = NULL; \
}
- if (runtime->xidregistry.mutex != NULL) {
- PyThread_free_lock(runtime->xidregistry.mutex);
- runtime->xidregistry.mutex = NULL;
- }
+ FREE_LOCK(runtime->interpreters.mutex);
+ FREE_LOCK(runtime->xidregistry.mutex);
+ FREE_LOCK(runtime->unicode_ids.lock);
+#undef FREE_LOCK
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
}
int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex);
int reinit_main_id = _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex);
int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex);
+ int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock);
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
if (reinit_interp < 0
|| reinit_main_id < 0
- || reinit_xidregistry < 0)
+ || reinit_xidregistry < 0
+ || reinit_unicode_ids < 0)
{
return _PyStatus_ERR("Failed to reinitialize runtime locks");