# define _PyUnicode_CHECK(op) PyUnicode_Check(op)
#endif
-#define _PyUnicode_UTF8(op) \
- (_PyCompactUnicodeObject_CAST(op)->utf8)
-#define PyUnicode_UTF8(op) \
- (assert(_PyUnicode_CHECK(op)), \
- PyUnicode_IS_COMPACT_ASCII(op) ? \
- ((char*)(_PyASCIIObject_CAST(op) + 1)) : \
- _PyUnicode_UTF8(op))
-#define _PyUnicode_UTF8_LENGTH(op) \
- (_PyCompactUnicodeObject_CAST(op)->utf8_length)
-#define PyUnicode_UTF8_LENGTH(op) \
- (assert(_PyUnicode_CHECK(op)), \
- PyUnicode_IS_COMPACT_ASCII(op) ? \
- _PyASCIIObject_CAST(op)->length : \
- _PyUnicode_UTF8_LENGTH(op))
+static inline char* _PyUnicode_UTF8(PyObject *op)
+{
+ return FT_ATOMIC_LOAD_PTR_ACQUIRE(_PyCompactUnicodeObject_CAST(op)->utf8);
+}
+
+static inline char* PyUnicode_UTF8(PyObject *op)
+{
+ assert(_PyUnicode_CHECK(op));
+ if (PyUnicode_IS_COMPACT_ASCII(op)) {
+ return ((char*)(_PyASCIIObject_CAST(op) + 1));
+ }
+ else {
+ return _PyUnicode_UTF8(op);
+ }
+}
+
+static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8)
+{
+ FT_ATOMIC_STORE_PTR_RELEASE(_PyCompactUnicodeObject_CAST(op)->utf8, utf8);
+}
+
+static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op)
+{
+ assert(_PyUnicode_CHECK(op));
+ if (PyUnicode_IS_COMPACT_ASCII(op)) {
+ return _PyASCIIObject_CAST(op)->length;
+ }
+ else {
+ return _PyCompactUnicodeObject_CAST(op)->utf8_length;
+ }
+}
+
+static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length)
+{
+ _PyCompactUnicodeObject_CAST(op)->utf8_length = length;
+}
#define _PyUnicode_LENGTH(op) \
(_PyASCIIObject_CAST(op)->length)
(_PyASCIIObject_CAST(op)->state)
#define _PyUnicode_HASH(op) \
(_PyASCIIObject_CAST(op)->hash)
-#define _PyUnicode_KIND(op) \
- (assert(_PyUnicode_CHECK(op)), \
- _PyASCIIObject_CAST(op)->state.kind)
-#define _PyUnicode_GET_LENGTH(op) \
- (assert(_PyUnicode_CHECK(op)), \
- _PyASCIIObject_CAST(op)->length)
+
+static inline Py_hash_t PyUnicode_HASH(PyObject *op)
+{
+ assert(_PyUnicode_CHECK(op));
+ return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash);
+}
+
+static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash)
+{
+ FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash);
+}
+
#define _PyUnicode_DATA_ANY(op) \
(_PyUnicodeObject_CAST(op)->data.any)
-#define _PyUnicode_SHARE_UTF8(op) \
- (assert(_PyUnicode_CHECK(op)), \
- assert(!PyUnicode_IS_COMPACT_ASCII(op)), \
- (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)))
+static inline int _PyUnicode_SHARE_UTF8(PyObject *op)
+{
+ assert(_PyUnicode_CHECK(op));
+ assert(!PyUnicode_IS_COMPACT_ASCII(op));
+ return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op));
+}
/* true if the Unicode object has an allocated UTF-8 memory block
(not shared with other data) */
-#define _PyUnicode_HAS_UTF8_MEMORY(op) \
- ((!PyUnicode_IS_COMPACT_ASCII(op) \
- && _PyUnicode_UTF8(op) \
- && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
+static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op)
+{
+ return (!PyUnicode_IS_COMPACT_ASCII(op)
+ && _PyUnicode_UTF8(op) != NULL
+ && _PyUnicode_UTF8(op) != PyUnicode_DATA(op));
+}
+
/* Generic helper macro to convert characters of different types.
from_type and to_type have to be valid type names, begin and end
|| kind == PyUnicode_2BYTE_KIND
|| kind == PyUnicode_4BYTE_KIND);
CHECK(ascii->state.ascii == 0);
- CHECK(compact->utf8 != data);
+ CHECK(_PyUnicode_UTF8(op) != data);
}
else {
PyUnicodeObject *unicode = _PyUnicodeObject_CAST(op);
CHECK(ascii->state.compact == 0);
CHECK(data != NULL);
if (ascii->state.ascii) {
- CHECK(compact->utf8 == data);
+ CHECK(_PyUnicode_UTF8(op) == data);
CHECK(compact->utf8_length == ascii->length);
}
else {
- CHECK(compact->utf8 != data);
+ CHECK(_PyUnicode_UTF8(op) != data);
}
}
-
- if (compact->utf8 == NULL)
+#ifndef Py_GIL_DISABLED
+ if (_PyUnicode_UTF8(op) == NULL)
CHECK(compact->utf8_length == 0);
+#endif
}
/* check that the best kind is used: O(n) operation */
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
PyMem_Free(_PyUnicode_UTF8(unicode));
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
+ PyUnicode_SET_UTF8_LENGTH(unicode, 0);
+ PyUnicode_SET_UTF8(unicode, NULL);
}
#ifdef Py_TRACE_REFS
_Py_ForgetReference(unicode);
if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode))
{
PyMem_Free(_PyUnicode_UTF8(unicode));
- _PyUnicode_UTF8(unicode) = NULL;
- _PyUnicode_UTF8_LENGTH(unicode) = 0;
+ PyUnicode_SET_UTF8_LENGTH(unicode, 0);
+ PyUnicode_SET_UTF8(unicode, NULL);
}
data = (PyObject *)PyObject_Realloc(data, new_size);
}
_PyUnicode_DATA_ANY(unicode) = data;
if (share_utf8) {
- _PyUnicode_UTF8(unicode) = data;
- _PyUnicode_UTF8_LENGTH(unicode) = length;
+ PyUnicode_SET_UTF8_LENGTH(unicode, length);
+ PyUnicode_SET_UTF8(unicode, data);
}
_PyUnicode_LENGTH(unicode) = length;
PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0);
assert(unicode != NULL);
assert(_PyUnicode_CHECK(unicode));
- assert(_PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+ assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
ucs4_out = PyUnicode_4BYTE_DATA(unicode);
for (iter = begin; iter < end; ) {
assert(ucs4_out < (PyUnicode_4BYTE_DATA(unicode) +
- _PyUnicode_GET_LENGTH(unicode)));
+ PyUnicode_GET_LENGTH(unicode)));
if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0])
&& (iter+1) < end
&& Py_UNICODE_IS_LOW_SURROGATE(iter[1]))
}
}
assert(ucs4_out == (PyUnicode_4BYTE_DATA(unicode) +
- _PyUnicode_GET_LENGTH(unicode)));
+ PyUnicode_GET_LENGTH(unicode)));
}
#endif
assert(_PyUnicode_CHECK(unicode));
if (Py_REFCNT(unicode) != 1)
return 0;
- if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1)
+ if (PyUnicode_HASH(unicode) != -1)
return 0;
if (PyUnicode_CHECK_INTERNED(unicode))
return 0;
static int unicode_fill_utf8(PyObject *unicode);
+
+static int
+unicode_ensure_utf8(PyObject *unicode)
+{
+ int err = 0;
+ if (PyUnicode_UTF8(unicode) == NULL) {
+ Py_BEGIN_CRITICAL_SECTION(unicode);
+ if (PyUnicode_UTF8(unicode) == NULL) {
+ err = unicode_fill_utf8(unicode);
+ }
+ Py_END_CRITICAL_SECTION();
+ }
+ return err;
+}
+
const char *
PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
{
return NULL;
}
- if (PyUnicode_UTF8(unicode) == NULL) {
- if (unicode_fill_utf8(unicode) == -1) {
- if (psize) {
- *psize = -1;
- }
- return NULL;
+ if (unicode_ensure_utf8(unicode) == -1) {
+ if (psize) {
+ *psize = -1;
}
+ return NULL;
}
if (psize) {
static int
unicode_fill_utf8(PyObject *unicode)
{
+ _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(unicode);
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
assert(!PyUnicode_IS_ASCII(unicode));
PyErr_NoMemory();
return -1;
}
- _PyUnicode_UTF8(unicode) = cache;
- _PyUnicode_UTF8_LENGTH(unicode) = len;
memcpy(cache, start, len);
cache[len] = '\0';
+ PyUnicode_SET_UTF8_LENGTH(unicode, len);
+ PyUnicode_SET_UTF8(unicode, cache);
_PyBytesWriter_Dealloc(&writer);
return 0;
}
return 0;
}
- Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni));
+ Py_hash_t right_hash = PyUnicode_HASH(right_uni);
assert(right_hash != -1);
- Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left));
+ Py_hash_t hash = PyUnicode_HASH(left);
if (hash != -1 && hash != right_hash) {
return 0;
}
#ifdef Py_DEBUG
assert(_Py_HashSecret_Initialized);
#endif
- Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self));
+ Py_hash_t hash = PyUnicode_HASH(self);
if (hash != -1) {
return hash;
}
x = _Py_HashBytes(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
- FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
+ PyUnicode_SET_HASH(self, x);
return x;
}
_PyUnicode_STATE(self).compact = 0;
_PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii;
_PyUnicode_STATE(self).statically_allocated = 0;
- _PyUnicode_UTF8_LENGTH(self) = 0;
- _PyUnicode_UTF8(self) = NULL;
+ PyUnicode_SET_UTF8_LENGTH(self, 0);
+ PyUnicode_SET_UTF8(self, NULL);
_PyUnicode_DATA_ANY(self) = NULL;
share_utf8 = 0;
_PyUnicode_DATA_ANY(self) = data;
if (share_utf8) {
- _PyUnicode_UTF8_LENGTH(self) = length;
- _PyUnicode_UTF8(self) = data;
+ PyUnicode_SET_UTF8_LENGTH(self, length);
+ PyUnicode_SET_UTF8(self, data);
}
memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1));