start = 0; \
}
-
-/* helper macros used by _Py_UNICODE_NEXT */
-#define _Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDBFF)
-#define _Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= ch && ch <= 0xDFFF)
-/* Join two surrogate characters and return a single Py_UCS4 value. */
-#define _Py_UNICODE_JOIN_SURROGATES(high, low) \
- (((((Py_UCS4)(high) & 0x03FF) << 10) | \
- ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
-
+ /* _Py_UNICODE_NEXT is a private macro used to retrieve the character pointed
+ * by 'ptr', possibly combining surrogate pairs on narrow builds.
+ * 'ptr' and 'end' must be Py_UNICODE*, with 'ptr' pointing at the character
+ * that should be returned and 'end' pointing to the end of the buffer.
+ * ('end' is used on narrow builds to detect a lone surrogate at the
+ * end of the buffer that should be returned unchanged.)
+ * The ptr and end arguments should be side-effect free and ptr must an lvalue.
+ * The type of the returned char is always Py_UCS4.
+ *
+ * Note: the macro advances ptr to next char, so it might have side-effects
+ * (especially if used with other macros).
+ */
-#define _Py_UNICODE_NEXT(ptr, end) \
- (((_Py_UNICODE_IS_HIGH_SURROGATE(*(ptr)) && (ptr) < (end)) && \
- _Py_UNICODE_IS_LOW_SURROGATE((ptr)[1])) ? \
- ((ptr) += 2,_Py_UNICODE_JOIN_SURROGATES((ptr)[-2], (ptr)[-1])) : \
+ #ifdef Py_UNICODE_WIDE
+ #define _Py_UNICODE_NEXT(ptr, end) *(ptr)++
+ #else
-Py_ssize_t PyUnicode_Count(PyObject *str,
- PyObject *substr,
- Py_ssize_t start,
- Py_ssize_t end)
++#define _Py_UNICODE_NEXT(ptr, end) \
++ (((Py_UNICODE_IS_HIGH_SURROGATE(*(ptr)) && (ptr) < (end)) && \
++ Py_UNICODE_IS_LOW_SURROGATE((ptr)[1])) ? \
++ ((ptr) += 2,Py_UNICODE_JOIN_SURROGATES((ptr)[-2], (ptr)[-1])) : \
+ (Py_UCS4)*(ptr)++)
+ #endif
+
+Py_ssize_t
+PyUnicode_Count(PyObject *str,
+ PyObject *substr,
+ Py_ssize_t start,
+ Py_ssize_t end)
{
Py_ssize_t result;
PyUnicodeObject* str_obj;