self.assertFalse("©".isidentifier())
self.assertFalse("0".isidentifier())
+ @support.cpython_only
+ def test_isidentifier_legacy(self):
+ import _testcapi
+ u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'  # every character is non-BMP (above U+FFFF)
+ self.assertTrue(u.isidentifier())  # baseline on the plain literal
+ self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier())
+
def test_isprintable(self):
self.assertTrue("".isprintable())
self.assertTrue(" ".isprintable())
return len && i == len;
}
else {
- Py_ssize_t i, len = PyUnicode_GET_SIZE(self);
+ Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
if (len == 0) {
/* an empty string is not a valid identifier */
return 0;
}
const wchar_t *wstr = _PyUnicode_WSTR(self);
- Py_UCS4 ch = wstr[0];
+ Py_UCS4 ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+ && i < len
+ && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+ {
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+ i++;
+ }
+#endif
if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
return 0;
}
- for (i = 1; i < len; i++) {
- ch = wstr[i];
+ while (i < len) {
+ ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+ if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+ && i < len
+ && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+ {
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+ i++;
+ }
+#endif
if (!_PyUnicode_IsXidContinue(ch)) {
return 0;
}