# Update this if the database changes. Make sure to do a full rebuild
# (e.g. 'make distclean && make') to get the correct checksum.
- expectedchecksum = ('35e842600fa7ae2db93739db08ef201b726a2374'
+ expectedchecksum = ('1ba453ec456896f1190d849b6e9b7c2e1a4128e0'
if quicktest else
- '23ab09ed4abdf93db23b97359108ed630dd8311d')
+ '46ca89d9fe34881d0be3a4a4b29f5aa8c019640c')
def test_function_checksum(self):
db = self.db
# New in 16.0.0
self.assertEqual(self.db.decomposition('\U0001CCD6'), '' if self.old else '<font> 0041')
+ # Precomposed Hangul syllables: two-part (LV) and three-part (LVT) decompositions
+ self.assertEqual(self.db.decomposition('\uAC00'), '1100 1161')
+ self.assertEqual(self.db.decomposition('\uD4DB'), '1111 1171 11B6')
+ self.assertEqual(self.db.decomposition('\uC2F8'), '110A 1161')
+ self.assertEqual(self.db.decomposition('\uD7A3'), '1112 1175 11C2')
+
self.assertRaises(TypeError, self.db.decomposition)
self.assertRaises(TypeError, self.db.decomposition, 'xx')
class Unicode_3_2_0_FunctionsTest(UnicodeFunctionsTest):
db = unicodedata.ucd_3_2_0
old = True
- expectedchecksum = ('4154d8d1232837e255edf3cdcbb5ab184d71f4a4'
+ expectedchecksum = ('883824cb6c0ccf994e4451ebf281e2d6d479af47'
if quicktest else
- 'b0a8df4ce8cf910def4e75f2d03c93defcc9bb09')
+ 'caf1a7f2f380f927461837f1901ef20683f98683')
class UnicodeMiscTest(unittest.TestCase):
return PyUnicode_FromString(_PyUnicode_EastAsianWidthNames[index]);
}
+// Hangul syllable arithmetic constants, shared by UCD.decomposition() and nfd_nfkd()
+#define SBase 0xAC00
+#define LBase 0x1100
+#define VBase 0x1161
+#define TBase 0x11A7
+#define LCount 19
+#define VCount 21
+#define TCount 28
+#define NCount (VCount*TCount)
+#define SCount (LCount*NCount)
+
/*[clinic input]
unicodedata.UCD.decomposition
return Py_GetConstant(Py_CONSTANT_EMPTY_STR); /* unassigned */
}
+ // Hangul Decomposition.
+ // See section 3.12.2, "Hangul Syllable Decomposition"
+ // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
+ if (SBase <= code && code < (SBase + SCount)) {
+ int SIndex = code - SBase;
+ int L = LBase + SIndex / NCount;
+ int V = VBase + (SIndex % NCount) / TCount;
+ int T = TBase + SIndex % TCount;
+ if (T != TBase) {
+ PyOS_snprintf(decomp, sizeof(decomp),
+ "%04X %04X %04X", L, V, T);
+ }
+ else {
+ PyOS_snprintf(decomp, sizeof(decomp),
+ "%04X %04X", L, V);
+ }
+ return PyUnicode_FromString(decomp);
+ }
+
if (code < 0 || code >= 0x110000)
index = 0;
else {
(*index)++;
}
-#define SBase 0xAC00
-#define LBase 0x1100
-#define VBase 0x1161
-#define TBase 0x11A7
-#define LCount 19
-#define VCount 21
-#define TCount 28
-#define NCount (VCount*TCount)
-#define SCount (LCount*NCount)
-
static PyObject*
nfd_nfkd(PyObject *self, PyObject *input, int k)
{
}
output = new_output;
}
- /* Hangul Decomposition. */
+ // Hangul Decomposition.
+ // See section 3.12.2, "Hangul Syllable Decomposition"
+ // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
if (SBase <= code && code < (SBase+SCount)) {
int SIndex = code - SBase;
int L = LBase + SIndex / NCount;