gh-80667: Fix case-sensitivity of some Unicode literal escapes (GH-107281)

author James <snoopjedi@gmail.com>

Thu, 12 Feb 2026 16:50:40 +0000 (11:50 -0500)

committer GitHub <noreply@github.com>

Thu, 12 Feb 2026 16:50:40 +0000 (18:50 +0200)
author James <snoopjedi@gmail.com>
Thu, 12 Feb 2026 16:50:40 +0000 (11:50 -0500)
committer GitHub <noreply@github.com>
Thu, 12 Feb 2026 16:50:40 +0000 (18:50 +0200)
diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py

index 0e2c25aaff2fe9dadb3439b302da25c54ee76fab..0c641a455c0747cb81e027c49c5f9eee452dd0f8 100644 (file)
--- a/Lib/test/test_ucn.py
+++ b/Lib/test/test_ucn.py
@@ -88,6 +88,9 @@ class UnicodeNamesTest(unittest.TestCase):
          self.checkletter("HANGUL SYLLABLE HWEOK", "\ud6f8")
          self.checkletter("HANGUL SYLLABLE HIH", "\ud7a3")
  
+        self.checkletter("haNGul SYllABle WAe", '\uc65c')
+        self.checkletter("HAngUL syLLabLE waE", '\uc65c')
+
          self.assertRaises(ValueError, unicodedata.name, "\ud7a4")
  
      def test_cjk_unified_ideographs(self):
@@ -103,6 +106,11 @@ class UnicodeNamesTest(unittest.TestCase):
          self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D")
          self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A")
  
+        self.checkletter("cjK UniFIeD idEogRAph-3aBc", "\u3abc")
+        self.checkletter("CJk uNIfiEd IDeOGraPH-3AbC", "\u3abc")
+        self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd")
+        self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd")
+
      def test_bmp_characters(self):
          for code in range(0x10000):
              char = chr(code)
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst

new file mode 100644 (file)

index 0000000..db87a5e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst
@@ -0,0 +1,2 @@
+Literals using the ``\N{name}`` escape syntax can now construct CJK
+ideographs and Hangul syllables using case-insensitive names.
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c

index 091e6bcb9f3f49b423b93196f95d583fdf64895b..44ffedec3840fea52eea8470476c96d2328a27e3 100644 (file)
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -1405,7 +1405,7 @@ find_syllable(const char *str, int *len, int *pos, int count, int column)
          len1 = Py_SAFE_DOWNCAST(strlen(s), size_t, int);
          if (len1 <= *len)
              continue;
-        if (strncmp(str, s, len1) == 0) {
+        if (PyOS_strnicmp(str, s, len1) == 0) {
              *len = len1;
              *pos = i;
          }
@@ -1437,7 +1437,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
       * PUA */
  
      /* Check for hangul syllables. */
-    if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
+    if (PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0) {
          int len, L = -1, V = -1, T = -1;
          const char *pos = name + 16;
          find_syllable(pos, &len, &L, LCount, 0);
@@ -1455,7 +1455,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
      }
  
      /* Check for unified ideographs. */
-    if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
+    if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
          /* Four or five hexdigits must follow. */
          unsigned int v;
          v = 0;
@@ -1465,10 +1465,11 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
              return 0;
          while (namelen--) {
              v *= 16;
-            if (*name >= '0' && *name <= '9')
-                v += *name - '0';
-            else if (*name >= 'A' && *name <= 'F')
-                v += *name - 'A' + 10;
+            Py_UCS1 c = Py_TOUPPER(*name);
+            if (c >= '0' && c <= '9')
+                v += c - '0';
+            else if (c >= 'A' && c <= 'F')
+                v += c - 'A' + 10;
              else
                  return 0;
              name++;
author	James <snoopjedi@gmail.com>
	Thu, 12 Feb 2026 16:50:40 +0000 (11:50 -0500)
committer	GitHub <noreply@github.com>
	Thu, 12 Feb 2026 16:50:40 +0000 (18:50 +0200)
Lib/test/test_ucn.py		patch | blob | blame | history
Misc/NEWS.d/next/Core_and_Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst	[new file with mode: 0644]	patch | blob
Modules/unicodedata.c		patch | blob | blame | history