From: Stephan Bosch
Date: Fri, 21 Mar 2025 04:45:07 +0000 (+0100)
Subject: lib: unichar - Move Hangul syllable handling to unicode-transform.c
X-Git-Tag: 2.4.2~620
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=db3420536e18c2245a1f426e4265ce0c87821b79;p=thirdparty%2Fdovecot%2Fcore.git

lib: unichar - Move Hangul syllable handling to unicode-transform.c
---

diff --git a/src/lib/unichar.c b/src/lib/unichar.c
index 8248d5c0d4..43da11443e 100644
--- a/src/lib/unichar.c
+++ b/src/lib/unichar.c
@@ -6,9 +6,6 @@
 #include "unicode-data.h"
 #include "unichar.h"
 
-#define HANGUL_FIRST 0xac00
-#define HANGUL_LAST 0xd7a3
-
 const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN] =
 	{ 0xef, 0xbf, 0xbd }; /* 0xfffd */
 
@@ -240,51 +237,7 @@ unichar_t uni_ucs4_to_titlecase(unichar_t chr)
 	return chr;
 }
 
-static size_t uni_ucs4_decompose_hangul(unichar_t chr, unichar_t buf[3])
-{
-	/* The Unicode Standard, Section 3.12.2:
-	   Hangul Syllable Decomposition
-	 */
-
-	static const uint16_t s_base = 0xac00;
-	static const uint16_t l_base = 0x1100;
-	static const uint16_t v_base = 0x1161;
-	static const uint16_t t_base = 0x11a7;
-	static const unsigned int v_count = 21;
-	static const unsigned int t_count = 28;
-	static const unsigned int n_count = (v_count * t_count);
-
-	unsigned int s_index = chr - s_base;
-	unsigned int l_index = s_index / n_count;
-	unsigned int v_index = (s_index % n_count) / t_count;
-	unsigned int t_index = s_index % t_count;
-	uint32_t l_part = l_base + l_index;
-	uint32_t v_part = v_base + v_index;
-
-	if (t_index == 0) {
-		buf[0] = l_part;
-		buf[1] = v_part;
-		return 2;
-	}
-
-	uint32_t t_part = t_base + t_index;
-
-	buf[0] = l_part;
-	buf[1] = v_part;
-	buf[2] = t_part;
-	return 3;
-}
-
-static void uni_ucs4_decompose_hangul_utf8(unichar_t chr, buffer_t *output)
-{
-	unichar_t buf[3];
-	size_t len, i;
-
-	len = uni_ucs4_decompose_hangul(chr, buf);
-
-	for (i = 0; i < len; i++)
-		uni_ucs4_to_utf8_c(buf[i], output);
-}
+#include "unicode-transform.c"
 
 static void uni_ucs4_decompose_one_utf8(unichar_t chr, bool canonical,
 					buffer_t *output)
diff --git a/src/lib/unicode-transform.c b/src/lib/unicode-transform.c
new file mode 100644
index 0000000000..5bc2bff00a
--- /dev/null
+++ b/src/lib/unicode-transform.c
@@ -0,0 +1,54 @@
+/* Copyright (c) 2025 Dovecot authors, see the included COPYING file */
+
+#define HANGUL_FIRST 0xac00
+#define HANGUL_LAST 0xd7a3
+
+/*
+ * Hangul syllable (de)composition
+ */
+
+static size_t uni_ucs4_decompose_hangul(unichar_t chr, unichar_t buf[3])
+{
+	/* The Unicode Standard, Section 3.12.2:
+	   Hangul Syllable Decomposition
+	 */
+
+	static const uint16_t s_base = 0xac00;
+	static const uint16_t l_base = 0x1100;
+	static const uint16_t v_base = 0x1161;
+	static const uint16_t t_base = 0x11a7;
+	static const unsigned int v_count = 21;
+	static const unsigned int t_count = 28;
+	static const unsigned int n_count = (v_count * t_count);
+
+	unsigned int s_index = chr - s_base;
+	unsigned int l_index = s_index / n_count;
+	unsigned int v_index = (s_index % n_count) / t_count;
+	unsigned int t_index = s_index % t_count;
+	uint32_t l_part = l_base + l_index;
+	uint32_t v_part = v_base + v_index;
+
+	if (t_index == 0) {
+		buf[0] = l_part;
+		buf[1] = v_part;
+		return 2;
+	}
+
+	uint32_t t_part = t_base + t_index;
+
+	buf[0] = l_part;
+	buf[1] = v_part;
+	buf[2] = t_part;
+	return 3;
+}
+
+static void uni_ucs4_decompose_hangul_utf8(unichar_t chr, buffer_t *output)
+{
+	unichar_t buf[3];
+	size_t len, i;
+
+	len = uni_ucs4_decompose_hangul(chr, buf);
+
+	for (i = 0; i < len; i++)
+		uni_ucs4_to_utf8_c(buf[i], output);
+}