git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib: unicode-data - Rename several word break bits for common use
author Stephan Bosch <stephan.bosch@open-xchange.com>
Sat, 22 Nov 2025 17:26:30 +0000 (18:26 +0100)
committer aki.tuomi <aki.tuomi@open-xchange.com>
Mon, 8 Dec 2025 14:37:04 +0000 (14:37 +0000)
src/lib-language/lang-tokenizer-generic.c
src/lib/test-unicode-data.c
src/lib/unicode-data-static.h
src/lib/unicode-ucd-compile.py

index 0417b99e185a9dd25a582c43bf1de7f39c3df9f8..fa4105a02900e9e70635be40c3fb625016a98e67 100644 (file)
@@ -389,15 +389,15 @@ static enum letter_type letter_type(unichar_t c)
        const struct unicode_code_point_data *cpd =
                unicode_code_point_get_data(c);
 
-       if (cpd->pb_wb_cr)
+       if (cpd->pb_b_cr)
                return LETTER_TYPE_CR;
-       if (cpd->pb_wb_lf)
+       if (cpd->pb_b_lf)
                return LETTER_TYPE_LF;
        if (cpd->pb_wb_newline)
                return LETTER_TYPE_NEWLINE;
        if (cpd->pb_wb_extend)
                return LETTER_TYPE_EXTEND;
-       if (cpd->pb_wb_regional_indicator)
+       if (cpd->pb_b_regional_indicator)
                return LETTER_TYPE_REGIONAL_INDICATOR;
        if (cpd->pb_wb_format)
                return LETTER_TYPE_FORMAT;
index d1b1f683094ad1162ec7441e943be3ed84013c88..5ee9eb205b626e022c31ddb6ab1888041f7b6dbd 100644 (file)
@@ -512,17 +512,17 @@ test_word_break_property_line(const char *line, unsigned int line_num)
                        unicode_code_point_get_data(cp);
 
                if (strcmp(prop, "CR") == 0)
-                       test_assert_idx(cp_data->pb_wb_cr, cp);
+                       test_assert_idx(cp_data->pb_b_cr, cp);
                else if (strcmp(prop, "LF") == 0)
-                       test_assert_idx(cp_data->pb_wb_lf, cp);
+                       test_assert_idx(cp_data->pb_b_lf, cp);
                else if (strcmp(prop, "Newline") == 0)
                        test_assert_idx(cp_data->pb_wb_newline, cp);
                else if (strcmp(prop, "Extend") == 0)
                        test_assert_idx(cp_data->pb_wb_extend, cp);
                else if (strcmp(prop, "ZWJ") == 0)
-                       test_assert_idx(cp_data->pb_wb_zwj, cp);
+                       test_assert_idx(cp_data->pb_b_zwj, cp);
                else if (strcmp(prop, "Regional_Indicator") == 0)
-                       test_assert_idx(cp_data->pb_wb_regional_indicator, cp);
+                       test_assert_idx(cp_data->pb_b_regional_indicator, cp);
                else if (strcmp(prop, "Format") == 0)
                        test_assert_idx(cp_data->pb_wb_format, cp);
                else if (strcmp(prop, "Katakana") == 0)
index 9ff142e8ba14b1d9ccc82161219acdc0f94d530f..0d74fb80504da15d57e565af65c6bebf0ddca292 100644 (file)
@@ -156,13 +156,15 @@ struct unicode_code_point_data {
        bool pb_m_sentence_terminal:1;
        bool pb_m_terminal_punctuation:1;
 
+       /* Common Break */
+       bool pb_b_cr:1;
+       bool pb_b_lf:1;
+       bool pb_b_zwj:1; // Not currently used
+       bool pb_b_regional_indicator:1;
+
        /* Word_Break (UAX #29, Section 4.1) */
-       bool pb_wb_cr:1;
-       bool pb_wb_lf:1;
        bool pb_wb_newline:1;
        bool pb_wb_extend:1;
-       bool pb_wb_zwj:1; // Not currently used
-       bool pb_wb_regional_indicator:1;
        bool pb_wb_format:1;
        bool pb_wb_katakana:1;
        bool pb_wb_hebrew_letter:1;
index d7f607f8c50206ac7465ffee3432066a2a1c5485..15be589cb3d8be01f39743d780d8f172aeda83bf 100755 (executable)
@@ -615,11 +615,11 @@ def read_ucd_files():
             prop = cols[1].strip()
             if prop == "CR":
                 cpd = CodePointData()
-                cpd.pb_wb_cr = True
+                cpd.pb_b_cr = True
                 CodePointRange(cprng[0], cprng[1], cpd)
             elif prop == "LF":
                 cpd = CodePointData()
-                cpd.pb_wb_lf = True
+                cpd.pb_b_lf = True
                 CodePointRange(cprng[0], cprng[1], cpd)
             elif prop == "Newline":
                 cpd = CodePointData()
@@ -631,11 +631,11 @@ def read_ucd_files():
                 CodePointRange(cprng[0], cprng[1], cpd)
             elif prop == "ZWJ":
                 cpd = CodePointData()
-                cpd.pb_wb_zwj = True
+                cpd.pb_b_zwj = True
                 CodePointRange(cprng[0], cprng[1], cpd)
             elif prop == "Regional_Indicator":
                 cpd = CodePointData()
-                cpd.pb_wb_regional_indicator = True
+                cpd.pb_b_regional_indicator = True
                 CodePointRange(cprng[0], cprng[1], cpd)
             elif prop == "Format":
                 cpd = CodePointData()
@@ -1327,18 +1327,18 @@ def write_tables_c_cpd(cpd):
         print("\t\t.pb_m_sentence_terminal = TRUE,")
     if hasattr(cpd, "pb_m_terminal_punctuation"):
         print("\t\t.pb_m_terminal_punctuation = TRUE,")
-    if hasattr(cpd, "pb_wb_cr"):
-        print("\t\t.pb_wb_cr = TRUE,")
-    if hasattr(cpd, "pb_wb_lf"):
-        print("\t\t.pb_wb_lf = TRUE,")
+    if hasattr(cpd, "pb_b_cr"):
+        print("\t\t.pb_b_cr = TRUE,")
+    if hasattr(cpd, "pb_b_lf"):
+        print("\t\t.pb_b_lf = TRUE,")
+    if hasattr(cpd, "pb_b_zwj"):
+        print("\t\t.pb_b_zwj = TRUE,")
+    if hasattr(cpd, "pb_b_regional_indicator"):
+        print("\t\t.pb_b_regional_indicator = TRUE,")
     if hasattr(cpd, "pb_wb_newline"):
         print("\t\t.pb_wb_newline = TRUE,")
     if hasattr(cpd, "pb_wb_extend"):
         print("\t\t.pb_wb_extend = TRUE,")
-    if hasattr(cpd, "pb_wb_zwj"):
-        print("\t\t.pb_wb_zwj = TRUE,")
-    if hasattr(cpd, "pb_wb_regional_indicator"):
-        print("\t\t.pb_wb_regional_indicator = TRUE,")
     if hasattr(cpd, "pb_wb_format"):
         print("\t\t.pb_wb_format = TRUE,")
     if hasattr(cpd, "pb_wb_katakana"):