From: Stephan Bosch Date: Sat, 22 Nov 2025 17:26:30 +0000 (+0100) Subject: lib: unicode-data - Rename several word break bits for common use X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=afe45e84f50cafbeb5c09e22cc2d470a4b001655;p=thirdparty%2Fdovecot%2Fcore.git lib: unicode-data - Rename several word break bits for common use --- diff --git a/src/lib-language/lang-tokenizer-generic.c b/src/lib-language/lang-tokenizer-generic.c index 0417b99e18..fa4105a029 100644 --- a/src/lib-language/lang-tokenizer-generic.c +++ b/src/lib-language/lang-tokenizer-generic.c @@ -389,15 +389,15 @@ static enum letter_type letter_type(unichar_t c) const struct unicode_code_point_data *cpd = unicode_code_point_get_data(c); - if (cpd->pb_wb_cr) + if (cpd->pb_b_cr) return LETTER_TYPE_CR; - if (cpd->pb_wb_lf) + if (cpd->pb_b_lf) return LETTER_TYPE_LF; if (cpd->pb_wb_newline) return LETTER_TYPE_NEWLINE; if (cpd->pb_wb_extend) return LETTER_TYPE_EXTEND; - if (cpd->pb_wb_regional_indicator) + if (cpd->pb_b_regional_indicator) return LETTER_TYPE_REGIONAL_INDICATOR; if (cpd->pb_wb_format) return LETTER_TYPE_FORMAT; diff --git a/src/lib/test-unicode-data.c b/src/lib/test-unicode-data.c index d1b1f68309..5ee9eb205b 100644 --- a/src/lib/test-unicode-data.c +++ b/src/lib/test-unicode-data.c @@ -512,17 +512,17 @@ test_word_break_property_line(const char *line, unsigned int line_num) unicode_code_point_get_data(cp); if (strcmp(prop, "CR") == 0) - test_assert_idx(cp_data->pb_wb_cr, cp); + test_assert_idx(cp_data->pb_b_cr, cp); else if (strcmp(prop, "LF") == 0) - test_assert_idx(cp_data->pb_wb_lf, cp); + test_assert_idx(cp_data->pb_b_lf, cp); else if (strcmp(prop, "Newline") == 0) test_assert_idx(cp_data->pb_wb_newline, cp); else if (strcmp(prop, "Extend") == 0) test_assert_idx(cp_data->pb_wb_extend, cp); else if (strcmp(prop, "ZWJ") == 0) - test_assert_idx(cp_data->pb_wb_zwj, cp); + test_assert_idx(cp_data->pb_b_zwj, cp); else if (strcmp(prop, "Regional_Indicator") == 0) - test_assert_idx(cp_data->pb_wb_regional_indicator, cp); + test_assert_idx(cp_data->pb_b_regional_indicator, cp); else if (strcmp(prop, "Format") == 0) test_assert_idx(cp_data->pb_wb_format, cp); else if (strcmp(prop, "Katakana") == 0) diff --git a/src/lib/unicode-data-static.h b/src/lib/unicode-data-static.h index 9ff142e8ba..0d74fb8050 100644 --- a/src/lib/unicode-data-static.h +++ b/src/lib/unicode-data-static.h @@ -156,13 +156,15 @@ struct unicode_code_point_data { bool pb_m_sentence_terminal:1; bool pb_m_terminal_punctuation:1; + /* Common Break */ + bool pb_b_cr:1; + bool pb_b_lf:1; + bool pb_b_zwj:1; // Not currently used + bool pb_b_regional_indicator:1; + /* Word_Break (UAX #29, Section 4.1) */ - bool pb_wb_cr:1; - bool pb_wb_lf:1; bool pb_wb_newline:1; bool pb_wb_extend:1; - bool pb_wb_zwj:1; // Not currently used - bool pb_wb_regional_indicator:1; bool pb_wb_format:1; bool pb_wb_katakana:1; bool pb_wb_hebrew_letter:1; diff --git a/src/lib/unicode-ucd-compile.py b/src/lib/unicode-ucd-compile.py index d7f607f8c5..15be589cb3 100755 --- a/src/lib/unicode-ucd-compile.py +++ b/src/lib/unicode-ucd-compile.py @@ -615,11 +615,11 @@ def read_ucd_files(): prop = cols[1].strip() if prop == "CR": cpd = CodePointData() - cpd.pb_wb_cr = True + cpd.pb_b_cr = True CodePointRange(cprng[0], cprng[1], cpd) elif prop == "LF": cpd = CodePointData() - cpd.pb_wb_lf = True + cpd.pb_b_lf = True CodePointRange(cprng[0], cprng[1], cpd) elif prop == "Newline": cpd = CodePointData() @@ -631,11 +631,11 @@ def read_ucd_files(): CodePointRange(cprng[0], cprng[1], cpd) elif prop == "ZWJ": cpd = CodePointData() - cpd.pb_wb_zwj = True + cpd.pb_b_zwj = True CodePointRange(cprng[0], cprng[1], cpd) elif prop == "Regional_Indicator": cpd = CodePointData() - cpd.pb_wb_regional_indicator = True + cpd.pb_b_regional_indicator = True CodePointRange(cprng[0], cprng[1], cpd) elif prop == "Format": cpd = CodePointData() @@ -1327,18 +1327,18 @@ def write_tables_c_cpd(cpd): print("\t\t.pb_m_sentence_terminal = TRUE,") if hasattr(cpd, "pb_m_terminal_punctuation"): print("\t\t.pb_m_terminal_punctuation = TRUE,") - if hasattr(cpd, "pb_wb_cr"): - print("\t\t.pb_wb_cr = TRUE,") - if hasattr(cpd, "pb_wb_lf"): - print("\t\t.pb_wb_lf = TRUE,") + if hasattr(cpd, "pb_b_cr"): + print("\t\t.pb_b_cr = TRUE,") + if hasattr(cpd, "pb_b_lf"): + print("\t\t.pb_b_lf = TRUE,") + if hasattr(cpd, "pb_b_zwj"): + print("\t\t.pb_b_zwj = TRUE,") + if hasattr(cpd, "pb_b_regional_indicator"): + print("\t\t.pb_b_regional_indicator = TRUE,") if hasattr(cpd, "pb_wb_newline"): print("\t\t.pb_wb_newline = TRUE,") if hasattr(cpd, "pb_wb_extend"): print("\t\t.pb_wb_extend = TRUE,") - if hasattr(cpd, "pb_wb_zwj"): - print("\t\t.pb_wb_zwj = TRUE,") - if hasattr(cpd, "pb_wb_regional_indicator"): - print("\t\t.pb_wb_regional_indicator = TRUE,") if hasattr(cpd, "pb_wb_format"): print("\t\t.pb_wb_format = TRUE,") if hasattr(cpd, "pb_wb_katakana"):