UCD_URL = https://dovecot.org/res
UCD_DIR = $(srcdir)/ucd
UCD_FILES = \
+ $(UCD_DIR)/CaseFolding.txt \
$(UCD_DIR)/CompositionExclusions.txt \
$(UCD_DIR)/DerivedCoreProperties.txt \
$(UCD_DIR)/DerivedNormalizationProps.txt \
# dependency, anything including the header will race the bison process.
event-filter-parser.h: event-filter-parser.c
+# Fetch CaseFolding.txt only when it is not already present. wget -O creates
+# (or truncates) its output file before the transfer starts, so the download
+# goes to a temporary name and is renamed into place only on success; a failed
+# or interrupted transfer can then never leave an empty or partial file that
+# would satisfy the "test -f" check on the next run.
+$(UCD_DIR)/CaseFolding.txt:
+	$(AM_V_at)test -f $@ || \
+	  { $(WGET) -nv -O $@.tmp $(UCD_URL)/CaseFolding.txt && \
+	    mv -f $@.tmp $@; } || \
+	  { rm -f $@.tmp; exit 1; }
# Fetch CompositionExclusions.txt only when it is not already present. wget -O
# creates (or truncates) its output file before the transfer starts, so the
# download goes to a temporary name and is renamed into place only on success;
# a failed or interrupted transfer can then never leave an empty or partial
# file that would satisfy the "test -f" check on the next run.
$(UCD_DIR)/CompositionExclusions.txt:
	$(AM_V_at)test -f $@ || \
	  { $(WGET) -nv -O $@.tmp $(UCD_URL)/CompositionExclusions.txt && \
	    mv -f $@.tmp $@; } || \
	  { rm -f $@.tmp; exit 1; }
$(UCD_DIR)/DerivedCoreProperties.txt:
#include <fcntl.h>
+#define UCD_CASE_FOLDING_TXT "CaseFolding.txt"
#define UCD_COMPOSITION_EXCLUSIONS_TXT "CompositionExclusions.txt"
#define UCD_DERIVED_NORMALIZATION_PROPS_TXT "DerivedNormalizationProps.txt"
#define UCD_PROP_LIST_TXT "PropList.txt"
}
}
+static void test_case_folding_line(const char *line, unsigned int line_num)
+{
+ const char *const *columns = t_strsplit(line, ";");
+ size_t num_columns = str_array_length(columns);
+
+ /* Handles one line of CaseFolding.txt: parses the code point, status
+ and mapping columns and passes the parsed mapping, together with the
+ case folding mapping stored for that code point, to
+ test_case_mapping(). Expected line format:
+ <code>; <status>; <mapping>; # <name> */
+
+ if (num_columns < 4) {
+ test_failed(t_strdup_printf(
+ "Invalid data at %s:%u",
+ UCD_CASE_FOLDING_TXT, line_num));
+ return;
+ }
+
+ if (num_columns > 4 && strlen(t_str_trim(columns[4], " ")) > 0) {
+ /* Skip lines whose fifth column is non-empty after trimming
+ spaces (treated here as a condition list) */
+ return;
+ }
+
+ const char *cp_hex = t_str_trim(columns[0], " ");
+ uint32_t cp;
+
+ if (str_to_uint32_hex(cp_hex, &cp) < 0) {
+ test_failed(t_strdup_printf(
+ "Invalid data at %s:%u: "
+ "Bad code point",
+ UCD_CASE_FOLDING_TXT, line_num));
+ return;
+ }
+
+ /* Parse status and mapping; lines whose status is neither "C" nor
+ "F" are ignored without reporting a failure */
+
+ const char *status = t_str_trim(columns[1], " ");
+
+ if (strcmp(status, "C") != 0 && strcmp(status, "F") != 0)
+ return;
+
+ const char *mapping = t_str_trim(columns[2], " ");
+ const char *const *map = t_strsplit(mapping, " ");
+
+ /* Check data: fetch the case folding mapping stored for cp and hand
+ it, with the mapping parsed from the file, to test_case_mapping() */
+
+ const struct unicode_code_point_data *cp_data =
+ unicode_code_point_get_data(cp);
+ const uint32_t *case_map;
+ size_t case_map_len;
+
+ case_map_len = unicode_code_point_data_get_casefold_mapping(
+ cp_data, &case_map);
+ test_case_mapping(cp, map, case_map, case_map_len);
+}
+
static void
test_composition_exclusions_line(const char *line, unsigned int line_num)
{
property files only the positive assignment of properties to the
code points mentioned in the files is tested, and notably not their
absence for other code points. */
+ test_ucd_file(UCD_CASE_FOLDING_TXT, test_case_folding_line);
test_ucd_file(UCD_COMPOSITION_EXCLUSIONS_TXT,
test_composition_exclusions_line);
test_ucd_file(UCD_DERIVED_NORMALIZATION_PROPS_TXT,
uint8_t uppercase_mapping_length;
uint8_t lowercase_mapping_length;
+ uint8_t casefold_mapping_length;
uint16_t decomposition_first_offset;
uint16_t decomposition_full_offset;
uint16_t uppercase_mapping_offset;
uint16_t lowercase_mapping_offset;
+ uint16_t casefold_mapping_offset;
uint32_t simple_titlecase_mapping;
return cp_data->lowercase_mapping_length;
}
+static inline size_t
+unicode_code_point_data_get_casefold_mapping(
+ const struct unicode_code_point_data *cp_data,
+ const uint32_t **map_r)
+{
+ uint32_t offset;
+ /* Points *map_r at the unicode_case_mappings entry selected by
+ cp_data->casefold_mapping_offset and returns
+ cp_data->casefold_mapping_length.
+ NOTE(review): *map_r is set even when the returned length is 0;
+ presumably callers must not read through it in that case - confirm. */
+ offset = cp_data->casefold_mapping_offset;
+ *map_r = &unicode_case_mappings[offset];
+ return cp_data->casefold_mapping_length;
+}
+
uint8_t unicode_general_category_from_string(const char *str);
#endif
# Add range
CodePointRange(cp_first, cp_last, cpd)
+ # CaseFolding.txt
+ with UCDFileOpen("CaseFolding.txt") as ucd:
+ line_num = 0
+ for line in ucd.fd:
+ line_num = line_num + 1
+ data = line.split("#")
+ line = data[0].strip()
+ if len(line) == 0:
+ continue
+
+ cols = line.split(";")
+ if len(cols) < 3:
+ die(f"{ucd}:{line_num}: Missing columns")
+
+ cp_hex = cols[0].strip()
+ if len(cp_hex) == 0:
+ continue
+ cp = int(cp_hex, 16)
+
+ status = cols[1].strip()
+ mapping = cols[2].strip()
+
+ if status != "C" and status != "F":
+ continue
+
+ codes_hex = mapping.split(" ")
+ if len(codes_hex) > 0:
+ first_code_hex = codes_hex[0].strip()
+ first_code = int(first_code_hex, 16)
+ if len(codes_hex) > 1 or first_code != cp:
+ codes = []
+ for code_hex in codes_hex:
+ codes.append(int(code_hex, 16))
+
+ cpd = CodePointData()
+ cpd.case_folding = codes
+ CodePointRange(cp, cp, cpd)
+
# CompositionExclusions.txt
with UCDFileOpen("CompositionExclusions.txt") as ucd:
for line in ucd.fd:
if len(lcase_codes) > ud_case_mapping_max_length:
ud_case_mapping_max_length = len(lcase_codes)
+ # Case_Folding
+ cfold_codes = []
+ if hasattr(cpd, "case_folding"):
+ cfold_codes = cpd.case_folding
+ if len(ucase_codes) > 0 and cfold_codes == ucase_codes:
+ cpd.casefold_mapping_length = cpd.uppercase_mapping_length
+ cpd.casefold_mapping_offset = cpd.uppercase_mapping_offset
+ elif len(lcase_codes) > 0 and cfold_codes == lcase_codes:
+ cpd.casefold_mapping_length = cpd.lowercase_mapping_length
+ cpd.casefold_mapping_offset = cpd.lowercase_mapping_offset
+ elif len(cfold_codes) > 0 and (len(cfold_codes) > 1 or cfold_codes[0] != cp):
+ cpd.casefold_mapping_offset = len(ud_case_mappings)
+ cpd.casefold_mapping_length = len(cfold_codes)
+ ud_case_mappings = ud_case_mappings + cfold_codes
+ if len(cfold_codes) > ud_case_mapping_max_length:
+ ud_case_mapping_max_length = len(cfold_codes)
+
def expand_decompositions():
global ud_codepoints
print(
"\t\t.uppercase_mapping_offset = %s," % cpd.uppercase_mapping_offset
)
+ if (
+ hasattr(cpd, "casefold_mapping_length")
+ and cpd.casefold_mapping_length > 0
+ ):
+ print("\t\t.casefold_mapping_length = %s," % cpd.casefold_mapping_length)
+ print("\t\t.casefold_mapping_offset = %s," % cpd.casefold_mapping_offset)
if hasattr(cpd, "simple_titlecase_mapping"):
print(
"\t\t.simple_titlecase_mapping = 0x%04X,"