test-unichar.c \
test-unicode-data.c \
test-unicode-nf.c \
+ test-unicode-casemap.c \
test-utc-mktime.c \
test-uri.c \
test-wildcard-match.c
TEST(test_unichar)
TEST(test_unicode_data)
TEST(test_unicode_nf)
+TEST(test_unicode_casemap)
TEST(test_uri)
TEST(test_utc_mktime)
TEST(test_wildcard_match)
--- /dev/null
+/* Copyright (c) 2025 Dovecot authors, see the included COPYING file */
+
+#include "test-lib.h"
+#include "strnum.h"
+#include "str.h"
+#include "unichar.h"
+
+/* One case-mapping test vector. All strings are UTF-8. The test function
+   compares the case-folded result against .lowercase when .casefold is left
+   NULL. */
+static const struct casemap_test {
+	const char *input;
+	const char *lowercase;
+	const char *uppercase;
+	const char *casefold;
+} tests[] = {
+	{
+		/* "Wei<U+00DF>kopfseeadler": exercises a code point whose
+		   upper-case and case-folded forms are longer than one code
+		   point */
+		.input = "\x57\x65\x69\xC3\x9F\x6B\x6F\x70\x66"
+			 "\x73\x65\x65\x61\x64\x6C\x65\x72",
+		/* expected: "wei<U+00DF>kopfseeadler" (U+00DF is kept) */
+		.lowercase = "\x77\x65\x69\xC3\x9F\x6B\x6F\x70"
+			     "\x66\x73\x65\x65\x61\x64\x6C\x65\x72",
+		/* expected: U+00DF expands to "SS" */
+		.uppercase = "WEISSKOPFSEEADLER",
+		/* expected: U+00DF folds to "ss" */
+		.casefold = "weisskopfseeadler",
+	},
+};
+
+/* Number of entries in tests[] */
+static const unsigned int tests_count = N_ELEMENTS(tests);
+
+/* Run every entry of tests[] through the upper-case, lower-case and case-fold
+   conversions and compare each result with the expected string. */
+void test_unicode_casemap(void)
+{
+	test_begin("unicode casemap");
+
+	for (unsigned int i = 0; i < tests_count; i++) {
+		const struct casemap_test *test = &tests[i];
+		const size_t input_len = strlen(test->input);
+		const char *test_casefold = test->casefold;
+		const char *uppercase, *lowercase, *casefold;
+		int ret;
+
+		/* Entries without a dedicated case-fold expectation are
+		   expected to fold to their lower-case form. */
+		if (test_casefold == NULL)
+			test_casefold = test->lowercase;
+
+		ret = uni_utf8_to_uppercase(test->input, input_len,
+					    &uppercase);
+		test_assert_idx(ret >= 0, i);
+		test_assert_strcmp_idx(test->uppercase, uppercase, i);
+
+		ret = uni_utf8_to_lowercase(test->input, input_len,
+					    &lowercase);
+		test_assert_idx(ret >= 0, i);
+		test_assert_strcmp_idx(test->lowercase, lowercase, i);
+
+		ret = uni_utf8_to_casefold(test->input, input_len,
+					   &casefold);
+		test_assert_idx(ret >= 0, i);
+		test_assert_strcmp_idx(test_casefold, casefold, i);
+	}
+
+	test_end();
+}
return uni_utf8_is_nf(input, size, UNICODE_NFKC);
}
+/* Map the UTF-8 input of the given size to upper case and write the result
+   to output. The return value is passed through unchanged from
+   uni_utf8_run_transform(); the error string it may provide is discarded. */
+int uni_utf8_write_uppercase(const void *_input, size_t size, buffer_t *output)
+{
+	/* The casemap state is fully re-initialised below on every call, so
+	   automatic storage is sufficient. A function-local static would only
+	   add hidden shared state and make the function non-reentrant. */
+	struct unicode_casemap map;
+	const char *error;
+
+	unicode_casemap_init_uppercase(&map);
+
+	return uni_utf8_run_transform(_input, size, &map.transform, output,
+				      &error);
+}
+
+/* Map the UTF-8 input of the given size to lower case and write the result
+   to output. The return value is passed through unchanged from
+   uni_utf8_run_transform(); the error string it may provide is discarded. */
+int uni_utf8_write_lowercase(const void *_input, size_t size, buffer_t *output)
+{
+	/* The casemap state is fully re-initialised below on every call, so
+	   automatic storage is sufficient. A function-local static would only
+	   add hidden shared state and make the function non-reentrant. */
+	struct unicode_casemap map;
+	const char *error;
+
+	unicode_casemap_init_lowercase(&map);
+
+	return uni_utf8_run_transform(_input, size, &map.transform, output,
+				      &error);
+}
+
+/* Case-fold the UTF-8 input of the given size and write the result to
+   output. The return value is passed through unchanged from
+   uni_utf8_run_transform(); the error string it may provide is discarded. */
+int uni_utf8_write_casefold(const void *_input, size_t size, buffer_t *output)
+{
+	/* The casemap state is fully re-initialised below on every call, so
+	   automatic storage is sufficient. A function-local static would only
+	   add hidden shared state and make the function non-reentrant. */
+	struct unicode_casemap map;
+	const char *error;
+
+	unicode_casemap_init_casefold(&map);
+
+	return uni_utf8_run_transform(_input, size, &map.transform, output,
+				      &error);
+}
+
+/* Upper-case the UTF-8 input into a buffer obtained from t_buffer_create()
+   and hand back the buffer contents through *output_r via str_c().
+   *output_r is assigned regardless of the conversion result, which is
+   returned unchanged from uni_utf8_write_uppercase(). */
+int uni_utf8_to_uppercase(const void *input, size_t size, const char **output_r)
+{
+	buffer_t *buf = t_buffer_create(size);
+	const int result = uni_utf8_write_uppercase(input, size, buf);
+
+	*output_r = str_c(buf);
+	return result;
+}
+
+/* Lower-case the UTF-8 input into a buffer obtained from t_buffer_create()
+   and hand back the buffer contents through *output_r via str_c().
+   *output_r is assigned regardless of the conversion result, which is
+   returned unchanged from uni_utf8_write_lowercase(). */
+int uni_utf8_to_lowercase(const void *input, size_t size, const char **output_r)
+{
+	buffer_t *buf = t_buffer_create(size);
+	const int result = uni_utf8_write_lowercase(input, size, buf);
+
+	*output_r = str_c(buf);
+	return result;
+}
+
+/* Case-fold the UTF-8 input into a buffer obtained from t_buffer_create()
+   and hand back the buffer contents through *output_r via str_c().
+   *output_r is assigned regardless of the conversion result, which is
+   returned unchanged from uni_utf8_write_casefold(). */
+int uni_utf8_to_casefold(const void *input, size_t size, const char **output_r)
+{
+	buffer_t *buf = t_buffer_create(size);
+	const int result = uni_utf8_write_casefold(input, size, buf);
+
+	*output_r = str_c(buf);
+	return result;
+}
+
int uni_utf8_to_decomposed_titlecase(const void *_input, size_t size,
buffer_t *output)
{
int uni_utf8_is_nfc(const void *input, size_t size);
int uni_utf8_is_nfkc(const void *input, size_t size);
+/* Map the input UTF-8 string to the requested case (upper case, lower case
+   or case-folded) and write the result to the provided buffer.
+   NOTE(review): the return value is forwarded from the underlying transform
+   run; presumably negative means failure (the unit test asserts >= 0) -
+   confirm. */
+int uni_utf8_write_uppercase(const void *input, size_t size, buffer_t *output);
+int uni_utf8_write_lowercase(const void *input, size_t size, buffer_t *output);
+int uni_utf8_write_casefold(const void *input, size_t size, buffer_t *output);
+
+/* Same mappings as the uni_utf8_write_*() functions, but the result is
+   written to a buffer obtained from t_buffer_create() and returned as a
+   string in *output_r. *output_r is set even when the conversion reports
+   an error; the return value is that of the matching uni_utf8_write_*()
+   function. */
+int uni_utf8_to_uppercase(const void *input, size_t size, const char **output_r);
+int uni_utf8_to_lowercase(const void *input, size_t size, const char **output_r);
+int uni_utf8_to_casefold(const void *input, size_t size, const char **output_r);
+
+
/* Convert UTF-8 input to titlecase and decompose the titlecase characters to
output buffer. Returns 0 if ok, -1 if input was invalid. This generates
output that's compatible with i;unicode-casemap comparator. Invalid input
return (ret > 0 ? 1 : 0);
}
+/*
+ * Casemap Transform
+ *
+ * Each input code point is run through one of the three per-code-point
+ * mappers declared below (upper case, lower case or case folding) and the
+ * mapped code points are passed on with uniform_transform_forward(). The
+ * functions are declared ahead of their definitions so that the callback
+ * table and the init functions can refer to them.
+ */
+
+static size_t
+unicode_casemap_uppercase_cp(const struct unicode_code_point_data *cp_data,
+ const uint32_t **map_r);
+static size_t
+unicode_casemap_lowercase_cp(const struct unicode_code_point_data *cp_data,
+ const uint32_t **map_r);
+static size_t
+unicode_casemap_casefold_cp(const struct unicode_code_point_data *cp_data,
+ const uint32_t **map_r);
+
+static ssize_t
+unicode_casemap_input(struct unicode_transform *trans,
+ const struct unicode_transform_buffer *buf,
+ const char **error_r);
+static int
+unicode_casemap_flush(struct unicode_transform *trans, bool finished,
+ const char **error_r);
+
+static const struct unicode_transform_def unicode_casemap_def = { /* callback table shared by all three casemap variants */
+ .input = unicode_casemap_input,
+ .flush = unicode_casemap_flush,
+};
+
+/* Reset *map_r and set it up as a casemap transform that uses the
+   upper-case code point mapper. Any previous contents are discarded. */
+void unicode_casemap_init_uppercase(struct unicode_casemap *map_r)
+{
+	i_zero(map_r);
+	map_r->map = unicode_casemap_uppercase_cp;
+	unicode_transform_init(&map_r->transform, &unicode_casemap_def);
+}
+
+/* Reset *map_r and set it up as a casemap transform that uses the
+   lower-case code point mapper. Any previous contents are discarded. */
+void unicode_casemap_init_lowercase(struct unicode_casemap *map_r)
+{
+	i_zero(map_r);
+	map_r->map = unicode_casemap_lowercase_cp;
+	unicode_transform_init(&map_r->transform, &unicode_casemap_def);
+}
+
+/* Reset *map_r and set it up as a casemap transform that uses the
+   case-folding code point mapper. Any previous contents are discarded. */
+void unicode_casemap_init_casefold(struct unicode_casemap *map_r)
+{
+	i_zero(map_r);
+	map_r->map = unicode_casemap_casefold_cp;
+	unicode_transform_init(&map_r->transform, &unicode_casemap_def);
+}
+
+/* Mapper used by the upper-case transform: looks up the upper-case mapping
+   of cp_data. The result is the length reported by the lookup, with *map_r
+   set by it; the caller treats a length of 0 as "no mapping". */
+static size_t
+unicode_casemap_uppercase_cp(const struct unicode_code_point_data *cp_data,
+			     const uint32_t **map_r)
+{
+	const size_t mapped_len =
+		unicode_code_point_data_get_uppercase_mapping(cp_data, map_r);
+
+	return mapped_len;
+}
+
+/* Mapper used by the lower-case transform: looks up the lower-case mapping
+   of cp_data. The result is the length reported by the lookup, with *map_r
+   set by it; the caller treats a length of 0 as "no mapping". */
+static size_t
+unicode_casemap_lowercase_cp(const struct unicode_code_point_data *cp_data,
+			     const uint32_t **map_r)
+{
+	const size_t mapped_len =
+		unicode_code_point_data_get_lowercase_mapping(cp_data, map_r);
+
+	return mapped_len;
+}
+
+/* Mapper used by the case-folding transform: looks up the case-fold mapping
+   of cp_data. The result is the length reported by the lookup, with *map_r
+   set by it; the caller treats a length of 0 as "no mapping". */
+static size_t
+unicode_casemap_casefold_cp(const struct unicode_code_point_data *cp_data,
+			    const uint32_t **map_r)
+{
+	const size_t mapped_len =
+		unicode_code_point_data_get_casefold_mapping(cp_data, map_r);
+
+	return mapped_len;
+}
+
+/* Map one code point and forward the result, resuming at map->cp_map_pos
+   when an earlier attempt forwarded only part of the mapping.
+
+   cp_data may be NULL, in which case it is looked up from cp. A code point
+   for which the mapper reports no mapping is forwarded as itself.
+
+   Returns 1 when the code point counts as consumed (either completely
+   forwarded, or newly stored in map as pending), 0 when a code point that
+   was already pending is still not completely forwarded, and -1 with
+   *error_r set when forwarding failed. */
+static ssize_t
+unicode_casemap_input_cp(struct unicode_casemap *map, uint32_t cp,
+			 const struct unicode_code_point_data *cp_data,
+			 const char **error_r)
+{
+	const bool retrying = map->cp_buffered;
+	const struct unicode_code_point_data *const *mapped_data = NULL;
+	const uint32_t *mapped;
+	size_t map_cps_len;
+	ssize_t forwarded;
+
+	if (cp_data == NULL)
+		cp_data = unicode_code_point_get_data(cp);
+
+	map_cps_len = map->map(cp_data, &mapped);
+	if (map_cps_len == 0) {
+		/* no mapping: pass the code point through unchanged, along
+		   with its already known data */
+		mapped = &cp;
+		mapped_data = &cp_data;
+		map_cps_len = 1;
+	}
+	i_assert(map_cps_len > map->cp_map_pos);
+
+	/* skip what an earlier attempt already forwarded */
+	mapped += map->cp_map_pos;
+	map_cps_len -= map->cp_map_pos;
+
+	forwarded = uniform_transform_forward(&map->transform, mapped,
+					      mapped_data, map_cps_len,
+					      error_r);
+	if (forwarded < 0) {
+		i_assert(*error_r != NULL);
+		return -1;
+	}
+
+	if ((size_t)forwarded >= map_cps_len) {
+		/* everything went through; clear the pending state */
+		map->cp_buffered = FALSE;
+		map->cp_data = NULL;
+		map->cp_map_pos = 0;
+		return 1;
+	}
+
+	/* only partly forwarded; remember where to resume */
+	map->cp_buffered = TRUE;
+	map->cp = cp;
+	map->cp_data = cp_data;
+	map->cp_map_pos += forwarded;
+	return (retrying ? 0 : 1);
+}
+
+/* Input callback of the casemap transform. A code point left pending by an
+   earlier call is retried first; if it is still pending afterwards nothing
+   from buf is consumed and 0 is returned. Otherwise code points from buf are
+   mapped one by one until the buffer is exhausted or a code point becomes
+   pending. Returns the number of code points consumed from buf, or -1 with
+   *error_r set on failure. */
+static ssize_t
+unicode_casemap_input(struct unicode_transform *trans,
+		      const struct unicode_transform_buffer *buf,
+		      const char **error_r)
+{
+	struct unicode_casemap *map =
+		container_of(trans, struct unicode_casemap, transform);
+	size_t consumed = 0;
+	int ret;
+
+	if (unicode_casemap_flush(trans, TRUE, error_r) < 0) {
+		i_assert(*error_r != NULL);
+		return -1;
+	}
+	if (map->cp_buffered)
+		return 0;
+
+	while (consumed < buf->cp_count && !map->cp_buffered) {
+		const struct unicode_code_point_data *cp_data = NULL;
+
+		/* per-code-point data is optional in the input buffer */
+		if (buf->cp_data != NULL)
+			cp_data = buf->cp_data[consumed];
+
+		ret = unicode_casemap_input_cp(map, buf->cp[consumed],
+					       cp_data, error_r);
+		if (ret < 0) {
+			i_assert(*error_r != NULL);
+			return -1;
+		}
+		if (ret == 0)
+			break;
+		consumed++;
+	}
+	return consumed;
+}
+
+/* Flush callback of the casemap transform: retry forwarding the pending code
+   point, if there is one. Returns 1 when nothing is pending (any more), 0
+   when the code point is still pending, and a negative value with *error_r
+   set on failure. The finished flag is not used. */
+static int
+unicode_casemap_flush(struct unicode_transform *trans,
+		      bool finished ATTR_UNUSED, const char **error_r)
+{
+	struct unicode_casemap *map =
+		container_of(trans, struct unicode_casemap, transform);
+	int ret;
+
+	if (map->cp_buffered) {
+		ret = unicode_casemap_input_cp(map, map->cp, map->cp_data,
+					       error_r);
+		i_assert(ret >= 0 || *error_r != NULL);
+		return ret;
+	}
+	return 1;
+}
+
/*
* RFC 5051 - Simple Unicode Collation Algorithm
*/
const struct unicode_code_point_data **cp_data);
int unicode_nf_checker_finish(struct unicode_nf_checker *unc);
+/*
+ * Casemap Transform
+ *
+ * State of a transform that maps code points to upper case, lower case or
+ * their case-folded form, depending on which unicode_casemap_init_*()
+ * function set it up. The cp* fields track a code point whose mapped output
+ * has so far been forwarded only in part.
+ */
+
+struct unicode_casemap {
+ struct unicode_transform transform; /* embedded transform; callbacks get back to this struct with container_of() */
+
+ size_t (*map)(const struct unicode_code_point_data *cp_data,
+ const uint32_t **map_r); /* per-code-point mapper chosen by the init function */
+
+ uint32_t cp; /* pending code point (meaningful while cp_buffered is set) */
+ const struct unicode_code_point_data *cp_data; /* data belonging to cp */
+ unsigned int cp_map_pos; /* how many mapped code points of cp were already forwarded */
+
+ bool cp_buffered:1; /* set while cp still has mapped output left to forward */
+};
+
+/* Initialize *map_r as a casemap transform for upper case, lower case or
+   case folding respectively. The structure is zeroed first, so it does not
+   need to be initialized by the caller. */
+void unicode_casemap_init_uppercase(struct unicode_casemap *map_r);
+void unicode_casemap_init_lowercase(struct unicode_casemap *map_r);
+void unicode_casemap_init_casefold(struct unicode_casemap *map_r);
+
/*
* RFC 5051 - Simple Unicode Collation Algorithm
*/