src/lib-smtp/Makefile
src/lib-ssl-iostream/Makefile
src/lib-test/Makefile
+src/lib-regex/Makefile
src/lib-storage/Makefile
src/lib-storage/list/Makefile
src/lib-storage/index/Makefile
lib-dovecot \
$(LIB_LDAP) \
$(LIB_LUA) \
+ lib-regex \
lib-language \
lib-imap-client \
lib-imap-urlauth \
--- /dev/null
+# Build rules for lib-regex (libdregex), the PCRE2 based regular expression
+# wrapper. The real implementation and its unit test are only built when
+# BUILD_LIBREGEX is enabled; otherwise the library consists of empty.c only.
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-test \
+	-I$(top_srcdir)/src/lib-dict \
+	-I$(top_srcdir)/src/lib-doveadm \
+	-I$(top_srcdir)/src/lib-dns-client \
+	-I$(top_srcdir)/src/lib-http \
+	-I$(top_srcdir)/src/lib-ssl-iostream \
+	-I$(top_srcdir)/src/lib-settings \
+	-I$(top_srcdir)/src/lib-master \
+	-I$(top_srcdir)/src/lib-var-expand \
+	$(LIBPCRE_CFLAGS)
+
+headers = \
+	dregex.h
+
+pkginc_libdir=$(pkgincludedir)
+pkginc_lib_HEADERS = $(headers)
+
+noinst_LTLIBRARIES=libdregex.la
+
+if BUILD_LIBREGEX
+libdregex_la_SOURCES = regex.c
+libdregex_la_LIBADD = $(LIBPCRE_LIBS)
+
+EXTRA_DIST =
+
+test_programs = test-regex
+
+noinst_PROGRAMS = $(test_programs)
+
+test_regex_SOURCES = test-regex.c
+test_regex_LDADD = libdregex.la \
+	../lib-test/libtest.la \
+	../lib/liblib.la \
+	$(LIBPCRE_LIBS)
+# Only buildable files belong here. $(LIBPCRE_LIBS) contains linker flags
+# (-L<dir>, -l<name>), which make cannot treat as prerequisites.
+test_regex_DEPENDENCIES = libdregex.la \
+	../lib-test/libtest.la \
+	../lib/liblib.la
+
+else
+libdregex_la_SOURCES = empty.c
+endif
--- /dev/null
+#ifndef DREGEX_H
+#define DREGEX_H 1
+
+/* Option bits for the dregex API.
+ *
+ * The flags given to dregex_code_compile() are stored in the code object.
+ * ANCHORED, NOSUB, ICASE, NEWLINE, EXTENDED and ASCII_ONLY select PCRE2
+ * compile options; NOTBOL, NOTEOL, ANCHORED, ASCII_ONLY and NO_EMPTY_SUB are
+ * read back from the stored flags when matching. The REPLACE_* flags are
+ * taken from the flags argument of the replace functions.
+ */
+enum dregex_flags {
+ /* Match only at the first position */
+ DREGEX_ANCHORED = BIT(0),
+ /* Do not create automatic capture groups */
+ DREGEX_NOSUB = BIT(1),
+ /* Case insensitive matching */
+ DREGEX_ICASE = BIT(2),
+ /* ^ and $ match newlines within data */
+ DREGEX_NEWLINE = BIT(3),
+ /* Subject string is not the beginning of a line */
+ DREGEX_NOTBOL = BIT(4),
+ /* Subject string is not the end of a line */
+ DREGEX_NOTEOL = BIT(5),
+ /* Reject non-ascii strings.
+ * NOTE(review): the implementation does not reject anything; it skips
+ * UTF-8 decoding, treats every input byte as one character and
+ * disables UTF mode in PCRE2. Confirm which behaviour is intended. */
+ DREGEX_ASCII_ONLY = BIT(6),
+ /* Extended regular expression, skip whitespace and ignore comments,
+ * see https://www.pcre.org/current/doc/html/pcre2api.html */
+ DREGEX_EXTENDED = BIT(7),
+ /* Skip empty match groups: unset capture groups are left out of the
+ * returned group array instead of being returned as "" */
+ DREGEX_NO_EMPTY_SUB = BIT(8),
+
+ /* Perform global replace */
+ DREGEX_REPLACE_ALL = BIT(9),
+ /* Replacement string is literal */
+ DREGEX_REPLACE_LITERAL = BIT(10),
+};
+
+/* Resource limits for dregex_code_create_params(). The values are used
+ * verbatim; the defaults mentioned below are only applied by
+ * dregex_code_create(). */
+struct dregex_params {
+ unsigned int max_cpu_seconds; /* maximum execution time, 1s default */
+ /* NOTE(review): despite the name, the implementation passes this value
+ * to pcre2_set_match_limit(), i.e. it limits PCRE2's internal match
+ * effort rather than the number of capture groups. */
+ unsigned int max_capture_groups; /* maximum number of capture groups, 100 default */
+ unsigned int max_depth; /* maximum stack depth, 100 default */
+};
+
+/* Matches the given regular expression pattern against the subject string.
+ *
+ * Both pattern and subject are converted to UCS4 internally, making this UTF-8 safe.
+ * An empty match is not counted as a match.
+ *
+ * Returns:
+ * - -1 on error (error_r is set to an error message and must not be NULL)
+ * - 0 if the pattern does not match
+ * - 1 if the pattern matches
+ */
+int dregex_match(const char *pattern, const char *subject, enum dregex_flags flags,
+ const char **error_r);
+
+/* Same as dregex_match(), but additionally appends the matched groups
+ * (converted back to UTF-8) to groups_r when the pattern has capture groups.
+ * groups_r may be NULL if the groups are not wanted.
+ */
+int dregex_match_groups(const char *pattern, const char *subject, enum dregex_flags flags,
+ ARRAY_TYPE(const_string) *groups_r, const char **error_r);
+
+/* Performs a regular expression-based substitution on the subject string.
+ * Replaces matches of 'pattern' with 'replace' and writes the result into result_r.
+ *
+ * Pattern, subject and replace are all converted to UCS4 internally, making
+ * this UTF-8 safe. The result is converted back to UTF-8 and written into the
+ * caller-provided result_r (presumably appended - confirm against
+ * uni_ucs4_to_utf8()). result_r is left untouched when nothing matched.
+ *
+ * Returns:
+ * - -1 on error (error_r is set to an error message and must not be NULL)
+ * - 0 if no substitution was performed (no match)
+ * - 1 if substitution was successful
+ */
+int dregex_replace(const char *pattern, const char *subject, const char *replace,
+ string_t *result_r, enum dregex_flags flags,
+ const char **error_r);
+
+/* Opaque handle holding a compiled pattern and its PCRE2 contexts. */
+struct dregex_code;
+
+/* Creates a new regular expression context. This context
+ * can be reused by calling code_compile again, which will
+ * clear the old pattern.
+ *
+ * dregex_code_create() uses the default limits, dregex_code_create_params()
+ * uses the limits given in params (which must not be NULL).
+*/
+struct dregex_code *dregex_code_create(void);
+struct dregex_code *dregex_code_create_params(const struct dregex_params *params);
+
+/* Frees the regular expression context and sets *_code to NULL.
+ * Does nothing if *_code is already NULL. */
+void dregex_code_free(struct dregex_code **_code);
+
+/* Compiles the given pattern into reusable code.
+ *
+ * Pattern is converted to UCS4 internally, making this UTF-8 safe.
+ * The flags are remembered and also used by later match calls.
+ *
+ * Returns 0 on success, -1 on error with error_r set.
+ */
+int dregex_code_compile(struct dregex_code *code, const char *pattern,
+ enum dregex_flags flags, const char **error_r);
+
+/* Exports the compiled pattern into the given buffer (the serialized bytes
+ * are appended). Panics if serialization fails. */
+void dregex_code_export(const struct dregex_code *code, buffer_t *buffer);
+
+/* Imports a compiled pattern from the given buffer.
+ * Returns 0 on success, -1 on error with error_r set. */
+int dregex_code_import(struct dregex_code *code, const buffer_t *buffer,
+ const char **error_r);
+
+/* Executes regex matching with capture groups using precompiled code.
+ * Same as dregex_match_groups().
+ *
+ * Subject is converted to UCS4 internally, making this UTF-8 safe.
+ *
+ * Groups are converted from UCS4 to UTF-8 internally.
+ * The code must contain a compiled pattern; groups_r may be NULL.
+ */
+int dregex_code_match_groups(struct dregex_code *code, const char *subject,
+ ARRAY_TYPE(const_string) *groups_r, const char **error_r);
+
+/* Executes regex matching using precompiled code.
+ * Same as dregex_match().
+ *
+ * Subject is converted to UCS4 internally, making this UTF-8 safe.
+ */
+int dregex_code_match(struct dregex_code *code, const char *subject,
+ const char **error_r);
+
+/* Performs regex replacement using precompiled code, starting at given offset.
+ * Same as dregex_replace().
+ *
+ * Subject and replacement are converted to UCS4 internally, making this UTF-8 safe.
+ * startoffset is handed to PCRE2 together with the converted subject, so it
+ * presumably counts characters rather than UTF-8 bytes - TODO confirm.
+ * The result is converted to UTF-8 and written into result_r.
+ */
+int dregex_code_replace_full(struct dregex_code *code,
+ const char *subject, size_t startoffset,
+ const char *replacement,
+ string_t *result_r, enum dregex_flags flags,
+ const char **error_r);
+
+/* Performs regex replacement using precompiled code.
+ * Same as dregex_replace(); equivalent to dregex_code_replace_full() with
+ * startoffset 0.
+ *
+ * Subject is converted to UCS4 internally, making this UTF-8 safe.
+ * The result is converted to UTF-8 and written into result_r.
+ */
+int dregex_code_replace(struct dregex_code *code,
+ const char *subject, const char *replacement,
+ string_t *result_r, enum dregex_flags flags,
+ const char **error_r);
+
+/* Fallbacks used when Dovecot is built without PCRE2: every entry point
+ * becomes a macro that fails with NO_DREGEX_SUPPORT (or does nothing), so
+ * callers compile and link without the library. Every function declared
+ * above needs a macro here, otherwise using it causes a link error. */
+#ifndef HAVE_LIBPCRE
+# define NO_DREGEX_SUPPORT "Missing regular expression support"
+# define NO_DREGEX_SUPPORT_CODE(error_r) \
+	({STMT_START { *(error_r) = NO_DREGEX_SUPPORT;} STMT_END; -1;})
+# define dregex_match(pattern, subject, flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_match_groups(pattern, subject, flags, groups_r, error_r) \
+	NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_replace(pattern, subject, replace, result_r, \
+	flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_code_create() ({ NULL; })
+/* Was missing: dregex_code_create_params() had a prototype but no fallback */
+# define dregex_code_create_params(params) ({ NULL; })
+# define dregex_code_free(code)
+# define dregex_code_compile(code, pattern, flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_code_export(code, buffer)
+# define dregex_code_import(code, buffer, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_code_match_groups(code, subject, groups_r, error_r) \
+	NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_code_match(code, subject, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_code_replace_full(code, subject, startoffset, replacement, result_r, \
+	flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+# define dregex_code_replace(code, subject, replacement, result_r, flags, error_r) \
+	NO_DREGEX_SUPPORT_CODE(error_r)
+#endif
+
+#endif
--- /dev/null
+/* Copyright (C) 2025 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "buffer.h"
+#include "cpu-limit.h"
+#include "str.h"
+#include "unichar.h"
+#include "dregex.h"
+
+#ifdef HAVE_LIBPCRE
+
+#define PCRE2_CODE_UNIT_WIDTH 32
+#include "pcre2.h"
+
+/* Default limits used by dregex_code_create(). */
+/* depth limit for the compile recursion guard and the PCRE2 match depth */
+#define DREGEX_MAX_DEPTH 100
+/* default for max_capture_groups, which is given to pcre2_set_match_limit() */
+#define DREGEX_MAX_MATCHES 100
+/* CPU time limit for a single match/substitute call */
+#define DREGEX_MAX_CPU_SECONDS 1
+
+/* A reusable regex context: PCRE2 contexts, the compiled pattern and the
+ * limits/flags that apply to it. */
+struct dregex_code {
+ /* pool that owns this struct and serves all PCRE2 allocations */
+ pool_t pool;
+
+ pcre2_compile_context *cctx;
+ pcre2_general_context *gctx;
+ pcre2_match_context *mctx;
+ /* compiled pattern, NULL until compile/import succeeds */
+ pcre2_code *pat;
+
+ /* only set while a match/substitute call is running; polled by the
+ * callouts */
+ struct cpu_limit *climit;
+
+ unsigned int max_depth;
+ unsigned int max_cpu_seconds;
+ /* used as the PCRE2 match limit, see dregex_code_init() */
+ unsigned int max_capture_groups;
+
+ /* flags given to the last dregex_code_compile() */
+ enum dregex_flags flags;
+};
+
+/* PCRE2 allocation hook: serve the request from the code object's pool. */
+static void *dregex_code_int_malloc(size_t amt, void *_ctx)
+{
+	struct dregex_code *code = _ctx;
+	pool_t pool = code->pool;
+
+	return p_malloc(pool, amt);
+}
+
+/* PCRE2 deallocation hook: give the memory back to the code object's pool. */
+static void dregex_code_int_free(void *ptr, void *_ctx)
+{
+	struct dregex_code *code = _ctx;
+	pool_t pool = code->pool;
+
+	p_free(pool, ptr);
+}
+
+/* Match callout: aborts the running operation with
+ * PCRE2_ERROR_PATTERN_TOO_COMPLICATED once the CPU limit has been exceeded,
+ * otherwise returns 0 to let matching continue. */
+static int dregex_code_callout(pcre2_callout_block *block ATTR_UNUSED, void *ctx)
+{
+	struct dregex_code *code = ctx;
+
+	return cpu_limit_exceeded(code->climit) ?
+		PCRE2_ERROR_PATTERN_TOO_COMPLICATED : 0;
+}
+
+/* Substitute callout: applies the same CPU limit check as the match callout. */
+static int
+dregex_code_substitute_callout(pcre2_substitute_callout_block *block ATTR_UNUSED, void *ctx)
+{
+	int verdict = dregex_code_callout(NULL, ctx);
+
+	return verdict;
+}
+
+/* Compile recursion guard: refuse nesting deeper than max_depth. */
+static int dregex_code_guard(uint depth, void *ctx)
+{
+	struct dregex_code *code = ctx;
+
+	return depth > code->max_depth ? PCRE2_ERROR_DEPTHLIMIT : 0;
+}
+
+/* Creates the PCRE2 general/compile/match contexts for the code object and
+ * installs the guard, the callouts and the limits. The limit fields of the
+ * code object must already be filled in. */
+static void dregex_code_init(struct dregex_code *code)
+{
+	/* every PCRE2 allocation goes through the code object's pool */
+	pcre2_general_context *general =
+		pcre2_general_context_create(dregex_code_int_malloc,
+					     dregex_code_int_free, code);
+
+	code->gctx = general;
+	code->cctx = pcre2_compile_context_create(general);
+	code->mctx = pcre2_match_context_create(general);
+
+	/* compile time: bound the nesting depth of the pattern */
+	pcre2_set_compile_recursion_guard(code->cctx, dregex_code_guard, code);
+
+	/* run time: the callouts enforce the CPU time limit */
+	pcre2_set_callout(code->mctx, dregex_code_callout, code);
+	pcre2_set_substitute_callout(code->mctx, dregex_code_substitute_callout, code);
+
+	/* run time: PCRE2's own limits */
+	pcre2_set_match_limit(code->mctx, code->max_capture_groups);
+	pcre2_set_depth_limit(code->mctx, code->max_depth);
+}
+
+/* Allocates a code object in a pool of its own, copies the limits from
+ * params and sets up the PCRE2 contexts. */
+struct dregex_code *dregex_code_create_params(const struct dregex_params *params)
+{
+	struct dregex_code *code;
+	pool_t pool;
+
+	pool = pool_allocfree_create("regex pool");
+	code = p_new(pool, struct dregex_code, 1);
+	code->pool = pool;
+
+	code->max_depth = params->max_depth;
+	code->max_cpu_seconds = params->max_cpu_seconds;
+	code->max_capture_groups = params->max_capture_groups;
+
+	dregex_code_init(code);
+	return code;
+}
+
+/* Limits used by dregex_code_create(). */
+static const struct dregex_params default_params = {
+	.max_depth = DREGEX_MAX_DEPTH,
+	.max_cpu_seconds = DREGEX_MAX_CPU_SECONDS,
+	.max_capture_groups = DREGEX_MAX_MATCHES,
+};
+
+/* Creates a code object with the default limits. */
+struct dregex_code *dregex_code_create(void)
+{
+	/* dregex_code_create_params() already calls dregex_code_init().
+	   Calling it again here would create a second set of PCRE2 contexts
+	   and orphan the first one. */
+	return dregex_code_create_params(&default_params);
+}
+
+/* Shared zero-length subject, so callers never get a NULL pointer. */
+static const PCRE2_SPTR empty_str = U"";
+
+/* Converts a NUL-terminated input string into 32-bit code units for PCRE2.
+ *
+ * With refuse_non_ascii every input byte becomes one code unit without any
+ * decoding; otherwise the input is decoded as UTF-8. The result is allocated
+ * from the data stack, so it is only valid within the caller's data stack
+ * frame.
+ *
+ * Returns 0 on success with *out_r and *len_r set, -1 if the input is not
+ * valid UTF-8.
+ */
+static int convert_to_sptr(const char *input, PCRE2_SPTR *out_r, PCRE2_SIZE *len_r,
+			   bool refuse_non_ascii)
+{
+	if (*input == '\0') {
+		*len_r = 0;
+		*out_r = empty_str;
+		/* nothing to convert; previously this fell through and
+		   allocated an array only to produce the same result */
+		return 0;
+	}
+	ARRAY_TYPE(unichars) chars;
+	t_array_init(&chars, 128);
+	if (refuse_non_ascii) {
+		/* treat everything as ascii */
+		for (; *input != '\0'; input++) {
+			unichar_t chr = (unsigned char)*input;
+			array_push_back(&chars, &chr);
+		}
+	} else if (uni_utf8_to_ucs4(input, &chars) < 0)
+		return -1;
+	*len_r = array_count(&chars);
+	if (*len_r == 0)
+		*out_r = empty_str;
+	else
+		*out_r = array_idx(&chars, 0);
+	return 0;
+}
+
+/* Handle error */
+static int handle_error(int ret, const char *func, const char **error_r)
+{
+ PCRE2_UCHAR buf[256];
+ if (ret == PCRE2_ERROR_NOMEMORY)
+ i_fatal_status(FATAL_OUTOFMEM, "%s(): Out of memory", func);
+ int rc = pcre2_get_error_message(ret, buf, sizeof(buf));
+ /* Ignore, the error didn't fit to buffer */
+ if (rc == PCRE2_ERROR_BADDATA) {
+ *error_r = t_strdup_printf("Unknown error %d occured", ret);
+ } else if (rc < 0) {
+ *error_r = t_strdup_printf("Unknown error %d occured while handling %d",
+ rc, ret);
+ } else {
+ /* we are ignoring PCRE2_ERROR_NOMEMORY here because it
+ * likely means the output did not fit in 256 characters. */
+ buffer_t *output = t_buffer_create(rc);
+ uni_ucs4_to_utf8(buf, rc, output);
+ *error_r = str_c(output);
+ }
+ return -1;
+}
+#define handle_error(ret, error_r) handle_error((ret), __func__, (error_r))
+
+/* Compiles 'pattern' into 'code', replacing any previously compiled pattern.
+ * The flags are stored in the code object for later match calls.
+ * Returns 0 on success, -1 with error_r set on failure. */
+int dregex_code_compile(struct dregex_code *code, const char *pattern,
+			enum dregex_flags flags, const char **error_r)
+{
+	/* dregex flag -> PCRE2 compile option */
+	static const struct {
+		enum dregex_flags flag;
+		uint option;
+	} flag_map[] = {
+		{ DREGEX_ICASE, PCRE2_CASELESS },
+		{ DREGEX_NOSUB, PCRE2_NO_AUTO_CAPTURE },
+		{ DREGEX_NEWLINE, PCRE2_MULTILINE },
+		{ DREGEX_ANCHORED, PCRE2_ANCHORED },
+		{ DREGEX_EXTENDED, PCRE2_EXTENDED },
+	};
+
+	i_assert(code != NULL);
+	i_assert(pattern != NULL);
+
+	/* forget the previous pattern, if any */
+	if (code->pat != NULL) {
+		pcre2_code_free(code->pat);
+		code->pat = NULL;
+	}
+	code->flags = flags;
+
+	const bool ascii_only = HAS_ALL_BITS(flags, DREGEX_ASCII_ONLY);
+	uint options = PCRE2_AUTO_CALLOUT |
+		PCRE2_NEVER_BACKSLASH_C | PCRE2_NO_UTF_CHECK;
+
+	for (unsigned int i = 0; i < N_ELEMENTS(flag_map); i++) {
+		if (HAS_ALL_BITS(flags, flag_map[i].flag))
+			options |= flag_map[i].option;
+	}
+	/* Use Unicode properties for character matching unless the caller
+	   asked for plain byte semantics */
+	if (ascii_only)
+		options |= PCRE2_NEVER_UTF;
+	else
+		options |= (PCRE2_UCP | PCRE2_UTF);
+
+	int errcode;
+	PCRE2_SIZE erroffset;
+
+	T_BEGIN {
+		PCRE2_SIZE pattern_len;
+		PCRE2_SPTR32 pattern32;
+		if (convert_to_sptr(pattern, &pattern32, &pattern_len,
+				    ascii_only) >= 0) {
+			code->pat = pcre2_compile(pattern32, pattern_len,
+						  options, &errcode,
+						  &erroffset, code->cctx);
+		} else {
+			/* the pattern was not valid UTF-8 */
+			errcode = PCRE2_ERROR_BADDATA;
+			code->pat = NULL;
+		}
+	} T_END;
+
+	i_assert(code->pat != NULL || errcode != 0);
+
+	if (code->pat != NULL)
+		return 0;
+	return handle_error(errcode, error_r);
+}
+
+/* Serializes the compiled pattern with PCRE2 and appends the bytes to
+ * 'buffer'. A serialization failure is treated as a bug and panics. */
+void dregex_code_export(const struct dregex_code *code, buffer_t *buffer)
+{
+	const pcre2_code *patterns[] = { code->pat };
+	uint8_t *serialized;
+	PCRE2_SIZE serialized_size;
+	int count;
+
+	count = pcre2_serialize_encode(patterns, N_ELEMENTS(patterns),
+				       &serialized, &serialized_size,
+				       code->gctx);
+	if (count < 0) {
+		const char *error;
+		(void)handle_error(count, &error);
+		i_panic("BUG: dregex_code_export(): %s", error);
+	}
+
+	/* There must be only one pattern */
+	i_assert(count == 1);
+
+	buffer_append(buffer, serialized, serialized_size);
+	pcre2_serialize_free(serialized);
+}
+
+/* Replaces the pattern in 'code' with one deserialized from 'buffer' (as
+ * written by dregex_code_export()). The flags stored in the code object are
+ * left as they were.
+ * Returns 0 on success, -1 with error_r set on failure; after a failure the
+ * code object has no pattern. */
+int dregex_code_import(struct dregex_code *code, const buffer_t *buffer,
+		       const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(buffer != NULL);
+
+	/* Release the old pattern first, as dregex_code_compile() does.
+	   Otherwise it would be overwritten and stay allocated until the
+	   whole code object is freed. */
+	if (code->pat != NULL) {
+		pcre2_code_free(code->pat);
+		code->pat = NULL;
+	}
+
+	int ret = pcre2_serialize_decode(&code->pat, 1, buffer->data,
+					 code->gctx);
+	if (ret < 0)
+		return handle_error(ret, error_r);
+	i_assert(ret > 0);
+
+	return 0;
+}
+
+/* Placeholder pushed for capture groups that did not take part in the match */
+static const char *empty_match_str = "";
+
+/* Appends the first 'count' groups of a successful match to groups_r as
+ * UTF-8 strings allocated from the data stack. Unset groups are added as ""
+ * unless skip_empty is set. Does nothing if groups_r is NULL. */
+static void extract_matches(uint count, pcre2_match_data *mdata,
+			    bool skip_empty, ARRAY_TYPE(const_string) *groups_r)
+{
+	/* we don't actually want matches */
+	if (groups_r == NULL)
+		return;
+	for (uint i = 0; i < count; i++) {
+		PCRE2_UCHAR32 *buf;
+		PCRE2_SIZE bsize;
+		int rc = pcre2_substring_length_bynumber(mdata, i, &bsize);
+		if (rc == PCRE2_ERROR_NOSUBSTRING)
+			break;
+		else if (rc == PCRE2_ERROR_UNSET) {
+			if (!skip_empty)
+				array_push_back(groups_r, &empty_match_str);
+			continue;
+		} else if (rc == PCRE2_ERROR_UNAVAILABLE)
+			continue;
+		/* buf is only valid if the call succeeded */
+		if (pcre2_substring_get_bynumber(mdata, i, &buf, &bsize) < 0)
+			continue;
+		buffer_t *output = t_buffer_create(bsize);
+		uni_ucs4_to_utf8(buf, bsize, output);
+		/* PCRE2 allocated a copy of the substring, release it now
+		   that it has been converted */
+		pcre2_substring_free(buf);
+		const char *substr = str_c(output);
+		array_push_back(groups_r, &substr);
+	}
+}
+
+/* Runs the compiled pattern against 'subject' and leaves the match details
+ * in 'mdata'. The converted subject is allocated from the data stack.
+ *
+ * Returns -1 on error (error_r set), 0 if there was no match, otherwise the
+ * positive value returned by pcre2_match(). */
+static int dregex_code_match_int(struct dregex_code *code, const char *subject,
+				 pcre2_match_data *mdata, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(code->pat != NULL);
+	i_assert(subject != NULL);
+
+	const enum dregex_flags flags = code->flags;
+	PCRE2_SPTR subject32;
+	PCRE2_SIZE subject_len;
+
+	if (convert_to_sptr(subject, &subject32, &subject_len,
+			    HAS_ALL_BITS(flags, DREGEX_ASCII_ONLY)) < 0)
+		return handle_error(PCRE2_ERROR_BADDATA, error_r);
+
+	/* Empty string is not a match */
+	uint options = PCRE2_NOTEMPTY;
+	options |= HAS_ALL_BITS(flags, DREGEX_NOTBOL) ? PCRE2_NOTBOL : 0;
+	options |= HAS_ALL_BITS(flags, DREGEX_NOTEOL) ? PCRE2_NOTEOL : 0;
+	options |= HAS_ALL_BITS(flags, DREGEX_ANCHORED) ? PCRE2_ANCHORED : 0;
+
+	/* the CPU limit exists only for the duration of the match, the
+	   callout checks it */
+	code->climit = cpu_limit_init(code->max_cpu_seconds, CPU_LIMIT_TYPE_ALL);
+	int rc = pcre2_match(code->pat, subject32, subject_len, 0, options,
+			     mdata, code->mctx);
+	cpu_limit_deinit(&code->climit);
+
+	if (rc == PCRE2_ERROR_NOMATCH) {
+		/* did not match */
+		return 0;
+	}
+	if (rc < 0)
+		return handle_error(rc, error_r);
+	return rc;
+}
+
+/* Matches 'subject' against the compiled pattern and, when the match
+ * produced capture groups, appends the groups to groups_r (may be NULL).
+ *
+ * Returns -1 on error (error_r set), 0 on no match, 1 on match. The group
+ * strings and the error are allocated from the caller's data stack frame. */
+int dregex_code_match_groups(struct dregex_code *code, const char *subject,
+			     ARRAY_TYPE(const_string) *groups_r, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(code->pat != NULL);
+
+	pcre2_match_data *mdata =
+		pcre2_match_data_create_from_pattern(code->pat, code->gctx);
+
+	/* Deliberately no T_BEGIN/T_END frame here. The group strings handed
+	   out via groups_r come from the data stack and would be freed
+	   before the caller could read them. The match data also refers to
+	   the converted subject, which must stay alive until the groups have
+	   been extracted. */
+	int ret = dregex_code_match_int(code, subject, mdata, error_r);
+	if (ret > 1) {
+		bool skip_empty = HAS_ALL_BITS(code->flags, DREGEX_NO_EMPTY_SUB);
+		/* ret is number of groups */
+		extract_matches((uint32_t)ret, mdata, skip_empty, groups_r);
+		ret = 1;
+	}
+	/* was never released before */
+	pcre2_match_data_free(mdata);
+	return ret;
+}
+
+/* Match without collecting groups: same result as
+ * dregex_code_match_groups() with a NULL group array. */
+int dregex_code_match(struct dregex_code *code, const char *subject,
+		      const char **error_r)
+{
+	ARRAY_TYPE(const_string) *no_groups = NULL;
+
+	return dregex_code_match_groups(code, subject, no_groups, error_r);
+}
+
+/* Replaces matches of the compiled pattern in 'subject' with 'replacement',
+ * starting the search at 'startoffset', and writes the UTF-8 result into
+ * result_r.
+ *
+ * flags: DREGEX_ANCHORED, DREGEX_REPLACE_ALL, DREGEX_REPLACE_LITERAL and
+ * DREGEX_ASCII_ONLY are honoured (ASCII_ONLY also when it was given at
+ * compile time).
+ *
+ * Returns -1 on error (error_r set), 0 if nothing was replaced (result_r is
+ * left untouched), 1 if at least one substitution was made.
+ */
+int dregex_code_replace_full(struct dregex_code *code,
+			     const char *subject, size_t startoffset,
+			     const char *replacement, string_t *result_r,
+			     enum dregex_flags flags, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(code->pat != NULL);
+	i_assert(subject != NULL);
+	i_assert(replacement != NULL);
+	i_assert(result_r != NULL);
+
+	uint options = PCRE2_NOTEMPTY;
+	/* 'flags' holds dregex flags, so it has to be tested against
+	   DREGEX_ANCHORED and not against the PCRE2 option bit */
+	if (HAS_ALL_BITS(flags, DREGEX_ANCHORED))
+		options |= PCRE2_ANCHORED;
+	if (HAS_ALL_BITS(flags, DREGEX_REPLACE_ALL))
+		options |= PCRE2_SUBSTITUTE_GLOBAL;
+	if (HAS_ALL_BITS(flags, DREGEX_REPLACE_LITERAL))
+		options |= PCRE2_SUBSTITUTE_LITERAL;
+
+	/* zero-length output buffer for the sizing pass */
+	PCRE2_UCHAR sizing_buf[1] = { 0 };
+	/* The real output buffer comes from the heap and not from the data
+	   stack, because it is still read after T_END below. */
+	PCRE2_UCHAR *result32 = NULL;
+	PCRE2_SIZE result_len = 0;
+
+	int ret;
+	bool refuse_non_ascii = HAS_ALL_BITS(flags, DREGEX_ASCII_ONLY) ||
+		HAS_ALL_BITS(code->flags, DREGEX_ASCII_ONLY);
+
+	T_BEGIN do {
+		PCRE2_SIZE slen;
+		PCRE2_SPTR subject32;
+		PCRE2_SIZE rlen;
+		PCRE2_SPTR replacement32;
+
+		if (convert_to_sptr(subject, &subject32, &slen, refuse_non_ascii) < 0 ||
+		    convert_to_sptr(replacement, &replacement32, &rlen, refuse_non_ascii) < 0) {
+			ret = PCRE2_ERROR_BADDATA;
+			break;
+		}
+
+		pcre2_match_data *mdata =
+			pcre2_match_data_create_from_pattern(code->pat, code->gctx);
+
+		/* First pass: only find out how long the result will be */
+		code->climit = cpu_limit_init(code->max_cpu_seconds,
+					      CPU_LIMIT_TYPE_ALL);
+		ret = pcre2_substitute(code->pat, subject32, slen, startoffset,
+				       options|PCRE2_SUBSTITUTE_OVERFLOW_LENGTH,
+				       mdata, code->mctx, replacement32, rlen,
+				       sizing_buf, &result_len);
+		cpu_limit_deinit(&code->climit);
+		/* Ignore NOMEMORY error here, it's because we asked how long
+		   the result would be. */
+		if (ret != PCRE2_ERROR_NOMEMORY && ret < 0) {
+			pcre2_match_data_free(mdata);
+			break;
+		}
+
+		result32 = i_new(PCRE2_UCHAR, I_MAX(result_len, 1));
+
+		/* Run it again as we know the buffer size now */
+		code->climit = cpu_limit_init(code->max_cpu_seconds,
+					      CPU_LIMIT_TYPE_ALL);
+		ret = pcre2_substitute(code->pat, subject32, slen, startoffset, options,
+				       mdata, code->mctx, replacement32, rlen,
+				       result32, &result_len);
+		cpu_limit_deinit(&code->climit);
+		pcre2_match_data_free(mdata);
+	} while(0); T_END;
+
+	if (ret < 0) {
+		i_free(result32);
+		return handle_error(ret, error_r);
+	}
+	/* Converted outside the frame above, because result_r belongs to the
+	   caller and may need to grow */
+	if (ret > 0)
+		uni_ucs4_to_utf8(result32, result_len, result_r);
+	i_free(result32);
+
+	return ret > 0 ? 1 : 0;
+}
+
+/* Replacement over the whole subject: dregex_code_replace_full() starting at
+ * offset 0. */
+int dregex_code_replace(struct dregex_code *code, const char *subject,
+			const char *replacement, string_t *result_r,
+			enum dregex_flags flags, const char **error_r)
+{
+	const size_t from_start = 0;
+
+	return dregex_code_replace_full(code, subject, from_start, replacement,
+					result_r, flags, error_r);
+}
+
+/* Releases the compiled pattern, the PCRE2 contexts and finally the pool of
+ * the code object. *_code is set to NULL; a NULL *_code is a no-op. */
+void dregex_code_free(struct dregex_code **_code)
+{
+	struct dregex_code *code = *_code;
+
+	if (code == NULL)
+		return;
+	*_code = NULL;
+
+	if (code->pat != NULL)
+		pcre2_code_free(code->pat);
+	/* the contexts are released before the pool they were allocated from */
+	pcre2_match_context_free(code->mctx);
+	pcre2_compile_context_free(code->cctx);
+	pcre2_general_context_free(code->gctx);
+	pool_unref(&code->pool);
+}
+
+/* One-shot variant: compiles 'pattern' with default limits, matches it
+ * against 'subject' and appends the groups to groups_r (may be NULL).
+ *
+ * Returns -1 on error (error_r set), 0 on no match, 1 on match. */
+int dregex_match_groups(const char *pattern, const char *subject, enum dregex_flags flags,
+			ARRAY_TYPE(const_string) *groups_r, const char **error_r)
+{
+	struct dregex_code *code = dregex_code_create();
+	int ret;
+
+	/* Only the compilation gets a data stack frame of its own, as in
+	   dregex_replace(). The match must run in the caller's frame,
+	   because the group strings added to groups_r are allocated from
+	   the data stack and would otherwise be freed at T_END. */
+	T_BEGIN {
+		ret = dregex_code_compile(code, pattern, flags, error_r);
+	} T_END_PASS_STR_IF(ret < 0, error_r);
+
+	if (ret >= 0) {
+		ret = dregex_code_match_groups(code, subject, groups_r,
+					       error_r);
+	}
+	dregex_code_free(&code);
+
+	return ret;
+}
+
+/* One-shot match without groups: dregex_match_groups() with a NULL group
+ * array. */
+int dregex_match(const char *pattern, const char *subject, enum dregex_flags flags,
+		 const char **error_r)
+{
+	ARRAY_TYPE(const_string) *no_groups = NULL;
+
+	return dregex_match_groups(pattern, subject, flags, no_groups, error_r);
+}
+
+/* One-shot replace: compiles 'pattern' with default limits and runs the
+ * replacement over 'subject'. The same flags are used for compiling and for
+ * replacing. Returns -1 on error, 0 if nothing was replaced, 1 otherwise. */
+int dregex_replace(const char *pattern, const char *subject, const char *replace,
+		   string_t *result_r, enum dregex_flags flags,
+		   const char **error_r)
+{
+	struct dregex_code *code = dregex_code_create();
+	int ret;
+
+	/* temporaries of the compilation are dropped right away, only the
+	   error string survives the frame */
+	T_BEGIN {
+		ret = dregex_code_compile(code, pattern, flags, error_r);
+	} T_END_PASS_STR_IF(ret < 0, error_r);
+
+	if (ret < 0) {
+		dregex_code_free(&code);
+		return ret;
+	}
+
+	ret = dregex_code_replace(code, subject, replace, result_r,
+				  flags, error_r);
+	dregex_code_free(&code);
+	return ret;
+}
+
+#endif
--- /dev/null
+/* Copyright (C) 2025 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "test-common.h"
+#include "array.h"
+#include "str.h"
+#include "dregex.h"
+
+#ifdef HAVE_LIBPCRE
+
+static const bool debug = FALSE;
+
+/* One table-driven test: inputs plus the expected outcome of the compile
+ * step and of the match/replace step. A NULL pattern terminates a table. */
+struct test_case {
+ const char *subject;
+ const char *pattern;
+ /* replace tests only */
+ const char *replacement;
+ /* replace tests only: expected output string */
+ const char *result;
+ /* expected error message when compile_ret or match_ret is negative */
+ const char *error;
+ enum dregex_flags flags;
+ /* expected return value of dregex_code_compile() */
+ int compile_ret;
+ /* expected return value of dregex_code_match() / dregex_code_replace() */
+ int match_ret;
+};
+
+/* Runs every case of a NULL-pattern terminated table through
+ * dregex_code_compile() + dregex_code_match(), reusing one code object, and
+ * checks the return values and error messages against the expectations. */
+static void run_match_tests(const struct test_case *cases)
+{
+	struct dregex_code *code = dregex_code_create();
+	const struct test_case *test;
+	unsigned int idx = 0;
+
+	for (test = cases; test->pattern != NULL; test++, idx++) {
+		const char *error = NULL;
+		int ret;
+
+		if (debug) {
+			i_debug("pattern = %s, subject = %s", test->pattern,
+				test->subject);
+		}
+
+		/* step 1: compile */
+		ret = dregex_code_compile(code, test->pattern, test->flags,
+					  &error);
+		test_assert_cmp_idx(test->compile_ret, ==, ret, idx);
+		if (test->compile_ret < 0) {
+			/* expected failure, only the message is left to check */
+			test_assert_strcmp_idx(test->error, error, idx);
+			continue;
+		}
+		if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+
+		/* step 2: match */
+		ret = dregex_code_match(code, test->subject, &error);
+		test_assert_cmp_idx(test->match_ret, ==, ret, idx);
+		if (test->match_ret < 0) {
+			test_assert_strcmp_idx(test->error, error, idx);
+		} else if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+		}
+	}
+	dregex_code_free(&code);
+}
+
+/* Builds a match test case; flags are left at 0.
+ * cret/mret are the expected compile and match return values, err the
+ * expected error message when one of them is negative. */
+#define MATCH_CASE_FULL(pat, sub, err, cret, mret) \
+ { \
+ .pattern = (pat), \
+ .subject = (sub), \
+ .replacement = NULL, \
+ .result = NULL, \
+ .error = (err), \
+ .compile_ret = (cret), \
+ .match_ret = (mret) \
+ }
+/* Pattern compiles and matches the subject */
+#define MATCH_CASE(pattern, subject) MATCH_CASE_FULL(pattern, subject, NULL, 0, 1)
+/* Table terminator */
+#define MATCH_CASE_END { .pattern = NULL }
+
+/* String literal repetition via adjacent literal concatenation:
+ * REP(x) is x repeated 10 times, REP10(x) is REP(x) repeated 10 times,
+ * i.e. x repeated 100 times. */
+#define STR(x) x
+#define REP(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x)
+#define REP10(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x)
+
+/* Table of match tests: basic syntax, character classes, Unicode input,
+ * byte-wise (ASCII_ONLY) matching, invalid UTF-8 and patterns that must be
+ * stopped by the match limit. */
+static void test_dregex_match(void)
+{
+	const struct test_case cases[] = {
+		/* simple test case */
+		MATCH_CASE(".*", "hello world"),
+		/* .* could match the empty string, but empty matches are
+		   not counted as matches */
+		MATCH_CASE_FULL(".*", "", NULL, 0, 0),
+		/* likewise, empty pattern does not match empty string */
+		MATCH_CASE_FULL("", "", NULL, 0, 0),
+		/* Match any single character except newline. */
+		MATCH_CASE(".", "a"),
+		MATCH_CASE_FULL(".", "\n", NULL, 0, 0),
+		/* Bracket expression. Match any one of the enclosed
+		   characters. A hyphen (-) indicates a range of
+		   consecutive characters. */
+		MATCH_CASE("[a-z]", "a"),
+		MATCH_CASE_FULL("[a-z]", "A", NULL, 0, 0),
+		/* Negated bracket expression. */
+		MATCH_CASE("[^a-z]", "A"),
+		MATCH_CASE_FULL("[^a-z]", "a", NULL, 0, 0),
+		/* Character class, and its negation. The negation is
+		   written [:^alnum:]; "[[^:alnum:]]" is an ordinary
+		   bracket expression followed by a literal "]". */
+		MATCH_CASE("^[[:alnum:]]+$", "abc123"),
+		MATCH_CASE_FULL("^[[:^alnum:]]+$", "abc123", NULL, 0, 0),
+		/* Unicode properties */
+		MATCH_CASE("^\\p{L}$", "\xc3\xab"),
+		MATCH_CASE("^\\pL$", "\xc3\xab"),
+		/* Quantifiers */
+		MATCH_CASE("^.$", "h"),
+		MATCH_CASE("^.{2}$", "he"),
+		MATCH_CASE("^.{2,3}$", "he"),
+		MATCH_CASE("^.{2,3}$", "hel"),
+		MATCH_CASE("^.+$", "hello"),
+		MATCH_CASE_FULL("^.+$", "", NULL, 0, 0),
+		/* Alternation and grouping */
+		MATCH_CASE("^(hello|world)$", "hello"),
+		MATCH_CASE("^(hello|world)$", "world"),
+		MATCH_CASE_FULL("^(hello|world)$", "hi", NULL, 0, 0),
+		/* test that we can find 'mojiretsu' (test string) from
+		   'Kore wa tesuto mojiretsudesu.' (this is a test string) */
+		MATCH_CASE(
+			"\xe6\x96\x87\xe5\xad\x97\xe5\x88\x97",
+			"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf\xe3\x83\x86\xe3"
+			"\x82\xb9\xe3\x83\x88\xe6\x96\x87\xe5\xad\x97\xe5\x88"
+			"\x97\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82"
+		),
+		/* test that we can find <U+1F60A> from <U+1F600><U+1F60A>.
+		   These are 4-byte UTF-8 sequences; the previous bytes
+		   encoded U+F160 followed by an ASCII character. */
+		MATCH_CASE("\xf0\x9f\x98\x8a",
+			   "\xf0\x9f\x98\x80\xf0\x9f\x98\x8a"),
+		/* binary matching: same data, each byte is one character */
+		{
+			.pattern = "\xf0\x9f\x98\x8a",
+			.subject = "\xf0\x9f\x98\x80\xf0\x9f\x98\x8a",
+			.error = "",
+			.flags = DREGEX_ASCII_ONLY,
+			.compile_ret = 0,
+			.match_ret = 1,
+		},
+		{
+			.pattern = ".*",
+			.subject = "\xf0\x9f\x98\x80\xf0\x9f\x98\x8a",
+			.error = "",
+			.flags = DREGEX_ASCII_ONLY,
+			.compile_ret = 0,
+			.match_ret = 1,
+		},
+		/* invalid utf-8 */
+		MATCH_CASE_FULL(".*", "\xc2\xc2", "bad data value", 0, -1),
+		/* two evil patterns */
+		MATCH_CASE_FULL(
+			"^([a-zA-Z0-9])(([\\-.]|[_]+)?([a-zA-Z0-9]+))*(@)"
+			"{1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]"
+			"{1}[a-z]{2,3}))$",
+			"thisisabstractly.andtotally.long.email@"
+			REP10("a") "." REP10("a") "." REP10("a")
+			".has",
+			"match limit exceeded",
+			0,
+			-1
+		),
+		MATCH_CASE_FULL(
+			"(a|a?)+",
+			REP10("a") REP10("a"),
+			"match limit exceeded",
+			0,
+			-1
+		),
+		/* IEEE.1003-2.1992 */
+		MATCH_CASE("me(\\+.*)?@company\\.com",
+			   "me+hello@company.com"),
+		MATCH_CASE("^[^[:lower:]]+$", "HELLO"),
+		MATCH_CASE_FULL(
+			"^[^[:lower:]]+$",
+			"hello",
+			NULL,
+			0,
+			0
+		),
+		MATCH_CASE("<(.*)@", "<simple-list@test.invalid>"),
+		MATCH_CASE("^\\[(.*)\\] (.*)$", "[acme-users] [fwd]: hello, world"),
+		MATCH_CASE_END
+	};
+
+	test_begin("matching");
+
+	run_match_tests(cases);
+
+	test_end();
+}
+
+/* Runs every case of a NULL-pattern terminated table through
+ * dregex_code_compile() + dregex_code_replace(), reusing one code object and
+ * one output string, and checks return values, error messages and the
+ * produced string against the expectations. */
+static void run_replace_tests(const struct test_case *cases)
+{
+	struct dregex_code *code = dregex_code_create();
+	string_t *dest = t_str_new(32);
+	const struct test_case *test;
+	unsigned int idx = 0;
+
+	for (test = cases; test->pattern != NULL; test++, idx++) {
+		const char *error = NULL;
+		int ret;
+
+		/* start every case with an empty output string */
+		str_truncate(dest, 0);
+
+		if (debug) {
+			i_debug("pattern = %s, subject = %s, "
+				"replacement = %s, result = %s",
+				test->pattern, test->subject,
+				test->replacement, test->result);
+		}
+
+		/* step 1: compile */
+		ret = dregex_code_compile(code, test->pattern, test->flags,
+					  &error);
+		test_assert_cmp_idx(test->compile_ret, ==, ret, idx);
+		if (test->compile_ret < 0) {
+			test_assert_strcmp_idx(test->error, error, idx);
+			continue;
+		}
+		if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+
+		/* step 2: replace */
+		ret = dregex_code_replace(code, test->subject, test->replacement,
+					  dest, test->flags, &error);
+		test_assert_cmp_idx(test->match_ret, ==, ret, idx);
+		if (test->match_ret < 0) {
+			test_assert_strcmp_idx(test->error, error, idx);
+			continue;
+		}
+		if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+
+		/* step 3: compare the output */
+		test_assert_strcmp_idx(test->result, str_c(dest), idx);
+	}
+	dregex_code_free(&code);
+}
+
+/* Builds a replace test case; flags are left at 0.
+ * res is the expected output, cret/mret the expected compile and replace
+ * return values, err the expected error message when one of them is
+ * negative. */
+#define REP_CASE_FULL(pat, sub, rep, res, err, cret, mret) \
+ { \
+ .pattern = (pat), \
+ .subject = (sub), \
+ .replacement = (rep), \
+ .result = (res), \
+ .error = (err), \
+ .compile_ret = (cret), \
+ .match_ret = (mret) \
+ }
+/* Pattern compiles and the replacement produces 'result' */
+#define REP_CASE(pattern, subject, replacement, result) \
+ REP_CASE_FULL(pattern, subject, replacement, result, NULL, 0, 1)
+/* Table terminator */
+#define REP_CASE_END { .pattern = NULL }
+
+/* Table of replace tests: plain replacement, group references, Unicode
+ * input and an invalid group reference. */
+static void test_dregex_replace(void)
+{
+	const struct test_case cases[] = {
+		/* simple replacement */
+		REP_CASE(".*", "hello world", "world hello", "world hello"),
+		/* simple swap */
+		REP_CASE("(.*) (.*)", "hello world", "$2 $1", "world hello"),
+		/* $0 refers to the whole match */
+		REP_CASE("hello .*", "hello world", "$0", "hello world"),
+		/* simple utf-8 test,
+		 * '<U+1F600> <U+1F60A>' to '<U+1F60A> <U+1F600>'.
+		 * These are 4-byte UTF-8 sequences; the previous bytes
+		 * encoded U+F160 followed by an ASCII character. */
+		REP_CASE(
+			"(.*) (.*)",
+			"\xf0\x9f\x98\x80 \xf0\x9f\x98\x8a",
+			"$2 $1",
+			"\xf0\x9f\x98\x8a \xf0\x9f\x98\x80"
+		),
+		/* Invalid back reference */
+		REP_CASE_FULL(
+			"hello .*",
+			"hello world",
+			"$5",
+			"",
+			"unknown substring",
+			0,
+			-1
+		),
+		REP_CASE_END
+	};
+
+	test_begin("replacing");
+
+	run_replace_tests(cases);
+
+	test_end();
+}
+
+/* Test driver: hands the NULL-terminated list of test functions to
+ * test_run() and uses its return value as the exit status. */
+int main(void)
+{
+	void (*const test_functions[])(void) = {
+		test_dregex_match,
+		test_dregex_replace,
+		NULL
+	};
+	int status = test_run(test_functions);
+
+	return status;
+}
+
+#else
+
+/* Built without HAVE_LIBPCRE: there is nothing to test, report success. */
+int main(void) {
+ return 0;
+}
+
+#endif
if (event->disable_callbacks)
return TRUE;
+ if (!array_is_created(&event_handlers))
+ return TRUE;
array_foreach_elem(&event_handlers, callback) {
bool ret;