From 49ae6e798310e5c4b96709db435a3714ea6468a8 Mon Sep 17 00:00:00 2001
From: Aki Tuomi <aki.tuomi@open-xchange.com>
Date: Wed, 23 Jul 2025 13:06:43 +0300
Subject: [PATCH] lib-regex: Add regex matching library

---
 configure.ac               |   1 +
 src/Makefile.am            |   1 +
 src/lib-regex/Makefile.am  |  41 +++
 src/lib-regex/dregex.h     | 157 ++++++++++++
 src/lib-regex/empty.c      |   0
 src/lib-regex/regex.c      | 497 +++++++++++++++++++++++++++++++++++++
 src/lib-regex/test-regex.c | 306 +++++++++++++++++++++++
 src/lib/lib-event.c        |   2 +
 8 files changed, 1005 insertions(+)
 create mode 100644 src/lib-regex/Makefile.am
 create mode 100644 src/lib-regex/dregex.h
 create mode 100644 src/lib-regex/empty.c
 create mode 100644 src/lib-regex/regex.c
 create mode 100644 src/lib-regex/test-regex.c

diff --git a/configure.ac b/configure.ac
index 73c2fa4970..c113617f5e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -856,6 +856,7 @@ src/lib-settings/Makefile
 src/lib-smtp/Makefile
 src/lib-ssl-iostream/Makefile
 src/lib-test/Makefile
+src/lib-regex/Makefile
 src/lib-storage/Makefile
 src/lib-storage/list/Makefile
 src/lib-storage/index/Makefile
diff --git a/src/Makefile.am b/src/Makefile.am
index c7532536f5..85064133ae 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,6 +44,7 @@ SUBDIRS = \
 	lib-dovecot \
 	$(LIB_LDAP) \
 	$(LIB_LUA) \
+	lib-regex \
 	lib-language \
 	lib-imap-client \
 	lib-imap-urlauth \
diff --git a/src/lib-regex/Makefile.am b/src/lib-regex/Makefile.am
new file mode 100644
index 0000000000..17ce730a1d
--- /dev/null
+++ b/src/lib-regex/Makefile.am
@@ -0,0 +1,41 @@
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-test \
+	-I$(top_srcdir)/src/lib-dict \
+	-I$(top_srcdir)/src/lib-doveadm \
+	-I$(top_srcdir)/src/lib-dns-client \
+	-I$(top_srcdir)/src/lib-http \
+	-I$(top_srcdir)/src/lib-ssl-iostream \
+	-I$(top_srcdir)/src/lib-settings \
+	-I$(top_srcdir)/src/lib-master \
+	-I$(top_srcdir)/src/lib-var-expand \
+	$(LIBPCRE_CFLAGS)
+
+headers = \
+	dregex.h
+
+pkginc_libdir=$(pkgincludedir)
+pkginc_lib_HEADERS = $(headers)
+
+noinst_LTLIBRARIES=libdregex.la
+
+if BUILD_LIBREGEX
+libdregex_la_SOURCES = regex.c
+libdregex_la_LIBADD = $(LIBPCRE_LIBS)
+
+EXTRA_DIST =
+
+test_programs = test-regex
+
+noinst_PROGRAMS = $(test_programs)
+
+test_regex_SOURCES = test-regex.c
+test_regex_LDADD = libdregex.la \
+		   ../lib-test/libtest.la \
+		   ../lib/liblib.la \
+		   $(LIBPCRE_LIBS)
+test_regex_DEPENDENCIES = libdregex.la $(LIBPCRE_LIBS)
+
+else
+libdregex_la_SOURCES = empty.c
+endif
diff --git a/src/lib-regex/dregex.h b/src/lib-regex/dregex.h
new file mode 100644
index 0000000000..a40ab5cfb5
--- /dev/null
+++ b/src/lib-regex/dregex.h
@@ -0,0 +1,157 @@
+#ifndef DREGEX_H
+#define DREGEX_H 1
+
+enum dregex_flags {
+	/* Match only at the first position */
+	DREGEX_ANCHORED = BIT(0),
+	/* Do not create automatic capture groups */
+	DREGEX_NOSUB = BIT(1),
+	/* Case insensitive matching */
+	DREGEX_ICASE = BIT(2),
+	/*  ^ and $ match newlines within data */
+	DREGEX_NEWLINE = BIT(3),
+	/* Subject string is not the beginning of a line */
+	DREGEX_NOTBOL = BIT(4),
+	/* Subject string is not the end of a line */
+	DREGEX_NOTEOL = BIT(5),
+	/* Reject non-ascii strings */
+	DREGEX_ASCII_ONLY = BIT(6),
+	/* Extended regular expression, skip whitespace and ignore comments,
+	 * see https://www.pcre.org/current/doc/html/pcre2api.html */
+	DREGEX_EXTENDED = BIT(7),
+	/* Skip empty match groups */
+	DREGEX_NO_EMPTY_SUB = BIT(8),
+
+	/* Perform global replace */
+	DREGEX_REPLACE_ALL = BIT(9),
+	/* Replacement string is literal */
+	DREGEX_REPLACE_LITERAL = BIT(10),
+};
+
+struct dregex_params {
+	unsigned int max_cpu_seconds; /* maximum execution time, 1s default */
+	unsigned int max_capture_groups; /* maximum number of capture groups, 100 default */
+	unsigned int max_depth; /* maximum stack depth, 100 default */
+};
+
+/* Matches the given regular expression pattern against the subject string.
+ *
+ * Both pattern and subject are converted to UCS4 internally, making this UTF-8 safe.
+ *
+ * Returns:
+ *  - -1 on error (with error_r optionally set to an error message)
+ *  -  0 if the pattern does not match
+ *  -  1 if the pattern matches
+ */
+int dregex_match(const char *pattern, const char *subject, enum dregex_flags flags,
+		 const char **error_r);
+
+int dregex_match_groups(const char *pattern, const char *subject, enum dregex_flags flags,
+			ARRAY_TYPE(const_string) *groups_r, const char **error_r);
+
+/* Performs a regular expression-based substitution on the subject string.
+ * Replaces matches of 'pattern' with 'replace' and stores the result in *result_r.
+ *
+ * Pattern, subject and replace are all converted to UCS4 internally, making this UTF-8 safe.
+ * The result is converted back to UTF-8 and written into the caller-provided result_r.
+ *
+ * Returns:
+ *  - -1 on error (with error_r optionally set to an error message)
+ *  -  0 if no substitution was performed (no match)
+ *  -  1 if substitution was successful
+ */
+int dregex_replace(const char *pattern, const char *subject, const char *replace,
+		   string_t *result_r, enum dregex_flags flags,
+		   const char **error_r);
+
+struct dregex_code;
+
+/* Creates a new regular expression context. This context
+ * can be reused by calling code_compile again, which will
+ * clear the old pattern.
+*/
+struct dregex_code *dregex_code_create(void);
+struct dregex_code *dregex_code_create_params(const struct dregex_params *params);
+
+/* Frees the regular expression context. */
+void dregex_code_free(struct dregex_code **_code);
+
+/* Compiles the given pattern into reusable code.
+ *
+ * Pattern is converted to UCS4 internally, making this UTF-8 safe.
+ */
+int dregex_code_compile(struct dregex_code *code, const char *pattern,
+			enum dregex_flags flags, const char **error_r);
+
+/* Exports the compiled pattern into the given buffer. */
+void dregex_code_export(const struct dregex_code *code, buffer_t *buffer);
+
+/* Imports a compiled pattern from the given buffer. */
+int dregex_code_import(struct dregex_code *code, const buffer_t *buffer,
+		       const char **error_r);
+
+/* Executes regex matching with capture groups using precompiled code.
+ * Same as dregex_match_groups().
+ *
+ * Subject is converted to UCS4 internally, making this UTF-8 safe.
+ *
+ * Groups are converted from UCS4 to UTF-8 internally.
+ */
+int dregex_code_match_groups(struct dregex_code *code, const char *subject,
+			     ARRAY_TYPE(const_string) *groups_r, const char **error_r);
+
+/* Executes regex matching using precompiled code.
+ * Same as dregex_match().
+ *
+ * Subject is converted to UCS4 internally, making this UTF-8 safe.
+ */
+int dregex_code_match(struct dregex_code *code, const char *subject,
+		      const char **error_r);
+
+/* Performs regex replacement using precompiled code, starting at given offset.
+ * Same as dregex_replace().
+ *
+ * Subject and replacement are converted to UCS4 internally, making this UTF-8 safe.
+ * Result will be allocated from the datastack pool.
+ */
+int dregex_code_replace_full(struct dregex_code *code,
+			     const char *subject, size_t startoffset,
+			     const char *replacement,
+			     string_t *result_r, enum dregex_flags flags,
+			     const char **error_r);
+
+/* Performs regex replacement using precompiled code.
+ * Same as dregex_replace().
+ *
+ * Subject is converted to UCS4 internally, making this UTF-8 safe.
+ * Result will be allocated from the datastack pool.
+ */
+int dregex_code_replace(struct dregex_code *code,
+			const char *subject, const char *replacement,
+			string_t *result_r, enum dregex_flags flags,
+			const char **error_r);
+
+#ifndef HAVE_LIBPCRE
+/* Built without PCRE2: every call becomes a stub that fails (or does nothing) */
+#  define NO_DREGEX_SUPPORT "Missing regular expression support"
+#  define NO_DREGEX_SUPPORT_CODE(error_r) \
+	({STMT_START { *(error_r) = NO_DREGEX_SUPPORT;} STMT_END; -1;})
+#  define dregex_match(pattern, subject, flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_match_groups(pattern, subject, flags, groups_r, error_r) \
+	NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_replace(pattern, subject, replace, result_r, \
+			flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_code_create() ({ NULL; })
+#  define dregex_code_create_params(params) ({ NULL; })
+#  define dregex_code_free(code)
+#  define dregex_code_compile(code, pattern, flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_code_export(code, buffer)
+#  define dregex_code_import(code, buffer, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_code_match_groups(code, subject, groups_r, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_code_match(code, subject, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_code_replace_full(code, subject, startoffset, replacement, result_r, \
+				  flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#  define dregex_code_replace(code, subject, replacement, result_r, flags, error_r) NO_DREGEX_SUPPORT_CODE(error_r)
+#endif
+
+#endif
diff --git a/src/lib-regex/empty.c b/src/lib-regex/empty.c
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/lib-regex/regex.c b/src/lib-regex/regex.c
new file mode 100644
index 0000000000..b8be11afa1
--- /dev/null
+++ b/src/lib-regex/regex.c
@@ -0,0 +1,497 @@
+/* Copyright (C) 2025 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "buffer.h"
+#include "cpu-limit.h"
+#include "str.h"
+#include "unichar.h"
+#include "dregex.h"
+
+#ifdef HAVE_LIBPCRE
+
+#define PCRE2_CODE_UNIT_WIDTH 32
+#include "pcre2.h"
+
+#define DREGEX_MAX_DEPTH 100
+#define DREGEX_MAX_MATCHES 100
+#define DREGEX_MAX_CPU_SECONDS 1
+
+struct dregex_code {
+	pool_t pool; /* owns this struct; also backs the PCRE2 allocator callbacks */
+
+	pcre2_compile_context *cctx;
+	pcre2_general_context *gctx;
+	pcre2_match_context *mctx;
+	pcre2_code *pat; /* currently compiled (or imported) pattern, if any */
+
+	struct cpu_limit *climit; /* set only around match/substitute calls */
+
+	unsigned int max_depth;
+	unsigned int max_cpu_seconds;
+	unsigned int max_capture_groups; /* applied as the PCRE2 match limit */
+
+	enum dregex_flags flags; /* flags given to the last dregex_code_compile() */
+};
+
+static void *dregex_code_int_malloc(size_t amt, void *_ctx)
+{
+	/* PCRE2 allocator hook: take the memory from the code's own pool */
+	return p_malloc(((struct dregex_code *)_ctx)->pool, amt);
+}
+
+static void dregex_code_int_free(void *ptr, void *_ctx)
+{
+	/* PCRE2 allocator hook: hand the memory back to the code's own pool */
+	p_free(((struct dregex_code *)_ctx)->pool, ptr);
+}
+
+static int dregex_code_callout(pcre2_callout_block *block ATTR_UNUSED, void *ctx)
+{
+	struct dregex_code *code = ctx;
+	/* Must be negative: a positive callout result only fails the current
+	 * match path and lets PCRE2 keep backtracking instead of aborting. */
+	return cpu_limit_exceeded(code->climit) ? PCRE2_ERROR_CALLOUT : 0;
+}
+
+static int
+dregex_code_substitute_callout(pcre2_substitute_callout_block *block ATTR_UNUSED, void *ctx)
+{
+	return dregex_code_callout(NULL, ctx); /* same CPU limit check; block is unused */
+}
+
+static int dregex_code_guard(uint depth, void *ctx)
+{
+	const struct dregex_code *limits = ctx;
+
+	/* Report a depth limit error once depth goes past max_depth */
+	return depth > limits->max_depth ? PCRE2_ERROR_DEPTHLIMIT : 0;
+}
+
+static void dregex_code_init(struct dregex_code *code)
+{
+	code->gctx = pcre2_general_context_create(dregex_code_int_malloc,
+						  dregex_code_int_free, code); /* pool-backed allocator */
+	code->cctx = pcre2_compile_context_create(code->gctx);
+	code->mctx = pcre2_match_context_create(code->gctx);
+
+	pcre2_set_compile_recursion_guard(code->cctx, dregex_code_guard, code);
+	/* these are used to ensure that CPU time isn't exceeded */
+	pcre2_set_callout(code->mctx, dregex_code_callout, code);
+	pcre2_set_substitute_callout(code->mctx, dregex_code_substitute_callout, code);
+
+	/* Set some limits; note that max_capture_groups is used as the match limit */
+	pcre2_set_match_limit(code->mctx, code->max_capture_groups);
+	pcre2_set_depth_limit(code->mctx, code->max_depth);
+}
+
+struct dregex_code *dregex_code_create_params(const struct dregex_params *params)
+{
+	pool_t code_pool = pool_allocfree_create("regex pool");
+	struct dregex_code *new_code = p_new(code_pool, struct dregex_code, 1);
+	new_code->pool = code_pool;
+	new_code->max_depth = params->max_depth;
+	new_code->max_cpu_seconds = params->max_cpu_seconds;
+	new_code->max_capture_groups = params->max_capture_groups;
+	dregex_code_init(new_code); /* reads the limits, so they are set first */
+	return new_code;
+}
+
+static const struct dregex_params default_params = {
+	.max_depth = DREGEX_MAX_DEPTH,
+	.max_cpu_seconds = DREGEX_MAX_CPU_SECONDS,
+	.max_capture_groups = DREGEX_MAX_MATCHES,
+};
+
+struct dregex_code *dregex_code_create(void)
+{
+	/* dregex_code_create_params() already runs dregex_code_init(); calling
+	 * it a second time would replace the contexts and leak the first set. */
+	return dregex_code_create_params(&default_params);
+}
+
+static const PCRE2_SPTR empty_str = U"";
+
+/* Convert input into unichars. *out_r may point to data stack memory, so it is
+ * only valid in the caller's current frame. Returns -1 if conversion fails. */
+static int convert_to_sptr(const char *input, PCRE2_SPTR *out_r, PCRE2_SIZE *len_r,
+			   bool refuse_non_ascii)
+{
+	if (*input == '\0') {
+		/* nothing to convert, skip the array allocation entirely */
+		*len_r = 0;
+		*out_r = empty_str;
+		return 0;
+	}
+	ARRAY_TYPE(unichars) chars;
+	t_array_init(&chars, 128);
+	if (refuse_non_ascii) {
+		/* treat everything as ascii: one code point per input byte */
+		for (; *input != '\0'; input++) {
+			unichar_t chr = (unsigned char)*input;
+			array_push_back(&chars, &chr);
+		}
+	} else if (uni_utf8_to_ucs4(input, &chars) < 0)
+		return -1;
+	*len_r = array_count(&chars);
+	*out_r = *len_r == 0 ? empty_str : array_idx(&chars, 0);
+	return 0;
+}
+
+/* Handle error: store a message for PCRE2 error code 'ret' in *error_r, return -1 */
+static int handle_error(int ret, const char *func, const char **error_r)
+{
+	PCRE2_UCHAR buf[256];
+	if (ret == PCRE2_ERROR_NOMEMORY)
+		i_fatal_status(FATAL_OUTOFMEM, "%s(): Out of memory", func);
+	/* The buffer length is counted in code units, not in bytes */
+	int rc = pcre2_get_error_message(ret, buf, N_ELEMENTS(buf));
+	if (rc == PCRE2_ERROR_BADDATA) {
+		*error_r = t_strdup_printf("Unknown error %d occured", ret);
+	} else if (rc < 0) {
+		*error_r = t_strdup_printf("Unknown error %d occured while handling %d",
+					   rc, ret);
+	} else {
+		/* rc is the message length in code units; convert it to
+		 * UTF-8 in a data stack buffer */
+		buffer_t *output = t_buffer_create(rc);
+		uni_ucs4_to_utf8(buf, rc, output);
+		*error_r = str_c(output);
+	}
+	return -1;
+}
+#define handle_error(ret, error_r) handle_error((ret), __func__, (error_r))
+
+int dregex_code_compile(struct dregex_code *code, const char *pattern,
+			enum dregex_flags flags, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(pattern != NULL);
+	int errcode;
+	PCRE2_SIZE erroffset; /* filled in by pcre2_compile(), not reported */
+
+	if (code->pat != NULL) { /* recompiling: drop the previous pattern */
+		pcre2_code_free(code->pat);
+		code->pat = NULL;
+		code->flags = 0;
+	}
+
+	uint options = PCRE2_AUTO_CALLOUT |
+		PCRE2_NEVER_BACKSLASH_C | PCRE2_NO_UTF_CHECK;
+
+	if (HAS_ALL_BITS(flags, DREGEX_ICASE))
+		options |= PCRE2_CASELESS;
+	if (HAS_ALL_BITS(flags, DREGEX_NOSUB))
+		options |= PCRE2_NO_AUTO_CAPTURE;
+	if (HAS_ALL_BITS(flags, DREGEX_NEWLINE))
+		options |= PCRE2_MULTILINE;
+	if (HAS_ALL_BITS(flags, DREGEX_ANCHORED))
+		options |= PCRE2_ANCHORED;
+	if (HAS_ALL_BITS(flags, DREGEX_EXTENDED))
+		options |= PCRE2_EXTENDED;
+
+	bool refuse_non_ascii = HAS_ALL_BITS(flags, DREGEX_ASCII_ONLY);
+	code->flags = flags; /* the match functions read their flags from here */
+
+	/* Use Unicode properties for character matching, unless ASCII-only */
+	if (!refuse_non_ascii)
+		options |= (PCRE2_UCP | PCRE2_UTF);
+	else
+		options |= PCRE2_NEVER_UTF;
+
+	T_BEGIN { /* the converted pattern is only needed while compiling */
+		PCRE2_SIZE slen;
+		PCRE2_SPTR32 pattern32;
+		if (convert_to_sptr(pattern, &pattern32, &slen, refuse_non_ascii) < 0) {
+			errcode = PCRE2_ERROR_BADDATA; /* conversion failed */
+			code->pat = NULL;
+		} else {
+			code->pat = pcre2_compile(pattern32, slen, options, &errcode,
+						  &erroffset, code->cctx);
+		}
+	} T_END;
+
+	i_assert(code->pat != NULL || errcode != 0);
+
+	if (code->pat == NULL)
+		return handle_error(errcode, error_r);
+
+	return 0;
+}
+
+void dregex_code_export(const struct dregex_code *code, buffer_t *buffer)
+{
+	PCRE2_SIZE size;
+	uint8_t *bytes;
+
+	const pcre2_code *codes[] = { /* only the single current pattern */
+		code->pat,
+	};
+
+	int ret = pcre2_serialize_encode(codes, N_ELEMENTS(codes), &bytes,
+					 &size, code->gctx);
+	if (ret < 0) { /* any encode failure is treated as a programming error */
+		const char *error;
+		(void)handle_error(ret, &error);
+		i_panic("BUG: dregex_code_export(): %s", error);
+	}
+
+	/* There must be only one pattern */
+	i_assert(ret == 1);
+
+	buffer_append(buffer, bytes, size);
+	pcre2_serialize_free(bytes); /* bytes were allocated by the encode call */
+}
+
+int dregex_code_import(struct dregex_code *code, const buffer_t *buffer,
+		       const char **error_r)
+{
+	/* Drop any previously compiled/imported pattern so it isn't leaked */
+	pcre2_code_free(code->pat);
+	code->pat = NULL;
+	int ret = pcre2_serialize_decode(&code->pat, 1, buffer->data,
+					 code->gctx);
+	i_assert(ret != 0);
+	return ret < 0 ? handle_error(ret, error_r) : 0;
+}
+
+static const char *empty_match_str = "";
+
+static void extract_matches(uint count, pcre2_match_data *mdata,
+			    bool skip_empty, ARRAY_TYPE(const_string) *groups_r)
+{
+	/* we don't actually want matches */
+	if (groups_r == NULL)
+		return;
+	for (uint i = 0; i < count; i++) {
+		PCRE2_UCHAR32 *buf;
+		PCRE2_SIZE bsize;
+		int rc = pcre2_substring_get_bynumber(mdata, i, &buf, &bsize);
+		if (rc == PCRE2_ERROR_NOSUBSTRING)
+			break;
+		else if (rc == PCRE2_ERROR_UNSET) {
+			if (!skip_empty)
+				array_push_back(groups_r, &empty_match_str);
+			continue;
+		} else if (rc < 0) /* nothing was allocated, skip this group */
+			continue;
+		/* group text is returned as UTF-8 in data stack memory */
+		buffer_t *output = t_buffer_create(bsize);
+		uni_ucs4_to_utf8(buf, bsize, output);
+		pcre2_substring_free(buf); /* allocated by PCRE2 for this call */
+		const char *substr = str_c(output);
+		array_push_back(groups_r, &substr);
+	}
+}
+
+static int dregex_code_match_int(struct dregex_code *code, const char *subject,
+				 pcre2_match_data *mdata, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(code->pat != NULL);
+	i_assert(subject != NULL);
+
+	PCRE2_SIZE slen;
+	PCRE2_SPTR subject32;
+
+	bool refuse_non_ascii = HAS_ALL_BITS(code->flags, DREGEX_ASCII_ONLY);
+	if (convert_to_sptr(subject, &subject32, &slen, refuse_non_ascii) < 0)
+		return handle_error(PCRE2_ERROR_BADDATA, error_r);
+
+	/* An empty match is not reported as a match (PCRE2_NOTEMPTY) */
+	uint options = PCRE2_NOTEMPTY;
+
+	if (HAS_ALL_BITS(code->flags, DREGEX_NOTBOL))
+		options |= PCRE2_NOTBOL;
+	if (HAS_ALL_BITS(code->flags, DREGEX_NOTEOL))
+		options |= PCRE2_NOTEOL;
+	if (HAS_ALL_BITS(code->flags, DREGEX_ANCHORED))
+		options |= PCRE2_ANCHORED;
+
+	code->climit = cpu_limit_init(code->max_cpu_seconds, CPU_LIMIT_TYPE_ALL);
+	int ret = pcre2_match(code->pat, subject32, slen, 0, options,
+			      mdata, code->mctx);
+	cpu_limit_deinit(&code->climit); /* the limit only covers pcre2_match() */
+
+	if (ret == PCRE2_ERROR_NOMATCH) {
+		/* did not match */
+		ret = 0;
+	} else if (ret < 0) {
+		return handle_error(ret, error_r);
+	}
+
+	return ret; /* 0 = no match, otherwise the pcre2_match() result */
+}
+
+int dregex_code_match_groups(struct dregex_code *code, const char *subject,
+			     ARRAY_TYPE(const_string) *groups_r, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(code->pat != NULL);
+
+	/* No T_BEGIN/T_END frame here: the strings pushed to groups_r are
+	 * allocated from the data stack and must stay valid for the caller. */
+	pcre2_match_data *mdata =
+		pcre2_match_data_create_from_pattern(code->pat, code->gctx);
+	int ret = dregex_code_match_int(code, subject, mdata, error_r);
+	if (ret > 1) {
+		bool skip_empty = HAS_ALL_BITS(code->flags, DREGEX_NO_EMPTY_SUB);
+		/* ret is number of groups */
+		extract_matches((uint32_t)ret, mdata, skip_empty, groups_r);
+		ret = 1;
+	}
+	pcre2_match_data_free(mdata); /* was leaked on every call before */
+	return ret;
+}
+
+int dregex_code_match(struct dregex_code *code, const char *subject,
+		      const char **error_r)
+{
+	return dregex_code_match_groups(code, subject, NULL, error_r); /* NULL: no groups wanted */
+}
+
+int dregex_code_replace_full(struct dregex_code *code,
+			     const char *subject, size_t startoffset,
+			     const char *replacement, string_t *result_r,
+			     enum dregex_flags flags, const char **error_r)
+{
+	i_assert(code != NULL);
+	i_assert(code->pat != NULL);
+	i_assert(subject != NULL);
+	i_assert(replacement != NULL);
+	i_assert(result_r != NULL);
+
+	uint options = PCRE2_NOTEMPTY;
+	if (HAS_ALL_BITS(flags, DREGEX_ANCHORED))
+		options |= PCRE2_ANCHORED;
+	if (HAS_ALL_BITS(flags, DREGEX_REPLACE_ALL))
+		options |= PCRE2_SUBSTITUTE_GLOBAL;
+	if (HAS_ALL_BITS(flags, DREGEX_REPLACE_LITERAL))
+		options |= PCRE2_SUBSTITUTE_LITERAL;
+
+	PCRE2_UCHAR *result32 = U"";
+	PCRE2_SIZE result_len = 0;
+	/* NOTE(review): startoffset is handed to PCRE2 as a code unit offset */
+	int ret;
+	bool refuse_non_ascii = HAS_ALL_BITS(flags, DREGEX_ASCII_ONLY) ||
+				HAS_ALL_BITS(code->flags, DREGEX_ASCII_ONLY);
+
+	do { /* not inside a T_BEGIN frame: result32 is still read after this block */
+		PCRE2_SIZE slen;
+		PCRE2_SPTR subject32;
+		PCRE2_SIZE rlen;
+		PCRE2_SPTR replacement32;
+
+		if (convert_to_sptr(subject, &subject32, &slen, refuse_non_ascii) < 0 ||
+		    convert_to_sptr(replacement, &replacement32, &rlen, refuse_non_ascii) < 0) {
+			ret = PCRE2_ERROR_BADDATA;
+			break;
+		}
+
+		pcre2_match_data *mdata =
+			pcre2_match_data_create_from_pattern(code->pat, code->gctx);
+
+		code->climit = cpu_limit_init(code->max_cpu_seconds,
+					      CPU_LIMIT_TYPE_ALL);
+		ret = pcre2_substitute(code->pat, subject32, slen, startoffset,
+				       options|PCRE2_SUBSTITUTE_OVERFLOW_LENGTH,
+				       mdata, code->mctx, replacement32, rlen,
+				       result32, &result_len);
+		cpu_limit_deinit(&code->climit);
+		/* Ignore NOMEMORY error here, it's because we asked how long
+		   the result would be. */
+		if (ret != PCRE2_ERROR_NOMEMORY && ret < 0) {
+			pcre2_match_data_free(mdata);
+			break;
+		}
+
+		if (result_len > 0)
+			result32 = t_new(PCRE2_UCHAR, result_len);
+
+		/* Run it again as we know the buffer size now */
+		code->climit = cpu_limit_init(code->max_cpu_seconds,
+					      CPU_LIMIT_TYPE_ALL);
+		ret = pcre2_substitute(code->pat, subject32, slen, startoffset, options,
+				       mdata, code->mctx, replacement32, rlen,
+				       result32, &result_len);
+		cpu_limit_deinit(&code->climit);
+		pcre2_match_data_free(mdata);
+	} while(0);
+
+	if (ret < 0)
+		return handle_error(ret, error_r);
+	else if (ret > 0)
+		uni_ucs4_to_utf8(result32, result_len, result_r);
+
+	return ret > 0 ? 1 : 0;
+}
+
+int dregex_code_replace(struct dregex_code *code, const char *subject,
+			const char *replacement, string_t *result_r,
+			enum dregex_flags flags, const char **error_r)
+{
+	return dregex_code_replace_full(code, subject, 0, replacement, result_r,
+					flags, error_r); /* startoffset 0 */
+}
+
+void dregex_code_free(struct dregex_code **_code)
+{
+	struct dregex_code *code = *_code;
+	*_code = NULL; /* the caller's pointer is always cleared */
+	if (code == NULL)
+		return;
+
+	if (code->pat != NULL)
+		pcre2_code_free(code->pat);
+	pcre2_match_context_free(code->mctx);
+	pcre2_compile_context_free(code->cctx);
+	pcre2_general_context_free(code->gctx);
+	pool_unref(&code->pool); /* code itself was allocated from this pool */
+}
+
+int dregex_match_groups(const char *pattern, const char *subject, enum dregex_flags flags,
+			ARRAY_TYPE(const_string) *groups_r, const char **error_r)
+{
+	struct dregex_code *code = dregex_code_create();
+	int ret;
+
+	/* Not wrapped in T_BEGIN/T_END: the group strings stored in groups_r
+	 * (and the error message) are data stack allocated and have to
+	 * survive until the caller is done with them. */
+	if (dregex_code_compile(code, pattern, flags, error_r) < 0)
+		ret = -1;
+	else {
+		ret = dregex_code_match_groups(code, subject, groups_r, error_r);
+	}
+	dregex_code_free(&code);
+
+	return ret;
+}
+
+int dregex_match(const char *pattern, const char *subject, enum dregex_flags flags,
+		 const char **error_r)
+{
+	return dregex_match_groups(pattern, subject, flags, NULL, error_r); /* NULL: no groups wanted */
+}
+
+int dregex_replace(const char *pattern, const char *subject, const char *replace,
+		   string_t *result_r, enum dregex_flags flags,
+		   const char **error_r)
+{
+	struct dregex_code *code = dregex_code_create(); /* one-shot, freed below */
+	int ret;
+
+	T_BEGIN {
+		ret = dregex_code_compile(code, pattern, flags, error_r);
+	} T_END_PASS_STR_IF(ret < 0, error_r);
+
+	if (ret >= 0) { /* compiled fine, run the substitution with the same flags */
+		ret = dregex_code_replace(code, subject, replace, result_r,
+					  flags, error_r);
+	}
+
+	dregex_code_free(&code);
+
+	return ret;
+}
+
+#endif
diff --git a/src/lib-regex/test-regex.c b/src/lib-regex/test-regex.c
new file mode 100644
index 0000000000..ef2d3bd8cf
--- /dev/null
+++ b/src/lib-regex/test-regex.c
@@ -0,0 +1,306 @@
+/* Copyright (C) 2025 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "test-common.h"
+#include "array.h"
+#include "str.h"
+#include "dregex.h"
+
+#ifdef HAVE_LIBPCRE
+
+static const bool debug = FALSE;
+
+struct test_case {
+	const char *subject;
+	const char *pattern;
+	const char *replacement;
+	const char *result;
+	const char *error;
+	enum dregex_flags flags;
+	int compile_ret;
+	int match_ret;
+};
+
+static void run_match_tests(const struct test_case *cases)
+{
+	unsigned int idx;
+	struct dregex_code *code = dregex_code_create();
+
+	for(idx = 0; cases[idx].pattern != NULL; idx++) {
+		const char *error = NULL;
+		const struct test_case *test = &cases[idx];
+
+		if (debug) {
+			i_debug("pattern = %s, subject = %s", test->pattern,
+				test->subject);
+		}
+
+		/* compile pattern */
+		int ret = dregex_code_compile(code, test->pattern, test->flags,
+					     &error);
+		test_assert_cmp_idx(test->compile_ret, ==, ret, idx);
+		if (test->compile_ret < 0) {
+			test_assert_strcmp_idx(test->error, error, idx);
+			continue;
+		} else if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+
+		ret = dregex_code_match(code, test->subject, &error);
+
+		test_assert_cmp_idx(test->match_ret, ==, ret, idx);
+		if (test->match_ret < 0)
+			test_assert_strcmp_idx(test->error, error, idx);
+		else if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+	}
+	dregex_code_free(&code);
+}
+
+#define MATCH_CASE_FULL(pat, sub, err, cret, mret) \
+	{ \
+		.pattern = (pat), \
+		.subject = (sub), \
+		.replacement = NULL, \
+		.result = NULL, \
+		.error = (err), \
+		.compile_ret = (cret), \
+		.match_ret = (mret) \
+	}
+#define MATCH_CASE(pattern, subject) MATCH_CASE_FULL(pattern, subject, NULL, 0, 1)
+#define MATCH_CASE_END { .pattern = NULL }
+
+#define STR(x) x
+#define REP(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x) STR(x)
+#define REP10(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x) REP(x)
+
+static void test_dregex_match(void)
+{
+	const struct test_case cases[] = {
+		/* simple test case */
+		MATCH_CASE(".*", "hello world"),
+		/* .* would match the empty string, but empty matches are not reported */
+		MATCH_CASE_FULL(".*", "", NULL, 0, 0),
+		/* but empty string does not match empty string */
+		MATCH_CASE_FULL("", "", NULL, 0, 0),
+		/* Match any single character except newline. */
+		MATCH_CASE(".", "a"),
+		MATCH_CASE_FULL(".", "\n", NULL, 0, 0),
+		/* Bracket expression.  Match any one of the enclosed
+		   characters.  A hyphen (-) indicates a range of
+		   consecutive characters. */
+		MATCH_CASE("[a-z]", "a"),
+		MATCH_CASE_FULL("[a-z]", "A", NULL, 0, 0),
+		/* Negated bracket expression. */
+		MATCH_CASE("[^a-z]", "A"),
+		MATCH_CASE_FULL("[^a-z]", "a", NULL, 0, 0),
+		/* Character class */
+		MATCH_CASE("^[[:alnum:]]+$", "abc123"),
+		MATCH_CASE_FULL("^[[^:alnum:]]+$", "abc123", NULL, 0, 0),
+		/* Unicode properties */
+		MATCH_CASE("^\\p{L}$", "\xc3\xab"),
+		MATCH_CASE("^\\pL$", "\xc3\xab"),
+		/* Quantifiers */
+		MATCH_CASE("^.$", "h"),
+		MATCH_CASE("^.{2}$", "he"),
+		MATCH_CASE("^.{2,3}$", "he"),
+		MATCH_CASE("^.{2,3}$", "hel"),
+		MATCH_CASE("^.+$", "hello"),
+		MATCH_CASE_FULL("^.+$", "", NULL, 0, 0),
+		/* Alternation and grouping */
+		MATCH_CASE("^(hello|world)$", "hello"),
+		MATCH_CASE("^(hello|world)$", "world"),
+		MATCH_CASE_FULL("^(hello|world)$", "hi", NULL, 0, 0),
+		/* test that we can find 'mojiretsu' (test string) from
+		  'Kore wa tesuto mojiretsudesu.' (this is a test string) */
+		MATCH_CASE(
+			"\xe6\x96\x87\xe5\xad\x97\xe5\x88\x97",
+			"\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xaf\xe3\x83\x86\xe3"
+			"\x82\xb9\xe3\x83\x88\xe6\x96\x87\xe5\xad\x97\xe5\x88"
+			"\x97\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82"
+		),
+		/* test that we can find <U+1F60A> from <U+1F600><U+1F60A> */
+		MATCH_CASE("\xef\x85\xa0""A", "\xef\x85\xa0""0\xef\x85\xa0""A"),
+		/* binary matching */
+		{
+			.pattern = "\xef\x85\xa0""A",
+			.subject = "\xef\x85\xa0""0\xef\x85\xa0""A",
+			.error = "",
+			.flags = DREGEX_ASCII_ONLY,
+			.compile_ret = 0,
+			.match_ret = 1,
+		},
+		{
+			.pattern = ".*",
+			.subject = "\xef\x85\xa0""0\xef\x85\xa0""A",
+			.error = "",
+			.flags = DREGEX_ASCII_ONLY,
+			.compile_ret = 0,
+			.match_ret = 1,
+		},
+		/* invalid utf-8 */
+		MATCH_CASE_FULL(".*", "\xc2\xc2", "bad data value", 0, -1),
+		/* two evil patterns */
+		MATCH_CASE_FULL(
+			"^([a-zA-Z0-9])(([\\-.]|[_]+)?([a-zA-Z0-9]+))*(@)"
+			"{1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]"
+			"{1}[a-z]{2,3}))$",
+			"thisisabstractly.andtotally.long.email@"
+			REP10("a") "." REP10("a") "." REP10("a")
+			".has",
+			"match limit exceeded",
+			0,
+			-1
+		),
+		MATCH_CASE_FULL(
+			"(a|a?)+",
+			REP10("a") REP10("a"),
+			"match limit exceeded",
+			0,
+			-1
+		),
+		/* IEEE.1003-2.1992 */
+		MATCH_CASE("me(\\+.*)?@company\\.com",
+			"me+hello@company.com"),
+		MATCH_CASE("^[^[:lower:]]+$", "HELLO"),
+		MATCH_CASE_FULL(
+			"^[^[:lower:]]+$",
+			"hello",
+			NULL,
+			0,
+			0
+		),
+		MATCH_CASE("<(.*)@", "<simple-list@test.invalid>"),
+		MATCH_CASE("^\\[(.*)\\] (.*)$", "[acme-users] [fwd]: hello, world"),
+		MATCH_CASE_END
+	};
+
+	test_begin("matching");
+
+	run_match_tests(cases);
+
+	test_end();
+}
+
+static void run_replace_tests(const struct test_case *cases)
+{
+	unsigned int idx;
+	struct dregex_code *code = dregex_code_create();
+	string_t *dest = t_str_new(32);
+
+	for(idx = 0; cases[idx].pattern != NULL; idx++) {
+		const char *error = NULL;
+		const struct test_case *test = &cases[idx];
+		str_truncate(dest, 0);
+
+		if (debug) {
+			i_debug("pattern = %s, subject = %s, "
+				"replacement = %s, result = %s",
+				test->pattern, test->subject,
+				test->replacement, test->result);
+		}
+
+		/* compile pattern */
+		int ret = dregex_code_compile(code, test->pattern, test->flags,
+					     &error);
+		test_assert_cmp_idx(test->compile_ret, ==, ret, idx);
+		if (test->compile_ret < 0) {
+			test_assert_strcmp_idx(test->error, error, idx);
+			continue;
+		} else if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+
+		ret = dregex_code_replace(code, test->subject, test->replacement,
+				         dest, test->flags, &error);
+
+		test_assert_cmp_idx(test->match_ret, ==, ret, idx);
+		if (test->match_ret < 0) {
+			test_assert_strcmp_idx(test->error, error, idx);
+			continue;
+		} else if (ret < 0) {
+			error = t_strdup_printf("Unexpected error: %s", error);
+			test_assert_failed_idx(error, __FILE__, __LINE__, idx);
+			continue;
+		}
+		test_assert_strcmp_idx(test->result, str_c(dest), idx);
+	}
+	dregex_code_free(&code);
+}
+
+#define REP_CASE_FULL(pat, sub, rep, res, err, cret, mret) \
+	{ \
+		.pattern = (pat), \
+		.subject = (sub), \
+		.replacement = (rep), \
+		.result = (res), \
+		.error = (err), \
+		.compile_ret = (cret), \
+		.match_ret = (mret) \
+	}
+#define REP_CASE(pattern, subject, replacement, result) \
+	REP_CASE_FULL(pattern, subject, replacement, result, NULL, 0, 1)
+#define REP_CASE_END { .pattern = NULL }
+
+static void test_dregex_replace(void)
+{
+	const struct test_case cases[] = {
+		/* simple replacement */
+		REP_CASE(".*", "hello world", "world hello", "world hello"),
+		/* simple swap */
+		REP_CASE("(.*) (.*)", "hello world", "$2 $1", "world hello"),
+		/* partial replace */
+		REP_CASE("hello .*", "hello world", "$0", "hello world"),
+		/* simple utf-8 test,
+		 * '<U+1F600> <U+1F60A>' to '<U+1F60A> <U+1F600>' */
+		REP_CASE(
+			"(.*) (.*)",
+			"\xef\x85\xa0""0 \xef\x85\xa0""A",
+			"$2 $1",
+			"\xef\x85\xa0""A \xef\x85\xa0""0"
+		),
+		/* Invalid back reference */
+		REP_CASE_FULL(
+			"hello .*",
+			"hello world",
+			"$5",
+			"",
+			"unknown substring",
+			0,
+			-1
+		),
+		REP_CASE_END
+	};
+
+	test_begin("replacing");
+
+	run_replace_tests(cases);
+
+	test_end();
+}
+
+int main(void)
+{
+	void (*const tests[])(void) = {
+		test_dregex_match,
+		test_dregex_replace,
+		NULL
+	};
+
+	return test_run(tests);
+}
+
+#else
+
+int main(void) {
+	return 0;
+}
+
+#endif
diff --git a/src/lib/lib-event.c b/src/lib/lib-event.c
index cc1b7d71e5..1d5220222e 100644
--- a/src/lib/lib-event.c
+++ b/src/lib/lib-event.c
@@ -133,6 +133,8 @@ event_call_callbacks(struct event *event, enum event_callback_type type,
 
 	if (event->disable_callbacks)
 		return TRUE;
+	if (!array_is_created(&event_handlers))
+		return TRUE;
 
 	array_foreach_elem(&event_handlers, callback) {
 		bool ret;
-- 
2.47.3