From: Yann Ylavic Date: Tue, 16 Jan 2024 16:51:03 +0000 (+0000) Subject: regex: Add ap_regexec_ex() which can take a starting offset to match from. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e52a206008bdfced5551206d695b3c01b7d1948a;p=thirdparty%2Fapache%2Fhttpd.git regex: Add ap_regexec_ex() which can take a starting offset to match from. * include/ap_mmn.h: Bump MMN minor. * include/ap_regex.h: Declare ap_regexec_ex(). * server/util_pcre.c(ap_regexec, ap_regexec_len, ap_regexec_ex): Reuse existing ap_regexec_len() code to implement ap_regexec_ex() where the offset is given instead of zero, then implement ap_regexec{,len}() in terms of ap_regexec_ex(). git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1915267 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/include/ap_mmn.h b/include/ap_mmn.h index 839ef0550d1..ea1ce0c43af 100644 --- a/include/ap_mmn.h +++ b/include/ap_mmn.h @@ -721,6 +721,7 @@ * 20211221.15 (2.5.1-dev) Add ap_get_pollfd_from_conn() * 20211221.16 (2.5.1-dev) Add ap_proxy_determine_address() * 20211221.17 (2.5.1-dev) Add ap_proxy_worker_get_name() + * 20211221.18 (2.5.1-dev) Add ap_regexec_ex() */ #define MODULE_MAGIC_COOKIE 0x41503235UL /* "AP25" */ @@ -728,7 +729,7 @@ #ifndef MODULE_MAGIC_NUMBER_MAJOR #define MODULE_MAGIC_NUMBER_MAJOR 20211221 #endif -#define MODULE_MAGIC_NUMBER_MINOR 17 /* 0...n */ +#define MODULE_MAGIC_NUMBER_MINOR 18 /* 0...n */ /** * Determine if the server's current MODULE_MAGIC_NUMBER is at least a diff --git a/include/ap_regex.h b/include/ap_regex.h index 72c3743e877..f4fc88bf8b3 100644 --- a/include/ap_regex.h +++ b/include/ap_regex.h @@ -172,7 +172,8 @@ AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *regex, int cflags); * @return 0 for successful match, \p AP_REG_NOMATCH otherwise */ AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string, - apr_size_t nmatch, ap_regmatch_t *pmatch, int eflags); + apr_size_t nmatch, ap_regmatch_t *pmatch, + int 
eflags); /** * Match a string with given length against a pre-compiled regex. The string @@ -186,10 +187,28 @@ AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string, * other flags are ignored) * @return 0 for successful match, AP_REG_NOMATCH otherwise */ -AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, - apr_size_t len, apr_size_t nmatch, - ap_regmatch_t *pmatch, int eflags); +AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, + const char *buff, apr_size_t len, + apr_size_t nmatch, ap_regmatch_t *pmatch, + int eflags); +/** + * Match a string with given length and at a given offset against a + * pre-compiled regex. The string does not need to be NUL-terminated. + * @param preg The pre-compiled regex + * @param buff The string to match + * @param len Length of the string to match + * @param pos Offset in the string where the match should (re)start + * @param nmatch Number of elements available in the pmatch array + * @param pmatch Provide information regarding the location of any matches + * @param eflags Bitwise OR of AP_REG_* flags (NOTBOL and NOTEOL supported, + * other flags are ignored) + * @return 0 for successful match, AP_REG_NOMATCH or another AP_REG_* code otherwise + */ +AP_DECLARE(int) ap_regexec_ex(const ap_regex_t *preg, + const char *buff, apr_size_t len, apr_size_t pos, + apr_size_t nmatch, ap_regmatch_t *pmatch, + int eflags); /** * Return the error code returned by regcomp or regexec into error messages * @param errcode the error code returned by regexec or regcomp diff --git a/server/util_pcre.c b/server/util_pcre.c index 488f389ffe4..da85c2da5df 100644 --- a/server/util_pcre.c +++ b/server/util_pcre.c @@ -427,13 +427,22 @@ AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string, apr_size_t nmatch, ap_regmatch_t *pmatch, int eflags) { - return ap_regexec_len(preg, string, strlen(string), nmatch, pmatch, - eflags); + return ap_regexec_ex(preg, string, strlen(string), 0, + nmatch, pmatch, eflags); } 
-AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, - apr_size_t len, apr_size_t nmatch, - ap_regmatch_t *pmatch, int eflags) +AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, + const char *buff, apr_size_t len, + apr_size_t nmatch, ap_regmatch_t *pmatch, + int eflags) +{ + return ap_regexec_ex(preg, buff, len, 0, nmatch, pmatch, eflags); +} + +AP_DECLARE(int) ap_regexec_ex(const ap_regex_t *preg, + const char *buff, apr_size_t len, apr_size_t pos, + apr_size_t nmatch, ap_regmatch_t *pmatch, + int eflags) { int rc; int options = 0; @@ -442,6 +451,11 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, apr_uint32_t ncaps = (apr_uint32_t)preg->re_nsub + 1; #ifndef HAVE_PCRE2 + /* PCRE1 uses ints, reject overflowing values */ + if (len > APR_INT32_MAX || pos > APR_INT32_MAX) { + return AP_REG_INVARG; + } + /* This is fine if pcre_exec() gets a vector size smaller than the * number of capturing groups (it will treat the remaining ones as * non-capturing), but if the vector is too small to keep track of @@ -482,13 +496,14 @@ AP_DECLARE(int) ap_regexec_len(const ap_regex_t *preg, const char *buff, #ifdef HAVE_PCRE2 rc = pcre2_match((const pcre2_code *)preg->re_pcre, - (const unsigned char *)buff, len, 0, options, + (const unsigned char *)buff, len, pos, options, state.match_data, NULL); ovector = pcre2_get_ovector_pointer(state.match_data); #else ovector = state.match_data; - rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, - 0, options, ovector, ncaps * 3); + rc = pcre_exec((const pcre *)preg->re_pcre, NULL, + buff, (int)len, (int)pos, options, + ovector, ncaps * 3); #endif if (rc >= 0) {