From: Amos Jeffries Date: Mon, 27 Jul 2015 16:03:11 +0000 (-0700) Subject: Refactor refresh_pattern to using class RegexPattern X-Git-Tag: M-staged-PR71~362^2~11 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e55f4780178e9b33e571a3d5ecb5d1ec41bbef2e;p=thirdparty%2Fsquid.git Refactor refresh_pattern to using class RegexPattern * re-use class RegexPattern to store refresh_pattern regex field and related case sensitivity flag. * convert class RefreshPattern to MEMPROXY_CLASS. Required to ensure corret RegexPattern construction. * add c_str() method to RegexPattern and hide unencoded pattern member. optimization to avoid ugly 'R->pattern.pattern' code and guarantee const correctness. --- diff --git a/src/RefreshPattern.h b/src/RefreshPattern.h index e835dd6be9..8812253474 100644 --- a/src/RefreshPattern.h +++ b/src/RefreshPattern.h @@ -9,21 +9,48 @@ #ifndef SQUID_REFRESHPATTERN_H_ #define SQUID_REFRESHPATTERN_H_ -#include "compat/GnuRegex.h" +#include "base/RegexPattern.h" -/// a representation of a refresh pattern. Currently a POD. +/// a representation of a refresh pattern. class RefreshPattern { + MEMPROXY_CLASS(RefreshPattern); + public: - const char *pattern; - regex_t compiled_pattern; + + /* + * Defaults: + * MIN NONE + * PCT 20% + * MAX 3 days + */ +#define REFRESH_DEFAULT_MAX static_cast(259200) + + RefreshPattern(const char *aPattern, const decltype(RegexPattern::flags) &reFlags) : + pattern(reFlags, aPattern), + min(0), pct(0.20), max(REFRESH_DEFAULT_MAX), + next(NULL), + max_stale(0) + { + memset(&flags, 0, sizeof(flags)); + } + + ~RefreshPattern() { + while (RefreshPattern *t = next) { + next = t->next; + t->next = nullptr; + delete t; + } + } + // ~RefreshPattern() default destructor is fine + + RegexPattern pattern; time_t min; double pct; time_t max; RefreshPattern *next; struct { - bool icase; bool refresh_ims; bool store_stale; #if USE_HTTP_VIOLATIONS @@ -39,6 +66,8 @@ public: // statistics about how many matches this pattern has had mutable struct stats_ { + stats_() : matchTests(0), matchCount(0) {} + uint64_t matchTests; uint64_t matchCount; // TODO: some stats to indicate how useful/less the flags are would be nice. diff --git a/src/acl/RegexData.cc b/src/acl/RegexData.cc index 3326093f98..5633be9298 100644 --- a/src/acl/RegexData.cc +++ b/src/acl/RegexData.cc @@ -38,7 +38,7 @@ ACLRegexData::match(char const *word) // walk the list of patterns to see if one matches for (auto &i : data) { if (i.match(word)) { - debugs(28, 2, "'" << i.pattern << "' found in '" << word << "'"); + debugs(28, 2, "'" << i.c_str() << "' found in '" << word << "'"); // TODO: old code also popped the pattern to second place of the list // in order to reduce patterns search times. return 1; @@ -66,7 +66,7 @@ ACLRegexData::dump() const flags = i.flags; } - sl.push_back(SBuf(i.pattern)); + sl.push_back(SBuf(i.c_str())); } return sl; @@ -101,7 +101,7 @@ removeUnnecessaryWildcards(char * t) } static bool -compileRE(std::list &curlist, const char * RE, const __decltype(RegexPattern::flags) &flags) +compileRE(std::list &curlist, const char * RE, const decltype(RegexPattern::flags) &flags) { if (RE == NULL || *RE == '\0') return curlist.empty(); // XXX: old code did this. It looks wrong. diff --git a/src/base/RegexPattern.h b/src/base/RegexPattern.h index 83d2837355..7bd9f758b2 100644 --- a/src/base/RegexPattern.h +++ b/src/base/RegexPattern.h @@ -28,13 +28,14 @@ public: RegexPattern(RegexPattern &&) = default; // throws std::regex_error ~RegexPattern(); + const char * c_str() const {return pattern;} bool match(const char *str) const {return std::regex_search(str, regex);} public: std::regex_constants::syntax_option_type flags; - char *pattern; private: + char *pattern; std::regex regex; }; diff --git a/src/cache_cf.cc b/src/cache_cf.cc index 5eafe10450..9b79520cf8 100644 --- a/src/cache_cf.cc +++ b/src/cache_cf.cc @@ -2557,8 +2557,8 @@ dump_refreshpattern(StoreEntry * entry, const char *name, RefreshPattern * head) while (head != NULL) { storeAppendPrintf(entry, "%s%s %s %d %d%% %d", name, - head->flags.icase ? " -i" : null_string, - head->pattern, + head->pattern.flags & std::regex::icase ? " -i" : null_string, + head->pattern.c_str(), (int) head->min / 60, (int) (100.0 * head->pct + 0.5), (int) head->max / 60); @@ -2603,7 +2603,6 @@ static void parse_refreshpattern(RefreshPattern ** head) { char *token; - char *pattern; time_t min = 0; double pct = 0.0; time_t max = 0; @@ -2621,22 +2620,16 @@ parse_refreshpattern(RefreshPattern ** head) int ignore_private = 0; #endif - int i; - RefreshPattern *t; - regex_t comp; - int errcode; - int flags = REG_EXTENDED | REG_NOSUB; + auto flags = std::regex::extended | std::regex::nosubs; if ((token = ConfigParser::RegexPattern()) != NULL) { - if (strcmp(token, "-i") == 0) { - flags |= REG_ICASE; + flags |= std::regex::icase; token = ConfigParser::RegexPattern(); } else if (strcmp(token, "+i") == 0) { - flags &= ~REG_ICASE; + flags &= ~std::regex::icase; token = ConfigParser::RegexPattern(); } - } if (token == NULL) { @@ -2645,9 +2638,9 @@ parse_refreshpattern(RefreshPattern ** head) return; } - pattern = xstrdup(token); + char *pattern = xstrdup(token); - i = GetInteger(); /* token: min */ + int i = GetInteger(); /* token: min */ /* catch negative and insanely huge values close to 32-bit wrap */ if (i < 0) { @@ -2717,27 +2710,23 @@ parse_refreshpattern(RefreshPattern ** head) debugs(22, DBG_CRITICAL, "refreshAddToList: Unknown option '" << pattern << "': " << token); } - if ((errcode = regcomp(&comp, pattern, flags)) != 0) { - char errbuf[256]; - regerror(errcode, &comp, errbuf, sizeof errbuf); + RefreshPattern *t = nullptr; + try { // RegexPattern constructor throws on pattern errors + t = new RefreshPattern(pattern, flags); + + } catch (std::regex_error &e) { debugs(22, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line); - debugs(22, DBG_CRITICAL, "refreshAddToList: Invalid regular expression '" << pattern << "': " << errbuf); + debugs(22, DBG_CRITICAL, "ERROR: Invalid regular expression '" << pattern << "': " << e.code()); xfree(pattern); return; } pct = pct < 0.0 ? 0.0 : pct; max = max < 0 ? 0 : max; - t = static_cast(xcalloc(1, sizeof(RefreshPattern))); - t->pattern = (char *) xstrdup(pattern); - t->compiled_pattern = comp; t->min = min; t->pct = pct; t->max = max; - if (flags & REG_ICASE) - t->flags.icase = true; - if (refresh_ims) t->flags.refresh_ims = true; @@ -2774,20 +2763,14 @@ parse_refreshpattern(RefreshPattern ** head) *head = t; - safe_free(pattern); + xfree(pattern); } static void free_refreshpattern(RefreshPattern ** head) { - RefreshPattern *t; - - while ((t = *head) != NULL) { - *head = t->next; - safe_free(t->pattern); - regfree(&t->compiled_pattern); - safe_free(t); - } + delete *head; + *head = nullptr; #if USE_HTTP_VIOLATIONS refresh_nocache_hack = 0; diff --git a/src/refresh.cc b/src/refresh.cc index 2dcbff8b58..b38c6ee264 100644 --- a/src/refresh.cc +++ b/src/refresh.cc @@ -8,10 +8,6 @@ /* DEBUG: section 22 Refresh Calculation */ -#ifndef USE_POSIX_REGEX -#define USE_POSIX_REGEX /* put before includes; always use POSIX */ -#endif - #include "squid.h" #include "HttpHdrCc.h" #include "HttpReply.h" @@ -79,42 +75,28 @@ static struct RefreshCounts { int status[STALE_DEFAULT + 1]; } refreshCounts[rcCount]; -/* - * Defaults: - * MIN NONE - * PCT 20% - * MAX 3 days - */ -#define REFRESH_DEFAULT_MIN (time_t)0 -#define REFRESH_DEFAULT_PCT 0.20 -#define REFRESH_DEFAULT_MAX (time_t)259200 - static const RefreshPattern *refreshUncompiledPattern(const char *); static OBJH refreshStats; static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf); -static RefreshPattern DefaultRefresh; +static RefreshPattern DefaultRefresh(".",std::regex_constants::syntax_option_type()); /** Locate the first refresh_pattern rule that matches the given URL by regex. * - * \note regexec() returns 0 if matched, and REG_NOMATCH otherwise - * - * \return A pointer to the refresh_pattern parameters to use, or NULL if there is no match. + * \return A pointer to the refresh_pattern parameters to use, or nullptr if there is no match. */ const RefreshPattern * refreshLimits(const char *url) { - const RefreshPattern *R; - - for (R = Config.Refresh; R; R = R->next) { + for (auto R = Config.Refresh; R; R = R->next) { ++(R->stats.matchTests); - if (!regexec(&(R->compiled_pattern), url, 0, 0, 0)) { + if (R->pattern.match(url)) { ++(R->stats.matchCount); return R; } } - return NULL; + return nullptr; } /** Locate the first refresh_pattern rule that has the given uncompiled regex. @@ -123,19 +105,17 @@ refreshLimits(const char *url) * This function is only ever called if there is no URI. Because a regex match is impossible, Squid * forces the "." rule to apply (if it exists) * - * \return A pointer to the refresh_pattern parameters to use, or NULL if there is no match. + * \return A pointer to the refresh_pattern parameters to use, or nullptr if there is no match. */ static const RefreshPattern * refreshUncompiledPattern(const char *pat) { - const RefreshPattern *R; - - for (R = Config.Refresh; R; R = R->next) { - if (0 == strcmp(R->pattern, pat)) + for (auto R = Config.Refresh; R; R = R->next) { + if (0 == strcmp(R->pattern.c_str(), pat)) return R; } - return NULL; + return nullptr; } /** @@ -318,7 +298,7 @@ refreshCheck(const StoreEntry * entry, HttpRequest * request, time_t delta) if (NULL == R) R = &DefaultRefresh; - debugs(22, 3, "Matched '" << R->pattern << " " << + debugs(22, 3, "Matched '" << R->pattern.c_str() << " " << (int) R->min << " " << (int) (100.0 * R->pct) << "%% " << (int) R->max << "'"); @@ -709,8 +689,8 @@ refreshStats(StoreEntry * sentry) R->stats.matchCount, R->stats.matchTests, xpercent(R->stats.matchCount, R->stats.matchTests), - (R->flags.icase ? "-i " : ""), - R->pattern); + (R->pattern.flags & std::regex::icase ? "-i " : ""), + R->pattern.c_str()); } int i; @@ -762,12 +742,6 @@ refreshInit(void) refreshCounts[rcCDigest].proto = "Cache Digests"; #endif - memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh)); - DefaultRefresh.pattern = ""; - DefaultRefresh.min = REFRESH_DEFAULT_MIN; - DefaultRefresh.pct = REFRESH_DEFAULT_PCT; - DefaultRefresh.max = REFRESH_DEFAULT_MAX; - refreshRegisterWithCacheManager(); }