#ifndef SQUID_REFRESHPATTERN_H_
#define SQUID_REFRESHPATTERN_H_
--#include "compat/GnuRegex.h"
++#include "base/RegexPattern.h"
--/// a representation of a refresh pattern. Currently a POD.
++/// a representation of a refresh pattern.
class RefreshPattern
{
++ MEMPROXY_CLASS(RefreshPattern);
++
public:
-- const char *pattern;
-- regex_t compiled_pattern;
++
++ /*
++ * Defaults:
++ * MIN NONE
++ * PCT 20%
++ * MAX 3 days
++ */
++#define REFRESH_DEFAULT_MAX static_cast<time_t>(259200)
++
++ RefreshPattern(const char *aPattern, const decltype(RegexPattern::flags) &reFlags) :
++ pattern(reFlags, aPattern),
++ min(0), pct(0.20), max(REFRESH_DEFAULT_MAX),
++ next(NULL),
++ max_stale(0)
++ {
++ memset(&flags, 0, sizeof(flags));
++ }
++
++ ~RefreshPattern() {
++ while (RefreshPattern *t = next) {
++ next = t->next;
++ t->next = nullptr;
++ delete t;
++ }
++ }
++ // ~RefreshPattern() default destructor is fine
++
++ RegexPattern pattern;
time_t min;
double pct;
time_t max;
RefreshPattern *next;
struct {
-- bool icase;
bool refresh_ims;
bool store_stale;
#if USE_HTTP_VIOLATIONS
// statistics about how many matches this pattern has had
mutable struct stats_ {
++ stats_() : matchTests(0), matchCount(0) {}
++
uint64_t matchTests;
uint64_t matchCount;
// TODO: some stats to indicate how useful/less the flags are would be nice.
bool
ACLRegexData::match(char const *word)
{
- if (word == NULL)
+ if (!word)
return 0;
- debugs(28, 3, "aclRegexData::match: checking '" << word << "'");
-
- RegexList *first, *prev;
-
- first = data;
-
- prev = NULL;
+ debugs(28, 3, "checking '" << word << "'");
- RegexList *current = first;
-
- while (current) {
- debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'");
-
- if (regexec(¤t->regex, word, 0, 0, 0) == 0) {
- if (prev != NULL) {
- /* shift the element just found to the second position
- * in the list */
- prev->next = current->next;
- current->next = first->next;
- first->next = current;
- }
-
- debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'");
+ // walk the list of patterns to see if one matches
+ for (auto &i : data) {
- if (regexec(&i.regex, word, 0, 0, 0) == 0) {
- debugs(28, 2, "'" << i.pattern << "' found in '" << word << "'");
++ if (i.match(word)) {
++ debugs(28, 2, "'" << i.c_str() << "' found in '" << word << "'");
+ // TODO: old code also popped the pattern to second place of the list
+ // in order to reduce patterns search times.
return 1;
}
-
- prev = current;
- current = current->next;
}
return 0;
} else {
sl.push_back(SBuf("+i"));
}
- flags = temp->flags;
+ flags = i.flags;
}
- sl.push_back(SBuf(temp->pattern));
- temp = temp->next;
- sl.push_back(SBuf(i.pattern));
++ sl.push_back(SBuf(i.c_str()));
}
return sl;
}
/** Compose and compile one large RE from a set of (small) REs.
-- * The ultimate goal is to have only one RE per ACL so that regexec() is
++ * The ultimate goal is to have only one RE per ACL so that match() is
* called only once per ACL.
*/
static int
CharacterSet.cc \
InstanceId.h \
Lock.h \
+ LookupTable.h \
LruMap.h \
Packable.h \
+ RegexPattern.cc \
+ RegexPattern.h \
RunnersRegistry.cc \
RunnersRegistry.h \
Subscription.h \
*/
#include "squid.h"
- #include "RegexList.h"
+ #include "base/RegexPattern.h"
++RegexPattern::RegexPattern(int aFlags, const char *aPattern) :
++ flags(aFlags),
++ pattern(xstrdup(aPattern))
++{
++ memset(®ex, 0, sizeof(regex));
++}
++
+ RegexPattern::~RegexPattern()
+ {
+ xfree(pattern);
+ regfree(®ex);
+ }
--- /dev/null
- RegexPattern(int aFlags, const char *aPattern) : flags(aFlags), pattern(xstrdup(aPattern)) {}
+ /*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+ #ifndef SQUID_SRC_BASE_REGEXPATTERN_H
+ #define SQUID_SRC_BASE_REGEXPATTERN_H
+
++#include "compat/GnuRegex.h"
+ #include "mem/forward.h"
+
+ /**
+ * A regular expression,
+ * plain text and compiled representations
+ */
+ class RegexPattern
+ {
+ MEMPROXY_CLASS(RegexPattern);
+
+ public:
+ RegexPattern() = delete;
- char *pattern;
++ RegexPattern(int aFlags, const char *aPattern);
+ RegexPattern(const RegexPattern &) = delete;
+ RegexPattern(RegexPattern &&) = default;
+ ~RegexPattern();
+
++ const char * c_str() const {return pattern;}
++ bool match(const char *str) const {return regexec(®ex,str,0,NULL,0)==0;}
++
++public:
+ int flags;
+ regex_t regex;
++
++private:
++ char *pattern;
+ };
+
+ #endif /* SQUID_SRC_BASE_REGEXPATTERN_H */
+
while (head != NULL) {
storeAppendPrintf(entry, "%s%s %s %d %d%% %d",
name,
-- head->flags.icase ? " -i" : null_string,
-- head->pattern,
++ head->pattern.flags®_ICASE ? " -i" : null_string,
++ head->pattern.c_str(),
(int) head->min / 60,
(int) (100.0 * head->pct + 0.5),
(int) head->max / 60);
pct = pct < 0.0 ? 0.0 : pct;
max = max < 0 ? 0 : max;
-- t = static_cast<RefreshPattern *>(xcalloc(1, sizeof(RefreshPattern)));
-- t->pattern = (char *) xstrdup(pattern);
-- t->compiled_pattern = comp;
++ t = new RefreshPattern(pattern, flags);
++ t->pattern.regex = comp;
t->min = min;
t->pct = pct;
t->max = max;
-- if (flags & REG_ICASE)
-- t->flags.icase = true;
--
if (refresh_ims)
t->flags.refresh_ims = true;
*head = t;
-- safe_free(pattern);
++ xfree(pattern);
}
static void
free_refreshpattern(RefreshPattern ** head)
{
-- RefreshPattern *t;
--
-- while ((t = *head) != NULL) {
-- *head = t->next;
-- safe_free(t->pattern);
-- regfree(&t->compiled_pattern);
-- safe_free(t);
-- }
++ delete *head;
++ *head = nullptr;
#if USE_HTTP_VIOLATIONS
refresh_nocache_hack = 0;
int status[STALE_DEFAULT + 1];
} refreshCounts[rcCount];
--/*
-- * Defaults:
-- * MIN NONE
-- * PCT 20%
-- * MAX 3 days
-- */
--#define REFRESH_DEFAULT_MIN (time_t)0
--#define REFRESH_DEFAULT_PCT 0.20
--#define REFRESH_DEFAULT_MAX (time_t)259200
--
static const RefreshPattern *refreshUncompiledPattern(const char *);
static OBJH refreshStats;
static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf);
--static RefreshPattern DefaultRefresh;
++static RefreshPattern DefaultRefresh("<none>", 0);
/** Locate the first refresh_pattern rule that matches the given URL by regex.
*
-- * \note regexec() returns 0 if matched, and REG_NOMATCH otherwise
-- *
-- * \return A pointer to the refresh_pattern parameters to use, or NULL if there is no match.
++ * \return A pointer to the refresh_pattern parameters to use, or nullptr if there is no match.
*/
const RefreshPattern *
refreshLimits(const char *url)
{
-- const RefreshPattern *R;
--
-- for (R = Config.Refresh; R; R = R->next) {
++ for (auto R = Config.Refresh; R; R = R->next) {
++(R->stats.matchTests);
-- if (!regexec(&(R->compiled_pattern), url, 0, 0, 0)) {
++ if (R->pattern.match(url)) {
++(R->stats.matchCount);
return R;
}
}
-- return NULL;
++ return nullptr;
}
/** Locate the first refresh_pattern rule that has the given uncompiled regex.
* This function is only ever called if there is no URI. Because a regex match is impossible, Squid
* forces the "." rule to apply (if it exists)
*
-- * \return A pointer to the refresh_pattern parameters to use, or NULL if there is no match.
++ * \return A pointer to the refresh_pattern parameters to use, or nullptr if there is no match.
*/
static const RefreshPattern *
refreshUncompiledPattern(const char *pat)
{
-- const RefreshPattern *R;
--
-- for (R = Config.Refresh; R; R = R->next) {
-- if (0 == strcmp(R->pattern, pat))
++ for (auto R = Config.Refresh; R; R = R->next) {
++ if (0 == strcmp(R->pattern.c_str(), pat))
return R;
}
-- return NULL;
++ return nullptr;
}
/**
if (NULL == R)
R = &DefaultRefresh;
-- debugs(22, 3, "Matched '" << R->pattern << " " <<
++ debugs(22, 3, "Matched '" << R->pattern.c_str() << " " <<
(int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
(int) R->max << "'");
R->stats.matchCount,
R->stats.matchTests,
xpercent(R->stats.matchCount, R->stats.matchTests),
-- (R->flags.icase ? "-i " : ""),
-- R->pattern);
++ (R->pattern.flags®_ICASE ? "-i " : ""),
++ R->pattern.c_str());
}
int i;
refreshCounts[rcCDigest].proto = "Cache Digests";
#endif
-- memset(&DefaultRefresh, '\0', sizeof(DefaultRefresh));
-- DefaultRefresh.pattern = "<none>";
-- DefaultRefresh.min = REFRESH_DEFAULT_MIN;
-- DefaultRefresh.pct = REFRESH_DEFAULT_PCT;
-- DefaultRefresh.max = REFRESH_DEFAULT_MAX;
--
refreshRegisterWithCacheManager();
}
--- /dev/null
+ ## Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ ##
+ ## Squid software is distributed under GPLv2+ license and includes
+ ## contributions from numerous individuals and organizations.
+ ## Please see the COPYING and CONTRIBUTORS files for details.
+ ##
+
+ #
+ # This file contains the list of regular expression syntaxes used
+ # it covers various uses of:
+ # unoptimized multi-line patterns
+ #
+ # Some other regression related patterns are tested in regressions-3.4.0.1
+ #
+
+ acl G dstdom_regex \.g...l\.com$
+ acl G dstdom_regex \.g...le\.com$
+
+ acl B browser ^Mozilla
+ acl B browser ^Java/[0-9]+(\.[0-9]+)?
++
++# invalid pattern - this should ERROR
++acl foo browser *
++