Also moved non-ACL regex configuration parsing code inside ConfigParser.
It is possible to move ACL (data) regex configuration parsing code as
well, but that move is a lot more complex due to regex-pattern-joining
optimizations, and there are no immediate plans to support
non-regcomp(3) regexes in ACL data. We may do that move later as we get
more experience with non-regcomp(3) regexes and decide to join them too.
These moves clean up existing regex-using code and allow adding support
for non-regcomp(3) regexes (e.g., regexes based on C++11 <regex>)
without adjusting ConfigParser::regex() callers. Such support would also
require "virtualizing" RegexPattern. To avoid increasing complexity and
hurting performance, that (simpler) step should be done only if we
decide to actually add support for non-regcomp(3) regexes.
The above changes allowed us to improve RegexData error reporting: Squid
no longer reports REs _optimization_ failure when it is an individual RE
that is broken (and reported as such). Squid still ignores the fact that
broken REs can be "optimized" into a completely "different" valid
combined RE: We do not compile individual REs unless optimization fails.
Also simplified and polished affected code.
#include "squid.h"
#include "acl/Gadgets.h"
#include "base/Here.h"
+#include "base/RegexPattern.h"
#include "cache_cf.h"
#include "ConfigParser.h"
#include "Debug.h"
return token;
}
-char *
-ConfigParser::RegexPattern()
+std::unique_ptr<RegexPattern>
+ConfigParser::regex(const char *expectedRegexDescription)
{
- if (ConfigParser::RecognizeQuotedValues) {
- debugs(3, DBG_CRITICAL, "FATAL: Can not read regex expression while configuration_includes_quoted_values is enabled");
- self_destruct();
- }
+ if (RecognizeQuotedValues)
+ throw TextException("Cannot read regex expression while configuration_includes_quoted_values is enabled", Here());
+
+ SBuf pattern;
+ int flags = REG_EXTENDED | REG_NOSUB;
+
ConfigParser::RecognizeQuotedPair_ = true;
- char * token = NextToken();
+ const auto flagOrPattern = token(expectedRegexDescription);
+ if (flagOrPattern.cmp("-i") == 0) {
+ flags |= REG_ICASE;
+ pattern = token(expectedRegexDescription);
+ } else if (flagOrPattern.cmp("+i") == 0) {
+ flags &= ~REG_ICASE;
+ pattern = token(expectedRegexDescription);
+ } else {
+ pattern = flagOrPattern;
+ }
ConfigParser::RecognizeQuotedPair_ = false;
- return token;
+
+ return std::unique_ptr<RegexPattern>(new RegexPattern(pattern, flags));
}
char *
#define SQUID_CONFIGPARSER_H
#include "acl/forward.h"
+#include "base/forward.h"
#include "sbuf/forward.h"
#include "SquidString.h"
+#include <memory>
#include <queue>
#include <stack>
#include <string>
/// parses an [if [!]<acl>...] construct
Acl::Tree *optionalAclList();
+ /// extracts and returns a regex (including any optional flags)
+ std::unique_ptr<RegexPattern> regex(const char *expectedRegexDescription);
+
static void ParseUShort(unsigned short *var);
static void ParseBool(bool *var);
static const char *QuoteString(const String &var);
*/
static char *RegexStrtokFile();
- /**
- * Parse the next token as a regex pattern. The regex patterns are non quoted
- * tokens.
- */
- static char *RegexPattern();
-
/**
* Parse the next token with support for quoted values enabled even if
* the configuration_includes_quoted_values is set to off
#include "base/RegexPattern.h"
+#include <memory>
+
/// a representation of a refresh pattern.
class RefreshPattern
{
*/
#define REFRESH_DEFAULT_MAX static_cast<time_t>(259200)
- RefreshPattern(const char *aPattern, const decltype(RegexPattern::flags) &reFlags) :
- pattern(reFlags, aPattern),
+ using RegexPointer = std::unique_ptr<RegexPattern>;
+
+ // If given a regex, becomes its owner, creating an explicit refresh_pattern
+ // rule. Otherwise, creates an implicit/default refresh_pattern rule.
+ explicit RefreshPattern(RegexPointer aRegex):
min(0), pct(0.20), max(REFRESH_DEFAULT_MAX),
next(NULL),
- max_stale(0)
+ max_stale(0),
+ regex_(std::move(aRegex))
{
memset(&flags, 0, sizeof(flags));
}
delete t;
}
}
- // ~RefreshPattern() default destructor is fine
- RegexPattern pattern;
time_t min;
double pct;
time_t max;
uint64_t matchCount;
// TODO: some stats to indicate how useful/less the flags are would be nice.
} stats;
+
+ /// reports configuration excluding trailing options
+ void printHead(std::ostream &) const;
+
+ /// reports the configured pattern or a fake pattern of the implicit rule
+ void printPattern(std::ostream &os) const;
+
+ // TODO: Refactor external refresh_pattern rules iterators to make private.
+ /// configured regex; do not use except when iterating configured rules
+ const RegexPattern ®ex() const;
+
+private:
+ /// configured regex or, for the implicit refresh_pattern rule, nil
+ RegexPointer regex_;
};
+inline std::ostream &
+operator <<(std::ostream &os, const RefreshPattern &r)
+{
+ r.printHead(os);
+ return os;
+}
+
#endif /* SQUID_REFRESHPATTERN_H_ */
#include "Debug.h"
#include "sbuf/Algorithms.h"
#include "sbuf/List.h"
+#include "sbuf/Stream.h"
Acl::BooleanOptionValue ACLRegexData::CaseInsensitive_;
// walk the list of patterns to see if one matches
for (auto &i : data) {
if (i.match(word)) {
- debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
+ debugs(28, 2, '\'' << i << "' found in '" << word << '\'');
// TODO: old code also popped the pattern to second place of the list
// in order to reduce patterns search times.
return 1;
SBufList
ACLRegexData::dump() const
{
- SBufList sl;
- int flags = REG_EXTENDED | REG_NOSUB;
+ SBufStream os;
- // walk and dump the list
- // keeping the flags values consistent
- for (auto &i : data) {
- if (i.flags != flags) {
- if ((i.flags®_ICASE) != 0) {
- sl.emplace_back("-i");
- } else {
- sl.emplace_back("+i");
- }
- flags = i.flags;
- }
-
- sl.emplace_back(i.c_str());
+ const RegexPattern *previous = nullptr;
+ for (const auto &i: data) {
+ i.print(os, previous); // skip flags implied by the previous entry
+ previous = &i;
}
- return sl;
+ return SBufList(1, os.buf());
}
static const char *
return t;
}
-static bool
-compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
+static void
+compileRE(std::list<RegexPattern> &curlist, const SBuf &RE, int flags)
{
- if (RE == NULL || *RE == '\0')
- return curlist.empty(); // XXX: old code did this. It looks wrong.
-
- regex_t comp;
- if (int errcode = regcomp(&comp, RE, flags)) {
- char errbuf[256];
- regerror(errcode, &comp, errbuf, sizeof errbuf);
- debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
- debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
- return false;
- }
- debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
-
- curlist.emplace_back(flags, RE);
- curlist.back().regex = comp;
-
- return true;
+ curlist.emplace_back(RE, flags);
}
-static bool
-compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
+static void
+compileREs(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
{
- if (RE.empty())
- return curlist.empty(); // XXX: old code did this. It looks wrong.
+ assert(!RE.empty());
SBuf regexp;
static const SBuf openparen("("), closeparen(")"), separator(")|(");
JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
closeparen);
- return compileRE(curlist, regexp.c_str(), flags);
+ compileRE(curlist, regexp, flags);
}
/** Compose and compile one large RE from a set of (small) REs.
* The ultimate goal is to have only one RE per ACL so that match() is
* called only once per ACL.
*/
-static int
+static void
compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl, const int flagsAtLineStart)
{
std::list<RegexPattern> newlist;
debugs(28, 2, "optimisation of -i ... -i" );
} else {
debugs(28, 2, "-i" );
- if (!compileRE(newlist, accumulatedRE, flags))
- return 0;
+ if (!accumulatedRE.empty()) {
+ compileREs(newlist, accumulatedRE, flags);
+ accumulatedRE.clear();
+ reSize = 0;
+ }
flags |= REG_ICASE;
- accumulatedRE.clear();
- reSize = 0;
}
continue;
} else if (configurationLineWord == plus_i) {
debugs(28, 2, "optimisation of +i ... +i");
} else {
debugs(28, 2, "+i");
- if (!compileRE(newlist, accumulatedRE, flags))
- return 0;
+ if (!accumulatedRE.empty()) {
+ compileREs(newlist, accumulatedRE, flags);
+ accumulatedRE.clear();
+ reSize = 0;
+ }
flags &= ~REG_ICASE;
- accumulatedRE.clear();
- reSize = 0;
}
continue;
}
if (reSize > 1024) { // must be < BUFSIZ everything included
debugs(28, 2, "buffer full, generating new optimised RE..." );
- if (!compileRE(newlist, accumulatedRE, flags))
- return 0;
+ compileREs(newlist, accumulatedRE, flags);
accumulatedRE.clear();
reSize = 0;
continue; /* do the loop again to add the RE to largeRE */
}
}
- if (!compileRE(newlist, accumulatedRE, flags))
- return 0;
-
- accumulatedRE.clear();
- reSize = 0;
+ if (!accumulatedRE.empty()) {
+ compileREs(newlist, accumulatedRE, flags);
+ accumulatedRE.clear();
+ reSize = 0;
+ }
/* all was successful, so put the new list at the tail */
curlist.splice(curlist.end(), newlist);
debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
"Consider using less REs or use rules without expressions like 'dstdomain'.");
}
-
- return 1;
}
static void
auto flags = flagsAtLineStart;
static const SBuf minus_i("-i"), plus_i("+i");
- for (auto configurationLineWord : sl) {
+ for (const auto &configurationLineWord: sl) {
if (configurationLineWord == minus_i) {
flags |= REG_ICASE;
} else if (configurationLineWord == plus_i) {
flags &= ~REG_ICASE;
} else {
- if (!compileRE(curlist, configurationLineWord.c_str(), flags))
- debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
- "Compile failed: '" << configurationLineWord << "'");
+ try {
+ compileRE(curlist, configurationLineWord, flags);
+ } catch (...) {
+ // TODO: Make these configuration failures fatal (by default).
+ debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression: '" << configurationLineWord << "'" <<
+ Debug::Extra << "configuration: " << cfg_filename << " line " << config_lineno << ": " << config_input_line <<
+ Debug::Extra << "regex compilation failure: " << CurrentException);
+ }
}
}
}
}
}
- if (!compileOptimisedREs(data, sl, flagsAtLineStart)) {
- debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
+ try {
+ // ignore the danger of merging invalid REs into a valid "optimized" RE
+ compileOptimisedREs(data, sl, flagsAtLineStart);
+ } catch (...) {
compileUnoptimisedREs(data, sl, flagsAtLineStart);
+ // Delay compileOptimisedREs() failure reporting until we know that
+ // compileUnoptimisedREs() above have succeeded. If
+ // compileUnoptimisedREs() also fails, then the compileOptimisedREs()
+ // exception caught earlier was probably not related to _optimization_
+ // (and we do not want to report the same RE compilation problem twice).
+ debugs(28, DBG_IMPORTANT, "WARNING: Failed to optimize a set of regular expressions; will use them as-is instead;" <<
+ Debug::Extra << "configuration: " << cfg_filename << " line " << config_lineno << ": " << config_input_line <<
+ Debug::Extra << "optimization error: " << CurrentException);
}
}
#include "squid.h"
#include "base/RegexPattern.h"
+#include "base/TextException.h"
+#include "Debug.h"
+#include "sbuf/Stream.h"
+
+#include <iostream>
#include <utility>
-RegexPattern::RegexPattern(int aFlags, const char *aPattern) :
- flags(aFlags),
- pattern(xstrdup(aPattern))
+RegexPattern::RegexPattern(const SBuf &aPattern, const int aFlags):
+ pattern(aPattern),
+ flags(aFlags)
{
- memset(®ex, 0, sizeof(regex));
-}
+ memset(®ex, 0, sizeof(regex)); // paranoid; POSIX does not require this
+ if (const auto errCode = regcomp(®ex, pattern.c_str(), flags)) {
+ char errBuf[256];
+ // for simplicity, ignore any error message truncation
+ (void)regerror(errCode, ®ex, errBuf, sizeof(errBuf));
+ // POSIX examples show no regfree(®ex) after a regcomp() error;
+ // presumably, regcom() frees any allocated memory on failures
+ throw TextException(ToSBuf("POSIX regcomp(3) failure: (", errCode, ") ", errBuf,
+ Debug::Extra, "regular expression: ", pattern), Here());
+ }
-RegexPattern::RegexPattern(RegexPattern &&o) :
- flags(std::move(o.flags)),
- regex(std::move(o.regex)),
- pattern(std::move(o.pattern))
-{
- memset(&o.regex, 0, sizeof(o.regex));
- o.pattern = nullptr;
+ debugs(28, 2, *this);
}
RegexPattern::~RegexPattern()
{
- xfree(pattern);
regfree(®ex);
}
-RegexPattern &
-RegexPattern::operator =(RegexPattern &&o)
+void
+RegexPattern::print(std::ostream &os, const RegexPattern * const previous) const
{
- flags = std::move(o.flags);
- regex = std::move(o.regex);
- memset(&o.regex, 0, sizeof(o.regex));
- pattern = std::move(o.pattern);
- o.pattern = nullptr;
- return *this;
+ // report context-dependent explicit options and delimiters
+ if (!previous) {
+ // do not report default settings
+ if (!caseSensitive())
+ os << "-i ";
+ } else {
+ os << ' '; // separate us from the previous value
+
+ // do not report same-as-previous (i.e. inherited) settings
+ if (previous->flags != flags)
+ os << (caseSensitive() ? "+i " : "-i ");
+ }
+
+ os << pattern;
}
#include "compat/GnuRegex.h"
#include "mem/forward.h"
+#include "sbuf/SBuf.h"
/**
* A regular expression,
public:
RegexPattern() = delete;
- RegexPattern(int aFlags, const char *aPattern);
+ RegexPattern(const SBuf &aPattern, int aFlags);
~RegexPattern();
- // regex type varies by library, usually not safe to copy
- RegexPattern(const RegexPattern &) = delete;
- RegexPattern &operator =(const RegexPattern &) = delete;
+ RegexPattern(RegexPattern &&) = delete; // no copying of any kind
- RegexPattern(RegexPattern &&);
- RegexPattern &operator =(RegexPattern &&);
+ /// whether the regex differentiates letter case
+ bool caseSensitive() const { return !(flags & REG_ICASE); }
+
+ /// whether this is an "any single character" regex (".")
+ bool isDot() const { return pattern.length() == 1 && pattern[0] == '.'; }
- const char * c_str() const {return pattern;}
bool match(const char *str) const {return regexec(®ex,str,0,NULL,0)==0;}
-public:
- int flags;
- regex_t regex;
+ /// Attempts to reproduce this regex (context-sensitive) configuration.
+ /// If the previous regex is nil, may not report default flags.
+ /// Otherwise, may not report same-as-previous flags (and prepends a space).
+ void print(std::ostream &os, const RegexPattern *previous = nullptr) const;
private:
- char *pattern;
+ /// a regular expression in the text form, suitable for regcomp(3)
+ SBuf pattern;
+
+ /// bitmask of REG_* flags for regcomp(3)
+ const int flags;
+
+ /// a "compiled pattern buffer" filled by regcomp(3) for regexec(3)
+ regex_t regex;
};
+inline std::ostream &
+operator <<(std::ostream &os, const RegexPattern &rp)
+{
+ rp.print(os);
+ return os;
+}
+
#endif /* SQUID_SRC_BASE_REGEXPATTERN_H */
class CodeContext;
class ScopedId;
class BadOptionalAccess;
+class RegexPattern;
template <typename Value> class Optional;
dump_refreshpattern(StoreEntry * entry, const char *name, RefreshPattern * head)
{
while (head != NULL) {
- storeAppendPrintf(entry, "%s%s %s %d %d%% %d",
- name,
- head->pattern.flags®_ICASE ? " -i" : null_string,
- head->pattern.c_str(),
- (int) head->min / 60,
- (int) (100.0 * head->pct + 0.5),
- (int) head->max / 60);
+ PackableStream os(*entry);
+ os << name << ' ';
+ head->printHead(os);
if (head->max_stale >= 0)
storeAppendPrintf(entry, " max-stale=%d", head->max_stale);
parse_refreshpattern(RefreshPattern ** head)
{
char *token;
- char *pattern;
time_t min = 0;
double pct = 0.0;
time_t max = 0;
int i;
RefreshPattern *t;
- regex_t comp;
- int errcode;
- int flags = REG_EXTENDED | REG_NOSUB;
-
- if ((token = ConfigParser::RegexPattern()) != NULL) {
-
- if (strcmp(token, "-i") == 0) {
- flags |= REG_ICASE;
- token = ConfigParser::RegexPattern();
- } else if (strcmp(token, "+i") == 0) {
- flags &= ~REG_ICASE;
- token = ConfigParser::RegexPattern();
- }
- }
-
- if (token == NULL) {
- debugs(3, DBG_CRITICAL, "FATAL: refresh_pattern missing the regex pattern parameter");
- self_destruct();
- return;
- }
-
- pattern = xstrdup(token);
+ auto regex = LegacyParser.regex("refresh_pattern regex");
i = GetInteger(); /* token: min */
) {
debugs(22, DBG_PARSE_NOTE(2), "UPGRADE: refresh_pattern option '" << token << "' is obsolete. Remove it.");
} else
- debugs(22, DBG_CRITICAL, "ERROR: refreshAddToList: Unknown option '" << pattern << "': " << token);
- }
-
- if ((errcode = regcomp(&comp, pattern, flags)) != 0) {
- char errbuf[256];
- regerror(errcode, &comp, errbuf, sizeof errbuf);
- debugs(22, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
- debugs(22, DBG_CRITICAL, "ERROR: refreshAddToList: Invalid regular expression '" << pattern << "': " << errbuf);
- xfree(pattern);
- return;
+ debugs(22, DBG_CRITICAL, "ERROR: Unknown refresh_pattern option: " << token);
}
pct = pct < 0.0 ? 0.0 : pct;
max = max < 0 ? 0 : max;
- t = new RefreshPattern(pattern, flags);
- t->pattern.regex = comp;
+ t = new RefreshPattern(std::move(regex));
t->min = min;
t->pct = pct;
t->max = max;
head = &(*head)->next;
*head = t;
-
- xfree(pattern);
}
static void
#endif
#include "squid.h"
+#include "base/PackableStream.h"
#include "HttpHdrCc.h"
#include "HttpReply.h"
#include "HttpRequest.h"
int status[STALE_DEFAULT + 1];
} refreshCounts[rcCount];
-static const RefreshPattern *refreshUncompiledPattern(const char *);
static OBJH refreshStats;
static int refreshStaleness(const StoreEntry * entry, time_t check_time, const time_t age, const RefreshPattern * R, stale_flags * sf);
-static RefreshPattern DefaultRefresh("<none>", 0);
+static RefreshPattern DefaultRefresh(nullptr);
/** Locate the first refresh_pattern rule that matches the given URL by regex.
*
{
for (auto R = Config.Refresh; R; R = R->next) {
++(R->stats.matchTests);
- if (R->pattern.match(url)) {
+ if (R->regex().match(url)) {
++(R->stats.matchCount);
return R;
}
return nullptr;
}
-/** Locate the first refresh_pattern rule that has the given uncompiled regex.
- *
- * \note There is only one reference to this function, below. It always passes "." as the pattern.
- * This function is only ever called if there is no URI. Because a regex match is impossible, Squid
- * forces the "." rule to apply (if it exists)
- *
- * \return A pointer to the refresh_pattern parameters to use, or nullptr if there is no match.
- */
+/// the first explicit refresh_pattern rule that uses a "." regex (or nil)
static const RefreshPattern *
-refreshUncompiledPattern(const char *pat)
+refreshFirstDotRule()
{
for (auto R = Config.Refresh; R; R = R->next) {
- if (0 == strcmp(R->pattern.c_str(), pat))
+ if (R->regex().isDot())
return R;
}
* 3. the default "." rule
*/
// XXX: performance regression. c_str() reallocates
- const RefreshPattern *R = (uri != nilUri) ? refreshLimits(uri.c_str()) : refreshUncompiledPattern(".");
+ const RefreshPattern *R = (uri != nilUri) ? refreshLimits(uri.c_str()) : refreshFirstDotRule();
if (NULL == R)
R = &DefaultRefresh;
- debugs(22, 3, "Matched '" << R->pattern.c_str() << " " <<
- (int) R->min << " " << (int) (100.0 * R->pct) << "%% " <<
- (int) R->max << "'");
+ debugs(22, 3, "Matched '" << *R << '\'');
debugs(22, 3, "\tage:\t" << age);
storeAppendPrintf(sentry, "\nRefresh pattern usage:\n\n");
storeAppendPrintf(sentry, " Used \tChecks \t%% Matches\tPattern\n");
for (const RefreshPattern *R = Config.Refresh; R; R = R->next) {
- storeAppendPrintf(sentry, " %10" PRIu64 "\t%10" PRIu64 "\t%6.2f\t%s%s\n",
+ storeAppendPrintf(sentry, " %10" PRIu64 "\t%10" PRIu64 "\t%6.2f\t",
R->stats.matchCount,
R->stats.matchTests,
- xpercent(R->stats.matchCount, R->stats.matchTests),
- (R->pattern.flags®_ICASE ? "-i " : ""),
- R->pattern.c_str());
+ xpercent(R->stats.matchCount, R->stats.matchTests));
+ PackableStream os(*sentry);
+ R->printPattern(os);
+ os << "\n";
}
int i;
refreshCountsStats(sentry, refreshCounts[i]);
}
+const RegexPattern &
+RefreshPattern::regex() const
+{
+ assert(regex_);
+ return *regex_;
+}
+
+void
+RefreshPattern::printPattern(std::ostream &os) const
+{
+ if (regex_)
+ regex_->print(os, nullptr); // refresh lines do not inherit line flags
+ else
+ os << "<none>";
+}
+
+void
+RefreshPattern::printHead(std::ostream &os) const
+{
+ printPattern(os);
+ os <<
+ // these adjustments are safe: raw values were configured using integers
+ ' ' << intmax_t(min/60) << // to minutes
+ ' ' << intmax_t(100.0 * pct + 0.5) << '%' << // to percentage points
+ ' ' << intmax_t(max/60); // to minutes
+}
+
static void
refreshRegisterWithCacheManager(void)
{