]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
/*
 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/*
 * Portions of this code are copyrighted and released under GPLv2+ by:
 * Copyright (c) 2011, Marcus Kool
 * Please add new claims to the CONTRIBUTORS file instead.
 */

/* DEBUG: section 28 Access Control */
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "base/RegexPattern.h"
22 #include "ConfigParser.h"
24 #include "sbuf/Algorithms.h"
25 #include "sbuf/List.h"
27 ACLRegexData::~ACLRegexData()
31 const Acl::ParameterFlags
&
32 ACLRegexData::supportedFlags() const
34 static const Acl::ParameterFlags flags
= { "-i", "+i" };
39 ACLRegexData::match(char const *word
)
44 debugs(28, 3, "checking '" << word
<< "'");
46 // walk the list of patterns to see if one matches
47 for (auto &i
: data
) {
49 debugs(28, 2, '\'' << i
.c_str() << "' found in '" << word
<< '\'');
50 // TODO: old code also popped the pattern to second place of the list
51 // in order to reduce patterns search times.
60 ACLRegexData::dump() const
63 int flags
= REG_EXTENDED
| REG_NOSUB
;
65 // walk and dump the list
66 // keeping the flags values consistent
67 for (auto &i
: data
) {
68 if (i
.flags
!= flags
) {
69 if ((i
.flags
®_ICASE
) != 0) {
70 sl
.emplace_back("-i");
72 sl
.emplace_back("+i");
77 sl
.emplace_back(i
.c_str());
84 removeUnnecessaryWildcards(char * t
)
88 if (strncmp(t
, "^.*", 3) == 0)
91 /* NOTE: an initial '.' might seem unnessary but is not;
92 * it can be a valid requirement that cannot be optimised
94 while (*t
== '.' && *(t
+1) == '*') {
99 debugs(28, DBG_IMPORTANT
, cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
100 debugs(28, DBG_IMPORTANT
, "WARNING: regular expression '" << orig
<< "' has only wildcards and matches all strings. Using '.*' instead.");
104 debugs(28, DBG_IMPORTANT
, cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
105 debugs(28, DBG_IMPORTANT
, "WARNING: regular expression '" << orig
<< "' has unnecessary wildcard(s). Using '" << t
<< "' instead.");
112 compileRE(std::list
<RegexPattern
> &curlist
, const char * RE
, int flags
)
114 if (RE
== NULL
|| *RE
== '\0')
115 return curlist
.empty(); // XXX: old code did this. It looks wrong.
118 if (int errcode
= regcomp(&comp
, RE
, flags
)) {
120 regerror(errcode
, &comp
, errbuf
, sizeof errbuf
);
121 debugs(28, DBG_CRITICAL
, cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
122 debugs(28, DBG_CRITICAL
, "ERROR: invalid regular expression: '" << RE
<< "': " << errbuf
);
125 debugs(28, 2, "compiled '" << RE
<< "' with flags " << flags
);
127 curlist
.emplace_back(flags
, RE
);
128 curlist
.back().regex
= comp
;
134 compileRE(std::list
<RegexPattern
> &curlist
, const SBufList
&RE
, int flags
)
137 return curlist
.empty(); // XXX: old code did this. It looks wrong.
139 static const SBuf
openparen("("), closeparen(")"), separator(")|(");
140 JoinContainerIntoSBuf(regexp
, RE
.begin(), RE
.end(), separator
, openparen
,
142 return compileRE(curlist
, regexp
.c_str(), flags
);
145 /** Compose and compile one large RE from a set of (small) REs.
146 * The ultimate goal is to have only one RE per ACL so that match() is
147 * called only once per ACL.
150 compileOptimisedREs(std::list
<RegexPattern
> &curlist
, const SBufList
&sl
)
152 std::list
<RegexPattern
> newlist
;
153 SBufList accumulatedRE
;
154 int numREs
= 0, reSize
= 0;
155 int flags
= REG_EXTENDED
| REG_NOSUB
;
157 for (const SBuf
& configurationLineWord
: sl
) {
158 static const SBuf
minus_i("-i");
159 static const SBuf
plus_i("+i");
160 if (configurationLineWord
== minus_i
) {
161 if (flags
& REG_ICASE
) {
162 /* optimisation of -i ... -i */
163 debugs(28, 2, "optimisation of -i ... -i" );
165 debugs(28, 2, "-i" );
166 if (!compileRE(newlist
, accumulatedRE
, flags
))
169 accumulatedRE
.clear();
173 } else if (configurationLineWord
== plus_i
) {
174 if ((flags
& REG_ICASE
) == 0) {
175 /* optimisation of +i ... +i */
176 debugs(28, 2, "optimisation of +i ... +i");
179 if (!compileRE(newlist
, accumulatedRE
, flags
))
182 accumulatedRE
.clear();
188 debugs(28, 2, "adding RE '" << configurationLineWord
<< "'");
189 accumulatedRE
.push_back(configurationLineWord
);
191 reSize
+= configurationLineWord
.length();
193 if (reSize
> 1024) { // must be < BUFSIZ everything included
194 debugs(28, 2, "buffer full, generating new optimised RE..." );
195 if (!compileRE(newlist
, accumulatedRE
, flags
))
197 accumulatedRE
.clear();
199 continue; /* do the loop again to add the RE to largeRE */
203 if (!compileRE(newlist
, accumulatedRE
, flags
))
206 accumulatedRE
.clear();
209 /* all was successful, so put the new list at the tail */
210 curlist
.splice(curlist
.end(), newlist
);
212 debugs(28, 2, numREs
<< " REs are optimised into one RE.");
214 debugs(28, (opt_parse_cfg_only
?DBG_IMPORTANT
:2), cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
215 debugs(28, (opt_parse_cfg_only
?DBG_IMPORTANT
:2), "WARNING: there are more than 100 regular expressions. " <<
216 "Consider using less REs or use rules without expressions like 'dstdomain'.");
223 compileUnoptimisedREs(std::list
<RegexPattern
> &curlist
, const SBufList
&sl
)
225 int flags
= REG_EXTENDED
| REG_NOSUB
;
227 static const SBuf
minus_i("-i"), plus_i("+i");
228 for (auto configurationLineWord
: sl
) {
229 if (configurationLineWord
== minus_i
) {
231 } else if (configurationLineWord
== plus_i
) {
234 if (!compileRE(curlist
, configurationLineWord
.c_str(), flags
))
235 debugs(28, DBG_CRITICAL
, "ERROR: Skipping regular expression. "
236 "Compile failed: '" << configurationLineWord
<< "'");
242 ACLRegexData::parse()
244 debugs(28, 2, "new Regex line or file");
247 while (char *t
= ConfigParser::RegexStrtokFile()) {
248 const char *clean
= removeUnnecessaryWildcards(t
);
249 if (strlen(clean
) > BUFSIZ
-1) {
250 debugs(28, DBG_CRITICAL
, cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
251 debugs(28, DBG_CRITICAL
, "ERROR: Skipping regular expression. Larger than " << BUFSIZ
-1 << " characters: '" << clean
<< "'");
253 debugs(28, 3, "buffering RE '" << clean
<< "'");
254 sl
.emplace_back(clean
);
258 if (!compileOptimisedREs(data
, sl
)) {
259 debugs(28, DBG_IMPORTANT
, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
260 compileUnoptimisedREs(data
, sl
);
265 ACLRegexData::empty() const
270 ACLData
<char const *> *
271 ACLRegexData::clone() const
273 /* Regex's don't clone yet. */
274 assert(data
.empty());
275 return new ACLRegexData
;