]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
15 /* DEBUG: section 28 Access Control */
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "ConfigParser.h"
23 #include "RegexList.h"
27 aclDestroyRegexList(RegexList
* data
)
29 RegexList
*next
= NULL
;
31 for (; data
; data
= next
) {
33 regfree(&data
->regex
);
34 safe_free(data
->pattern
);
35 memFree(data
, MEM_RELIST
);
39 ACLRegexData::~ACLRegexData()
41 aclDestroyRegexList(data
);
45 ACLRegexData::match(char const *word
)
50 debugs(28, 3, "aclRegexData::match: checking '" << word
<< "'");
52 RegexList
*first
, *prev
;
58 RegexList
*current
= first
;
61 debugs(28, 3, "aclRegexData::match: looking for '" << current
->pattern
<< "'");
63 if (regexec(&current
->regex
, word
, 0, 0, 0) == 0) {
65 /* shift the element just found to the second position
67 prev
->next
= current
->next
;
68 current
->next
= first
->next
;
69 first
->next
= current
;
72 debugs(28, 2, "aclRegexData::match: match '" << current
->pattern
<< "' found in '" << word
<< "'");
77 current
= current
->next
;
84 ACLRegexData::dump() const
87 RegexList
*temp
= data
;
88 int flags
= REG_EXTENDED
| REG_NOSUB
;
90 while (temp
!= NULL
) {
91 if (temp
->flags
!= flags
) {
92 if ((temp
->flags
&REG_ICASE
) != 0) {
93 sl
.push_back(SBuf("-i"));
95 sl
.push_back(SBuf("+i"));
100 sl
.push_back(SBuf(temp
->pattern
));
108 removeUnnecessaryWildcards(char * t
)
112 if (strncmp(t
, "^.*", 3) == 0)
115 /* NOTE: an initial '.' might seem unnecessary but is not;
116 * it can be a valid requirement that cannot be optimised
118 while (*t
== '.' && *(t
+1) == '*') {
123 debugs(28, DBG_IMPORTANT
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
124 debugs(28, DBG_IMPORTANT
, "WARNING: regular expression '" << orig
<< "' has only wildcards and matches all strings. Using '.*' instead.");
128 debugs(28, DBG_IMPORTANT
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
129 debugs(28, DBG_IMPORTANT
, "WARNING: regular expression '" << orig
<< "' has unnecessary wildcard(s). Using '" << t
<< "' instead.");
136 compileRE(RegexList
**Tail
, char * RE
, int flags
)
142 if (RE
== NULL
|| *RE
== '\0')
145 if ((errcode
= regcomp(&comp
, RE
, flags
)) != 0) {
147 regerror(errcode
, &comp
, errbuf
, sizeof errbuf
);
148 debugs(28, DBG_CRITICAL
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
149 debugs(28, DBG_CRITICAL
, "ERROR: invalid regular expression: '" << RE
<< "': " << errbuf
);
152 debugs(28, 2, "compileRE: compiled '" << RE
<< "' with flags " << flags
);
154 q
= (RegexList
*) memAllocate(MEM_RELIST
);
155 q
->pattern
= xstrdup(RE
);
164 /** Compose and compile one large RE from a set of (small) REs.
165 * The ultimate goal is to have only one RE per ACL so that regexec() is
166 * called only once per ACL.
169 compileOptimisedREs(RegexList
**curlist
, wordlist
* wl
)
173 RegexList
**newlistp
;
175 int flags
= REG_EXTENDED
| REG_NOSUB
;
176 int largeREindex
= 0;
177 char largeRE
[BUFSIZ
];
186 RElen
= strlen( wl
->key
);
188 if (strcmp(wl
->key
, "-i") == 0) {
189 if (flags
& REG_ICASE
) {
190 /* optimisation of -i ... -i */
191 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
193 debugs(28, 2, "compileOptimisedREs: -i" );
194 newlistp
= compileRE( newlistp
, largeRE
, flags
);
195 if (newlistp
== NULL
) {
196 aclDestroyRegexList( newlist
);
200 largeRE
[largeREindex
=0] = '\0';
202 } else if (strcmp(wl
->key
, "+i") == 0) {
203 if ((flags
& REG_ICASE
) == 0) {
204 /* optimisation of +i ... +i */
205 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
207 debugs(28, 2, "compileOptimisedREs: +i");
208 newlistp
= compileRE( newlistp
, largeRE
, flags
);
209 if (newlistp
== NULL
) {
210 aclDestroyRegexList( newlist
);
214 largeRE
[largeREindex
=0] = '\0';
216 } else if (RElen
+ largeREindex
+ 3 < BUFSIZ
-1) {
217 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl
->key
<< "'");
218 if (largeREindex
> 0) {
219 largeRE
[largeREindex
] = '|';
222 largeRE
[largeREindex
] = '(';
224 for (char * t
= wl
->key
; *t
!= '\0'; ++t
) {
225 largeRE
[largeREindex
] = *t
;
228 largeRE
[largeREindex
] = ')';
230 largeRE
[largeREindex
] = '\0';
233 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
234 newlistp
= compileRE( newlistp
, largeRE
, flags
);
235 if (newlistp
== NULL
) {
236 aclDestroyRegexList( newlist
);
239 largeRE
[largeREindex
=0] = '\0';
240 continue; /* do the loop again to add the RE to largeRE */
245 newlistp
= compileRE( newlistp
, largeRE
, flags
);
246 if (newlistp
== NULL
) {
247 aclDestroyRegexList( newlist
);
251 /* all was successful, so put the new list at the tail */
252 if (*curlist
== NULL
) {
255 for (Tail
= curlist
; *Tail
!= NULL
; Tail
= &((*Tail
)->next
))
260 debugs(28, 2, "compileOptimisedREs: " << numREs
<< " REs are optimised into one RE.");
262 debugs(28, (opt_parse_cfg_only
?DBG_IMPORTANT
:2), "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
263 debugs(28, (opt_parse_cfg_only
?DBG_IMPORTANT
:2), "WARNING: there are more than 100 regular expressions. " <<
264 "Consider using less REs or use rules without expressions like 'dstdomain'.");
271 compileUnoptimisedREs(RegexList
**curlist
, wordlist
* wl
)
275 int flags
= REG_EXTENDED
| REG_NOSUB
;
277 for (Tail
= curlist
; *Tail
!= NULL
; Tail
= &((*Tail
)->next
))
281 if (strcmp(wl
->key
, "-i") == 0) {
283 } else if (strcmp(wl
->key
, "+i") == 0) {
286 newTail
= compileRE( Tail
, wl
->key
, flags
);
288 debugs(28, DBG_CRITICAL
, "ERROR: Skipping regular expression. Compile failed: '" << wl
->key
<< "'");
297 aclParseRegexList(RegexList
**curlist
)
302 debugs(28, 2, HERE
<< "aclParseRegexList: new Regex line or file");
304 while ((t
= ConfigParser::RegexStrtokFile()) != NULL
) {
305 const char *clean
= removeUnnecessaryWildcards(t
);
306 if (strlen(clean
) > BUFSIZ
-1) {
307 debugs(28, DBG_CRITICAL
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
308 debugs(28, DBG_CRITICAL
, "ERROR: Skipping regular expression. Larger than " << BUFSIZ
-1 << " characters: '" << clean
<< "'");
310 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean
<< "'");
311 wordlistAdd(&wl
, clean
);
315 if (!compileOptimisedREs(curlist
, wl
)) {
316 debugs(28, DBG_IMPORTANT
, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
317 compileUnoptimisedREs(curlist
, wl
);
320 wordlistDestroy(&wl
);
324 ACLRegexData::parse()
326 aclParseRegexList(&data
);
330 ACLRegexData::empty() const
335 ACLData
<char const *> *
336 ACLRegexData::clone() const
338 /* Regex's don't clone yet. */
340 return new ACLRegexData
;