]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
15 /* DEBUG: section 28 Access Control */
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "ConfigParser.h"
24 #include "RegexList.h"
28 aclDestroyRegexList(RegexList
* data
)
30 RegexList
*next
= NULL
;
32 for (; data
; data
= next
) {
34 regfree(&data
->regex
);
35 safe_free(data
->pattern
);
36 memFree(data
, MEM_RELIST
);
40 ACLRegexData::~ACLRegexData()
42 aclDestroyRegexList(data
);
46 ACLRegexData::match(char const *word
)
51 debugs(28, 3, "aclRegexData::match: checking '" << word
<< "'");
53 RegexList
*first
, *prev
;
59 RegexList
*current
= first
;
62 debugs(28, 3, "aclRegexData::match: looking for '" << current
->pattern
<< "'");
64 if (regexec(&current
->regex
, word
, 0, 0, 0) == 0) {
66 /* shift the element just found to the second position
68 prev
->next
= current
->next
;
69 current
->next
= first
->next
;
70 first
->next
= current
;
73 debugs(28, 2, "aclRegexData::match: match '" << current
->pattern
<< "' found in '" << word
<< "'");
78 current
= current
->next
;
85 ACLRegexData::dump() const
88 RegexList
*temp
= data
;
89 int flags
= REG_EXTENDED
| REG_NOSUB
;
91 while (temp
!= NULL
) {
92 if (temp
->flags
!= flags
) {
93 if ((temp
->flags
&REG_ICASE
) != 0) {
94 sl
.push_back(SBuf("-i"));
96 sl
.push_back(SBuf("+i"));
101 sl
.push_back(SBuf(temp
->pattern
));
109 removeUnnecessaryWildcards(char * t
)
113 if (strncmp(t
, "^.*", 3) == 0)
116 /* NOTE: an initial '.' might seem unnecessary but is not;
117 * it can be a valid requirement that cannot be optimised
119 while (*t
== '.' && *(t
+1) == '*') {
124 debugs(28, DBG_IMPORTANT
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
125 debugs(28, DBG_IMPORTANT
, "WARNING: regular expression '" << orig
<< "' has only wildcards and matches all strings. Using '.*' instead.");
129 debugs(28, DBG_IMPORTANT
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
130 debugs(28, DBG_IMPORTANT
, "WARNING: regular expression '" << orig
<< "' has unnecessary wildcard(s). Using '" << t
<< "' instead.");
137 compileRE(RegexList
**Tail
, char * RE
, int flags
)
143 if (RE
== NULL
|| *RE
== '\0')
146 if ((errcode
= regcomp(&comp
, RE
, flags
)) != 0) {
148 regerror(errcode
, &comp
, errbuf
, sizeof errbuf
);
149 debugs(28, DBG_CRITICAL
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
150 debugs(28, DBG_CRITICAL
, "ERROR: invalid regular expression: '" << RE
<< "': " << errbuf
);
153 debugs(28, 2, "compileRE: compiled '" << RE
<< "' with flags " << flags
);
155 q
= (RegexList
*) memAllocate(MEM_RELIST
);
156 q
->pattern
= xstrdup(RE
);
165 /** Compose and compile one large RE from a set of (small) REs.
166 * The ultimate goal is to have only one RE per ACL so that regexec() is
167 * called only once per ACL.
170 compileOptimisedREs(RegexList
**curlist
, wordlist
* wl
)
174 RegexList
**newlistp
;
176 int flags
= REG_EXTENDED
| REG_NOSUB
;
177 int largeREindex
= 0;
178 char largeRE
[BUFSIZ
];
187 RElen
= strlen( wl
->key
);
189 if (strcmp(wl
->key
, "-i") == 0) {
190 if (flags
& REG_ICASE
) {
191 /* optimisation of -i ... -i */
192 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
194 debugs(28, 2, "compileOptimisedREs: -i" );
195 newlistp
= compileRE( newlistp
, largeRE
, flags
);
196 if (newlistp
== NULL
) {
197 aclDestroyRegexList( newlist
);
201 largeRE
[largeREindex
=0] = '\0';
203 } else if (strcmp(wl
->key
, "+i") == 0) {
204 if ((flags
& REG_ICASE
) == 0) {
205 /* optimisation of +i ... +i */
206 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
208 debugs(28, 2, "compileOptimisedREs: +i");
209 newlistp
= compileRE( newlistp
, largeRE
, flags
);
210 if (newlistp
== NULL
) {
211 aclDestroyRegexList( newlist
);
215 largeRE
[largeREindex
=0] = '\0';
217 } else if (RElen
+ largeREindex
+ 3 < BUFSIZ
-1) {
218 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl
->key
<< "'");
219 if (largeREindex
> 0) {
220 largeRE
[largeREindex
] = '|';
223 largeRE
[largeREindex
] = '(';
225 for (char * t
= wl
->key
; *t
!= '\0'; ++t
) {
226 largeRE
[largeREindex
] = *t
;
229 largeRE
[largeREindex
] = ')';
231 largeRE
[largeREindex
] = '\0';
234 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
235 newlistp
= compileRE( newlistp
, largeRE
, flags
);
236 if (newlistp
== NULL
) {
237 aclDestroyRegexList( newlist
);
240 largeRE
[largeREindex
=0] = '\0';
241 continue; /* do the loop again to add the RE to largeRE */
246 newlistp
= compileRE( newlistp
, largeRE
, flags
);
247 if (newlistp
== NULL
) {
248 aclDestroyRegexList( newlist
);
252 /* all was successful, so put the new list at the tail */
253 if (*curlist
== NULL
) {
256 for (Tail
= curlist
; *Tail
!= NULL
; Tail
= &((*Tail
)->next
))
261 debugs(28, 2, "compileOptimisedREs: " << numREs
<< " REs are optimised into one RE.");
263 debugs(28, (opt_parse_cfg_only
?DBG_IMPORTANT
:2), "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
264 debugs(28, (opt_parse_cfg_only
?DBG_IMPORTANT
:2), "WARNING: there are more than 100 regular expressions. " <<
265 "Consider using less REs or use rules without expressions like 'dstdomain'.");
272 compileUnoptimisedREs(RegexList
**curlist
, wordlist
* wl
)
276 int flags
= REG_EXTENDED
| REG_NOSUB
;
278 for (Tail
= curlist
; *Tail
!= NULL
; Tail
= &((*Tail
)->next
))
282 if (strcmp(wl
->key
, "-i") == 0) {
284 } else if (strcmp(wl
->key
, "+i") == 0) {
287 newTail
= compileRE( Tail
, wl
->key
, flags
);
289 debugs(28, DBG_CRITICAL
, "ERROR: Skipping regular expression. Compile failed: '" << wl
->key
<< "'");
298 aclParseRegexList(RegexList
**curlist
)
303 debugs(28, 2, HERE
<< "aclParseRegexList: new Regex line or file");
305 while ((t
= ConfigParser::RegexStrtokFile()) != NULL
) {
306 const char *clean
= removeUnnecessaryWildcards(t
);
307 if (strlen(clean
) > BUFSIZ
-1) {
308 debugs(28, DBG_CRITICAL
, "" << cfg_filename
<< " line " << config_lineno
<< ": " << config_input_line
);
309 debugs(28, DBG_CRITICAL
, "ERROR: Skipping regular expression. Larger than " << BUFSIZ
-1 << " characters: '" << clean
<< "'");
311 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean
<< "'");
312 wordlistAdd(&wl
, clean
);
316 if (!compileOptimisedREs(curlist
, wl
)) {
317 debugs(28, DBG_IMPORTANT
, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
318 compileUnoptimisedREs(curlist
, wl
);
321 wordlistDestroy(&wl
);
325 ACLRegexData::parse()
327 aclParseRegexList(&data
);
331 ACLRegexData::empty() const
336 ACLData
<char const *> *
337 ACLRegexData::clone() const
339 /* Regex's don't clone yet. */
341 return new ACLRegexData
;