]> git.ipfire.org Git - thirdparty/squid.git/blame - src/acl/RegexData.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / acl / RegexData.cc
CommitLineData
225b7b10 1/*
bde978a6 2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
225b7b10 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
6564daea 11 * Copyright (c) 2011, Marcus Kool
bbc27441 12 * Please add new claims to the CONTRIBUTORS file instead.
225b7b10 13 */
14
bbc27441
AJ
15/* DEBUG: section 28 Access Control */
16
582c2af2 17#include "squid.h"
3ad63615 18#include "acl/Acl.h"
602d9612
A
19#include "acl/Checklist.h"
20#include "acl/RegexData.h"
d295d770 21#include "ConfigParser.h"
582c2af2 22#include "Debug.h"
3ebc8300 23#include "RegexList.h"
582c2af2 24#include "wordlist.h"
225b7b10 25
6564daea 26static void
3ebc8300 27aclDestroyRegexList(RegexList * data)
48071869 28{
3ebc8300 29 RegexList *next = NULL;
48071869 30
31 for (; data; data = next) {
32 next = data->next;
33 regfree(&data->regex);
34 safe_free(data->pattern);
35 memFree(data, MEM_RELIST);
36 }
37}
38
225b7b10 39ACLRegexData::~ACLRegexData()
40{
41 aclDestroyRegexList(data);
62e76326 42}
225b7b10 43
44bool
48071869 45ACLRegexData::match(char const *word)
225b7b10 46{
48071869 47 if (word == NULL)
48 return 0;
49
bf8fe701 50 debugs(28, 3, "aclRegexData::match: checking '" << word << "'");
48071869 51
3ebc8300 52 RegexList *first, *prev;
48071869 53
54 first = data;
55
56 prev = NULL;
57
3ebc8300 58 RegexList *current = first;
48071869 59
60 while (current) {
bf8fe701 61 debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'");
48071869 62
63 if (regexec(&current->regex, word, 0, 0, 0) == 0) {
64 if (prev != NULL) {
65 /* shift the element just found to the second position
66 * in the list */
67 prev->next = current->next;
68 current->next = first->next;
69 first->next = current;
70 }
71
bf8fe701 72 debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'");
48071869 73 return 1;
74 }
75
76 prev = current;
77 current = current->next;
78 }
79
80 return 0;
225b7b10 81}
82
8966008b 83SBufList
4f8ca96e 84ACLRegexData::dump() const
225b7b10 85{
8966008b 86 SBufList sl;
3ebc8300 87 RegexList *temp = data;
ae315d9c 88 int flags = REG_EXTENDED | REG_NOSUB;
48071869 89
90 while (temp != NULL) {
ae315d9c 91 if (temp->flags != flags) {
37a8ae40 92 if ((temp->flags&REG_ICASE) != 0) {
8966008b 93 sl.push_back(SBuf("-i"));
ae315d9c 94 } else {
8966008b 95 sl.push_back(SBuf("+i"));
ae315d9c
AJ
96 }
97 flags = temp->flags;
98 }
99
8966008b 100 sl.push_back(SBuf(temp->pattern));
48071869 101 temp = temp->next;
102 }
103
8966008b 104 return sl;
48071869 105}
106
e022e8c6 107static const char *
6564daea
MK
108removeUnnecessaryWildcards(char * t)
109{
110 char * orig = t;
111
112 if (strncmp(t, "^.*", 3) == 0)
113 t += 3;
114
115 /* NOTE: an initial '.' might seem unnessary but is not;
116 * it can be a valid requirement that cannot be optimised
117 */
118 while (*t == '.' && *(t+1) == '*') {
119 t += 2;
120 }
121
122 if (*t == '\0') {
123 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
124 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
125 return ".*";
126 }
127 if (t != orig) {
128 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
129 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
130 }
131
132 return t;
133}
134
3ebc8300
FC
135static RegexList **
136compileRE(RegexList **Tail, char * RE, int flags)
48071869 137{
48071869 138 int errcode;
3ebc8300 139 RegexList *q;
6564daea
MK
140 regex_t comp;
141
142 if (RE == NULL || *RE == '\0')
143 return Tail;
144
145 if ((errcode = regcomp(&comp, RE, flags)) != 0) {
146 char errbuf[256];
147 regerror(errcode, &comp, errbuf, sizeof errbuf);
148 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
149 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
150 return NULL;
151 }
152 debugs(28, 2, "compileRE: compiled '" << RE << "' with flags " << flags );
153
3ebc8300 154 q = (RegexList *) memAllocate(MEM_RELIST);
6564daea
MK
155 q->pattern = xstrdup(RE);
156 q->regex = comp;
157 q->flags = flags;
158 *(Tail) = q;
159 Tail = &q->next;
160
161 return Tail;
162}
163
164/** Compose and compile one large RE from a set of (small) REs.
165 * The ultimate goal is to have only one RE per ACL so that regexec() is
166 * called only once per ACL.
167 */
168static int
3ebc8300 169compileOptimisedREs(RegexList **curlist, wordlist * wl)
6564daea 170{
3ebc8300
FC
171 RegexList **Tail;
172 RegexList *newlist;
173 RegexList **newlistp;
6564daea 174 int numREs = 0;
48071869 175 int flags = REG_EXTENDED | REG_NOSUB;
6564daea
MK
176 int largeREindex = 0;
177 char largeRE[BUFSIZ];
48071869 178
6564daea
MK
179 newlist = NULL;
180 newlistp = &newlist;
d740b987 181
6564daea 182 largeRE[0] = '\0';
d740b987 183
6564daea
MK
184 while (wl != NULL) {
185 int RElen;
186 RElen = strlen( wl->key );
d740b987 187
6564daea
MK
188 if (strcmp(wl->key, "-i") == 0) {
189 if (flags & REG_ICASE) {
190 /* optimisation of -i ... -i */
191 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
192 } else {
193 debugs(28, 2, "compileOptimisedREs: -i" );
194 newlistp = compileRE( newlistp, largeRE, flags );
195 if (newlistp == NULL) {
196 aclDestroyRegexList( newlist );
197 return 0;
198 }
199 flags |= REG_ICASE;
200 largeRE[largeREindex=0] = '\0';
201 }
202 } else if (strcmp(wl->key, "+i") == 0) {
203 if ((flags & REG_ICASE) == 0) {
204 /* optimisation of +i ... +i */
205 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
206 } else {
207 debugs(28, 2, "compileOptimisedREs: +i");
208 newlistp = compileRE( newlistp, largeRE, flags );
209 if (newlistp == NULL) {
210 aclDestroyRegexList( newlist );
211 return 0;
212 }
213 flags &= ~REG_ICASE;
214 largeRE[largeREindex=0] = '\0';
215 }
216 } else if (RElen + largeREindex + 3 < BUFSIZ-1) {
217 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl->key << "'");
f207fe64
FC
218 if (largeREindex > 0) {
219 largeRE[largeREindex] = '|';
220 ++largeREindex;
221 }
222 largeRE[largeREindex] = '(';
223 ++largeREindex;
224 for (char * t = wl->key; *t != '\0'; ++t) {
225 largeRE[largeREindex] = *t;
226 ++largeREindex;
227 }
228 largeRE[largeREindex] = ')';
229 ++largeREindex;
6564daea 230 largeRE[largeREindex] = '\0';
742a021b 231 ++numREs;
6564daea
MK
232 } else {
233 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
234 newlistp = compileRE( newlistp, largeRE, flags );
235 if (newlistp == NULL) {
236 aclDestroyRegexList( newlist );
237 return 0;
238 }
239 largeRE[largeREindex=0] = '\0';
240 continue; /* do the loop again to add the RE to largeRE */
48071869 241 }
6564daea
MK
242 wl = wl->next;
243 }
244
245 newlistp = compileRE( newlistp, largeRE, flags );
246 if (newlistp == NULL) {
247 aclDestroyRegexList( newlist );
248 return 0;
249 }
250
251 /* all was successful, so put the new list at the tail */
252 if (*curlist == NULL) {
253 *curlist = newlist;
254 } else {
255 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
256 ;
257 (*Tail) = newlist;
258 }
259
260 debugs(28, 2, "compileOptimisedREs: " << numREs << " REs are optimised into one RE.");
261 if (numREs > 100) {
262 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
263 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
264 "Consider using less REs or use rules without expressions like 'dstdomain'.");
265 }
266
267 return 1;
268}
48071869 269
6564daea 270static void
3ebc8300 271compileUnoptimisedREs(RegexList **curlist, wordlist * wl)
6564daea 272{
3ebc8300
FC
273 RegexList **Tail;
274 RegexList **newTail;
6564daea
MK
275 int flags = REG_EXTENDED | REG_NOSUB;
276
277 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
278 ;
279
280 while (wl != NULL) {
6564daea
MK
281 if (strcmp(wl->key, "-i") == 0) {
282 flags |= REG_ICASE;
283 } else if (strcmp(wl->key, "+i") == 0) {
48071869 284 flags &= ~REG_ICASE;
6564daea
MK
285 } else {
286 newTail = compileRE( Tail, wl->key , flags );
287 if (newTail == NULL)
288 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Compile failed: '" << wl->key << "'");
289 else
290 Tail = newTail;
48071869 291 }
6564daea
MK
292 wl = wl->next;
293 }
294}
295
296static void
3ebc8300 297aclParseRegexList(RegexList **curlist)
6564daea
MK
298{
299 char *t;
300 wordlist *wl = NULL;
48071869 301
6564daea
MK
302 debugs(28, 2, HERE << "aclParseRegexList: new Regex line or file");
303
bde7a8ce 304 while ((t = ConfigParser::RegexStrtokFile()) != NULL) {
e022e8c6
AJ
305 const char *clean = removeUnnecessaryWildcards(t);
306 if (strlen(clean) > BUFSIZ-1) {
6564daea 307 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
e022e8c6 308 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
6564daea 309 } else {
e022e8c6
AJ
310 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean << "'");
311 wordlistAdd(&wl, clean);
48071869 312 }
6564daea 313 }
48071869 314
6564daea
MK
315 if (!compileOptimisedREs(curlist, wl)) {
316 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
317 compileUnoptimisedREs(curlist, wl);
48071869 318 }
6564daea
MK
319
320 wordlistDestroy(&wl);
225b7b10 321}
322
323void
324ACLRegexData::parse()
325{
326 aclParseRegexList(&data);
327}
328
65092baf 329bool
330ACLRegexData::empty() const
331{
332 return data == NULL;
333}
225b7b10 334
5dee515e 335ACLData<char const *> *
225b7b10 336ACLRegexData::clone() const
337{
338 /* Regex's don't clone yet. */
339 assert (!data);
340 return new ACLRegexData;
341}
f53969cc 342