]> git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
Merged from trunk
[thirdparty/squid.git] / src / acl / RegexData.cc
1 /*
2 * DEBUG: section 28 Access Control
3 * AUTHOR: Duane Wessels
4 * AUTHOR: Marcus Kool
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 *
33 * Copyright (c) 2003, Robert Collins <robertc@squid-cache.org>
34 * Copyright (c) 2011, Marcus Kool
35 */
36
37 #include "squid.h"
38 #include "acl/RegexData.h"
39 #include "acl/Checklist.h"
40 #include "acl/Acl.h"
41 #include "ConfigParser.h"
42 #include "Debug.h"
43 #include "Mem.h"
44 #include "wordlist.h"
45
46 static void
47 aclDestroyRegexList(relist * data)
48 {
49 relist *next = NULL;
50
51 for (; data; data = next) {
52 next = data->next;
53 regfree(&data->regex);
54 safe_free(data->pattern);
55 memFree(data, MEM_RELIST);
56 }
57 }
58
59 ACLRegexData::~ACLRegexData()
60 {
61 aclDestroyRegexList(data);
62 }
63
64 bool
65 ACLRegexData::match(char const *word)
66 {
67 if (word == NULL)
68 return 0;
69
70 debugs(28, 3, "aclRegexData::match: checking '" << word << "'");
71
72 relist *first, *prev;
73
74 first = data;
75
76 prev = NULL;
77
78 relist *current = first;
79
80 while (current) {
81 debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'");
82
83 if (regexec(&current->regex, word, 0, 0, 0) == 0) {
84 if (prev != NULL) {
85 /* shift the element just found to the second position
86 * in the list */
87 prev->next = current->next;
88 current->next = first->next;
89 first->next = current;
90 }
91
92 debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'");
93 return 1;
94 }
95
96 prev = current;
97 current = current->next;
98 }
99
100 return 0;
101 }
102
103 wordlist *
104 ACLRegexData::dump()
105 {
106 wordlist *W = NULL;
107 relist *temp = data;
108 int flags = REG_EXTENDED | REG_NOSUB;
109
110 while (temp != NULL) {
111 if (temp->flags != flags) {
112 if ((temp->flags&REG_ICASE) != 0) {
113 wordlistAdd(&W, "-i");
114 } else {
115 wordlistAdd(&W, "+i");
116 }
117 flags = temp->flags;
118 }
119
120 wordlistAdd(&W, temp->pattern);
121 temp = temp->next;
122 }
123
124 return W;
125 }
126
127 static const char *
128 removeUnnecessaryWildcards(char * t)
129 {
130 char * orig = t;
131
132 if (strncmp(t, "^.*", 3) == 0)
133 t += 3;
134
135 /* NOTE: an initial '.' might seem unnessary but is not;
136 * it can be a valid requirement that cannot be optimised
137 */
138 while (*t == '.' && *(t+1) == '*') {
139 t += 2;
140 }
141
142 if (*t == '\0') {
143 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
144 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
145 return ".*";
146 }
147 if (t != orig) {
148 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
149 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
150 }
151
152 return t;
153 }
154
155 static relist **
156 compileRE(relist **Tail, char * RE, int flags)
157 {
158 int errcode;
159 relist *q;
160 regex_t comp;
161
162 if (RE == NULL || *RE == '\0')
163 return Tail;
164
165 if ((errcode = regcomp(&comp, RE, flags)) != 0) {
166 char errbuf[256];
167 regerror(errcode, &comp, errbuf, sizeof errbuf);
168 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
169 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
170 return NULL;
171 }
172 debugs(28, 2, "compileRE: compiled '" << RE << "' with flags " << flags );
173
174 q = (relist *) memAllocate(MEM_RELIST);
175 q->pattern = xstrdup(RE);
176 q->regex = comp;
177 q->flags = flags;
178 *(Tail) = q;
179 Tail = &q->next;
180
181 return Tail;
182 }
183
184 /** Compose and compile one large RE from a set of (small) REs.
185 * The ultimate goal is to have only one RE per ACL so that regexec() is
186 * called only once per ACL.
187 */
188 static int
189 compileOptimisedREs(relist **curlist, wordlist * wl)
190 {
191 relist **Tail;
192 relist *newlist;
193 relist **newlistp;
194 int numREs = 0;
195 int flags = REG_EXTENDED | REG_NOSUB;
196 int largeREindex = 0;
197 char largeRE[BUFSIZ];
198
199 newlist = NULL;
200 newlistp = &newlist;
201
202 largeRE[0] = '\0';
203
204 while (wl != NULL) {
205 int RElen;
206 RElen = strlen( wl->key );
207
208 if (strcmp(wl->key, "-i") == 0) {
209 if (flags & REG_ICASE) {
210 /* optimisation of -i ... -i */
211 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
212 } else {
213 debugs(28, 2, "compileOptimisedREs: -i" );
214 newlistp = compileRE( newlistp, largeRE, flags );
215 if (newlistp == NULL) {
216 aclDestroyRegexList( newlist );
217 return 0;
218 }
219 flags |= REG_ICASE;
220 largeRE[largeREindex=0] = '\0';
221 }
222 } else if (strcmp(wl->key, "+i") == 0) {
223 if ((flags & REG_ICASE) == 0) {
224 /* optimisation of +i ... +i */
225 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
226 } else {
227 debugs(28, 2, "compileOptimisedREs: +i");
228 newlistp = compileRE( newlistp, largeRE, flags );
229 if (newlistp == NULL) {
230 aclDestroyRegexList( newlist );
231 return 0;
232 }
233 flags &= ~REG_ICASE;
234 largeRE[largeREindex=0] = '\0';
235 }
236 } else if (RElen + largeREindex + 3 < BUFSIZ-1) {
237 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl->key << "'");
238 if (largeREindex > 0) {
239 largeRE[largeREindex] = '|';
240 ++largeREindex;
241 }
242 largeRE[largeREindex] = '(';
243 ++largeREindex;
244 for (char * t = wl->key; *t != '\0'; ++t) {
245 largeRE[largeREindex] = *t;
246 ++largeREindex;
247 }
248 largeRE[largeREindex] = ')';
249 ++largeREindex;
250 largeRE[largeREindex] = '\0';
251 ++numREs;
252 } else {
253 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
254 newlistp = compileRE( newlistp, largeRE, flags );
255 if (newlistp == NULL) {
256 aclDestroyRegexList( newlist );
257 return 0;
258 }
259 largeRE[largeREindex=0] = '\0';
260 continue; /* do the loop again to add the RE to largeRE */
261 }
262 wl = wl->next;
263 }
264
265 newlistp = compileRE( newlistp, largeRE, flags );
266 if (newlistp == NULL) {
267 aclDestroyRegexList( newlist );
268 return 0;
269 }
270
271 /* all was successful, so put the new list at the tail */
272 if (*curlist == NULL) {
273 *curlist = newlist;
274 } else {
275 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
276 ;
277 (*Tail) = newlist;
278 }
279
280 debugs(28, 2, "compileOptimisedREs: " << numREs << " REs are optimised into one RE.");
281 if (numREs > 100) {
282 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
283 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
284 "Consider using less REs or use rules without expressions like 'dstdomain'.");
285 }
286
287 return 1;
288 }
289
290 static void
291 compileUnoptimisedREs(relist **curlist, wordlist * wl)
292 {
293 relist **Tail;
294 relist **newTail;
295 int flags = REG_EXTENDED | REG_NOSUB;
296
297 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
298 ;
299
300 while (wl != NULL) {
301 if (strcmp(wl->key, "-i") == 0) {
302 flags |= REG_ICASE;
303 } else if (strcmp(wl->key, "+i") == 0) {
304 flags &= ~REG_ICASE;
305 } else {
306 newTail = compileRE( Tail, wl->key , flags );
307 if (newTail == NULL)
308 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Compile failed: '" << wl->key << "'");
309 else
310 Tail = newTail;
311 }
312 wl = wl->next;
313 }
314 }
315
316 static void
317 aclParseRegexList(relist **curlist)
318 {
319 char *t;
320 wordlist *wl = NULL;
321
322 debugs(28, 2, HERE << "aclParseRegexList: new Regex line or file");
323
324 while ((t = ConfigParser::strtokFile()) != NULL) {
325 const char *clean = removeUnnecessaryWildcards(t);
326 if (strlen(clean) > BUFSIZ-1) {
327 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
328 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
329 } else {
330 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean << "'");
331 wordlistAdd(&wl, clean);
332 }
333 }
334
335 if (!compileOptimisedREs(curlist, wl)) {
336 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
337 compileUnoptimisedREs(curlist, wl);
338 }
339
340 wordlistDestroy(&wl);
341 }
342
343 void
344 ACLRegexData::parse()
345 {
346 aclParseRegexList(&data);
347 }
348
349 bool
350 ACLRegexData::empty() const
351 {
352 return data == NULL;
353 }
354
355 ACLData<char const *> *
356 ACLRegexData::clone() const
357 {
358 /* Regex's don't clone yet. */
359 assert (!data);
360 return new ACLRegexData;
361 }