]> git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
Merged from trunk (r13356).
[thirdparty/squid.git] / src / acl / RegexData.cc
1 /*
2 * DEBUG: section 28 Access Control
3 * AUTHOR: Duane Wessels
4 * AUTHOR: Marcus Kool
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 *
33 * Copyright (c) 2003, Robert Collins <robertc@squid-cache.org>
34 * Copyright (c) 2011, Marcus Kool
35 */
36
37 #include "squid.h"
38 #include "acl/Acl.h"
39 #include "acl/Checklist.h"
40 #include "acl/RegexData.h"
41 #include "ConfigParser.h"
42 #include "Debug.h"
43 #include "Mem.h"
44 #include "RegexList.h"
45 #include "wordlist.h"
46
47 static void
48 aclDestroyRegexList(RegexList * data)
49 {
50 RegexList *next = NULL;
51
52 for (; data; data = next) {
53 next = data->next;
54 regfree(&data->regex);
55 safe_free(data->pattern);
56 memFree(data, MEM_RELIST);
57 }
58 }
59
60 ACLRegexData::~ACLRegexData()
61 {
62 aclDestroyRegexList(data);
63 }
64
65 bool
66 ACLRegexData::match(char const *word)
67 {
68 if (word == NULL)
69 return 0;
70
71 debugs(28, 3, "aclRegexData::match: checking '" << word << "'");
72
73 RegexList *first, *prev;
74
75 first = data;
76
77 prev = NULL;
78
79 RegexList *current = first;
80
81 while (current) {
82 debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'");
83
84 if (regexec(&current->regex, word, 0, 0, 0) == 0) {
85 if (prev != NULL) {
86 /* shift the element just found to the second position
87 * in the list */
88 prev->next = current->next;
89 current->next = first->next;
90 first->next = current;
91 }
92
93 debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'");
94 return 1;
95 }
96
97 prev = current;
98 current = current->next;
99 }
100
101 return 0;
102 }
103
104 SBufList
105 ACLRegexData::dump() const
106 {
107 SBufList sl;
108 RegexList *temp = data;
109 int flags = REG_EXTENDED | REG_NOSUB;
110
111 while (temp != NULL) {
112 if (temp->flags != flags) {
113 if ((temp->flags&REG_ICASE) != 0) {
114 sl.push_back(SBuf("-i"));
115 } else {
116 sl.push_back(SBuf("+i"));
117 }
118 flags = temp->flags;
119 }
120
121 sl.push_back(SBuf(temp->pattern));
122 temp = temp->next;
123 }
124
125 return sl;
126 }
127
128 static const char *
129 removeUnnecessaryWildcards(char * t)
130 {
131 char * orig = t;
132
133 if (strncmp(t, "^.*", 3) == 0)
134 t += 3;
135
136 /* NOTE: an initial '.' might seem unnessary but is not;
137 * it can be a valid requirement that cannot be optimised
138 */
139 while (*t == '.' && *(t+1) == '*') {
140 t += 2;
141 }
142
143 if (*t == '\0') {
144 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
145 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
146 return ".*";
147 }
148 if (t != orig) {
149 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
150 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
151 }
152
153 return t;
154 }
155
156 static RegexList **
157 compileRE(RegexList **Tail, char * RE, int flags)
158 {
159 int errcode;
160 RegexList *q;
161 regex_t comp;
162
163 if (RE == NULL || *RE == '\0')
164 return Tail;
165
166 if ((errcode = regcomp(&comp, RE, flags)) != 0) {
167 char errbuf[256];
168 regerror(errcode, &comp, errbuf, sizeof errbuf);
169 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
170 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
171 return NULL;
172 }
173 debugs(28, 2, "compileRE: compiled '" << RE << "' with flags " << flags );
174
175 q = (RegexList *) memAllocate(MEM_RELIST);
176 q->pattern = xstrdup(RE);
177 q->regex = comp;
178 q->flags = flags;
179 *(Tail) = q;
180 Tail = &q->next;
181
182 return Tail;
183 }
184
185 /** Compose and compile one large RE from a set of (small) REs.
186 * The ultimate goal is to have only one RE per ACL so that regexec() is
187 * called only once per ACL.
188 */
189 static int
190 compileOptimisedREs(RegexList **curlist, wordlist * wl)
191 {
192 RegexList **Tail;
193 RegexList *newlist;
194 RegexList **newlistp;
195 int numREs = 0;
196 int flags = REG_EXTENDED | REG_NOSUB;
197 int largeREindex = 0;
198 char largeRE[BUFSIZ];
199
200 newlist = NULL;
201 newlistp = &newlist;
202
203 largeRE[0] = '\0';
204
205 while (wl != NULL) {
206 int RElen;
207 RElen = strlen( wl->key );
208
209 if (strcmp(wl->key, "-i") == 0) {
210 if (flags & REG_ICASE) {
211 /* optimisation of -i ... -i */
212 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
213 } else {
214 debugs(28, 2, "compileOptimisedREs: -i" );
215 newlistp = compileRE( newlistp, largeRE, flags );
216 if (newlistp == NULL) {
217 aclDestroyRegexList( newlist );
218 return 0;
219 }
220 flags |= REG_ICASE;
221 largeRE[largeREindex=0] = '\0';
222 }
223 } else if (strcmp(wl->key, "+i") == 0) {
224 if ((flags & REG_ICASE) == 0) {
225 /* optimisation of +i ... +i */
226 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
227 } else {
228 debugs(28, 2, "compileOptimisedREs: +i");
229 newlistp = compileRE( newlistp, largeRE, flags );
230 if (newlistp == NULL) {
231 aclDestroyRegexList( newlist );
232 return 0;
233 }
234 flags &= ~REG_ICASE;
235 largeRE[largeREindex=0] = '\0';
236 }
237 } else if (RElen + largeREindex + 3 < BUFSIZ-1) {
238 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl->key << "'");
239 if (largeREindex > 0) {
240 largeRE[largeREindex] = '|';
241 ++largeREindex;
242 }
243 largeRE[largeREindex] = '(';
244 ++largeREindex;
245 for (char * t = wl->key; *t != '\0'; ++t) {
246 largeRE[largeREindex] = *t;
247 ++largeREindex;
248 }
249 largeRE[largeREindex] = ')';
250 ++largeREindex;
251 largeRE[largeREindex] = '\0';
252 ++numREs;
253 } else {
254 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
255 newlistp = compileRE( newlistp, largeRE, flags );
256 if (newlistp == NULL) {
257 aclDestroyRegexList( newlist );
258 return 0;
259 }
260 largeRE[largeREindex=0] = '\0';
261 continue; /* do the loop again to add the RE to largeRE */
262 }
263 wl = wl->next;
264 }
265
266 newlistp = compileRE( newlistp, largeRE, flags );
267 if (newlistp == NULL) {
268 aclDestroyRegexList( newlist );
269 return 0;
270 }
271
272 /* all was successful, so put the new list at the tail */
273 if (*curlist == NULL) {
274 *curlist = newlist;
275 } else {
276 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
277 ;
278 (*Tail) = newlist;
279 }
280
281 debugs(28, 2, "compileOptimisedREs: " << numREs << " REs are optimised into one RE.");
282 if (numREs > 100) {
283 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
284 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
285 "Consider using less REs or use rules without expressions like 'dstdomain'.");
286 }
287
288 return 1;
289 }
290
291 static void
292 compileUnoptimisedREs(RegexList **curlist, wordlist * wl)
293 {
294 RegexList **Tail;
295 RegexList **newTail;
296 int flags = REG_EXTENDED | REG_NOSUB;
297
298 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
299 ;
300
301 while (wl != NULL) {
302 if (strcmp(wl->key, "-i") == 0) {
303 flags |= REG_ICASE;
304 } else if (strcmp(wl->key, "+i") == 0) {
305 flags &= ~REG_ICASE;
306 } else {
307 newTail = compileRE( Tail, wl->key , flags );
308 if (newTail == NULL)
309 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Compile failed: '" << wl->key << "'");
310 else
311 Tail = newTail;
312 }
313 wl = wl->next;
314 }
315 }
316
317 static void
318 aclParseRegexList(RegexList **curlist)
319 {
320 char *t;
321 wordlist *wl = NULL;
322
323 debugs(28, 2, HERE << "aclParseRegexList: new Regex line or file");
324
325 while ((t = ConfigParser::RegexStrtokFile()) != NULL) {
326 const char *clean = removeUnnecessaryWildcards(t);
327 if (strlen(clean) > BUFSIZ-1) {
328 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
329 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
330 } else {
331 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean << "'");
332 wordlistAdd(&wl, clean);
333 }
334 }
335
336 if (!compileOptimisedREs(curlist, wl)) {
337 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
338 compileUnoptimisedREs(curlist, wl);
339 }
340
341 wordlistDestroy(&wl);
342 }
343
344 void
345 ACLRegexData::parse()
346 {
347 aclParseRegexList(&data);
348 }
349
350 bool
351 ACLRegexData::empty() const
352 {
353 return data == NULL;
354 }
355
356 ACLData<char const *> *
357 ACLRegexData::clone() const
358 {
359 /* Regex's don't clone yet. */
360 assert (!data);
361 return new ACLRegexData;
362 }