]> git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
Boilerplate: update copyright blurbs on src/
[thirdparty/squid.git] / src / acl / RegexData.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
13 */
14
15 /* DEBUG: section 28 Access Control */
16
17 #include "squid.h"
18 #include "acl/Acl.h"
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "ConfigParser.h"
22 #include "Debug.h"
23 #include "Mem.h"
24 #include "RegexList.h"
25 #include "wordlist.h"
26
27 static void
28 aclDestroyRegexList(RegexList * data)
29 {
30 RegexList *next = NULL;
31
32 for (; data; data = next) {
33 next = data->next;
34 regfree(&data->regex);
35 safe_free(data->pattern);
36 memFree(data, MEM_RELIST);
37 }
38 }
39
40 ACLRegexData::~ACLRegexData()
41 {
42 aclDestroyRegexList(data);
43 }
44
45 bool
46 ACLRegexData::match(char const *word)
47 {
48 if (word == NULL)
49 return 0;
50
51 debugs(28, 3, "aclRegexData::match: checking '" << word << "'");
52
53 RegexList *first, *prev;
54
55 first = data;
56
57 prev = NULL;
58
59 RegexList *current = first;
60
61 while (current) {
62 debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'");
63
64 if (regexec(&current->regex, word, 0, 0, 0) == 0) {
65 if (prev != NULL) {
66 /* shift the element just found to the second position
67 * in the list */
68 prev->next = current->next;
69 current->next = first->next;
70 first->next = current;
71 }
72
73 debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'");
74 return 1;
75 }
76
77 prev = current;
78 current = current->next;
79 }
80
81 return 0;
82 }
83
84 SBufList
85 ACLRegexData::dump() const
86 {
87 SBufList sl;
88 RegexList *temp = data;
89 int flags = REG_EXTENDED | REG_NOSUB;
90
91 while (temp != NULL) {
92 if (temp->flags != flags) {
93 if ((temp->flags&REG_ICASE) != 0) {
94 sl.push_back(SBuf("-i"));
95 } else {
96 sl.push_back(SBuf("+i"));
97 }
98 flags = temp->flags;
99 }
100
101 sl.push_back(SBuf(temp->pattern));
102 temp = temp->next;
103 }
104
105 return sl;
106 }
107
108 static const char *
109 removeUnnecessaryWildcards(char * t)
110 {
111 char * orig = t;
112
113 if (strncmp(t, "^.*", 3) == 0)
114 t += 3;
115
116 /* NOTE: an initial '.' might seem unnessary but is not;
117 * it can be a valid requirement that cannot be optimised
118 */
119 while (*t == '.' && *(t+1) == '*') {
120 t += 2;
121 }
122
123 if (*t == '\0') {
124 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
125 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
126 return ".*";
127 }
128 if (t != orig) {
129 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
130 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
131 }
132
133 return t;
134 }
135
136 static RegexList **
137 compileRE(RegexList **Tail, char * RE, int flags)
138 {
139 int errcode;
140 RegexList *q;
141 regex_t comp;
142
143 if (RE == NULL || *RE == '\0')
144 return Tail;
145
146 if ((errcode = regcomp(&comp, RE, flags)) != 0) {
147 char errbuf[256];
148 regerror(errcode, &comp, errbuf, sizeof errbuf);
149 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
150 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
151 return NULL;
152 }
153 debugs(28, 2, "compileRE: compiled '" << RE << "' with flags " << flags );
154
155 q = (RegexList *) memAllocate(MEM_RELIST);
156 q->pattern = xstrdup(RE);
157 q->regex = comp;
158 q->flags = flags;
159 *(Tail) = q;
160 Tail = &q->next;
161
162 return Tail;
163 }
164
165 /** Compose and compile one large RE from a set of (small) REs.
166 * The ultimate goal is to have only one RE per ACL so that regexec() is
167 * called only once per ACL.
168 */
169 static int
170 compileOptimisedREs(RegexList **curlist, wordlist * wl)
171 {
172 RegexList **Tail;
173 RegexList *newlist;
174 RegexList **newlistp;
175 int numREs = 0;
176 int flags = REG_EXTENDED | REG_NOSUB;
177 int largeREindex = 0;
178 char largeRE[BUFSIZ];
179
180 newlist = NULL;
181 newlistp = &newlist;
182
183 largeRE[0] = '\0';
184
185 while (wl != NULL) {
186 int RElen;
187 RElen = strlen( wl->key );
188
189 if (strcmp(wl->key, "-i") == 0) {
190 if (flags & REG_ICASE) {
191 /* optimisation of -i ... -i */
192 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
193 } else {
194 debugs(28, 2, "compileOptimisedREs: -i" );
195 newlistp = compileRE( newlistp, largeRE, flags );
196 if (newlistp == NULL) {
197 aclDestroyRegexList( newlist );
198 return 0;
199 }
200 flags |= REG_ICASE;
201 largeRE[largeREindex=0] = '\0';
202 }
203 } else if (strcmp(wl->key, "+i") == 0) {
204 if ((flags & REG_ICASE) == 0) {
205 /* optimisation of +i ... +i */
206 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
207 } else {
208 debugs(28, 2, "compileOptimisedREs: +i");
209 newlistp = compileRE( newlistp, largeRE, flags );
210 if (newlistp == NULL) {
211 aclDestroyRegexList( newlist );
212 return 0;
213 }
214 flags &= ~REG_ICASE;
215 largeRE[largeREindex=0] = '\0';
216 }
217 } else if (RElen + largeREindex + 3 < BUFSIZ-1) {
218 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl->key << "'");
219 if (largeREindex > 0) {
220 largeRE[largeREindex] = '|';
221 ++largeREindex;
222 }
223 largeRE[largeREindex] = '(';
224 ++largeREindex;
225 for (char * t = wl->key; *t != '\0'; ++t) {
226 largeRE[largeREindex] = *t;
227 ++largeREindex;
228 }
229 largeRE[largeREindex] = ')';
230 ++largeREindex;
231 largeRE[largeREindex] = '\0';
232 ++numREs;
233 } else {
234 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
235 newlistp = compileRE( newlistp, largeRE, flags );
236 if (newlistp == NULL) {
237 aclDestroyRegexList( newlist );
238 return 0;
239 }
240 largeRE[largeREindex=0] = '\0';
241 continue; /* do the loop again to add the RE to largeRE */
242 }
243 wl = wl->next;
244 }
245
246 newlistp = compileRE( newlistp, largeRE, flags );
247 if (newlistp == NULL) {
248 aclDestroyRegexList( newlist );
249 return 0;
250 }
251
252 /* all was successful, so put the new list at the tail */
253 if (*curlist == NULL) {
254 *curlist = newlist;
255 } else {
256 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
257 ;
258 (*Tail) = newlist;
259 }
260
261 debugs(28, 2, "compileOptimisedREs: " << numREs << " REs are optimised into one RE.");
262 if (numREs > 100) {
263 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
264 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
265 "Consider using less REs or use rules without expressions like 'dstdomain'.");
266 }
267
268 return 1;
269 }
270
271 static void
272 compileUnoptimisedREs(RegexList **curlist, wordlist * wl)
273 {
274 RegexList **Tail;
275 RegexList **newTail;
276 int flags = REG_EXTENDED | REG_NOSUB;
277
278 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
279 ;
280
281 while (wl != NULL) {
282 if (strcmp(wl->key, "-i") == 0) {
283 flags |= REG_ICASE;
284 } else if (strcmp(wl->key, "+i") == 0) {
285 flags &= ~REG_ICASE;
286 } else {
287 newTail = compileRE( Tail, wl->key , flags );
288 if (newTail == NULL)
289 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Compile failed: '" << wl->key << "'");
290 else
291 Tail = newTail;
292 }
293 wl = wl->next;
294 }
295 }
296
297 static void
298 aclParseRegexList(RegexList **curlist)
299 {
300 char *t;
301 wordlist *wl = NULL;
302
303 debugs(28, 2, HERE << "aclParseRegexList: new Regex line or file");
304
305 while ((t = ConfigParser::RegexStrtokFile()) != NULL) {
306 const char *clean = removeUnnecessaryWildcards(t);
307 if (strlen(clean) > BUFSIZ-1) {
308 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
309 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
310 } else {
311 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean << "'");
312 wordlistAdd(&wl, clean);
313 }
314 }
315
316 if (!compileOptimisedREs(curlist, wl)) {
317 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
318 compileUnoptimisedREs(curlist, wl);
319 }
320
321 wordlistDestroy(&wl);
322 }
323
324 void
325 ACLRegexData::parse()
326 {
327 aclParseRegexList(&data);
328 }
329
330 bool
331 ACLRegexData::empty() const
332 {
333 return data == NULL;
334 }
335
336 ACLData<char const *> *
337 ACLRegexData::clone() const
338 {
339 /* Regex's don't clone yet. */
340 assert (!data);
341 return new ACLRegexData;
342 }