]> git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / acl / RegexData.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
13 */
14
15 /* DEBUG: section 28 Access Control */
16
17 #include "squid.h"
18 #include "acl/Acl.h"
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "ConfigParser.h"
22 #include "Debug.h"
23 #include "RegexList.h"
24 #include "wordlist.h"
25
26 static void
27 aclDestroyRegexList(RegexList * data)
28 {
29 RegexList *next = NULL;
30
31 for (; data; data = next) {
32 next = data->next;
33 regfree(&data->regex);
34 safe_free(data->pattern);
35 memFree(data, MEM_RELIST);
36 }
37 }
38
39 ACLRegexData::~ACLRegexData()
40 {
41 aclDestroyRegexList(data);
42 }
43
44 bool
45 ACLRegexData::match(char const *word)
46 {
47 if (word == NULL)
48 return 0;
49
50 debugs(28, 3, "aclRegexData::match: checking '" << word << "'");
51
52 RegexList *first, *prev;
53
54 first = data;
55
56 prev = NULL;
57
58 RegexList *current = first;
59
60 while (current) {
61 debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'");
62
63 if (regexec(&current->regex, word, 0, 0, 0) == 0) {
64 if (prev != NULL) {
65 /* shift the element just found to the second position
66 * in the list */
67 prev->next = current->next;
68 current->next = first->next;
69 first->next = current;
70 }
71
72 debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'");
73 return 1;
74 }
75
76 prev = current;
77 current = current->next;
78 }
79
80 return 0;
81 }
82
83 SBufList
84 ACLRegexData::dump() const
85 {
86 SBufList sl;
87 RegexList *temp = data;
88 int flags = REG_EXTENDED | REG_NOSUB;
89
90 while (temp != NULL) {
91 if (temp->flags != flags) {
92 if ((temp->flags&REG_ICASE) != 0) {
93 sl.push_back(SBuf("-i"));
94 } else {
95 sl.push_back(SBuf("+i"));
96 }
97 flags = temp->flags;
98 }
99
100 sl.push_back(SBuf(temp->pattern));
101 temp = temp->next;
102 }
103
104 return sl;
105 }
106
107 static const char *
108 removeUnnecessaryWildcards(char * t)
109 {
110 char * orig = t;
111
112 if (strncmp(t, "^.*", 3) == 0)
113 t += 3;
114
115 /* NOTE: an initial '.' might seem unnessary but is not;
116 * it can be a valid requirement that cannot be optimised
117 */
118 while (*t == '.' && *(t+1) == '*') {
119 t += 2;
120 }
121
122 if (*t == '\0') {
123 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
124 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
125 return ".*";
126 }
127 if (t != orig) {
128 debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
129 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
130 }
131
132 return t;
133 }
134
135 static RegexList **
136 compileRE(RegexList **Tail, char * RE, int flags)
137 {
138 int errcode;
139 RegexList *q;
140 regex_t comp;
141
142 if (RE == NULL || *RE == '\0')
143 return Tail;
144
145 if ((errcode = regcomp(&comp, RE, flags)) != 0) {
146 char errbuf[256];
147 regerror(errcode, &comp, errbuf, sizeof errbuf);
148 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
149 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
150 return NULL;
151 }
152 debugs(28, 2, "compileRE: compiled '" << RE << "' with flags " << flags );
153
154 q = (RegexList *) memAllocate(MEM_RELIST);
155 q->pattern = xstrdup(RE);
156 q->regex = comp;
157 q->flags = flags;
158 *(Tail) = q;
159 Tail = &q->next;
160
161 return Tail;
162 }
163
164 /** Compose and compile one large RE from a set of (small) REs.
165 * The ultimate goal is to have only one RE per ACL so that regexec() is
166 * called only once per ACL.
167 */
168 static int
169 compileOptimisedREs(RegexList **curlist, wordlist * wl)
170 {
171 RegexList **Tail;
172 RegexList *newlist;
173 RegexList **newlistp;
174 int numREs = 0;
175 int flags = REG_EXTENDED | REG_NOSUB;
176 int largeREindex = 0;
177 char largeRE[BUFSIZ];
178
179 newlist = NULL;
180 newlistp = &newlist;
181
182 largeRE[0] = '\0';
183
184 while (wl != NULL) {
185 int RElen;
186 RElen = strlen( wl->key );
187
188 if (strcmp(wl->key, "-i") == 0) {
189 if (flags & REG_ICASE) {
190 /* optimisation of -i ... -i */
191 debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" );
192 } else {
193 debugs(28, 2, "compileOptimisedREs: -i" );
194 newlistp = compileRE( newlistp, largeRE, flags );
195 if (newlistp == NULL) {
196 aclDestroyRegexList( newlist );
197 return 0;
198 }
199 flags |= REG_ICASE;
200 largeRE[largeREindex=0] = '\0';
201 }
202 } else if (strcmp(wl->key, "+i") == 0) {
203 if ((flags & REG_ICASE) == 0) {
204 /* optimisation of +i ... +i */
205 debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i");
206 } else {
207 debugs(28, 2, "compileOptimisedREs: +i");
208 newlistp = compileRE( newlistp, largeRE, flags );
209 if (newlistp == NULL) {
210 aclDestroyRegexList( newlist );
211 return 0;
212 }
213 flags &= ~REG_ICASE;
214 largeRE[largeREindex=0] = '\0';
215 }
216 } else if (RElen + largeREindex + 3 < BUFSIZ-1) {
217 debugs(28, 2, "compileOptimisedREs: adding RE '" << wl->key << "'");
218 if (largeREindex > 0) {
219 largeRE[largeREindex] = '|';
220 ++largeREindex;
221 }
222 largeRE[largeREindex] = '(';
223 ++largeREindex;
224 for (char * t = wl->key; *t != '\0'; ++t) {
225 largeRE[largeREindex] = *t;
226 ++largeREindex;
227 }
228 largeRE[largeREindex] = ')';
229 ++largeREindex;
230 largeRE[largeREindex] = '\0';
231 ++numREs;
232 } else {
233 debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." );
234 newlistp = compileRE( newlistp, largeRE, flags );
235 if (newlistp == NULL) {
236 aclDestroyRegexList( newlist );
237 return 0;
238 }
239 largeRE[largeREindex=0] = '\0';
240 continue; /* do the loop again to add the RE to largeRE */
241 }
242 wl = wl->next;
243 }
244
245 newlistp = compileRE( newlistp, largeRE, flags );
246 if (newlistp == NULL) {
247 aclDestroyRegexList( newlist );
248 return 0;
249 }
250
251 /* all was successful, so put the new list at the tail */
252 if (*curlist == NULL) {
253 *curlist = newlist;
254 } else {
255 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
256 ;
257 (*Tail) = newlist;
258 }
259
260 debugs(28, 2, "compileOptimisedREs: " << numREs << " REs are optimised into one RE.");
261 if (numREs > 100) {
262 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
263 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
264 "Consider using less REs or use rules without expressions like 'dstdomain'.");
265 }
266
267 return 1;
268 }
269
270 static void
271 compileUnoptimisedREs(RegexList **curlist, wordlist * wl)
272 {
273 RegexList **Tail;
274 RegexList **newTail;
275 int flags = REG_EXTENDED | REG_NOSUB;
276
277 for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next))
278 ;
279
280 while (wl != NULL) {
281 if (strcmp(wl->key, "-i") == 0) {
282 flags |= REG_ICASE;
283 } else if (strcmp(wl->key, "+i") == 0) {
284 flags &= ~REG_ICASE;
285 } else {
286 newTail = compileRE( Tail, wl->key , flags );
287 if (newTail == NULL)
288 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Compile failed: '" << wl->key << "'");
289 else
290 Tail = newTail;
291 }
292 wl = wl->next;
293 }
294 }
295
296 static void
297 aclParseRegexList(RegexList **curlist)
298 {
299 char *t;
300 wordlist *wl = NULL;
301
302 debugs(28, 2, HERE << "aclParseRegexList: new Regex line or file");
303
304 while ((t = ConfigParser::RegexStrtokFile()) != NULL) {
305 const char *clean = removeUnnecessaryWildcards(t);
306 if (strlen(clean) > BUFSIZ-1) {
307 debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line);
308 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
309 } else {
310 debugs(28, 3, "aclParseRegexList: buffering RE '" << clean << "'");
311 wordlistAdd(&wl, clean);
312 }
313 }
314
315 if (!compileOptimisedREs(curlist, wl)) {
316 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
317 compileUnoptimisedREs(curlist, wl);
318 }
319
320 wordlistDestroy(&wl);
321 }
322
323 void
324 ACLRegexData::parse()
325 {
326 aclParseRegexList(&data);
327 }
328
329 bool
330 ACLRegexData::empty() const
331 {
332 return data == NULL;
333 }
334
335 ACLData<char const *> *
336 ACLRegexData::clone() const
337 {
338 /* Regex's don't clone yet. */
339 assert (!data);
340 return new ACLRegexData;
341 }
342