]>
Commit | Line | Data |
---|---|---|
225b7b10 | 1 | /* |
bde978a6 | 2 | * Copyright (C) 1996-2015 The Squid Software Foundation and contributors |
225b7b10 | 3 | * |
bbc27441 AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | /* | |
10 | * Portions of this code are copyrighted and released under GPLv2+ by: | |
6564daea | 11 | * Copyright (c) 2011, Marcus Kool |
bbc27441 | 12 | * Please add new claims to the CONTRIBUTORS file instead. |
225b7b10 | 13 | */ |
14 | ||
bbc27441 AJ |
15 | /* DEBUG: section 28 Access Control */ |
16 | ||
582c2af2 | 17 | #include "squid.h" |
3ad63615 | 18 | #include "acl/Acl.h" |
602d9612 A |
19 | #include "acl/Checklist.h" |
20 | #include "acl/RegexData.h" | |
d295d770 | 21 | #include "ConfigParser.h" |
582c2af2 | 22 | #include "Debug.h" |
3ebc8300 | 23 | #include "RegexList.h" |
582c2af2 | 24 | #include "wordlist.h" |
225b7b10 | 25 | |
6564daea | 26 | static void |
3ebc8300 | 27 | aclDestroyRegexList(RegexList * data) |
48071869 | 28 | { |
3ebc8300 | 29 | RegexList *next = NULL; |
48071869 | 30 | |
31 | for (; data; data = next) { | |
32 | next = data->next; | |
33 | regfree(&data->regex); | |
34 | safe_free(data->pattern); | |
35 | memFree(data, MEM_RELIST); | |
36 | } | |
37 | } | |
38 | ||
225b7b10 | 39 | ACLRegexData::~ACLRegexData() |
40 | { | |
41 | aclDestroyRegexList(data); | |
62e76326 | 42 | } |
225b7b10 | 43 | |
44 | bool | |
48071869 | 45 | ACLRegexData::match(char const *word) |
225b7b10 | 46 | { |
48071869 | 47 | if (word == NULL) |
48 | return 0; | |
49 | ||
bf8fe701 | 50 | debugs(28, 3, "aclRegexData::match: checking '" << word << "'"); |
48071869 | 51 | |
3ebc8300 | 52 | RegexList *first, *prev; |
48071869 | 53 | |
54 | first = data; | |
55 | ||
56 | prev = NULL; | |
57 | ||
3ebc8300 | 58 | RegexList *current = first; |
48071869 | 59 | |
60 | while (current) { | |
bf8fe701 | 61 | debugs(28, 3, "aclRegexData::match: looking for '" << current->pattern << "'"); |
48071869 | 62 | |
63 | if (regexec(¤t->regex, word, 0, 0, 0) == 0) { | |
64 | if (prev != NULL) { | |
65 | /* shift the element just found to the second position | |
66 | * in the list */ | |
67 | prev->next = current->next; | |
68 | current->next = first->next; | |
69 | first->next = current; | |
70 | } | |
71 | ||
bf8fe701 | 72 | debugs(28, 2, "aclRegexData::match: match '" << current->pattern << "' found in '" << word << "'"); |
48071869 | 73 | return 1; |
74 | } | |
75 | ||
76 | prev = current; | |
77 | current = current->next; | |
78 | } | |
79 | ||
80 | return 0; | |
225b7b10 | 81 | } |
82 | ||
8966008b | 83 | SBufList |
4f8ca96e | 84 | ACLRegexData::dump() const |
225b7b10 | 85 | { |
8966008b | 86 | SBufList sl; |
3ebc8300 | 87 | RegexList *temp = data; |
ae315d9c | 88 | int flags = REG_EXTENDED | REG_NOSUB; |
48071869 | 89 | |
90 | while (temp != NULL) { | |
ae315d9c | 91 | if (temp->flags != flags) { |
37a8ae40 | 92 | if ((temp->flags®_ICASE) != 0) { |
8966008b | 93 | sl.push_back(SBuf("-i")); |
ae315d9c | 94 | } else { |
8966008b | 95 | sl.push_back(SBuf("+i")); |
ae315d9c AJ |
96 | } |
97 | flags = temp->flags; | |
98 | } | |
99 | ||
8966008b | 100 | sl.push_back(SBuf(temp->pattern)); |
48071869 | 101 | temp = temp->next; |
102 | } | |
103 | ||
8966008b | 104 | return sl; |
48071869 | 105 | } |
106 | ||
e022e8c6 | 107 | static const char * |
6564daea MK |
108 | removeUnnecessaryWildcards(char * t) |
109 | { | |
110 | char * orig = t; | |
111 | ||
112 | if (strncmp(t, "^.*", 3) == 0) | |
113 | t += 3; | |
114 | ||
115 | /* NOTE: an initial '.' might seem unnessary but is not; | |
116 | * it can be a valid requirement that cannot be optimised | |
117 | */ | |
118 | while (*t == '.' && *(t+1) == '*') { | |
119 | t += 2; | |
120 | } | |
121 | ||
122 | if (*t == '\0') { | |
123 | debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
124 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead."); | |
125 | return ".*"; | |
126 | } | |
127 | if (t != orig) { | |
128 | debugs(28, DBG_IMPORTANT, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
129 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead."); | |
130 | } | |
131 | ||
132 | return t; | |
133 | } | |
134 | ||
3ebc8300 FC |
135 | static RegexList ** |
136 | compileRE(RegexList **Tail, char * RE, int flags) | |
48071869 | 137 | { |
48071869 | 138 | int errcode; |
3ebc8300 | 139 | RegexList *q; |
6564daea MK |
140 | regex_t comp; |
141 | ||
142 | if (RE == NULL || *RE == '\0') | |
143 | return Tail; | |
144 | ||
145 | if ((errcode = regcomp(&comp, RE, flags)) != 0) { | |
146 | char errbuf[256]; | |
147 | regerror(errcode, &comp, errbuf, sizeof errbuf); | |
148 | debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
149 | debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf); | |
150 | return NULL; | |
151 | } | |
152 | debugs(28, 2, "compileRE: compiled '" << RE << "' with flags " << flags ); | |
153 | ||
3ebc8300 | 154 | q = (RegexList *) memAllocate(MEM_RELIST); |
6564daea MK |
155 | q->pattern = xstrdup(RE); |
156 | q->regex = comp; | |
157 | q->flags = flags; | |
158 | *(Tail) = q; | |
159 | Tail = &q->next; | |
160 | ||
161 | return Tail; | |
162 | } | |
163 | ||
164 | /** Compose and compile one large RE from a set of (small) REs. | |
165 | * The ultimate goal is to have only one RE per ACL so that regexec() is | |
166 | * called only once per ACL. | |
167 | */ | |
168 | static int | |
3ebc8300 | 169 | compileOptimisedREs(RegexList **curlist, wordlist * wl) |
6564daea | 170 | { |
3ebc8300 FC |
171 | RegexList **Tail; |
172 | RegexList *newlist; | |
173 | RegexList **newlistp; | |
6564daea | 174 | int numREs = 0; |
48071869 | 175 | int flags = REG_EXTENDED | REG_NOSUB; |
6564daea MK |
176 | int largeREindex = 0; |
177 | char largeRE[BUFSIZ]; | |
48071869 | 178 | |
6564daea MK |
179 | newlist = NULL; |
180 | newlistp = &newlist; | |
d740b987 | 181 | |
6564daea | 182 | largeRE[0] = '\0'; |
d740b987 | 183 | |
6564daea MK |
184 | while (wl != NULL) { |
185 | int RElen; | |
186 | RElen = strlen( wl->key ); | |
d740b987 | 187 | |
6564daea MK |
188 | if (strcmp(wl->key, "-i") == 0) { |
189 | if (flags & REG_ICASE) { | |
190 | /* optimisation of -i ... -i */ | |
191 | debugs(28, 2, "compileOptimisedREs: optimisation of -i ... -i" ); | |
192 | } else { | |
193 | debugs(28, 2, "compileOptimisedREs: -i" ); | |
194 | newlistp = compileRE( newlistp, largeRE, flags ); | |
195 | if (newlistp == NULL) { | |
196 | aclDestroyRegexList( newlist ); | |
197 | return 0; | |
198 | } | |
199 | flags |= REG_ICASE; | |
200 | largeRE[largeREindex=0] = '\0'; | |
201 | } | |
202 | } else if (strcmp(wl->key, "+i") == 0) { | |
203 | if ((flags & REG_ICASE) == 0) { | |
204 | /* optimisation of +i ... +i */ | |
205 | debugs(28, 2, "compileOptimisedREs: optimisation of +i ... +i"); | |
206 | } else { | |
207 | debugs(28, 2, "compileOptimisedREs: +i"); | |
208 | newlistp = compileRE( newlistp, largeRE, flags ); | |
209 | if (newlistp == NULL) { | |
210 | aclDestroyRegexList( newlist ); | |
211 | return 0; | |
212 | } | |
213 | flags &= ~REG_ICASE; | |
214 | largeRE[largeREindex=0] = '\0'; | |
215 | } | |
216 | } else if (RElen + largeREindex + 3 < BUFSIZ-1) { | |
217 | debugs(28, 2, "compileOptimisedREs: adding RE '" << wl->key << "'"); | |
f207fe64 FC |
218 | if (largeREindex > 0) { |
219 | largeRE[largeREindex] = '|'; | |
220 | ++largeREindex; | |
221 | } | |
222 | largeRE[largeREindex] = '('; | |
223 | ++largeREindex; | |
224 | for (char * t = wl->key; *t != '\0'; ++t) { | |
225 | largeRE[largeREindex] = *t; | |
226 | ++largeREindex; | |
227 | } | |
228 | largeRE[largeREindex] = ')'; | |
229 | ++largeREindex; | |
6564daea | 230 | largeRE[largeREindex] = '\0'; |
742a021b | 231 | ++numREs; |
6564daea MK |
232 | } else { |
233 | debugs(28, 2, "compileOptimisedREs: buffer full, generating new optimised RE..." ); | |
234 | newlistp = compileRE( newlistp, largeRE, flags ); | |
235 | if (newlistp == NULL) { | |
236 | aclDestroyRegexList( newlist ); | |
237 | return 0; | |
238 | } | |
239 | largeRE[largeREindex=0] = '\0'; | |
240 | continue; /* do the loop again to add the RE to largeRE */ | |
48071869 | 241 | } |
6564daea MK |
242 | wl = wl->next; |
243 | } | |
244 | ||
245 | newlistp = compileRE( newlistp, largeRE, flags ); | |
246 | if (newlistp == NULL) { | |
247 | aclDestroyRegexList( newlist ); | |
248 | return 0; | |
249 | } | |
250 | ||
251 | /* all was successful, so put the new list at the tail */ | |
252 | if (*curlist == NULL) { | |
253 | *curlist = newlist; | |
254 | } else { | |
255 | for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next)) | |
256 | ; | |
257 | (*Tail) = newlist; | |
258 | } | |
259 | ||
260 | debugs(28, 2, "compileOptimisedREs: " << numREs << " REs are optimised into one RE."); | |
261 | if (numREs > 100) { | |
262 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "" << cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
263 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " << | |
264 | "Consider using less REs or use rules without expressions like 'dstdomain'."); | |
265 | } | |
266 | ||
267 | return 1; | |
268 | } | |
48071869 | 269 | |
6564daea | 270 | static void |
3ebc8300 | 271 | compileUnoptimisedREs(RegexList **curlist, wordlist * wl) |
6564daea | 272 | { |
3ebc8300 FC |
273 | RegexList **Tail; |
274 | RegexList **newTail; | |
6564daea MK |
275 | int flags = REG_EXTENDED | REG_NOSUB; |
276 | ||
277 | for (Tail = curlist; *Tail != NULL; Tail = &((*Tail)->next)) | |
278 | ; | |
279 | ||
280 | while (wl != NULL) { | |
6564daea MK |
281 | if (strcmp(wl->key, "-i") == 0) { |
282 | flags |= REG_ICASE; | |
283 | } else if (strcmp(wl->key, "+i") == 0) { | |
48071869 | 284 | flags &= ~REG_ICASE; |
6564daea MK |
285 | } else { |
286 | newTail = compileRE( Tail, wl->key , flags ); | |
287 | if (newTail == NULL) | |
288 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Compile failed: '" << wl->key << "'"); | |
289 | else | |
290 | Tail = newTail; | |
48071869 | 291 | } |
6564daea MK |
292 | wl = wl->next; |
293 | } | |
294 | } | |
295 | ||
296 | static void | |
3ebc8300 | 297 | aclParseRegexList(RegexList **curlist) |
6564daea MK |
298 | { |
299 | char *t; | |
300 | wordlist *wl = NULL; | |
48071869 | 301 | |
6564daea MK |
302 | debugs(28, 2, HERE << "aclParseRegexList: new Regex line or file"); |
303 | ||
bde7a8ce | 304 | while ((t = ConfigParser::RegexStrtokFile()) != NULL) { |
e022e8c6 AJ |
305 | const char *clean = removeUnnecessaryWildcards(t); |
306 | if (strlen(clean) > BUFSIZ-1) { | |
6564daea | 307 | debugs(28, DBG_CRITICAL, "" << cfg_filename << " line " << config_lineno << ": " << config_input_line); |
e022e8c6 | 308 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'"); |
6564daea | 309 | } else { |
e022e8c6 AJ |
310 | debugs(28, 3, "aclParseRegexList: buffering RE '" << clean << "'"); |
311 | wordlistAdd(&wl, clean); | |
48071869 | 312 | } |
6564daea | 313 | } |
48071869 | 314 | |
6564daea MK |
315 | if (!compileOptimisedREs(curlist, wl)) { |
316 | debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation"); | |
317 | compileUnoptimisedREs(curlist, wl); | |
48071869 | 318 | } |
6564daea MK |
319 | |
320 | wordlistDestroy(&wl); | |
225b7b10 | 321 | } |
322 | ||
323 | void | |
324 | ACLRegexData::parse() | |
325 | { | |
326 | aclParseRegexList(&data); | |
327 | } | |
328 | ||
65092baf | 329 | bool |
330 | ACLRegexData::empty() const | |
331 | { | |
332 | return data == NULL; | |
333 | } | |
225b7b10 | 334 | |
5dee515e | 335 | ACLData<char const *> * |
225b7b10 | 336 | ACLRegexData::clone() const |
337 | { | |
338 | /* Regex's don't clone yet. */ | |
339 | assert (!data); | |
340 | return new ACLRegexData; | |
341 | } | |
f53969cc | 342 |