]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | /* | |
10 | * Portions of this code are copyrighted and released under GPLv2+ by: | |
11 | * Copyright (c) 2011, Marcus Kool | |
12 | * Please add new claims to the CONTRIBUTORS file instead. | |
13 | */ | |
14 | ||
15 | /* DEBUG: section 28 Access Control */ | |
16 | ||
17 | #include "squid.h" | |
18 | #include "acl/Acl.h" | |
19 | #include "acl/Checklist.h" | |
20 | #include "acl/RegexData.h" | |
21 | #include "base/RegexPattern.h" | |
22 | #include "ConfigParser.h" | |
23 | #include "Debug.h" | |
24 | #include "sbuf/Algorithms.h" | |
25 | #include "sbuf/List.h" | |
26 | ||
27 | ACLRegexData::~ACLRegexData() | |
28 | { | |
29 | } | |
30 | ||
31 | bool | |
32 | ACLRegexData::match(char const *word) | |
33 | { | |
34 | if (!word) | |
35 | return 0; | |
36 | ||
37 | debugs(28, 3, "checking '" << word << "'"); | |
38 | ||
39 | // walk the list of patterns to see if one matches | |
40 | for (auto &i : data) { | |
41 | if (i.match(word)) { | |
42 | debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\''); | |
43 | // TODO: old code also popped the pattern to second place of the list | |
44 | // in order to reduce patterns search times. | |
45 | return 1; | |
46 | } | |
47 | } | |
48 | ||
49 | return 0; | |
50 | } | |
51 | ||
52 | SBufList | |
53 | ACLRegexData::dump() const | |
54 | { | |
55 | SBufList sl; | |
56 | int flags = REG_EXTENDED | REG_NOSUB; | |
57 | ||
58 | // walk and dump the list | |
59 | // keeping the flags values consistent | |
60 | for (auto &i : data) { | |
61 | if (i.flags != flags) { | |
62 | if ((i.flags®_ICASE) != 0) { | |
63 | sl.emplace_back("-i"); | |
64 | } else { | |
65 | sl.emplace_back("+i"); | |
66 | } | |
67 | flags = i.flags; | |
68 | } | |
69 | ||
70 | sl.emplace_back(i.c_str()); | |
71 | } | |
72 | ||
73 | return sl; | |
74 | } | |
75 | ||
76 | static const char * | |
77 | removeUnnecessaryWildcards(char * t) | |
78 | { | |
79 | char * orig = t; | |
80 | ||
81 | if (strncmp(t, "^.*", 3) == 0) | |
82 | t += 3; | |
83 | ||
84 | /* NOTE: an initial '.' might seem unnessary but is not; | |
85 | * it can be a valid requirement that cannot be optimised | |
86 | */ | |
87 | while (*t == '.' && *(t+1) == '*') { | |
88 | t += 2; | |
89 | } | |
90 | ||
91 | if (*t == '\0') { | |
92 | debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
93 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead."); | |
94 | return ".*"; | |
95 | } | |
96 | if (t != orig) { | |
97 | debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
98 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead."); | |
99 | } | |
100 | ||
101 | return t; | |
102 | } | |
103 | ||
104 | static bool | |
105 | compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags) | |
106 | { | |
107 | if (RE == NULL || *RE == '\0') | |
108 | return curlist.empty(); // XXX: old code did this. It looks wrong. | |
109 | ||
110 | regex_t comp; | |
111 | if (int errcode = regcomp(&comp, RE, flags)) { | |
112 | char errbuf[256]; | |
113 | regerror(errcode, &comp, errbuf, sizeof errbuf); | |
114 | debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
115 | debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf); | |
116 | return false; | |
117 | } | |
118 | debugs(28, 2, "compiled '" << RE << "' with flags " << flags); | |
119 | ||
120 | curlist.emplace_back(flags, RE); | |
121 | curlist.back().regex = comp; | |
122 | ||
123 | return true; | |
124 | } | |
125 | ||
126 | static bool | |
127 | compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags) | |
128 | { | |
129 | if (RE.empty()) | |
130 | return curlist.empty(); // XXX: old code did this. It looks wrong. | |
131 | SBuf regexp; | |
132 | static const SBuf openparen("("), closeparen(")"), separator(")|("); | |
133 | JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen, | |
134 | closeparen); | |
135 | return compileRE(curlist, regexp.c_str(), flags); | |
136 | } | |
137 | ||
138 | /** Compose and compile one large RE from a set of (small) REs. | |
139 | * The ultimate goal is to have only one RE per ACL so that match() is | |
140 | * called only once per ACL. | |
141 | */ | |
142 | static int | |
143 | compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl) | |
144 | { | |
145 | std::list<RegexPattern> newlist; | |
146 | SBufList accumulatedRE; | |
147 | int numREs = 0, reSize = 0; | |
148 | int flags = REG_EXTENDED | REG_NOSUB; | |
149 | ||
150 | for (const SBuf & configurationLineWord : sl) { | |
151 | static const SBuf minus_i("-i"); | |
152 | static const SBuf plus_i("+i"); | |
153 | if (configurationLineWord == minus_i) { | |
154 | if (flags & REG_ICASE) { | |
155 | /* optimisation of -i ... -i */ | |
156 | debugs(28, 2, "optimisation of -i ... -i" ); | |
157 | } else { | |
158 | debugs(28, 2, "-i" ); | |
159 | if (!compileRE(newlist, accumulatedRE, flags)) | |
160 | return 0; | |
161 | flags |= REG_ICASE; | |
162 | accumulatedRE.clear(); | |
163 | reSize = 0; | |
164 | } | |
165 | continue; | |
166 | } else if (configurationLineWord == plus_i) { | |
167 | if ((flags & REG_ICASE) == 0) { | |
168 | /* optimisation of +i ... +i */ | |
169 | debugs(28, 2, "optimisation of +i ... +i"); | |
170 | } else { | |
171 | debugs(28, 2, "+i"); | |
172 | if (!compileRE(newlist, accumulatedRE, flags)) | |
173 | return 0; | |
174 | flags &= ~REG_ICASE; | |
175 | accumulatedRE.clear(); | |
176 | reSize = 0; | |
177 | } | |
178 | continue; | |
179 | } | |
180 | ||
181 | debugs(28, 2, "adding RE '" << configurationLineWord << "'"); | |
182 | accumulatedRE.push_back(configurationLineWord); | |
183 | ++numREs; | |
184 | reSize += configurationLineWord.length(); | |
185 | ||
186 | if (reSize > 1024) { // must be < BUFSIZ everything included | |
187 | debugs(28, 2, "buffer full, generating new optimised RE..." ); | |
188 | if (!compileRE(newlist, accumulatedRE, flags)) | |
189 | return 0; | |
190 | accumulatedRE.clear(); | |
191 | reSize = 0; | |
192 | continue; /* do the loop again to add the RE to largeRE */ | |
193 | } | |
194 | } | |
195 | ||
196 | if (!compileRE(newlist, accumulatedRE, flags)) | |
197 | return 0; | |
198 | ||
199 | accumulatedRE.clear(); | |
200 | reSize = 0; | |
201 | ||
202 | /* all was successful, so put the new list at the tail */ | |
203 | curlist.splice(curlist.end(), newlist); | |
204 | ||
205 | debugs(28, 2, numREs << " REs are optimised into one RE."); | |
206 | if (numREs > 100) { | |
207 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
208 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " << | |
209 | "Consider using less REs or use rules without expressions like 'dstdomain'."); | |
210 | } | |
211 | ||
212 | return 1; | |
213 | } | |
214 | ||
215 | static void | |
216 | compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl) | |
217 | { | |
218 | int flags = REG_EXTENDED | REG_NOSUB; | |
219 | ||
220 | static const SBuf minus_i("-i"), plus_i("+i"); | |
221 | for (auto configurationLineWord : sl) { | |
222 | if (configurationLineWord == minus_i) { | |
223 | flags |= REG_ICASE; | |
224 | } else if (configurationLineWord == plus_i) { | |
225 | flags &= ~REG_ICASE; | |
226 | } else { | |
227 | if (!compileRE(curlist, configurationLineWord.c_str() , flags)) | |
228 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. " | |
229 | "Compile failed: '" << configurationLineWord << "'"); | |
230 | } | |
231 | } | |
232 | } | |
233 | ||
234 | void | |
235 | ACLRegexData::parse() | |
236 | { | |
237 | debugs(28, 2, "new Regex line or file"); | |
238 | ||
239 | SBufList sl; | |
240 | while (char *t = ConfigParser::RegexStrtokFile()) { | |
241 | const char *clean = removeUnnecessaryWildcards(t); | |
242 | if (strlen(clean) > BUFSIZ-1) { | |
243 | debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
244 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'"); | |
245 | } else { | |
246 | debugs(28, 3, "buffering RE '" << clean << "'"); | |
247 | sl.emplace_back(clean); | |
248 | } | |
249 | } | |
250 | ||
251 | if (!compileOptimisedREs(data, sl)) { | |
252 | debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation"); | |
253 | compileUnoptimisedREs(data, sl); | |
254 | } | |
255 | } | |
256 | ||
257 | bool | |
258 | ACLRegexData::empty() const | |
259 | { | |
260 | return data.empty(); | |
261 | } | |
262 | ||
263 | ACLData<char const *> * | |
264 | ACLRegexData::clone() const | |
265 | { | |
266 | /* Regex's don't clone yet. */ | |
267 | assert(data.empty()); | |
268 | return new ACLRegexData; | |
269 | } | |
270 |