]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 1996-2021 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | /* | |
10 | * Portions of this code are copyrighted and released under GPLv2+ by: | |
11 | * Copyright (c) 2011, Marcus Kool | |
12 | * Please add new claims to the CONTRIBUTORS file instead. | |
13 | */ | |
14 | ||
15 | /* DEBUG: section 28 Access Control */ | |
16 | ||
17 | #include "squid.h" | |
18 | #include "acl/Acl.h" | |
19 | #include "acl/Checklist.h" | |
20 | #include "acl/RegexData.h" | |
21 | #include "base/RegexPattern.h" | |
22 | #include "ConfigParser.h" | |
23 | #include "Debug.h" | |
24 | #include "sbuf/Algorithms.h" | |
25 | #include "sbuf/List.h" | |
26 | ||
27 | ACLRegexData::~ACLRegexData() | |
28 | { | |
29 | } | |
30 | ||
31 | const Acl::ParameterFlags & | |
32 | ACLRegexData::supportedFlags() const | |
33 | { | |
34 | static const Acl::ParameterFlags flags = { "-i", "+i" }; | |
35 | return flags; | |
36 | } | |
37 | ||
38 | bool | |
39 | ACLRegexData::match(char const *word) | |
40 | { | |
41 | if (!word) | |
42 | return 0; | |
43 | ||
44 | debugs(28, 3, "checking '" << word << "'"); | |
45 | ||
46 | // walk the list of patterns to see if one matches | |
47 | for (auto &i : data) { | |
48 | if (i.match(word)) { | |
49 | debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\''); | |
50 | // TODO: old code also popped the pattern to second place of the list | |
51 | // in order to reduce patterns search times. | |
52 | return 1; | |
53 | } | |
54 | } | |
55 | ||
56 | return 0; | |
57 | } | |
58 | ||
59 | SBufList | |
60 | ACLRegexData::dump() const | |
61 | { | |
62 | SBufList sl; | |
63 | int flags = REG_EXTENDED | REG_NOSUB; | |
64 | ||
65 | // walk and dump the list | |
66 | // keeping the flags values consistent | |
67 | for (auto &i : data) { | |
68 | if (i.flags != flags) { | |
69 | if ((i.flags®_ICASE) != 0) { | |
70 | sl.emplace_back("-i"); | |
71 | } else { | |
72 | sl.emplace_back("+i"); | |
73 | } | |
74 | flags = i.flags; | |
75 | } | |
76 | ||
77 | sl.emplace_back(i.c_str()); | |
78 | } | |
79 | ||
80 | return sl; | |
81 | } | |
82 | ||
83 | static const char * | |
84 | removeUnnecessaryWildcards(char * t) | |
85 | { | |
86 | char * orig = t; | |
87 | ||
88 | if (strncmp(t, "^.*", 3) == 0) | |
89 | t += 3; | |
90 | ||
91 | /* NOTE: an initial '.' might seem unnessary but is not; | |
92 | * it can be a valid requirement that cannot be optimised | |
93 | */ | |
94 | while (*t == '.' && *(t+1) == '*') { | |
95 | t += 2; | |
96 | } | |
97 | ||
98 | if (*t == '\0') { | |
99 | debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
100 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead."); | |
101 | return ".*"; | |
102 | } | |
103 | if (t != orig) { | |
104 | debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
105 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead."); | |
106 | } | |
107 | ||
108 | return t; | |
109 | } | |
110 | ||
111 | static bool | |
112 | compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags) | |
113 | { | |
114 | if (RE == NULL || *RE == '\0') | |
115 | return curlist.empty(); // XXX: old code did this. It looks wrong. | |
116 | ||
117 | regex_t comp; | |
118 | if (int errcode = regcomp(&comp, RE, flags)) { | |
119 | char errbuf[256]; | |
120 | regerror(errcode, &comp, errbuf, sizeof errbuf); | |
121 | debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
122 | debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf); | |
123 | return false; | |
124 | } | |
125 | debugs(28, 2, "compiled '" << RE << "' with flags " << flags); | |
126 | ||
127 | curlist.emplace_back(flags, RE); | |
128 | curlist.back().regex = comp; | |
129 | ||
130 | return true; | |
131 | } | |
132 | ||
133 | static bool | |
134 | compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags) | |
135 | { | |
136 | if (RE.empty()) | |
137 | return curlist.empty(); // XXX: old code did this. It looks wrong. | |
138 | SBuf regexp; | |
139 | static const SBuf openparen("("), closeparen(")"), separator(")|("); | |
140 | JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen, | |
141 | closeparen); | |
142 | return compileRE(curlist, regexp.c_str(), flags); | |
143 | } | |
144 | ||
145 | /** Compose and compile one large RE from a set of (small) REs. | |
146 | * The ultimate goal is to have only one RE per ACL so that match() is | |
147 | * called only once per ACL. | |
148 | */ | |
149 | static int | |
150 | compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl) | |
151 | { | |
152 | std::list<RegexPattern> newlist; | |
153 | SBufList accumulatedRE; | |
154 | int numREs = 0, reSize = 0; | |
155 | int flags = REG_EXTENDED | REG_NOSUB; | |
156 | ||
157 | for (const SBuf & configurationLineWord : sl) { | |
158 | static const SBuf minus_i("-i"); | |
159 | static const SBuf plus_i("+i"); | |
160 | if (configurationLineWord == minus_i) { | |
161 | if (flags & REG_ICASE) { | |
162 | /* optimisation of -i ... -i */ | |
163 | debugs(28, 2, "optimisation of -i ... -i" ); | |
164 | } else { | |
165 | debugs(28, 2, "-i" ); | |
166 | if (!compileRE(newlist, accumulatedRE, flags)) | |
167 | return 0; | |
168 | flags |= REG_ICASE; | |
169 | accumulatedRE.clear(); | |
170 | reSize = 0; | |
171 | } | |
172 | continue; | |
173 | } else if (configurationLineWord == plus_i) { | |
174 | if ((flags & REG_ICASE) == 0) { | |
175 | /* optimisation of +i ... +i */ | |
176 | debugs(28, 2, "optimisation of +i ... +i"); | |
177 | } else { | |
178 | debugs(28, 2, "+i"); | |
179 | if (!compileRE(newlist, accumulatedRE, flags)) | |
180 | return 0; | |
181 | flags &= ~REG_ICASE; | |
182 | accumulatedRE.clear(); | |
183 | reSize = 0; | |
184 | } | |
185 | continue; | |
186 | } | |
187 | ||
188 | debugs(28, 2, "adding RE '" << configurationLineWord << "'"); | |
189 | accumulatedRE.push_back(configurationLineWord); | |
190 | ++numREs; | |
191 | reSize += configurationLineWord.length(); | |
192 | ||
193 | if (reSize > 1024) { // must be < BUFSIZ everything included | |
194 | debugs(28, 2, "buffer full, generating new optimised RE..." ); | |
195 | if (!compileRE(newlist, accumulatedRE, flags)) | |
196 | return 0; | |
197 | accumulatedRE.clear(); | |
198 | reSize = 0; | |
199 | continue; /* do the loop again to add the RE to largeRE */ | |
200 | } | |
201 | } | |
202 | ||
203 | if (!compileRE(newlist, accumulatedRE, flags)) | |
204 | return 0; | |
205 | ||
206 | accumulatedRE.clear(); | |
207 | reSize = 0; | |
208 | ||
209 | /* all was successful, so put the new list at the tail */ | |
210 | curlist.splice(curlist.end(), newlist); | |
211 | ||
212 | debugs(28, 2, numREs << " REs are optimised into one RE."); | |
213 | if (numREs > 100) { | |
214 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
215 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " << | |
216 | "Consider using less REs or use rules without expressions like 'dstdomain'."); | |
217 | } | |
218 | ||
219 | return 1; | |
220 | } | |
221 | ||
222 | static void | |
223 | compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl) | |
224 | { | |
225 | int flags = REG_EXTENDED | REG_NOSUB; | |
226 | ||
227 | static const SBuf minus_i("-i"), plus_i("+i"); | |
228 | for (auto configurationLineWord : sl) { | |
229 | if (configurationLineWord == minus_i) { | |
230 | flags |= REG_ICASE; | |
231 | } else if (configurationLineWord == plus_i) { | |
232 | flags &= ~REG_ICASE; | |
233 | } else { | |
234 | if (!compileRE(curlist, configurationLineWord.c_str(), flags)) | |
235 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. " | |
236 | "Compile failed: '" << configurationLineWord << "'"); | |
237 | } | |
238 | } | |
239 | } | |
240 | ||
241 | void | |
242 | ACLRegexData::parse() | |
243 | { | |
244 | debugs(28, 2, "new Regex line or file"); | |
245 | ||
246 | SBufList sl; | |
247 | while (char *t = ConfigParser::RegexStrtokFile()) { | |
248 | const char *clean = removeUnnecessaryWildcards(t); | |
249 | if (strlen(clean) > BUFSIZ-1) { | |
250 | debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line); | |
251 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'"); | |
252 | } else { | |
253 | debugs(28, 3, "buffering RE '" << clean << "'"); | |
254 | sl.emplace_back(clean); | |
255 | } | |
256 | } | |
257 | ||
258 | if (!compileOptimisedREs(data, sl)) { | |
259 | debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation"); | |
260 | compileUnoptimisedREs(data, sl); | |
261 | } | |
262 | } | |
263 | ||
264 | bool | |
265 | ACLRegexData::empty() const | |
266 | { | |
267 | return data.empty(); | |
268 | } | |
269 | ||
270 | ACLData<char const *> * | |
271 | ACLRegexData::clone() const | |
272 | { | |
273 | /* Regex's don't clone yet. */ | |
274 | assert(data.empty()); | |
275 | return new ACLRegexData; | |
276 | } | |
277 |