]>
Commit | Line | Data |
---|---|---|
225b7b10 | 1 | /* |
4ac4a490 | 2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors |
225b7b10 | 3 | * |
bbc27441 AJ |
4 | * Squid software is distributed under GPLv2+ license and includes |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | /* | |
10 | * Portions of this code are copyrighted and released under GPLv2+ by: | |
6564daea | 11 | * Copyright (c) 2011, Marcus Kool |
bbc27441 | 12 | * Please add new claims to the CONTRIBUTORS file instead. |
225b7b10 | 13 | */ |
14 | ||
bbc27441 AJ |
15 | /* DEBUG: section 28 Access Control */ |
16 | ||
582c2af2 | 17 | #include "squid.h" |
3ad63615 | 18 | #include "acl/Acl.h" |
602d9612 A |
19 | #include "acl/Checklist.h" |
20 | #include "acl/RegexData.h" | |
8fcefb30 | 21 | #include "base/RegexPattern.h" |
d295d770 | 22 | #include "ConfigParser.h" |
582c2af2 | 23 | #include "Debug.h" |
d80c8446 | 24 | #include "sbuf/Algorithms.h" |
85fd48c8 | 25 | #include "sbuf/List.h" |
225b7b10 | 26 | |
225b7b10 | 27 | ACLRegexData::~ACLRegexData() |
28 | { | |
62e76326 | 29 | } |
225b7b10 | 30 | |
4eac3407 CT |
31 | const Acl::ParameterFlags & |
32 | ACLRegexData::supportedFlags() const | |
33 | { | |
34 | static const Acl::ParameterFlags flags = { "-i", "+i" }; | |
35 | return flags; | |
36 | } | |
37 | ||
225b7b10 | 38 | bool |
48071869 | 39 | ACLRegexData::match(char const *word) |
225b7b10 | 40 | { |
e2b74520 | 41 | if (!word) |
48071869 | 42 | return 0; |
43 | ||
e2b74520 | 44 | debugs(28, 3, "checking '" << word << "'"); |
48071869 | 45 | |
e2b74520 AJ |
46 | // walk the list of patterns to see if one matches |
47 | for (auto &i : data) { | |
95b8eae2 | 48 | if (i.match(word)) { |
e56933d3 | 49 | debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\''); |
e2b74520 AJ |
50 | // TODO: old code also popped the pattern to second place of the list |
51 | // in order to reduce patterns search times. | |
48071869 | 52 | return 1; |
53 | } | |
48071869 | 54 | } |
55 | ||
56 | return 0; | |
225b7b10 | 57 | } |
58 | ||
8966008b | 59 | SBufList |
4f8ca96e | 60 | ACLRegexData::dump() const |
225b7b10 | 61 | { |
8966008b | 62 | SBufList sl; |
ae315d9c | 63 | int flags = REG_EXTENDED | REG_NOSUB; |
48071869 | 64 | |
e2b74520 AJ |
65 | // walk and dump the list |
66 | // keeping the flags values consistent | |
67 | for (auto &i : data) { | |
68 | if (i.flags != flags) { | |
69 | if ((i.flags®_ICASE) != 0) { | |
e56933d3 | 70 | sl.emplace_back("-i"); |
ae315d9c | 71 | } else { |
e56933d3 | 72 | sl.emplace_back("+i"); |
ae315d9c | 73 | } |
e2b74520 | 74 | flags = i.flags; |
ae315d9c AJ |
75 | } |
76 | ||
e56933d3 | 77 | sl.emplace_back(i.c_str()); |
48071869 | 78 | } |
79 | ||
8966008b | 80 | return sl; |
48071869 | 81 | } |
82 | ||
e022e8c6 | 83 | static const char * |
6564daea MK |
84 | removeUnnecessaryWildcards(char * t) |
85 | { | |
86 | char * orig = t; | |
87 | ||
88 | if (strncmp(t, "^.*", 3) == 0) | |
89 | t += 3; | |
90 | ||
91 | /* NOTE: an initial '.' might seem unnessary but is not; | |
92 | * it can be a valid requirement that cannot be optimised | |
93 | */ | |
94 | while (*t == '.' && *(t+1) == '*') { | |
95 | t += 2; | |
96 | } | |
97 | ||
98 | if (*t == '\0') { | |
e56933d3 | 99 | debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line); |
6564daea MK |
100 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead."); |
101 | return ".*"; | |
102 | } | |
103 | if (t != orig) { | |
e56933d3 | 104 | debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line); |
6564daea MK |
105 | debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead."); |
106 | } | |
107 | ||
108 | return t; | |
109 | } | |
110 | ||
e2b74520 | 111 | static bool |
c98b6afe | 112 | compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags) |
48071869 | 113 | { |
e2b74520 AJ |
114 | if (RE == NULL || *RE == '\0') |
115 | return curlist.empty(); // XXX: old code did this. It looks wrong. | |
6564daea | 116 | |
e2b74520 AJ |
117 | regex_t comp; |
118 | if (int errcode = regcomp(&comp, RE, flags)) { | |
6564daea MK |
119 | char errbuf[256]; |
120 | regerror(errcode, &comp, errbuf, sizeof errbuf); | |
e56933d3 | 121 | debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line); |
6564daea | 122 | debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf); |
e2b74520 | 123 | return false; |
6564daea | 124 | } |
e2b74520 | 125 | debugs(28, 2, "compiled '" << RE << "' with flags " << flags); |
6564daea | 126 | |
e2b74520 AJ |
127 | curlist.emplace_back(flags, RE); |
128 | curlist.back().regex = comp; | |
6564daea | 129 | |
e2b74520 | 130 | return true; |
6564daea MK |
131 | } |
132 | ||
d80c8446 FC |
133 | static bool |
134 | compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags) | |
135 | { | |
85fd48c8 SM |
136 | if (RE.empty()) |
137 | return curlist.empty(); // XXX: old code did this. It looks wrong. | |
138 | SBuf regexp; | |
139 | static const SBuf openparen("("), closeparen(")"), separator(")|("); | |
140 | JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen, | |
141 | closeparen); | |
142 | return compileRE(curlist, regexp.c_str(), flags); | |
d80c8446 FC |
143 | } |
144 | ||
6564daea | 145 | /** Compose and compile one large RE from a set of (small) REs. |
95b8eae2 | 146 | * The ultimate goal is to have only one RE per ACL so that match() is |
6564daea MK |
147 | * called only once per ACL. |
148 | */ | |
149 | static int | |
c98b6afe | 150 | compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl) |
6564daea | 151 | { |
e2b74520 | 152 | std::list<RegexPattern> newlist; |
d80c8446 FC |
153 | SBufList accumulatedRE; |
154 | int numREs = 0, reSize = 0; | |
48071869 | 155 | int flags = REG_EXTENDED | REG_NOSUB; |
d740b987 | 156 | |
1284c24c | 157 | for (const SBuf & configurationLineWord : sl) { |
c98b6afe FC |
158 | static const SBuf minus_i("-i"); |
159 | static const SBuf plus_i("+i"); | |
1284c24c | 160 | if (configurationLineWord == minus_i) { |
6564daea MK |
161 | if (flags & REG_ICASE) { |
162 | /* optimisation of -i ... -i */ | |
e56933d3 | 163 | debugs(28, 2, "optimisation of -i ... -i" ); |
6564daea | 164 | } else { |
e56933d3 | 165 | debugs(28, 2, "-i" ); |
d80c8446 | 166 | if (!compileRE(newlist, accumulatedRE, flags)) |
6564daea | 167 | return 0; |
6564daea | 168 | flags |= REG_ICASE; |
d80c8446 FC |
169 | accumulatedRE.clear(); |
170 | reSize = 0; | |
6564daea | 171 | } |
38d17033 | 172 | continue; |
1284c24c | 173 | } else if (configurationLineWord == plus_i) { |
6564daea MK |
174 | if ((flags & REG_ICASE) == 0) { |
175 | /* optimisation of +i ... +i */ | |
e56933d3 | 176 | debugs(28, 2, "optimisation of +i ... +i"); |
6564daea | 177 | } else { |
e56933d3 | 178 | debugs(28, 2, "+i"); |
d80c8446 | 179 | if (!compileRE(newlist, accumulatedRE, flags)) |
6564daea | 180 | return 0; |
6564daea | 181 | flags &= ~REG_ICASE; |
d80c8446 FC |
182 | accumulatedRE.clear(); |
183 | reSize = 0; | |
6564daea | 184 | } |
38d17033 FC |
185 | continue; |
186 | } | |
187 | ||
188 | debugs(28, 2, "adding RE '" << configurationLineWord << "'"); | |
189 | accumulatedRE.push_back(configurationLineWord); | |
190 | ++numREs; | |
191 | reSize += configurationLineWord.length(); | |
192 | ||
193 | if (reSize > 1024) { // must be < BUFSIZ everything included | |
e56933d3 | 194 | debugs(28, 2, "buffer full, generating new optimised RE..." ); |
d80c8446 | 195 | if (!compileRE(newlist, accumulatedRE, flags)) |
6564daea | 196 | return 0; |
d80c8446 FC |
197 | accumulatedRE.clear(); |
198 | reSize = 0; | |
6564daea | 199 | continue; /* do the loop again to add the RE to largeRE */ |
48071869 | 200 | } |
6564daea MK |
201 | } |
202 | ||
d80c8446 | 203 | if (!compileRE(newlist, accumulatedRE, flags)) |
6564daea | 204 | return 0; |
6564daea | 205 | |
d80c8446 FC |
206 | accumulatedRE.clear(); |
207 | reSize = 0; | |
208 | ||
6564daea | 209 | /* all was successful, so put the new list at the tail */ |
e2b74520 | 210 | curlist.splice(curlist.end(), newlist); |
6564daea | 211 | |
e56933d3 | 212 | debugs(28, 2, numREs << " REs are optimised into one RE."); |
6564daea | 213 | if (numREs > 100) { |
e56933d3 | 214 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line); |
6564daea MK |
215 | debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " << |
216 | "Consider using less REs or use rules without expressions like 'dstdomain'."); | |
217 | } | |
218 | ||
219 | return 1; | |
220 | } | |
48071869 | 221 | |
6564daea | 222 | static void |
c98b6afe | 223 | compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl) |
6564daea | 224 | { |
6564daea MK |
225 | int flags = REG_EXTENDED | REG_NOSUB; |
226 | ||
c98b6afe | 227 | static const SBuf minus_i("-i"), plus_i("+i"); |
1284c24c FC |
228 | for (auto configurationLineWord : sl) { |
229 | if (configurationLineWord == minus_i) { | |
6564daea | 230 | flags |= REG_ICASE; |
1284c24c | 231 | } else if (configurationLineWord == plus_i) { |
48071869 | 232 | flags &= ~REG_ICASE; |
6564daea | 233 | } else { |
1284c24c FC |
234 | if (!compileRE(curlist, configurationLineWord.c_str() , flags)) |
235 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. " | |
236b4c67 | 236 | "Compile failed: '" << configurationLineWord << "'"); |
48071869 | 237 | } |
6564daea MK |
238 | } |
239 | } | |
240 | ||
e2b74520 AJ |
241 | void |
242 | ACLRegexData::parse() | |
6564daea | 243 | { |
e2b74520 | 244 | debugs(28, 2, "new Regex line or file"); |
6564daea | 245 | |
c98b6afe | 246 | SBufList sl; |
e2b74520 | 247 | while (char *t = ConfigParser::RegexStrtokFile()) { |
e022e8c6 AJ |
248 | const char *clean = removeUnnecessaryWildcards(t); |
249 | if (strlen(clean) > BUFSIZ-1) { | |
e56933d3 | 250 | debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line); |
e022e8c6 | 251 | debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'"); |
6564daea | 252 | } else { |
e2b74520 | 253 | debugs(28, 3, "buffering RE '" << clean << "'"); |
e56933d3 | 254 | sl.emplace_back(clean); |
48071869 | 255 | } |
6564daea | 256 | } |
48071869 | 257 | |
c98b6afe | 258 | if (!compileOptimisedREs(data, sl)) { |
6564daea | 259 | debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation"); |
c98b6afe | 260 | compileUnoptimisedREs(data, sl); |
48071869 | 261 | } |
225b7b10 | 262 | } |
263 | ||
65092baf | 264 | bool |
265 | ACLRegexData::empty() const | |
266 | { | |
e2b74520 | 267 | return data.empty(); |
65092baf | 268 | } |
225b7b10 | 269 | |
5dee515e | 270 | ACLData<char const *> * |
225b7b10 | 271 | ACLRegexData::clone() const |
272 | { | |
273 | /* Regex's don't clone yet. */ | |
e2b74520 | 274 | assert(data.empty()); |
225b7b10 | 275 | return new ACLRegexData; |
276 | } | |
f53969cc | 277 |