]> git.ipfire.org Git - thirdparty/squid.git/blob - src/acl/RegexData.cc
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / acl / RegexData.cc
1 /*
2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 /*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
13 */
14
15 /* DEBUG: section 28 Access Control */
16
17 #include "squid.h"
18 #include "acl/Acl.h"
19 #include "acl/Checklist.h"
20 #include "acl/RegexData.h"
21 #include "base/RegexPattern.h"
22 #include "ConfigParser.h"
23 #include "Debug.h"
24 #include "sbuf/Algorithms.h"
25 #include "sbuf/List.h"
26
27 ACLRegexData::~ACLRegexData()
28 {
29 }
30
31 const Acl::ParameterFlags &
32 ACLRegexData::supportedFlags() const
33 {
34 static const Acl::ParameterFlags flags = { "-i", "+i" };
35 return flags;
36 }
37
38 bool
39 ACLRegexData::match(char const *word)
40 {
41 if (!word)
42 return 0;
43
44 debugs(28, 3, "checking '" << word << "'");
45
46 // walk the list of patterns to see if one matches
47 for (auto &i : data) {
48 if (i.match(word)) {
49 debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
50 // TODO: old code also popped the pattern to second place of the list
51 // in order to reduce patterns search times.
52 return 1;
53 }
54 }
55
56 return 0;
57 }
58
59 SBufList
60 ACLRegexData::dump() const
61 {
62 SBufList sl;
63 int flags = REG_EXTENDED | REG_NOSUB;
64
65 // walk and dump the list
66 // keeping the flags values consistent
67 for (auto &i : data) {
68 if (i.flags != flags) {
69 if ((i.flags&REG_ICASE) != 0) {
70 sl.emplace_back("-i");
71 } else {
72 sl.emplace_back("+i");
73 }
74 flags = i.flags;
75 }
76
77 sl.emplace_back(i.c_str());
78 }
79
80 return sl;
81 }
82
83 static const char *
84 removeUnnecessaryWildcards(char * t)
85 {
86 char * orig = t;
87
88 if (strncmp(t, "^.*", 3) == 0)
89 t += 3;
90
91 /* NOTE: an initial '.' might seem unnessary but is not;
92 * it can be a valid requirement that cannot be optimised
93 */
94 while (*t == '.' && *(t+1) == '*') {
95 t += 2;
96 }
97
98 if (*t == '\0') {
99 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
100 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
101 return ".*";
102 }
103 if (t != orig) {
104 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
105 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
106 }
107
108 return t;
109 }
110
111 static bool
112 compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
113 {
114 if (RE == NULL || *RE == '\0')
115 return curlist.empty(); // XXX: old code did this. It looks wrong.
116
117 regex_t comp;
118 if (int errcode = regcomp(&comp, RE, flags)) {
119 char errbuf[256];
120 regerror(errcode, &comp, errbuf, sizeof errbuf);
121 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
122 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
123 return false;
124 }
125 debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
126
127 curlist.emplace_back(flags, RE);
128 curlist.back().regex = comp;
129
130 return true;
131 }
132
133 static bool
134 compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
135 {
136 if (RE.empty())
137 return curlist.empty(); // XXX: old code did this. It looks wrong.
138 SBuf regexp;
139 static const SBuf openparen("("), closeparen(")"), separator(")|(");
140 JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
141 closeparen);
142 return compileRE(curlist, regexp.c_str(), flags);
143 }
144
145 /** Compose and compile one large RE from a set of (small) REs.
146 * The ultimate goal is to have only one RE per ACL so that match() is
147 * called only once per ACL.
148 */
149 static int
150 compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
151 {
152 std::list<RegexPattern> newlist;
153 SBufList accumulatedRE;
154 int numREs = 0, reSize = 0;
155 int flags = REG_EXTENDED | REG_NOSUB;
156
157 for (const SBuf & configurationLineWord : sl) {
158 static const SBuf minus_i("-i");
159 static const SBuf plus_i("+i");
160 if (configurationLineWord == minus_i) {
161 if (flags & REG_ICASE) {
162 /* optimisation of -i ... -i */
163 debugs(28, 2, "optimisation of -i ... -i" );
164 } else {
165 debugs(28, 2, "-i" );
166 if (!compileRE(newlist, accumulatedRE, flags))
167 return 0;
168 flags |= REG_ICASE;
169 accumulatedRE.clear();
170 reSize = 0;
171 }
172 continue;
173 } else if (configurationLineWord == plus_i) {
174 if ((flags & REG_ICASE) == 0) {
175 /* optimisation of +i ... +i */
176 debugs(28, 2, "optimisation of +i ... +i");
177 } else {
178 debugs(28, 2, "+i");
179 if (!compileRE(newlist, accumulatedRE, flags))
180 return 0;
181 flags &= ~REG_ICASE;
182 accumulatedRE.clear();
183 reSize = 0;
184 }
185 continue;
186 }
187
188 debugs(28, 2, "adding RE '" << configurationLineWord << "'");
189 accumulatedRE.push_back(configurationLineWord);
190 ++numREs;
191 reSize += configurationLineWord.length();
192
193 if (reSize > 1024) { // must be < BUFSIZ everything included
194 debugs(28, 2, "buffer full, generating new optimised RE..." );
195 if (!compileRE(newlist, accumulatedRE, flags))
196 return 0;
197 accumulatedRE.clear();
198 reSize = 0;
199 continue; /* do the loop again to add the RE to largeRE */
200 }
201 }
202
203 if (!compileRE(newlist, accumulatedRE, flags))
204 return 0;
205
206 accumulatedRE.clear();
207 reSize = 0;
208
209 /* all was successful, so put the new list at the tail */
210 curlist.splice(curlist.end(), newlist);
211
212 debugs(28, 2, numREs << " REs are optimised into one RE.");
213 if (numREs > 100) {
214 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line);
215 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
216 "Consider using less REs or use rules without expressions like 'dstdomain'.");
217 }
218
219 return 1;
220 }
221
222 static void
223 compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
224 {
225 int flags = REG_EXTENDED | REG_NOSUB;
226
227 static const SBuf minus_i("-i"), plus_i("+i");
228 for (auto configurationLineWord : sl) {
229 if (configurationLineWord == minus_i) {
230 flags |= REG_ICASE;
231 } else if (configurationLineWord == plus_i) {
232 flags &= ~REG_ICASE;
233 } else {
234 if (!compileRE(curlist, configurationLineWord.c_str(), flags))
235 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
236 "Compile failed: '" << configurationLineWord << "'");
237 }
238 }
239 }
240
241 void
242 ACLRegexData::parse()
243 {
244 debugs(28, 2, "new Regex line or file");
245
246 SBufList sl;
247 while (char *t = ConfigParser::RegexStrtokFile()) {
248 const char *clean = removeUnnecessaryWildcards(t);
249 if (strlen(clean) > BUFSIZ-1) {
250 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
251 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
252 } else {
253 debugs(28, 3, "buffering RE '" << clean << "'");
254 sl.emplace_back(clean);
255 }
256 }
257
258 if (!compileOptimisedREs(data, sl)) {
259 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
260 compileUnoptimisedREs(data, sl);
261 }
262 }
263
264 bool
265 ACLRegexData::empty() const
266 {
267 return data.empty();
268 }
269
270 ACLData<char const *> *
271 ACLRegexData::clone() const
272 {
273 /* Regex's don't clone yet. */
274 assert(data.empty());
275 return new ACLRegexData;
276 }
277