]> git.ipfire.org Git - thirdparty/squid.git/blame_incremental - src/acl/RegexData.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / acl / RegexData.cc
... / ...
CommitLineData
1/*
2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
11 * Copyright (c) 2011, Marcus Kool
12 * Please add new claims to the CONTRIBUTORS file instead.
13 */
14
15/* DEBUG: section 28 Access Control */
16
17#include "squid.h"
18#include "acl/Acl.h"
19#include "acl/Checklist.h"
20#include "acl/RegexData.h"
21#include "base/RegexPattern.h"
22#include "ConfigParser.h"
23#include "Debug.h"
24#include "sbuf/Algorithms.h"
25#include "sbuf/List.h"
26
27ACLRegexData::~ACLRegexData()
28{
29}
30
31bool
32ACLRegexData::match(char const *word)
33{
34 if (!word)
35 return 0;
36
37 debugs(28, 3, "checking '" << word << "'");
38
39 // walk the list of patterns to see if one matches
40 for (auto &i : data) {
41 if (i.match(word)) {
42 debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
43 // TODO: old code also popped the pattern to second place of the list
44 // in order to reduce patterns search times.
45 return 1;
46 }
47 }
48
49 return 0;
50}
51
52SBufList
53ACLRegexData::dump() const
54{
55 SBufList sl;
56 int flags = REG_EXTENDED | REG_NOSUB;
57
58 // walk and dump the list
59 // keeping the flags values consistent
60 for (auto &i : data) {
61 if (i.flags != flags) {
62 if ((i.flags&REG_ICASE) != 0) {
63 sl.emplace_back("-i");
64 } else {
65 sl.emplace_back("+i");
66 }
67 flags = i.flags;
68 }
69
70 sl.emplace_back(i.c_str());
71 }
72
73 return sl;
74}
75
76static const char *
77removeUnnecessaryWildcards(char * t)
78{
79 char * orig = t;
80
81 if (strncmp(t, "^.*", 3) == 0)
82 t += 3;
83
84 /* NOTE: an initial '.' might seem unnessary but is not;
85 * it can be a valid requirement that cannot be optimised
86 */
87 while (*t == '.' && *(t+1) == '*') {
88 t += 2;
89 }
90
91 if (*t == '\0') {
92 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
93 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
94 return ".*";
95 }
96 if (t != orig) {
97 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
98 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
99 }
100
101 return t;
102}
103
104static bool
105compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
106{
107 if (RE == NULL || *RE == '\0')
108 return curlist.empty(); // XXX: old code did this. It looks wrong.
109
110 regex_t comp;
111 if (int errcode = regcomp(&comp, RE, flags)) {
112 char errbuf[256];
113 regerror(errcode, &comp, errbuf, sizeof errbuf);
114 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
115 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
116 return false;
117 }
118 debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
119
120 curlist.emplace_back(flags, RE);
121 curlist.back().regex = comp;
122
123 return true;
124}
125
126static bool
127compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
128{
129 if (RE.empty())
130 return curlist.empty(); // XXX: old code did this. It looks wrong.
131 SBuf regexp;
132 static const SBuf openparen("("), closeparen(")"), separator(")|(");
133 JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
134 closeparen);
135 return compileRE(curlist, regexp.c_str(), flags);
136}
137
138/** Compose and compile one large RE from a set of (small) REs.
139 * The ultimate goal is to have only one RE per ACL so that match() is
140 * called only once per ACL.
141 */
142static int
143compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
144{
145 std::list<RegexPattern> newlist;
146 SBufList accumulatedRE;
147 int numREs = 0, reSize = 0;
148 int flags = REG_EXTENDED | REG_NOSUB;
149
150 for (const SBuf & configurationLineWord : sl) {
151 static const SBuf minus_i("-i");
152 static const SBuf plus_i("+i");
153 if (configurationLineWord == minus_i) {
154 if (flags & REG_ICASE) {
155 /* optimisation of -i ... -i */
156 debugs(28, 2, "optimisation of -i ... -i" );
157 } else {
158 debugs(28, 2, "-i" );
159 if (!compileRE(newlist, accumulatedRE, flags))
160 return 0;
161 flags |= REG_ICASE;
162 accumulatedRE.clear();
163 reSize = 0;
164 }
165 continue;
166 } else if (configurationLineWord == plus_i) {
167 if ((flags & REG_ICASE) == 0) {
168 /* optimisation of +i ... +i */
169 debugs(28, 2, "optimisation of +i ... +i");
170 } else {
171 debugs(28, 2, "+i");
172 if (!compileRE(newlist, accumulatedRE, flags))
173 return 0;
174 flags &= ~REG_ICASE;
175 accumulatedRE.clear();
176 reSize = 0;
177 }
178 continue;
179 }
180
181 debugs(28, 2, "adding RE '" << configurationLineWord << "'");
182 accumulatedRE.push_back(configurationLineWord);
183 ++numREs;
184 reSize += configurationLineWord.length();
185
186 if (reSize > 1024) { // must be < BUFSIZ everything included
187 debugs(28, 2, "buffer full, generating new optimised RE..." );
188 if (!compileRE(newlist, accumulatedRE, flags))
189 return 0;
190 accumulatedRE.clear();
191 reSize = 0;
192 continue; /* do the loop again to add the RE to largeRE */
193 }
194 }
195
196 if (!compileRE(newlist, accumulatedRE, flags))
197 return 0;
198
199 accumulatedRE.clear();
200 reSize = 0;
201
202 /* all was successful, so put the new list at the tail */
203 curlist.splice(curlist.end(), newlist);
204
205 debugs(28, 2, numREs << " REs are optimised into one RE.");
206 if (numREs > 100) {
207 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line);
208 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
209 "Consider using less REs or use rules without expressions like 'dstdomain'.");
210 }
211
212 return 1;
213}
214
215static void
216compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
217{
218 int flags = REG_EXTENDED | REG_NOSUB;
219
220 static const SBuf minus_i("-i"), plus_i("+i");
221 for (auto configurationLineWord : sl) {
222 if (configurationLineWord == minus_i) {
223 flags |= REG_ICASE;
224 } else if (configurationLineWord == plus_i) {
225 flags &= ~REG_ICASE;
226 } else {
227 if (!compileRE(curlist, configurationLineWord.c_str() , flags))
228 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
229 "Compile failed: '" << configurationLineWord << "'");
230 }
231 }
232}
233
234void
235ACLRegexData::parse()
236{
237 debugs(28, 2, "new Regex line or file");
238
239 SBufList sl;
240 while (char *t = ConfigParser::RegexStrtokFile()) {
241 const char *clean = removeUnnecessaryWildcards(t);
242 if (strlen(clean) > BUFSIZ-1) {
243 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
244 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
245 } else {
246 debugs(28, 3, "buffering RE '" << clean << "'");
247 sl.emplace_back(clean);
248 }
249 }
250
251 if (!compileOptimisedREs(data, sl)) {
252 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
253 compileUnoptimisedREs(data, sl);
254 }
255}
256
257bool
258ACLRegexData::empty() const
259{
260 return data.empty();
261}
262
263ACLData<char const *> *
264ACLRegexData::clone() const
265{
266 /* Regex's don't clone yet. */
267 assert(data.empty());
268 return new ACLRegexData;
269}
270