]> git.ipfire.org Git - thirdparty/squid.git/blame - src/acl/RegexData.cc
Merge SBuf-based RegexData
[thirdparty/squid.git] / src / acl / RegexData.cc
CommitLineData
225b7b10 1/*
ef57eb7b 2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
225b7b10 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
6564daea 11 * Copyright (c) 2011, Marcus Kool
bbc27441 12 * Please add new claims to the CONTRIBUTORS file instead.
225b7b10 13 */
14
bbc27441
AJ
15/* DEBUG: section 28 Access Control */
16
582c2af2 17#include "squid.h"
3ad63615 18#include "acl/Acl.h"
602d9612
A
19#include "acl/Checklist.h"
20#include "acl/RegexData.h"
8fcefb30 21#include "base/RegexPattern.h"
d295d770 22#include "ConfigParser.h"
582c2af2 23#include "Debug.h"
c98b6afe 24#include "sbuf/List.h"
d80c8446 25#include "sbuf/Algorithms.h"
225b7b10 26
225b7b10 27ACLRegexData::~ACLRegexData()
28{
62e76326 29}
225b7b10 30
31bool
48071869 32ACLRegexData::match(char const *word)
225b7b10 33{
e2b74520 34 if (!word)
48071869 35 return 0;
36
e2b74520 37 debugs(28, 3, "checking '" << word << "'");
48071869 38
e2b74520
AJ
39 // walk the list of patterns to see if one matches
40 for (auto &i : data) {
95b8eae2 41 if (i.match(word)) {
e56933d3 42 debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
e2b74520
AJ
43 // TODO: old code also popped the pattern to second place of the list
44 // in order to reduce patterns search times.
48071869 45 return 1;
46 }
48071869 47 }
48
49 return 0;
225b7b10 50}
51
8966008b 52SBufList
4f8ca96e 53ACLRegexData::dump() const
225b7b10 54{
8966008b 55 SBufList sl;
ae315d9c 56 int flags = REG_EXTENDED | REG_NOSUB;
48071869 57
e2b74520
AJ
58 // walk and dump the list
59 // keeping the flags values consistent
60 for (auto &i : data) {
61 if (i.flags != flags) {
62 if ((i.flags&REG_ICASE) != 0) {
e56933d3 63 sl.emplace_back("-i");
ae315d9c 64 } else {
e56933d3 65 sl.emplace_back("+i");
ae315d9c 66 }
e2b74520 67 flags = i.flags;
ae315d9c
AJ
68 }
69
e56933d3 70 sl.emplace_back(i.c_str());
48071869 71 }
72
8966008b 73 return sl;
48071869 74}
75
e022e8c6 76static const char *
6564daea
MK
77removeUnnecessaryWildcards(char * t)
78{
79 char * orig = t;
80
81 if (strncmp(t, "^.*", 3) == 0)
82 t += 3;
83
84 /* NOTE: an initial '.' might seem unnessary but is not;
85 * it can be a valid requirement that cannot be optimised
86 */
87 while (*t == '.' && *(t+1) == '*') {
88 t += 2;
89 }
90
91 if (*t == '\0') {
e56933d3 92 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
93 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
94 return ".*";
95 }
96 if (t != orig) {
e56933d3 97 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
98 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
99 }
100
101 return t;
102}
103
e2b74520 104static bool
c98b6afe 105compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
48071869 106{
e2b74520
AJ
107 if (RE == NULL || *RE == '\0')
108 return curlist.empty(); // XXX: old code did this. It looks wrong.
6564daea 109
e2b74520
AJ
110 regex_t comp;
111 if (int errcode = regcomp(&comp, RE, flags)) {
6564daea
MK
112 char errbuf[256];
113 regerror(errcode, &comp, errbuf, sizeof errbuf);
e56933d3 114 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea 115 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
e2b74520 116 return false;
6564daea 117 }
e2b74520 118 debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
6564daea 119
e2b74520
AJ
120 curlist.emplace_back(flags, RE);
121 curlist.back().regex = comp;
6564daea 122
e2b74520 123 return true;
6564daea
MK
124}
125
d80c8446
FC
126static bool
127compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
128{
129 if (RE.empty())
130 return curlist.empty(); // XXX: old code did this. It looks wrong.
131 SBuf regexp;
132 static const SBuf openparen("("), closeparen(")"), separator(")|(");
133 JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
134 closeparen);
135 return compileRE(curlist, regexp.c_str(), flags);
136}
137
6564daea 138/** Compose and compile one large RE from a set of (small) REs.
95b8eae2 139 * The ultimate goal is to have only one RE per ACL so that match() is
6564daea
MK
140 * called only once per ACL.
141 */
142static int
c98b6afe 143compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
6564daea 144{
e2b74520 145 std::list<RegexPattern> newlist;
d80c8446
FC
146 SBufList accumulatedRE;
147 int numREs = 0, reSize = 0;
48071869 148 int flags = REG_EXTENDED | REG_NOSUB;
d740b987 149
1284c24c 150 for (const SBuf & configurationLineWord : sl) {
d80c8446 151 const int RElen = configurationLineWord.length();
d740b987 152
c98b6afe
FC
153 static const SBuf minus_i("-i");
154 static const SBuf plus_i("+i");
1284c24c 155 if (configurationLineWord == minus_i) {
6564daea
MK
156 if (flags & REG_ICASE) {
157 /* optimisation of -i ... -i */
e56933d3 158 debugs(28, 2, "optimisation of -i ... -i" );
6564daea 159 } else {
e56933d3 160 debugs(28, 2, "-i" );
d80c8446 161 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 162 return 0;
6564daea 163 flags |= REG_ICASE;
d80c8446
FC
164 accumulatedRE.clear();
165 reSize = 0;
6564daea 166 }
38d17033 167 continue;
1284c24c 168 } else if (configurationLineWord == plus_i) {
6564daea
MK
169 if ((flags & REG_ICASE) == 0) {
170 /* optimisation of +i ... +i */
e56933d3 171 debugs(28, 2, "optimisation of +i ... +i");
6564daea 172 } else {
e56933d3 173 debugs(28, 2, "+i");
d80c8446 174 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 175 return 0;
6564daea 176 flags &= ~REG_ICASE;
d80c8446
FC
177 accumulatedRE.clear();
178 reSize = 0;
6564daea 179 }
38d17033
FC
180 continue;
181 }
182
183 debugs(28, 2, "adding RE '" << configurationLineWord << "'");
184 accumulatedRE.push_back(configurationLineWord);
185 ++numREs;
186 reSize += configurationLineWord.length();
187
188 if (reSize > 1024) { // must be < BUFSIZ everything included
e56933d3 189 debugs(28, 2, "buffer full, generating new optimised RE..." );
d80c8446 190 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 191 return 0;
d80c8446
FC
192 accumulatedRE.clear();
193 reSize = 0;
6564daea 194 continue; /* do the loop again to add the RE to largeRE */
48071869 195 }
6564daea
MK
196 }
197
d80c8446 198 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 199 return 0;
6564daea 200
d80c8446
FC
201 accumulatedRE.clear();
202 reSize = 0;
203
6564daea 204 /* all was successful, so put the new list at the tail */
e2b74520 205 curlist.splice(curlist.end(), newlist);
6564daea 206
e56933d3 207 debugs(28, 2, numREs << " REs are optimised into one RE.");
6564daea 208 if (numREs > 100) {
e56933d3 209 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
210 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
211 "Consider using less REs or use rules without expressions like 'dstdomain'.");
212 }
213
214 return 1;
215}
48071869 216
6564daea 217static void
c98b6afe 218compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
6564daea 219{
6564daea
MK
220 int flags = REG_EXTENDED | REG_NOSUB;
221
c98b6afe 222 static const SBuf minus_i("-i"), plus_i("+i");
1284c24c
FC
223 for (auto configurationLineWord : sl) {
224 if (configurationLineWord == minus_i) {
6564daea 225 flags |= REG_ICASE;
1284c24c 226 } else if (configurationLineWord == plus_i) {
48071869 227 flags &= ~REG_ICASE;
6564daea 228 } else {
1284c24c
FC
229 if (!compileRE(curlist, configurationLineWord.c_str() , flags))
230 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
236b4c67 231 "Compile failed: '" << configurationLineWord << "'");
48071869 232 }
6564daea
MK
233 }
234}
235
e2b74520
AJ
236void
237ACLRegexData::parse()
6564daea 238{
e2b74520 239 debugs(28, 2, "new Regex line or file");
6564daea 240
c98b6afe 241 SBufList sl;
e2b74520 242 while (char *t = ConfigParser::RegexStrtokFile()) {
e022e8c6
AJ
243 const char *clean = removeUnnecessaryWildcards(t);
244 if (strlen(clean) > BUFSIZ-1) {
e56933d3 245 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
e022e8c6 246 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
6564daea 247 } else {
e2b74520 248 debugs(28, 3, "buffering RE '" << clean << "'");
e56933d3 249 sl.emplace_back(clean);
48071869 250 }
6564daea 251 }
48071869 252
c98b6afe 253 if (!compileOptimisedREs(data, sl)) {
6564daea 254 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
c98b6afe 255 compileUnoptimisedREs(data, sl);
48071869 256 }
225b7b10 257}
258
65092baf 259bool
260ACLRegexData::empty() const
261{
e2b74520 262 return data.empty();
65092baf 263}
225b7b10 264
5dee515e 265ACLData<char const *> *
225b7b10 266ACLRegexData::clone() const
267{
268 /* Regex's don't clone yet. */
e2b74520 269 assert(data.empty());
225b7b10 270 return new ACLRegexData;
271}
f53969cc 272