]> git.ipfire.org Git - thirdparty/squid.git/blame - src/acl/RegexData.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / acl / RegexData.cc
CommitLineData
225b7b10 1/*
4ac4a490 2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
225b7b10 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
6564daea 11 * Copyright (c) 2011, Marcus Kool
bbc27441 12 * Please add new claims to the CONTRIBUTORS file instead.
225b7b10 13 */
14
bbc27441
AJ
15/* DEBUG: section 28 Access Control */
16
582c2af2 17#include "squid.h"
3ad63615 18#include "acl/Acl.h"
602d9612
A
19#include "acl/Checklist.h"
20#include "acl/RegexData.h"
8fcefb30 21#include "base/RegexPattern.h"
d295d770 22#include "ConfigParser.h"
582c2af2 23#include "Debug.h"
d80c8446 24#include "sbuf/Algorithms.h"
85fd48c8 25#include "sbuf/List.h"
225b7b10 26
225b7b10 27ACLRegexData::~ACLRegexData()
28{
62e76326 29}
225b7b10 30
31bool
48071869 32ACLRegexData::match(char const *word)
225b7b10 33{
e2b74520 34 if (!word)
48071869 35 return 0;
36
e2b74520 37 debugs(28, 3, "checking '" << word << "'");
48071869 38
e2b74520
AJ
39 // walk the list of patterns to see if one matches
40 for (auto &i : data) {
95b8eae2 41 if (i.match(word)) {
e56933d3 42 debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
e2b74520
AJ
43 // TODO: old code also popped the pattern to second place of the list
44 // in order to reduce patterns search times.
48071869 45 return 1;
46 }
48071869 47 }
48
49 return 0;
225b7b10 50}
51
8966008b 52SBufList
4f8ca96e 53ACLRegexData::dump() const
225b7b10 54{
8966008b 55 SBufList sl;
ae315d9c 56 int flags = REG_EXTENDED | REG_NOSUB;
48071869 57
e2b74520
AJ
58 // walk and dump the list
59 // keeping the flags values consistent
60 for (auto &i : data) {
61 if (i.flags != flags) {
62 if ((i.flags&REG_ICASE) != 0) {
e56933d3 63 sl.emplace_back("-i");
ae315d9c 64 } else {
e56933d3 65 sl.emplace_back("+i");
ae315d9c 66 }
e2b74520 67 flags = i.flags;
ae315d9c
AJ
68 }
69
e56933d3 70 sl.emplace_back(i.c_str());
48071869 71 }
72
8966008b 73 return sl;
48071869 74}
75
e022e8c6 76static const char *
6564daea
MK
77removeUnnecessaryWildcards(char * t)
78{
79 char * orig = t;
80
81 if (strncmp(t, "^.*", 3) == 0)
82 t += 3;
83
84 /* NOTE: an initial '.' might seem unnessary but is not;
85 * it can be a valid requirement that cannot be optimised
86 */
87 while (*t == '.' && *(t+1) == '*') {
88 t += 2;
89 }
90
91 if (*t == '\0') {
e56933d3 92 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
93 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
94 return ".*";
95 }
96 if (t != orig) {
e56933d3 97 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
98 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
99 }
100
101 return t;
102}
103
e2b74520 104static bool
c98b6afe 105compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
48071869 106{
e2b74520
AJ
107 if (RE == NULL || *RE == '\0')
108 return curlist.empty(); // XXX: old code did this. It looks wrong.
6564daea 109
e2b74520
AJ
110 regex_t comp;
111 if (int errcode = regcomp(&comp, RE, flags)) {
6564daea
MK
112 char errbuf[256];
113 regerror(errcode, &comp, errbuf, sizeof errbuf);
e56933d3 114 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea 115 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
e2b74520 116 return false;
6564daea 117 }
e2b74520 118 debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
6564daea 119
e2b74520
AJ
120 curlist.emplace_back(flags, RE);
121 curlist.back().regex = comp;
6564daea 122
e2b74520 123 return true;
6564daea
MK
124}
125
d80c8446
FC
126static bool
127compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
128{
85fd48c8
SM
129 if (RE.empty())
130 return curlist.empty(); // XXX: old code did this. It looks wrong.
131 SBuf regexp;
132 static const SBuf openparen("("), closeparen(")"), separator(")|(");
133 JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
134 closeparen);
135 return compileRE(curlist, regexp.c_str(), flags);
d80c8446
FC
136}
137
6564daea 138/** Compose and compile one large RE from a set of (small) REs.
95b8eae2 139 * The ultimate goal is to have only one RE per ACL so that match() is
6564daea
MK
140 * called only once per ACL.
141 */
142static int
c98b6afe 143compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
6564daea 144{
e2b74520 145 std::list<RegexPattern> newlist;
d80c8446
FC
146 SBufList accumulatedRE;
147 int numREs = 0, reSize = 0;
48071869 148 int flags = REG_EXTENDED | REG_NOSUB;
d740b987 149
1284c24c 150 for (const SBuf & configurationLineWord : sl) {
c98b6afe
FC
151 static const SBuf minus_i("-i");
152 static const SBuf plus_i("+i");
1284c24c 153 if (configurationLineWord == minus_i) {
6564daea
MK
154 if (flags & REG_ICASE) {
155 /* optimisation of -i ... -i */
e56933d3 156 debugs(28, 2, "optimisation of -i ... -i" );
6564daea 157 } else {
e56933d3 158 debugs(28, 2, "-i" );
d80c8446 159 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 160 return 0;
6564daea 161 flags |= REG_ICASE;
d80c8446
FC
162 accumulatedRE.clear();
163 reSize = 0;
6564daea 164 }
38d17033 165 continue;
1284c24c 166 } else if (configurationLineWord == plus_i) {
6564daea
MK
167 if ((flags & REG_ICASE) == 0) {
168 /* optimisation of +i ... +i */
e56933d3 169 debugs(28, 2, "optimisation of +i ... +i");
6564daea 170 } else {
e56933d3 171 debugs(28, 2, "+i");
d80c8446 172 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 173 return 0;
6564daea 174 flags &= ~REG_ICASE;
d80c8446
FC
175 accumulatedRE.clear();
176 reSize = 0;
6564daea 177 }
38d17033
FC
178 continue;
179 }
180
181 debugs(28, 2, "adding RE '" << configurationLineWord << "'");
182 accumulatedRE.push_back(configurationLineWord);
183 ++numREs;
184 reSize += configurationLineWord.length();
185
186 if (reSize > 1024) { // must be < BUFSIZ everything included
e56933d3 187 debugs(28, 2, "buffer full, generating new optimised RE..." );
d80c8446 188 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 189 return 0;
d80c8446
FC
190 accumulatedRE.clear();
191 reSize = 0;
6564daea 192 continue; /* do the loop again to add the RE to largeRE */
48071869 193 }
6564daea
MK
194 }
195
d80c8446 196 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 197 return 0;
6564daea 198
d80c8446
FC
199 accumulatedRE.clear();
200 reSize = 0;
201
6564daea 202 /* all was successful, so put the new list at the tail */
e2b74520 203 curlist.splice(curlist.end(), newlist);
6564daea 204
e56933d3 205 debugs(28, 2, numREs << " REs are optimised into one RE.");
6564daea 206 if (numREs > 100) {
e56933d3 207 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
208 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
209 "Consider using less REs or use rules without expressions like 'dstdomain'.");
210 }
211
212 return 1;
213}
48071869 214
6564daea 215static void
c98b6afe 216compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
6564daea 217{
6564daea
MK
218 int flags = REG_EXTENDED | REG_NOSUB;
219
c98b6afe 220 static const SBuf minus_i("-i"), plus_i("+i");
1284c24c
FC
221 for (auto configurationLineWord : sl) {
222 if (configurationLineWord == minus_i) {
6564daea 223 flags |= REG_ICASE;
1284c24c 224 } else if (configurationLineWord == plus_i) {
48071869 225 flags &= ~REG_ICASE;
6564daea 226 } else {
1284c24c
FC
227 if (!compileRE(curlist, configurationLineWord.c_str() , flags))
228 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
236b4c67 229 "Compile failed: '" << configurationLineWord << "'");
48071869 230 }
6564daea
MK
231 }
232}
233
e2b74520
AJ
234void
235ACLRegexData::parse()
6564daea 236{
e2b74520 237 debugs(28, 2, "new Regex line or file");
6564daea 238
c98b6afe 239 SBufList sl;
e2b74520 240 while (char *t = ConfigParser::RegexStrtokFile()) {
e022e8c6
AJ
241 const char *clean = removeUnnecessaryWildcards(t);
242 if (strlen(clean) > BUFSIZ-1) {
e56933d3 243 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
e022e8c6 244 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
6564daea 245 } else {
e2b74520 246 debugs(28, 3, "buffering RE '" << clean << "'");
e56933d3 247 sl.emplace_back(clean);
48071869 248 }
6564daea 249 }
48071869 250
c98b6afe 251 if (!compileOptimisedREs(data, sl)) {
6564daea 252 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
c98b6afe 253 compileUnoptimisedREs(data, sl);
48071869 254 }
225b7b10 255}
256
65092baf 257bool
258ACLRegexData::empty() const
259{
e2b74520 260 return data.empty();
65092baf 261}
225b7b10 262
5dee515e 263ACLData<char const *> *
225b7b10 264ACLRegexData::clone() const
265{
266 /* Regex's don't clone yet. */
e2b74520 267 assert(data.empty());
225b7b10 268 return new ACLRegexData;
269}
f53969cc 270