]> git.ipfire.org Git - thirdparty/squid.git/blame - src/acl/RegexData.cc
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / acl / RegexData.cc
CommitLineData
225b7b10 1/*
f70aedc4 2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
225b7b10 3 *
bbc27441
AJ
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9/*
10 * Portions of this code are copyrighted and released under GPLv2+ by:
6564daea 11 * Copyright (c) 2011, Marcus Kool
bbc27441 12 * Please add new claims to the CONTRIBUTORS file instead.
225b7b10 13 */
14
bbc27441
AJ
15/* DEBUG: section 28 Access Control */
16
582c2af2 17#include "squid.h"
3ad63615 18#include "acl/Acl.h"
602d9612
A
19#include "acl/Checklist.h"
20#include "acl/RegexData.h"
8fcefb30 21#include "base/RegexPattern.h"
d295d770 22#include "ConfigParser.h"
582c2af2 23#include "Debug.h"
d80c8446 24#include "sbuf/Algorithms.h"
85fd48c8 25#include "sbuf/List.h"
225b7b10 26
225b7b10 27ACLRegexData::~ACLRegexData()
28{
62e76326 29}
225b7b10 30
4eac3407
CT
31const Acl::ParameterFlags &
32ACLRegexData::supportedFlags() const
33{
34 static const Acl::ParameterFlags flags = { "-i", "+i" };
35 return flags;
36}
37
225b7b10 38bool
48071869 39ACLRegexData::match(char const *word)
225b7b10 40{
e2b74520 41 if (!word)
48071869 42 return 0;
43
e2b74520 44 debugs(28, 3, "checking '" << word << "'");
48071869 45
e2b74520
AJ
46 // walk the list of patterns to see if one matches
47 for (auto &i : data) {
95b8eae2 48 if (i.match(word)) {
e56933d3 49 debugs(28, 2, '\'' << i.c_str() << "' found in '" << word << '\'');
e2b74520
AJ
50 // TODO: old code also popped the pattern to second place of the list
51 // in order to reduce patterns search times.
48071869 52 return 1;
53 }
48071869 54 }
55
56 return 0;
225b7b10 57}
58
8966008b 59SBufList
4f8ca96e 60ACLRegexData::dump() const
225b7b10 61{
8966008b 62 SBufList sl;
ae315d9c 63 int flags = REG_EXTENDED | REG_NOSUB;
48071869 64
e2b74520
AJ
65 // walk and dump the list
66 // keeping the flags values consistent
67 for (auto &i : data) {
68 if (i.flags != flags) {
69 if ((i.flags&REG_ICASE) != 0) {
e56933d3 70 sl.emplace_back("-i");
ae315d9c 71 } else {
e56933d3 72 sl.emplace_back("+i");
ae315d9c 73 }
e2b74520 74 flags = i.flags;
ae315d9c
AJ
75 }
76
e56933d3 77 sl.emplace_back(i.c_str());
48071869 78 }
79
8966008b 80 return sl;
48071869 81}
82
e022e8c6 83static const char *
6564daea
MK
84removeUnnecessaryWildcards(char * t)
85{
86 char * orig = t;
87
88 if (strncmp(t, "^.*", 3) == 0)
89 t += 3;
90
91 /* NOTE: an initial '.' might seem unnessary but is not;
92 * it can be a valid requirement that cannot be optimised
93 */
94 while (*t == '.' && *(t+1) == '*') {
95 t += 2;
96 }
97
98 if (*t == '\0') {
e56933d3 99 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
100 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has only wildcards and matches all strings. Using '.*' instead.");
101 return ".*";
102 }
103 if (t != orig) {
e56933d3 104 debugs(28, DBG_IMPORTANT, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
105 debugs(28, DBG_IMPORTANT, "WARNING: regular expression '" << orig << "' has unnecessary wildcard(s). Using '" << t << "' instead.");
106 }
107
108 return t;
109}
110
e2b74520 111static bool
c98b6afe 112compileRE(std::list<RegexPattern> &curlist, const char * RE, int flags)
48071869 113{
e2b74520
AJ
114 if (RE == NULL || *RE == '\0')
115 return curlist.empty(); // XXX: old code did this. It looks wrong.
6564daea 116
e2b74520
AJ
117 regex_t comp;
118 if (int errcode = regcomp(&comp, RE, flags)) {
6564daea
MK
119 char errbuf[256];
120 regerror(errcode, &comp, errbuf, sizeof errbuf);
e56933d3 121 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea 122 debugs(28, DBG_CRITICAL, "ERROR: invalid regular expression: '" << RE << "': " << errbuf);
e2b74520 123 return false;
6564daea 124 }
e2b74520 125 debugs(28, 2, "compiled '" << RE << "' with flags " << flags);
6564daea 126
e2b74520
AJ
127 curlist.emplace_back(flags, RE);
128 curlist.back().regex = comp;
6564daea 129
e2b74520 130 return true;
6564daea
MK
131}
132
d80c8446
FC
133static bool
134compileRE(std::list<RegexPattern> &curlist, const SBufList &RE, int flags)
135{
85fd48c8
SM
136 if (RE.empty())
137 return curlist.empty(); // XXX: old code did this. It looks wrong.
138 SBuf regexp;
139 static const SBuf openparen("("), closeparen(")"), separator(")|(");
140 JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen,
141 closeparen);
142 return compileRE(curlist, regexp.c_str(), flags);
d80c8446
FC
143}
144
6564daea 145/** Compose and compile one large RE from a set of (small) REs.
95b8eae2 146 * The ultimate goal is to have only one RE per ACL so that match() is
6564daea
MK
147 * called only once per ACL.
148 */
149static int
c98b6afe 150compileOptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
6564daea 151{
e2b74520 152 std::list<RegexPattern> newlist;
d80c8446
FC
153 SBufList accumulatedRE;
154 int numREs = 0, reSize = 0;
48071869 155 int flags = REG_EXTENDED | REG_NOSUB;
d740b987 156
1284c24c 157 for (const SBuf & configurationLineWord : sl) {
c98b6afe
FC
158 static const SBuf minus_i("-i");
159 static const SBuf plus_i("+i");
1284c24c 160 if (configurationLineWord == minus_i) {
6564daea
MK
161 if (flags & REG_ICASE) {
162 /* optimisation of -i ... -i */
e56933d3 163 debugs(28, 2, "optimisation of -i ... -i" );
6564daea 164 } else {
e56933d3 165 debugs(28, 2, "-i" );
d80c8446 166 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 167 return 0;
6564daea 168 flags |= REG_ICASE;
d80c8446
FC
169 accumulatedRE.clear();
170 reSize = 0;
6564daea 171 }
38d17033 172 continue;
1284c24c 173 } else if (configurationLineWord == plus_i) {
6564daea
MK
174 if ((flags & REG_ICASE) == 0) {
175 /* optimisation of +i ... +i */
e56933d3 176 debugs(28, 2, "optimisation of +i ... +i");
6564daea 177 } else {
e56933d3 178 debugs(28, 2, "+i");
d80c8446 179 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 180 return 0;
6564daea 181 flags &= ~REG_ICASE;
d80c8446
FC
182 accumulatedRE.clear();
183 reSize = 0;
6564daea 184 }
38d17033
FC
185 continue;
186 }
187
188 debugs(28, 2, "adding RE '" << configurationLineWord << "'");
189 accumulatedRE.push_back(configurationLineWord);
190 ++numREs;
191 reSize += configurationLineWord.length();
192
193 if (reSize > 1024) { // must be < BUFSIZ everything included
e56933d3 194 debugs(28, 2, "buffer full, generating new optimised RE..." );
d80c8446 195 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 196 return 0;
d80c8446
FC
197 accumulatedRE.clear();
198 reSize = 0;
6564daea 199 continue; /* do the loop again to add the RE to largeRE */
48071869 200 }
6564daea
MK
201 }
202
d80c8446 203 if (!compileRE(newlist, accumulatedRE, flags))
6564daea 204 return 0;
6564daea 205
d80c8446
FC
206 accumulatedRE.clear();
207 reSize = 0;
208
6564daea 209 /* all was successful, so put the new list at the tail */
e2b74520 210 curlist.splice(curlist.end(), newlist);
6564daea 211
e56933d3 212 debugs(28, 2, numREs << " REs are optimised into one RE.");
6564daea 213 if (numREs > 100) {
e56933d3 214 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), cfg_filename << " line " << config_lineno << ": " << config_input_line);
6564daea
MK
215 debugs(28, (opt_parse_cfg_only?DBG_IMPORTANT:2), "WARNING: there are more than 100 regular expressions. " <<
216 "Consider using less REs or use rules without expressions like 'dstdomain'.");
217 }
218
219 return 1;
220}
48071869 221
6564daea 222static void
c98b6afe 223compileUnoptimisedREs(std::list<RegexPattern> &curlist, const SBufList &sl)
6564daea 224{
6564daea
MK
225 int flags = REG_EXTENDED | REG_NOSUB;
226
c98b6afe 227 static const SBuf minus_i("-i"), plus_i("+i");
1284c24c
FC
228 for (auto configurationLineWord : sl) {
229 if (configurationLineWord == minus_i) {
6564daea 230 flags |= REG_ICASE;
1284c24c 231 } else if (configurationLineWord == plus_i) {
48071869 232 flags &= ~REG_ICASE;
6564daea 233 } else {
9e167fa2 234 if (!compileRE(curlist, configurationLineWord.c_str(), flags))
1284c24c 235 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. "
236b4c67 236 "Compile failed: '" << configurationLineWord << "'");
48071869 237 }
6564daea
MK
238 }
239}
240
e2b74520
AJ
241void
242ACLRegexData::parse()
6564daea 243{
e2b74520 244 debugs(28, 2, "new Regex line or file");
6564daea 245
c98b6afe 246 SBufList sl;
e2b74520 247 while (char *t = ConfigParser::RegexStrtokFile()) {
e022e8c6
AJ
248 const char *clean = removeUnnecessaryWildcards(t);
249 if (strlen(clean) > BUFSIZ-1) {
e56933d3 250 debugs(28, DBG_CRITICAL, cfg_filename << " line " << config_lineno << ": " << config_input_line);
e022e8c6 251 debugs(28, DBG_CRITICAL, "ERROR: Skipping regular expression. Larger than " << BUFSIZ-1 << " characters: '" << clean << "'");
6564daea 252 } else {
e2b74520 253 debugs(28, 3, "buffering RE '" << clean << "'");
e56933d3 254 sl.emplace_back(clean);
48071869 255 }
6564daea 256 }
48071869 257
c98b6afe 258 if (!compileOptimisedREs(data, sl)) {
6564daea 259 debugs(28, DBG_IMPORTANT, "WARNING: optimisation of regular expressions failed; using fallback method without optimisation");
c98b6afe 260 compileUnoptimisedREs(data, sl);
48071869 261 }
225b7b10 262}
263
65092baf 264bool
265ACLRegexData::empty() const
266{
e2b74520 267 return data.empty();
65092baf 268}
225b7b10 269
5dee515e 270ACLData<char const *> *
225b7b10 271ACLRegexData::clone() const
272{
273 /* Regex's don't clone yet. */
e2b74520 274 assert(data.empty());
225b7b10 275 return new ACLRegexData;
276}
f53969cc 277