From: Christos Tsantilas Date: Thu, 24 Dec 2015 06:57:04 +0000 (+0200) Subject: Note ACL substrings matching X-Git-Tag: SQUID_4_0_4~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=76ee67ac;p=thirdparty%2Fsquid.git Note ACL substrings matching There are several use cases where an annotation may contain a list of values for a single key. Today it is only possible to match the full annotation value. This patch introduces the -m flag, which can be used to enable delimiter-separated substring matching on annotations: acl aclname note [-m[=delimiters]] name value ... The '-m' flag by default matches comma separated substrings. The optional "delimiters" parameter is a list of non-alphanumeric characters, which can be used as alternate delimiters. E.g. if an external ACL sets an annotation like: "applications=http,facebook,facebook-chat" the following ACLs can be used to block access to certain applications: acl fb_chat note -m applications facebook-chat acl db_upload note -m applications dropbox-upload http_access deny fb_chat http_access deny db_upload This is a Measurement Factory project --- diff --git a/src/AclRegs.cc b/src/AclRegs.cc index 2b4d53410a..8215b0011b 100644 --- a/src/AclRegs.cc +++ b/src/AclRegs.cc @@ -221,7 +221,7 @@ ACL::Prototype Acl::AllOf::RegistryProtoype(&Acl::AllOf::RegistryEntry_, "all-of Acl::AllOf Acl::AllOf::RegistryEntry_; ACL::Prototype ACLNote::RegistryProtoype(&ACLNote::RegistryEntry_, "note"); -ACLStrategised ACLNote::RegistryEntry_(new ACLNoteData, ACLNoteStrategy::Instance(), "note"); +ACLStrategised ACLNote::RegistryEntry_(new ACLNoteData, ACLNoteStrategy::Instance(), "note"); #if USE_ADAPTATION ACL::Prototype ACLAdaptationService::RegistryProtoype(&ACLAdaptationService::RegistryEntry_, "adaptation_service"); diff --git a/src/acl/Acl.cc b/src/acl/Acl.cc index f271189752..5dd7252f47 100644 --- a/src/acl/Acl.cc +++ b/src/acl/Acl.cc @@ -24,34 +24,128 @@ #include +#define abortFlags(CONTENT) \ + do { \ + debugs(28, 0, CONTENT); 
\ + self_destruct(); \ + } while (0) + const ACLFlag ACLFlags::NoFlags[1] = {ACL_F_END}; const char *AclMatchedName = NULL; -bool ACLFlags::supported(const ACLFlag f) const +ACLFlags::FlagsTokenizer::FlagsTokenizer(): tokPos(NULL) { } + +ACLFlag +ACLFlags::FlagsTokenizer::nextFlag() +{ + if (needNextToken()) { + if (!nextToken()) + return 0; + } else + ++tokPos; + return *tokPos; +} + +bool +ACLFlags::FlagsTokenizer::hasParameter() const +{ + return tokPos && tokPos[0] && tokPos[1] == '=' && tokPos[2]; +} + +SBuf +ACLFlags::FlagsTokenizer::getParameter() const +{ + return hasParameter() ? SBuf(&tokPos[2]) : SBuf(); +} + +bool +ACLFlags::FlagsTokenizer::needNextToken() const +{ + return !tokPos || !tokPos[0] || !tokPos[1] || tokPos[1] == '='; +} + +bool +ACLFlags::FlagsTokenizer::nextToken() +{ + char *t = ConfigParser::PeekAtToken(); + if (t == NULL || t[0] != '-' || !t[1]) + return false; + (void)ConfigParser::NextQuotedToken(); + if (strcmp(t, "--") == 0) + return false; + tokPos = t + 1; + return true; +} + +ACLFlags::~ACLFlags() +{ + delete delimiters_; +} + +ACLFlags::Status +ACLFlags::flagStatus(const ACLFlag f) const { if (f == ACL_F_REGEX_CASE) - return true; - return (supported_.find(f) != std::string::npos); + return noParameter; + if (f == ACL_F_SUBSTRING) + return parameterOptional; + if (supported_.find(f) != std::string::npos) + return noParameter; + return notSupported; +} + +bool +ACLFlags::parameterSupported(const ACLFlag f, const SBuf &val) const +{ + if (f == ACL_F_SUBSTRING) + return val.findFirstOf(CharacterSet::ALPHA + CharacterSet::DIGIT) == SBuf::npos; + return true; +} + +void +ACLFlags::makeSet(const ACLFlag f, const SBuf ¶m) +{ + flags_ |= flagToInt(f); + if (!param.isEmpty()) + flagParameters_[f].append(param); +} + +void +ACLFlags::makeUnSet(const ACLFlag f) +{ + flags_ &= ~flagToInt(f); + flagParameters_[f].clear(); } void ACLFlags::parseFlags() { - char *nextToken; - while ((nextToken = ConfigParser::PeekAtToken()) != NULL && 
nextToken[0] == '-') { - (void)ConfigParser::NextToken(); //Get token from cfg line - //if token is the "--" break flag - if (strcmp(nextToken, "--") == 0) - break; - - for (const char *flg = nextToken+1; *flg!='\0'; flg++ ) { - if (supported(*flg)) { - makeSet(*flg); - } else { - debugs(28, 0, HERE << "Flag '" << *flg << "' not supported"); - self_destruct(); - } + FlagsTokenizer tokenizer; + ACLFlag flag('\0'); + while ((flag = tokenizer.nextFlag())) { + switch (flagStatus(flag)) + { + case notSupported: + abortFlags("Flag '" << flag << "' not supported"); + break; + case noParameter: + makeSet(flag); + break; + case parameterRequired: + if (!tokenizer.hasParameter()) { + abortFlags("Flag '" << flag << "' must have a parameter"); + break; + } + case parameterOptional: + SBuf param; + if (tokenizer.hasParameter()) { + param = tokenizer.getParameter(); + if (!parameterSupported(flag, param)) + abortFlags("Parameter '" << param << "' for flag '" << flag << "' not supported"); + } + makeSet(flag, param); + break; } } @@ -62,6 +156,27 @@ ACLFlags::parseFlags() } } +SBuf +ACLFlags::parameter(const ACLFlag f) const +{ + assert(static_cast(f - 'A') < FlagIndexMax); + auto p = flagParameters_.find(f); + return p == flagParameters_.end() ? 
SBuf() : p->second; +} + +const CharacterSet * +ACLFlags::delimiters() +{ + if (isSet(ACL_F_SUBSTRING) && !delimiters_) { + static const SBuf defaultParameter(","); + SBuf rawParameter = parameter(ACL_F_SUBSTRING); + if (rawParameter.isEmpty()) + rawParameter = defaultParameter; + delimiters_ = new CharacterSet("ACLFlags::delimiters", rawParameter.c_str()); + } + return delimiters_; +} + const char * ACLFlags::flagsStr() const { diff --git a/src/acl/Acl.h b/src/acl/Acl.h index 56904f108c..79b6e07f1e 100644 --- a/src/acl/Acl.h +++ b/src/acl/Acl.h @@ -10,6 +10,7 @@ #define SQUID_ACL_H #include "acl/forward.h" +#include "base/CharacterSet.h" #include "cbdata.h" #include "defines.h" #include "dlink.h" @@ -26,6 +27,7 @@ typedef char ACLFlag; #define ACL_F_REGEX_CASE 'i' #define ACL_F_NO_LOOKUP 'n' #define ACL_F_STRICT 's' +#define ACL_F_SUBSTRING 'm' #define ACL_F_END '\0' /** @@ -36,16 +38,61 @@ typedef char ACLFlag; class ACLFlags { public: - explicit ACLFlags(const ACLFlag flags[]) : supported_(flags), flags_(0) {} - ACLFlags() : flags_(0) {} - bool supported(const ACLFlag f) const; ///< True if the given flag supported - void makeSet(const ACLFlag f) { flags_ |= flagToInt(f); } ///< Set the given flag - void makeUnSet(const ACLFlag f) { flags_ &= ~flagToInt(f); } ///< Unset the given flag - /// Return true if the given flag is set + enum Status + { + notSupported, + noParameter, + parameterOptional, + parameterRequired + }; + + explicit ACLFlags(const ACLFlag flags[]) : supported_(flags), flags_(0), delimiters_(nullptr) {} + ACLFlags() : flags_(0), delimiters_(nullptr) {} + ~ACLFlags(); + /// \return a Status for the given ACLFlag. + Status flagStatus(const ACLFlag f) const; + /// \return true if the parameter for the given flag is acceptable. 
+ bool parameterSupported(const ACLFlag f, const SBuf &val) const; + /// Set the given flag + void makeSet(const ACLFlag f, const SBuf ¶m = SBuf("")); + void makeUnSet(const ACLFlag f); ///< Unset the given flag + /// \return true if the given flag is set. bool isSet(const ACLFlag f) const { return flags_ & flagToInt(f);} + /// \return the parameter value of the given flag if set. + SBuf parameter(const ACLFlag f) const; + /// \return ACL_F_SUBSTRING parameter value(if set) converted to CharacterSet. + const CharacterSet *delimiters(); /// Parse optional flags given in the form -[A..Z|a..z] void parseFlags(); const char *flagsStr() const; ///< Convert the flags to a string representation + /** + * Lexical analyzer for ACL flags + * + * Support tokens in the form: + * flag := '-' [A-Z|a-z]+ ['=' parameter ] + * Each token consists of one or more single-letter flags, which may + * be followed by a parameter string. + * The parameter can belong only to the last flag in the token. + */ + class FlagsTokenizer + { + public: + FlagsTokenizer(); + ACLFlag nextFlag(); ///< The next flag or '\0' if finished + /// \return true if a parameter follows the last parsed flag. + bool hasParameter() const; + /// \return the parameter of last parsed flag, if exist. + SBuf getParameter() const; + + private: + /// \return true if the current token parsing is finished. + bool needNextToken() const; + /// Peeks at the next token and return false if the next token + /// is not flag, or a '--' is read. + bool nextToken(); + + char *tokPos; + }; private: /// Convert a flag to a 64bit unsigned integer. 
@@ -58,7 +105,10 @@ private: } std::string supported_; ///< The supported character flags - uint64_t flags_; ///< The flags which is set + uint64_t flags_; ///< The flags which are set + static const uint32_t FlagIndexMax = 'z' - 'A'; + std::map flagParameters_; + CharacterSet *delimiters_; public: static const ACLFlag NoFlags[1]; ///< An empty flags list }; @@ -88,7 +138,7 @@ public: /// Orchestrates matching checklist against the ACL using match(), /// after checking preconditions and while providing debugging. - /// Returns true if and only if there was a successful match. + /// \return true if and only if there was a successful match. /// Updates the checklist state on match, async, and failure. bool matches(ACLChecklist *checklist) const; diff --git a/src/acl/Note.cc b/src/acl/Note.cc index 8bf88504f9..c838d53bb3 100644 --- a/src/acl/Note.cc +++ b/src/acl/Note.cc @@ -10,18 +10,50 @@ #include "acl/Checklist.h" #include "acl/HttpHeaderData.h" #include "acl/Note.h" +#include "acl/NoteData.h" +#include "parser/Tokenizer.h" #include "HttpRequest.h" #include "Notes.h" int -ACLNoteStrategy::match (ACLData * &data, ACLFilledChecklist *checklist, ACLFlags &) +ACLNoteStrategy::match(ACLData * &data, ACLFilledChecklist *checklist, ACLFlags &flags) { - if (checklist->request != NULL) - return data->match(checklist->request); - + if (const auto request = checklist->request) { + if (request->notes != NULL && matchNotes(data, request->notes.getRaw(), flags.delimiters())) + return 1; +#if USE_ADAPTATION + const Adaptation::History::Pointer ah = request->adaptLogHistory(); + if (ah != NULL && ah->metaHeaders != NULL && matchNotes(data, ah->metaHeaders.getRaw(), flags.delimiters())) + return 1; +#endif + } return 0; } +bool +ACLNoteStrategy::matchNotes(ACLData *noteData, const NotePairs *note, const CharacterSet *delimiters) const +{ + for (auto &entry: note->entries) { + if (delimiters) { + NotePairs::Entry e(entry->name.termedBuf(), ""); + Parser::Tokenizer 
t(SBuf(entry->value)); + SBuf s; + while (t.token(s, *delimiters)) { + e.value = s.c_str(); + if (noteData->match(&e)) + return true; + } + s = t.remaining(); + e.value = s.c_str(); + if (noteData->match(&e)) + return true; + } + if (noteData->match(entry)) + return true; + } + return false; +} + ACLNoteStrategy * ACLNoteStrategy::Instance() { diff --git a/src/acl/Note.h b/src/acl/Note.h index bba8a12424..6c3cbd9bea 100644 --- a/src/acl/Note.h +++ b/src/acl/Note.h @@ -12,10 +12,12 @@ #include "acl/Strategised.h" #include "acl/Strategy.h" +class ACLNoteData; +class CharacterSet; class HttpRequest; /// \ingroup ACLAPI -class ACLNoteStrategy : public ACLStrategy +class ACLNoteStrategy : public ACLStrategy { public: @@ -33,6 +35,7 @@ private: ACLNoteStrategy() { } ACLNoteStrategy& operator = (ACLNoteStrategy const &); + bool matchNotes(ACLData *, const NotePairs *, const CharacterSet *) const; }; /// \ingroup ACLAPI @@ -41,7 +44,7 @@ class ACLNote private: static ACL::Prototype RegistryProtoype; - static ACLStrategised RegistryEntry_; + static ACLStrategised RegistryEntry_; }; #endif /* SQUID_ACLNOTE_H */ diff --git a/src/acl/NoteData.cc b/src/acl/NoteData.cc index f81de96b80..d27354a8e5 100644 --- a/src/acl/NoteData.cc +++ b/src/acl/NoteData.cc @@ -13,8 +13,6 @@ #include "acl/StringData.h" #include "ConfigParser.h" #include "Debug.h" -#include "HttpRequest.h" -#include "Notes.h" #include "wordlist.h" ACLNoteData::ACLNoteData() : values(new ACLStringData) @@ -26,36 +24,9 @@ ACLNoteData::~ACLNoteData() } bool -ACLNoteData::matchNotes(NotePairs *note) +ACLNoteData::match(NotePairs::Entry *entry) { - if (note == NULL) - return false; - - debugs(28, 3, "Checking " << name); - - if (values->empty()) - return (note->findFirst(name.termedBuf()) != NULL); - - for (std::vector::iterator i = note->entries.begin(); i!= note->entries.end(); ++i) { - if ((*i)->name.cmp(name.termedBuf()) == 0) { - if (values->match((*i)->value.termedBuf())) - return true; - } - } - return false; -} 
- -bool -ACLNoteData::match(HttpRequest *request) -{ - if (request->notes != NULL && matchNotes(request->notes.getRaw())) - return true; -#if USE_ADAPTATION - const Adaptation::History::Pointer ah = request->adaptLogHistory(); - if (ah != NULL && ah->metaHeaders != NULL && matchNotes(ah->metaHeaders.getRaw())) - return true; -#endif - return false; + return !entry->name.cmp(name.termedBuf()) && values->match(entry->value.termedBuf()); } SBufList @@ -88,11 +59,12 @@ ACLNoteData::empty() const return name.size() == 0; } -ACLData * +ACLData * ACLNoteData::clone() const { ACLNoteData * result = new ACLNoteData; - result->values = values->clone(); + result->values = dynamic_cast(values->clone()); + assert(result->values); result->name = name; return result; } diff --git a/src/acl/NoteData.h b/src/acl/NoteData.h index 543c13a4db..93c6418813 100644 --- a/src/acl/NoteData.h +++ b/src/acl/NoteData.h @@ -10,29 +10,28 @@ #define SQUID_ACLNOTEDATA_H #include "acl/Data.h" +#include "Notes.h" #include "SquidString.h" -class HttpRequest; -class NotePairs; +class ACLStringData; /// \ingroup ACLAPI -class ACLNoteData : public ACLData +class ACLNoteData : public ACLData { MEMPROXY_CLASS(ACLNoteData); public: ACLNoteData(); virtual ~ACLNoteData(); - virtual bool match(HttpRequest* request); + virtual bool match(NotePairs::Entry *); virtual SBufList dump() const; virtual void parse(); virtual bool empty() const; - virtual ACLData *clone() const; + virtual ACLData *clone() const; private: - bool matchNotes(NotePairs *note); String name; ///< Note name to check. 
It is always set - ACLData *values; ///< if set, at least one value must match + ACLStringData *values; ///< if set, at least one value must match }; #endif /* SQUID_ACLNOTEDATA_H */ diff --git a/src/acl/StringData.cc b/src/acl/StringData.cc index 9432f54202..36f6bcd8ac 100644 --- a/src/acl/StringData.cc +++ b/src/acl/StringData.cc @@ -25,12 +25,11 @@ ACLStringData::insert(const char *value) } bool -ACLStringData::match(char const *toFind) +ACLStringData::match(const SBuf &tf) { - if (stringValues.empty() || !toFind) + if (stringValues.empty() || tf.isEmpty()) return 0; - SBuf tf(toFind); debugs(28, 3, "aclMatchStringList: checking '" << tf << "'"); bool found = (stringValues.find(tf) != stringValues.end()); @@ -39,6 +38,13 @@ ACLStringData::match(char const *toFind) return found; } +// XXX: performance regression due to SBuf(char*) data-copies. +bool +ACLStringData::match(char const *toFind) +{ + return match(SBuf(toFind)); +} + SBufList ACLStringData::dump() const { diff --git a/src/acl/StringData.h b/src/acl/StringData.h index d017c0e643..49a1044f34 100644 --- a/src/acl/StringData.h +++ b/src/acl/StringData.h @@ -24,7 +24,9 @@ public: ACLStringData(ACLStringData const &); ACLStringData &operator= (ACLStringData const &); virtual ~ACLStringData() {} + /// \deprecated use match(SBuf&) instead. bool match(char const *); + bool match(const SBuf &); virtual SBufList dump() const; virtual void parse(); bool empty() const; diff --git a/src/cf.data.pre b/src/cf.data.pre index 8bb7ff11f6..94c4188ae8 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -903,8 +903,10 @@ DOC_START When using "file", the file should contain one item per line. - Some acl types supports options which changes their default behaviour. - The available options are: + + ACL Options + + Some acl types support options which change their default behaviour: -i,+i By default, regular expressions are CASE-SENSITIVE. To make them case-insensitive, use the -i option. 
To return case-sensitive @@ -917,6 +919,14 @@ DOC_START name or IP), then the ACL would immediately declare a mismatch without any warnings or lookups. + -m[=delimiters] + Perform a list membership test, interpreting values as + comma-separated token lists and matching against individual + tokens instead of whole values. + The optional "delimiters" parameter specifies one or more + alternative delimiter characters, all of which must be + non-alphanumeric. + -- Used to stop processing all options, in the case the first acl value has '-' character as first character (for example the '-' is a valid domain name) @@ -1130,12 +1140,15 @@ DOC_START # effect in rules that affect the reply data stream such as # http_reply_access. - acl aclname note name [value ...] + acl aclname note [-m[=delimiters]] name [value ...] # match transaction annotation [fast] # Without values, matches any annotation with a given name. # With value(s), matches any annotation with a given name that # also has one of the given values. - # Names and values are compared using a string equality test. + # If the -m flag is used, then the value of the named + # annotation is interpreted as a list of tokens, and the ACL + # matches individual name=token pairs rather than whole + # name=value pairs. See "ACL Options" above for more info. # Annotation sources include note and adaptation_meta directives # as well as helper and eCAP responses.