From: Francesco Chemolli Date: Sat, 2 Jan 2016 13:52:42 +0000 (+0100) Subject: CharacterSet improvements: implement operators -=, +, -, ==, !=; implement c++11... X-Git-Tag: SQUID_4_0_4~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=47efdfc6d5188f3b197dd7e19aa5e4a9e6b9234b;p=thirdparty%2Fsquid.git CharacterSet improvements: implement operators -=, +, -, ==, !=; implement c++11 ranges-constructor, add ostream output operator, and related unit tests. Change build order in top-level Makefile.am so that src/ is built before helpers/ --- diff --git a/Makefile.am b/Makefile.am index c582a6284c..192e00a393 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,12 +6,12 @@ ## AUTOMAKE_OPTIONS = dist-bzip2 1.5 foreign -DIST_SUBDIRS = compat lib libltdl scripts icons errors contrib doc helpers src test-suite tools +DIST_SUBDIRS = compat lib libltdl scripts icons errors contrib doc src helpers test-suite tools SUBDIRS = compat lib if ENABLE_LOADABLE_MODULES SUBDIRS += libltdl endif -SUBDIRS += scripts icons errors doc helpers src tools test-suite +SUBDIRS += scripts icons errors doc src tools helpers test-suite DISTCLEANFILES = include/stamp-h include/stamp-h[0-9]* DEFAULT_PINGER = $(libexecdir)/`echo pinger | sed '$(transform);s/$$/$(EXEEXT)/'` diff --git a/src/base/CharacterSet.cc b/src/base/CharacterSet.cc index 7106bb9661..31df3964e3 100644 --- a/src/base/CharacterSet.cc +++ b/src/base/CharacterSet.cc @@ -10,6 +10,7 @@ #include "CharacterSet.h" #include +#include #include CharacterSet & @@ -27,12 +28,19 @@ CharacterSet::operator +=(const CharacterSet &src) return *this; } -CharacterSet -CharacterSet::operator +(const CharacterSet &src) const +CharacterSet & +CharacterSet::operator -=(const CharacterSet &src) { - CharacterSet rv(*this); - rv += src; - return rv; + Storage::const_iterator s = src.chars_.begin(); + const Storage::const_iterator e = src.chars_.end(); + Storage::iterator d = chars_.begin(); + while (s != e) { + if (*s) + *d = 0; + ++s; + ++d; + } + return 
*this; } CharacterSet & @@ -66,7 +74,7 @@ CharacterSet::complement(const char *label) const } CharacterSet::CharacterSet(const char *label, const char * const c) : - name(label == NULL ? "anonymous" : label), + name(label ? label: "anonymous"), chars_(Storage(256,0)) { const size_t clen = strlen(c); @@ -75,43 +83,63 @@ CharacterSet::CharacterSet(const char *label, const char * const c) : } CharacterSet::CharacterSet(const char *label, unsigned char low, unsigned char high) : - name(label == NULL ? "anonymous" : label), + name(label ? label: "anonymous"), chars_(Storage(256,0)) { addRange(low,high); } +CharacterSet::CharacterSet(const char *label, std::initializer_list> ranges) : + name(label ? label: "anonymous"), + chars_(Storage(256,0)) +{ + for (auto range: ranges) + addRange(range.first, range.second); +} + +CharacterSet +operator+ (CharacterSet lhs, const CharacterSet &rhs) +{ + lhs += rhs; + return lhs; +} + +CharacterSet +operator- (CharacterSet lhs, const CharacterSet &rhs) +{ + lhs -= rhs; + return lhs; +} + +std::ostream& +operator <<(std::ostream &s, const CharacterSet &c) +{ + s << "CharacterSet(" << c.name << ')'; + return s; +} + const CharacterSet // RFC 5234 -CharacterSet::ALPHA("ALPHA", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), - CharacterSet::BIT("BIT","01"), - CharacterSet::CR("CR","\r"), -#if __cplusplus == 201103L -//CharacterSet::CTL("CTL",{{0x01,0x1f},{0x7f,0x7f}}), -#endif - CharacterSet::DIGIT("DIGIT","0123456789"), - CharacterSet::DQUOTE("DQUOTE","\""), - CharacterSet::HEXDIG("HEXDIG","0123456789aAbBcCdDeEfF"), - CharacterSet::HTAB("HTAB","\t"), - CharacterSet::LF("LF","\n"), - CharacterSet::SP("SP"," "), - CharacterSet::VCHAR("VCHAR", 0x21, 0x7e), -// RFC 7230 - CharacterSet::WSP("WSP"," \t"), -#if __cplusplus == 201103L -//CharacterSet::CTEXT("ctext",{{0x09,0x09},{0x20,0x20},{0x2a,0x5b},{0x5d,0x7e},{0x80,0xff}}), -#endif - 
CharacterSet::TCHAR("TCHAR","!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), - CharacterSet::SPECIAL("SPECIAL","()<>@,;:\\\"/[]?={}"), -#if __cplusplus == 201103L -//CharacterSet::QDTEXT("QDTEXT",{{0x09,0x09},{0x20,0x21},{0x23,0x5b},{0x5d,0x7e},{0x80,0xff}}), -#endif - CharacterSet::OBSTEXT("OBSTEXT",0x80,0xff), -// RFC 7232 -#if __cplusplus == 201103L -//CharacterSet::ETAGC("ETAGC",{{0x21,0x21},{0x23,0x7e},{0x80,0xff}}), -#endif -// RFC 7235 - CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") - ; - + CharacterSet::ALPHA("ALPHA", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), + CharacterSet::BIT("BIT","01"), + CharacterSet::CR("CR","\r"), + CharacterSet::CTL("CTL",{{0x01,0x1f},{0x7f,0x7f}}), + CharacterSet::DIGIT("DIGIT","0123456789"), + CharacterSet::DQUOTE("DQUOTE","\""), + CharacterSet::HEXDIG("HEXDIG","0123456789aAbBcCdDeEfF"), + CharacterSet::HTAB("HTAB","\t"), + CharacterSet::LF("LF","\n"), + CharacterSet::SP("SP"," "), + CharacterSet::VCHAR("VCHAR", 0x21, 0x7e), + // RFC 7230 + CharacterSet::WSP("WSP"," \t"), + CharacterSet::CTEXT("ctext",{{0x09,0x09},{0x20,0x20},{0x2a,0x5b},{0x5d,0x7e},{0x80,0xff}}), + CharacterSet::TCHAR("TCHAR","!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), + CharacterSet::SPECIAL("SPECIAL","()<>@,;:\\\"/[]?={}"), + CharacterSet::QDTEXT("QDTEXT",{{0x09,0x09},{0x20,0x21},{0x23,0x5b},{0x5d,0x7e},{0x80,0xff}}), + CharacterSet::OBSTEXT("OBSTEXT",0x80,0xff), + // RFC 7232 + CharacterSet::ETAGC("ETAGC",{{0x21,0x21},{0x23,0x7e},{0x80,0xff}}), + // RFC 7235 + CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") + ; diff --git a/src/base/CharacterSet.h b/src/base/CharacterSet.h index 8f8975765e..c06be1d6c5 100644 --- a/src/base/CharacterSet.h +++ b/src/base/CharacterSet.h @@ -9,6 +9,8 @@ #ifndef _SQUID_SRC_PARSER_CHARACTERSET_H #define 
_SQUID_SRC_PARSER_CHARACTERSET_H +#include +#include #include /// optimized set of C chars, with quick membership test and merge support @@ -17,15 +19,20 @@ class CharacterSet public: typedef std::vector Storage; - /// define a character set with the given label ("anonymous" if NULL) + /// define a character set with the given label ("anonymous" if nullptr) /// with specified initial contents CharacterSet(const char *label, const char * const initial); - /// define a character set with the given label ("anonymous" if NULL) + /// define a character set with the given label ("anonymous" if nullptr) /// containing characters defined in the supplied ranges /// \see addRange CharacterSet(const char *label, unsigned char low, unsigned char high); + /// define a character set with the given label ("anonymous" if nullptr) + /// containing characters defined in the supplied list of low-high ranges + /// \see addRange + CharacterSet(const char *label, std::initializer_list> ranges); + /// whether a given character exists in the set bool operator[](unsigned char c) const {return chars_[static_cast(c)] != 0;} @@ -35,18 +42,24 @@ public: /// add a list of character ranges, expressed as pairs [low,high], including both ends CharacterSet & addRange(unsigned char low, unsigned char high); - /// add all characters from the given CharacterSet to this one - CharacterSet &operator +=(const CharacterSet &src); + /// set addition: add to this set all characters that are in rhs + CharacterSet &operator +=(const CharacterSet &rhs); - /// return a new CharacterSet containing the union of two sets - CharacterSet operator +(const CharacterSet &src) const; + /// set subtraction: remove all characters that are also in rhs + CharacterSet &operator -=(const CharacterSet &rhs); /// return a new CharacterSet containing characters not in this set - CharacterSet complement(const char *complementLabel = NULL) const; + /// use the supplied label if provided, default is
"complement_of_some_other_set" + CharacterSet complement(const char *complementLabel = nullptr) const; /// change name; handy in const declarations that use operators CharacterSet &rename(const char *label) { name = label; return *this; } + /// \note Ignores label + bool operator == (const CharacterSet &cs) const { return chars_ == cs.chars_; } + /// \note Ignores label + bool operator != (const CharacterSet &cs) const { return !operator==(cs); } + /// optional set label for debugging (default: "anonymous") const char * name; @@ -58,10 +71,7 @@ public: // carriage return static const CharacterSet CR; // controls -#if __cplusplus == 201103L - // ready but disabled as needs C++11 constructor - //static const CharacterSet CTL; -#endif + static const CharacterSet CTL; // 0-9 static const CharacterSet DIGIT; // double quote @@ -81,29 +91,20 @@ public: // HTTP character sets, RFC 7230 // ctext -#if __cplusplus == 201103L - // ready but disabled as needs C++11 constructor - //static const CharacterSet CTEXT; -#endif + static const CharacterSet CTEXT; // XXX: maybe field-vchar = VCHAR / obs-text // any VCHAR except for SPECIAL static const CharacterSet TCHAR; // special VCHARs static const CharacterSet SPECIAL; // qdtext -#if __cplusplus == 201103L - // ready but disabled as needs C++11 constructor - //static const CharacterSet QDTEXT; -#endif + static const CharacterSet QDTEXT; // obs-text static const CharacterSet OBSTEXT; // HTTP character sets, RFC 7232 // etagc -#if __cplusplus == 201103L - // ready but disabled as needs C++11 constructor - //static const CharacterSet ETAGC; -#endif + static const CharacterSet ETAGC; // HTTP character sets, RFC 7235 // token68 (internal characters only, excludes '=' terminator) @@ -113,11 +114,29 @@ private: /** index of characters in this set * * \note guaranteed to be always 256 slots big, as forced in the - * constructor. This assumption is relied upon in operator[], add, - * operator+= + * constructor.
This assumption is relied upon in various methods */ Storage chars_; }; +/** CharacterSet addition + * + * \return a new CharacterSet containing all characters present in lhs, + * in rhs, or in both, labeled as lhs is + */ +CharacterSet +operator+ (CharacterSet lhs, const CharacterSet &rhs); + +/** CharacterSet subtraction + * + * \return a new CharacterSet containing all characters present in lhs + * and not present in rhs, labeled as lhs is + */ +CharacterSet +operator- (CharacterSet lhs, const CharacterSet &rhs); + +std::ostream& +operator <<(std::ostream &, const CharacterSet &); + #endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */ diff --git a/src/tests/testCharacterSet.cc b/src/tests/testCharacterSet.cc index d109cbc313..f146843e62 100644 --- a/src/tests/testCharacterSet.cc +++ b/src/tests/testCharacterSet.cc @@ -19,7 +19,7 @@ void testCharacterSet::CharacterSetConstruction() { { - CharacterSet t(NULL,""); + CharacterSet t(nullptr,""); CPPUNIT_ASSERT_EQUAL(std::string("anonymous"),std::string(t.name)); } { @@ -28,15 +28,19 @@ testCharacterSet::CharacterSetConstruction() } { CharacterSet t("test",""); - for (int j = 0; j < 255; ++j) + for (int j = 0; j < 256; ++j) CPPUNIT_ASSERT_EQUAL(false,t[j]); } { CharacterSet t("test","0"); CPPUNIT_ASSERT_EQUAL(true,t['0']); - for (int j = 0; j < 255; ++j) - if (j != '0') + for (int j = 0; j < 256; ++j) { + if (j != '0') { CPPUNIT_ASSERT_EQUAL(false,t[j]); + } else { + CPPUNIT_ASSERT_EQUAL(true,t[j]); + } + } } } @@ -76,15 +80,43 @@ testCharacterSet::CharacterSetUnion() { CharacterSet hex("hex",""); hex += CharacterSet::DIGIT; - hex += CharacterSet(NULL,"aAbBcCdDeEfF"); - for (int j = 0; j < 255; ++j) + hex += CharacterSet(nullptr,"aAbBcCdDeEfF"); + CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, hex); + for (int j = 0; j < 256; ++j) CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG[j],hex[j]); } { - CharacterSet hex(NULL,""); - hex = CharacterSet::DIGIT + CharacterSet(NULL,"aAbBcCdDeEfF"); - for (int j = 0; j < 255; ++j) + CharacterSet
hex(nullptr,""); + hex = CharacterSet::DIGIT + CharacterSet(nullptr,"aAbBcCdDeEfF"); + for (int j = 0; j < 256; ++j) CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG[j],hex[j]); } } +void +testCharacterSet::CharacterSetEqualityOp() +{ + CPPUNIT_ASSERT_EQUAL(CharacterSet::ALPHA, CharacterSet::ALPHA); + CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet(nullptr,"01")); + CPPUNIT_ASSERT_EQUAL(CharacterSet(nullptr,"01"), CharacterSet(nullptr,"01")); + CPPUNIT_ASSERT_EQUAL(CharacterSet(nullptr,"01"), CharacterSet("","01")); + CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet("bit",'0','1')); + CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet("bit",{{'0','1'}})); + CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet("bit",{{'0','0'},{'1','1'}})); +} + +void +testCharacterSet::CharacterSetSubtract() +{ + CharacterSet sample(nullptr, "0123456789aAbBcCdDeEfFz"); + + sample -= CharacterSet(nullptr, "z"); //character in set + CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, sample); + + sample -= CharacterSet(nullptr, "z"); // character not in set + CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, sample); + + sample += CharacterSet(nullptr, "z"); + // one in set, one not; test operator- + CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, sample - CharacterSet(nullptr, "qz")); +} diff --git a/src/tests/testCharacterSet.h b/src/tests/testCharacterSet.h index 2b6a0bcf53..78032a2747 100644 --- a/src/tests/testCharacterSet.h +++ b/src/tests/testCharacterSet.h @@ -17,8 +17,10 @@ class testCharacterSet : public CPPUNIT_NS::TestFixture CPPUNIT_TEST( CharacterSetConstruction ); CPPUNIT_TEST( CharacterSetAdd ); CPPUNIT_TEST( CharacterSetAddRange ); + CPPUNIT_TEST( CharacterSetEqualityOp ); CPPUNIT_TEST( CharacterSetConstants ); CPPUNIT_TEST( CharacterSetUnion ); + CPPUNIT_TEST( CharacterSetSubtract ); CPPUNIT_TEST_SUITE_END(); protected: @@ -27,6 +29,8 @@ protected: void CharacterSetAddRange(); void CharacterSetConstants(); void CharacterSetUnion(); + void CharacterSetEqualityOp(); + void 
CharacterSetSubtract(); }; #endif /* SQUID_BASE_TESTCHARACTERSET_H */