From: Francesco Chemolli Date: Sun, 15 Dec 2013 11:47:07 +0000 (+0100) Subject: Implemented CharacterSet-based SBuf::find_first_of and find_first_not_of X-Git-Tag: SQUID_3_5_0_1~456^2~9 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3c1106a0a516f64b7c07cecd10daee291364c4bb;p=thirdparty%2Fsquid.git Implemented CharacterSet-based SBuf::find_first_of and find_first_not_of --- diff --git a/src/Makefile.am b/src/Makefile.am index 191c6de5a2..6cd5df7b99 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -15,6 +15,7 @@ DNSSOURCE = \ DnsLookupDetails.cc SBUF_SOURCE= \ + base/CharacterSet.h \ base/InstanceId.h \ MemBlob.h \ MemBlob.cc \ diff --git a/src/SBuf.cc b/src/SBuf.cc index 26480b8dcd..18053bd734 100644 --- a/src/SBuf.cc +++ b/src/SBuf.cc @@ -688,12 +688,8 @@ SBuf::rfind(char c, SBuf::size_type endPos) const } SBuf::size_type -SBuf::find_first_of(const SBuf &set, size_type startPos) const +SBuf::find_first_of(const CharacterSet &set, size_type startPos) const { - // if set is 1 char big, use the char search. 
Stats updated there - if (set.length() == 1) - return find(set[0], startPos); - ++stats.find; if (startPos == npos) @@ -702,18 +698,39 @@ SBuf::find_first_of(const SBuf &set, size_type startPos) const if (startPos >= length()) return npos; - if (set.length() == 0) + debugs(24, 7, "first of characterset " << set.name << " in id " << id); + char *cur = buf()+startPos; + const char *end = bufEnd(); + while (cur < end) { + if (set[*cur]) + return cur-buf(); + ++cur; + } + debugs(24, 7, "not found"); + return npos; +} + +SBuf::size_type +SBuf::find_first_not_of(const CharacterSet &set, size_type startPos) const +{ + ++stats.find; + + if (startPos == npos) return npos; - debugs(24, 7, "any of '" << set << "' " << " in id " << id); - char *cur = buf()+startPos, *end = bufEnd(); + if (startPos >= length()) + return npos; + + debugs(24, 7, "first not of characterset " << set.name << " in id " << id); + char *cur = buf()+startPos; + const char *end = bufEnd(); while (cur < end) { - if (memchr(set.buf(), *cur, set.length())) - return (cur-buf()); + if (!set[*cur]) + return cur-buf(); ++cur; } debugs(24, 7, "not found"); - return npos; + return length(); } /* diff --git a/src/SBuf.h b/src/SBuf.h index d374b11a00..3fcf9f1869 100644 --- a/src/SBuf.h +++ b/src/SBuf.h @@ -29,6 +29,7 @@ #ifndef SQUID_SBUF_H #define SQUID_SBUF_H +#include "base/CharacterSet.h" #include "base/InstanceId.h" #include "MemBlob.h" #include "SBufExceptions.h" @@ -513,7 +514,15 @@ public: * \param startPos if specified, ignore any occurrences before that position * if npos, then npos is always returned */ - size_type find_first_of(const SBuf &set, size_type startPos = 0) const; + size_type find_first_of(const CharacterSet &set, size_type startPos = 0) const; + + /** Find first occurrence character NOT in character set + * + * \return length() if all characters in the SBuf are from set + * \param startPos if specified, ignore any occurrences before that position + * if npos, then npos is always returned + 
*/ + size_type find_first_not_of(const CharacterSet &set, size_type startPos = 0) const; /** sscanf-alike * diff --git a/src/base/CharacterSet.h b/src/base/CharacterSet.h new file mode 100644 index 0000000000..3c11d69388 --- /dev/null +++ b/src/base/CharacterSet.h @@ -0,0 +1,44 @@ +#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H +#define _SQUID_SRC_PARSER_CHARACTERSET_H + +#include <vector> + +class CharacterSet +{ +public: + CharacterSet(const char *label, const char * const c) : name(label), chars_(std::vector<bool>(256,false)) { + size_t clen = strlen(c); + for (size_t i = 0; i < clen; ++i) + chars_[static_cast<uint8_t>(c[i])] = true; + } + + /// whether a given character exists in the set + bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)];} + + /// add a given char to the character set. c must be >= 0. + CharacterSet & add(const unsigned char c) {chars_[static_cast<uint8_t>(c)] = true; return *this; } + + /// add all characters from the given CharacterSet to this one + const CharacterSet &operator +=(const CharacterSet &src) { + //precondition: src.chars_.size() == chars_.size() + std::vector<bool>::const_iterator s = src.chars_.begin(); + const std::vector<bool>::const_iterator e = src.chars_.end(); + std::vector<bool>::iterator d = chars_.begin(); + while (s != e) { + if (*s) + *d = true; + ++s; + ++d; + } + return *this; + } + + /// name of this character set + const char * name; + +private: + /// characters defined in this set + std::vector<bool> chars_; //std::vector<bool> is optimized +}; + +#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */ diff --git a/src/base/Makefile.am b/src/base/Makefile.am index 1b84589d87..b99beaebc9 100644 --- a/src/base/Makefile.am +++ b/src/base/Makefile.am @@ -12,6 +12,7 @@ libbase_la_SOURCES = \ AsyncJobCalls.h \ AsyncCallQueue.cc \ AsyncCallQueue.h \ + CharacterSet.h \ TidyPointer.h \ CbcPointer.h \ InstanceId.h \ diff --git a/src/icmp/Makefile.am b/src/icmp/Makefile.am index 4be9699521..4cde92efee 100644 --- a/src/icmp/Makefile.am +++ b/src/icmp/Makefile.am @@ -23,6 +23,7 @@ 
libexec_PROGRAMS = $(PINGER) noinst_LTLIBRARIES = libicmp-core.la libicmp.la SBUF_SOURCE= \ + $(top_srcdir)/src/base/CharacterSet.h \ $(top_srcdir)/src/SBuf.h \ $(top_srcdir)/src/SBuf.cc \ $(top_srcdir)/src/MemBlob.h \ diff --git a/src/tests/SBufFindTest.cc b/src/tests/SBufFindTest.cc index 6bfd1d2915..247b5a5164 100644 --- a/src/tests/SBufFindTest.cc +++ b/src/tests/SBufFindTest.cc @@ -105,7 +105,7 @@ SBufFindTest::testFindFirstOf() { theFindString = theStringHay.find_first_of(theStringNeedle, thePos); theBareNeedlePos = theStringHay.find_first_of(theStringNeedle); - theFindSBuf = theSBufHay.find_first_of(theSBufNeedle, thePos); + theFindSBuf = theSBufHay.find_first_of(CharacterSet("cs",theSBufNeedle.c_str()), thePos); checkResults("find_first_of"); } diff --git a/src/tests/testSBuf.cc b/src/tests/testSBuf.cc index b424444eb2..bc9b56efa2 100644 --- a/src/tests/testSBuf.cc +++ b/src/tests/testSBuf.cc @@ -759,22 +759,46 @@ testSBuf::testFindFirstOf() SBuf::size_type idx; // not found - idx=haystack.find_first_of(SBuf("ADHRWYP")); + idx=haystack.find_first_of(CharacterSet("t1","ADHRWYP")); CPPUNIT_ASSERT_EQUAL(SBuf::npos,idx); // found at beginning - idx=haystack.find_first_of(SBuf("THANDF")); + idx=haystack.find_first_of(CharacterSet("t2","THANDF")); CPPUNIT_ASSERT_EQUAL(0U,idx); //found at end of haystack - idx=haystack.find_first_of(SBuf("QWERYVg")); + idx=haystack.find_first_of(CharacterSet("t3","QWERYVg")); CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx); //found in the middle of haystack - idx=haystack.find_first_of(SBuf("QWERqYV")); + idx=haystack.find_first_of(CharacterSet("t4","QWERqYV")); CPPUNIT_ASSERT_EQUAL(4U,idx); } +void +testSBuf::testFindFirstNotOf() +{ + SBuf haystack(literal); + SBuf::size_type idx; + + // all chars from the set + idx=haystack.find_first_not_of(CharacterSet("t1",literal.c_str())); + CPPUNIT_ASSERT_EQUAL(haystack.length(),idx); + + // found at beginning + idx=haystack.find_first_not_of(CharacterSet("t2","a")); + 
CPPUNIT_ASSERT_EQUAL(0U,idx); + + //found at end of haystack + idx=haystack.find_first_not_of(CharacterSet("t3",literal.substr(0,literal.length()-1).c_str())); + CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx); + + //found in the middle of haystack + idx=haystack.find_first_not_of(CharacterSet("t4","The")); + CPPUNIT_ASSERT_EQUAL(3U,idx); +} + + void testSBuf::testAutoFind() { diff --git a/src/tests/testSBuf.h b/src/tests/testSBuf.h index 373eeef118..f4301e5cf7 100644 --- a/src/tests/testSBuf.h +++ b/src/tests/testSBuf.h @@ -35,6 +35,7 @@ class testSBuf : public CPPUNIT_NS::TestFixture CPPUNIT_TEST( testRFindChar ); CPPUNIT_TEST( testRFindSBuf ); CPPUNIT_TEST( testFindFirstOf ); + CPPUNIT_TEST( testFindFirstNotOf ); CPPUNIT_TEST( testPrintf ); CPPUNIT_TEST( testScanf ); CPPUNIT_TEST( testCopy ); @@ -79,6 +80,7 @@ protected: void testStartsWith(); void testSBufStream(); void testFindFirstOf(); + void testFindFirstNotOf(); void testAutoFind(); void testStdStringOps(); };