From: Amos Jeffries Date: Fri, 23 Jan 2015 06:16:02 +0000 (-0800) Subject: Various Tokenizer fixes X-Git-Tag: merge-candidate-3-v1~240^2~6 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b54f4137f97706a0026245d07f083eed9e77e9b4;p=thirdparty%2Fsquid.git Various Tokenizer fixes Import prefix() method fix from parser-ng-requestline work. - When a match reached the provided limit, prefix() would consume and return the entire buffer contents instead of just the limited/matched range of bytes. Extend int64() method: - allow rejecting numbers with a +/- sign prefix. - allow limiting numeric conversion to a fixed character count. --- diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc index 3d43c08b80..b2b6338243 100644 --- a/src/parser/Tokenizer.cc +++ b/src/parser/Tokenizer.cc @@ -71,11 +71,13 @@ Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters) bool Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit) { - const SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars); + SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars); if (prefixLen == 0) return false; if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) return false; + if (prefixLen == SBuf::npos && limit > 0) + prefixLen = limit; returnedToken = consume(prefixLen); // cannot be empty after the npos check return true; } @@ -115,23 +117,27 @@ Parser::Tokenizer::skip(const char tokenChar) /* reworked from compat/strtoll.c */ bool -Parser::Tokenizer::int64(int64_t & result, int base) +Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit) { - if (buf_.isEmpty()) + if (atEnd() || limit == 0) return false; + const SBuf range(buf_.substr(0,limit)); + //fixme: account for buf_.size() bool neg = false; - const char *s = buf_.rawContent(); - const char *end = buf_.rawContent() + buf_.length(); - - if (*s == '-') { - neg = true; - 
++s; - } else if (*s == '+') { - ++s; + const char *s = range.rawContent(); + const char *end = range.rawContent() + range.length(); + + if (allowSign) { + if (*s == '-') { + neg = true; + ++s; + } else if (*s == '+') { + ++s; + } + if (s >= end) return false; } - if (s >= end) return false; if (( base == 0 || base == 16) && *s == '0' && (s+1 <= end ) && tolower(*(s+1)) == 'x') { s += 2; @@ -184,6 +190,6 @@ Parser::Tokenizer::int64(int64_t & result, int base) acc = -acc; result = acc; - return success(s - buf_.rawContent() - 1); + return success(s - range.rawContent() - 1); } diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h index 9cd30d912b..8421f780d8 100644 --- a/src/parser/Tokenizer.h +++ b/src/parser/Tokenizer.h @@ -103,10 +103,12 @@ public: * \param result Output value. Not touched if parsing is unsuccessful. * \param base Specify base to do the parsing in, with the same restrictions * as strtoll. Defaults to 0 (meaning guess) + * \param allowSign Whether to accept a '+' or '-' sign prefix. + * \param limit Maximum count of characters to convert. * * \return whether the parsing was successful */ - bool int64(int64_t &result, int base = 0); + bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos); protected: SBuf consume(const SBuf::size_type n);