From: Amos Jeffries <squid3@treenet.co.nz>
Date: Fri, 23 Jan 2015 06:16:02 +0000 (-0800)
Subject: Various Tokenizer fixes
X-Git-Tag: merge-candidate-3-v1~240^2~6
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b54f4137f97706a0026245d07f083eed9e77e9b4;p=thirdparty%2Fsquid.git

Various Tokenizer fixes

Import prefix() method fix from parser-ng-requestline work.
- When the match reached the provided limit, prefix() would consume and
  return the entire buffer contents instead of just the limited/matched
  range of bytes.

Extend int64() method:
- allow rejecting numbers that have a '+' or '-' sign prefix.
- allow limiting numeric conversion to a fixed character count.
---

diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 3d43c08b80..b2b6338243 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -71,11 +71,13 @@ Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
 bool
 Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
 {
-    const SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
+    SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
     if (prefixLen == 0)
         return false;
     if (prefixLen == SBuf::npos && (atEnd() || limit == 0))
         return false;
+    if (prefixLen == SBuf::npos && limit > 0)
+        prefixLen = limit;
     returnedToken = consume(prefixLen); // cannot be empty after the npos check
     return true;
 }
@@ -115,23 +117,27 @@ Parser::Tokenizer::skip(const char tokenChar)
 
 /* reworked from compat/strtoll.c */
 bool
-Parser::Tokenizer::int64(int64_t & result, int base)
+Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit)
 {
-    if (buf_.isEmpty())
+    if (atEnd() || limit == 0)
         return false;
 
+    const SBuf range(buf_.substr(0,limit));
+
     //fixme: account for buf_.size()
     bool neg = false;
-    const char *s = buf_.rawContent();
-    const char *end = buf_.rawContent() + buf_.length();
-
-    if (*s == '-') {
-        neg = true;
-        ++s;
-    } else if (*s == '+') {
-        ++s;
+    const char *s = range.rawContent();
+    const char *end = range.rawContent() + range.length();
+
+    if (allowSign) {
+        if (*s == '-') {
+            neg = true;
+            ++s;
+        } else if (*s == '+') {
+            ++s;
+        }
+        if (s >= end) return false;
     }
-    if (s >= end) return false;
     if (( base == 0 || base == 16) && *s == '0' && (s+1 <= end ) &&
             tolower(*(s+1)) == 'x') {
         s += 2;
@@ -184,6 +190,6 @@ Parser::Tokenizer::int64(int64_t & result, int base)
         acc = -acc;
 
     result = acc;
-    return success(s - buf_.rawContent() - 1);
+    return success(s - range.rawContent() - 1);
 }
 
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
index 9cd30d912b..8421f780d8 100644
--- a/src/parser/Tokenizer.h
+++ b/src/parser/Tokenizer.h
@@ -103,10 +103,12 @@ public:
      * \param result Output value. Not touched if parsing is unsuccessful.
      * \param base   Specify base to do the parsing in, with the same restrictions
      *               as strtoll. Defaults to 0 (meaning guess)
+     * \param allowSign Whether to accept a '+' or '-' sign prefix.
+     * \param limit  Maximum count of characters to convert.
      *
      * \return whether the parsing was successful
      */
-    bool int64(int64_t &result, int base = 0);
+    bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
 
 protected:
     SBuf consume(const SBuf::size_type n);