src/parser/Tokenizer.cc

   1 /*
   2  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 /* DEBUG: section 24    SBuf */
  10
  11 #include "squid.h"
  12 #include "Debug.h"
  13 #include "parser/Tokenizer.h"
  14
  15 #include <cerrno>
  16 #if HAVE_CTYPE_H
  17 #include <ctype.h>
  18 #endif
  19 #if HAVE_STDINT_H
  20 #include <stdint.h>
  21 #endif
  22 #ifndef INT64_MIN
  23 /* Native 64 bit system without strtoll() */
  24 #if defined(LONG_MIN) && (SIZEOF_LONG == 8)
  25 #define INT64_MIN LONG_MIN
  26 #else
  27 /* 32 bit system */
  28 #define INT64_MIN       (-9223372036854775807LL-1LL)
  29 #endif
  30 #endif
  31
  32 #ifndef INT64_MAX
  33 /* Native 64 bit system without strtoll() */
  34 #if defined(LONG_MAX) && (SIZEOF_LONG == 8)
  35 #define INT64_MAX LONG_MAX
  36 #else
  37 /* 32 bit system */
  38 #define INT64_MAX       9223372036854775807LL
  39 #endif
  40 #endif
  41
  42 /// convenience method: consumes up to n bytes, counts, and returns them
  43 SBuf
  44 Parser::Tokenizer::consume(const SBuf::size_type n)
  45 {
  46     // careful: n may be npos!
  47     debugs(24, 5, "consuming " << n << " bytes");
  48     const SBuf result = buf_.consume(n);
  49     parsed_ += result.length();
  50     return result;
  51 }
  52
  53 /// convenience method: consume()s up to n bytes and returns their count
  54 SBuf::size_type
  55 Parser::Tokenizer::success(const SBuf::size_type n)
  56 {
  57     return consume(n).length();
  58 }
  59
  60 bool
  61 Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
  62 {
  63     const Tokenizer saved(*this);
  64     skipAll(delimiters);
  65     const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
  66     if (tokenLen == SBuf::npos) {
  67         debugs(24, 8, "no token found for delimiters " << delimiters.name);
  68         *this = saved;
  69         return false;
  70     }
  71     returnedToken = consume(tokenLen); // cannot be empty
  72     skipAll(delimiters);
  73     debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
  74            returnedToken << '\'');
  75     return true;
  76 }
  77
  78 bool
  79 Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
  80 {
  81     SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
  82     if (prefixLen == 0) {
  83         debugs(24, 8, "no prefix for set " << tokenChars.name);
  84         return false;
  85     }
  86     if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
  87         debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
  88         return false;
  89     }
  90     if (prefixLen == SBuf::npos && limit > 0) {
  91         debugs(24, 8, "whole haystack matched");
  92         prefixLen = limit;
  93     }
  94     debugs(24, 8, "found with length " << prefixLen);
  95     returnedToken = consume(prefixLen); // cannot be empty after the npos check
  96     return true;
  97 }
  98
  99 bool
 100 Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
 101 {
 102     SBuf span = buf_;
 103
 104     if (limit < buf_.length())
 105         span.consume(buf_.length() - limit); // ignore the N prefix characters
 106
 107     auto i = span.rbegin();
 108     SBuf::size_type found = 0;
 109     while (i != span.rend() && tokenChars[*i]) {
 110         ++i;
 111         ++found;
 112     }
 113     if (!found)
 114         return false;
 115     returnedToken = buf_;
 116     buf_ = returnedToken.consume(buf_.length() - found);
 117     return true;
 118 }
 119
 120 SBuf::size_type
 121 Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
 122 {
 123     const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
 124     if (prefixLen == 0) {
 125         debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
 126         return 0;
 127     }
 128     debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
 129     return success(prefixLen);
 130 }
 131
 132 bool
 133 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 134 {
 135     if (!buf_.isEmpty() && chars[buf_[0]]) {
 136         debugs(24, 8, "skipping one-of " << chars.name);
 137         return success(1);
 138     }
 139     debugs(24, 8, "no match while skipping one-of " << chars.name);
 140     return false;
 141 }
 142
 143 bool
 144 Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
 145 {
 146     if (buf_.length() < tokenToSkip.length())
 147         return false;
 148
 149     SBuf::size_type offset = 0;
 150     if (tokenToSkip.length() < buf_.length())
 151         offset = buf_.length() - tokenToSkip.length();
 152
 153     if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
 154         buf_ = buf_.substr(0,offset);
 155         return true;
 156     }
 157     return false;
 158 }
 159
 160 bool
 161 Parser::Tokenizer::skip(const SBuf &tokenToSkip)
 162 {
 163     if (buf_.startsWith(tokenToSkip)) {
 164         debugs(24, 8, "skipping " << tokenToSkip.length());
 165         return success(tokenToSkip.length());
 166     }
 167     debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
 168     return false;
 169 }
 170
 171 bool
 172 Parser::Tokenizer::skip(const char tokenChar)
 173 {
 174     if (!buf_.isEmpty() && buf_[0] == tokenChar) {
 175         debugs(24, 8, "skipping char '" << tokenChar << '\'');
 176         return success(1);
 177     }
 178     debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
 179     return false;
 180 }
 181
 182 /* reworked from compat/strtoll.c */
 183 bool
 184 Parser::Tokenizer::int64(int64_t & result, int base)
 185 {
 186     if (buf_.isEmpty())
 187         return false;
 188
 189     //fixme: account for buf_.size()
 190     bool neg = false;
 191     const char *s = buf_.rawContent();
 192     const char *end = buf_.rawContent() + buf_.length();
 193
 194     if (*s == '-') {
 195         neg = true;
 196         ++s;
 197     } else if (*s == '+') {
 198         ++s;
 199     }
 200     if (s >= end) return false;
 201     if (( base == 0 || base == 16) && *s == '0' && (s+1 <= end ) &&
 202             tolower(*(s+1)) == 'x') {
 203         s += 2;
 204         base = 16;
 205     }
 206     if (base == 0) {
 207         if ( *s == '0') {
 208             base = 8;
 209             ++s;
 210         } else {
 211             base = 10;
 212         }
 213     }
 214     if (s >= end) return false;
 215
 216     uint64_t cutoff;
 217
 218     cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
 219     const int cutlim = cutoff % static_cast<int64_t>(base);
 220     cutoff /= static_cast<uint64_t>(base);
 221
 222     int any = 0, c;
 223     int64_t acc = 0;
 224     for (c = *s++; s <= end; c = *s++) {
 225         if (xisdigit(c)) {
 226             c -= '0';
 227         } else if (xisalpha(c)) {
 228             c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
 229         } else {
 230             break;
 231         }
 232         if (c >= base)
 233             break;
 234         if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim))
 235             any = -1;
 236         else {
 237             any = 1;
 238             acc *= base;
 239             acc += c;
 240         }
 241     }
 242
 243     if (any == 0) // nothing was parsed
 244         return false;
 245     if (any < 0) {
 246         acc = neg ? INT64_MIN : INT64_MAX;
 247         errno = ERANGE;
 248         return false;
 249     } else if (neg)
 250         acc = -acc;
 251
 252     result = acc;
 253     return success(s - buf_.rawContent() - 1);
 254 }
 255