/*
 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 24    SBuf */

#include "squid.h"
#include "Debug.h"
#include "parser/Tokenizer.h"

#include <cerrno>
#if HAVE_CTYPE_H
#include <ctype.h>
#endif
#if HAVE_STDINT_H
#include <stdint.h>
#endif
#ifndef INT64_MIN
/* Native 64 bit system without strtoll() */
#if defined(LONG_MIN) && (SIZEOF_LONG == 8)
#define INT64_MIN LONG_MIN
#else
/* 32 bit system */
#define INT64_MIN (-9223372036854775807LL-1LL)
#endif
#endif

#ifndef INT64_MAX
/* Native 64 bit system without strtoll() */
#if defined(LONG_MAX) && (SIZEOF_LONG == 8)
#define INT64_MAX LONG_MAX
#else
/* 32 bit system */
#define INT64_MAX 9223372036854775807LL
#endif
#endif

/// convenience method: consumes up to n bytes, counts, and returns them
SBuf
Parser::Tokenizer::consume(const SBuf::size_type n)
{
    // careful: n may be npos!
    debugs(24, 5, "consuming " << n << " bytes");
    const SBuf result = buf_.consume(n);
    parsed_ += result.length();
    return result;
}

/// convenience method: consume()s up to n bytes and returns their count
SBuf::size_type
Parser::Tokenizer::success(const SBuf::size_type n)
{
    return consume(n).length();
}

bool
Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
{
    const Tokenizer saved(*this);
    skipAll(delimiters);
    const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // npos: no trailing delimiter found
    if (tokenLen == SBuf::npos) {
        debugs(24, 8, "no token found for delimiters " << delimiters.name);
        *this = saved;
        return false;
    }
    returnedToken = consume(tokenLen); // cannot be empty
    skipAll(delimiters);
    debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
           returnedToken << '\'');
    return true;
}

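/*
 * Usage sketch for token(): extract a word that is bounded by delimiters on
 * both sides, consuming the surrounding delimiters as well. The "input"
 * buffer is hypothetical; CharacterSet::WSP is assumed to be the predefined
 * whitespace set from base/CharacterSet.h.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "  alpha beta"
 *   SBuf word;
 *   if (tok.token(word, CharacterSet::WSP)) {
 *       // word == "alpha"; "beta" stays buffered because it has no
 *       // trailing delimiter, so a second token() call would fail
 *   }
 */
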
bool
Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf::size_type prefixLen = buf_.substr(0,limit).findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no prefix for set " << tokenChars.name);
        return false;
    }
    if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
        debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
        return false;
    }
    if (prefixLen == SBuf::npos && limit > 0) {
        debugs(24, 8, "whole haystack matched");
        prefixLen = limit;
    }
    debugs(24, 8, "found with length " << prefixLen);
    returnedToken = consume(prefixLen); // cannot be empty after the npos check
    return true;
}

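/*
 * Usage sketch for prefix(): grab the run of characters from a given set at
 * the start of the buffer, optionally capped by a length limit. The "input"
 * buffer is hypothetical; CharacterSet::DIGIT is assumed to be the
 * predefined 0-9 set.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "1234abc"
 *   SBuf digits;
 *   if (tok.prefix(digits, CharacterSet::DIGIT)) {
 *       // digits == "1234"; "abc" remains in the buffer for later calls
 *   }
 */
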
bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf span = buf_;

    if (limit < buf_.length())
        span.consume(buf_.length() - limit); // ignore the N prefix characters

    auto i = span.rbegin();
    SBuf::size_type found = 0;
    while (i != span.rend() && tokenChars[*i]) {
        ++i;
        ++found;
    }
    if (!found)
        return false;
    returnedToken = buf_;
    buf_ = returnedToken.consume(buf_.length() - found);
    return true;
}

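/*
 * Usage sketch for suffix(): peel a trailing run of characters off the end
 * of the buffer, leaving everything before it for further parsing. The
 * "input" buffer is hypothetical; CharacterSet::DIGIT is assumed as above.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "HTTP/1.0"
 *   SBuf minorVersion;
 *   if (tok.suffix(minorVersion, CharacterSet::DIGIT)) {
 *       // minorVersion == "0"; the buffer now ends with "HTTP/1."
 *   }
 */
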
SBuf::size_type
Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
{
    const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
        return 0;
    }
    debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
    return success(prefixLen);
}

bool
Parser::Tokenizer::skipOne(const CharacterSet &chars)
{
    if (!buf_.isEmpty() && chars[buf_[0]]) {
        debugs(24, 8, "skipping one-of " << chars.name);
        return success(1);
    }
    debugs(24, 8, "no match while skipping one-of " << chars.name);
    return false;
}

bool
Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
{
    if (buf_.length() < tokenToSkip.length())
        return false;

    SBuf::size_type offset = 0;
    if (tokenToSkip.length() < buf_.length())
        offset = buf_.length() - tokenToSkip.length();

    if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
        buf_ = buf_.substr(0,offset);
        return true;
    }
    return false;
}

bool
Parser::Tokenizer::skip(const SBuf &tokenToSkip)
{
    if (buf_.startsWith(tokenToSkip)) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return success(tokenToSkip.length());
    }
    debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
    return false;
}

bool
Parser::Tokenizer::skip(const char tokenChar)
{
    if (!buf_.isEmpty() && buf_[0] == tokenChar) {
        debugs(24, 8, "skipping char '" << tokenChar << '\'');
        return success(1);
    }
    debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
    return false;
}

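/*
 * Usage sketch for the skip*() helpers: each consumes matching input and
 * reports whether anything was skipped, leaving the buffer untouched on a
 * mismatch. skipOne() eats a single character from a set; skipSuffix() trims
 * a literal from the end instead of the start. The "input" buffer and the
 * request line below are hypothetical; CharacterSet::SP is assumed to be the
 * predefined single-space set.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "GET /index"
 *   if (tok.skip(SBuf("GET"))) {             // exact-match skip of a literal
 *       tok.skipAll(CharacterSet::SP);       // any run of spaces (may be zero)
 *       if (tok.skip('/')) {                 // single-character skip
 *           // buffer now holds "index"
 *       }
 *   }
 */
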
/* reworked from compat/strtoll.c */
bool
Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf::size_type limit)
{
    if (atEnd() || limit == 0)
        return false;

    const SBuf range(buf_.substr(0,limit));

    //fixme: account for buf_.size()
    bool neg = false;
    const char *s = range.rawContent();
    const char *end = range.rawContent() + range.length();

    if (allowSign) {
        if (*s == '-') {
            neg = true;
            ++s;
        } else if (*s == '+') {
            ++s;
        }
        if (s >= end) return false;
    }

    // an optional "0x"/"0X" prefix selects (or confirms) base 16
    if ((base == 0 || base == 16) && *s == '0' && (s + 1 < end) &&
            tolower(*(s+1)) == 'x') {
        s += 2;
        base = 16;
    }

    // base autodetection: a leading '0' means octal, otherwise decimal
    if (base == 0) {
        if (*s == '0') {
            base = 8;
            ++s;
        } else {
            base = 10;
        }
    }
    if (s >= end) return false;

    // overflow detection: accumulating one more digit past cutoff/cutlim
    // would no longer fit into int64_t
    uint64_t cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
    const int cutlim = cutoff % static_cast<int64_t>(base);
    cutoff /= static_cast<uint64_t>(base);

    int any = 0;
    int64_t acc = 0;
    while (s < end) {
        int c = *s;
        if (xisdigit(c)) {
            c -= '0';
        } else if (xisalpha(c)) {
            c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break; // not a digit at all
        }
        if (c >= base)
            break; // not a valid digit in this base
        ++s; // consume the digit
        if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim))
            any = -1; // keep scanning digits, but remember the overflow
        else {
            any = 1;
            acc *= base;
            acc += c;
        }
    }

    if (any == 0) // nothing was parsed
        return false;
    if (any < 0) { // overflow
        acc = neg ? INT64_MIN : INT64_MAX;
        errno = ERANGE;
        return false;
    } else if (neg)
        acc = -acc;

    result = acc;
    return success(s - range.rawContent());
}
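
/*
 * Usage sketch for int64(): parse a leading integer in the requested base
 * (base 0 autodetects octal/hex/decimal) and consume only the characters
 * that formed it. The "input" buffer is hypothetical.
 *
 *   Parser::Tokenizer tok(input);            // e.g. input = "8080/tcp"
 *   int64_t port = 0;
 *   if (tok.int64(port, 10, false, SBuf::npos)) {
 *       // port == 8080; "/tcp" remains in the buffer
 *   }
 */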