/*
 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

/* DEBUG: section 24    SBuf */

#include "squid.h"
#include "Debug.h"
#include "parser/Tokenizer.h"

#include <cerrno>
#if HAVE_CTYPE_H
#include <ctype.h>
#endif
/// convenience method: consumes up to n bytes, counts, and returns them
SBuf
Parser::Tokenizer::consume(const SBuf::size_type n)
{
    // careful: n may be npos!
    debugs(24, 5, "consuming " << n << " bytes");
    const SBuf result = buf_.consume(n);
    parsed_ += result.length();
    return result;
}

/// convenience method: consume()s up to n bytes and returns their count
SBuf::size_type
Parser::Tokenizer::success(const SBuf::size_type n)
{
    return consume(n).length();
}

/// convenience method: consumes up to n last bytes and returns them
SBuf
Parser::Tokenizer::consumeTrailing(const SBuf::size_type n)
{
    debugs(24, 5, "consuming " << n << " bytes");

    // If n is npos, we consume everything from buf_ (and nothing from result).
    const SBuf::size_type parsed = (n == SBuf::npos) ? buf_.length() : n;

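    // Split trick: copy the whole buffer, then consume() the leading
    // (length - parsed) bytes off the copy. consume() returns that leading
    // remainder, which becomes the new buf_, while the copy keeps just the
    // trailing `parsed` bytes for the caller.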
    SBuf result = buf_;
    buf_ = result.consume(buf_.length() - parsed);
    parsed_ += parsed;
    return result;
}

/// convenience method: consumes up to n last bytes and returns their count
SBuf::size_type
Parser::Tokenizer::successTrailing(const SBuf::size_type n)
{
    return consumeTrailing(n).length();
}

bool
Parser::Tokenizer::token(SBuf &returnedToken, const CharacterSet &delimiters)
{
    const Tokenizer saved(*this);
    skipAll(delimiters);
    const SBuf::size_type tokenLen = buf_.findFirstOf(delimiters); // not found = npos => consume to end
    if (tokenLen == SBuf::npos) {
        debugs(24, 8, "no token found for delimiters " << delimiters.name);
        *this = saved;
        return false;
    }
    returnedToken = consume(tokenLen); // cannot be empty
    skipAll(delimiters);
    debugs(24, DBG_DATA, "token found for delimiters " << delimiters.name << ": '" <<
           returnedToken << '\'');
    return true;
}
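
// Hypothetical token() usage sketch; the two-argument (label, characters)
// CharacterSet constructor and the input are illustrative assumptions:
//
//   static const CharacterSet ws("whitespace", " \t");
//   Parser::Tokenizer tok(SBuf(" alpha beta "));
//   SBuf word;
//   while (tok.token(word, ws))
//       ... // yields "alpha", then "beta"
//
// Note that token() backtracks and fails unless some delimiter byte follows
// the token, so the last token of a possibly-incomplete buffer stays
// unconsumed until more input (or a delimiter) arrives.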

bool
Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf::size_type prefixLen = buf_.substr(0, limit).findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no prefix for set " << tokenChars.name);
        return false;
    }
    if (prefixLen == SBuf::npos && (atEnd() || limit == 0)) {
        debugs(24, 8, "no char in set " << tokenChars.name << " while looking for prefix");
        return false;
    }
    if (prefixLen == SBuf::npos && limit > 0) {
        debugs(24, 8, "whole haystack matched");
        prefixLen = limit;
    }
    debugs(24, 8, "found with length " << prefixLen);
    returnedToken = consume(prefixLen); // cannot be empty after the npos check
    return true;
}
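
// Hypothetical prefix() usage sketch; CharacterSet::DIGIT is assumed to be
// one of the predefined RFC 5234 core-rule sets:
//
//   SBuf digits;
//   if (tok.prefix(digits, CharacterSet::DIGIT))
//       ... // the leading run of [0-9] is consumed into `digits`
//
// Unlike token(), prefix() needs no terminating delimiter: it consumes the
// longest leading run of tokenChars (up to limit bytes), even when that run
// reaches the end of the buffer.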

bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
    SBuf span = buf_;

    if (limit < buf_.length())
        span.consume(buf_.length() - limit); // ignore the N prefix characters

    auto i = span.rbegin();
    SBuf::size_type found = 0;
    while (i != span.rend() && tokenChars[*i]) {
        ++i;
        ++found;
    }
    if (!found)
        return false;
    returnedToken = consumeTrailing(found);
    return true;
}
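
// suffix() scans backwards from the end of the (limit-restricted) window,
// counting contiguous tokenChars bytes, then hands that run to
// consumeTrailing(). A hypothetical use, trimming a trailing CRLF run
// (again assuming the (label, characters) CharacterSet constructor):
//
//   static const CharacterSet crlf("CRLF", "\r\n");
//   SBuf eol;
//   (void)tok.suffix(eol, crlf, SBuf::npos);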

SBuf::size_type
Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
{
    const SBuf::size_type prefixLen = buf_.findFirstNotOf(tokenChars);
    if (prefixLen == 0) {
        debugs(24, 8, "no match when trying to skipAll " << tokenChars.name);
        return 0;
    }
    debugs(24, 8, "skipping all in " << tokenChars.name << " len " << prefixLen);
    return success(prefixLen);
}

bool
Parser::Tokenizer::skipOne(const CharacterSet &chars)
{
    if (!buf_.isEmpty() && chars[buf_[0]]) {
        debugs(24, 8, "skipping one-of " << chars.name);
        return success(1);
    }
    debugs(24, 8, "no match while skipping one-of " << chars.name);
    return false;
}

bool
Parser::Tokenizer::skipSuffix(const SBuf &tokenToSkip)
{
    if (buf_.length() < tokenToSkip.length())
        return false;

    SBuf::size_type offset = 0;
    if (tokenToSkip.length() < buf_.length())
        offset = buf_.length() - tokenToSkip.length();

    if (buf_.substr(offset, SBuf::npos).cmp(tokenToSkip) == 0) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return successTrailing(tokenToSkip.length());
    }
    return false;
}
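
// A hypothetical skipSuffix() use, dropping a known trailer:
//
//   if (tok.skipSuffix(SBuf("\r\n")))
//       ... // the buffer ended with CRLF, now consumed
//
// The comparison is anchored at the buffer end: substr(offset) spans exactly
// tokenToSkip.length() bytes, so a match in the middle of the buffer can
// never trigger a skip.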

bool
Parser::Tokenizer::skip(const SBuf &tokenToSkip)
{
    if (buf_.startsWith(tokenToSkip)) {
        debugs(24, 8, "skipping " << tokenToSkip.length());
        return success(tokenToSkip.length());
    }
    debugs(24, 8, "no match, not skipping '" << tokenToSkip << '\'');
    return false;
}

bool
Parser::Tokenizer::skip(const char tokenChar)
{
    if (!buf_.isEmpty() && buf_[0] == tokenChar) {
        debugs(24, 8, "skipping char '" << tokenChar << '\'');
        return success(1);
    }
    debugs(24, 8, "no match, not skipping char '" << tokenChar << '\'');
    return false;
}

bool
Parser::Tokenizer::skipOneTrailing(const CharacterSet &skippable)
{
    if (!buf_.isEmpty() && skippable[buf_[buf_.length()-1]]) {
        debugs(24, 8, "skipping one-of " << skippable.name);
        return successTrailing(1);
    }
    debugs(24, 8, "no match while skipping one-of " << skippable.name);
    return false;
}

SBuf::size_type
Parser::Tokenizer::skipAllTrailing(const CharacterSet &skippable)
{
    const SBuf::size_type prefixEnd = buf_.findLastNotOf(skippable);
    const SBuf::size_type prefixLen = prefixEnd == SBuf::npos ?
                                      0 : (prefixEnd + 1);
    const SBuf::size_type suffixLen = buf_.length() - prefixLen;
    if (suffixLen == 0) {
        debugs(24, 8, "no match when trying to skip " << skippable.name);
        return 0;
    }
    debugs(24, 8, "skipping in " << skippable.name << " len " << suffixLen);
    return successTrailing(suffixLen);
}
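
// skipAllTrailing() derives the suffix length from the position of the last
// byte NOT in `skippable`: everything after that byte is skippable. When
// findLastNotOf() returns npos, the whole buffer is skippable, prefixLen is
// 0, and the entire buffer gets consumed.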

/* reworked from compat/strtoll.c */
bool
Parser::Tokenizer::int64(int64_t &result, int base, bool allowSign, const SBuf::size_type limit)
{
    if (atEnd() || limit == 0)
        return false;

    const SBuf range(buf_.substr(0, limit));

    //fixme: account for buf_.size()
    bool neg = false;
    const char *s = range.rawContent();
    const char *end = range.rawContent() + range.length();

    if (allowSign) {
        if (*s == '-') {
            neg = true;
            ++s;
        } else if (*s == '+') {
            ++s;
        }
        if (s >= end) return false;
    }
    if ((base == 0 || base == 16) && *s == '0' && (s + 1 < end) &&
            tolower(*(s + 1)) == 'x') {
        s += 2;
        base = 16;
    }
    if (base == 0) {
        if (*s == '0') {
            base = 8;
            ++s;
        } else {
            base = 10;
        }
    }
    if (s >= end) return false;

    uint64_t cutoff = neg ? -static_cast<uint64_t>(INT64_MIN) : INT64_MAX;
    const int cutlim = cutoff % static_cast<int64_t>(base);
    cutoff /= static_cast<uint64_t>(base);
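    // Overflow guard, strtoll-style: with bound being the magnitude limit for
    // the sign in effect (-INT64_MIN when negative, INT64_MAX otherwise),
    // acc*base + digit would overflow exactly when acc > bound/base, or when
    // acc == bound/base and digit > bound%base. cutoff and cutlim now hold
    // those two thresholds.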

    int any = 0, c;
    int64_t acc = 0;
    do {
        c = *s;
        if (xisdigit(c)) {
            c -= '0';
        } else if (xisalpha(c)) {
            c -= xisupper(c) ? 'A' - 10 : 'a' - 10;
        } else {
            break;
        }
        if (c >= base)
            break;
        if (any < 0 || static_cast<uint64_t>(acc) > cutoff || (static_cast<uint64_t>(acc) == cutoff && c > cutlim)) {
            any = -1;
        } else {
            any = 1;
            acc *= base;
            acc += c;
        }
    } while (++s < end);

    if (any == 0) // nothing was parsed
        return false;
    if (any < 0) {
        acc = neg ? INT64_MIN : INT64_MAX;
        errno = ERANGE;
        return false;
    } else if (neg)
        acc = -acc;

    result = acc;
    return success(s - range.rawContent());
}
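
// Hypothetical int64() usage sketch; the input value is made up:
//
//   Parser::Tokenizer tok(SBuf("0x1Fpx"));
//   int64_t n = 0;
//   if (tok.int64(n, 0, true, SBuf::npos))
//       ... // n == 31; "px" stays in the buffer for further parsing
//
// With base == 0 the radix is auto-detected, strtol-style: a "0x" prefix
// selects 16, a remaining leading "0" selects 8, and anything else 10.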