src/parser/BinaryTokenizer.h

   1 /*
   2  * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
  10 #define SQUID_SRC_PARSER_BINARYTOKENIZER_H
  11
  12 #include "sbuf/SBuf.h"
  13
  14 namespace Parser
  15 {
  16
  17 class BinaryTokenizer;
  18
  19 /// Enables efficient debugging with concise field names: Hello.version.major
     /// Each context names the object currently being parsed and links to the
     /// context that was current when it was created (its parent). Creating a
     /// context makes it the tokenizer's current context; close() makes the
     /// parent current again.
  20 class BinaryTokenizerContext
  21 {
  22 public:
  23     /// starts parsing the object named aName and becomes tk's current context
  24     explicit BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName);
  25     ~BinaryTokenizerContext() { close(); } // leaving scope ends this context via close()
  26
  27     /// ends parsing named object by restoring the parent context; repeated calls OK
  28     inline void close();
  29
  30     /// reports successful parsing of a named object (all bytes parsed since
         /// start) to the tokenizer and then calls close()
  31     inline void success();
  32
  33     BinaryTokenizer &tokenizer; ///< tokenizer being used for parsing
  34     const BinaryTokenizerContext * const parent; ///< enclosing context or nullptr
  35     const char *const name; ///< this context description or nullptr
  36     uint64_t start; ///< context parsing begins at this tokenizer position (tokenizer.parsed() at construction)
  37 };
  38
  39 /// Safely extracts byte-oriented (i.e., non-textual) fields from raw input.
  40 /// Assumes that multi-byte integers are stored in network byte order.
  41 /// Supports commit points for atomic incremental parsing of multi-part fields.
  42 /// Throws InsufficientInput when more input is needed to parse the next field.
  43 /// Throws on errors.
     /// NOTE(review): the description argument accepted by most methods looks like
     /// a field label for debugging output; confirm against the implementation file.
  44 class BinaryTokenizer
  45 {
  46 public:
  47     class InsufficientInput {}; // empty tag type thrown when a method runs out of data
  48     typedef uint64_t size_type; // enough for the largest supported offset
  49
  50     BinaryTokenizer(); ///< constructs a tokenizer without being given any input
         /// constructs a tokenizer for data; expectMore tells whether more data
         /// bytes may arrive in the future
  51     explicit BinaryTokenizer(const SBuf &data, const bool expectMore = false);
  52
  53     /// restart parsing from the very beginning
  54     /// this method is for using one BinaryTokenizer to parse independent inputs
  55     void reset(const SBuf &data, const bool expectMore);
  56
  57     /// change input state without changing parsing state:
         /// replaces the input buffer and the expectMore flag, leaving the parsed
         /// byte count and the last commit point untouched
  58     /// this method avoids append overheads during incremental parsing
  59     void reinput(const SBuf &data, const bool expectMore) { data_ = data; expectMore_ = expectMore; }
  60
  61     /// make progress: future parsing failures will not roll back beyond this point
  62     void commit();
  63
  64     /// resume [incremental] parsing from the last commit point
  65     void rollback();
  66
  67     /// whether there are no more bytes to parse or skip
  68     bool atEnd() const;
  69
  70     /// parse a single-byte unsigned integer
  71     uint8_t uint8(const char *description);
  72
  73     /// parse a two-byte unsigned integer
  74     uint16_t uint16(const char *description);
  75
  76     /// parse a three-byte unsigned integer (returned as uint32_t)
  77     uint32_t uint24(const char *description);
  78
  79     /// parse a four-byte unsigned integer
  80     uint32_t uint32(const char *description);
  81
  82     /// parse size consecutive bytes as an opaque blob
  83     SBuf area(uint64_t size, const char *description);
  84
  85     /*
  86      * Variable-length arrays (a.k.a. Pascal or prefix strings).
  87      * pstringN() extracts and returns an N-bit length followed by length bytes.
          * NOTE(review): whether the returned SBuf includes the length prefix is
          * not visible in this header; check the implementation file.
  88      */
  89     SBuf pstring8(const char *description); ///< up to 255 byte-long p-string
  90     SBuf pstring16(const char *description); ///< up to 64 KiB-long p-string
  91     SBuf pstring24(const char *description); ///< up to 16 MiB-long p-string!
  92
  93     /// ignore the next size bytes
  94     void skip(uint64_t size, const char *description);
  95
  96     /// the number of already parsed (or skipped) bytes
  97     uint64_t parsed() const { return parsed_; }
  98
  99     /// yet unparsed bytes: the input from offset parsed() onwards
 100     SBuf leftovers() const { return data_.substr(parsed_); }
 101
 102     /// debugging helper for parsed multi-field structures
 103     void got(uint64_t size, const char *description) const;
 104
         /// BinaryTokenizerContext objects set this on construction and restore
         /// their parent here on close()
 105     const BinaryTokenizerContext *context; ///< debugging: thing being parsed
 106
 107 protected:
         // NOTE(review): the helpers below are undocumented in this header; judging
         // by their names and signatures, octet() yields the next input byte and
         // want()/got()/skipped() report parsing progress -- confirm in the .cc file.
 108     uint32_t octet();
 109     void want(uint64_t size, const char *description) const;
 110     void got(uint32_t value, uint64_t size, const char *description) const;
 111     void got(const SBuf &value, uint64_t size, const char *description) const;
 112     void skipped(uint64_t size, const char *description) const;
 113
 114 private:
 115     SBuf data_; ///< input being parsed; replaced wholesale by reinput()
 116     uint64_t parsed_; ///< number of data bytes parsed or skipped
 117     uint64_t syncPoint_; ///< where to re-start the next parsing attempt
 118     bool expectMore_; ///< whether more data bytes may arrive in the future
 119 };
 120
 121 /* BinaryTokenizerContext inline definitions */
 122
 123 inline
     /// Opens a parsing context named aName on tokenizer tk: remembers the context
     /// that was current (as parent) and the current parsing position (as start),
     /// then installs this object as tk's current context.
 124 BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName):
 125     tokenizer(tk),
 126     parent(tk.context), // whatever context was current becomes the enclosing one
 127     name(aName),
 128     start(tk.parsed()) // bytes already parsed when this context opened
 129 {
 130     tk.context = this; // contexts created after this one will record it as their parent
 131 }
 132
 133 inline
     /// Ends this context by making the enclosing (parent) context the tokenizer's
     /// current context again. It only performs that assignment, so the destructor
     /// may safely call it again after an explicit close() or success().
 134 void
 135 BinaryTokenizerContext::close() {
 136     tokenizer.context = parent;
 137 }
 138
 139 inline
     /// Reports the whole named object as parsed and then closes this context.
     /// The size passed to the tokenizer's got() debugging helper is the number of
     /// bytes parsed since this context was created; the description is empty.
 140 void
 141 BinaryTokenizerContext::success() {
 142     tokenizer.got(tokenizer.parsed() - start, ""); // size = bytes consumed within this context
 143     close();
 144 }
 145
 146 } /* namespace Parser */
 147
 148 #endif // SQUID_SRC_PARSER_BINARYTOKENIZER_H
 149