]> git.ipfire.org Git - thirdparty/squid.git/blob - src/parser/BinaryTokenizer.h
Finalized BinaryTokenizer context handling. Polished.
[thirdparty/squid.git] / src / parser / BinaryTokenizer.h
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef SQUID_BINARY_TOKENIZER_H
10 #define SQUID_BINARY_TOKENIZER_H
11
12 #include "sbuf/SBuf.h"
13
class BinaryTokenizer;

/// Enables efficient debugging with concise field names: Hello.version.major
///
/// Creating a context makes it the tokenizer's current context and remembers
/// the previously current one as its parent. close(), which the destructor
/// also runs, makes the parent current again.
class BinaryTokenizerContext
{
public:
    /// starts parsing named object
    /// \param tk the tokenizer whose current context this object becomes
    /// \param aName context description; only the pointer is stored
    inline explicit BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName);
    ~BinaryTokenizerContext() { close(); }

    // Not copyable: a copy would share our parent and, when destroyed, would
    // reset tokenizer.context while the original context is still in use.
    BinaryTokenizerContext(const BinaryTokenizerContext &) = delete;
    BinaryTokenizerContext &operator =(const BinaryTokenizerContext &) = delete;

    /// ends parsing named object; repeated calls OK
    inline void close();

    /// reports successful parsing of a named object and calls close()
    inline void success();

    BinaryTokenizer &tokenizer; ///< tokenizer being used for parsing
    const BinaryTokenizerContext *parent; ///< enclosing context or nullptr
    const char *name; ///< this context description or nullptr
    uint64_t start; ///< context parsing begins at this tokenizer position
};
35
36 /// Safely extracts byte-oriented (i.e., non-textual) fields from raw input.
37 /// Supports commit points for atomic incremental parsing of multi-part fields.
38 /// Throws InsufficientInput when more input is needed to parse the next field.
39 /// Throws on errors.
40 class BinaryTokenizer
41 {
42 public:
43 class InsufficientInput {}; // thrown when a method runs out of data
44 typedef uint64_t size_type; // enough for the largest supported offset
45
46 BinaryTokenizer();
47 explicit BinaryTokenizer(const SBuf &data, const bool expectMore = false);
48
49 /// restart parsing from the very beginning
50 /// this method is for using one BinaryTokenizer to parse independent inputs
51 void reset(const SBuf &data, const bool expectMore);
52
53 /// change input state without changing parsing state
54 /// this method avoids append overheads during incremental parsing
55 void reinput(const SBuf &data, const bool expectMore) { data_ = data; expectMore_ = expectMore; }
56
57 /// make progress: future parsing failures will not rollback beyond this point
58 void commit();
59
60 /// resume [incremental] parsing from the last commit point
61 void rollback();
62
63 /// no more bytes to parse or skip
64 bool atEnd() const;
65
66 /// parse a single-byte unsigned integer
67 uint8_t uint8(const char *description);
68
69 // parse a two-byte unsigned integer
70 uint16_t uint16(const char *description);
71
72 // parse a three-byte unsigned integer (returned as uint32_t)
73 uint32_t uint24(const char *description);
74
75 // parse a four-byte unsigned integer
76 uint32_t uint32(const char *description);
77
78 /// parse size consecutive bytes as an opaque blob
79 SBuf area(uint64_t size, const char *description);
80
81 /*
82 * Variable-length arrays (a.k.a. Pascal or prefix strings).
83 * pstringN() extracts and returns N-bit length followed by length bytes
84 */
85 SBuf pstring8(const char *description); ///< up to 255 byte-long p-string
86 SBuf pstring16(const char *description); ///< up to 64 KiB-long p-string
87 SBuf pstring24(const char *description); ///< up to 16 MiB-long p-string!
88
89 /// ignore the next size bytes
90 void skip(uint64_t size, const char *description);
91
92 /// the number of already parsed bytes
93 uint64_t parsed() const { return parsed_; }
94
95 /// yet unparsed bytes
96 SBuf leftovers() const { return data_.substr(parsed_); }
97
98 /// debugging helper for parsed multi-field structures
99 void got(uint64_t size, const char *description) const;
100
101 const BinaryTokenizerContext *context; ///< debugging: thing being parsed
102
103 protected:
104 uint32_t octet();
105 void want(uint64_t size, const char *description) const;
106 void got(uint32_t value, uint64_t size, const char *description) const;
107 void got(const SBuf &value, uint64_t size, const char *description) const;
108 void skipped(uint64_t size, const char *description) const;
109
110 private:
111 SBuf data_;
112 uint64_t parsed_; ///< number of data bytes parsed or skipped
113 uint64_t syncPoint_; ///< where to re-start the next parsing attempt
114 bool expectMore_; ///< whether more data bytes may arrive in the future
115 };
116
117 /* BinaryTokenizerContext */
118
119 inline
120 BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName):
121 tokenizer(tk),
122 parent(tk.context),
123 name(aName),
124 start(tk.parsed())
125 {
126 tk.context = this;
127 }
128
129 inline
130 void
131 BinaryTokenizerContext::close() {
132 tokenizer.context = parent;
133 }
134
135 inline
136 void
137 BinaryTokenizerContext::success() {
138 tokenizer.got(tokenizer.parsed() - start, "");
139 close();
140 }
141
142 #endif // SQUID_BINARY_TOKENIZER_H