]>
Commit | Line | Data |
---|---|---|
6821c276 | 1 | /* |
5b74111a | 2 | * Copyright (C) 1996-2018 The Squid Software Foundation and contributors |
6821c276 CT |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
d9219c2b CT |
9 | #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H |
10 | #define SQUID_SRC_PARSER_BINARYTOKENIZER_H | |
6821c276 CT |
11 | |
12 | #include "sbuf/SBuf.h" | |
13 | ||
67c99fc6 CT |
14 | namespace Parser |
15 | { | |
16 | ||
c3149111 AR |
17 | class BinaryTokenizer; |
18 | ||
19 | /// enables efficient debugging with concise field names: Hello.version.major | |
20 | class BinaryTokenizerContext | |
21 | { | |
22 | public: | |
23 | /// starts parsing named object | |
d9219c2b | 24 | explicit BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName); |
c3149111 AR |
25 | ~BinaryTokenizerContext() { close(); } |
26 | ||
27 | /// ends parsing named object; repeated calls OK | |
28 | inline void close(); | |
29 | ||
30 | /// reports successful parsing of a named object and calls close() | |
31 | inline void success(); | |
32 | ||
33 | BinaryTokenizer &tokenizer; ///< tokenizer being used for parsing | |
d9219c2b CT |
34 | const BinaryTokenizerContext * const parent; ///< enclosing context or nullptr |
35 | const char *const name; ///< this context description or nullptr | |
c3149111 AR |
36 | uint64_t start; ///< context parsing begins at this tokenizer position |
37 | }; | |
38 | ||
6821c276 | 39 | /// Safely extracts byte-oriented (i.e., non-textual) fields from raw input. |
d9219c2b | 40 | /// Assume that the integers are stored in network byte order. |
6821c276 CT |
41 | /// Supports commit points for atomic incremental parsing of multi-part fields. |
42 | /// Throws InsufficientInput when more input is needed to parse the next field. | |
43 | /// Throws on errors. | |
44 | class BinaryTokenizer | |
45 | { | |
46 | public: | |
47 | class InsufficientInput {}; // thrown when a method runs out of data | |
48 | typedef uint64_t size_type; // enough for the largest supported offset | |
49 | ||
50 | BinaryTokenizer(); | |
19928af1 | 51 | explicit BinaryTokenizer(const SBuf &data, const bool expectMore = false); |
6821c276 CT |
52 | |
53 | /// restart parsing from the very beginning | |
54 | /// this method is for using one BinaryTokenizer to parse independent inputs | |
19928af1 | 55 | void reset(const SBuf &data, const bool expectMore); |
6821c276 | 56 | |
19928af1 | 57 | /// change input state without changing parsing state |
6821c276 | 58 | /// this method avoids append overheads during incremental parsing |
19928af1 | 59 | void reinput(const SBuf &data, const bool expectMore) { data_ = data; expectMore_ = expectMore; } |
6821c276 CT |
60 | |
61 | /// make progress: future parsing failures will not rollback beyond this point | |
62 | void commit(); | |
63 | ||
64 | /// resume [incremental] parsing from the last commit point | |
65 | void rollback(); | |
66 | ||
67 | /// no more bytes to parse or skip | |
68 | bool atEnd() const; | |
69 | ||
70 | /// parse a single-byte unsigned integer | |
71 | uint8_t uint8(const char *description); | |
72 | ||
d9219c2b | 73 | /// parse a two-byte unsigned integer |
6821c276 CT |
74 | uint16_t uint16(const char *description); |
75 | ||
d9219c2b | 76 | /// parse a three-byte unsigned integer (returned as uint32_t) |
6821c276 CT |
77 | uint32_t uint24(const char *description); |
78 | ||
d9219c2b | 79 | /// parse a four-byte unsigned integer |
6821c276 CT |
80 | uint32_t uint32(const char *description); |
81 | ||
82 | /// parse size consecutive bytes as an opaque blob | |
83 | SBuf area(uint64_t size, const char *description); | |
84 | ||
a804b6fe AR |
85 | /* |
86 | * Variable-length arrays (a.k.a. Pascal or prefix strings). | |
87 | * pstringN() extracts and returns N-bit length followed by length bytes | |
88 | */ | |
89 | SBuf pstring8(const char *description); ///< up to 255 byte-long p-string | |
90 | SBuf pstring16(const char *description); ///< up to 64 KiB-long p-string | |
91 | SBuf pstring24(const char *description); ///< up to 16 MiB-long p-string! | |
92 | ||
6821c276 CT |
93 | /// ignore the next size bytes |
94 | void skip(uint64_t size, const char *description); | |
95 | ||
c3149111 AR |
96 | /// the number of already parsed bytes |
97 | uint64_t parsed() const { return parsed_; } | |
98 | ||
6821c276 CT |
99 | /// yet unparsed bytes |
100 | SBuf leftovers() const { return data_.substr(parsed_); } | |
101 | ||
c3149111 AR |
102 | /// debugging helper for parsed multi-field structures |
103 | void got(uint64_t size, const char *description) const; | |
104 | ||
105 | const BinaryTokenizerContext *context; ///< debugging: thing being parsed | |
6821c276 CT |
106 | |
107 | protected: | |
108 | uint32_t octet(); | |
109 | void want(uint64_t size, const char *description) const; | |
110 | void got(uint32_t value, uint64_t size, const char *description) const; | |
111 | void got(const SBuf &value, uint64_t size, const char *description) const; | |
112 | void skipped(uint64_t size, const char *description) const; | |
113 | ||
114 | private: | |
115 | SBuf data_; | |
116 | uint64_t parsed_; ///< number of data bytes parsed or skipped | |
117 | uint64_t syncPoint_; ///< where to re-start the next parsing attempt | |
19928af1 | 118 | bool expectMore_; ///< whether more data bytes may arrive in the future |
6821c276 CT |
119 | }; |
120 | ||
c3149111 AR |
121 | /* BinaryTokenizerContext */ |
122 | ||
123 | inline | |
124 | BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName): | |
125 | tokenizer(tk), | |
126 | parent(tk.context), | |
127 | name(aName), | |
128 | start(tk.parsed()) | |
129 | { | |
130 | tk.context = this; | |
131 | } | |
132 | ||
133 | inline | |
134 | void | |
135 | BinaryTokenizerContext::close() { | |
136 | tokenizer.context = parent; | |
137 | } | |
138 | ||
139 | inline | |
140 | void | |
141 | BinaryTokenizerContext::success() { | |
142 | tokenizer.got(tokenizer.parsed() - start, ""); | |
143 | close(); | |
144 | } | |
145 | ||
67c99fc6 CT |
146 | } /* namespace Parser */ |
147 | ||
d9219c2b | 148 | #endif // SQUID_SRC_PARSER_BINARYTOKENIZER_H |
fde0b2ca | 149 |