]>
Commit | Line | Data |
---|---|---|
6821c276 | 1 | /* |
f6e9a3ee | 2 | * Copyright (C) 1996-2019 The Squid Software Foundation and contributors |
6821c276 CT |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
d9219c2b CT |
9 | #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H |
10 | #define SQUID_SRC_PARSER_BINARYTOKENIZER_H | |
6821c276 | 11 | |
36c774f7 | 12 | #include "ip/forward.h" |
417da400 | 13 | #include "parser/forward.h" |
6821c276 CT |
14 | #include "sbuf/SBuf.h" |
15 | ||
67c99fc6 CT |
16 | namespace Parser |
17 | { | |
18 | ||
c3149111 AR |
19 | class BinaryTokenizer; |
20 | ||
21 | /// enables efficient debugging with concise field names: Hello.version.major | |
22 | class BinaryTokenizerContext | |
23 | { | |
24 | public: | |
25 | /// starts parsing named object | |
d9219c2b | 26 | explicit BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName); |
c3149111 AR |
27 | ~BinaryTokenizerContext() { close(); } |
28 | ||
29 | /// ends parsing named object; repeated calls OK | |
30 | inline void close(); | |
31 | ||
32 | /// reports successful parsing of a named object and calls close() | |
33 | inline void success(); | |
34 | ||
35 | BinaryTokenizer &tokenizer; ///< tokenizer being used for parsing | |
d9219c2b CT |
36 | const BinaryTokenizerContext * const parent; ///< enclosing context or nullptr |
37 | const char *const name; ///< this context description or nullptr | |
c3149111 AR |
38 | uint64_t start; ///< context parsing begins at this tokenizer position |
39 | }; | |
40 | ||
6821c276 | 41 | /// Safely extracts byte-oriented (i.e., non-textual) fields from raw input. |
d9219c2b | 42 | /// Assume that the integers are stored in network byte order. |
6821c276 CT |
43 | /// Supports commit points for atomic incremental parsing of multi-part fields. |
44 | /// Throws InsufficientInput when more input is needed to parse the next field. | |
45 | /// Throws on errors. | |
46 | class BinaryTokenizer | |
47 | { | |
48 | public: | |
417da400 | 49 | typedef ::Parser::InsufficientInput InsufficientInput; |
6821c276 CT |
50 | typedef uint64_t size_type; // enough for the largest supported offset |
51 | ||
52 | BinaryTokenizer(); | |
19928af1 | 53 | explicit BinaryTokenizer(const SBuf &data, const bool expectMore = false); |
6821c276 CT |
54 | |
55 | /// restart parsing from the very beginning | |
56 | /// this method is for using one BinaryTokenizer to parse independent inputs | |
19928af1 | 57 | void reset(const SBuf &data, const bool expectMore); |
6821c276 | 58 | |
19928af1 | 59 | /// change input state without changing parsing state |
6821c276 | 60 | /// this method avoids append overheads during incremental parsing |
19928af1 | 61 | void reinput(const SBuf &data, const bool expectMore) { data_ = data; expectMore_ = expectMore; } |
6821c276 CT |
62 | |
63 | /// make progress: future parsing failures will not rollback beyond this point | |
64 | void commit(); | |
65 | ||
66 | /// resume [incremental] parsing from the last commit point | |
67 | void rollback(); | |
68 | ||
69 | /// no more bytes to parse or skip | |
70 | bool atEnd() const; | |
71 | ||
72 | /// parse a single-byte unsigned integer | |
73 | uint8_t uint8(const char *description); | |
74 | ||
d9219c2b | 75 | /// parse a two-byte unsigned integer |
6821c276 CT |
76 | uint16_t uint16(const char *description); |
77 | ||
d9219c2b | 78 | /// parse a three-byte unsigned integer (returned as uint32_t) |
6821c276 CT |
79 | uint32_t uint24(const char *description); |
80 | ||
d9219c2b | 81 | /// parse a four-byte unsigned integer |
6821c276 CT |
82 | uint32_t uint32(const char *description); |
83 | ||
84 | /// parse size consecutive bytes as an opaque blob | |
85 | SBuf area(uint64_t size, const char *description); | |
86 | ||
36c774f7 EB |
87 | /// interpret the next 4 bytes as a raw in_addr structure |
88 | Ip::Address inet4(const char *description); | |
89 | ||
90 | /// interpret the next 16 bytes as a raw in6_addr structure | |
91 | Ip::Address inet6(const char *description); | |
92 | ||
a804b6fe AR |
93 | /* |
94 | * Variable-length arrays (a.k.a. Pascal or prefix strings). | |
95 | * pstringN() extracts and returns N-bit length followed by length bytes | |
96 | */ | |
97 | SBuf pstring8(const char *description); ///< up to 255 byte-long p-string | |
98 | SBuf pstring16(const char *description); ///< up to 64 KiB-long p-string | |
99 | SBuf pstring24(const char *description); ///< up to 16 MiB-long p-string! | |
100 | ||
6821c276 CT |
101 | /// ignore the next size bytes |
102 | void skip(uint64_t size, const char *description); | |
103 | ||
c3149111 AR |
104 | /// the number of already parsed bytes |
105 | uint64_t parsed() const { return parsed_; } | |
106 | ||
6821c276 CT |
107 | /// yet unparsed bytes |
108 | SBuf leftovers() const { return data_.substr(parsed_); } | |
109 | ||
c3149111 AR |
110 | /// debugging helper for parsed multi-field structures |
111 | void got(uint64_t size, const char *description) const; | |
112 | ||
113 | const BinaryTokenizerContext *context; ///< debugging: thing being parsed | |
6821c276 CT |
114 | |
115 | protected: | |
116 | uint32_t octet(); | |
117 | void want(uint64_t size, const char *description) const; | |
118 | void got(uint32_t value, uint64_t size, const char *description) const; | |
119 | void got(const SBuf &value, uint64_t size, const char *description) const; | |
36c774f7 | 120 | void got(const Ip::Address &value, uint64_t size, const char *description) const; |
6821c276 CT |
121 | void skipped(uint64_t size, const char *description) const; |
122 | ||
123 | private: | |
36c774f7 EB |
124 | template <class InAddr> |
125 | Ip::Address inetAny(const char *description); | |
126 | ||
6821c276 CT |
127 | SBuf data_; |
128 | uint64_t parsed_; ///< number of data bytes parsed or skipped | |
129 | uint64_t syncPoint_; ///< where to re-start the next parsing attempt | |
19928af1 | 130 | bool expectMore_; ///< whether more data bytes may arrive in the future |
6821c276 CT |
131 | }; |
132 | ||
c3149111 AR |
133 | /* BinaryTokenizerContext */ |
134 | ||
135 | inline | |
136 | BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName): | |
137 | tokenizer(tk), | |
138 | parent(tk.context), | |
139 | name(aName), | |
140 | start(tk.parsed()) | |
141 | { | |
142 | tk.context = this; | |
143 | } | |
144 | ||
145 | inline | |
146 | void | |
147 | BinaryTokenizerContext::close() { | |
148 | tokenizer.context = parent; | |
149 | } | |
150 | ||
151 | inline | |
152 | void | |
153 | BinaryTokenizerContext::success() { | |
154 | tokenizer.got(tokenizer.parsed() - start, ""); | |
155 | close(); | |
156 | } | |
157 | ||
67c99fc6 CT |
158 | } /* namespace Parser */ |
159 | ||
d9219c2b | 160 | #endif // SQUID_SRC_PARSER_BINARYTOKENIZER_H |
fde0b2ca | 161 |