]> git.ipfire.org Git - thirdparty/squid.git/blame - src/parser/BinaryTokenizer.h
Fix incremental parsing of chunked quoted extensions (#310)
[thirdparty/squid.git] / src / parser / BinaryTokenizer.h
CommitLineData
/*
 * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
8
d9219c2b
CT
9#ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
10#define SQUID_SRC_PARSER_BINARYTOKENIZER_H
6821c276 11
36c774f7 12#include "ip/forward.h"
417da400 13#include "parser/forward.h"
6821c276
CT
14#include "sbuf/SBuf.h"
15
67c99fc6
CT
16namespace Parser
17{
18
c3149111
AR
class BinaryTokenizer;

/// Names the object being parsed so that debugging can use concise, nested
/// field names (e.g., Hello.version.major).
/// Constructing a context makes it the tokenizer's current context; close()
/// (also called by the destructor) makes the enclosing context current again.
class BinaryTokenizerContext
{
public:
    /// starts parsing named object
    explicit BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName);
    ~BinaryTokenizerContext() { close(); }

    // Not copyable: a copy would never be installed as the tokenizer's
    // current context, yet its destructor would still reset that context to
    // parent while the original object may still be in use.
    BinaryTokenizerContext(const BinaryTokenizerContext &) = delete;
    BinaryTokenizerContext &operator=(const BinaryTokenizerContext &) = delete;

    /// ends parsing named object; repeated calls OK
    inline void close();

    /// reports successful parsing of a named object and calls close()
    inline void success();

    BinaryTokenizer &tokenizer; ///< tokenizer being used for parsing
    const BinaryTokenizerContext * const parent; ///< enclosing context or nullptr
    const char *const name; ///< this context description or nullptr
    uint64_t start; ///< context parsing begins at this tokenizer position
};
40
6821c276 41/// Safely extracts byte-oriented (i.e., non-textual) fields from raw input.
d9219c2b 42/// Assume that the integers are stored in network byte order.
6821c276
CT
43/// Supports commit points for atomic incremental parsing of multi-part fields.
44/// Throws InsufficientInput when more input is needed to parse the next field.
45/// Throws on errors.
46class BinaryTokenizer
47{
48public:
417da400 49 typedef ::Parser::InsufficientInput InsufficientInput;
6821c276
CT
50 typedef uint64_t size_type; // enough for the largest supported offset
51
52 BinaryTokenizer();
19928af1 53 explicit BinaryTokenizer(const SBuf &data, const bool expectMore = false);
6821c276
CT
54
55 /// restart parsing from the very beginning
56 /// this method is for using one BinaryTokenizer to parse independent inputs
19928af1 57 void reset(const SBuf &data, const bool expectMore);
6821c276 58
19928af1 59 /// change input state without changing parsing state
6821c276 60 /// this method avoids append overheads during incremental parsing
19928af1 61 void reinput(const SBuf &data, const bool expectMore) { data_ = data; expectMore_ = expectMore; }
6821c276
CT
62
63 /// make progress: future parsing failures will not rollback beyond this point
64 void commit();
65
66 /// resume [incremental] parsing from the last commit point
67 void rollback();
68
69 /// no more bytes to parse or skip
70 bool atEnd() const;
71
72 /// parse a single-byte unsigned integer
73 uint8_t uint8(const char *description);
74
d9219c2b 75 /// parse a two-byte unsigned integer
6821c276
CT
76 uint16_t uint16(const char *description);
77
d9219c2b 78 /// parse a three-byte unsigned integer (returned as uint32_t)
6821c276
CT
79 uint32_t uint24(const char *description);
80
d9219c2b 81 /// parse a four-byte unsigned integer
6821c276
CT
82 uint32_t uint32(const char *description);
83
84 /// parse size consecutive bytes as an opaque blob
85 SBuf area(uint64_t size, const char *description);
86
36c774f7
EB
87 /// interpret the next 4 bytes as a raw in_addr structure
88 Ip::Address inet4(const char *description);
89
90 /// interpret the next 16 bytes as a raw in6_addr structure
91 Ip::Address inet6(const char *description);
92
a804b6fe
AR
93 /*
94 * Variable-length arrays (a.k.a. Pascal or prefix strings).
95 * pstringN() extracts and returns N-bit length followed by length bytes
96 */
97 SBuf pstring8(const char *description); ///< up to 255 byte-long p-string
98 SBuf pstring16(const char *description); ///< up to 64 KiB-long p-string
99 SBuf pstring24(const char *description); ///< up to 16 MiB-long p-string!
100
6821c276
CT
101 /// ignore the next size bytes
102 void skip(uint64_t size, const char *description);
103
c3149111
AR
104 /// the number of already parsed bytes
105 uint64_t parsed() const { return parsed_; }
106
6821c276
CT
107 /// yet unparsed bytes
108 SBuf leftovers() const { return data_.substr(parsed_); }
109
c3149111
AR
110 /// debugging helper for parsed multi-field structures
111 void got(uint64_t size, const char *description) const;
112
113 const BinaryTokenizerContext *context; ///< debugging: thing being parsed
6821c276
CT
114
115protected:
116 uint32_t octet();
117 void want(uint64_t size, const char *description) const;
118 void got(uint32_t value, uint64_t size, const char *description) const;
119 void got(const SBuf &value, uint64_t size, const char *description) const;
36c774f7 120 void got(const Ip::Address &value, uint64_t size, const char *description) const;
6821c276
CT
121 void skipped(uint64_t size, const char *description) const;
122
123private:
36c774f7
EB
124 template <class InAddr>
125 Ip::Address inetAny(const char *description);
126
6821c276
CT
127 SBuf data_;
128 uint64_t parsed_; ///< number of data bytes parsed or skipped
129 uint64_t syncPoint_; ///< where to re-start the next parsing attempt
19928af1 130 bool expectMore_; ///< whether more data bytes may arrive in the future
6821c276
CT
131};
132
c3149111
AR
133/* BinaryTokenizerContext */
134
135inline
136BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName):
137 tokenizer(tk),
138 parent(tk.context),
139 name(aName),
140 start(tk.parsed())
141{
142 tk.context = this;
143}
144
145inline
146void
147BinaryTokenizerContext::close() {
148 tokenizer.context = parent;
149}
150
151inline
152void
153BinaryTokenizerContext::success() {
154 tokenizer.got(tokenizer.parsed() - start, "");
155 close();
156}
157
67c99fc6
CT
158} /* namespace Parser */
159
d9219c2b 160#endif // SQUID_SRC_PARSER_BINARYTOKENIZER_H
fde0b2ca 161