/*
 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
9 #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
10 #define SQUID_SRC_PARSER_BINARYTOKENIZER_H
12 #include "ip/forward.h"
13 #include "parser/forward.h"
14 #include "sbuf/SBuf.h"
/// Forward declaration: BinaryTokenizerContext stores a reference to a
/// BinaryTokenizer and therefore needs the name before it is used.
class BinaryTokenizer;
21 /// enables efficient debugging with concise field names: Hello.version.major
22 class BinaryTokenizerContext
25 /// starts parsing named object
26 explicit BinaryTokenizerContext(BinaryTokenizer
&tk
, const char *aName
);
27 ~BinaryTokenizerContext() { close(); }
29 /// ends parsing named object; repeated calls OK
32 /// reports successful parsing of a named object and calls close()
33 inline void success();
35 BinaryTokenizer
&tokenizer
; ///< tokenizer being used for parsing
36 const BinaryTokenizerContext
* const parent
; ///< enclosing context or nullptr
37 const char *const name
; ///< this context description or nullptr
38 uint64_t start
; ///< context parsing begins at this tokenizer position
/// Safely extracts byte-oriented (i.e., non-textual) fields from raw input.
/// Assumes that the integers are stored in network byte order.
/// Supports commit points for atomic incremental parsing of multi-part fields.
/// Throws InsufficientInput when more input is needed to parse the next field.
49 typedef ::Parser::InsufficientInput InsufficientInput
;
/// unsigned type for input offsets and sizes
typedef uint64_t size_type; // enough for the largest supported offset
53 explicit BinaryTokenizer(const SBuf
&data
, const bool expectMore
= false);
55 /// restart parsing from the very beginning
56 /// this method is for using one BinaryTokenizer to parse independent inputs
57 void reset(const SBuf
&data
, const bool expectMore
);
59 /// change input state without changing parsing state
60 /// this method avoids append overheads during incremental parsing
61 void reinput(const SBuf
&data
, const bool expectMore
) { data_
= data
; expectMore_
= expectMore
; }
/// make progress: future parsing failures will not rollback beyond this point

/// resume [incremental] parsing from the last commit point

/// no more bytes to parse or skip
/// parse a single-byte unsigned integer
/// \param description field name (presumably for debugging output)
/// \returns the parsed value
uint8_t uint8(const char *description);
/// parse a two-byte unsigned integer
/// \param description field name (presumably for debugging output)
/// \returns the parsed value
uint16_t uint16(const char *description);
/// parse a three-byte unsigned integer (returned as uint32_t)
/// \param description field name (presumably for debugging output)
/// \returns the parsed value, widened to 32 bits
uint32_t uint24(const char *description);
/// parse a four-byte unsigned integer
/// \param description field name (presumably for debugging output)
/// \returns the parsed value
uint32_t uint32(const char *description);
84 /// parse size consecutive bytes as an opaque blob
85 SBuf
area(uint64_t size
, const char *description
);
87 /// interpret the next 4 bytes as a raw in_addr structure
88 Ip::Address
inet4(const char *description
);
90 /// interpret the next 16 bytes as a raw in6_addr structure
91 Ip::Address
inet6(const char *description
);
94 * Variable-length arrays (a.k.a. Pascal or prefix strings).
95 * pstringN() extracts and returns N-bit length followed by length bytes
97 SBuf
pstring8(const char *description
); ///< up to 255 byte-long p-string
98 SBuf
pstring16(const char *description
); ///< up to 64 KiB-long p-string
99 SBuf
pstring24(const char *description
); ///< up to 16 MiB-long p-string!
/// ignore the next size bytes
/// \param size the number of bytes to ignore
/// \param description name of the ignored field (presumably for debugging output)
void skip(uint64_t size, const char *description);
104 /// the number of already parsed bytes
105 uint64_t parsed() const { return parsed_
; }
107 /// yet unparsed bytes
108 SBuf
leftovers() const { return data_
.substr(parsed_
); }
110 /// debugging helper for parsed multi-field structures
111 void got(uint64_t size
, const char *description
) const;
113 const BinaryTokenizerContext
*context
; ///< debugging: thing being parsed
117 void want(uint64_t size
, const char *description
) const;
118 void got(uint32_t value
, uint64_t size
, const char *description
) const;
119 void got(const SBuf
&value
, uint64_t size
, const char *description
) const;
120 void got(const Ip::Address
&value
, uint64_t size
, const char *description
) const;
121 void skipped(uint64_t size
, const char *description
) const;
124 template <class InAddr
>
125 Ip::Address
inetAny(const char *description
);
uint64_t parsed_; ///< number of data bytes parsed or skipped
uint64_t syncPoint_; ///< where to re-start the next parsing attempt
bool expectMore_; ///< whether more data bytes may arrive in the future
133 /* BinaryTokenizerContext */
136 BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer
&tk
, const char *aName
):
147 BinaryTokenizerContext::close() {
148 tokenizer
.context
= parent
;
153 BinaryTokenizerContext::success() {
154 tokenizer
.got(tokenizer
.parsed() - start
, "");
158 } /* namespace Parser */
160 #endif // SQUID_SRC_PARSER_BINARYTOKENIZER_H