]> git.ipfire.org Git - thirdparty/squid.git/blame - src/parser/BinaryTokenizer.h
Docs: Copyright updates for 2018 (#114)
[thirdparty/squid.git] / src / parser / BinaryTokenizer.h
CommitLineData
6821c276 1/*
5b74111a 2 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
6821c276
CT
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
d9219c2b
CT
9#ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
10#define SQUID_SRC_PARSER_BINARYTOKENIZER_H
6821c276
CT
11
12#include "sbuf/SBuf.h"
13
67c99fc6
CT
14namespace Parser
15{
16
c3149111
AR
class BinaryTokenizer;

/// enables efficient debugging with concise field names: Hello.version.major
///
/// Creating a context makes it the tokenizer's current context and remembers
/// the previously current context as its parent. Closing the context (which
/// the destructor also does) makes the parent current again.
class BinaryTokenizerContext
{
public:
    /// starts parsing named object
    /// \param tk the tokenizer that will parse the object
    /// \param aName description of the object being parsed (or nullptr)
    explicit BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName);

    /// closes the context if the caller has not done so already
    ~BinaryTokenizerContext() { close(); }

    // A context must not be copied: the tokenizer never points at the copy,
    // but destroying that copy would still run close() and reset the
    // tokenizer's current context while the original is in use.
    BinaryTokenizerContext(const BinaryTokenizerContext &) = delete;
    BinaryTokenizerContext &operator =(const BinaryTokenizerContext &) = delete;

    /// ends parsing named object; repeated calls OK
    inline void close();

    /// reports successful parsing of a named object and calls close()
    inline void success();

    BinaryTokenizer &tokenizer; ///< tokenizer being used for parsing
    const BinaryTokenizerContext * const parent; ///< enclosing context or nullptr
    const char *const name; ///< this context description or nullptr
    uint64_t start; ///< context parsing begins at this tokenizer position
};
38
6821c276 39/// Safely extracts byte-oriented (i.e., non-textual) fields from raw input.
d9219c2b 40/// Assume that the integers are stored in network byte order.
6821c276
CT
41/// Supports commit points for atomic incremental parsing of multi-part fields.
42/// Throws InsufficientInput when more input is needed to parse the next field.
43/// Throws on errors.
44class BinaryTokenizer
45{
46public:
47 class InsufficientInput {}; // thrown when a method runs out of data
48 typedef uint64_t size_type; // enough for the largest supported offset
49
50 BinaryTokenizer();
19928af1 51 explicit BinaryTokenizer(const SBuf &data, const bool expectMore = false);
6821c276
CT
52
53 /// restart parsing from the very beginning
54 /// this method is for using one BinaryTokenizer to parse independent inputs
19928af1 55 void reset(const SBuf &data, const bool expectMore);
6821c276 56
19928af1 57 /// change input state without changing parsing state
6821c276 58 /// this method avoids append overheads during incremental parsing
19928af1 59 void reinput(const SBuf &data, const bool expectMore) { data_ = data; expectMore_ = expectMore; }
6821c276
CT
60
61 /// make progress: future parsing failures will not rollback beyond this point
62 void commit();
63
64 /// resume [incremental] parsing from the last commit point
65 void rollback();
66
67 /// no more bytes to parse or skip
68 bool atEnd() const;
69
70 /// parse a single-byte unsigned integer
71 uint8_t uint8(const char *description);
72
d9219c2b 73 /// parse a two-byte unsigned integer
6821c276
CT
74 uint16_t uint16(const char *description);
75
d9219c2b 76 /// parse a three-byte unsigned integer (returned as uint32_t)
6821c276
CT
77 uint32_t uint24(const char *description);
78
d9219c2b 79 /// parse a four-byte unsigned integer
6821c276
CT
80 uint32_t uint32(const char *description);
81
82 /// parse size consecutive bytes as an opaque blob
83 SBuf area(uint64_t size, const char *description);
84
a804b6fe
AR
85 /*
86 * Variable-length arrays (a.k.a. Pascal or prefix strings).
87 * pstringN() extracts and returns N-bit length followed by length bytes
88 */
89 SBuf pstring8(const char *description); ///< up to 255 byte-long p-string
90 SBuf pstring16(const char *description); ///< up to 64 KiB-long p-string
91 SBuf pstring24(const char *description); ///< up to 16 MiB-long p-string!
92
6821c276
CT
93 /// ignore the next size bytes
94 void skip(uint64_t size, const char *description);
95
c3149111
AR
96 /// the number of already parsed bytes
97 uint64_t parsed() const { return parsed_; }
98
6821c276
CT
99 /// yet unparsed bytes
100 SBuf leftovers() const { return data_.substr(parsed_); }
101
c3149111
AR
102 /// debugging helper for parsed multi-field structures
103 void got(uint64_t size, const char *description) const;
104
105 const BinaryTokenizerContext *context; ///< debugging: thing being parsed
6821c276
CT
106
107protected:
108 uint32_t octet();
109 void want(uint64_t size, const char *description) const;
110 void got(uint32_t value, uint64_t size, const char *description) const;
111 void got(const SBuf &value, uint64_t size, const char *description) const;
112 void skipped(uint64_t size, const char *description) const;
113
114private:
115 SBuf data_;
116 uint64_t parsed_; ///< number of data bytes parsed or skipped
117 uint64_t syncPoint_; ///< where to re-start the next parsing attempt
19928af1 118 bool expectMore_; ///< whether more data bytes may arrive in the future
6821c276
CT
119};
120
c3149111
AR
121/* BinaryTokenizerContext */
122
123inline
124BinaryTokenizerContext::BinaryTokenizerContext(BinaryTokenizer &tk, const char *aName):
125 tokenizer(tk),
126 parent(tk.context),
127 name(aName),
128 start(tk.parsed())
129{
130 tk.context = this;
131}
132
133inline
134void
135BinaryTokenizerContext::close() {
136 tokenizer.context = parent;
137}
138
139inline
140void
141BinaryTokenizerContext::success() {
142 tokenizer.got(tokenizer.parsed() - start, "");
143 close();
144}
145
67c99fc6
CT
146} /* namespace Parser */
147
d9219c2b 148#endif // SQUID_SRC_PARSER_BINARYTOKENIZER_H
fde0b2ca 149