]>
Commit | Line | Data |
---|---|---|
bbc27441 | 1 | /* |
f70aedc4 | 2 | * Copyright (C) 1996-2021 The Squid Software Foundation and contributors |
bbc27441 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
3c1106a0 FC |
9 | #ifndef _SQUID_SRC_PARSER_CHARACTERSET_H |
10 | #define _SQUID_SRC_PARSER_CHARACTERSET_H | |
11 | ||
47efdfc6 FC |
12 | #include <initializer_list> |
13 | #include <iosfwd> | |
3c1106a0 FC |
14 | #include <vector> |
15 | ||
/// optimized set of C chars, with quick membership test and merge support
class CharacterSet
{
public:
    /// per-character membership flags, indexed by the character value
    using Storage = std::vector<uint8_t>;

    /// a character set with a given label and contents
    explicit CharacterSet(const char *label = "anonymous", const char * const chars = "");

    /// define a character set with the given label ("anonymous" if nullptr)
    /// containing the characters of the single inclusive range [low,high]
    /// \see addRange
    CharacterSet(const char *label, unsigned char low, unsigned char high);

    /// define a character set with the given label ("anonymous" if nullptr)
    /// containing characters defined in the supplied list of low-high ranges
    /// \see addRange
    CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t,uint8_t>> ranges);

    /// whether the set lacks any members
    bool isEmpty() const {
        // chars_ keeps one slot per possible character value (see the note on
        // chars_ below), so the container itself is never empty; membership
        // has to be decided by looking for a slot that is set
        for (const auto slot : chars_) {
            if (slot != 0)
                return false;
        }
        return true;
    }

    /// whether a given character exists in the set
    bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;}

    /// add a given character to the character set
    CharacterSet & add(const unsigned char c);

    /// remove a given character from the character set
    CharacterSet & remove(const unsigned char c);

    /// add one character range, expressed as [low,high], including both ends
    CharacterSet & addRange(unsigned char low, unsigned char high);

    /// set addition: add to this set all characters that are in rhs
    CharacterSet &operator +=(const CharacterSet &rhs);

    /// set subtraction: remove all characters that are also in rhs
    CharacterSet &operator -=(const CharacterSet &rhs);

    /// return a new CharacterSet containing characters not in this set
    /// use the supplied label if provided, default is "complement_of_some_other_set"
    CharacterSet complement(const char *complementLabel = nullptr) const;

    /// change name; handy in const declarations that use operators
    /// \note only the pointer is stored (no copy is made), so the label
    /// must stay valid for as long as this object uses it
    CharacterSet &rename(const char *label) { name = label; return *this; }

    /// \note Ignores label
    bool operator == (const CharacterSet &cs) const { return chars_ == cs.chars_; }
    /// \note Ignores label
    bool operator != (const CharacterSet &cs) const { return !operator==(cs); }

    /// prints all chars in arbitrary order, without any quoting/escaping
    void printChars(std::ostream &os) const;

    /// optional set label for debugging (default: "anonymous")
    const char * name;

    // common character sets, RFC 5234
    // A-Za-z
    static const CharacterSet ALPHA;
    // 0-1
    static const CharacterSet BIT;
    // carriage return
    static const CharacterSet CR;
    // controls
    static const CharacterSet CTL;
    // 0-9
    static const CharacterSet DIGIT;
    // double quote
    static const CharacterSet DQUOTE;
    // 0-9aAbBcCdDeEfF
    static const CharacterSet HEXDIG;
    // horizontal tab
    static const CharacterSet HTAB;
    // line feed
    static const CharacterSet LF;
    // white space
    static const CharacterSet SP;
    // visible (printable) characters
    static const CharacterSet VCHAR;
    // <space><tab>
    static const CharacterSet WSP;

    // HTTP character sets, RFC 7230
    // ctext
    static const CharacterSet CTEXT;
    // XXX: maybe field-vchar = VCHAR / obs-text
    // any VCHAR except for SPECIAL
    static const CharacterSet TCHAR;
    // special VCHARs
    static const CharacterSet SPECIAL;
    // qdtext
    static const CharacterSet QDTEXT;
    // obs-text
    static const CharacterSet OBSTEXT;

    // HTTP character sets, RFC 7232
    // etagc
    static const CharacterSet ETAGC;

    // HTTP character sets, RFC 7235
    // token68 (internal characters only, excludes '=' terminator)
    static const CharacterSet TOKEN68C;

private:
    /** index of characters in this set
     *
     * \note guaranteed to be always 256 slots big, as forced in the
     * constructor. This assumption is relied upon in various methods
     */
    Storage chars_;
};
129 | ||
47efdfc6 FC |
130 | /** CharacterSet addition
131 | * | |
132 | * \return a new CharacterSet containing every character present in lhs | |
133 | * or in rhs, labeled as lhs is; lhs is received by value, so the caller's own set is not altered | |
134 | */ | |
135 | CharacterSet | |
136 | operator+ (CharacterSet lhs, const CharacterSet &rhs); | |
137 | ||
138 | /** CharacterSet subtraction
139 | * | |
140 | * \return a new CharacterSet containing all characters present in lhs | |
141 | * and not present in rhs, labeled as lhs is; lhs is received by value, so the caller's own set is not altered | |
142 | */ | |
143 | CharacterSet | |
144 | operator- (CharacterSet lhs, const CharacterSet &rhs); | |
145 | ||
/// stream insertion for CharacterSet; the exact output format is decided by
/// the definition, which is not part of this header -- NOTE(review): confirm there
146 | std::ostream& | |
147 | operator <<(std::ostream &, const CharacterSet &); | |
148 | ||
3c1106a0 | 149 | #endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */ |
f53969cc | 150 |