]> git.ipfire.org Git - thirdparty/squid.git/blame - src/base/CharacterSet.h
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / base / CharacterSet.h
CommitLineData
bbc27441 1/*
f70aedc4 2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
bbc27441
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
3c1106a0
FC
9#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
10#define _SQUID_SRC_PARSER_CHARACTERSET_H
11
47efdfc6
FC
12#include <initializer_list>
13#include <iosfwd>
3c1106a0
FC
14#include <vector>
15
/// Optimized set of C chars, with quick membership test and merge support.
/// Each set carries an optional label that is meant for debugging only;
/// equality comparisons ignore it.
class CharacterSet
{
public:
    /// per-character membership flags; a non-zero slot means "in the set"
    using Storage = std::vector<uint8_t>;

    /// a character set with a given label and contents
    /// \param label debugging label for the new set
    /// \param chars C string listing the characters to include
    explicit CharacterSet(const char *label = "anonymous", const char * const chars = "");

    /// define a character set with the given label ("anonymous" if nullptr)
    /// containing the characters in the supplied [low,high] range
    /// \see addRange
    CharacterSet(const char *label, unsigned char low, unsigned char high);

    /// define a character set with the given label ("anonymous" if nullptr)
    /// containing characters defined in the supplied list of low-high ranges
    /// \see addRange
    CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t,uint8_t>> ranges);

    /// whether the set lacks any members
    /// \note chars_ is documented as always holding 256 slots, so asking the
    /// container whether it is empty can never detect a set without members;
    /// the membership flags themselves have to be inspected instead.
    bool isEmpty() const {
        for (const auto present : chars_) {
            if (present)
                return false;
        }
        return true;
    }

    /// whether a given character exists in the set
    bool operator[](unsigned char c) const { return chars_[static_cast<uint8_t>(c)] != 0; }

    /// add a given character to the character set
    /// \return this set, to allow call chaining
    CharacterSet & add(const unsigned char c);

    /// remove a given character from the character set
    /// \return this set, to allow call chaining
    CharacterSet & remove(const unsigned char c);

    /// add every character in the [low,high] range, including both ends
    /// \return this set, to allow call chaining
    CharacterSet & addRange(unsigned char low, unsigned char high);

    /// set addition: add to this set all characters that are in rhs
    CharacterSet &operator +=(const CharacterSet &rhs);

    /// set subtraction: remove all characters that are also in rhs
    CharacterSet &operator -=(const CharacterSet &rhs);

    /// return a new CharacterSet containing characters not in this set
    /// use the supplied label if provided, default is "complement_of_some_other_set"
    CharacterSet complement(const char *complementLabel = nullptr) const;

    /// change name; handy in const declarations that use operators
    /// \note only the pointer is stored, the label text is not copied
    CharacterSet &rename(const char *label) { name = label; return *this; }

    /// \note Ignores label
    bool operator == (const CharacterSet &cs) const { return chars_ == cs.chars_; }
    /// \note Ignores label
    bool operator != (const CharacterSet &cs) const { return !operator==(cs); }

    /// prints all chars in arbitrary order, without any quoting/escaping
    void printChars(std::ostream &os) const;

    /// optional set label for debugging (default: "anonymous")
    const char * name;

    // common character sets, RFC 5234
    // A-Za-z
    static const CharacterSet ALPHA;
    // 0-1
    static const CharacterSet BIT;
    // carriage return
    static const CharacterSet CR;
    // controls
    static const CharacterSet CTL;
    // 0-9
    static const CharacterSet DIGIT;
    // double quote
    static const CharacterSet DQUOTE;
    // 0-9aAbBcCdDeEfF
    static const CharacterSet HEXDIG;
    // horizontal tab
    static const CharacterSet HTAB;
    // line feed
    static const CharacterSet LF;
    // space (SP); see WSP for space-or-tab
    static const CharacterSet SP;
    // visible (printable) characters
    static const CharacterSet VCHAR;
    // <space><tab>
    static const CharacterSet WSP;

    // HTTP character sets, RFC 7230
    // ctext
    static const CharacterSet CTEXT;
    // XXX: maybe field-vchar = VCHAR / obs-text
    // any VCHAR except for SPECIAL
    static const CharacterSet TCHAR;
    // special VCHARs
    static const CharacterSet SPECIAL;
    // qdtext
    static const CharacterSet QDTEXT;
    // obs-text
    static const CharacterSet OBSTEXT;

    // HTTP character sets, RFC 7232
    // etagc
    static const CharacterSet ETAGC;

    // HTTP character sets, RFC 7235
    // token68 (internal characters only, excludes '=' terminator)
    static const CharacterSet TOKEN68C;

private:
    /** index of characters in this set
     *
     * \note guaranteed to be always 256 slots big, as forced in the
     *  constructor. This assumption is relied upon in various methods
     */
    Storage chars_;
};
129
47efdfc6
FC
130/** CharacterSet addition
131 *
132 * \return a new CharacterSet containing all characters present in lhs
133 * or in rhs (set union), labeled as lhs is
134 */
135CharacterSet
136operator+ (CharacterSet lhs, const CharacterSet &rhs);
137
138/** CharacterSet subtraction
139 *
140 * \return a new CharacterSet containing all characters present in lhs
141 * and not present in rhs, labeled as lhs is
142 */
143CharacterSet
144operator- (CharacterSet lhs, const CharacterSet &rhs);
145
146std::ostream&
147operator <<(std::ostream &, const CharacterSet &);
148
3c1106a0 149#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
f53969cc 150