]> git.ipfire.org Git - thirdparty/squid.git/blob - src/base/CharacterSet.h
Docs: Copyright updates for 2018 (#114)
[thirdparty/squid.git] / src / base / CharacterSet.h
1 /*
2 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
10 #define _SQUID_SRC_PARSER_CHARACTERSET_H
11
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <utility>
#include <vector>
15
/// optimized set of C chars, with quick membership test and merge support
class CharacterSet
{
public:
    /// membership table indexed by character value; a non-zero slot marks a member
    typedef std::vector<uint8_t> Storage;

    /// a character set with a given label and contents
    explicit CharacterSet(const char *label = "anonymous", const char * const chars = "");

    /// define a character set with the given label ("anonymous" if nullptr)
    /// containing the characters in the supplied [low,high] range
    /// \see addRange
    CharacterSet(const char *label, unsigned char low, unsigned char high);

    /// define a character set with the given label ("anonymous" if nullptr)
    /// containing characters defined in the supplied list of low-high ranges
    /// \see addRange
    CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t,uint8_t>> ranges);

    /// whether the set lacks any members
    bool isEmpty() const {
        // chars_ always holds 256 slots (see the chars_ note below), so
        // chars_.empty() can never be true; the set is empty only when
        // no slot is marked.
        for (const auto slot : chars_) {
            if (slot != 0)
                return false;
        }
        return true;
    }

    /// whether a given character exists in the set
    /// \note relies on chars_ having a slot for every unsigned char value
    bool operator[](unsigned char c) const { return chars_[static_cast<uint8_t>(c)] != 0; }

    /// add a given character to the character set
    CharacterSet & add(const unsigned char c);

    /// add a range of characters, expressed as [low,high], including both ends
    CharacterSet & addRange(unsigned char low, unsigned char high);

    /// set addition: add to this set all characters that are also in rhs
    CharacterSet &operator +=(const CharacterSet &rhs);

    /// set subtraction: remove all characters that are also in rhs
    CharacterSet &operator -=(const CharacterSet &rhs);

    /// return a new CharacterSet containing characters not in this set
    /// use the supplied label if provided, default is "complement_of_some_other_set"
    CharacterSet complement(const char *complementLabel = nullptr) const;

    /// change name; handy in const declarations that use operators
    /// \note only the pointer is stored; the label text is not copied, so it
    /// must stay valid for as long as this set may use it
    CharacterSet &rename(const char *label) { name = label; return *this; }

    /// \note Ignores label
    bool operator == (const CharacterSet &cs) const { return chars_ == cs.chars_; }
    /// \note Ignores label
    bool operator != (const CharacterSet &cs) const { return !operator==(cs); }

    /// prints all chars in arbitrary order, without any quoting/escaping
    void printChars(std::ostream &os) const;

    /// optional set label for debugging (default: "anonymous")
    /// \note a raw, non-owning pointer
    const char * name;

    // common character sets, RFC 5234
    // A-Za-z
    static const CharacterSet ALPHA;
    // 0-1
    static const CharacterSet BIT;
    // carriage return
    static const CharacterSet CR;
    // controls
    static const CharacterSet CTL;
    // 0-9
    static const CharacterSet DIGIT;
    // double quote
    static const CharacterSet DQUOTE;
    // 0-9aAbBcCdDeEfF
    static const CharacterSet HEXDIG;
    // horizontal tab
    static const CharacterSet HTAB;
    // line feed
    static const CharacterSet LF;
    // space (RFC 5234 SP; for space-or-tab see WSP)
    static const CharacterSet SP;
    // visible (printable) characters
    static const CharacterSet VCHAR;
    // <space><tab>
    static const CharacterSet WSP;

    // HTTP character sets, RFC 7230
    // ctext
    static const CharacterSet CTEXT;
    // XXX: maybe field-vchar = VCHAR / obs-text
    // any VCHAR except for SPECIAL
    static const CharacterSet TCHAR;
    // special VCHARs
    static const CharacterSet SPECIAL;
    // qdtext
    static const CharacterSet QDTEXT;
    // obs-text
    static const CharacterSet OBSTEXT;

    // HTTP character sets, RFC 7232
    // etagc
    static const CharacterSet ETAGC;

    // HTTP character sets, RFC 7235
    // token68 (internal characters only, excludes '=' terminator)
    static const CharacterSet TOKEN68C;

private:
    /** index of characters in this set
     *
     * \note guaranteed to be always 256 slots big, as forced in the
     *  constructor. This assumption is relied upon in various methods
     */
    Storage chars_;
};
126
127 /** CharacterSet addition
128 *
129 * \return a new CharacterSet containing all characters present both in lhs
130 * and rhs, labeled as lhs is
131 */
132 CharacterSet
133 operator+ (CharacterSet lhs, const CharacterSet &rhs);
134
135 /** CharacterSet subtraction
136 *
137 * \return a new CharacterSet containing all characters present in lhs
138 * and not present in rhs, labeled as lhs is
139 */
140 CharacterSet
141 operator- (CharacterSet lhs, const CharacterSet &rhs);
142
143 std::ostream&
144 operator <<(std::ostream &, const CharacterSet &);
145
146 #endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
147