git.ipfire.org Git - thirdparty/squid.git/blame - src/base/CharacterSet.h
Source Format Enforcement (#532)
[thirdparty/squid.git] / src / base / CharacterSet.h
CommitLineData
bbc27441 1/*
77b1029d 2 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
bbc27441
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
3c1106a0
FC
9#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
10#define _SQUID_SRC_PARSER_CHARACTERSET_H
11
47efdfc6
FC
12#include <initializer_list>
13#include <iosfwd>
3c1106a0
FC
14#include <vector>
15
d27140db 16/// optimized set of C chars, with quick membership test and merge support
3c1106a0
FC
17class CharacterSet
18{
19public:
d27140db 20 typedef std::vector<uint8_t> Storage;
0e4d80e4 21
4eac3407
CT
22 /// a character set with a given label and contents
23 explicit CharacterSet(const char *label = "anonymous", const char * const chars = "");
3c1106a0 24
47efdfc6 25 /// define a character set with the given label ("anonymous" if nullptr)
8664ceb4
FC
26 /// containing the characters in the supplied [low,high] range
27 /// \see addRange
decd2fc6 28 CharacterSet(const char *label, unsigned char low, unsigned char high);
8664ceb4 29
47efdfc6
FC
30 /// define a character set with the given label ("anonymous" if nullptr)
31 /// containing characters defined in the supplied list of low-high ranges
32 /// \see addRange
33 CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t,uint8_t>> ranges);
34
4eac3407
CT
35 /// whether the set lacks any members
36 bool isEmpty() const { return chars_.empty(); }
37
3c1106a0 38 /// whether a given character exists in the set
d27140db 39 bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;}
3c1106a0 40
d27140db 41 /// add a given character to the character set
dcd4fdac 42 CharacterSet & add(const unsigned char c);
3c1106a0 43
decd2fc6
FC
44 /// add a single range of characters [low,high], including both ends
45 CharacterSet & addRange(unsigned char low, unsigned char high);
18538c13 46
47efdfc6
FC
47 /// set addition: add to this set all characters that are in rhs
48 CharacterSet &operator +=(const CharacterSet &rhs);
18538c13 49
47efdfc6
FC
50 /// set subtraction: remove all characters that are also in rhs
51 CharacterSet &operator -=(const CharacterSet &rhs);
3c1106a0 52
b5cb2dbf 53 /// return a new CharacterSet containing characters not in this set
47efdfc6
FC
54 /// use the supplied label if provided, default is "complement_of_some_other_set"
55 CharacterSet complement(const char *complementLabel = nullptr) const;
b5cb2dbf
AR
56
57 /// change name; handy in const declarations that use operators
58 CharacterSet &rename(const char *label) { name = label; return *this; }
59
47efdfc6
FC
60 /// \note Ignores label
61 bool operator == (const CharacterSet &cs) const { return chars_ == cs.chars_; }
62 /// \note Ignores label
63 bool operator != (const CharacterSet &cs) const { return !operator==(cs); }
64
4eac3407
CT
65 /// prints all chars in arbitrary order, without any quoting/escaping
66 void printChars(std::ostream &os) const;
67
1cc3b294 68 /// optional set label for debugging (default: "anonymous")
3c1106a0
FC
69 const char * name;
70
98b721ce 71 // common character sets, RFC 5234
18538c13
FC
72 // A-Za-z
73 static const CharacterSet ALPHA;
74 // 0-1
75 static const CharacterSet BIT;
7d1cd883
FC
76 // carriage return
77 static const CharacterSet CR;
98b721ce 78 // controls
47efdfc6 79 static const CharacterSet CTL;
18538c13
FC
80 // 0-9
81 static const CharacterSet DIGIT;
98b721ce
AJ
82 // double quote
83 static const CharacterSet DQUOTE;
18538c13
FC
84 // 0-9aAbBcCdDeEfF
85 static const CharacterSet HEXDIG;
7d1cd883
FC
86 // horizontal tab
87 static const CharacterSet HTAB;
98b721ce
AJ
88 // line feed
89 static const CharacterSet LF;
7d1cd883
FC
90 // space
91 static const CharacterSet SP;
92 // visible (printable) characters
93 static const CharacterSet VCHAR;
18538c13
FC
94 // <space><tab>
95 static const CharacterSet WSP;
98b721ce
AJ
96
97 // HTTP character sets, RFC 7230
98 // ctext
47efdfc6 99 static const CharacterSet CTEXT;
98b721ce 100 // XXX: maybe field-vchar = VCHAR / obs-text
7d1cd883
FC
101 // any VCHAR except for SPECIAL
102 static const CharacterSet TCHAR;
103 // special VCHARs
104 static const CharacterSet SPECIAL;
98b721ce 105 // qdtext
47efdfc6 106 static const CharacterSet QDTEXT;
98b721ce
AJ
107 // obs-text
108 static const CharacterSet OBSTEXT;
109
110 // HTTP character sets, RFC 7232
111 // etagc
47efdfc6 112 static const CharacterSet ETAGC;
98b721ce
AJ
113
114 // HTTP character sets, RFC 7235
115 // token68 (internal characters only, excludes '=' terminator)
116 static const CharacterSet TOKEN68C;
18538c13 117
3c1106a0 118private:
d27140db 119 /** index of characters in this set
dcd4fdac 120 *
1cc3b294 121 * \note guaranteed to be always 256 slots big, as forced in the
47efdfc6 122 * constructor. This assumption is relied upon in various methods
dcd4fdac 123 */
86c63190 124 Storage chars_;
3c1106a0
FC
125};
126
47efdfc6
FC
127/** CharacterSet addition
128 *
129 * \return a new CharacterSet containing all characters present in lhs
130 * or in rhs (set union), labeled as lhs is
131 */
132CharacterSet
133operator+ (CharacterSet lhs, const CharacterSet &rhs);
134
135/** CharacterSet subtraction
136 *
137 * \return a new CharacterSet containing all characters present in lhs
138 * and not present in rhs, labeled as lhs is
139 */
140CharacterSet
141operator- (CharacterSet lhs, const CharacterSet &rhs);
142
143std::ostream&
144operator <<(std::ostream &, const CharacterSet &);
145
3c1106a0 146#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
f53969cc 147