]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/base/CharacterSet.h
2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 #ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
10 #define _SQUID_SRC_PARSER_CHARACTERSET_H
12 #include <initializer_list>
16 /// optimized set of C chars, with quick membership test and merge support
20 typedef std::vector
<uint8_t> Storage
;
22 /// a character set with a given label and contents
23 explicit CharacterSet(const char *label
= "anonymous", const char * const chars
= "");
25 /// define a character set with the given label ("anonymous" if nullptr)
26 /// containing the characters in the supplied low-high range
28 CharacterSet(const char *label
, unsigned char low
, unsigned char high
);
30 /// define a character set with the given label ("anonymous" if nullptr)
31 /// containing characters defined in the supplied list of low-high ranges
33 CharacterSet(const char *label
, std::initializer_list
<std::pair
<uint8_t,uint8_t>> ranges
);
35 /// whether the set lacks any members
36 bool isEmpty() const { return chars_
.empty(); }
38 /// whether a given character exists in the set
39 bool operator[](unsigned char c
) const {return chars_
[static_cast<uint8_t>(c
)] != 0;}
41 /// add a given character to the character set
42 CharacterSet
& add(const unsigned char c
);
44 /// remove a given character from the character set
45 CharacterSet
& remove(const unsigned char c
);
47 /// add a range of characters [low,high], including both ends
48 CharacterSet
& addRange(unsigned char low
, unsigned char high
);
50 /// set addition: add to this set all characters that are in rhs
51 CharacterSet
&operator +=(const CharacterSet
&rhs
);
53 /// set subtraction: remove all characters that are also in rhs
54 CharacterSet
&operator -=(const CharacterSet
&rhs
);
56 /// return a new CharacterSet containing characters not in this set
57 /// use the supplied label if provided, default is "complement_of_some_other_set"
58 CharacterSet
complement(const char *complementLabel
= nullptr) const;
60 /// change name; handy in const declarations that use operators
61 CharacterSet
&rename(const char *label
) { name
= label
; return *this; }
63 /// \note Ignores label
64 bool operator == (const CharacterSet
&cs
) const { return chars_
== cs
.chars_
; }
65 /// \note Ignores label
66 bool operator != (const CharacterSet
&cs
) const { return !operator==(cs
); }
68 /// prints all chars in arbitrary order, without any quoting/escaping
69 void printChars(std::ostream
&os
) const;
71 /// optional set label for debugging (default: "anonymous")
74 // common character sets, RFC 5234
76 static const CharacterSet ALPHA
;
78 static const CharacterSet BIT
;
80 static const CharacterSet CR
;
82 static const CharacterSet CTL
;
84 static const CharacterSet DIGIT
;
86 static const CharacterSet DQUOTE
;
88 static const CharacterSet HEXDIG
;
90 static const CharacterSet HTAB
;
92 static const CharacterSet LF
;
94 static const CharacterSet SP
;
95 // visible (printable) characters
96 static const CharacterSet VCHAR
;
98 static const CharacterSet WSP
;
100 // HTTP character sets, RFC 7230
102 static const CharacterSet CTEXT
;
103 // XXX: maybe field-vchar = VCHAR / obs-text
104 // any VCHAR except for SPECIAL
105 static const CharacterSet TCHAR
;
107 static const CharacterSet SPECIAL
;
109 static const CharacterSet QDTEXT
;
111 static const CharacterSet OBSTEXT
;
113 // HTTP character sets, RFC 7232
115 static const CharacterSet ETAGC
;
117 // HTTP character sets, RFC 7235
118 // token68 (internal characters only, excludes '=' terminator)
119 static const CharacterSet TOKEN68C
;
122 /** index of characters in this set
124 * \note guaranteed to be always 256 slots big, as forced in the
125 * constructor. This assumption is relied upon in various methods
130 /** CharacterSet addition
132 * \return a new CharacterSet containing all characters present in lhs,
133 * in rhs, or in both, labeled as lhs is
136 operator+ (CharacterSet lhs
, const CharacterSet
&rhs
);
138 /** CharacterSet subtraction
140 * \return a new CharacterSet containing all characters present in lhs
141 * and not present in rhs, labeled as lhs is
144 operator- (CharacterSet lhs
, const CharacterSet
&rhs
);
147 operator <<(std::ostream
&, const CharacterSet
&);
149 #endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */