]> git.ipfire.org Git - thirdparty/squid.git/blame - src/base/CharacterSet.h
SourceFormat Enforcement
[thirdparty/squid.git] / src / base / CharacterSet.h
CommitLineData
bbc27441 1/*
bde978a6 2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
bbc27441
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
3c1106a0
FC
9#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
10#define _SQUID_SRC_PARSER_CHARACTERSET_H
11
12#include <vector>
13
d27140db 14/// optimized set of C chars, with quick membership test and merge support
3c1106a0
FC
15class CharacterSet
16{
17public:
d27140db 18 typedef std::vector<uint8_t> Storage;
0e4d80e4 19
d27140db 20 /// define a character set with the given label ("anonymous" if NULL)
1cc3b294
FC
21 /// with specified initial contents
22 CharacterSet(const char *label, const char * const initial);
3c1106a0 23
8664ceb4
FC
24 /// define a character set with the given label ("anonymous" if NULL)
25 /// containing the characters in the supplied [low,high] range
26 /// \see addRange
decd2fc6 27 CharacterSet(const char *label, unsigned char low, unsigned char high);
8664ceb4 28
3c1106a0 29 /// whether a given character exists in the set (its chars_ slot is non-zero)
d27140db 30 bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;}
3c1106a0 31
d27140db 32 /// add a given character to the character set
dcd4fdac 33 CharacterSet & add(const unsigned char c);
3c1106a0 34
decd2fc6
FC
35 /// add the character range [low,high], including both ends
36 CharacterSet & addRange(unsigned char low, unsigned char high);
18538c13 37
3c1106a0 38 /// add all characters from the given CharacterSet to this one
18538c13
FC
39 CharacterSet &operator +=(const CharacterSet &src);
40
41 /// return a new CharacterSet containing the union of two sets
42 CharacterSet operator +(const CharacterSet &src) const;
3c1106a0 43
b5cb2dbf
AR
44 /// return a new CharacterSet containing characters not in this set
45 CharacterSet complement(const char *complementLabel = NULL) const;
46
47 /// change name; handy in const declarations that use operators (stores the label pointer, does not copy the string)
48 CharacterSet &rename(const char *label) { name = label; return *this; }
49
1cc3b294 50 /// optional set label for debugging (default: "anonymous")
3c1106a0
FC
51 const char * name;
52
98b721ce 53 // common character sets, RFC 5234
18538c13
FC
54 // A-Za-z
55 static const CharacterSet ALPHA;
56 // 0-1
57 static const CharacterSet BIT;
7d1cd883
FC
58 // carriage return
59 static const CharacterSet CR;
98b721ce
AJ
60 // controls
61#if __cplusplus == 201103L
62 // ready but disabled as needs C++11 constructor
63 //static const CharacterSet CTL;
64#endif
18538c13
FC
65 // 0-9
66 static const CharacterSet DIGIT;
98b721ce
AJ
67 // double quote
68 static const CharacterSet DQUOTE;
18538c13
FC
69 // 0-9aAbBcCdDeEfF
70 static const CharacterSet HEXDIG;
7d1cd883
FC
71 // horizontal tab
72 static const CharacterSet HTAB;
98b721ce
AJ
73 // line feed
74 static const CharacterSet LF;
7d1cd883
FC
75 // space character
76 static const CharacterSet SP;
77 // visible (printable) characters
78 static const CharacterSet VCHAR;
18538c13
FC
79 // <space><tab>
80 static const CharacterSet WSP;
98b721ce
AJ
81
82 // HTTP character sets, RFC 7230
83 // ctext
84#if __cplusplus == 201103L
85 // ready but disabled as needs C++11 constructor
86 //static const CharacterSet CTEXT;
87#endif
88 // XXX: maybe field-vchar = VCHAR / obs-text
7d1cd883
FC
89 // any VCHAR except for SPECIAL
90 static const CharacterSet TCHAR;
91 // special VCHARs
92 static const CharacterSet SPECIAL;
98b721ce
AJ
93 // qdtext
94#if __cplusplus == 201103L
95 // ready but disabled as needs C++11 constructor
7d1cd883 96 //static const CharacterSet QDTEXT;
98b721ce
AJ
97#endif
98 // obs-text
99 static const CharacterSet OBSTEXT;
100
101 // HTTP character sets, RFC 7232
102 // etagc
103#if __cplusplus == 201103L
104 // ready but disabled as needs C++11 constructor
105 //static const CharacterSet ETAGC;
106#endif
107
108 // HTTP character sets, RFC 7235
109 // token68 (internal characters only, excludes '=' terminator)
110 static const CharacterSet TOKEN68C;
18538c13 111
3c1106a0 112private:
d27140db 113 /** index of characters in this set
dcd4fdac 114 *
1cc3b294 115 * \note guaranteed to always be 256 slots big, as forced in the
dcd4fdac
FC
116 * constructor. This assumption is relied upon in operator[], add,
117 * and operator+=
118 */
86c63190 119 Storage chars_;
3c1106a0
FC
120};
121
122#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
f53969cc 123