]>
Commit | Line | Data |
---|---|---|
bbc27441 | 1 | /* |
bde978a6 | 2 | * Copyright (C) 1996-2015 The Squid Software Foundation and contributors |
bbc27441 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
3c1106a0 FC |
9 | #ifndef _SQUID_SRC_PARSER_CHARACTERSET_H |
10 | #define _SQUID_SRC_PARSER_CHARACTERSET_H | |
11 | ||
12 | #include <vector> | |
13 | ||
d27140db | 14 | /// optimized set of C chars, with quick membership test and merge support |
3c1106a0 FC |
15 | class CharacterSet |
16 | { | |
17 | public: | |
d27140db | 18 | typedef std::vector<uint8_t> Storage; |
0e4d80e4 | 19 | |
d27140db | 20 | /// define a character set with the given label ("anonymous" if NULL) |
1cc3b294 FC |
21 | /// with specified initial contents |
22 | CharacterSet(const char *label, const char * const initial); | |
3c1106a0 | 23 | |
8664ceb4 FC |
24 | /// define a character set with the given label ("anonymous" if NULL) |
25 | /// containing the characters in the supplied [low,high] range | |
26 | /// \see addRange | |
decd2fc6 | 27 | CharacterSet(const char *label, unsigned char low, unsigned char high); |
8664ceb4 | 28 | |
3c1106a0 | 29 | /// whether a given character exists in the set |
d27140db | 30 | bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;} |
3c1106a0 | 31 | |
d27140db | 32 | /// add a given character to the character set |
dcd4fdac | 33 | CharacterSet & add(const unsigned char c); |
3c1106a0 | 34 | |
decd2fc6 FC |
35 | /// add a range of characters, expressed as [low,high], including both ends |
36 | CharacterSet & addRange(unsigned char low, unsigned char high); | |
18538c13 | 37 | |
3c1106a0 | 38 | /// add all characters from the given CharacterSet to this one |
18538c13 FC |
39 | CharacterSet &operator +=(const CharacterSet &src); |
40 | ||
41 | /// return a new CharacterSet containing the union of two sets | |
42 | CharacterSet operator +(const CharacterSet &src) const; | |
3c1106a0 | 43 | |
b5cb2dbf AR |
44 | /// return a new CharacterSet containing characters not in this set |
45 | CharacterSet complement(const char *complementLabel = NULL) const; | |
46 | ||
47 | /// change name; handy in const declarations that use operators | |
48 | CharacterSet &rename(const char *label) { name = label; return *this; } | |
49 | ||
1cc3b294 | 50 | /// optional set label for debugging (default: "anonymous") |
3c1106a0 FC |
51 | const char * name; |
52 | ||
98b721ce | 53 | // common character sets, RFC 5234 |
18538c13 FC |
54 | // A-Za-z |
55 | static const CharacterSet ALPHA; | |
56 | // 0-1 | |
57 | static const CharacterSet BIT; | |
7d1cd883 FC |
58 | // carriage return |
59 | static const CharacterSet CR; | |
98b721ce AJ |
60 | // controls |
61 | #if __cplusplus == 201103L | |
62 | // ready but disabled as needs C++11 constructor | |
63 | //static const CharacterSet CTL; | |
64 | #endif | |
18538c13 FC |
65 | // 0-9 |
66 | static const CharacterSet DIGIT; | |
98b721ce AJ |
67 | // double quote |
68 | static const CharacterSet DQUOTE; | |
18538c13 FC |
69 | // 0-9aAbBcCdDeEfF |
70 | static const CharacterSet HEXDIG; | |
7d1cd883 FC |
71 | // horizontal tab |
72 | static const CharacterSet HTAB; | |
98b721ce AJ |
73 | // line feed |
74 | static const CharacterSet LF; | |
7d1cd883 FC |
75 | // space character (see WSP for space-or-tab) |
76 | static const CharacterSet SP; | |
77 | // visible (printable) characters | |
78 | static const CharacterSet VCHAR; | |
18538c13 FC |
79 | // <space><tab> |
80 | static const CharacterSet WSP; | |
98b721ce AJ |
81 | |
82 | // HTTP character sets, RFC 7230 | |
83 | // ctext | |
84 | #if __cplusplus == 201103L | |
85 | // ready but disabled as needs C++11 constructor | |
86 | //static const CharacterSet CTEXT; | |
87 | #endif | |
88 | // XXX: maybe field-vchar = VCHAR / obs-text | |
7d1cd883 FC |
89 | // any VCHAR except for SPECIAL |
90 | static const CharacterSet TCHAR; | |
91 | // special VCHARs | |
92 | static const CharacterSet SPECIAL; | |
98b721ce AJ |
93 | // qdtext |
94 | #if __cplusplus == 201103L | |
95 | // ready but disabled as needs C++11 constructor | |
7d1cd883 | 96 | //static const CharacterSet QDTEXT; |
98b721ce AJ |
97 | #endif |
98 | // obs-text | |
99 | static const CharacterSet OBSTEXT; | |
100 | ||
101 | // HTTP character sets, RFC 7232 | |
102 | // etagc | |
103 | #if __cplusplus == 201103L | |
104 | // ready but disabled as needs C++11 constructor | |
105 | //static const CharacterSet ETAGC; | |
106 | #endif | |
107 | ||
108 | // HTTP character sets, RFC 7235 | |
109 | // token68 (internal characters only, excludes '=' terminator) | |
110 | static const CharacterSet TOKEN68C; | |
18538c13 | 111 | |
3c1106a0 | 112 | private: |
d27140db | 113 | /** index of characters in this set |
dcd4fdac | 114 | * |
1cc3b294 | 115 | * \note guaranteed to be always 256 slots big, as forced in the |
dcd4fdac FC |
116 | * constructor. This assumption is relied upon in operator[], add, |
117 | * operator+= | |
118 | */ | |
86c63190 | 119 | Storage chars_; |
3c1106a0 FC |
120 | }; |
121 | ||
122 | #endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */ | |
f53969cc | 123 |