]>
Commit | Line | Data |
---|---|---|
3c1106a0 FC |
1 | #ifndef _SQUID_SRC_PARSER_CHARACTERSET_H |
2 | #define _SQUID_SRC_PARSER_CHARACTERSET_H | |
3 | ||
4 | #include <vector> | |
5 | ||
d27140db | 6 | /// optimized set of C chars, with quick membership test and merge support |
3c1106a0 FC |
7 | class CharacterSet |
8 | { | |
9 | public: | |
d27140db | 10 | typedef std::vector<uint8_t> Storage; |
8664ceb4 | 11 | typedef std::vector<std::pair<unsigned char, unsigned char> > RangeSpec; |
0e4d80e4 | 12 | |
d27140db | 13 | /// define a character set with the given label ("anonymous" if NULL) |
1cc3b294 FC |
14 | /// with specified initial contents |
15 | CharacterSet(const char *label, const char * const initial); | |
3c1106a0 | 16 | |
8664ceb4 FC |
17 | /// define a character set with the given label ("anonymous" if NULL) |
18 | /// containing characters defined in the supplied ranges | |
19 | /// \see addRange | |
20 | CharacterSet(const char *label, const RangeSpec &); | |
21 | ||
3c1106a0 | 22 | /// whether a given character exists in the set |
d27140db | 23 | bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;} |
3c1106a0 | 24 | |
d27140db | 25 | /// add a given character to the character set |
dcd4fdac | 26 | CharacterSet & add(const unsigned char c); |
3c1106a0 | 27 | |
8664ceb4 FC |
28 | /** add a list of character ranges, expressed as pairs [low,high] |
29 | * | |
30 | * Both ends of the specified ranges are included in the added set | |
31 | * e.g. addRange(RangeSpec( { { '0','9'}, { 'a', 'z' } } ) ) | |
32 | */ | |
33 | CharacterSet & addRange(const RangeSpec &); | |
18538c13 | 34 | |
3c1106a0 | 35 | /// add all characters from the given CharacterSet to this one |
18538c13 FC |
36 | CharacterSet &operator +=(const CharacterSet &src); |
37 | ||
38 | /// return a new CharacterSet containing the union of two sets | |
39 | CharacterSet operator +(const CharacterSet &src) const; | |
3c1106a0 | 40 | |
1cc3b294 | 41 | /// optional set label for debugging (default: "anonymous") |
3c1106a0 FC |
42 | const char * name; |
43 | ||
7d1cd883 | 44 | // common character sets, inspired by RFC 5234 |
18538c13 FC |
45 | // A-Za-z |
46 | static const CharacterSet ALPHA; | |
47 | // 0-1 | |
48 | static const CharacterSet BIT; | |
8664ceb4 FC |
49 | // any 7-bit US-ASCII character, except for NUL |
50 | static const CharacterSet CHAR; | |
7d1cd883 FC |
51 | // carriage return |
52 | static const CharacterSet CR; | |
18538c13 FC |
53 | // CRLF |
54 | static const CharacterSet CRLF; | |
7d1cd883 FC |
55 | // double quote |
56 | static const CharacterSet DQUOTE; | |
18538c13 FC |
57 | // 0-9 |
58 | static const CharacterSet DIGIT; | |
59 | // 0-9aAbBcCdDeEfF | |
60 | static const CharacterSet HEXDIG; | |
7d1cd883 FC |
61 | // horizontal tab |
62 | static const CharacterSet HTAB; | |
63 | // space character | |
64 | static const CharacterSet SP; | |
65 | // visible (printable) characters | |
66 | static const CharacterSet VCHAR; | |
18538c13 FC |
67 | // <space><tab> |
68 | static const CharacterSet WSP; | |
7d1cd883 FC |
69 | // character sets from draft httpbis |
70 | // any VCHAR except for SPECIAL | |
71 | static const CharacterSet TCHAR; | |
72 | // special VCHARs | |
73 | static const CharacterSet SPECIAL; | |
74 | // qdtext (ready but not enabled for now) | |
75 | //static const CharacterSet QDTEXT; | |
76 | // obs-text | |
77 | //static const CharacterSet OBSTEXT; | |
18538c13 | 78 | |
3c1106a0 | 79 | private: |
d27140db | 80 | /** index of characters in this set |
dcd4fdac | 81 | * |
1cc3b294 | 82 | * \note guaranteed to be always 256 slots big, as forced in the |
dcd4fdac FC |
83 | * constructor. This assumption is relied upon in operator[], add, |
84 | * operator+= | |
85 | */ | |
d27140db | 86 | Storage chars_; |
3c1106a0 FC |
87 | }; |
88 | ||
89 | #endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */ |