]>
git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/Tokenizer.cc
/*
 * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
11 #include "http/one/Parser.h"
12 #include "http/one/Tokenizer.h"
13 #include "parser/Tokenizer.h"
14 #include "sbuf/Stream.h"
16 /// Extracts quoted-string after the caller removes the initial '"'.
17 /// \param http1p0 whether to prohibit \-escaped characters in quoted strings
18 /// \throws InsufficientInput when input can be a token _prefix_
19 /// \returns extracted quoted string (without quotes and with chars unescaped)
/// \param tok tokenizer to read from; this function consumes input through
///        tok.prefix() and tok.skip() calls
/// \throws a TexcHere()-built exception when the octet after a backslash is
///         not in the quoted-pair set (HTAB, SP, VCHAR, obs-text), and with an
///         invalid-bytes message naming the selected qdtext set (presumably
///         when the next byte is outside that set; the guarding condition is
///         not visible in this excerpt)
///
/// Accepted qdtext octets, as composed below:
/// - HTTP/1.0: 0x23-0x7E, the exclamation mark, CR, LF, HTAB, and SP
///   (a backslash is ordinary text; quoted-pair handling is skipped)
/// - HTTP/1.1: 0x23-0x5B, the exclamation mark, 0x5D-0x7E, HTAB, SP, and
///   obs-text (the backslash 0x5C is left out of the set so that it can be
///   handled separately as the start of a quoted-pair)
21 parseQuotedStringSuffix(Parser::Tokenizer
&tok
, const bool http1p0
)
// NOTE(review): this excerpt looks incomplete. The function braces, the
// declarations of parsedToken, qdText, and escaped, the comment delimiters
// around the RFC notes, and the conditions guarding the return statement and
// the last two throws are not visible here. Confirm against the upstream file
// before changing any logic.
24 * RFC 1945 - defines qdtext:
25 * inclusive of LWS (which includes CR and LF)
26 * exclusive of 0x80-0xFF
27 * includes 0x5C ('\') as just a regular character
29 static const CharacterSet qdtext1p0
= CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
30 CharacterSet("", "!") +
31 CharacterSet::CR
+ CharacterSet::LF
+ CharacterSet::HTAB
+ CharacterSet::SP
;
33 * RFC 7230 - defines qdtext:
34 * exclusive of CR and LF
35 * inclusive of 0x80-0xFF
36 * includes 0x5C ('\') but only when part of quoted-pair
38 static const CharacterSet qdtext1p1
= CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
39 CharacterSet("", "!") +
40 CharacterSet("", 0x5D, 0x7E) +
41 CharacterSet::HTAB
+ CharacterSet::SP
+
42 CharacterSet::OBSTEXT
;
44 // best we can do is a conditional reference since http1p0 value may change per-client
45 const CharacterSet
&tokenChars
= (http1p0
? qdtext1p0
: qdtext1p1
);
49 while (!tok
.atEnd()) {
51 if (tok
.prefix(qdText
, tokenChars
))
52 parsedToken
.append(qdText
);
54 if (!http1p0
&& tok
.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
58 /* RFC 7230 section 3.2.6
60 * The backslash octet ("\") can be used as a single-octet quoting
61 * mechanism within quoted-string and comment constructs. Recipients
62 * that process the value of a quoted-string MUST handle a quoted-pair
63 * as if it were replaced by the octet following the backslash.
65 * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
67 static const CharacterSet qPairChars
= CharacterSet::HTAB
+ CharacterSet::SP
+ CharacterSet::VCHAR
+ CharacterSet::OBSTEXT
;
69 if (!tok
.prefix(escaped
, qPairChars
, 1))
70 throw TexcHere("invalid escaped character in quoted-pair");
72 parsedToken
.append(escaped
);
77 return parsedToken
; // may be empty
82 throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars
.name
));
85 throw Http::One::InsufficientInput();
89 Http::One::tokenOrQuotedString(Parser::Tokenizer
&tok
, const bool http1p0
)
92 return parseQuotedStringSuffix(tok
, http1p0
);
95 throw InsufficientInput();
98 if (!tok
.prefix(parsedToken
, CharacterSet::TCHAR
))
99 throw TexcHere("invalid input while expecting an HTTP token");
102 throw InsufficientInput();
104 // got the complete token