]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/Tokenizer.cc
Source Format Enforcement (#532)
[thirdparty/squid.git] / src / http / one / Tokenizer.cc
1 /*
2 * Copyright (C) 1996-2020 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/Parser.h"
12 #include "http/one/Tokenizer.h"
13 #include "parser/Tokenizer.h"
14 #include "sbuf/Stream.h"
15
16 /// Extracts quoted-string after the caller removes the initial '"'.
17 /// \param http1p0 whether to prohibit \-escaped characters in quoted strings
18 /// \throws InsufficientInput when input can be a token _prefix_
19 /// \returns extracted quoted string (without quotes and with chars unescaped)
20 static SBuf
21 parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
22 {
23 /*
24 * RFC 1945 - defines qdtext:
25 * inclusive of LWS (which includes CR and LF)
26 * exclusive of 0x80-0xFF
27 * includes 0x5C ('\') as just a regular character
28 */
29 static const CharacterSet qdtext1p0 = CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
30 CharacterSet("", "!") +
31 CharacterSet::CR + CharacterSet::LF + CharacterSet::HTAB + CharacterSet::SP;
32 /*
33 * RFC 7230 - defines qdtext:
34 * exclusive of CR and LF
35 * inclusive of 0x80-0xFF
36 * includes 0x5C ('\') but only when part of quoted-pair
37 */
38 static const CharacterSet qdtext1p1 = CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
39 CharacterSet("", "!") +
40 CharacterSet("", 0x5D, 0x7E) +
41 CharacterSet::HTAB + CharacterSet::SP +
42 CharacterSet::OBSTEXT;
43
44 // best we can do is a conditional reference since http1p0 value may change per-client
45 const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
46
47 SBuf parsedToken;
48
49 while (!tok.atEnd()) {
50 SBuf qdText;
51 if (tok.prefix(qdText, tokenChars))
52 parsedToken.append(qdText);
53
54 if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
55 if (tok.atEnd())
56 break;
57
58 /* RFC 7230 section 3.2.6
59 *
60 * The backslash octet ("\") can be used as a single-octet quoting
61 * mechanism within quoted-string and comment constructs. Recipients
62 * that process the value of a quoted-string MUST handle a quoted-pair
63 * as if it were replaced by the octet following the backslash.
64 *
65 * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
66 */
67 static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
68 SBuf escaped;
69 if (!tok.prefix(escaped, qPairChars, 1))
70 throw TexcHere("invalid escaped character in quoted-pair");
71
72 parsedToken.append(escaped);
73 continue;
74 }
75
76 if (tok.skip('"'))
77 return parsedToken; // may be empty
78
79 if (tok.atEnd())
80 break;
81
82 throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
83 }
84
85 throw Http::One::InsufficientInput();
86 }
87
88 SBuf
89 Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
90 {
91 if (tok.skip('"'))
92 return parseQuotedStringSuffix(tok, http1p0);
93
94 if (tok.atEnd())
95 throw InsufficientInput();
96
97 SBuf parsedToken;
98 if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
99 throw TexcHere("invalid input while expecting an HTTP token");
100
101 if (tok.atEnd())
102 throw InsufficientInput();
103
104 // got the complete token
105 return parsedToken;
106 }
107