]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/Tokenizer.cc
Source Format Enforcement (#1234)
[thirdparty/squid.git] / src / http / one / Tokenizer.cc
CommitLineData
f29718b0 1/*
b8ae064d 2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
f29718b0
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9#include "squid.h"
675b8408 10#include "debug/Stream.h"
417da400 11#include "http/one/Parser.h"
f29718b0 12#include "http/one/Tokenizer.h"
417da400
EB
13#include "parser/Tokenizer.h"
14#include "sbuf/Stream.h"
15
16/// Extracts quoted-string after the caller removes the initial '"'.
17/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
18/// \throws InsufficientInput when input can be a token _prefix_
19/// \returns extracted quoted string (without quotes and with chars unescaped)
20static SBuf
21parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
f29718b0 22{
f29718b0
AJ
23 /*
24 * RFC 1945 - defines qdtext:
25 * inclusive of LWS (which includes CR and LF)
26 * exclusive of 0x80-0xFF
4312ade0 27 * includes 0x5C ('\') as just a regular character
f29718b0
AJ
28 */
29 static const CharacterSet qdtext1p0 = CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
30 CharacterSet("", "!") +
31 CharacterSet::CR + CharacterSet::LF + CharacterSet::HTAB + CharacterSet::SP;
32 /*
33 * RFC 7230 - defines qdtext:
34 * exclusive of CR and LF
35 * inclusive of 0x80-0xFF
4312ade0 36 * includes 0x5C ('\') but only when part of quoted-pair
f29718b0
AJ
37 */
38 static const CharacterSet qdtext1p1 = CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
39 CharacterSet("", "!") +
40 CharacterSet("", 0x5D, 0x7E) +
41 CharacterSet::HTAB + CharacterSet::SP +
42 CharacterSet::OBSTEXT;
43
44 // best we can do is a conditional reference since http1p0 value may change per-client
45 const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
46
417da400
EB
47 SBuf parsedToken;
48
49 while (!tok.atEnd()) {
50 SBuf qdText;
51 if (tok.prefix(qdText, tokenChars))
52 parsedToken.append(qdText);
53
54 if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
55 if (tok.atEnd())
56 break;
f29718b0 57
f29718b0
AJ
58 /* RFC 7230 section 3.2.6
59 *
60 * The backslash octet ("\") can be used as a single-octet quoting
61 * mechanism within quoted-string and comment constructs. Recipients
62 * that process the value of a quoted-string MUST handle a quoted-pair
63 * as if it were replaced by the octet following the backslash.
64 *
65 * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
66 */
67 static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
68 SBuf escaped;
417da400
EB
69 if (!tok.prefix(escaped, qPairChars, 1))
70 throw TexcHere("invalid escaped character in quoted-pair");
71
72 parsedToken.append(escaped);
f29718b0 73 continue;
417da400 74 }
f29718b0 75
417da400
EB
76 if (tok.skip('"'))
77 return parsedToken; // may be empty
f29718b0 78
417da400
EB
79 if (tok.atEnd())
80 break;
f29718b0 81
417da400 82 throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
f29718b0
AJ
83 }
84
417da400
EB
85 throw Http::One::InsufficientInput();
86}
87
88SBuf
89Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
90{
91 if (tok.skip('"'))
92 return parseQuotedStringSuffix(tok, http1p0);
93
94 if (tok.atEnd())
95 throw InsufficientInput();
96
97 SBuf parsedToken;
98 if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
99 throw TexcHere("invalid input while expecting an HTTP token");
100
101 if (tok.atEnd())
102 throw InsufficientInput();
103
104 // got the complete token
105 return parsedToken;
f29718b0
AJ
106}
107