]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/Parser.cc
Maintenance: Removed most NULLs using modernize-use-nullptr (#1075)
[thirdparty/squid.git] / src / http / one / Parser.cc
CommitLineData
48a37aee 1/*
bf95c10a 2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
48a37aee
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
c99510dd 9#include "squid.h"
417da400 10#include "base/CharacterSet.h"
675b8408 11#include "debug/Stream.h"
c99510dd 12#include "http/one/Parser.h"
f1d5359e 13#include "mime_header.h"
417da400 14#include "parser/Tokenizer.h"
b8f86fd2 15#include "SquidConfig.h"
c99510dd 16
9651320a
AJ
17/// RFC 7230 section 2.6 - 7 magic octets
18const SBuf Http::One::Parser::Http1magic("HTTP/1.");
19
00237269
AJ
20const SBuf &Http::One::CrLf()
21{
22 static const SBuf crlf("\r\n");
23 return crlf;
24}
25
c99510dd
AJ
26void
27Http::One::Parser::clear()
28{
29 parsingStage_ = HTTP_PARSE_NONE;
aee3523a 30 buf_ = nullptr;
c99510dd
AJ
31 msgProtocol_ = AnyP::ProtocolVersion();
32 mimeHeaderBlock_.clear();
33}
34
00237269
AJ
35/// characters HTTP permits tolerant parsers to accept as delimiters
36static const CharacterSet &
37RelaxedDelimiterCharacters()
38{
39 // RFC 7230 section 3.5
40 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
41 // or bare CR as whitespace between request-line fields
42 static const CharacterSet RelaxedDels =
43 (CharacterSet::SP +
44 CharacterSet::HTAB +
45 CharacterSet("VT,FF","\x0B\x0C") +
46 CharacterSet::CR).rename("relaxed-WSP");
47
48 return RelaxedDels;
49}
50
26f0a359
AR
51const CharacterSet &
52Http::One::Parser::WhitespaceCharacters()
53{
54 return Config.onoff.relaxed_header_parser ?
55 RelaxedDelimiterCharacters() : CharacterSet::WSP;
56}
57
00237269
AJ
58const CharacterSet &
59Http::One::Parser::DelimiterCharacters()
60{
61 return Config.onoff.relaxed_header_parser ?
62 RelaxedDelimiterCharacters() : CharacterSet::SP;
63}
64
417da400
EB
65void
66Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
f1d5359e 67{
00237269 68 if (tok.skip(Http1::CrLf()))
417da400 69 return;
b8f86fd2
AJ
70
71 if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
417da400 72 return;
b8f86fd2 73
188ad27f 74 if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
417da400 75 throw InsufficientInput();
188ad27f
AJ
76
77 throw TexcHere("garbage instead of CRLF line terminator");
b8f86fd2
AJ
78}
79
00237269
AJ
80/// all characters except the LF line terminator
81static const CharacterSet &
82LineCharacters()
83{
84 static const CharacterSet line = CharacterSet::LF.complement("non-LF");
85 return line;
86}
87
88/**
89 * Remove invalid lines (if any) from the mime prefix
90 *
91 * RFC 7230 section 3:
92 * "A recipient that receives whitespace between the start-line and
93 * the first header field MUST ... consume each whitespace-preceded
94 * line without further processing of it."
95 *
96 * We need to always use the relaxed delimiters here to prevent
97 * line smuggling through strict parsers.
98 *
99 * Note that 'whitespace' in RFC 7230 includes CR. So that means
100 * sequences of CRLF will be pruned, but not sequences of bare-LF.
101 */
102void
103Http::One::Parser::cleanMimePrefix()
104{
417da400 105 Tokenizer tok(mimeHeaderBlock_);
00237269
AJ
106 while (tok.skipOne(RelaxedDelimiterCharacters())) {
107 (void)tok.skipAll(LineCharacters()); // optional line content
108 // LF terminator is required.
109 // trust headersEnd() to ensure that we have at least one LF
110 (void)tok.skipOne(CharacterSet::LF);
111 }
112
113 // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
114 // then we skipped everything, including that terminating LF.
115 // Restore the terminating CRLF if needed.
116 if (tok.atEnd())
117 mimeHeaderBlock_ = Http1::CrLf();
118 else
119 mimeHeaderBlock_ = tok.remaining();
120 // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
121}
122
123/**
124 * Replace obs-fold with a single SP,
125 *
126 * RFC 7230 section 3.2.4
127 * "A server that receives an obs-fold in a request message that is not
128 * within a message/http container MUST ... replace
129 * each received obs-fold with one or more SP octets prior to
130 * interpreting the field value or forwarding the message downstream."
131 *
132 * "A proxy or gateway that receives an obs-fold in a response message
133 * that is not within a message/http container MUST ... replace each
134 * received obs-fold with one or more SP octets prior to interpreting
135 * the field value or forwarding the message downstream."
136 */
137void
138Http::One::Parser::unfoldMime()
139{
417da400 140 Tokenizer tok(mimeHeaderBlock_);
00237269
AJ
141 const auto szLimit = mimeHeaderBlock_.length();
142 mimeHeaderBlock_.clear();
143 // prevent the mime sender being able to make append() realloc/grow multiple times.
144 mimeHeaderBlock_.reserveSpace(szLimit);
145
146 static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
147
148 while (!tok.atEnd()) {
149 const SBuf all(tok.remaining());
150 const auto blobLen = tok.skipAll(nonCRLF); // may not be there
151 const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
152 const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
153
154 if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
155 mimeHeaderBlock_.append(all.substr(0, blobLen));
156 mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
157 } else
158 mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
159 }
160}
161
b8f86fd2 162bool
f8cab755 163Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
b8f86fd2
AJ
164{
165 // MIME headers block exist in (only) HTTP/1.x and ICY
166 const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
e47e0802
AJ
167 msgProtocol_.protocol == AnyP::PROTO_ICY ||
168 hackExpectsMime_;
b8f86fd2
AJ
169
170 if (expectMime) {
f1d5359e
AJ
171 /* NOTE: HTTP/0.9 messages do not have a mime header block.
172 * So the rest of the code will need to deal with '0'-byte headers
173 * (ie, none, so don't try parsing em)
174 */
00237269
AJ
175 bool containsObsFold;
176 if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
f8cab755
AJ
177
178 // Squid could handle these headers, but admin does not want to
179 if (firstLineSize() + mimeHeaderBytes >= limit) {
180 debugs(33, 5, "Too large " << which);
181 parseStatusCode = Http::scHeaderTooLarge;
182 buf_.consume(mimeHeaderBytes);
183 parsingStage_ = HTTP_PARSE_DONE;
184 return false;
185 }
186
187 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
00237269
AJ
188 cleanMimePrefix();
189 if (containsObsFold)
190 unfoldMime();
191
f8cab755
AJ
192 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
193
194 } else { // headersEnd() == 0
f1d5359e
AJ
195 if (buf_.length()+firstLineSize() >= limit) {
196 debugs(33, 5, "Too large " << which);
197 parseStatusCode = Http::scHeaderTooLarge;
198 parsingStage_ = HTTP_PARSE_DONE;
199 } else
200 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
201 return false;
202 }
b8f86fd2 203
f1d5359e
AJ
204 } else
205 debugs(33, 3, "Missing HTTP/1.x identifier");
206
207 // NP: we do not do any further stages here yet so go straight to DONE
208 parsingStage_ = HTTP_PARSE_DONE;
209
f1d5359e
AJ
210 return true;
211}
212
// arbitrary maximum-length for the header value found by Http::One::Parser::getHostHeaderField()
f53969cc 214#define GET_HDR_SZ 1024
c99510dd 215
687696c1
AJ
216// BUG: returns only the first header line with given name,
217// ignores multi-line headers and obs-fold headers
c99510dd 218char *
2a51e34e 219Http::One::Parser::getHostHeaderField()
c99510dd 220{
2a51e34e 221 if (!headerBlockSize())
aee3523a 222 return nullptr;
c99510dd 223
687696c1 224 LOCAL_ARRAY(char, header, GET_HDR_SZ);
2a51e34e 225 const char *name = "Host";
1296170f 226 const int namelen = strlen(name);
687696c1 227
f6c7fa03 228 debugs(25, 5, "looking for " << name);
c99510dd 229
f6c7fa03 230 // while we can find more LF in the SBuf
417da400 231 Tokenizer tok(mimeHeaderBlock_);
687696c1 232 SBuf p;
c99510dd 233
00237269 234 while (tok.prefix(p, LineCharacters())) {
2d40b13f 235 if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
2f8abb64 236 break; // error. reached invalid octet or end of buffer instead of an LF ??
c99510dd 237
687696c1
AJ
238 // header lines must start with the name (case insensitive)
239 if (p.substr(0, namelen).caseCmp(name, namelen))
c99510dd
AJ
240 continue;
241
687696c1
AJ
242 // then a COLON
243 if (p[namelen] != ':')
c99510dd
AJ
244 continue;
245
687696c1 246 // drop any trailing *CR sequence
00237269 247 p.trim(Http1::CrLf(), false, true);
c99510dd 248
687696c1
AJ
249 debugs(25, 5, "checking " << p);
250 p.consume(namelen + 1);
c99510dd 251
687696c1 252 // TODO: optimize SBuf::trim to take CharacterSet directly
417da400 253 Tokenizer t(p);
9bafa70d 254 t.skipAll(CharacterSet::WSP);
687696c1 255 p = t.remaining();
c99510dd 256
687696c1
AJ
257 // prevent buffer overrun on char header[];
258 p.chop(0, sizeof(header)-1);
c99510dd 259
2a51e34e
AJ
260 // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
261 static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
262 if (p.findFirstNotOf(hostChars) != SBuf::npos)
263 break; // error. line contains character not accepted in Host header
264
687696c1 265 // return the header field-value
3f0e38d6 266 SBufToCstring(header, p);
f6c7fa03 267 debugs(25, 5, "returning " << header);
687696c1 268 return header;
c99510dd
AJ
269 }
270
aee3523a 271 return nullptr;
c99510dd 272}
f53969cc 273
9a4b5048 274int
26f0a359 275Http::One::ErrorLevel()
9a4b5048
AJ
276{
277 return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
278}
2c4e5226 279
26f0a359 280// BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
417da400
EB
281void
282Http::One::ParseBws(Parser::Tokenizer &tok)
26f0a359 283{
417da400
EB
284 const auto count = tok.skipAll(Parser::WhitespaceCharacters());
285
286 if (tok.atEnd())
287 throw InsufficientInput(); // even if count is positive
288
289 if (count) {
26f0a359
AR
290 // Generating BWS is a MUST-level violation so warn about it as needed.
291 debugs(33, ErrorLevel(), "found " << count << " BWS octets");
292 // RFC 7230 says we MUST parse BWS, so we fall through even if
293 // Config.onoff.relaxed_header_parser is off.
294 }
295 // else we successfully "parsed" an empty BWS sequence
296
417da400 297 // success: no more BWS characters expected
26f0a359 298}
cae5602c 299