2 * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
10 #include "base/CharacterSet.h"
11 #include "debug/Stream.h"
12 #include "http/one/Parser.h"
13 #include "mime_header.h"
14 #include "parser/Tokenizer.h"
15 #include "SquidConfig.h"
17 /// RFC 7230 section 2.6 - 7 magic octets
/// Definition of the Parser::Http1magic constant: the literal "HTTP/1.".
18 const SBuf
Http::One::Parser::Http1magic("HTTP/1.");
/// Provides the two-octet CRLF sequence ("\r\n") as an SBuf.
/// The buffer is a function-local static, so it is constructed only once.
/// NOTE(review): presumably the function returns `crlf` by reference -- confirm.
20 const SBuf
&Http::One::CrLf()
22 static const SBuf
crlf("\r\n");
/// Puts the parser members assigned below back into their initial state.
27 Http::One::Parser::clear()
// no parsing stage has been reached yet
29 parsingStage_
= HTTP_PARSE_NONE
;
// forget the protocol version by assigning a default-constructed one
31 msgProtocol_
= AnyP::ProtocolVersion();
// drop any previously stored mime header bytes
32 mimeHeaderBlock_
.clear();
35 /// characters HTTP permits tolerant parsers to accept as delimiters
/// The set is a function-local static, built once and renamed "relaxed-WSP".
/// NOTE(review): the terms visible below are VT, FF and CR; per the RFC note
/// the full set presumably also includes SP and HTAB -- confirm.
36 static const CharacterSet
&
37 RelaxedDelimiterCharacters()
39 // RFC 7230 section 3.5
40 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
41 // or bare CR as whitespace between request-line fields
42 static const CharacterSet RelaxedDels
=
// "\x0B\x0C" are the VT and FF octets named in the RFC text above
45 CharacterSet("VT,FF","\x0B\x0C") +
46 CharacterSet::CR
).rename("relaxed-WSP");
/// Whitespace characters accepted by the parser.
/// When Config.onoff.relaxed_header_parser is non-zero this is the relaxed
/// delimiter set; otherwise it is CharacterSet::WSP.
52 Http::One::Parser::WhitespaceCharacters()
54 return Config
.onoff
.relaxed_header_parser
?
55 RelaxedDelimiterCharacters() : CharacterSet::WSP
;
/// Delimiter characters accepted by the parser.
/// When Config.onoff.relaxed_header_parser is non-zero this is the relaxed
/// delimiter set; otherwise only CharacterSet::SP is accepted.
59 Http::One::Parser::DelimiterCharacters()
61 return Config
.onoff
.relaxed_header_parser
?
62 RelaxedDelimiterCharacters() : CharacterSet::SP
;
/// Consumes one line terminator at the current position of tok.
/// A CRLF pair is tried first; a bare LF is tried only when
/// Config.onoff.relaxed_header_parser is non-zero.
/// Throws InsufficientInput when tok is empty or holds just a single CR,
/// and a TexcHere() exception when other octets are found instead.
66 Http::One::Parser::skipLineTerminator(Tokenizer
&tok
) const
// strict form: the full CRLF pair
68 if (tok
.skip(Http1::CrLf()))
// relaxed form: a lone LF
71 if (Config
.onoff
.relaxed_header_parser
&& tok
.skipOne(CharacterSet::LF
))
// nothing left, or only a CR that may be the first half of a CRLF whose LF
// has not been seen yet: more input is needed before deciding
74 if (tok
.atEnd() || (tok
.remaining().length() == 1 && tok
.remaining().at(0) == '\r'))
75 throw InsufficientInput();
// any other octet here is not a valid line terminator
77 throw TexcHere("garbage instead of CRLF line terminator");
80 /// all characters except the LF line terminator
/// Built once as the complement of CharacterSet::LF and labelled "non-LF".
81 static const CharacterSet
&
84 static const CharacterSet line
= CharacterSet::LF
.complement("non-LF");
89 * Remove invalid lines (if any) from the mime prefix
92 * "A recipient that receives whitespace between the start-line and
93 * the first header field MUST ... consume each whitespace-preceded
94 * line without further processing of it."
96 * We need to always use the relaxed delimiters here to prevent
97 * line smuggling through strict parsers.
99 * Note that 'whitespace' in RFC 7230 includes CR. So that means
100 * sequences of CRLF will be pruned, but not sequences of bare-LF.
/// Strips leading whitespace-preceded lines from mimeHeaderBlock_ in place.
103 Http::One::Parser::cleanMimePrefix()
105 Tokenizer
tok(mimeHeaderBlock_
);
// each pass handles one line whose first octet is a relaxed delimiter
106 while (tok
.skipOne(RelaxedDelimiterCharacters())) {
107 (void)tok
.skipAll(LineCharacters()); // optional line content
108 // LF terminator is required.
109 // trust headersEnd() to ensure that we have at least one LF
110 (void)tok
.skipOne(CharacterSet::LF
);
113 // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
114 // then we skipped everything, including that terminating LF.
115 // Restore the terminating CRLF if needed.
// NOTE(review): the two assignments below are alternatives; presumably the
// first applies only when tok has been fully consumed -- confirm.
117 mimeHeaderBlock_
= Http1::CrLf();
119 mimeHeaderBlock_
= tok
.remaining();
120 // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
124 * Replace each obs-fold with a single SP.
126 * RFC 7230 section 3.2.4
127 * "A server that receives an obs-fold in a request message that is not
128 * within a message/http container MUST ... replace
129 * each received obs-fold with one or more SP octets prior to
130 * interpreting the field value or forwarding the message downstream."
132 * "A proxy or gateway that receives an obs-fold in a response message
133 * that is not within a message/http container MUST ... replace each
134 * received obs-fold with one or more SP octets prior to interpreting
135 * the field value or forwarding the message downstream."
/// Rewrites mimeHeaderBlock_ so that every obs-fold (optional CRs, an LF and
/// the whitespace that follows it) is replaced with a single SP; everything
/// else is copied through unchanged.
138 Http::One::Parser::unfoldMime()
// tok walks the current content; the member itself is emptied below and
// rebuilt from the pieces tok yields
140 Tokenizer
tok(mimeHeaderBlock_
);
// the rebuilt block is never longer than the original one
141 const auto szLimit
= mimeHeaderBlock_
.length();
142 mimeHeaderBlock_
.clear();
143 // prevent the mime sender being able to make append() realloc/grow multiple times.
144 mimeHeaderBlock_
.reserveSpace(szLimit
);
// every octet other than CR and LF
146 static const CharacterSet nonCRLF
= (CharacterSet::CR
+ CharacterSet::LF
).complement().rename("non-CRLF");
148 while (!tok
.atEnd()) {
// snapshot of the unparsed input, taken before skipping, so that the
// lengths measured below can be used as offsets into it
149 const SBuf
all(tok
.remaining());
// length of the leading run of ordinary (non-CR, non-LF) octets
150 const auto blobLen
= tok
.skipAll(nonCRLF
); // may not be there
// number of CR octets that follow the blob
151 const auto crLen
= tok
.skipAll(CharacterSet::CR
); // may not be there
// non-zero when a single LF was consumed after the CRs
152 const auto lfLen
= tok
.skipOne(CharacterSet::LF
); // may not be there
// an LF followed by whitespace means the next line continues this field;
// the whitespace is consumed by the test itself
154 if (lfLen
&& tok
.skipAll(CharacterSet::WSP
)) { // obs-fold!
// keep the blob only; the CRs, the LF and the whitespace are dropped
155 mimeHeaderBlock_
.append(all
.substr(0, blobLen
));
156 mimeHeaderBlock_
.append(' '); // replace one obs-fold with one SP
// not a fold: keep the blob together with its CR and LF octets as they were
158 mimeHeaderBlock_
.append(all
.substr(0, blobLen
+ crLen
+ lfLen
));
/// Moves the mime header block from buf_ into mimeHeaderBlock_ once
/// headersEnd() reports where the block ends.
/// \param which  label for the message part, used only in debug output
/// \param limit  size bound checked against firstLineSize() plus header bytes
163 Http::One::Parser::grabMimeBlock(const char *which
, const size_t limit
)
165 // a MIME header block exists only in HTTP/1.x and ICY messages
166 const bool expectMime
= (msgProtocol_
.protocol
== AnyP::PROTO_HTTP
&& msgProtocol_
.major
== 1) ||
167 msgProtocol_
.protocol
== AnyP::PROTO_ICY
||
171 /* NOTE: HTTP/0.9 messages do not have a mime header block.
172 * So the rest of the code will need to deal with '0'-byte headers
173 * (ie, none, so don't try parsing em)
// NOTE(review): presumably filled in by headersEnd() below -- confirm
175 bool containsObsFold
;
// a non-zero result is used as the number of header bytes to take from buf_
176 if (SBuf::size_type mimeHeaderBytes
= headersEnd(buf_
, containsObsFold
)) {
178 // Squid could handle these headers, but admin does not want to
179 if (firstLineSize() + mimeHeaderBytes
>= limit
) {
180 debugs(33, 5, "Too large " << which
);
181 parseStatusCode
= Http::scHeaderTooLarge
;
// discard the oversized block from the input buffer
182 buf_
.consume(mimeHeaderBytes
);
// no further parsing is attempted
183 parsingStage_
= HTTP_PARSE_DONE
;
// the block fits: take it out of buf_ and keep it
187 mimeHeaderBlock_
= buf_
.consume(mimeHeaderBytes
);
192 debugs(74, 5, "mime header (0-" << mimeHeaderBytes
<< ") {" << mimeHeaderBlock_
<< "}");
194 } else { // headersEnd() == 0
// no end of headers yet, but what is buffered already reaches the limit
195 if (buf_
.length()+firstLineSize() >= limit
) {
196 debugs(33, 5, "Too large " << which
);
197 parseStatusCode
= Http::scHeaderTooLarge
;
198 parsingStage_
= HTTP_PARSE_DONE
;
200 debugs(33, 5, "Incomplete " << which
<< ", waiting for end of headers");
205 debugs(33, 3, "Missing HTTP/1.x identifier");
207 // NP: we do not do any further stages here yet so go straight to DONE
208 parsingStage_
= HTTP_PARSE_DONE
;
213 // arbitrary size of the `header` buffer used by Http::One::Parser::getHostHeaderField();
// field values are chopped to sizeof(header)-1 octets before being copied into it
214 #define GET_HDR_SZ 1024
216 // BUG: returns only the first header line with given name,
217 // ignores multi-line headers and obs-fold headers
/// Searches mimeHeaderBlock_ line by line for a "Host" field and copies its
/// value into the `header` buffer after checking that it contains only
/// characters acceptable in a host[:port] value.
219 Http::One::Parser::getHostHeaderField()
// without a header block there is nothing to search
221 if (!headerBlockSize())
224 LOCAL_ARRAY(char, header
, GET_HDR_SZ
);
225 const char *name
= "Host";
226 const int namelen
= strlen(name
);
228 debugs(25, 5, "looking for " << name
);
230 // while we can find more LF in the SBuf
231 Tokenizer
tok(mimeHeaderBlock_
);
// p receives the octets of each line up to, but not including, its LF
234 while (tok
.prefix(p
, LineCharacters())) {
235 if (!tok
.skipOne(CharacterSet::LF
)) // move tokenizer past the LF
236 break; // error. reached invalid octet or end of buffer instead of an LF ??
238 // header lines must start with the name (case insensitive)
239 if (p
.substr(0, namelen
).caseCmp(name
, namelen
))
// the field name must be followed immediately by a colon
// NOTE(review): no length check is visible before this index; when p is
// exactly namelen octets long this reads position p.length() -- confirm
// that SBuf::operator[] tolerates that.
243 if (p
[namelen
] != ':')
246 // drop any trailing *CR sequence
247 p
.trim(Http1::CrLf(), false, true);
249 debugs(25, 5, "checking " << p
);
// remove the field name and the colon, leaving the value
250 p
.consume(namelen
+ 1);
252 // TODO: optimize SBuf::trim to take CharacterSet directly
// NOTE(review): `t` appears to be a tokenizer over p used to skip leading
// whitespace of the value -- confirm against its declaration
254 t
.skipAll(CharacterSet::WSP
);
257 // prevent buffer overrun on char header[];
258 p
.chop(0, sizeof(header
)-1);
260 // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port]
// accepted octets: letters, digits and the punctuation ":[].-_"
261 static const auto hostChars
= CharacterSet("host",":[].-_") + CharacterSet::ALPHA
+ CharacterSet::DIGIT
;
262 if (p
.findFirstNotOf(hostChars
) != SBuf::npos
)
263 break; // error. line contains character not accepted in Host header
265 // return the header field-value
266 SBufToCstring(header
, p
);
267 debugs(25, 5, "returning " << header
);
/// Debug level for reporting a tolerated protocol violation (see ParseBws()):
/// DBG_IMPORTANT when Config.onoff.relaxed_header_parser is negative,
/// level 5 otherwise.
275 Http::One::ErrorLevel()
277 return Config
.onoff
.relaxed_header_parser
< 0 ? DBG_IMPORTANT
: 5;
280 // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
/// Skips any run of whitespace characters at the current position of tok.
/// Whitespace that was found is reported via debugs() at the ErrorLevel() level.
/// May throw InsufficientInput.
282 Http::One::ParseBws(Parser::Tokenizer
&tok
)
// number of whitespace octets consumed (possibly zero)
284 const auto count
= tok
.skipAll(Parser::WhitespaceCharacters());
// NOTE(review): presumably thrown when tok has no input left after the
// skip, since more whitespace could still arrive -- confirm the guard
287 throw InsufficientInput(); // even if count is positive
290 // Generating BWS is a MUST-level violation so warn about it as needed.
291 debugs(33, ErrorLevel(), "found " << count
<< " BWS octets");
292 // RFC 7230 says we MUST parse BWS, so we fall through even if
293 // Config.onoff.relaxed_header_parser is off.
295 // else we successfully "parsed" an empty BWS sequence
297 // success: no more BWS characters expected