]>
Commit | Line | Data |
---|---|---|
48a37aee | 1 | /* |
bf95c10a | 2 | * Copyright (C) 1996-2022 The Squid Software Foundation and contributors |
48a37aee AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
c99510dd | 9 | #include "squid.h" |
417da400 | 10 | #include "base/CharacterSet.h" |
675b8408 | 11 | #include "debug/Stream.h" |
c99510dd | 12 | #include "http/one/Parser.h" |
f1d5359e | 13 | #include "mime_header.h" |
417da400 | 14 | #include "parser/Tokenizer.h" |
b8f86fd2 | 15 | #include "SquidConfig.h" |
c99510dd | 16 | |
9651320a AJ |
17 | /// RFC 7230 section 2.6 - 7 magic octets |
18 | const SBuf Http::One::Parser::Http1magic("HTTP/1."); | |
19 | ||
00237269 AJ |
20 | const SBuf &Http::One::CrLf() |
21 | { | |
22 | static const SBuf crlf("\r\n"); | |
23 | return crlf; | |
24 | } | |
25 | ||
c99510dd AJ |
26 | void |
27 | Http::One::Parser::clear() | |
28 | { | |
29 | parsingStage_ = HTTP_PARSE_NONE; | |
aee3523a | 30 | buf_ = nullptr; |
c99510dd AJ |
31 | msgProtocol_ = AnyP::ProtocolVersion(); |
32 | mimeHeaderBlock_.clear(); | |
33 | } | |
34 | ||
00237269 AJ |
35 | /// characters HTTP permits tolerant parsers to accept as delimiters |
36 | static const CharacterSet & | |
37 | RelaxedDelimiterCharacters() | |
38 | { | |
39 | // RFC 7230 section 3.5 | |
40 | // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), | |
41 | // or bare CR as whitespace between request-line fields | |
42 | static const CharacterSet RelaxedDels = | |
43 | (CharacterSet::SP + | |
44 | CharacterSet::HTAB + | |
45 | CharacterSet("VT,FF","\x0B\x0C") + | |
46 | CharacterSet::CR).rename("relaxed-WSP"); | |
47 | ||
48 | return RelaxedDels; | |
49 | } | |
50 | ||
26f0a359 AR |
51 | const CharacterSet & |
52 | Http::One::Parser::WhitespaceCharacters() | |
53 | { | |
54 | return Config.onoff.relaxed_header_parser ? | |
55 | RelaxedDelimiterCharacters() : CharacterSet::WSP; | |
56 | } | |
57 | ||
00237269 AJ |
58 | const CharacterSet & |
59 | Http::One::Parser::DelimiterCharacters() | |
60 | { | |
61 | return Config.onoff.relaxed_header_parser ? | |
62 | RelaxedDelimiterCharacters() : CharacterSet::SP; | |
63 | } | |
64 | ||
417da400 EB |
65 | void |
66 | Http::One::Parser::skipLineTerminator(Tokenizer &tok) const | |
f1d5359e | 67 | { |
00237269 | 68 | if (tok.skip(Http1::CrLf())) |
417da400 | 69 | return; |
b8f86fd2 AJ |
70 | |
71 | if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) | |
417da400 | 72 | return; |
b8f86fd2 | 73 | |
188ad27f | 74 | if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r')) |
417da400 | 75 | throw InsufficientInput(); |
188ad27f AJ |
76 | |
77 | throw TexcHere("garbage instead of CRLF line terminator"); | |
b8f86fd2 AJ |
78 | } |
79 | ||
00237269 AJ |
80 | /// all characters except the LF line terminator |
81 | static const CharacterSet & | |
82 | LineCharacters() | |
83 | { | |
84 | static const CharacterSet line = CharacterSet::LF.complement("non-LF"); | |
85 | return line; | |
86 | } | |
87 | ||
88 | /** | |
89 | * Remove invalid lines (if any) from the mime prefix | |
90 | * | |
91 | * RFC 7230 section 3: | |
92 | * "A recipient that receives whitespace between the start-line and | |
93 | * the first header field MUST ... consume each whitespace-preceded | |
94 | * line without further processing of it." | |
95 | * | |
96 | * We need to always use the relaxed delimiters here to prevent | |
97 | * line smuggling through strict parsers. | |
98 | * | |
99 | * Note that 'whitespace' in RFC 7230 includes CR. So that means | |
100 | * sequences of CRLF will be pruned, but not sequences of bare-LF. | |
101 | */ | |
102 | void | |
103 | Http::One::Parser::cleanMimePrefix() | |
104 | { | |
417da400 | 105 | Tokenizer tok(mimeHeaderBlock_); |
00237269 AJ |
106 | while (tok.skipOne(RelaxedDelimiterCharacters())) { |
107 | (void)tok.skipAll(LineCharacters()); // optional line content | |
108 | // LF terminator is required. | |
109 | // trust headersEnd() to ensure that we have at least one LF | |
110 | (void)tok.skipOne(CharacterSet::LF); | |
111 | } | |
112 | ||
113 | // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF, | |
114 | // then we skipped everything, including that terminating LF. | |
115 | // Restore the terminating CRLF if needed. | |
116 | if (tok.atEnd()) | |
117 | mimeHeaderBlock_ = Http1::CrLf(); | |
118 | else | |
119 | mimeHeaderBlock_ = tok.remaining(); | |
120 | // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator | |
121 | } | |
122 | ||
123 | /** | |
124 | * Replace obs-fold with a single SP, | |
125 | * | |
126 | * RFC 7230 section 3.2.4 | |
127 | * "A server that receives an obs-fold in a request message that is not | |
128 | * within a message/http container MUST ... replace | |
129 | * each received obs-fold with one or more SP octets prior to | |
130 | * interpreting the field value or forwarding the message downstream." | |
131 | * | |
132 | * "A proxy or gateway that receives an obs-fold in a response message | |
133 | * that is not within a message/http container MUST ... replace each | |
134 | * received obs-fold with one or more SP octets prior to interpreting | |
135 | * the field value or forwarding the message downstream." | |
136 | */ | |
137 | void | |
138 | Http::One::Parser::unfoldMime() | |
139 | { | |
417da400 | 140 | Tokenizer tok(mimeHeaderBlock_); |
00237269 AJ |
141 | const auto szLimit = mimeHeaderBlock_.length(); |
142 | mimeHeaderBlock_.clear(); | |
143 | // prevent the mime sender being able to make append() realloc/grow multiple times. | |
144 | mimeHeaderBlock_.reserveSpace(szLimit); | |
145 | ||
146 | static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF"); | |
147 | ||
148 | while (!tok.atEnd()) { | |
149 | const SBuf all(tok.remaining()); | |
150 | const auto blobLen = tok.skipAll(nonCRLF); // may not be there | |
151 | const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there | |
152 | const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there | |
153 | ||
154 | if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold! | |
155 | mimeHeaderBlock_.append(all.substr(0, blobLen)); | |
156 | mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP | |
157 | } else | |
158 | mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen)); | |
159 | } | |
160 | } | |
161 | ||
b8f86fd2 | 162 | bool |
f8cab755 | 163 | Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) |
b8f86fd2 AJ |
164 | { |
165 | // MIME headers block exist in (only) HTTP/1.x and ICY | |
166 | const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) || | |
e47e0802 AJ |
167 | msgProtocol_.protocol == AnyP::PROTO_ICY || |
168 | hackExpectsMime_; | |
b8f86fd2 AJ |
169 | |
170 | if (expectMime) { | |
f1d5359e AJ |
171 | /* NOTE: HTTP/0.9 messages do not have a mime header block. |
172 | * So the rest of the code will need to deal with '0'-byte headers | |
173 | * (ie, none, so don't try parsing em) | |
174 | */ | |
00237269 AJ |
175 | bool containsObsFold; |
176 | if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) { | |
f8cab755 AJ |
177 | |
178 | // Squid could handle these headers, but admin does not want to | |
179 | if (firstLineSize() + mimeHeaderBytes >= limit) { | |
180 | debugs(33, 5, "Too large " << which); | |
181 | parseStatusCode = Http::scHeaderTooLarge; | |
182 | buf_.consume(mimeHeaderBytes); | |
183 | parsingStage_ = HTTP_PARSE_DONE; | |
184 | return false; | |
185 | } | |
186 | ||
187 | mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); | |
00237269 AJ |
188 | cleanMimePrefix(); |
189 | if (containsObsFold) | |
190 | unfoldMime(); | |
191 | ||
f8cab755 AJ |
192 | debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); |
193 | ||
194 | } else { // headersEnd() == 0 | |
f1d5359e AJ |
195 | if (buf_.length()+firstLineSize() >= limit) { |
196 | debugs(33, 5, "Too large " << which); | |
197 | parseStatusCode = Http::scHeaderTooLarge; | |
198 | parsingStage_ = HTTP_PARSE_DONE; | |
199 | } else | |
200 | debugs(33, 5, "Incomplete " << which << ", waiting for end of headers"); | |
201 | return false; | |
202 | } | |
b8f86fd2 | 203 | |
f1d5359e AJ |
204 | } else |
205 | debugs(33, 3, "Missing HTTP/1.x identifier"); | |
206 | ||
207 | // NP: we do not do any further stages here yet so go straight to DONE | |
208 | parsingStage_ = HTTP_PARSE_DONE; | |
209 | ||
f1d5359e AJ |
210 | return true; |
211 | } | |
212 | ||
2a51e34e | 213 | // arbitrary maximum-length for headers which can be found by Http1Parser::getHostHeaderField() |
f53969cc | 214 | #define GET_HDR_SZ 1024 |
c99510dd | 215 | |
687696c1 AJ |
216 | // BUG: returns only the first header line with given name, |
217 | // ignores multi-line headers and obs-fold headers | |
c99510dd | 218 | char * |
2a51e34e | 219 | Http::One::Parser::getHostHeaderField() |
c99510dd | 220 | { |
2a51e34e | 221 | if (!headerBlockSize()) |
aee3523a | 222 | return nullptr; |
c99510dd | 223 | |
687696c1 | 224 | LOCAL_ARRAY(char, header, GET_HDR_SZ); |
2a51e34e | 225 | const char *name = "Host"; |
1296170f | 226 | const int namelen = strlen(name); |
687696c1 | 227 | |
f6c7fa03 | 228 | debugs(25, 5, "looking for " << name); |
c99510dd | 229 | |
f6c7fa03 | 230 | // while we can find more LF in the SBuf |
417da400 | 231 | Tokenizer tok(mimeHeaderBlock_); |
687696c1 | 232 | SBuf p; |
c99510dd | 233 | |
00237269 | 234 | while (tok.prefix(p, LineCharacters())) { |
2d40b13f | 235 | if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF |
2f8abb64 | 236 | break; // error. reached invalid octet or end of buffer instead of an LF ?? |
c99510dd | 237 | |
687696c1 AJ |
238 | // header lines must start with the name (case insensitive) |
239 | if (p.substr(0, namelen).caseCmp(name, namelen)) | |
c99510dd AJ |
240 | continue; |
241 | ||
687696c1 AJ |
242 | // then a COLON |
243 | if (p[namelen] != ':') | |
c99510dd AJ |
244 | continue; |
245 | ||
687696c1 | 246 | // drop any trailing *CR sequence |
00237269 | 247 | p.trim(Http1::CrLf(), false, true); |
c99510dd | 248 | |
687696c1 AJ |
249 | debugs(25, 5, "checking " << p); |
250 | p.consume(namelen + 1); | |
c99510dd | 251 | |
687696c1 | 252 | // TODO: optimize SBuf::trim to take CharacterSet directly |
417da400 | 253 | Tokenizer t(p); |
9bafa70d | 254 | t.skipAll(CharacterSet::WSP); |
687696c1 | 255 | p = t.remaining(); |
c99510dd | 256 | |
687696c1 AJ |
257 | // prevent buffer overrun on char header[]; |
258 | p.chop(0, sizeof(header)-1); | |
c99510dd | 259 | |
2a51e34e AJ |
260 | // currently only used for pre-parse Host header, ensure valid domain[:port] or ip[:port] |
261 | static const auto hostChars = CharacterSet("host",":[].-_") + CharacterSet::ALPHA + CharacterSet::DIGIT; | |
262 | if (p.findFirstNotOf(hostChars) != SBuf::npos) | |
263 | break; // error. line contains character not accepted in Host header | |
264 | ||
687696c1 | 265 | // return the header field-value |
3f0e38d6 | 266 | SBufToCstring(header, p); |
f6c7fa03 | 267 | debugs(25, 5, "returning " << header); |
687696c1 | 268 | return header; |
c99510dd AJ |
269 | } |
270 | ||
aee3523a | 271 | return nullptr; |
c99510dd | 272 | } |
f53969cc | 273 | |
9a4b5048 | 274 | int |
26f0a359 | 275 | Http::One::ErrorLevel() |
9a4b5048 AJ |
276 | { |
277 | return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5; | |
278 | } | |
2c4e5226 | 279 | |
26f0a359 | 280 | // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule |
417da400 EB |
281 | void |
282 | Http::One::ParseBws(Parser::Tokenizer &tok) | |
26f0a359 | 283 | { |
417da400 EB |
284 | const auto count = tok.skipAll(Parser::WhitespaceCharacters()); |
285 | ||
286 | if (tok.atEnd()) | |
287 | throw InsufficientInput(); // even if count is positive | |
288 | ||
289 | if (count) { | |
26f0a359 AR |
290 | // Generating BWS is a MUST-level violation so warn about it as needed. |
291 | debugs(33, ErrorLevel(), "found " << count << " BWS octets"); | |
292 | // RFC 7230 says we MUST parse BWS, so we fall through even if | |
293 | // Config.onoff.relaxed_header_parser is off. | |
294 | } | |
295 | // else we successfully "parsed" an empty BWS sequence | |
296 | ||
417da400 | 297 | // success: no more BWS characters expected |
26f0a359 | 298 | } |
cae5602c | 299 |