]>
Commit | Line | Data |
---|---|---|
48a37aee | 1 | /* |
5b74111a | 2 | * Copyright (C) 1996-2018 The Squid Software Foundation and contributors |
48a37aee AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
c99510dd AJ |
9 | #include "squid.h" |
10 | #include "Debug.h" | |
11 | #include "http/one/Parser.h" | |
f29718b0 | 12 | #include "http/one/Tokenizer.h" |
f1d5359e | 13 | #include "mime_header.h" |
b8f86fd2 | 14 | #include "SquidConfig.h" |
c99510dd | 15 | |
9651320a AJ |
16 | /// RFC 7230 section 2.6 - 7 magic octets |
17 | const SBuf Http::One::Parser::Http1magic("HTTP/1."); | |
18 | ||
00237269 AJ |
19 | const SBuf &Http::One::CrLf() |
20 | { | |
21 | static const SBuf crlf("\r\n"); | |
22 | return crlf; | |
23 | } | |
24 | ||
c99510dd AJ |
25 | void |
26 | Http::One::Parser::clear() | |
27 | { | |
28 | parsingStage_ = HTTP_PARSE_NONE; | |
b749de75 | 29 | buf_ = NULL; |
c99510dd AJ |
30 | msgProtocol_ = AnyP::ProtocolVersion(); |
31 | mimeHeaderBlock_.clear(); | |
32 | } | |
33 | ||
00237269 AJ |
34 | /// characters HTTP permits tolerant parsers to accept as delimiters |
35 | static const CharacterSet & | |
36 | RelaxedDelimiterCharacters() | |
37 | { | |
38 | // RFC 7230 section 3.5 | |
39 | // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), | |
40 | // or bare CR as whitespace between request-line fields | |
41 | static const CharacterSet RelaxedDels = | |
42 | (CharacterSet::SP + | |
43 | CharacterSet::HTAB + | |
44 | CharacterSet("VT,FF","\x0B\x0C") + | |
45 | CharacterSet::CR).rename("relaxed-WSP"); | |
46 | ||
47 | return RelaxedDels; | |
48 | } | |
49 | ||
26f0a359 AR |
50 | const CharacterSet & |
51 | Http::One::Parser::WhitespaceCharacters() | |
52 | { | |
53 | return Config.onoff.relaxed_header_parser ? | |
54 | RelaxedDelimiterCharacters() : CharacterSet::WSP; | |
55 | } | |
56 | ||
00237269 AJ |
57 | const CharacterSet & |
58 | Http::One::Parser::DelimiterCharacters() | |
59 | { | |
60 | return Config.onoff.relaxed_header_parser ? | |
61 | RelaxedDelimiterCharacters() : CharacterSet::SP; | |
62 | } | |
63 | ||
f1d5359e | 64 | bool |
f29718b0 | 65 | Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const |
f1d5359e | 66 | { |
00237269 | 67 | if (tok.skip(Http1::CrLf())) |
b8f86fd2 AJ |
68 | return true; |
69 | ||
70 | if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) | |
71 | return true; | |
72 | ||
188ad27f AJ |
73 | if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r')) |
74 | return false; // need more data | |
75 | ||
76 | throw TexcHere("garbage instead of CRLF line terminator"); | |
77 | return false; // unreachable, but make naive compilers happy | |
b8f86fd2 AJ |
78 | } |
79 | ||
00237269 AJ |
80 | /// all characters except the LF line terminator |
81 | static const CharacterSet & | |
82 | LineCharacters() | |
83 | { | |
84 | static const CharacterSet line = CharacterSet::LF.complement("non-LF"); | |
85 | return line; | |
86 | } | |
87 | ||
88 | /** | |
89 | * Remove invalid lines (if any) from the mime prefix | |
90 | * | |
91 | * RFC 7230 section 3: | |
92 | * "A recipient that receives whitespace between the start-line and | |
93 | * the first header field MUST ... consume each whitespace-preceded | |
94 | * line without further processing of it." | |
95 | * | |
96 | * We need to always use the relaxed delimiters here to prevent | |
97 | * line smuggling through strict parsers. | |
98 | * | |
99 | * Note that 'whitespace' in RFC 7230 includes CR. So that means | |
100 | * sequences of CRLF will be pruned, but not sequences of bare-LF. | |
101 | */ | |
102 | void | |
103 | Http::One::Parser::cleanMimePrefix() | |
104 | { | |
105 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
106 | while (tok.skipOne(RelaxedDelimiterCharacters())) { | |
107 | (void)tok.skipAll(LineCharacters()); // optional line content | |
108 | // LF terminator is required. | |
109 | // trust headersEnd() to ensure that we have at least one LF | |
110 | (void)tok.skipOne(CharacterSet::LF); | |
111 | } | |
112 | ||
113 | // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF, | |
114 | // then we skipped everything, including that terminating LF. | |
115 | // Restore the terminating CRLF if needed. | |
116 | if (tok.atEnd()) | |
117 | mimeHeaderBlock_ = Http1::CrLf(); | |
118 | else | |
119 | mimeHeaderBlock_ = tok.remaining(); | |
120 | // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator | |
121 | } | |
122 | ||
123 | /** | |
124 | * Replace obs-fold with a single SP, | |
125 | * | |
126 | * RFC 7230 section 3.2.4 | |
127 | * "A server that receives an obs-fold in a request message that is not | |
128 | * within a message/http container MUST ... replace | |
129 | * each received obs-fold with one or more SP octets prior to | |
130 | * interpreting the field value or forwarding the message downstream." | |
131 | * | |
132 | * "A proxy or gateway that receives an obs-fold in a response message | |
133 | * that is not within a message/http container MUST ... replace each | |
134 | * received obs-fold with one or more SP octets prior to interpreting | |
135 | * the field value or forwarding the message downstream." | |
136 | */ | |
137 | void | |
138 | Http::One::Parser::unfoldMime() | |
139 | { | |
140 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
141 | const auto szLimit = mimeHeaderBlock_.length(); | |
142 | mimeHeaderBlock_.clear(); | |
143 | // prevent the mime sender being able to make append() realloc/grow multiple times. | |
144 | mimeHeaderBlock_.reserveSpace(szLimit); | |
145 | ||
146 | static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF"); | |
147 | ||
148 | while (!tok.atEnd()) { | |
149 | const SBuf all(tok.remaining()); | |
150 | const auto blobLen = tok.skipAll(nonCRLF); // may not be there | |
151 | const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there | |
152 | const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there | |
153 | ||
154 | if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold! | |
155 | mimeHeaderBlock_.append(all.substr(0, blobLen)); | |
156 | mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP | |
157 | } else | |
158 | mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen)); | |
159 | } | |
160 | } | |
161 | ||
b8f86fd2 | 162 | bool |
f8cab755 | 163 | Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) |
b8f86fd2 AJ |
164 | { |
165 | // MIME headers block exist in (only) HTTP/1.x and ICY | |
166 | const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) || | |
e47e0802 AJ |
167 | msgProtocol_.protocol == AnyP::PROTO_ICY || |
168 | hackExpectsMime_; | |
b8f86fd2 AJ |
169 | |
170 | if (expectMime) { | |
f1d5359e AJ |
171 | /* NOTE: HTTP/0.9 messages do not have a mime header block. |
172 | * So the rest of the code will need to deal with '0'-byte headers | |
173 | * (ie, none, so don't try parsing em) | |
174 | */ | |
00237269 AJ |
175 | bool containsObsFold; |
176 | if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) { | |
f8cab755 AJ |
177 | |
178 | // Squid could handle these headers, but admin does not want to | |
179 | if (firstLineSize() + mimeHeaderBytes >= limit) { | |
180 | debugs(33, 5, "Too large " << which); | |
181 | parseStatusCode = Http::scHeaderTooLarge; | |
182 | buf_.consume(mimeHeaderBytes); | |
183 | parsingStage_ = HTTP_PARSE_DONE; | |
184 | return false; | |
185 | } | |
186 | ||
187 | mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); | |
00237269 AJ |
188 | cleanMimePrefix(); |
189 | if (containsObsFold) | |
190 | unfoldMime(); | |
191 | ||
f8cab755 AJ |
192 | debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); |
193 | ||
194 | } else { // headersEnd() == 0 | |
f1d5359e AJ |
195 | if (buf_.length()+firstLineSize() >= limit) { |
196 | debugs(33, 5, "Too large " << which); | |
197 | parseStatusCode = Http::scHeaderTooLarge; | |
198 | parsingStage_ = HTTP_PARSE_DONE; | |
199 | } else | |
200 | debugs(33, 5, "Incomplete " << which << ", waiting for end of headers"); | |
201 | return false; | |
202 | } | |
b8f86fd2 | 203 | |
f1d5359e AJ |
204 | } else |
205 | debugs(33, 3, "Missing HTTP/1.x identifier"); | |
206 | ||
207 | // NP: we do not do any further stages here yet so go straight to DONE | |
208 | parsingStage_ = HTTP_PARSE_DONE; | |
209 | ||
f1d5359e AJ |
210 | return true; |
211 | } | |
212 | ||
c99510dd | 213 | // arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField() |
f53969cc | 214 | #define GET_HDR_SZ 1024 |
c99510dd | 215 | |
687696c1 AJ |
216 | // BUG: returns only the first header line with given name, |
217 | // ignores multi-line headers and obs-fold headers | |
c99510dd AJ |
218 | char * |
219 | Http::One::Parser::getHeaderField(const char *name) | |
220 | { | |
c99510dd AJ |
221 | if (!headerBlockSize() || !name) |
222 | return NULL; | |
223 | ||
687696c1 | 224 | LOCAL_ARRAY(char, header, GET_HDR_SZ); |
1296170f | 225 | const int namelen = strlen(name); |
687696c1 | 226 | |
f6c7fa03 | 227 | debugs(25, 5, "looking for " << name); |
c99510dd | 228 | |
f6c7fa03 | 229 | // while we can find more LF in the SBuf |
f29718b0 | 230 | Http1::Tokenizer tok(mimeHeaderBlock_); |
687696c1 | 231 | SBuf p; |
c99510dd | 232 | |
00237269 | 233 | while (tok.prefix(p, LineCharacters())) { |
2d40b13f AJ |
234 | if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF |
235 | break; // error. reached invalid octet or end of buffer insted of an LF ?? | |
c99510dd | 236 | |
687696c1 AJ |
237 | // header lines must start with the name (case insensitive) |
238 | if (p.substr(0, namelen).caseCmp(name, namelen)) | |
c99510dd AJ |
239 | continue; |
240 | ||
687696c1 AJ |
241 | // then a COLON |
242 | if (p[namelen] != ':') | |
c99510dd AJ |
243 | continue; |
244 | ||
687696c1 | 245 | // drop any trailing *CR sequence |
00237269 | 246 | p.trim(Http1::CrLf(), false, true); |
c99510dd | 247 | |
687696c1 AJ |
248 | debugs(25, 5, "checking " << p); |
249 | p.consume(namelen + 1); | |
c99510dd | 250 | |
687696c1 | 251 | // TODO: optimize SBuf::trim to take CharacterSet directly |
f29718b0 | 252 | Http1::Tokenizer t(p); |
9bafa70d | 253 | t.skipAll(CharacterSet::WSP); |
687696c1 | 254 | p = t.remaining(); |
c99510dd | 255 | |
687696c1 AJ |
256 | // prevent buffer overrun on char header[]; |
257 | p.chop(0, sizeof(header)-1); | |
c99510dd | 258 | |
687696c1 | 259 | // return the header field-value |
3f0e38d6 | 260 | SBufToCstring(header, p); |
f6c7fa03 | 261 | debugs(25, 5, "returning " << header); |
687696c1 | 262 | return header; |
c99510dd AJ |
263 | } |
264 | ||
265 | return NULL; | |
266 | } | |
f53969cc | 267 | |
9a4b5048 | 268 | int |
26f0a359 | 269 | Http::One::ErrorLevel() |
9a4b5048 AJ |
270 | { |
271 | return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5; | |
272 | } | |
2c4e5226 | 273 | |
26f0a359 AR |
274 | // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule |
275 | bool | |
276 | Http::One::ParseBws(Tokenizer &tok) | |
277 | { | |
278 | if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) { | |
279 | // Generating BWS is a MUST-level violation so warn about it as needed. | |
280 | debugs(33, ErrorLevel(), "found " << count << " BWS octets"); | |
281 | // RFC 7230 says we MUST parse BWS, so we fall through even if | |
282 | // Config.onoff.relaxed_header_parser is off. | |
283 | } | |
284 | // else we successfully "parsed" an empty BWS sequence | |
285 | ||
286 | return true; | |
287 | } | |
cae5602c | 288 |