]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | #include "squid.h" | |
10 | #include "Debug.h" | |
11 | #include "http/one/Parser.h" | |
12 | #include "http/one/Tokenizer.h" | |
13 | #include "mime_header.h" | |
14 | #include "SquidConfig.h" | |
15 | ||
16 | /// RFC 7230 section 2.6 - 7 magic octets | |
17 | const SBuf Http::One::Parser::Http1magic("HTTP/1."); | |
18 | ||
19 | const SBuf &Http::One::CrLf() | |
20 | { | |
21 | static const SBuf crlf("\r\n"); | |
22 | return crlf; | |
23 | } | |
24 | ||
25 | void | |
26 | Http::One::Parser::clear() | |
27 | { | |
28 | parsingStage_ = HTTP_PARSE_NONE; | |
29 | buf_ = NULL; | |
30 | msgProtocol_ = AnyP::ProtocolVersion(); | |
31 | mimeHeaderBlock_.clear(); | |
32 | } | |
33 | ||
34 | /// characters HTTP permits tolerant parsers to accept as delimiters | |
35 | static const CharacterSet & | |
36 | RelaxedDelimiterCharacters() | |
37 | { | |
38 | // RFC 7230 section 3.5 | |
39 | // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), | |
40 | // or bare CR as whitespace between request-line fields | |
41 | static const CharacterSet RelaxedDels = | |
42 | (CharacterSet::SP + | |
43 | CharacterSet::HTAB + | |
44 | CharacterSet("VT,FF","\x0B\x0C") + | |
45 | CharacterSet::CR).rename("relaxed-WSP"); | |
46 | ||
47 | return RelaxedDels; | |
48 | } | |
49 | ||
50 | /// characters used to separate HTTP fields | |
51 | const CharacterSet & | |
52 | Http::One::Parser::DelimiterCharacters() | |
53 | { | |
54 | return Config.onoff.relaxed_header_parser ? | |
55 | RelaxedDelimiterCharacters() : CharacterSet::SP; | |
56 | } | |
57 | ||
58 | bool | |
59 | Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const | |
60 | { | |
61 | if (tok.skip(Http1::CrLf())) | |
62 | return true; | |
63 | ||
64 | if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) | |
65 | return true; | |
66 | ||
67 | if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r')) | |
68 | return false; // need more data | |
69 | ||
70 | throw TexcHere("garbage instead of CRLF line terminator"); | |
71 | return false; // unreachable, but make naive compilers happy | |
72 | } | |
73 | ||
74 | /// all characters except the LF line terminator | |
75 | static const CharacterSet & | |
76 | LineCharacters() | |
77 | { | |
78 | static const CharacterSet line = CharacterSet::LF.complement("non-LF"); | |
79 | return line; | |
80 | } | |
81 | ||
82 | /** | |
83 | * Remove invalid lines (if any) from the mime prefix | |
84 | * | |
85 | * RFC 7230 section 3: | |
86 | * "A recipient that receives whitespace between the start-line and | |
87 | * the first header field MUST ... consume each whitespace-preceded | |
88 | * line without further processing of it." | |
89 | * | |
90 | * We need to always use the relaxed delimiters here to prevent | |
91 | * line smuggling through strict parsers. | |
92 | * | |
93 | * Note that 'whitespace' in RFC 7230 includes CR. So that means | |
94 | * sequences of CRLF will be pruned, but not sequences of bare-LF. | |
95 | */ | |
96 | void | |
97 | Http::One::Parser::cleanMimePrefix() | |
98 | { | |
99 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
100 | while (tok.skipOne(RelaxedDelimiterCharacters())) { | |
101 | (void)tok.skipAll(LineCharacters()); // optional line content | |
102 | // LF terminator is required. | |
103 | // trust headersEnd() to ensure that we have at least one LF | |
104 | (void)tok.skipOne(CharacterSet::LF); | |
105 | } | |
106 | ||
107 | // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF, | |
108 | // then we skipped everything, including that terminating LF. | |
109 | // Restore the terminating CRLF if needed. | |
110 | if (tok.atEnd()) | |
111 | mimeHeaderBlock_ = Http1::CrLf(); | |
112 | else | |
113 | mimeHeaderBlock_ = tok.remaining(); | |
114 | // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator | |
115 | } | |
116 | ||
117 | /** | |
118 | * Replace obs-fold with a single SP, | |
119 | * | |
120 | * RFC 7230 section 3.2.4 | |
121 | * "A server that receives an obs-fold in a request message that is not | |
122 | * within a message/http container MUST ... replace | |
123 | * each received obs-fold with one or more SP octets prior to | |
124 | * interpreting the field value or forwarding the message downstream." | |
125 | * | |
126 | * "A proxy or gateway that receives an obs-fold in a response message | |
127 | * that is not within a message/http container MUST ... replace each | |
128 | * received obs-fold with one or more SP octets prior to interpreting | |
129 | * the field value or forwarding the message downstream." | |
130 | */ | |
131 | void | |
132 | Http::One::Parser::unfoldMime() | |
133 | { | |
134 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
135 | const auto szLimit = mimeHeaderBlock_.length(); | |
136 | mimeHeaderBlock_.clear(); | |
137 | // prevent the mime sender being able to make append() realloc/grow multiple times. | |
138 | mimeHeaderBlock_.reserveSpace(szLimit); | |
139 | ||
140 | static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF"); | |
141 | ||
142 | while (!tok.atEnd()) { | |
143 | const SBuf all(tok.remaining()); | |
144 | const auto blobLen = tok.skipAll(nonCRLF); // may not be there | |
145 | const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there | |
146 | const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there | |
147 | ||
148 | if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold! | |
149 | mimeHeaderBlock_.append(all.substr(0, blobLen)); | |
150 | mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP | |
151 | } else | |
152 | mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen)); | |
153 | } | |
154 | } | |
155 | ||
156 | bool | |
157 | Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) | |
158 | { | |
159 | // MIME headers block exist in (only) HTTP/1.x and ICY | |
160 | const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) || | |
161 | msgProtocol_.protocol == AnyP::PROTO_ICY || | |
162 | hackExpectsMime_; | |
163 | ||
164 | if (expectMime) { | |
165 | /* NOTE: HTTP/0.9 messages do not have a mime header block. | |
166 | * So the rest of the code will need to deal with '0'-byte headers | |
167 | * (ie, none, so don't try parsing em) | |
168 | */ | |
169 | bool containsObsFold; | |
170 | if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) { | |
171 | ||
172 | // Squid could handle these headers, but admin does not want to | |
173 | if (firstLineSize() + mimeHeaderBytes >= limit) { | |
174 | debugs(33, 5, "Too large " << which); | |
175 | parseStatusCode = Http::scHeaderTooLarge; | |
176 | buf_.consume(mimeHeaderBytes); | |
177 | parsingStage_ = HTTP_PARSE_DONE; | |
178 | return false; | |
179 | } | |
180 | ||
181 | mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); | |
182 | cleanMimePrefix(); | |
183 | if (containsObsFold) | |
184 | unfoldMime(); | |
185 | ||
186 | debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); | |
187 | ||
188 | } else { // headersEnd() == 0 | |
189 | if (buf_.length()+firstLineSize() >= limit) { | |
190 | debugs(33, 5, "Too large " << which); | |
191 | parseStatusCode = Http::scHeaderTooLarge; | |
192 | parsingStage_ = HTTP_PARSE_DONE; | |
193 | } else | |
194 | debugs(33, 5, "Incomplete " << which << ", waiting for end of headers"); | |
195 | return false; | |
196 | } | |
197 | ||
198 | } else | |
199 | debugs(33, 3, "Missing HTTP/1.x identifier"); | |
200 | ||
201 | // NP: we do not do any further stages here yet so go straight to DONE | |
202 | parsingStage_ = HTTP_PARSE_DONE; | |
203 | ||
204 | return true; | |
205 | } | |
206 | ||
207 | // arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField() | |
208 | #define GET_HDR_SZ 1024 | |
209 | ||
210 | // BUG: returns only the first header line with given name, | |
211 | // ignores multi-line headers and obs-fold headers | |
212 | char * | |
213 | Http::One::Parser::getHeaderField(const char *name) | |
214 | { | |
215 | if (!headerBlockSize() || !name) | |
216 | return NULL; | |
217 | ||
218 | LOCAL_ARRAY(char, header, GET_HDR_SZ); | |
219 | const int namelen = strlen(name); | |
220 | ||
221 | debugs(25, 5, "looking for " << name); | |
222 | ||
223 | // while we can find more LF in the SBuf | |
224 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
225 | SBuf p; | |
226 | ||
227 | while (tok.prefix(p, LineCharacters())) { | |
228 | if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF | |
229 | break; // error. reached invalid octet or end of buffer insted of an LF ?? | |
230 | ||
231 | // header lines must start with the name (case insensitive) | |
232 | if (p.substr(0, namelen).caseCmp(name, namelen)) | |
233 | continue; | |
234 | ||
235 | // then a COLON | |
236 | if (p[namelen] != ':') | |
237 | continue; | |
238 | ||
239 | // drop any trailing *CR sequence | |
240 | p.trim(Http1::CrLf(), false, true); | |
241 | ||
242 | debugs(25, 5, "checking " << p); | |
243 | p.consume(namelen + 1); | |
244 | ||
245 | // TODO: optimize SBuf::trim to take CharacterSet directly | |
246 | Http1::Tokenizer t(p); | |
247 | t.skipAll(CharacterSet::WSP); | |
248 | p = t.remaining(); | |
249 | ||
250 | // prevent buffer overrun on char header[]; | |
251 | p.chop(0, sizeof(header)-1); | |
252 | ||
253 | // return the header field-value | |
254 | SBufToCstring(header, p); | |
255 | debugs(25, 5, "returning " << header); | |
256 | return header; | |
257 | } | |
258 | ||
259 | return NULL; | |
260 | } | |
261 | ||
#if USE_HTTP_VIOLATIONS
/// \returns DBG_IMPORTANT when Config.onoff.relaxed_header_parser is
/// negative, and 5 otherwise
int
Http::One::Parser::violationLevel() const
{
    if (Config.onoff.relaxed_header_parser < 0)
        return DBG_IMPORTANT;

    return 5;
}
#endif
269 |