]>
Commit | Line | Data |
---|---|---|
48a37aee | 1 | /* |
4ac4a490 | 2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors |
48a37aee AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
c99510dd AJ |
9 | #include "squid.h" |
10 | #include "Debug.h" | |
11 | #include "http/one/Parser.h" | |
f29718b0 | 12 | #include "http/one/Tokenizer.h" |
f1d5359e | 13 | #include "mime_header.h" |
b8f86fd2 | 14 | #include "SquidConfig.h" |
c99510dd | 15 | |
9651320a AJ |
16 | /// RFC 7230 section 2.6 - the 7 magic octets ("HTTP/1.") that begin an HTTP/1.x version label |
17 | const SBuf Http::One::Parser::Http1magic("HTTP/1."); | |
18 | ||
00237269 AJ |
19 | const SBuf &Http::One::CrLf() |
20 | { | |
21 | static const SBuf crlf("\r\n"); | |
22 | return crlf; | |
23 | } | |
24 | ||
c99510dd AJ |
25 | void |
26 | Http::One::Parser::clear() | |
27 | { | |
28 | parsingStage_ = HTTP_PARSE_NONE; | |
b749de75 | 29 | buf_ = NULL; |
c99510dd AJ |
30 | msgProtocol_ = AnyP::ProtocolVersion(); |
31 | mimeHeaderBlock_.clear(); | |
32 | } | |
33 | ||
00237269 AJ |
34 | /// characters HTTP permits tolerant parsers to accept as delimiters |
35 | static const CharacterSet & | |
36 | RelaxedDelimiterCharacters() | |
37 | { | |
38 | // RFC 7230 section 3.5 | |
39 | // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), | |
40 | // or bare CR as whitespace between request-line fields | |
41 | static const CharacterSet RelaxedDels = | |
42 | (CharacterSet::SP + | |
43 | CharacterSet::HTAB + | |
44 | CharacterSet("VT,FF","\x0B\x0C") + | |
45 | CharacterSet::CR).rename("relaxed-WSP"); | |
46 | ||
47 | return RelaxedDels; | |
48 | } | |
49 | ||
50 | /// characters used to separate HTTP fields | |
51 | const CharacterSet & | |
52 | Http::One::Parser::DelimiterCharacters() | |
53 | { | |
54 | return Config.onoff.relaxed_header_parser ? | |
55 | RelaxedDelimiterCharacters() : CharacterSet::SP; | |
56 | } | |
57 | ||
f1d5359e | 58 | bool |
f29718b0 | 59 | Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const |
f1d5359e | 60 | { |
00237269 | 61 | if (tok.skip(Http1::CrLf())) |
b8f86fd2 AJ |
62 | return true; |
63 | ||
64 | if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) | |
65 | return true; | |
66 | ||
188ad27f AJ |
67 | if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r')) |
68 | return false; // need more data | |
69 | ||
70 | throw TexcHere("garbage instead of CRLF line terminator"); | |
71 | return false; // unreachable, but make naive compilers happy | |
b8f86fd2 AJ |
72 | } |
73 | ||
00237269 AJ |
74 | /// all characters except the LF line terminator |
75 | static const CharacterSet & | |
76 | LineCharacters() | |
77 | { | |
78 | static const CharacterSet line = CharacterSet::LF.complement("non-LF"); | |
79 | return line; | |
80 | } | |
81 | ||
82 | /** | |
83 | * Remove invalid lines (if any) from the mime prefix | |
84 | * | |
85 | * RFC 7230 section 3: | |
86 | * "A recipient that receives whitespace between the start-line and | |
87 | * the first header field MUST ... consume each whitespace-preceded | |
88 | * line without further processing of it." | |
89 | * | |
90 | * We need to always use the relaxed delimiters here to prevent | |
91 | * line smuggling through strict parsers. | |
92 | * | |
93 | * Note that 'whitespace' in RFC 7230 includes CR. So that means | |
94 | * sequences of CRLF will be pruned, but not sequences of bare-LF. | |
95 | */ | |
96 | void | |
97 | Http::One::Parser::cleanMimePrefix() | |
98 | { | |
99 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
100 | while (tok.skipOne(RelaxedDelimiterCharacters())) { | |
101 | (void)tok.skipAll(LineCharacters()); // optional line content | |
102 | // LF terminator is required. | |
103 | // trust headersEnd() to ensure that we have at least one LF | |
104 | (void)tok.skipOne(CharacterSet::LF); | |
105 | } | |
106 | ||
107 | // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF, | |
108 | // then we skipped everything, including that terminating LF. | |
109 | // Restore the terminating CRLF if needed. | |
110 | if (tok.atEnd()) | |
111 | mimeHeaderBlock_ = Http1::CrLf(); | |
112 | else | |
113 | mimeHeaderBlock_ = tok.remaining(); | |
114 | // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator | |
115 | } | |
116 | ||
117 | /** | |
118 | * Replace obs-fold with a single SP, | |
119 | * | |
120 | * RFC 7230 section 3.2.4 | |
121 | * "A server that receives an obs-fold in a request message that is not | |
122 | * within a message/http container MUST ... replace | |
123 | * each received obs-fold with one or more SP octets prior to | |
124 | * interpreting the field value or forwarding the message downstream." | |
125 | * | |
126 | * "A proxy or gateway that receives an obs-fold in a response message | |
127 | * that is not within a message/http container MUST ... replace each | |
128 | * received obs-fold with one or more SP octets prior to interpreting | |
129 | * the field value or forwarding the message downstream." | |
130 | */ | |
131 | void | |
132 | Http::One::Parser::unfoldMime() | |
133 | { | |
134 | Http1::Tokenizer tok(mimeHeaderBlock_); | |
135 | const auto szLimit = mimeHeaderBlock_.length(); | |
136 | mimeHeaderBlock_.clear(); | |
137 | // prevent the mime sender being able to make append() realloc/grow multiple times. | |
138 | mimeHeaderBlock_.reserveSpace(szLimit); | |
139 | ||
140 | static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF"); | |
141 | ||
142 | while (!tok.atEnd()) { | |
143 | const SBuf all(tok.remaining()); | |
144 | const auto blobLen = tok.skipAll(nonCRLF); // may not be there | |
145 | const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there | |
146 | const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there | |
147 | ||
148 | if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold! | |
149 | mimeHeaderBlock_.append(all.substr(0, blobLen)); | |
150 | mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP | |
151 | } else | |
152 | mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen)); | |
153 | } | |
154 | } | |
155 | ||
b8f86fd2 | 156 | bool |
f8cab755 | 157 | Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) |
b8f86fd2 AJ |
158 | { |
159 | // MIME headers block exist in (only) HTTP/1.x and ICY | |
160 | const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) || | |
e47e0802 AJ |
161 | msgProtocol_.protocol == AnyP::PROTO_ICY || |
162 | hackExpectsMime_; | |
b8f86fd2 AJ |
163 | |
164 | if (expectMime) { | |
f1d5359e AJ |
165 | /* NOTE: HTTP/0.9 messages do not have a mime header block. |
166 | * So the rest of the code will need to deal with '0'-byte headers | |
167 | * (ie, none, so don't try parsing em) | |
168 | */ | |
00237269 AJ |
169 | bool containsObsFold; |
170 | if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) { | |
f8cab755 AJ |
171 | |
172 | // Squid could handle these headers, but admin does not want to | |
173 | if (firstLineSize() + mimeHeaderBytes >= limit) { | |
174 | debugs(33, 5, "Too large " << which); | |
175 | parseStatusCode = Http::scHeaderTooLarge; | |
176 | buf_.consume(mimeHeaderBytes); | |
177 | parsingStage_ = HTTP_PARSE_DONE; | |
178 | return false; | |
179 | } | |
180 | ||
181 | mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); | |
00237269 AJ |
182 | cleanMimePrefix(); |
183 | if (containsObsFold) | |
184 | unfoldMime(); | |
185 | ||
f8cab755 AJ |
186 | debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); |
187 | ||
188 | } else { // headersEnd() == 0 | |
f1d5359e AJ |
189 | if (buf_.length()+firstLineSize() >= limit) { |
190 | debugs(33, 5, "Too large " << which); | |
191 | parseStatusCode = Http::scHeaderTooLarge; | |
192 | parsingStage_ = HTTP_PARSE_DONE; | |
193 | } else | |
194 | debugs(33, 5, "Incomplete " << which << ", waiting for end of headers"); | |
195 | return false; | |
196 | } | |
b8f86fd2 | 197 | |
f1d5359e AJ |
198 | } else |
199 | debugs(33, 3, "Missing HTTP/1.x identifier"); | |
200 | ||
201 | // NP: we do not do any further stages here yet so go straight to DONE | |
202 | parsingStage_ = HTTP_PARSE_DONE; | |
203 | ||
f1d5359e AJ |
204 | return true; |
205 | } | |
206 | ||
c99510dd | 207 | // arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField() |
f53969cc | 208 | #define GET_HDR_SZ 1024 |
c99510dd | 209 | |
687696c1 AJ |
210 | // BUG: returns only the first header line with given name, |
211 | // ignores multi-line headers and obs-fold headers | |
c99510dd AJ |
212 | char * |
213 | Http::One::Parser::getHeaderField(const char *name) | |
214 | { | |
c99510dd AJ |
215 | if (!headerBlockSize() || !name) |
216 | return NULL; | |
217 | ||
687696c1 | 218 | LOCAL_ARRAY(char, header, GET_HDR_SZ); |
1296170f | 219 | const int namelen = strlen(name); |
687696c1 | 220 | |
f6c7fa03 | 221 | debugs(25, 5, "looking for " << name); |
c99510dd | 222 | |
f6c7fa03 | 223 | // while we can find more LF in the SBuf |
f29718b0 | 224 | Http1::Tokenizer tok(mimeHeaderBlock_); |
687696c1 | 225 | SBuf p; |
c99510dd | 226 | |
00237269 | 227 | while (tok.prefix(p, LineCharacters())) { |
2d40b13f AJ |
228 | if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF |
229 | break; // error. reached invalid octet or end of buffer insted of an LF ?? | |
c99510dd | 230 | |
687696c1 AJ |
231 | // header lines must start with the name (case insensitive) |
232 | if (p.substr(0, namelen).caseCmp(name, namelen)) | |
c99510dd AJ |
233 | continue; |
234 | ||
687696c1 AJ |
235 | // then a COLON |
236 | if (p[namelen] != ':') | |
c99510dd AJ |
237 | continue; |
238 | ||
687696c1 | 239 | // drop any trailing *CR sequence |
00237269 | 240 | p.trim(Http1::CrLf(), false, true); |
c99510dd | 241 | |
687696c1 AJ |
242 | debugs(25, 5, "checking " << p); |
243 | p.consume(namelen + 1); | |
c99510dd | 244 | |
687696c1 | 245 | // TODO: optimize SBuf::trim to take CharacterSet directly |
f29718b0 | 246 | Http1::Tokenizer t(p); |
9bafa70d | 247 | t.skipAll(CharacterSet::WSP); |
687696c1 | 248 | p = t.remaining(); |
c99510dd | 249 | |
687696c1 AJ |
250 | // prevent buffer overrun on char header[]; |
251 | p.chop(0, sizeof(header)-1); | |
c99510dd | 252 | |
687696c1 | 253 | // return the header field-value |
3f0e38d6 | 254 | SBufToCstring(header, p); |
f6c7fa03 | 255 | debugs(25, 5, "returning " << header); |
687696c1 | 256 | return header; |
c99510dd AJ |
257 | } |
258 | ||
259 | return NULL; | |
260 | } | |
f53969cc | 261 | |
9a4b5048 AJ |
#if USE_HTTP_VIOLATIONS
/// Selects a level from the relaxed_header_parser setting:
/// DBG_IMPORTANT when that setting is negative, 5 otherwise.
int
Http::One::Parser::violationLevel() const
{
    if (Config.onoff.relaxed_header_parser < 0)
        return DBG_IMPORTANT;

    return 5;
}
#endif
2c4e5226 | 269 |