]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/Parser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / http / one / Parser.cc
CommitLineData
48a37aee 1/*
4ac4a490 2 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
48a37aee
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
c99510dd
AJ
9#include "squid.h"
10#include "Debug.h"
11#include "http/one/Parser.h"
f29718b0 12#include "http/one/Tokenizer.h"
f1d5359e 13#include "mime_header.h"
b8f86fd2 14#include "SquidConfig.h"
c99510dd 15
9651320a
AJ
16/// RFC 7230 section 2.6 - 7 magic octets
17const SBuf Http::One::Parser::Http1magic("HTTP/1.");
18
00237269
AJ
19const SBuf &Http::One::CrLf()
20{
21 static const SBuf crlf("\r\n");
22 return crlf;
23}
24
c99510dd
AJ
25void
26Http::One::Parser::clear()
27{
28 parsingStage_ = HTTP_PARSE_NONE;
b749de75 29 buf_ = NULL;
c99510dd
AJ
30 msgProtocol_ = AnyP::ProtocolVersion();
31 mimeHeaderBlock_.clear();
32}
33
00237269
AJ
34/// characters HTTP permits tolerant parsers to accept as delimiters
35static const CharacterSet &
36RelaxedDelimiterCharacters()
37{
38 // RFC 7230 section 3.5
39 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
40 // or bare CR as whitespace between request-line fields
41 static const CharacterSet RelaxedDels =
42 (CharacterSet::SP +
43 CharacterSet::HTAB +
44 CharacterSet("VT,FF","\x0B\x0C") +
45 CharacterSet::CR).rename("relaxed-WSP");
46
47 return RelaxedDels;
48}
49
50/// characters used to separate HTTP fields
51const CharacterSet &
52Http::One::Parser::DelimiterCharacters()
53{
54 return Config.onoff.relaxed_header_parser ?
55 RelaxedDelimiterCharacters() : CharacterSet::SP;
56}
57
f1d5359e 58bool
f29718b0 59Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
f1d5359e 60{
00237269 61 if (tok.skip(Http1::CrLf()))
b8f86fd2
AJ
62 return true;
63
64 if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
65 return true;
66
188ad27f
AJ
67 if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
68 return false; // need more data
69
70 throw TexcHere("garbage instead of CRLF line terminator");
71 return false; // unreachable, but make naive compilers happy
b8f86fd2
AJ
72}
73
00237269
AJ
74/// all characters except the LF line terminator
75static const CharacterSet &
76LineCharacters()
77{
78 static const CharacterSet line = CharacterSet::LF.complement("non-LF");
79 return line;
80}
81
82/**
83 * Remove invalid lines (if any) from the mime prefix
84 *
85 * RFC 7230 section 3:
86 * "A recipient that receives whitespace between the start-line and
87 * the first header field MUST ... consume each whitespace-preceded
88 * line without further processing of it."
89 *
90 * We need to always use the relaxed delimiters here to prevent
91 * line smuggling through strict parsers.
92 *
93 * Note that 'whitespace' in RFC 7230 includes CR. So that means
94 * sequences of CRLF will be pruned, but not sequences of bare-LF.
95 */
96void
97Http::One::Parser::cleanMimePrefix()
98{
99 Http1::Tokenizer tok(mimeHeaderBlock_);
100 while (tok.skipOne(RelaxedDelimiterCharacters())) {
101 (void)tok.skipAll(LineCharacters()); // optional line content
102 // LF terminator is required.
103 // trust headersEnd() to ensure that we have at least one LF
104 (void)tok.skipOne(CharacterSet::LF);
105 }
106
107 // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
108 // then we skipped everything, including that terminating LF.
109 // Restore the terminating CRLF if needed.
110 if (tok.atEnd())
111 mimeHeaderBlock_ = Http1::CrLf();
112 else
113 mimeHeaderBlock_ = tok.remaining();
114 // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
115}
116
117/**
118 * Replace obs-fold with a single SP,
119 *
120 * RFC 7230 section 3.2.4
121 * "A server that receives an obs-fold in a request message that is not
122 * within a message/http container MUST ... replace
123 * each received obs-fold with one or more SP octets prior to
124 * interpreting the field value or forwarding the message downstream."
125 *
126 * "A proxy or gateway that receives an obs-fold in a response message
127 * that is not within a message/http container MUST ... replace each
128 * received obs-fold with one or more SP octets prior to interpreting
129 * the field value or forwarding the message downstream."
130 */
131void
132Http::One::Parser::unfoldMime()
133{
134 Http1::Tokenizer tok(mimeHeaderBlock_);
135 const auto szLimit = mimeHeaderBlock_.length();
136 mimeHeaderBlock_.clear();
137 // prevent the mime sender being able to make append() realloc/grow multiple times.
138 mimeHeaderBlock_.reserveSpace(szLimit);
139
140 static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
141
142 while (!tok.atEnd()) {
143 const SBuf all(tok.remaining());
144 const auto blobLen = tok.skipAll(nonCRLF); // may not be there
145 const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
146 const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
147
148 if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
149 mimeHeaderBlock_.append(all.substr(0, blobLen));
150 mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
151 } else
152 mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
153 }
154}
155
b8f86fd2 156bool
f8cab755 157Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
b8f86fd2
AJ
158{
159 // MIME headers block exist in (only) HTTP/1.x and ICY
160 const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
e47e0802
AJ
161 msgProtocol_.protocol == AnyP::PROTO_ICY ||
162 hackExpectsMime_;
b8f86fd2
AJ
163
164 if (expectMime) {
f1d5359e
AJ
165 /* NOTE: HTTP/0.9 messages do not have a mime header block.
166 * So the rest of the code will need to deal with '0'-byte headers
167 * (ie, none, so don't try parsing em)
168 */
00237269
AJ
169 bool containsObsFold;
170 if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
f8cab755
AJ
171
172 // Squid could handle these headers, but admin does not want to
173 if (firstLineSize() + mimeHeaderBytes >= limit) {
174 debugs(33, 5, "Too large " << which);
175 parseStatusCode = Http::scHeaderTooLarge;
176 buf_.consume(mimeHeaderBytes);
177 parsingStage_ = HTTP_PARSE_DONE;
178 return false;
179 }
180
181 mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
00237269
AJ
182 cleanMimePrefix();
183 if (containsObsFold)
184 unfoldMime();
185
f8cab755
AJ
186 debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
187
188 } else { // headersEnd() == 0
f1d5359e
AJ
189 if (buf_.length()+firstLineSize() >= limit) {
190 debugs(33, 5, "Too large " << which);
191 parseStatusCode = Http::scHeaderTooLarge;
192 parsingStage_ = HTTP_PARSE_DONE;
193 } else
194 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
195 return false;
196 }
b8f86fd2 197
f1d5359e
AJ
198 } else
199 debugs(33, 3, "Missing HTTP/1.x identifier");
200
201 // NP: we do not do any further stages here yet so go straight to DONE
202 parsingStage_ = HTTP_PARSE_DONE;
203
f1d5359e
AJ
204 return true;
205}
206
c99510dd 207// arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField()
f53969cc 208#define GET_HDR_SZ 1024
c99510dd 209
687696c1
AJ
210// BUG: returns only the first header line with given name,
211// ignores multi-line headers and obs-fold headers
c99510dd
AJ
212char *
213Http::One::Parser::getHeaderField(const char *name)
214{
c99510dd
AJ
215 if (!headerBlockSize() || !name)
216 return NULL;
217
687696c1 218 LOCAL_ARRAY(char, header, GET_HDR_SZ);
1296170f 219 const int namelen = strlen(name);
687696c1 220
f6c7fa03 221 debugs(25, 5, "looking for " << name);
c99510dd 222
f6c7fa03 223 // while we can find more LF in the SBuf
f29718b0 224 Http1::Tokenizer tok(mimeHeaderBlock_);
687696c1 225 SBuf p;
c99510dd 226
00237269 227 while (tok.prefix(p, LineCharacters())) {
2d40b13f
AJ
228 if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
229 break; // error. reached invalid octet or end of buffer insted of an LF ??
c99510dd 230
687696c1
AJ
231 // header lines must start with the name (case insensitive)
232 if (p.substr(0, namelen).caseCmp(name, namelen))
c99510dd
AJ
233 continue;
234
687696c1
AJ
235 // then a COLON
236 if (p[namelen] != ':')
c99510dd
AJ
237 continue;
238
687696c1 239 // drop any trailing *CR sequence
00237269 240 p.trim(Http1::CrLf(), false, true);
c99510dd 241
687696c1
AJ
242 debugs(25, 5, "checking " << p);
243 p.consume(namelen + 1);
c99510dd 244
687696c1 245 // TODO: optimize SBuf::trim to take CharacterSet directly
f29718b0 246 Http1::Tokenizer t(p);
9bafa70d 247 t.skipAll(CharacterSet::WSP);
687696c1 248 p = t.remaining();
c99510dd 249
687696c1
AJ
250 // prevent buffer overrun on char header[];
251 p.chop(0, sizeof(header)-1);
c99510dd 252
687696c1 253 // return the header field-value
3f0e38d6 254 SBufToCstring(header, p);
f6c7fa03 255 debugs(25, 5, "returning " << header);
687696c1 256 return header;
c99510dd
AJ
257 }
258
259 return NULL;
260}
f53969cc 261
9a4b5048
AJ
#if USE_HTTP_VIOLATIONS
/// \returns the debugging level to use when reporting tolerated protocol
/// violations: DBG_IMPORTANT when relaxed_header_parser is negative,
/// otherwise level 5
int
Http::One::Parser::violationLevel() const
{
    if (Config.onoff.relaxed_header_parser < 0)
        return DBG_IMPORTANT;

    return 5;
}
#endif
2c4e5226 269