From: Amos Jeffries Date: Fri, 20 May 2016 08:28:33 +0000 (+1200) Subject: HTTP/1.1: unfold mime header blocks X-Git-Tag: SQUID_4_0_11~22 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=00237269110b6d9661457a7f8baa4c6450ded737;p=thirdparty%2Fsquid.git HTTP/1.1: unfold mime header blocks Enact the RFC 7230 section 3 requirement that proxies remove obs-fold from received mime header blocks and drop all lines that start with whitespace immediately following the request-line. Also; * Shuffle the DelimiterCharacters() and RelaxedDelimiters() helper methods to Http::One::Parser for use in processing mime. * Extend the headersEnd() function with a wrapper to avoid SBuf::c_str() and to efficiently report whether obs-fold exists without needing to re-scan the block. * Document the mime_header.h API function(s). * Add Http::One::CrLf() to present a SBuf CRLF constant. This can be used instead of the many local string definitions we have now. The implementation could perhapse be done better for performance, but not done here. --- diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc index e762957af0..1eb1ea46b8 100644 --- a/src/http/one/Parser.cc +++ b/src/http/one/Parser.cc @@ -16,6 +16,12 @@ /// RFC 7230 section 2.6 - 7 magic octets const SBuf Http::One::Parser::Http1magic("HTTP/1."); +const SBuf &Http::One::CrLf() +{ + static const SBuf crlf("\r\n"); + return crlf; +} + void Http::One::Parser::clear() { @@ -25,11 +31,34 @@ Http::One::Parser::clear() mimeHeaderBlock_.clear(); } +/// characters HTTP permits tolerant parsers to accept as delimiters +static const CharacterSet & +RelaxedDelimiterCharacters() +{ + // RFC 7230 section 3.5 + // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), + // or bare CR as whitespace between request-line fields + static const CharacterSet RelaxedDels = + (CharacterSet::SP + + CharacterSet::HTAB + + CharacterSet("VT,FF","\x0B\x0C") + + CharacterSet::CR).rename("relaxed-WSP"); + + return RelaxedDels; +} + +/// characters used to separate HTTP fields +const CharacterSet & +Http::One::Parser::DelimiterCharacters() +{ + return Config.onoff.relaxed_header_parser ? + RelaxedDelimiterCharacters() : CharacterSet::SP; +} + bool Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const { - static const SBuf crlf("\r\n"); - if (tok.skip(crlf)) + if (tok.skip(Http1::CrLf())) return true; if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) @@ -38,6 +67,88 @@ Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const return false; } +/// all characters except the LF line terminator +static const CharacterSet & +LineCharacters() +{ + static const CharacterSet line = CharacterSet::LF.complement("non-LF"); + return line; +} + +/** + * Remove invalid lines (if any) from the mime prefix + * + * RFC 7230 section 3: + * "A recipient that receives whitespace between the start-line and + * the first header field MUST ... consume each whitespace-preceded + * line without further processing of it." + * + * We need to always use the relaxed delimiters here to prevent + * line smuggling through strict parsers. + * + * Note that 'whitespace' in RFC 7230 includes CR. So that means + * sequences of CRLF will be pruned, but not sequences of bare-LF. + */ +void +Http::One::Parser::cleanMimePrefix() +{ + Http1::Tokenizer tok(mimeHeaderBlock_); + while (tok.skipOne(RelaxedDelimiterCharacters())) { + (void)tok.skipAll(LineCharacters()); // optional line content + // LF terminator is required. + // trust headersEnd() to ensure that we have at least one LF + (void)tok.skipOne(CharacterSet::LF); + } + + // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF, + // then we skipped everything, including that terminating LF. + // Restore the terminating CRLF if needed. + if (tok.atEnd()) + mimeHeaderBlock_ = Http1::CrLf(); + else + mimeHeaderBlock_ = tok.remaining(); + // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator +} + +/** + * Replace obs-fold with a single SP, + * + * RFC 7230 section 3.2.4 + * "A server that receives an obs-fold in a request message that is not + * within a message/http container MUST ... replace + * each received obs-fold with one or more SP octets prior to + * interpreting the field value or forwarding the message downstream." + * + * "A proxy or gateway that receives an obs-fold in a response message + * that is not within a message/http container MUST ... replace each + * received obs-fold with one or more SP octets prior to interpreting + * the field value or forwarding the message downstream." + */ +void +Http::One::Parser::unfoldMime() +{ + Http1::Tokenizer tok(mimeHeaderBlock_); + const auto szLimit = mimeHeaderBlock_.length(); + mimeHeaderBlock_.clear(); + // prevent the mime sender being able to make append() realloc/grow multiple times. + mimeHeaderBlock_.reserveSpace(szLimit); + + static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF"); + + while (!tok.atEnd()) { + const SBuf all(tok.remaining()); + const auto blobLen = tok.skipAll(nonCRLF); // may not be there + const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there + const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there + + if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold! + mimeHeaderBlock_.append(all.substr(0, blobLen)); + mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP + } else + mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen)); + } +} + bool Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) { @@ -51,8 +162,8 @@ Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) * So the rest of the code will need to deal with '0'-byte headers * (ie, none, so don't try parsing em) */ - // XXX: c_str() reallocates. performance regression. - if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) { + bool containsObsFold; + if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) { // Squid could handle these headers, but admin does not want to if (firstLineSize() + mimeHeaderBytes >= limit) { @@ -64,6 +175,10 @@ Http::One::Parser::grabMimeBlock(const char *which, const size_t limit) } mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); + cleanMimePrefix(); + if (containsObsFold) + unfoldMime(); + debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); } else { // headersEnd() == 0 @@ -102,12 +217,10 @@ Http::One::Parser::getHeaderField(const char *name) debugs(25, 5, "looking for " << name); // while we can find more LF in the SBuf - static CharacterSet iso8859Line = CharacterSet("non-LF",'\0','\n'-1) + CharacterSet(NULL, '\n'+1, (unsigned char)0xFF); Http1::Tokenizer tok(mimeHeaderBlock_); SBuf p; - static const SBuf crlf("\r\n"); - while (tok.prefix(p, iso8859Line)) { + while (tok.prefix(p, LineCharacters())) { if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF break; // error. reached invalid octet or end of buffer insted of an LF ?? @@ -120,7 +233,7 @@ Http::One::Parser::getHeaderField(const char *name) continue; // drop any trailing *CR sequence - p.trim(crlf, false, true); + p.trim(Http1::CrLf(), false, true); debugs(25, 5, "checking " << p); p.consume(namelen + 1); diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h index dc57583e6e..d24c8c503f 100644 --- a/src/http/one/Parser.h +++ b/src/http/one/Parser.h @@ -111,6 +111,10 @@ protected: /// consume from the tokenizer and return true only if found bool skipLineTerminator(Http1::Tokenizer &tok) const; + /// the characters which are to be considered valid whitespace + /// (WSP / BSP / OWS) + static const CharacterSet &DelimiterCharacters(); + /** * Scan to find the mime headers block for current message. * @@ -139,6 +143,10 @@ protected: /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block bool hackExpectsMime_; + +private: + void cleanMimePrefix(); + void unfoldMime(); }; } // namespace One diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc index ed04e85ed2..a1c18942e2 100644 --- a/src/http/one/RequestParser.cc +++ b/src/http/one/RequestParser.cc @@ -114,30 +114,6 @@ UriValidCharacters() return UriChars; } -/// characters HTTP permits tolerant parsers to accept as delimiters -static const CharacterSet & -RelaxedDelimiterCharacters() -{ - // RFC 7230 section 3.5 - // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), - // or bare CR as whitespace between request-line fields - static const CharacterSet RelaxedDels = - CharacterSet::SP + - CharacterSet::HTAB + - CharacterSet("VT,FF","\x0B\x0C") + - CharacterSet::CR; - - return RelaxedDels; -} - -/// characters used to separate HTTP fields -const CharacterSet & -Http::One::RequestParser::DelimiterCharacters() -{ - return Config.onoff.relaxed_header_parser ? - RelaxedDelimiterCharacters() : CharacterSet::SP; -} - /// characters which Squid will accept in the HTTP request-target (URI) const CharacterSet & Http::One::RequestParser::RequestTargetCharacters() diff --git a/src/http/one/RequestParser.h b/src/http/one/RequestParser.h index d339e94b3c..107b8cf4f7 100644 --- a/src/http/one/RequestParser.h +++ b/src/http/one/RequestParser.h @@ -56,7 +56,6 @@ private: bool skipTrailingCrs(Http1::Tokenizer &tok); bool http0() const {return !msgProtocol_.major;} - static const CharacterSet &DelimiterCharacters(); static const CharacterSet &RequestTargetCharacters(); /// what request method has been found on the first line diff --git a/src/http/one/forward.h b/src/http/one/forward.h index 93f39634c6..791dc100e1 100644 --- a/src/http/one/forward.h +++ b/src/http/one/forward.h @@ -10,6 +10,7 @@ #define SQUID_SRC_HTTP_ONE_FORWARD_H #include "base/RefCount.h" +#include "sbuf/forward.h" namespace Http { namespace One { @@ -27,6 +28,9 @@ typedef RefCount RequestParserPointer; class ResponseParser; typedef RefCount ResponseParserPointer; +/// CRLF textual representation +const SBuf &CrLf(); + } // namespace One } // namespace Http diff --git a/src/mime_header.cc b/src/mime_header.cc index 746cd120eb..f8f5354e97 100644 --- a/src/mime_header.cc +++ b/src/mime_header.cc @@ -13,10 +13,11 @@ #include "profiler/Profiler.h" size_t -headersEnd(const char *mime, size_t l) +headersEnd(const char *mime, size_t l, bool &containsObsFold) { size_t e = 0; int state = 1; + containsObsFold = false; PROF_start(headersEnd); @@ -35,7 +36,10 @@ headersEnd(const char *mime, size_t l) state = 2; else if ('\n' == mime[e]) state = 3; - else + else if (' ' == mime[e] || '\t' == mime[e]) { + containsObsFold = true; + state = 0; + } else state = 0; break; diff --git a/src/mime_header.h b/src/mime_header.h index b57cc0d225..ca176f329c 100644 --- a/src/mime_header.h +++ b/src/mime_header.h @@ -11,7 +11,35 @@ #ifndef SQUID_MIME_HEADER_H_ #define SQUID_MIME_HEADER_H_ -size_t headersEnd(const char *, size_t); +/** + * Scan for the end of mime header block. + * + * Which is one of the following octet patterns: + * - CRLF CRLF, or + * - CRLF LF, or + * - LF CRLF, or + * - LF LF + * + * Also detects whether a obf-fold pattern exists within the mime block + * - CR*LF (SP / HTAB) + * + * \param containsObsFold will be set to true if obs-fold pattern is found. + */ +size_t headersEnd(const char *, size_t, bool &containsObsFold); + +inline size_t +headersEnd(const SBuf &buf, bool &containsObsFold) +{ + return headersEnd(buf.rawContent(), buf.length(), containsObsFold); +} + +/// \deprecated caller needs to be fixed to handle obs-fold +inline size_t +headersEnd(const char *buf, size_t sz) +{ + bool ignored; + return headersEnd(buf, sz, ignored); +} #endif /* SQUID_MIME_HEADER_H_ */ diff --git a/src/tests/stub_mime.cc b/src/tests/stub_mime.cc index 42a9a5f53e..a284b5eb3a 100644 --- a/src/tests/stub_mime.cc +++ b/src/tests/stub_mime.cc @@ -11,5 +11,5 @@ #define STUB_API "mime.cc" #include "tests/STUB.h" -size_t headersEnd(const char *mime, size_t l) STUB_RETVAL(0) +size_t headersEnd(const char *, size_t, bool &) STUB_RETVAL(0)