src/http/one/Parser.cc

   1 /*
   2  * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "http/one/Parser.h"
  12 #include "http/one/Tokenizer.h"
  13 #include "mime_header.h"
  14 #include "SquidConfig.h"
  15
/// RFC 7230 section 2.6 - the 7 magic octets ("HTTP/1.") that prefix an
/// HTTP/1.x protocol version label
const SBuf Http::One::Parser::Http1magic("HTTP/1.");
  18
  19 const SBuf &Http::One::CrLf()
  20 {
  21     static const SBuf crlf("\r\n");
  22     return crlf;
  23 }
  24
  25 void
  26 Http::One::Parser::clear()
  27 {
  28     parsingStage_ = HTTP_PARSE_NONE;
  29     buf_ = NULL;
  30     msgProtocol_ = AnyP::ProtocolVersion();
  31     mimeHeaderBlock_.clear();
  32 }
  33
  34 /// characters HTTP permits tolerant parsers to accept as delimiters
  35 static const CharacterSet &
  36 RelaxedDelimiterCharacters()
  37 {
  38     // RFC 7230 section 3.5
  39     // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
  40     // or bare CR as whitespace between request-line fields
  41     static const CharacterSet RelaxedDels =
  42         (CharacterSet::SP +
  43          CharacterSet::HTAB +
  44          CharacterSet("VT,FF","\x0B\x0C") +
  45          CharacterSet::CR).rename("relaxed-WSP");
  46
  47     return RelaxedDels;
  48 }
  49
  50 const CharacterSet &
  51 Http::One::Parser::WhitespaceCharacters()
  52 {
  53     return Config.onoff.relaxed_header_parser ?
  54            RelaxedDelimiterCharacters() : CharacterSet::WSP;
  55 }
  56
  57 const CharacterSet &
  58 Http::One::Parser::DelimiterCharacters()
  59 {
  60     return Config.onoff.relaxed_header_parser ?
  61            RelaxedDelimiterCharacters() : CharacterSet::SP;
  62 }
  63
  64 bool
  65 Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
  66 {
  67     if (tok.skip(Http1::CrLf()))
  68         return true;
  69
  70     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
  71         return true;
  72
  73     if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
  74         return false; // need more data
  75
  76     throw TexcHere("garbage instead of CRLF line terminator");
  77     return false; // unreachable, but make naive compilers happy
  78 }
  79
  80 /// all characters except the LF line terminator
  81 static const CharacterSet &
  82 LineCharacters()
  83 {
  84     static const CharacterSet line = CharacterSet::LF.complement("non-LF");
  85     return line;
  86 }
  87
  88 /**
  89  * Remove invalid lines (if any) from the mime prefix
  90  *
  91  * RFC 7230 section 3:
  92  * "A recipient that receives whitespace between the start-line and
  93  * the first header field MUST ... consume each whitespace-preceded
  94  * line without further processing of it."
  95  *
  96  * We need to always use the relaxed delimiters here to prevent
  97  * line smuggling through strict parsers.
  98  *
  99  * Note that 'whitespace' in RFC 7230 includes CR. So that means
 100  * sequences of CRLF will be pruned, but not sequences of bare-LF.
 101  */
 102 void
 103 Http::One::Parser::cleanMimePrefix()
 104 {
 105     Http1::Tokenizer tok(mimeHeaderBlock_);
 106     while (tok.skipOne(RelaxedDelimiterCharacters())) {
 107         (void)tok.skipAll(LineCharacters()); // optional line content
 108         // LF terminator is required.
 109         // trust headersEnd() to ensure that we have at least one LF
 110         (void)tok.skipOne(CharacterSet::LF);
 111     }
 112
 113     // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
 114     // then we skipped everything, including that terminating LF.
 115     // Restore the terminating CRLF if needed.
 116     if (tok.atEnd())
 117         mimeHeaderBlock_ = Http1::CrLf();
 118     else
 119         mimeHeaderBlock_ = tok.remaining();
 120     // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
 121 }
 122
 123 /**
 124  * Replace obs-fold with a single SP,
 125  *
 126  * RFC 7230 section 3.2.4
 127  * "A server that receives an obs-fold in a request message that is not
 128  *  within a message/http container MUST ... replace
 129  *  each received obs-fold with one or more SP octets prior to
 130  *  interpreting the field value or forwarding the message downstream."
 131  *
 132  * "A proxy or gateway that receives an obs-fold in a response message
 133  *  that is not within a message/http container MUST ... replace each
 134  *  received obs-fold with one or more SP octets prior to interpreting
 135  *  the field value or forwarding the message downstream."
 136  */
 137 void
 138 Http::One::Parser::unfoldMime()
 139 {
 140     Http1::Tokenizer tok(mimeHeaderBlock_);
 141     const auto szLimit = mimeHeaderBlock_.length();
 142     mimeHeaderBlock_.clear();
 143     // prevent the mime sender being able to make append() realloc/grow multiple times.
 144     mimeHeaderBlock_.reserveSpace(szLimit);
 145
 146     static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
 147
 148     while (!tok.atEnd()) {
 149         const SBuf all(tok.remaining());
 150         const auto blobLen = tok.skipAll(nonCRLF); // may not be there
 151         const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
 152         const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
 153
 154         if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
 155             mimeHeaderBlock_.append(all.substr(0, blobLen));
 156             mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
 157         } else
 158             mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
 159     }
 160 }
 161
 162 bool
 163 Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
 164 {
 165     // MIME headers block exist in (only) HTTP/1.x and ICY
 166     const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
 167                             msgProtocol_.protocol == AnyP::PROTO_ICY ||
 168                             hackExpectsMime_;
 169
 170     if (expectMime) {
 171         /* NOTE: HTTP/0.9 messages do not have a mime header block.
 172          *       So the rest of the code will need to deal with '0'-byte headers
 173          *       (ie, none, so don't try parsing em)
 174          */
 175         bool containsObsFold;
 176         if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
 177
 178             // Squid could handle these headers, but admin does not want to
 179             if (firstLineSize() + mimeHeaderBytes >= limit) {
 180                 debugs(33, 5, "Too large " << which);
 181                 parseStatusCode = Http::scHeaderTooLarge;
 182                 buf_.consume(mimeHeaderBytes);
 183                 parsingStage_ = HTTP_PARSE_DONE;
 184                 return false;
 185             }
 186
 187             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
 188             cleanMimePrefix();
 189             if (containsObsFold)
 190                 unfoldMime();
 191
 192             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 193
 194         } else { // headersEnd() == 0
 195             if (buf_.length()+firstLineSize() >= limit) {
 196                 debugs(33, 5, "Too large " << which);
 197                 parseStatusCode = Http::scHeaderTooLarge;
 198                 parsingStage_ = HTTP_PARSE_DONE;
 199             } else
 200                 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
 201             return false;
 202         }
 203
 204     } else
 205         debugs(33, 3, "Missing HTTP/1.x identifier");
 206
 207     // NP: we do not do any further stages here yet so go straight to DONE
 208     parsingStage_ = HTTP_PARSE_DONE;
 209
 210     return true;
 211 }
 212
 213 // arbitrary maximum-length for headers which can be found by Http1Parser::getHeaderField()
 214 #define GET_HDR_SZ  1024
 215
 216 // BUG: returns only the first header line with given name,
 217 //      ignores multi-line headers and obs-fold headers
 218 char *
 219 Http::One::Parser::getHeaderField(const char *name)
 220 {
 221     if (!headerBlockSize() || !name)
 222         return NULL;
 223
 224     LOCAL_ARRAY(char, header, GET_HDR_SZ);
 225     const int namelen = strlen(name);
 226
 227     debugs(25, 5, "looking for " << name);
 228
 229     // while we can find more LF in the SBuf
 230     Http1::Tokenizer tok(mimeHeaderBlock_);
 231     SBuf p;
 232
 233     while (tok.prefix(p, LineCharacters())) {
 234         if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
 235             break; // error. reached invalid octet or end of buffer insted of an LF ??
 236
 237         // header lines must start with the name (case insensitive)
 238         if (p.substr(0, namelen).caseCmp(name, namelen))
 239             continue;
 240
 241         // then a COLON
 242         if (p[namelen] != ':')
 243             continue;
 244
 245         // drop any trailing *CR sequence
 246         p.trim(Http1::CrLf(), false, true);
 247
 248         debugs(25, 5, "checking " << p);
 249         p.consume(namelen + 1);
 250
 251         // TODO: optimize SBuf::trim to take CharacterSet directly
 252         Http1::Tokenizer t(p);
 253         t.skipAll(CharacterSet::WSP);
 254         p = t.remaining();
 255
 256         // prevent buffer overrun on char header[];
 257         p.chop(0, sizeof(header)-1);
 258
 259         // return the header field-value
 260         SBufToCstring(header, p);
 261         debugs(25, 5, "returning " << header);
 262         return header;
 263     }
 264
 265     return NULL;
 266 }
 267
 268 int
 269 Http::One::ErrorLevel()
 270 {
 271     return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
 272 }
 273
 274 // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
 275 bool
 276 Http::One::ParseBws(Tokenizer &tok)
 277 {
 278     if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
 279         // Generating BWS is a MUST-level violation so warn about it as needed.
 280         debugs(33, ErrorLevel(), "found " << count << " BWS octets");
 281         // RFC 7230 says we MUST parse BWS, so we fall through even if
 282         // Config.onoff.relaxed_header_parser is off.
 283     }
 284     // else we successfully "parsed" an empty BWS sequence
 285
 286     return true;
 287 }
 288