src/http/one/RequestParser.cc

   1 /*
   2  * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "http/one/RequestParser.h"
  12 #include "http/one/Tokenizer.h"
  13 #include "http/ProtocolVersion.h"
  14 #include "profiler/Profiler.h"
  15 #include "SquidConfig.h"
  16
  17 // the right debugs() level for parsing errors
  18 inline static int
  19 ErrorLevel() {
  20     return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
  21 }
  22
  23 Http::One::RequestParser::RequestParser(bool preserveParsed) :
  24     Parser(),
  25     preserveParsed_(preserveParsed)
  26 {}
  27
  28 Http1::Parser::size_type
  29 Http::One::RequestParser::firstLineSize() const
  30 {
  31     // RFC 7230 section 2.6
  32     /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
  33     return method_.image().length() + uri_.length() + 12;
  34 }
  35
  36 /**
  37  * Attempt to parse the first line of a new request message.
  38  *
  39  * Governed by RFC 7230 section 3.5
  40  *  "
  41  *    In the interest of robustness, a server that is expecting to receive
  42  *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
  43  *    received prior to the request-line.
  44  *  "
  45  *
  46  * Parsing state is stored between calls to avoid repeating buffer scans.
  47  * If garbage is found the parsing offset is incremented.
  48  */
  49 void
  50 Http::One::RequestParser::skipGarbageLines()
  51 {
  52     if (Config.onoff.relaxed_header_parser) {
  53         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
  54             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  55                    "CRLF bytes received ahead of request-line. " <<
  56                    "Ignored due to relaxed_header_parser.");
  57         // Be tolerant of prefix empty lines
  58         // ie any series of either \n or \r\n with no other characters and no repeated \r
  59         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
  60             buf_.consume(1);
  61         }
  62     }
  63 }
  64
  65 /**
  66  * Attempt to parse the method field out of an HTTP message request-line.
  67  *
  68  * Governed by:
  69  *  RFC 1945 section 5.1
  70  *  RFC 7230 section 2.6, 3.1 and 3.5
  71  */
  72 bool
  73 Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
  74 {
  75     // method field is a sequence of TCHAR.
  76     // Limit to 32 characters to prevent overly long sequences of non-HTTP
  77     // being sucked in before mismatch is detected. 32 is itself annoyingly
  78     // big but there are methods registered by IANA that reach 17 bytes:
  79     //  http://www.iana.org/assignments/http-methods
  80     static const size_t maxMethodLength = 32; // TODO: make this configurable?
  81
  82     SBuf methodFound;
  83     if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
  84         debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
  85         parseStatusCode = Http::scBadRequest;
  86         return false;
  87     }
  88     method_ = HttpRequestMethod(methodFound);
  89
  90     if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
  91         return false;
  92
  93     return true;
  94 }
  95
  96 /// the characters which truly are valid within URI
  97 static const CharacterSet &
  98 UriValidCharacters()
  99 {
 100     /* RFC 3986 section 2:
 101      * "
 102      *   A URI is composed from a limited set of characters consisting of
 103      *   digits, letters, and a few graphic symbols.
 104      * "
 105      */
 106     static const CharacterSet UriChars =
 107         CharacterSet("URI-Chars","") +
 108         // RFC 3986 section 2.2 - reserved characters
 109         CharacterSet("gen-delims", ":/?#[]@") +
 110         CharacterSet("sub-delims", "!$&'()*+,;=") +
 111         // RFC 3986 section 2.3 - unreserved characters
 112         CharacterSet::ALPHA +
 113         CharacterSet::DIGIT +
 114         CharacterSet("unreserved", "-._~") +
 115         // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
 116         CharacterSet("pct-encoded", "%") +
 117         CharacterSet::HEXDIG;
 118
 119     return UriChars;
 120 }
 121
 122 /// characters which Squid will accept in the HTTP request-target (URI)
 123 const CharacterSet &
 124 Http::One::RequestParser::RequestTargetCharacters()
 125 {
 126     if (Config.onoff.relaxed_header_parser) {
 127 #if USE_HTTP_VIOLATIONS
 128         static const CharacterSet RelaxedExtended =
 129             UriValidCharacters() +
 130             // accept whitespace (extended), it will be dealt with later
 131             DelimiterCharacters() +
 132             // RFC 2396 unwise character set which must never be transmitted
 133             // in un-escaped form. But many web services do anyway.
 134             CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
 135             // UTF-8 because we want to be future-proof
 136             CharacterSet("UTF-8", 128, 255);
 137
 138         return RelaxedExtended;
 139 #else
 140         static const CharacterSet RelaxedCompliant =
 141             UriValidCharacters() +
 142             // accept whitespace (extended), it will be dealt with later.
 143             DelimiterCharacters();
 144
 145         return RelaxedCompliant;
 146 #endif
 147     }
 148
 149     // strict parse only accepts what the RFC say we can
 150     return UriValidCharacters();
 151 }
 152
 153 bool
 154 Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
 155 {
 156     /* Arbitrary 64KB URI upper length limit.
 157      *
 158      * Not quite as arbitrary as it seems though. Old SquidString objects
 159      * cannot store strings larger than 64KB, so we must limit until they
 160      * have all been replaced with SBuf.
 161      *
 162      * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
 163      * at least 8000 octets for the whole line, including method and version.
 164      */
 165     const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
 166
 167     SBuf uriFound;
 168     if (!tok.prefix(uriFound, RequestTargetCharacters())) {
 169         parseStatusCode = Http::scBadRequest;
 170         debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
 171         return false;
 172     }
 173
 174     if (uriFound.length() > maxUriLength) {
 175         // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
 176         parseStatusCode = Http::scUriTooLong;
 177         debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
 178                "-byte URI exceeds " << maxUriLength << "-byte limit");
 179         return false;
 180     }
 181
 182     uri_ = uriFound;
 183     return true;
 184 }
 185
 186 bool
 187 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
 188 {
 189     static const SBuf http1p0("HTTP/1.0");
 190     static const SBuf http1p1("HTTP/1.1");
 191     const auto savedTok = tok;
 192
 193     // Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
 194     // the vast majority of cases.
 195     if (tok.skipSuffix(http1p1)) {
 196         msgProtocol_ = Http::ProtocolVersion(1, 1);
 197         return true;
 198     } else if (tok.skipSuffix(http1p0)) {
 199         msgProtocol_ = Http::ProtocolVersion(1, 0);
 200         return true;
 201     } else {
 202         // RFC 7230 section 2.6:
 203         // HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
 204         static const CharacterSet period("Decimal point", ".");
 205         static const SBuf proto("HTTP/");
 206         SBuf majorDigit;
 207         SBuf minorDigit;
 208         if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
 209                 tok.skipOneTrailing(period) &&
 210                 tok.suffix(majorDigit, CharacterSet::DIGIT) &&
 211                 tok.skipSuffix(proto)) {
 212             const bool multiDigits = majorDigit.length() > 1 || minorDigit.length() > 1;
 213             // use '0.0' for unsupported multiple digit version numbers
 214             const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
 215             const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
 216             msgProtocol_ = Http::ProtocolVersion(major, minor);
 217             return true;
 218         }
 219     }
 220
 221     // A GET request might use HTTP/0.9 syntax
 222     if (method_ == Http::METHOD_GET) {
 223         // RFC 1945 - no HTTP version field at all
 224         tok = savedTok; // in case the URI ends with a digit
 225         // report this assumption as an error if configured to triage parsing
 226         debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
 227         msgProtocol_ = Http::ProtocolVersion(0,9);
 228         return true;
 229     }
 230
 231     debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
 232     parseStatusCode = Http::scBadRequest;
 233     return false;
 234 }
 235
 236 /**
 237  * Skip characters separating request-line fields.
 238  * To handle bidirectional parsing, the caller does the actual skipping and
 239  * we just check how many character the caller has skipped.
 240  */
 241 bool
 242 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
 243 {
 244     if (count <= 0) {
 245         debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
 246         parseStatusCode = Http::scBadRequest;
 247         return false;
 248     }
 249
 250     // tolerant parser allows multiple whitespace characters between request-line fields
 251     if (count > 1 && !Config.onoff.relaxed_header_parser) {
 252         debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
 253         parseStatusCode = Http::scBadRequest;
 254         return false;
 255     }
 256
 257     return true;
 258 }
 259
 260 /// Parse CRs at the end of request-line, just before the terminating LF.
 261 bool
 262 Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
 263 {
 264     if (Config.onoff.relaxed_header_parser) {
 265         (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
 266     } else {
 267         if (!tok.skipOneTrailing(CharacterSet::CR)) {
 268             debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
 269             parseStatusCode = Http::scBadRequest;
 270             return false;
 271         }
 272     }
 273     return true;
 274 }
 275
 276 /**
 277  * Attempt to parse the first line of a new request message.
 278  *
 279  * Governed by:
 280  *  RFC 1945 section 5.1
 281  *  RFC 7230 section 2.6, 3.1 and 3.5
 282  *
 283  * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
 284  * \retval  1  successful parse. member fields contain the request-line items
 285  * \retval  0  more data is needed to complete the parse
 286  */
 287 int
 288 Http::One::RequestParser::parseRequestFirstLine()
 289 {
 290     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
 291     debugs(74, DBG_DATA, buf_);
 292
 293     SBuf line;
 294
 295     // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
 296     // Now, the request line has to end at the first LF.
 297     static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
 298     ::Parser::Tokenizer lineTok(buf_);
 299     if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
 300         if (buf_.length() >= Config.maxRequestHeaderSize) {
 301             /* who should we blame for our failure to parse this line? */
 302
 303             Http1::Tokenizer methodTok(buf_);
 304             if (!parseMethodField(methodTok))
 305                 return -1; // blame a bad method (or its delimiter)
 306
 307             // assume it is the URI
 308             debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
 309                    Config.maxRequestHeaderSize << "-byte limit");
 310             parseStatusCode = Http::scUriTooLong;
 311             return -1;
 312         }
 313         debugs(74, 5, "Parser needs more data");
 314         return 0;
 315     }
 316
 317     Http1::Tokenizer tok(line);
 318
 319     if (!parseMethodField(tok))
 320         return -1;
 321
 322     /* now parse backwards, to leave just the URI */
 323     if (!skipTrailingCrs(tok))
 324         return -1;
 325
 326     if (!parseHttpVersionField(tok))
 327         return -1;
 328
 329     if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
 330         return -1;
 331
 332     /* parsed everything before and after the URI */
 333
 334     if (!parseUriField(tok))
 335         return -1;
 336
 337     if (!tok.atEnd()) {
 338         debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
 339         parseStatusCode = Http::scBadRequest;
 340         return -1;
 341     }
 342
 343     parseStatusCode = Http::scOkay;
 344     buf_ = lineTok.remaining(); // incremental parse checkpoint
 345     return 1;
 346 }
 347
 348 bool
 349 Http::One::RequestParser::parse(const SBuf &aBuf)
 350 {
 351     const bool result = doParse(aBuf);
 352     if (preserveParsed_) {
 353         assert(aBuf.length() >= remaining().length());
 354         parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
 355     }
 356
 357     return result;
 358 }
 359
// NOTE: aBuf is taken by const reference, so it must not refer to our own
// buf_ or parsed_, both of which are modified while parsing
 361 bool
 362 Http::One::RequestParser::doParse(const SBuf &aBuf)
 363 {
 364     buf_ = aBuf;
 365     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 366
 367     // stage 1: locate the request-line
 368     if (parsingStage_ == HTTP_PARSE_NONE) {
 369         skipGarbageLines();
 370
 371         // if we hit something before EOS treat it as a message
 372         if (!buf_.isEmpty())
 373             parsingStage_ = HTTP_PARSE_FIRST;
 374         else
 375             return false;
 376     }
 377
 378     // stage 2: parse the request-line
 379     if (parsingStage_ == HTTP_PARSE_FIRST) {
 380         PROF_start(HttpParserParseReqLine);
 381         const int retcode = parseRequestFirstLine();
 382
 383         // first-line (or a look-alike) found successfully.
 384         if (retcode > 0) {
 385             parsingStage_ = HTTP_PARSE_MIME;
 386         }
 387
 388         debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
 389         debugs(74, 5, "request-line: method: " << method_);
 390         debugs(74, 5, "request-line: url: " << uri_);
 391         debugs(74, 5, "request-line: proto: " << msgProtocol_);
 392         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
 393         PROF_stop(HttpParserParseReqLine);
 394
 395         // syntax errors already
 396         if (retcode < 0) {
 397             parsingStage_ = HTTP_PARSE_DONE;
 398             return false;
 399         }
 400     }
 401
 402     // stage 3: locate the mime header block
 403     if (parsingStage_ == HTTP_PARSE_MIME) {
 404         // HTTP/1.x request-line is valid and parsing completed.
 405         if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
 406             if (parseStatusCode == Http::scHeaderTooLarge)
 407                 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
 408             return false;
 409         }
 410     }
 411
 412     return !needsMoreData();
 413 }
 414