src/http/one/RequestParser.cc

   1 /*
   2  * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "http/one/RequestParser.h"
  12 #include "http/one/Tokenizer.h"
  13 #include "http/ProtocolVersion.h"
  14 #include "profiler/Profiler.h"
  15 #include "SquidConfig.h"
  16
  17 // the right debugs() level for parsing errors
  18 inline static int
  19 ErrorLevel() {
  20     return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
  21 }
  22
  23 Http::One::RequestParser::RequestParser() :
  24     Parser()
  25 {}
  26
  27 Http1::Parser::size_type
  28 Http::One::RequestParser::firstLineSize() const
  29 {
  30     // RFC 7230 section 2.6
  31     /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
  32     return method_.image().length() + uri_.length() + 12;
  33 }
  34
  35 /**
  36  * Attempt to parse the first line of a new request message.
  37  *
  38  * Governed by RFC 7230 section 3.5
  39  *  "
  40  *    In the interest of robustness, a server that is expecting to receive
  41  *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
  42  *    received prior to the request-line.
  43  *  "
  44  *
  45  * Parsing state is stored between calls to avoid repeating buffer scans.
  46  * If garbage is found the parsing offset is incremented.
  47  */
  48 void
  49 Http::One::RequestParser::skipGarbageLines()
  50 {
  51     if (Config.onoff.relaxed_header_parser) {
  52         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
  53             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  54                    "CRLF bytes received ahead of request-line. " <<
  55                    "Ignored due to relaxed_header_parser.");
  56         // Be tolerant of prefix empty lines
  57         // ie any series of either \n or \r\n with no other characters and no repeated \r
  58         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
  59             buf_.consume(1);
  60         }
  61     }
  62 }
  63
  64 /**
  65  * Attempt to parse the method field out of an HTTP message request-line.
  66  *
  67  * Governed by:
  68  *  RFC 1945 section 5.1
  69  *  RFC 7230 section 2.6, 3.1 and 3.5
  70  */
  71 bool
  72 Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
  73 {
  74     // method field is a sequence of TCHAR.
  75     // Limit to 32 characters to prevent overly long sequences of non-HTTP
  76     // being sucked in before mismatch is detected. 32 is itself annoyingly
  77     // big but there are methods registered by IANA that reach 17 bytes:
  78     //  http://www.iana.org/assignments/http-methods
  79     static const size_t maxMethodLength = 32; // TODO: make this configurable?
  80
  81     SBuf methodFound;
  82     if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
  83         debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
  84         parseStatusCode = Http::scBadRequest;
  85         return false;
  86     }
  87     method_ = HttpRequestMethod(methodFound);
  88
  89     if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
  90         return false;
  91
  92     return true;
  93 }
  94
  95 /// the characters which truly are valid within URI
  96 static const CharacterSet &
  97 UriValidCharacters()
  98 {
  99     /* RFC 3986 section 2:
 100      * "
 101      *   A URI is composed from a limited set of characters consisting of
 102      *   digits, letters, and a few graphic symbols.
 103      * "
 104      */
 105     static const CharacterSet UriChars =
 106         CharacterSet("URI-Chars","") +
 107         // RFC 3986 section 2.2 - reserved characters
 108         CharacterSet("gen-delims", ":/?#[]@") +
 109         CharacterSet("sub-delims", "!$&'()*+,;=") +
 110         // RFC 3986 section 2.3 - unreserved characters
 111         CharacterSet::ALPHA +
 112         CharacterSet::DIGIT +
 113         CharacterSet("unreserved", "-._~") +
 114         // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
 115         CharacterSet("pct-encoded", "%") +
 116         CharacterSet::HEXDIG;
 117
 118     return UriChars;
 119 }
 120
 121 /// characters which Squid will accept in the HTTP request-target (URI)
 122 const CharacterSet &
 123 Http::One::RequestParser::RequestTargetCharacters()
 124 {
 125     if (Config.onoff.relaxed_header_parser) {
 126 #if USE_HTTP_VIOLATIONS
 127         static const CharacterSet RelaxedExtended =
 128             UriValidCharacters() +
 129             // accept whitespace (extended), it will be dealt with later
 130             DelimiterCharacters() +
 131             // RFC 2396 unwise character set which must never be transmitted
 132             // in un-escaped form. But many web services do anyway.
 133             CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
 134             // UTF-8 because we want to be future-proof
 135             CharacterSet("UTF-8", 128, 255);
 136
 137         return RelaxedExtended;
 138 #else
 139         static const CharacterSet RelaxedCompliant =
 140             UriValidCharacters() +
 141             // accept whitespace (extended), it will be dealt with later.
 142             DelimiterCharacters();
 143
 144         return RelaxedCompliant;
 145 #endif
 146     }
 147
 148     // strict parse only accepts what the RFC say we can
 149     return UriValidCharacters();
 150 }
 151
 152 bool
 153 Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
 154 {
 155     /* Arbitrary 64KB URI upper length limit.
 156      *
 157      * Not quite as arbitrary as it seems though. Old SquidString objects
 158      * cannot store strings larger than 64KB, so we must limit until they
 159      * have all been replaced with SBuf.
 160      *
 161      * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
 162      * at least 8000 octets for the whole line, including method and version.
 163      */
 164     const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
 165
 166     SBuf uriFound;
 167     if (!tok.prefix(uriFound, RequestTargetCharacters())) {
 168         parseStatusCode = Http::scBadRequest;
 169         debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
 170         return false;
 171     }
 172
 173     if (uriFound.length() > maxUriLength) {
 174         // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
 175         parseStatusCode = Http::scUriTooLong;
 176         debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
 177                "-byte URI exceeds " << maxUriLength << "-byte limit");
 178         return false;
 179     }
 180
 181     uri_ = uriFound;
 182     return true;
 183 }
 184
 185 bool
 186 Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
 187 {
 188     const auto savedTok = tok;
 189
 190     SBuf digit;
 191     // Searching for Http1magic precludes detecting HTTP/2+ versions.
 192     // Rewrite if we ever _need_ to return 505 (Version Not Supported) errors.
 193     if (tok.suffix(digit, CharacterSet::DIGIT) && tok.skipSuffix(Http1magic)) {
 194         msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
 195         return true;
 196     }
 197
 198     // A GET request might use HTTP/0.9 syntax
 199     if (method_ == Http::METHOD_GET) {
 200         // RFC 1945 - no HTTP version field at all
 201         tok = savedTok; // in case the URI ends with a digit
 202         // report this assumption as an error if configured to triage parsing
 203         debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
 204         msgProtocol_ = Http::ProtocolVersion(0,9);
 205         return true;
 206     }
 207
 208     debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
 209     parseStatusCode = Http::scBadRequest;
 210     return false;
 211 }
 212
 213 /**
 214  * Skip characters separating request-line fields.
 215  * To handle bidirectional parsing, the caller does the actual skipping and
 216  * we just check how many character the caller has skipped.
 217  */
 218 bool
 219 Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
 220 {
 221     if (count <= 0) {
 222         debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
 223         parseStatusCode = Http::scBadRequest;
 224         return false;
 225     }
 226
 227     // tolerant parser allows multiple whitespace characters between request-line fields
 228     if (count > 1 && !Config.onoff.relaxed_header_parser) {
 229         debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
 230         parseStatusCode = Http::scBadRequest;
 231         return false;
 232     }
 233
 234     return true;
 235 }
 236
 237 /// Parse CRs at the end of request-line, just before the terminating LF.
 238 bool
 239 Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
 240 {
 241     if (Config.onoff.relaxed_header_parser) {
 242         (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
 243     } else {
 244         if (!tok.skipOneTrailing(CharacterSet::CR)) {
 245             debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
 246             parseStatusCode = Http::scBadRequest;
 247             return false;
 248         }
 249     }
 250     return true;
 251 }
 252
 253 /**
 254  * Attempt to parse the first line of a new request message.
 255  *
 256  * Governed by:
 257  *  RFC 1945 section 5.1
 258  *  RFC 7230 section 2.6, 3.1 and 3.5
 259  *
 260  * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
 261  * \retval  1  successful parse. member fields contain the request-line items
 262  * \retval  0  more data is needed to complete the parse
 263  */
 264 int
 265 Http::One::RequestParser::parseRequestFirstLine()
 266 {
 267     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
 268     debugs(74, DBG_DATA, buf_);
 269
 270     SBuf line;
 271
 272     // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
 273     // Now, the request line has to end at the first LF.
 274     static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
 275     ::Parser::Tokenizer lineTok(buf_);
 276     if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
 277         if (buf_.length() >= Config.maxRequestHeaderSize) {
 278             /* who should we blame for our failure to parse this line? */
 279
 280             Http1::Tokenizer methodTok(buf_);
 281             if (!parseMethodField(methodTok))
 282                 return -1; // blame a bad method (or its delimiter)
 283
 284             // assume it is the URI
 285             debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
 286                    Config.maxRequestHeaderSize << "-byte limit");
 287             parseStatusCode = Http::scUriTooLong;
 288             return -1;
 289         }
 290         debugs(74, 5, "Parser needs more data");
 291         return 0;
 292     }
 293
 294     Http1::Tokenizer tok(line);
 295
 296     if (!parseMethodField(tok))
 297         return -1;
 298
 299     /* now parse backwards, to leave just the URI */
 300     if (!skipTrailingCrs(tok))
 301         return -1;
 302
 303     if (!parseHttpVersionField(tok))
 304         return -1;
 305
 306     if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
 307         return -1;
 308
 309     /* parsed everything before and after the URI */
 310
 311     if (!parseUriField(tok))
 312         return -1;
 313
 314     if (!tok.atEnd()) {
 315         debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
 316         parseStatusCode = Http::scBadRequest;
 317         return -1;
 318     }
 319
 320     parseStatusCode = Http::scOkay;
 321     buf_ = lineTok.remaining(); // incremental parse checkpoint
 322     return 1;
 323 }
 324
 325 bool
 326 Http::One::RequestParser::parse(const SBuf &aBuf)
 327 {
 328     buf_ = aBuf;
 329     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 330
 331     // stage 1: locate the request-line
 332     if (parsingStage_ == HTTP_PARSE_NONE) {
 333         skipGarbageLines();
 334
 335         // if we hit something before EOS treat it as a message
 336         if (!buf_.isEmpty())
 337             parsingStage_ = HTTP_PARSE_FIRST;
 338         else
 339             return false;
 340     }
 341
 342     // stage 2: parse the request-line
 343     if (parsingStage_ == HTTP_PARSE_FIRST) {
 344         PROF_start(HttpParserParseReqLine);
 345         const int retcode = parseRequestFirstLine();
 346
 347         // first-line (or a look-alike) found successfully.
 348         if (retcode > 0) {
 349             parsingStage_ = HTTP_PARSE_MIME;
 350         }
 351
 352         debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
 353         debugs(74, 5, "request-line: method: " << method_);
 354         debugs(74, 5, "request-line: url: " << uri_);
 355         debugs(74, 5, "request-line: proto: " << msgProtocol_);
 356         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
 357         PROF_stop(HttpParserParseReqLine);
 358
 359         // syntax errors already
 360         if (retcode < 0) {
 361             parsingStage_ = HTTP_PARSE_DONE;
 362             return false;
 363         }
 364     }
 365
 366     // stage 3: locate the mime header block
 367     if (parsingStage_ == HTTP_PARSE_MIME) {
 368         // HTTP/1.x request-line is valid and parsing completed.
 369         if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
 370             if (parseStatusCode == Http::scHeaderTooLarge)
 371                 parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
 372             return false;
 373         }
 374     }
 375
 376     return !needsMoreData();
 377 }
 378