src/http/one/RequestParser.cc

   1 /*
   2  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "http/one/RequestParser.h"
  12 #include "http/ProtocolVersion.h"
  13 #include "mime_header.h"
  14 #include "parser/Tokenizer.h"
  15 #include "profiler/Profiler.h"
  16 #include "SquidConfig.h"
  17
  18 Http::One::RequestParser::RequestParser() :
  19     Parser(),
  20     request_parse_status(Http::scNone),
  21     firstLineGarbage_(0)
  22 {}
  23
  24 Http1::Parser::size_type
  25 Http::One::RequestParser::firstLineSize() const
  26 {
  27     // RFC 7230 section 2.6
  28     /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
  29     return method_.image().length() + uri_.length() + 12;
  30 }
  31
  32 /**
  33  * Attempt to parse the first line of a new request message.
  34  *
  35  * Governed by RFC 7230 section 3.5
  36  *  "
  37  *    In the interest of robustness, a server that is expecting to receive
  38  *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
  39  *    received prior to the request-line.
  40  *  "
  41  *
  42  * Parsing state is stored between calls to avoid repeating buffer scans.
  43  * If garbage is found the parsing offset is incremented.
  44  */
  45 void
  46 Http::One::RequestParser::skipGarbageLines()
  47 {
  48     if (Config.onoff.relaxed_header_parser) {
  49         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
  50             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  51                    "CRLF bytes received ahead of request-line. " <<
  52                    "Ignored due to relaxed_header_parser.");
  53         // Be tolerant of prefix empty lines
  54         // ie any series of either \n or \r\n with no other characters and no repeated \r
  55         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
  56             buf_.consume(1);
  57         }
  58     }
  59 }
  60
  61 /**
  62  * Attempt to parse the method field out of an HTTP message request-line.
  63  *
  64  * Governed by:
  65  *  RFC 1945 section 5.1
  66  *  RFC 7230 section 2.6, 3.1 and 3.5
  67  *
  68  * Parsing state is stored between calls. The current implementation uses
  69  * checkpoints after each successful request-line field.
  70  * The return value tells you whether the parsing is completed or not.
  71  *
  72  * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
  73  * \retval  1  successful parse. method_ is filled and buffer consumed including first delimiter.
  74  * \retval  0  more data is needed to complete the parse
  75  */
  76 int
  77 Http::One::RequestParser::parseMethodField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
  78 {
  79     // scan for up to 16 valid method characters.
  80     static const size_t maxMethodLength = 16; // TODO: make this configurable?
  81
  82     // method field is a sequence of TCHAR.
  83     SBuf methodFound;
  84     if (tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength) && tok.skipOne(WspDelim)) {
  85
  86         method_ = HttpRequestMethod(methodFound);
  87         buf_ = tok.remaining(); // incremental parse checkpoint
  88         return 1;
  89
  90     } else if (tok.atEnd()) {
  91         debugs(74, 5, "Parser needs more data to find method");
  92         return 0;
  93
  94     } // else error(s)
  95
  96     // non-delimiter found after accepted method bytes means ...
  97     if (methodFound.length() == maxMethodLength) {
  98         // method longer than acceptible.
  99         // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
 100         request_parse_status = Http::scNotImplemented;
 101         debugs(33, 5, "invalid request-line. method too long");
 102     } else {
 103         // invalid character in the URL
 104         // RFC 7230 section 3.1.1 required (SHOULD) 400 response
 105         request_parse_status = Http::scBadRequest;
 106         debugs(33, 5, "invalid request-line. missing method delimiter");
 107     }
 108     return -1;
 109 }
 110
 111 int
 112 Http::One::RequestParser::parseUriField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
 113 {
 114     // URI field is a sequence of ... what? segments all have different valid charset
 115     // go with non-whitespace non-binary characters for now
 116     static CharacterSet UriChars("URI-Chars","");
 117     if (!UriChars['a']) { // if it needs initializing...
 118         /* RFC 3986 section 2:
 119          * "
 120          *   A URI is composed from a limited set of characters consisting of
 121          *   digits, letters, and a few graphic symbols.
 122          * "
 123          */
 124         // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
 125         UriChars.add('%');
 126         UriChars += CharacterSet::HEXDIG;
 127         // RFC 3986 section 2.2 - reserved characters
 128         UriChars += CharacterSet("gen-delims", ":/?#[]@");
 129         UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
 130         // RFC 3986 section 2.3 - unreserved characters
 131         UriChars += CharacterSet::ALPHA;
 132         UriChars += CharacterSet::DIGIT;
 133         UriChars += CharacterSet("unreserved", "-._~");
 134     }
 135
 136     /* Arbitrary 64KB URI upper length limit.
 137      *
 138      * Not quite as arbitrary as it seems though. Old SquidString objects
 139      * cannot store strings larger than 64KB, so we must limit until they
 140      * have all been replaced with SBuf.
 141      *
 142      * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
 143      * at least 8000 octets for the whole line, including method and version.
 144      */
 145     const size_t maxUriLength = min(static_cast<size_t>(Config.maxRequestHeaderSize) - firstLineSize(),
 146                                     static_cast<size_t>((64*1024)-1));
 147
 148     SBuf uriFound;
 149     if (!tok.prefix(uriFound, UriChars, maxUriLength)) {
 150         // NP: prefix() returns true if it finds ANY valid chars
 151         debugs(33, 5, "invalid request-line. missing URL");
 152         request_parse_status = Http::scBadRequest;
 153         return -1;
 154     }
 155
 156     /* NOTE: we do have to check for token/state in this order.
 157      * Because RFC 7230 tolerant parse accepts CR as a whitespace
 158      * delimiter in HTTP/1.1 and we may not yet have the LF final
 159      * terminator character on HTTP/0.9 simple-request lines.
 160      */
 161
 162     // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
 163     if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
 164         debugs(33, 5, "HTTP/0.9 syntax request-line detected");
 165         msgProtocol_ = Http::ProtocolVersion(0,9);
 166         uri_ = uriFound;
 167         request_parse_status = Http::scOkay;
 168         buf_ = tok.remaining(); // incremental parse checkpoint
 169         return 1;
 170     } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
 171         debugs(74, 5, "Parser needs more data to find URI");
 172         return 0;
 173     }
 174
 175     // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
 176     if (tok.skipOne(WspDelim)) {
 177         uri_ = uriFound;
 178         buf_ = tok.remaining(); // incremental parse checkpoint
 179         return 1;
 180
 181     } else if (tok.atEnd()) {
 182         debugs(74, 5, "Parser needs more data to find URI");
 183         return 0;
 184     }
 185
 186     // else errors...
 187
 188     if (uriFound.length() == maxUriLength) {
 189         // URL longer than acceptible.
 190         // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
 191         request_parse_status = Http::scUriTooLong;
 192         debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
 193     } else {
 194         // invalid non-delimiter character ended the URL
 195         // RFC 7230 section 3.1.1 required (SHOULD) 400 response
 196         request_parse_status = Http::scBadRequest;
 197         debugs(33, 5, "invalid request-line. missing URI delimiter");
 198     }
 199     return -1;
 200 }
 201
 202 int
 203 Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer &tok)
 204 {
 205     // partial match of HTTP/1 magic prefix
 206     if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
 207         debugs(74, 5, "Parser needs more data to find version");
 208         return 0;
 209     }
 210
 211     if (!tok.skip(Http1magic)) {
 212         debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
 213         request_parse_status = Http::scHttpVersionNotSupported;
 214         return -1;
 215     }
 216
 217     if (tok.atEnd()) {
 218         debugs(74, 5, "Parser needs more data to find version");
 219         return 0;
 220     }
 221
 222     // get the version minor DIGIT
 223     SBuf digit;
 224     if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
 225
 226         // found version fully AND terminator
 227         msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
 228         request_parse_status = Http::scOkay;
 229         buf_ = tok.remaining(); // incremental parse checkpoint
 230         return 1;
 231
 232     } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
 233         debugs(74, 5, "Parser needs more data to find version");
 234         return 0;
 235
 236     } // else error ...
 237
 238     // non-DIGIT. invalid version number.
 239     request_parse_status = Http::scHttpVersionNotSupported;
 240     debugs(33, 5, "invalid request-line. garabge before line terminator");
 241     return -1;
 242 }
 243
 244 /**
 245  * Attempt to parse the first line of a new request message.
 246  *
 247  * Governed by:
 248  *  RFC 1945 section 5.1
 249  *  RFC 7230 section 2.6, 3.1 and 3.5
 250  *
 251  * Parsing state is stored between calls. The current implementation uses
 252  * checkpoints after each successful request-line field.
 253  * The return value tells you whether the parsing is completed or not.
 254  *
 255  * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
 256  * \retval  1  successful parse. member fields contain the request-line items
 257  * \retval  0  more data is needed to complete the parse
 258  */
 259 int
 260 Http::One::RequestParser::parseRequestFirstLine()
 261 {
 262     ::Parser::Tokenizer tok(buf_);
 263
 264     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
 265     debugs(74, DBG_DATA, buf_);
 266
 267     CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
 268
 269     if (Config.onoff.relaxed_header_parser) {
 270         // RFC 7230 section 3.5
 271         // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
 272         // as whitespace between request-line fields
 273         WspDelim += CharacterSet::HTAB
 274                   + CharacterSet("VT,FF","\x0B\x0C")
 275                   + CharacterSet::CR;
 276     }
 277
 278     // only search for method if we have not yet found one
 279     if (method_ == Http::METHOD_NONE) {
 280         const int res = parseMethodField(tok, WspDelim);
 281         if (res < 1)
 282             return res;
 283         // else keep going...
 284     }
 285
 286     // tolerant parser allows multiple whitespace characters between request-line fields
 287     if (Config.onoff.relaxed_header_parser) {
 288         const size_t garbage = tok.skipAll(WspDelim);
 289         if (garbage > 0) {
 290             firstLineGarbage_ += garbage;
 291             buf_ = tok.remaining(); // re-checkpoint after garbage
 292         }
 293     }
 294     if (tok.atEnd()) {
 295         debugs(74, 5, "Parser needs more data");
 296         return 0;
 297     }
 298
 299     // only search for request-target (URL) if we have not yet found one
 300     if (uri_.isEmpty()) {
 301         const int res = parseUriField(tok, WspDelim);
 302         if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
 303             return res;
 304         // else keep going...
 305     }
 306
 307     // tolerant parser allows multiple whitespace characters between request-line fields
 308     if (Config.onoff.relaxed_header_parser) {
 309         const size_t garbage = tok.skipAll(WspDelim);
 310         if (garbage > 0) {
 311             firstLineGarbage_ += garbage;
 312             buf_ = tok.remaining(); // re-checkpoint after garbage
 313         }
 314     }
 315     if (tok.atEnd()) {
 316         debugs(74, 5, "Parser needs more data");
 317         return 0;
 318     }
 319
 320     // HTTP/1 version suffix (protocol magic) followed by CR*LF
 321     if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
 322         return parseHttpVersionField(tok);
 323     }
 324
 325     // If we got here this method has been called too many times
 326     request_parse_status = Http::scInternalServerError;
 327     debugs(33, 5, "ERROR: Parser already processed request-line");
 328     return -1;
 329 }
 330
 331 bool
 332 Http::One::RequestParser::parse(const SBuf &aBuf)
 333 {
 334     buf_ = aBuf;
 335     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 336
 337     // stage 1: locate the request-line
 338     if (parsingStage_ == HTTP_PARSE_NONE) {
 339         skipGarbageLines();
 340
 341         // if we hit something before EOS treat it as a message
 342         if (!buf_.isEmpty())
 343             parsingStage_ = HTTP_PARSE_FIRST;
 344         else
 345             return false;
 346     }
 347
 348     // stage 2: parse the request-line
 349     if (parsingStage_ == HTTP_PARSE_FIRST) {
 350         PROF_start(HttpParserParseReqLine);
 351         const int retcode = parseRequestFirstLine();
 352
 353         // first-line (or a look-alike) found successfully.
 354         if (retcode > 0) {
 355             parsingStage_ = HTTP_PARSE_MIME;
 356         }
 357
 358         debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
 359         debugs(74, 5, "request-line: method: " << method_);
 360         debugs(74, 5, "request-line: url: " << uri_);
 361         debugs(74, 5, "request-line: proto: " << msgProtocol_);
 362         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
 363         PROF_stop(HttpParserParseReqLine);
 364
 365         // syntax errors already
 366         if (retcode < 0) {
 367             parsingStage_ = HTTP_PARSE_DONE;
 368             return false;
 369         }
 370     }
 371
 372     // stage 3: locate the mime header block
 373     if (parsingStage_ == HTTP_PARSE_MIME) {
 374         // HTTP/1.x request-line is valid and parsing completed.
 375         if (msgProtocol_.major == 1) {
 376             /* NOTE: HTTP/0.9 requests do not have a mime header block.
 377              *       So the rest of the code will need to deal with '0'-byte headers
 378              *       (ie, none, so don't try parsing em)
 379              */
 380             int64_t mimeHeaderBytes = 0;
 381             // XXX: c_str() reallocates. performance regression.
 382             if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
 383                 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
 384                     debugs(33, 5, "Too large request");
 385                     request_parse_status = Http::scRequestHeaderFieldsTooLarge;
 386                     parsingStage_ = HTTP_PARSE_DONE;
 387                 } else
 388                     debugs(33, 5, "Incomplete request, waiting for end of headers");
 389                 return false;
 390             }
 391             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
 392             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 393
 394         } else
 395             debugs(33, 3, "Missing HTTP/1.x identifier");
 396
 397         // NP: we do not do any further stages here yet so go straight to DONE
 398         parsingStage_ = HTTP_PARSE_DONE;
 399
 400         // Squid could handle these headers, but admin does not want to
 401         if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
 402             debugs(33, 5, "Too large request");
 403             request_parse_status = Http::scRequestHeaderFieldsTooLarge;
 404             return false;
 405         }
 406     }
 407
 408     return !needsMoreData();
 409 }
 410