src/http/one/RequestParser.cc

   1 /*
   2  * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "http/one/RequestParser.h"
  12 #include "http/ProtocolVersion.h"
  13 #include "mime_header.h"
  14 #include "profiler/Profiler.h"
  15 #include "SquidConfig.h"
  16
  17 Http::One::RequestParser::RequestParser() :
  18     Parser(),
  19     request_parse_status(Http::scNone)
  20 {
  21     req.start = req.end = -1;
  22     req.m_start = req.m_end = -1;
  23     req.u_start = req.u_end = -1;
  24     req.v_start = req.v_end = -1;
  25 }
  26
  27 /**
  28  * Attempt to parse the first line of a new request message.
  29  *
  30  * Governed by RFC 7230 section 3.5
  31  *  "
  32  *    In the interest of robustness, a server that is expecting to receive
  33  *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
  34  *    received prior to the request-line.
  35  *  "
  36  *
  37  * Parsing state is stored between calls to avoid repeating buffer scans.
  38  * If garbage is found the parsing offset is incremented.
  39  */
  40 void
  41 Http::One::RequestParser::skipGarbageLines()
  42 {
  43     if (Config.onoff.relaxed_header_parser) {
  44         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
  45             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  46                    "CRLF bytes received ahead of request-line. " <<
  47                    "Ignored due to relaxed_header_parser.");
  48         // Be tolerant of prefix empty lines
  49         // ie any series of either \n or \r\n with no other characters and no repeated \r
  50         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
  51             buf_.consume(1);
  52         }
  53     }
  54
  55     /* XXX: this is a Squid-specific tolerance
  56      * it appears never to have been relevant outside out unit-tests
  57      * because the ConnStateData parser loop starts with consumeWhitespace()
  58      * which absorbs any SP HTAB VTAB CR LF characters.
  59      * But unit-tests called the HttpParser method directly without that pruning.
  60      */
  61 #if USE_HTTP_VIOLATIONS
  62     if (Config.onoff.relaxed_header_parser) {
  63         if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
  64             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  65                    "Whitespace bytes received ahead of method. " <<
  66                    "Ignored due to relaxed_header_parser.");
  67         // Be tolerant of prefix spaces (other bytes are valid method values)
  68         while (!buf_.isEmpty() && buf_[0] == ' ') {
  69             buf_.consume(1);
  70         }
  71     }
  72 #endif
  73 }
  74
  75 /**
  76  * Attempt to parse the first line of a new request message.
  77  *
  78  * Governed by:
  79  *  RFC 1945 section 5.1
  80  *  RFC 7230 section 3.1 and 3.5
  81  *
  82  * Parsing state is stored between calls. However the current implementation
  83  * begins parsing from scratch on every call.
  84  * The return value tells you whether the parsing state fields are valid or not.
  85  *
  86  * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
  87  * \retval  1  successful parse. member fields contain the request-line items
  88  * \retval  0  more data is needed to complete the parse
  89  */
  90 int
  91 Http::One::RequestParser::parseRequestFirstLine()
  92 {
  93     int second_word = -1; // track the suspected URI start
  94     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
  95     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
  96
  97     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
  98     debugs(74, DBG_DATA, buf_);
  99
 100     // Single-pass parse: (provided we have the whole line anyways)
 101
 102     req.start = 0;
 103     req.end = -1;
 104     for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
 105         // track first and last whitespace (SP only)
 106         if (buf_[i] == ' ') {
 107             last_whitespace = i;
 108             if (first_whitespace < req.start)
 109                 first_whitespace = i;
 110         }
 111
 112         // track next non-SP/non-HT byte after first_whitespace
 113         if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
 114             second_word = i;
 115         }
 116
 117         // locate line terminator
 118         if (buf_[i] == '\n') {
 119             req.end = i;
 120             line_end = i - 1;
 121             break;
 122         }
 123         if (i < buf_.length() - 1 && buf_[i] == '\r') {
 124             if (Config.onoff.relaxed_header_parser) {
 125                 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
 126                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
 127                            "Series of carriage-return bytes received prior to line terminator. " <<
 128                            "Ignored due to relaxed_header_parser.");
 129
 130                 // Be tolerant of invalid multiple \r prior to terminal \n
 131                 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
 132                     line_end = i - 1;
 133                 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
 134                     ++i;
 135
 136                 if (buf_[i + 1] == '\n') {
 137                     req.end = i + 1;
 138                     break;
 139                 }
 140             } else {
 141                 if (buf_[i + 1] == '\n') {
 142                     req.end = i + 1;
 143                     line_end = i - 1;
 144                     break;
 145                 }
 146             }
 147
 148             // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
 149             // However it does explicitly state an exact syntax which omits un-encoded CR
 150             // and defines 400 (Bad Request) as the required action when
 151             // handed an invalid request-line.
 152             request_parse_status = Http::scBadRequest;
 153             return -1;
 154         }
 155
 156         // We are expecting printable ascii characters for method/first word
 157         if (first_whitespace < 0 && (!xisascii(buf_[i]) || !xisprint(buf_[i]))) {
 158             request_parse_status = Http::scBadRequest;
 159             return -1;
 160         }
 161     }
 162
 163     if (req.end == -1) {
 164         // DoS protection against long first-line
 165         if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
 166             debugs(33, 5, "Too large request-line");
 167             // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
 168             request_parse_status = Http::scUriTooLong;
 169             return -1;
 170         }
 171
 172         debugs(74, 5, "Parser: retval 0: from " << req.start <<
 173                "->" << req.end << ": needs more data to complete first line.");
 174         return 0;
 175     }
 176
 177     // NP: we have now seen EOL, more-data (0) cannot occur.
 178     //     From here on any failure is -1, success is 1
 179
 180     // Input Validation:
 181
 182     // DoS protection against long first-line
 183     if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
 184         debugs(33, 5, "Too large request-line");
 185         request_parse_status = Http::scUriTooLong;
 186         return -1;
 187     }
 188
 189     // Process what we now know about the line structure into field offsets
 190     // generating HTTP status for any aborts as we go.
 191
 192     // First non-whitespace = beginning of method
 193     if (req.start > line_end) {
 194         request_parse_status = Http::scBadRequest;
 195         return -1;
 196     }
 197     req.m_start = req.start;
 198
 199     // First whitespace = end of method
 200     if (first_whitespace > line_end || first_whitespace < req.start) {
 201         request_parse_status = Http::scBadRequest; // no method
 202         return -1;
 203     }
 204     req.m_end = first_whitespace - 1;
 205     if (req.m_end < req.m_start) {
 206         request_parse_status = Http::scBadRequest; // missing URI?
 207         return -1;
 208     }
 209
 210     /* Set method_ */
 211     const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
 212     method_ = HttpRequestMethod(tmp);
 213
 214     // First non-whitespace after first SP = beginning of URL+Version
 215     if (second_word > line_end || second_word < req.start) {
 216         request_parse_status = Http::scBadRequest; // missing URI
 217         return -1;
 218     }
 219     req.u_start = second_word;
 220
 221     // RFC 1945: SP and version following URI are optional, marking version 0.9
 222     // we identify this by the last whitespace being earlier than URI start
 223     if (last_whitespace < second_word && last_whitespace >= req.start) {
 224         msgProtocol_ = Http::ProtocolVersion(0,9);
 225         req.u_end = line_end;
 226         uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
 227         request_parse_status = Http::scOkay; // HTTP/0.9
 228         return 1;
 229     } else {
 230         // otherwise last whitespace is somewhere after end of URI.
 231         req.u_end = last_whitespace;
 232         // crop any trailing whitespace in the area we think of as URI
 233         for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
 234     }
 235     if (req.u_end < req.u_start) {
 236         request_parse_status = Http::scBadRequest; // missing URI
 237         return -1;
 238     }
 239     uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
 240
 241     // Last whitespace SP = before start of protocol/version
 242     if (last_whitespace >= line_end) {
 243         request_parse_status = Http::scBadRequest; // missing version
 244         return -1;
 245     }
 246     req.v_start = last_whitespace + 1;
 247     req.v_end = line_end;
 248
 249     /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
 250     if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
 251         // non-HTTP/1 protocols not supported / implemented.
 252         request_parse_status = Http::scHttpVersionNotSupported;
 253         return -1;
 254     }
 255     // NP: magic octets include the protocol name and major version DIGIT.
 256     msgProtocol_.protocol = AnyP::PROTO_HTTP;
 257     msgProtocol_.major = 1;
 258
 259     int i = req.v_start + Http1magic.length() -1;
 260
 261     // catch missing minor part
 262     if (++i > line_end) {
 263         request_parse_status = Http::scHttpVersionNotSupported;
 264         return -1;
 265     }
 266     /* next should be one or more digits */
 267     if (!isdigit(buf_[i])) {
 268         request_parse_status = Http::scHttpVersionNotSupported;
 269         return -1;
 270     }
 271     int min = 0;
 272     for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
 273         min = min * 10;
 274         min = min + (buf_[i]) - '0';
 275     }
 276     // catch too-big values or trailing garbage
 277     if (min >= 65536 || i < line_end) {
 278         request_parse_status = Http::scHttpVersionNotSupported;
 279         return -1;
 280     }
 281     msgProtocol_.minor = min;
 282
 283     /*
 284      * Rightio - we have all the schtuff. Return true; we've got enough.
 285      */
 286     request_parse_status = Http::scOkay;
 287     return 1;
 288 }
 289
 290 bool
 291 Http::One::RequestParser::parse(const SBuf &aBuf)
 292 {
 293     buf_ = aBuf;
 294     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 295
 296     // stage 1: locate the request-line
 297     if (parsingStage_ == HTTP_PARSE_NONE) {
 298         skipGarbageLines();
 299
 300         // if we hit something before EOS treat it as a message
 301         if (!buf_.isEmpty())
 302             parsingStage_ = HTTP_PARSE_FIRST;
 303         else
 304             return false;
 305     }
 306
 307     // stage 2: parse the request-line
 308     if (parsingStage_ == HTTP_PARSE_FIRST) {
 309         PROF_start(HttpParserParseReqLine);
 310         const int retcode = parseRequestFirstLine();
 311
 312         // first-line (or a look-alike) found successfully.
 313         if (retcode > 0) {
 314             buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
 315             parsingStage_ = HTTP_PARSE_MIME;
 316         }
 317
 318         debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
 319                " line={" << aBuf.length() << ", data='" << aBuf << "'}");
 320         debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
 321         debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
 322         debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
 323         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
 324         PROF_stop(HttpParserParseReqLine);
 325
 326         // syntax errors already
 327         if (retcode < 0) {
 328             parsingStage_ = HTTP_PARSE_DONE;
 329             return false;
 330         }
 331     }
 332
 333     // stage 3: locate the mime header block
 334     if (parsingStage_ == HTTP_PARSE_MIME) {
 335         // HTTP/1.x request-line is valid and parsing completed.
 336         if (msgProtocol_.major == 1) {
 337             /* NOTE: HTTP/0.9 requests do not have a mime header block.
 338              *       So the rest of the code will need to deal with '0'-byte headers
 339              *       (ie, none, so don't try parsing em)
 340              */
 341             int64_t mimeHeaderBytes = 0;
 342             // XXX: c_str() reallocates. performance regression.
 343             if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
 344                 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
 345                     debugs(33, 5, "Too large request");
 346                     request_parse_status = Http::scRequestHeaderFieldsTooLarge;
 347                     parsingStage_ = HTTP_PARSE_DONE;
 348                 } else
 349                     debugs(33, 5, "Incomplete request, waiting for end of headers");
 350                 return false;
 351             }
 352             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
 353             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 354
 355         } else
 356             debugs(33, 3, "Missing HTTP/1.x identifier");
 357
 358         // NP: we do not do any further stages here yet so go straight to DONE
 359         parsingStage_ = HTTP_PARSE_DONE;
 360
 361         // Squid could handle these headers, but admin does not want to
 362         if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
 363             debugs(33, 5, "Too large request");
 364             request_parse_status = Http::scRequestHeaderFieldsTooLarge;
 365             return false;
 366         }
 367     }
 368
 369     return !needsMoreData();
 370 }
 371