src/http/one/RequestParser.cc

   1 /*
   2  * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #include "squid.h"
  10 #include "Debug.h"
  11 #include "http/one/RequestParser.h"
  12 #include "http/ProtocolVersion.h"
  13 #include "mime_header.h"
  14 #include "profiler/Profiler.h"
  15 #include "SquidConfig.h"
  16
  17 Http::One::RequestParser::RequestParser() :
  18     Parser(),
  19     request_parse_status(Http::scNone)
  20 {
  21     req.start = req.end = -1;
  22     req.m_start = req.m_end = -1;
  23     req.u_start = req.u_end = -1;
  24     req.v_start = req.v_end = -1;
  25 }
  26
  27 /**
  28  * Attempt to parse the first line of a new request message.
  29  *
  30  * Governed by RFC 7230 section 3.5
  31  *  "
  32  *    In the interest of robustness, a server that is expecting to receive
  33  *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
  34  *    received prior to the request-line.
  35  *  "
  36  *
  37  * Parsing state is stored between calls to avoid repeating buffer scans.
  38  * If garbage is found the parsing offset is incremented.
  39  */
  40 void
  41 Http::One::RequestParser::skipGarbageLines()
  42 {
  43     if (Config.onoff.relaxed_header_parser) {
  44         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
  45             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  46                    "CRLF bytes received ahead of request-line. " <<
  47                    "Ignored due to relaxed_header_parser.");
  48         // Be tolerant of prefix empty lines
  49         // ie any series of either \n or \r\n with no other characters and no repeated \r
  50         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
  51             buf_.consume(1);
  52         }
  53     }
  54
  55     /* XXX: this is a Squid-specific tolerance
  56      * it appears never to have been relevant outside out unit-tests
  57      * because the ConnStateData parser loop starts with consumeWhitespace()
  58      * which absorbs any SP HTAB VTAB CR LF characters.
  59      * But unit-tests called the HttpParser method directly without that pruning.
  60      */
  61 #if USE_HTTP_VIOLATIONS
  62     if (Config.onoff.relaxed_header_parser) {
  63         if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
  64             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  65                    "Whitespace bytes received ahead of method. " <<
  66                    "Ignored due to relaxed_header_parser.");
  67         // Be tolerant of prefix spaces (other bytes are valid method values)
  68         while (!buf_.isEmpty() && buf_[0] == ' ') {
  69             buf_.consume(1);
  70         }
  71     }
  72 #endif
  73 }
  74
  75 /**
  76  * Attempt to parse the first line of a new request message.
  77  *
  78  * Governed by:
  79  *  RFC 1945 section 5.1
  80  *  RFC 7230 section 3.1 and 3.5
  81  *
  82  * Parsing state is stored between calls. However the current implementation
  83  * begins parsing from scratch on every call.
  84  * The return value tells you whether the parsing state fields are valid or not.
  85  *
  86  * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
  87  * \retval  1  successful parse. member fields contain the request-line items
  88  * \retval  0  more data is needed to complete the parse
  89  */
  90 int
  91 Http::One::RequestParser::parseRequestFirstLine()
  92 {
  93     int second_word = -1; // track the suspected URI start
  94     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
  95     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
  96
  97     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
  98     debugs(74, DBG_DATA, buf_);
  99
 100     // Single-pass parse: (provided we have the whole line anyways)
 101
 102     req.start = 0;
 103     req.end = -1;
 104     for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
 105         // track first and last whitespace (SP only)
 106         if (buf_[i] == ' ') {
 107             last_whitespace = i;
 108             if (first_whitespace < req.start)
 109                 first_whitespace = i;
 110         }
 111
 112         // track next non-SP/non-HT byte after first_whitespace
 113         if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
 114             second_word = i;
 115         }
 116
 117         // locate line terminator
 118         if (buf_[i] == '\n') {
 119             req.end = i;
 120             line_end = i - 1;
 121             break;
 122         }
 123         if (i < buf_.length() - 1 && buf_[i] == '\r') {
 124             if (Config.onoff.relaxed_header_parser) {
 125                 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
 126                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
 127                            "Series of carriage-return bytes received prior to line terminator. " <<
 128                            "Ignored due to relaxed_header_parser.");
 129
 130                 // Be tolerant of invalid multiple \r prior to terminal \n
 131                 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
 132                     line_end = i - 1;
 133                 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
 134                     ++i;
 135
 136                 if (buf_[i + 1] == '\n') {
 137                     req.end = i + 1;
 138                     break;
 139                 }
 140             } else {
 141                 if (buf_[i + 1] == '\n') {
 142                     req.end = i + 1;
 143                     line_end = i - 1;
 144                     break;
 145                 }
 146             }
 147
 148             // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
 149             // However it does explicitly state an exact syntax which omits un-encoded CR
 150             // and defines 400 (Bad Request) as the required action when
 151             // handed an invalid request-line.
 152             request_parse_status = Http::scBadRequest;
 153             return -1;
 154         }
 155     }
 156
 157     if (req.end == -1) {
 158         // DoS protection against long first-line
 159         if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
 160             debugs(33, 5, "Too large request-line");
 161             // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
 162             request_parse_status = Http::scUriTooLong;
 163             return -1;
 164         }
 165
 166         debugs(74, 5, "Parser: retval 0: from " << req.start <<
 167                "->" << req.end << ": needs more data to complete first line.");
 168         return 0;
 169     }
 170
 171     // NP: we have now seen EOL, more-data (0) cannot occur.
 172     //     From here on any failure is -1, success is 1
 173
 174     // Input Validation:
 175
 176     // DoS protection against long first-line
 177     if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
 178         debugs(33, 5, "Too large request-line");
 179         request_parse_status = Http::scUriTooLong;
 180         return -1;
 181     }
 182
 183     // Process what we now know about the line structure into field offsets
 184     // generating HTTP status for any aborts as we go.
 185
 186     // First non-whitespace = beginning of method
 187     if (req.start > line_end) {
 188         request_parse_status = Http::scBadRequest;
 189         return -1;
 190     }
 191     req.m_start = req.start;
 192
 193     // First whitespace = end of method
 194     if (first_whitespace > line_end || first_whitespace < req.start) {
 195         request_parse_status = Http::scBadRequest; // no method
 196         return -1;
 197     }
 198     req.m_end = first_whitespace - 1;
 199     if (req.m_end < req.m_start) {
 200         request_parse_status = Http::scBadRequest; // missing URI?
 201         return -1;
 202     }
 203
 204     /* Set method_ */
 205     const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
 206     method_ = HttpRequestMethod(tmp);
 207
 208     // First non-whitespace after first SP = beginning of URL+Version
 209     if (second_word > line_end || second_word < req.start) {
 210         request_parse_status = Http::scBadRequest; // missing URI
 211         return -1;
 212     }
 213     req.u_start = second_word;
 214
 215     // RFC 1945: SP and version following URI are optional, marking version 0.9
 216     // we identify this by the last whitespace being earlier than URI start
 217     if (last_whitespace < second_word && last_whitespace >= req.start) {
 218         msgProtocol_ = Http::ProtocolVersion(0,9);
 219         req.u_end = line_end;
 220         uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
 221         request_parse_status = Http::scOkay; // HTTP/0.9
 222         return 1;
 223     } else {
 224         // otherwise last whitespace is somewhere after end of URI.
 225         req.u_end = last_whitespace;
 226         // crop any trailing whitespace in the area we think of as URI
 227         for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
 228     }
 229     if (req.u_end < req.u_start) {
 230         request_parse_status = Http::scBadRequest; // missing URI
 231         return -1;
 232     }
 233     uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
 234
 235     // Last whitespace SP = before start of protocol/version
 236     if (last_whitespace >= line_end) {
 237         request_parse_status = Http::scBadRequest; // missing version
 238         return -1;
 239     }
 240     req.v_start = last_whitespace + 1;
 241     req.v_end = line_end;
 242
 243     /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
 244     if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
 245         // non-HTTP/1 protocols not supported / implemented.
 246         request_parse_status = Http::scHttpVersionNotSupported;
 247         return -1;
 248     }
 249     // NP: magic octets include the protocol name and major version DIGIT.
 250     msgProtocol_.protocol = AnyP::PROTO_HTTP;
 251     msgProtocol_.major = 1;
 252
 253     int i = req.v_start + Http1magic.length() -1;
 254
 255     // catch missing minor part
 256     if (++i > line_end) {
 257         request_parse_status = Http::scHttpVersionNotSupported;
 258         return -1;
 259     }
 260     /* next should be one or more digits */
 261     if (!isdigit(buf_[i])) {
 262         request_parse_status = Http::scHttpVersionNotSupported;
 263         return -1;
 264     }
 265     int min = 0;
 266     for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
 267         min = min * 10;
 268         min = min + (buf_[i]) - '0';
 269     }
 270     // catch too-big values or trailing garbage
 271     if (min >= 65536 || i < line_end) {
 272         request_parse_status = Http::scHttpVersionNotSupported;
 273         return -1;
 274     }
 275     msgProtocol_.minor = min;
 276
 277     /*
 278      * Rightio - we have all the schtuff. Return true; we've got enough.
 279      */
 280     request_parse_status = Http::scOkay;
 281     return 1;
 282 }
 283
 284 bool
 285 Http::One::RequestParser::parse(const SBuf &aBuf)
 286 {
 287     buf_ = aBuf;
 288     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 289
 290     // stage 1: locate the request-line
 291     if (parsingStage_ == HTTP_PARSE_NONE) {
 292         skipGarbageLines();
 293
 294         // if we hit something before EOS treat it as a message
 295         if (!buf_.isEmpty())
 296             parsingStage_ = HTTP_PARSE_FIRST;
 297         else
 298             return false;
 299     }
 300
 301     // stage 2: parse the request-line
 302     if (parsingStage_ == HTTP_PARSE_FIRST) {
 303         PROF_start(HttpParserParseReqLine);
 304         const int retcode = parseRequestFirstLine();
 305
 306         // first-line (or a look-alike) found successfully.
 307         if (retcode > 0) {
 308             buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
 309             parsingStage_ = HTTP_PARSE_MIME;
 310         }
 311
 312         debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
 313                " line={" << aBuf.length() << ", data='" << aBuf << "'}");
 314         debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
 315         debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
 316         debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
 317         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
 318         PROF_stop(HttpParserParseReqLine);
 319
 320         // syntax errors already
 321         if (retcode < 0) {
 322             parsingStage_ = HTTP_PARSE_DONE;
 323             return false;
 324         }
 325     }
 326
 327     // stage 3: locate the mime header block
 328     if (parsingStage_ == HTTP_PARSE_MIME) {
 329         // HTTP/1.x request-line is valid and parsing completed.
 330         if (msgProtocol_.major == 1) {
 331             /* NOTE: HTTP/0.9 requests do not have a mime header block.
 332              *       So the rest of the code will need to deal with '0'-byte headers
 333              *       (ie, none, so don't try parsing em)
 334              */
 335             int64_t mimeHeaderBytes = 0;
 336             // XXX: c_str() reallocates. performance regression.
 337             if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
 338                 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
 339                     debugs(33, 5, "Too large request");
 340                     request_parse_status = Http::scRequestHeaderFieldsTooLarge;
 341                     parsingStage_ = HTTP_PARSE_DONE;
 342                 } else
 343                     debugs(33, 5, "Incomplete request, waiting for end of headers");
 344                 return false;
 345             }
 346             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
 347             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 348
 349         } else
 350             debugs(33, 3, "Missing HTTP/1.x identifier");
 351
 352         // NP: we do not do any further stages here yet so go straight to DONE
 353         parsingStage_ = HTTP_PARSE_DONE;
 354
 355         // Squid could handle these headers, but admin does not want to
 356         if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
 357             debugs(33, 5, "Too large request");
 358             request_parse_status = Http::scRequestHeaderFieldsTooLarge;
 359             return false;
 360         }
 361     }
 362
 363     return !needsMoreData();
 364 }
 365