src/http/one/RequestParser.cc

   1 #include "squid.h"
   2 #include "Debug.h"
   3 #include "http/one/RequestParser.h"
   4 #include "http/ProtocolVersion.h"
   5 #include "mime_header.h"
   6 #include "profiler/Profiler.h"
   7 #include "SquidConfig.h"
   8
   9 void
  10 Http::One::RequestParser::clear()
  11 {
  12     Http1::Parser::clear();
  13
  14     request_parse_status = Http::scNone;
  15     req.start = req.end = -1;
  16     req.m_start = req.m_end = -1;
  17     req.u_start = req.u_end = -1;
  18     req.v_start = req.v_end = -1;
  19     method_ = HttpRequestMethod();
  20 }
  21
  22 /**
  23  * Attempt to parse the first line of a new request message.
  24  *
  25  * Governed by RFC 2616 section 4.1
  26  *  "
  27  *    In the interest of robustness, servers SHOULD ignore any empty
  28  *    line(s) received where a Request-Line is expected. In other words, if
  29  *    the server is reading the protocol stream at the beginning of a
  30  *    message and receives a CRLF first, it should ignore the CRLF.
  31  *
  32  *    ... To restate what is explicitly forbidden by the
  33  *    BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an
  34  *    extra CRLF.
  35  *  "
  36  *
  37  * Parsing state is stored between calls to avoid repeating buffer scans.
  38  * If garbage is found the parsing offset is incremented.
  39  */
  40 void
  41 Http::One::RequestParser::skipGarbageLines()
  42 {
  43 #if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ...
  44     if (Config.onoff.relaxed_header_parser) {
  45         if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
  46             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  47                    "CRLF bytes received ahead of request-line. " <<
  48                    "Ignored due to relaxed_header_parser.");
  49         // Be tolerant of prefix empty lines
  50         // ie any series of either \n or \r\n with no other characters and no repeated \r
  51         while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_[0] == '\r' && buf_[1] == '\n'))) {
  52             buf_.consume(1);
  53         }
  54     }
  55 #endif
  56
  57     /* XXX: this is a Squid-specific tolerance
  58      * it appears never to have been relevant outside out unit-tests
  59      * because the ConnStateData parser loop starts with consumeWhitespace()
  60      * which absorbs any SP HTAB VTAB CR LF characters.
  61      * But unit-tests called the HttpParser method directly without that pruning.
  62      */
  63 #if USE_HTTP_VIOLATIONS
  64     if (Config.onoff.relaxed_header_parser) {
  65         if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
  66             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  67                    "Whitespace bytes received ahead of method. " <<
  68                    "Ignored due to relaxed_header_parser.");
  69         // Be tolerant of prefix spaces (other bytes are valid method values)
  70         while (!buf_.isEmpty() && buf_[0] == ' ') {
  71             buf_.consume(1);
  72         }
  73     }
  74 #endif
  75 }
  76
  77 /**
  78  * Attempt to parse the first line of a new request message.
  79  *
  80  * Governed by:
  81  *  RFC 1945 section 5.1
  82  *  RFC 2616 section 5.1
  83  *  RFC 7230
  84  *
  85  * Parsing state is stored between calls. However the current implementation
  86  * begins parsing from scratch on every call.
  87  * The return value tells you whether the parsing state fields are valid or not.
  88  *
  89  * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
  90  * \retval  1  successful parse. member fields contain the request-line items
  91  * \retval  0  more data is needed to complete the parse
  92  */
  93 int
  94 Http::One::RequestParser::parseRequestFirstLine()
  95 {
  96     int second_word = -1; // track the suspected URI start
  97     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
  98     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
  99
 100     debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
 101     debugs(74, DBG_DATA, buf_);
 102
 103     // Single-pass parse: (provided we have the whole line anyways)
 104
 105     req.start = 0;
 106     req.end = -1;
 107     for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
 108         // track first and last whitespace (SP only)
 109         if (buf_[i] == ' ') {
 110             last_whitespace = i;
 111             if (first_whitespace < req.start)
 112                 first_whitespace = i;
 113         }
 114
 115         // track next non-SP/non-HT byte after first_whitespace
 116         if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
 117             second_word = i;
 118         }
 119
 120         // locate line terminator
 121         if (buf_[i] == '\n') {
 122             req.end = i;
 123             line_end = i - 1;
 124             break;
 125         }
 126         if (i < buf_.length() - 1 && buf_[i] == '\r') {
 127             if (Config.onoff.relaxed_header_parser) {
 128                 if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
 129                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
 130                            "Series of carriage-return bytes received prior to line terminator. " <<
 131                            "Ignored due to relaxed_header_parser.");
 132
 133                 // Be tolerant of invalid multiple \r prior to terminal \n
 134                 if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
 135                     line_end = i - 1;
 136                 while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
 137                     ++i;
 138
 139                 if (buf_[i + 1] == '\n') {
 140                     req.end = i + 1;
 141                     break;
 142                 }
 143             } else {
 144                 if (buf_[i + 1] == '\n') {
 145                     req.end = i + 1;
 146                     line_end = i - 1;
 147                     break;
 148                 }
 149             }
 150
 151             // RFC 2616 section 5.1
 152             // "No CR or LF is allowed except in the final CRLF sequence"
 153             request_parse_status = Http::scBadRequest;
 154             return -1;
 155         }
 156     }
 157
 158     if (req.end == -1) {
 159         // DoS protection against long first-line
 160         if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
 161             debugs(33, 5, "Too large request-line");
 162             // XXX: return URL-too-log status code if second_whitespace is not yet found.
 163             request_parse_status = Http::scHeaderTooLarge;
 164             return -1;
 165         }
 166
 167         debugs(74, 5, "Parser: retval 0: from " << req.start <<
 168                "->" << req.end << ": needs more data to complete first line.");
 169         return 0;
 170     }
 171
 172     // NP: we have now seen EOL, more-data (0) cannot occur.
 173     //     From here on any failure is -1, success is 1
 174
 175     // Input Validation:
 176
 177     // DoS protection against long first-line
 178     if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
 179         debugs(33, 5, "Too large request-line");
 180         request_parse_status = Http::scHeaderTooLarge;
 181         return -1;
 182     }
 183
 184     // Process what we now know about the line structure into field offsets
 185     // generating HTTP status for any aborts as we go.
 186
 187     // First non-whitespace = beginning of method
 188     if (req.start > line_end) {
 189         request_parse_status = Http::scBadRequest;
 190         return -1;
 191     }
 192     req.m_start = req.start;
 193
 194     // First whitespace = end of method
 195     if (first_whitespace > line_end || first_whitespace < req.start) {
 196         request_parse_status = Http::scBadRequest; // no method
 197         return -1;
 198     }
 199     req.m_end = first_whitespace - 1;
 200     if (req.m_end < req.m_start) {
 201         request_parse_status = Http::scBadRequest; // missing URI?
 202         return -1;
 203     }
 204
 205     /* Set method_ */
 206     const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
 207     method_ = HttpRequestMethod(tmp);
 208
 209     // First non-whitespace after first SP = beginning of URL+Version
 210     if (second_word > line_end || second_word < req.start) {
 211         request_parse_status = Http::scBadRequest; // missing URI
 212         return -1;
 213     }
 214     req.u_start = second_word;
 215
 216     // RFC 1945: SP and version following URI are optional, marking version 0.9
 217     // we identify this by the last whitespace being earlier than URI start
 218     if (last_whitespace < second_word && last_whitespace >= req.start) {
 219         msgProtocol_ = Http::ProtocolVersion(0,9);
 220         req.u_end = line_end;
 221         uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
 222         request_parse_status = Http::scOkay; // HTTP/0.9
 223         return 1;
 224     } else {
 225         // otherwise last whitespace is somewhere after end of URI.
 226         req.u_end = last_whitespace;
 227         // crop any trailing whitespace in the area we think of as URI
 228         for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
 229     }
 230     if (req.u_end < req.u_start) {
 231         request_parse_status = Http::scBadRequest; // missing URI
 232         return -1;
 233     }
 234     uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
 235
 236     // Last whitespace SP = before start of protocol/version
 237     if (last_whitespace >= line_end) {
 238         request_parse_status = Http::scBadRequest; // missing version
 239         return -1;
 240     }
 241     req.v_start = last_whitespace + 1;
 242     req.v_end = line_end;
 243
 244     /* RFC 2616 section 10.5.6 : handle unsupported HTTP major versions cleanly. */
 245     if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
 246         // non-HTTP/1 protocols not supported / implemented.
 247         request_parse_status = Http::scHttpVersionNotSupported;
 248         return -1;
 249     }
 250     // NP: magic octets include the protocol name and major version DIGIT.
 251     msgProtocol_.protocol = AnyP::PROTO_HTTP;
 252     msgProtocol_.major = 1;
 253
 254     int i = req.v_start + Http1magic.length() -1;
 255
 256     // catch missing minor part
 257     if (++i > line_end) {
 258         request_parse_status = Http::scHttpVersionNotSupported;
 259         return -1;
 260     }
 261     /* next should be one or more digits */
 262     if (!isdigit(buf_[i])) {
 263         request_parse_status = Http::scHttpVersionNotSupported;
 264         return -1;
 265     }
 266     int min = 0;
 267     for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
 268         min = min * 10;
 269         min = min + (buf_[i]) - '0';
 270     }
 271     // catch too-big values or trailing garbage
 272     if (min >= 65536 || i < line_end) {
 273         request_parse_status = Http::scHttpVersionNotSupported;
 274         return -1;
 275     }
 276     msgProtocol_.minor = min;
 277
 278     /*
 279      * Rightio - we have all the schtuff. Return true; we've got enough.
 280      */
 281     request_parse_status = Http::scOkay;
 282     return 1;
 283 }
 284
 285 bool
 286 Http::One::RequestParser::parse(const SBuf &aBuf)
 287 {
 288     buf_ = aBuf;
 289     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 290
 291     // stage 1: locate the request-line
 292     if (parsingStage_ == HTTP_PARSE_NONE) {
 293         skipGarbageLines();
 294
 295         // if we hit something before EOS treat it as a message
 296         if (!buf_.isEmpty())
 297             parsingStage_ = HTTP_PARSE_FIRST;
 298         else
 299             return false;
 300     }
 301
 302     // stage 2: parse the request-line
 303     if (parsingStage_ == HTTP_PARSE_FIRST) {
 304         PROF_start(HttpParserParseReqLine);
 305         const int retcode = parseRequestFirstLine();
 306
 307         // first-line (or a look-alike) found successfully.
 308         if (retcode > 0) {
 309             buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
 310             parsingStage_ = HTTP_PARSE_MIME;
 311         }
 312
 313         debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
 314                " line={" << aBuf.length() << ", data='" << aBuf << "'}");
 315         debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
 316         debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
 317         debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
 318         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
 319         PROF_stop(HttpParserParseReqLine);
 320
 321         // syntax errors already
 322         if (retcode < 0) {
 323             parsingStage_ = HTTP_PARSE_DONE;
 324             return false;
 325         }
 326     }
 327
 328     // stage 3: locate the mime header block
 329     if (parsingStage_ == HTTP_PARSE_MIME) {
 330         // HTTP/1.x request-line is valid and parsing completed.
 331         if (msgProtocol_.major == 1) {
 332             /* NOTE: HTTP/0.9 requests do not have a mime header block.
 333              *       So the rest of the code will need to deal with '0'-byte headers
 334              *       (ie, none, so don't try parsing em)
 335              */
 336             int64_t mimeHeaderBytes = 0;
 337             // XXX: c_str() reallocates. performance regression.
 338             if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
 339                 if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
 340                     debugs(33, 5, "Too large request");
 341                     request_parse_status = Http::scHeaderTooLarge;
 342                     parsingStage_ = HTTP_PARSE_DONE;
 343                 } else
 344                     debugs(33, 5, "Incomplete request, waiting for end of headers");
 345                 return false;
 346             }
 347             mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
 348             debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 349
 350         } else
 351             debugs(33, 3, "Missing HTTP/1.x identifier");
 352
 353         // NP: we do not do any further stages here yet so go straight to DONE
 354         parsingStage_ = HTTP_PARSE_DONE;
 355
 356         // Squid could handle these headers, but admin does not want to
 357         if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
 358             debugs(33, 5, "Too large request");
 359             request_parse_status = Http::scHeaderTooLarge;
 360             return false;
 361         }
 362     }
 363
 364     return !needsMoreData();
 365 }