src/http/one/RequestParser.cc

   1 #include "squid.h"
   2 #include "Debug.h"
   3 #include "http/one/RequestParser.h"
   4 #include "http/ProtocolVersion.h"
   5 #include "mime_header.h"
   6 #include "profiler/Profiler.h"
   7 #include "SquidConfig.h"
   8
   9 void
  10 Http::One::RequestParser::clear()
  11 {
  12     Http1::Parser::clear();
  13
  14     request_parse_status = Http::scNone;
  15     req.start = req.end = -1;
  16     req.m_start = req.m_end = -1;
  17     req.u_start = req.u_end = -1;
  18     req.v_start = req.v_end = -1;
  19     method_ = HttpRequestMethod();
  20 }
  21
  22 /**
  23  * Attempt to parse the first line of a new request message.
  24  *
  25  * Governed by RFC 2616 section 4.1
  26  *  "
  27  *    In the interest of robustness, servers SHOULD ignore any empty
  28  *    line(s) received where a Request-Line is expected. In other words, if
  29  *    the server is reading the protocol stream at the beginning of a
  30  *    message and receives a CRLF first, it should ignore the CRLF.
  31  *
  32  *    ... To restate what is explicitly forbidden by the
  33  *    BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an
  34  *    extra CRLF.
  35  *  "
  36  *
  37  * Parsing state is stored between calls to avoid repeating buffer scans.
  38  * If garbage is found the parsing offset is incremented.
  39  */
  40 void
  41 Http::One::RequestParser::skipGarbageLines()
  42 {
  43 #if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ...
  44     if (Config.onoff.relaxed_header_parser) {
  45         if (Config.onoff.relaxed_header_parser < 0 && (buf[0] == '\r' || buf[0] == '\n'))
  46             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  47                    "CRLF bytes received ahead of request-line. " <<
  48                    "Ignored due to relaxed_header_parser.");
  49         // Be tolerant of prefix empty lines
  50         // ie any series of either \n or \r\n with no other characters and no repeated \r
  51         while (!buf.isEmpty() && (buf[0] == '\n' || (buf[0] == '\r' && buf[1] == '\n'))) {
  52             buf.consume(1);
  53         }
  54     }
  55 #endif
  56
  57     /* XXX: this is a Squid-specific tolerance
  58      * it appears never to have been relevant outside out unit-tests
  59      * because the ConnStateData parser loop starts with consumeWhitespace()
  60      * which absorbs any SP HTAB VTAB CR LF characters.
  61      * But unit-tests called the HttpParser method directly without that pruning.
  62      */
  63 #if USE_HTTP_VIOLATIONS
  64     if (Config.onoff.relaxed_header_parser) {
  65         if (Config.onoff.relaxed_header_parser < 0 && buf[0] == ' ')
  66             debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
  67                    "Whitespace bytes received ahead of method. " <<
  68                    "Ignored due to relaxed_header_parser.");
  69         // Be tolerant of prefix spaces (other bytes are valid method values)
  70         while (!buf.isEmpty() && buf[0] == ' ') {
  71             buf.consume(1);
  72         }
  73     }
  74 #endif
  75 }
  76
  77 /**
  78  * Attempt to parse the first line of a new request message.
  79  *
  80  * Governed by:
  81  *  RFC 1945 section 5.1
  82  *  RFC 2616 section 5.1
  83  *
  84  * Parsing state is stored between calls. However the current implementation
  85  * begins parsing from scratch on every call.
  86  * The return value tells you whether the parsing state fields are valid or not.
  87  *
  88  * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
  89  * \retval  1  successful parse. member fields contain the request-line items
  90  * \retval  0  more data is needed to complete the parse
  91  */
  92 int
  93 Http::One::RequestParser::parseRequestFirstLine()
  94 {
  95     int second_word = -1; // track the suspected URI start
  96     int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
  97     int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
  98
  99     debugs(74, 5, "parsing possible request: buf.length=" << buf.length());
 100     debugs(74, DBG_DATA, buf);
 101
 102     // Single-pass parse: (provided we have the whole line anyways)
 103
 104     req.start = 0;
 105     req.end = -1;
 106     for (SBuf::size_type i = 0; i < buf.length(); ++i) {
 107         // track first and last whitespace (SP only)
 108         if (buf[i] == ' ') {
 109             last_whitespace = i;
 110             if (first_whitespace < req.start)
 111                 first_whitespace = i;
 112         }
 113
 114         // track next non-SP/non-HT byte after first_whitespace
 115         if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
 116             second_word = i;
 117         }
 118
 119         // locate line terminator
 120         if (buf[i] == '\n') {
 121             req.end = i;
 122             line_end = i - 1;
 123             break;
 124         }
 125         if (i < buf.length() - 1 && buf[i] == '\r') {
 126             if (Config.onoff.relaxed_header_parser) {
 127                 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
 128                     debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
 129                            "Series of carriage-return bytes received prior to line terminator. " <<
 130                            "Ignored due to relaxed_header_parser.");
 131
 132                 // Be tolerant of invalid multiple \r prior to terminal \n
 133                 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
 134                     line_end = i - 1;
 135                 while (i < buf.length() - 1 && buf[i + 1] == '\r')
 136                     ++i;
 137
 138                 if (buf[i + 1] == '\n') {
 139                     req.end = i + 1;
 140                     break;
 141                 }
 142             } else {
 143                 if (buf[i + 1] == '\n') {
 144                     req.end = i + 1;
 145                     line_end = i - 1;
 146                     break;
 147                 }
 148             }
 149
 150             // RFC 2616 section 5.1
 151             // "No CR or LF is allowed except in the final CRLF sequence"
 152             request_parse_status = Http::scBadRequest;
 153             return -1;
 154         }
 155     }
 156
 157     if (req.end == -1) {
 158         // DoS protection against long first-line
 159         if ((size_t)buf.length() >= Config.maxRequestHeaderSize) {
 160             debugs(33, 5, "Too large request-line");
 161             // XXX: return URL-too-log status code if second_whitespace is not yet found.
 162             request_parse_status = Http::scHeaderTooLarge;
 163             return -1;
 164         }
 165
 166         debugs(74, 5, "Parser: retval 0: from " << req.start <<
 167                "->" << req.end << ": needs more data to complete first line.");
 168         return 0;
 169     }
 170
 171     // NP: we have now seen EOL, more-data (0) cannot occur.
 172     //     From here on any failure is -1, success is 1
 173
 174     // Input Validation:
 175
 176     // DoS protection against long first-line
 177     if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
 178         debugs(33, 5, "Too large request-line");
 179         request_parse_status = Http::scHeaderTooLarge;
 180         return -1;
 181     }
 182
 183     // Process what we now know about the line structure into field offsets
 184     // generating HTTP status for any aborts as we go.
 185
 186     // First non-whitespace = beginning of method
 187     if (req.start > line_end) {
 188         request_parse_status = Http::scBadRequest;
 189         return -1;
 190     }
 191     req.m_start = req.start;
 192
 193     // First whitespace = end of method
 194     if (first_whitespace > line_end || first_whitespace < req.start) {
 195         request_parse_status = Http::scBadRequest; // no method
 196         return -1;
 197     }
 198     req.m_end = first_whitespace - 1;
 199     if (req.m_end < req.m_start) {
 200         request_parse_status = Http::scBadRequest; // missing URI?
 201         return -1;
 202     }
 203
 204     /* Set method_ */
 205     SBuf tmp = buf.substr(req.m_start, req.m_end - req.m_start + 1);
 206     method_ = HttpRequestMethod(tmp);
 207
 208     // First non-whitespace after first SP = beginning of URL+Version
 209     if (second_word > line_end || second_word < req.start) {
 210         request_parse_status = Http::scBadRequest; // missing URI
 211         return -1;
 212     }
 213     req.u_start = second_word;
 214
 215     // RFC 1945: SP and version following URI are optional, marking version 0.9
 216     // we identify this by the last whitespace being earlier than URI start
 217     if (last_whitespace < second_word && last_whitespace >= req.start) {
 218         msgProtocol_ = Http::ProtocolVersion(0,9);
 219         req.u_end = line_end;
 220         uri_ = buf.substr(req.u_start, req.u_end - req.u_start + 1);
 221         request_parse_status = Http::scOkay; // HTTP/0.9
 222         return 1;
 223     } else {
 224         // otherwise last whitespace is somewhere after end of URI.
 225         req.u_end = last_whitespace;
 226         // crop any trailing whitespace in the area we think of as URI
 227         for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end);
 228     }
 229     if (req.u_end < req.u_start) {
 230         request_parse_status = Http::scBadRequest; // missing URI
 231         return -1;
 232     }
 233     uri_ = buf.substr(req.u_start, req.u_end - req.u_start + 1);
 234
 235     // Last whitespace SP = before start of protocol/version
 236     if (last_whitespace >= line_end) {
 237         request_parse_status = Http::scBadRequest; // missing version
 238         return -1;
 239     }
 240     req.v_start = last_whitespace + 1;
 241     req.v_end = line_end;
 242
 243     // We only accept HTTP protocol requests right now.
 244     // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
 245     if ((req.v_end - req.v_start +1) < 5 || buf.substr(req.v_start, 5).caseCmp(SBuf("HTTP/")) != 0) {
 246 #if USE_HTTP_VIOLATIONS
 247         // being lax; old parser accepted strange versions
 248         // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
 249         msgProtocol_ = Http::ProtocolVersion(0,9);
 250         req.u_end = line_end;
 251         request_parse_status = Http::scOkay; // treat as HTTP/0.9
 252         return 1;
 253 #else
 254         // protocol not supported / implemented.
 255         request_parse_status = Http::scHttpVersionNotSupported;
 256         return -1;
 257 #endif
 258     }
 259     msgProtocol_.protocol = AnyP::PROTO_HTTP;
 260
 261     int i = req.v_start + sizeof("HTTP/") -1;
 262
 263     /* next should be 1 or more digits */
 264     if (!isdigit(buf[i])) {
 265         request_parse_status = Http::scHttpVersionNotSupported;
 266         return -1;
 267     }
 268     int maj = 0;
 269     for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) {
 270         maj = maj * 10;
 271         maj = maj + (buf[i]) - '0';
 272     }
 273     // catch too-big values or missing remainders
 274     if (maj >= 65536 || i > line_end) {
 275         request_parse_status = Http::scHttpVersionNotSupported;
 276         return -1;
 277     }
 278     msgProtocol_.major = maj;
 279
 280     /* next should be .; we -have- to have this as we have a whole line.. */
 281     if (buf[i] != '.') {
 282         request_parse_status = Http::scHttpVersionNotSupported;
 283         return -1;
 284     }
 285     // catch missing minor part
 286     if (++i > line_end) {
 287         request_parse_status = Http::scHttpVersionNotSupported;
 288         return -1;
 289     }
 290     /* next should be one or more digits */
 291     if (!isdigit(buf[i])) {
 292         request_parse_status = Http::scHttpVersionNotSupported;
 293         return -1;
 294     }
 295     int min = 0;
 296     for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) {
 297         min = min * 10;
 298         min = min + (buf[i]) - '0';
 299     }
 300     // catch too-big values or trailing garbage
 301     if (min >= 65536 || i < line_end) {
 302         request_parse_status = Http::scHttpVersionNotSupported;
 303         return -1;
 304     }
 305     msgProtocol_.minor = min;
 306
 307     /* RFC 2616 section 10.5.6 : handle unsupported HTTP major versions cleanly. */
 308     /* We currently only support 0.9, 1.0, 1.1 properly in this parser */
 309     if ((maj == 0 && min != 9) || (maj > 1)) {
 310         request_parse_status = Http::scHttpVersionNotSupported;
 311         return -1;
 312     }
 313
 314     /*
 315      * Rightio - we have all the schtuff. Return true; we've got enough.
 316      */
 317     request_parse_status = Http::scOkay;
 318     return 1;
 319 }
 320
 321 bool
 322 Http::One::RequestParser::parse(const SBuf &aBuf)
 323 {
 324     buf = aBuf;
 325     debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
 326
 327     // stage 1: locate the request-line
 328     if (parsingStage_ == HTTP_PARSE_NONE) {
 329         skipGarbageLines();
 330
 331         // if we hit something before EOS treat it as a message
 332         if (!buf.isEmpty())
 333             parsingStage_ = HTTP_PARSE_FIRST;
 334         else
 335             return false;
 336     }
 337
 338     // stage 2: parse the request-line
 339     if (parsingStage_ == HTTP_PARSE_FIRST) {
 340         PROF_start(HttpParserParseReqLine);
 341         const int retcode = parseRequestFirstLine();
 342
 343         // first-line (or a look-alike) found successfully.
 344         if (retcode > 0) {
 345             buf.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
 346             parsingStage_ = HTTP_PARSE_MIME;
 347         }
 348
 349         debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
 350                " line={" << aBuf.length() << ", data='" << aBuf << "'}");
 351         debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
 352         debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
 353         debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
 354         debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf.length()));
 355         PROF_stop(HttpParserParseReqLine);
 356
 357         // syntax errors already
 358         if (retcode < 0) {
 359             parsingStage_ = HTTP_PARSE_DONE;
 360             return false;
 361         }
 362     }
 363
 364     // stage 3: locate the mime header block
 365     if (parsingStage_ == HTTP_PARSE_MIME) {
 366         // HTTP/1.x request-line is valid and parsing completed.
 367         if (msgProtocol_.major == 1) {
 368             /* NOTE: HTTP/0.9 requests do not have a mime header block.
 369              *       So the rest of the code will need to deal with '0'-byte headers
 370              *       (ie, none, so don't try parsing em)
 371              */
 372             int64_t mimeHeaderBytes = 0;
 373             if ((mimeHeaderBytes = headersEnd(buf.c_str(), buf.length())) == 0) {
 374                 if (buf.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
 375                     debugs(33, 5, "Too large request");
 376                     request_parse_status = Http::scHeaderTooLarge;
 377                     parsingStage_ = HTTP_PARSE_DONE;
 378                 } else
 379                     debugs(33, 5, "Incomplete request, waiting for end of headers");
 380                 return false;
 381             }
 382             mimeHeaderBlock_ = buf.substr(req.end+1, mimeHeaderBytes);
 383             buf.consume(mimeHeaderBytes); // done with these bytes now.
 384
 385         } else
 386             debugs(33, 3, "Missing HTTP/1.x identifier");
 387
 388         // NP: we do not do any further stages here yet so go straight to DONE
 389         parsingStage_ = HTTP_PARSE_DONE;
 390
 391         // Squid could handle these headers, but admin does not want to
 392         if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
 393             debugs(33, 5, "Too large request");
 394             request_parse_status = Http::scHeaderTooLarge;
 395             return false;
 396         }
 397     }
 398
 399     return !needsMoreData();
 400 }