return -1;
}
- /* Set method_ */
- const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
- method_ = HttpRequestMethod(tmp);
-
- // First non-whitespace after first SP = beginning of URL+Version
- if (second_word > line_end || second_word < req.start) {
- request_parse_status = Http::scBadRequest; // missing URI
- return -1;
+ if (tok.atEnd()) {
+ debugs(74, 5, "Parser needs more data to find version");
+ return 0;
}
- req.u_start = second_word;
- // RFC 1945: SP and version following URI are optional, marking version 0.9
- // we identify this by the last whitespace being earlier than URI start
- if (last_whitespace < second_word && last_whitespace >= req.start) {
- msgProtocol_ = Http::ProtocolVersion(0,9);
- req.u_end = line_end;
- uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
- request_parse_status = Http::scOkay; // HTTP/0.9
+ // get the version minor DIGIT
+ SBuf digit;
+ if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
+
+ // found version fully AND terminator
+ msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
+ request_parse_status = Http::scOkay;
+ buf_ = tok.remaining(); // incremental parse checkpoint
return 1;
- } else {
- // otherwise last whitespace is somewhere after end of URI.
- req.u_end = last_whitespace;
- // crop any trailing whitespace in the area we think of as URI
- for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
- }
- if (req.u_end < req.u_start) {
- request_parse_status = Http::scBadRequest; // missing URI
- return -1;
+
+ } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
+ debugs(74, 5, "Parser needs more data to find version");
+ return 0;
+
+ } // else error ...
+
+ // non-DIGIT. invalid version number.
+ request_parse_status = Http::scHttpVersionNotSupported;
- debugs(33, 5, "invalid request-line. garabge before line terminator");
++ debugs(33, 5, "invalid request-line. garbage before line terminator");
+ return -1;
+ }
+
+ /**
+ * Attempt to parse the first line of a new request message.
+ *
+ * Governed by:
+ * RFC 1945 section 5.1
+ * RFC 7230 section 2.6, 3.1 and 3.5
+ *
+ * Parsing state is stored between calls. The current implementation uses
+ * checkpoints after each successful request-line field.
+ * The return value tells you whether the parsing is completed or not.
+ *
+ * Two parse paths exist, selected by Config.onoff.relaxed_header_parser:
+ * - relaxed: extra delimiter characters between fields are skipped and
+ * counted in firstLineGarbage_; the rest of the line up to LF is then
+ * tokenized in reverse to find the version, leaving the URL. A GET line
+ * without a valid HTTP/1 version suffix is accepted as HTTP/0.9.
+ * - strict: the method, URL and HTTP-version fields are parsed in order
+ * by parseMethodField(), parseUriField() and parseHttpVersionField().
+ *
+ * NOTE(review): in the relaxed path the minor version is taken from the
+ * first character of the DIGIT suffix only, and no length limit is passed
+ * to that suffix() call here - confirm how multi-digit minors are treated.
+ *
+ * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
+ * \retval 1 successful parse. member fields contain the request-line items
+ * \retval 0 more data is needed to complete the parse
+ */
+ int
+ Http::One::RequestParser::parseRequestFirstLine()
+ {
+ ::Parser::Tokenizer tok(buf_);
+
+ debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
+ debugs(74, DBG_DATA, buf_);
+
+ // NP: would be static, except it needs to change with reconfigure
+ CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
+
+ if (Config.onoff.relaxed_header_parser) {
+ // RFC 7230 section 3.5
+ // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
+ // as whitespace between request-line fields
+ WspDelim += CharacterSet::HTAB
+ + CharacterSet("VT,FF","\x0B\x0C")
+ + CharacterSet::CR;
}
- uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
- // Last whitespace SP = before start of protocol/version
- if (last_whitespace >= line_end) {
- request_parse_status = Http::scBadRequest; // missing version
- return -1;
+ // only search for method if we have not yet found one (method_ is still METHOD_NONE)
+ if (method_ == Http::METHOD_NONE) {
+ const int res = parseMethodField(tok, WspDelim);
+ if (res < 1)
+ return res;
+ // else keep going...
}
- req.v_start = last_whitespace + 1;
- req.v_end = line_end;
- /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
- if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
- // non-HTTP/1 protocols not supported / implemented.
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
+ // tolerant parser allows multiple whitespace characters between request-line fields; skipped bytes are counted in firstLineGarbage_
+ if (Config.onoff.relaxed_header_parser) {
+ const size_t garbage = tok.skipAll(WspDelim);
+ if (garbage > 0) {
+ firstLineGarbage_ += garbage;
+ buf_ = tok.remaining(); // re-checkpoint after garbage
+ }
+ }
+ if (tok.atEnd()) {
+ debugs(74, 5, "Parser needs more data");
+ return 0;
}
- // NP: magic octets include the protocol name and major version DIGIT.
- msgProtocol_.protocol = AnyP::PROTO_HTTP;
- msgProtocol_.major = 1;
- int i = req.v_start + Http1magic.length() -1;
+ // from here on, we have two possible parse paths: whitespace tolerant, and strict
+ if (Config.onoff.relaxed_header_parser) {
+ // whitespace tolerant
+
+ // NOTES:
+ // * this would be static, except WspDelim changes with reconfigure
+ // * HTTP-version charset is included by uriValidCharacters()
+ // * terminal CR is included by WspDelim here in relaxed parsing
+ CharacterSet LfDelim = uriValidCharacters() + WspDelim;
+
+ // seek the LF character, then tokenize the line in reverse: optional CR, version DIGIT, HTTP/1 magic, delimiter; what remains is the URL
+ SBuf line;
+ if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
+ ::Parser::Tokenizer rTok(line);
+ SBuf nil;
+ (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
+ SBuf digit;
+ if (rTok.suffix(digit,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
+ uri_ = rTok.remaining();
+ msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
+ if (uri_.isEmpty()) {
+ debugs(33, 5, "invalid request-line. missing URL");
+ request_parse_status = Http::scBadRequest;
+ return -1;
+ }
- // catch missing minor part
- if (++i > line_end) {
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
+ request_parse_status = Http::scOkay;
+ buf_ = tok.remaining(); // incremental parse checkpoint
+ return 1;
+
+ } else if (method_ == Http::METHOD_GET) {
+ // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
+ debugs(33, 5, "HTTP/0.9 syntax request-line detected");
+ msgProtocol_ = Http::ProtocolVersion(0,9);
+ static const SBuf cr("\r",1);
+ uri_ = line.trim(cr,false,true);
+ request_parse_status = Http::scOkay;
+ buf_ = tok.remaining(); // incremental parse checkpoint
+ return 1;
+ }
+
+ debugs(33, 5, "invalid request-line. not HTTP");
+ request_parse_status = Http::scBadRequest;
+ return -1;
+ }
+
+ debugs(74, 5, "Parser needs more data");
+ return 0;
}
- /* next should be one or more digits */
- if (!isdigit(buf_[i])) {
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
+ // else strict non-whitespace tolerant parse
+
+ // only search for request-target (URL) if we have not yet found one
+ if (uri_.isEmpty()) {
+ const int res = parseUriField(tok);
+ if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
+ return res;
+ // else keep going: URL parsed, msgProtocol_ is not HTTP yet
}
- int min = 0;
- for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
- min = min * 10;
- min = min + (buf_[i]) - '0';
+
+ if (tok.atEnd()) {
+ debugs(74, 5, "Parser needs more data");
+ return 0;
}
- // catch too-big values or trailing garbage
- if (min >= 65536 || i < line_end) {
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
+
+ // HTTP/1 version suffix (protocol magic) followed by CR*LF
+ if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
+ return parseHttpVersionField(tok);
}
- msgProtocol_.minor = min;
- /*
- * Rightio - we have all the schtuff. Return true; we've got enough.
- */
- request_parse_status = Http::scOkay;
- return 1;
+ // If we got here this method has been called too many times (method, URL and protocol are all already set)
+ request_parse_status = Http::scInternalServerError;
+ debugs(33, 5, "ERROR: Parser already processed request-line");
+ return -1;
}
bool