return -1;
}
+static CharacterSet
+uriValidCharacters()
+{
+ CharacterSet UriChars("URI-Chars","");
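+ // start from an empty, labelled set; the RFC 3986 members are added below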
+
+ /* RFC 3986 section 2:
+ * "
+ * A URI is composed from a limited set of characters consisting of
+ * digits, letters, and a few graphic symbols.
+ * "
+ */
+ // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
+ UriChars.add('%');
+ UriChars += CharacterSet::HEXDIG;
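+ // NP: this admits '%' and HEXDIG as characters only; the "%" HEXDIG HEXDIG sequence itself is not enforced here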
+ // RFC 3986 section 2.2 - reserved characters
+ UriChars += CharacterSet("gen-delims", ":/?#[]@");
+ UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
+ // RFC 3986 section 2.3 - unreserved characters
+ UriChars += CharacterSet::ALPHA;
+ UriChars += CharacterSet::DIGIT;
+ UriChars += CharacterSet("unreserved", "-._~");
+
+ return UriChars;
+}
+
int
-Http::One::RequestParser::parseUriField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
+Http::One::RequestParser::parseUriField(::Parser::Tokenizer &tok)
{
// URI field is a sequence of ... what? segments all have different valid charset
// go with non-whitespace non-binary characters for now
- static CharacterSet UriChars("URI-Chars","");
- if (!UriChars['a']) { // if it needs initializing...
- /* RFC 3986 section 2:
- * "
- * A URI is composed from a limited set of characters consisting of
- * digits, letters, and a few graphic symbols.
- * "
- */
- // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
- UriChars.add('%');
- UriChars += CharacterSet::HEXDIG;
- // RFC 3986 section 2.2 - reserved characters
- UriChars += CharacterSet("gen-delims", ":/?#[]@");
- UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
- // RFC 3986 section 2.3 - unreserved characters
- UriChars += CharacterSet::ALPHA;
- UriChars += CharacterSet::DIGIT;
- UriChars += CharacterSet("unreserved", "-._~");
- }
+ static CharacterSet UriChars = uriValidCharacters();
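+ // NP: built once, on first call, by the function-local static initializer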
/* Arbitrary 64KB URI upper length limit.
*
static_cast<size_t>((64*1024)-1));
SBuf uriFound;
- if (!tok.prefix(uriFound, UriChars, maxUriLength)) {
- // NP: prefix() returns true if it finds ANY valid chars
- debugs(33, 5, "invalid request-line. missing URL");
- request_parse_status = Http::scBadRequest;
- return -1;
- }
-
- /* NOTE: we do have to check for token/state in this order.
- * Because RFC 7230 tolerant parse accepts CR as a whitespace
- * delimiter in HTTP/1.1 and we may not yet have the LF final
- * terminator character on HTTP/0.9 simple-request lines.
- */
- // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
- if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
- debugs(33, 5, "HTTP/0.9 syntax request-line detected");
- msgProtocol_ = Http::ProtocolVersion(0,9);
+ // RFC 7230 HTTP/1.x URIs are followed by at least one whitespace delimiter
+ if (tok.prefix(uriFound, UriChars, maxUriLength) && tok.skipOne(CharacterSet::SP)) {
uri_ = uriFound;
- request_parse_status = Http::scOkay;
buf_ = tok.remaining(); // incremental parse checkpoint
return 1;
- } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
- debugs(74, 5, "Parser needs more data to find URI");
- return 0;
- }
- // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
- if (tok.skipOne(WspDelim)) {
- uri_ = uriFound;
+ // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
+ } else if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
+ debugs(33, 5, "HTTP/0.9 syntax request-line detected");
+ msgProtocol_ = Http::ProtocolVersion(0,9);
+ uri_ = uriFound; // found by successful prefix() call earlier.
+ request_parse_status = Http::scOkay;
buf_ = tok.remaining(); // incremental parse checkpoint
return 1;
// else errors...
if (uriFound.length() == maxUriLength) {
- // URL longer than acceptible.
// RFC 7230 section 3.1.1 mandatory (MUST) 414 response
request_parse_status = Http::scUriTooLong;
debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
} else {
- // invalid non-delimiter character ended the URL
// RFC 7230 section 3.1.1 required (SHOULD) 400 response
request_parse_status = Http::scBadRequest;
debugs(33, 5, "invalid request-line. missing URI delimiter");
debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
debugs(74, DBG_DATA, buf_);
+ // NP: would be static, except it needs to change with reconfigure
CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
if (Config.onoff.relaxed_header_parser) {
// tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
// as whitespace between request-line fields
WspDelim += CharacterSet::HTAB
- + CharacterSet("VT,FF","\x0B\x0C")
- + CharacterSet::CR;
+ + CharacterSet("VT,FF","\x0B\x0C")
+ + CharacterSet::CR;
}
// only search for method if we have not yet found one
return 0;
}
+ // from here on, we have two possible parse paths: whitespace-tolerant and strict
+ if (Config.onoff.relaxed_header_parser) {
+ // whitespace tolerant
+
+ // NOTES:
+ // * this would be static, except WspDelim changes with reconfigure
+ // * HTTP-version charset is included by uriValidCharacters()
+ // * terminal CR is included by WspDelim here in relaxed parsing
+ CharacterSet LfDelim = uriValidCharacters() + WspDelim;
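+ // i.e. all octets tolerated on a request-line before the terminating LF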
+
+ // seek the LF character, then tokenize the line in reverse
+ SBuf line;
+ if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
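+ // a complete line is buffered: prefix() stopped at the LF that skip() just consumed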
+ ::Parser::Tokenizer rTok(line);
+ SBuf nil;
+ (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
+ SBuf digit;
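+ // working right-to-left: minor-version DIGIT, the "HTTP/1." magic, then the whitespace delimiter; the rest is the URI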
+ if (rTok.suffix(digit,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
+ uri_ = rTok.remaining();
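+ // first byte of the DIGIT suffix, converted from ASCII to the minor version number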
+ msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
+ if (uri_.isEmpty()) {
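+ // e.g. "GET HTTP/1.1" - a version but no request-target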
+ debugs(33, 5, "invalid request-line. missing URL");
+ request_parse_status = Http::scBadRequest;
+ return -1;
+ }
+
+ request_parse_status = Http::scOkay;
+ buf_ = tok.remaining(); // incremental parse checkpoint
+ return 1;
+
+ } else if (method_ == Http::METHOD_GET) {
+ // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
+ debugs(33, 5, "HTTP/0.9 syntax request-line detected");
+ msgProtocol_ = Http::ProtocolVersion(0,9);
+ static const SBuf cr("\r",1);
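+ // the terminating LF was consumed by skip('\n') above; drop any CR that preceded it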
+ uri_ = line.trim(cr,false,true);
+ request_parse_status = Http::scOkay;
+ buf_ = tok.remaining(); // incremental parse checkpoint
+ return 1;
+ }
+
+ debugs(33, 5, "invalid request-line. not HTTP");
+ request_parse_status = Http::scBadRequest;
+ return -1;
+ }
+
+ debugs(74, 5, "Parser needs more data");
+ return 0;
+ }
+ // else strict non-whitespace tolerant parse
+
// only search for request-target (URL) if we have not yet found one
if (uri_.isEmpty()) {
- const int res = parseUriField(tok, WspDelim);
+ const int res = parseUriField(tok);
if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
return res;
// else keep going...
}
- // tolerant parser allows multiple whitespace characters between request-line fields
- if (Config.onoff.relaxed_header_parser) {
- const size_t garbage = tok.skipAll(WspDelim);
- if (garbage > 0) {
- firstLineGarbage_ += garbage;
- buf_ = tok.remaining(); // re-checkpoint after garbage
- }
- }
if (tok.atEnd()) {
debugs(74, 5, "Parser needs more data");
return 0;
input.clear();
}
-#if 0 // XXX: RFC compliant parser does not have tolerance for this (yet)
// whitespace inside URI. (nasty but happens)
{
input.append("GET /fo o/ HTTP/1.1\r\n", 21);
.parsed = false,
.needsMore = false,
.parserState = Http1::HTTP_PARSE_DONE,
- .status = Http::scBadRequest,
- .suffixSz = 0,
+ .status = Http::scHttpVersionNotSupported, // version being "o/ HTTP/1.1"
+ .suffixSz = 13,
.method = HttpRequestMethod(Http::METHOD_GET),
.uri = NULL,
.version = AnyP::ProtocolVersion()
testResults(__LINE__, input, output, expectStrict);
input.clear();
}
-#endif
// additional data in buffer
{
SBuf::size_type mimeEnd = data.length() - 1;
data.append("...", 3); // trailer to catch mime EOS errors.
- SBuf ioBuf; // begins empty
+ SBuf ioBuf;
Http1::RequestParser hp;
- // only relaxed parser accepts the garbage whitespace
- Config.onoff.relaxed_header_parser = 1;
-
- // state of things we expect right now
- struct resultSet expect = {
- .parsed = false,
- .needsMore = true,
- .parserState = Http1::HTTP_PARSE_NONE,
- .status = Http::scNone,
- .suffixSz = 0,
- .method = HttpRequestMethod(),
- .uri = NULL,
- .version = AnyP::ProtocolVersion()
- };
+ // start with relaxed and move on to strict
+ Config.onoff.relaxed_header_parser = 2;
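+ // decremented at the start of each do-while pass: 1 = relaxed parse, then 0 = strict parse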
Config.maxRequestHeaderSize = 1024; // large enough to hold the test data.
- for (SBuf::size_type pos = 0; pos <= data.length(); ++pos) {
-
- // simulate reading one more byte
- ioBuf.append(data.substr(pos,1));
-
- // when the garbage is passed we expect to start seeing first-line bytes
- if (pos == garbageEnd) {
- expect.parserState = Http1::HTTP_PARSE_FIRST;
- }
-
- // all points after garbage start to see accumulated bytes looking for end of current section
- if (pos >= garbageEnd)
- expect.suffixSz = ioBuf.length();
-
- // at end of request line expect to see method details
- if (pos == methodEnd) {
- expect.suffixSz = 0; // and a checkpoint buffer reset
- expect.method = HttpRequestMethod(Http::METHOD_GET);
- }
-
- // at end of URI expect to see method, URI details
- if (pos == uriEnd) {
- expect.suffixSz = 0; // and a checkpoint buffer reset
- expect.uri = "http://example.com/";
- }
+ do {
- // at end of request line expect to see method, URI, version details
- // and switch to seeking Mime header section
- if (pos == reqLineEnd) {
- expect.parserState = Http1::HTTP_PARSE_MIME;
- expect.suffixSz = 0; // and a checkpoint buffer reset
- expect.status = Http::scOkay;
- expect.method = HttpRequestMethod(Http::METHOD_GET);
- expect.uri = "http://example.com/";
- expect.version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1);
- }
+ // state of things we expect right now
+ struct resultSet expect = {
+ .parsed = false,
+ .needsMore = true,
+ .parserState = Http1::HTTP_PARSE_NONE,
+ .status = Http::scNone,
+ .suffixSz = 0,
+ .method = HttpRequestMethod(),
+ .uri = NULL,
+ .version = AnyP::ProtocolVersion()
+ };
- // one mime header is done we are expecting a new request
- // parse results say true and initial data is all gone from the buffer
- if (pos == mimeEnd) {
- expect.parsed = true;
- expect.needsMore = false;
- expect.suffixSz = 0; // and a checkpoint buffer reset
+ ioBuf.clear(); // begins empty for each parser type
+ hp.clear();
+
+ --Config.onoff.relaxed_header_parser;
+
+ for (SBuf::size_type pos = 0; pos <= data.length(); ++pos) {
+
+ // simulate reading one more byte
+ ioBuf.append(data.substr(pos,1));
+
+ // strict does not permit the garbage prefix
+ if (pos < garbageEnd && !Config.onoff.relaxed_header_parser) {
+ ioBuf.clear();
+ continue;
+ }
+
+ // when the garbage is passed we expect to start seeing first-line bytes
+ if (pos == garbageEnd)
+ expect.parserState = Http1::HTTP_PARSE_FIRST;
+
+ // all points after garbage start to see accumulated bytes looking for end of current section
+ if (pos >= garbageEnd)
+ expect.suffixSz = ioBuf.length();
+
+ // at end of request line expect to see method details
+ if (pos == methodEnd) {
+ expect.suffixSz = 0; // and a checkpoint buffer reset
+ expect.method = HttpRequestMethod(Http::METHOD_GET);
+ }
+
+ // at end of URI the strict parser expects to see method and URI details
+ // the relaxed parser must wait until end of line due to its whitespace tolerance
+ if (pos == uriEnd && !Config.onoff.relaxed_header_parser) {
+ expect.suffixSz = 0; // and a checkpoint buffer reset
+ expect.uri = "http://example.com/";
+ }
+
+ // at end of request line expect to see method, URI, version details
+ // and switch to seeking Mime header section
+ if (pos == reqLineEnd) {
+ expect.parserState = Http1::HTTP_PARSE_MIME;
+ expect.suffixSz = 0; // and a checkpoint buffer reset
+ expect.status = Http::scOkay;
+ expect.method = HttpRequestMethod(Http::METHOD_GET);
+ expect.uri = "http://example.com/";
+ expect.version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,1,1);
+ }
+
+ // once the mime header section is done we are expecting a new request
+ // parse results say true and initial data is all gone from the buffer
+ if (pos == mimeEnd) {
+ expect.parsed = true;
+ expect.needsMore = false;
+ expect.suffixSz = 0; // and a checkpoint buffer reset
+ }
+
+ testResults(__LINE__, ioBuf, hp, expect);
+
+ // sync the buffers like Squid does
+ ioBuf = hp.remaining();
+
+ // Squid stops using the parser once it has parsed the first message.
+ if (!hp.needsMoreData())
+ break;
}
- testResults(__LINE__, ioBuf, hp, expect);
+ } while (Config.onoff.relaxed_header_parser);
- // sync the buffers like Squid does
- ioBuf = hp.remaining();
-
- // Squid stops using the parser once it has parsed the first message.
- if (!hp.needsMoreData())
- break;
- }
}
#endif /* __cplusplus >= 201103L */