WspDelim += CharacterSet::HTAB
+ CharacterSet("VT,FF","\x0B\x0C")
+ CharacterSet::CR;
+ debugs(74, 5, "using Parser relaxed WSP characters");
}
// only search for method if we have not yet found one
if (Config.onoff.relaxed_header_parser) {
// whitespace tolerant
+ int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
+
// NOTES:
// * this would be static, except WspDelim changes with reconfigure
// * HTTP-version charset is included by uriValidCharacters()
uri_ = rTok.remaining();
msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
if (uri_.isEmpty()) {
- debugs(33, 5, "invalid request-line. missing URL");
+ debugs(33, warnOnError, "invalid request-line. missing URL");
parseStatusCode = Http::scBadRequest;
return -1;
}
return 1;
}
- debugs(33, 5, "invalid request-line. not HTTP");
+ debugs(33, warnOnError, "invalid request-line. not HTTP");
parseStatusCode = Http::scBadRequest;
return -1;
}
+ if (!tok.atEnd()) {
+
+#if USE_HTTP_VIOLATIONS
+ /*
+ * RFC 3986 explicitly lists the characters permitted in URI.
+ * A non-permitted character was found somewhere in the request-line.
+ * However, as long as we can find the LF, accept the characters
+ * which we know are invalid in any URI but actively used.
+ */
+ LfDelim.add('\0'); // Java
+ LfDelim.add(' '); // IIS
+ LfDelim.add('\"'); // Bing
+ LfDelim.add('\\'); // MSIE, Firefox
+ LfDelim.add('|'); // Amazon
+ LfDelim.add('^'); // Microsoft News
+
+ // other ASCII characters whose use RFC 2396 explicitly disallowed in 1998
+ // and which RFC 3986 did not subsequently permit in 2005.
+ LfDelim.add('<'); // HTML embedded in URL
+ LfDelim.add('>'); // HTML embedded in URL
+ LfDelim.add('`'); // Shell Script embedded in URL
+ LfDelim.add('{'); // JSON or Javascript embedded in URL
+ LfDelim.add('}'); // JSON or Javascript embedded in URL
+
+ // reset the tokenizer from anything the above did, then seek the LF character.
+ tok.reset(buf_);
+
+ if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
+
+ Http1::Tokenizer rTok(line);
+
+ // strip terminating CR (if any)
+ SBuf nil;
+ (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
+ line = rTok.remaining();
+
+ // strip terminating 'WSP HTTP-version' (if any)
+ if (rTok.suffix(nil,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
+ hackExpectsMime_ = true; // client thinks it's speaking HTTP, probably sent a mime block.
+ uri_ = rTok.remaining();
+ } else
+ uri_ = line; // no HTTP/1.x label found. Use the whole line.
+
+ if (uri_.isEmpty()) {
+ debugs(33, warnOnError, "invalid request-line. missing URL");
+ parseStatusCode = Http::scBadRequest;
+ return -1;
+ }
+
+ debugs(33, warnOnError, "invalid request-line. treating as HTTP/0.9" << (hackExpectsMime_?" (with mime)":""));
+ msgProtocol_ = Http::ProtocolVersion(0,9);
+ parseStatusCode = Http::scOkay;
+ buf_ = tok.remaining(); // incremental parse checkpoint
+ return 1;
+
+ } else if (tok.atEnd()) {
+ debugs(74, 5, "Parser needs more data");
+ return 0;
+ }
+ // else, drop back to invalid request-line handling
+#endif
+ const SBuf t = tok.remaining();
+ debugs(33, warnOnError, "invalid request-line characters." << Raw("data", t.rawContent(), t.length()));
+ parseStatusCode = Http::scBadRequest;
+ return -1;
+ }
debugs(74, 5, "Parser needs more data");
return 0;
}