/*
- * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
#include "anyp/ProtocolVersion.h"
#include "http/one/forward.h"
-#include "SBuf.h"
-
-namespace Parser {
-class Tokenizer;
-}
+#include "http/StatusCode.h"
+#include "parser/forward.h"
+#include "sbuf/SBuf.h"
namespace Http {
namespace One {
// Parser states: the values Parser::parsingStage_ holds to record what stage the parser is up to
enum ParseState {
- HTTP_PARSE_NONE, ///< initialized, but nothing usefully parsed yet
- HTTP_PARSE_FIRST, ///< HTTP/1 message first-line
- HTTP_PARSE_MIME, ///< HTTP/1 mime-header block
- HTTP_PARSE_DONE ///< parsed a message header, or reached a terminal syntax error
+ HTTP_PARSE_NONE, ///< initialized, but nothing usefully parsed yet
+ HTTP_PARSE_FIRST, ///< HTTP/1 message first-line
+ HTTP_PARSE_CHUNK_SZ, ///< HTTP/1.1 chunked encoding chunk-size
+ HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext
+ HTTP_PARSE_CHUNK, ///< HTTP/1.1 chunked encoding chunk-data
+ HTTP_PARSE_MIME, ///< HTTP/1 mime-header block
+ HTTP_PARSE_DONE ///< parsed a message header, or reached a terminal syntax error
};
/** HTTP/1.x protocol parser
* Works on a raw character I/O buffer and tokenizes the content into
* the major CRLF delimited segments of an HTTP/1 protocol message:
*
- * \item first-line (request-line / simple-request / status-line)
- * \item mime-header 0*( header-name ':' SP field-value CRLF)
+ * \li first-line (request-line / simple-request / status-line)
+ * \li mime-header 0*( header-name ':' SP field-value CRLF)
*/
class Parser : public RefCountable
{
public:
typedef SBuf::size_type size_type;
+ typedef ::Parser::Tokenizer Tokenizer;
- Parser() : parsingStage_(HTTP_PARSE_NONE) {}
+ Parser() = default;
+ Parser(const Parser &) = default;
+ Parser &operator =(const Parser &) = default;
+ Parser(Parser &&) = default;
+ Parser &operator =(Parser &&) = default;
virtual ~Parser() {}
/// Set this parser back to a default state.
const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;}
/**
- * Scan the mime header block (badly) for a header with teh given name.
+ * Scan the mime header block (badly) for a Host header.
*
* BUG: omits lines when searching for headers with obs-fold or multiple entries.
*
*
* \return A pointer to a field-value of the first matching field-name, or NULL.
*/
- char *getHeaderField(const char *name);
+ char *getHostHeaderField();
/// the remaining unprocessed section of buffer
const SBuf &remaining() const {return buf_;}
+ /**
+ * HTTP status code resulting from the parse process,
+ * to be used when handling invalid messages.
+ *
+ * Http::scNone indicates incomplete parse,
+ * Http::scOkay indicates no error,
+ * other codes represent a parse error.
+ */
+ Http::StatusCode parseStatusCode = Http::scNone;
+
+ /// Whitespace between regular protocol elements.
+ /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us.
+ /// See also: DelimiterCharacters().
+ static const CharacterSet &WhitespaceCharacters();
+
+ /// Whitespace between protocol elements in restricted contexts like
+ /// request line, status line, asctime-date, and credentials
+ /// Seen in RFCs as SP but may be "relaxed" by us.
+ /// See also: WhitespaceCharacters().
+ /// XXX: Misnamed and overused.
+ static const CharacterSet &DelimiterCharacters();
+
protected:
- /// detect and skip the CRLF or (if tolerant) LF line terminator
- /// consume from the tokenizer and return true only if found
- bool skipLineTerminator(::Parser::Tokenizer &tok) const;
+ /**
+ * detect and skip the CRLF or (if tolerant) LF line terminator
+ * consume from the tokenizer.
+ *
+ * \throws exception on bad or InsufficientInput.
+ * NOTE(review): the return type is void, so there is no true/false
+ * result; presumably a normal return means the terminator was skipped.
+ */
+ void skipLineTerminator(Tokenizer &) const;
+
+ /**
+ * Scan to find the mime headers block for current message.
+ *
+ * \param which NOTE(review): role not evident from this declaration; confirm against the definition.
+ * \param limit the number of characters within which the mime block must be identified.
+ *
+ * \retval true If mime block (or a block's non-existence) has been
+ * identified accurately within limit characters.
+ * mimeHeaderBlock_ has been updated and buf_ consumed.
+ *
+ * \retval false An error occurred, or no mime terminator found within limit.
+ */
+ bool grabMimeBlock(const char *which, const size_t limit);
/// RFC 7230 section 2.6 - 7 magic octets
static const SBuf Http1magic;
SBuf buf_;
/// what stage the parser is currently up to
- ParseState parsingStage_;
+ ParseState parsingStage_ = HTTP_PARSE_NONE;
/// what protocol label has been found in the first line (if any)
AnyP::ProtocolVersion msgProtocol_;
/// buffer holding the mime headers (if any)
SBuf mimeHeaderBlock_;
+
+ /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block
+ bool hackExpectsMime_ = false;
+
+private:
+ void cleanMimePrefix();
+ void unfoldMime();
};
+/// Skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace) read via the given tokenizer.
+/// \throws InsufficientInput when the end of BWS cannot be confirmed
+void ParseBws(Parser::Tokenizer &);
+
+/// \returns the right debugs() level for logging HTTP violation messages
+int ErrorLevel();
+
} // namespace One
} // namespace Http