RequestParser.cc \
RequestParser.h \
ResponseParser.cc \
- ResponseParser.h
+ ResponseParser.h \
+ Tokenizer.cc \
+ Tokenizer.h
#include "squid.h"
#include "Debug.h"
#include "http/one/Parser.h"
+#include "http/one/Tokenizer.h"
#include "mime_header.h"
-#include "parser/Tokenizer.h"
#include "SquidConfig.h"
/// RFC 7230 section 2.6 - 7 magic octets
}
bool
-Http::One::Parser::skipLineTerminator(::Parser::Tokenizer &tok) const
+Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
{
static const SBuf crlf("\r\n");
if (tok.skip(crlf))
// while we can find more LF in the SBuf
static CharacterSet iso8859Line = CharacterSet("non-LF",'\0','\n'-1) + CharacterSet(NULL, '\n'+1, (unsigned char)0xFF);
- ::Parser::Tokenizer tok(mimeHeaderBlock_);
+ Http1::Tokenizer tok(mimeHeaderBlock_);
SBuf p;
static const SBuf crlf("\r\n");
p.consume(namelen + 1);
// TODO: optimize SBuf::trim to take CharacterSet directly
- ::Parser::Tokenizer t(p);
+ Http1::Tokenizer t(p);
t.skipAll(CharacterSet::WSP);
p = t.remaining();
#include "http/StatusCode.h"
#include "SBuf.h"
-namespace Parser {
-class Tokenizer;
-}
-
namespace Http {
namespace One {
protected:
/// detect and skip the CRLF or (if tolerant) LF line terminator
/// consume from the tokenizer and return true only if found
- bool skipLineTerminator(::Parser::Tokenizer &tok) const;
+ bool skipLineTerminator(Http1::Tokenizer &tok) const;
/**
* Scan to find the mime headers block for current message.
#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
+#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
-#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
* \retval 0 more data is needed to complete the parse
*/
int
-Http::One::RequestParser::parseMethodField(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
+Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
{
// scan for up to 16 valid method characters.
static const size_t maxMethodLength = 16; // TODO: make this configurable?
}
int
-Http::One::RequestParser::parseUriField(::Parser::Tokenizer &tok)
+Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
{
// URI field is a sequence of ... what? segments all have different valid charset
// go with non-whitespace non-binary characters for now
}
int
-Http::One::RequestParser::parseHttpVersionField(::Parser::Tokenizer &tok)
+Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
{
// partial match of HTTP/1 magic prefix
if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
int
Http::One::RequestParser::parseRequestFirstLine()
{
- ::Parser::Tokenizer tok(buf_);
+ Http1::Tokenizer tok(buf_);
debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
debugs(74, DBG_DATA, buf_);
// seek the LF character, then tokenize the line in reverse
SBuf line;
if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
- ::Parser::Tokenizer rTok(line);
+ Http1::Tokenizer rTok(line);
SBuf nil;
(void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
SBuf digit;
private:
void skipGarbageLines();
int parseRequestFirstLine();
- int parseMethodField(::Parser::Tokenizer &, const CharacterSet &);
- int parseUriField(::Parser::Tokenizer &);
- int parseHttpVersionField(::Parser::Tokenizer &);
+ int parseMethodField(Http1::Tokenizer &, const CharacterSet &);
+ int parseUriField(Http1::Tokenizer &);
+ int parseHttpVersionField(Http1::Tokenizer &);
/// what request method has been found on the first line
HttpRequestMethod method_;
#include "squid.h"
#include "Debug.h"
#include "http/one/ResponseParser.h"
+#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
-#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
// NP: we found the protocol version and consumed it already.
// just need the status code and reason phrase
int
-Http::One::ResponseParser::parseResponseStatusAndReason(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
+Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
{
if (!completedStatus_) {
debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
int
Http::One::ResponseParser::parseResponseFirstLine()
{
- ::Parser::Tokenizer tok(buf_);
+ Http1::Tokenizer tok(buf_);
CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
private:
int parseResponseFirstLine();
- int parseResponseStatusAndReason(::Parser::Tokenizer&, const CharacterSet &);
+ int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
/// magic prefix for identifying ICY response messages
static const SBuf IcyMagic;
--- /dev/null
+/*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "Debug.h"
+#include "http/one/Tokenizer.h"
+
+/// Parse a complete quoted-string; see the declaration for the grammar.
+/// Returns false without consuming anything when the opening DQUOTE is absent.
+bool
+Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
+{
+    // remember where we started so that qdText() can rewind on failure
+    checkpoint();
+
+    // the opening DQUOTE is mandatory; the rest is delegated to qdText()
+    return skip('"') && qdText(returnedToken, http1p0);
+}
+
+/// Parse either a quoted-string (when the input opens with DQUOTE) or a
+/// plain token made of CharacterSet::TCHAR bytes.
+bool
+Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+{
+    // remember where we started so that qdText() can rewind on failure
+    checkpoint();
+
+    // a leading DQUOTE selects the quoted-string form
+    if (skip('"'))
+        return qdText(returnedToken, http1p0);
+
+    // otherwise only the bare token form can match
+    return prefix(returnedToken, CharacterSet::TCHAR);
+}
+
+/**
+ * Parse the remainder of a quoted-string: the qdtext (and, for HTTP/1.1,
+ * quoted-pair) sequence up to and including the closing DQUOTE.
+ *
+ * The caller must already have consumed the opening DQUOTE and must have
+ * called checkpoint(), because every failure path rewinds to that checkpoint.
+ *
+ * \param returnedToken receives the unquoted content, with each quoted-pair
+ *                      replaced by the octet following the backslash
+ * \param http1p0       apply the RFC 1945 (HTTP/1.0) rules instead of RFC 7230
+ *
+ * \retval true  the closing DQUOTE was found and consumed
+ * \retval false more data is needed or an invalid byte was met; returnedToken
+ *               is empty and the tokenizer is back at the last checkpoint
+ */
+bool
+Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
+{
+    // the initial DQUOTE has been skipped by the caller
+
+    // Start from an empty result. The loop below only appends, so stale
+    // content left in the caller's buffer would otherwise end up in front
+    // of the parsed string.
+    returnedToken.clear();
+
+    /*
+     * RFC 1945 - defines qdtext:
+     *   inclusive of LWS (which includes CR and LF)
+     *   exclusive of 0x80-0xFF
+     *   includes 0x5C ('\') as just a regular character
+     */
+    static const CharacterSet qdtext1p0 = CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
+                                          CharacterSet("", "!") +
+                                          CharacterSet::CR + CharacterSet::LF + CharacterSet::HTAB + CharacterSet::SP;
+    /*
+     * RFC 7230 - defines qdtext:
+     *   exclusive of CR and LF
+     *   inclusive of 0x80-0xFF
+     *   includes 0x5C ('\') but only when part of quoted-pair
+     */
+    static const CharacterSet qdtext1p1 = CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
+                                          CharacterSet("", "!") +
+                                          CharacterSet("", 0x5D, 0x7E) +
+                                          CharacterSet::HTAB + CharacterSet::SP +
+                                          CharacterSet::OBSTEXT;
+
+    // best we can do is a conditional reference since http1p0 value may change per-client
+    const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
+
+    for (;;) {
+        // take the run of plain qdtext bytes that precedes the next special byte
+        const SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
+        returnedToken.append(consume(prefixLen));
+
+        // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+        if (!http1p0 && skip('\\')) {
+            /* RFC 7230 section 3.2.6
+             *
+             * The backslash octet ("\") can be used as a single-octet quoting
+             * mechanism within quoted-string and comment constructs. Recipients
+             * that process the value of a quoted-string MUST handle a quoted-pair
+             * as if it were replaced by the octet following the backslash.
+             *
+             *   quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
+             */
+            static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+            SBuf escaped;
+            // exactly one octet may follow the backslash; a missing or
+            // disallowed octet fails the whole quoted-string
+            if (!prefix(escaped, qPairChars, 1)) {
+                returnedToken.clear();
+                restoreLastCheckpoint();
+                return false;
+            }
+            returnedToken.append(escaped);
+            continue;
+
+        } else if (skip('"')) {
+            break; // done
+
+        } else if (atEnd()) {
+            // need more data
+            returnedToken.clear();
+            restoreLastCheckpoint();
+            return false;
+        }
+
+        // else, we have an error
+        debugs(24, 8, "invalid bytes for set " << tokenChars.name);
+        returnedToken.clear();
+        restoreLastCheckpoint();
+        return false;
+    }
+
+    // found the whole string
+    return true;
+}
+
--- /dev/null
+/*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
+#define SQUID_SRC_HTTP_ONE_TOKENIZER_H
+
+#include "parser/Tokenizer.h"
+
+namespace Http {
+namespace One {
+
+/**
+ * Lexical processor extended to tokenize HTTP/1.x syntax.
+ *
+ * \see ::Parser::Tokenizer for more detail
+ */
+class Tokenizer : public ::Parser::Tokenizer
+{
+public:
+    /// Start tokenizing the given buffer; no checkpoint has been saved yet.
+    Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
+
+    /**
+     * Attempt to parse a quoted-string lexical construct.
+     *
+     * Governed by:
+     *  - RFC 1945 section 2.2
+     *    "
+     *    A string of text is parsed as a single word if it is quoted using
+     *    double-quote marks.
+     *
+     *        quoted-string  = ( <"> *(qdtext) <"> )
+     *
+     *        qdtext         = <any CHAR except <"> and CTLs,
+     *                         but including LWS>
+     *
+     *    Single-character quoting using the backslash ("\") character is not
+     *    permitted in HTTP/1.0.
+     *    "
+     *
+     *  - RFC 7230 section 3.2.6
+     *    "
+     *    A string of text is parsed as a single value if it is quoted using
+     *    double-quote marks.
+     *
+     *        quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+     *        qdtext         = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+     *        obs-text       = %x80-FF
+     *    "
+     *
+     * \param value   receives the text found between the DQUOTE marks
+     * \param http1p0 parse with the HTTP/1.0 rules, which do not permit
+     *                \-escaped characters
+     *
+     * \retval true  a complete quoted-string was found and consumed
+     * \retval false no quoted-string starts here, it is incomplete, or it
+     *               contains invalid bytes; nothing is consumed
+     */
+    bool quotedString(SBuf &value, const bool http1p0 = false);
+
+    /**
+     * Attempt to parse a (token / quoted-string ) lexical construct.
+     *
+     * Input opening with DQUOTE is parsed as a quoted-string, anything else
+     * as a token.
+     *
+     * \param value   receives the token or the unquoted string content
+     * \param http1p0 parse a quoted-string with the HTTP/1.0 rules
+     */
+    bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
+
+private:
+    /// parse the internal component of a quoted-string, and terminal DQUOTE
+    bool qdText(SBuf &value, const bool http1p0);
+
+    /// remember the current unparsed buffer and parsed-bytes count
+    void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
+    /// rewind to the state remembered by the last checkpoint() call
+    void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
+
+    SBuf savedCheckpoint_; ///< unparsed input as of the last checkpoint()
+    SBuf::size_type savedStats_; ///< parsedSize() as of the last checkpoint()
+};
+
+} // namespace One
+} // namespace Http
+
+#endif /* SQUID_SRC_HTTP_ONE_TOKENIZER_H */
+
namespace Http {
namespace One {
+class Tokenizer;
+
class Parser;
typedef RefCount<Http::One::Parser> ParserPointer;
const SBuf& remaining() const { return buf_; }
/// reinitialize processing for a new buffer
- void reset(const SBuf &newBuf) { buf_ = newBuf; parsed_ = 0; }
+ void reset(const SBuf &newBuf) { undoParse(newBuf, 0); }
/** Basic strtok(3):
* Skips all leading delimiters (if any),
SBuf consume(const SBuf::size_type n);
SBuf::size_type success(const SBuf::size_type n);
+ /// Restore a previously saved parsing state: newBuf becomes the
+ /// yet-unparsed input and cParsed the count of bytes parsed so far.
+ void undoParse(const SBuf &newBuf, SBuf::size_type cParsed) { buf_ = newBuf; parsed_ = cParsed; }
+
private:
SBuf buf_; ///< yet unparsed input
SBuf::size_type parsed_; ///< bytes successfully parsed, including skipped