#include "comm/Connection.h"
#include "err_detail_type.h"
#include "http/ContentLengthInterpreter.h"
-#include "http/one/TeChunkedParser.h"
#include "HttpHeaderTools.h"
#include "HttpReply.h"
#include "MasterXaction.h"
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
#include "SquidTime.h"
// flow and terminology:
static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
+const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body");
+
Adaptation::Icap::ModXact::State::State()
{
memset(this, 0, sizeof(*this));
state.parsing = State::psBody;
replyHttpBodySize = 0;
bodyParser = new Http1::TeChunkedParser;
+ bodyParser->parseExtensionValuesWith(&extensionParser);
makeAdaptedBodyPipe("adapted response from the ICAP server");
Must(state.sending == State::sendingAdapted);
} else {
}
if (parsed) {
- if (state.readyForUob && bodyParser->useOriginBody >= 0)
- prepPartialBodyEchoing(static_cast<uint64_t>(bodyParser->useOriginBody));
+ if (state.readyForUob && extensionParser.sawUseOriginalBody())
+ prepPartialBodyEchoing(extensionParser.useOriginalBody());
else
stopSending(true); // the parser succeeds only if all parsed data fits
if (trailerParser)
return parsed > 0;
}
+void
+Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName)
+{
+ if (extName == UseOriginalBodyName) {
+ useOriginalBody_ = tok.udec64("use-original-body");
+ assert(useOriginalBody_ >= 0);
+ } else {
+ Ignore(tok, extName);
+ }
+}
+
#include "adaptation/icap/Xaction.h"
#include "BodyPipe.h"
#include "http/one/forward.h"
+#include "http/one/TeChunkedParser.h"
/*
* ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
size_t hdr_sz; // pedantic XXX: wrong type dictated by HttpHeader::parse() API
};
+/// handles ICAP-specific chunk extensions supported by Squid
+class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser
+{
+public:
+ /* Http1::ChunkExtensionValueParser API */
+ virtual void parse(Tokenizer &tok, const SBuf &extName) override;
+
+ bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; }
+ uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast<uint64_t>(useOriginalBody_); }
+
+private:
+ static const SBuf UseOriginalBodyName;
+
+ /// the value of the parsed use-original-body chunk extension (or -1)
+ int64_t useOriginalBody_ = -1;
+};
+
class ModXact: public Xaction, public BodyProducer, public BodyConsumer
{
CBDATA_CLASS(ModXact);
TrailerParser *trailerParser;
+ ChunkExtensionValueParser extensionParser;
+
class State
{
*/
#include "squid.h"
+#include "base/CharacterSet.h"
#include "Debug.h"
#include "http/one/Parser.h"
-#include "http/one/Tokenizer.h"
#include "mime_header.h"
+#include "parser/Tokenizer.h"
#include "SquidConfig.h"
/// RFC 7230 section 2.6 - 7 magic octets
RelaxedDelimiterCharacters() : CharacterSet::SP;
}
-bool
-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
+void
+Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
{
if (tok.skip(Http1::CrLf()))
- return true;
+ return;
if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
- return true;
+ return;
if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
- return false; // need more data
+ throw InsufficientInput();
throw TexcHere("garbage instead of CRLF line terminator");
- return false; // unreachable, but make naive compilers happy
}
/// all characters except the LF line terminator
void
Http::One::Parser::cleanMimePrefix()
{
- Http1::Tokenizer tok(mimeHeaderBlock_);
+ Tokenizer tok(mimeHeaderBlock_);
while (tok.skipOne(RelaxedDelimiterCharacters())) {
(void)tok.skipAll(LineCharacters()); // optional line content
// LF terminator is required.
void
Http::One::Parser::unfoldMime()
{
- Http1::Tokenizer tok(mimeHeaderBlock_);
+ Tokenizer tok(mimeHeaderBlock_);
const auto szLimit = mimeHeaderBlock_.length();
mimeHeaderBlock_.clear();
// prevent the mime sender being able to make append() realloc/grow multiple times.
debugs(25, 5, "looking for " << name);
// while we can find more LF in the SBuf
- Http1::Tokenizer tok(mimeHeaderBlock_);
+ Tokenizer tok(mimeHeaderBlock_);
SBuf p;
while (tok.prefix(p, LineCharacters())) {
p.consume(namelen + 1);
// TODO: optimize SBuf::trim to take CharacterSet directly
- Http1::Tokenizer t(p);
+ Tokenizer t(p);
t.skipAll(CharacterSet::WSP);
p = t.remaining();
}
// BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
-bool
-Http::One::ParseBws(Tokenizer &tok)
+void
+Http::One::ParseBws(Parser::Tokenizer &tok)
{
- if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
+ const auto count = tok.skipAll(Parser::WhitespaceCharacters());
+
+ if (tok.atEnd())
+ throw InsufficientInput(); // even if count is positive
+
+ if (count) {
// Generating BWS is a MUST-level violation so warn about it as needed.
debugs(33, ErrorLevel(), "found " << count << " BWS octets");
// RFC 7230 says we MUST parse BWS, so we fall through even if
}
// else we successfully "parsed" an empty BWS sequence
- return true;
+ // success: no more BWS characters expected
}
#include "anyp/ProtocolVersion.h"
#include "http/one/forward.h"
#include "http/StatusCode.h"
+#include "parser/forward.h"
#include "sbuf/SBuf.h"
namespace Http {
{
public:
typedef SBuf::size_type size_type;
+ typedef ::Parser::Tokenizer Tokenizer;
Parser() = default;
Parser(const Parser &) = default;
* detect and skip the CRLF or (if tolerant) LF line terminator
* consume from the tokenizer.
*
- * throws if non-terminator is detected.
+ * \throws exception on bad or InsuffientInput.
* \retval true only if line terminator found.
* \retval false incomplete or missing line terminator, need more data.
*/
- bool skipLineTerminator(Http1::Tokenizer &tok) const;
+ void skipLineTerminator(Tokenizer &) const;
/**
* Scan to find the mime headers block for current message.
};
/// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
-/// \returns true (always; unlike all the skip*() functions)
-bool ParseBws(Tokenizer &tok);
+/// \throws InsufficientInput when the end of BWS cannot be confirmed
+void ParseBws(Parser::Tokenizer &);
/// the right debugs() level for logging HTTP violation messages
int ErrorLevel();
#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
-#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
+#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
* RFC 7230 section 2.6, 3.1 and 3.5
*/
bool
-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseMethodField(Tokenizer &tok)
{
// method field is a sequence of TCHAR.
// Limit to 32 characters to prevent overly long sequences of non-HTTP
}
bool
-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseUriField(Tokenizer &tok)
{
/* Arbitrary 64KB URI upper length limit.
*
}
bool
-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
{
static const SBuf http1p0("HTTP/1.0");
static const SBuf http1p1("HTTP/1.1");
/// Parse CRs at the end of request-line, just before the terminating LF.
bool
-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
+Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
{
if (Config.onoff.relaxed_header_parser) {
(void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
// Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
// Now, the request line has to end at the first LF.
static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
- ::Parser::Tokenizer lineTok(buf_);
+ Tokenizer lineTok(buf_);
if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
if (buf_.length() >= Config.maxRequestHeaderSize) {
/* who should we blame for our failure to parse this line? */
- Http1::Tokenizer methodTok(buf_);
+ Tokenizer methodTok(buf_);
if (!parseMethodField(methodTok))
return -1; // blame a bad method (or its delimiter)
return 0;
}
- Http1::Tokenizer tok(line);
+ Tokenizer tok(line);
if (!parseMethodField(tok))
return -1;
bool doParse(const SBuf &aBuf);
/* all these return false and set parseStatusCode on parsing failures */
- bool parseMethodField(Http1::Tokenizer &);
- bool parseUriField(Http1::Tokenizer &);
- bool parseHttpVersionField(Http1::Tokenizer &);
+ bool parseMethodField(Tokenizer &);
+ bool parseUriField(Tokenizer &);
+ bool parseHttpVersionField(Tokenizer &);
bool skipDelimiter(const size_t count, const char *where);
- bool skipTrailingCrs(Http1::Tokenizer &tok);
+ bool skipTrailingCrs(Tokenizer &tok);
bool http0() const {return !msgProtocol_.major;}
static const CharacterSet &RequestTargetCharacters();
#include "squid.h"
#include "Debug.h"
#include "http/one/ResponseParser.h"
-#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
+#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
// NP: we found the protocol version and consumed it already.
// just need the status code and reason phrase
int
-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
+Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim)
{
if (!completedStatus_) {
debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
(void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
try {
- if (skipLineTerminator(tok)) {
- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
- buf_ = tok.remaining(); // resume checkpoint
- return 1;
- }
+ skipLineTerminator(tok);
+ buf_ = tok.remaining(); // resume checkpoint
+ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
+ return 1;
+ } catch (const InsufficientInput &) {
reasonPhrase_.clear();
return 0; // need more to be sure we have it all
-
} catch (const std::exception &ex) {
debugs(74, 6, "invalid status-line: " << ex.what());
}
int
Http::One::ResponseParser::parseResponseFirstLine()
{
- Http1::Tokenizer tok(buf_);
+ Tokenizer tok(buf_);
const CharacterSet &WspDelim = DelimiterCharacters();
private:
int parseResponseFirstLine();
- int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
+ int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &);
/// magic prefix for identifying ICY response messages
static const SBuf IcyMagic;
#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
#include "MemBuf.h"
+#include "parser/Tokenizer.h"
#include "Parsing.h"
+#include "sbuf/Stream.h"
#include "SquidConfig.h"
-Http::One::TeChunkedParser::TeChunkedParser()
+Http::One::TeChunkedParser::TeChunkedParser():
+ customExtensionValueParser(nullptr)
{
// chunked encoding only exists in HTTP/1.1
Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
buf_.clear();
theChunkSize = theLeftBodySize = 0;
theOut = NULL;
- useOriginBody = -1;
+ // XXX: We do not reset customExtensionValueParser here. Based on the
+ // clear() API description, we must, but it makes little sense and could
+ // break method callers if they appear because some of them may forget to
+ // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
+ // parent class and this unnecessary method with it.
}
bool
if (parsingStage_ == Http1::HTTP_PARSE_NONE)
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
- Http1::Tokenizer tok(buf_);
+ Tokenizer tok(buf_);
// loop for as many chunks as we can
// use do-while instead of while so that we can incrementally
// restart in the middle of a chunk/frame
do {
- if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
return false;
if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
/// RFC 7230 section 4.1 chunk-size
bool
-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
{
Must(theChunkSize <= 0); // Should(), really
return false; // should not be reachable
}
-/**
- * Parses chunk metadata suffix, looking for interesting extensions and/or
- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
- *
- * chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
- * chunk-ext-name = token
- * chunk-ext-val = token / quoted-string
- *
- * ICAP 'use-original-body=N' extension is supported.
- */
+/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
+/// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
+/// last-chunk = 1*"0" [ chunk-ext ] CRLF
bool
-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
+Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
{
- SBuf ext;
- SBuf value;
- while (
- ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
- tok.skip(';') &&
- ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
- tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
-
- // whole value part is optional. if no '=' expect next chunk-ext
- if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
-
- if (!skipKnown) {
- if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
- debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
- buf_ = tok.remaining(); // parse checkpoint
- continue;
- }
- }
-
- debugs(94, 5, "skipping unknown chunk extension " << ext);
-
- // unknown might have a value token or quoted-string
- if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
- buf_ = tok.remaining(); // parse checkpoint
- continue;
- }
-
- // otherwise need more data OR corrupt syntax
- break;
- }
-
- if (!tok.atEnd())
- buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
- }
-
- if (skipLineTerminator(tok)) {
- buf_ = tok.remaining(); // checkpoint
- // non-0 chunk means data, 0-size means optional Trailer follows
+ // Code becomes much simpler when incremental parsing functions throw on
+ // bad or insufficient input, like in the code below. TODO: Expand up.
+ try {
+ parseChunkExtensions(tok); // a possibly empty chunk-ext list
+ skipLineTerminator(tok);
+ buf_ = tok.remaining();
parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
return true;
+ } catch (const InsufficientInput &) {
+ tok.reset(buf_); // backtrack to the last commit point
+ return false;
}
+ // other exceptions bubble up to kill message parsing
+}
- return false;
+/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+void
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+{
+ do {
+ ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+
+ if (!tok.skip(';'))
+ return; // reached the end of extensions (if any)
+
+ parseOneChunkExtension(tok);
+ buf_ = tok.remaining(); // got one extension
+ } while (true);
+}
+
+void
+Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
+{
+ const auto ignoredValue = tokenOrQuotedString(tok);
+ debugs(94, 5, extName << " with value " << ignoredValue);
+}
+
+/// Parses a single chunk-ext list element:
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+void
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+{
+ ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
+
+ const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+
+ ParseBws(tok);
+
+ if (!tok.skip('='))
+ return; // parsed a valueless chunk-ext
+
+ ParseBws(tok);
+
+ // optimization: the only currently supported extension needs last-chunk
+ if (!theChunkSize && customExtensionValueParser)
+ customExtensionValueParser->parse(tok, extName);
+ else
+ ChunkExtensionValueParser::Ignore(tok, extName);
}
bool
-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
{
if (theLeftBodySize > 0) {
buf_ = tok.remaining(); // sync buffers before buf_ use
}
bool
-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
{
Must(theLeftBodySize == 0); // Should(), really
- if (skipLineTerminator(tok)) {
+ try {
+ skipLineTerminator(tok);
buf_ = tok.remaining(); // parse checkpoint
theChunkSize = 0; // done with the current chunk
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
return true;
}
-
- return false;
+ catch (const InsufficientInput &) {
+ return false;
+ }
+ // other exceptions bubble up to kill message parsing
}
namespace One
{
+using ::Parser::InsufficientInput;
+
+// TODO: Move this class into http/one/ChunkExtensionValueParser.*
+/// A customizable parser of a single chunk extension value (chunk-ext-val).
+/// From RFC 7230 section 4.1.1 and its Errata #4667:
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+/// chunk-ext-name = token
+/// chunk-ext-val = token / quoted-string
+class ChunkExtensionValueParser
+{
+public:
+ typedef ::Parser::Tokenizer Tokenizer;
+
+ /// extracts and ignores the value of a named extension
+ static void Ignore(Tokenizer &tok, const SBuf &extName);
+
+ /// extracts and then interprets (or ignores) the extension value
+ virtual void parse(Tokenizer &tok, const SBuf &extName) = 0;
+};
+
/**
* An incremental parser for chunked transfer coding
* defined in RFC 7230 section 4.1.
*
* The parser shovels content bytes from the raw
* input buffer into the content output buffer, both caller-supplied.
- * Ignores chunk extensions except for ICAP's ieof.
+ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith().
* Trailers are available via mimeHeader() if wanted.
*/
class TeChunkedParser : public Http1::Parser
/// set the buffer to be used to store decoded chunk data
void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
+ /// Instead of ignoring all chunk extension values, give the supplied
+ /// parser a chance to handle them. Only applied to last-chunk (for now).
+ void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; }
+
bool needsMoreSpace() const;
/* Http1::Parser API */
virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
private:
- bool parseChunkSize(Http1::Tokenizer &tok);
- bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown);
- bool parseChunkBody(Http1::Tokenizer &tok);
- bool parseChunkEnd(Http1::Tokenizer &tok);
+ bool parseChunkSize(Tokenizer &tok);
+ bool parseChunkMetadataSuffix(Tokenizer &);
+ void parseChunkExtensions(Tokenizer &);
+ void parseOneChunkExtension(Tokenizer &);
+ bool parseChunkBody(Tokenizer &tok);
+ bool parseChunkEnd(Tokenizer &tok);
MemBuf *theOut;
uint64_t theChunkSize;
uint64_t theLeftBodySize;
-public:
- int64_t useOriginBody;
+ /// An optional plugin for parsing and interpreting custom chunk-ext-val.
+ /// This "visitor" object is owned by our creator.
+ ChunkExtensionValueParser *customExtensionValueParser;
};
} // namespace One
#include "squid.h"
#include "Debug.h"
+#include "http/one/Parser.h"
#include "http/one/Tokenizer.h"
-
-bool
-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
-{
- checkpoint();
-
- if (!skip('"'))
- return false;
-
- return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
+
+/// Extracts quoted-string after the caller removes the initial '"'.
+/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
+/// \throws InsufficientInput when input can be a token _prefix_
+/// \returns extracted quoted string (without quotes and with chars unescaped)
+static SBuf
+parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
{
- checkpoint();
-
- if (!skip('"'))
- return prefix(returnedToken, CharacterSet::TCHAR);
-
- return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
-{
- // the initial DQUOTE has been skipped by the caller
-
/*
* RFC 1945 - defines qdtext:
* inclusive of LWS (which includes CR and LF)
// best we can do is a conditional reference since http1p0 value may change per-client
const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
- for (;;) {
- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
- returnedToken.append(consume(prefixLen));
+ SBuf parsedToken;
+
+ while (!tok.atEnd()) {
+ SBuf qdText;
+ if (tok.prefix(qdText, tokenChars))
+ parsedToken.append(qdText);
+
+ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+ if (tok.atEnd())
+ break;
- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
- if (!http1p0 && skip('\\')) {
/* RFC 7230 section 3.2.6
*
* The backslash octet ("\") can be used as a single-octet quoting
*/
static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
SBuf escaped;
- if (!prefix(escaped, qPairChars, 1)) {
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
- }
- returnedToken.append(escaped);
+ if (!tok.prefix(escaped, qPairChars, 1))
+ throw TexcHere("invalid escaped character in quoted-pair");
+
+ parsedToken.append(escaped);
continue;
+ }
- } else if (skip('"')) {
- break; // done
+ if (tok.skip('"'))
+ return parsedToken; // may be empty
- } else if (atEnd()) {
- // need more data
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
- }
+ if (tok.atEnd())
+ break;
- // else, we have an error
- debugs(24, 8, "invalid bytes for set " << tokenChars.name);
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
+ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
}
- // found the whole string
- return true;
+ throw Http::One::InsufficientInput();
+}
+
+SBuf
+Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
+{
+ if (tok.skip('"'))
+ return parseQuotedStringSuffix(tok, http1p0);
+
+ if (tok.atEnd())
+ throw InsufficientInput();
+
+ SBuf parsedToken;
+ if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
+ throw TexcHere("invalid input while expecting an HTTP token");
+
+ if (tok.atEnd())
+ throw InsufficientInput();
+
+ // got the complete token
+ return parsedToken;
}
#ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
#define SQUID_SRC_HTTP_ONE_TOKENIZER_H
-#include "parser/Tokenizer.h"
+#include "parser/forward.h"
+#include "sbuf/forward.h"
namespace Http {
namespace One {
/**
- * Lexical processor extended to tokenize HTTP/1.x syntax.
+ * Extracts either an HTTP/1 token or quoted-string while dealing with
+ * possibly incomplete input typical for incremental text parsers.
+ * Unescapes escaped characters in HTTP/1.1 quoted strings.
*
- * \see ::Parser::Tokenizer for more detail
+ * \param http1p0 whether to prohibit \-escaped characters in quoted strings
+ * \throws InsufficientInput as appropriate, including on unterminated tokens
+ * \returns extracted token or quoted string (without quotes)
+ *
+ * Governed by:
+ * - RFC 1945 section 2.1
+ * "
+ * A string of text is parsed as a single word if it is quoted using
+ * double-quote marks.
+ *
+ * quoted-string = ( <"> *(qdtext) <"> )
+ *
+ * qdtext = <any CHAR except <"> and CTLs,
+ * but including LWS>
+ *
+ * Single-character quoting using the backslash ("\") character is not
+ * permitted in HTTP/1.0.
+ * "
+ *
+ * - RFC 7230 section 3.2.6
+ * "
+ * A string of text is parsed as a single value if it is quoted using
+ * double-quote marks.
+ *
+ * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+ * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+ * obs-text = %x80-FF
+ * "
*/
-class Tokenizer : public ::Parser::Tokenizer
-{
-public:
- Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
-
- /**
- * Attempt to parse a quoted-string lexical construct.
- *
- * Governed by:
- * - RFC 1945 section 2.1
- * "
- * A string of text is parsed as a single word if it is quoted using
- * double-quote marks.
- *
- * quoted-string = ( <"> *(qdtext) <"> )
- *
- * qdtext = <any CHAR except <"> and CTLs,
- * but including LWS>
- *
- * Single-character quoting using the backslash ("\") character is not
- * permitted in HTTP/1.0.
- * "
- *
- * - RFC 7230 section 3.2.6
- * "
- * A string of text is parsed as a single value if it is quoted using
- * double-quote marks.
- *
- * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
- * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
- * obs-text = %x80-FF
- * "
- *
- * \param escaped HTTP/1.0 does not permit \-escaped characters
- */
- bool quotedString(SBuf &value, const bool http1p0 = false);
-
- /**
- * Attempt to parse a (token / quoted-string ) lexical construct.
- */
- bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
-
-private:
- /// parse the internal component of a quote-string, and terminal DQUOTE
- bool qdText(SBuf &value, const bool http1p0);
-
- void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
- void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
-
- SBuf savedCheckpoint_;
- SBuf::size_type savedStats_;
-};
+SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false);
} // namespace One
} // namespace Http
#define SQUID_SRC_HTTP_ONE_FORWARD_H
#include "base/RefCount.h"
+#include "parser/forward.h"
#include "sbuf/forward.h"
namespace Http {
/// CRLF textual representation
const SBuf &CrLf();
+using ::Parser::InsufficientInput;
+
} // namespace One
} // namespace Http
#define SQUID_SRC_PARSER_BINARYTOKENIZER_H
#include "ip/forward.h"
+#include "parser/forward.h"
#include "sbuf/SBuf.h"
namespace Parser
class BinaryTokenizer
{
public:
- class InsufficientInput {}; // thrown when a method runs out of data
+ typedef ::Parser::InsufficientInput InsufficientInput;
typedef uint64_t size_type; // enough for the largest supported offset
BinaryTokenizer();
libparser_la_SOURCES = \
BinaryTokenizer.h \
BinaryTokenizer.cc \
+ forward.h \
Tokenizer.h \
Tokenizer.cc
#include "squid.h"
#include "Debug.h"
+#include "parser/forward.h"
#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
#include <cerrno>
#if HAVE_CTYPE_H
return true;
}
+SBuf
+Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
+{
+ if (atEnd())
+ throw InsufficientInput();
+
+ SBuf result;
+
+ if (!prefix(result, tokenChars, limit))
+ throw TexcHere(ToSBuf("cannot parse ", description));
+
+ if (atEnd())
+ throw InsufficientInput();
+
+ return result;
+}
+
bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
return success(s - range.rawContent());
}
+int64_t
+Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
+{
+ if (atEnd())
+ throw InsufficientInput();
+
+ int64_t result = 0;
+
+ // Since we only support unsigned decimals, a parsing failure with a
+ // non-empty input always implies invalid/malformed input (or a buggy
+ // limit=0 caller). TODO: Support signed and non-decimal integers by
+ // refactoring int64() to detect insufficient input.
+ if (!int64(result, 10, false, limit))
+ throw TexcHere(ToSBuf("cannot parse ", description));
+
+ if (atEnd())
+ throw InsufficientInput(); // more digits may be coming
+
+ return result;
+}
+
*/
bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
+ /*
+ * The methods below mimic their counterparts documented above, but they
+ * throw on errors, including InsufficientInput. The field description
+ * parameter is used for error reporting and debugging.
+ */
+
+ /// prefix() wrapper but throws InsufficientInput if input contains
+ /// nothing but the prefix (i.e. if the prefix is not "terminated")
+ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
+
+ /// int64() wrapper but limited to unsigned decimal integers (for now)
+ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
+
protected:
SBuf consume(const SBuf::size_type n);
SBuf::size_type success(const SBuf::size_type n);
--- /dev/null
+/*
+ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_PARSER_FORWARD_H
+#define SQUID_PARSER_FORWARD_H
+
+namespace Parser {
+class Tokenizer;
+class BinaryTokenizer;
+
+// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
+/// thrown by modern "incremental" parsers when they need more data
+class InsufficientInput {};
+} // namespace Parser
+
+#endif /* SQUID_PARSER_FORWARD_H */
+