/*
- * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
*/
#include "squid.h"
-#include "Debug.h"
+#include "debug/Stream.h"
+#include "http/one/Parser.h"
#include "http/one/Tokenizer.h"
-
-bool
-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
-{
- checkpoint();
-
- if (!skip('"'))
- return false;
-
- return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
+
+/// Extracts the rest of a quoted-string from tok after the caller has consumed the initial '"'.
+/// \param http1p0 whether to prohibit \-escaped characters (quoted-pair) in quoted strings
+/// \throws InsufficientInput when tok runs out of input before the closing '"' is seen; a TexcHere() exception is thrown instead for an invalid quoted-pair character or a byte outside the selected qdtext set
+/// \returns extracted quoted string (without quotes and with chars unescaped); may be empty
+static SBuf
+parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
{
- checkpoint();
-
- if (!skip('"'))
- return prefix(returnedToken, CharacterSet::TCHAR);
-
- return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
-{
- // the initial DQUOTE has been skipped by the caller
-
/*
* RFC 1945 - defines qdtext:
* inclusive of LWS (which includes CR and LF)
* exclusive of 0x80-0xFF
- * includes 0x5E ('\') as just a regular character
+ * includes 0x5C ('\') as just a regular character
*/
static const CharacterSet qdtext1p0 = CharacterSet("qdtext (HTTP/1.0)", 0x23, 0x7E) +
CharacterSet("", "!") +
* RFC 7230 - defines qdtext:
* exclusive of CR and LF
* inclusive of 0x80-0xFF
- * includes 0x5E ('\') but only when part of quoted-pair
+ * includes 0x5C ('\') but only when part of quoted-pair
*/
static const CharacterSet qdtext1p1 = CharacterSet("qdtext (HTTP/1.1)", 0x23, 0x5B) +
CharacterSet("", "!") +
// best we can do is a conditional reference since http1p0 value may change per-client
const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
- for (;;) {
- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
- returnedToken.append(consume(prefixLen));
+ SBuf parsedToken;
+
+ while (!tok.atEnd()) {
+ SBuf qdText;
+ if (tok.prefix(qdText, tokenChars))
+ parsedToken.append(qdText);
+
+ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+ if (tok.atEnd())
+ break;
- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
- if (!http1p0 && skip('\\')) {
/* RFC 7230 section 3.2.6
*
* The backslash octet ("\") can be used as a single-octet quoting
*/
static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
SBuf escaped;
- if (!prefix(escaped, qPairChars, 1)) {
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
- }
- returnedToken.append(escaped);
+ if (!tok.prefix(escaped, qPairChars, 1))
+ throw TexcHere("invalid escaped character in quoted-pair");
+
+ parsedToken.append(escaped);
continue;
+ }
- } else if (skip('"')) {
- break; // done
+ if (tok.skip('"'))
+ return parsedToken; // may be empty
- } else if (atEnd()) {
- // need more data
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
- }
+ if (tok.atEnd())
+ break;
- // else, we have an error
- debugs(24, 8, "invalid bytes for set " << tokenChars.name);
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
+ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
}
- // found the whole string
- return true;
+ throw Http::One::InsufficientInput();
+}
+
+SBuf // parses one HTTP token or one quoted-string from tok and returns its value
+Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
+{
+ if (tok.skip('"')) // an opening DQUOTE selects the quoted-string branch
+ return parseQuotedStringSuffix(tok, http1p0);
+
+ if (tok.atEnd()) // nothing left to parse, so neither form can be recognized yet
+ throw InsufficientInput();
+
+ SBuf parsedToken;
+ if (!tok.prefix(parsedToken, CharacterSet::TCHAR)) // the next byte is not a TCHAR
+ throw TexcHere("invalid input while expecting an HTTP token");
+
+ if (tok.atEnd()) // TCHARs ran up to the end of input; presumably the token may continue in bytes not yet received
+ throw InsufficientInput();
+
+ // got the complete token
+ return parsedToken;
}