git.ipfire.org Git - thirdparty/squid.git/blobdiff - src/http/one/Tokenizer.cc
Source Format Enforcement (#1234)
[thirdparty/squid.git] / src / http / one / Tokenizer.cc
index 7f41eb5151942a7c21a0550fd169094b30487447..cc8787409f94b0e9293af9372a4a72d78f29e48d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
  *
  * Squid software is distributed under GPLv2+ license and includes
  * contributions from numerous individuals and organizations.
@@ -7,36 +7,19 @@
  */
 
 #include "squid.h"
-#include "Debug.h"
+#include "debug/Stream.h"
+#include "http/one/Parser.h"
 #include "http/one/Tokenizer.h"
-
-bool
-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
-{
-    checkpoint();
-
-    if (!skip('"'))
-        return false;
-
-    return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
+
+/// Extracts quoted-string after the caller removes the initial '"'.
+/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
+/// \throws InsufficientInput when input ends before the closing '"' (i.e. input may be a quoted-string _prefix_)
+/// \returns extracted quoted string (without quotes and with chars unescaped)
+static SBuf
+parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
 {
-    checkpoint();
-
-    if (!skip('"'))
-        return prefix(returnedToken, CharacterSet::TCHAR);
-
-    return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
-{
-    // the initial DQUOTE has been skipped by the caller
-
     /*
      * RFC 1945 - defines qdtext:
      *   inclusive of LWS (which includes CR and LF)
@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
     // best we can do is a conditional reference since http1p0 value may change per-client
     const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
 
-    for (;;) {
-        SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
-        returnedToken.append(consume(prefixLen));
+    SBuf parsedToken;
+
+    while (!tok.atEnd()) {
+        SBuf qdText;
+        if (tok.prefix(qdText, tokenChars))
+            parsedToken.append(qdText);
+
+        if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+            if (tok.atEnd())
+                break;
 
-        // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
-        if (!http1p0 && skip('\\')) {
             /* RFC 7230 section 3.2.6
              *
              * The backslash octet ("\") can be used as a single-octet quoting
@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
              */
             static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
             SBuf escaped;
-            if (!prefix(escaped, qPairChars, 1)) {
-                returnedToken.clear();
-                restoreLastCheckpoint();
-                return false;
-            }
-            returnedToken.append(escaped);
+            if (!tok.prefix(escaped, qPairChars, 1))
+                throw TexcHere("invalid escaped character in quoted-pair");
+
+            parsedToken.append(escaped);
             continue;
+        }
 
-        } else if (skip('"')) {
-            break; // done
+        if (tok.skip('"'))
+            return parsedToken; // may be empty
 
-        } else if (atEnd()) {
-            // need more data
-            returnedToken.clear();
-            restoreLastCheckpoint();
-            return false;
-        }
+        if (tok.atEnd())
+            break;
 
-        // else, we have an error
-        debugs(24, 8, "invalid bytes for set " << tokenChars.name);
-        returnedToken.clear();
-        restoreLastCheckpoint();
-        return false;
+        throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
     }
 
-    // found the whole string
-    return true;
+    throw Http::One::InsufficientInput();
+}
+
+SBuf
+Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
+{ // Extracts either a quoted-string (input starts with '"') or a token made of TCHAR bytes from tok.
+    if (tok.skip('"')) // the opening DQUOTE is consumed here; the helper parses the rest
+        return parseQuotedStringSuffix(tok, http1p0); // http1p0 is forwarded unchanged to the helper
+
+    if (tok.atEnd()) // no input left to inspect, so the caller needs to supply more
+        throw InsufficientInput();
+
+    SBuf parsedToken;
+    if (!tok.prefix(parsedToken, CharacterSet::TCHAR)) // presumably false when no leading TCHAR bytes exist -- confirm against Parser::Tokenizer
+        throw TexcHere("invalid input while expecting an HTTP token");
+
+    if (tok.atEnd()) // input ran out right after the TCHAR run, so the token may continue in later input
+        throw InsufficientInput();
+
+    // got the complete token: unparsed input remains after the TCHAR run
+    return parsedToken;
+}