git.ipfire.org Git - thirdparty/squid.git/commitdiff
Fix and improve html_quote() (#1513)
author Francesco Chemolli <5175948+kinkie@users.noreply.github.com>
Sun, 26 Nov 2023 10:37:40 +0000 (10:37 +0000)
committer Squid Anubis <squid-anubis@squid-cache.org>
Sun, 26 Nov 2023 10:37:45 +0000 (10:37 +0000)
Fixed an html_quote() bug: Unwanted space characters in decimal numeric
character references (e.g., `&#  7;` for ASCII BEL character).

Encode single quote character (`'`) as `&apos;` rather than `&#39;`.

Reduced encoding complexity from O(5*n) to O(n), where n is input string
length (and 5 is the number of supported character references)!

Added unit tests.

src/Makefile.am
src/html/Quoting.cc
src/html/Quoting.h
src/tests/testHtmlQuote.cc [new file with mode: 0644]

diff --git a/src/Makefile.am b/src/Makefile.am
index 2c3fdc069dd86f38af07bf03a4ed466cfa6f83e9..224ac18c8436dd048b4f13af59eaa9ad190f7c43 100644 (file)
@@ -1726,6 +1726,24 @@ EXTRA_DIST += \
        tests/testACLMaxUserIP.cc
 endif
 
+## Tests of html/*
+
+check_PROGRAMS += tests/testHtmlQuote
+tests_testHtmlQuote_SOURCES = \
+       tests/testHtmlQuote.cc
+nodist_tests_testHtmlQuote_SOURCES = \
+       $(TESTSOURCES) \
+       tests/stub_debug.cc \
+       tests/stub_libmem.cc
+tests_testHtmlQuote_LDADD= \
+       html/libhtml.la \
+       sbuf/libsbuf.la \
+       base/libbase.la \
+       $(LIBCPPUNIT_LIBS) \
+       $(COMPAT_LIB) \
+       $(XTRA_LIBS)
+tests_testHtmlQuote_LDFLAGS = $(LIBADD_DL)
+
 ## Tests of http/* and HTTP Protocol objects
 
 check_PROGRAMS += tests/test_http_range
diff --git a/src/html/Quoting.cc b/src/html/Quoting.cc
index d9e0a0bee614b9a897eb465ee519746489388077..26da9cc05c7abccb7daba1512b335f7193024761 100644 (file)
@@ -8,51 +8,44 @@
 
 #include "squid.h"
 #include "html/Quoting.h"
+#include "sbuf/SBuf.h"
 
-/*
- *  HTML defines these characters as special entities that should be quoted.
- */
-static struct {
-    unsigned char code;
-    const char *quote;
-} htmlstandardentities[] =
+#include <array>
+#include <cstring>
 
+static const auto &
+EscapeSequences()
 {
-    /* NOTE: The quoted form MUST not be larger than 6 character.
-     * see close to the MemPool commend below
-     */
-    {
-        '<', "&lt;"
-    },
-    {
-        '>', "&gt;"
-    },
-    {
-        '"', "&quot;"
-    },
-    {
-        '&', "&amp;"
-    },
-    {
-        '\'', "&#39;"
-    },
-    {
-        0, NULL
+    static auto escapeMap = new std::array<SBuf, 256> {};
+    auto &em = *escapeMap;
+    if (!em['<'].isEmpty())
+        return em;
+
+    // Encode control chars just to be on the safe side and make sure all 8-bit
+    // characters are encoded to protect from buggy clients.
+    for (int ch = 0; ch < 256; ++ch) {
+        if ((ch <= 0x1F || ch >= 0x7f) && ch != '\n' && ch != '\r' && ch != '\t') {
+            em[ch] = SBuf().Printf("&#%d;", ch);
+        }
     }
-};
 
-/*
- *  html_do_quote - Returns a static buffer containing the quoted
- *  string.
- */
+    em['<'] = "&lt;";
+    em['>'] = "&gt;";
+    em['"'] = "&quot;";
+    em['&'] = "&amp;";
+    em['\''] = "&apos;";
+
+    return em;
+}
+
 char *
 html_quote(const char *string)
 {
+    static const auto &escapeSequences = EscapeSequences();
     static char *buf = nullptr;
     static size_t bufsize = 0;
     const char *src;
     char *dst;
-    int i;
 
     /* XXX This really should be implemented using a MemPool, but
      * MemPools are not yet available in lib...
@@ -63,31 +56,13 @@ html_quote(const char *string)
         buf = static_cast<char *>(xcalloc(bufsize, 1));
     }
     for (src = string, dst = buf; *src; src++) {
-        const char *escape = NULL;
         const unsigned char ch = *src;
 
-        /* Walk thru the list of HTML Entities that must be quoted to
-         * display safely
-         */
-        for (i = 0; htmlstandardentities[i].code; i++) {
-            if (ch == htmlstandardentities[i].code) {
-                escape = htmlstandardentities[i].quote;
-                break;
-            }
-        }
-        /* Encode control chars just to be on the safe side, and make
-         * sure all 8-bit characters are encoded to protect from buggy
-         * clients
-         */
-        if (!escape && (ch <= 0x1F || ch >= 0x7f) && ch != '\n' && ch != '\r' && ch != '\t') {
-            static char dec_encoded[7];
-            snprintf(dec_encoded, sizeof dec_encoded, "&#%3d;", (int) ch);
-            escape = dec_encoded;
-        }
-        if (escape) {
+        const auto &escape = escapeSequences[ch];
+        if (!escape.isEmpty()) {
             /* Ok, An escaped form was found above. Use it */
-            strncpy(dst, escape, 7);
-            dst += strlen(escape);
+            escape.copy(dst, 7);
+            dst += escape.length();
         } else {
             /* Apparently there is no need to escape this character */
             *dst++ = ch;
diff --git a/src/html/Quoting.h b/src/html/Quoting.h
index 14d85cd4a786edb1db24221f9549df9c2ff4a4c6..06060e3d9d42492087d6170b80ab867029eaa401 100644 (file)
@@ -9,6 +9,11 @@
 #ifndef SQUID__SRC_HTML_QUOTING_H
 #define SQUID__SRC_HTML_QUOTING_H
 
+/** Obtain a static buffer containing an HTML-encoded version of the given c-string.
+ *
+ * HTML reserved characters are replaced with character references
+ * per https://html.spec.whatwg.org/#character-references
+ */
 char *html_quote(const char *);
 
 #endif /* SQUID__SRC_HTML_QUOTING_H */
diff --git a/src/tests/testHtmlQuote.cc b/src/tests/testHtmlQuote.cc
new file mode 100644 (file)
index 0000000..b9be01b
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "compat/cppunit.h"
+#include "html/Quoting.h"
+#include "unitTestMain.h"
+
+#include <cstring>
+#include <iostream>
+
+class TestHtmlQuote: public CPPUNIT_NS::TestFixture
+{
+    CPPUNIT_TEST_SUITE(TestHtmlQuote);
+    CPPUNIT_TEST(test_html_quote_cstr);
+    CPPUNIT_TEST_SUITE_END();
+
+protected:
+    void test_html_quote_cstr();
+    void testPerformance();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestHtmlQuote );
+
+void
+TestHtmlQuote::test_html_quote_cstr()
+{
+    CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(html_quote("")));
+    CPPUNIT_ASSERT_EQUAL(std::string("bar"), std::string(html_quote("bar")));
+    CPPUNIT_ASSERT_EQUAL(std::string("foo&lt;bar&gt;gazonk"), std::string(html_quote("foo<bar>gazonk")));
+    CPPUNIT_ASSERT_EQUAL(std::string("foo&amp;bar"), std::string(html_quote("foo&bar")));
+    CPPUNIT_ASSERT_EQUAL(std::string("some&apos;thing"), std::string(html_quote("some'thing")));
+    CPPUNIT_ASSERT_EQUAL(std::string("some&quot;thing"), std::string(html_quote("some\"thing")));
+    CPPUNIT_ASSERT_EQUAL(std::string("&lt;&gt;&quot;&amp;&apos;"), std::string(html_quote("<>\"&'")));
+    CPPUNIT_ASSERT_EQUAL(std::string("&gt;"), std::string(html_quote(">")));
+    CPPUNIT_ASSERT_EQUAL(std::string("&#163;"), std::string(html_quote("\xa3")));
+
+    for (unsigned char ch = 1; ch < 0xff; ++ch) {
+        unsigned char buf[2] = {ch, '\0'};
+        auto quoted = html_quote(reinterpret_cast<char *>(buf));
+
+        if (strlen(quoted) == 1) {
+            CPPUNIT_ASSERT_EQUAL(static_cast<int>(ch), static_cast<int>(quoted[0]));
+        } else {
+            CPPUNIT_ASSERT(strlen(quoted) >= 3);
+            CPPUNIT_ASSERT_EQUAL('&', quoted[0]);
+            CPPUNIT_ASSERT_EQUAL(';', quoted[strlen(quoted)-1]);
+            if (quoted[1] == '#') {
+                CPPUNIT_ASSERT(strlen(quoted) > 3);
+                CPPUNIT_ASSERT(strlen(quoted) <= 6);
+            }
+        }
+    }
+}
+
+int
+main(int argc, char *argv[])
+{
+    return TestProgram().run(argc, argv);
+}