From: Francesco Chemolli <5175948+kinkie@users.noreply.github.com> Date: Sun, 26 Nov 2023 10:37:40 +0000 (+0000) Subject: Fix and improve html_quote() (#1513) X-Git-Tag: SQUID_7_0_1~275 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=703535b8a0a0152b712cf67061b8835e40ab152f;p=thirdparty%2Fsquid.git Fix and improve html_quote() (#1513) Fixed an html_quote() bug: Unwanted space characters in decimal numeric character references (e.g., `&#  7;` for ASCII BEL character). Encode single quote character (`'`) as `&apos;` rather than `&#39;`. Reduced encoding complexity from O(5*n) to O(n), where n is input string length (and 5 is the number of supported character references)! Added unit tests. --- diff --git a/src/Makefile.am b/src/Makefile.am index 2c3fdc069d..224ac18c84 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1726,6 +1726,24 @@ EXTRA_DIST += \ tests/testACLMaxUserIP.cc endif +## Tests of html/* + +check_PROGRAMS += tests/testHtmlQuote +tests_testHtmlQuote_SOURCES = \ + tests/testHtmlQuote.cc +nodist_tests_testHtmlQuote_SOURCES = \ + $(TESTSOURCES) \ + tests/stub_debug.cc \ + tests/stub_libmem.cc +tests_testHtmlQuote_LDADD= \ + html/libhtml.la \ + sbuf/libsbuf.la \ + base/libbase.la \ + $(LIBCPPUNIT_LIBS) \ + $(COMPAT_LIB) \ + $(XTRA_LIBS) +tests_testHtmlQuote_LDFLAGS = $(LIBADD_DL) + ## Tests of http/* and HTTP Protocol objects check_PROGRAMS += tests/test_http_range diff --git a/src/html/Quoting.cc b/src/html/Quoting.cc index d9e0a0bee6..26da9cc05c 100644 --- a/src/html/Quoting.cc +++ b/src/html/Quoting.cc @@ -8,51 +8,44 @@ #include "squid.h" #include "html/Quoting.h" +#include "sbuf/SBuf.h" -/* - * HTML defines these characters as special entities that should be quoted. - */ -static struct { - unsigned char code; - const char *quote; -} htmlstandardentities[] = +#include +#include +static const auto & +EscapeSequences() { - /* NOTE: The quoted form MUST not be larger than 6 character. 
- * see close to the MemPool commend below - */ - { - '<', "<" - }, - { - '>', ">" - }, - { - '"', """ - }, - { - '&', "&" - }, - { - '\'', "'" - }, - { - 0, NULL + static auto escapeMap = new std::array {}; + auto &em = *escapeMap; + if (!em['<'].isEmpty()) + return em; + + // Encode control chars just to be on the safe side and make sure all 8-bit + // characters are encoded to protect from buggy clients. + for (int ch = 0; ch < 256; ++ch) { + if ((ch <= 0x1F || ch >= 0x7f) && ch != '\n' && ch != '\r' && ch != '\t') { + em[ch] = SBuf().Printf("&#%d;", ch); + } } -}; -/* - * html_do_quote - Returns a static buffer containing the quoted - * string. - */ + em['<'] = "<"; + em['>'] = ">"; + em['"'] = """; + em['&'] = "&"; + em['\''] = "'"; + + return em; +} + char * html_quote(const char *string) { + static const auto &escapeSequences = EscapeSequences(); static char *buf = nullptr; static size_t bufsize = 0; const char *src; char *dst; - int i; /* XXX This really should be implemented using a MemPool, but * MemPools are not yet available in lib... @@ -63,31 +56,13 @@ html_quote(const char *string) buf = static_cast(xcalloc(bufsize, 1)); } for (src = string, dst = buf; *src; src++) { - const char *escape = NULL; const unsigned char ch = *src; - /* Walk thru the list of HTML Entities that must be quoted to - * display safely - */ - for (i = 0; htmlstandardentities[i].code; i++) { - if (ch == htmlstandardentities[i].code) { - escape = htmlstandardentities[i].quote; - break; - } - } - /* Encode control chars just to be on the safe side, and make - * sure all 8-bit characters are encoded to protect from buggy - * clients - */ - if (!escape && (ch <= 0x1F || ch >= 0x7f) && ch != '\n' && ch != '\r' && ch != '\t') { - static char dec_encoded[7]; - snprintf(dec_encoded, sizeof dec_encoded, "&#%3d;", (int) ch); - escape = dec_encoded; - } - if (escape) { + const auto &escape = escapeSequences[ch]; + if (!escape.isEmpty()) { /* Ok, An escaped form was found above. 
Use it */ - strncpy(dst, escape, 7); - dst += strlen(escape); + escape.copy(dst, 7); + dst += escape.length(); } else { /* Apparently there is no need to escape this character */ *dst++ = ch; diff --git a/src/html/Quoting.h b/src/html/Quoting.h index 14d85cd4a7..06060e3d9d 100644 --- a/src/html/Quoting.h +++ b/src/html/Quoting.h @@ -9,6 +9,11 @@ #ifndef SQUID__SRC_HTML_QUOTING_H #define SQUID__SRC_HTML_QUOTING_H +/** Obtain a static buffer containing an HTML-encoded version of the given c-string. + * + * HTML reserved characters are replaced with character references + * per https://html.spec.whatwg.org/#character-references + */ char *html_quote(const char *); #endif /* SQUID__SRC_HTML_QUOTING_H */ diff --git a/src/tests/testHtmlQuote.cc b/src/tests/testHtmlQuote.cc new file mode 100644 index 0000000000..b9be01bd28 --- /dev/null +++ b/src/tests/testHtmlQuote.cc @@ -0,0 +1,65 @@ +/* + * Copyright (C) 1996-2023 The Squid Software Foundation and contributors + * + * Squid software is distributed under GPLv2+ license and includes + * contributions from numerous individuals and organizations. + * Please see the COPYING and CONTRIBUTORS files for details. 
+ */ + +#include "squid.h" +#include "compat/cppunit.h" +#include "html/Quoting.h" +#include "unitTestMain.h" + +#include +#include + +class TestHtmlQuote: public CPPUNIT_NS::TestFixture +{ + CPPUNIT_TEST_SUITE(TestHtmlQuote); + CPPUNIT_TEST(test_html_quote_cstr); + CPPUNIT_TEST_SUITE_END(); + +protected: + void test_html_quote_cstr(); + void testPerformance(); +}; + +CPPUNIT_TEST_SUITE_REGISTRATION( TestHtmlQuote ); + +void +TestHtmlQuote::test_html_quote_cstr() +{ + CPPUNIT_ASSERT_EQUAL(std::string(""), std::string(html_quote(""))); + CPPUNIT_ASSERT_EQUAL(std::string("bar"), std::string(html_quote("bar"))); + CPPUNIT_ASSERT_EQUAL(std::string("foo<bar>gazonk"), std::string(html_quote("foogazonk"))); + CPPUNIT_ASSERT_EQUAL(std::string("foo&bar"), std::string(html_quote("foo&bar"))); + CPPUNIT_ASSERT_EQUAL(std::string("some'thing"), std::string(html_quote("some'thing"))); + CPPUNIT_ASSERT_EQUAL(std::string("some"thing"), std::string(html_quote("some\"thing"))); + CPPUNIT_ASSERT_EQUAL(std::string("<>"&'"), std::string(html_quote("<>\"&'"))); + CPPUNIT_ASSERT_EQUAL(std::string(">"), std::string(html_quote(">"))); + CPPUNIT_ASSERT_EQUAL(std::string("£"), std::string(html_quote("\xa3"))); + + for (unsigned char ch = 1; ch < 0xff; ++ch) { + unsigned char buf[2] = {ch, '\0'}; + auto quoted = html_quote(reinterpret_cast(buf)); + + if (strlen(quoted) == 1) { + CPPUNIT_ASSERT_EQUAL(static_cast(ch), static_cast(quoted[0])); + } else { + CPPUNIT_ASSERT(strlen(quoted) >= 3); + CPPUNIT_ASSERT_EQUAL('&', quoted[0]); + CPPUNIT_ASSERT_EQUAL(';', quoted[strlen(quoted)-1]); + if (quoted[1] == '#') { + CPPUNIT_ASSERT(strlen(quoted) > 3); + CPPUNIT_ASSERT(strlen(quoted) <= 6); + } + } + } +} + +int +main(int argc, char *argv[]) +{ + return TestProgram().run(argc, argv); +}