From: Thomas Markwalder Date: Tue, 10 Jul 2018 14:48:09 +0000 (-0400) Subject: [5680] Refactor stringSanitizer func into StringSanitizer class X-Git-Tag: ha_phase2~35 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=eb264aecca534aaf626e2d8b44e38bdd2ec69a4a;p=thirdparty%2Fkea.git [5680] Refactor stringSanitizer func into StringSanitizer class modified: src/lib/dhcpsrv/d2_client_cfg.cc src/lib/dhcpsrv/d2_client_cfg.h src/lib/util/strutil.cc src/lib/util/strutil.h src/lib/util/tests/strutil_unittest.cc --- diff --git a/src/lib/dhcpsrv/d2_client_cfg.cc b/src/lib/dhcpsrv/d2_client_cfg.cc index 903b50f618..6eae3b2b74 100644 --- a/src/lib/dhcpsrv/d2_client_cfg.cc +++ b/src/lib/dhcpsrv/d2_client_cfg.cc @@ -113,7 +113,8 @@ D2ClientConfig::D2ClientConfig(const bool enable_updates, generated_prefix_(generated_prefix), qualifying_suffix_(qualifying_suffix), hostname_char_set_(hostname_char_set), - hostname_char_replacement_(hostname_char_replacement) { + hostname_char_replacement_(hostname_char_replacement), + hostname_sanitizer_(0) { validateContents(); } @@ -133,7 +134,8 @@ D2ClientConfig::D2ClientConfig() generated_prefix_(DFT_GENERATED_PREFIX), qualifying_suffix_(""), hostname_char_set_(DFT_HOSTNAME_CHAR_SET), - hostname_char_replacement_(DFT_HOSTNAME_CHAR_SET) { + hostname_char_replacement_(DFT_HOSTNAME_CHAR_SET), + hostname_sanitizer_(0) { validateContents(); } @@ -172,6 +174,16 @@ D2ClientConfig::validateContents() { << server_ip_.toText() << "/" << server_port_); } + if (!hostname_char_set_.empty()) { + try { + hostname_sanitizer_.reset(new isc::util::str::StringSanitizer(hostname_char_set_, + hostname_char_replacement_)); + } catch (const std::exception& ex) { + isc_throw(D2ClientError, "D2ClientConfig: hostname-char-set" + " is not a valid regular expression"); + } + } + /// @todo perhaps more validation we should do yet? /// Are there any invalid combinations of options we need to test against? } diff --git a/src/lib/dhcpsrv/d2_client_cfg.h b/src/lib/dhcpsrv/d2_client_cfg.h index 8c5470380b..5da4af6004 100644 --- a/src/lib/dhcpsrv/d2_client_cfg.h +++ b/src/lib/dhcpsrv/d2_client_cfg.h @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -312,6 +313,9 @@ private: /// @brief A string to replace invalid characters when scrubbing hostnames. /// Meaningful only if hostname_char_set_ is not empty. std::string hostname_char_replacement_; + + /// @brief Pointer to compiled regular expression string sanitizer + util::str::StringSanitizerPtr hostname_sanitizer_; }; std::ostream& diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc index b250d36bf4..7d51bbcd04 100644 --- a/src/lib/util/strutil.cc +++ b/src/lib/util/strutil.cc @@ -299,79 +299,111 @@ decodeFormattedHexString(const std::string& hex_string, } } -std::string -sanitizeString(const std::string& original, - const std::string& invalidChars, - const std::string& replacement) { +class StringSanitizerImpl { +public: + StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement) + : char_set_(char_set), char_replacement_(char_replacement) { #ifdef USE_REGEX - std::regex rexpr; - try { - rexpr = std::regex(invalidChars, std::regex::extended); - } catch (const std::exception& ex) { - isc_throw(isc::BadValue, "invalid regex: '" - << invalidChars << "', " << ex.what()); + try { + scrub_exp_ = std::regex(char_set, std::regex::extended); + } catch (const std::exception& ex) { + isc_throw(isc::BadValue, "invalid regex: '" + << char_set_ << "', " << ex.what()); + } +#else + int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED); + if (ec) { + char errbuf[512] = ""; + static_cast(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf))); + regfree(&scrub_exp_); + isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf); + } +#endif } - std::stringstream result; - try { - std::regex_replace(std::ostream_iterator(result), - original.begin(), original.end(), - rexpr, replacement); - } catch (const std::exception& ex) { - isc_throw(isc::BadValue, "replacing '" << invalidChars << "' with '" - << replacement << "' in '" << original << "' failed: ," - << ex.what()); + /// @brief Destructor. + ~StringSanitizerImpl() { +#ifndef USE_REGEX + regfree(&scrub_exp_); +#endif } - return (result.str()); + std::string scrub(const std::string& original) { +#ifdef USE_REGEX + std::stringstream result; + try { + std::regex_replace(std::ostream_iterator(result), + original.begin(), original.end(), + scrub_expr, char_replacement_); + } catch (const std::exception& ex) { + isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '" + << char_replacement_ << "' in '" << original << "' failed: ," + << ex.what()); + } + + return (result.str()); #else - // Compile the expression. - regex_t rex; - int ec = regcomp(&rex, invalidChars.c_str(), REG_EXTENDED); - if (ec) { - char errbuf[512] = ""; - static_cast(regerror(ec, &rex, errbuf, sizeof(errbuf))); - isc_throw(isc::BadValue, "invalid regex: '" << invalidChars - << "', " << errbuf); - } + // Iterate over original string, match by match. + const char* origStr = original.c_str(); + const char* startFrom = origStr; + const char* endAt = origStr + strlen(origStr); + regmatch_t matches[2]; // n matches + 1 + stringstream result; + + while (startFrom < endAt) { + // Look for the next match + if (regexec(&scrub_exp_, startFrom, 1, matches, 0) == REG_NOMATCH) { + // No matches, so add in the remainder + result << startFrom; + break; + } - // Iterate over original string, match by match. - const char* origStr = original.c_str(); - const char* startFrom = origStr; - const char* endAt = origStr + strlen(origStr); - regmatch_t matches[2]; // n matches + 1 - stringstream result; - - while (startFrom < endAt) { - // Look for the next match - if (regexec(&rex, startFrom, 1, matches, 0) == REG_NOMATCH) { - // No matches, so add in the remainder - result << startFrom; - break; - } + // Shouldn't happen, but one never knows eh? + if (matches[0].rm_so == -1) { + isc_throw(isc::Unexpected, "matched but so is -1?"); + } - // Shouldn't happen, but one never knows eh? - if (matches[0].rm_so == -1) { - isc_throw(isc::Unexpected, "matched but so is -1?"); - } + // Add everything from starting point up to the current match + const char* matchAt = startFrom + matches[0].rm_so; + while (startFrom < matchAt) { + result << *startFrom; + ++startFrom; + } + + // Add in the replacement + result << char_replacement_; - // Add everything from starting point up to the current match - const char* matchAt = startFrom + matches[0].rm_so; - while (startFrom < matchAt) { - result << *startFrom; + // Move past the match. ++startFrom; } - // Add in the replacement - result << replacement; - - // Move past the match. - ++startFrom; + return (result.str()); +#endif } - regfree(&rex); - return (result.str()); +private: + std::string char_set_; + std::string char_replacement_; + +#ifdef USE_REGEX + regex scrub_exp_; +#else + regex_t scrub_exp_; #endif +}; + +StringSanitizer::StringSanitizer(const std::string& char_set, + const std::string& char_replacement) + : impl_(new StringSanitizerImpl(char_set, char_replacement)) { +} + +StringSanitizer::~StringSanitizer() { + delete impl_; +} + +std::string +StringSanitizer::scrub(const std::string& original) { + return (impl_->scrub(original)); } } // namespace str diff --git a/src/lib/util/strutil.h b/src/lib/util/strutil.h index c31fd38ac3..1cdb833c18 100644 --- a/src/lib/util/strutil.h +++ b/src/lib/util/strutil.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace isc { namespace util { @@ -255,25 +256,53 @@ void decodeFormattedHexString(const std::string& hex_string, std::vector& binary); -/// \brief Replaces all occurences of a character set in a string -/// -/// This function runs a given string through a regular expression, -/// replacing all "matches" of that expression with the specified string. -/// -/// \param original the string to sanitize -/// \param invalidChars string containing a regular expression (POSIX -/// extended syntax) that describes the characters to replace. If you -/// wanted to sanitize hostnames for example, you could specify the -/// inversion of valid characters "[^A-Za-z0-9_-]". -/// \param replacement string of one or more characters to use as the -/// replacement for invalid characters. -/// \return the new, sanitized string -/// \throw BadValue if given an invalid regular expression, Unexpected if -/// an error occurs executing the expression -std::string -sanitizeString(const std::string& original, - const std::string& invalidChars, - const std::string& replacement); +/// @brief Forward declaration to the @c StringSanitizer implementation. +class StringSanitizerImpl; + +/// @brief Implements a regular expression based string scrubber +/// +/// The implementation uses C++11 regex IF the environemnt supports it +/// (tested in configure.ac). If not it falls back to C lib regcomp/regexec. +/// Older compilers, such as pre Gnu 4.9.0, provided only experimental +/// implementations of regex which are recognized as buggy. +class StringSanitizer { +public: + + /// Constructor + /// + /// Compiles the given character set into a regular expression, and + /// retains the given character replacement. Thereafter, the instance + /// may be used to scrub an arbitrary number of strings. + /// + /// @param char_set string containing a regular expression (POSIX + /// extended syntax) that describes the characters to replace. If you + /// wanted to sanitize hostnames for example, you could specify the + /// inversion of valid characters "[^A-Za-z0-9_-]". + /// @param char_replacement string of one or more characters to use as the + /// replacement for invalid characters. + /// @throw BadValue if given an invalid regular expression + StringSanitizer(const std::string& char_set, + const std::string& char_replacement); + + /// @brief Destructor. + /// + /// Destroys the implementation instance. + ~StringSanitizer(); + + /// Returns a scrubbed copy of a given string + /// + /// Replaces all occurrances of characters described by the regular + /// expression with the character replacement . + /// + /// @param original the string to scrub + /// @throw Unexpected if an error occurs during scrubbing + std::string scrub(const std::string& original); +private: + /// @brief Pointer to the @c StringSanitizerImpl. + StringSanitizerImpl* impl_; +}; + +typedef boost::shared_ptr StringSanitizerPtr; } // namespace str } // namespace util diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc index 799f91273a..893409621d 100644 --- a/src/lib/util/tests/strutil_unittest.cc +++ b/src/lib/util/tests/strutil_unittest.cc @@ -463,49 +463,81 @@ TEST(StringUtilTest, decodeFormattedHexString) { isc::BadValue); } -// Verifies sanitizeString() function -TEST(StringUtilTest, sanitizeString) { +/// @brief Fucntion used to test StringSantitizer +/// @param original - string to sanitize +/// @param char_set - regular expression string describing invalid +/// characters +/// @param char_replacement - character(s) which replace invalid +/// characters +/// @param expected - expected sanitized string +void sanitizeStringTest( + const std::string& original, + const std::string& char_set, + const std::string& char_replacement, + const std::string& expected) { + + StringSanitizerPtr ss; std::string sanitized; + try { + ss.reset(new StringSanitizer(char_set, char_replacement)); + } catch (const std::exception& ex) { + ADD_FAILURE() << "Could not construct sanitizer:" << ex.what(); + return; + } + + try { + sanitized = ss->scrub(original); + } catch (const std::exception& ex) { + ADD_FAILURE() << "Could not scrub string:" << ex.what(); + return; + } + + EXPECT_EQ(sanitized, expected); +} + +// Verifies StringSantizer class +TEST(StringUtilTest, stringSanitizer) { + // Bad regular expression should throw. - ASSERT_THROW (sanitized = sanitizeString("just a string", "[bogus-regex",""), - BadValue); + StringSanitizerPtr ss; + ASSERT_THROW (ss.reset(new StringSanitizer("[bogus-regex","")), BadValue); // List of invalid chars should work: (b,c,2 are invalid) - ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[b-c2]","*")); - EXPECT_EQ(sanitized, "a**.1*3"); + sanitizeStringTest("abc.123", "[b-c2]", "*", + "a**.1*3"); - // Inverted list for valid chars should work too: (b,c,2 are valid) - ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[^b-c2]","*")); - EXPECT_EQ(sanitized, "*bc**2*"); + // Inverted list of valid chars should work: (b,c,2 are invalid) + sanitizeStringTest("abc.123", "[^b-c2]", "*", + "*bc**2*"); // A string of all valid chars should return an identical string. - ASSERT_NO_THROW (sanitized = sanitizeString("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]","x")); - EXPECT_EQ(sanitized, "-_A--B__Cabc34567_-"); + sanitizeStringTest("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x", + "-_A--B__Cabc34567_-"); // Replacing with a character should work. - ASSERT_NO_THROW (sanitized = sanitizeString("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]","*")); - EXPECT_EQ(sanitized, "A*b*c*JoE3*_x*B*Y*e"); + sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*", + "A*b*c*JoE3*_x*B*Y*e"); // Removing (i.e.replacing with an "empty" string) should work. - ASSERT_NO_THROW (sanitized = sanitizeString("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]","")); - EXPECT_EQ(sanitized, "AbcJoE3_xBYe"); + sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "", + "AbcJoE3_xBYe"); // More than one non-matching in a row should work. - ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","x")); - EXPECT_EQ(sanitized, "xxAxxBxxCxx"); + sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x", + "xxAxxBxxCxx"); // Removing than one non-matching in a row should work. - ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","")); - EXPECT_EQ(sanitized, "ABC"); + sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "", + "ABC"); // Replacing with a string should work. - ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","xyz")); - EXPECT_EQ(sanitized, "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); + sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz", + "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); // Dots as valid chars work. - ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[^A-Za-z0-9_.]","*")); - EXPECT_EQ(sanitized, "abc.123"); + sanitizeStringTest("abc.123", "[^A-Za-z0-9_.]", "*", + "abc.123"); } } // end of anonymous namespace