[5680] Refactor stringSanitizer func into StringSanitizer class

author Thomas Markwalder <tmark@isc.org>

Tue, 10 Jul 2018 14:48:09 +0000 (10:48 -0400)

committer Tomek Mrugalski <tomasz@isc.org>

Fri, 27 Jul 2018 11:54:10 +0000 (13:54 +0200)
author Thomas Markwalder <tmark@isc.org>
Tue, 10 Jul 2018 14:48:09 +0000 (10:48 -0400)
committer Tomek Mrugalski <tomasz@isc.org>
Fri, 27 Jul 2018 11:54:10 +0000 (13:54 +0200)
diff --git a/src/lib/dhcpsrv/d2_client_cfg.cc b/src/lib/dhcpsrv/d2_client_cfg.cc

index 903b50f6189757cd5433d95b7e8112001936b055..6eae3b2b74cd787d1eee9535a73234a921091875 100644 (file)
--- a/src/lib/dhcpsrv/d2_client_cfg.cc
+++ b/src/lib/dhcpsrv/d2_client_cfg.cc
@@ -113,7 +113,8 @@ D2ClientConfig::D2ClientConfig(const  bool enable_updates,
        generated_prefix_(generated_prefix),
        qualifying_suffix_(qualifying_suffix),
        hostname_char_set_(hostname_char_set),
-      hostname_char_replacement_(hostname_char_replacement) {
+      hostname_char_replacement_(hostname_char_replacement),
+      hostname_sanitizer_(0) {
      validateContents();
  }
  
@@ -133,7 +134,8 @@ D2ClientConfig::D2ClientConfig()
        generated_prefix_(DFT_GENERATED_PREFIX),
        qualifying_suffix_(""),
        hostname_char_set_(DFT_HOSTNAME_CHAR_SET),
-      hostname_char_replacement_(DFT_HOSTNAME_CHAR_SET) {
+      hostname_char_replacement_(DFT_HOSTNAME_CHAR_SET),
+      hostname_sanitizer_(0) {
      validateContents();
  }
  
@@ -172,6 +174,16 @@ D2ClientConfig::validateContents() {
                    << server_ip_.toText() << "/" << server_port_);
      }
  
+    if (!hostname_char_set_.empty()) {
+        try {
+            hostname_sanitizer_.reset(new isc::util::str::StringSanitizer(hostname_char_set_,
+                                                                          hostname_char_replacement_));
+        } catch (const std::exception& ex) {
+            isc_throw(D2ClientError, "D2ClientConfig: hostname-char-set"
+                      " is not a valid regular expression");
+        }
+    }
+
      /// @todo perhaps more validation we should do yet?
      /// Are there any invalid combinations of options we need to test against?
  }
diff --git a/src/lib/dhcpsrv/d2_client_cfg.h b/src/lib/dhcpsrv/d2_client_cfg.h

index 8c5470380be5fa19bf33670c83a03a964dc817cb..5da4af6004ce9587ec6affc842ee2f190cf03780 100644 (file)
--- a/src/lib/dhcpsrv/d2_client_cfg.h
+++ b/src/lib/dhcpsrv/d2_client_cfg.h
@@ -17,6 +17,7 @@
  #include <cc/user_context.h>
  #include <dhcp_ddns/ncr_io.h>
  #include <exceptions/exceptions.h>
+#include <util/strutil.h>
  
  #include <boost/shared_ptr.hpp>
  
@@ -312,6 +313,9 @@ private:
      /// @brief A string to replace invalid characters when scrubbing hostnames.
      /// Meaningful only if hostname_char_set_ is not empty.
      std::string hostname_char_replacement_;
+
+    /// @brief Pointer to compiled regular expression string sanitizer
+    util::str::StringSanitizerPtr hostname_sanitizer_;
  };
  
  std::ostream&
diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc

index b250d36bf4213595a7cf36963b6f2b4aee9182e5..7d51bbcd041991befd180c40a2a7a97ee742931e 100644 (file)
--- a/src/lib/util/strutil.cc
+++ b/src/lib/util/strutil.cc
@@ -299,79 +299,111 @@ decodeFormattedHexString(const std::string& hex_string,
      }
  }
  
-std::string
-sanitizeString(const std::string& original,
-                            const std::string& invalidChars,
-                            const std::string& replacement) {
+class StringSanitizerImpl {
+public:
+    StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement)
+        : char_set_(char_set), char_replacement_(char_replacement) {
  #ifdef USE_REGEX
-    std::regex rexpr;
-    try {
-        rexpr = std::regex(invalidChars, std::regex::extended);
-    } catch (const std::exception& ex) {
-        isc_throw(isc::BadValue, "invalid regex: '"
-                  << invalidChars << "', " << ex.what());
+        try {
+            scrub_exp_ = std::regex(char_set, std::regex::extended);
+        } catch (const std::exception& ex) {
+            isc_throw(isc::BadValue, "invalid regex: '"
+                      << char_set_ << "', " << ex.what());
+        }
+#else
+        int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED);
+        if (ec) {
+            char errbuf[512] = "";
+            static_cast<void>(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf)));
+            regfree(&scrub_exp_);
+            isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf);
+        }
+#endif
      }
  
-    std::stringstream result;
-    try {
-        std::regex_replace(std::ostream_iterator<char>(result),
-                           original.begin(), original.end(),
-                           rexpr, replacement);
-    } catch (const std::exception& ex) {
-        isc_throw(isc::BadValue, "replacing '" << invalidChars << "' with '"
-                   << replacement << "' in '" << original << "' failed: ,"
-                   << ex.what());
+    /// @brief Destructor.
+    ~StringSanitizerImpl() {
+#ifndef USE_REGEX
+        regfree(&scrub_exp_);
+#endif
      }
  
-    return (result.str());
+    std::string scrub(const std::string& original) {
+#ifdef USE_REGEX
+        std::stringstream result;
+        try {
+            std::regex_replace(std::ostream_iterator<char>(result),
+                               original.begin(), original.end(),
+                               scrub_expr, char_replacement_);
+        } catch (const std::exception& ex) {
+            isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '"
+                   << char_replacement_ << "' in '" << original << "' failed: ,"
+                   << ex.what());
+        }
+
+        return (result.str());
  #else
-    // Compile the expression.
-    regex_t rex;
-    int ec = regcomp(&rex, invalidChars.c_str(), REG_EXTENDED);
-    if (ec) {
-        char errbuf[512] = "";
-        static_cast<void>(regerror(ec, &rex, errbuf, sizeof(errbuf)));
-        isc_throw(isc::BadValue, "invalid regex: '" << invalidChars
-                  << "', " << errbuf);
-    }
+        // Iterate over original string, match by match.
+        const char* origStr = original.c_str();
+        const char* startFrom = origStr;
+        const char* endAt = origStr + strlen(origStr);
+        regmatch_t matches[2];  // n matches + 1
+        stringstream result;
+
+        while (startFrom < endAt) {
+            // Look for the next match
+            if (regexec(&scrub_exp_, startFrom, 1, matches, 0) == REG_NOMATCH) {
+                // No matches, so add in the remainder
+                result << startFrom;
+                break;
+            }
  
-    // Iterate over original string, match by match.
-    const char* origStr = original.c_str();
-    const char* startFrom = origStr;
-    const char* endAt = origStr + strlen(origStr);
-    regmatch_t matches[2];  // n matches + 1
-    stringstream result;
-
-    while (startFrom < endAt) {
-        // Look for the next match
-        if (regexec(&rex, startFrom, 1, matches, 0) == REG_NOMATCH) {
-            // No matches, so add in the remainder
-            result << startFrom;
-            break;
-        }
+            // Shouldn't happen, but one never knows eh?
+            if (matches[0].rm_so == -1) {
+                isc_throw(isc::Unexpected, "matched but so is -1?");
+            }
  
-        // Shouldn't happen, but one never knows eh?
-        if (matches[0].rm_so == -1) {
-            isc_throw(isc::Unexpected, "matched but so is -1?");
-        }
+            // Add everything from starting point up to the current match
+            const char* matchAt = startFrom + matches[0].rm_so;
+            while (startFrom < matchAt) {
+                result << *startFrom;
+                ++startFrom;
+            }
+
+            // Add in the replacement
+            result << char_replacement_;
  
-        // Add everything from starting point up to the current match
-        const char* matchAt = startFrom + matches[0].rm_so;
-        while (startFrom < matchAt) {
-            result << *startFrom;
+            // Move past the match.
              ++startFrom;
          }
  
-        // Add in the replacement
-        result << replacement;
-
-        // Move past the match.
-        ++startFrom;
+        return (result.str());
+#endif
      }
  
-    regfree(&rex);
-    return (result.str());
+private:
+    std::string char_set_;
+    std::string char_replacement_;
+
+#ifdef USE_REGEX
+    regex scrub_exp_;
+#else
+    regex_t scrub_exp_;
  #endif
+};
+
+StringSanitizer::StringSanitizer(const std::string& char_set,
+                                 const std::string& char_replacement)
+    : impl_(new StringSanitizerImpl(char_set, char_replacement)) {
+}
+
+StringSanitizer::~StringSanitizer() {
+    delete impl_;
+}
+
+std::string
+StringSanitizer::scrub(const std::string& original) {
+    return (impl_->scrub(original));
  }
  
  } // namespace str
diff --git a/src/lib/util/strutil.h b/src/lib/util/strutil.h

index c31fd38ac356392cece2c3f6adb763113b96f7c2..1cdb833c188389eb08770c98bd38828c95e374d8 100644 (file)
--- a/src/lib/util/strutil.h
+++ b/src/lib/util/strutil.h
@@ -15,6 +15,7 @@
  #include <vector>
  #include <exceptions/exceptions.h>
  #include <boost/lexical_cast.hpp>
+#include <boost/shared_ptr.hpp>
  
  namespace isc {
  namespace util {
@@ -255,25 +256,53 @@ void
  decodeFormattedHexString(const std::string& hex_string,
                           std::vector<uint8_t>& binary);
  
-/// \brief Replaces all occurences of a character set in a string
-///
-/// This function runs a given string through a regular expression,
-/// replacing all "matches" of that expression with the specified string.
-///
-/// \param original the string to sanitize
-/// \param invalidChars  string containing a regular expression (POSIX
-/// extended syntax) that describes the characters to replace.  If you
-/// wanted to sanitize hostnames for example, you could specify the
-/// inversion of valid characters "[^A-Za-z0-9_-]".
-/// \param replacement string of one or more characters to use as the
-/// replacement for invalid characters.
-/// \return the new, sanitized string
-/// \throw BadValue if given an invalid regular expression, Unexpected if
-/// an error occurs executing the expression
-std::string
-sanitizeString(const std::string& original,
-               const std::string& invalidChars,
-               const std::string& replacement);
+/// @brief Forward declaration to the @c StringSanitizer implementation.
+class StringSanitizerImpl;
+
+/// @brief Implements a regular expression based string scrubber
+///
+/// The implementation uses C++11 regex IF the environemnt supports it
+/// (tested in configure.ac). If not it falls back to C lib regcomp/regexec.
+/// Older compilers, such as  pre Gnu 4.9.0, provided only experimental
+/// implementations of regex which are recognized as buggy.
+class StringSanitizer {
+public:
+
+    /// Constructor
+    ///
+    /// Compiles the given character set into a regular expression, and
+    /// retains the given character replacement.  Thereafter, the instance
+    /// may be used to scrub an arbitrary number of strings.
+    ///
+    /// @param char_set string containing a regular expression (POSIX
+    /// extended syntax) that describes the characters to replace.  If you
+    /// wanted to sanitize hostnames for example, you could specify the
+    /// inversion of valid characters "[^A-Za-z0-9_-]".
+    /// @param char_replacement string of one or more characters to use as the
+    /// replacement for invalid characters.
+    /// @throw BadValue if given an invalid regular expression
+    StringSanitizer(const std::string& char_set,
+                    const std::string& char_replacement);
+
+    /// @brief Destructor.
+    ///
+    /// Destroys the implementation instance.
+    ~StringSanitizer();
+
+    /// Returns a scrubbed copy of a given string
+    ///
+    /// Replaces all occurrances of characters described by the regular
+    /// expression with the character replacement .
+    ///
+    /// @param original the string to scrub
+    /// @throw Unexpected if an error occurs during scrubbing
+    std::string scrub(const std::string& original);
+private:
+    /// @brief Pointer to the @c StringSanitizerImpl.
+    StringSanitizerImpl* impl_;
+};
+
+typedef boost::shared_ptr<StringSanitizer> StringSanitizerPtr;
  
  } // namespace str
  } // namespace util
diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc

index 799f91273a0b278a2b64bb91cf8a1e19b45c9418..893409621de296f205cdb9cc0f36264251096a28 100644 (file)
--- a/src/lib/util/tests/strutil_unittest.cc
+++ b/src/lib/util/tests/strutil_unittest.cc
@@ -463,49 +463,81 @@ TEST(StringUtilTest, decodeFormattedHexString) {
                   isc::BadValue);
  }
  
-// Verifies sanitizeString() function
-TEST(StringUtilTest, sanitizeString) {
+/// @brief Fucntion used to test StringSantitizer
+/// @param original - string to sanitize
+/// @param char_set - regular expression string describing invalid
+/// characters
+/// @param char_replacement - character(s) which replace invalid
+/// characters
+/// @param expected - expected sanitized string
+void sanitizeStringTest(
+    const std::string& original,
+    const std::string& char_set,
+    const std::string& char_replacement,
+    const std::string& expected) {
+
+    StringSanitizerPtr ss;
      std::string sanitized;
  
+    try {
+        ss.reset(new StringSanitizer(char_set, char_replacement));
+    } catch (const std::exception& ex) {
+        ADD_FAILURE() << "Could not construct sanitizer:" << ex.what();
+        return;
+    }
+
+    try {
+        sanitized = ss->scrub(original);
+    } catch (const std::exception& ex) {
+        ADD_FAILURE() << "Could not scrub string:" << ex.what();
+        return;
+    }
+
+    EXPECT_EQ(sanitized, expected);
+}
+
+// Verifies StringSantizer class
+TEST(StringUtilTest, stringSanitizer) {
+
      // Bad regular expression should throw.
-    ASSERT_THROW (sanitized = sanitizeString("just a string", "[bogus-regex",""),
-                  BadValue);
+    StringSanitizerPtr ss;
+    ASSERT_THROW (ss.reset(new StringSanitizer("[bogus-regex","")), BadValue);
  
      // List of invalid chars should work: (b,c,2 are invalid)
-    ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[b-c2]","*"));
-    EXPECT_EQ(sanitized, "a**.1*3");
+    sanitizeStringTest("abc.123", "[b-c2]", "*",
+                       "a**.1*3");
  
-    // Inverted list for valid chars should work too: (b,c,2 are valid)
-    ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[^b-c2]","*"));
-    EXPECT_EQ(sanitized, "*bc**2*");
+    // Inverted list of valid chars should work: (b,c,2 are invalid)
+    sanitizeStringTest("abc.123", "[^b-c2]", "*",
+                       "*bc**2*");
  
      // A string of all valid chars should return an identical string.
-    ASSERT_NO_THROW (sanitized = sanitizeString("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]","x"));
-    EXPECT_EQ(sanitized, "-_A--B__Cabc34567_-");
+    sanitizeStringTest("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x",
+                       "-_A--B__Cabc34567_-");
  
      // Replacing with a character should work.
-    ASSERT_NO_THROW (sanitized = sanitizeString("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]","*"));
-    EXPECT_EQ(sanitized, "A*b*c*JoE3*_x*B*Y*e");
+    sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*",
+                       "A*b*c*JoE3*_x*B*Y*e");
  
      // Removing (i.e.replacing with an "empty" string) should work.
-    ASSERT_NO_THROW (sanitized = sanitizeString("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]",""));
-    EXPECT_EQ(sanitized, "AbcJoE3_xBYe");
+    sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "",
+                       "AbcJoE3_xBYe");
  
      // More than one non-matching in a row should work.
-    ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","x"));
-    EXPECT_EQ(sanitized, "xxAxxBxxCxx");
+    sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x",
+                       "xxAxxBxxCxx");
  
      // Removing than one non-matching in a row should work.
-    ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]",""));
-    EXPECT_EQ(sanitized, "ABC");
+    sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "",
+                       "ABC");
  
      // Replacing with a string should work.
-    ASSERT_NO_THROW (sanitized = sanitizeString("%%A%%B%%C%%", "[^A-Za-z0-9_]","xyz"));
-    EXPECT_EQ(sanitized, "xyzxyzAxyzxyzBxyzxyzCxyzxyz");
+    sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz",
+                       "xyzxyzAxyzxyzBxyzxyzCxyzxyz");
  
      // Dots as valid chars work.
-    ASSERT_NO_THROW (sanitized = sanitizeString("abc.123", "[^A-Za-z0-9_.]","*"));
-    EXPECT_EQ(sanitized, "abc.123");
+    sanitizeStringTest("abc.123", "[^A-Za-z0-9_.]", "*",
+                       "abc.123");
  }
  
  } // end of anonymous namespace
author	Thomas Markwalder <tmark@isc.org>
	Tue, 10 Jul 2018 14:48:09 +0000 (10:48 -0400)
committer	Tomek Mrugalski <tomasz@isc.org>
	Fri, 27 Jul 2018 11:54:10 +0000 (13:54 +0200)
src/lib/dhcpsrv/d2_client_cfg.cc		patch \| blob \| blame \| history
src/lib/dhcpsrv/d2_client_cfg.h		patch \| blob \| blame \| history
src/lib/util/strutil.cc		patch \| blob \| blame \| history
src/lib/util/strutil.h		patch \| blob \| blame \| history
src/lib/util/tests/strutil_unittest.cc		patch \| blob \| blame \| history