AC_DEFINE(HAVE_SA_LEN, 1, [Define to 1 if sockaddr has a sa_len member, and corresponding sin_len and sun_len])],
AC_MSG_RESULT(no))
+dnl Probe whether the C++11 <regex> implementation works at run time, not
+dnl just at compile time (some early library versions were buggy). On
+dnl success USE_REGEX is defined; otherwise the code falls back to POSIX
+dnl regcomp/regexec. The test program cannot be run when cross compiling,
+dnl so in that case the conservative answer "no" is used.
+AC_MSG_CHECKING(for usable C++11 regex)
+AC_TRY_RUN([
+#include <cstdlib>
+#include <regex>
+#include <string>
+int main() {
+    const std::regex regex(".*");
+    const std::string string = "This should match!";
+    const auto result = std::regex_search(string, regex);
+    return result ? EXIT_SUCCESS : EXIT_FAILURE;
+}],
+        [AC_MSG_RESULT(yes)
+         AC_DEFINE(USE_REGEX, 1, [Define to 1 if C++11 regex is usable])],
+        [AC_MSG_RESULT(no)],
+        [AC_MSG_RESULT([no (cross compiling)])])
+
enable_gtest="no"
GTEST_INCLUDES=
-// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2011-2018 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
#include <boost/algorithm/string/split.hpp>
#include <numeric>
+#include <iostream>
#include <sstream>
-#include <string.h>
+// Early versions of C++11 regex were buggy. Use it if we can;
+// otherwise fall back to regcomp/regexec. For more info see:
+// https://stackoverflow.com/questions/12530406/is-gcc-4-8-or-earlier-buggy-about-regular-expressions
+#ifdef USE_REGEX
+#include <regex>
+#else
+#include <sys/types.h>
+#include <regex.h>
+#endif
+
+#include <string.h>
using namespace std;
}
}
+// Returns a copy of 'original' in which every match of the POSIX extended
+// regular expression 'invalidChars' has been replaced by 'replacement'.
+// The replacement text is always inserted literally.
+//
+// Throws isc::BadValue if the expression cannot be compiled (or, in the
+// std::regex build, if the replacement itself fails) and isc::Unexpected
+// if the regcomp/regexec fallback reports an execution error.
+std::string
+sanitizeString(const std::string& original,
+               const std::string& invalidChars,
+               const std::string& replacement) {
+#ifdef USE_REGEX
+    std::regex rexpr;
+    try {
+        rexpr = std::regex(invalidChars, std::regex::extended);
+    } catch (const std::exception& ex) {
+        isc_throw(isc::BadValue, "invalid regex: '"
+                  << invalidChars << "', " << ex.what());
+    }
+
+    // std::regex_replace() treats '$' in the format string as the start of
+    // a substitution sequence ("$&", "$1", ...). Double each '$' so that
+    // the replacement is inserted literally, exactly as the regexec based
+    // implementation below does.
+    std::string literal;
+    literal.reserve(replacement.size());
+    for (std::string::const_iterator ch = replacement.begin();
+         ch != replacement.end(); ++ch) {
+        if (*ch == '$') {
+            literal.push_back('$');
+        }
+        literal.push_back(*ch);
+    }
+
+    std::string result;
+    try {
+        result = std::regex_replace(original, rexpr, literal);
+    } catch (const std::exception& ex) {
+        isc_throw(isc::BadValue, "replacing '" << invalidChars << "' with '"
+                  << replacement << "' in '" << original << "' failed: "
+                  << ex.what());
+    }
+
+    return (result);
+#else
+    // Compile the expression.
+    regex_t rex;
+    const int ec = regcomp(&rex, invalidChars.c_str(), REG_EXTENDED);
+    if (ec) {
+        char errbuf[512] = "";
+        static_cast<void>(regerror(ec, &rex, errbuf, sizeof(errbuf)));
+        isc_throw(isc::BadValue, "invalid regex: '" << invalidChars
+                  << "', " << errbuf);
+    }
+
+    // Free the compiled expression on every way out of the function,
+    // including the exceptions thrown below.
+    struct RegexGuard {
+        explicit RegexGuard(regex_t* compiled) : compiled_(compiled) {}
+        ~RegexGuard() { regfree(compiled_); }
+        regex_t* compiled_;
+    } guard(&rex);
+
+    // Iterate over original string, match by match. regexec() works on
+    // NUL terminated strings, so processing stops at the first NUL.
+    const char* startFrom = original.c_str();
+    const char* const endAt = startFrom + strlen(startFrom);
+    regmatch_t match;
+    std::stringstream result;
+
+    // After the first search the cursor is no longer at the beginning of
+    // the string, so '^' must not be allowed to match there.
+    int eflags = 0;
+
+    while (startFrom < endAt) {
+        // Look for the next match
+        const int rc = regexec(&rex, startFrom, 1, &match, eflags);
+        if (rc == REG_NOMATCH) {
+            // No matches, so add in the remainder
+            result << startFrom;
+            break;
+        }
+
+        // Any other non-zero code is a genuine execution failure; the
+        // match offsets are meaningless in that case.
+        if (rc != 0) {
+            char errbuf[512] = "";
+            static_cast<void>(regerror(rc, &rex, errbuf, sizeof(errbuf)));
+            isc_throw(isc::Unexpected, "executing regex: '" << invalidChars
+                      << "' failed: " << errbuf);
+        }
+
+        // Shouldn't happen, but one never knows eh?
+        if (match.rm_so == -1) {
+            isc_throw(isc::Unexpected, "matched but so is -1?");
+        }
+
+        // Add everything from starting point up to the current match,
+        // then the replacement.
+        result.write(startFrom, match.rm_so);
+        result << replacement;
+
+        if (match.rm_eo > match.rm_so) {
+            // Move past the whole match, however long it is.
+            startFrom += match.rm_eo;
+        } else {
+            // Empty match: keep the character at the match position and
+            // step over it so that the loop always makes progress.
+            startFrom += match.rm_so;
+            if (startFrom < endAt) {
+                result << *startFrom;
+                ++startFrom;
+            }
+        }
+
+        eflags = REG_NOTBOL;
+    }
+
+    return (result.str());
+#endif
+}
+
+
} // namespace str
} // namespace util
} // namespace isc
decodeFormattedHexString(const std::string& hex_string,
std::vector<uint8_t>& binary);
+/// \brief Replaces all occurrences of a character set in a string
+///
+/// This function runs a given string through a regular expression,
+/// replacing all "matches" of that expression with the specified string.
+/// The expression is evaluated with C++11 std::regex when USE_REGEX is
+/// defined at build time, and with POSIX regcomp/regexec otherwise.
+///
+/// \param original the string to sanitize
+/// \param invalidChars string containing a regular expression (POSIX
+/// extended syntax) that describes the characters to replace. If you
+/// wanted to sanitize hostnames for example, you could specify the
+/// inversion of valid characters "[^A-Za-z0-9_-]".
+/// \param replacement string of zero or more characters to use as the
+/// replacement for invalid characters. An empty string removes them.
+/// \return the new, sanitized string
+/// \throw BadValue if given an invalid regular expression, or if the
+/// std::regex based replacement fails
+/// \throw Unexpected if an error occurs executing the expression in the
+/// regcomp/regexec based implementation
+std::string
+sanitizeString(const std::string& original,
+ const std::string& invalidChars,
+ const std::string& replacement);
} // namespace str
} // namespace util
-// Copyright (C) 2011-2017 Internet Systems Consortium, Inc. ("ISC")
+// Copyright (C) 2011-2018 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
isc::BadValue);
}
+// Exercises sanitizeString(): a malformed expression must be rejected and
+// every well-formed scenario must produce the expected sanitized text.
+TEST(StringUtilTest, sanitizeString) {
+    std::string sanitized;
+
+    // An unterminated bracket expression cannot be compiled.
+    ASSERT_THROW(sanitized = sanitizeString("just a string", "[bogus-regex", ""),
+                 BadValue);
+
+    // One row per scenario: the input, the expression describing the
+    // invalid characters, the replacement text and the expected outcome.
+    struct Scenario {
+        const char* input_;
+        const char* invalid_;
+        const char* replacement_;
+        const char* expected_;
+    };
+
+    const Scenario scenarios[] = {
+        // Input made only of valid characters comes back unchanged.
+        { "-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x",
+          "-_A--B__Cabc34567_-" },
+        // Each invalid character is replaced by a single character.
+        { "A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*",
+          "A*b*c*JoE3*_x*B*Y*e" },
+        // An empty replacement removes the invalid characters.
+        { "A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "",
+          "AbcJoE3_xBYe" },
+        // Runs of consecutive invalid characters are replaced one by one.
+        { "%%A%%B%%C%%", "[^A-Za-z0-9_]", "x",
+          "xxAxxBxxCxx" },
+        // Runs of consecutive invalid characters can be removed.
+        { "%%A%%B%%C%%", "[^A-Za-z0-9_]", "",
+          "ABC" },
+        // The replacement may be longer than one character.
+        { "%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz",
+          "xyzxyzAxyzxyzBxyzxyzCxyzxyz" }
+    };
+
+    const size_t count = sizeof(scenarios) / sizeof(scenarios[0]);
+    for (size_t i = 0; i < count; ++i) {
+        const Scenario& scenario = scenarios[i];
+        ASSERT_NO_THROW(sanitized = sanitizeString(scenario.input_,
+                                                   scenario.invalid_,
+                                                   scenario.replacement_));
+        EXPECT_EQ(sanitized, scenario.expected_);
+    }
+}
+
} // end of anonymous namespace