From: Andrei Pavel Date: Mon, 4 Mar 2024 09:49:45 +0000 (+0200) Subject: [#3210] refactor string utilities X-Git-Tag: Kea-2.5.7~16 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=260ad292e87491476c7f34c111d76ef612f151e4;p=thirdparty%2Fkea.git [#3210] refactor string utilities - Rename util/strutil.h to util/str.h to escape redundancy. - Simplify trim function. - Remove unused functions. - Get rid of the regex conditional compilation that helped Kea build with ancient compilers. Lack of proper regex functionality now results in failure in configure.ac. --- diff --git a/configure.ac b/configure.ac index cf289880f4..453a33315d 100644 --- a/configure.ac +++ b/configure.ac @@ -667,8 +667,8 @@ int main() { # When cross-compiling we don't have any way to check if regex is # usable or not. # Let's be optimistic and assume it is by testing only the negative case. -if test "x$usable_regex" != "xno" ; then - AC_DEFINE(USE_REGEX, 1, [Define to 1 if C++11 regex is usable]) +if test "${usable_regex}" = 'no'; then + AC_MSG_ERROR([Need proper regex functionality.])] fi # Check for NETCONF support. If NETCONF was enabled in the build, and this check diff --git a/src/bin/dhcp4/dhcp4_srv.cc b/src/bin/dhcp4/dhcp4_srv.cc index 7c75edde28..cead6efbfb 100644 --- a/src/bin/dhcp4/dhcp4_srv.cc +++ b/src/bin/dhcp4/dhcp4_srv.cc @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/bin/dhcp4/json_config_parser.cc b/src/bin/dhcp4/json_config_parser.cc index 6164c3c7d9..0b5c90c6a5 100644 --- a/src/bin/dhcp4/json_config_parser.cc +++ b/src/bin/dhcp4/json_config_parser.cc @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/bin/dhcp6/json_config_parser.cc b/src/bin/dhcp6/json_config_parser.cc index 8fd4e15743..62d8df4239 100644 --- a/src/bin/dhcp6/json_config_parser.cc +++ b/src/bin/dhcp6/json_config_parser.cc @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/hooks/dhcp/flex_option/flex_option.cc b/src/hooks/dhcp/flex_option/flex_option.cc index f32d429d0d..5313216ad2 100644 --- a/src/hooks/dhcp/flex_option/flex_option.cc +++ b/src/hooks/dhcp/flex_option/flex_option.cc @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/hooks/dhcp/flex_option/flex_option.h b/src/hooks/dhcp/flex_option/flex_option.h index 0bdbdce568..1d60a85920 100644 --- a/src/hooks/dhcp/flex_option/flex_option.h +++ b/src/hooks/dhcp/flex_option/flex_option.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/hooks/dhcp/high_availability/ha_config.cc b/src/hooks/dhcp/high_availability/ha_config.cc index a8d70e27e1..89d3f8a0a1 100644 --- a/src/hooks/dhcp/high_availability/ha_config.cc +++ b/src/hooks/dhcp/high_availability/ha_config.cc @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/hooks/dhcp/lease_cmds/lease_cmds.cc b/src/hooks/dhcp/lease_cmds/lease_cmds.cc index 534d6f5516..7c6ebf0579 100644 --- a/src/hooks/dhcp/lease_cmds/lease_cmds.cc +++ b/src/hooks/dhcp/lease_cmds/lease_cmds.cc @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/cc/server_tag.cc b/src/lib/cc/server_tag.cc index 80e5184ec5..94e4ce0f84 100644 --- a/src/lib/cc/server_tag.cc +++ b/src/lib/cc/server_tag.cc @@ -8,7 +8,7 @@ #include #include -#include +#include #include namespace isc { diff --git a/src/lib/database/database_connection.cc b/src/lib/database/database_connection.cc index fca514d7da..607c706376 100644 --- a/src/lib/database/database_connection.cc +++ b/src/lib/database/database_connection.cc @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/dhcp/classify.cc b/src/lib/dhcp/classify.cc index 6803afb363..f42a9e3710 100644 --- a/src/lib/dhcp/classify.cc +++ b/src/lib/dhcp/classify.cc @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/src/lib/dhcp/duid.h b/src/lib/dhcp/duid.h index 2ec1b163d2..2239c232c2 100644 --- a/src/lib/dhcp/duid.h +++ b/src/lib/dhcp/duid.h @@ -8,10 +8,14 @@ #define DUID_H #include -#include -#include +#include + +#include +#include #include -#include + +#include + #include namespace isc { diff --git a/src/lib/dhcp/duid_factory.cc b/src/lib/dhcp/duid_factory.cc index 43c33638dc..ea1ad8ca77 100644 --- a/src/lib/dhcp/duid_factory.cc +++ b/src/lib/dhcp/duid_factory.cc @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcp/hwaddr.cc b/src/lib/dhcp/hwaddr.cc index 9f51fb891e..a1b3680257 100644 --- a/src/lib/dhcp/hwaddr.cc +++ b/src/lib/dhcp/hwaddr.cc @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcp/option4_client_fqdn.cc b/src/lib/dhcp/option4_client_fqdn.cc index 17b804b57b..ae2d5b62d9 100644 --- a/src/lib/dhcp/option4_client_fqdn.cc +++ b/src/lib/dhcp/option4_client_fqdn.cc @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include namespace isc { diff --git a/src/lib/dhcp/option4_dnr.h b/src/lib/dhcp/option4_dnr.h index 7c498ceab1..02893d4598 100644 --- a/src/lib/dhcp/option4_dnr.h +++ b/src/lib/dhcp/option4_dnr.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/dhcp/option6_client_fqdn.cc b/src/lib/dhcp/option6_client_fqdn.cc index dd28599fa4..ba4d056425 100644 --- a/src/lib/dhcp/option6_client_fqdn.cc +++ b/src/lib/dhcp/option6_client_fqdn.cc @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include namespace isc { diff --git a/src/lib/dhcp/option_classless_static_route.cc b/src/lib/dhcp/option_classless_static_route.cc index cb4ed0cf13..6d95df9749 100644 --- a/src/lib/dhcp/option_classless_static_route.cc +++ b/src/lib/dhcp/option_classless_static_route.cc @@ -7,7 +7,7 @@ #include #include -#include +#include #include diff --git a/src/lib/dhcp/option_data_types.cc b/src/lib/dhcp/option_data_types.cc index 0308d8c6a3..6be31e8452 100644 --- a/src/lib/dhcp/option_data_types.cc +++ b/src/lib/dhcp/option_data_types.cc @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcp/option_definition.cc b/src/lib/dhcp/option_definition.cc index 54d569b660..0c9cac9120 100644 --- a/src/lib/dhcp/option_definition.cc +++ b/src/lib/dhcp/option_definition.cc @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcp/option_string.cc b/src/lib/dhcp/option_string.cc index 5f1d0d4d69..90918d9985 100644 --- a/src/lib/dhcp/option_string.cc +++ b/src/lib/dhcp/option_string.cc @@ -7,7 +7,7 @@ #include #include -#include +#include #include namespace isc { diff --git a/src/lib/dhcp/tests/option_classless_static_route_unittest.cc b/src/lib/dhcp/tests/option_classless_static_route_unittest.cc index 0b99b26104..8eb2775d4a 100644 --- a/src/lib/dhcp/tests/option_classless_static_route_unittest.cc +++ b/src/lib/dhcp/tests/option_classless_static_route_unittest.cc @@ -7,7 +7,7 @@ #include #include -#include +#include #include diff --git a/src/lib/dhcpsrv/cfg_duid.cc b/src/lib/dhcpsrv/cfg_duid.cc index 24be637ab7..831772d429 100644 --- a/src/lib/dhcpsrv/cfg_duid.cc +++ b/src/lib/dhcpsrv/cfg_duid.cc @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcpsrv/cfg_iface.cc b/src/lib/dhcpsrv/cfg_iface.cc index 78e61ba23d..3991e22396 100644 --- a/src/lib/dhcpsrv/cfg_iface.cc +++ b/src/lib/dhcpsrv/cfg_iface.cc @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/dhcpsrv/host.cc b/src/lib/dhcpsrv/host.cc index 4390dba322..5d45192ee4 100644 --- a/src/lib/dhcpsrv/host.cc +++ b/src/lib/dhcpsrv/host.cc @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include diff --git a/src/lib/dhcpsrv/lease.cc b/src/lib/dhcpsrv/lease.cc index 9dad4083d1..651b9f58b7 100644 --- a/src/lib/dhcpsrv/lease.cc +++ b/src/lib/dhcpsrv/lease.cc @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcpsrv/parsers/base_network_parser.cc b/src/lib/dhcpsrv/parsers/base_network_parser.cc index 60717c295b..c70c44d36a 100644 --- a/src/lib/dhcpsrv/parsers/base_network_parser.cc +++ b/src/lib/dhcpsrv/parsers/base_network_parser.cc @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace isc::data; using namespace isc::util; diff --git a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc index 78077b5986..5fc899f6f1 100644 --- a/src/lib/dhcpsrv/parsers/dhcp_parsers.cc +++ b/src/lib/dhcpsrv/parsers/dhcp_parsers.cc @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/dhcpsrv/parsers/option_data_parser.cc b/src/lib/dhcpsrv/parsers/option_data_parser.cc index 8ec49a0309..cd544d5833 100644 --- a/src/lib/dhcpsrv/parsers/option_data_parser.cc +++ b/src/lib/dhcpsrv/parsers/option_data_parser.cc @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/lib/dhcpsrv/srv_config.cc b/src/lib/dhcpsrv/srv_config.cc index 763e678e9a..30a6ad8124 100644 --- a/src/lib/dhcpsrv/srv_config.cc +++ b/src/lib/dhcpsrv/srv_config.cc @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include diff --git a/src/lib/dhcpsrv/srv_config.h b/src/lib/dhcpsrv/srv_config.h index 8489249242..c329eb6421 100644 --- a/src/lib/dhcpsrv/srv_config.h +++ b/src/lib/dhcpsrv/srv_config.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/dhcpsrv/tests/d2_client_unittest.cc b/src/lib/dhcpsrv/tests/d2_client_unittest.cc index 98efdadced..6bab3abeed 100644 --- a/src/lib/dhcpsrv/tests/d2_client_unittest.cc +++ b/src/lib/dhcpsrv/tests/d2_client_unittest.cc @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/hooks/hooks_parser.cc b/src/lib/hooks/hooks_parser.cc index 32296214dc..56029896fe 100644 --- a/src/lib/hooks/hooks_parser.cc +++ b/src/lib/hooks/hooks_parser.cc @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include using namespace std; diff --git a/src/lib/http/basic_auth_config.cc b/src/lib/http/basic_auth_config.cc index 1e6a727094..84c0c548ec 100644 --- a/src/lib/http/basic_auth_config.cc +++ b/src/lib/http/basic_auth_config.cc @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace isc; using namespace isc::data; diff --git a/src/lib/http/http_header.cc b/src/lib/http/http_header.cc index a543e664b2..555eb3caa8 100644 --- a/src/lib/http/http_header.cc +++ b/src/lib/http/http_header.cc @@ -8,7 +8,7 @@ #include #include -#include +#include #include namespace isc { diff --git a/src/lib/log/compiler/message.cc b/src/lib/log/compiler/message.cc index 91a582f958..81f408742b 100644 --- a/src/lib/log/compiler/message.cc +++ b/src/lib/log/compiler/message.cc @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/src/lib/log/logger.cc b/src/lib/log/logger.cc index e036a096a8..e8e0f74a98 100644 --- a/src/lib/log/logger.cc +++ b/src/lib/log/logger.cc @@ -16,7 +16,7 @@ #include #include -#include +#include using namespace std; diff --git a/src/lib/log/logger_impl.cc b/src/lib/log/logger_impl.cc index d5ca88fd3a..b5e9c8dc8e 100644 --- a/src/lib/log/logger_impl.cc +++ b/src/lib/log/logger_impl.cc @@ -38,7 +38,7 @@ #include #include -#include +#include // Note: as log4cplus and the Kea logger have many concepts in common, and // thus many similar names, to disambiguate types we don't "use" the log4cplus diff --git a/src/lib/log/message_reader.cc b/src/lib/log/message_reader.cc index 2b48608733..bf797d9ffa 100644 --- a/src/lib/log/message_reader.cc +++ b/src/lib/log/message_reader.cc @@ -16,7 +16,7 @@ #include #include #include -#include +#include using namespace std; diff --git a/src/lib/log/tests/logger_example.cc b/src/lib/log/tests/logger_example.cc index ff3d512296..86e9b76431 100644 --- a/src/lib/log/tests/logger_example.cc +++ b/src/lib/log/tests/logger_example.cc @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include diff --git a/src/lib/process/d_cfg_mgr.cc b/src/lib/process/d_cfg_mgr.cc index 24ead16d3b..72bc4dea56 100644 --- a/src/lib/process/d_cfg_mgr.cc +++ b/src/lib/process/d_cfg_mgr.cc @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/tcp/tcp_connection.cc b/src/lib/tcp/tcp_connection.cc index 0445b35a90..a3fa375bfd 100644 --- a/src/lib/tcp/tcp_connection.cc +++ b/src/lib/tcp/tcp_connection.cc @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/lib/tcp/tcp_stream_msg.cc b/src/lib/tcp/tcp_stream_msg.cc index 89fd5e8247..64332af840 100644 --- a/src/lib/tcp/tcp_stream_msg.cc +++ b/src/lib/tcp/tcp_stream_msg.cc @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/src/lib/util/Makefile.am b/src/lib/util/Makefile.am index 828e0fe7ed..90cb5e7118 100644 --- a/src/lib/util/Makefile.am +++ b/src/lib/util/Makefile.am @@ -32,7 +32,7 @@ libkea_util_la_SOURCES += staged_value.h libkea_util_la_SOURCES += state_model.cc state_model.h libkea_util_la_SOURCES += stopwatch.cc stopwatch.h libkea_util_la_SOURCES += stopwatch_impl.cc stopwatch_impl.h -libkea_util_la_SOURCES += strutil.h strutil.cc +libkea_util_la_SOURCES += str.h str.cc libkea_util_la_SOURCES += thread_pool.h libkea_util_la_SOURCES += triplet.h libkea_util_la_SOURCES += unlock_guard.h @@ -77,7 +77,7 @@ libkea_util_include_HEADERS = \ state_model.h \ stopwatch.h \ stopwatch_impl.h \ - strutil.h \ + str.h \ thread_pool.h \ triplet.h \ unlock_guard.h \ diff --git a/src/lib/util/str.cc b/src/lib/util/str.cc new file mode 100644 index 0000000000..9c3a3b857a --- /dev/null +++ b/src/lib/util/str.cc @@ -0,0 +1,345 @@ +// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace std; + +namespace isc { +namespace util { +namespace str { + +string +trim(const string& input) { + if (input.empty()) { + return string(); + } + static const char* blanks = " \t\n"; + + // Search for first non-blank character in the string. + size_t const first(input.find_first_not_of(blanks)); + if (first == string::npos) { + return string(); + } + + // String not all blanks, so look for last character. + size_t const last(input.find_last_not_of(blanks)); + + // Extract the trimmed substring. + return input.substr(first, (last - first + 1)); +} + +vector +tokens(const string& text, const string& delim, bool escape) { + vector result; + string token; + bool in_token = false; + bool escaped = false; + for (auto const& c : text) { + if (delim.find(c) != string::npos) { + // Current character is a delimiter + if (!in_token) { + // Two or more delimiters, eat them + } else if (escaped) { + // Escaped delimiter in a token: reset escaped and keep it + escaped = false; + token.push_back(c); + } else { + // End of the current token: save it if not empty + if (!token.empty()) { + result.push_back(token); + } + // Reset state + in_token = false; + token.clear(); + } + } else if (escape && (c == '\\')) { + // Current character is the escape character + if (!in_token) { + // The escape character is the first character of a new token + in_token = true; + } + if (escaped) { + // Escaped escape: reset escaped and keep one character + escaped = false; + token.push_back(c); + } else { + // Remember to keep the next character + escaped = true; + } + } else { + // Not a delimiter nor an escape + if (!in_token) { + // First character of a new token + in_token = true; + } + if (escaped) { + // Escaped common character: as escape was false + escaped = false; + token.push_back('\\'); + token.push_back(c); + } else { + // The common case: keep it + token.push_back(c); + } + } + } + // End of input: close and save the current token if not empty + if (escaped) { + // Pending escape + token.push_back('\\'); + } + if (!token.empty()) { + result.push_back(token); + } + + return (result); +} + +char +toUpper(char const chr) { + return (toupper(chr)); +} + +void +uppercase(string& text) { + transform(text.begin(), text.end(), text.begin(), toUpper); +} + +char +toLower(char const chr) { + return (tolower(static_cast(chr))); +} + +void +lowercase(string& text) { + transform(text.begin(), text.end(), text.begin(), toLower); +} + +vector +quotedStringToBinary(const string& quoted_string) { + vector binary; + // Remove whitespace before and after the quotes. + string trimmed_string = trim(quoted_string); + + // We require two quote characters, so the length of the string must be + // equal to 2 at minimum, and it must start and end with quotes. + if ((trimmed_string.length() > 1) && + ((trimmed_string[0] == '\'') && (trimmed_string[trimmed_string.length() - 1] == '\''))) { + // Remove quotes and trim the text inside the quotes. + trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2)); + // Copy string contents into the vector. + binary.assign(trimmed_string.begin(), trimmed_string.end()); + } + // Return resulting vector or empty vector. + return (binary); +} + +void +decodeColonSeparatedHexString(const string& hex_string, vector& binary) { + decodeSeparatedHexString(hex_string, ":", binary); +} + +void +decodeSeparatedHexString(const string& hex_string, const string& sep, vector& binary) { + vector split_text; + boost::split(split_text, hex_string, boost::is_any_of(sep), + boost::algorithm::token_compress_off); + + vector binary_vec; + for (size_t i = 0; i < split_text.size(); ++i) { + // If there are multiple tokens and the current one is empty, it + // means that two consecutive colons were specified. This is not + // allowed. + if ((split_text.size() > 1) && split_text[i].empty()) { + isc_throw(BadValue, "two consecutive separators ('" + << sep << "') specified in a decoded string '" << hex_string + << "'"); + + // Between a colon we expect at most two characters. + } else if (split_text[i].size() > 2) { + isc_throw(BadValue, "invalid format of the decoded string" + << " '" << hex_string << "'"); + + } else if (!split_text[i].empty()) { + stringstream s; + s << "0x"; + + for (unsigned int j = 0; j < split_text[i].length(); ++j) { + // Check if we're dealing with hexadecimal digit. + if (!isxdigit(split_text[i][j])) { + isc_throw(BadValue, "'" << split_text[i][j] + << "' is not a valid hexadecimal digit in" + << " decoded string '" << hex_string << "'"); + } + s << split_text[i][j]; + } + + // The stream should now have one or two hexadecimal digits. + // Let's convert it to a number and store in a temporary + // vector. + unsigned int binary_value; + s >> hex >> binary_value; + + binary_vec.push_back(static_cast(binary_value)); + } + } + + // All ok, replace the data in the output vector with a result. + binary.swap(binary_vec); +} + +void +decodeFormattedHexString(const string& hex_string, vector& binary) { + // If there is at least one colon we assume that the string + // comprises octets separated by colons (e.g. MAC address notation). + if (hex_string.find(':') != string::npos) { + decodeSeparatedHexString(hex_string, ":", binary); + } else if (hex_string.find(' ') != string::npos) { + decodeSeparatedHexString(hex_string, " ", binary); + } else { + ostringstream s; + + // If we have odd number of digits we'll have to prepend '0'. + if (hex_string.length() % 2 != 0) { + s << "0"; + } + + // It is ok to use '0x' prefix in a string. + if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) { + // Exclude '0x' from the decoded string. + s << hex_string.substr(2); + + } else { + // No '0x', so decode the whole string. + s << hex_string; + } + + try { + // Decode the hex string. + encode::decodeHex(s.str(), binary); + + } catch (...) { + isc_throw(BadValue, "'" << hex_string + << "' is not a valid" + " string of hexadecimal digits"); + } + } +} + +class StringSanitizerImpl { +public: + /// @brief Constructor. + StringSanitizerImpl(const string& char_set, const string& char_replacement) + : char_set_(char_set), char_replacement_(char_replacement) { + if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) { + isc_throw(BadValue, "char set size: '" << char_set.size() << "' exceeds max size: '" + << StringSanitizer::MAX_DATA_SIZE << "'"); + } + + if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) { + isc_throw(BadValue, "char replacement size: '" + << char_replacement.size() << "' exceeds max size: '" + << StringSanitizer::MAX_DATA_SIZE << "'"); + } + try { + scrub_exp_ = regex(char_set, regex::extended); + } catch (const exception& ex) { + isc_throw(BadValue, "invalid regex: '" << char_set_ << "', " << ex.what()); + } + } + + string scrub(const string& original) { + stringstream result; + try { + regex_replace(ostream_iterator(result), original.begin(), original.end(), + scrub_exp_, char_replacement_); + } catch (const exception& ex) { + isc_throw(BadValue, "replacing '" << char_set_ << "' with '" << char_replacement_ + << "' in '" << original << "' failed: ," + << ex.what()); + } + + return (result.str()); + } + +private: + /// @brief The char set data for regex. + string char_set_; + + /// @brief The char replacement data for regex. + string char_replacement_; + + regex scrub_exp_; +}; + +// @note The regex engine is implemented using recursion and can cause +// stack overflow if the input data is too large. An arbitrary size of +// 4096 should be enough for all cases. +const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096; + +StringSanitizer::StringSanitizer(const string& char_set, const string& char_replacement) + : impl_(new StringSanitizerImpl(char_set, char_replacement)) { +} + +string +StringSanitizer::scrub(const string& original) { + return (impl_->scrub(original)); +} + +bool +isPrintable(const string& content) { + for (char const ch : content) { + if (isprint(ch) == 0) { + return (false); + } + } + return (true); +} + +bool +isPrintable(const vector& content) { + for (uint8_t const ch : content) { + if (isprint(ch) == 0) { + return (false); + } + } + return (true); +} + +string +dumpAsHex(const uint8_t* data, size_t length) { + stringstream output; + for (unsigned int i = 0; i < length; i++) { + if (i) { + output << ":"; + } + + output << setfill('0') << setw(2) << hex << static_cast(data[i]); + } + + return (output.str()); +} + +} // namespace str +} // namespace util +} // namespace isc diff --git a/src/lib/util/strutil.h b/src/lib/util/str.h similarity index 52% rename from src/lib/util/strutil.h rename to src/lib/util/str.h index 8f3cd13ae8..1e5d4c405f 100644 --- a/src/lib/util/strutil.h +++ b/src/lib/util/str.h @@ -4,19 +4,20 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef STRUTIL_H -#define STRUTIL_H +#ifndef KEA_UTIL_STR_H +#define KEA_UTIL_STR_H + +#include #include -#include -#include -#include -#include +#include +#include +#include #include +#include #include -#include + #include -#include namespace isc { namespace util { @@ -27,59 +28,52 @@ namespace str { /// /// @brief A standard string util exception that is thrown if getToken or /// numToToken are called with bad input data -/// class StringTokenError : public Exception { public: - StringTokenError(const char* file, size_t line, const char* what) : - isc::Exception(file, line, what) {} + StringTokenError(const char* file, size_t line, const char* what) + : isc::Exception(file, line, what) { + } }; -/// @brief Normalize Backslash -/// -/// Only relevant to Windows, this replaces all "\" in a string with "/" -/// and returns the result. On other systems it is a no-op. Note -/// that Windows does recognize file names with the "\" replaced by "/" -/// (at least in system calls, if not the command line). -/// -/// @param name Name to be substituted -void normalizeSlash(std::string& name); - -/// @brief Trim Leading and Trailing Spaces +/// @brief Trim leading and trailing spaces. /// /// Returns a copy of the input string but with any leading or trailing spaces /// or tabs removed. /// -/// @param instring Input string to modify +/// @param input Input string to modify. /// -/// @return String with leading and trailing spaces removed -std::string trim(const std::string& instring); +/// @return String with leading and trailing spaces removed. +std::string +trim(const std::string& input); /// @brief Finds the "trimmed" end of a buffer /// /// Works backward from the end of the buffer, looking for the first /// character not equal to the trim value, and returns an iterator -/// pointing that that position. +/// pointing to that position. /// /// @param begin - Forward iterator pointing to the beginning of the -/// buffer to trim +/// buffer to trim. /// @param end - Forward iterator pointing to the untrimmed end of -/// the buffer to trim +/// the buffer to trim. /// @param trim_val - byte value to trim off /// /// @return Iterator pointing the first character from the end of the -/// buffer not equal to the trim value -template +/// buffer not equal to the trim value. +template Iterator -seekTrimmed(Iterator begin, Iterator end, uint8_t trim_val) { - for (; end != begin && *(end - 1) == trim_val; --end); +seekTrimmed(Iterator const& begin, Iterator end, uint8_t const trim_val) { + while (end != begin && *(end - 1) == trim_val) { + --end; + } return (end); } -/// @brief Split String into Tokens +/// @brief Split string into tokens. /// /// Splits a string into tokens (the tokens being delimited by one or more of -/// the delimiter characters) and returns the tokens in a vector array. Note -/// that adjacent delimiters are considered to be a single delimiter. +/// the delimiter characters) and returns the tokens in a vector. +/// Adjacent delimiters are considered to be a single delimiter. /// /// Special cases are: /// -# The empty string is considered to be zero tokens. @@ -102,120 +96,46 @@ seekTrimmed(Iterator begin, Iterator end, uint8_t trim_val) { /// @param escape Use backslash to escape delimiter characters /// /// @return Vector of tokens. -std::vector tokens(const std::string& text, - const std::string& delim = std::string(" \t\n"), - bool escape = false); +std::vector +tokens(const std::string& text, const std::string& delim = " \t\n", bool escape = false); -/// @brief Uppercase Character +/// @brief Convert character to uppercase. /// -/// Used in uppercase() to pass as an argument to std::transform(). The -/// function std::toupper() can't be used as it takes an "int" as its argument; +/// Used in uppercase() to pass as a parameter to std::transform(). The +/// function std::toupper() can't be used as it takes an "int" as its parameter; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be upper-cased. /// -/// @return Uppercase version of the argument -inline char toUpper(char chr) { - return (static_cast(std::toupper(static_cast(chr)))); -} +/// @return Uppercase version of the input character. +char +toUpper(char const chr); -/// @brief Uppercase String -/// -/// A convenience function to uppercase a string. +/// @brief Convert string to uppercase. /// /// @param text String to be upper-cased. -inline void uppercase(std::string& text) { - std::transform(text.begin(), text.end(), text.begin(), - isc::util::str::toUpper); -} +void +uppercase(std::string& text); -/// @brief Lowercase Character +/// @brief Convert character to lowercase. /// -/// Used in lowercase() to pass as an argument to std::transform(). The -/// function std::tolower() can't be used as it takes an "int" as its argument; +/// Used in lowercase() to pass as a parameter to std::transform(). The +/// function std::tolower() can't be used as it takes an "int" as its parameter; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be lower-cased. /// -/// @return Lowercase version of the argument -inline char toLower(char chr) { - return (static_cast(std::tolower(static_cast(chr)))); -} +/// @return Lowercase version of the input character. +char +toLower(char const chr); -/// @brief Lowercase String -/// -/// A convenience function to lowercase a string +/// @brief Convert string to lowercase. /// /// @param text String to be lower-cased. -inline void lowercase(std::string& text) { - std::transform(text.begin(), text.end(), text.begin(), - isc::util::str::toLower); -} - -/// @brief Apply Formatting -/// -/// Given a printf-style format string containing only "%s" place holders -/// (others are ignored) and a vector of strings, this produces a single string -/// with the placeholders replaced. -/// -/// @param format Format string -/// @param args Vector of argument strings -/// -/// @return Resultant string -std::string format(const std::string& format, - const std::vector& args); - - -/// @brief Returns one token from the given stringstream -/// -/// Using the >> operator, with basic error checking -/// -/// @throw StringTokenError if the token cannot be read from the stream -/// -/// @param iss stringstream to read one token from -/// -/// @return the first token read from the stringstream -std::string getToken(std::istringstream& iss); - -/// @brief Converts a string token to an *unsigned* integer. -/// -/// The value is converted using a lexical cast, with error and bounds -/// checking. -/// -/// NumType is a *signed* integral type (e.g. int32_t) that is sufficiently -/// wide to store resulting integers. -/// -/// BitSize is the maximum number of bits that the resulting integer can take. -/// This function first checks whether the given token can be converted to -/// an integer of NumType type. It then confirms the conversion result is -/// within the valid range, i.e., [0, 2^BitSize - 1]. The second check is -/// necessary because lexical_cast where T is an unsigned integer type -/// doesn't correctly reject negative numbers when compiled with SunStudio. -/// -/// @throw StringTokenError if the value is out of range, or if it -/// could not be converted -/// -/// @param num_token the string token to convert -/// -/// @return the converted value, of type NumType -template -NumType -tokenToNum(const std::string& num_token) { - NumType num; - try { - num = boost::lexical_cast(num_token); - } catch (const boost::bad_lexical_cast&) { - isc_throw(StringTokenError, "Invalid SRV numeric parameter: " << - num_token); - } - if (num < 0 || num >= (static_cast(1) << BitSize)) { - isc_throw(StringTokenError, "Numeric SRV parameter out of range: " << - num); - } - return (num); -} +void +lowercase(std::string& text); /// @brief Converts a string in quotes into vector. /// @@ -263,16 +183,12 @@ decodeSeparatedHexString(const std::string& hex_string, /// @brief Converts a string of hexadecimal digits with colons into /// a vector. /// -/// Convenience method which calls @c decodeSeparatedHexString() passing -/// in a colon for the separator. - /// @param hex_string Input string. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void -decodeColonSeparatedHexString(const std::string& hex_string, - std::vector& binary); +decodeColonSeparatedHexString(const std::string& hex_string, std::vector& binary); /// @brief Converts a formatted string of hexadecimal digits into /// a vector. @@ -293,24 +209,17 @@ decodeColonSeparatedHexString(const std::string& hex_string, /// /// @throw isc::BadValue if the format of the input string is invalid. void -decodeFormattedHexString(const std::string& hex_string, - std::vector& binary); +decodeFormattedHexString(const std::string& hex_string, std::vector& binary); /// @brief Forward declaration to the @c StringSanitizer implementation. class StringSanitizerImpl; /// @brief Type representing the pointer to the @c StringSanitizerImpl. -typedef boost::shared_ptr StringSanitizerImplPtr; +using StringSanitizerImplPtr = std::shared_ptr; -/// @brief Implements a regular expression based string scrubber -/// -/// The implementation uses C++11 regex IF the environment supports it -/// (tested in configure.ac). If not it falls back to C lib regcomp/regexec. -/// Older compilers, such as pre Gnu g++ 4.9.0, provided only experimental -/// implementations of regex which are recognized as buggy. +/// @brief Implements a regular expression based string scrubber. class StringSanitizer { public: - /// @brief Constructor. /// /// Compiles the given character set into a regular expression, and @@ -324,23 +233,17 @@ public: /// @param char_replacement string of one or more characters to use as the /// replacement for invalid characters. /// - /// @throw BadValue if given an invalid regular expression - StringSanitizer(const std::string& char_set, - const std::string& char_replacement); - - /// @brief Destructor. - /// - /// Destroys the implementation instance. - ~StringSanitizer(); + /// @throw BadValue if given an invalid regular expression. + StringSanitizer(const std::string& char_set, const std::string& char_replacement); - /// Returns a scrubbed copy of a given string + /// @brief Returns a scrubbed copy of a given string. /// /// Replaces all occurrences of characters described by the regular /// expression with the character replacement. /// - /// @param original the string to scrub + /// @param original The string to be scrubbed. /// - /// @throw Unexpected if an error occurs during scrubbing + /// @throw Unexpected if an error occurs during scrubbing. std::string scrub(const std::string& original); /// @brief The maximum size for regex parameters. @@ -356,48 +259,34 @@ private: }; /// @brief Type representing the pointer to the @c StringSanitizer. -typedef boost::shared_ptr StringSanitizerPtr; +using StringSanitizerPtr = std::unique_ptr; -/// @brief Check if a string is printable +/// @brief Check if a string is printable. /// -/// @param content String to check for printable characters +/// @param content String to check for printable characters. /// -/// @return True if empty or contains only printable characters, False otherwise -inline bool -isPrintable(const std::string& content) { - for (auto const& ch : content) { - if (isprint(static_cast(ch)) == 0) { - return (false); - } - } - return (true); -} +/// @return True if empty or contains only printable characters, False otherwise. +bool +isPrintable(const std::string& content); -/// @brief Check if a byte vector is printable +/// @brief Check if a byte vector is printable. /// -/// @param content Vector to check for printable characters +/// @param content Vector to check for printable characters. /// -/// @return True if empty or contains only printable characters, False otherwise -inline bool -isPrintable(const std::vector& content) { - for (auto const& ch : content) { - if (isprint(static_cast(ch)) == 0) { - return (false); - } - } - return (true); -} - +/// @return True if empty or contains only printable characters, False otherwise. +bool +isPrintable(const std::vector& content); -/// @brief Dumps a buffer of bytes as a string of hexadecimal digits +/// @brief Dumps a buffer of bytes as a string of hexadecimal digits. /// -/// @param data pointer to the data to dump -/// @param length number of bytes to dump. Caller should ensure the length +/// @param data Pointer to the data to dump. +/// @param length Number of bytes to dump. Caller should ensure the length /// does not exceed the buffer. -std::string dumpAsHex(const uint8_t* data, size_t length); +std::string +dumpAsHex(const uint8_t* data, size_t length); -} // namespace str -} // namespace util -} // namespace isc +} // namespace str +} // namespace util +} // namespace isc -#endif // STRUTIL_H +#endif // KEA_UTIL_STR_H diff --git a/src/lib/util/strutil.cc b/src/lib/util/strutil.cc deleted file mode 100644 index 7c3b2e65e2..0000000000 --- a/src/lib/util/strutil.cc +++ /dev/null @@ -1,467 +0,0 @@ -// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include - -// Early versions of C++11 regex were buggy, use it if we -// can otherwise, we fall back to regcomp/regexec. For more info see: -// https://stackoverflow.com/questions/12530406/is-gcc-4-8-or-earlier-buggy-about-regular-expressions -#ifdef USE_REGEX -#include -#else -#include -#include -#endif - -#include - -using namespace std; - -namespace isc { -namespace util { -namespace str { - -// Normalize slashes - -void -normalizeSlash(std::string& name) { - if (!name.empty()) { - size_t pos = 0; - while ((pos = name.find('\\', pos)) != std::string::npos) { - name[pos] = '/'; - } - } -} - -// Trim String - -string -trim(const string& instring) { - string retstring = ""; - if (!instring.empty()) { - static const char* blanks = " \t\n"; - - // Search for first non-blank character in the string - size_t first = instring.find_first_not_of(blanks); - if (first != string::npos) { - - // String not all blanks, so look for last character - size_t last = instring.find_last_not_of(blanks); - - // Extract the trimmed substring - retstring = instring.substr(first, (last - first + 1)); - } - } - - return (retstring); -} - -// Tokenize string. As noted in the header, this is locally written to avoid -// another dependency on a Boost library. - -vector -tokens(const std::string& text, const std::string& delim, bool escape) { - vector result; - string token; - bool in_token = false; - bool escaped = false; - for (auto const& c : text) { - if (delim.find(c) != string::npos) { - // Current character is a delimiter - if (!in_token) { - // Two or more delimiters, eat them - } else if (escaped) { - // Escaped delimiter in a token: reset escaped and keep it - escaped = false; - token.push_back(c); - } else { - // End of the current token: save it if not empty - if (!token.empty()) { - result.push_back(token); - } - // Reset state - in_token = false; - token.clear(); - } - } else if (escape && (c == '\\')) { - // Current character is the escape character - if (!in_token) { - // The escape character is the first character of a new token - in_token = true; - } - if (escaped) { - // Escaped escape: reset escaped and keep one character - escaped = false; - token.push_back(c); - } else { - // Remember to keep the next character - escaped = true; - } - } else { - // Not a delimiter nor an escape - if (!in_token) { - // First character of a new token - in_token = true; - } - if (escaped) { - // Escaped common character: as escape was false - escaped = false; - token.push_back('\\'); - token.push_back(c); - } else { - // The common case: keep it - token.push_back(c); - } - } - } - // End of input: close and save the current token if not empty - if (escaped) { - // Pending escape - token.push_back('\\'); - } - if (!token.empty()) { - result.push_back(token); - } - - return (result); -} - -// Local function to pass to accumulate() for summing up string lengths. - -namespace { - -size_t -lengthSum(string::size_type curlen, const string& cur_string) { - return (curlen + cur_string.size()); -} - -} - -// Provide printf-style formatting. - -std::string -format(const std::string& format, const std::vector& args) { - - static const string flag = "%s"; - - // Initialize return string. To speed things up, we'll reserve an - // appropriate amount of space - current string size, plus length of all - // the argument strings, less two characters for each argument (the %s in - // the format string is being replaced). - string result; - size_t length = accumulate(args.begin(), args.end(), format.size(), - lengthSum) - (args.size() * flag.size()); - result.reserve(length); - - // Iterate through replacing all tokens - result = format; - size_t tokenpos = 0; // Position of last token replaced - std::vector::size_type i = 0; // Index into argument array - - while ((i < args.size()) && (tokenpos != string::npos)) { - tokenpos = result.find(flag, tokenpos); - if (tokenpos != string::npos) { - result.replace(tokenpos, flag.size(), args[i++]); - } - } - - return (result); -} - -std::string -getToken(std::istringstream& iss) { - string token; - iss >> token; - if (iss.bad() || iss.fail()) { - isc_throw(StringTokenError, "could not read token from string"); - } - return (token); -} - -std::vector -quotedStringToBinary(const std::string& quoted_string) { - std::vector binary; - // Remove whitespace before and after the quotes. - std::string trimmed_string = trim(quoted_string); - - // We require two quote characters, so the length of the string must be - // equal to 2 at minimum, and it must start and end with quotes. - if ((trimmed_string.length() > 1) && ((trimmed_string[0] == '\'') && - (trimmed_string[trimmed_string.length()-1] == '\''))) { - // Remove quotes and trim the text inside the quotes. - trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2)); - // Copy string contents into the vector. - binary.assign(trimmed_string.begin(), trimmed_string.end()); - } - // Return resulting vector or empty vector. - return (binary); -} - -void -decodeColonSeparatedHexString(const std::string& hex_string, - std::vector& binary) { - decodeSeparatedHexString(hex_string, ":", binary); -} - -void -decodeSeparatedHexString(const std::string& hex_string, const std::string& sep, - std::vector& binary) { - std::vector split_text; - boost::split(split_text, hex_string, boost::is_any_of(sep), - boost::algorithm::token_compress_off); - - std::vector binary_vec; - for (size_t i = 0; i < split_text.size(); ++i) { - - // If there are multiple tokens and the current one is empty, it - // means that two consecutive colons were specified. This is not - // allowed. - if ((split_text.size() > 1) && split_text[i].empty()) { - isc_throw(isc::BadValue, "two consecutive separators ('" << sep << "') specified in" - " a decoded string '" << hex_string << "'"); - - // Between a colon we expect at most two characters. - } else if (split_text[i].size() > 2) { - isc_throw(isc::BadValue, "invalid format of the decoded string" - << " '" << hex_string << "'"); - - } else if (!split_text[i].empty()) { - std::stringstream s; - s << "0x"; - - for (unsigned int j = 0; j < split_text[i].length(); ++j) { - // Check if we're dealing with hexadecimal digit. - if (!isxdigit(split_text[i][j])) { - isc_throw(isc::BadValue, "'" << split_text[i][j] - << "' is not a valid hexadecimal digit in" - << " decoded string '" << hex_string << "'"); - } - s << split_text[i][j]; - } - - // The stream should now have one or two hexadecimal digits. - // Let's convert it to a number and store in a temporary - // vector. - unsigned int binary_value; - s >> std::hex >> binary_value; - - binary_vec.push_back(static_cast(binary_value)); - } - - } - - // All ok, replace the data in the output vector with a result. - binary.swap(binary_vec); -} - - -void -decodeFormattedHexString(const std::string& hex_string, - std::vector& binary) { - // If there is at least one colon we assume that the string - // comprises octets separated by colons (e.g. MAC address notation). - if (hex_string.find(':') != std::string::npos) { - decodeSeparatedHexString(hex_string, ":", binary); - } else if (hex_string.find(' ') != std::string::npos) { - decodeSeparatedHexString(hex_string, " ", binary); - } else { - std::ostringstream s; - - // If we have odd number of digits we'll have to prepend '0'. - if (hex_string.length() % 2 != 0) { - s << "0"; - } - - // It is ok to use '0x' prefix in a string. - if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) { - // Exclude '0x' from the decoded string. - s << hex_string.substr(2); - - } else { - // No '0x', so decode the whole string. - s << hex_string; - } - - try { - // Decode the hex string. - encode::decodeHex(s.str(), binary); - - } catch (...) { - isc_throw(isc::BadValue, "'" << hex_string << "' is not a valid" - " string of hexadecimal digits"); - } - } -} - -class StringSanitizerImpl { -public: - /// @brief Constructor. - StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement) - : char_set_(char_set), char_replacement_(char_replacement) { - if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) { - isc_throw(isc::BadValue, "char set size: '" << char_set.size() - << "' exceeds max size: '" - << StringSanitizer::MAX_DATA_SIZE << "'"); - } - - if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) { - isc_throw(isc::BadValue, "char replacement size: '" - << char_replacement.size() << "' exceeds max size: '" - << StringSanitizer::MAX_DATA_SIZE << "'"); - } -#ifdef USE_REGEX - try { - scrub_exp_ = std::regex(char_set, std::regex::extended); - } catch (const std::exception& ex) { - isc_throw(isc::BadValue, "invalid regex: '" - << char_set_ << "', " << ex.what()); - } -#else - int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED); - if (ec) { - char errbuf[512] = ""; - static_cast(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf))); - regfree(&scrub_exp_); - isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf); - } -#endif - } - - /// @brief Destructor. - ~StringSanitizerImpl() { -#ifndef USE_REGEX - regfree(&scrub_exp_); -#endif - } - - std::string scrub(const std::string& original) { -#ifdef USE_REGEX - std::stringstream result; - try { - std::regex_replace(std::ostream_iterator(result), - original.begin(), original.end(), - scrub_exp_, char_replacement_); - } catch (const std::exception& ex) { - isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '" - << char_replacement_ << "' in '" << original << "' failed: ," - << ex.what()); - } - - return (result.str()); -#else - // In order to handle embedded nuls, we have to process in nul-terminated - // chunks. We iterate over the original data, doing pattern replacement - // on each chunk. - const char* orig_data = original.data(); - const char* dead_end = orig_data + original.size(); - const char* start_from = orig_data; - stringstream result; - - while (start_from < dead_end) { - // Iterate over original string, match by match. - regmatch_t matches[2]; // n matches + 1 - const char* end_at = start_from + strlen(start_from); - - while (start_from < end_at) { - // Look for the next match - if (regexec(&scrub_exp_, start_from, 1, matches, 0) == REG_NOMATCH) { - // No matches, so add in the remainder - result << start_from; - start_from = end_at + 1; - break; - } - - // Shouldn't happen, but one never knows eh? - if (matches[0].rm_so == -1) { - isc_throw(isc::Unexpected, "matched but so is -1?"); - } - - // Add everything from starting point up to the current match - const char* match_at = start_from + matches[0].rm_so; - while (start_from < match_at) { - result << *start_from; - ++start_from; - } - - // Add in the replacement - result << char_replacement_; - - // Move past the match. - ++start_from; - } - - // if we have an embedded nul, replace it and continue - if (start_from < dead_end) { - // Add in the replacement - result << char_replacement_; - start_from = end_at + 1; - } - } - - return (result.str()); -#endif - } - -private: - /// @brief The char set data for regex. - std::string char_set_; - - /// @brief The char replacement data for regex. - std::string char_replacement_; - -#ifdef USE_REGEX - regex scrub_exp_; -#else - regex_t scrub_exp_; -#endif -}; - -// @note The regex engine is implemented using recursion and can cause -// stack overflow if the input data is too large. An arbitrary size of -// 4096 should be enough for all cases. -const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096; - -StringSanitizer::StringSanitizer(const std::string& char_set, - const std::string& char_replacement) - : impl_(new StringSanitizerImpl(char_set, char_replacement)) { -} - -StringSanitizer::~StringSanitizer() { -} - -std::string -StringSanitizer::scrub(const std::string& original) { - return (impl_->scrub(original)); -} - -std::string dumpAsHex(const uint8_t* data, size_t length) { - std::stringstream output; - for (unsigned int i = 0; i < length; i++) { - if (i) { - output << ":"; - } - - output << std::setfill('0') << std::setw(2) << std::hex - << static_cast(data[i]); - } - - return (output.str()); -} - -} // namespace str -} // namespace util -} // namespace isc diff --git a/src/lib/util/tests/str_unittests.cc b/src/lib/util/tests/str_unittests.cc new file mode 100644 index 0000000000..accaf218bd --- /dev/null +++ b/src/lib/util/tests/str_unittests.cc @@ -0,0 +1,514 @@ +// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +using namespace isc; +using namespace isc::util; +using namespace isc::util::encode; +using namespace isc::util::str; +using namespace std; + +namespace { + +/// @brief Fixture used to test StringSanitizer. +struct StringUtilTest : ::testing::Test { + /// @brief Pass string through scrub and check the result. + /// + /// @param original The string to sanitize. + /// @param char_set The regular expression string describing invalid characters. + /// @param char_replacement - character(s) which replace invalid + /// characters + /// @param expected - expected sanitized string + void checkScrub(const string& original, + const string& char_set, + const string& char_replacement, + const string& expected) { + StringSanitizerPtr ss; + string sanitized; + + try { + ss.reset(new StringSanitizer(char_set, char_replacement)); + } catch (const exception& ex) { + ADD_FAILURE() << "Could not construct sanitizer:" << ex.what(); + return; + } + + try { + sanitized = ss->scrub(original); + } catch (const exception& ex) { + ADD_FAILURE() << "Could not scrub string:" << ex.what(); + return; + } + + EXPECT_EQ(sanitized, expected); + } + + /// @brief Check that hex strings with colons can be decoded. + /// + /// @param input Input string to be decoded. + /// @param reference The expected result. + void checkColonSeparated(const string& input, const string& reference) { + // Create a reference vector. + vector reference_vector; + ASSERT_NO_THROW_LOG(decodeHex(reference, reference_vector)); + + // Fill the output vector with some garbage to make sure that + // the data is erased when a string is decoded successfully. + vector decoded(1, 10); + ASSERT_NO_THROW_LOG(decodeColonSeparatedHexString(input, decoded)); + + // Get the string representation of the decoded data for logging + // purposes. + string encoded; + ASSERT_NO_THROW_LOG(encoded = encodeHex(decoded)); + + // Check if the decoded data matches the reference. + EXPECT_EQ(decoded, reference_vector) << "decoded data don't match the reference, input='" + << input << "', reference='" << reference + << "'" + ", decoded='" + << encoded << "'"; + } + + /// @brief Check that formatted hex strings can be decoded. + /// + /// @param input Input string to be decoded. + /// @param reference The expected result. + void checkFormatted(const string& input, const string& reference) { + // Create a reference vector. + vector reference_vector; + ASSERT_NO_THROW_LOG(decodeHex(reference, reference_vector)); + + // Fill the output vector with some garbage to make sure that + // the data is erased when a string is decoded successfully. + vector decoded(1, 10); + ASSERT_NO_THROW_LOG(decodeFormattedHexString(input, decoded)); + + // Get the string representation of the decoded data for logging + // purposes. + string encoded; + ASSERT_NO_THROW_LOG(encoded = encodeHex(decoded)); + + // Check if the decoded data matches the reference. + EXPECT_EQ(decoded, reference_vector) + << "decoded data don't match the reference, input='" << input << "', reference='" + << reference << "', decoded='" << encoded << "'"; + } + + /// @brief Convenience function which calls quotedStringToBinary + /// and converts returned vector back to string. + /// + /// @param s Input string. + /// + /// @return String holding a copy of a vector returned by the + /// quotedStringToBinary. + string checkQuoted(const string& s) { + vector vec = quotedStringToBinary(s); + string s2(vec.begin(), vec.end()); + return (s2); + } +}; + +// Check that leading and trailing space trimming works. +TEST_F(StringUtilTest, Trim) { + // Empty and full string. + EXPECT_EQ("", trim("")); + EXPECT_EQ("abcxyz", trim("abcxyz")); + + // Trim right-most blanks + EXPECT_EQ("ABC", trim("ABC ")); + EXPECT_EQ("ABC", trim("ABC\t\t \n\t")); + + // Left-most blank trimming + EXPECT_EQ("XYZ", trim(" XYZ")); + EXPECT_EQ("XYZ", trim("\t\t \tXYZ")); + + // Right and left, with embedded spaces + EXPECT_EQ("MN \t OP", trim("\t\tMN \t OP \t")); +} + +// Check tokenization. +TEST_F(StringUtilTest, Tokens) { + vector result; + + // Default delimiters + + // Degenerate cases + result = tokens(""); // Empty string + EXPECT_EQ(0, result.size()); + + result = tokens(" \n "); // String is all delimiters + EXPECT_EQ(0, result.size()); + + result = tokens("abc"); // String has no delimiters + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("abc"), result[0]); + + // String containing leading and/or trailing delimiters, no embedded ones. + result = tokens("\txyz"); // One leading delimiter + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("\t \nxyz"); // Multiple leading delimiters + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("xyz\n"); // One trailing delimiter + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("xyz \t"); // Multiple trailing + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("\t xyz \n"); // Leading and trailing + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + // Embedded delimiters + result = tokens("abc\ndef"); // 2 tokens, one separator + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + + result = tokens("abc\t\t\ndef"); // 2 tokens, 3 separators + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + + result = tokens("abc\n \tdef\t\tghi"); + ASSERT_EQ(3, result.size()); // Multiple tokens, many delims + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + EXPECT_EQ(string("ghi"), result[2]); + + // Embedded and non-embedded delimiters + + result = tokens("\t\t \nabc\n \tdef\t\tghi \n\n"); + ASSERT_EQ(3, result.size()); // Multiple tokens, many delims + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + EXPECT_EQ(string("ghi"), result[2]); + + // Non-default delimiter + result = tokens("alpha/beta/ /gamma//delta/epsilon/", "/"); + ASSERT_EQ(6, result.size()); + EXPECT_EQ(string("alpha"), result[0]); + EXPECT_EQ(string("beta"), result[1]); + EXPECT_EQ(string(" "), result[2]); + EXPECT_EQ(string("gamma"), result[3]); + EXPECT_EQ(string("delta"), result[4]); + EXPECT_EQ(string("epsilon"), result[5]); + + // Non-default delimiters (plural) + result = tokens("+*--alpha*beta+ -gamma**delta+epsilon-+**", "*+-"); + ASSERT_EQ(6, result.size()); + EXPECT_EQ(string("alpha"), result[0]); + EXPECT_EQ(string("beta"), result[1]); + EXPECT_EQ(string(" "), result[2]); + EXPECT_EQ(string("gamma"), result[3]); + EXPECT_EQ(string("delta"), result[4]); + EXPECT_EQ(string("epsilon"), result[5]); + + // Escaped delimiter + result = tokens("foo\\,bar", ",", true); + EXPECT_EQ(1, result.size()); + EXPECT_EQ(string("foo,bar"), result[0]); + + // Escaped escape + result = tokens("foo\\\\,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Double escapes + result = tokens("foo\\\\\\\\,\\bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\\\"), result[0]); + EXPECT_EQ(string("\\bar"), result[1]); + + // Escaped standard character + result = tokens("fo\\o,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("fo\\o"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Escape at the end + result = tokens("foo,bar\\", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string("bar\\"), result[1]); + + // Escape opening a token + result = tokens("foo,\\,,bar", ",", true); + ASSERT_EQ(3, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string(","), result[1]); + EXPECT_EQ(string("bar"), result[2]); +} + +// Check changing of case. +TEST_F(StringUtilTest, ChangeCase) { + string mixed("abcDEFghiJKLmno123[]{=+--+]}"); + string upper("ABCDEFGHIJKLMNO123[]{=+--+]}"); + string lower("abcdefghijklmno123[]{=+--+]}"); + + string test = mixed; + lowercase(test); + EXPECT_EQ(lower, test); + + test = mixed; + uppercase(test); + EXPECT_EQ(upper, test); +} + +TEST_F(StringUtilTest, quotedStringToBinary) { + // No opening or closing quote should result in empty string. + EXPECT_TRUE(quotedStringToBinary("'").empty()); + EXPECT_TRUE(quotedStringToBinary("").empty()); + EXPECT_TRUE(quotedStringToBinary(" ").empty()); + EXPECT_TRUE(quotedStringToBinary("'circuit id").empty()); + EXPECT_TRUE(quotedStringToBinary("circuit id'").empty()); + + // If there is only opening and closing quote, an empty + // vector should be returned. + EXPECT_TRUE(quotedStringToBinary("''").empty()); + + // Both opening and ending quote is present. + EXPECT_EQ("circuit id", checkQuoted("'circuit id'")); + EXPECT_EQ("remote id", checkQuoted(" ' remote id'")); + EXPECT_EQ("duid", checkQuoted(" ' duid'")); + EXPECT_EQ("duid", checkQuoted("'duid ' ")); + EXPECT_EQ("remote'id", checkQuoted(" ' remote'id '")); + EXPECT_EQ("remote id'", checkQuoted("'remote id''")); + EXPECT_EQ("'remote id", checkQuoted("''remote id'")); + + // Multiple quotes. + EXPECT_EQ("'", checkQuoted("'''")); + EXPECT_EQ("''", checkQuoted("''''")); +} + +TEST_F(StringUtilTest, decodeColonSeparatedHexString) { + // Test valid strings. + checkColonSeparated("A1:02:C3:d4:e5:F6", "A102C3D4E5F6"); + checkColonSeparated("A:02:3:d:E5:F6", "0A02030DE5F6"); + checkColonSeparated("A:B:C:D", "0A0B0C0D"); + checkColonSeparated("1", "01"); + checkColonSeparated("1e", "1E"); + checkColonSeparated("", ""); + + // Test invalid strings. + vector decoded; + // Whitespaces. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(" ", decoded), BadValue, + "invalid format of the decoded string ' '"); + // Whitespace before digits. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(" A1", decoded), BadValue, + "invalid format of the decoded string ' A1'"); + // Two consecutive colons. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A::01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string 'A::01'"); + // Three consecutive colons. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A:::01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string 'A:::01'"); + // Whitespace within a string. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A :01", decoded), BadValue, + "' ' is not a valid hexadecimal digit in decoded string 'A :01'"); + // Terminating colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("0A:01:", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string '0A:01:'"); + // Opening colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(":0A:01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string ':0A:01'"); + // Three digits before the colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("0A1:B1", decoded), BadValue, + "invalid format of the decoded string '0A1:B1'"); +} + +TEST_F(StringUtilTest, decodeFormattedHexString) { + // Colon separated. + checkFormatted("1:A7:B5:4:23", "01A7B50423"); + // Space separated. + checkFormatted("1 A7 B5 4 23", "01A7B50423"); + // No colons, even number of digits. + checkFormatted("17a534", "17A534"); + // Odd number of digits. + checkFormatted("A3A6f78", "0A3A6F78"); + // '0x' prefix. + checkFormatted("0xA3A6f78", "0A3A6F78"); + // '0x' prefix with a special value of 0. + checkFormatted("0x0", "00"); + // Empty string. + checkFormatted("", ""); + + vector decoded; + // Dangling colon. + EXPECT_THROW_MSG(decodeFormattedHexString("0a:", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string '0a:'"); + // Dangling space. + EXPECT_THROW_MSG(decodeFormattedHexString("0a ", decoded), BadValue, + "two consecutive separators (' ') specified in a decoded string '0a '"); + // '0x' prefix and spaces. + EXPECT_THROW_MSG(decodeFormattedHexString("0x01 02", decoded), BadValue, + "invalid format of the decoded string '0x01 02'"); + // '0x' prefix and colons. + EXPECT_THROW_MSG(decodeFormattedHexString("0x01:02", decoded), BadValue, + "invalid format of the decoded string '0x01:02'"); + // colon and spaces mixed + EXPECT_THROW_MSG(decodeFormattedHexString("01:02 03", decoded), BadValue, + "invalid format of the decoded string '01:02 03'"); + // Missing colon. + EXPECT_THROW_MSG(decodeFormattedHexString("01:0203", decoded), BadValue, + "invalid format of the decoded string '01:0203'"); + // Missing space. + EXPECT_THROW_MSG(decodeFormattedHexString("01 0203", decoded), BadValue, + "invalid format of the decoded string '01 0203'"); + // Invalid prefix. + EXPECT_THROW_MSG(decodeFormattedHexString("x0102", decoded), BadValue, + "'x0102' is not a valid string of hexadecimal digits"); + // Invalid prefix again. + EXPECT_THROW_MSG(decodeFormattedHexString("1x0102", decoded), BadValue, + "'1x0102' is not a valid string of hexadecimal digits"); +} + +// Verifies StringSantizer class +TEST_F(StringUtilTest, stringSanitizer) { + // Bad regular expression should throw. + StringSanitizerPtr ss; + ASSERT_THROW_MSG(ss.reset(new StringSanitizer("[bogus-regex", "")), BadValue, + "invalid regex: '[bogus-regex', Invalid range in bracket expression."); + + string good_data(StringSanitizer::MAX_DATA_SIZE, '0'); + string bad_data(StringSanitizer::MAX_DATA_SIZE + 1, '0'); + + ASSERT_NO_THROW_LOG(ss.reset(new StringSanitizer(good_data, good_data))); + + ASSERT_THROW_MSG(ss.reset(new StringSanitizer(bad_data, "")), BadValue, + "char set size: '4097' exceeds max size: '4096'"); + ASSERT_THROW_MSG(ss.reset(new StringSanitizer("", bad_data)), BadValue, + "char replacement size: '4097' exceeds max size: '4096'"); + + // List of invalid chars should work: (b,c,2 are invalid) + checkScrub("abc.123", "[b-c2]", "*", "a**.1*3"); + // Inverted list of valid chars should work: (b,c,2 are valid) + checkScrub("abc.123", "[^b-c2]", "*", "*bc**2*"); + + // A string of all valid chars should return an identical string. + checkScrub("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x", "-_A--B__Cabc34567_-"); + + // Replacing with a character should work. + checkScrub("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*", "A*b*c*JoE3*_x*B*Y*e"); + + // Removing (i.e.replacing with an "empty" string) should work. + checkScrub("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "", "AbcJoE3_xBYe"); + + // More than one non-matching in a row should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x", "xxAxxBxxCxx"); + + // Removing more than one non-matching in a row should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "", "ABC"); + + // Replacing with a string should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz", "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); + + // Dots as valid chars work. + checkScrub("abc.123", "[^A-Za-z0-9_.]", "*", "abc.123"); + + string withNulls("\000ab\000c.12\0003", 10); + checkScrub(withNulls, "[^A-Za-z0-9_.]", "*", "*ab*c.12*3"); +} + +// Verifies templated buffer iterator seekTrimmed() function +TEST_F(StringUtilTest, seekTrimmed) { + // Empty buffer should be fine. + vector buffer; + auto begin = buffer.end(); + auto end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(0, distance(begin, end)); + + // Buffer of only trim values, should be fine. + buffer = {1, 1}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 1)); + EXPECT_EQ(0, distance(begin, end)); + + // One trailing null should trim off. + buffer = {'o', 'n', 'e', 0}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(3, distance(begin, end)); + + // More than one trailing null should trim off. + buffer = {'t', 'h', 'r', 'e', 'e', 0, 0, 0}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(5, distance(begin, end)); + + // Embedded null should be left in place. + buffer = {'e', 'm', 0, 'b', 'e', 'd'}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(6, distance(begin, end)); + + // Leading null should be left in place. + buffer = {0, 'l', 'e', 'a', 'd', 'i', 'n', 'g'}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(8, distance(begin, end)); +} + +// Verifies isPrintable predicate on strings. +TEST_F(StringUtilTest, stringIsPrintable) { + string content; + + // Empty is printable. + EXPECT_TRUE(isPrintable(content)); + + // Check Abcd. + content = "Abcd"; + EXPECT_TRUE(isPrintable(content)); + + // Add a control character (not printable). + content += "\a"; + EXPECT_FALSE(isPrintable(content)); +} + +// Verifies isPrintable predicate on byte vectors. +TEST_F(StringUtilTest, vectorIsPrintable) { + vector content; + + // Empty is printable. + EXPECT_TRUE(isPrintable(content)); + + // Check Abcd. + content = {0x41, 0x62, 0x63, 0x64}; + EXPECT_TRUE(isPrintable(content)); + + // Add a control character (not printable). + content.push_back('\a'); + EXPECT_FALSE(isPrintable(content)); +} + +} // namespace diff --git a/src/lib/util/tests/strutil_unittest.cc b/src/lib/util/tests/strutil_unittest.cc deleted file mode 100644 index 5372ba0c85..0000000000 --- a/src/lib/util/tests/strutil_unittest.cc +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") -// -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include - -#include -#include -#include - -#include - -#include -#include - -using namespace isc; -using namespace isc::util; -using namespace isc::util::str; -using namespace std; - -namespace { - -// Check for slash replacement - -TEST(StringUtilTest, Slash) { - - string instring = ""; - isc::util::str::normalizeSlash(instring); - EXPECT_EQ("", instring); - - instring = "C:\\A\\B\\C.D"; - isc::util::str::normalizeSlash(instring); - EXPECT_EQ("C:/A/B/C.D", instring); - - instring = "// \\ //"; - isc::util::str::normalizeSlash(instring); - EXPECT_EQ("// / //", instring); -} - -// Check that leading and trailing space trimming works - -TEST(StringUtilTest, Trim) { - - // Empty and full string. - EXPECT_EQ("", isc::util::str::trim("")); - EXPECT_EQ("abcxyz", isc::util::str::trim("abcxyz")); - - // Trim right-most blanks - EXPECT_EQ("ABC", isc::util::str::trim("ABC ")); - EXPECT_EQ("ABC", isc::util::str::trim("ABC\t\t \n\t")); - - // Left-most blank trimming - EXPECT_EQ("XYZ", isc::util::str::trim(" XYZ")); - EXPECT_EQ("XYZ", isc::util::str::trim("\t\t \tXYZ")); - - // Right and left, with embedded spaces - EXPECT_EQ("MN \t OP", isc::util::str::trim("\t\tMN \t OP \t")); -} - -// Check tokenization. Note that ASSERT_EQ is used to check the size of the -// returned vector; if not as expected, the following references may be invalid -// so should not be used. - -TEST(StringUtilTest, Tokens) { - vector result; - - // Default delimiters - - // Degenerate cases - result = isc::util::str::tokens(""); // Empty string - EXPECT_EQ(0, result.size()); - - result = isc::util::str::tokens(" \n "); // String is all delimiters - EXPECT_EQ(0, result.size()); - - result = isc::util::str::tokens("abc"); // String has no delimiters - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("abc"), result[0]); - - // String containing leading and/or trailing delimiters, no embedded ones. - result = isc::util::str::tokens("\txyz"); // One leading delimiter - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("\t \nxyz"); // Multiple leading delimiters - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("xyz\n"); // One trailing delimiter - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("xyz \t"); // Multiple trailing - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - result = isc::util::str::tokens("\t xyz \n"); // Leading and trailing - ASSERT_EQ(1, result.size()); - EXPECT_EQ(string("xyz"), result[0]); - - // Embedded delimiters - result = isc::util::str::tokens("abc\ndef"); // 2 tokens, one separator - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - - result = isc::util::str::tokens("abc\t\t\ndef"); // 2 tokens, 3 separators - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - - result = isc::util::str::tokens("abc\n \tdef\t\tghi"); - ASSERT_EQ(3, result.size()); // Multiple tokens, many delims - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - EXPECT_EQ(string("ghi"), result[2]); - - // Embedded and non-embedded delimiters - - result = isc::util::str::tokens("\t\t \nabc\n \tdef\t\tghi \n\n"); - ASSERT_EQ(3, result.size()); // Multiple tokens, many delims - EXPECT_EQ(string("abc"), result[0]); - EXPECT_EQ(string("def"), result[1]); - EXPECT_EQ(string("ghi"), result[2]); - - // Non-default delimiter - result = isc::util::str::tokens("alpha/beta/ /gamma//delta/epsilon/", "/"); - ASSERT_EQ(6, result.size()); - EXPECT_EQ(string("alpha"), result[0]); - EXPECT_EQ(string("beta"), result[1]); - EXPECT_EQ(string(" "), result[2]); - EXPECT_EQ(string("gamma"), result[3]); - EXPECT_EQ(string("delta"), result[4]); - EXPECT_EQ(string("epsilon"), result[5]); - - // Non-default delimiters (plural) - result = isc::util::str::tokens("+*--alpha*beta+ -gamma**delta+epsilon-+**", - "*+-"); - ASSERT_EQ(6, result.size()); - EXPECT_EQ(string("alpha"), result[0]); - EXPECT_EQ(string("beta"), result[1]); - EXPECT_EQ(string(" "), result[2]); - EXPECT_EQ(string("gamma"), result[3]); - EXPECT_EQ(string("delta"), result[4]); - EXPECT_EQ(string("epsilon"), result[5]); - - // Escaped delimiter - result = isc::util::str::tokens("foo\\,bar", ",", true); - EXPECT_EQ(1, result.size()); - EXPECT_EQ(string("foo,bar"), result[0]); - - // Escaped escape - result = isc::util::str::tokens("foo\\\\,bar", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo\\"), result[0]); - EXPECT_EQ(string("bar"), result[1]); - - // Double escapes - result = isc::util::str::tokens("foo\\\\\\\\,\\bar", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo\\\\"), result[0]); - EXPECT_EQ(string("\\bar"), result[1]); - - // Escaped standard character - result = isc::util::str::tokens("fo\\o,bar", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("fo\\o"), result[0]); - EXPECT_EQ(string("bar"), result[1]); - - // Escape at the end - result = isc::util::str::tokens("foo,bar\\", ",", true); - ASSERT_EQ(2, result.size()); - EXPECT_EQ(string("foo"), result[0]); - EXPECT_EQ(string("bar\\"), result[1]); - - // Escape opening a token - result = isc::util::str::tokens("foo,\\,,bar", ",", true); - ASSERT_EQ(3, result.size()); - EXPECT_EQ(string("foo"), result[0]); - EXPECT_EQ(string(","), result[1]); - EXPECT_EQ(string("bar"), result[2]); -} - -// Changing case - -TEST(StringUtilTest, ChangeCase) { - string mixed("abcDEFghiJKLmno123[]{=+--+]}"); - string upper("ABCDEFGHIJKLMNO123[]{=+--+]}"); - string lower("abcdefghijklmno123[]{=+--+]}"); - - string test = mixed; - isc::util::str::lowercase(test); - EXPECT_EQ(lower, test); - - test = mixed; - isc::util::str::uppercase(test); - EXPECT_EQ(upper, test); -} - -// Formatting - -TEST(StringUtilTest, Formatting) { - - vector args; - args.push_back("arg1"); - args.push_back("arg2"); - args.push_back("arg3"); - - string format1 = "This is a string with no tokens"; - EXPECT_EQ(format1, isc::util::str::format(format1, args)); - - string format2 = ""; // Empty string - EXPECT_EQ(format2, isc::util::str::format(format2, args)); - - string format3 = " "; // Empty string - EXPECT_EQ(format3, isc::util::str::format(format3, args)); - - string format4 = "String with %d non-string tokens %lf"; - EXPECT_EQ(format4, isc::util::str::format(format4, args)); - - string format5 = "String with %s correct %s number of tokens %s"; - string result5 = "String with arg1 correct arg2 number of tokens arg3"; - EXPECT_EQ(result5, isc::util::str::format(format5, args)); - - string format6 = "String with %s too %s few tokens"; - string result6 = "String with arg1 too arg2 few tokens"; - EXPECT_EQ(result6, isc::util::str::format(format6, args)); - - string format7 = "String with %s too %s many %s tokens %s !"; - string result7 = "String with arg1 too arg2 many arg3 tokens %s !"; - EXPECT_EQ(result7, isc::util::str::format(format7, args)); - - string format8 = "String with embedded%s%s%stokens"; - string result8 = "String with embeddedarg1arg2arg3tokens"; - EXPECT_EQ(result8, isc::util::str::format(format8, args)); - - // Handle an empty vector - args.clear(); - string format9 = "%s %s"; - EXPECT_EQ(format9, isc::util::str::format(format9, args)); -} - -TEST(StringUtilTest, getToken) { - string s("a b c"); - istringstream ss(s); - EXPECT_EQ("a", isc::util::str::getToken(ss)); - EXPECT_EQ("b", isc::util::str::getToken(ss)); - EXPECT_EQ("c", isc::util::str::getToken(ss)); - EXPECT_THROW(isc::util::str::getToken(ss), isc::util::str::StringTokenError); -} - -int32_t tokenToNumCall_32_16(const string& token) { - return isc::util::str::tokenToNum(token); -} - -int16_t tokenToNumCall_16_8(const string& token) { - return isc::util::str::tokenToNum(token); -} - -TEST(StringUtilTest, tokenToNum) { - uint32_t num32 = tokenToNumCall_32_16("0"); - EXPECT_EQ(0, num32); - num32 = tokenToNumCall_32_16("123"); - EXPECT_EQ(123, num32); - num32 = tokenToNumCall_32_16("65535"); - EXPECT_EQ(65535, num32); - - EXPECT_THROW(tokenToNumCall_32_16(""), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("a"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("-1"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("65536"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("1234567890"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_32_16("-1234567890"), - isc::util::str::StringTokenError); - - uint16_t num16 = tokenToNumCall_16_8("123"); - EXPECT_EQ(123, num16); - num16 = tokenToNumCall_16_8("0"); - EXPECT_EQ(0, num16); - num16 = tokenToNumCall_16_8("255"); - EXPECT_EQ(255, num16); - - EXPECT_THROW(tokenToNumCall_16_8(""), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("a"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("-1"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("256"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("1234567890"), - isc::util::str::StringTokenError); - EXPECT_THROW(tokenToNumCall_16_8("-1234567890"), - isc::util::str::StringTokenError); - -} - -/// @brief Convenience function which calls quotedStringToBinary -/// and converts returned vector back to string. -/// -/// @param s Input string. -/// @return String holding a copy of a vector returned by the -/// quotedStringToBinary. -std::string testQuoted(const std::string& s) { - std::vector vec = str::quotedStringToBinary(s); - std::string s2(vec.begin(), vec.end()); - return (s2); -} - -TEST(StringUtilTest, quotedStringToBinary) { - // No opening or closing quote should result in empty string. - EXPECT_TRUE(str::quotedStringToBinary("'").empty()); - EXPECT_TRUE(str::quotedStringToBinary("").empty()); - EXPECT_TRUE(str::quotedStringToBinary(" ").empty()); - EXPECT_TRUE(str::quotedStringToBinary("'circuit id").empty()); - EXPECT_TRUE(str::quotedStringToBinary("circuit id'").empty()); - - // If there is only opening and closing quote, an empty - // vector should be returned. - EXPECT_TRUE(str::quotedStringToBinary("''").empty()); - - // Both opening and ending quote is present. - EXPECT_EQ("circuit id", testQuoted("'circuit id'")); - EXPECT_EQ("remote id", testQuoted(" ' remote id'")); - EXPECT_EQ("duid", testQuoted(" ' duid'")); - EXPECT_EQ("duid", testQuoted("'duid ' ")); - EXPECT_EQ("remote'id", testQuoted(" ' remote'id '")); - EXPECT_EQ("remote id'", testQuoted("'remote id''")); - EXPECT_EQ("'remote id", testQuoted("''remote id'")); - - // Multiple quotes. - EXPECT_EQ("'", testQuoted("'''")); - EXPECT_EQ("''", testQuoted("''''")); -} - -/// @brief Test that hex string with colons can be decoded. -/// -/// @param input Input string to be decoded. -/// @param reference A string without colons representing the -/// decoded data. -void testColonSeparated(const std::string& input, - const std::string& reference) { - // Create a reference vector. - std::vector reference_vector; - ASSERT_NO_THROW(encode::decodeHex(reference, reference_vector)); - - // Fill the output vector with some garbage to make sure that - // the data is erased when a string is decoded successfully. - std::vector decoded(1, 10); - ASSERT_NO_THROW(decodeColonSeparatedHexString(input, decoded)); - - // Get the string representation of the decoded data for logging - // purposes. - std::string encoded; - ASSERT_NO_THROW(encoded = encode::encodeHex(decoded)); - - // Check if the decoded data matches the reference. - EXPECT_TRUE(decoded == reference_vector) - << "decoded data don't match the reference, input='" - << input << "', reference='" << reference << "'" - ", decoded='" << encoded << "'"; -} - -TEST(StringUtilTest, decodeColonSeparatedHexString) { - // Test valid strings. - testColonSeparated("A1:02:C3:d4:e5:F6", "A102C3D4E5F6"); - testColonSeparated("A:02:3:d:E5:F6", "0A02030DE5F6"); - testColonSeparated("A:B:C:D", "0A0B0C0D"); - testColonSeparated("1", "01"); - testColonSeparated("1e", "1E"); - testColonSeparated("", ""); - - // Test invalid strings. - std::vector decoded; - // Whitespaces. - EXPECT_THROW(decodeColonSeparatedHexString(" ", decoded), - isc::BadValue); - // Whitespace before digits. - EXPECT_THROW(decodeColonSeparatedHexString(" A1", decoded), - isc::BadValue); - // Two consecutive colons. - EXPECT_THROW(decodeColonSeparatedHexString("A::01", decoded), - isc::BadValue); - // Three consecutive colons. - EXPECT_THROW(decodeColonSeparatedHexString("A:::01", decoded), - isc::BadValue); - // Whitespace within a string. - EXPECT_THROW(decodeColonSeparatedHexString("A :01", decoded), - isc::BadValue); - // Terminating colon. - EXPECT_THROW(decodeColonSeparatedHexString("0A:01:", decoded), - isc::BadValue); - // Opening colon. - EXPECT_THROW(decodeColonSeparatedHexString(":0A:01", decoded), - isc::BadValue); - // Three digits before the colon. - EXPECT_THROW(decodeColonSeparatedHexString("0A1:B1", decoded), - isc::BadValue); -} - -void testFormatted(const std::string& input, - const std::string& reference) { - // Create a reference vector. - std::vector reference_vector; - ASSERT_NO_THROW(encode::decodeHex(reference, reference_vector)); - - // Fill the output vector with some garbage to make sure that - // the data is erased when a string is decoded successfully. - std::vector decoded(1, 10); - ASSERT_NO_THROW(decodeFormattedHexString(input, decoded)); - - // Get the string representation of the decoded data for logging - // purposes. - std::string encoded; - ASSERT_NO_THROW(encoded = encode::encodeHex(decoded)); - - // Check if the decoded data matches the reference. - EXPECT_TRUE(decoded == reference_vector) - << "decoded data don't match the reference, input='" - << input << "', reference='" << reference << "'" - ", decoded='" << encoded << "'"; -} - -TEST(StringUtilTest, decodeFormattedHexString) { - // Colon separated. - testFormatted("1:A7:B5:4:23", "01A7B50423"); - // Space separated. - testFormatted("1 A7 B5 4 23", "01A7B50423"); - // No colons, even number of digits. - testFormatted("17a534", "17A534"); - // Odd number of digits. - testFormatted("A3A6f78", "0A3A6F78"); - // '0x' prefix. - testFormatted("0xA3A6f78", "0A3A6F78"); - // '0x' prefix with a special value of 0. - testFormatted("0x0", "00"); - // Empty string. - testFormatted("", ""); - - std::vector decoded; - // Dangling colon. - EXPECT_THROW(decodeFormattedHexString("0a:", decoded), - isc::BadValue); - // Dangling space. - EXPECT_THROW(decodeFormattedHexString("0a ", decoded), - isc::BadValue); - // '0x' prefix and spaces. - EXPECT_THROW(decodeFormattedHexString("0x01 02", decoded), - isc::BadValue); - // '0x' prefix and colons. - EXPECT_THROW(decodeFormattedHexString("0x01:02", decoded), - isc::BadValue); - // colon and spaces mixed - EXPECT_THROW(decodeFormattedHexString("01:02 03", decoded), - isc::BadValue); - // Missing colon. - EXPECT_THROW(decodeFormattedHexString("01:0203", decoded), - isc::BadValue); - // Missing space. - EXPECT_THROW(decodeFormattedHexString("01 0203", decoded), - isc::BadValue); - // Invalid prefix. - EXPECT_THROW(decodeFormattedHexString("x0102", decoded), - isc::BadValue); - // Invalid prefix again. - EXPECT_THROW(decodeFormattedHexString("1x0102", decoded), - isc::BadValue); -} - -/// @brief Function used to test StringSantitizer -/// @param original - string to sanitize -/// @param char_set - regular expression string describing invalid -/// characters -/// @param char_replacement - character(s) which replace invalid -/// characters -/// @param expected - expected sanitized string -void sanitizeStringTest( - const std::string& original, - const std::string& char_set, - const std::string& char_replacement, - const std::string& expected) { - - StringSanitizerPtr ss; - std::string sanitized; - - try { - ss.reset(new StringSanitizer(char_set, char_replacement)); - } catch (const std::exception& ex) { - ADD_FAILURE() << "Could not construct sanitizer:" << ex.what(); - return; - } - - try { - sanitized = ss->scrub(original); - } catch (const std::exception& ex) { - ADD_FAILURE() << "Could not scrub string:" << ex.what(); - return; - } - - EXPECT_EQ(sanitized, expected); -} - -// Verifies StringSantizer class -TEST(StringUtilTest, stringSanitizer) { - // Bad regular expression should throw. - StringSanitizerPtr ss; - ASSERT_THROW(ss.reset(new StringSanitizer("[bogus-regex","")), BadValue); - - std::string good_data(StringSanitizer::MAX_DATA_SIZE, '0'); - std::string bad_data(StringSanitizer::MAX_DATA_SIZE + 1, '0'); - - ASSERT_NO_THROW(ss.reset(new StringSanitizer(good_data, good_data))); - - ASSERT_THROW(ss.reset(new StringSanitizer(bad_data, "")), BadValue); - ASSERT_THROW(ss.reset(new StringSanitizer("", bad_data)), BadValue); - - // List of invalid chars should work: (b,c,2 are invalid) - sanitizeStringTest("abc.123", "[b-c2]", "*", - "a**.1*3"); - // Inverted list of valid chars should work: (b,c,2 are valid) - sanitizeStringTest("abc.123", "[^b-c2]", "*", - "*bc**2*"); - - // A string of all valid chars should return an identical string. - sanitizeStringTest("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x", - "-_A--B__Cabc34567_-"); - - // Replacing with a character should work. - sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*", - "A*b*c*JoE3*_x*B*Y*e"); - - // Removing (i.e.replacing with an "empty" string) should work. - sanitizeStringTest("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "", - "AbcJoE3_xBYe"); - - // More than one non-matching in a row should work. - sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x", - "xxAxxBxxCxx"); - - // Removing more than one non-matching in a row should work. - sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "", - "ABC"); - - // Replacing with a string should work. - sanitizeStringTest("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz", - "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); - - // Dots as valid chars work. - sanitizeStringTest("abc.123", "[^A-Za-z0-9_.]", "*", - "abc.123"); - - std::string withNulls("\000ab\000c.12\0003",10); - sanitizeStringTest(withNulls, "[^A-Za-z0-9_.]", "*", - "*ab*c.12*3"); -} - -// Verifies templated buffer iterator seekTrimmed() function -TEST(StringUtilTest, seekTrimmed) { - - // Empty buffer should be fine. - std::vector buffer; - auto begin = buffer.end(); - auto end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(0, std::distance(begin, end)); - - // Buffer of only trim values, should be fine. - buffer = { 1, 1 }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 1)); - EXPECT_EQ(0, std::distance(begin, end)); - - // One trailing null should trim off. - buffer = {'o', 'n', 'e', 0 }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(3, std::distance(begin, end)); - - // More than one trailing null should trim off. - buffer = { 't', 'h', 'r', 'e', 'e', 0, 0, 0 }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(5, std::distance(begin, end)); - - // Embedded null should be left in place. - buffer = { 'e', 'm', 0, 'b', 'e', 'd' }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(6, std::distance(begin, end)); - - // Leading null should be left in place. - buffer = { 0, 'l', 'e', 'a', 'd', 'i', 'n', 'g' }; - begin = buffer.begin(); - end = buffer.end(); - ASSERT_NO_THROW(end = seekTrimmed(begin, end, 0)); - EXPECT_EQ(8, std::distance(begin, end)); -} - -// Verifies isPrintable predicate on strings. -TEST(StringUtilTest, stringIsPrintable) { - string content; - - // Empty is printable. - EXPECT_TRUE(isPrintable(content)); - - // Check Abcd. - content = "Abcd"; - EXPECT_TRUE(isPrintable(content)); - - // Add a control character (not printable). - content += "\a"; - EXPECT_FALSE(isPrintable(content)); -} - -// Verifies isPrintable predicate on byte vectors. -TEST(StringUtilTest, vectorIsPrintable) { - vector content; - - // Empty is printable. - EXPECT_TRUE(isPrintable(content)); - - // Check Abcd. - content = { 0x41, 0x62, 0x63, 0x64 }; - EXPECT_TRUE(isPrintable(content)); - - // Add a control character (not printable). - content.push_back('\a'); - EXPECT_FALSE(isPrintable(content)); -} - -} // end of anonymous namespace diff --git a/src/lib/yang/adaptor_host.cc b/src/lib/yang/adaptor_host.cc index 78f427e651..931696f214 100644 --- a/src/lib/yang/adaptor_host.cc +++ b/src/lib/yang/adaptor_host.cc @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include