libkea_util_la_SOURCES += encode/base_n.cc encode/hex.h
libkea_util_la_SOURCES += encode/binary_from_base32hex.h
libkea_util_la_SOURCES += encode/binary_from_base16.h
+libkea_util_la_SOURCES += encode/utf8.cc encode/utf8.h
libkea_util_la_SOURCES += random/qid_gen.h random/qid_gen.cc
libkea_util_la_SOURCES += random/random_number_generator.h
encode/base64.h \
encode/binary_from_base16.h \
encode/binary_from_base32hex.h \
- encode/hex.h
+ encode/hex.h \
+ encode/utf8.h
libkea_util_io_includedir = $(pkgincludedir)/util/io
libkea_util_io_include_HEADERS = \
--- /dev/null
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <util/encode/utf8.h>
+
+namespace isc {
+namespace util {
+namespace encode {
+
/// @brief Encode a Latin-1 string into UTF-8.
///
/// Every byte of @c value is taken as one code point in the range
/// 0x00-0xff. Bytes below 0x80 are copied unchanged; any other byte is
/// expanded into the two byte sequence 110000xx 10xxxxxx.
///
/// @param value The input string. Its length comes from size(), so
/// embedded NUL bytes are encoded like any other byte.
/// @return A vector holding the UTF-8 encoded bytes (empty when the
/// input is empty).
std::vector<uint8_t> encodeUtf8(const std::string& value) {
    std::vector<uint8_t> result;
    // Each input byte yields at least one output byte, so reserve that
    // much up front instead of copying the input into a scratch vector.
    result.reserve(value.size());
    for (const char c : value) {
        // char may be signed: convert to uint8_t before comparing/shifting.
        const uint8_t ch = static_cast<uint8_t>(c);
        if (ch < 0x80) {
            result.push_back(ch);
        } else {
            // Two byte form: top two bits in the lead byte, low six bits
            // in the continuation byte.
            result.push_back(static_cast<uint8_t>(0xc0 | (ch >> 6)));
            result.push_back(static_cast<uint8_t>(0x80 | (ch & 0x3f)));
        }
    }
    return (result);
}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
--- /dev/null
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef UTF8_H
+#define UTF8_H 1
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+namespace isc {
+namespace util {
+namespace encode {
+/// @brief Encode value string into UTF-8, taking each input byte as one Latin-1 code point.
+///
+/// @param value A string in latin1 (one code point per byte, 0x00-0xff); its full size() is used.
+/// @return A vector storing the UTF-8 data: bytes below 0x80 are copied, others become two bytes.
std::vector<uint8_t> encodeUtf8(const std::string& value);
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // UTF8_H
run_unittests_SOURCES += signal_set_unittest.cc
run_unittests_SOURCES += stopwatch_unittest.cc
run_unittests_SOURCES += unlock_guard_unittests.cc
+run_unittests_SOURCES += utf8_unittest.cc
run_unittests_SOURCES += versioned_csv_file_unittest.cc
run_unittests_SOURCES += watch_socket_unittests.cc
run_unittests_SOURCES += watched_thread_unittest.cc
-
run_unittests_CPPFLAGS = $(AM_CPPFLAGS) $(GTEST_INCLUDES)
run_unittests_LDFLAGS = $(AM_LDFLAGS) $(GTEST_LDFLAGS)
--- /dev/null
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+
+#include <util/encode/utf8.h>
+
+#include <gtest/gtest.h>
+
+using namespace isc::util;
+using namespace isc::util::encode;
+using namespace std;
+
+namespace {
+
+// Verify it does nothing for ASCII.
+TEST(Utf8Test, foobar) {
+ string str("foobar");
+ vector<uint8_t> vec8 = encodeUtf8(str);
+ ASSERT_FALSE(vec8.empty());
+ const char* start = reinterpret_cast<const char*>(&vec8[0]);
+ string str8(start, start + vec8.size());
+ EXPECT_EQ(str, str8);
+}
+
+// Verify it encodes not ASCII as expected.
+TEST(Utf8Test, eightc) {
+ string str("-\x8c-");
+ vector<uint8_t> vec8 = encodeUtf8(str);
+ ASSERT_FALSE(vec8.empty());
+ const char* start = reinterpret_cast<const char*>(&vec8[0]);
+ string str8(start, start + vec8.size());
+ string expected("-\xc2\x8c-");
+ EXPECT_EQ(expected, str8);
+}
+
+// Verify it handles correctly control characters.
+TEST(Utf8Test, control) {
+ string str("fo\x00\n\bar");
+ vector<uint8_t> vec8 = encodeUtf8(str);
+ ASSERT_FALSE(vec8.empty());
+ const char* start = reinterpret_cast<const char*>(&vec8[0]);
+ string str8(start, start + vec8.size());
+ EXPECT_EQ(str, str8);
+}
+
+}