From: Thomas Markwalder Date: Mon, 24 Feb 2020 20:21:52 +0000 (-0500) Subject: [#608] Memfile now tolerates commas in hostname and user-context columns X-Git-Tag: Kea-1.7.6~91 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fd41bc2dc0a19a93a37949d7184de963269e5b4e;p=thirdparty%2Fkea.git [#608] Memfile now tolerates commas in hostname and user-context columns Commas in hostname and user-context columns are now escaped as "," when written out, and unescaped when read back in. src/lib/util/csv_file.* CSVRow::escapeCharacters() CSVRow::unescapeCharacters() CSVRow::readAtEscaped() CSVRow::writeAtEscaped() - new functions CSVRow::parse() - replaced boost::split() faster, simplified parsing src/lib/util/tests/csv_file_unittest.cc TEST(CSVRowTest, escapeUnescape) - new test updated other tests src/lib/dhcpsrv/csv_lease_file4.cc CSVLeaseFile4 now uses CSVRow::writeAtEscaped() and CSVRow::readAtEscaped() for hostname and user-context columns src/lib/dhcpsrv/csv_lease_file6.cc CSVLeaseFile6 now uses CSVRow::writeAtEscaped() and CSVRow::readAtEscaped() for hostname and user-context columns src/lib/dhcpsrv/tests/csv_lease_file4_unittest.cc TEST_F(CSVLeaseFile4Test, embeddedCommas) - new test src/lib/dhcpsrv/tests/csv_lease_file6_unittest.cc TEST_F(CSVLeaseFile6Test, embeddedCommas) - new test --- diff --git a/src/lib/dhcpsrv/csv_lease_file4.cc b/src/lib/dhcpsrv/csv_lease_file4.cc index fc3cf1886a..2946e810da 100644 --- a/src/lib/dhcpsrv/csv_lease_file4.cc +++ b/src/lib/dhcpsrv/csv_lease_file4.cc @@ -64,11 +64,11 @@ CSVLeaseFile4::append(const Lease4& lease) { row.writeAt(getColumnIndex("subnet_id"), lease.subnet_id_); row.writeAt(getColumnIndex("fqdn_fwd"), lease.fqdn_fwd_); row.writeAt(getColumnIndex("fqdn_rev"), lease.fqdn_rev_); - row.writeAt(getColumnIndex("hostname"), lease.hostname_); + row.writeAtEscaped(getColumnIndex("hostname"), lease.hostname_); row.writeAt(getColumnIndex("state"), lease.state_); // User context is optional. if (lease.getContext()) { - row.writeAt(getColumnIndex("user_context"), lease.getContext()->str()); + row.writeAtEscaped(getColumnIndex("user_context"), lease.getContext()->str()); } try { @@ -239,7 +239,7 @@ CSVLeaseFile4::readFqdnRev(const CSVRow& row) { std::string CSVLeaseFile4::readHostname(const CSVRow& row) { - std::string hostname = row.readAt(getColumnIndex("hostname")); + std::string hostname = row.readAtEscaped(getColumnIndex("hostname")); return (hostname); } @@ -251,7 +251,7 @@ CSVLeaseFile4::readState(const util::CSVRow& row) { ConstElementPtr CSVLeaseFile4::readContext(const util::CSVRow& row) { - std::string user_context = row.readAt(getColumnIndex("user_context")); + std::string user_context = row.readAtEscaped(getColumnIndex("user_context")); if (user_context.empty()) { return (ConstElementPtr()); } diff --git a/src/lib/dhcpsrv/csv_lease_file6.cc b/src/lib/dhcpsrv/csv_lease_file6.cc index c73b7d9aa6..f3eeacf3c2 100644 --- a/src/lib/dhcpsrv/csv_lease_file6.cc +++ b/src/lib/dhcpsrv/csv_lease_file6.cc @@ -55,7 +55,7 @@ CSVLeaseFile6::append(const Lease6& lease) { static_cast(lease.prefixlen_)); row.writeAt(getColumnIndex("fqdn_fwd"), lease.fqdn_fwd_); row.writeAt(getColumnIndex("fqdn_rev"), lease.fqdn_rev_); - row.writeAt(getColumnIndex("hostname"), lease.hostname_); + row.writeAtEscaped(getColumnIndex("hostname"), lease.hostname_); if (lease.hwaddr_) { // We may not have hardware information row.writeAt(getColumnIndex("hwaddr"), lease.hwaddr_->toText(false)); @@ -63,7 +63,7 @@ CSVLeaseFile6::append(const Lease6& lease) { row.writeAt(getColumnIndex("state"), lease.state_); // User context is optional. if (lease.getContext()) { - row.writeAt(getColumnIndex("user_context"), lease.getContext()->str()); + row.writeAtEscaped(getColumnIndex("user_context"), lease.getContext()->str()); } try { VersionedCSVFile::append(row); @@ -227,7 +227,7 @@ CSVLeaseFile6::readFqdnRev(const CSVRow& row) { std::string CSVLeaseFile6::readHostname(const CSVRow& row) { - std::string hostname = row.readAt(getColumnIndex("hostname")); + std::string hostname = row.readAtEscaped(getColumnIndex("hostname")); return (hostname); } @@ -264,7 +264,7 @@ CSVLeaseFile6::readState(const util::CSVRow& row) { ConstElementPtr CSVLeaseFile6::readContext(const util::CSVRow& row) { - std::string user_context = row.readAt(getColumnIndex("user_context")); + std::string user_context = row.readAtEscaped(getColumnIndex("user_context")); if (user_context.empty()) { return (ConstElementPtr()); } diff --git a/src/lib/dhcpsrv/tests/csv_lease_file4_unittest.cc b/src/lib/dhcpsrv/tests/csv_lease_file4_unittest.cc index 9346a0b775..57fef44090 100644 --- a/src/lib/dhcpsrv/tests/csv_lease_file4_unittest.cc +++ b/src/lib/dhcpsrv/tests/csv_lease_file4_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2014-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -557,6 +557,48 @@ TEST_F(CSVLeaseFile4Test, emptyHWAddrDefaultStateOnly) { EXPECT_EQ(lease->cltt_, lease_read->cltt_); } +// Verifies that it is possible to write and read a lease with commas +// in hostname and user context. +TEST_F(CSVLeaseFile4Test, embeddedCommas) { + CSVLeaseFile4 lf(filename_); + ASSERT_NO_THROW(lf.recreate()); + ASSERT_TRUE(io_.exists()); + + std::string hostname("host,example,com"); + std::string context_str("{ \"bar\": true, \"foo\": false, \"x\": \"factor\" }"); + + // Create a lease with commas in the hostname. + Lease4Ptr lease(new Lease4(IOAddress("192.0.3.2"), + hwaddr0_, + NULL, 0, + 0xFFFFFFFF, time(0), + 8, true, true, + hostname)); + + // Add the user context with commas. + lease->setContext(Element::fromJSON(context_str)); + + // Write this lease out to the lease file. + ASSERT_NO_THROW(lf.append(*lease)); + + // Close the lease file. + lf.close(); + + Lease4Ptr lease_read; + + // Re-open the file for reading. + ASSERT_NO_THROW(lf.open()); + + // Read the lease and make sure it is successful. + EXPECT_TRUE(lf.next(lease_read)); + ASSERT_TRUE(lease_read); + + // Expect the hostname and user context to retain the commas + // they started with. + EXPECT_EQ(hostname, lease->hostname_); + EXPECT_EQ(context_str, lease->getContext()->str()); +} + /// @todo Currently we don't check invalid lease attributes, such as invalid /// lease type, invalid preferred lifetime vs valid lifetime etc. The Lease6 /// should be extended with the function that validates lease attributes. Once diff --git a/src/lib/dhcpsrv/tests/csv_lease_file6_unittest.cc b/src/lib/dhcpsrv/tests/csv_lease_file6_unittest.cc index ffe7b79675..66220ba9b9 100644 --- a/src/lib/dhcpsrv/tests/csv_lease_file6_unittest.cc +++ b/src/lib/dhcpsrv/tests/csv_lease_file6_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2014-2019 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -618,6 +618,48 @@ TEST_F(CSVLeaseFile6Test, highLeaseLifetime) { EXPECT_EQ(lease->cltt_, lease_read->cltt_); } +// Verifies that it is possible to write and read a lease with commas +// in hostname and user context. +TEST_F(CSVLeaseFile6Test, embeddedCommas) { + CSVLeaseFile6 lf(filename_); + ASSERT_NO_THROW(lf.recreate()); + ASSERT_TRUE(io_.exists()); + + std::string hostname("host,example,com"); + std::string context_str("{ \"bar\": true, \"foo\": false, \"x\": \"factor\" }"); + + // Create a lease with commas in the hostname. + Lease6Ptr lease(new Lease6(Lease::TYPE_NA, IOAddress("2001:db8:1::1"), + makeDUID(DUID0, sizeof(DUID0)), + 7, 100, 0xFFFFFFFF, 8, true, true, + hostname)); + + // Add the user context with commas. + lease->setContext(Element::fromJSON(context_str)); + + // Write this lease out to the lease file. + ASSERT_NO_THROW(lf.append(*lease)); + + // Close the lease file. + lf.close(); + + Lease6Ptr lease_read; + + // Re-open the file for reading. + ASSERT_NO_THROW(lf.open()); + + // Read the lease and make sure it is successful. + EXPECT_TRUE(lf.next(lease_read)); + ASSERT_TRUE(lease_read); + + // Expect the hostname and user context to retain the commas + // they started with. + EXPECT_EQ(hostname, lease->hostname_); + EXPECT_EQ(context_str, lease->getContext()->str()); +} + + + /// @todo Currently we don't check invalid lease attributes, such as invalid /// lease type, invalid preferred lifetime vs valid lifetime etc. The Lease6 /// should be extended with the function that validates lease attributes. Once diff --git a/src/lib/util/csv_file.cc b/src/lib/util/csv_file.cc index 023b6fdb76..b6fd591316 100644 --- a/src/lib/util/csv_file.cc +++ b/src/lib/util/csv_file.cc @@ -1,18 +1,17 @@ -// Copyright (C) 2014-2016 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. #include - #include -#include -#include -#include + #include +#include #include #include +#include namespace isc { namespace util { @@ -29,10 +28,32 @@ CSVRow::CSVRow(const std::string& text, const char separator) void CSVRow::parse(const std::string& line) { - // Tokenize the string using a specified separator. Disable compression, - // so as the two consecutive separators mark an empty value. - boost::split(values_, line, boost::is_any_of(separator_), - boost::algorithm::token_compress_off); + size_t sep_pos = 0; + size_t prev_pos = 0; + size_t len = 0; + + // In case someone is reusing the row. + values_.clear(); + + // Iterate over line, splitting on separators. + while (prev_pos < line.size()) { + // Find the next separator. + sep_pos = line.find_first_of(separator_, prev_pos); + if (sep_pos == std::string::npos) { + break; + } + + // Extract the value for the previous column. + len = sep_pos - prev_pos; + values_.push_back(line.substr(prev_pos, len)); + + // Move past the separator. + prev_pos = sep_pos + 1; + }; + + // Extract the last column. + len = line.size() - prev_pos; + values_.push_back(line.substr(prev_pos, len)); } std::string @@ -41,6 +62,11 @@ CSVRow::readAt(const size_t at) const { return (values_[at]); } +std::string +CSVRow::readAtEscaped(const size_t at) const { + return (unescapeCharacters(readAt(at))); +} + std::string CSVRow::render() const { std::ostringstream s; @@ -60,6 +86,11 @@ CSVRow::writeAt(const size_t at, const char* value) { values_[at] = value; } +void +CSVRow::writeAtEscaped(const size_t at, const std::string& value) { + writeAt(at, escapeCharacters(value, separator_)); +} + void CSVRow::trim(const size_t count) { checkIndex(count); @@ -392,5 +423,124 @@ CSVFile::validateHeader(const CSVRow& header) { return (true); } +const std::string CSVRow::escape_tag("&#x"); + +std::string +CSVRow::escapeCharacters(const std::string& orig_str, const std::string& characters) { + size_t char_pos = 0; + size_t prev_pos = 0; + + // Check for a first occurance. If none, just return a + // copy of the original. + char_pos = orig_str.find_first_of(characters, prev_pos); + if (char_pos == std::string::npos) { + return(orig_str); + } + + std::stringstream ss; + while (char_pos < orig_str.size()) { + // Copy everything upto the charcater to escape. + ss << orig_str.substr(prev_pos, char_pos - prev_pos); + + // Copy the escape tag followed by the hex digits of the character. + ss << escape_tag << std::hex << std::setw(2) + << (uint16_t)(orig_str[char_pos]); + + ++char_pos; + prev_pos = char_pos; + + // Find the next character to escape. + char_pos = orig_str.find_first_of(characters, prev_pos); + + // If no more, copy the remainder of the string. + if (char_pos == std::string::npos) { + ss << orig_str.substr(prev_pos, char_pos - prev_pos); + break; + } + + }; + + // Return the escaped string. + return(ss.str()); +} + +std::string +CSVRow::unescapeCharacters(const std::string& escaped_str) { + size_t esc_pos = 0; + size_t start_pos = 0; + + // Look for the escape tag. + esc_pos = escaped_str.find(escape_tag, start_pos); + if (esc_pos == std::string::npos) { + // No escape tags at all, we're done. + return(escaped_str); + } + + // We have at least one escape tag. + std::stringstream ss; + while (esc_pos < escaped_str.size()) { + // Save everything up to the tag. + ss << escaped_str.substr(start_pos, esc_pos - start_pos); + + // Now we need to see if we have valid hex digits + // following the tag. + unsigned int escaped_char = 0; + bool converted = true; + size_t dig_pos = esc_pos + escape_tag.size(); + if (dig_pos <= escaped_str.size() - 2) { + for (int i = 0; i < 2; ++i) { + uint8_t digit = escaped_str[dig_pos]; + + if (digit >= 'a' && digit <= 'f') { + digit = (digit - 'a' + 10); + } else if (digit >= 'A' && digit <= 'F') { + digit = (digit - 'A' + 10); + } else if (digit >= '0' && digit <= '9') { + digit -= '0'; + } else { + converted = false; + break; + } + + if (i == 0) { + escaped_char = (digit << 4); + } else { + escaped_char |= digit; + } + + ++dig_pos; + } + } + + // If we converted an escaped character, add it. + if (converted) { + ss << static_cast(escaped_char); + esc_pos = dig_pos; + } else { + // Apparently the escape_tag was not followed by two valid hex + // digits. We'll assume it just happens to be in the string, so + // we'll include it in the output. + ss << escape_tag; + esc_pos += escape_tag.size(); + } + + // Set the new start of search. + start_pos = esc_pos; + + // Look for the next escape tag. + esc_pos = escaped_str.find(escape_tag, start_pos); + + // If we're at the end we're done. + if (esc_pos == std::string::npos) { + // Make sure we grab the remnant. + ss << escaped_str.substr(start_pos, esc_pos - start_pos); + break; + } + }; + + return(ss.str()); +} + + } // end of isc::util namespace } // end of isc namespace diff --git a/src/lib/util/csv_file.h b/src/lib/util/csv_file.h index 8f4422974e..1ffdc369f9 100644 --- a/src/lib/util/csv_file.h +++ b/src/lib/util/csv_file.h @@ -1,4 +1,4 @@ -// Copyright (C) 2014-2017 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -109,6 +109,27 @@ public: /// @c CSVRow::getValuesCount. std::string readAt(const size_t at) const; + /// @brief Retrieves a value from the internal container, free of escaped + /// characters. + /// + /// Returns a copy of the internal container value at the given index + /// which has had all escaped characters replaced with their unesacped + /// values. Escaped characters embedded using the following format: + /// + /// This function fetches the value at the given index and passes it + /// into CSVRow::unesacpeCharacters which replaces any escaped special + /// characters with their unescaped form. + /// + /// @param at Index of the value in the container. The values are indexed + /// from 0, where 0 corresponds to the left-most value in the CSV file row. + /// + /// @return Value at specified index in the text form. + /// + /// @throw CSVFileError if the index is out of range. The number of elements + /// being held by the container can be obtained using + /// @c CSVRow::getValuesCount. + std::string readAtEscaped(const size_t at) const; + /// @brief Trims a given number of elements from the end of a row /// /// @param count number of elements to trim @@ -178,6 +199,19 @@ public: writeAt(at, value.c_str()); } + /// @brief Replaces the value at specified index with a value that has + /// had special characters escaped + /// + /// This function first calls @c CSVRow::esacpeCharacters to replace + /// special characters with their escaped form. It then sets the value + /// to be rendered using @c CSVRow::render function. + /// + /// @param at Index of the value to be replaced. + /// @param value Value to be written given as string. + /// + /// @throw CSVFileError if index is out of range. + void writeAtEscaped(const size_t at, const std::string& value); + /// @brief Appends the value as a new column. /// /// @param value Value to be written. @@ -234,6 +268,33 @@ public: return (render() != other.render()); } + /// @brief Returns a copy of a string with special characters escaped + /// + /// @param orig_str string which may contain characters that require + /// escaping. + /// @param characters list of characters which require escaping. + /// + /// The escaped characters will use the followin format: + /// + /// &#x{xx} + /// + /// where {xx} is the two digit hexadecimal ASCII value of the character + /// escaped. A comma, for example is: , + /// + /// @return A copy of the original string with special characters escaped. + static std::string escapeCharacters(const std::string& orig_str, + const std::string& characters); + + /// @brief Returns a copy of a string with special characters unescaped + /// + /// This function reverses the escaping of characters done by @c + /// CSVRow::escapeCharacters. + /// + /// @param escaped_str string which may contain escaped characters. + /// + /// @return A string free of escaped characters + static std::string unescapeCharacters(const std::string& escaped_str); + private: /// @brief Check if the specified index of the value is in range. @@ -254,6 +315,9 @@ private: /// @brief Internal container holding values that belong to the row. std::vector values_; + + /// @brief Prefix used to escape special characters. + static const std::string escape_tag; }; /// @brief Overrides standard output stream operator for @c CSVRow object. diff --git a/src/lib/util/tests/csv_file_unittest.cc b/src/lib/util/tests/csv_file_unittest.cc index e0a70ea36f..be371d0aef 100644 --- a/src/lib/util/tests/csv_file_unittest.cc +++ b/src/lib/util/tests/csv_file_unittest.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2014-2017 Internet Systems Consortium, Inc. ("ISC") +// Copyright (C) 2014-2020 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this @@ -16,6 +16,18 @@ namespace { using namespace isc::util; +// This test exercizes escaping and unescaping of characters. +TEST(CSVRowTest, escapeUnescape) { + std::string orig(",FO^O\\,B?,AR,"); + + // We'll escape commas, question marks, and carets. + std::string escaped = CSVRow::escapeCharacters(orig, ",?^"); + EXPECT_EQ ("ˏO^O\\ˋ?ˊR,", escaped); + + std::string unescaped = CSVRow::unescapeCharacters(escaped); + EXPECT_EQ (orig, unescaped); +} + // This test checks that the single data row is parsed. TEST(CSVRow, parse) { CSVRow row0("foo,bar,foo-bar"); @@ -30,6 +42,13 @@ TEST(CSVRow, parse) { EXPECT_TRUE(row0.readAt(1).empty()); EXPECT_EQ("foo-bar", row0.readAt(2)); + row0.parse("bar,foo,-bar"); + ASSERT_EQ(2, row0.getValuesCount()); + EXPECT_EQ("bar", row0.readAt(0)); + // Read the second column as-is and escaped + EXPECT_EQ("foo,-bar", row0.readAt(1)); + EXPECT_EQ("foo,-bar", row0.readAtEscaped(1)); + CSVRow row1("foo-bar|foo|bar|", '|'); ASSERT_EQ(4, row1.getValuesCount()); EXPECT_EQ("foo-bar", row1.readAt(0)); @@ -42,6 +61,25 @@ TEST(CSVRow, parse) { EXPECT_TRUE(row1.readAt(0).empty()); } +// Verifies that empty columns are handled correctly. +TEST(CSVRow, emptyColumns) { + // Should get four columns, all blank except column the second one. + CSVRow row(",one,,"); + ASSERT_EQ(4, row.getValuesCount()); + EXPECT_EQ("", row.readAt(0)); + EXPECT_EQ("one", row.readAt(1)); + EXPECT_EQ("", row.readAt(2)); + EXPECT_EQ("", row.readAt(3)); +} + +// Verifies that empty columns are handled correctly. +TEST(CSVRow, oneColumn) { + // Should get one column + CSVRow row("zero"); + ASSERT_EQ(1, row.getValuesCount()); + EXPECT_EQ("zero", row.readAt(0)); +} + // This test checks that the text representation of the CSV row // is created correctly. TEST(CSVRow, render) { @@ -69,17 +107,21 @@ TEST(CSVRow, render) { // This test checks that the data values can be set for the CSV row. TEST(CSVRow, writeAt) { - CSVRow row(3); + CSVRow row(4); row.writeAt(0, 10); row.writeAt(1, "foo"); row.writeAt(2, "bar"); + row.writeAtEscaped(3, "bar,one,two"); EXPECT_EQ("10", row.readAt(0)); EXPECT_EQ("foo", row.readAt(1)); EXPECT_EQ("bar", row.readAt(2)); + // Read third column as-is and unescaped + EXPECT_EQ("bar,one,two", row.readAt(3)); + EXPECT_EQ("bar,one,two", row.readAtEscaped(3)); - EXPECT_THROW(row.writeAt(3, 20), CSVFileError); - EXPECT_THROW(row.writeAt(3, "foo"), CSVFileError); + EXPECT_THROW(row.writeAt(4, 20), CSVFileError); + EXPECT_THROW(row.writeAt(4, "foo"), CSVFileError); } // Checks whether writeAt() and append() can be mixed together. @@ -129,7 +171,6 @@ TEST(CSVRow, trim) { EXPECT_EQ("one", row.readAt(1)); } - /// @brief Test fixture class for testing operations on CSV file. /// /// It implements basic operations on files, such as reading writing @@ -523,5 +564,4 @@ TEST_F(CSVFileTest, exists) { EXPECT_FALSE(csv->exists()); } - } // end of anonymous namespace