From: Otto Moerbeek Date: Wed, 24 Jul 2024 08:00:01 +0000 (+0200) Subject: Rework dedup code and add a test for pdsn::dedup X-Git-Tag: dnsdist-2.0.0-alpha1~182^2~6 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=053da7fa199c8b9b34d5b5dbcfae1c9dc9e4b7a9;p=thirdparty%2Fpdns.git Rework dedup code and add a test for pdsn::dedup --- diff --git a/pdns/recursordist/Makefile.am b/pdns/recursordist/Makefile.am index 7f86c65750..92dcd7583a 100644 --- a/pdns/recursordist/Makefile.am +++ b/pdns/recursordist/Makefile.am @@ -338,8 +338,8 @@ testrunner_SOURCES = \ rpzloader.cc rpzloader.hh \ secpoll.cc \ settings/cxxsupport.cc \ - shuffle.cc shuffle.hh \ sholder.hh \ + shuffle.cc shuffle.hh \ sillyrecords.cc \ sortlist.cc sortlist.hh \ sstuff.hh \ @@ -381,6 +381,7 @@ testrunner_SOURCES = \ test-secpoll_cc.cc \ test-settings.cc \ test-sholder_hh.cc \ + test-shuffle_cc.cc \ test-signers.cc \ test-syncres_cc.cc \ test-syncres_cc.hh \ diff --git a/pdns/recursordist/meson.build b/pdns/recursordist/meson.build index b443795567..14c5be3215 100644 --- a/pdns/recursordist/meson.build +++ b/pdns/recursordist/meson.build @@ -472,6 +472,7 @@ test_sources += files( src_dir / 'test-rpzloader_cc.cc', src_dir / 'test-secpoll_cc.cc', src_dir / 'test-settings.cc', + src_dir / 'test-shuffle_cc.cc', src_dir / 'test-signers.cc', src_dir / 'test-syncres_cc.cc', src_dir / 'test-syncres_cc.hh', diff --git a/pdns/recursordist/syncres.cc b/pdns/recursordist/syncres.cc index cb4f9a6ab0..2681379f20 100644 --- a/pdns/recursordist/syncres.cc +++ b/pdns/recursordist/syncres.cc @@ -4446,7 +4446,9 @@ void SyncRes::sanitizeRecordsPass2(const std::string& prefix, LWResult& lwr, con } lwr.d_records = std::move(vec); } - pdns::dedup(lwr.d_records); + if (auto count = pdns::dedup(lwr.d_records); count > 0) { + LOG(prefix << qname << ": Removed " << count << " duplicate records from response received from " << auth << endl); + } } void SyncRes::rememberParentSetIfNeeded(const DNSName& domain, const vector& newRecords, unsigned int depth, const string& prefix) diff --git a/pdns/recursordist/test-shuffle_cc.cc b/pdns/recursordist/test-shuffle_cc.cc new file mode 100644 index 0000000000..32eebd3b60 --- /dev/null +++ b/pdns/recursordist/test-shuffle_cc.cc @@ -0,0 +1,57 @@ +/* + * This file is part of PowerDNS or dnsdist. + * Copyright -- PowerDNS.COM B.V. and its contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In addition, for the avoidance of any doubt, permission is granted to + * link this program with OpenSSL and to (re)distribute the binaries + * produced as the result of such linking. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef BOOST_TEST_DYN_LINK +#define BOOST_TEST_DYN_LINK +#endif + +#define BOOST_TEST_NO_MAIN + +#include "config.h" +#include + +#include "shuffle.hh" +#include "test-common.hh" + +BOOST_AUTO_TEST_SUITE(shuffle_cc) + +BOOST_AUTO_TEST_CASE(test_simple) +{ + std::vector list; + auto* address = &list; + addRecordToList(list, DNSName("foo"), QType::A, "1.2.3.4"); + addRecordToList(list, DNSName("foo2"), QType::A, "1.2.3.4"); + auto dups = pdns::dedup(list); + BOOST_CHECK_EQUAL(dups, 0U); + BOOST_CHECK_EQUAL(list.size(), 2U); + addRecordToList(list, DNSName("foo"), QType::A, "1.2.3.4"); + dups = pdns::dedup(list); + BOOST_CHECK_EQUAL(dups, 1U); + BOOST_CHECK_EQUAL(list.size(), 2U); + addRecordToList(list, DNSName("Foo"), QType::A, "1.2.3.4"); + dups = pdns::dedup(list); + BOOST_CHECK_EQUAL(dups, 1U); + BOOST_CHECK_EQUAL(list.size(), 2U); + BOOST_CHECK_EQUAL(address, &list); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/pdns/recursordist/test-syncres_cc.cc b/pdns/recursordist/test-syncres_cc.cc index 3e4644bce4..a75de8db83 100644 --- a/pdns/recursordist/test-syncres_cc.cc +++ b/pdns/recursordist/test-syncres_cc.cc @@ -367,7 +367,7 @@ bool addRRSIG(const testkeysset_t& keys, std::vector& records, const if (auto* bval = std::get_if(&broken); bval != nullptr && *bval) { rrc.d_signature[0] ^= 42; } - else if (auto *ival = std::get_if(&broken)) { + else if (auto* ival = std::get_if(&broken)) { rrc.d_signature[0] ^= *ival; // NOLINT(*-narrowing-conversions) } diff --git a/pdns/shuffle.cc b/pdns/shuffle.cc index 43417426fb..32796fe02e 100644 --- a/pdns/shuffle.cc +++ b/pdns/shuffle.cc @@ -141,45 +141,46 @@ void pdns::orderAndShuffle(vector& rrs, bool includingAdditionals) shuffle(rrs, includingAdditionals); } -void pdns::dedup(vector& rrs) +unsigned int pdns::dedup(vector& rrs) { // This functino tries to avoid unneccesary work // First a vector with zero or one element does not need dedupping if (rrs.size() <= 1) { - return; + return 0; } // If we have a larger vector, first check if we actually have duplicates. // We assume the most common case is: no - std::unordered_set seen; - std::vector unique(rrs.size(), false); + std::set> seen; + std::vector dups(rrs.size(), false); unsigned int counter = 0; - unsigned int numUnique = 0; + unsigned int numDups = 0; for (const auto& rec : rrs) { - // This ignores class, ttl and place - if (seen.emplace(rec.getContent()->serialize(rec.d_name, true, true)).second) { - unique[counter] = true; - numUnique++; + // This ignores class, ttl and place by using constants for those + if (!seen.emplace(rec.d_name.makeLowerCase(), rec.d_type, rec.getContent()->serialize(rec.d_name, true, false)).second) { + dups[counter] = true; + numDups++; } ++counter; } - if (numUnique == rrs.size()) { + if (numDups == 0) { // Original is fine as-is. - return; + return 0; } // We avoid calling erase, as it calls a lot of move constructors. This can hurt, especially if // you call it on a large vector muliple times. // So we just take the elements that are unique std::vector ret; - ret.reserve(numUnique); + ret.reserve(rrs.size() - numDups); for (counter = 0; counter < rrs.size(); ++counter) { - if (unique[counter]) { + if (!dups[counter]) { ret.emplace_back(std::move(rrs[counter])); } } rrs = std::move(ret); + return numDups; } diff --git a/pdns/shuffle.hh b/pdns/shuffle.hh index f21ec73f4d..6e9b7525b3 100644 --- a/pdns/shuffle.hh +++ b/pdns/shuffle.hh @@ -29,5 +29,5 @@ namespace pdns { void shuffle(std::vector& rrs); void orderAndShuffle(std::vector& rrs, bool includingAdditionals); -void dedup(std::vector& rrs); +unsigned int dedup(std::vector& rrs); }