From: Otto Moerbeek Date: Tue, 20 Aug 2024 11:50:15 +0000 (+0200) Subject: Faster dedup, not using zoneRepresentation but wire format, which allows for X-Git-Tag: dnsdist-2.0.0-alpha1~182^2~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=75258d5860b10f1c8e28ffb45cf8b9ecfbee5c00;p=thirdparty%2Fpdns.git Faster dedup, not using zoneRepresentation but wire format, which allows for an unordered_set as well. --- diff --git a/pdns/dnsparser.hh b/pdns/dnsparser.hh index 21ad0205c1..5d9889e20d 100644 --- a/pdns/dnsparser.hh +++ b/pdns/dnsparser.hh @@ -220,6 +220,26 @@ public: return record; } + [[nodiscard]] string wireFormatContent(const DNSName& qname, bool canonic = false, bool lowerCase = false) const + { + vector<uint8_t> packet; + DNSPacketWriter packetWriter(packet, g_rootdnsname, QType::A); + + if (canonic) { + packetWriter.setCanonic(true); + } + if (lowerCase) { + packetWriter.setLowercase(true); + } + + packetWriter.startRecord(qname, getType()); + toPacket(packetWriter); + + string record; + packetWriter.getContentWireFormat(record); // needs to be called before commit() + return record; + } + virtual bool operator==(const DNSRecordContent& rhs) const { return typeid(*this)==typeid(rhs) && this->getZoneRepresentation() == rhs.getZoneRepresentation(); diff --git a/pdns/dnswriter.cc b/pdns/dnswriter.cc index 67ed5c57d1..1cdc5776be 100644 --- a/pdns/dnswriter.cc +++ b/pdns/dnswriter.cc @@ -457,6 +457,12 @@ template <typename Container> void GenericDNSPacketWriter<Container>::getRecordP records.assign(d_content.begin() + d_sor, d_content.end()); } +// call __before commit__ +template <typename Container> void GenericDNSPacketWriter<Container>::getContentWireFormat(string& records) +{ + records.assign(d_content.begin() + d_rollbackmarker, d_content.end()); +} + template <typename Container> uint32_t GenericDNSPacketWriter<Container>::size() const { return d_content.size(); diff --git a/pdns/dnswriter.hh b/pdns/dnswriter.hh index 4d6d286182..c6ed4b3f04 100644 --- a/pdns/dnswriter.hh +++ b/pdns/dnswriter.hh @@ -138,6 +138,7 @@ public: dnsheader* getHeader(); void 
getRecordPayload(string& records); // call __before commit__ + void getContentWireFormat(string& records); // call __before commit__ void setCanonic(bool val) { diff --git a/pdns/recursordist/test-shuffle_cc.cc b/pdns/recursordist/test-shuffle_cc.cc index 32eebd3b60..469cc2820f 100644 --- a/pdns/recursordist/test-shuffle_cc.cc +++ b/pdns/recursordist/test-shuffle_cc.cc @@ -48,8 +48,9 @@ BOOST_AUTO_TEST_CASE(test_simple) BOOST_CHECK_EQUAL(dups, 1U); BOOST_CHECK_EQUAL(list.size(), 2U); addRecordToList(list, DNSName("Foo"), QType::A, "1.2.3.4"); + addRecordToList(list, DNSName("FoO"), QType::A, "1.2.3.4", DNSResourceRecord::ADDITIONAL, 999); dups = pdns::dedup(list); - BOOST_CHECK_EQUAL(dups, 1U); + BOOST_CHECK_EQUAL(dups, 2U); BOOST_CHECK_EQUAL(list.size(), 2U); BOOST_CHECK_EQUAL(address, &list); } diff --git a/pdns/shuffle.cc b/pdns/shuffle.cc index 32796fe02e..1fbc004cf9 100644 --- a/pdns/shuffle.cc +++ b/pdns/shuffle.cc @@ -143,7 +143,7 @@ void pdns::orderAndShuffle(vector<DNSRecord>& rrs, bool includingAdditionals) unsigned int pdns::dedup(vector<DNSRecord>& rrs) { - // This functino tries to avoid unneccesary work + // This function tries to avoid unnecessary work // First a vector with zero or one element does not need dedupping if (rrs.size() <= 1) { return 0; @@ -151,15 +151,16 @@ unsigned int pdns::dedup(vector<DNSRecord>& rrs) // If we have a larger vector, first check if we actually have duplicates. // We assume the most common case is: no - std::set<std::tuple<DNSName, QType, std::string>> seen; + std::unordered_set<std::string> seen; std::vector<bool> dups(rrs.size(), false); unsigned int counter = 0; unsigned int numDups = 0; for (const auto& rec : rrs) { + const auto key = rec.getContent()->wireFormatContent(rec.d_name, true, true); // This ignores class, ttl and place by using constants for those - if (!seen.emplace(rec.d_name.makeLowerCase(), rec.d_type, rec.getContent()->serialize(rec.d_name, true, false)).second) { + if (!seen.emplace(key).second) { dups[counter] = true; numDups++; }