]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
Faster dedup, not using zoneRepresentation but wire format, which allows for
authorOtto Moerbeek <otto.moerbeek@open-xchange.com>
Tue, 20 Aug 2024 11:50:15 +0000 (13:50 +0200)
committerOtto Moerbeek <otto.moerbeek@open-xchange.com>
Mon, 16 Dec 2024 10:28:53 +0000 (11:28 +0100)
an unordered_set as well.

pdns/dnsparser.hh
pdns/dnswriter.cc
pdns/dnswriter.hh
pdns/recursordist/test-shuffle_cc.cc
pdns/shuffle.cc

index 21ad0205c14a7f090ea781212ec0f5267bd6a716..5d9889e20df23d3fccae421dd4c637db22e9de44 100644 (file)
@@ -220,6 +220,26 @@ public:
     return record;
   }
 
+  [[nodiscard]] string wireFormatContent(const DNSName& qname, bool canonic = false, bool lowerCase = false) const
+  {
+    vector<uint8_t> packet;
+    DNSPacketWriter packetWriter(packet, g_rootdnsname, QType::A);
+
+    if (canonic) {
+      packetWriter.setCanonic(true);
+    }
+    if (lowerCase) {
+      packetWriter.setLowercase(true);
+    }
+
+    packetWriter.startRecord(qname, getType());
+    toPacket(packetWriter);
+
+    string record;
+    packetWriter.getContentWireFormat(record); // needs to be called before commit()
+    return record;
+  }
+
   virtual bool operator==(const DNSRecordContent& rhs) const
   {
     return typeid(*this)==typeid(rhs) && this->getZoneRepresentation() == rhs.getZoneRepresentation();
index 67ed5c57d12f083bf71ccc0cd7120b88357b77f7..1cdc5776beb248d3a0f435b81aa7d51027593090 100644 (file)
@@ -457,6 +457,12 @@ template <typename Container> void GenericDNSPacketWriter<Container>::getRecordP
   records.assign(d_content.begin() + d_sor, d_content.end());
 }
 
+// Replaces the contents of `records` with the bytes of d_content that lie
+// between offset d_rollbackmarker and the end of the buffer.
+// Must be called __before commit__.
+template <typename Container>
+void GenericDNSPacketWriter<Container>::getContentWireFormat(string& records)
+{
+  const auto first = d_content.begin() + d_rollbackmarker;
+  records.assign(first, d_content.end());
+}
+
 template <typename Container> uint32_t GenericDNSPacketWriter<Container>::size() const
 {
   return d_content.size();
index 4d6d28618242b27aea11e79d21e3b6826a371ebc..c6ed4b3f04fd4215c81fd9b3cbe7446a9c493f0e 100644 (file)
@@ -138,6 +138,7 @@ public:
 
   dnsheader* getHeader();
   void getRecordPayload(string& records); // call __before commit__
+  void getContentWireFormat(string& records); // call __before commit__
 
   void setCanonic(bool val)
   {
index 32eebd3b600473b48918a5cf9170ce8be3e88204..469cc2820f46880c236091a73719cbeae02c5822 100644 (file)
@@ -48,8 +48,9 @@ BOOST_AUTO_TEST_CASE(test_simple)
   BOOST_CHECK_EQUAL(dups, 1U);
   BOOST_CHECK_EQUAL(list.size(), 2U);
   addRecordToList(list, DNSName("Foo"), QType::A, "1.2.3.4");
+  addRecordToList(list, DNSName("FoO"), QType::A, "1.2.3.4", DNSResourceRecord::ADDITIONAL, 999);
   dups = pdns::dedup(list);
-  BOOST_CHECK_EQUAL(dups, 1U);
+  BOOST_CHECK_EQUAL(dups, 2U);
   BOOST_CHECK_EQUAL(list.size(), 2U);
   BOOST_CHECK_EQUAL(address, &list);
 }
index 32796fe02e7f2c930b7b43b2675c437ff73b52a0..1fbc004cf920a82656b59d70d7624a928c076a4f 100644 (file)
@@ -143,7 +143,7 @@ void pdns::orderAndShuffle(vector<DNSRecord>& rrs, bool includingAdditionals)
 
 unsigned int pdns::dedup(vector<DNSRecord>& rrs)
 {
-  // This functino tries to avoid unneccesary work
+  // This function tries to avoid unnecessary work
   // First a vector with zero or one element does not need dedupping
   if (rrs.size() <= 1) {
     return 0;
@@ -151,15 +151,16 @@ unsigned int pdns::dedup(vector<DNSRecord>& rrs)
 
   // If we have a larger vector, first check if we actually have duplicates.
   // We assume the most common case is: no
-  std::set<std::tuple<DNSName, QType, std::string>> seen;
+  std::unordered_set<std::string> seen;
   std::vector<bool> dups(rrs.size(), false);
 
   unsigned int counter = 0;
   unsigned int numDups = 0;
 
   for (const auto& rec : rrs) {
+    const auto key = rec.getContent()->wireFormatContent(rec.d_name, true, true);
     // This ignores class, ttl and place by using constants for those
-    if (!seen.emplace(rec.d_name.makeLowerCase(), rec.d_type, rec.getContent()->serialize(rec.d_name, true, false)).second) {
+    if (!seen.emplace(key).second) {
       dups[counter] = true;
       numDups++;
     }