dnsdist: Implement bounded loads for the whashed and wrandom policies

author Remi Gacogne <remi.gacogne@powerdns.com>

Fri, 6 Mar 2020 11:00:46 +0000 (12:00 +0100)

committer Remi Gacogne <remi.gacogne@powerdns.com>

Fri, 6 Mar 2020 16:17:26 +0000 (17:17 +0100)
author Remi Gacogne <remi.gacogne@powerdns.com>
Fri, 6 Mar 2020 11:00:46 +0000 (12:00 +0100)
committer Remi Gacogne <remi.gacogne@powerdns.com>
Fri, 6 Mar 2020 16:17:26 +0000 (17:17 +0100)
diff --git a/pdns/dnsdist-console.cc b/pdns/dnsdist-console.cc

index a87d82adcb627df48b5f2a6fef263890c0388d72..98841bac685690d531f9142e33b92b7ac11a7fb1 100644 (file)
--- a/pdns/dnsdist-console.cc
+++ b/pdns/dnsdist-console.cc
@@ -551,6 +551,7 @@ const std::vector<ConsoleKeyword> g_consoleKeywords{
    { "setUDPTimeout", true, "n", "set the maximum time dnsdist will wait for a response from a backend over UDP, in seconds" },
    { "setVerboseHealthChecks", true, "bool", "set whether health check errors will be logged" },
    { "setWebserverConfig", true, "[{password=string, apiKey=string, customHeaders}]", "Updates webserver configuration" },
+  { "setWeightedBalancingFactor", true, "factor", "Set the balancing factor for bounded-load weighted policies (whashed, wrandom)" },
    { "setWHashedPertubation", true, "value", "Set the hash perturbation value to be used in the whashed policy instead of a random one, allowing to have consistent whashed results on different instance" },
    { "show", true, "string", "outputs `string`" },
    { "showACL", true, "", "show our ACL set" },
diff --git a/pdns/dnsdist-lbpolicies.hh b/pdns/dnsdist-lbpolicies.hh

index 587c22d956af747f5ff155f24c0e48e12ce3b9b4..ec2cad6c6029d4577274da8ad9782a15c6e62cd7 100644 (file)
--- a/pdns/dnsdist-lbpolicies.hh
+++ b/pdns/dnsdist-lbpolicies.hh
@@ -76,3 +76,8 @@ std::shared_ptr<DownstreamState> chashed(const ServerPolicy::NumberedServerVecto
  std::shared_ptr<DownstreamState> chashedFromHash(const ServerPolicy::NumberedServerVector& servers, size_t hash);
  std::shared_ptr<DownstreamState> roundrobin(const ServerPolicy::NumberedServerVector& servers, const DNSQuestion* dq);
  std::shared_ptr<DownstreamState> getSelectedBackendFromPolicy(const ServerPolicy& policy, const ServerPolicy::NumberedServerVector& servers, DNSQuestion& dq);
+
+extern double g_consistentHashBalancingFactor;
+extern double g_weightedBalancingFactor;
+extern uint32_t g_hashperturb;
+extern bool g_roundrobinFailOnNoServer;
diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc

index 431b2e533ed461dd87e5b1c2ece009372a3c2d13..c3c22d16c533f7a4c2755fa05436f1143557ab3d 100644 (file)
--- a/pdns/dnsdist-lua.cc
+++ b/pdns/dnsdist-lua.cc
@@ -1652,6 +1652,18 @@ static void setupLuaConfig(bool client, bool configCheck)
        }
      });
  
+  g_lua.writeFunction("setWeightedBalancingFactor", [](double factor) {
+      setLuaSideEffect();
+      if (factor >= 0) {
+        g_weightedBalancingFactor = factor;
+      }
+      else {
+        errlog("Invalid value passed to setWeightedBalancingFactor()!");
+        g_outputBuffer="Invalid value passed to setWeightedBalancingFactor()!\n";
+        return;
+      }
+    });
+
    g_lua.writeFunction("setRingBuffersSize", [](size_t capacity, boost::optional<size_t> numberOfShards) {
        setLuaSideEffect();
        if (g_configurationDone) {
diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh

index c5c36167d5d1816db34355e543c8ba4de571f117..c5aff84584878a56c5f14f568907c5b7981fd123 100644 (file)
--- a/pdns/dnsdist.hh
+++ b/pdns/dnsdist.hh
@@ -1063,14 +1063,11 @@ extern uint32_t g_staleCacheEntriesTTL;
  extern bool g_apiReadWrite;
  extern std::string g_apiConfigDirectory;
  extern bool g_servFailOnNoPolicy;
-extern uint32_t g_hashperturb;
  extern bool g_useTCPSinglePipe;
  extern uint16_t g_downstreamTCPCleanupInterval;
  extern size_t g_udpVectorSize;
  extern bool g_preserveTrailingData;
  extern bool g_allowEmptyResponse;
-extern bool g_roundrobinFailOnNoServer;
-extern double g_consistentHashBalancingFactor;
  
  #ifdef HAVE_EBPF
  extern shared_ptr<BPFFilter> g_defaultBPFFilter;
diff --git a/pdns/dnsdistdist/dnsdist-lbpolicies.cc b/pdns/dnsdistdist/dnsdist-lbpolicies.cc

index aa08a32d236178bc3deeb2a33d2826d9630e90eb..a36cc16afd783c3d556becd68e465e8563857921 100644 (file)
--- a/pdns/dnsdistdist/dnsdist-lbpolicies.cc
+++ b/pdns/dnsdistdist/dnsdist-lbpolicies.cc
@@ -64,17 +64,36 @@ shared_ptr<DownstreamState> firstAvailable(const ServerPolicy::NumberedServerVec
    return leastOutstanding(servers, dq);
  }
  
+double g_weightedBalancingFactor = 0;
+
  static shared_ptr<DownstreamState> valrandom(unsigned int val, const ServerPolicy::NumberedServerVector& servers)
  {
    vector<pair<int, size_t>> poss;
    poss.reserve(servers.size());
    int sum = 0;
    int max = std::numeric_limits<int>::max();
+  double targetLoad = std::numeric_limits<double>::max();
  
-  for(const auto& d : servers) {      // w=1, w=10 -> 1, 11
-    if(d.second->isUp()) {
+  if (g_weightedBalancingFactor > 0) {
+    /* we start with one, representing the query we are currently handling */
+    double currentLoad = 1;
+    size_t totalWeight = 0;
+    for (const auto& pair : servers) {
+      if (pair.second->isUp()) {
+        currentLoad += pair.second->outstanding;
+        totalWeight += pair.second->weight;
+      }
+    }
+
+    if (totalWeight > 0) {
+      targetLoad = (currentLoad / totalWeight) * g_weightedBalancingFactor;
+    }
+  }
+
+  for (const auto& d : servers) {      // w=1, w=10 -> 1, 11
+    if (d.second->isUp() && (g_weightedBalancingFactor == 0 || (d.second->outstanding <= (targetLoad * d.second->weight)))) {
        // Don't overflow sum when adding high weights
-      if(d.second->weight > max - sum) {
+      if (d.second->weight > max - sum) {
          sum = max;
        } else {
          sum += d.second->weight;
@@ -126,14 +145,21 @@ shared_ptr<DownstreamState> chashedFromHash(const ServerPolicy::NumberedServerVe
    if (g_consistentHashBalancingFactor > 0) {
      /* we start with one, representing the query we are currently handling */
      double currentLoad = 1;
+    size_t totalWeight = 0;
      for (const auto& pair : servers) {
-      currentLoad += pair.second->outstanding;
+      if (pair.second->isUp()) {
+        currentLoad += pair.second->outstanding;
+        totalWeight += pair.second->weight;
+      }
+    }
+
+    if (totalWeight > 0) {
+      targetLoad = (currentLoad / totalWeight) * g_consistentHashBalancingFactor;
      }
-    targetLoad = (currentLoad / servers.size()) * g_consistentHashBalancingFactor;
    }
  
    for (const auto& d: servers) {
-    if (d.second->isUp() && d.second->outstanding <= targetLoad) {
+    if (d.second->isUp() && (g_consistentHashBalancingFactor == 0 || d.second->outstanding <= (targetLoad * d.second->weight))) {
        // make sure hashes have been computed
        if (d.second->hashes.empty()) {
          d.second->hash();
diff --git a/pdns/dnsdistdist/docs/guides/serverselection.rst b/pdns/dnsdistdist/docs/guides/serverselection.rst

index 47c4fa55bb6cea552bed3a18c73c2acb1b71aa83..5fed28062eda942c635e1f09e1299202689c8340 100644 (file)
--- a/pdns/dnsdistdist/docs/guides/serverselection.rst
+++ b/pdns/dnsdistdist/docs/guides/serverselection.rst
@@ -33,6 +33,12 @@ A further policy, ``wrandom`` assigns queries randomly, but based on the weight
  For example, if two servers are available, the first one with a weight of 2 and the second one with a weight of 1 (the default), the
  first one should get two-thirds of the incoming queries and the second one the remaining third.
  
+Since 1.5.0, a bounded-load version is also supported, trying to prevent one server from receiving many more queries than intended, even if the distribution of queries is not perfect. This "weighted random with bounded loads" algorithm is enabled by setting :func:`setWeightedBalancingFactor` to a value other than 0, which is the default. This value is the maximum number of outstanding queries that a given server can have at a given time, as a ratio of the total number of outstanding queries for all the active servers in the pool, weighted by the weight of the server.
+
+The algorithm will try to select a server randomly, as is done when no bounded-load is set, but will disqualify all servers that have more outstanding queries than intended times the factor, until a suitable server is found. The higher the factor, the more imbalance between the servers is allowed.
+
+For example, if we have two servers, with respective weights of 1 and 4, we expect the first server to get a fifth of the queries, and the second one 4/5. As the random distribution is not perfect, a server might get more queries than expected. Setting :func:`setWeightedBalancingFactor` to 1.1 limits the imbalance between the ratio of outstanding queries actually handled by a server and the expected number, so in this example the first server would not be allowed to handle more than 1.1/5 of all the outstanding queries at a given time.
+
  ``whashed``
  ~~~~~~~~~~~
  
@@ -43,6 +49,12 @@ The current hash algorithm is based on the qname of the query.
  
    Set the hash perturbation value to be used in the whashed policy instead of a random one, allowing to have consistent whashed results on different instances.
  
+Since 1.5.0, a bounded-load version is also supported, trying to prevent one server from receiving many more queries than intended, even if the distribution of queries is not perfect. This "weighted hashing with bounded loads" algorithm is enabled by setting :func:`setWeightedBalancingFactor` to a value other than 0, which is the default. This value is the maximum number of outstanding queries that a given server can have at a given time, as a ratio of the total number of outstanding queries for all the active servers in the pool, weighted by the weight of the server.
+
+The algorithm will try to select a server based on the hash of the qname, as is done when no bounded-load is set, but will disqualify all servers that have more outstanding queries than intended times the factor, until a suitable server is found. The higher the factor, the more imbalance between the servers is allowed.
+
+For example, if we have two servers, with respective weights of 1 and 4, we expect the first server to get a fifth of the queries, and the second one 4/5. If the qnames of the queries are not perfectly distributed, a server might get more queries than expected. Setting :func:`setWeightedBalancingFactor` to 1.1 limits the imbalance between the ratio of outstanding queries actually handled by a server and the expected number, so in this example the first server would not be allowed to handle more than 1.1/5 of all the outstanding queries at a given time.
+
  ``chashed``
  ~~~~~~~~~~~
  
@@ -55,10 +67,11 @@ This is a side-effect of the internal implementation of the consistent hashing a
  
  You can also set the hash perturbation value, see :func:`setWHashedPertubation`. To achieve consistent distribution over :program:`dnsdist` restarts, you will also need to explicitly set the backend's UUIDs with the ``id`` option of :func:`newServer`. You can get the current UUIDs of your backends by calling :func:`showServers` with the ``showUUIDs=true`` option.
  
-Since 1.5.0, a bounded-load version is also supported, preventing one server from receiving much more queries than the others, even if the distribution of queries is not perfect. This "consistent hashing with bounded loads" algorithm is enabled by setting :func:`setConsistentHashingBalancingFactor` to a value other than 0, which is the default. This value is the maximum number of outstanding queries that a given server can have at a given time, as a ratio of the average number of outstanding queries for all the active servers in the pool.
+Since 1.5.0, a bounded-load version is also supported, preventing one server from receiving many more queries than intended, even if the distribution of queries is not perfect. This "consistent hashing with bounded loads" algorithm is enabled by setting :func:`setConsistentHashingBalancingFactor` to a value other than 0, which is the default. This value is the maximum number of outstanding queries that a given server can have at a given time, as a ratio of the total number of outstanding queries for all the active servers in the pool, weighted by the weight of the server.
  
-For example, setting :func:`setConsistentHashingBalancingFactor` to 1.5 means that no server will be allowed to have more outstanding queries than 1.5 times the average of all outstanding queries in the pool. The algorithm will try to select a server based on the hash of the qname, as is done when no bounded-load is set, but will disqualify all servers that have more outstanding queries than the average times the factor, until a suitable server is found.
-The higher the factor, the more imbalance between the servers is allowed.
+The algorithm will try to select a server based on the hash of the qname, as is done when no bounded-load is set, but will disqualify all servers that have more outstanding queries than intended times the factor, until a suitable server is found. The higher the factor, the more imbalance between the servers is allowed.
+
+For example, if we have two servers, with respective weights of 1 and 4, we expect the first server to get a fifth of the queries, and the second one 4/5. If the qnames of the queries are not perfectly distributed, a server might get more queries than expected. Setting :func:`setConsistentHashingBalancingFactor` to 1.1 limits the imbalance between the ratio of outstanding queries actually handled by a server and the expected number, so in this example the first server would not be allowed to handle more than 1.1/5 of all the outstanding queries at a given time.
  
  ``roundrobin``
  ~~~~~~~~~~~~~~
@@ -158,8 +171,8 @@ Functions
  
    .. versionadded: 1.5.0
  
-  Set the maximum imbalance between the number of outstanding queries for a given server relative to the average number of outstanding queries for all servers in the pool,
-  when using the ``chashed`` consistent hashing load-balancing policy.
+  Set the maximum imbalance between the number of outstanding queries intended for a given server, based on its weight,
+  and the actual number, when using the ``chashed`` consistent hashing load-balancing policy.
    Default is 0, which disables the bounded-load algorithm.
  
  .. function:: setServerPolicy(policy)
@@ -213,6 +226,14 @@ Functions
  
    :param bool value: whether to fail when all servers are down
  
+.. function:: setWeightedBalancingFactor(factor)
+
+  .. versionadded:: 1.5.0
+
+  Set the maximum imbalance between the number of outstanding queries intended for a given server, based on its weight,
+  and the actual number, when using the ``whashed`` or ``wrandom`` load-balancing policy.
+  Default is 0, which disables the bounded-load algorithm.
+
  .. function:: showPoolServerPolicy(pool)
  
    Print server selection policy for ``pool``.
author	Remi Gacogne <remi.gacogne@powerdns.com>
	Fri, 6 Mar 2020 11:00:46 +0000 (12:00 +0100)
committer	Remi Gacogne <remi.gacogne@powerdns.com>
	Fri, 6 Mar 2020 16:17:26 +0000 (17:17 +0100)
pdns/dnsdist-console.cc		patch \| blob \| blame \| history
pdns/dnsdist-lbpolicies.hh		patch \| blob \| blame \| history
pdns/dnsdist-lua.cc		patch \| blob \| blame \| history
pdns/dnsdist.hh		patch \| blob \| blame \| history
pdns/dnsdistdist/dnsdist-lbpolicies.cc		patch \| blob \| blame \| history
pdns/dnsdistdist/docs/guides/serverselection.rst		patch \| blob \| blame \| history