From: Remi Gacogne <remi.gacogne@powerdns.com>
Date: Tue, 18 Oct 2022 14:28:43 +0000 (+0200)
Subject: dnsdist: Add an option to use the 'lazy' mode for auto-upgraded servers
X-Git-Tag: dnsdist-1.8.0-rc1~271^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5485edd19833aff44f874f0b8159204b552d2def;p=thirdparty%2Fpdns.git

dnsdist: Add an option to use the 'lazy' mode for auto-upgraded servers
---

diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc
index cb6f5b2edd..2b35e948d3 100644
--- a/pdns/dnsdist-lua.cc
+++ b/pdns/dnsdist-lua.cc
@@ -573,6 +573,10 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck)
                            }
                          }
 
+                         if (vars.count("lazyHealthCheckWhenUpgraded")) {
+                           config.d_upgradeToLazyHealthChecks = boost::get<bool>(vars.at("lazyHealthCheckWhenUpgraded"));
+                         }
+
                          if (vars.count("useClientSubnet")) {
                            config.useECS = boost::get<bool>(vars["useClientSubnet"]);
                          }
diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh
index 4f0e3c66de..a129bab488 100644
--- a/pdns/dnsdist.hh
+++ b/pdns/dnsdist.hh
@@ -829,6 +829,7 @@ struct DownstreamState: public std::enable_shared_from_this<DownstreamState>
     bool d_tcpOnly{false};
     bool d_addXForwardedHeaders{false}; // for DoH backends
     bool d_lazyHealthChecksUseExponentialBackOff{false};
+    bool d_upgradeToLazyHealthChecks{false};
   };
 
   DownstreamState(DownstreamState::Config&& config, std::shared_ptr<TLSCtx> tlsCtx, bool connect);
diff --git a/pdns/dnsdistdist/dnsdist-discovery.cc b/pdns/dnsdistdist/dnsdist-discovery.cc
index 1297cd781e..62e1b43106 100644
--- a/pdns/dnsdistdist/dnsdist-discovery.cc
+++ b/pdns/dnsdistdist/dnsdist-discovery.cc
@@ -398,6 +398,19 @@ bool ServiceDiscovery::tryToUpgradeBackend(const UpgradeableBackend& backend)
   config.remote = discoveredConfig.d_addr;
   config.remote.setPort(discoveredConfig.d_port);
 
+  if (backend.keepAfterUpgrade && config.availability == DownstreamState::Availability::Up) {
+    /* it's OK to keep the forced state if we replace the initial
+       backend, but if we are adding a new backend, it should not
+       inherit that setting, especially since DoX backends are much
+       more likely to fail (certificate errors, ...) */
+    if (config.d_upgradeToLazyHealthChecks) {
+      config.availability = DownstreamState::Availability::Lazy;
+    }
+    else {
+      config.availability = DownstreamState::Availability::Auto;
+    }
+  }
+
   ComboAddress::addressOnlyEqual comparator;
   config.d_dohPath = discoveredConfig.d_dohPath;
   if (!discoveredConfig.d_subjectName.empty() && comparator(config.remote, backend.d_ds->d_config.remote)) {
diff --git a/pdns/dnsdistdist/docs/reference/config.rst b/pdns/dnsdistdist/docs/reference/config.rst
index 9b82ce55d9..8771efe24f 100644
--- a/pdns/dnsdistdist/docs/reference/config.rst
+++ b/pdns/dnsdistdist/docs/reference/config.rst
@@ -561,7 +561,7 @@ Servers
     Added ``addXForwardedHeaders``, ``caStore``, ``checkTCP``, ``ciphers``, ``ciphers13``, ``dohPath``, ``enableRenegotiation``, ``releaseBuffers``, ``subjectName``, ``tcpOnly``, ``tls`` and ``validateCertificates`` to server_table.
 
   .. versionchanged:: 1.8.0
-    Added ``autoUpgrade``, ``autoUpgradeDoHKey``, ``autoUpgradeInterval``, ``autoUpgradeKeep``, ``autoUpgradePool``, ``maxConcurrentTCPConnections``, ``subjectAddr``, ``lazyHealthCheckSampleSize``, ``lazyHealthCheckMinSampleCount``, ``lazyHealthCheckThreshold``, ``lazyHealthCheckFailedInterval``, ``lazyHealthCheckMode``, ``lazyHealthCheckUseExponentialBackOff``, ``lazyHealthCheckMaxBackOff`` and ``healthCheckMode`` to server_table.
+    Added ``autoUpgrade``, ``autoUpgradeDoHKey``, ``autoUpgradeInterval``, ``autoUpgradeKeep``, ``autoUpgradePool``, ``maxConcurrentTCPConnections``, ``subjectAddr``, ``lazyHealthCheckSampleSize``, ``lazyHealthCheckMinSampleCount``, ``lazyHealthCheckThreshold``, ``lazyHealthCheckFailedInterval``, ``lazyHealthCheckMode``, ``lazyHealthCheckUseExponentialBackOff``, ``lazyHealthCheckMaxBackOff``, ``lazyHealthCheckWhenUpgraded`` and ``healthCheckMode`` to server_table.
 
   Add a new backend server. Call this function with either a string::
 
@@ -635,7 +635,8 @@ Servers
       lazyHealthCheckSampleSize=NUM,             -- The maximum size of the sample of queries to record and consider for the ``lazyHealthCheckThreshold``. Default is 100, which means the result (failure or success) of the last 100 queries will be considered.
       lazyHealthCheckThreshold=NUM,              -- The threshold, as a percentage, of queries that should fail for the 'lazy' health-check to be triggered when ``healthCheckMode`` is set to ``lazy``. The default is 20 which means 20% of the last ``lazyHealthCheckSampleSize`` queries should fail for a health-check to be triggered.
       lazyHealthCheckUseExponentialBackOff=BOOL, -- Whether the 'lazy' health-check should use an exponential back-off instead of a fixed value, between health-check probes. The default is false which means that after a backend has been moved to the 'down' state health-check probes are sent every ``lazyHealthCheckFailedInterval`` seconds. When set to true, the delay between each probe starts at ``lazyHealthCheckFailedInterval`` seconds and double between every probe, capped at ``lazyHealthCheckMaxBackOff`` seconds.
-      lazyHealthCheckMaxBackOff=NUM            -- This value, in seconds, caps the time between two health-check queries when ``lazyHealthCheckUseExponentialBackOff`` is set to true. The default is 3600 which means that at most one hour will pass between two health-check queries.
+      lazyHealthCheckMaxBackOff=NUM,             -- This value, in seconds, caps the time between two health-check queries when ``lazyHealthCheckUseExponentialBackOff`` is set to true. The default is 3600 which means that at most one hour will pass between two health-check queries.
+      lazyHealthCheckWhenUpgraded=BOOL           -- Whether the auto-upgraded version of this backend (see ``autoUpgrade``) should use the lazy health-checking mode. Default is false, which means it will use the regular health-checking mode.
     })
 
   :param str server_string: A simple IP:PORT string.