]> git.ipfire.org Git - thirdparty/pdns.git/commitdiff
Provide runtime update API for some health check params
authorOliver Chen <oliver.chen@nokia-sbell.com>
Sun, 27 Apr 2025 03:55:33 +0000 (03:55 +0000)
committerOliver Chen <oliver.chen@nokia-sbell.com>
Sun, 27 Apr 2025 03:57:39 +0000 (03:57 +0000)
pdns/dnsdistdist/dnsdist-lua-bindings.cc
pdns/dnsdistdist/docs/reference/config.rst
regression-tests.dnsdist/test_HealthChecks.py

index 0a4c26772c8403c9bb077448f76f62a8e8827da1..c0460da562bfa8752c50f94864c3a1006d849d98 100644 (file)
@@ -137,6 +137,18 @@ void setupLuaBindings(LuaContext& luaCtx, bool client, bool configCheck)
     }
     state.setLazyAuto();
   });
+  // Lua binding Server:setHealthCheckParams([table]): looks up the health
+  // check related keys below in the optional table and stores them in
+  // state.d_config.
+  // NOTE(review): presumably getOptionalValue() leaves the target untouched
+  // when the key is absent or the table is not provided -- confirm against
+  // its definition.
+  luaCtx.registerFunction<void (DownstreamState::*)(boost::optional<LuaAssociativeTable<boost::variant<bool,size_t,std::string>>>)>("setHealthCheckParams", [](DownstreamState& state, boost::optional<LuaAssociativeTable<boost::variant<bool,size_t,std::string>>> vars) {
+    // scratch buffer for "checkName", which is read as a string first
+    std::string valueStr;
+    getOptionalValue<size_t>(vars, "maxCheckFailures", state.d_config.maxCheckFailures);
+    // the Lua key is "rise", the config field is minRiseSuccesses
+    getOptionalValue<size_t>(vars, "rise", state.d_config.minRiseSuccesses);
+    getOptionalValue<size_t>(vars, "checkTimeout", state.d_config.checkTimeout);
+    getOptionalValue<size_t>(vars, "checkInterval", state.d_config.checkInterval);
+    getOptionalValue<std::string>(vars, "checkType", state.d_config.checkType);
+    getOptionalValue<bool>(vars, "checkTCP", state.d_config.d_tcpCheck);
+    // checkName is only replaced when getOptionalValue() reports a positive
+    // result; the string is then converted to a DNSName
+    if (getOptionalValue<std::string>(vars, "checkName", valueStr) > 0) {
+      state.d_config.checkName = DNSName(valueStr);
+    }
+  });
   luaCtx.registerFunction<std::string (DownstreamState::*)() const>("getName", [](const DownstreamState& state) -> const std::string& { return state.getName(); });
   luaCtx.registerFunction<std::string (DownstreamState::*)() const>("getNameWithAddr", [](const DownstreamState& state) -> const std::string& { return state.getNameWithAddr(); });
   luaCtx.registerMember<bool(DownstreamState::*)>(
index b8f8eab9d95a0c8b1693e7486e5a07d0676a3469..e3007e3d483f8133158d497ca0b3d1dda795eab5 100644 (file)
@@ -883,6 +883,29 @@ A server object returned by :func:`getServer` can be manipulated with these func
     Administratively set the server in an ``UP`` state.
     This server will still receive queries and health checks are disabled
 
+  .. method:: Server:setHealthCheckParams([parameter_table])
+
+    .. versionadded:: 2.0.0
+
+    Set multiple health check related parameters for this server.
+
+    :param table parameter_table: A table with key=value pairs. An omitted or empty table, as well as unknown keys/values, are safely ignored.
+
+    The supported parameters are listed in the table below. For a description of each parameter, please refer to the field of the same name in :func:`newServer`.
+
+    .. csv-table::
+      :delim: space
+      :header: Keyword, Type
+      :widths: auto
+
+      ``checkName``                            ``string``
+      ``checkType``                            ``string``
+      ``checkTimeout``                         ``number``
+      ``checkInterval``                        ``number``
+      ``maxCheckFailures``                     ``number``
+      ``rise``                                 ``number``
+      ``checkTCP``                             ``bool``
+
   Apart from the functions, a :class:`Server` object has these attributes:
 
   .. attribute:: Server.name
index 7a5c6b7e0c06f79af6ccd676e7d103970a1f9044..ed52c76635624cf3824b6093e12ad4d5a2d928a8 100644 (file)
@@ -5,7 +5,8 @@ import ssl
 import threading
 import time
 import dns
-from dnsdisttests import DNSDistTest, pickAvailablePort
+from queue import Queue
+from dnsdisttests import DNSDistTest, pickAvailablePort, ResponderDropAction
 
 class HealthCheckTest(DNSDistTest):
     _consoleKey = DNSDistTest.generateConsoleKey()
@@ -398,3 +399,152 @@ class TestLazyHealthChecks(HealthCheckTest):
         time.sleep(1.5)
         self.assertEqual(_dohHealthCheckQueries, 2)
         self.assertEqual(self.getBackendStatus(), 'up')
+
+class HealthCheckUpdateParams(HealthCheckTest):
+
+    _healthQueue = Queue()
+
+    @classmethod
+    def startResponders(cls):
+        print("Launching responders..")
+        cls._UDPResponder = threading.Thread(name='UDP Responder', target=cls.UDPResponder, args=[cls._testServerPort, cls._toResponderQueue, cls._fromResponderQueue, False, cls.healthCallbackUdp])
+        cls._UDPResponder.daemon = True
+        cls._UDPResponder.start()
+        cls._TCPResponder = threading.Thread(name='TCP Responder', target=cls.TCPResponder, args=[cls._testServerPort, cls._toResponderQueue, cls._fromResponderQueue, False, False, cls.healthCallbackTcp])
+        cls._TCPResponder.daemon = True
+        cls._TCPResponder.start()
+
+    @classmethod
+    def healthCallbackUdp(cls, request):
+        qn, qt= str(request.question[0].name), request.question[0].rdtype
+        response = dns.message.make_response(request)
+        if qn.endswith("drop.hc.dnsdist.org.") or qn.endswith("tcponly.hc.dnsdist.org."):
+            response = None
+        if response is None:
+            cls._healthQueue.put((False, qn, qt))
+            return ResponderDropAction()
+        cls._healthQueue.put((True, qn, qt))
+        return response.to_wire()
+
+    @classmethod
+    def healthCallbackTcp(cls, request):
+        qn, qt= str(request.question[0].name), request.question[0].rdtype
+        response = dns.message.make_response(request)
+        if qn.endswith("drop.hc.dnsdist.org."):
+            response = None
+        if response is None:
+            cls._healthQueue.put((False, qn, qt))
+            return ResponderDropAction()
+        cls._healthQueue.put((True, qn, qt))
+        return response.to_wire()
+
+    @classmethod
+    def wait1(cls):
+        return cls._healthQueue.get()
+
+class TestUpdateHCParamsCombo1(HealthCheckUpdateParams):
+
+    # this test suite uses a different responder port
+    _testServerPort = pickAvailablePort()
+
+    def testCombo1(self):
+        """
+        HealthChecks: Update checkName, maxCheckFailures, rise, checkTCP
+        """
+        # consume health checks upon sys init
+        for _ in [1, 2]: rc, qn, qt = self.wait1()
+        self.assertEqual(rc, True)
+        time.sleep(0.1)
+        self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), 0)
+        self.assertEqual(self.getBackendStatus(), 'up')
+
+        self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkName='drop.hc.dnsdist.org',checkType='TXT',maxCheckFailures=2,rise=2})")
+
+        # wait for 1st failure
+        for i in [1,2,3]:
+            rc, qn, qt = self.wait1()
+            if rc is False: break
+        self.assertGreater(3, i)
+        self.assertEqual(qn, 'drop.hc.dnsdist.org.')
+        time.sleep(1.1)
+        # should have failures but still up
+        self.assertGreater(self.getBackendMetric(0, 'healthCheckFailures'), 0)
+        self.assertEqual(self.getBackendStatus(), 'up')
+
+        # wait for 2nd failure
+        rc, qn, qt = self.wait1()
+        self.assertEqual(rc, False)
+        self.assertEqual(qn, 'drop.hc.dnsdist.org.')
+        time.sleep(1.1)
+        # should have more failures and down
+        self.assertGreater(self.getBackendMetric(0, 'healthCheckFailures'), 1)
+        self.assertEqual(self.getBackendStatus(), 'down')
+
+        self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkName='tcponly.hc.powerdns.com',checkTCP=true})")
+
+        # wait for 1st success
+        for i in [1,2,3]:
+            rc, qn, qt = self.wait1()
+            if rc is True: break
+        self.assertGreater(3, i)
+        time.sleep(0.1)
+        # still down
+        self.assertEqual(self.getBackendStatus(), 'down')
+
+        beforeFailure = self.getBackendMetric(0, 'healthCheckFailures')
+
+        # wati for 2nd success
+        rc, qn, qt = self.wait1()
+        self.assertEqual(rc, True)
+        time.sleep(0.1)
+        # should have no more failures, back to up
+        self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), beforeFailure)
+        self.assertEqual(self.getBackendStatus(), 'up')
+
+class TestUpdateHCParamsCombo2(HealthCheckUpdateParams):
+
+    # this test suite uses a different responder port
+    _testServerPort = pickAvailablePort()
+
+    def testCombo2(self):
+        """
+        HealthChecks: Update checkType, checkTimeout, checkInterval
+        """
+        # consume health checks upon sys init
+        for _ in [1, 2]: rc, qn, qt = self.wait1()
+        self.assertEqual(rc, True)
+        time.sleep(0.1)
+        self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), 0)
+        self.assertEqual(self.getBackendStatus(), 'up')
+
+        self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkType='TXT',checkInterval=2})")
+
+        # start timing
+        rc, qn, qt = self.wait1()
+        t1 = time.time()
+        self.assertEqual(rc, True)
+        self.assertEqual(qt, dns.rdatatype.TXT)
+        rc, qn, qt = self.wait1()
+        t2 = time.time()
+        self.assertEqual(rc, True)
+        self.assertEqual(qt, dns.rdatatype.TXT)
+        # intervals shall be greater than 1
+        self.assertGreater(t2-t1, 1.5)
+
+        self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkName='drop.hc.dnsdist.org',checkTimeout=2000})")
+
+        # wait for 1st failure
+        for i in [1,2,3]:
+            rc, qn, qt = self.wait1()
+            if rc is False: break
+        self.assertGreater(3, i)
+
+        beforeFailure = self.getBackendMetric(0, 'healthCheckFailures')
+
+        time.sleep(1.5)
+        # not timeout yet, should have no failure increase
+        self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), beforeFailure)
+
+        time.sleep(1)
+        # now should timeout and failure increased
+        self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), beforeFailure+1)