From: Oliver Chen Date: Sun, 27 Apr 2025 03:55:33 +0000 (+0000) Subject: Provide runtime update API for some health check params X-Git-Tag: dnsdist-2.0.0-alpha2~32^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0f911ceb4ddf26fdca482c5db3b711d8c321fb71;p=thirdparty%2Fpdns.git Provide runtime update API for some health check params --- diff --git a/pdns/dnsdistdist/dnsdist-lua-bindings.cc b/pdns/dnsdistdist/dnsdist-lua-bindings.cc index 0a4c26772c..c0460da562 100644 --- a/pdns/dnsdistdist/dnsdist-lua-bindings.cc +++ b/pdns/dnsdistdist/dnsdist-lua-bindings.cc @@ -137,6 +137,18 @@ void setupLuaBindings(LuaContext& luaCtx, bool client, bool configCheck) } state.setLazyAuto(); }); + luaCtx.registerFunction>>)>("setHealthCheckParams", [](DownstreamState& state, boost::optional>> vars) { + std::string valueStr; + getOptionalValue(vars, "maxCheckFailures", state.d_config.maxCheckFailures); + getOptionalValue(vars, "rise", state.d_config.minRiseSuccesses); + getOptionalValue(vars, "checkTimeout", state.d_config.checkTimeout); + getOptionalValue(vars, "checkInterval", state.d_config.checkInterval); + getOptionalValue(vars, "checkType", state.d_config.checkType); + getOptionalValue(vars, "checkTCP", state.d_config.d_tcpCheck); + if (getOptionalValue(vars, "checkName", valueStr) > 0) { + state.d_config.checkName = DNSName(valueStr); + } + }); luaCtx.registerFunction("getName", [](const DownstreamState& state) -> const std::string& { return state.getName(); }); luaCtx.registerFunction("getNameWithAddr", [](const DownstreamState& state) -> const std::string& { return state.getNameWithAddr(); }); luaCtx.registerMember( diff --git a/pdns/dnsdistdist/docs/reference/config.rst b/pdns/dnsdistdist/docs/reference/config.rst index b8f8eab9d9..e3007e3d48 100644 --- a/pdns/dnsdistdist/docs/reference/config.rst +++ b/pdns/dnsdistdist/docs/reference/config.rst @@ -883,6 +883,29 @@ A server object returned by :func:`getServer` can be manipulated with these func 
Administratively set the server in an ``UP`` state. This server will still receive queries and health checks are disabled + .. method:: Server:setHealthCheckParams([parameter_table]) + + .. versionadded:: 2.0.0 + + Set multiple health check related parameters for this server. + + :param table parameter_table: A table with key=value pairs. Empty parameter or table, and unknown keys/values are safely ignored. + + The supported parameters are listed in the table below. For a description of each parameter, please refer to the field of the same name in :func:`newServer` + + .. csv-table:: + :delim: space + :header: Keyword, Type + :widths: auto + + ``checkName`` ``string`` + ``checkType`` ``string`` + ``checkTimeout`` ``number`` + ``checkInterval`` ``number`` + ``maxCheckFailures`` ``number`` + ``rise`` ``number`` + ``checkTCP`` ``bool`` + Apart from the functions, a :class:`Server` object has these attributes: .. attribute:: Server.name diff --git a/regression-tests.dnsdist/test_HealthChecks.py b/regression-tests.dnsdist/test_HealthChecks.py index 7a5c6b7e0c..ed52c76635 100644 --- a/regression-tests.dnsdist/test_HealthChecks.py +++ b/regression-tests.dnsdist/test_HealthChecks.py @@ -5,7 +5,8 @@ import ssl import threading import time import dns -from dnsdisttests import DNSDistTest, pickAvailablePort +from queue import Queue +from dnsdisttests import DNSDistTest, pickAvailablePort, ResponderDropAction class HealthCheckTest(DNSDistTest): _consoleKey = DNSDistTest.generateConsoleKey() @@ -398,3 +399,152 @@ class TestLazyHealthChecks(HealthCheckTest): time.sleep(1.5) self.assertEqual(_dohHealthCheckQueries, 2) self.assertEqual(self.getBackendStatus(), 'up') + +class HealthCheckUpdateParams(HealthCheckTest): + + _healthQueue = Queue() + + @classmethod + def startResponders(cls): + print("Launching responders..") + cls._UDPResponder = threading.Thread(name='UDP Responder', target=cls.UDPResponder, args=[cls._testServerPort, cls._toResponderQueue, cls._fromResponderQueue, False, 
cls.healthCallbackUdp]) + cls._UDPResponder.daemon = True + cls._UDPResponder.start() + cls._TCPResponder = threading.Thread(name='TCP Responder', target=cls.TCPResponder, args=[cls._testServerPort, cls._toResponderQueue, cls._fromResponderQueue, False, False, cls.healthCallbackTcp]) + cls._TCPResponder.daemon = True + cls._TCPResponder.start() + + @classmethod + def healthCallbackUdp(cls, request): + qn, qt= str(request.question[0].name), request.question[0].rdtype + response = dns.message.make_response(request) + if qn.endswith("drop.hc.dnsdist.org.") or qn.endswith("tcponly.hc.dnsdist.org."): + response = None + if response is None: + cls._healthQueue.put((False, qn, qt)) + return ResponderDropAction() + cls._healthQueue.put((True, qn, qt)) + return response.to_wire() + + @classmethod + def healthCallbackTcp(cls, request): + qn, qt= str(request.question[0].name), request.question[0].rdtype + response = dns.message.make_response(request) + if qn.endswith("drop.hc.dnsdist.org."): + response = None + if response is None: + cls._healthQueue.put((False, qn, qt)) + return ResponderDropAction() + cls._healthQueue.put((True, qn, qt)) + return response.to_wire() + + @classmethod + def wait1(cls): + return cls._healthQueue.get() + +class TestUpdateHCParamsCombo1(HealthCheckUpdateParams): + + # this test suite uses a different responder port + _testServerPort = pickAvailablePort() + + def testCombo1(self): + """ + HealthChecks: Update checkName, maxCheckFailures, rise, checkTCP + """ + # consume health checks upon sys init + for _ in [1, 2]: rc, qn, qt = self.wait1() + self.assertEqual(rc, True) + time.sleep(0.1) + self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), 0) + self.assertEqual(self.getBackendStatus(), 'up') + + self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkName='drop.hc.dnsdist.org',checkType='TXT',maxCheckFailures=2,rise=2})") + + # wait for 1st failure + for i in [1,2,3]: + rc, qn, qt = self.wait1() + if rc is False: break + 
self.assertGreater(3, i) + self.assertEqual(qn, 'drop.hc.dnsdist.org.') + time.sleep(1.1) + # should have failures but still up + self.assertGreater(self.getBackendMetric(0, 'healthCheckFailures'), 0) + self.assertEqual(self.getBackendStatus(), 'up') + + # wait for 2nd failure + rc, qn, qt = self.wait1() + self.assertEqual(rc, False) + self.assertEqual(qn, 'drop.hc.dnsdist.org.') + time.sleep(1.1) + # should have more failures and down + self.assertGreater(self.getBackendMetric(0, 'healthCheckFailures'), 1) + self.assertEqual(self.getBackendStatus(), 'down') + + self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkName='tcponly.hc.powerdns.com',checkTCP=true})") + + # wait for 1st success + for i in [1,2,3]: + rc, qn, qt = self.wait1() + if rc is True: break + self.assertGreater(3, i) + time.sleep(0.1) + # still down + self.assertEqual(self.getBackendStatus(), 'down') + + beforeFailure = self.getBackendMetric(0, 'healthCheckFailures') + + # wait for 2nd success + rc, qn, qt = self.wait1() + self.assertEqual(rc, True) + time.sleep(0.1) + # should have no more failures, back to up + self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), beforeFailure) + self.assertEqual(self.getBackendStatus(), 'up') + +class TestUpdateHCParamsCombo2(HealthCheckUpdateParams): + + # this test suite uses a different responder port + _testServerPort = pickAvailablePort() + + def testCombo2(self): + """ + HealthChecks: Update checkType, checkTimeout, checkInterval + """ + # consume health checks upon sys init + for _ in [1, 2]: rc, qn, qt = self.wait1() + self.assertEqual(rc, True) + time.sleep(0.1) + self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), 0) + self.assertEqual(self.getBackendStatus(), 'up') + + self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkType='TXT',checkInterval=2})") + + # start timing + rc, qn, qt = self.wait1() + t1 = time.time() + self.assertEqual(rc, True) + self.assertEqual(qt, dns.rdatatype.TXT) + rc, qn, qt = 
self.wait1() + t2 = time.time() + self.assertEqual(rc, True) + self.assertEqual(qt, dns.rdatatype.TXT) + # the interval between the two checks shall be greater than 1.5s + self.assertGreater(t2-t1, 1.5) + + self.sendConsoleCommand("getServer(0):setHealthCheckParams({checkName='drop.hc.dnsdist.org',checkTimeout=2000})") + + # wait for 1st failure + for i in [1,2,3]: + rc, qn, qt = self.wait1() + if rc is False: break + self.assertGreater(3, i) + + beforeFailure = self.getBackendMetric(0, 'healthCheckFailures') + + time.sleep(1.5) + # not timed out yet, should have no failure increase + self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), beforeFailure) + + time.sleep(1) + # now it should have timed out and the failure count increased + self.assertEqual(self.getBackendMetric(0, 'healthCheckFailures'), beforeFailure+1)