From 68cd0bce73b5a33b32eee00965f424b572b64395 Mon Sep 17 00:00:00 2001 From: Miod Vallat Date: Wed, 5 Mar 2025 10:10:13 +0100 Subject: [PATCH] Add a "failOnIncompleteCheck" option to if*up Lua functions. This option, is set to "true", will force the if*up functions to return SERVFAIL, rather than applying the backupSelector, if none of the health checks for the targets to check have completed yet. --- docs/lua-records/functions.rst | 2 + pdns/lua-record.cc | 55 +++++++++++++++++---- regression-tests.auth-py/test_LuaRecords.py | 29 +++++++++++ 3 files changed, 76 insertions(+), 10 deletions(-) diff --git a/docs/lua-records/functions.rst b/docs/lua-records/functions.rst index 9a8826ed5a..2469626a30 100644 --- a/docs/lua-records/functions.rst +++ b/docs/lua-records/functions.rst @@ -75,6 +75,7 @@ Record creation functions - ``timeout``: Maximum time in seconds that you allow the check to take (default 2) - ``interval``: Time interval between two checks, in seconds. Defaults to :ref:`setting-lua-health-checks-interval` if not specified. - ``minimumFailures``: The number of unsuccessful checks in a row required to mark the address as down. Defaults to 1 if not specified, i.e. report as down on the first unsuccessful check. + - ``failOnIncompleteCheck``: if set to ``true``, return SERVFAIL instead of applying ``backupSelector``, if none of the addresses have completed their background health check yet. .. function:: ifurlup(url, addresses[, options]) @@ -104,6 +105,7 @@ Record creation functions - ``useragent``: Set the HTTP "User-Agent" header in the requests. By default it is set to "PowerDNS Authoritative Server" - ``byteslimit``: Limit the maximum download size to ``byteslimit`` bytes (default 0 meaning no limit). - ``minimumFailures``: The number of unsuccessful checks in a row required to mark the address as down. Defaults to 1 if not specified, i.e. report as down on the first unsuccessful check. + - ``failOnIncompleteCheck``: if set to ``true``, return SERVFAIL instead of applying ``backupSelector``, if none of the addresses have completed their background health check yet. An example of a list of address sets: diff --git a/pdns/lua-record.cc b/pdns/lua-record.cc index ec3b037053..3b349804bf 100644 --- a/pdns/lua-record.cc +++ b/pdns/lua-record.cc @@ -325,6 +325,13 @@ private: } }; +// The return value of this function can be one of three sets of values: +// - positive integer: the target is up, the return value is its weight. +// (1 if weights are not used) +// - zero: the target is down. +// - negative integer: the check for this target has not completed yet. +// (this value is only reported if the failOnIncompleteCheck option is +// set, otherwise zero will be returned) //NOLINTNEXTLINE(readability-identifier-length) int IsUpOracle::isUp(const CheckDesc& cd) { @@ -354,6 +361,14 @@ int IsUpOracle::isUp(const CheckDesc& cd) (*statuses)[cd] = std::make_unique(now); } } + // If explicitly asked to fail on incomplete checks, report this (as + // a negative value). + static const std::string foic{"failOnIncompleteCheck"}; + if (cd.opts.count(foic) != 0) { + if (cd.opts.at(foic) == "true") { + return -1; + } + } return 0; } @@ -887,7 +902,7 @@ static std::string pickConsistentWeightedHashed(const ComboAddress& bestwho, con return {}; } -static vector genericIfUp(const boost::variant& ips, boost::optional options, const std::function& upcheckf, uint16_t port = 0) +static vector genericIfUp(const boost::variant& ips, boost::optional options, const std::function& upcheckf, uint16_t port = 0) { vector > candidates; opts_t opts; @@ -897,12 +912,17 @@ static vector genericIfUp(const boost::variant& candidates = convMultiComboAddressList(ips, port); - for (const auto& unit : candidates) { + bool incompleteCheck{true}; + for(const auto& unit : candidates) { vector available; for(const auto& address : unit) { - if (upcheckf(address, opts)) { + int status = upcheckf(address, opts); + if (status > 0) { available.push_back(address); } + if (status >= 0) { + incompleteCheck = false; + } } if(!available.empty()) { vector res = useSelector(getOptionValue(options, "selector", "random"), s_lua_record_ctx->bestwho, available); @@ -910,7 +930,12 @@ static vector genericIfUp(const boost::variant& } } - // All units down, apply backupSelector on all candidates + // All units down or have not completed their checks yet. + if (incompleteCheck) { + throw std::runtime_error("if{url,port}up health check has not completed yet"); + } + + // Apply backupSelector on all candidates vector ret{}; for(const auto& unit : candidates) { ret.insert(ret.end(), unit.begin(), unit.end()); @@ -1228,8 +1253,8 @@ static vector lua_ifportup(int port, const boost::variant(std::numeric_limits::max())); - auto checker = [](const ComboAddress& addr, const opts_t& opts) -> bool { - return g_up.isUp(addr, opts) != 0; + auto checker = [](const ComboAddress& addr, const opts_t& opts) -> int { + return g_up.isUp(addr, opts); }; return genericIfUp(ips, std::move(options), checker, port); } @@ -1246,6 +1271,7 @@ static vector lua_ifurlextup(const vector >& ipurls, b ca_unspec.sin4.sin_family=AF_UNSPEC; // ipurls: { { ["192.0.2.1"] = "https://example.com", ["192.0.2.2"] = "https://example.com/404" } } + bool incompleteCheck{true}; for (const auto& [count, unitmap] : ipurls) { // unitmap: 1 = { ["192.0.2.1"] = "https://example.com", ["192.0.2.2"] = "https://example.com/404" } vector available; @@ -1254,9 +1280,13 @@ static vector lua_ifurlextup(const vector >& ipurls, b // unit: ["192.0.2.1"] = "https://example.com" ComboAddress address(ipStr); candidates.push_back(address); - if (g_up.isUp(ca_unspec, url, opts) != 0) { + int status = g_up.isUp(ca_unspec, url, opts); + if (status > 0) { available.push_back(address); } + if (status >= 0) { + incompleteCheck = false; + } } if(!available.empty()) { vector res = useSelector(getOptionValue(options, "selector", "random"), s_lua_record_ctx->bestwho, available); @@ -1264,15 +1294,20 @@ static vector lua_ifurlextup(const vector >& ipurls, b } } - // All units down, apply backupSelector on all candidates + // All units down or have not completed their checks yet. + if (incompleteCheck) { + throw std::runtime_error("ifexturlup health check has not completed yet"); + } + + // Apply backupSelector on all candidates vector res = useSelector(getOptionValue(options, "backupSelector", "random"), s_lua_record_ctx->bestwho, candidates); return convComboAddressListToString(res); } static vector lua_ifurlup(const std::string& url, const boost::variant& ips, boost::optional options) { - auto checker = [&url](const ComboAddress& addr, const opts_t& opts) -> bool { - return g_up.isUp(addr, url, opts) != 0; + auto checker = [&url](const ComboAddress& addr, const opts_t& opts) -> int { + return g_up.isUp(addr, url, opts); }; return genericIfUp(ips, std::move(options), checker); } diff --git a/regression-tests.auth-py/test_LuaRecords.py b/regression-tests.auth-py/test_LuaRecords.py index 75dc2a832b..a8df4b25c9 100644 --- a/regression-tests.auth-py/test_LuaRecords.py +++ b/regression-tests.auth-py/test_LuaRecords.py @@ -112,6 +112,11 @@ usa-slowcheck IN LUA A ( ";settings={{stringmatch='Programming in Lua', i "return ifurlup('http://www.lua.org:8080/', " "USAips, settings) ") +usa-failincomplete IN LUA A ( ";settings={{stringmatch='Programming in Lua', failOnIncompleteCheck='true'}} " + "USAips={{'192.168.42.105'}}" + "return ifurlup('http://www.lua.org:8080/', " + "USAips, settings) ") + mix.ifurlup IN LUA A ("ifurlup('http://www.other.org:8080/ping.json', " "{{ '192.168.42.101', '{prefix}.101' }}, " "{{ stringmatch='pong' }}) ") @@ -1377,6 +1382,30 @@ lua-health-checks-interval=5 self.assertAnyRRsetInAnswer(res, reachable_rrs) self.assertNoneRRsetInAnswer(res, unreachable_rrs) + def testIfurlupFailOnIncompleteCheck(self): + """ + Simple ifurlup() test with failOnIncompleteCheck option set. + """ + ips = ['192.168.42.105'] + all_rrs = [] + for ip in ips: + rr = dns.rrset.from_text('usa-failincomplete.example.org.', 0, dns.rdataclass.IN, 'A', ip) + all_rrs.append(rr) + + query = dns.message.make_query('usa-failincomplete.example.org', 'A') + res = self.sendUDPQuery(query) + self.assertRcodeEqual(res, dns.rcode.SERVFAIL) + + # The above request being sent at time T, the following events occur: + # T+00: SERVFAIL returned as no data available yet + # T+00: checker thread starts + # T+02: 192.168.42.105 found down and marked as such + + time.sleep(3) + res = self.sendUDPQuery(query) + self.assertRcodeEqual(res, dns.rcode.NOERROR) + self.assertAnyRRsetInAnswer(res, all_rrs) + class TestLuaRecordsExecLimit(BaseLuaTest): # This configuration is similar to BaseLuaTest, but the exec limit is # set to a very low value. -- 2.47.2