From a37aaafa1a694caafb7b034dda8cb4f71eb09d8b Mon Sep 17 00:00:00 2001 From: "b.courtois" Date: Tue, 31 Mar 2026 22:06:01 +0200 Subject: [PATCH] dnsdist: add opt-in fatal bind failures for console and webserver Introduce opt-in fatal behavior when binding the webserver socket or the control socket fails, to make startup failures visible to service managers like systemd. Expose the feature in both configuration styles: - Lua: setConsoleBindFatal(bool), setWebserverBindFatal(bool) - YAML: console.bind_fatal, webserver.bind_fatal When enabled, dnsdist now exits with failure on bind exceptions for: - control socket listeners - webserver listeners Wire the new settings through runtime configuration loading, Lua configuration items, and YAML parsing, and add console completion entries for both setters. Update documentation with new config functions and behavior notes. Add regression tests in test_BindFatal.py for Lua and YAML, validating: - default/not set: bind failures are non-fatal - explicit false: bind failures are non-fatal - explicit true: bind failures are fatal at startup Signed-off-by: b.courtois --- .../dnsdistdist/dnsdist-configuration-yaml.cc | 2 + pdns/dnsdistdist/dnsdist-configuration.hh | 2 + .../dnsdistdist/dnsdist-console-completion.cc | 2 + .../dnsdist-lua-configuration-items.cc | 2 + pdns/dnsdistdist/dnsdist-lua.cc | 6 + .../dnsdist-settings-definitions.yml | 16 ++ pdns/dnsdistdist/dnsdist.cc | 6 + pdns/dnsdistdist/docs/reference/config.rst | 16 ++ regression-tests.dnsdist/test_BindFatal.py | 261 ++++++++++++++++++ 9 files changed, 313 insertions(+) create mode 100644 regression-tests.dnsdist/test_BindFatal.py diff --git a/pdns/dnsdistdist/dnsdist-configuration-yaml.cc b/pdns/dnsdistdist/dnsdist-configuration-yaml.cc index b1a08adb99..47d8052d60 100644 --- a/pdns/dnsdistdist/dnsdist-configuration-yaml.cc +++ b/pdns/dnsdistdist/dnsdist-configuration-yaml.cc @@ -940,6 +940,7 @@ static void loadWebServer(const Context& context, const 
dnsdist::rust::settings: dnsdist::webserver::setMaxConcurrentConnections(webConfig.max_concurrent_connections); config.d_apiConfigDirectory = std::string(webConfig.api_configuration_directory); config.d_apiReadWrite = webConfig.api_read_write; + config.d_webserverBindFatal = webConfig.bind_fatal; }); } @@ -1062,6 +1063,7 @@ static void handleConsoleConfiguration(const dnsdist::rust::settings::ConsoleCon config.d_consoleACL.addMask(std::string(aclEntry)); } B64Decode(std::string(consoleConf.key), config.d_consoleKey); + config.d_consoleBindFatal = consoleConf.bind_fatal; }); } } diff --git a/pdns/dnsdistdist/dnsdist-configuration.hh b/pdns/dnsdistdist/dnsdist-configuration.hh index 5ab67715bb..42e8be8df6 100644 --- a/pdns/dnsdistdist/dnsdist-configuration.hh +++ b/pdns/dnsdistdist/dnsdist-configuration.hh @@ -181,6 +181,8 @@ struct RuntimeConfiguration bool d_allowEmptyResponse{false}; bool d_dropEmptyQueries{false}; bool d_consoleEnabled{false}; + bool d_consoleBindFatal{false}; + bool d_webserverBindFatal{false}; bool d_logConsoleConnections{true}; bool d_addEDNSToSelfGeneratedResponses{true}; bool d_applyACLToProxiedClients{false}; diff --git a/pdns/dnsdistdist/dnsdist-console-completion.cc b/pdns/dnsdistdist/dnsdist-console-completion.cc index 8cd9d4853c..978e3cf33f 100644 --- a/pdns/dnsdistdist/dnsdist-console-completion.cc +++ b/pdns/dnsdistdist/dnsdist-console-completion.cc @@ -239,6 +239,7 @@ static std::vector s_consoleKeywor {"setCacheCleaningPercentage", true, "num", "Set the percentage of the cache that the cache cleaning algorithm will try to free by removing expired entries. 
By default (100), all expired entries are remove"}, {"setConsistentHashingBalancingFactor", true, "factor", "Set the balancing factor for bounded-load consistent hashing"}, {"setConsoleACL", true, "{netmask, netmask}", "replace the console ACL set with these netmasks"}, + {"setConsoleBindFatal", true, "enable", "whether a failure to bind the console control socket is fatal"}, {"setConsoleConnectionsLogging", true, "enabled", "whether to log the opening and closing of console connections"}, {"setConsoleMaximumConcurrentConnections", true, "max", "Set the maximum number of concurrent console connections"}, {"setConsoleOutputMaxMsgSize", true, "messageSize", "set console message maximum size in bytes, default is 10 MB"}, @@ -309,6 +310,7 @@ static std::vector s_consoleKeywor {"setVerbose", true, "bool", "set whether log messages at the verbose level will be logged"}, {"setVerboseHealthChecks", true, "bool", "set whether health check errors will be logged"}, {"setVerboseLogDestination", true, "destination file", "Set a destination file to write the 'verbose' log messages to, instead of sending them to syslog and/or the standard output"}, + {"setWebserverBindFatal", true, "enable", "whether a failure to bind a web server socket is fatal"}, {"setWebserverConfig", true, "[{password=string, apiKey=string, customHeaders, statsRequireAuthentication, prometheusAddInstanceLabel=bool}]", "Updates webserver configuration"}, {"setWeightedBalancingFactor", true, "factor", "Set the balancing factor for bounded-load weighted policies (whashed, wrandom)"}, {"setWHashedPerturbation", true, "value", "Set the hash perturbation value to be used in the whashed policy instead of a random one, allowing to have consistent whashed results on different instance"}, diff --git a/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc b/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc index 6b0d277f69..e67b2731d1 100644 --- a/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc +++ 
b/pdns/dnsdistdist/dnsdist-lua-configuration-items.cc @@ -75,7 +75,9 @@ static const std::map s_booleanConfigIte {"setRoundRobinFailOnNoServer", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_roundrobinFailOnNoServer = newValue; }}}, {"setDropEmptyQueries", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_dropEmptyQueries = newValue; }}}, {"setAllowEmptyResponse", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_allowEmptyResponse = newValue; }}}, + {"setConsoleBindFatal", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_consoleBindFatal = newValue; }}}, {"setConsoleConnectionsLogging", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_logConsoleConnections = newValue; }}}, + {"setWebserverBindFatal", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_webserverBindFatal = newValue; }}}, {"setProxyProtocolApplyACLToProxiedClients", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_applyACLToProxiedClients = newValue; }}}, {"setAddEDNSToSelfGeneratedResponses", {[](dnsdist::configuration::RuntimeConfiguration& config, bool newValue) { config.d_addEDNSToSelfGeneratedResponses = newValue; }}}, }; diff --git a/pdns/dnsdistdist/dnsdist-lua.cc b/pdns/dnsdistdist/dnsdist-lua.cc index fe42db8115..3a09410284 100644 --- a/pdns/dnsdistdist/dnsdist-lua.cc +++ b/pdns/dnsdistdist/dnsdist-lua.cc @@ -1123,6 +1123,9 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck) g_outputBuffer = "Unable to bind to webserver socket on " + local.toStringWithPort() + ": " + e.what(); SLOG(errlog("Unable to bind to webserver socket on %s: %s", local.toStringWithPort(), e.what()), getLogger("webserver")->error(Logr::Error, e.what(), "Error while trying to bind the web server socket", "network.local.address", 
Logging::Loggable(local))); + if (dnsdist::configuration::getCurrentRuntimeConfiguration().d_webserverBindFatal) { + _exit(EXIT_FAILURE); + } } } }); @@ -1248,6 +1251,9 @@ static void setupLuaConfig(LuaContext& luaCtx, bool client, bool configCheck) g_outputBuffer = "Unable to bind to control socket on " + local.toStringWithPort() + ": " + exp.what(); SLOG(errlog("Unable to bind to control socket on %s: %s", local.toStringWithPort(), exp.what()), getLogger("controlSocket")->error(Logr::Error, exp.what(), "Unable to bind to console's control socket", "network.local.address", Logging::Loggable(local))); + if (dnsdist::configuration::getCurrentRuntimeConfiguration().d_consoleBindFatal) { + _exit(EXIT_FAILURE); + } } } }); diff --git a/pdns/dnsdistdist/dnsdist-settings-definitions.yml b/pdns/dnsdistdist/dnsdist-settings-definitions.yml index c84ed278fc..52af5ed94a 100644 --- a/pdns/dnsdistdist/dnsdist-settings-definitions.yml +++ b/pdns/dnsdistdist/dnsdist-settings-definitions.yml @@ -470,6 +470,14 @@ webserver: type: "bool" default: "false" description: "Allow modifications of the configuration via the API. Optionally saving these changes to disk. Modifications done via the API will not be written to the configuration by default and will not persist after a reload. 
Note that flushing the content of the packet cache via DELETE requests is still allowed even if the API is read-only" + - name: "bind_fatal" + version_added: "2.2.0" + type: "bool" + default: "false" + lua-name: "setWebserverBindFatal" + internal-field-name: "d_webserverBindFatal" + runtime-configurable: true + description: "Whether a failure to bind a web server socket should be fatal" console: description: "Console-related settings" @@ -507,6 +515,14 @@ console: internal-field-name: "d_consoleMaxConcurrentConnections" runtime-configurable: false description: "Set the maximum number of concurrent console connection" + - name: "bind_fatal" + version_added: "2.2.0" + type: "bool" + default: "false" + lua-name: "setConsoleBindFatal" + internal-field-name: "d_consoleBindFatal" + runtime-configurable: true + description: "Whether a failure to bind the console control socket should be fatal" ebpf_map: description: "An ``eBPF`` map that is used to share data with kernel-land ``AF_XDP``/``XSK``, ``socket filter`` or ``XDP`` programs. Maps can be pinned to a filesystem path, which makes their content persistent across restarts and allows external programs to read their content and to add new entries. :program:`dnsdist` will try to load maps that are pinned to a filesystem path on startups, inheriting any existing entries, and fall back to creating them if they do not exist yet. Note that the user :program`dnsdist` is running under must have the right privileges to read and write to the given file, and to go through all the directories in the path leading to that file. 
The pinned path must be on a filesystem of type ``BPF``, usually below ``/sys/fs/bpf/``" diff --git a/pdns/dnsdistdist/dnsdist.cc b/pdns/dnsdistdist/dnsdist.cc index 396c860f5c..abcaca16f0 100644 --- a/pdns/dnsdistdist/dnsdist.cc +++ b/pdns/dnsdistdist/dnsdist.cc @@ -3531,6 +3531,9 @@ static ListeningSockets initListeningSockets() catch (const std::exception& exp) { SLOG(errlog("Unable to bind to control socket on %s: %s", local.toStringWithPort(), exp.what()), dnsdist::logging::getTopLogger("setup")->error(Logr::Error, exp.what(), "Unable to bind to console control socket", "network.local.address", Logging::Loggable(local))); + if (currentConfig.d_consoleBindFatal) { + _exit(EXIT_FAILURE); + } } } @@ -3544,6 +3547,9 @@ static ListeningSockets initListeningSockets() catch (const std::exception& exp) { SLOG(errlog("Unable to bind to web server socket on %s: %s", local.toStringWithPort(), exp.what()), dnsdist::logging::getTopLogger("setup")->error(Logr::Error, exp.what(), "Unable to bind to web server socket", "network.local.address", Logging::Loggable(local))); + if (currentConfig.d_webserverBindFatal) { + _exit(EXIT_FAILURE); + } } } diff --git a/pdns/dnsdistdist/docs/reference/config.rst b/pdns/dnsdistdist/docs/reference/config.rst index 9f5cfab1d3..df30c8e316 100644 --- a/pdns/dnsdistdist/docs/reference/config.rst +++ b/pdns/dnsdistdist/docs/reference/config.rst @@ -329,6 +329,14 @@ Control Socket, Console and Webserver Generate and print an encryption key. +.. function:: setConsoleBindFatal(enable) + + .. versionadded:: 2.2.0 + + Whether a failure to bind a console control socket is fatal. + + :param bool enable: Default to false. + .. function:: setConsoleConnectionsLogging(enabled) Whether to log the opening and closing of console connections. 
@@ -412,6 +420,14 @@ Webserver configuration :param bool allow: Set to true to allow modification of the configuration through the API :param str dir: A valid directory where the configuration files will be written by the API. +.. function:: setWebserverBindFatal(enable) + + .. versionadded:: 2.2.0 + + Whether a failure to bind a web server socket is fatal. + + :param bool enable: Default to false. + .. function:: setWebserverConfig(options) .. versionchanged:: 2.1.0 diff --git a/regression-tests.dnsdist/test_BindFatal.py b/regression-tests.dnsdist/test_BindFatal.py new file mode 100644 index 0000000000..b5e0379708 --- /dev/null +++ b/regression-tests.dnsdist/test_BindFatal.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python +import unittest +import time + +from dnsdisttests import DNSDistTest + +_NON_EXISTING_ADDR = "192.0.2.1" # RFC 5737 TEST-NET, never locally routable + + +class _BindFatalMixin: + """Mixin: allow dnsdist to exit during startup (bind_fatal=true).""" + + _config_params = [] + _startupFailed = False + + @classmethod + def setUpClass(cls): + cls._startupFailed = False + try: + super().setUpClass() + for _ in range(0, 20): + if cls._dnsdist.poll() is not None: + cls._startupFailed = True + break + time.sleep(0.1) + except unittest.SkipTest: + raise + except Exception: + cls._startupFailed = True + + @classmethod + def tearDownClass(cls): + for backgroundThread in cls._backgroundThreads: + cls._backgroundThreads[backgroundThread] = False + if hasattr(cls, "_sock"): + cls._sock.close() + if cls._dnsdist is not None: + cls.killProcess(cls._dnsdist) + + +# Lua – webserver + + +class TestWebserverBindFatalNotSet(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when webserver bind fails using Lua configuration, because the default value of bind_fatal is false. 
+ """ + + _config_params = [] + _config_template = ( + """ + webserver("%s:80") + """ + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestWebserverBindFatalFalse(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when webserver bind fails using Lua configuration, because bind_fatal is explicitly set to false. + """ + + _config_params = [] + _config_template = ( + """ + setWebserverBindFatal(false) + webserver("%s:80") + """ + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestWebserverBindFatalTrue(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Fails to start when webserver bind fails using Lua configuration, because bind_fatal is explicitly set to true. + """ + + _config_template = ( + """ + setWebserverBindFatal(true) + webserver("%s:80") + """ + % _NON_EXISTING_ADDR + ) + + def testExitedOnStartup(self): + self.assertTrue(self._startupFailed, "dnsdist should have failed to start but did not") + + +# YAML – webserver + + +class TestYamlWebserverBindFatalNotSet(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when webserver bind fails using YAML configuration, because the default value of bind_fatal is false. + """ + + _yaml_config_template = ( + """--- +webserver: + listen_addresses: + - "%s:80" +""" + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestYamlWebserverBindFatalFalse(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when webserver bind fails using YAML configuration, because bind_fatal is explicitly set to false. 
+ """ + + _yaml_config_template = ( + """--- +webserver: + listen_addresses: + - "%s:80" + bind_fatal: false +""" + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestYamlWebserverBindFatalTrue(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Fails to start when webserver bind fails using YAML configuration, because bind_fatal is explicitly set to true. + """ + + _yaml_config_template = ( + """--- +webserver: + listen_addresses: + - "%s:80" + bind_fatal: true +""" + % _NON_EXISTING_ADDR + ) + + def testExitedOnStartup(self): + self.assertTrue(self._startupFailed, "dnsdist should have failed to start but did not") + + +# Lua – control socket + + +class TestConsoleBindFatalNotSet(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when control socket bind fails using Lua configuration, because the default value of bind_fatal is false. + """ + + _config_template = ( + """ + controlSocket("%s:5199") + """ + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestConsoleBindFatalFalse(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when control socket bind fails using Lua configuration, because bind_fatal is explicitly set to false. + """ + + _config_template = ( + """ + setConsoleBindFatal(false) + controlSocket("%s:5199") + """ + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestConsoleBindFatalTrue(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Fails to start when control socket bind fails using Lua configuration, because bind_fatal is explicitly set to true. 
+ """ + + _config_template = ( + """ + setConsoleBindFatal(true) + controlSocket("%s:5199") + """ + % _NON_EXISTING_ADDR + ) + + def testExitedOnStartup(self): + self.assertTrue(self._startupFailed, "dnsdist should have failed to start but did not") + + +# YAML – control socket + + +class TestYamlConsoleBindFatalNotSet(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when control socket bind fails using YAML configuration, because the default value of bind_fatal is false. + """ + + _yaml_config_template = ( + """--- +console: + listen_address: "%s:5199" +""" + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestYamlConsoleBindFatalFalse(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Succeeds to start when control socket bind fails using YAML configuration, because bind_fatal is explicitly set to false. + """ + + _yaml_config_template = ( + """--- +console: + listen_address: "%s:5199" + bind_fatal: false +""" + % _NON_EXISTING_ADDR + ) + + def testStartedSuccessfully(self): + self.assertIsNone(self._dnsdist.poll(), "dnsdist should still be running") + + +class TestYamlConsoleBindFatalTrue(_BindFatalMixin, DNSDistTest): + """ + BindFatal: Fails to start when control socket bind fails using YAML configuration, because bind_fatal is explicitly set to true. + """ + + _yaml_config_template = ( + """--- +console: + listen_address: "%s:5199" + bind_fatal: true +""" + % _NON_EXISTING_ADDR + ) + + def testExitedOnStartup(self): + self.assertTrue(self._startupFailed, "dnsdist should have failed to start but did not") -- 2.47.3