From: Remi Gacogne Date: Fri, 23 Dec 2016 17:11:19 +0000 (+0100) Subject: dnsdist: Add `tcpConnectTimeout` to `newServer()` X-Git-Tag: rec-4.1.0-alpha1~313^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=refs%2Fpull%2F4818%2Fhead;p=thirdparty%2Fpdns.git dnsdist: Add `tcpConnectTimeout` to `newServer()` While we already timeouts when writing to or reading from TCP backends, the initial connection attempt was blocking. In addition to that, if the connection to the backend failed we didn't retry and simply closed the corresponding connection to the client. We now retry up to `retries` times, as we did for I/O errors. --- diff --git a/pdns/README-dnsdist.md b/pdns/README-dnsdist.md index 3d62cafb43..5335a8f280 100644 --- a/pdns/README-dnsdist.md +++ b/pdns/README-dnsdist.md @@ -191,10 +191,11 @@ or if the answer can't be sent in less than 2s. This can be configured with: The same kind of timeouts is enforced on the TCP connections to the downstream servers. The default value of 30s can be modified by passing the `tcpRecvTimeout` and `tcpSendTimeout` -parameters to `newServer`. If the TCP connection to a downstream server fails, `dnsdist` +parameters to `newServer`, with an additional `tcpConnectTimeout` parameter controlling +the connection timeout (5s by default). If the TCP connection to a downstream server fails, `dnsdist` will try to establish a new one up to `retries` times before giving up. ``` -newServer({address="192.0.2.1", tcpRecvTimeout=10, tcpSendTimeout=10, retries=5}) +newServer({address="192.0.2.1", tcpConnectTimeout=5, tcpRecvTimeout=10, tcpSendTimeout=10, retries=5}) ``` Source address @@ -1289,7 +1290,7 @@ Here are all functions: * `setVerboseHealthChecks(bool)`: set whether health check errors will be logged * Server related: * `newServer("ip:port")`: instantiate a new downstream server with default settings - * `newServer({address="ip:port", qps=1000, order=1, weight=10, pool="abuse", retries=5, tcpSendTimeout=30, tcpRecvTimeout=30, checkName="a.root-servers.net.", checkType="A", setCD=false, maxCheckFailures=1, mustResolve=false, useClientSubnet=true, source="address|interface name|address@interface"})`: + * `newServer({address="ip:port", qps=1000, order=1, weight=10, pool="abuse", retries=5, tcpConnectTimeout=5, tcpSendTimeout=30, tcpRecvTimeout=30, checkName="a.root-servers.net.", checkType="A", setCD=false, maxCheckFailures=1, mustResolve=false, useClientSubnet=true, source="address|interface name|address@interface"})`: instantiate a server with additional parameters * `showServers()`: output all servers * `getServer(n)`: returns server with index n diff --git a/pdns/dnsdist-console.cc b/pdns/dnsdist-console.cc index 3c34191d7f..d7b0c8f9bd 100644 --- a/pdns/dnsdist-console.cc +++ b/pdns/dnsdist-console.cc @@ -330,7 +330,7 @@ const std::vector g_consoleKeywords{ { "newQPSLimiter", true, "rate, burst", "configure a QPS limiter with that rate and that burst capacity" }, { "newRemoteLogger", true, "address:port [, timeout=2, maxQueuedEntries=100, reconnectWaitTime=1]", "create a Remote Logger object, to use with `RemoteLogAction()` and `RemoteLogResponseAction()`" }, { "newRuleAction", true, "DNS rule, DNS action", "return a pair of DNS Rule and DNS Action, to be used with `setRules()`" }, - { "newServer", true, "{address=\"ip:port\", qps=1000, order=1, weight=10, pool=\"abuse\", retries=5, tcpSendTimeout=30, tcpRecvTimeout=30, checkName=\"a.root-servers.net.\", checkType=\"A\", maxCheckFailures=1, mustResolve=false, useClientSubnet=true, source=\"address|interface name|address@interface\"", "instantiate a server" }, + { "newServer", true, "{address=\"ip:port\", qps=1000, order=1, weight=10, pool=\"abuse\", retries=5, tcpConnectTimeout=5, tcpSendTimeout=30, tcpRecvTimeout=30, checkName=\"a.root-servers.net.\", checkType=\"A\", maxCheckFailures=1, mustResolve=false, useClientSubnet=true, source=\"address|interface name|address@interface\"", "instantiate a server" }, { "newServerPolicy", true, "name, function", "create a policy object from a Lua function" }, { "newSuffixMatchNode", true, "", "returns a new SuffixMatchNode" }, { "NoRecurseAction", true, "", "strip RD bit from the question, let it go through" }, diff --git a/pdns/dnsdist-lua.cc b/pdns/dnsdist-lua.cc index 44e965762a..d6630bc24f 100644 --- a/pdns/dnsdist-lua.cc +++ b/pdns/dnsdist-lua.cc @@ -373,6 +373,10 @@ vector> setupLua(bool client, const std::string& confi ret->retries=std::stoi(boost::get(vars["retries"])); } + if(vars.count("tcpConnectTimeout")) { + ret->tcpConnectTimeout=std::stoi(boost::get(vars["tcpConnectTimeout"])); + } + if(vars.count("tcpSendTimeout")) { ret->tcpSendTimeout=std::stoi(boost::get(vars["tcpSendTimeout"])); } diff --git a/pdns/dnsdist-tcp.cc b/pdns/dnsdist-tcp.cc index a66137ec9c..6410de8bb4 100644 --- a/pdns/dnsdist-tcp.cc +++ b/pdns/dnsdist-tcp.cc @@ -47,27 +47,34 @@ using std::atomic; Let's start naively. */ -static int setupTCPDownstream(shared_ptr ds) -{ - vinfolog("TCP connecting to downstream %s", ds->remote.toStringWithPort()); - int sock = SSocket(ds->remote.sin4.sin_family, SOCK_STREAM, 0); - try { - if (!IsAnyAddress(ds->sourceAddr)) { - SSetsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 1); +static int setupTCPDownstream(shared_ptr ds, uint16_t& downstreamFailures) +{ + do { + vinfolog("TCP connecting to downstream %s (%d)", ds->remote.toStringWithPort(), downstreamFailures); + int sock = SSocket(ds->remote.sin4.sin_family, SOCK_STREAM, 0); + try { + if (!IsAnyAddress(ds->sourceAddr)) { + SSetsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 1); #ifdef IP_BIND_ADDRESS_NO_PORT - SSetsockopt(sock, SOL_IP, IP_BIND_ADDRESS_NO_PORT, 1); + SSetsockopt(sock, SOL_IP, IP_BIND_ADDRESS_NO_PORT, 1); #endif - SBind(sock, ds->sourceAddr); + SBind(sock, ds->sourceAddr); + } + setNonBlocking(sock); + SConnectWithTimeout(sock, ds->remote, ds->tcpConnectTimeout); + return sock; } - SConnect(sock, ds->remote); - setNonBlocking(sock); - } - catch(const std::runtime_error& e) { - /* don't leak our file descriptor if SConnect() (for example) throws */ - close(sock); - throw; - } - return sock; + catch(const std::runtime_error& e) { + /* don't leak our file descriptor if SConnect() (for example) throws */ + downstreamFailures++; + close(sock); + if (downstreamFailures > ds->retries) { + throw; + } + } + } while(downstreamFailures <= ds->retries); + + return -1; } struct ConnectionInfo @@ -427,8 +434,9 @@ void* tcpClientThread(int pipefd) } int dsock = -1; + uint16_t downstreamFailures=0; if(sockets.count(ds->remote) == 0) { - dsock=setupTCPDownstream(ds); + dsock=setupTCPDownstream(ds, downstreamFailures); sockets[ds->remote]=dsock; } else @@ -438,15 +446,14 @@ void* tcpClientThread(int pipefd) ds->outstanding++; outstanding = true; - uint16_t downstream_failures=0; retry:; if (dsock < 0) { sockets.erase(ds->remote); break; } - if (ds->retries > 0 && downstream_failures > ds->retries) { - vinfolog("Downstream connection to %s failed %d times in a row, giving up.", ds->getName(), downstream_failures); + if (ds->retries > 0 && downstreamFailures > ds->retries) { + vinfolog("Downstream connection to %s failed %d times in a row, giving up.", ds->getName(), downstreamFailures); close(dsock); dsock=-1; sockets.erase(ds->remote); @@ -458,9 +465,9 @@ void* tcpClientThread(int pipefd) close(dsock); dsock=-1; sockets.erase(ds->remote); - dsock=setupTCPDownstream(ds); + downstreamFailures++; + dsock=setupTCPDownstream(ds, downstreamFailures); sockets[ds->remote]=dsock; - downstream_failures++; goto retry; } @@ -477,9 +484,9 @@ void* tcpClientThread(int pipefd) close(dsock); dsock=-1; sockets.erase(ds->remote); - dsock=setupTCPDownstream(ds); + downstreamFailures++; + dsock=setupTCPDownstream(ds, downstreamFailures); sockets[ds->remote]=dsock; - downstream_failures++; goto retry; } @@ -496,9 +503,9 @@ void* tcpClientThread(int pipefd) close(dsock); dsock=-1; sockets.erase(ds->remote); - dsock=setupTCPDownstream(ds); + downstreamFailures++; + dsock=setupTCPDownstream(ds, downstreamFailures); sockets[ds->remote]=dsock; - downstream_failures++; if(xfrStarted) { goto drop; } diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index ba2f8b72cf..dcf9ec0634 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -451,6 +451,7 @@ struct DownstreamState double latencyUsec{0.0}; int order{1}; int weight{1}; + int tcpConnectTimeout{5}; int tcpRecvTimeout{30}; int tcpSendTimeout{30}; unsigned int sourceItf{0};