From: Mark Zealey Date: Mon, 2 Dec 2013 09:12:46 +0000 (+0200) Subject: fixes PowerDNS/pdns#666 X-Git-Tag: rec-3.6.0-rc1~239^2~2^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bca4751c676bfaa14d9076d9654cabd5437bc77e;p=thirdparty%2Fpdns.git fixes PowerDNS/pdns#666 SO_REUSEPORT is available on various BSD operating systems as standard, and also as a Linux kernel patch from Google. It allows 1) Running 2 PowerDNS processes concurrently so that you can restart PowerDNS without losing any packets 2) (the main purpose for my writing this patch) On Linux with a patched kernel, it removes contention from many threads using a socket. In my tests this improves performance with a packet cache from 300kqps to 1mqps. If the SO_REUSEPORT call is available and the reuseport config option is set to yes, the attached patch causes each receiver thread to open a new socket for connections, which allows each thread (on Linux) to operate at full speed rather than waiting on a slow socket. It should fail nicely, i.e. if the call is not available at either compile time or run time it will just use the initially created socket. 
Was merged in to linux 3.9 series - see https://lwn.net/Articles/542629/ for more information --- diff --git a/pdns/common_startup.cc b/pdns/common_startup.cc index eb75054918..f3d6ea1567 100644 --- a/pdns/common_startup.cc +++ b/pdns/common_startup.cc @@ -54,6 +54,7 @@ void declareArguments() ::arg().set("smtpredirector","Our smtpredir MX host")="a.misconfigured.powerdns.smtp.server"; ::arg().set("local-address","Local IP addresses to which we bind")="0.0.0.0"; ::arg().set("local-ipv6","Local IP address to which we bind")=""; + ::arg().setSwitch("reuseport","Enable higher performance on compliant kernels by using SO_REUSEPORT allowing each receiver thread to open its own socket")="no"; ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0"; ::arg().set("query-local-address6","Source IPv6 address for sending queries")="::"; ::arg().set("overload-queue-length","Maximum queuelength moving to packetcache only")="0"; @@ -243,6 +244,15 @@ void *qthread(void *number) bool logDNSQueries = ::arg().mustDo("log-dns-queries"); bool skipfirst=true; unsigned int maintcount = 0; + UDPNameserver *NS = N; + + // If we have SO_REUSEPORT then create a new port for all receiver threads + // other than the first one. + if( number > 0 && NS->canReusePort() ) { + L<receive(&question))) { // receive a packet inline + + if(!(P=NS->receive(&question))) { // receive a packet inline continue; // packet was broken, try again } @@ -297,7 +308,7 @@ void *qthread(void *number) cached.d.id=P->d.id; cached.commitD(); // commit d to the packet inlined - N->send(&cached); // answer it then inlined + NS->send(&cached); // answer it then inlined diff=P->d_dt.udiff(); avg_latency=(int)(0.999*avg_latency+0.001*diff); // 'EWMA' diff --git a/pdns/docs/pdns.xml b/pdns/docs/pdns.xml index 9f9bda4468..27a2179be9 100644 --- a/pdns/docs/pdns.xml +++ b/pdns/docs/pdns.xml @@ -16180,6 +16180,18 @@ Tell PowerDNS to log all incoming DNS queries. 
This will lead to a lot of loggin If this many packets are waiting for database attention, answer any new questions strictly from the packet cache. + reuseport=[yes|no] + + On Linux 3.9 and some BSD kernels the SO_REUSEPORT option allows each + receiver-thread to open a new socket on the same port which allows + for much higher performance on multi-core boxes. Setting this option + will enable use of SO_REUSEPORT when available and seamlessly fall + back to a single socket when it is not available. A side-effect is + that you can start multiple servers on the same IP/port combination + which may or may not be a good idea. You could use this to enable + transparent restarts, but it may also mask configuration issues and + for this reason it is disabled by default. + server-id diff --git a/pdns/nameserver.cc b/pdns/nameserver.cc index 9c9eddf326..75ab1d511c 100644 --- a/pdns/nameserver.cc +++ b/pdns/nameserver.cc @@ -94,6 +94,7 @@ void UDPNameserver::bindIPv4() { vectorlocals; stringtok(locals,::arg()["local-address"]," ,"); + int one = 1; if(locals.empty()) throw PDNSException("No local address specified"); @@ -118,15 +119,22 @@ void UDPNameserver::bindIPv4() memset(&locala,0,sizeof(locala)); locala.sin4.sin_family=AF_INET; - if(localname=="0.0.0.0") { - int val=1; - setsockopt(s, IPPROTO_IP, GEN_IP_PKTINFO, &val, sizeof(val)); - } + if(localname=="0.0.0.0") + setsockopt(s, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); + +#ifdef SO_REUSEPORT + if( d_can_reuseport ) + if( setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) ) + d_can_reuseport = false; +#endif + locala=ComboAddress(localname, ::arg().asNum("local-port")); if(locala.sin4.sin_family != AF_INET) throw PDNSException("Attempting to bind IPv4 socket to IPv6 address"); - g_localaddresses.push_back(locala); + if( !d_additional_socket ) + g_localaddresses.push_back(locala); + if(::bind(s, (sockaddr*)&locala, locala.getSocklen()) < 0) { L< locals; stringtok(locals,::arg()["local-ipv6"]," ,"); + int 
one=1; if(locals.empty()) return; @@ -212,12 +221,19 @@ void UDPNameserver::bindIPv6() ComboAddress locala(localname, ::arg().asNum("local-port")); if(IsAnyAddress(locala)) { - int val=1; - setsockopt(s, IPPROTO_IP, GEN_IP_PKTINFO, &val, sizeof(val)); // linux supports this, so why not - might fail on other systems - setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, &val, sizeof(val)); - setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof(val)); // if this fails, we report an error in tcpreceiver too + setsockopt(s, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one)); // linux supports this, so why not - might fail on other systems + setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); + setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)); // if this fails, we report an error in tcpreceiver too } - g_localaddresses.push_back(locala); + +#ifdef SO_REUSEPORT + if( d_can_reuseport ) + if( setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) ) + d_can_reuseport = false; +#endif + + if( !d_additional_socket ) + g_localaddresses.push_back(locala); if(::bind(s, (sockaddr*)&locala, sizeof(locala))<0) { L< d_sockets; void bindIPv4(); void bindIPv6();