2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
32 #include "ws-recursor.hh"
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
37 #include "dns_random.hh"
41 #include "opensslsigners.hh"
44 #include <boost/static_assert.hpp>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
55 #include "arguments.hh"
59 #include "sortlist.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
67 #include "malloctrace.hh"
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
80 #include "lua-recursor4.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
91 #include "pubsuffix.hh"
94 #endif /* NOD_ENABLED */
96 #include "rec-protobuf.hh"
97 #include "rec-snmp.hh"
100 #include <systemd/sd-daemon.h>
103 #include "namespaces.hh"
106 #include "uuid-utils.hh"
107 #endif /* HAVE_PROTOBUF */
111 typedef map
<ComboAddress
, uint32_t, ComboAddress::addressOnlyLessThan
> tcpClientCounts_t
;
113 static thread_local
std::shared_ptr
<RecursorLua4
> t_pdl
;
114 static thread_local
unsigned int t_id
= 0;
115 static thread_local
std::shared_ptr
<Regex
> t_traceRegex
;
116 static thread_local
std::unique_ptr
<tcpClientCounts_t
> t_tcpClientCounts
;
118 static thread_local
std::shared_ptr
<std::vector
<std::unique_ptr
<RemoteLogger
>>> t_protobufServers
{nullptr};
119 static thread_local
uint64_t t_protobufServersGeneration
;
120 static thread_local
std::shared_ptr
<std::vector
<std::unique_ptr
<RemoteLogger
>>> t_outgoingProtobufServers
{nullptr};
121 static thread_local
uint64_t t_outgoingProtobufServersGeneration
;
122 #endif /* HAVE_PROTOBUF */
125 static thread_local
std::shared_ptr
<std::vector
<std::unique_ptr
<FrameStreamLogger
>>> t_frameStreamServers
{nullptr};
126 static thread_local
uint64_t t_frameStreamServersGeneration
;
127 #endif /* HAVE_FSTRM */
129 thread_local
std::unique_ptr
<MT_t
> MT
; // the big MTasker
130 std::unique_ptr
<MemRecursorCache
> s_RC
= std::unique_ptr
<MemRecursorCache
>(new MemRecursorCache());
133 thread_local
std::unique_ptr
<RecursorPacketCache
> t_packetCache
;
134 thread_local FDMultiplexer
* t_fdm
{nullptr};
135 thread_local
std::unique_ptr
<addrringbuf_t
> t_remotes
, t_servfailremotes
, t_largeanswerremotes
, t_bogusremotes
;
136 thread_local
std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > > t_queryring
, t_servfailqueryring
, t_bogusqueryring
;
137 thread_local
std::shared_ptr
<NetmaskGroup
> t_allowFrom
;
139 thread_local
std::shared_ptr
<nod::NODDB
> t_nodDBp
;
140 thread_local
std::shared_ptr
<nod::UniqueResponseDB
> t_udrDBp
;
141 #endif /* NOD_ENABLED */
142 __thread
struct timeval g_now
; // timestamp, updated (too) frequently
144 typedef vector
<pair
<int, function
< void(int, any
&) > > > deferredAdd_t
;
146 // for communicating with our threads
147 // effectively readonly after startup
152 int writeToThread
{-1};
153 int readToThread
{-1};
154 int writeFromThread
{-1};
155 int readFromThread
{-1};
156 int writeQueriesToThread
{-1}; // this one is non-blocking
157 int readQueriesToThread
{-1};
160 /* FDs corresponding to the TCP sockets this thread is listening on.
162 These FDs are also in deferredAdds when we have one
163 socket per listener, and in g_deferredAdds otherwise. */
164 std::set
<int> tcpSockets
;
165 /* FD corresponding to listening sockets if we have one socket per
166 listener (with reuseport), otherwise all listeners share the
167 same FD and g_deferredAdds is then used instead */
168 deferredAdd_t deferredAdds
;
169 struct ThreadPipeSet pipes
;
172 uint64_t numberOfDistributedQueries
{0};
173 /* handle the web server, carbon, statistics and the control channel */
174 bool isHandler
{false};
175 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
176 bool isListener
{false};
177 /* process queries */
178 bool isWorker
{false};
181 /* first we have the handler thread, t_id == 0 (some other
182 helper threads like SNMP might have t_id == 0 as well)
183 then the distributor threads if any
184 and finally the workers */
185 static std::vector
<RecThreadInfo
> s_threadInfos
;
186 /* without reuseport, all listeners share the same sockets */
187 static deferredAdd_t g_deferredAdds
;
189 typedef vector
<int> tcpListenSockets_t
;
190 typedef map
<int, ComboAddress
> listenSocketsAddresses_t
; // is shared across all threads right now
192 static const ComboAddress
g_local4("0.0.0.0"), g_local6("::");
193 static listenSocketsAddresses_t g_listenSocketsAddresses
; // is shared across all threads right now
194 static set
<int> g_fromtosockets
; // listen sockets that use 'sendfromto()' mechanism
195 static vector
<ComboAddress
> g_localQueryAddresses4
, g_localQueryAddresses6
;
196 static AtomicCounter counter
;
197 static std::shared_ptr
<SyncRes::domainmap_t
> g_initialDomainMap
; // new threads needs this to be setup
198 static std::shared_ptr
<NetmaskGroup
> g_initialAllowFrom
; // new thread needs to be setup with this
199 static NetmaskGroup g_XPFAcl
;
200 static size_t g_tcpMaxQueriesPerConn
;
201 static size_t s_maxUDPQueriesPerRound
;
202 static uint64_t g_latencyStatSize
;
203 static uint32_t g_disthashseed
;
204 static unsigned int g_maxTCPPerClient
;
205 static unsigned int g_maxMThreads
;
206 static unsigned int g_numDistributorThreads
;
207 static unsigned int g_numWorkerThreads
;
208 static int g_tcpTimeout
;
209 static uint16_t g_udpTruncationThreshold
;
210 static uint16_t g_xpfRRCode
{0};
211 static std::atomic
<bool> statsWanted
;
212 static std::atomic
<bool> g_quiet
;
213 static bool g_logCommonErrors
;
214 static bool g_anyToTcp
;
215 static bool g_weDistributeQueries
; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
216 static bool g_reusePort
{false};
217 static bool g_gettagNeedsEDNSOptions
{false};
218 static time_t g_statisticsInterval
;
219 static bool g_useIncomingECS
;
220 static bool g_useKernelTimestamp
;
221 std::atomic
<uint32_t> g_maxCacheEntries
, g_maxPacketCacheEntries
;
223 static bool g_nodEnabled
;
224 static DNSName g_nodLookupDomain
;
225 static bool g_nodLog
;
226 static SuffixMatchNode g_nodDomainWL
;
227 static std::string g_nod_pbtag
;
228 static bool g_udrEnabled
;
229 static bool g_udrLog
;
230 static std::string g_udr_pbtag
;
231 #endif /* NOD_ENABLED */
232 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
233 static boost::container::flat_set
<uint16_t> s_avoidUdpSourcePorts
;
235 static std::set
<uint16_t> s_avoidUdpSourcePorts
;
237 static uint16_t s_minUdpSourcePort
;
238 static uint16_t s_maxUdpSourcePort
;
239 static double s_balancingFactor
;
241 RecursorControlChannel s_rcc
; // only active in the handler thread
242 RecursorStats g_stats
;
243 string s_programname
="pdns_recursor";
245 bool g_lowercaseOutgoing
;
246 unsigned int g_networkTimeoutMsec
;
247 unsigned int g_numThreads
;
248 uint16_t g_outgoingEDNSBufsize
;
249 bool g_logRPZChanges
{false};
251 // Used in the Syncres to not throttle certain servers
252 GlobalStateHolder
<SuffixMatchNode
> g_dontThrottleNames
;
253 GlobalStateHolder
<NetmaskGroup
> g_dontThrottleNetmasks
;
255 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
256 #define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
257 // Bad Nets taken from both:
258 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
260 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
261 // where such a network may not be considered a valid destination
262 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
263 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
265 //! used to send information to a newborn mthread
266 struct DNSComboWriter
{
267 DNSComboWriter(const std::string
& query
, const struct timeval
& now
): d_mdp(true, query
), d_now(now
), d_query(query
)
271 DNSComboWriter(const std::string
& query
, const struct timeval
& now
, std::vector
<std::string
>&& policyTags
, LuaContext::LuaObject
&& data
, std::vector
<DNSRecord
>&& records
): d_mdp(true, query
), d_now(now
), d_query(query
), d_policyTags(std::move(policyTags
)), d_records(std::move(records
)), d_data(std::move(data
))
275 void setRemote(const ComboAddress
& sa
)
280 void setSource(const ComboAddress
& sa
)
285 void setLocal(const ComboAddress
& sa
)
290 void setDestination(const ComboAddress
& sa
)
295 void setSocket(int sock
)
300 string
getRemote() const
302 if (d_source
== d_remote
) {
303 return d_source
.toStringWithPort();
305 return d_source
.toStringWithPort() + " (proxied by " + d_remote
.toStringWithPort() + ")";
309 struct timeval d_now
;
310 /* Remote client, might differ from d_source
311 in case of XPF, in which case d_source holds
312 the IP of the client and d_remote of the proxy
314 ComboAddress d_remote
;
315 ComboAddress d_source
;
316 /* Destination address, might differ from
317 d_destination in case of XPF, in which case
318 d_destination holds the IP of the proxy and
319 d_local holds our own. */
320 ComboAddress d_local
;
321 ComboAddress d_destination
;
323 boost::uuids::uuid d_uuid
;
324 string d_requestorId
;
327 struct timeval d_kernelTimestamp
{0,0};
330 std::vector
<std::string
> d_policyTags
;
331 std::vector
<DNSRecord
> d_records
;
332 LuaContext::LuaObject d_data
;
333 EDNSSubnetOpts d_ednssubnet
;
334 shared_ptr
<TCPConnection
> d_tcpConnection
;
335 boost::optional
<int> d_rcode
{boost::none
};
337 unsigned int d_tag
{0};
339 uint32_t d_ttlCap
{std::numeric_limits
<uint32_t>::max()};
340 uint16_t d_ecsBegin
{0};
341 uint16_t d_ecsEnd
{0};
342 bool d_variable
{false};
343 bool d_ecsFound
{false};
344 bool d_ecsParsed
{false};
345 bool d_followCNAMERecords
{false};
346 bool d_logResponse
{false};
352 return MT
? MT
.get() : nullptr;
357 static ArgvMap theArg
;
361 unsigned int getRecursorThreadId()
// Reports whether the calling thread acts as a query distributor: true only
// when g_weDistributeQueries is set and the s_threadInfos entry for this
// thread (indexed by the thread-local t_id) has isListener set.
// NOTE(review): lines between the signature and this return are not visible here.
371 static bool isDistributorThread()
377 return g_weDistributeQueries
&& s_threadInfos
.at(t_id
).isListener
;
// Reports whether the calling thread is the handler thread, i.e. whether the
// s_threadInfos entry for the thread-local t_id has isHandler set.
// NOTE(review): lines between the signature and this return are not visible here.
380 static bool isHandlerThread()
386 return s_threadInfos
.at(t_id
).isHandler
;
389 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
);
// Sends `data` over the TCP socket `sock` through the multiplexer.
// The socket handle is registered for writing with handleTCPClientWritable as
// callback, then the function waits in MT->waitEvent for up to
// g_networkTimeoutMsec. When waitEvent returns 0 or -1 the write FD is removed
// again. Otherwise the size of the returned `packet` is compared with
// data.size() to detect an incomplete or failed send.
// NOTE(review): the setup of `pident`/`packet` and the return statements are
// not visible in this view.
391 // -1 is error, 0 is timeout, 1 is success
392 int asendtcp(const string
& data
, Socket
* sock
)
398 t_fdm
->addWriteFD(sock
->getHandle(), handleTCPClientWritable
, pident
);
401 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
);
403 if(!ret
|| ret
==-1) { // timeout
404 t_fdm
->removeWriteFD(sock
->getHandle());
406 else if(packet
.size() !=data
.size()) { // main loop tells us what it sent out, or empty in case of an error
412 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
);
// Receives data from the TCP socket `sock` into `data` through the multiplexer.
// `incompleteOkay` is copied into the PacketID (pident.inIncompleteOkay), the
// socket handle is registered for reading with handleTCPClientReadable as
// callback, and the function waits in MT->waitEvent for up to
// g_networkTimeoutMsec. When waitEvent returns 0 or -1 the read FD is removed
// again; an empty `data` after a successful wait is treated as an error/EOF.
// NOTE(review): how `len` is used and the return statements are not visible
// in this view.
414 // -1 is error, 0 is timeout, 1 is success
415 int arecvtcp(string
& data
, size_t len
, Socket
* sock
, bool incompleteOkay
)
421 pident
.inIncompleteOkay
=incompleteOkay
;
422 t_fdm
->addReadFD(sock
->getHandle(), handleTCPClientReadable
, pident
);
424 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
425 if(!ret
|| ret
==-1) { // timeout
426 t_fdm
->removeReadFD(sock
->getHandle());
428 else if(data
.empty()) {// error, EOF or other
// Multiplexer callback for the socket used by GenUDPQueryResponse().
// `var` carries the PacketID of the waiting mthread. The datagram is read with
// recvfrom(); if its source address differs from pident.remote a notice is
// logged. The FD is then removed from the multiplexer and the waiter is woken
// via MT->sendEvent, either with the received bytes or with an empty string.
// NOTE(review): the branch structure around the two sendEvent calls (presumably
// keyed on the recvfrom return value) is not visible in this view, nor is
// whether the wrong-remote case returns early.
435 static void handleGenUDPQueryResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
437 PacketID pident
=*any_cast
<PacketID
>(&var
);
439 ComboAddress fromaddr
;
440 socklen_t addrlen
=sizeof(fromaddr
);
442 ssize_t ret
=recvfrom(fd
, resp
, sizeof(resp
), 0, (sockaddr
*)&fromaddr
, &addrlen
);
443 if (fromaddr
!= pident
.remote
) {
444 g_log
<<Logger::Notice
<<"Response received from the wrong remote host ("<<fromaddr
.toStringWithPort()<<" instead of "<<pident
.remote
.toStringWithPort()<<"), discarding"<<endl
;
448 t_fdm
->removeReadFD(fd
);
450 string
data(resp
, (size_t) ret
);
451 MT
->sendEvent(pident
, &data
);
455 MT
->sendEvent(pident
, &empty
);
456 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
// Sends `query` to `dest` over a fresh UDP socket and returns the response.
// A SOCK_DGRAM socket of dest's address family is created, a local address is
// obtained from getQueryLocalAddress(family, 0), the socket is registered for
// reading with handleGenUDPQueryResponse, and the function waits in
// MT->waitEvent for up to g_networkTimeoutMsec. When waitEvent returns 0 or -1
// the read FD is removed again.
// NOTE(review): the bind/connect/send steps, the PacketID setup and the return
// statements are not visible in this view.
459 string
GenUDPQueryResponse(const ComboAddress
& dest
, const string
& query
)
461 Socket
s(dest
.sin4
.sin_family
, SOCK_DGRAM
);
463 ComboAddress local
= getQueryLocalAddress(dest
.sin4
.sin_family
, 0);
473 t_fdm
->addReadFD(s
.getHandle(), handleGenUDPQueryResponse
, pident
);
477 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
479 if(!ret
|| ret
==-1) { // timeout
480 t_fdm
->removeReadFD(s
.getHandle());
482 else if(data
.empty()) {// error, EOF or other
483 // we could special case this
489 //! pick a random query local address
// For AF_INET the address is drawn from g_localQueryAddresses4, using
// dns_random(size) as index; the other visible branch draws from
// g_localQueryAddresses6 in the same way. `port` is stored in network byte
// order (htons) in the returned address.
// NOTE(review): what is returned when the relevant list is empty is not
// visible in this view.
490 ComboAddress
getQueryLocalAddress(int family
, uint16_t port
)
493 if(family
==AF_INET
) {
494 if(g_localQueryAddresses4
.empty())
497 ret
= g_localQueryAddresses4
[dns_random(g_localQueryAddresses4
.size())];
498 ret
.sin4
.sin_port
= htons(port
);
501 if(g_localQueryAddresses6
.empty())
504 ret
= g_localQueryAddresses6
[dns_random(g_localQueryAddresses6
.size())];
506 ret
.sin6
.sin6_port
= htons(port
);
511 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
&);
// Sets the SOL_SOCKET buffer option `optname` (e.g. SO_RCVBUF / SO_SNDBUF) of
// `fd` to `size` bytes.
// The current value is read first with getsockopt(); if that succeeds and the
// current size is already larger than `size`, an error is logged saying the
// buffer is not being decreased. A failing setsockopt() is logged as well.
// NOTE(review): the declarations of `psize`/`err` and any early return after
// the "not decreasing" message are not visible in this view.
513 static void setSocketBuffer(int fd
, int optname
, uint32_t size
)
516 socklen_t len
=sizeof(psize
);
518 if(!getsockopt(fd
, SOL_SOCKET
, optname
, (char*)&psize
, &len
) && psize
> size
) {
519 g_log
<<Logger::Error
<<"Not decreasing socket buffer size from "<<psize
<<" to "<<size
<<endl
;
523 if (setsockopt(fd
, SOL_SOCKET
, optname
, (char*)&size
, sizeof(size
)) < 0) {
525 g_log
<< Logger::Error
<< "Unable to raise socket buffer size to " << size
<< ": " << stringerror(err
) << endl
;
// Convenience wrapper: applies setSocketBuffer() to the SO_RCVBUF option of `fd`.
530 static void setSocketReceiveBuffer(int fd
, uint32_t size
)
532 setSocketBuffer(fd
, SO_RCVBUF
, size
);
// Convenience wrapper: applies setSocketBuffer() to the SO_SNDBUF option of `fd`.
535 static void setSocketSendBuffer(int fd
, uint32_t size
)
537 setSocketBuffer(fd
, SO_SNDBUF
, size
);
541 // you can ask this class for a UDP socket to send a query from
542 // this socket is not yours, don't even think about deleting it
543 // but after you call 'returnSocket' on it, don't assume anything anymore
546 unsigned int d_numsocks
;
548 UDPClientSocks() : d_numsocks(0)
// Creates a UDP client socket for talking to `toaddr` and stores it in *fd.
// The socket comes from makeClientSocket() for toaddr's address family and is
// then connect()ed to `toaddr`. When connect() fails the visible code handles
// a PDNSException raised while closing the socket (logged) and special-cases
// ENETUNREACH.
// NOTE(review): the exact return values on each path and the d_numsocks
// bookkeeping are not visible in this view.
552 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
553 int getSocket(const ComboAddress
& toaddr
, int* fd
)
555 *fd
=makeClientSocket(toaddr
.sin4
.sin_family
);
556 if(*fd
< 0) // temporary error - receive exception otherwise
559 if(connect(*fd
, (struct sockaddr
*)(&toaddr
), toaddr
.getSocklen()) < 0) {
564 catch(const PDNSException
& e
) {
565 g_log
<<Logger::Error
<<"Error closing UDP socket after connect() failed: "<<e
.reason
<<endl
;
568 if(err
==ENETUNREACH
) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
577 // return a socket to the pool, or simply erase it
// The FD is first removed from the multiplexer; a FDMultiplexerException there
// is tolerated because the socket may never have been registered. A
// PDNSException raised while closing the socket is logged, not propagated.
// NOTE(review): the close call itself and any d_numsocks update are not
// visible in this view.
578 void returnSocket(int fd
)
581 t_fdm
->removeReadFD(fd
);
583 catch(const FDMultiplexerException
& e
) {
584 // we sometimes return a socket that has not yet been assigned to t_fdm
590 catch(const PDNSException
& e
) {
591 g_log
<<Logger::Error
<<"Error closing returned UDP socket: "<<e
.reason
<<endl
;
599 // returns -1 for errors which might go away, throws for ones that won't
// Creates a SOCK_DGRAM socket of the given family and binds it to a local
// query address obtained from getQueryLocalAddress(family, port).
// The source port is drawn with dns_random() from the inclusive range
// [s_minUdpSourcePort, s_maxUdpSourcePort], redrawing while the port is listed
// in s_avoidUdpSourcePorts. EMFILE from socket() is treated as
// non-catastrophic; other socket() failures and a failed bind raise
// PDNSException. After binding, setReceiveSocketErrors() is applied.
// NOTE(review): the retry loop around bind (the `tries` counter) and the
// remaining socket setup are only partially visible in this view.
600 static int makeClientSocket(int family
)
602 int ret
=socket(family
, SOCK_DGRAM
, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
604 if(ret
< 0 && errno
==EMFILE
) // this is not a catastrophic error
608 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family
)+"): "+stringerror());
610 // setCloseOnExec(ret); // we're not going to exec
617 if(tries
==1) // fall back to kernel 'random'
621 port
= s_minUdpSourcePort
+ dns_random(s_maxUdpSourcePort
- s_minUdpSourcePort
+ 1);
623 while (s_avoidUdpSourcePorts
.count(port
));
626 sin
=getQueryLocalAddress(family
, port
); // does htons for us
628 if (::bind(ret
, (struct sockaddr
*)&sin
, sin
.getSocklen()) >= 0)
634 throw PDNSException("Resolver binding to local query client socket on "+sin
.toString()+": "+stringerror());
638 setReceiveSocketErrors(ret
, family
);
650 static thread_local
std::unique_ptr
<UDPClientSocks
> t_udpclientsocks
;
652 /* these two functions are used by LWRes */
// asendto: sends the outgoing query in `data` (length `len`) to `toaddr`.
// Before opening a socket, the waiters in MT->d_waiters that compare equal
// under PacketIDBirthdayCompare are scanned. If one of them owns a real socket
// (key.fd > -1), this query's `id` is added to that waiter's chain and *fd is
// set to -1, so no new packet is sent. Otherwise a socket is obtained from
// t_udpclientsocks->getSocket(), registered for reading with
// handleUDPServerResponse, and the data is sent with send(). On a send
// failure the socket is handed back via returnSocket() and errno is restored
// for logging.
// NOTE(review): the full PacketID setup, the return statements and the
// condition guarding returnSocket() are not visible in this view. The two
// cerr lines below look like leftover debug output whose comment delimiters
// were lost in extraction.
653 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
654 int asendto(const char *data
, size_t len
, int flags
,
655 const ComboAddress
& toaddr
, uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int* fd
)
659 pident
.domain
= domain
;
660 pident
.remote
= toaddr
;
663 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
664 pair
<MT_t::waiters_t::iterator
, MT_t::waiters_t::iterator
> chain
=MT
->d_waiters
.equal_range(pident
, PacketIDBirthdayCompare());
666 for(; chain
.first
!= chain
.second
; chain
.first
++) {
667 if(chain
.first
->key
.fd
> -1) { // don't chain onto existing chained waiter!
669 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
670 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
671 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
673 chain
.first
->key
.chain
.insert(id
); // we can chain
674 *fd
=-1; // gets used in waitEvent / sendEvent later on
679 int ret
=t_udpclientsocks
->getSocket(toaddr
, fd
);
686 t_fdm
->addReadFD(*fd
, handleUDPServerResponse
, pident
);
687 ret
= send(*fd
, data
, len
, 0);
692 t_udpclientsocks
->returnSocket(*fd
);
694 errno
= tmp
; // this is for logging purposes only
// arecvfrom: waits for the response matching (domain, fromaddr, ...) that was
// sent with asendto().
// The near-miss limit is read from the "spoof-nearmiss-max" setting and cached
// in a function-local static. The function then waits in MT->waitEvent for up
// to g_networkTimeoutMsec. On a delivered result an empty packet means error;
// otherwise *d_len receives the packet size. If the limit is non-zero and the
// number of near misses recorded in the PacketID exceeds it, the event is
// logged as a suspected spoof attempt and g_stats.spoofCount is incremented.
// On error or timeout the socket is handed back via returnSocket().
// NOTE(review): the remaining PacketID fields, the return statements and the
// guard around returnSocket() are not visible in this view.
698 // -1 is error, 0 is timeout, 1 is success
699 int arecvfrom(std::string
& packet
, int flags
, const ComboAddress
& fromaddr
, size_t *d_len
,
700 uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int fd
, struct timeval
* now
)
702 static optional
<unsigned int> nearMissLimit
;
704 nearMissLimit
=::arg().asNum("spoof-nearmiss-max");
709 pident
.domain
=domain
;
711 pident
.remote
=fromaddr
;
713 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
, now
);
715 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
717 /* handleUDPServerResponse() will close the socket for us no matter what */
718 if(packet
.empty()) // means "error"
721 *d_len
=packet
.size();
723 if(*nearMissLimit
&& pident
.nearMisses
> *nearMissLimit
) {
724 g_log
<<Logger::Error
<<"Too many ("<<pident
.nearMisses
<<" > "<<*nearMissLimit
<<") bogus answers for '"<<domain
<<"' from "<<fromaddr
.toString()<<", assuming spoof attempt."<<endl
;
725 g_stats
.spoofCount
++;
730 /* getting there means error or timeout, it's up to us to close the socket */
732 t_udpclientsocks
->returnSocket(fd
);
// Writes the process id to the file named by s_pidfname, unless the
// "write-pid" setting is disabled. The file is opened in append mode
// (std::ios_base::app); a failure is logged together with the system error
// string.
// NOTE(review): the stream-state check selecting between the write and the
// error log is not visible in this view.
737 static void writePid(void)
739 if(!::arg().mustDo("write-pid"))
741 ofstream
of(s_pidfname
.c_str(), std::ios_base::app
);
743 of
<< Utility::getpid() <<endl
;
746 g_log
<< Logger::Error
<< "Writing pid for " << Utility::getpid() << " to " << s_pidfname
<< " failed: "
747 << stringerror(err
) << endl
;
751 uint16_t TCPConnection::s_maxInFlight
;
// Wraps an accepted TCP socket `fd` from client `addr`.
// `data` starts as two zero bytes. Construction increments the global
// s_currentConnections counter and this thread's per-client connection count
// in t_tcpClientCounts; the destructor is the counterpart that decrements them.
753 TCPConnection::TCPConnection(int fd
, const ComboAddress
& addr
) : data(2, 0), d_remote(addr
), d_fd(fd
)
755 ++s_currentConnections
;
756 (*t_tcpClientCounts
)[d_remote
]++;
// Closes the connection's socket and undoes the accounting done by the
// constructor. Failures while closing are logged, never propagated.
759 TCPConnection::~TCPConnection()
762 if(closesocket(d_fd
) < 0)
763 g_log
<<Logger::Error
<<"Error closing socket for TCPConnection"<<endl
;
765 catch(const PDNSException
& e
) {
766 g_log
<<Logger::Error
<<"Error closing TCPConnection socket: "<<e
.reason
<<endl
;
// Drop this client's entry once its last connection is gone. The previous
// post-decrement form tested the value *before* decrementing, so an entry going
// from 1 to 0 was kept forever and the per-thread map grew with every distinct
// client. A stray zero entry is erased as well rather than wrapped around.
769 auto clientCount = t_tcpClientCounts->find(d_remote);
if(clientCount != t_tcpClientCounts->end() && (clientCount->second == 0 || --clientCount->second == 0))
770 t_tcpClientCounts
->erase(clientCount
);
771 --s_currentConnections
;
774 AtomicCounter
TCPConnection::s_currentConnections
;
776 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
);
778 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
// Records per-response statistics in the thread-local ring buffers:
//  - answers larger than 1000 bytes add `remote` to t_largeanswerremotes;
//  - RCode::ServFail adds `remote` to t_servfailremotes and, when `query` is
//    non-null, the (query, qtype) pair to t_servfailqueryring.
// Each ring buffer is only touched when its pointer is set.
// NOTE(review): the switch statement itself and the handling of NXDomain and
// other rcodes are not visible in this view.
779 static void updateResponseStats(int res
, const ComboAddress
& remote
, unsigned int packetsize
, const DNSName
* query
, uint16_t qtype
)
781 if(packetsize
> 1000 && t_largeanswerremotes
)
782 t_largeanswerremotes
->push_back(remote
);
784 case RCode::ServFail
:
785 if(t_servfailremotes
) {
786 t_servfailremotes
->push_back(remote
);
787 if(query
&& t_servfailqueryring
) // packet cache
788 t_servfailqueryring
->push_back(make_pair(*query
, qtype
));
792 case RCode::NXDomain
:
// Builds the "(qname/qtype from remote)" suffix used in log lines for the
// query held by `dc`. A fixed fallback string is returned on the second
// visible path.
// NOTE(review): the try/catch that presumably selects the fallback is not
// visible in this view.
801 static string
makeLoginfo(const std::unique_ptr
<DNSComboWriter
>& dc
)
804 return "("+dc
->d_mdp
.d_qname
.toLogString()+"/"+DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)+" from "+(dc
->getRemote())+")";
808 return "Exception making error message for exception";
// Serializes an incoming query as a protobuf message and queues it on every
// logger in t_protobufServers.
// The requestor address is masked to `maskV4` or `maskV6` bits (chosen by the
// remote's address family) before being logged, with the original port put
// back. The EDNS subnet is masked in the same way, based on its own family.
// Policy tags are attached only when non-empty.
// NOTE(review): the early return for the "no servers configured" case and the
// declaration of `str` are not visible in this view.
812 static void protobufLogQuery(uint8_t maskV4
, uint8_t maskV6
, const boost::uuids::uuid
& uniqueId
, const ComboAddress
& remote
, const ComboAddress
& local
, const Netmask
& ednssubnet
, bool tcp
, uint16_t id
, size_t len
, const DNSName
& qname
, uint16_t qtype
, uint16_t qclass
, const std::vector
<std::string
>& policyTags
, const std::string
& requestorId
, const std::string
& deviceId
, const std::string
& deviceName
)
814 if (!t_protobufServers
) {
818 Netmask
requestorNM(remote
, remote
.sin4
.sin_family
== AF_INET
? maskV4
: maskV6
);
819 ComboAddress requestor
= requestorNM
.getMaskedNetwork();
820 requestor
.setPort(remote
.getPort());
821 RecProtoBufMessage
message(DNSProtoBufMessage::Query
, uniqueId
, &requestor
, &local
, qname
, qtype
, qclass
, id
, tcp
, len
);
822 message
.setServerIdentity(SyncRes::s_serverID
);
823 message
.setEDNSSubnet(ednssubnet
, ednssubnet
.isIPv4() ? maskV4
: maskV6
);
824 message
.setRequestorId(requestorId
);
825 message
.setDeviceId(deviceId
);
826 message
.setDeviceName(deviceName
);
828 if (!policyTags
.empty()) {
829 message
.setPolicyTags(policyTags
);
832 // cerr <<message.toDebugString()<<endl;
834 message
.serialize(str
);
836 for (auto& server
: *t_protobufServers
) {
837 server
->queueData(str
);
// Serializes an already-built response message once and queues the result on
// every logger in t_protobufServers.
// NOTE(review): the early return for the "no servers configured" case and the
// declaration of `str` are not visible in this view.
841 static void protobufLogResponse(const RecProtoBufMessage
& message
)
843 if (!t_protobufServers
) {
847 // cerr <<message.toDebugString()<<endl;
849 message
.serialize(str
);
851 for (auto& server
: *t_protobufServers
) {
852 server
->queueData(str
);
858 * Chases the CNAME provided by the PolicyCustom RPZ policy.
860 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
861 * @param qtype: The QType of the original query
862 * @param sr: A SyncRes
863 * @param res: An integer that will contain the RCODE of the lookup we do
864 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
// Only acts when the spoofed record is a CNAME: RPZ processing is switched off
// on `sr` for the duration of the lookup, the CNAME target is resolved for the
// original qtype in class IN (the RCODE goes into `res`), and the previous RPZ
// setting of `sr` is restored afterwards.
// NOTE(review): what is done with the ANSWER-section records of `ans`
// (presumably appended to `ret`) is not visible in this view.
866 static void handleRPZCustom(const DNSRecord
& spoofed
, const QType
& qtype
, SyncRes
& sr
, int& res
, vector
<DNSRecord
>& ret
)
868 if (spoofed
.d_type
== QType::CNAME
) {
869 bool oldWantsRPZ
= sr
.getWantsRPZ();
870 sr
.setWantsRPZ(false);
871 vector
<DNSRecord
> ans
;
872 res
= sr
.beginResolve(DNSName(spoofed
.d_content
->getZoneRepresentation()), qtype
, QClass::IN
, ans
);
873 for (const auto& rec
: ans
) {
874 if(rec
.d_place
== DNSResourceRecord::ANSWER
) {
878 // Reset the RPZ state of the SyncRes
879 sr
.setWantsRPZ(oldWantsRPZ
);
// Appends `rec` to the packet being built by `pw`.
// The record is written with its TTL clamped to `ttlCap`. For every record
// type except OPT, `minTTL` is lowered to the record's TTL.
// NOTE(review): minTTL is updated from the uncapped rec.d_ttl, not from the
// clamped value written to the packet; confirm this is intended.
// If the packet grows beyond `maxAnswerSize` after adding the content, the TC
// bit is set in the header unless the record belongs to the ADDITIONAL section.
// NOTE(review): the rollback/commit calls and the return values are not
// visible in this view.
883 static bool addRecordToPacket(DNSPacketWriter
& pw
, const DNSRecord
& rec
, uint32_t& minTTL
, uint32_t ttlCap
, const uint16_t maxAnswerSize
)
885 pw
.startRecord(rec
.d_name
, rec
.d_type
, (rec
.d_ttl
> ttlCap
? ttlCap
: rec
.d_ttl
), rec
.d_class
, rec
.d_place
);
887 if(rec
.d_type
!= QType::OPT
) // their TTL ain't real
888 minTTL
= min(minTTL
, rec
.d_ttl
);
890 rec
.d_content
->toPacket(pw
);
891 if(pw
.size() > static_cast<size_t>(maxAnswerSize
)) {
893 if(rec
.d_place
!= DNSResourceRecord::ADDITIONAL
) {
894 pw
.getHeader()->tc
=1;
// Creates one RemoteLogger per server listed in `config` and returns them in a
// shared vector. Each logger is built with the configured timeout,
// 100 * maxQueuedEntries, reconnect wait time and async-connect flag, and is
// told whether to log queries and/or responses. A failure to start a logger is
// logged and handled by the catch clauses below.
// The error messages now close the quote that is opened before the server name.
904 static std::shared_ptr
<std::vector
<std::unique_ptr
<RemoteLogger
>>> startProtobufServers(const ProtobufExportConfig
& config
)
906 auto result
= std::make_shared
<std::vector
<std::unique_ptr
<RemoteLogger
>>>();
908 for (const auto& server
: config
.servers
) {
910 auto logger
= make_unique
<RemoteLogger
>(server
, config
.timeout
, 100*config
.maxQueuedEntries
, config
.reconnectWaitTime
, config
.asyncConnect
);
911 logger
->setLogQueries(config
.logQueries
);
912 logger
->setLogResponses(config
.logResponses
);
913 result
->emplace_back(std::move(logger
));
915 catch(const std::exception
& e
) {
916 g_log
<<Logger::Error
<<"Error while starting protobuf logger to '"<<server
<<"': "<<e
.what()<<endl
;
918 catch(const PDNSException
& e
) {
919 g_log
<<Logger::Error
<<"Error while starting protobuf logger to '"<<server
<<"': "<<e
.reason
<<endl
;
// Brings this thread's protobuf loggers (t_protobufServers) in line with the
// current Lua configuration.
//  - Export disabled: any existing loggers are released.
//  - Export enabled and either no loggers exist or they were started for an
//    older configuration generation: existing loggers are released, new ones
//    are started from protobufExportConfig and the generation is recorded.
// NOTE(review): the bodies of the per-server loops and the return values are
// not visible in this view.
926 static bool checkProtobufExport(LocalStateHolder
<LuaConfigItems
>& luaconfsLocal
)
928 if (!luaconfsLocal
->protobufExportConfig
.enabled
) {
929 if (t_protobufServers
) {
930 for (auto& server
: *t_protobufServers
) {
933 t_protobufServers
.reset();
939 /* if the server was not running, or if it was running according to a
940 previous configuration */
941 if (!t_protobufServers
||
942 t_protobufServersGeneration
< luaconfsLocal
->generation
) {
944 if (t_protobufServers
) {
945 for (auto& server
: *t_protobufServers
) {
949 t_protobufServers
.reset();
951 t_protobufServers
= startProtobufServers(luaconfsLocal
->protobufExportConfig
);
952 t_protobufServersGeneration
= luaconfsLocal
->generation
;
// Same reconciliation as checkProtobufExport(), but for the loggers used for
// outgoing queries (t_outgoingProtobufServers), driven by
// outgoingProtobufExportConfig and its own generation counter.
// NOTE(review): the bodies of the per-server loops and the return values are
// not visible in this view.
958 static bool checkOutgoingProtobufExport(LocalStateHolder
<LuaConfigItems
>& luaconfsLocal
)
960 if (!luaconfsLocal
->outgoingProtobufExportConfig
.enabled
) {
961 if (t_outgoingProtobufServers
) {
962 for (auto& server
: *t_outgoingProtobufServers
) {
966 t_outgoingProtobufServers
.reset();
971 /* if the server was not running, or if it was running according to a
972 previous configuration */
973 if (!t_outgoingProtobufServers
||
974 t_outgoingProtobufServersGeneration
< luaconfsLocal
->generation
) {
976 if (t_outgoingProtobufServers
) {
977 for (auto& server
: *t_outgoingProtobufServers
) {
981 t_outgoingProtobufServers
.reset();
983 t_outgoingProtobufServers
= startProtobufServers(luaconfsLocal
->outgoingProtobufExportConfig
);
984 t_outgoingProtobufServersGeneration
= luaconfsLocal
->generation
;
// Creates one FrameStreamLogger (dnstap) per server listed in `config` and
// returns them in a shared vector. The tuning values from `config` are passed
// as an options map. Each server string is first parsed as an IP address; when
// that raises PDNSException it is used as an AF_UNIX socket path instead.
// A failure to start a logger is logged and handled by the catch clauses below.
// The error messages now close the quote that is opened before the server name.
992 static std::shared_ptr
<std::vector
<std::unique_ptr
<FrameStreamLogger
>>> startFrameStreamServers(const FrameStreamExportConfig
& config
)
994 auto result
= std::make_shared
<std::vector
<std::unique_ptr
<FrameStreamLogger
>>>();
996 for (const auto& server
: config
.servers
) {
998 std::unordered_map
<string
,unsigned> options
;
999 options
["bufferHint"] = config
.bufferHint
;
1000 options
["flushTimeout"] = config
.flushTimeout
;
1001 options
["inputQueueSize"] = config
.inputQueueSize
;
1002 options
["outputQueueSize"] = config
.outputQueueSize
;
1003 options
["queueNotifyThreshold"] = config
.queueNotifyThreshold
;
1004 options
["reopenInterval"] = config
.reopenInterval
;
1005 FrameStreamLogger
*fsl
= nullptr;
1007 ComboAddress
address(server
);
1008 fsl
= new FrameStreamLogger(address
.sin4
.sin_family
, address
.toStringWithPort(), true, options
);
1010 catch (const PDNSException
& e
) {
1011 fsl
= new FrameStreamLogger(AF_UNIX
, server
, true, options
);
1013 fsl
->setLogQueries(config
.logQueries
);
1014 fsl
->setLogResponses(config
.logResponses
);
1015 result
->emplace_back(fsl
);
1017 catch(const std::exception
& e
) {
1018 g_log
<<Logger::Error
<<"Error while starting dnstap framestream logger to '"<<server
<<"': "<<e
.what()<<endl
;
1020 catch(const PDNSException
& e
) {
1021 g_log
<<Logger::Error
<<"Error while starting dnstap framestream logger to '"<<server
<<"': "<<e
.reason
<<endl
;
// Brings this thread's dnstap loggers (t_frameStreamServers) in line with the
// current Lua configuration.
//  - Export disabled: any existing loggers are released (their destructors do
//    the cleanup).
//  - Export enabled and either no loggers exist or they were started for an
//    older configuration generation: existing loggers are released, new ones
//    are started from frameStreamExportConfig and the generation is recorded.
// NOTE(review): the return values are not visible in this view.
1028 static bool checkFrameStreamExport(LocalStateHolder
<LuaConfigItems
>& luaconfsLocal
)
1030 if (!luaconfsLocal
->frameStreamExportConfig
.enabled
) {
1031 if (t_frameStreamServers
) {
1032 // dt's take care of cleanup
1033 t_frameStreamServers
.reset();
1039 /* if the server was not running, or if it was running according to a
1040 previous configuration */
1041 if (!t_frameStreamServers
||
1042 t_frameStreamServersGeneration
< luaconfsLocal
->generation
) {
1044 if (t_frameStreamServers
) {
1045 // dt's take care of cleanup
1046 t_frameStreamServers
.reset();
1049 t_frameStreamServers
= startFrameStreamServers(luaconfsLocal
->frameStreamExportConfig
);
1050 t_frameStreamServersGeneration
= luaconfsLocal
->generation
;
1055 #endif /* HAVE_FSTRM */
1056 #endif /* HAVE_PROTOBUF */
// Newly-observed-domain (NOD) check for `dname`.
// Names matched by g_nodDomainWL are ignored. Otherwise the thread-local NOD
// database is asked whether the name is new; if so a notice is logged and,
// when g_nodLookupDomain is not the root, an A/IN lookup for
// <dname>.<g_nodLookupDomain> is issued through directResolve(), with the
// answer discarded.
// NOTE(review): the g_nodLog guard around the log line and the return values
// are not visible in this view.
1059 static bool nodCheckNewDomain(const DNSName
& dname
)
1061 static const QType
qt(QType::A
);
1062 static const uint16_t qc(QClass::IN
);
1064 // First check the (sub)domain isn't whitelisted for NOD purposes
1065 if (!g_nodDomainWL
.check(dname
)) {
1066 // Now check the NODDB (note this is probabilistic so can have FNs/FPs)
1067 if (t_nodDBp
&& t_nodDBp
->isNewDomain(dname
)) {
1069 // This should probably log to a dedicated log file
1070 g_log
<<Logger::Notice
<<"Newly observed domain nod="<<dname
.toLogString()<<endl
;
1072 if (!(g_nodLookupDomain
.isRoot())) {
1073 // Send a DNS A query to <domain>.g_nodLookupDomain
1074 DNSName qname
= dname
;
1075 vector
<DNSRecord
> dummy
;
1076 qname
+= g_nodLookupDomain
;
1077 directResolve(qname
, qt
, qc
, dummy
);
// Unique-DNS-response (UDR) check for one record of an answer.
// Only records in the ANSWER or ADDITIONAL section are considered. A key
// string is built from the lowercased qname, the qtype and the record's type,
// lowercased name and content; the thread-local UDR database is asked whether
// that key has been seen before, and a notice is logged for unique responses.
// NOTE(review): qtype is streamed into the key twice. This looks accidental,
// but changing it would alter the keys compared against the UDR database, so
// it is left as-is.
// NOTE(review): the g_udrLog guard and the return values are not visible in
// this view.
1085 static bool udrCheckUniqueDNSRecord(const DNSName
& dname
, uint16_t qtype
, const DNSRecord
& record
)
1088 if (record
.d_place
== DNSResourceRecord::ANSWER
||
1089 record
.d_place
== DNSResourceRecord::ADDITIONAL
) {
1090 // Create a string that represents a triplet of (qname, qtype and RR[type, name, content])
1091 std::stringstream ss
;
1092 ss
<< dname
.toDNSStringLC() << ":" << qtype
<< ":" << qtype
<< ":" << record
.d_type
<< ":" << record
.d_name
.toDNSStringLC() << ":" << record
.d_content
->getZoneRepresentation();
1093 if (t_udrDBp
&& t_udrDBp
->isUniqueResponse(ss
.str())) {
1095 // This should also probably log to a dedicated file.
1096 g_log
<<Logger::Notice
<<"Unique response observed: qname="<<dname
.toLogString()<<" qtype="<<QType(qtype
).getName()<< " rrtype=" << QType(record
.d_type
).getName() << " rrname=" << record
.d_name
.toLogString() << " rrcontent=" << record
.d_content
->getZoneRepresentation() << endl
;
1103 #endif /* NOD_ENABLED */
// Scans `ret` for CNAME records, takes the target of a CNAME record found
// there, resolves that target for `qtype` in class IN via directResolve(), and
// moves the resolved records onto the end of `ret`.
// NOTE(review): which CNAME wins when several are present, what is returned
// when no target is found, and the final return value (presumably `rcode`) are
// not visible in this view.
1105 int followCNAMERecords(vector
<DNSRecord
>& ret
, const QType
& qtype
)
1107 vector
<DNSRecord
> resolved
;
1109 for(const DNSRecord
& rr
: ret
) {
1110 if(rr
.d_type
== QType::CNAME
) {
1111 auto rec
= getRR
<CNAMERecordContent
>(rr
);
1113 target
=rec
->getTarget();
1119 if(target
.empty()) {
1123 int rcode
= directResolve(target
, qtype
, QClass::IN
, resolved
);
1125 for(DNSRecord
& rr
: resolved
) {
1126 ret
.push_back(std::move(rr
));
// Answers a single client query (UDP or TCP).
//
// p - opaque pointer to a DNSComboWriter; it is wrapped in a std::unique_ptr
//     on entry, so this function takes ownership of it.
//
// Phases visible in this excerpt, in order:
//  - record (qname, qtype) in t_queryring and compute the maximum answer size;
//  - read the EDNS options of the query (version check, ECS, NSID) and adjust
//    the maximum answer size accordingly;
//  - when built with HAVE_PROTOBUF, prepare the protobuf response message;
//  - start the response packet with DNSPacketWriter and fill in header bits;
//  - configure the SyncRes instance (Lua engine, DNSSEC, protobuf and
//    framestream servers, query source) and the Lua DNSQuestion object;
//  - run the Lua prerpz/preresolve hooks, apply the QNAME policy, then call
//    sr.beginResolve();
//  - apply policies hit during or after resolution and run the Lua
//    nodata/nxdomain/postresolve hooks;
//  - map the validation state to the AD bit or a ServFail rcode;
//  - order the records, add them to the packet and append the EDNS OPT record;
//  - update response statistics, NOD/UDR tagging and protobuf logging;
//  - send the packet (sendmsg() for UDP, writev() for TCP), optionally store
//    it in the packet cache, and do the TCP connection bookkeeping;
//  - log the outcome and update the latency statistics;
//  - catch PDNSException, MOADNSException, std::exception and anything else.
//
// NOTE(review): this excerpt omits a number of original lines (braces, try
// blocks, break/return statements, some declarations and #ifdef lines), so the
// exact nesting and the early-exit paths cannot be read off from here.
1131 static void startDoResolve(void *p
)
1133 auto dc
=std::unique_ptr
<DNSComboWriter
>(reinterpret_cast<DNSComboWriter
*>(p
));
1136 t_queryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
// TCP answers may use up to 65535 bytes; UDP answers start at
// min(512, g_udpTruncationThreshold) until EDNS says otherwise.
1138 uint16_t maxanswersize
= dc
->d_tcp
? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold
);
1140 std::vector
<pair
<uint16_t, string
> > ednsOpts
;
1141 bool variableAnswer
= dc
->d_variable
;
1142 bool haveEDNS
=false;
1144 bool hasUDR
= false;
1145 #endif /* NOD_ENABLED */
1146 DNSPacketWriter::optvect_t returnedEdnsOptions
; // Here we stuff all the options for the return packet
1147 uint8_t ednsExtRCode
= 0;
1148 if(getEDNSOpts(dc
->d_mdp
, &edo
)) {
1150 if (edo
.d_version
!= 0) {
1151 ednsExtRCode
= ERCode::BADVERS
;
1156 "Values lower than 512 MUST be treated as equal to 512."
1158 maxanswersize
= min(static_cast<uint16_t>(edo
.d_packetsize
>= 512 ? edo
.d_packetsize
: 512), g_udpTruncationThreshold
);
1160 ednsOpts
= edo
.d_options
;
1161 maxanswersize
-= 11; // EDNS header size
1163 for (const auto& o
: edo
.d_options
) {
1164 if (o
.first
== EDNSOptionCode::ECS
&& g_useIncomingECS
&& !dc
->d_ecsParsed
) {
1165 dc
->d_ecsFound
= getEDNSSubnetOptsFromString(o
.second
, &dc
->d_ednssubnet
);
1166 } else if (o
.first
== EDNSOptionCode::NSID
) {
1167 const static string mode_server_id
= ::arg()["server-id"];
1168 if(mode_server_id
!= "disabled" && !mode_server_id
.empty() &&
1169 maxanswersize
> (2 + 2 + mode_server_id
.size())) {
1170 returnedEdnsOptions
.push_back(make_pair(EDNSOptionCode::NSID
, mode_server_id
));
1171 variableAnswer
= true; // Can't packetcache an answer with NSID
1172 // Option Code and Option Length are both 2
1173 maxanswersize
-= 2 + 2 + mode_server_id
.size();
1178 /* perhaps there was no EDNS or no ECS but by now we looked */
1179 dc
->d_ecsParsed
= true;
1180 vector
<DNSRecord
> ret
;
1181 vector
<uint8_t> packet
;
1183 auto luaconfsLocal
= g_luaconfs
.getLocal();
1184 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1185 bool wantsRPZ(true);
1186 boost::optional
<RecProtoBufMessage
> pbMessage(boost::none
);
1187 #ifdef HAVE_PROTOBUF
1188 if (checkProtobufExport(luaconfsLocal
)) {
1189 Netmask
requestorNM(dc
->d_source
, dc
->d_source
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1190 ComboAddress requestor
= requestorNM
.getMaskedNetwork();
1191 requestor
.setPort(dc
->d_source
.getPort());
1192 pbMessage
= RecProtoBufMessage(RecProtoBufMessage::Response
, dc
->d_uuid
, &requestor
, &dc
->d_destination
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
, dc
->d_mdp
.d_header
.id
, dc
->d_tcp
, 0);
1193 pbMessage
->setServerIdentity(SyncRes::s_serverID
);
1194 pbMessage
->setEDNSSubnet(dc
->d_ednssubnet
.source
, dc
->d_ednssubnet
.source
.isIPv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1196 #endif /* HAVE_PROTOBUF */
1199 checkFrameStreamExport(luaconfsLocal
);
// Start the response packet with the query's qname/qtype/qclass and copy
// the id, RD and CD bits from the query header.
1202 DNSPacketWriter
pw(packet
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
1204 pw
.getHeader()->aa
=0;
1205 pw
.getHeader()->ra
=1;
1206 pw
.getHeader()->qr
=1;
1207 pw
.getHeader()->tc
=0;
1208 pw
.getHeader()->id
=dc
->d_mdp
.d_header
.id
;
1209 pw
.getHeader()->rd
=dc
->d_mdp
.d_header
.rd
;
1210 pw
.getHeader()->cd
=dc
->d_mdp
.d_header
.cd
;
1212 /* This is the lowest TTL seen in the records of the response,
1213 so we can't cache it for longer than this value.
1214 If we have a TTL cap, this value can't be larger than the
1215 cap no matter what. */
1216 uint32_t minTTL
= dc
->d_ttlCap
;
1218 SyncRes
sr(dc
->d_now
);
1219 sr
.setId(MT
->getTid());
1221 bool DNSSECOK
=false;
1223 sr
.setLuaEngine(t_pdl
);
1225 if(g_dnssecmode
!= DNSSECMode::Off
) {
1226 sr
.setDoDNSSEC(true);
1228 // Does the requestor want DNSSEC records?
1229 if(edo
.d_extFlags
& EDNSOpts::DNSSECOK
) {
1231 g_stats
.dnssecQueries
++;
1233 if (dc
->d_mdp
.d_header
.cd
) {
1234 /* Per rfc6840 section 5.9, "When processing a request with
1235 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1236 to return all response data, even data that has failed DNSSEC
1238 ++g_stats
.dnssecCheckDisabledQueries
;
1240 if (dc
->d_mdp
.d_header
.ad
) {
1241 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1242 indicating that the requester understands and is interested in the
1243 value of the AD bit in the response. This allows a requester to
1244 indicate that it understands the AD bit without also requesting
1245 DNSSEC data via the DO bit. */
1246 ++g_stats
.dnssecAuthenticDataQueries
;
1249 // Ignore the client-set CD flag
1250 pw
.getHeader()->cd
=0;
1252 sr
.setDNSSECValidationRequested(g_dnssecmode
== DNSSECMode::ValidateAll
|| g_dnssecmode
==DNSSECMode::ValidateForLog
|| ((dc
->d_mdp
.d_header
.ad
|| DNSSECOK
) && g_dnssecmode
==DNSSECMode::Process
));
1254 #ifdef HAVE_PROTOBUF
1255 sr
.setInitialRequestId(dc
->d_uuid
);
1256 sr
.setOutgoingProtobufServers(t_outgoingProtobufServers
);
1259 sr
.setFrameStreamServers(t_frameStreamServers
);
1261 sr
.setQuerySource(dc
->d_remote
, g_useIncomingECS
&& !dc
->d_ednssubnet
.source
.empty() ? boost::optional
<const EDNSSubnetOpts
&>(dc
->d_ednssubnet
) : boost::none
);
1263 bool tracedQuery
=false; // we could consider letting Lua know about this too
1264 bool shouldNotValidate
= false;
1266 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1267 int res
= RCode::NoError
;
1269 DNSFilterEngine::Policy appliedPolicy
;
1270 std::vector
<DNSRecord
> spoofed
;
// The DNSQuestion object handed to the Lua hooks; the pointer members set
// below refer to locals of this function and to members of dc and sr.
1271 RecursorLua4::DNSQuestion
dq(dc
->d_source
, dc
->d_destination
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_tcp
, variableAnswer
, wantsRPZ
, dc
->d_logResponse
);
1272 dq
.ednsFlags
= &edo
.d_extFlags
;
1273 dq
.ednsOptions
= &ednsOpts
;
1275 dq
.discardedPolicies
= &sr
.d_discardedPolicies
;
1276 dq
.policyTags
= &dc
->d_policyTags
;
1277 dq
.appliedPolicy
= &appliedPolicy
;
1278 dq
.currentRecords
= &ret
;
1279 dq
.dh
= &dc
->d_mdp
.d_header
;
1280 dq
.data
= dc
->d_data
;
1281 #ifdef HAVE_PROTOBUF
1282 dq
.requestorId
= dc
->d_requestorId
;
1283 dq
.deviceId
= dc
->d_deviceId
;
1284 dq
.deviceName
= dc
->d_deviceName
;
1287 if(ednsExtRCode
!= 0) {
1291 if(dc
->d_mdp
.d_qtype
==QType::ANY
&& !dc
->d_tcp
&& g_anyToTcp
) {
1292 pw
.getHeader()->tc
= 1;
1294 variableAnswer
= true;
1298 if(t_traceRegex
&& t_traceRegex
->match(dc
->d_mdp
.d_qname
.toString())) {
1299 sr
.setLogMode(SyncRes::Store
);
1303 if(!g_quiet
|| tracedQuery
) {
1304 g_log
<<Logger::Warning
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] " << (dc
->d_tcp
? "TCP " : "") << "question for '"<<dc
->d_mdp
.d_qname
<<"|"
1305 <<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)<<"' from "<<dc
->getRemote();
1306 if(!dc
->d_ednssubnet
.source
.empty()) {
1307 g_log
<<" (ecs "<<dc
->d_ednssubnet
.source
.toString()<<")";
1312 if(!dc
->d_mdp
.d_header
.rd
) {
1316 if (dc
->d_rcode
!= boost::none
) {
1317 /* we have a response ready to go, most likely from gettag_ffi */
1318 ret
= std::move(dc
->d_records
);
1320 if (res
== RCode::NoError
&& dc
->d_followCNAMERecords
) {
1321 res
= followCNAMERecords(ret
, QType(dc
->d_mdp
.d_qtype
));
1327 t_pdl
->prerpz(dq
, res
);
1330 // Check if the query has a policy attached to it
1331 if (wantsRPZ
&& (appliedPolicy
.d_type
== DNSFilterEngine::PolicyType::None
|| appliedPolicy
.d_kind
== DNSFilterEngine::PolicyKind::NoAction
)) {
1332 luaconfsLocal
->dfe
.getQueryPolicy(dc
->d_mdp
.d_qname
, dc
->d_source
, sr
.d_discardedPolicies
, appliedPolicy
);
1335 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1336 if(!t_pdl
|| !t_pdl
->preresolve(dq
, res
)) {
1338 sr
.setWantsRPZ(wantsRPZ
);
1340 switch(appliedPolicy
.d_kind
) {
1341 case DNSFilterEngine::PolicyKind::NoAction
:
1343 case DNSFilterEngine::PolicyKind::Drop
:
1344 g_stats
.policyDrops
++;
1345 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1347 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1348 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1349 res
=RCode::NXDomain
;
1351 case DNSFilterEngine::PolicyKind::NODATA
:
1352 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1355 case DNSFilterEngine::PolicyKind::Custom
:
1356 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1358 spoofed
=appliedPolicy
.getCustomRecords(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1359 for (const auto& dr
: spoofed
) {
1361 handleRPZCustom(dr
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1364 case DNSFilterEngine::PolicyKind::Truncate
:
1366 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1368 pw
.getHeader()->tc
=1;
1375 // Query was not handled for QNAME Policy reasons, now actually go out to find an answer
1377 sr
.d_appliedPolicy
= appliedPolicy
;
1378 res
= sr
.beginResolve(dc
->d_mdp
.d_qname
, QType(dc
->d_mdp
.d_qtype
), dc
->d_mdp
.d_qclass
, ret
);
1379 shouldNotValidate
= sr
.wasOutOfBand();
1381 catch(const ImmediateServFailException
&e
) {
1382 if(g_logCommonErrors
) {
1383 g_log
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during resolve of '"<<dc
->d_mdp
.d_qname
<<"' because: "<<e
.reason
<<endl
;
1385 res
= RCode::ServFail
;
1387 catch(const PolicyHitException
& e
) {
1390 dq
.validationState
= sr
.getValidationState();
1391 appliedPolicy
= sr
.d_appliedPolicy
;
1393 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1394 if (res
== -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1395 appliedPolicy
= sr
.d_appliedPolicy
;
1396 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1397 switch(appliedPolicy
.d_kind
) {
1398 case DNSFilterEngine::PolicyKind::NoAction
: // This can never happen
1399 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1400 case DNSFilterEngine::PolicyKind::Drop
:
1401 g_stats
.policyDrops
++;
1403 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1405 res
=RCode::NXDomain
;
1408 case DNSFilterEngine::PolicyKind::NODATA
:
1413 case DNSFilterEngine::PolicyKind::Truncate
:
1417 pw
.getHeader()->tc
=1;
1422 case DNSFilterEngine::PolicyKind::Custom
:
1425 spoofed
=appliedPolicy
.getCustomRecords(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1426 for (const auto& dr
: spoofed
) {
1428 handleRPZCustom(dr
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1434 if (wantsRPZ
&& (appliedPolicy
.d_type
== DNSFilterEngine::PolicyType::None
|| appliedPolicy
.d_kind
== DNSFilterEngine::PolicyKind::NoAction
)) {
1435 luaconfsLocal
->dfe
.getPostPolicy(ret
, sr
.d_discardedPolicies
, appliedPolicy
);
1439 if(res
== RCode::NoError
) {
1440 auto i
=ret
.cbegin();
1441 for(; i
!= ret
.cend(); ++i
)
1442 if(i
->d_type
== dc
->d_mdp
.d_qtype
&& i
->d_place
== DNSResourceRecord::ANSWER
)
1444 if(i
== ret
.cend() && t_pdl
->nodata(dq
, res
))
1445 shouldNotValidate
= true;
1448 else if(res
== RCode::NXDomain
&& t_pdl
->nxdomain(dq
, res
))
1449 shouldNotValidate
= true;
1451 if(t_pdl
->postresolve(dq
, res
))
1452 shouldNotValidate
= true;
1455 if (wantsRPZ
) { //XXX This block is repeated, see above
1456 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1457 switch(appliedPolicy
.d_kind
) {
1458 case DNSFilterEngine::PolicyKind::NoAction
:
1460 case DNSFilterEngine::PolicyKind::Drop
:
1461 g_stats
.policyDrops
++;
1463 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1465 res
=RCode::NXDomain
;
1468 case DNSFilterEngine::PolicyKind::NODATA
:
1473 case DNSFilterEngine::PolicyKind::Truncate
:
1477 pw
.getHeader()->tc
=1;
1482 case DNSFilterEngine::PolicyKind::Custom
:
1485 spoofed
=appliedPolicy
.getCustomRecords(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1486 for (const auto& dr
: spoofed
) {
1488 handleRPZCustom(dr
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1495 if(res
== PolicyDecision::DROP
) {
1496 g_stats
.policyDrops
++;
1499 if(tracedQuery
|| res
== -1 || res
== RCode::ServFail
|| pw
.getHeader()->rcode
== RCode::ServFail
)
1501 string
trace(sr
.getTrace());
1502 if(!trace
.empty()) {
1503 vector
<string
> lines
;
1504 boost::split(lines
, trace
, boost::is_any_of("\n"));
1505 for(const string
& line
: lines
) {
1507 g_log
<<Logger::Warning
<< line
<< endl
;
1513 pw
.getHeader()->rcode
=RCode::ServFail
;
1514 // no commit here, because no record
1515 g_stats
.servFails
++;
1518 pw
.getHeader()->rcode
=res
;
1520 // Does the validation mode or query demand validation?
1521 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1524 g_log
<<Logger::Warning
<<"Starting validation of answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<endl
;
1527 auto state
= sr
.getValidationState();
1529 if(state
== Secure
) {
1531 g_log
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<" validates correctly"<<endl
;
1534 // Is the query source interested in the value of the ad-bit?
1535 if (dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)
1536 pw
.getHeader()->ad
=1;
1538 else if(state
== Insecure
) {
1540 g_log
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<" validates as Insecure"<<endl
;
1543 pw
.getHeader()->ad
=0;
1545 else if(state
== Bogus
) {
1547 t_bogusremotes
->push_back(dc
->d_source
);
1548 if(t_bogusqueryring
)
1549 t_bogusqueryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
1550 if(g_dnssecLogBogus
|| sr
.doLog() || g_dnssecmode
== DNSSECMode::ValidateForLog
) {
1551 g_log
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<" validates as Bogus"<<endl
;
1554 // Does the query or validation mode demand sending out a SERVFAIL on validation errors?
1555 if(!pw
.getHeader()->cd
&& (g_dnssecmode
== DNSSECMode::ValidateAll
|| dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)) {
1557 g_log
<<Logger::Warning
<<"Sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" because recursor or query demands it for Bogus results"<<endl
;
1560 pw
.getHeader()->rcode
=RCode::ServFail
;
1564 g_log
<<Logger::Warning
<<"Not sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" Bogus validation since neither config nor query demands this"<<endl
;
1569 catch(const ImmediateServFailException
&e
) {
1570 if(g_logCommonErrors
)
1571 g_log
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during validation of '"<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<"' because: "<<e
.reason
<<endl
;
1572 pw
.getHeader()->rcode
=RCode::ServFail
;
1578 orderAndShuffle(ret
);
1579 if(auto sl
= luaconfsLocal
->sortlist
.getOrderCmp(dc
->d_source
)) {
1580 stable_sort(ret
.begin(), ret
.end(), *sl
);
1581 variableAnswer
=true;
// Walk the result records and write them into the packet.
1585 bool needCommit
= false;
1586 for(auto i
=ret
.cbegin(); i
!=ret
.cend(); ++i
) {
1588 ( i
->d_type
== QType::NSEC3
||
1590 ( i
->d_type
== QType::RRSIG
|| i
->d_type
==QType::NSEC
) &&
1592 ( dc
->d_mdp
.d_qtype
!= i
->d_type
&& dc
->d_mdp
.d_qtype
!= QType::ANY
) ||
1593 i
->d_place
!= DNSResourceRecord::ANSWER
1601 if (!addRecordToPacket(pw
, *i
, minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1610 udr
= udrCheckUniqueDNSRecord(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, *i
);
1614 #endif /* NOD ENABLED */
1616 #ifdef HAVE_PROTOBUF
1617 if (t_protobufServers
) {
1619 pbMessage
->addRR(*i
, luaconfsLocal
->protobufExportConfig
.exportTypes
, udr
);
1621 pbMessage
->addRR(*i
, luaconfsLocal
->protobufExportConfig
.exportTypes
);
1622 #endif /* NOD_ENABLED */
1631 if(g_useIncomingECS
&& dc
->d_ecsFound
&& !sr
.wasVariable() && !variableAnswer
) {
1632 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1634 eo
.source
= dc
->d_ednssubnet
.source
;
1637 sa
.sin4
.sin_family
= eo
.source
.getNetwork().sin4
.sin_family
;
1638 eo
.scope
= Netmask(sa
, 0);
1640 returnedEdnsOptions
.push_back(make_pair(EDNSOptionCode::ECS
, makeEDNSSubnetOptsString(eo
)));
1644 /* we try to add the EDNS OPT RR even for truncated answers,
1646 "The minimal response MUST be the DNS header, question section, and an
1647 OPT record. This MUST also occur when a truncated response (using
1648 the DNS header's TC bit) is returned."
1650 pw
.addOpt(512, ednsExtRCode
, DNSSECOK
? EDNSOpts::DNSSECOK
: 0, returnedEdnsOptions
);
1654 g_rs
.submitResponse(dc
->d_mdp
.d_qtype
, packet
.size(), !dc
->d_tcp
);
1655 updateResponseStats(res
, dc
->d_source
, packet
.size(), &dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1659 if (nodCheckNewDomain(dc
->d_mdp
.d_qname
))
1662 #endif /* NOD_ENABLED */
1663 #ifdef HAVE_PROTOBUF
1664 if (t_protobufServers
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& (!appliedPolicy
.d_name
|| appliedPolicy
.d_name
->empty()) && dc
->d_policyTags
.empty())) {
1665 pbMessage
->setBytes(packet
.size());
1666 pbMessage
->setResponseCode(pw
.getHeader()->rcode
);
1667 if (appliedPolicy
.d_name
) {
1668 pbMessage
->setAppliedPolicy(*appliedPolicy
.d_name
);
1669 pbMessage
->setAppliedPolicyType(appliedPolicy
.d_type
);
1671 pbMessage
->setPolicyTags(dc
->d_policyTags
);
1672 if (g_useKernelTimestamp
&& dc
->d_kernelTimestamp
.tv_sec
) {
1673 pbMessage
->setQueryTime(dc
->d_kernelTimestamp
.tv_sec
, dc
->d_kernelTimestamp
.tv_usec
);
1676 pbMessage
->setQueryTime(dc
->d_now
.tv_sec
, dc
->d_now
.tv_usec
);
1678 pbMessage
->setRequestorId(dq
.requestorId
);
1679 pbMessage
->setDeviceId(dq
.deviceId
);
1680 pbMessage
->setDeviceName(dq
.deviceName
);
1684 pbMessage
->setNOD(true);
1685 pbMessage
->addPolicyTag(g_nod_pbtag
);
1688 pbMessage
->addPolicyTag(g_udr_pbtag
);
1691 #endif /* NOD_ENABLED */
1692 if (dc
->d_logResponse
) {
1693 protobufLogResponse(*pbMessage
);
1697 pbMessage
->setNOD(false);
1698 pbMessage
->clearUDR();
1700 pbMessage
->removePolicyTag(g_nod_pbtag
);
1702 pbMessage
->removePolicyTag(g_udr_pbtag
);
1704 #endif /* NOD_ENABLED */
// UDP path: the reply is sent with sendmsg().
1710 cmsgbuf_aligned cbuf
;
1711 fillMSGHdr(&msgh
, &iov
, &cbuf
, 0, (char*)&*packet
.begin(), packet
.size(), &dc
->d_remote
);
1712 msgh
.msg_control
=NULL
;
1714 if(g_fromtosockets
.count(dc
->d_socket
)) {
1715 addCMsgSrcAddr(&msgh
, &cbuf
, &dc
->d_local
, 0);
1717 if(sendmsg(dc
->d_socket
, &msgh
, 0) < 0 && g_logCommonErrors
) {
1719 g_log
<< Logger::Warning
<< "Sending UDP reply to client " << dc
->getRemote() << " failed with: "
1720 << strerror(err
) << endl
;
1723 if(variableAnswer
|| sr
.wasVariable()) {
1724 g_stats
.variableResponses
++;
1726 if(!SyncRes::s_nopacketcache
&& !variableAnswer
&& !sr
.wasVariable() ) {
1727 t_packetCache
->insertResponsePacket(dc
->d_tag
, dc
->d_qhash
, std::move(dc
->d_query
), dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
,
1728 string((const char*)&*packet
.begin(), packet
.size()),
1730 pw
.getHeader()->rcode
== RCode::ServFail
? SyncRes::s_packetcacheservfailttl
:
1731 min(minTTL
,SyncRes::s_packetcachettl
),
1735 std::move(pbMessage
));
1737 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
// TCP path: a two-byte length prefix (high byte first) followed by the
// packet, written in one writev() call.
1741 buf
[0]=packet
.size()/256;
1742 buf
[1]=packet
.size()%256;
1744 Utility::iovec iov
[2];
1746 iov
[0].iov_base
=(void*)buf
; iov
[0].iov_len
=2;
1747 iov
[1].iov_base
=(void*)&*packet
.begin(); iov
[1].iov_len
= packet
.size();
1749 int wret
=Utility::writev(dc
->d_socket
, iov
, 2);
1753 g_log
<<Logger::Error
<<"EOF writing TCP answer to "<<dc
->getRemote()<<endl
;
1754 else if(wret
< 0 ) {
1756 g_log
<< Logger::Error
<< "Error writing TCP answer to " << dc
->getRemote() << ": " << strerror(err
) << endl
;
1757 } else if((unsigned int)wret
!= 2 + packet
.size())
1758 g_log
<<Logger::Error
<<"Oops, partial answer sent to "<<dc
->getRemote()<<" for "<<dc
->d_mdp
.d_qname
<<" (size="<< (2 + packet
.size()) <<", sent "<<wret
<<")"<<endl
;
1762 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
1763 if (dc
->d_tcpConnection
->d_requestsInFlight
> 0) {
1764 dc
->d_tcpConnection
->d_requestsInFlight
--;
1767 // In the code below, we try to remove the fd from the set, but
1768 // we don't know if another mthread already did the remove, so we can get a
1769 // "Tried to remove unlisted fd" exception. Note that an inflight < limit test
1770 // will not work since we do not know if the other mthread got an error or not.
1773 t_fdm
->removeReadFD(dc
->d_socket
);
1775 catch (FDMultiplexerException
&) {
1780 dc
->d_tcpConnection
->queriesCount
++;
1781 if (g_tcpMaxQueriesPerConn
&& dc
->d_tcpConnection
->queriesCount
>= g_tcpMaxQueriesPerConn
) {
1783 t_fdm
->removeReadFD(dc
->d_socket
);
1785 catch (FDMultiplexerException
&) {
1790 Utility::gettimeofday(&g_now
, 0); // needs to be updated
1791 struct timeval ttd
= g_now
;
1792 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
1793 if (dc
->d_tcpConnection
->d_requestsInFlight
== TCPConnection::s_maxInFlight
- 1) {
1794 // A read error might have happened. If we add the fd back, it will most likely error again.
1795 // This is not a big issue, the next handleTCPClientReadable() will see another read error
1797 ttd
.tv_sec
+= g_tcpTimeout
;
1798 t_fdm
->addReadFD(dc
->d_socket
, handleRunningTCPQuestion
, dc
->d_tcpConnection
, &ttd
);
1800 // fd might have been removed by read error code, so expect an exception
1802 t_fdm
->setReadTTD(dc
->d_socket
, ttd
, g_tcpTimeout
);
1804 catch (FDMultiplexerException
&) {
// Time spent on this query, used for the log line and the latency statistics.
1810 float spent
=makeFloat(sr
.getNow()-dc
->d_now
);
1812 g_log
<<Logger::Error
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] answer to "<<(dc
->d_mdp
.d_header
.rd
?"":"non-rd ")<<"question '"<<dc
->d_mdp
.d_qname
<<"|"<<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
);
1813 g_log
<<"': "<<ntohs(pw
.getHeader()->ancount
)<<" answers, "<<ntohs(pw
.getHeader()->arcount
)<<" additional, took "<<sr
.d_outqueries
<<" packets, "<<
1814 sr
.d_totUsec
/1000.0<<" netw ms, "<< spent
*1000.0<<" tot ms, "<<
1815 sr
.d_throttledqueries
<<" throttled, "<<sr
.d_timeouts
<<" timeouts, "<<sr
.d_tcpoutqueries
<<" tcp connections, rcode="<< res
;
1817 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1818 g_log
<< ", dnssec="<<vStates
[sr
.getValidationState()];
1825 if (sr
.d_outqueries
|| sr
.d_authzonequeries
) {
1826 s_RC
->cacheMisses
++;
1833 g_stats
.answers0_1
++;
1834 else if(spent
< 0.010)
1835 g_stats
.answers1_10
++;
1836 else if(spent
< 0.1)
1837 g_stats
.answers10_100
++;
1838 else if(spent
< 1.0)
1839 g_stats
.answers100_1000
++;
1841 g_stats
.answersSlow
++;
1843 uint64_t newLat
=(uint64_t)(spent
*1000000);
1844 newLat
= min(newLat
,(uint64_t)(((uint64_t) g_networkTimeoutMsec
)*1000)); // outliers of several minutes exist..
1845 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ (float)newLat
/g_latencyStatSize
;
1846 // no worries, we do this for packet cache hits elsewhere
1848 auto ourtime
= 1000.0*spent
-sr
.d_totUsec
/1000.0; // in msec
1850 g_stats
.ourtime0_1
++;
1851 else if(ourtime
< 2)
1852 g_stats
.ourtime1_2
++;
1853 else if(ourtime
< 4)
1854 g_stats
.ourtime2_4
++;
1855 else if(ourtime
< 8)
1856 g_stats
.ourtime4_8
++;
1857 else if(ourtime
< 16)
1858 g_stats
.ourtime8_16
++;
1859 else if(ourtime
< 32)
1860 g_stats
.ourtime16_32
++;
1862 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1863 g_stats
.ourtimeSlow
++;
1865 if(ourtime
>= 0.0) {
1866 newLat
=ourtime
*1000; // usec
1867 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ (float)newLat
/g_latencyStatSize
;
1869 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1871 catch(PDNSException
&ae
) {
1872 g_log
<<Logger::Error
<<"startDoResolve problem "<<makeLoginfo(dc
)<<": "<<ae
.reason
<<endl
;
1874 catch(const MOADNSException
&mde
) {
1875 g_log
<<Logger::Error
<<"DNS parser error "<<makeLoginfo(dc
) <<": "<<dc
->d_mdp
.d_qname
<<", "<<mde
.what()<<endl
;
1877 catch(std::exception
& e
) {
1878 g_log
<<Logger::Error
<<"STL error "<< makeLoginfo(dc
)<<": "<<e
.what();
1880 // Luawrapper nests the exception from Lua, so we unnest it here
1882 std::rethrow_if_nested(e
);
1883 } catch(const std::exception
& ne
) {
1884 g_log
<<". Extra info: "<<ne
.what();
1890 g_log
<<Logger::Error
<<"Any other exception in a resolver context "<< makeLoginfo(dc
) <<endl
;
1893 g_stats
.maxMThreadStackUsage
= max(MT
->getMaxStackUsage(), g_stats
.maxMThreadStackUsage
);
// Creates the control-channel socket and applies ownership and permissions.
//
// The socket path is "<socket-dir>/<s_programname>", optionally followed by
// ".<processNum>", and always ending in ".controlsocket"; s_rcc.listen() is
// called on that path.
// If the "socket-group" and/or "socket-owner" settings are non-empty they are
// looked up with asGid()/asUid() and applied with chown(); if "socket-mode" is
// non-empty it is applied with chmod(). A failing chown() or chmod() goes to
// unixDie().
//
// processNum - number appended to the socket name; defaults to -1.
//
// NOTE(review): the condition under which ".<processNum>" is appended and the
// declarations/initial values of sockowner and sockgroup are not visible in
// this excerpt (the "> -1" test suggests they start at -1 - confirm).
1896 static void makeControlChannelSocket(int processNum
=-1)
1898 string sockname
=::arg()["socket-dir"]+"/"+s_programname
;
1900 sockname
+= "."+std::to_string(processNum
);
1901 sockname
+=".controlsocket";
1902 s_rcc
.listen(sockname
);
1907 if (!::arg().isEmpty("socket-group"))
1908 sockgroup
=::arg().asGid("socket-group");
1909 if (!::arg().isEmpty("socket-owner"))
1910 sockowner
=::arg().asUid("socket-owner");
// Only touch ownership when at least one of owner/group was configured.
1912 if (sockgroup
> -1 || sockowner
> -1) {
1913 if(chown(sockname
.c_str(), sockowner
, sockgroup
) < 0) {
1914 unixDie("Failed to chown control socket");
1918 // do mode change if socket-mode is given
1919 if(!::arg().isEmpty("socket-mode")) {
1920 mode_t sockmode
=::arg().asMode("socket-mode");
1921 if(chmod(sockname
.c_str(), sockmode
) < 0) {
1922 unixDie("Failed to chmod control socket");
// Extracts the query name, type and class from a raw DNS query and scans the
// records after the question for an EDNS Client Subnet option and/or an XPF
// record.
//
// question   - raw query packet, starting with the DNS header.
// dnsname    - out: query name parsed from the question section.
// qtype      - out: query type.
// qclass     - out: query class.
// foundECS   - in/out: scanning for ECS stops once this is true.
// ednssubnet - out: receives the ECS data; nullptr disables the ECS lookup.
// options    - out: when used, filled by getEDNSOptions() with the EDNS
//              options of the OPT record.
// foundXPF   - in/out: set from the result of parseXPFPayload().
// xpfSource  - out: XPF source address; nullptr (or g_xpfRRCode == 0)
//              disables the XPF lookup.
// xpfDest    - out: XPF destination address, passed on to parseXPFPayload().
//
// NOTE(review): several original lines are missing from this excerpt (early
// returns, the step over the root label, the choice between getEDNSOption()
// and getEDNSOptions(), and the assignments to foundECS/*ednssubnet), so
// those details must be confirmed against the complete source.
1927 static void getQNameAndSubnet(const std::string
& question
, DNSName
* dnsname
, uint16_t* qtype
, uint16_t* qclass
,
1928 bool& foundECS
, EDNSSubnetOpts
* ednssubnet
, EDNSOptionViewMap
* options
,
1929 bool& foundXPF
, ComboAddress
* xpfSource
, ComboAddress
* xpfDest
)
1931 const bool lookForXPF
= xpfSource
!= nullptr && g_xpfRRCode
!= 0;
1932 const bool lookForECS
= ednssubnet
!= nullptr;
1933 const struct dnsheader
* dh
= reinterpret_cast<const struct dnsheader
*>(question
.c_str());
1934 size_t questionLen
= question
.length();
1935 unsigned int consumed
=0;
1936 *dnsname
=DNSName(question
.c_str(), questionLen
, sizeof(dnsheader
), false, qtype
, qclass
, &consumed
);
// Position just past the question: header + encoded qname + 4 bytes
// (presumably qtype and qclass, two bytes each).
1938 size_t pos
= sizeof(dnsheader
)+consumed
+4;
1939 const size_t headerSize
= /* root */ 1 + sizeof(dnsrecordheader
);
1940 const uint16_t arcount
= ntohs(dh
->arcount
);
// Walk at most arcount records, stopping once everything being looked for
// has been found or the packet is too short for another record header.
1942 for (uint16_t arpos
= 0; arpos
< arcount
&& questionLen
> (pos
+ headerSize
) && ((lookForECS
&& !foundECS
) || (lookForXPF
&& !foundXPF
)); arpos
++) {
1943 if (question
.at(pos
) != 0) {
1944 /* not an OPT or a XPF, bye. */
1949 const dnsrecordheader
* drh
= reinterpret_cast<const dnsrecordheader
*>(&question
.at(pos
));
1950 pos
+= sizeof(dnsrecordheader
);
1952 if (pos
>= questionLen
) {
1956 /* OPT root label (1) followed by type (2) */
1957 if(lookForECS
&& ntohs(drh
->d_type
) == QType::OPT
) {
1959 char* ecsStart
= nullptr;
1961 /* we need to pass the record len */
1962 int res
= getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question
.at(pos
- sizeof(drh
->d_clen
)))), questionLen
- pos
+ sizeof(drh
->d_clen
), EDNSOptionCode::ECS
, &ecsStart
, &ecsLen
);
1963 if (res
== 0 && ecsLen
> 4) {
1965 if(getEDNSSubnetOptsFromString(ecsStart
+ 4, ecsLen
- 4, &eso
)) {
1972 /* we need to pass the record len */
1973 int res
= getEDNSOptions(reinterpret_cast<const char*>(&question
.at(pos
-sizeof(drh
->d_clen
))), questionLen
- pos
+ (sizeof(drh
->d_clen
)), *options
);
1975 const auto& it
= options
->find(EDNSOptionCode::ECS
);
1976 if (it
!= options
->end() && !it
->second
.values
.empty() && it
->second
.values
.at(0).content
!= nullptr && it
->second
.values
.at(0).size
> 0) {
1978 if(getEDNSSubnetOptsFromString(it
->second
.values
.at(0).content
, it
->second
.values
.at(0).size
, &eso
)) {
// An XPF record is only accepted with the configured type code, class IN
// and a TTL of zero.
1986 else if (lookForXPF
&& ntohs(drh
->d_type
) == g_xpfRRCode
&& ntohs(drh
->d_class
) == QClass::IN
&& drh
->d_ttl
== 0) {
1987 if ((questionLen
- pos
) < ntohs(drh
->d_clen
)) {
1991 foundXPF
= parseXPFPayload(reinterpret_cast<const char*>(&question
.at(pos
)), ntohs(drh
->d_clen
), *xpfSource
, xpfDest
);
// Skip this record's data to reach the next record.
1994 pos
+= ntohs(drh
->d_clen
);
1998 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
2000 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(var
);
2002 if(conn
->state
==TCPConnection::BYTE0
) {
2003 ssize_t bytes
=recv(conn
->getFD(), &conn
->data
[0], 2, 0);
2005 conn
->state
=TCPConnection::BYTE1
;
2007 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
2008 conn
->data
.resize(conn
->qlen
);
2010 conn
->state
=TCPConnection::GETQUESTION
;
2012 if(!bytes
|| bytes
< 0) {
2013 t_fdm
->removeReadFD(fd
);
2017 else if(conn
->state
==TCPConnection::BYTE1
) {
2018 ssize_t bytes
=recv(conn
->getFD(), &conn
->data
[1], 1, 0);
2020 conn
->state
=TCPConnection::GETQUESTION
;
2021 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
2022 conn
->data
.resize(conn
->qlen
);
2025 if(!bytes
|| bytes
< 0) {
2026 if(g_logCommonErrors
)
2027 g_log
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toStringWithPort() <<" disconnected after first byte"<<endl
;
2028 t_fdm
->removeReadFD(fd
);
2032 else if(conn
->state
==TCPConnection::GETQUESTION
) {
2033 ssize_t bytes
=recv(conn
->getFD(), &conn
->data
[conn
->bytesread
], conn
->qlen
- conn
->bytesread
, 0);
2034 if(!bytes
|| bytes
< 0 || bytes
> std::numeric_limits
<std::uint16_t>::max()) {
2035 if(g_logCommonErrors
) {
2036 g_log
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toStringWithPort() <<" disconnected while reading question body"<<endl
;
2038 t_fdm
->removeReadFD(fd
);
2041 conn
->bytesread
+=(uint16_t)bytes
;
2042 if(conn
->bytesread
==conn
->qlen
) {
2043 conn
->state
= TCPConnection::BYTE0
;
2044 std::unique_ptr
<DNSComboWriter
> dc
;
2046 dc
=std::unique_ptr
<DNSComboWriter
>(new DNSComboWriter(conn
->data
, g_now
));
2048 catch(const MOADNSException
&mde
) {
2049 g_stats
.clientParseError
++;
2050 if(g_logCommonErrors
)
2051 g_log
<<Logger::Error
<<"Unable to parse packet from TCP client "<< conn
->d_remote
.toStringWithPort() <<endl
;
2054 dc
->d_tcpConnection
= conn
; // carry the torch
2055 dc
->setSocket(conn
->getFD()); // this is the only time a copy is made of the actual fd
2057 dc
->setRemote(conn
->d_remote
);
2058 dc
->setSource(conn
->d_remote
);
2061 dest
.sin4
.sin_family
= conn
->d_remote
.sin4
.sin_family
;
2062 socklen_t len
= dest
.getSocklen();
2063 getsockname(conn
->getFD(), (sockaddr
*)&dest
, &len
); // if this fails, we're ok with it
2065 dc
->setDestination(dest
);
2069 bool needECS
= false;
2070 bool needXPF
= g_XPFAcl
.match(conn
->d_remote
);
2074 bool logQuery
= false;
2075 #ifdef HAVE_PROTOBUF
2076 auto luaconfsLocal
= g_luaconfs
.getLocal();
2077 if (checkProtobufExport(luaconfsLocal
)) {
2080 logQuery
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logQueries
;
2081 dc
->d_logResponse
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logResponses
;
2082 #endif /* HAVE_PROTOBUF */
2085 checkFrameStreamExport(luaconfsLocal
);
2088 if(needECS
|| needXPF
|| (t_pdl
&& (t_pdl
->d_gettag_ffi
|| t_pdl
->d_gettag
))) {
2091 EDNSOptionViewMap ednsOptions
;
2092 bool xpfFound
= false;
2093 dc
->d_ecsParsed
= true;
2094 dc
->d_ecsFound
= false;
2095 getQNameAndSubnet(conn
->data
, &qname
, &qtype
, &qclass
,
2096 dc
->d_ecsFound
, &dc
->d_ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr,
2097 xpfFound
, needXPF
? &dc
->d_source
: nullptr, needXPF
? &dc
->d_destination
: nullptr);
2101 if (t_pdl
->d_gettag_ffi
) {
2102 dc
->d_tag
= t_pdl
->gettag_ffi(dc
->d_source
, dc
->d_ednssubnet
.source
, dc
->d_destination
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_records
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
, deviceName
, dc
->d_rcode
, dc
->d_ttlCap
, dc
->d_variable
, logQuery
, dc
->d_logResponse
, dc
->d_followCNAMERecords
);
2104 else if (t_pdl
->d_gettag
) {
2105 dc
->d_tag
= t_pdl
->gettag(dc
->d_source
, dc
->d_ednssubnet
.source
, dc
->d_destination
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
, deviceName
);
2108 catch(const std::exception
& e
) {
2109 if(g_logCommonErrors
)
2110 g_log
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
2114 catch(const std::exception
& e
)
2116 if(g_logCommonErrors
)
2117 g_log
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
2121 const struct dnsheader
* dh
= reinterpret_cast<const struct dnsheader
*>(&conn
->data
[0]);
2123 #ifdef HAVE_PROTOBUF
2124 if(t_protobufServers
|| t_outgoingProtobufServers
) {
2125 dc
->d_requestorId
= requestorId
;
2126 dc
->d_deviceId
= deviceId
;
2127 dc
->d_deviceName
= deviceName
;
2128 dc
->d_uuid
= getUniqueID();
2131 if(t_protobufServers
) {
2134 if (logQuery
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& dc
->d_policyTags
.empty())) {
2135 protobufLogQuery(luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, dc
->d_uuid
, dc
->d_source
, dc
->d_destination
, dc
->d_ednssubnet
.source
, true, dh
->id
, conn
->qlen
, qname
, qtype
, qclass
, dc
->d_policyTags
, dc
->d_requestorId
, dc
->d_deviceId
, dc
->d_deviceName
);
2138 catch(std::exception
& e
) {
2139 if(g_logCommonErrors
)
2140 g_log
<<Logger::Warning
<<"Error parsing a TCP query packet for edns subnet: "<<e
.what()<<endl
;
2145 if(t_pdl
->ipfilter(dc
->d_source
, dc
->d_destination
, *dh
)) {
2147 g_log
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED TCP question from "<<dc
->d_source
.toStringWithPort()<<(dc
->d_source
!= dc
->d_remote
? " (via "+dc
->d_remote
.toStringWithPort()+")" : "")<<" based on policy"<<endl
;
2148 g_stats
.policyDrops
++;
2153 if(dc
->d_mdp
.d_header
.qr
) {
2154 g_stats
.ignoredCount
++;
2155 if(g_logCommonErrors
) {
2156 g_log
<<Logger::Error
<<"Ignoring answer from TCP client "<< dc
->getRemote() <<" on server socket!"<<endl
;
2160 if(dc
->d_mdp
.d_header
.opcode
) {
2161 g_stats
.ignoredCount
++;
2162 if(g_logCommonErrors
) {
2163 g_log
<<Logger::Error
<<"Ignoring non-query opcode from TCP client "<< dc
->getRemote() <<" on server socket!"<<endl
;
2167 else if (dh
->qdcount
== 0) {
2168 g_stats
.emptyQueriesCount
++;
2169 if(g_logCommonErrors
) {
2170 g_log
<<Logger::Error
<<"Ignoring empty (qdcount == 0) query from "<< dc
->getRemote() <<" on server socket!"<<endl
;
2176 ++g_stats
.tcpqcounter
;
2177 ++conn
->d_requestsInFlight
;
2178 if (conn
->d_requestsInFlight
>= TCPConnection::s_maxInFlight
) {
2179 t_fdm
->removeReadFD(fd
); // should no longer awake ourselves when there is data to read
2181 Utility::gettimeofday(&g_now
, 0); // needed?
2182 struct timeval ttd
= g_now
;
2183 t_fdm
->setReadTTD(fd
, ttd
, g_tcpTimeout
);
2185 MT
->makeThread(startDoResolve
, dc
.release()); // deletes dc
2192 //! Handle new incoming TCP connection
// Accepts one connection from the listening socket `fd` and either closes it
// again (over capacity, ACL mismatch, per-client limit) or registers it with
// the multiplexer so that handleRunningTCPQuestion is called when data arrives.
// The second parameter is unnamed and unused.
// NOTE(review): several structural lines (braces, try, returns, the
// declaration of `addr`) are not visible in this excerpt.
2193 static void handleNewTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& )
// Accept the pending connection; the peer address is written to `addr`.
2196 socklen_t addrlen
=sizeof(addr
);
2197 int newsock
=accept(fd
, (struct sockaddr
*)&addr
, &addrlen
);
// Capacity check: more MTasker threads running than g_maxMThreads.
// The drop is counted and the freshly accepted socket is closed.
2199 if(MT
->numProcesses() > g_maxMThreads
) {
2200 g_stats
.overCapacityDrops
++;
2202 closesocket(newsock
);
// A failing closesocket() is reported as PDNSException and only logged.
2204 catch(const PDNSException
& e
) {
2205 g_log
<<Logger::Error
<<"Error closing TCP socket after an over capacity drop: "<<e
.reason
<<endl
;
// Remember the client address in the per-thread t_remotes container.
2211 t_remotes
->push_back(addr
);
// ACL check: only applied when an allow-from set (t_allowFrom) exists.
// Non-matching clients are reported, counted in unauthorizedTCP and closed.
2212 if(t_allowFrom
&& !t_allowFrom
->match(&addr
)) {
2214 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping TCP query from "<<addr
.toString()<<", address not matched by allow-from"<<endl
;
2216 g_stats
.unauthorizedTCP
++;
2218 closesocket(newsock
);
2220 catch(const PDNSException
& e
) {
2221 g_log
<<Logger::Error
<<"Error closing TCP socket after an ACL drop: "<<e
.reason
<<endl
;
// Per-client connection limit: a g_maxTCPPerClient of 0 disables the check.
// The count() test avoids creating a map entry for a client not yet tracked.
2225 if(g_maxTCPPerClient
&& t_tcpClientCounts
->count(addr
) && (*t_tcpClientCounts
)[addr
] >= g_maxTCPPerClient
) {
2226 g_stats
.tcpClientOverflow
++;
2228 closesocket(newsock
); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2230 catch(const PDNSException
& e
) {
2231 g_log
<<Logger::Error
<<"Error closing TCP socket after an overflow drop: "<<e
.reason
<<endl
;
// Connection accepted: switch to non-blocking I/O and wrap the socket in a
// shared TCPConnection, starting in state BYTE0.
2236 setNonBlocking(newsock
);
2237 std::shared_ptr
<TCPConnection
> tc
= std::make_shared
<TCPConnection
>(newsock
, addr
);
2238 tc
->state
=TCPConnection::BYTE0
;
// Read deadline for the new connection: current time plus g_tcpTimeout seconds.
2241 Utility::gettimeofday(&ttd
, 0);
2242 ttd
.tv_sec
+= g_tcpTimeout
;
// Register the connection for read events; `tc` is passed as the callback
// parameter and `ttd` as the deadline.
2244 t_fdm
->addReadFD(tc
->getFD(), handleRunningTCPQuestion
, tc
, &ttd
);
// Processes a single UDP query.
//
// Visible steps, in order: refresh g_now and count queries older than one
// second as too old; optionally parse qname/ECS/XPF and run the Lua gettag
// hooks; try to answer from the packet cache and send that answer directly;
// otherwise apply the Lua ipfilter and the MTasker capacity limit, then wrap
// the query in a DNSComboWriter and start startDoResolve in a new MTasker
// thread.
//
// question  raw query packet as received
// fromaddr  socket-level sender of the datagram (replies are sent here)
// destaddr  local address the datagram was sent to
// tv        receive timestamp of the datagram, compared against g_now
// fd        socket the query arrived on; also used for the cached reply
//
// NOTE(review): the return statements, several declarations (qname, qtype,
// qclass, response, age, valState, qhash, requestorId, deviceId, deviceName,
// msgh, iov, err) and a number of braces/try/else lines are not visible in
// this excerpt, so the meaning of the returned string* cannot be stated here.
2248 static string
* doProcessUDPQuestion(const std::string
& question
, const ComboAddress
& fromaddr
, const ComboAddress
& destaddr
, struct timeval tv
, int fd
)
// Refresh the global clock before measuring how long the query has waited.
2250 gettimeofday(&g_now
, 0);
// delta is the waiting time in milliseconds (seconds*1000 + microseconds/1000).
2252 struct timeval diff
= g_now
- tv
;
2253 double delta
=(diff
.tv_sec
*1000 + diff
.tv_usec
/1000.0);
// Queries that waited more than 1000 ms are counted in tooOldDrops.
2255 if(delta
> 1000.0) {
2256 g_stats
.tooOldDrops
++;
// Count queries that arrived over IPv6.
2262 if(fromaddr
.sin4
.sin_family
==AF_INET6
)
2263 g_stats
.ipv6qcounter
++;
// dh views the start of the query buffer as a DNS header.
2266 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
// Tag used for the packet cache lookup; stays 0 unless a gettag hook sets it.
2267 unsigned int ctag
=0;
2269 bool needECS
= false;
// XPF data is only requested for senders matched by g_XPFAcl.
2270 bool needXPF
= g_XPFAcl
.match(fromaddr
);
2271 std::vector
<std::string
> policyTags
;
2272 LuaContext::LuaObject data
;
// source/destination start as the socket-level addresses; they are handed to
// getQNameAndSubnet below (only when needXPF) and may be replaced there.
2273 ComboAddress source
= fromaddr
;
2274 ComboAddress destination
= destaddr
;
2278 bool logQuery
= false;
2279 bool logResponse
= false;
2280 #ifdef HAVE_PROTOBUF
// A unique ID is generated when either protobuf export check succeeds.
2281 boost::uuids::uuid uniqueId
;
2282 auto luaconfsLocal
= g_luaconfs
.getLocal();
2283 if (checkProtobufExport(luaconfsLocal
)) {
2284 uniqueId
= getUniqueID();
2286 } else if (checkOutgoingProtobufExport(luaconfsLocal
)) {
2287 uniqueId
= getUniqueID();
// Query/response logging needs protobuf servers plus the matching config flag.
2289 logQuery
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logQueries
;
2290 logResponse
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logResponses
;
2293 checkFrameStreamExport(luaconfsLocal
);
// State filled in by query parsing, the gettag hooks and the cache lookup;
// most of it is copied into the DNSComboWriter at the end of the function.
2295 EDNSSubnetOpts ednssubnet
;
2296 bool ecsFound
= false;
2297 bool ecsParsed
= false;
2298 uint16_t ecsBegin
= 0;
2299 uint16_t ecsEnd
= 0;
2300 std::vector
<DNSRecord
> records
;
2301 boost::optional
<int> rcode
= boost::none
;
2302 uint32_t ttlCap
= std::numeric_limits
<uint32_t>::max();
2303 bool variable
= false;
2304 bool followCNAMEs
= false;
2310 bool qnameParsed
=false;
// Allocation-trace debugging output printed to cout.
// NOTE(review): presumably compiled only under a malloc-trace preprocessor
// guard that is not visible in this excerpt - confirm.
2313 static uint64_t last=0;
2315 g_mtracer->clearAllocators();
2316 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2317 last=g_mtracer->getAllocs();
2318 cout<<g_mtracer->topAllocatorsString()<<endl;
2319 g_mtracer->clearAllocators();
// The query is only parsed up front when ECS or XPF is needed, or when a Lua
// script with a gettag or gettag_ffi hook is loaded.
2323 if(needECS
|| needXPF
|| (t_pdl
&& (t_pdl
->d_gettag
|| t_pdl
->d_gettag_ffi
))) {
2325 EDNSOptionViewMap ednsOptions
;
2326 bool xpfFound
= false;
// EDNS options are collected only if g_gettagNeedsEDNSOptions is set; the
// source/destination pointers are passed only when XPF is allowed.
2330 getQNameAndSubnet(question
, &qname
, &qtype
, &qclass
,
2331 ecsFound
, &ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr,
2332 xpfFound
, needXPF
? &source
: nullptr, needXPF
? &destination
: nullptr);
// The FFI variant of gettag takes precedence over the plain one.
// The literal `false` is presumably the "is TCP" flag (the TCP path passes
// `true` in the same position) - confirm against the hook signatures.
2339 if (t_pdl
->d_gettag_ffi
) {
2340 ctag
= t_pdl
->gettag_ffi(source
, ednssubnet
.source
, destination
, qname
, qtype
, &policyTags
, records
, data
, ednsOptions
, false, requestorId
, deviceId
, deviceName
, rcode
, ttlCap
, variable
, logQuery
, logResponse
, followCNAMEs
);
2342 else if (t_pdl
->d_gettag
) {
2343 ctag
= t_pdl
->gettag(source
, ednssubnet
.source
, destination
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
, deviceName
);
// Failures are logged only when g_logCommonErrors is set. This handler names
// the qname in its message, the following one does not.
2346 catch(const std::exception
& e
) {
2347 if(g_logCommonErrors
)
2348 g_log
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
2352 catch(const std::exception
& e
)
2354 if(g_logCommonErrors
)
2355 g_log
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
2359 bool cacheHit
= false;
// pbMessage stays empty unless protobuf servers are configured.
2360 boost::optional
<RecProtoBufMessage
> pbMessage(boost::none
);
2361 #ifdef HAVE_PROTOBUF
2362 if (t_protobufServers
) {
// Prepare a Response message up front; it is also handed to the packet cache
// lookup below.
2363 pbMessage
= RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response
);
2364 pbMessage
->setServerIdentity(SyncRes::s_serverID
);
// Log the query unless taggedOnly is set and no policy tags were assigned.
2365 if (logQuery
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& policyTags
.empty())) {
2366 protobufLogQuery(luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, uniqueId
, source
, destination
, ednssubnet
.source
, false, dh
->id
, question
.size(), qname
, qtype
, qclass
, policyTags
, requestorId
, deviceId
, deviceName
);
2369 #endif /* HAVE_PROTOBUF */
2371 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2372 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2373 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
// Two lookup variants follow: this one passes qtype/qclass by value, the next
// one passes pointers to them. The lookup is skipped entirely when
// SyncRes::s_nopacketcache is set.
// NOTE(review): which variant runs presumably depends on qnameParsed; the
// selecting branch is not visible in this excerpt.
2376 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, qtype
, qclass
, g_now
.tv_sec
, &response
, &age
, &valState
, &qhash
, &ecsBegin
, &ecsEnd
, pbMessage
? &(*pbMessage
) : nullptr));
2379 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, &qtype
, &qclass
, g_now
.tv_sec
, &response
, &age
, &valState
, &qhash
, &ecsBegin
, &ecsEnd
, pbMessage
? &(*pbMessage
) : nullptr));
// Cached answers with validation state Bogus: record the client, and the
// qname/qtype pair when t_bogusqueryring exists.
2383 if(valState
== Bogus
) {
2385 t_bogusremotes
->push_back(source
);
2386 if(t_bogusqueryring
)
2387 t_bogusqueryring
->push_back(make_pair(qname
, qtype
));
2390 #ifdef HAVE_PROTOBUF
// Log the cached response unless taggedOnly is set and the message carries
// neither an applied policy nor policy tags.
2391 if(t_protobufServers
&& logResponse
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& pbMessage
->getAppliedPolicy().empty() && pbMessage
->getPolicyTags().empty())) {
// The requestor address is masked with the configured v4/v6 mask length;
// the original port is then restored on the masked address.
2392 Netmask
requestorNM(source
, source
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
2393 ComboAddress requestor
= requestorNM
.getMaskedNetwork();
2394 requestor
.setPort(source
.getPort());
2395 pbMessage
->update(uniqueId
, &requestor
, &destination
, false, dh
->id
);
2396 pbMessage
->setEDNSSubnet(ednssubnet
.source
, ednssubnet
.source
.isIPv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
// Query time: the supplied timestamp tv when g_useKernelTimestamp is set and
// tv.tv_sec is non-zero; the second call uses g_now (the separating else line
// is not visible in this excerpt).
2397 if (g_useKernelTimestamp
&& tv
.tv_sec
) {
2398 pbMessage
->setQueryTime(tv
.tv_sec
, tv
.tv_usec
);
2401 pbMessage
->setQueryTime(g_now
.tv_sec
, g_now
.tv_usec
);
2403 pbMessage
->setRequestorId(requestorId
);
2404 pbMessage
->setDeviceId(deviceId
);
2405 pbMessage
->setDeviceName(deviceName
);
2406 protobufLogResponse(*pbMessage
);
2408 #endif /* HAVE_PROTOBUF */
// The "(via ...)" suffix is added when source differs from the socket-level
// sender, i.e. when it was replaced during parsing.
2410 g_log
<<Logger::Notice
<<t_id
<< " question answered from packet cache tag="<<ctag
<<" from "<<source
.toStringWithPort()<<(source
!= fromaddr
? " (via "+fromaddr
.toStringWithPort()+")" : "")<<endl
;
2412 g_stats
.packetCacheHits
++;
2413 SyncRes::s_queries
++;
// Adjust the cached packet for the time (`age`) it has spent in the cache.
2414 ageDNSPacket(response
, age
);
// Build the reply message header addressed to the socket-level sender.
2417 cmsgbuf_aligned cbuf
;
2418 fillMSGHdr(&msgh
, &iov
, &cbuf
, 0, (char*)response
.c_str(), response
.length(), const_cast<ComboAddress
*>(&fromaddr
));
2419 msgh
.msg_control
=NULL
;
// For sockets listed in g_fromtosockets, attach destaddr as control data so
// the reply is sent from the address the query was addressed to.
2421 if(g_fromtosockets
.count(fd
)) {
2422 addCMsgSrcAddr(&msgh
, &cbuf
, &destaddr
, 0);
// A failed send is only logged, and only when g_logCommonErrors is set.
2424 if(sendmsg(fd
, &msgh
, 0) < 0 && g_logCommonErrors
) {
2426 g_log
<< Logger::Warning
<< "Sending UDP reply to client " << source
.toStringWithPort()
2427 << (source
!= fromaddr
? " (via " + fromaddr
.toStringWithPort() + ")" : "") << " failed with: "
2428 << strerror(err
) << endl
;
// Update response statistics, reading the rcode from a local copy of the
// header; skipped for responses shorter than a DNS header.
2430 if(response
.length() >= sizeof(struct dnsheader
)) {
2431 struct dnsheader tmpdh
;
2432 memcpy(&tmpdh
, response
.c_str(), sizeof(tmpdh
));
2433 updateResponseStats(tmpdh
.rcode
, source
, response
.length(), 0, 0);
// Packet cache hits enter both moving latency averages as a 0 usec sample.
2435 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ 0.0; // we assume 0 usec
2436 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ 0.0; // we assume 0 usec
2440 catch(std::exception
& e
) {
2441 if(g_logCommonErrors
)
2442 g_log
<<Logger::Error
<<"Error processing or aging answer packet: "<<e
.what()<<endl
;
// Lua ipfilter hook: a true result reports the query as dropped by policy.
2447 if(t_pdl
->ipfilter(source
, destination
, *dh
)) {
2449 g_log
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<source
.toStringWithPort()<<(source
!= fromaddr
? " (via "+fromaddr
.toStringWithPort()+")" : "")<<" based on policy"<<endl
;
2450 g_stats
.policyDrops
++;
// Capacity limit: more MTasker threads running than g_maxMThreads.
2455 if(MT
->numProcesses() > g_maxMThreads
) {
2457 g_log
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<source
.toStringWithPort()<<(source
!= fromaddr
? " (via "+fromaddr
.toStringWithPort()+")" : "")<<", over capacity"<<endl
;
2459 g_stats
.overCapacityDrops
++;
// Package the query for resolution; policyTags, data and records are moved
// into the DNSComboWriter and must not be used afterwards.
2463 auto dc
= std::unique_ptr
<DNSComboWriter
>(new DNSComboWriter(question
, g_now
, std::move(policyTags
), std::move(data
), std::move(records
)));
// Remote/local are the socket-level addresses; source/destination are the
// possibly replaced ones.
2467 dc
->setRemote(fromaddr
);
2468 dc
->setSource(source
);
2469 dc
->setLocal(destaddr
);
2470 dc
->setDestination(destination
);
// Carry over the ECS state and the values the gettag hooks may have set.
2472 dc
->d_ecsFound
= ecsFound
;
2473 dc
->d_ecsParsed
= ecsParsed
;
2474 dc
->d_ecsBegin
= ecsBegin
;
2475 dc
->d_ecsEnd
= ecsEnd
;
2476 dc
->d_ednssubnet
= ednssubnet
;
2477 dc
->d_ttlCap
= ttlCap
;
2478 dc
->d_variable
= variable
;
2479 dc
->d_followCNAMERecords
= followCNAMEs
;
2480 dc
->d_rcode
= rcode
;
2481 dc
->d_logResponse
= logResponse
;
2482 #ifdef HAVE_PROTOBUF
2483 if (t_protobufServers
|| t_outgoingProtobufServers
) {
2484 dc
->d_uuid
= std::move(uniqueId
);
2486 dc
->d_requestorId
= requestorId
;
2487 dc
->d_deviceId
= deviceId
;
2488 dc
->d_deviceName
= deviceName
;
2489 dc
->d_kernelTimestamp
= tv
;
// Ownership of dc is released to the new MTasker thread running startDoResolve.
2492 MT
->makeThread(startDoResolve
, (void*) dc
.release()); // deletes dc
// Multiplexer callback for a readable UDP listening socket `fd`.
//
// Reads up to s_maxUDPQueriesPerRound datagrams per invocation. Each datagram
// is checked (length, truncation, allow-from, source port, header fields),
// its receive timestamp and destination address are determined, and it is
// then either handed to distributeAsyncFunction or processed locally with
// doProcessUDPQuestion.
// NOTE(review): several declarations (len, msgh, iov, dest), guards and
// braces/returns are not visible in this excerpt.
2497 static void handleNewUDPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
// Receive buffer size: queries larger than this are flagged MSG_TRUNC below.
2500 static const size_t maxIncomingQuerySize
= 512;
// Per-thread receive buffer, reused across calls.
2501 static thread_local
std::string data
;
2502 ComboAddress fromaddr
;
2505 cmsgbuf_aligned cbuf
;
2506 bool firstQuery
= true;
2508 for(size_t queriesCounter
= 0; queriesCounter
< s_maxUDPQueriesPerRound
; queriesCounter
++) {
// Restore the full buffer size each round; it is shrunk to the actual
// datagram length further down.
2509 data
.resize(maxIncomingQuerySize
);
2510 fromaddr
.sin6
.sin6_family
=AF_INET6
; // this makes sure fromaddr is big enough
2511 fillMSGHdr(&msgh
, &iov
, &cbuf
, sizeof(cbuf
), &data
[0], data
.size(), &fromaddr
);
2513 if((len
=recvmsg(fd
, &msgh
, 0)) >= 0) {
// Datagrams shorter than a DNS header are counted and ignored.
2517 if (static_cast<size_t>(len
) < sizeof(dnsheader
)) {
2518 g_stats
.ignoredCount
++;
2520 g_log
<<Logger::Error
<<"Ignoring too-short ("<<std::to_string(len
)<<") query from "<<fromaddr
.toString()<<endl
;
// The datagram did not fit in the receive buffer.
2525 if (msgh
.msg_flags
& MSG_TRUNC
) {
2526 g_stats
.truncatedDrops
++;
2528 g_log
<<Logger::Error
<<"Ignoring truncated query from "<<fromaddr
.toString()<<endl
;
// Remember the sender in the per-thread t_remotes container.
2534 t_remotes
->push_back(fromaddr
);
// ACL check, only applied when an allow-from set (t_allowFrom) exists.
2537 if(t_allowFrom
&& !t_allowFrom
->match(&fromaddr
)) {
2539 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toString()<<", address not matched by allow-from"<<endl
;
2542 g_stats
.unauthorizedUDP
++;
// The static assert guarantees that the port field sits at the same offset
// for IPv4 and IPv6, so sin4.sin_port can be read for both families.
2545 BOOST_STATIC_ASSERT(offsetof(sockaddr_in
, sin_port
) == offsetof(sockaddr_in6
, sin6_port
));
2546 if(!fromaddr
.sin4
.sin_port
) { // also works for IPv6
2548 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toStringWithPort()<<", can't deal with port 0"<<endl
;
2551 g_stats
.clientParseError
++; // not quite the best place to put it, but needs to go somewhere
// Shrink the buffer to the datagram length and view its start as a DNS header.
2556 data
.resize(static_cast<size_t>(len
));
2557 dnsheader
* dh
=(dnsheader
*)&data
[0];
// Packets identified as answers are counted and ignored.
// NOTE(review): the guarding condition is not visible in this excerpt.
2560 g_stats
.ignoredCount
++;
2561 if(g_logCommonErrors
) {
2562 g_log
<<Logger::Error
<<"Ignoring answer from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
// Any non-zero opcode is counted and ignored.
2565 else if(dh
->opcode
) {
2566 g_stats
.ignoredCount
++;
2567 if(g_logCommonErrors
) {
2568 g_log
<<Logger::Error
<<"Ignoring non-query opcode "<<dh
->opcode
<<" from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
// Queries without a question section are counted separately and ignored.
2571 else if (dh
->qdcount
== 0) {
2572 g_stats
.emptyQueriesCount
++;
2573 if(g_logCommonErrors
) {
2574 g_log
<<Logger::Error
<<"Ignoring empty (qdcount == 0) query from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
// Receive timestamp taken from the message's control data; tv stays {0,0}
// if HarvestTimestamp does not fill it in.
2578 struct timeval tv
={0,0};
2579 HarvestTimestamp(&msgh
, &tv
);
// Destination address: prefer the one carried in the control data, taking
// the port from the listening address registered for this fd.
2581 dest
.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2582 auto loc
= rplookup(g_listenSocketsAddresses
, fd
);
2583 if(HarvestDestinationAddress(&msgh
, &dest
)) {
2584 // but.. need to get port too
2586 dest
.sin4
.sin_port
= loc
->sin4
.sin_port
;
// Alternative: ask the socket itself, using the sender's address family to
// size the query. NOTE(review): the branch lines selecting this path are not
// visible in this excerpt.
2594 dest
.sin4
.sin_family
= fromaddr
.sin4
.sin_family
;
2595 socklen_t slen
= dest
.getSocklen();
2596 getsockname(fd
, (sockaddr
*)&dest
, &slen
); // if this fails, we're ok with it
// With g_weDistributeQueries set the query is bound into a closure and handed
// to distributeAsyncFunction; the other visible path counts it for this
// thread and processes it directly.
2600 if(g_weDistributeQueries
) {
2601 distributeAsyncFunction(data
, boost::bind(doProcessUDPQuestion
, data
, fromaddr
, dest
, tv
, fd
));
2604 ++s_threadInfos
[t_id
].numberOfDistributedQueries
;
2605 doProcessUDPQuestion(data
, fromaddr
, dest
, tv
, fd
);
// Parse failures are counted as client parse errors; logging depends on
// g_logCommonErrors.
2609 catch(const MOADNSException
&mde
) {
2610 g_stats
.clientParseError
++;
2611 if(g_logCommonErrors
) {
2612 g_log
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<mde
.what()<<endl
;
2615 catch(const std::runtime_error
& e
) {
2616 g_stats
.clientParseError
++;
2617 if(g_logCommonErrors
) {
2618 g_log
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<e
.what()<<endl
;
2623 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
// EAGAIN while firstQuery is still set means there was nothing to read at
// all; that case is counted in noPacketError.
2624 if(firstQuery
&& errno
== EAGAIN
) {
2625 g_stats
.noPacketError
++;
// Creates one listening TCP socket per entry of the "local-address" setting.
//
// deferredAdds  receives an (fd, handleNewTCPQuestion) pair per socket
// tcpSockets    receives every created fd
//
// Throws PDNSException when no address is configured, an address cannot be
// resolved, or socket creation, SO_REUSEPORT or bind fails.
// NOTE(review): several declarations (fd, st, sin, tmp, err), guards and
// braces are not visible in this excerpt.
2633 static void makeTCPServerSockets(deferredAdd_t
& deferredAdds
, std::set
<int>& tcpSockets
)
// "local-address" is split on spaces and commas.
2636 vector
<string
>locals
;
2637 stringtok(locals
,::arg()["local-address"]," ,");
2640 throw PDNSException("No local address specified");
2642 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
// The port starts at "local-port" before parseService processes the entry.
2644 st
.port
=::arg().asNum("local-port");
2645 parseService(*i
, st
);
// Try to parse the host as IPv4 first and fall back to IPv6.
2650 sin
.sin4
.sin_family
= AF_INET
;
2651 if(!IpToU32(st
.host
, (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2652 sin
.sin6
.sin6_family
= AF_INET6
;
2653 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2654 throw PDNSException("Unable to resolve local address for TCP server on '"+ st
.host
+"'");
2657 fd
=socket(sin
.sin6
.sin6_family
, SOCK_STREAM
, 0);
2659 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2664 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &tmp
, sizeof tmp
)<0) {
2665 g_log
<<Logger::Error
<<"Setsockopt failed for TCP listening socket"<<endl
;
// IPv6 sockets are restricted to IPv6 only; a failure here is logged and
// otherwise ignored.
2668 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &tmp
, sizeof(tmp
)) < 0) {
2670 g_log
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err
)<<endl
;
2673 #ifdef TCP_DEFER_ACCEPT
// Success is announced once only, for the first address.
2674 if(setsockopt(fd
, IPPROTO_TCP
, TCP_DEFER_ACCEPT
, &tmp
, sizeof tmp
) >= 0) {
2675 if(i
==locals
.begin())
2676 g_log
<<Logger::Info
<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl
;
// NOTE(review): AF_INET is passed here regardless of the socket's family,
// although the socket above is created with sin's family and may be IPv6.
// Confirm whether the socket's own family was intended.
2680 if( ::arg().mustDo("non-local-bind") )
2681 Utility::setBindAny(AF_INET
, fd
);
2685 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &tmp
, sizeof(tmp
)) < 0)
2686 throw PDNSException("SO_REUSEPORT: "+stringerror());
// A positive "tcp-fast-open" value is used as the TFO queue size.
2690 if (::arg().asNum("tcp-fast-open") > 0) {
2692 int fastOpenQueueSize
= ::arg().asNum("tcp-fast-open");
2693 if (setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, &fastOpenQueueSize
, sizeof fastOpenQueueSize
) < 0) {
2695 g_log
<<Logger::Error
<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err
)<<endl
;
2698 g_log
<<Logger::Warning
<<"TCP Fast Open configured but not supported for listening socket"<<endl
;
// Bind with the address length that matches the family.
2702 sin
.sin4
.sin_port
= htons(st
.port
);
2703 socklen_t socklen
=sin
.sin4
.sin_family
==AF_INET
? sizeof(sin
.sin4
) : sizeof(sin
.sin6
);
2704 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
2705 throw PDNSException("Binding TCP server socket for "+ st
.host
+": "+stringerror());
2708 setSocketSendBuffer(fd
, 65000);
// Hand the socket to the caller for later registration and bookkeeping.
2710 deferredAdds
.push_back(make_pair(fd
, handleNewTCPQuestion
));
2711 tcpSockets
.insert(fd
);
2713 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2714 // - fd is not that which we know here, but returned from accept()
// IPv6 addresses are printed in brackets.
2715 if(sin
.sin4
.sin_family
== AF_INET
)
2716 g_log
<<Logger::Info
<<"Listening for TCP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2718 g_log
<<Logger::Info
<<"Listening for TCP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Creates one UDP socket per entry of the "local-address" setting.
//
// deferredAdds  receives an (fd, handleNewUDPQuestion) pair per socket
//
// Each bound address is also recorded in g_listenSocketsAddresses, keyed by fd.
// Throws PDNSException when no address is configured, an address cannot be
// resolved, or socket creation, SO_REUSEPORT or bind fails.
// NOTE(review): several declarations (one, st, sin, err), guards and braces
// are not visible in this excerpt.
2722 static void makeUDPServerSockets(deferredAdd_t
& deferredAdds
)
// "local-address" is split on spaces and commas.
2725 vector
<string
>locals
;
2726 stringtok(locals
,::arg()["local-address"]," ,");
2729 throw PDNSException("No local address specified");
2731 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
// The port starts at "local-port" before parseService processes the entry.
2733 st
.port
=::arg().asNum("local-port");
2734 parseService(*i
, st
);
// Try to parse the host as IPv4 first and fall back to IPv6.
2739 sin
.sin4
.sin_family
= AF_INET
;
2740 if(!IpToU32(st
.host
.c_str() , (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2741 sin
.sin6
.sin6_family
= AF_INET6
;
2742 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2743 throw PDNSException("Unable to resolve local address for UDP server on '"+ st
.host
+"'");
2746 int fd
=socket(sin
.sin4
.sin_family
, SOCK_DGRAM
, 0);
2748 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
// Timestamp reporting is optional; failure only produces a warning.
2750 if (!setSocketTimestamps(fd
))
2751 g_log
<<Logger::Warning
<<"Unable to enable timestamp reporting for socket"<<endl
;
// When bound to the "any" address, ask for packet info on received datagrams.
// Sockets where that succeeds are recorded in g_fromtosockets.
2753 if(IsAnyAddress(sin
)) {
2754 if(sin
.sin4
.sin_family
== AF_INET
)
2755 if(!setsockopt(fd
, IPPROTO_IP
, GEN_IP_PKTINFO
, &one
, sizeof(one
))) // linux supports this, so why not - might fail on other systems
2756 g_fromtosockets
.insert(fd
);
2757 #ifdef IPV6_RECVPKTINFO
2758 if(sin
.sin4
.sin_family
== AF_INET6
)
2759 if(!setsockopt(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &one
, sizeof(one
)))
2760 g_fromtosockets
.insert(fd
);
// IPv6 sockets are restricted to IPv6 only; a failure here is logged and
// otherwise ignored.
2762 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &one
, sizeof(one
)) < 0) {
2764 g_log
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err
)<<endl
;
// NOTE(review): AF_INET6 is passed here regardless of the socket's family,
// although the socket above is created with sin's family and may be IPv4.
// Confirm whether the socket's own family was intended.
2767 if( ::arg().mustDo("non-local-bind") )
2768 Utility::setBindAny(AF_INET6
, fd
);
2772 setSocketReceiveBuffer(fd
, 250000);
2773 sin
.sin4
.sin_port
= htons(st
.port
);
2778 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &one
, sizeof(one
)) < 0)
2779 throw PDNSException("SO_REUSEPORT: "+stringerror());
// Failure to adjust PMTU handling is not fatal; it is only logged.
2785 setSocketIgnorePMTU(fd
);
2787 catch(const std::exception
& e
) {
2788 g_log
<<Logger::Warning
<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e
.what()<<endl
;
2792 socklen_t socklen
=sin
.getSocklen();
2793 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
2794 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st
.port
) +" for "+ st
.host
+": "+stringerror());
// Hand the socket to the caller for later registration and remember which
// address this fd listens on.
2798 deferredAdds
.push_back(make_pair(fd
, handleNewUDPQuestion
));
2799 g_listenSocketsAddresses
[fd
]=sin
; // this is written to only from the startup thread, not from the workers
// IPv6 addresses are printed in brackets.
2800 if(sin
.sin4
.sin_family
== AF_INET
)
2801 g_log
<<Logger::Info
<<"Listening for UDP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2803 g_log
<<Logger::Info
<<"Listening for UDP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Visible here: opens /dev/null read-write and duplicates that descriptor
// onto file descriptors 0, 1 and 2. A failure to open /dev/null is reported
// at Critical level. The dup2() return values are not checked.
// NOTE(review): the remaining steps of this function (and the guard around
// the error message) are not visible in this excerpt.
2807 static void daemonize(void)
2814 int i
=open("/dev/null",O_RDWR
); /* open /dev/null to stand in for the standard streams */
2816 g_log
<<Logger::Critical
<<"Unable to open /dev/null: "<<stringerror()<<endl
;
2818 dup2(i
,0); /* stdin */
2819 dup2(i
,1); /* stdout */
2820 dup2(i
,2); /* stderr */
// Function taking a single, unnamed int argument.
// NOTE(review): the body is not visible in this excerpt; the name suggests a
// handler for termination/interrupt signals - confirm where it is installed.
2825 static void termIntHandler(int)
// Function taking a single, unnamed int argument.
// NOTE(review): the body is not visible in this excerpt; the name suggests a
// SIGUSR1 handler - confirm where it is installed.
2830 static void usr1Handler(int)
// Applies the current value of g_quiet: the default SyncRes log mode becomes
// LogNone when quiet and Log otherwise, and the "quiet" argument is set to ""
// or "no" accordingly. The int argument is unnamed and unused.
// NOTE(review): any statement changing g_quiet itself is not visible in this
// excerpt.
2835 static void usr2Handler(int)
2838 SyncRes::setDefaultLogMode(g_quiet
? SyncRes::LogNone
: SyncRes::Log
);
2839 ::arg().set("quiet")=g_quiet
? "" : "no";
// Writes a block of "stats:" lines to the log at Notice level: record cache
// size and hit rate, sizes of several per-thread maps, outgoing query ratios,
// packet cache figures, per-worker distribution counts and an average
// queries-per-second figure since the previous call.
// When the counters tested below are not all non-zero and statsWanted is set,
// a single "no stats yet!" line is logged.
// NOTE(review): the declarations of `idx` and `statsWanted`, and some braces,
// are not visible in this excerpt.
2842 static void doStats(void)
// Values remembered from the previous call, used for the qps figure.
2844 static time_t lastOutputTime
;
2845 static uint64_t lastQueryCount
;
// Read the record cache counters once so the figures below agree with each
// other.
2847 uint64_t cacheHits
= s_RC
->cacheHits
;
2848 uint64_t cacheMisses
= s_RC
->cacheMisses
;
2849 uint64_t cacheSize
= s_RC
->size();
// Every counter used as a divisor below is checked for non-zero here.
2851 if(g_stats
.qcounter
&& (cacheHits
+ cacheMisses
) && SyncRes::s_queries
&& SyncRes::s_outqueries
) {
2852 g_log
<<Logger::Notice
<<"stats: "<<g_stats
.qcounter
<<" questions, "<<
2853 cacheSize
<< " cache entries, "<<
2854 broadcastAccFunction
<uint64_t>(pleaseGetNegCacheSize
)<<" negative entries, "<<
2855 (int)((cacheHits
*100.0)/(cacheHits
+cacheMisses
))<<"% cache hits"<<endl
;
// Sizes obtained through broadcastAccFunction, one helper per map.
2857 g_log
<<Logger::Notice
<<"stats: throttle map: "
2858 << broadcastAccFunction
<uint64_t>(pleaseGetThrottleSize
) <<", ns speeds: "
2859 << broadcastAccFunction
<uint64_t>(pleaseGetNsSpeedsSize
)<<", failed ns: "
2860 << broadcastAccFunction
<uint64_t>(pleaseGetFailedServersSize
)<<", ednsmap: "
2861 <<broadcastAccFunction
<uint64_t>(pleaseGetEDNSStatusesSize
)<<endl
;
// This message is split over two g_log statements; only the second ends
// with endl.
2862 g_log
<<Logger::Notice
<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries
*100.0/SyncRes::s_queries
)<<"%";
2863 g_log
<<Logger::Notice
<<", "<<(int)(SyncRes::s_throttledqueries
*100.0/(SyncRes::s_outqueries
+SyncRes::s_throttledqueries
))<<"% throttled, "
2864 <<SyncRes::s_nodelegated
<<" no-delegation drops"<<endl
;
2865 g_log
<<Logger::Notice
<<"stats: "<<SyncRes::s_tcpoutqueries
<<" outgoing tcp connections, "<<
2866 broadcastAccFunction
<uint64_t>(pleaseGetConcurrentQueries
)<<" queries running, "<<SyncRes::s_outgoingtimeouts
<<" outgoing timeouts"<<endl
;
2868 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2869 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
// Packet cache size and hit percentage relative to SyncRes::s_queries.
2871 g_log
<<Logger::Notice
<<"stats: " << broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheSize
) <<
2872 " packet cache entries, "<<(int)(100.0*broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheHits
)/SyncRes::s_queries
) << "% packet cache hits"<<endl
;
// One line per worker thread with the number of queries distributed to it.
2875 for (const auto& threadInfo
: s_threadInfos
) {
2876 if(threadInfo
.isWorker
) {
2877 g_log
<<Logger::Notice
<<"stats: thread "<<idx
<<" has been distributed "<<threadInfo
.numberOfDistributedQueries
<<" queries"<<endl
;
// Average qps since the previous call; skipped on the first call and when no
// full second has elapsed, which also avoids dividing by zero.
2882 time_t now
= time(0);
2883 if(lastOutputTime
&& lastQueryCount
&& now
!= lastOutputTime
) {
2884 g_log
<<Logger::Notice
<<"stats: "<< (SyncRes::s_queries
- lastQueryCount
) / (now
- lastOutputTime
) <<" qps (average over "<< (now
- lastOutputTime
) << " seconds)"<<endl
;
2886 lastOutputTime
= now
;
2887 lastQueryCount
= SyncRes::s_queries
;
// Nothing to report yet: only mention it when statsWanted is set.
2889 else if(statsWanted
)
2890 g_log
<<Logger::Notice
<<"stats: no stats yet!"<<endl
;
2895 static void houseKeeping(void *)
2897 static thread_local
time_t last_rootupdate
, last_secpoll
, last_trustAnchorUpdate
{0}, last_RC_prune
;
2898 static thread_local
struct timeval last_prune
;
2900 static thread_local
int cleanCounter
=0;
2901 static thread_local
bool s_running
; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2902 auto luaconfsLocal
= g_luaconfs
.getLocal();
2904 if (last_trustAnchorUpdate
== 0 && !luaconfsLocal
->trustAnchorFileInfo
.fname
.empty() && luaconfsLocal
->trustAnchorFileInfo
.interval
!= 0) {
2905 // Loading the Lua config file already "refreshed" the TAs
2906 last_trustAnchorUpdate
= g_now
.tv_sec
+ luaconfsLocal
->trustAnchorFileInfo
.interval
* 3600;
2915 struct timeval now
, past
;
2916 Utility::gettimeofday(&now
, nullptr);
2919 if (last_prune
< past
) {
2920 t_packetCache
->doPruneTo(g_maxPacketCacheEntries
/ g_numWorkerThreads
);
2921 SyncRes::pruneNegCache(g_maxCacheEntries
/ (g_numWorkerThreads
* 10));
2924 if(!((cleanCounter
++)%40)) { // this is a full scan!
2925 limit
=now
.tv_sec
-300;
2926 SyncRes::pruneNSSpeeds(limit
);
2928 limit
= now
.tv_sec
- SyncRes::s_serverdownthrottletime
* 10;
2929 SyncRes::pruneFailedServers(limit
);
2930 limit
= now
.tv_sec
- 2*3600;
2931 SyncRes::pruneEDNSStatuses(limit
);
2932 SyncRes::pruneThrottledServers();
2933 Utility::gettimeofday(&last_prune
, nullptr);
2936 if(isHandlerThread()) {
2937 if (now
.tv_sec
- last_RC_prune
> 5) {
2938 s_RC
->doPrune(g_maxCacheEntries
);
2939 last_RC_prune
= now
.tv_sec
;
2942 if(now
.tv_sec
- last_rootupdate
> 7200) {
2943 int res
= SyncRes::getRootNS(g_now
, nullptr);
2945 last_rootupdate
=now
.tv_sec
;
2946 primeRootNSZones(g_dnssecmode
!= DNSSECMode::Off
);
2950 if(now
.tv_sec
- last_secpoll
>= 3600) {
2952 doSecPoll(&last_secpoll
);
2954 catch(const std::exception
& e
)
2956 g_log
<<Logger::Error
<<"Exception while performing security poll: "<<e
.what()<<endl
;
2958 catch(const PDNSException
& e
)
2960 g_log
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2962 catch(const ImmediateServFailException
&e
)
2964 g_log
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2966 catch(const PolicyHitException
& e
) {
2967 g_log
<<Logger::Error
<<"Policy hit while performing security poll"<<endl
;
2971 g_log
<<Logger::Error
<<"Exception while performing security poll"<<endl
;
2975 if (!luaconfsLocal
->trustAnchorFileInfo
.fname
.empty() && luaconfsLocal
->trustAnchorFileInfo
.interval
!= 0 &&
2976 g_now
.tv_sec
- last_trustAnchorUpdate
>= (luaconfsLocal
->trustAnchorFileInfo
.interval
* 3600)) {
2977 g_log
<<Logger::Debug
<<"Refreshing Trust Anchors from file"<<endl
;
2979 map
<DNSName
, dsmap_t
> dsAnchors
;
2980 if (updateTrustAnchorsFromFile(luaconfsLocal
->trustAnchorFileInfo
.fname
, dsAnchors
)) {
2981 g_luaconfs
.modify([&dsAnchors
](LuaConfigItems
& lci
) {
2982 lci
.dsAnchors
= dsAnchors
;
2985 last_trustAnchorUpdate
= now
.tv_sec
;
2986 } catch (const PDNSException
&pe
) {
2987 g_log
<<Logger::Error
<<"Unable to update Trust Anchors: "<<pe
.reason
<<endl
;
2993 catch(PDNSException
& ae
)
2996 g_log
<<Logger::Error
<<"Fatal error in housekeeping thread: "<<ae
.reason
<<endl
;
3001 static void makeThreadPipes()
3003 auto pipeBufferSize
= ::arg().asNum("distribution-pipe-buffer-size");
3004 if (pipeBufferSize
> 0) {
3005 g_log
<<Logger::Info
<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize
<<endl
;
3008 /* thread 0 is the handler / SNMP, we start at 1 */
3009 for(unsigned int n
= 1; n
<= (g_numWorkerThreads
+ g_numDistributorThreads
); ++n
) {
3010 auto& threadInfos
= s_threadInfos
.at(n
);
3014 unixDie("Creating pipe for inter-thread communications");
3016 threadInfos
.pipes
.readToThread
= fd
[0];
3017 threadInfos
.pipes
.writeToThread
= fd
[1];
3020 unixDie("Creating pipe for inter-thread communications");
3022 threadInfos
.pipes
.readFromThread
= fd
[0];
3023 threadInfos
.pipes
.writeFromThread
= fd
[1];
3026 unixDie("Creating pipe for inter-thread communications");
3028 threadInfos
.pipes
.readQueriesToThread
= fd
[0];
3029 threadInfos
.pipes
.writeQueriesToThread
= fd
[1];
3031 if (pipeBufferSize
> 0) {
3032 if (!setPipeBufferSize(threadInfos
.pipes
.writeQueriesToThread
, pipeBufferSize
)) {
3034 g_log
<<Logger::Warning
<<"Error resizing the buffer of the distribution pipe for thread "<<n
<<" to "<<pipeBufferSize
<<": "<<strerror(err
)<<endl
;
3035 auto existingSize
= getPipeBufferSize(threadInfos
.pipes
.writeQueriesToThread
);
3036 if (existingSize
> 0) {
3037 g_log
<<Logger::Warning
<<"The current size of the distribution pipe's buffer for thread "<<n
<<" is "<<existingSize
<<endl
;
3042 if (!setNonBlocking(threadInfos
.pipes
.writeQueriesToThread
)) {
3043 unixDie("Making pipe for inter-thread communications non-blocking");
3054 void broadcastFunction(const pipefunc_t
& func
)
3056 /* This function might be called by the worker with t_id 0 during startup
3057 for the initialization of ACLs and domain maps. After that it should only
3058 be called by the handler. */
3060 if (s_threadInfos
.empty() && isHandlerThread()) {
3061 /* the handler and distributors will call themselves below, but
3062 during startup we get called while s_threadInfos has not been
3063 populated yet to update the ACL or domain maps, so we need to
3070 for (const auto& threadInfo
: s_threadInfos
) {
3072 func(); // don't write to ourselves!
3076 ThreadMSG
* tmsg
= new ThreadMSG();
3078 tmsg
->wantAnswer
= true;
3079 if(write(threadInfo
.pipes
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
3082 unixDie("write to thread pipe returned wrong size or error");
3085 string
* resp
= nullptr;
3086 if(read(threadInfo
.pipes
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
3087 unixDie("read from thread pipe returned wrong size or error");
3096 static bool trySendingQueryToWorker(unsigned int target
, ThreadMSG
* tmsg
)
3098 auto& targetInfo
= s_threadInfos
[target
];
3099 if(!targetInfo
.isWorker
) {
3100 g_log
<<Logger::Error
<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl
;
3104 const auto& tps
= targetInfo
.pipes
;
3106 ssize_t written
= write(tps
.writeQueriesToThread
, &tmsg
, sizeof(tmsg
));
3108 if (static_cast<size_t>(written
) != sizeof(tmsg
)) {
3110 unixDie("write to thread pipe returned wrong size or error");
3115 if (error
== EAGAIN
|| error
== EWOULDBLOCK
) {
3119 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error
));
3123 ++targetInfo
.numberOfDistributedQueries
;
3128 static unsigned int getWorkerLoad(size_t workerIdx
)
3130 const auto mt
= s_threadInfos
[/* skip handler */ 1 + g_numDistributorThreads
+ workerIdx
].mt
;
3131 if (mt
!= nullptr) {
3132 return mt
->numProcesses();
3137 static unsigned int selectWorker(unsigned int hash
)
3139 if (s_balancingFactor
== 0) {
3140 return /* skip handler */ 1 + g_numDistributorThreads
+ (hash
% g_numWorkerThreads
);
3143 /* we start with one, representing the query we are currently handling */
3144 double currentLoad
= 1;
3145 std::vector
<unsigned int> load(g_numWorkerThreads
);
3146 for (size_t idx
= 0; idx
< g_numWorkerThreads
; idx
++) {
3147 load
[idx
] = getWorkerLoad(idx
);
3148 currentLoad
+= load
[idx
];
3149 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3152 double targetLoad
= (currentLoad
/ g_numWorkerThreads
) * s_balancingFactor
;
3153 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3155 unsigned int worker
= hash
% g_numWorkerThreads
;
3156 /* at least one server has to be at or below the average load */
3157 if (load
[worker
] > targetLoad
) {
3158 ++g_stats
.rebalancedQueries
;
3160 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3161 worker
= (worker
+ 1) % g_numWorkerThreads
;
3163 while(load
[worker
] > targetLoad
);
3166 return /* skip handler */ 1 + g_numDistributorThreads
+ worker
;
3169 // This function is only called by the distributor threads, when pdns-distributes-queries is set
3170 void distributeAsyncFunction(const string
& packet
, const pipefunc_t
& func
)
3172 if (!isDistributorThread()) {
3173 g_log
<<Logger::Error
<<"distributeAsyncFunction() has been called by a worker ("<<t_id
<<")"<<endl
;
3177 unsigned int hash
= hashQuestion(packet
.c_str(), packet
.length(), g_disthashseed
);
3178 unsigned int target
= selectWorker(hash
);
3180 ThreadMSG
* tmsg
= new ThreadMSG();
3182 tmsg
->wantAnswer
= false;
3184 if (!trySendingQueryToWorker(target
, tmsg
)) {
3185 /* if this function failed but did not raise an exception, it means that the pipe
3186 was full, let's try another one */
3187 unsigned int newTarget
= 0;
3189 newTarget
= /* skip handler */ 1 + g_numDistributorThreads
+ dns_random(g_numWorkerThreads
);
3190 } while (newTarget
== target
);
3192 if (!trySendingQueryToWorker(newTarget
, tmsg
)) {
3193 g_stats
.queryPipeFullDrops
++;
3199 static void handlePipeRequest(int fd
, FDMultiplexer::funcparam_t
& var
)
3201 ThreadMSG
* tmsg
= nullptr;
3203 if(read(fd
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) { // fd == readToThread || fd == readQueriesToThread
3204 unixDie("read from thread pipe returned wrong size or error");
3209 resp
= tmsg
->func();
3211 catch(std::exception
& e
) {
3212 if(g_logCommonErrors
)
3213 g_log
<<Logger::Error
<<"PIPE function we executed created exception: "<<e
.what()<<endl
; // but what if they wanted an answer.. we send 0
3215 catch(PDNSException
& e
) {
3216 if(g_logCommonErrors
)
3217 g_log
<<Logger::Error
<<"PIPE function we executed created PDNS exception: "<<e
.reason
<<endl
; // but what if they wanted an answer.. we send 0
3219 if(tmsg
->wantAnswer
) {
3220 const auto& threadInfo
= s_threadInfos
.at(t_id
);
3221 if(write(threadInfo
.pipes
.writeFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
)) {
3223 unixDie("write to thread pipe returned wrong size or error");
3230 template<class T
> void *voider(const boost::function
<T
*()>& func
)
3235 vector
<ComboAddress
>& operator+=(vector
<ComboAddress
>&a
, const vector
<ComboAddress
>& b
)
3237 a
.insert(a
.end(), b
.begin(), b
.end());
/* Appends all the (name, count) pairs of b to a and returns a, so that
   per-thread results can be accumulated with +=. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.begin(), b.end());
  return a;
}
3247 vector
<pair
<DNSName
, uint16_t> >& operator+=(vector
<pair
<DNSName
, uint16_t> >&a
, const vector
<pair
<DNSName
, uint16_t> >& b
)
3249 a
.insert(a
.end(), b
.begin(), b
.end());
3255 This function should only be called by the handler to gather metrics, wipe the cache,
3256 reload the Lua script (not the Lua config) or change the current trace regex,
3257 and by the SNMP thread to gather metrics. */
3258 template<class T
> T
broadcastAccFunction(const boost::function
<T
*()>& func
)
3260 if (!isHandlerThread()) {
3261 g_log
<<Logger::Error
<<"broadcastAccFunction has been called by a worker ("<<t_id
<<")"<<endl
;
3267 for (const auto& threadInfo
: s_threadInfos
) {
3272 const auto& tps
= threadInfo
.pipes
;
3273 ThreadMSG
* tmsg
= new ThreadMSG();
3274 tmsg
->func
= boost::bind(voider
<T
>, func
);
3275 tmsg
->wantAnswer
= true;
3277 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
3279 unixDie("write to thread pipe returned wrong size or error");
3283 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
3284 unixDie("read from thread pipe returned wrong size or error");
3295 template string
broadcastAccFunction(const boost::function
<string
*()>& fun
); // explicit instantiation
3296 template uint64_t broadcastAccFunction(const boost::function
<uint64_t*()>& fun
); // explicit instantiation
3297 template vector
<ComboAddress
> broadcastAccFunction(const boost::function
<vector
<ComboAddress
> *()>& fun
); // explicit instantiation
3298 template vector
<pair
<DNSName
,uint16_t> > broadcastAccFunction(const boost::function
<vector
<pair
<DNSName
, uint16_t> > *()>& fun
); // explicit instantiation
3299 template ThreadTimes
broadcastAccFunction(const boost::function
<ThreadTimes
*()>& fun
);
3301 static void handleRCC(int fd
, FDMultiplexer::funcparam_t
& var
)
3305 string msg
=s_rcc
.recv(&remote
);
3306 RecursorControlParser rcp
;
3307 RecursorControlParser::func_t
* command
;
3309 string answer
=rcp
.getAnswer(msg
, &command
);
3311 // If we are inside a chroot, we need to strip
3312 if (!arg()["chroot"].empty()) {
3313 size_t len
= arg()["chroot"].length();
3314 remote
= remote
.substr(len
);
3317 s_rcc
.send(answer
, &remote
);
3320 catch(const std::exception
& e
) {
3321 g_log
<<Logger::Error
<<"Error dealing with control socket request: "<<e
.what()<<endl
;
3323 catch(const PDNSException
& ae
) {
3324 g_log
<<Logger::Error
<<"Error dealing with control socket request: "<<ae
.reason
<<endl
;
3328 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
)
3330 PacketID
* pident
=any_cast
<PacketID
>(&var
);
3331 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3333 shared_array
<char> buffer(new char[pident
->inNeeded
]);
3335 ssize_t ret
=recv(fd
, buffer
.get(), pident
->inNeeded
,0);
3337 pident
->inMSG
.append(&buffer
[0], &buffer
[ret
]);
3338 pident
->inNeeded
-=(size_t)ret
;
3339 if(!pident
->inNeeded
|| pident
->inIncompleteOkay
) {
3340 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
3341 PacketID pid
=*pident
;
3342 string msg
=pident
->inMSG
;
3344 t_fdm
->removeReadFD(fd
);
3345 MT
->sendEvent(pid
, &msg
);
3348 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3352 PacketID tmp
=*pident
;
3353 t_fdm
->removeReadFD(fd
); // pident might now be invalid (it isn't, but still)
3355 MT
->sendEvent(tmp
, &empty
); // this conveys error status
3359 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
)
3361 PacketID
* pid
=any_cast
<PacketID
>(&var
);
3362 ssize_t ret
=send(fd
, pid
->outMSG
.c_str() + pid
->outPos
, pid
->outMSG
.size() - pid
->outPos
,0);
3364 pid
->outPos
+=(ssize_t
)ret
;
3365 if(pid
->outPos
==pid
->outMSG
.size()) {
3367 t_fdm
->removeWriteFD(fd
);
3368 MT
->sendEvent(tmp
, &tmp
.outMSG
); // send back what we sent to convey everything is ok
3371 else { // error or EOF
3373 t_fdm
->removeWriteFD(fd
);
3375 MT
->sendEvent(tmp
, &sent
); // we convey error status by sending empty string
3379 // resend event to everybody chained onto it
3380 static void doResends(MT_t::waiters_t::iterator
& iter
, PacketID resend
, const string
& content
)
3382 if(iter
->key
.chain
.empty())
3384 // cerr<<"doResends called!\n";
3385 for(PacketID::chain_t::iterator i
=iter
->key
.chain
.begin(); i
!= iter
->key
.chain
.end() ; ++i
) {
3388 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3390 MT
->sendEvent(resend
, &content
);
3391 g_stats
.chainResends
++;
3395 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
3397 PacketID pid
=any_cast
<PacketID
>(var
);
3400 packet
.resize(g_outgoingEDNSBufsize
);
3401 ComboAddress fromaddr
;
3402 socklen_t addrlen
=sizeof(fromaddr
);
3404 len
=recvfrom(fd
, &packet
.at(0), packet
.size(), 0, (sockaddr
*)&fromaddr
, &addrlen
);
3406 if(len
< (ssize_t
) sizeof(dnsheader
)) {
3408 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3410 g_stats
.serverParseError
++;
3411 if(g_logCommonErrors
)
3412 g_log
<<Logger::Error
<<"Unable to parse packet from remote UDP server "<< fromaddr
.toString() <<
3413 ": packet smaller than DNS header"<<endl
;
3416 t_udpclientsocks
->returnSocket(fd
);
3419 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pid
);
3420 if(iter
!= MT
->d_waiters
.end())
3421 doResends(iter
, pid
, empty
);
3423 MT
->sendEvent(pid
, &empty
); // this denotes error (does lookup again.. at least L1 will be hot)
3429 memcpy(&dh
, &packet
.at(0), sizeof(dh
));
3432 pident
.remote
=fromaddr
;
3436 if(!dh
.qr
&& g_logCommonErrors
) {
3437 g_log
<<Logger::Notice
<<"Not taking data from question on outgoing socket from "<< fromaddr
.toStringWithPort() <<endl
;
3440 if(!dh
.qdcount
|| // UPC, Nominum, very old BIND on FormErr, NSD
3441 !dh
.qr
) { // one weird server
3442 pident
.domain
.clear();
3448 pident
.domain
=DNSName(&packet
.at(0), len
, 12, false, &pident
.type
); // don't copy this from above - we need to do the actual read
3450 catch(std::exception
& e
) {
3451 g_stats
.serverParseError
++; // won't be fed to lwres.cc, so we have to increment
3452 g_log
<<Logger::Warning
<<"Error in packet from remote nameserver "<< fromaddr
.toStringWithPort() << ": "<<e
.what() << endl
;
3457 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pident
);
3458 if(iter
!= MT
->d_waiters
.end()) {
3459 doResends(iter
, pident
, packet
);
3464 if(!MT
->sendEvent(pident
, &packet
)) {
3465 /* we did not find a match for this response, something is wrong */
3467 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3468 for(MT_t::waiters_t::iterator mthread
=MT
->d_waiters
.begin(); mthread
!=MT
->d_waiters
.end(); ++mthread
) {
3469 if(pident
.fd
==mthread
->key
.fd
&& mthread
->key
.remote
==pident
.remote
&& mthread
->key
.type
== pident
.type
&&
3470 pident
.domain
== mthread
->key
.domain
) {
3471 mthread
->key
.nearMisses
++;
3474 // be a bit paranoid here since we're weakening our matching
3475 if(pident
.domain
.empty() && !mthread
->key
.domain
.empty() && !pident
.type
&& mthread
->key
.type
&&
3476 pident
.id
== mthread
->key
.id
&& mthread
->key
.remote
== pident
.remote
) {
3477 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3478 pident
.domain
= mthread
->key
.domain
;
3479 pident
.type
= mthread
->key
.type
;
3480 goto retryWithName
; // note that this only passes on an error, lwres will still reject the packet
3483 g_stats
.unexpectedCount
++; // if we made it here, it really is an unexpected answer
3484 if(g_logCommonErrors
) {
3485 g_log
<<Logger::Warning
<<"Discarding unexpected packet from "<<fromaddr
.toStringWithPort()<<": "<< (pident
.domain
.empty() ? "<empty>" : pident
.domain
.toString())<<", "<<pident
.type
<<", "<<MT
->d_waiters
.size()<<" waiters"<<endl
;
3489 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
3490 t_udpclientsocks
->returnSocket(fd
);
3494 FDMultiplexer
* getMultiplexer()
3497 for(const auto& i
: FDMultiplexer::getMultiplexerMap()) {
3502 catch(FDMultiplexerException
&fe
) {
3503 g_log
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer ("<<fe
.what()<<"), falling back"<<endl
;
3506 g_log
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer"<<endl
;
3509 g_log
<<Logger::Error
<<"No working multiplexer found!"<<endl
;
3514 static string
* doReloadLuaScript()
3516 string fname
= ::arg()["lua-dns-script"];
3520 g_log
<<Logger::Info
<<t_id
<<" Unloaded current lua script"<<endl
;
3521 return new string("unloaded\n");
3524 t_pdl
= std::make_shared
<RecursorLua4
>();
3525 t_pdl
->loadFile(fname
);
3528 catch(std::exception
& e
) {
3529 g_log
<<Logger::Error
<<t_id
<<" Retaining current script, error from '"<<fname
<<"': "<< e
.what() <<endl
;
3530 return new string("retaining current script, error from '"+fname
+"': "+e
.what()+"\n");
3533 g_log
<<Logger::Warning
<<t_id
<<" (Re)loaded lua script from '"<<fname
<<"'"<<endl
;
3534 return new string("(re)loaded '"+fname
+"'\n");
3537 string
doQueueReloadLuaScript(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
3540 ::arg().set("lua-dns-script") = *begin
;
3542 return broadcastAccFunction
<string
>(doReloadLuaScript
);
3545 static string
* pleaseUseNewTraceRegex(const std::string
& newRegex
)
3548 if(newRegex
.empty()) {
3549 t_traceRegex
.reset();
3550 return new string("unset\n");
3553 t_traceRegex
= std::make_shared
<Regex
>(newRegex
);
3554 return new string("ok\n");
3557 catch(PDNSException
& ae
)
3559 return new string(ae
.reason
+"\n");
3562 string
doTraceRegex(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
3564 return broadcastAccFunction
<string
>(boost::bind(pleaseUseNewTraceRegex
, begin
!=end
? *begin
: ""));
3567 static void checkLinuxIPv6Limits()
3571 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line
)) {
3572 int lim
=std::stoi(line
);
3574 g_log
<<Logger::Error
<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim
<<" which is < 16384"<<endl
;
3579 static void checkOrFixFDS()
3581 unsigned int availFDs
=getFilenumLimit();
3582 unsigned int wantFDs
= g_maxMThreads
* g_numWorkerThreads
+25; // even healthier margin then before
3584 if(wantFDs
> availFDs
) {
3585 unsigned int hardlimit
= getFilenumLimit(true);
3586 if(hardlimit
>= wantFDs
) {
3587 setFilenumLimit(wantFDs
);
3588 g_log
<<Logger::Warning
<<"Raised soft limit on number of filedescriptors to "<<wantFDs
<<" to match max-mthreads and threads settings"<<endl
;
3591 int newval
= (hardlimit
- 25) / g_numWorkerThreads
;
3592 g_log
<<Logger::Warning
<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit
<<" < "<<wantFDs
<<"), reducing max-mthreads to "<<newval
<<endl
;
3593 g_maxMThreads
= newval
;
3594 setFilenumLimit(hardlimit
);
3599 static void* recursorThread(unsigned int tid
, const string
& threadName
);
3601 static void* pleaseSupplantACLs(std::shared_ptr
<NetmaskGroup
> ng
)
3612 static bool l_initialized
;
3614 if(l_initialized
) { // only reload configuration file on second call
3615 string configname
=::arg()["config-dir"]+"/recursor.conf";
3616 if(::arg()["config-name"]!="") {
3617 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3619 cleanSlashes(configname
);
3621 if(!::arg().preParseFile(configname
.c_str(), "allow-from-file"))
3622 throw runtime_error("Unable to re-parse configuration file '"+configname
+"'");
3623 ::arg().preParseFile(configname
.c_str(), "allow-from", LOCAL_NETS
);
3624 ::arg().preParseFile(configname
.c_str(), "include-dir");
3625 ::arg().preParse(g_argc
, g_argv
, "include-dir");
3627 // then process includes
3628 std::vector
<std::string
> extraConfigs
;
3629 ::arg().gatherIncludes(extraConfigs
);
3631 for(const std::string
& fn
: extraConfigs
) {
3632 if(!::arg().preParseFile(fn
.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3633 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
3634 if(!::arg().preParseFile(fn
.c_str(), "allow-from", ::arg()["allow-from"]))
3635 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
3638 ::arg().preParse(g_argc
, g_argv
, "allow-from-file");
3639 ::arg().preParse(g_argc
, g_argv
, "allow-from");
3642 std::shared_ptr
<NetmaskGroup
> oldAllowFrom
= t_allowFrom
;
3643 std::shared_ptr
<NetmaskGroup
> allowFrom
= std::make_shared
<NetmaskGroup
>();
3645 if(!::arg()["allow-from-file"].empty()) {
3647 ifstream
ifs(::arg()["allow-from-file"].c_str());
3649 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3652 string::size_type pos
;
3653 while(getline(ifs
,line
)) {
3655 if(pos
!=string::npos
)
3661 allowFrom
->addMask(line
);
3663 g_log
<<Logger::Warning
<<"Done parsing " << allowFrom
->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl
;
3665 else if(!::arg()["allow-from"].empty()) {
3667 stringtok(ips
, ::arg()["allow-from"], ", ");
3669 g_log
<<Logger::Warning
<<"Only allowing queries from: ";
3670 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
3671 allowFrom
->addMask(*i
);
3673 g_log
<<Logger::Warning
<<", ";
3674 g_log
<<Logger::Warning
<<*i
;
3676 g_log
<<Logger::Warning
<<endl
;
3679 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3680 g_log
<<Logger::Warning
<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl
;
3681 allowFrom
= nullptr;
3684 g_initialAllowFrom
= allowFrom
;
3685 broadcastFunction(boost::bind(pleaseSupplantACLs
, allowFrom
));
3686 oldAllowFrom
= nullptr;
3688 l_initialized
= true;
3692 static void setupDelegationOnly()
3694 vector
<string
> parts
;
3695 stringtok(parts
, ::arg()["delegation-only"], ", \t");
3696 for(const auto& p
: parts
) {
3697 SyncRes::addDelegationOnly(DNSName(p
));
3701 static std::map
<unsigned int, std::set
<int> > parseCPUMap()
3703 std::map
<unsigned int, std::set
<int> > result
;
3705 const std::string value
= ::arg()["cpu-map"];
3707 if (!value
.empty() && !isSettingThreadCPUAffinitySupported()) {
3708 g_log
<<Logger::Warning
<<"CPU mapping requested but not supported, skipping"<<endl
;
3712 std::vector
<std::string
> parts
;
3714 stringtok(parts
, value
, " \t");
3716 for(const auto& part
: parts
) {
3717 if (part
.find('=') == string::npos
)
3721 auto headers
= splitField(part
, '=');
3722 trim(headers
.first
);
3723 trim(headers
.second
);
3725 unsigned int threadId
= pdns_stou(headers
.first
);
3726 std::vector
<std::string
> cpus
;
3728 stringtok(cpus
, headers
.second
, ",");
3730 for(const auto& cpu
: cpus
) {
3731 int cpuId
= std::stoi(cpu
);
3733 result
[threadId
].insert(cpuId
);
3736 catch(const std::exception
& e
) {
3737 g_log
<<Logger::Error
<<"Error parsing cpu-map entry '"<<part
<<"': "<<e
.what()<<endl
;
3744 static void setCPUMap(const std::map
<unsigned int, std::set
<int> >& cpusMap
, unsigned int n
, pthread_t tid
)
3746 const auto& cpuMapping
= cpusMap
.find(n
);
3747 if (cpuMapping
!= cpusMap
.cend()) {
3748 int rc
= mapThreadToCPUList(tid
, cpuMapping
->second
);
3750 g_log
<<Logger::Info
<<"CPU affinity for worker "<<n
<<" has been set to CPU map:";
3751 for (const auto cpu
: cpuMapping
->second
) {
3752 g_log
<<Logger::Info
<<" "<<cpu
;
3754 g_log
<<Logger::Info
<<endl
;
3757 g_log
<<Logger::Warning
<<"Error setting CPU affinity for worker "<<n
<<" to CPU map:";
3758 for (const auto cpu
: cpuMapping
->second
) {
3759 g_log
<<Logger::Info
<<" "<<cpu
;
3761 g_log
<<Logger::Info
<<strerror(rc
)<<endl
;
#ifdef NOD_ENABLED
/* Per-thread setup of the new-domain (NOD) and unique-response (UDR)
   databases: creates each enabled database with the configured size, points
   it at its history directory, initializes it and starts its detached
   housekeeping thread. Any failure is logged and terminates the process. */
static void setupNODThread()
{
  if (g_nodEnabled) {
    uint32_t num_cells = ::arg().asNum("new-domain-db-size");
    t_nodDBp = std::make_shared<nod::NODDB>(num_cells);
    try {
      t_nodDBp->setCacheDir(::arg()["new-domain-history-dir"]);
    }
    catch (const PDNSException& e) {
      g_log<<Logger::Error<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl;
      _exit(1);
    }
    if (!t_nodDBp->init()) {
      g_log<<Logger::Error<<"Could not initialize domain tracking"<<endl;
      _exit(1);
    }
    std::thread t(nod::NODDB::startHousekeepingThread, t_nodDBp, std::this_thread::get_id());
    t.detach();
    g_nod_pbtag = ::arg()["new-domain-pb-tag"];
  }
  if (g_udrEnabled) {
    uint32_t num_cells = ::arg().asNum("unique-response-db-size");
    t_udrDBp = std::make_shared<nod::UniqueResponseDB>(num_cells);
    try {
      t_udrDBp->setCacheDir(::arg()["unique-response-history-dir"]);
    }
    catch (const PDNSException& e) {
      g_log<<Logger::Error<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl;
      _exit(1);
    }
    if (!t_udrDBp->init()) {
      g_log<<Logger::Error<<"Could not initialize unique response tracking"<<endl;
      _exit(1);
    }
    std::thread t(nod::UniqueResponseDB::startHousekeepingThread, t_udrDBp, std::this_thread::get_id());
    t.detach();
    g_udr_pbtag = ::arg()["unique-response-pb-tag"];
  }
}

/* Adds every name of wlist (separated by commas, semicolons or spaces) to
   the NOD whitelist. */
void parseNODWhitelist(const std::string& wlist)
{
  vector<string> parts;
  stringtok(parts, wlist, ",; ");
  for (const auto& a : parts) {
    g_nodDomainWL.add(DNSName(a));
  }
}

/* Reads the process-wide NOD and UDR settings from the configuration. */
static void setupNODGlobal()
{
  // Setup NOD subsystem
  g_nodEnabled = ::arg().mustDo("new-domain-tracking");
  g_nodLookupDomain = DNSName(::arg()["new-domain-lookup"]);
  g_nodLog = ::arg().mustDo("new-domain-log");
  parseNODWhitelist(::arg()["new-domain-whitelist"]);

  // Setup Unique DNS Response subsystem
  g_udrEnabled = ::arg().mustDo("unique-response-tracking");
  g_udrLog = ::arg().mustDo("unique-response-log");
}
#endif /* NOD_ENABLED */
3830 static void checkSocketDir(void)
3833 string
dir(::arg()["socket-dir"]);
3836 if (stat(dir
.c_str(), &st
) == -1) {
3837 msg
= "it does not exist or cannot access";
3839 else if (!S_ISDIR(st
.st_mode
)) {
3840 msg
= "it is not a directory";
3842 else if (access(dir
.c_str(), R_OK
| W_OK
| X_OK
) != 0) {
3843 msg
= "cannot read, write or search";
3847 g_log
<< Logger::Error
<< "Problem with socket directory " << dir
<< ": " << msg
<< "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl
;
3851 static int serviceMain(int argc
, char*argv
[])
3853 g_log
.setName(s_programname
);
3854 g_log
.disableSyslog(::arg().mustDo("disable-syslog"));
3855 g_log
.setTimestamps(::arg().mustDo("log-timestamp"));
3857 if(!::arg()["logging-facility"].empty()) {
3858 int val
=logFacilityToLOG(::arg().asNum("logging-facility") );
3860 g_log
.setFacility(val
);
3862 g_log
<<Logger::Error
<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl
;
3865 showProductVersion();
3867 g_disthashseed
=dns_random(0xffffffff);
3869 checkLinuxIPv6Limits();
3871 vector
<string
> addrs
;
3872 if(!::arg()["query-local-address6"].empty()) {
3873 SyncRes::s_doIPv6
=true;
3874 g_log
<<Logger::Warning
<<"Enabling IPv6 transport for outgoing queries"<<endl
;
3876 stringtok(addrs
, ::arg()["query-local-address6"], ", ;");
3877 for(const string
& addr
: addrs
) {
3878 g_localQueryAddresses6
.push_back(ComboAddress(addr
));
3882 g_log
<<Logger::Warning
<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl
;
3885 stringtok(addrs
, ::arg()["query-local-address"], ", ;");
3886 for(const string
& addr
: addrs
) {
3887 g_localQueryAddresses4
.push_back(ComboAddress(addr
));
3890 catch(std::exception
& e
) {
3891 g_log
<<Logger::Error
<<"Assigning local query addresses: "<<e
.what();
3895 // keep this ABOVE loadRecursorLuaConfig!
3896 if(::arg()["dnssec"]=="off")
3897 g_dnssecmode
=DNSSECMode::Off
;
3898 else if(::arg()["dnssec"]=="process-no-validate")
3899 g_dnssecmode
=DNSSECMode::ProcessNoValidate
;
3900 else if(::arg()["dnssec"]=="process")
3901 g_dnssecmode
=DNSSECMode::Process
;
3902 else if(::arg()["dnssec"]=="validate")
3903 g_dnssecmode
=DNSSECMode::ValidateAll
;
3904 else if(::arg()["dnssec"]=="log-fail")
3905 g_dnssecmode
=DNSSECMode::ValidateForLog
;
3907 g_log
<<Logger::Error
<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl
;
3911 g_signatureInceptionSkew
= ::arg().asNum("signature-inception-skew");
3912 if (g_signatureInceptionSkew
< 0) {
3913 g_log
<<Logger::Error
<<"A negative value for 'signature-inception-skew' is not allowed"<<endl
;
3917 g_dnssecLogBogus
= ::arg().mustDo("dnssec-log-bogus");
3918 g_maxNSEC3Iterations
= ::arg().asNum("nsec3-max-iterations");
3920 g_maxCacheEntries
= ::arg().asNum("max-cache-entries");
3921 g_maxPacketCacheEntries
= ::arg().asNum("max-packetcache-entries");
3923 luaConfigDelayedThreads delayedLuaThreads
;
3925 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads
);
3927 catch (PDNSException
&e
) {
3928 g_log
<<Logger::Error
<<"Cannot load Lua configuration: "<<e
.reason
<<endl
;
3933 initPublicSuffixList(::arg()["public-suffix-list-file"]);
3935 if(!::arg()["dont-query"].empty()) {
3937 stringtok(ips
, ::arg()["dont-query"], ", ");
3938 ips
.push_back("0.0.0.0");
3939 ips
.push_back("::");
3941 g_log
<<Logger::Warning
<<"Will not send queries to: ";
3942 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
3943 SyncRes::addDontQuery(*i
);
3945 g_log
<<Logger::Warning
<<", ";
3946 g_log
<<Logger::Warning
<<*i
;
3948 g_log
<<Logger::Warning
<<endl
;
3951 g_quiet
=::arg().mustDo("quiet");
3953 /* this needs to be done before parseACLs(), which call broadcastFunction() */
3954 g_weDistributeQueries
= ::arg().mustDo("pdns-distributes-queries");
3955 if(g_weDistributeQueries
) {
3956 g_log
<<Logger::Warning
<<"PowerDNS Recursor itself will distribute queries over threads"<<endl
;
3959 setupDelegationOnly();
3960 g_outgoingEDNSBufsize
=::arg().asNum("edns-outgoing-bufsize");
3962 if(::arg()["trace"]=="fail") {
3963 SyncRes::setDefaultLogMode(SyncRes::Store
);
3965 else if(::arg().mustDo("trace")) {
3966 SyncRes::setDefaultLogMode(SyncRes::Log
);
3967 ::arg().set("quiet")="no";
3971 string myHostname
= getHostname();
3972 if (myHostname
== "UNKNOWN"){
3973 g_log
<<Logger::Warning
<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl
;
3977 SyncRes::s_minimumTTL
= ::arg().asNum("minimum-ttl-override");
3978 SyncRes::s_minimumECSTTL
= ::arg().asNum("ecs-minimum-ttl-override");
3980 SyncRes::s_nopacketcache
= ::arg().mustDo("disable-packetcache");
3982 SyncRes::s_maxnegttl
=::arg().asNum("max-negative-ttl");
3983 SyncRes::s_maxbogusttl
=::arg().asNum("max-cache-bogus-ttl");
3984 SyncRes::s_maxcachettl
=max(::arg().asNum("max-cache-ttl"), 15);
3985 SyncRes::s_packetcachettl
=::arg().asNum("packetcache-ttl");
3986 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3987 uint32_t packetCacheServFailTTL
= ::arg().asNum("packetcache-servfail-ttl");
3988 SyncRes::s_packetcacheservfailttl
=(packetCacheServFailTTL
> SyncRes::s_packetcachettl
) ? SyncRes::s_packetcachettl
: packetCacheServFailTTL
;
3989 SyncRes::s_serverdownmaxfails
=::arg().asNum("server-down-max-fails");
3990 SyncRes::s_serverdownthrottletime
=::arg().asNum("server-down-throttle-time");
3991 SyncRes::s_serverID
=::arg()["server-id"];
3992 SyncRes::s_maxqperq
=::arg().asNum("max-qperq");
3993 SyncRes::s_maxtotusec
=1000*::arg().asNum("max-total-msec");
3994 SyncRes::s_maxdepth
=::arg().asNum("max-recursion-depth");
3995 SyncRes::s_rootNXTrust
= ::arg().mustDo( "root-nx-trust");
3996 if(SyncRes::s_serverID
.empty()) {
3997 SyncRes::s_serverID
= myHostname
;
4000 SyncRes::s_ecsipv4limit
= ::arg().asNum("ecs-ipv4-bits");
4001 SyncRes::s_ecsipv6limit
= ::arg().asNum("ecs-ipv6-bits");
4002 SyncRes::clearECSStats();
4003 SyncRes::s_ecsipv4cachelimit
= ::arg().asNum("ecs-ipv4-cache-bits");
4004 SyncRes::s_ecsipv6cachelimit
= ::arg().asNum("ecs-ipv6-cache-bits");
4005 SyncRes::s_ecscachelimitttl
= ::arg().asNum("ecs-cache-limit-ttl");
4007 SyncRes::s_qnameminimization
= ::arg().mustDo("qname-minimization");
4009 if (SyncRes::s_qnameminimization
) {
4010 // With an empty cache, a rev ipv6 query with dnssec enabled takes
4011 // almost 100 queries. Default maxqperq is 60.
4012 SyncRes::s_maxqperq
= std::max(SyncRes::s_maxqperq
, static_cast<unsigned int>(100));
4015 SyncRes::s_hardenNXD
= SyncRes::HardenNXD::DNSSEC
;
4016 string value
= ::arg()["nothing-below-nxdomain"];
4017 if (value
== "yes") {
4018 SyncRes::s_hardenNXD
= SyncRes::HardenNXD::Yes
;
4019 } else if (value
== "no") {
4020 SyncRes::s_hardenNXD
= SyncRes::HardenNXD::No
;
4021 } else if (value
!= "dnssec") {
4022 g_log
<< Logger::Error
<< "Unknown nothing-below-nxdomain mode: " << value
<< endl
;
4026 if (!::arg().isEmpty("ecs-scope-zero-address")) {
4027 ComboAddress
scopeZero(::arg()["ecs-scope-zero-address"]);
4028 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero
, scopeZero
.isIPv4() ? 32 : 128));
4032 for (const auto& addr
: g_localQueryAddresses4
) {
4033 if (!IsAnyAddress(addr
)) {
4034 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 32));
4040 for (const auto& addr
: g_localQueryAddresses6
) {
4041 if (!IsAnyAddress(addr
)) {
4042 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 128));
4048 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
4053 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4054 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4055 g_useIncomingECS
= ::arg().mustDo("use-incoming-edns-subnet");
4057 g_XPFAcl
.toMasks(::arg()["xpf-allow-from"]);
4058 g_xpfRRCode
= ::arg().asNum("xpf-rr-code");
4060 g_networkTimeoutMsec
= ::arg().asNum("network-timeout");
4062 g_initialDomainMap
= parseAuthAndForwards();
4064 g_latencyStatSize
=::arg().asNum("latency-statistic-size");
4066 g_logCommonErrors
=::arg().mustDo("log-common-errors");
4067 g_logRPZChanges
= ::arg().mustDo("log-rpz-changes");
4069 g_anyToTcp
= ::arg().mustDo("any-to-tcp");
4070 g_udpTruncationThreshold
= ::arg().asNum("udp-truncation-threshold");
4072 g_lowercaseOutgoing
= ::arg().mustDo("lowercase-outgoing");
4074 g_numDistributorThreads
= ::arg().asNum("distributor-threads");
4075 g_numWorkerThreads
= ::arg().asNum("threads");
4076 if (g_numWorkerThreads
< 1) {
4077 g_log
<<Logger::Warning
<<"Asked to run with 0 threads, raising to 1 instead"<<endl
;
4078 g_numWorkerThreads
= 1;
4081 g_numThreads
= g_numDistributorThreads
+ g_numWorkerThreads
;
4082 g_maxMThreads
= ::arg().asNum("max-mthreads");
4085 int64_t maxInFlight
= ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4086 if (maxInFlight
< 1 || maxInFlight
> USHRT_MAX
|| maxInFlight
>= g_maxMThreads
) {
4087 g_log
<<Logger::Warning
<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl
;
4088 TCPConnection::s_maxInFlight
= 10;
4090 TCPConnection::s_maxInFlight
= maxInFlight
;
4094 g_gettagNeedsEDNSOptions
= ::arg().mustDo("gettag-needs-edns-options");
4096 g_statisticsInterval
= ::arg().asNum("statistics-interval");
4099 SuffixMatchNode dontThrottleNames
;
4100 vector
<string
> parts
;
4101 stringtok(parts
, ::arg()["dont-throttle-names"], " ,");
4102 for (const auto &p
: parts
) {
4103 dontThrottleNames
.add(DNSName(p
));
4105 g_dontThrottleNames
.setState(std::move(dontThrottleNames
));
4107 NetmaskGroup dontThrottleNetmasks
;
4108 stringtok(parts
, ::arg()["dont-throttle-netmasks"], " ,");
4109 for (const auto &p
: parts
) {
4110 dontThrottleNetmasks
.addMask(Netmask(p
));
4112 g_dontThrottleNetmasks
.setState(std::move(dontThrottleNetmasks
));
4115 s_balancingFactor
= ::arg().asDouble("distribution-load-factor");
4116 if (s_balancingFactor
!= 0.0 && s_balancingFactor
< 1.0) {
4117 s_balancingFactor
= 0.0;
4118 g_log
<<Logger::Warning
<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl
;
4122 g_reusePort
= ::arg().mustDo("reuseport");
4125 s_threadInfos
.resize(g_numDistributorThreads
+ g_numWorkerThreads
+ /* handler */ 1);
4128 if (g_weDistributeQueries
) {
4129 /* first thread is the handler, then distributors */
4130 for (unsigned int threadId
= 1; threadId
<= g_numDistributorThreads
; threadId
++) {
4131 auto& deferredAdds
= s_threadInfos
.at(threadId
).deferredAdds
;
4132 auto& tcpSockets
= s_threadInfos
.at(threadId
).tcpSockets
;
4133 makeUDPServerSockets(deferredAdds
);
4134 makeTCPServerSockets(deferredAdds
, tcpSockets
);
4138 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4139 for (unsigned int threadId
= 1; threadId
<= g_numWorkerThreads
; threadId
++) {
4140 auto& deferredAdds
= s_threadInfos
.at(threadId
).deferredAdds
;
4141 auto& tcpSockets
= s_threadInfos
.at(threadId
).tcpSockets
;
4142 makeUDPServerSockets(deferredAdds
);
4143 makeTCPServerSockets(deferredAdds
, tcpSockets
);
4148 std::set
<int> tcpSockets
;
4149 /* we don't have reuseport so we can only open one socket per
4150 listening addr:port and everyone will listen on it */
4151 makeUDPServerSockets(g_deferredAdds
);
4152 makeTCPServerSockets(g_deferredAdds
, tcpSockets
);
4154 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4155 needs to listen to the shared sockets */
4156 if (g_weDistributeQueries
) {
4157 /* first thread is the handler, then distributors */
4158 for (unsigned int threadId
= 1; threadId
<= g_numDistributorThreads
; threadId
++) {
4159 s_threadInfos
.at(threadId
).tcpSockets
= tcpSockets
;
4163 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4164 for (unsigned int threadId
= 1; threadId
<= g_numWorkerThreads
; threadId
++) {
4165 s_threadInfos
.at(threadId
).tcpSockets
= tcpSockets
;
4171 // Setup newly observed domain globals
4173 #endif /* NOD_ENABLED */
4176 for(forks
= 0; forks
< ::arg().asNum("processes") - 1; ++forks
) {
4177 if(!fork()) // we are child
4181 if(::arg().mustDo("daemon")) {
4182 g_log
<<Logger::Warning
<<"Calling daemonize, going to background"<<endl
;
4183 g_log
.toConsole(Logger::Critical
);
4186 if(Utility::getpid() == 1) {
4187 /* We are running as pid 1, register sigterm and sigint handler
4189 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4190 It assumes that the process running as pid 1 is an "init"-like system.
4191 For years, this was a safe assumption, but containers change that: in
4192 most (all?) container implementations, the application itself is running
4193 as pid 1. This means that signals sent to those applications will not
4194 be handled by default. Results might be "your container not responding
4195 when asking it to stop", or "ctrl-c not working even when the app is
4196 running in the foreground inside a container".
4198 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4200 signal(SIGTERM
,termIntHandler
);
4201 signal(SIGINT
,termIntHandler
);
4204 signal(SIGUSR1
,usr1Handler
);
4205 signal(SIGUSR2
,usr2Handler
);
4206 signal(SIGPIPE
,SIG_IGN
);
4210 #ifdef HAVE_LIBSODIUM
4211 if (sodium_init() == -1) {
4212 g_log
<<Logger::Error
<<"Unable to initialize sodium crypto library"<<endl
;
4217 openssl_thread_setup();
4219 /* setup rng before chroot */
4222 if(::arg()["server-id"].empty()) {
4223 ::arg().set("server-id") = myHostname
;
4227 if(!::arg()["setgid"].empty())
4228 newgid
= strToGID(::arg()["setgid"]);
4230 if(!::arg()["setuid"].empty())
4231 newuid
= strToUID(::arg()["setuid"]);
4233 Utility::dropGroupPrivs(newuid
, newgid
);
4235 if (!::arg()["chroot"].empty()) {
4238 ns
= getenv("NOTIFY_SOCKET");
4239 if (ns
!= nullptr) {
4240 g_log
<<Logger::Error
<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl
;
4244 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
4246 g_log
<<Logger::Error
<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err
)<<", exiting"<<endl
;
4250 g_log
<<Logger::Info
<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl
;
4255 s_pidfname
=::arg()["socket-dir"]+"/"+s_programname
+".pid";
4256 if(!s_pidfname
.empty())
4257 unlink(s_pidfname
.c_str()); // remove possible old pid file
4260 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks
: -1);
4262 Utility::dropUserPrivs(newuid
);
4264 /* we might still have capabilities remaining, for example if we have been started as root
4265 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4266 like CAP_NET_BIND_SERVICE.
4270 catch(const std::exception
& e
) {
4271 g_log
<<Logger::Warning
<<e
.what()<<endl
;
4274 startLuaConfigDelayedThreads(delayedLuaThreads
, g_luaconfs
.getCopy().generation
);
4278 g_tcpTimeout
=::arg().asNum("client-tcp-timeout");
4279 g_maxTCPPerClient
=::arg().asNum("max-tcp-per-client");
4280 g_tcpMaxQueriesPerConn
=::arg().asNum("max-tcp-queries-per-connection");
4281 s_maxUDPQueriesPerRound
=::arg().asNum("max-udp-queries-per-round");
4283 g_useKernelTimestamp
= ::arg().mustDo("protobuf-use-kernel-timestamp");
4285 blacklistStats(StatComponent::API
, ::arg()["stats-api-blacklist"]);
4286 blacklistStats(StatComponent::Carbon
, ::arg()["stats-carbon-blacklist"]);
4287 blacklistStats(StatComponent::RecControl
, ::arg()["stats-rec-control-blacklist"]);
4288 blacklistStats(StatComponent::SNMP
, ::arg()["stats-snmp-blacklist"]);
4290 if (::arg().mustDo("snmp-agent")) {
4291 g_snmpAgent
= std::make_shared
<RecursorSNMPAgent
>("recursor", ::arg()["snmp-master-socket"]);
4295 int port
= ::arg().asNum("udp-source-port-min");
4296 if(port
< 1024 || port
> 65535){
4297 g_log
<<Logger::Error
<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl
;
4298 exit(99); // this isn't going to fix itself either
4300 s_minUdpSourcePort
= port
;
4301 port
= ::arg().asNum("udp-source-port-max");
4302 if(port
< 1024 || port
> 65535 || port
< s_minUdpSourcePort
){
4303 g_log
<<Logger::Error
<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl
;
4304 exit(99); // this isn't going to fix itself either
4306 s_maxUdpSourcePort
= port
;
4307 std::vector
<string
> parts
{};
4308 stringtok(parts
, ::arg()["udp-source-port-avoid"], ", ");
4309 for (const auto &part
: parts
)
4311 port
= std::stoi(part
);
4312 if(port
< 1024 || port
> 65535){
4313 g_log
<<Logger::Error
<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part
<<endl
;
4314 exit(99); // this isn't going to fix itself either
4316 s_avoidUdpSourcePorts
.insert(port
);
4319 unsigned int currentThreadId
= 1;
4320 const auto cpusMap
= parseCPUMap();
4322 if(g_numThreads
== 1) {
4323 g_log
<<Logger::Warning
<<"Operating unthreaded"<<endl
;
4325 sd_notify(0, "READY=1");
4328 /* This thread handles the web server, carbon, statistics and the control channel */
4329 auto& handlerInfos
= s_threadInfos
.at(0);
4330 handlerInfos
.isHandler
= true;
4331 handlerInfos
.thread
= std::thread(recursorThread
, 0, "main");
4333 setCPUMap(cpusMap
, currentThreadId
, pthread_self());
4335 auto& infos
= s_threadInfos
.at(currentThreadId
);
4336 infos
.isListener
= true;
4337 infos
.isWorker
= true;
4338 recursorThread(currentThreadId
++, "worker");
4340 handlerInfos
.thread
.join();
4345 if (g_weDistributeQueries
) {
4346 for(unsigned int n
=0; n
< g_numDistributorThreads
; ++n
) {
4347 auto& infos
= s_threadInfos
.at(currentThreadId
+ n
);
4348 infos
.isListener
= true;
4351 for(unsigned int n
=0; n
< g_numWorkerThreads
; ++n
) {
4352 auto& infos
= s_threadInfos
.at(currentThreadId
+ (g_weDistributeQueries
? g_numDistributorThreads
: 0) + n
);
4353 infos
.isListener
= !g_weDistributeQueries
;
4354 infos
.isWorker
= true;
4357 if (g_weDistributeQueries
) {
4358 g_log
<<Logger::Warning
<<"Launching "<< g_numDistributorThreads
<<" distributor threads"<<endl
;
4359 for(unsigned int n
=0; n
< g_numDistributorThreads
; ++n
) {
4360 auto& infos
= s_threadInfos
.at(currentThreadId
);
4361 infos
.thread
= std::thread(recursorThread
, currentThreadId
++, "distr");
4362 setCPUMap(cpusMap
, currentThreadId
, infos
.thread
.native_handle());
4366 g_log
<<Logger::Warning
<<"Launching "<< g_numWorkerThreads
<<" worker threads"<<endl
;
4368 for(unsigned int n
=0; n
< g_numWorkerThreads
; ++n
) {
4369 auto& infos
= s_threadInfos
.at(currentThreadId
);
4370 infos
.thread
= std::thread(recursorThread
, currentThreadId
++, "worker");
4371 setCPUMap(cpusMap
, currentThreadId
, infos
.thread
.native_handle());
4375 sd_notify(0, "READY=1");
4378 /* This thread handles the web server, carbon, statistics and the control channel */
4379 auto& infos
= s_threadInfos
.at(0);
4380 infos
.isHandler
= true;
4381 infos
.thread
= std::thread(recursorThread
, 0, "web+stat");
4383 for (auto & ti
: s_threadInfos
) {
4388 #ifdef HAVE_PROTOBUF
4389 google::protobuf::ShutdownProtobufLibrary();
4390 #endif /* HAVE_PROTOBUF */
/* Body of a recursor thread.
 *
 * serviceMain() launches this function through std::thread for the handler
 * (id 0), the distributor threads and the worker threads, and calls it
 * directly for the single worker when operating unthreaded.
 *
 * n          - thread id supplied by the caller; 0 is used for the handler thread
 * threadName - short label; it is appended to "pdns-r/" to build the name
 *              passed to setThreadName()
 *
 * After building the per-thread state (t_* objects, the MTasker and the FD
 * multiplexer) the function loops until RecursorControlChannel::stop is set.
 * PDNSException, std::exception and any other exception are caught at the end
 * and logged at Error level.
 */
4394 static void* recursorThread(unsigned int n
, const string
& threadName
)
// Per-thread bookkeeping entry; its isHandler / isListener / isWorker flags
// select which parts of the setup and of the main loop apply below.
4398 auto& threadInfo
= s_threadInfos
.at(t_id
);
4400 static string threadPrefix
= "pdns-r/";
4401 setThreadName(threadPrefix
+ threadName
);
4403 SyncRes
tmp(g_now
); // make sure it allocates tsstorage before we do anything, like primeHints or so..
4404 SyncRes::setDomainMap(g_initialDomainMap
);
4405 t_allowFrom
= g_initialAllowFrom
;
4406 t_udpclientsocks
= std::unique_ptr
<UDPClientSocks
>(new UDPClientSocks());
4407 t_tcpClientCounts
= std::unique_ptr
<tcpClientCounts_t
>(new tcpClientCounts_t());
4410 t_packetCache
= std::unique_ptr
<RecursorPacketCache
>(new RecursorPacketCache());
4412 g_log
<<Logger::Warning
<<"Done priming cache with root hints"<<endl
;
4415 if (threadInfo
.isWorker
)
4417 #endif /* NOD_ENABLED */
4419 /* the listener threads handle TCP queries */
4420 if(threadInfo
.isWorker
|| threadInfo
.isListener
) {
// The Lua script named by lua-dns-script is loaded into t_pdl only when that
// setting is non-empty; a load failure is reported through the catch below.
4422 if(!::arg()["lua-dns-script"].empty()) {
4423 t_pdl
= std::make_shared
<RecursorLua4
>();
4424 t_pdl
->loadFile(::arg()["lua-dns-script"]);
4425 g_log
<<Logger::Warning
<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl
;
4428 catch(std::exception
&e
) {
4429 g_log
<<Logger::Error
<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e
.what()<<endl
;
// Capacity used for the statistics rings created below:
// stats-ringbuffer-entries divided by the number of worker threads.
4434 unsigned int ringsize
=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads
;
4436 t_remotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
// With g_weDistributeQueries set, t_remotes is sized using the number of
// distributor threads as divisor instead of the number of workers.
4437 if(g_weDistributeQueries
)
4438 t_remotes
->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads
);
4440 t_remotes
->set_capacity(ringsize
);
4441 t_servfailremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4442 t_servfailremotes
->set_capacity(ringsize
);
4443 t_bogusremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4444 t_bogusremotes
->set_capacity(ringsize
);
4445 t_largeanswerremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4446 t_largeanswerremotes
->set_capacity(ringsize
);
4447 t_timeouts
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4448 t_timeouts
->set_capacity(ringsize
);
4450 t_queryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
4451 t_queryring
->set_capacity(ringsize
);
4452 t_servfailqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
4453 t_servfailqueryring
->set_capacity(ringsize
);
4454 t_bogusqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
4455 t_bogusqueryring
->set_capacity(ringsize
);
// The MTasker is constructed with the configured "stack-size"; a raw pointer
// to it is also stored in this thread's threadInfo entry.
4458 MT
=std::unique_ptr
<MTasker
<PacketID
,string
> >(new MTasker
<PacketID
,string
>(::arg().asNum("stack-size")));
4459 threadInfo
.mt
= MT
.get();
4461 #ifdef HAVE_PROTOBUF
4462 /* start protobuf export threads if needed */
4463 auto luaconfsLocal
= g_luaconfs
.getLocal();
4464 checkProtobufExport(luaconfsLocal
);
4465 checkOutgoingProtobufExport(luaconfsLocal
);
4466 #endif /* HAVE_PROTOBUF */
4468 checkFrameStreamExport(luaconfsLocal
);
// t_fdm is the FD multiplexer used by this thread for everything registered below.
4473 t_fdm
=getMultiplexer();
4475 RecursorWebServer
*rws
= nullptr;
// Only the handler thread creates the web server, and only when the
// "webserver" setting is enabled.
4477 if(threadInfo
.isHandler
) {
4478 if(::arg().mustDo("webserver")) {
4479 g_log
<<Logger::Warning
<< "Enabling web server" << endl
;
4481 rws
= new RecursorWebServer(t_fdm
);
4483 catch(PDNSException
&e
) {
4484 g_log
<<Logger::Error
<<"Exception: "<<e
.reason
<<endl
;
4488 g_log
<<Logger::Info
<<"Enabled '"<< t_fdm
->getName() << "' multiplexer"<<endl
;
// Watch this thread's two pipe read ends; both are served by handlePipeRequest.
4492 t_fdm
->addReadFD(threadInfo
.pipes
.readToThread
, handlePipeRequest
);
4493 t_fdm
->addReadFD(threadInfo
.pipes
.readQueriesToThread
, handlePipeRequest
);
// Listener threads register the deferred (fd, callback) pairs with the multiplexer.
4495 if (threadInfo
.isListener
) {
4497 /* then every listener has its own FDs */
4498 for(const auto deferred
: threadInfo
.deferredAdds
) {
4499 t_fdm
->addReadFD(deferred
.first
, deferred
.second
);
4503 /* otherwise all listeners are listening on the same ones */
4504 for(const auto deferred
: g_deferredAdds
) {
4505 t_fdm
->addReadFD(deferred
.first
, deferred
.second
);
4513 if(threadInfo
.isHandler
) {
4514 t_fdm
->addReadFD(s_rcc
.d_fd
, handleRCC
); // control channel
4517 unsigned int maxTcpClients
=::arg().asNum("max-tcp-clients");
4519 bool listenOnTCP(true);
// Timestamps (seconds) of the last statistics, carbon and Lua maintenance runs,
// compared against g_now.tv_sec inside the loop.
4521 time_t last_stat
= 0;
4522 time_t last_carbon
=0, last_lua_maintenance
=0;
4523 time_t carbonInterval
=::arg().asNum("carbon-interval");
4524 time_t luaMaintenanceInterval
=::arg().asNum("lua-maintenance-interval");
4525 counter
.store(0); // used to periodically execute certain tasks
// Main loop: runs until the control channel stop flag is set.
4527 while (!RecursorControlChannel::stop
) {
4528 while(MT
->schedule(&g_now
)); // MTasker letting the mthreads do their thing
// Whenever counter is a multiple of 500, start houseKeeping through MT->makeThread().
4530 if(!(counter
%500)) {
4531 MT
->makeThread(houseKeeping
, 0);
// Ask the multiplexer for the descriptors whose timeout has expired at g_now.
// The parameter attached to each one is cast to a TCPConnection so the remote
// can be logged, then the descriptor is removed from the read set.
4535 typedef vector
<pair
<int, FDMultiplexer::funcparam_t
> > expired_t
;
4536 expired_t expired
=t_fdm
->getTimeouts(g_now
);
4538 for(expired_t::iterator i
=expired
.begin() ; i
!= expired
.end(); ++i
) {
4539 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(i
->second
);
4540 if(g_logCommonErrors
)
4541 g_log
<<Logger::Warning
<<"Timeout from remote TCP client "<< conn
->d_remote
.toStringWithPort() <<endl
;
4542 t_fdm
->removeReadFD(i
->first
);
// Handler-only periodic work: statistics interval bookkeeping and the carbon dump.
4548 if(threadInfo
.isHandler
) {
4549 if(statsWanted
|| (g_statisticsInterval
> 0 && (g_now
.tv_sec
- last_stat
) >= g_statisticsInterval
)) {
4551 last_stat
= g_now
.tv_sec
;
4554 Utility::gettimeofday(&g_now
, 0);
4556 if((g_now
.tv_sec
- last_carbon
) >= carbonInterval
) {
4557 MT
->makeThread(doCarbonDump
, 0);
4558 last_carbon
= g_now
.tv_sec
;
4561 if (t_pdl
!= nullptr) {
4562 // lua-dns-script directive is present, call the maintenance callback if needed
4563 /* remember that the listener threads handle TCP queries */
4564 if (threadInfo
.isWorker
|| threadInfo
.isListener
) {
4565 // Only on threads processing queries
4566 if(g_now
.tv_sec
- last_lua_maintenance
>= luaMaintenanceInterval
) {
4567 t_pdl
->maintenance();
4568 last_lua_maintenance
= g_now
.tv_sec
;
4574 // 'run' updates g_now for us
// TCP accept throttling for listeners: the thread's TCP sockets are removed from
// the multiplexer while the connection count exceeds max-tcp-clients, and added
// back with handleNewTCPQuestion once it is at or below the limit.
4576 if(threadInfo
.isListener
) {
4578 if(TCPConnection::getCurrentConnections() > maxTcpClients
) { // shutdown, too many connections
4579 for(const auto fd
: threadInfo
.tcpSockets
) {
4580 t_fdm
->removeReadFD(fd
);
4586 if(TCPConnection::getCurrentConnections() <= maxTcpClients
) { // reenable
4587 for(const auto fd
: threadInfo
.tcpSockets
) {
4588 t_fdm
->addReadFD(fd
, handleNewTCPQuestion
);
// Exception handlers: each one only logs at Error level.
4599 catch(PDNSException
&ae
) {
4600 g_log
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
4603 catch(std::exception
&e
) {
4604 g_log
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
4608 g_log
<<Logger::Error
<<"any other exception in main: "<<endl
;
4613 int main(int argc
, char **argv
)
4617 g_stats
.startupTime
=time(0);
4619 versionSetProduct(ProductRecursor
);
4623 int ret
= EXIT_SUCCESS
;
4626 ::arg().set("stack-size","stack size per mthread")="200000";
4627 ::arg().set("soa-minimum-ttl","Don't change")="0";
4628 ::arg().set("no-shuffle","Don't change")="off";
4629 ::arg().set("local-port","port to listen on")="53";
4630 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4631 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4632 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4633 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4634 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4635 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4636 ::arg().set("daemon","Operate as a daemon")="no";
4637 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4638 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4639 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4640 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4641 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4642 ::arg().set("chroot","switch to chroot jail")="";
4643 ::arg().set("setgid","If set, change group id to this gid for more security"
4645 #define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4649 ::arg().set("setuid","If set, change user id to this uid for more security"
4654 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4655 ::arg().set("threads", "Launch this number of threads")="2";
4656 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4657 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4658 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4659 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4660 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4661 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4662 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4663 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4664 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4665 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4666 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4667 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4668 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4669 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4670 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4671 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4673 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4674 ::arg().set("quiet","Suppress logging of questions and answers")="";
4675 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4676 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR
;
4677 ::arg().set("socket-owner","Owner of socket")="";
4678 ::arg().set("socket-group","Group of socket")="";
4679 ::arg().set("socket-mode", "Permissions for socket")="";
4681 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR
+"/pdns-recursor when unset and not chrooted" )="";
4682 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4683 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4684 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4685 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4686 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4687 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4688 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
4689 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4690 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4691 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4692 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
4693 ::arg().set("hint-file", "If set, load root hints from this file")="";
4694 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4695 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4696 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4697 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4698 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4699 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4700 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4701 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4702 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4703 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4704 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS
;
4705 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4706 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4707 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY
;
4708 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4709 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4710 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4711 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4712 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4713 ::arg().set("lua-config-file", "More powerful configuration options")="";
4714 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
4716 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4717 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4718 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4719 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4720 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
4721 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
4722 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
4723 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
4724 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
4725 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
4726 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
4727 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
4728 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
4729 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
4730 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
4731 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
4732 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
4733 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
4734 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE
;
4735 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
4736 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
4737 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
4738 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
4739 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
4740 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
4741 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
4742 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
4743 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
4744 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
4745 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
4746 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
4747 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
4748 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
4749 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
4750 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
4752 ::arg().set("include-dir","Include *.conf files from this directory")="";
4753 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
4755 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
4757 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
4758 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
4760 std::string defaultBlacklistedStats
= "cache-bytes, packetcache-bytes, special-memory-usage";
4761 for (size_t idx
= 0; idx
< 32; idx
++) {
4762 defaultBlacklistedStats
+= ", ecs-v4-response-bits-" + std::to_string(idx
+ 1);
4764 for (size_t idx
= 0; idx
< 128; idx
++) {
4765 defaultBlacklistedStats
+= ", ecs-v6-response-bits-" + std::to_string(idx
+ 1);
4767 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats
;
4768 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats
;
4769 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats
;
4770 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats
;
4772 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
4773 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
4775 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
4777 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
4779 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
4780 ::arg().set("xpf-rr-code","XPF option code to use")="0";
4782 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
4783 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
4784 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
4785 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
4786 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
4787 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
4789 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
4790 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
4791 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
4794 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
4795 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
4796 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
4797 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR
)+"/nod";
4798 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
4799 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
4800 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
4801 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
4802 ::arg().set("unique-response-log", "Log unique responses")="yes";
4803 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR
)+"/udr";
4804 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
4805 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
4806 #endif /* NOD_ENABLED */
4807 ::arg().setCmd("help","Provide a helpful message");
4808 ::arg().setCmd("version","Print version string");
4809 ::arg().setCmd("config","Output blank configuration");
4810 ::arg().setDefaults();
4811 g_log
.toConsole(Logger::Info
);
4812 ::arg().laxParse(argc
,argv
); // do a lax parse
4814 string configname
=::arg()["config-dir"]+"/recursor.conf";
4815 if(::arg()["config-name"]!="") {
4816 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
4817 s_programname
+="-"+::arg()["config-name"];
4819 cleanSlashes(configname
);
4821 if(!::arg().getCommands().empty()) {
4822 cerr
<<"Fatal: non-option";
4823 if (::arg().getCommands().size() > 1) {
4828 for (auto const c
: ::arg().getCommands()) {
4835 cerr
<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl
;
4839 if(::arg().mustDo("config")) {
4840 cout
<<::arg().configstring(false, true);
4844 if(!::arg().file(configname
.c_str()))
4845 g_log
<<Logger::Warning
<<"Unable to parse configuration file '"<<configname
<<"'"<<endl
;
4847 ::arg().parse(argc
,argv
);
4849 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
4850 g_log
<<Logger::Error
<<"Using chroot and enabling the API is not possible"<<endl
;
4854 if (::arg()["socket-dir"].empty()) {
4855 if (::arg()["chroot"].empty())
4856 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR
) + "/pdns-recursor";
4858 ::arg().set("socket-dir") = "/";
4861 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
4863 if(::arg().asNum("threads")==1) {
4864 if (::arg().mustDo("pdns-distributes-queries")) {
4865 g_log
<<Logger::Warning
<<"Only one thread, no need to distribute queries ourselves"<<endl
;
4866 ::arg().set("pdns-distributes-queries")="no";
4870 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
4871 g_log
<<Logger::Warning
<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl
;
4872 ::arg().set("distributor-threads")="1";
4875 if (!::arg().mustDo("pdns-distributes-queries")) {
4876 ::arg().set("distributor-threads")="0";
4879 if(::arg().mustDo("help")) {
4880 cout
<<"syntax:"<<endl
<<endl
;
4881 cout
<<::arg().helpstring(::arg()["help"])<<endl
;
4884 if(::arg().mustDo("version")) {
4885 showProductVersion();
4886 showBuildConfiguration();
4890 Logger::Urgency logUrgency
= (Logger::Urgency
)::arg().asNum("loglevel");
4892 if (logUrgency
< Logger::Error
)
4893 logUrgency
= Logger::Error
;
4894 if(!g_quiet
&& logUrgency
< Logger::Info
) { // Logger::Info=6, Logger::Debug=7
4895 logUrgency
= Logger::Info
; // if you do --quiet=no, you need Info to also see the query log
4897 g_log
.setLoglevel(logUrgency
);
4898 g_log
.toConsole(logUrgency
);
4900 serviceMain(argc
, argv
);
4902 catch(PDNSException
&ae
) {
4903 g_log
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
4906 catch(std::exception
&e
) {
4907 g_log
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
4911 g_log
<<Logger::Error
<<"any other exception in main: "<<endl
;