2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
30 #include <boost/container/flat_set.hpp>
32 #include "ws-recursor.hh"
34 #include "threadname.hh"
35 #include "recpacketcache.hh"
37 #include "dns_random.hh"
41 #include "opensslsigners.hh"
44 #include <boost/static_assert.hpp>
47 #include "recursor_cache.hh"
48 #include "cachecleaner.hh"
55 #include "arguments.hh"
59 #include "sortlist.hh"
61 #include <boost/tuple/tuple.hpp>
62 #include <boost/tuple/tuple_comparison.hpp>
63 #include <boost/shared_array.hpp>
64 #include <boost/function.hpp>
65 #include <boost/algorithm/string.hpp>
67 #include "malloctrace.hh"
69 #include <netinet/tcp.h>
70 #include "capabilities.hh"
71 #include "dnsparser.hh"
72 #include "dnswriter.hh"
73 #include "dnsrecords.hh"
74 #include "zoneparser-tng.hh"
75 #include "rec_channel.hh"
80 #include "lua-recursor4.hh"
82 #include "responsestats.hh"
83 #include "secpoll-recursor.hh"
85 #include "filterpo.hh"
86 #include "rpzloader.hh"
87 #include "validate-recursor.hh"
88 #include "rec-lua-conf.hh"
89 #include "ednsoptions.hh"
91 #include "proxy-protocol.hh"
92 #include "pubsuffix.hh"
96 #endif /* NOD_ENABLED */
98 #include "rec-protobuf.hh"
99 #include "rec-snmp.hh"
102 #include <systemd/sd-daemon.h>
105 #include "namespaces.hh"
108 #include "uuid-utils.hh"
109 #endif /* HAVE_PROTOBUF */
113 typedef map
<ComboAddress
, uint32_t, ComboAddress::addressOnlyLessThan
> tcpClientCounts_t
;
115 static thread_local
std::shared_ptr
<RecursorLua4
> t_pdl
;
116 static thread_local
unsigned int t_id
= 0;
117 static thread_local
std::shared_ptr
<Regex
> t_traceRegex
;
118 static thread_local
std::unique_ptr
<tcpClientCounts_t
> t_tcpClientCounts
;
120 static thread_local
std::shared_ptr
<std::vector
<std::unique_ptr
<RemoteLogger
>>> t_protobufServers
{nullptr};
121 static thread_local
uint64_t t_protobufServersGeneration
;
122 static thread_local
std::shared_ptr
<std::vector
<std::unique_ptr
<RemoteLogger
>>> t_outgoingProtobufServers
{nullptr};
123 static thread_local
uint64_t t_outgoingProtobufServersGeneration
;
124 #endif /* HAVE_PROTOBUF */
127 static thread_local
std::shared_ptr
<std::vector
<std::unique_ptr
<FrameStreamLogger
>>> t_frameStreamServers
{nullptr};
128 static thread_local
uint64_t t_frameStreamServersGeneration
;
129 #endif /* HAVE_FSTRM */
131 thread_local
std::unique_ptr
<MT_t
> MT
; // the big MTasker
132 std::unique_ptr
<MemRecursorCache
> s_RC
;
135 thread_local
std::unique_ptr
<RecursorPacketCache
> t_packetCache
;
136 thread_local FDMultiplexer
* t_fdm
{nullptr};
137 thread_local
std::unique_ptr
<addrringbuf_t
> t_remotes
, t_servfailremotes
, t_largeanswerremotes
, t_bogusremotes
;
138 thread_local
std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > > t_queryring
, t_servfailqueryring
, t_bogusqueryring
;
139 thread_local
std::shared_ptr
<NetmaskGroup
> t_allowFrom
;
141 thread_local
std::shared_ptr
<nod::NODDB
> t_nodDBp
;
142 thread_local
std::shared_ptr
<nod::UniqueResponseDB
> t_udrDBp
;
143 #endif /* NOD_ENABLED */
144 __thread
struct timeval g_now
; // timestamp, updated (too) frequently
146 typedef vector
<pair
<int, function
< void(int, any
&) > > > deferredAdd_t
;
148 // for communicating with our threads
149 // effectively readonly after startup
154 int writeToThread
{-1};
155 int readToThread
{-1};
156 int writeFromThread
{-1};
157 int readFromThread
{-1};
158 int writeQueriesToThread
{-1}; // this one is non-blocking
159 int readQueriesToThread
{-1};
162 /* FD corresponding to TCP sockets this thread is listening
164 These FDs are also in deferredAdds when we have one
165 socket per listener, and in g_deferredAdds instead. */
166 std::set
<int> tcpSockets
;
167 /* FD corresponding to listening sockets if we have one socket per
168 listener (with reuseport), otherwise all listeners share the
169 same FD and g_deferredAdds is then used instead */
170 deferredAdd_t deferredAdds
;
171 struct ThreadPipeSet pipes
;
174 uint64_t numberOfDistributedQueries
{0};
175 /* handle the web server, carbon, statistics and the control channel */
176 bool isHandler
{false};
177 /* accept incoming queries (and distributes them to the workers if pdns-distributes-queries is set) */
178 bool isListener
{false};
179 /* process queries */
180 bool isWorker
{false};
183 /* first we have the handler thread, t_id == 0 (some other
184 helper threads like SNMP might have t_id == 0 as well)
185 then the distributor threads if any
186 and finally the workers */
187 static std::vector
<RecThreadInfo
> s_threadInfos
;
188 /* without reuseport, all listeners share the same sockets */
189 static deferredAdd_t g_deferredAdds
;
191 typedef vector
<int> tcpListenSockets_t
;
192 typedef map
<int, ComboAddress
> listenSocketsAddresses_t
; // is shared across all threads right now
194 static const ComboAddress
g_local4("0.0.0.0"), g_local6("::");
195 static listenSocketsAddresses_t g_listenSocketsAddresses
; // is shared across all threads right now
196 static set
<int> g_fromtosockets
; // listen sockets that use 'sendfromto()' mechanism
197 static vector
<ComboAddress
> g_localQueryAddresses4
, g_localQueryAddresses6
;
198 static AtomicCounter counter
;
199 static std::shared_ptr
<SyncRes::domainmap_t
> g_initialDomainMap
; // new threads needs this to be setup
200 static std::shared_ptr
<NetmaskGroup
> g_initialAllowFrom
; // new thread needs to be setup with this
201 static NetmaskGroup g_XPFAcl
;
202 static NetmaskGroup g_proxyProtocolACL
;
203 static boost::optional
<ComboAddress
> g_dns64Prefix
{boost::none
};
204 static DNSName g_dns64PrefixReverse
;
205 static size_t g_proxyProtocolMaximumSize
;
206 static size_t g_tcpMaxQueriesPerConn
;
207 static size_t s_maxUDPQueriesPerRound
;
208 static uint64_t g_latencyStatSize
;
209 static uint32_t g_disthashseed
;
210 static unsigned int g_maxTCPPerClient
;
211 static unsigned int g_maxMThreads
;
212 static unsigned int g_numDistributorThreads
;
213 static unsigned int g_numWorkerThreads
;
214 static int g_tcpTimeout
;
215 static uint16_t g_udpTruncationThreshold
;
216 static uint16_t g_xpfRRCode
{0};
217 static std::atomic
<bool> statsWanted
;
218 static std::atomic
<bool> g_quiet
;
219 static bool g_logCommonErrors
;
220 static bool g_anyToTcp
;
221 static bool g_weDistributeQueries
; // if true, 1 or more threads listen on the incoming query sockets and distribute them to workers
222 static bool g_reusePort
{false};
223 static bool g_gettagNeedsEDNSOptions
{false};
224 static time_t g_statisticsInterval
;
225 static bool g_useIncomingECS
;
226 static bool g_useKernelTimestamp
;
227 std::atomic
<uint32_t> g_maxCacheEntries
, g_maxPacketCacheEntries
;
229 static bool g_nodEnabled
;
230 static DNSName g_nodLookupDomain
;
231 static bool g_nodLog
;
232 static SuffixMatchNode g_nodDomainWL
;
233 static std::string g_nod_pbtag
;
234 static bool g_udrEnabled
;
235 static bool g_udrLog
;
236 static std::string g_udr_pbtag
;
237 #endif /* NOD_ENABLED */
238 #ifdef HAVE_BOOST_CONTAINER_FLAT_SET_HPP
239 static boost::container::flat_set
<uint16_t> s_avoidUdpSourcePorts
;
241 static std::set
<uint16_t> s_avoidUdpSourcePorts
;
243 static uint16_t s_minUdpSourcePort
;
244 static uint16_t s_maxUdpSourcePort
;
245 static double s_balancingFactor
;
247 RecursorControlChannel s_rcc
; // only active in the handler thread
248 RecursorStats g_stats
;
249 string s_programname
="pdns_recursor";
251 bool g_lowercaseOutgoing
;
252 unsigned int g_networkTimeoutMsec
;
253 unsigned int g_numThreads
;
254 uint16_t g_outgoingEDNSBufsize
;
255 bool g_logRPZChanges
{false};
257 // Used in the Syncres to not throttle certain servers
258 GlobalStateHolder
<SuffixMatchNode
> g_dontThrottleNames
;
259 GlobalStateHolder
<NetmaskGroup
> g_dontThrottleNetmasks
;
261 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
262 #define LOCAL_NETS_INVERSE "!127.0.0.0/8, !10.0.0.0/8, !100.64.0.0/10, !169.254.0.0/16, !192.168.0.0/16, !172.16.0.0/12, !::1/128, !fc00::/7, !fe80::/10"
263 // Bad Nets taken from both:
264 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
266 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
267 // where such a network may not be considered a valid destination
268 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
269 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
271 //! used to send information to a newborn mthread
272 struct DNSComboWriter
{
273 DNSComboWriter(const std::string
& query
, const struct timeval
& now
): d_mdp(true, query
), d_now(now
), d_query(query
)
277 DNSComboWriter(const std::string
& query
, const struct timeval
& now
, std::unordered_set
<std::string
>&& policyTags
, LuaContext::LuaObject
&& data
, std::vector
<DNSRecord
>&& records
): d_mdp(true, query
), d_now(now
), d_query(query
), d_policyTags(std::move(policyTags
)), d_records(std::move(records
)), d_data(std::move(data
))
281 void setRemote(const ComboAddress
& sa
)
286 void setSource(const ComboAddress
& sa
)
291 void setLocal(const ComboAddress
& sa
)
296 void setDestination(const ComboAddress
& sa
)
301 void setSocket(int sock
)
306 string
getRemote() const
308 if (d_source
== d_remote
) {
309 return d_source
.toStringWithPort();
311 return d_source
.toStringWithPort() + " (proxied by " + d_remote
.toStringWithPort() + ")";
314 std::vector
<ProxyProtocolValue
> d_proxyProtocolValues
;
316 struct timeval d_now
;
317 /* Remote client, might differ from d_source
318 in case of XPF, in which case d_source holds
319 the IP of the client and d_remote of the proxy
321 ComboAddress d_remote
;
322 ComboAddress d_source
;
323 /* Destination address, might differ from
324 d_destination in case of XPF, in which case
325 d_destination holds the IP of the proxy and
326 d_local holds our own. */
327 ComboAddress d_local
;
328 ComboAddress d_destination
;
330 boost::uuids::uuid d_uuid
;
331 string d_requestorId
;
334 struct timeval d_kernelTimestamp
{0,0};
337 std::unordered_set
<std::string
> d_policyTags
;
338 std::string d_routingTag
;
339 std::vector
<DNSRecord
> d_records
;
340 LuaContext::LuaObject d_data
;
341 EDNSSubnetOpts d_ednssubnet
;
342 shared_ptr
<TCPConnection
> d_tcpConnection
;
343 boost::optional
<int> d_rcode
{boost::none
};
345 unsigned int d_tag
{0};
347 uint32_t d_ttlCap
{std::numeric_limits
<uint32_t>::max()};
348 uint16_t d_ecsBegin
{0};
349 uint16_t d_ecsEnd
{0};
350 bool d_variable
{false};
351 bool d_ecsFound
{false};
352 bool d_ecsParsed
{false};
353 bool d_followCNAMERecords
{false};
354 bool d_logResponse
{false};
360 return MT
? MT
.get() : nullptr;
365 static ArgvMap theArg
;
369 unsigned int getRecursorThreadId()
379 static bool isDistributorThread()
385 return g_weDistributeQueries
&& s_threadInfos
.at(t_id
).isListener
;
388 static bool isHandlerThread()
394 return s_threadInfos
.at(t_id
).isHandler
;
397 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
);
// Send `data` over the TCP socket `sock` on behalf of the calling mthread.
// The socket is registered for writability with this thread's multiplexer
// (handleTCPClientWritable does the actual writing) and the mthread then
// suspends in MT->waitEvent() for at most g_networkTimeoutMsec.
// What was written is reported back through `packet`; its size is compared
// against data.size() to detect a failed or short write.
399 // -1 is error, 0 is timeout, 1 is success
400 int asendtcp(const string
& data
, Socket
* sock
)
406 t_fdm
->addWriteFD(sock
->getHandle(), handleTCPClientWritable
, pident
);
409 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
);
// nothing was delivered (waitEvent returned 0 or -1): undo the registration made above
411 if(!ret
|| ret
==-1) { // timeout
412 t_fdm
->removeWriteFD(sock
->getHandle());
414 else if(packet
.size() !=data
.size()) { // main loop tells us what it sent out, or empty in case of an error
420 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
);
// Receive data from the TCP socket `sock` into `data` on behalf of the
// calling mthread. The socket is registered for readability with this
// thread's multiplexer (handleTCPClientReadable does the reading) and the
// mthread suspends in MT->waitEvent() for at most g_networkTimeoutMsec.
// `incompleteOkay` is passed along in the PacketID (inIncompleteOkay).
// NOTE(review): how `len` is conveyed to the reader is not visible here.
422 // -1 is error, 0 is timeout, 1 is success
423 int arecvtcp(string
& data
, size_t len
, Socket
* sock
, bool incompleteOkay
)
429 pident
.inIncompleteOkay
=incompleteOkay
;
430 t_fdm
->addReadFD(sock
->getHandle(), handleTCPClientReadable
, pident
);
432 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
// nothing was delivered (waitEvent returned 0 or -1): undo the registration made above
433 if(!ret
|| ret
==-1) { // timeout
434 t_fdm
->removeReadFD(sock
->getHandle());
436 else if(data
.empty()) {// error, EOF or other
// Multiplexer callback for the socket opened by GenUDPQueryResponse().
// Reads one datagram from `fd`, logs a notice when the sender differs from
// the expected remote stored in the PacketID carried by `var`, removes `fd`
// from the multiplexer and wakes the waiting mthread with either the bytes
// received or an empty string.
// NOTE(review): fromaddr is compared immediately after recvfrom(), before
// `ret` is consulted; if recvfrom() failed, fromaddr may not have been
// filled in. Also, the log text says "discarding" but whether the datagram
// is really dropped cannot be confirmed from the visible lines.
443 static void handleGenUDPQueryResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
445 PacketID pident
=*any_cast
<PacketID
>(&var
);
447 ComboAddress fromaddr
;
448 socklen_t addrlen
=sizeof(fromaddr
);
450 ssize_t ret
=recvfrom(fd
, resp
, sizeof(resp
), 0, (sockaddr
*)&fromaddr
, &addrlen
);
451 if (fromaddr
!= pident
.remote
) {
452 g_log
<<Logger::Notice
<<"Response received from the wrong remote host ("<<fromaddr
.toStringWithPort()<<" instead of "<<pident
.remote
.toStringWithPort()<<"), discarding"<<endl
;
456 t_fdm
->removeReadFD(fd
);
458 string
data(resp
, (size_t) ret
);
459 MT
->sendEvent(pident
, &data
);
463 MT
->sendEvent(pident
, &empty
);
464 // cerr<<"Had some kind of error: "<<ret<<", "<<stringerror()<<endl;
// Send `query` to `dest` over a fresh UDP socket and return the response
// as a string, suspending the calling mthread while waiting.
// The socket uses dest's address family, the local address comes from
// getQueryLocalAddress(family, 0), and handleGenUDPQueryResponse is
// registered as the read callback. The wait is bounded by
// g_networkTimeoutMsec; on timeout the read registration is removed here.
// An empty `data` after a successful wait signals an error.
467 string
GenUDPQueryResponse(const ComboAddress
& dest
, const string
& query
)
469 Socket
s(dest
.sin4
.sin_family
, SOCK_DGRAM
);
471 ComboAddress local
= getQueryLocalAddress(dest
.sin4
.sin_family
, 0);
481 t_fdm
->addReadFD(s
.getHandle(), handleGenUDPQueryResponse
, pident
);
485 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
487 if(!ret
|| ret
==-1) { // timeout
488 t_fdm
->removeReadFD(s
.getHandle());
490 else if(data
.empty()) {// error, EOF or other
491 // we could special case this
// Returns a local source address for an outgoing query of the given
// address family, with its port set to `port` (converted with htons here).
// For AF_INET a random entry of g_localQueryAddresses4 is used, otherwise a
// random entry of g_localQueryAddresses6; dns_random() picks the index.
// An empty list is handled separately in each branch.
// NOTE(review): the fallback used for an empty list is not visible here.
497 //! pick a random query local address
498 ComboAddress
getQueryLocalAddress(int family
, uint16_t port
)
501 if(family
==AF_INET
) {
502 if(g_localQueryAddresses4
.empty())
505 ret
= g_localQueryAddresses4
[dns_random(g_localQueryAddresses4
.size())];
506 ret
.sin4
.sin_port
= htons(port
);
509 if(g_localQueryAddresses6
.empty())
512 ret
= g_localQueryAddresses6
[dns_random(g_localQueryAddresses6
.size())];
514 ret
.sin6
.sin6_port
= htons(port
);
519 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
&);
// Raise a socket buffer (`optname`, at SOL_SOCKET level) of `fd` to `size`.
// The current size is read first with getsockopt(); when it is already
// larger than `size` this is logged ("Not decreasing ...") instead of
// shrinking the buffer. A failing setsockopt() is logged, not thrown.
521 static void setSocketBuffer(int fd
, int optname
, uint32_t size
)
524 socklen_t len
=sizeof(psize
);
526 if(!getsockopt(fd
, SOL_SOCKET
, optname
, (char*)&psize
, &len
) && psize
> size
) {
527 g_log
<<Logger::Error
<<"Not decreasing socket buffer size from "<<psize
<<" to "<<size
<<endl
;
531 if (setsockopt(fd
, SOL_SOCKET
, optname
, (char*)&size
, sizeof(size
)) < 0) {
533 g_log
<< Logger::Error
<< "Unable to raise socket buffer size to " << size
<< ": " << stringerror(err
) << endl
;
// Convenience wrapper: apply setSocketBuffer() to the receive buffer (SO_RCVBUF) of `fd`.
538 static void setSocketReceiveBuffer(int fd
, uint32_t size
)
540 setSocketBuffer(fd
, SO_RCVBUF
, size
);
// Convenience wrapper: apply setSocketBuffer() to the send buffer (SO_SNDBUF) of `fd`.
543 static void setSocketSendBuffer(int fd
, uint32_t size
)
545 setSocketBuffer(fd
, SO_SNDBUF
, size
);
549 // you can ask this class for a UDP socket to send a query from
550 // this socket is not yours, don't even think about deleting it
551 // but after you call 'returnSocket' on it, don't assume anything anymore
554 unsigned int d_numsocks
;
556 UDPClientSocks() : d_numsocks(0)
// Obtain a UDP socket connected to `toaddr` and store its descriptor in *fd.
// The socket comes from makeClientSocket() for toaddr's address family and
// is then connect()ed to `toaddr`. When connect() fails the socket is closed
// again (a PDNSException while closing is only logged) and ENETUNREACH is
// singled out from other errno values.
560 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
561 int getSocket(const ComboAddress
& toaddr
, int* fd
)
563 *fd
=makeClientSocket(toaddr
.sin4
.sin_family
);
564 if(*fd
< 0) // temporary error - receive exception otherwise
567 if(connect(*fd
, (struct sockaddr
*)(&toaddr
), toaddr
.getSocklen()) < 0) {
572 catch(const PDNSException
& e
) {
573 g_log
<<Logger::Error
<<"Error closing UDP socket after connect() failed: "<<e
.reason
<<endl
;
576 if(err
==ENETUNREACH
) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
// Hand back a socket previously obtained from getSocket().
// The descriptor is first removed from this thread's multiplexer; a
// FDMultiplexerException there is expected for sockets that were never
// registered. A PDNSException raised while closing is logged, not propagated.
585 // return a socket to the pool, or simply erase it
586 void returnSocket(int fd
)
589 t_fdm
->removeReadFD(fd
);
591 catch(const FDMultiplexerException
& e
) {
592 // we sometimes return a socket that has not yet been assigned to t_fdm
598 catch(const PDNSException
& e
) {
599 g_log
<<Logger::Error
<<"Error closing returned UDP socket: "<<e
.reason
<<endl
;
// Create a UDP socket of the given address family bound to a local query
// address and a randomised source port.
// Running out of file descriptors (EMFILE) is treated as a recoverable
// condition; other socket() failures throw PDNSException.
// Candidate ports are drawn from [s_minUdpSourcePort, s_maxUdpSourcePort],
// re-drawing while the port is listed in s_avoidUdpSourcePorts; on the last
// attempt (tries==1) the port choice is left to the kernel. If no bind()
// succeeds a PDNSException is thrown.
607 // returns -1 for errors which might go away, throws for ones that won't
608 static int makeClientSocket(int family
)
610 int ret
=socket(family
, SOCK_DGRAM
, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
612 if(ret
< 0 && errno
==EMFILE
) // this is not a catastrophic error
616 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family
)+"): "+stringerror());
618 // setCloseOnExec(ret); // we're not going to exec
625 if(tries
==1) // fall back to kernel 'random'
629 port
= s_minUdpSourcePort
+ dns_random(s_maxUdpSourcePort
- s_minUdpSourcePort
+ 1);
631 while (s_avoidUdpSourcePorts
.count(port
));
634 sin
=getQueryLocalAddress(family
, port
); // does htons for us
636 if (::bind(ret
, (struct sockaddr
*)&sin
, sin
.getSocklen()) >= 0)
642 throw PDNSException("Resolver binding to local query client socket on "+sin
.toString()+": "+stringerror());
646 setReceiveSocketErrors(ret
, family
);
658 static thread_local
std::unique_ptr
<UDPClientSocks
> t_udpclientsocks
;
// Send an outgoing UDP query (`data`, `len` bytes) to `toaddr`.
// Before opening a socket, the waiters of this thread's MTasker are searched
// (PacketIDBirthdayCompare) for an outstanding request this one can piggyback
// on: if one with a real descriptor (fd > -1) is found, `id` is added to that
// waiter's chain and *fd is set to -1.
// Otherwise a socket is taken from t_udpclientsocks, registered for reading
// with handleUDPServerResponse, and the query is sent with send().
// The socket is handed back via returnSocket() on the path that also
// restores errno from `tmp` (kept for logging only).
660 /* these two functions are used by LWRes */
661 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
662 int asendto(const char *data
, size_t len
, int flags
,
663 const ComboAddress
& toaddr
, uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int* fd
)
667 pident
.domain
= domain
;
668 pident
.remote
= toaddr
;
671 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
672 pair
<MT_t::waiters_t::iterator
, MT_t::waiters_t::iterator
> chain
=MT
->d_waiters
.equal_range(pident
, PacketIDBirthdayCompare());
674 for(; chain
.first
!= chain
.second
; chain
.first
++) {
675 if(chain
.first
->key
.fd
> -1) { // don't chain onto existing chained waiter!
677 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
678 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
679 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
681 chain
.first
->key
.chain
.insert(id
); // we can chain
682 *fd
=-1; // gets used in waitEvent / sendEvent later on
687 int ret
=t_udpclientsocks
->getSocket(toaddr
, fd
);
694 t_fdm
->addReadFD(*fd
, handleUDPServerResponse
, pident
);
695 ret
= send(*fd
, data
, len
, 0);
700 t_udpclientsocks
->returnSocket(*fd
);
702 errno
= tmp
; // this is for logging purposes only
// Wait for the answer to a query previously sent with asendto().
// The calling mthread suspends in MT->waitEvent() for up to
// g_networkTimeoutMsec, keyed on a PacketID built from `domain` and
// `fromaddr` among other fields; the answer arrives in `packet` and its size
// is stored in *d_len.
// The "spoof-nearmiss-max" setting is read into a function-local static
// (nearMissLimit). When that limit is non-zero and pident.nearMisses exceeds
// it, the event is logged and g_stats.spoofCount is incremented.
// On error or timeout the socket is returned here; otherwise
// handleUDPServerResponse() has already closed it.
706 // -1 is error, 0 is timeout, 1 is success
707 int arecvfrom(std::string
& packet
, int flags
, const ComboAddress
& fromaddr
, size_t *d_len
,
708 uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int fd
, struct timeval
* now
)
710 static optional
<unsigned int> nearMissLimit
;
712 nearMissLimit
=::arg().asNum("spoof-nearmiss-max");
717 pident
.domain
=domain
;
719 pident
.remote
=fromaddr
;
721 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
, now
);
723 /* -1 means error, 0 means timeout, 1 means a result from handleUDPServerResponse() which might still be an error */
725 /* handleUDPServerResponse() will close the socket for us no matter what */
726 if(packet
.empty()) // means "error"
729 *d_len
=packet
.size();
731 if(*nearMissLimit
&& pident
.nearMisses
> *nearMissLimit
) {
732 g_log
<<Logger::Error
<<"Too many ("<<pident
.nearMisses
<<" > "<<*nearMissLimit
<<") bogus answers for '"<<domain
<<"' from "<<fromaddr
.toString()<<", assuming spoof attempt."<<endl
;
733 g_stats
.spoofCount
++;
738 /* getting there means error or timeout, it's up to us to close the socket */
740 t_udpclientsocks
->returnSocket(fd
);
// Write this process' pid to the file named by s_pidfname.
// Consults the "write-pid" setting first. The file is opened in append mode
// (std::ios_base::app), so an existing file is extended rather than
// truncated. A failure is logged together with the system error text.
745 static void writePid(void)
747 if(!::arg().mustDo("write-pid"))
749 ofstream
of(s_pidfname
.c_str(), std::ios_base::app
);
751 of
<< Utility::getpid() <<endl
;
754 g_log
<< Logger::Error
<< "Writing pid for " << Utility::getpid() << " to " << s_pidfname
<< " failed: "
755 << stringerror(err
) << endl
;
759 uint16_t TCPConnection::s_maxInFlight
;
// Wrap an accepted TCP socket `fd` from client `addr`.
// `data` starts out as two zero bytes. Construction bumps both the global
// connection counter and this thread's per-client connection count.
761 TCPConnection::TCPConnection(int fd
, const ComboAddress
& addr
) : data(2, 0), d_remote(addr
), d_fd(fd
)
763 ++s_currentConnections
;
764 (*t_tcpClientCounts
)[d_remote
]++;
767 TCPConnection::~TCPConnection()
770 if(closesocket(d_fd
) < 0)
771 g_log
<<Logger::Error
<<"Error closing socket for TCPConnection"<<endl
;
773 catch(const PDNSException
& e
) {
774 g_log
<<Logger::Error
<<"Error closing TCPConnection socket: "<<e
.reason
<<endl
;
777 if(t_tcpClientCounts
->count(d_remote
) && !(*t_tcpClientCounts
)[d_remote
]--)
778 t_tcpClientCounts
->erase(d_remote
);
779 --s_currentConnections
;
782 AtomicCounter
TCPConnection::s_currentConnections
;
784 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
);
// Record per-response statistics in this thread's ring buffers.
// Responses larger than 1000 bytes add `remote` to t_largeanswerremotes.
// For a ServFail result, `remote` goes into t_servfailremotes and, when a
// query name is supplied, (name, qtype) goes into t_servfailqueryring.
// Every ring buffer is optional and is checked for presence before use.
// NOTE(review): handling for NXDomain and any later cases is not visible here.
786 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
787 static void updateResponseStats(int res
, const ComboAddress
& remote
, unsigned int packetsize
, const DNSName
* query
, uint16_t qtype
)
789 if(packetsize
> 1000 && t_largeanswerremotes
)
790 t_largeanswerremotes
->push_back(remote
);
792 case RCode::ServFail
:
793 if(t_servfailremotes
) {
794 t_servfailremotes
->push_back(remote
);
795 if(query
&& t_servfailqueryring
) // packet cache
796 t_servfailqueryring
->push_back(make_pair(*query
, qtype
));
800 case RCode::NXDomain
:
// Build the "(qname/qtype from remote)" suffix used in log lines for the
// query held by `dc`. A fixed fallback string is returned instead when
// building that text fails.
809 static string
makeLoginfo(const std::unique_ptr
<DNSComboWriter
>& dc
)
812 return "("+dc
->d_mdp
.d_qname
.toLogString()+"/"+DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)+" from "+(dc
->getRemote())+")";
816 return "Exception making error message for exception";
// Export an incoming query to every configured protobuf server of this thread.
// Does nothing useful when t_protobufServers is unset.
// The requestor address is masked before export: `maskV4` bits are kept for
// an IPv4 client, `maskV6` bits otherwise, and the original port is restored
// afterwards. The same masks are applied to the EDNS client subnet.
// Policy tags are attached only when the set is non-empty. The message is
// serialized once and the resulting string queued on each server.
820 static void protobufLogQuery(uint8_t maskV4
, uint8_t maskV6
, const boost::uuids::uuid
& uniqueId
, const ComboAddress
& remote
, const ComboAddress
& local
, const Netmask
& ednssubnet
, bool tcp
, uint16_t id
, size_t len
, const DNSName
& qname
, uint16_t qtype
, uint16_t qclass
, const std::unordered_set
<std::string
>& policyTags
, const std::string
& requestorId
, const std::string
& deviceId
, const std::string
& deviceName
)
822 if (!t_protobufServers
) {
826 Netmask
requestorNM(remote
, remote
.sin4
.sin_family
== AF_INET
? maskV4
: maskV6
);
827 ComboAddress requestor
= requestorNM
.getMaskedNetwork();
828 requestor
.setPort(remote
.getPort());
829 RecProtoBufMessage
message(DNSProtoBufMessage::Query
, uniqueId
, &requestor
, &local
, qname
, qtype
, qclass
, id
, tcp
, len
);
830 message
.setServerIdentity(SyncRes::s_serverID
);
831 message
.setEDNSSubnet(ednssubnet
, ednssubnet
.isIPv4() ? maskV4
: maskV6
);
832 message
.setRequestorId(requestorId
);
833 message
.setDeviceId(deviceId
);
834 message
.setDeviceName(deviceName
);
836 if (!policyTags
.empty()) {
837 message
.setPolicyTags(policyTags
);
840 // cerr <<message.toDebugString()<<endl;
842 message
.serialize(str
);
844 for (auto& server
: *t_protobufServers
) {
845 server
->queueData(str
);
// Export an already-built response message to every configured protobuf
// server of this thread: the message is serialized once and the resulting
// string queued on each server. t_protobufServers is checked for presence first.
849 static void protobufLogResponse(const RecProtoBufMessage
& message
)
851 if (!t_protobufServers
) {
855 // cerr <<message.toDebugString()<<endl;
857 message
.serialize(str
);
859 for (auto& server
: *t_protobufServers
) {
860 server
->queueData(str
);
866 * Chases the CNAME provided by the PolicyCustom RPZ policy.
868 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
869 * @param qtype: The QType of the original query
870 * @param sr: A SyncRes
871 * @param res: An integer that will contain the RCODE of the lookup we do
872 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
// Only acts when the spoofed record is a CNAME: its target is resolved with
// `sr` for the original qtype (class IN) and `res` receives the result of
// that lookup. RPZ processing is switched off on `sr` for the duration of
// the lookup and the previous setting is restored afterwards.
// Records of the lookup that sit in the ANSWER section are singled out.
// NOTE(review): presumably they are appended to `ret`; that statement is not visible here.
874 static void handleRPZCustom(const DNSRecord
& spoofed
, const QType
& qtype
, SyncRes
& sr
, int& res
, vector
<DNSRecord
>& ret
)
876 if (spoofed
.d_type
== QType::CNAME
) {
877 bool oldWantsRPZ
= sr
.getWantsRPZ();
878 sr
.setWantsRPZ(false);
879 vector
<DNSRecord
> ans
;
880 res
= sr
.beginResolve(DNSName(spoofed
.d_content
->getZoneRepresentation()), qtype
, QClass::IN
, ans
);
881 for (const auto& rec
: ans
) {
882 if(rec
.d_place
== DNSResourceRecord::ANSWER
) {
886 // Reset the RPZ state of the SyncRes
887 sr
.setWantsRPZ(oldWantsRPZ
);
// Append `rec` to the packet being built by `pw`.
// The TTL written to the wire is capped at `ttlCap`. `minTTL` is lowered to
// the record's own (uncapped) TTL when that is smaller; OPT records are
// excluded from this because their TTL field is not a real TTL.
// After the record content is written, the packet size is checked against
// `maxAnswerSize`; when it is exceeded and the record is not in the
// ADDITIONAL section, the TC bit is set in the header.
// NOTE(review): the returned bool presumably tells whether the record was
// kept; the return statements are not visible here.
891 static bool addRecordToPacket(DNSPacketWriter
& pw
, const DNSRecord
& rec
, uint32_t& minTTL
, uint32_t ttlCap
, const uint16_t maxAnswerSize
)
893 pw
.startRecord(rec
.d_name
, rec
.d_type
, (rec
.d_ttl
> ttlCap
? ttlCap
: rec
.d_ttl
), rec
.d_class
, rec
.d_place
);
895 if(rec
.d_type
!= QType::OPT
) // their TTL ain't real
896 minTTL
= min(minTTL
, rec
.d_ttl
);
898 rec
.d_content
->toPacket(pw
);
899 if(pw
.size() > static_cast<size_t>(maxAnswerSize
)) {
901 if(rec
.d_place
!= DNSResourceRecord::ADDITIONAL
) {
902 pw
.getHeader()->tc
=1;
912 static std::shared_ptr
<std::vector
<std::unique_ptr
<RemoteLogger
>>> startProtobufServers(const ProtobufExportConfig
& config
)
914 auto result
= std::make_shared
<std::vector
<std::unique_ptr
<RemoteLogger
>>>();
916 for (const auto& server
: config
.servers
) {
918 auto logger
= make_unique
<RemoteLogger
>(server
, config
.timeout
, 100*config
.maxQueuedEntries
, config
.reconnectWaitTime
, config
.asyncConnect
);
919 logger
->setLogQueries(config
.logQueries
);
920 logger
->setLogResponses(config
.logResponses
);
921 result
->emplace_back(std::move(logger
));
923 catch(const std::exception
& e
) {
924 g_log
<<Logger::Error
<<"Error while starting protobuf logger to '"<<server
<<": "<<e
.what()<<endl
;
926 catch(const PDNSException
& e
) {
927 g_log
<<Logger::Error
<<"Error while starting protobuf logger to '"<<server
<<": "<<e
.reason
<<endl
;
// Bring this thread's protobuf loggers (t_protobufServers) in line with the
// current Lua configuration.
// When export is disabled, any existing loggers are visited one by one and
// the set is dropped. When export is enabled, the loggers are (re)created
// from protobufExportConfig if none exist yet or if they were built from an
// older configuration generation; the generation used is remembered in
// t_protobufServersGeneration.
// NOTE(review): the per-server action inside the loops and the returned
// values are not visible here.
934 static bool checkProtobufExport(LocalStateHolder
<LuaConfigItems
>& luaconfsLocal
)
936 if (!luaconfsLocal
->protobufExportConfig
.enabled
) {
937 if (t_protobufServers
) {
938 for (auto& server
: *t_protobufServers
) {
941 t_protobufServers
.reset();
947 /* if the server was not running, or if it was running according to a
948 previous configuration */
949 if (!t_protobufServers
||
950 t_protobufServersGeneration
< luaconfsLocal
->generation
) {
952 if (t_protobufServers
) {
953 for (auto& server
: *t_protobufServers
) {
957 t_protobufServers
.reset();
959 t_protobufServers
= startProtobufServers(luaconfsLocal
->protobufExportConfig
);
960 t_protobufServersGeneration
= luaconfsLocal
->generation
;
// Same reconciliation as checkProtobufExport(), but for the loggers used for
// outgoing queries: it works on t_outgoingProtobufServers,
// t_outgoingProtobufServersGeneration and outgoingProtobufExportConfig.
// NOTE(review): the per-server action inside the loops and the returned
// values are not visible here.
966 static bool checkOutgoingProtobufExport(LocalStateHolder
<LuaConfigItems
>& luaconfsLocal
)
968 if (!luaconfsLocal
->outgoingProtobufExportConfig
.enabled
) {
969 if (t_outgoingProtobufServers
) {
970 for (auto& server
: *t_outgoingProtobufServers
) {
974 t_outgoingProtobufServers
.reset();
979 /* if the server was not running, or if it was running according to a
980 previous configuration */
981 if (!t_outgoingProtobufServers
||
982 t_outgoingProtobufServersGeneration
< luaconfsLocal
->generation
) {
984 if (t_outgoingProtobufServers
) {
985 for (auto& server
: *t_outgoingProtobufServers
) {
989 t_outgoingProtobufServers
.reset();
991 t_outgoingProtobufServers
= startProtobufServers(luaconfsLocal
->outgoingProtobufExportConfig
);
992 t_outgoingProtobufServersGeneration
= luaconfsLocal
->generation
;
1000 static std::shared_ptr
<std::vector
<std::unique_ptr
<FrameStreamLogger
>>> startFrameStreamServers(const FrameStreamExportConfig
& config
)
1002 auto result
= std::make_shared
<std::vector
<std::unique_ptr
<FrameStreamLogger
>>>();
1004 for (const auto& server
: config
.servers
) {
1006 std::unordered_map
<string
,unsigned> options
;
1007 options
["bufferHint"] = config
.bufferHint
;
1008 options
["flushTimeout"] = config
.flushTimeout
;
1009 options
["inputQueueSize"] = config
.inputQueueSize
;
1010 options
["outputQueueSize"] = config
.outputQueueSize
;
1011 options
["queueNotifyThreshold"] = config
.queueNotifyThreshold
;
1012 options
["reopenInterval"] = config
.reopenInterval
;
1013 FrameStreamLogger
*fsl
= nullptr;
1015 ComboAddress
address(server
);
1016 fsl
= new FrameStreamLogger(address
.sin4
.sin_family
, address
.toStringWithPort(), true, options
);
1018 catch (const PDNSException
& e
) {
1019 fsl
= new FrameStreamLogger(AF_UNIX
, server
, true, options
);
1021 fsl
->setLogQueries(config
.logQueries
);
1022 fsl
->setLogResponses(config
.logResponses
);
1023 result
->emplace_back(fsl
);
1025 catch(const std::exception
& e
) {
1026 g_log
<<Logger::Error
<<"Error while starting dnstap framestream logger to '"<<server
<<": "<<e
.what()<<endl
;
1028 catch(const PDNSException
& e
) {
1029 g_log
<<Logger::Error
<<"Error while starting dnstap framestream logger to '"<<server
<<": "<<e
.reason
<<endl
;
// Bring this thread's dnstap loggers (t_frameStreamServers) in line with the
// current Lua configuration.
// When export is disabled, existing loggers are dropped. When it is enabled,
// the loggers are (re)created from frameStreamExportConfig if none exist yet
// or if they were built from an older configuration generation; the
// generation used is remembered in t_frameStreamServersGeneration.
// No explicit stop call is made: resetting the shared pointer is relied on
// for cleanup, as the existing comments say.
// NOTE(review): the returned values are not visible here.
1036 static bool checkFrameStreamExport(LocalStateHolder
<LuaConfigItems
>& luaconfsLocal
)
1038 if (!luaconfsLocal
->frameStreamExportConfig
.enabled
) {
1039 if (t_frameStreamServers
) {
1040 // dt's take care of cleanup
1041 t_frameStreamServers
.reset();
1047 /* if the server was not running, or if it was running according to a
1048 previous configuration */
1049 if (!t_frameStreamServers
||
1050 t_frameStreamServersGeneration
< luaconfsLocal
->generation
) {
1052 if (t_frameStreamServers
) {
1053 // dt's take care of cleanup
1054 t_frameStreamServers
.reset();
1057 t_frameStreamServers
= startFrameStreamServers(luaconfsLocal
->frameStreamExportConfig
);
1058 t_frameStreamServersGeneration
= luaconfsLocal
->generation
;
1063 #endif /* HAVE_FSTRM */
1064 #endif /* HAVE_PROTOBUF */
// Newly-observed-domain (NOD) check for `dname`.
// Names matched by g_nodDomainWL are exempt. Otherwise this thread's NOD
// database (t_nodDBp, if set) is asked whether the name is new. A new name
// is logged at Notice level and, unless g_nodLookupDomain is the root, an
// A/IN lookup for "<dname>.<g_nodLookupDomain>" is issued through
// directResolve(); the records from that lookup are not used.
// NOTE(review): logging looks like it is conditional on g_nodLog, and the
// bool result presumably reports "new domain seen" — neither the condition
// nor the return statements are visible here.
1067 static bool nodCheckNewDomain(const DNSName
& dname
)
1069 static const QType
qt(QType::A
);
1070 static const uint16_t qc(QClass::IN
);
1072 // First check the (sub)domain isn't whitelisted for NOD purposes
1073 if (!g_nodDomainWL
.check(dname
)) {
1074 // Now check the NODDB (note this is probabilistic so can have FNs/FPs)
1075 if (t_nodDBp
&& t_nodDBp
->isNewDomain(dname
)) {
1077 // This should probably log to a dedicated log file
1078 g_log
<<Logger::Notice
<<"Newly observed domain nod="<<dname
.toLogString()<<endl
;
1080 if (!(g_nodLookupDomain
.isRoot())) {
1081 // Send a DNS A query to <domain>.g_nodLookupDomain
1082 DNSName qname
= dname
;
1083 vector
<DNSRecord
> dummy
;
1084 qname
+= g_nodLookupDomain
;
1085 directResolve(qname
, qt
, qc
, dummy
);
// Unique-DNS-response (UDR) check for one record of an answer.
// Only records in the ANSWER or ADDITIONAL section are considered. A text
// key is built from the lower-cased query name, the query type and the
// record's type, lower-cased name and zone-format content, and this thread's
// UDR database (t_udrDBp, if set) is asked whether that key is new. A new
// key is logged at Notice level.
// NOTE(review): qtype is streamed into the key twice in a row. This looks
// unintentional, but changing it would alter every key already stored in
// the database — confirm before touching it.
// NOTE(review): the return statements are not visible here.
1093 static bool udrCheckUniqueDNSRecord(const DNSName
& dname
, uint16_t qtype
, const DNSRecord
& record
)
1096 if (record
.d_place
== DNSResourceRecord::ANSWER
||
1097 record
.d_place
== DNSResourceRecord::ADDITIONAL
) {
1098 // Create a string that represent a triplet of (qname, qtype and RR[type, name, content])
1099 std::stringstream ss
;
1100 ss
<< dname
.toDNSStringLC() << ":" << qtype
<< ":" << qtype
<< ":" << record
.d_type
<< ":" << record
.d_name
.toDNSStringLC() << ":" << record
.d_content
->getZoneRepresentation();
1101 if (t_udrDBp
&& t_udrDBp
->isUniqueResponse(ss
.str())) {
1103 // This should also probably log to a dedicated file.
1104 g_log
<<Logger::Notice
<<"Unique response observed: qname="<<dname
.toLogString()<<" qtype="<<QType(qtype
).getName()<< " rrtype=" << QType(record
.d_type
).getName() << " rrname=" << record
.d_name
.toLogString() << " rrcontent=" << record
.d_content
->getZoneRepresentation() << endl
;
1111 #endif /* NOD_ENABLED */
// Resolve the target of a CNAME found in `ret` and append the outcome.
// `ret` is scanned for CNAME records to obtain a target name; an empty
// target is handled as a special case. Otherwise the target is looked up
// with directResolve() for `qtype` in class IN and every resulting record
// is moved onto the end of `ret`.
// NOTE(review): which CNAME wins when several are present, and what is
// returned for an empty target, is not visible here.
1113 int followCNAMERecords(vector
<DNSRecord
>& ret
, const QType
& qtype
)
1115 vector
<DNSRecord
> resolved
;
1117 for(const DNSRecord
& rr
: ret
) {
1118 if(rr
.d_type
== QType::CNAME
) {
1119 auto rec
= getRR
<CNAMERecordContent
>(rr
);
1121 target
=rec
->getTarget();
1127 if(target
.empty()) {
1131 int rcode
= directResolve(target
, qtype
, QClass::IN
, resolved
);
1133 for(DNSRecord
& rr
: resolved
) {
1134 ret
.push_back(std::move(rr
));
// getFakeAAAARecords: synthesizes AAAA records for qname from its A records.
// qname is resolved for A / IN into ret. Repeated CNAME records (same target) are
// removed, then every A record in the ANSWER section is rewritten in place into an
// AAAA record whose address is prefix with the IPv4 address copied into its last
// four bytes. prefix is taken by value, so the caller's copy is not modified.
// NOTE(review): the return statement is not visible in this listing; presumably the
// rcode from directResolve() is returned - confirm.
1139 int getFakeAAAARecords(const DNSName
& qname
, ComboAddress prefix
, vector
<DNSRecord
>& ret
)
1141 int rcode
= directResolve(qname
, QType(QType::A
), QClass::IN
, ret
);
1143 // Remove double CNAME records
// seenCNAMEs remembers the targets already encountered while filtering.
1144 std::set
<DNSName
> seenCNAMEs
;
1145 ret
.erase(std::remove_if(
1148 [&seenCNAMEs
](DNSRecord
& rr
) {
1149 if (rr
.d_type
== QType::CNAME
) {
1150 auto target
= getRR
<CNAMERecordContent
>(rr
);
// getRR may yield a null pointer; that case is handled separately.
1151 if (target
== nullptr) {
1154 if (seenCNAMEs
.count(target
->getTarget()) > 0) {
1155 // We've had this CNAME before, remove it
1158 seenCNAMEs
.insert(target
->getTarget());
// Rewrite each A record of the ANSWER section into an AAAA record.
1165 for (DNSRecord
& rr
: ret
) {
1166 if (rr
.d_type
== QType::A
&& rr
.d_place
== DNSResourceRecord::ANSWER
) {
1167 if (auto rec
= getRR
<ARecordContent
>(rr
)) {
1168 ComboAddress
ipv4(rec
->getCA());
// Bytes 12..15 of the IPv6 address receive the IPv4 address bytes.
1169 memcpy(&prefix
.sin6
.sin6_addr
.s6_addr
[12], &ipv4
.sin4
.sin_addr
.s_addr
, sizeof(ipv4
.sin4
.sin_addr
.s_addr
));
1170 rr
.d_content
= std::make_shared
<AAAARecordContent
>(prefix
);
1171 rr
.d_type
= QType::AAAA
;
1178 // We've seen an A in the ANSWER section, so there is no need to keep any
1179 // SOA in the AUTHORITY section as this is not a NODATA response.
1180 ret
.erase(std::remove_if(
1184 return (rr
.d_type
== QType::SOA
&& rr
.d_place
== DNSResourceRecord::AUTHORITY
);
// getFakePTRRecords: answers a reverse lookup under the DNS64 prefix by mapping it
// onto the matching IPv4 in-addr.arpa name.
// The first eight raw labels of qname are read as hexadecimal nibbles and combined
// pairwise into four decimal octets, giving a name ending in in-addr.arpa. A CNAME
// record pointing at that name is prepared and the name is resolved for PTR / IN
// via directResolve() into ret.
// NOTE(review): the return statements are not visible in this listing; presumably
// the rcode from directResolve() is returned, with an error value when qname has
// fewer than eight labels - confirm.
1191 int getFakePTRRecords(const DNSName
& qname
, vector
<DNSRecord
>& ret
)
1193 /* qname has a reverse ordered IPv6 address, need to extract the underlying IPv4 address from it
1194 and turn it into an IPv4 in-addr.arpa query */
1196 vector
<string
> parts
= qname
.getRawLabels();
// Eight nibble labels are needed to rebuild the four IPv4 octets.
1198 if (parts
.size() < 8) {
1203 for (int n
= 0; n
< 4; ++n
) {
// Labels come low nibble first: octet = parts[2n] + 16 * parts[2n+1].
1205 std::to_string(stoll(parts
[n
*2], 0, 16) + 16*stoll(parts
[n
*2+1], 0, 16));
1206 newquery
.append(1, '.');
1208 newquery
+= "in-addr.arpa.";
// CNAME record whose content is the synthesized in-addr.arpa name.
1212 rr
.d_type
= QType::CNAME
;
1213 rr
.d_content
= std::make_shared
<CNAMERecordContent
>(newquery
);
1216 int rcode
= directResolve(DNSName(newquery
), QType(QType::PTR
), QClass::IN
, ret
);
// PolicyResult: outcome of handlePolicyHit() as seen by its caller.
//   NoAction   - the policy did not produce an answer
//   HaveAnswer - res / ret (and possibly the TC bit) now describe the answer
//   Drop       - the policy asks for the query to be dropped
1221 enum class PolicyResult
: uint8_t { NoAction
, HaveAnswer
, Drop
};
// handlePolicyHit: applies an RPZ policy decision to the answer being built.
// Dispatches on appliedPolicy.d_kind, updating res (the rcode), ret (the records)
// and, for Truncate, the TC bit of the packet header behind pw.
// Returns a PolicyResult telling the caller whether an answer is ready, the query
// must be dropped, or nothing was done.
// NOTE(review): some lines of each case are missing from this listing (for example
// whatever happens to ret before res is set) - check the full source.
1223 static PolicyResult
handlePolicyHit(const DNSFilterEngine::Policy
& appliedPolicy
, const std::unique_ptr
<DNSComboWriter
>& dc
, SyncRes
& sr
, int& res
, vector
<DNSRecord
>& ret
, DNSPacketWriter
& pw
)
1225 /* don't account truncate actions for TCP queries, since they are not applied */
1226 if (appliedPolicy
.d_kind
!= DNSFilterEngine::PolicyKind::Truncate
|| !dc
->d_tcp
) {
1227 ++g_stats
.policyResults
[appliedPolicy
.d_kind
];
1230 switch (appliedPolicy
.d_kind
) {
1232 case DNSFilterEngine::PolicyKind::NoAction
:
1233 return PolicyResult::NoAction
;
// Drop: counted separately in g_stats.policyDrops.
1235 case DNSFilterEngine::PolicyKind::Drop
:
1236 ++g_stats
.policyDrops
;
1237 return PolicyResult::Drop
;
// NXDOMAIN: answer with rcode NXDomain.
1239 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1241 res
= RCode::NXDomain
;
1242 return PolicyResult::HaveAnswer
;
// NODATA: answer with rcode NoError.
1244 case DNSFilterEngine::PolicyKind::NODATA
:
1246 res
= RCode::NoError
;
1247 return PolicyResult::HaveAnswer
;
// Truncate: NoError with the TC bit set.
// NOTE(review): the second return (NoAction) is presumably the TCP path, where
// truncation is not applied - the guarding condition is not visible here.
1249 case DNSFilterEngine::PolicyKind::Truncate
:
1252 res
= RCode::NoError
;
1253 pw
.getHeader()->tc
= 1;
1254 return PolicyResult::HaveAnswer
;
1256 return PolicyResult::NoAction
;
// Custom: NoError, with the policy's custom records for this qname / qtype each
// passed through handleRPZCustom().
1258 case DNSFilterEngine::PolicyKind::Custom
:
1260 res
= RCode::NoError
;
1262 auto spoofed
= appliedPolicy
.getCustomRecords(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1263 for (auto& dr
: spoofed
) {
1265 handleRPZCustom(dr
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1268 return PolicyResult::HaveAnswer
;
// Fallback when no case above returned.
1271 return PolicyResult::NoAction
;
// startDoResolve: answers one client question.
// p is cast to DNSComboWriter* and owned by a unique_ptr (dc) from here on.
// Visible flow: read the EDNS options of the query, prepare the optional protobuf
// message and the response packet writer, configure a SyncRes instance, run the Lua
// hooks and RPZ policy checks around sr.beginResolve(), reflect the DNSSEC validation
// state in the header, write the records into the packet, send it (sendmsg for UDP,
// writev with a two-byte length prefix for TCP) and update the statistics.
// Exceptions are caught and logged at the end of the function.
// NOTE(review): this listing omits some source lines (braces, a few declarations,
// several conditions), so the nesting implied by the comments added below is
// inferred from the visible statements only.
1274 static void startDoResolve(void *p
)
1276 auto dc
=std::unique_ptr
<DNSComboWriter
>(reinterpret_cast<DNSComboWriter
*>(p
));
// Remember the (qname, qtype) pair in the t_queryring buffer.
1279 t_queryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
// Answer size limit: 65535 for TCP, otherwise min(512, g_udpTruncationThreshold);
// recomputed below from the EDNS payload size when the query carries EDNS.
1281 uint16_t maxanswersize
= dc
->d_tcp
? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold
);
1283 std::vector
<pair
<uint16_t, string
> > ednsOpts
;
1284 bool variableAnswer
= dc
->d_variable
;
1285 bool haveEDNS
=false;
1287 bool hasUDR
= false;
1288 #endif /* NOD_ENABLED */
1289 DNSPacketWriter::optvect_t returnedEdnsOptions
; // Here we stuff all the options for the return packet
1290 uint8_t ednsExtRCode
= 0;
1291 if(getEDNSOpts(dc
->d_mdp
, &edo
)) {
// Only EDNS version 0 is accepted; anything else yields the BADVERS extended rcode.
1293 if (edo
.d_version
!= 0) {
1294 ednsExtRCode
= ERCode::BADVERS
;
1299 "Values lower than 512 MUST be treated as equal to 512."
1301 maxanswersize
= min(static_cast<uint16_t>(edo
.d_packetsize
>= 512 ? edo
.d_packetsize
: 512), g_udpTruncationThreshold
);
1303 ednsOpts
= edo
.d_options
;
1304 maxanswersize
-= 11; // EDNS header size
// Walk the EDNS options: pick up ECS (when enabled and not parsed yet) and answer
// NSID requests with the configured server-id when it fits.
1306 for (const auto& o
: edo
.d_options
) {
1307 if (o
.first
== EDNSOptionCode::ECS
&& g_useIncomingECS
&& !dc
->d_ecsParsed
) {
1308 dc
->d_ecsFound
= getEDNSSubnetOptsFromString(o
.second
, &dc
->d_ednssubnet
);
1309 } else if (o
.first
== EDNSOptionCode::NSID
) {
1310 const static string mode_server_id
= ::arg()["server-id"];
1311 if(mode_server_id
!= "disabled" && !mode_server_id
.empty() &&
1312 maxanswersize
> (2 + 2 + mode_server_id
.size())) {
1313 returnedEdnsOptions
.push_back(make_pair(EDNSOptionCode::NSID
, mode_server_id
));
1314 variableAnswer
= true; // Can't packetcache an answer with NSID
1315 // Option Code and Option Length are both 2
1316 maxanswersize
-= 2 + 2 + mode_server_id
.size();
1321 /* perhaps there was no EDNS or no ECS but by now we looked */
1322 dc
->d_ecsParsed
= true;
1323 vector
<DNSRecord
> ret
;
1324 vector
<uint8_t> packet
;
// Local handle on the Lua configuration, used for the rest of this query.
1326 auto luaconfsLocal
= g_luaconfs
.getLocal();
1327 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
1328 bool wantsRPZ(true);
1329 boost::optional
<RecProtoBufMessage
> pbMessage(boost::none
);
1330 #ifdef HAVE_PROTOBUF
// Pre-build the protobuf response message; the requestor address is masked with
// protobufMaskV4 or protobufMaskV6 depending on its address family.
1331 if (checkProtobufExport(luaconfsLocal
)) {
1332 Netmask
requestorNM(dc
->d_source
, dc
->d_source
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1333 ComboAddress requestor
= requestorNM
.getMaskedNetwork();
1334 requestor
.setPort(dc
->d_source
.getPort());
1335 pbMessage
= RecProtoBufMessage(RecProtoBufMessage::Response
, dc
->d_uuid
, &requestor
, &dc
->d_destination
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
, dc
->d_mdp
.d_header
.id
, dc
->d_tcp
, 0);
1336 pbMessage
->setServerIdentity(SyncRes::s_serverID
);
1337 pbMessage
->setEDNSSubnet(dc
->d_ednssubnet
.source
, dc
->d_ednssubnet
.source
.isIPv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1339 #endif /* HAVE_PROTOBUF */
1342 checkFrameStreamExport(luaconfsLocal
);
// Response writer over the packet buffer. The header starts as a response (qr=1)
// with ra=1, aa=0, tc=0 and copies id, rd and cd from the query.
1345 DNSPacketWriter
pw(packet
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
1347 pw
.getHeader()->aa
=0;
1348 pw
.getHeader()->ra
=1;
1349 pw
.getHeader()->qr
=1;
1350 pw
.getHeader()->tc
=0;
1351 pw
.getHeader()->id
=dc
->d_mdp
.d_header
.id
;
1352 pw
.getHeader()->rd
=dc
->d_mdp
.d_header
.rd
;
1353 pw
.getHeader()->cd
=dc
->d_mdp
.d_header
.cd
;
1355 /* This is the lowest TTL seen in the records of the response,
1356 so we can't cache it for longer than this value.
1357 If we have a TTL cap, this value can't be larger than the
1358 cap no matter what. */
1359 uint32_t minTTL
= dc
->d_ttlCap
;
// Resolver instance for this query, identified by the current mthread id.
1361 SyncRes
sr(dc
->d_now
);
1362 sr
.setId(MT
->getTid());
1364 bool DNSSECOK
=false;
1366 sr
.setLuaEngine(t_pdl
);
1368 if(g_dnssecmode
!= DNSSECMode::Off
) {
1369 sr
.setDoDNSSEC(true);
1371 // Does the requestor want DNSSEC records?
1372 if(edo
.d_extFlags
& EDNSOpts::DNSSECOK
) {
1374 g_stats
.dnssecQueries
++;
1376 if (dc
->d_mdp
.d_header
.cd
) {
1377 /* Per rfc6840 section 5.9, "When processing a request with
1378 the Checking Disabled (CD) bit set, a resolver SHOULD attempt
1379 to return all response data, even data that has failed DNSSEC
1381 ++g_stats
.dnssecCheckDisabledQueries
;
1383 if (dc
->d_mdp
.d_header
.ad
) {
1384 /* Per rfc6840 section 5.7, "the AD bit in a query as a signal
1385 indicating that the requester understands and is interested in the
1386 value of the AD bit in the response. This allows a requester to
1387 indicate that it understands the AD bit without also requesting
1388 DNSSEC data via the DO bit. */
1389 ++g_stats
.dnssecAuthenticDataQueries
;
1392 // Ignore the client-set CD flag
1393 pw
.getHeader()->cd
=0;
// Validation is requested in ValidateAll and ValidateForLog modes, and in Process
// mode only when the query set AD or DO.
1395 sr
.setDNSSECValidationRequested(g_dnssecmode
== DNSSECMode::ValidateAll
|| g_dnssecmode
==DNSSECMode::ValidateForLog
|| ((dc
->d_mdp
.d_header
.ad
|| DNSSECOK
) && g_dnssecmode
==DNSSECMode::Process
));
1397 #ifdef HAVE_PROTOBUF
1398 sr
.setInitialRequestId(dc
->d_uuid
);
1399 sr
.setOutgoingProtobufServers(t_outgoingProtobufServers
);
1402 sr
.setFrameStreamServers(t_frameStreamServers
);
// The client subnet is only passed on when incoming ECS is enabled and a source
// was actually found.
1404 sr
.setQuerySource(dc
->d_remote
, g_useIncomingECS
&& !dc
->d_ednssubnet
.source
.empty() ? boost::optional
<const EDNSSubnetOpts
&>(dc
->d_ednssubnet
) : boost::none
);
1406 bool tracedQuery
=false; // we could consider letting Lua know about this too
1407 bool shouldNotValidate
= false;
1409 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
1410 int res
= RCode::NoError
;
1412 DNSFilterEngine::Policy appliedPolicy
;
// Question object handed to the Lua hooks. The pointers assigned below refer to
// locals of this function and to members of dc and sr.
1413 RecursorLua4::DNSQuestion
dq(dc
->d_source
, dc
->d_destination
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_tcp
, variableAnswer
, wantsRPZ
, dc
->d_logResponse
);
1414 dq
.ednsFlags
= &edo
.d_extFlags
;
1415 dq
.ednsOptions
= &ednsOpts
;
1417 dq
.discardedPolicies
= &sr
.d_discardedPolicies
;
1418 dq
.policyTags
= &dc
->d_policyTags
;
1419 dq
.appliedPolicy
= &appliedPolicy
;
1420 dq
.currentRecords
= &ret
;
1421 dq
.dh
= &dc
->d_mdp
.d_header
;
1422 dq
.data
= dc
->d_data
;
1423 #ifdef HAVE_PROTOBUF
1424 dq
.requestorId
= dc
->d_requestorId
;
1425 dq
.deviceId
= dc
->d_deviceId
;
1426 dq
.deviceName
= dc
->d_deviceName
;
1428 dq
.proxyProtocolValues
= &dc
->d_proxyProtocolValues
;
// A non-zero extended rcode (BADVERS, set above) is handled before any resolving.
1430 if(ednsExtRCode
!= 0) {
// ANY queries over UDP get a truncated reply when g_anyToTcp is set.
1434 if(dc
->d_mdp
.d_qtype
==QType::ANY
&& !dc
->d_tcp
&& g_anyToTcp
) {
1435 pw
.getHeader()->tc
= 1;
1437 variableAnswer
= true;
// Queries whose name matches t_traceRegex have their resolution trace stored.
1441 if(t_traceRegex
&& t_traceRegex
->match(dc
->d_mdp
.d_qname
.toString())) {
1442 sr
.setLogMode(SyncRes::Store
);
// Log the incoming question unless running quiet (traced queries are always logged).
1446 if(!g_quiet
|| tracedQuery
) {
1447 g_log
<<Logger::Warning
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] " << (dc
->d_tcp
? "TCP " : "") << "question for '"<<dc
->d_mdp
.d_qname
<<"|"
1448 <<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)<<"' from "<<dc
->getRemote();
1449 if(!dc
->d_ednssubnet
.source
.empty()) {
1450 g_log
<<" (ecs "<<dc
->d_ednssubnet
.source
.toString()<<")";
1455 if(!dc
->d_mdp
.d_header
.rd
) {
1459 if (dc
->d_rcode
!= boost::none
) {
1460 /* we have a response ready to go, most likely from gettag_ffi */
1461 ret
= std::move(dc
->d_records
);
1463 if (res
== RCode::NoError
&& dc
->d_followCNAMERecords
) {
1464 res
= followCNAMERecords(ret
, QType(dc
->d_mdp
.d_qtype
));
// Lua prerpz hook, called before the query policy lookup below.
1470 t_pdl
->prerpz(dq
, res
);
1473 // Check if the query has a policy attached to it
1474 if (wantsRPZ
&& (appliedPolicy
.d_type
== DNSFilterEngine::PolicyType::None
|| appliedPolicy
.d_kind
== DNSFilterEngine::PolicyKind::NoAction
)) {
1475 if (luaconfsLocal
->dfe
.getQueryPolicy(dc
->d_mdp
.d_qname
, dc
->d_source
, sr
.d_discardedPolicies
, appliedPolicy
)) {
1476 mergePolicyTags(dc
->d_policyTags
, appliedPolicy
.getTags());
1480 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
1481 if (!t_pdl
|| !t_pdl
->preresolve(dq
, res
)) {
// PTR queries below g_dns64PrefixReverse are answered from the IPv4 reverse tree.
1483 if (!g_dns64PrefixReverse
.empty() && dq
.qtype
== QType::PTR
&& dq
.qname
.isPartOf(g_dns64PrefixReverse
)) {
1484 res
= getFakePTRRecords(dq
.qname
, ret
);
1488 sr
.setWantsRPZ(wantsRPZ
);
// Apply a query-name policy hit, if any, before going out to resolve.
1489 if (wantsRPZ
&& appliedPolicy
.d_kind
!= DNSFilterEngine::PolicyKind::NoAction
) {
1490 auto policyResult
= handlePolicyHit(appliedPolicy
, dc
, sr
, res
, ret
, pw
);
1491 if (policyResult
== PolicyResult::HaveAnswer
) {
1494 else if (policyResult
== PolicyResult::Drop
) {
1499 // Query was not handled for QNAME Policy reasons, now actually go out to find an answer
1501 sr
.d_appliedPolicy
= appliedPolicy
;
1502 sr
.d_policyTags
= std::move(dc
->d_policyTags
);
1504 if (!dc
->d_routingTag
.empty()) {
1505 sr
.d_routingTag
= dc
->d_routingTag
;
// The actual resolution. Answers flagged by wasOutOfBand() are not validated.
1508 res
= sr
.beginResolve(dc
->d_mdp
.d_qname
, QType(dc
->d_mdp
.d_qtype
), dc
->d_mdp
.d_qclass
, ret
);
1509 shouldNotValidate
= sr
.wasOutOfBand();
1511 catch(const ImmediateServFailException
&e
) {
1512 if(g_logCommonErrors
) {
1513 g_log
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during resolve of '"<<dc
->d_mdp
.d_qname
<<"' because: "<<e
.reason
<<endl
;
1515 res
= RCode::ServFail
;
1517 catch(const PolicyHitException
& e
) {
// Copy back the state the resolver may have changed while resolving.
1520 dq
.validationState
= sr
.getValidationState();
1521 appliedPolicy
= sr
.d_appliedPolicy
;
1522 dc
->d_policyTags
= std::move(sr
.d_policyTags
);
1524 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
1525 if (res
== -2) { // XXX This block should be macro'd, it is repeated post-resolve.
1526 if (appliedPolicy
.d_kind
== DNSFilterEngine::PolicyKind::NoAction
) {
1527 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
1529 auto policyResult
= handlePolicyHit(appliedPolicy
, dc
, sr
, res
, ret
, pw
);
1530 if (policyResult
== PolicyResult::HaveAnswer
) {
1533 else if (policyResult
== PolicyResult::Drop
) {
// Post-resolve policy lookup on the records in ret, when no policy applies yet.
1538 if (wantsRPZ
&& (appliedPolicy
.d_type
== DNSFilterEngine::PolicyType::None
|| appliedPolicy
.d_kind
== DNSFilterEngine::PolicyKind::NoAction
)) {
1539 if (luaconfsLocal
->dfe
.getPostPolicy(ret
, sr
.d_discardedPolicies
, appliedPolicy
)) {
1540 mergePolicyTags(dc
->d_policyTags
, appliedPolicy
.getTags());
// NODATA / NXDOMAIN handling: Lua nodata and nxdomain hooks, plus DNS64 synthesis
// for AAAA queries whose validation state is not Bogus.
1544 if (t_pdl
|| (g_dns64Prefix
&& dq
.qtype
== QType::AAAA
&& dq
.validationState
!= Bogus
)) {
1545 if (res
== RCode::NoError
) {
// Look for a record of the queried type in the ANSWER section.
1546 auto i
= ret
.cbegin();
1547 for(; i
!= ret
.cend(); ++i
) {
1548 if (i
->d_type
== dc
->d_mdp
.d_qtype
&& i
->d_place
== DNSResourceRecord::ANSWER
) {
1553 if (i
== ret
.cend()) {
1554 /* no record in the answer section, NODATA */
1555 if (t_pdl
&& t_pdl
->nodata(dq
, res
)) {
1556 shouldNotValidate
= true;
1558 else if (g_dns64Prefix
&& dq
.qtype
== QType::AAAA
&& dq
.validationState
!= Bogus
) {
1559 res
= getFakeAAAARecords(dq
.qname
, *g_dns64Prefix
, ret
);
1560 shouldNotValidate
= true;
1565 else if(res
== RCode::NXDomain
&& t_pdl
&& t_pdl
->nxdomain(dq
, res
)) {
1566 shouldNotValidate
= true;
// Answers changed by the Lua postresolve hook are not validated.
1569 if (t_pdl
&& t_pdl
->postresolve(dq
, res
)) {
1570 shouldNotValidate
= true;
1574 if (wantsRPZ
) { //XXX This block is repeated, see above
1576 auto policyResult
= handlePolicyHit(appliedPolicy
, dc
, sr
, res
, ret
, pw
);
1577 if (policyResult
== PolicyResult::HaveAnswer
) {
1580 else if (policyResult
== PolicyResult::Drop
) {
1586 if(res
== PolicyDecision::DROP
) {
1587 g_stats
.policyDrops
++;
// Dump the stored resolution trace for traced queries and for failures.
1590 if(tracedQuery
|| res
== -1 || res
== RCode::ServFail
|| pw
.getHeader()->rcode
== RCode::ServFail
)
1592 string
trace(sr
.getTrace());
1593 if(!trace
.empty()) {
1594 vector
<string
> lines
;
1595 boost::split(lines
, trace
, boost::is_any_of("\n"));
1596 for(const string
& line
: lines
) {
1598 g_log
<<Logger::Warning
<< line
<< endl
;
1604 pw
.getHeader()->rcode
=RCode::ServFail
;
1605 // no commit here, because no record
1606 g_stats
.servFails
++;
1609 pw
.getHeader()->rcode
=res
;
1611 // Does the validation mode or query demand validation?
1612 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1615 g_log
<<Logger::Warning
<<"Starting validation of answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<endl
;
1618 auto state
= sr
.getValidationState();
// Secure: the AD bit is set when the client showed interest (AD or DO in the query).
1620 if(state
== Secure
) {
1622 g_log
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<" validates correctly"<<endl
;
1625 // Is the query source interested in the value of the ad-bit?
1626 if (dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)
1627 pw
.getHeader()->ad
=1;
1629 else if(state
== Insecure
) {
1631 g_log
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<" validates as Insecure"<<endl
;
1634 pw
.getHeader()->ad
=0;
// Bogus: remember the client and the query, then decide whether to send SERVFAIL.
1636 else if(state
== Bogus
) {
1638 t_bogusremotes
->push_back(dc
->d_source
);
1639 if(t_bogusqueryring
)
1640 t_bogusqueryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
1641 if(g_dnssecLogBogus
|| sr
.doLog() || g_dnssecmode
== DNSSECMode::ValidateForLog
) {
1642 g_log
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->getRemote()<<" validates as Bogus"<<endl
;
1645 // Does the query or validation mode require sending out a SERVFAIL on validation errors?
1646 if(!pw
.getHeader()->cd
&& (g_dnssecmode
== DNSSECMode::ValidateAll
|| dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)) {
1648 g_log
<<Logger::Warning
<<"Sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" because recursor or query demands it for Bogus results"<<endl
;
1651 pw
.getHeader()->rcode
=RCode::ServFail
;
1655 g_log
<<Logger::Warning
<<"Not sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" Bogus validation since neither config nor query demands this"<<endl
;
1660 catch(const ImmediateServFailException
&e
) {
1661 if(g_logCommonErrors
)
1662 g_log
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during validation of '"<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<"' because: "<<e
.reason
<<endl
;
1663 pw
.getHeader()->rcode
=RCode::ServFail
;
// Order the records; a client-specific sortlist makes the answer variable.
1669 pdns::orderAndShuffle(ret
);
1670 if(auto sl
= luaconfsLocal
->sortlist
.getOrderCmp(dc
->d_source
)) {
1671 stable_sort(ret
.begin(), ret
.end(), *sl
);
1672 variableAnswer
=true;
// Write the records into the packet. The visible conditions concern NSEC3, RRSIG
// and NSEC records that are outside the ANSWER section or not of the queried type.
// NOTE(review): the enclosing condition is only partly visible in this listing.
1676 bool needCommit
= false;
1677 for(auto i
=ret
.cbegin(); i
!=ret
.cend(); ++i
) {
1679 ( i
->d_type
== QType::NSEC3
||
1681 ( i
->d_type
== QType::RRSIG
|| i
->d_type
==QType::NSEC
) &&
1683 ( dc
->d_mdp
.d_qtype
!= i
->d_type
&& dc
->d_mdp
.d_qtype
!= QType::ANY
) ||
1684 i
->d_place
!= DNSResourceRecord::ANSWER
1692 if (!addRecordToPacket(pw
, *i
, minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1701 udr
= udrCheckUniqueDNSRecord(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, *i
);
1705 #endif /* NOD ENABLED */
1707 #ifdef HAVE_PROTOBUF
1708 if (t_protobufServers
) {
1710 pbMessage
->addRR(*i
, luaconfsLocal
->protobufExportConfig
.exportTypes
, udr
);
1712 pbMessage
->addRR(*i
, luaconfsLocal
->protobufExportConfig
.exportTypes
);
1713 #endif /* NOD_ENABLED */
// For a non-variable answer to a query that carried ECS, return an ECS option
// echoing the source with a scope built from prefix length 0.
1722 if(g_useIncomingECS
&& dc
->d_ecsFound
&& !sr
.wasVariable() && !variableAnswer
) {
1723 // cerr<<"Stuffing in a 0 scope because answer is static"<<endl;
1725 eo
.source
= dc
->d_ednssubnet
.source
;
1728 sa
.sin4
.sin_family
= eo
.source
.getNetwork().sin4
.sin_family
;
1729 eo
.scope
= Netmask(sa
, 0);
1731 returnedEdnsOptions
.push_back(make_pair(EDNSOptionCode::ECS
, makeEDNSSubnetOptsString(eo
)));
1735 /* we try to add the EDNS OPT RR even for truncated answers,
1737 "The minimal response MUST be the DNS header, question section, and an
1738 OPT record. This MUST also occur when a truncated response (using
1739 the DNS header's TC bit) is returned."
1741 pw
.addOpt(512, ednsExtRCode
, DNSSECOK
? EDNSOpts::DNSSECOK
: 0, returnedEdnsOptions
);
// Response statistics, keyed on qtype, packet size and transport.
1745 g_rs
.submitResponse(dc
->d_mdp
.d_qtype
, packet
.size(), !dc
->d_tcp
);
1746 updateResponseStats(res
, dc
->d_source
, packet
.size(), &dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1750 if (nodCheckNewDomain(dc
->d_mdp
.d_qname
))
1753 #endif /* NOD_ENABLED */
1754 #ifdef HAVE_PROTOBUF
// Complete the protobuf message. With taggedOnly set, responses that have neither
// an applied policy name nor policy tags are skipped.
1755 if (t_protobufServers
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& appliedPolicy
.getName().empty() && dc
->d_policyTags
.empty())) {
1756 pbMessage
->setBytes(packet
.size());
1757 pbMessage
->setResponseCode(pw
.getHeader()->rcode
);
1758 if (!appliedPolicy
.getName().empty()) {
1759 pbMessage
->setAppliedPolicy(appliedPolicy
.getName());
1760 pbMessage
->setAppliedPolicyType(appliedPolicy
.d_type
);
1762 pbMessage
->setPolicyTags(dc
->d_policyTags
);
1763 if (g_useKernelTimestamp
&& dc
->d_kernelTimestamp
.tv_sec
) {
1764 pbMessage
->setQueryTime(dc
->d_kernelTimestamp
.tv_sec
, dc
->d_kernelTimestamp
.tv_usec
);
1767 pbMessage
->setQueryTime(dc
->d_now
.tv_sec
, dc
->d_now
.tv_usec
);
1769 pbMessage
->setRequestorId(dq
.requestorId
);
1770 pbMessage
->setDeviceId(dq
.deviceId
);
1771 pbMessage
->setDeviceName(dq
.deviceName
);
1775 pbMessage
->setNOD(true);
1776 pbMessage
->addPolicyTag(g_nod_pbtag
);
1779 pbMessage
->addPolicyTag(g_udr_pbtag
);
1782 #endif /* NOD_ENABLED */
1783 if (dc
->d_logResponse
) {
1784 protobufLogResponse(*pbMessage
);
// The NOD / UDR markers are removed again after logging; pbMessage is handed to
// the packet cache further down.
1788 pbMessage
->setNOD(false);
1789 pbMessage
->clearUDR();
1791 pbMessage
->removePolicyTag(g_nod_pbtag
);
1793 pbMessage
->removePolicyTag(g_udr_pbtag
);
1795 #endif /* NOD_ENABLED */
// UDP path: send with sendmsg(), setting the source address through a control
// message when the socket is listed in g_fromtosockets.
1801 cmsgbuf_aligned cbuf
;
1802 fillMSGHdr(&msgh
, &iov
, &cbuf
, 0, (char*)&*packet
.begin(), packet
.size(), &dc
->d_remote
);
1803 msgh
.msg_control
=NULL
;
1805 if(g_fromtosockets
.count(dc
->d_socket
)) {
1806 addCMsgSrcAddr(&msgh
, &cbuf
, &dc
->d_local
, 0);
1808 if(sendmsg(dc
->d_socket
, &msgh
, 0) < 0 && g_logCommonErrors
) {
1810 g_log
<< Logger::Warning
<< "Sending UDP reply to client " << dc
->getRemote() << " failed with: "
1811 << strerror(err
) << endl
;
1814 if(variableAnswer
|| sr
.wasVariable()) {
1815 g_stats
.variableResponses
++;
// Only non-variable answers go into the packet cache. SERVFAIL answers use
// s_packetcacheservfailttl, others min(minTTL, s_packetcachettl).
1817 if(!SyncRes::s_nopacketcache
&& !variableAnswer
&& !sr
.wasVariable() ) {
1818 t_packetCache
->insertResponsePacket(dc
->d_tag
, dc
->d_qhash
, std::move(dc
->d_query
), dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
,
1819 string((const char*)&*packet
.begin(), packet
.size()),
1821 pw
.getHeader()->rcode
== RCode::ServFail
? SyncRes::s_packetcacheservfailttl
:
1822 min(minTTL
,SyncRes::s_packetcachettl
),
1826 std::move(pbMessage
));
1828 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
// TCP path: a two-byte big-endian length prefix followed by the packet, written
// with a single writev().
1832 buf
[0]=packet
.size()/256;
1833 buf
[1]=packet
.size()%256;
1835 Utility::iovec iov
[2];
1837 iov
[0].iov_base
=(void*)buf
; iov
[0].iov_len
=2;
1838 iov
[1].iov_base
=(void*)&*packet
.begin(); iov
[1].iov_len
= packet
.size();
1840 int wret
=Utility::writev(dc
->d_socket
, iov
, 2);
1844 g_log
<<Logger::Error
<<"EOF writing TCP answer to "<<dc
->getRemote()<<endl
;
1845 else if(wret
< 0 ) {
1847 g_log
<< Logger::Error
<< "Error writing TCP answer to " << dc
->getRemote() << ": " << strerror(err
) << endl
;
1848 } else if((unsigned int)wret
!= 2 + packet
.size())
1849 g_log
<<Logger::Error
<<"Oops, partial answer sent to "<<dc
->getRemote()<<" for "<<dc
->d_mdp
.d_qname
<<" (size="<< (2 + packet
.size()) <<", sent "<<wret
<<")"<<endl
;
1853 // update tcp connection status, closing if needed and doing the fd multiplexer accounting
1854 if (dc
->d_tcpConnection
->d_requestsInFlight
> 0) {
1855 dc
->d_tcpConnection
->d_requestsInFlight
--;
1858 // In the code below, we try to remove the fd from the set, but
1859 // we don't know if another mthread already did the remove, so we can get a
1860 // "Tried to remove unlisted fd" exception. Note that an inflight < limit test
1861 // will not work since we do not know if the other mthread got an error or not.
1864 t_fdm
->removeReadFD(dc
->d_socket
);
1866 catch (FDMultiplexerException
&) {
// Enforce the per-connection query limit (g_tcpMaxQueriesPerConn, 0 = no limit).
1871 dc
->d_tcpConnection
->queriesCount
++;
1872 if (g_tcpMaxQueriesPerConn
&& dc
->d_tcpConnection
->queriesCount
>= g_tcpMaxQueriesPerConn
) {
1874 t_fdm
->removeReadFD(dc
->d_socket
);
1876 catch (FDMultiplexerException
&) {
1881 Utility::gettimeofday(&g_now
, 0); // needs to be updated
1882 struct timeval ttd
= g_now
;
1883 // If we cross from max to max-1 in flight requests, the fd was not listened to, add it back
1884 if (dc
->d_tcpConnection
->d_requestsInFlight
== TCPConnection::s_maxInFlight
- 1) {
1885 // A read error might have happened. If we add the fd back, it will most likely error again.
1886 // This is not a big issue, the next handleTCPClientReadable() will see another read error
1888 ttd
.tv_sec
+= g_tcpTimeout
;
1889 t_fdm
->addReadFD(dc
->d_socket
, handleRunningTCPQuestion
, dc
->d_tcpConnection
, &ttd
);
1891 // fd might have been removed by read error code, so expect an exception
1893 t_fdm
->setReadTTD(dc
->d_socket
, ttd
, g_tcpTimeout
);
1895 catch (FDMultiplexerException
&) {
// Wall-clock time spent on this query (spent is compared against second-based
// thresholds below).
1901 float spent
=makeFloat(sr
.getNow()-dc
->d_now
);
1903 g_log
<<Logger::Error
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] answer to "<<(dc
->d_mdp
.d_header
.rd
?"":"non-rd ")<<"question '"<<dc
->d_mdp
.d_qname
<<"|"<<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
);
1904 g_log
<<"': "<<ntohs(pw
.getHeader()->ancount
)<<" answers, "<<ntohs(pw
.getHeader()->arcount
)<<" additional, took "<<sr
.d_outqueries
<<" packets, "<<
1905 sr
.d_totUsec
/1000.0<<" netw ms, "<< spent
*1000.0<<" tot ms, "<<
1906 sr
.d_throttledqueries
<<" throttled, "<<sr
.d_timeouts
<<" timeouts, "<<sr
.d_tcpoutqueries
<<" tcp connections, rcode="<< res
;
1908 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1909 g_log
<< ", dnssec="<<vStates
[sr
.getValidationState()];
// Any outgoing or auth-zone query counts as a cache miss.
1916 if (sr
.d_outqueries
|| sr
.d_authzonequeries
) {
1917 s_RC
->cacheMisses
++;
// Answer-time histogram; the visible thresholds are 0.010, 0.1 and 1.0 seconds.
1924 g_stats
.answers0_1
++;
1925 else if(spent
< 0.010)
1926 g_stats
.answers1_10
++;
1927 else if(spent
< 0.1)
1928 g_stats
.answers10_100
++;
1929 else if(spent
< 1.0)
1930 g_stats
.answers100_1000
++;
1932 g_stats
.answersSlow
++;
// Moving average of the latency in microseconds, capped at the network timeout.
1934 uint64_t newLat
=(uint64_t)(spent
*1000000);
1935 newLat
= min(newLat
,(uint64_t)(((uint64_t) g_networkTimeoutMsec
)*1000)); // outliers of several minutes exist..
1936 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ (float)newLat
/g_latencyStatSize
;
1937 // no worries, we do this for packet cache hits elsewhere
// Time spent excluding the network time reported by the resolver (sr.d_totUsec).
1939 auto ourtime
= 1000.0*spent
-sr
.d_totUsec
/1000.0; // in msec
1941 g_stats
.ourtime0_1
++;
1942 else if(ourtime
< 2)
1943 g_stats
.ourtime1_2
++;
1944 else if(ourtime
< 4)
1945 g_stats
.ourtime2_4
++;
1946 else if(ourtime
< 8)
1947 g_stats
.ourtime4_8
++;
1948 else if(ourtime
< 16)
1949 g_stats
.ourtime8_16
++;
1950 else if(ourtime
< 32)
1951 g_stats
.ourtime16_32
++;
1953 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1954 g_stats
.ourtimeSlow
++;
1956 if(ourtime
>= 0.0) {
1957 newLat
=ourtime
*1000; // usec
1958 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ (float)newLat
/g_latencyStatSize
;
1960 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
// Exception handlers: each one logs the problem together with makeLoginfo(dc).
1962 catch(PDNSException
&ae
) {
1963 g_log
<<Logger::Error
<<"startDoResolve problem "<<makeLoginfo(dc
)<<": "<<ae
.reason
<<endl
;
1965 catch(const MOADNSException
&mde
) {
1966 g_log
<<Logger::Error
<<"DNS parser error "<<makeLoginfo(dc
) <<": "<<dc
->d_mdp
.d_qname
<<", "<<mde
.what()<<endl
;
1968 catch(std::exception
& e
) {
1969 g_log
<<Logger::Error
<<"STL error "<< makeLoginfo(dc
)<<": "<<e
.what();
1971 // Luawrapper nests the exception from Lua, so we unnest it here
1973 std::rethrow_if_nested(e
);
1974 } catch(const std::exception
& ne
) {
1975 g_log
<<". Extra info: "<<ne
.what();
1981 g_log
<<Logger::Error
<<"Any other exception in a resolver context "<< makeLoginfo(dc
) <<endl
;
// Track the highest mthread stack usage seen so far.
1984 g_stats
.maxMThreadStackUsage
= max(MT
->getMaxStackUsage(), g_stats
.maxMThreadStackUsage
);
// makeControlChannelSocket: creates the control socket and applies ownership / mode.
// The socket path is socket-dir + "/" + s_programname, optionally followed by
// "." + processNum, and always ending in ".controlsocket"; s_rcc listens on it.
// When socket-group or socket-owner is configured the socket is chown()ed, and when
// socket-mode is configured it is chmod()ed; a failure of either calls unixDie().
// NOTE(review): the condition guarding the processNum suffix is not visible in this
// listing; presumably it is skipped for the default value -1 - confirm.
1987 static void makeControlChannelSocket(int processNum
=-1)
1989 string sockname
=::arg()["socket-dir"]+"/"+s_programname
;
1991 sockname
+= "."+std::to_string(processNum
);
1992 sockname
+=".controlsocket";
1993 s_rcc
.listen(sockname
);
// Owner and group are only looked up when the corresponding setting is non-empty.
1998 if (!::arg().isEmpty("socket-group"))
1999 sockgroup
=::arg().asGid("socket-group");
2000 if (!::arg().isEmpty("socket-owner"))
2001 sockowner
=::arg().asUid("socket-owner");
// A value above -1 means the setting was provided.
2003 if (sockgroup
> -1 || sockowner
> -1) {
2004 if(chown(sockname
.c_str(), sockowner
, sockgroup
) < 0) {
2005 unixDie("Failed to chown control socket");
2009 // do mode change if socket-mode is given
2010 if(!::arg().isEmpty("socket-mode")) {
2011 mode_t sockmode
=::arg().asMode("socket-mode");
2012 if(chmod(sockname
.c_str(), sockmode
) < 0) {
2013 unixDie("Failed to chmod control socket");
// getQNameAndSubnet: extracts the query name, type and class from a raw DNS question
// and scans its additional records for an EDNS Client Subnet option and / or an XPF
// record.
//   question              - the raw query packet
//   dnsname, qtype, qclass - outputs filled from the question section
//   foundECS, ednssubnet  - ECS is searched only when ednssubnet is non-null
//   options               - when used, receives all EDNS options of the OPT record
//   foundXPF, xpfSource, xpfDest - XPF is searched only when xpfSource is non-null
//                           and g_xpfRRCode is non-zero
// NOTE(review): assignments to foundECS and *ednssubnet, and the test choosing
// between the getEDNSOption and getEDNSOptions paths, are not visible in this listing.
2018 static void getQNameAndSubnet(const std::string
& question
, DNSName
* dnsname
, uint16_t* qtype
, uint16_t* qclass
,
2019 bool& foundECS
, EDNSSubnetOpts
* ednssubnet
, EDNSOptionViewMap
* options
,
2020 bool& foundXPF
, ComboAddress
* xpfSource
, ComboAddress
* xpfDest
)
2022 const bool lookForXPF
= xpfSource
!= nullptr && g_xpfRRCode
!= 0;
2023 const bool lookForECS
= ednssubnet
!= nullptr;
2024 const struct dnsheader
* dh
= reinterpret_cast<const struct dnsheader
*>(question
.c_str());
2025 size_t questionLen
= question
.length();
2026 unsigned int consumed
=0;
// Parse the qname right after the DNS header; consumed receives the number of
// bytes the name occupied.
2027 *dnsname
=DNSName(question
.c_str(), questionLen
, sizeof(dnsheader
), false, qtype
, qclass
, &consumed
);
// Skip the header, the qname and the four bytes of qtype + qclass.
2029 size_t pos
= sizeof(dnsheader
)+consumed
+4;
2030 const size_t headerSize
= /* root */ 1 + sizeof(dnsrecordheader
);
2031 const uint16_t arcount
= ntohs(dh
->arcount
);
// Walk the records while there is room for another record header and something is
// still being looked for.
2033 for (uint16_t arpos
= 0; arpos
< arcount
&& questionLen
> (pos
+ headerSize
) && ((lookForECS
&& !foundECS
) || (lookForXPF
&& !foundXPF
)); arpos
++) {
2034 if (question
.at(pos
) != 0) {
2035 /* not an OPT or a XPF, bye. */
2040 const dnsrecordheader
* drh
= reinterpret_cast<const dnsrecordheader
*>(&question
.at(pos
));
2041 pos
+= sizeof(dnsrecordheader
);
2043 if (pos
>= questionLen
) {
2047 /* OPT root label (1) followed by type (2) */
2048 if(lookForECS
&& ntohs(drh
->d_type
) == QType::OPT
) {
2050 char* ecsStart
= nullptr;
2052 /* we need to pass the record len */
2053 int res
= getEDNSOption(const_cast<char*>(reinterpret_cast<const char*>(&question
.at(pos
- sizeof(drh
->d_clen
)))), questionLen
- pos
+ sizeof(drh
->d_clen
), EDNSOptionCode::ECS
, &ecsStart
, &ecsLen
);
// The first four bytes at ecsStart are skipped before parsing the ECS payload.
2054 if (res
== 0 && ecsLen
> 4) {
2056 if(getEDNSSubnetOptsFromString(ecsStart
+ 4, ecsLen
- 4, &eso
)) {
2063 /* we need to pass the record len */
2064 int res
= getEDNSOptions(reinterpret_cast<const char*>(&question
.at(pos
-sizeof(drh
->d_clen
))), questionLen
- pos
+ (sizeof(drh
->d_clen
)), *options
);
// Use the first ECS value from the parsed option map, when present and non-empty.
2066 const auto& it
= options
->find(EDNSOptionCode::ECS
);
2067 if (it
!= options
->end() && !it
->second
.values
.empty() && it
->second
.values
.at(0).content
!= nullptr && it
->second
.values
.at(0).size
> 0) {
2069 if(getEDNSSubnetOptsFromString(it
->second
.values
.at(0).content
, it
->second
.values
.at(0).size
, &eso
)) {
// XPF record: type g_xpfRRCode, class IN, TTL 0.
2077 else if (lookForXPF
&& ntohs(drh
->d_type
) == g_xpfRRCode
&& ntohs(drh
->d_class
) == QClass::IN
&& drh
->d_ttl
== 0) {
// The declared rdata length must fit in what is left of the packet.
2078 if ((questionLen
- pos
) < ntohs(drh
->d_clen
)) {
2082 foundXPF
= parseXPFPayload(reinterpret_cast<const char*>(&question
.at(pos
)), ntohs(drh
->d_clen
), *xpfSource
, xpfDest
);
// Move past the rdata of this record.
2085 pos
+= ntohs(drh
->d_clen
);
// handleTCPReadResult: reacts to the result of a read on a TCP client socket.
// fd is removed from the t_fdm read set in two visible cases: an earlier branch
// whose condition is not shown in this listing (presumably bytes == 0, meaning the
// peer closed the connection), and bytes < 0 with an errno other than EAGAIN or
// EWOULDBLOCK.
// NOTE(review): the return statements are not visible here; the bool presumably
// tells the caller whether it may continue processing - confirm.
2089 static bool handleTCPReadResult(int fd
, ssize_t bytes
)
2093 t_fdm
->removeReadFD(fd
);
2096 else if (bytes
< 0) {
// EAGAIN / EWOULDBLOCK are excluded from the removal below.
2097 if (errno
!= EAGAIN
&& errno
!= EWOULDBLOCK
) {
2098 t_fdm
->removeReadFD(fd
);
2106 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
2108 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(var
);
2110 if (conn
->state
== TCPConnection::PROXYPROTOCOLHEADER
) {
2111 ssize_t bytes
= recv(conn
->getFD(), &conn
->data
.at(conn
->proxyProtocolGot
), conn
->proxyProtocolNeed
, 0);
2113 handleTCPReadResult(fd
, bytes
);
2117 conn
->proxyProtocolGot
+= bytes
;
2118 conn
->data
.resize(conn
->proxyProtocolGot
);
2119 ssize_t remaining
= isProxyHeaderComplete(conn
->data
);
2120 if (remaining
== 0) {
2121 if (g_logCommonErrors
) {
2122 g_log
<<Logger::Error
<<"Unable to consume proxy protocol header in packet from TCP client "<< conn
->d_remote
.toStringWithPort() <<endl
;
2124 ++g_stats
.proxyProtocolInvalidCount
;
2125 t_fdm
->removeReadFD(fd
);
2128 else if (remaining
< 0) {
2129 conn
->proxyProtocolNeed
= -remaining
;
2130 conn
->data
.resize(conn
->proxyProtocolGot
+ conn
->proxyProtocolNeed
);
2134 /* proxy header received */
2135 /* we ignore the TCP field for now, but we could properly set whether
2136 the connection was received over UDP or TCP if needed */
2139 size_t used
= parseProxyHeader(conn
->data
, proxy
, conn
->d_source
, conn
->d_destination
, tcp
, conn
->proxyProtocolValues
);
2141 if (g_logCommonErrors
) {
2142 g_log
<<Logger::Error
<<"Unable to parse proxy protocol header in packet from TCP client "<< conn
->d_remote
.toStringWithPort() <<endl
;
2144 ++g_stats
.proxyProtocolInvalidCount
;
2145 t_fdm
->removeReadFD(fd
);
2148 else if (static_cast<size_t>(used
) > g_proxyProtocolMaximumSize
) {
2149 if (g_logCommonErrors
) {
2150 g_log
<<Logger::Error
<<"Proxy protocol header in packet from TCP client "<< conn
->d_remote
.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used
<< "), dropping"<< endl
;
2152 ++g_stats
.proxyProtocolInvalidCount
;
2153 t_fdm
->removeReadFD(fd
);
2157 /* Now that we have retrieved the address of the client, as advertised by the proxy
2158 via the proxy protocol header, check that it is allowed by our ACL */
2159 /* note that if the proxy header used a 'LOCAL' command, the original source and destination are untouched so everything should be fine */
2160 if (t_allowFrom
&& !t_allowFrom
->match(&conn
->d_source
)) {
2162 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping TCP query from "<<conn
->d_source
.toString()<<", address not matched by allow-from"<<endl
;
2165 ++g_stats
.unauthorizedTCP
;
2166 t_fdm
->removeReadFD(fd
);
2170 conn
->data
.resize(2);
2171 conn
->state
= TCPConnection::BYTE0
;
2175 if (conn
->state
==TCPConnection::BYTE0
) {
2176 ssize_t bytes
=recv(conn
->getFD(), &conn
->data
[0], 2, 0);
2178 conn
->state
=TCPConnection::BYTE1
;
2180 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
2181 conn
->data
.resize(conn
->qlen
);
2183 conn
->state
=TCPConnection::GETQUESTION
;
2186 handleTCPReadResult(fd
, bytes
);
2191 if (conn
->state
==TCPConnection::BYTE1
) {
2192 ssize_t bytes
=recv(conn
->getFD(), &conn
->data
[1], 1, 0);
2194 conn
->state
=TCPConnection::GETQUESTION
;
2195 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
2196 conn
->data
.resize(conn
->qlen
);
2200 if (!handleTCPReadResult(fd
, bytes
)) {
2201 if(g_logCommonErrors
) {
2202 g_log
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toStringWithPort() <<" disconnected after first byte"<<endl
;
2209 if(conn
->state
==TCPConnection::GETQUESTION
) {
2210 ssize_t bytes
=recv(conn
->getFD(), &conn
->data
[conn
->bytesread
], conn
->qlen
- conn
->bytesread
, 0);
2212 if (!handleTCPReadResult(fd
, bytes
)) {
2213 if(g_logCommonErrors
) {
2214 g_log
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toStringWithPort() <<" disconnected while reading question body"<<endl
;
2219 else if (bytes
> std::numeric_limits
<std::uint16_t>::max()) {
2220 if(g_logCommonErrors
) {
2221 g_log
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toStringWithPort() <<" sent an invalid question size while reading question body"<<endl
;
2223 t_fdm
->removeReadFD(fd
);
2226 conn
->bytesread
+=(uint16_t)bytes
;
2227 if(conn
->bytesread
==conn
->qlen
) {
2228 conn
->state
= TCPConnection::BYTE0
;
2229 std::unique_ptr
<DNSComboWriter
> dc
;
2231 dc
=std::unique_ptr
<DNSComboWriter
>(new DNSComboWriter(conn
->data
, g_now
));
2233 catch(const MOADNSException
&mde
) {
2234 g_stats
.clientParseError
++;
2235 if(g_logCommonErrors
)
2236 g_log
<<Logger::Error
<<"Unable to parse packet from TCP client "<< conn
->d_remote
.toStringWithPort() <<endl
;
2239 dc
->d_tcpConnection
= conn
; // carry the torch
2240 dc
->setSocket(conn
->getFD()); // this is the only time a copy is made of the actual fd
2242 dc
->setRemote(conn
->d_remote
);
2243 dc
->setSource(conn
->d_source
);
2246 dest
.sin4
.sin_family
= conn
->d_remote
.sin4
.sin_family
;
2247 socklen_t len
= dest
.getSocklen();
2248 getsockname(conn
->getFD(), (sockaddr
*)&dest
, &len
); // if this fails, we're ok with it
2250 dc
->setDestination(conn
->d_destination
);
2251 /* we can't move this if we want to be able to access the values in
2252 all queries sent over this connection */
2253 dc
->d_proxyProtocolValues
= conn
->proxyProtocolValues
;
2257 bool needECS
= false;
2258 bool needXPF
= g_XPFAcl
.match(conn
->d_remote
);
2262 bool logQuery
= false;
2263 #ifdef HAVE_PROTOBUF
2264 auto luaconfsLocal
= g_luaconfs
.getLocal();
2265 if (checkProtobufExport(luaconfsLocal
)) {
2268 logQuery
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logQueries
;
2269 dc
->d_logResponse
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logResponses
;
2270 #endif /* HAVE_PROTOBUF */
2273 checkFrameStreamExport(luaconfsLocal
);
2276 if(needECS
|| needXPF
|| (t_pdl
&& (t_pdl
->d_gettag_ffi
|| t_pdl
->d_gettag
))) {
2279 EDNSOptionViewMap ednsOptions
;
2280 bool xpfFound
= false;
2281 dc
->d_ecsParsed
= true;
2282 dc
->d_ecsFound
= false;
2283 getQNameAndSubnet(conn
->data
, &qname
, &qtype
, &qclass
,
2284 dc
->d_ecsFound
, &dc
->d_ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr,
2285 xpfFound
, needXPF
? &dc
->d_source
: nullptr, needXPF
? &dc
->d_destination
: nullptr);
2289 if (t_pdl
->d_gettag_ffi
) {
2290 dc
->d_tag
= t_pdl
->gettag_ffi(dc
->d_source
, dc
->d_ednssubnet
.source
, dc
->d_destination
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_records
, dc
->d_data
, ednsOptions
, true, dc
->d_proxyProtocolValues
, requestorId
, deviceId
, deviceName
, dc
->d_routingTag
, dc
->d_rcode
, dc
->d_ttlCap
, dc
->d_variable
, logQuery
, dc
->d_logResponse
, dc
->d_followCNAMERecords
);
2292 else if (t_pdl
->d_gettag
) {
2293 dc
->d_tag
= t_pdl
->gettag(dc
->d_source
, dc
->d_ednssubnet
.source
, dc
->d_destination
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
, deviceName
, dc
->d_routingTag
, dc
->d_proxyProtocolValues
);
2296 catch(const std::exception
& e
) {
2297 if(g_logCommonErrors
)
2298 g_log
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
2302 catch(const std::exception
& e
)
2304 if(g_logCommonErrors
)
2305 g_log
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
2309 const struct dnsheader
* dh
= reinterpret_cast<const struct dnsheader
*>(&conn
->data
[0]);
2311 #ifdef HAVE_PROTOBUF
2312 if(t_protobufServers
|| t_outgoingProtobufServers
) {
2313 dc
->d_requestorId
= requestorId
;
2314 dc
->d_deviceId
= deviceId
;
2315 dc
->d_deviceName
= deviceName
;
2316 dc
->d_uuid
= getUniqueID();
2319 if(t_protobufServers
) {
2322 if (logQuery
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& dc
->d_policyTags
.empty())) {
2323 protobufLogQuery(luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, dc
->d_uuid
, dc
->d_source
, dc
->d_destination
, dc
->d_ednssubnet
.source
, true, dh
->id
, conn
->qlen
, qname
, qtype
, qclass
, dc
->d_policyTags
, dc
->d_requestorId
, dc
->d_deviceId
, dc
->d_deviceName
);
2326 catch(std::exception
& e
) {
2327 if(g_logCommonErrors
)
2328 g_log
<<Logger::Warning
<<"Error parsing a TCP query packet for edns subnet: "<<e
.what()<<endl
;
2333 if(t_pdl
->ipfilter(dc
->d_source
, dc
->d_destination
, *dh
)) {
2335 g_log
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED TCP question from "<<dc
->d_source
.toStringWithPort()<<(dc
->d_source
!= dc
->d_remote
? " (via "+dc
->d_remote
.toStringWithPort()+")" : "")<<" based on policy"<<endl
;
2336 g_stats
.policyDrops
++;
2341 if(dc
->d_mdp
.d_header
.qr
) {
2342 g_stats
.ignoredCount
++;
2343 if(g_logCommonErrors
) {
2344 g_log
<<Logger::Error
<<"Ignoring answer from TCP client "<< dc
->getRemote() <<" on server socket!"<<endl
;
2348 if(dc
->d_mdp
.d_header
.opcode
) {
2349 g_stats
.ignoredCount
++;
2350 if(g_logCommonErrors
) {
2351 g_log
<<Logger::Error
<<"Ignoring non-query opcode from TCP client "<< dc
->getRemote() <<" on server socket!"<<endl
;
2355 else if (dh
->qdcount
== 0) {
2356 g_stats
.emptyQueriesCount
++;
2357 if(g_logCommonErrors
) {
2358 g_log
<<Logger::Error
<<"Ignoring empty (qdcount == 0) query from "<< dc
->getRemote() <<" on server socket!"<<endl
;
2364 ++g_stats
.tcpqcounter
;
2365 ++conn
->d_requestsInFlight
;
2366 if (conn
->d_requestsInFlight
>= TCPConnection::s_maxInFlight
) {
2367 t_fdm
->removeReadFD(fd
); // should no longer awake ourselves when there is data to read
2369 Utility::gettimeofday(&g_now
, 0); // needed?
2370 struct timeval ttd
= g_now
;
2371 t_fdm
->setReadTTD(fd
, ttd
, g_tcpTimeout
);
2373 MT
->makeThread(startDoResolve
, dc
.release()); // deletes dc
2380 static bool expectProxyProtocol(const ComboAddress
& from
)
2382 return g_proxyProtocolACL
.match(from
);
2385 //! Handle new incoming TCP connection
2386 static void handleNewTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& )
2389 socklen_t addrlen
=sizeof(addr
);
2390 int newsock
=accept(fd
, (struct sockaddr
*)&addr
, &addrlen
);
2392 if(MT
->numProcesses() > g_maxMThreads
) {
2393 g_stats
.overCapacityDrops
++;
2395 closesocket(newsock
);
2397 catch(const PDNSException
& e
) {
2398 g_log
<<Logger::Error
<<"Error closing TCP socket after an over capacity drop: "<<e
.reason
<<endl
;
2404 t_remotes
->push_back(addr
);
2407 bool fromProxyProtocolSource
= expectProxyProtocol(addr
);
2408 if(t_allowFrom
&& !t_allowFrom
->match(&addr
) && !fromProxyProtocolSource
) {
2410 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping TCP query from "<<addr
.toString()<<", address neither matched by allow-from nor proxy-protocol-from"<<endl
;
2412 g_stats
.unauthorizedTCP
++;
2414 closesocket(newsock
);
2416 catch(const PDNSException
& e
) {
2417 g_log
<<Logger::Error
<<"Error closing TCP socket after an ACL drop: "<<e
.reason
<<endl
;
2422 if(g_maxTCPPerClient
&& t_tcpClientCounts
->count(addr
) && (*t_tcpClientCounts
)[addr
] >= g_maxTCPPerClient
) {
2423 g_stats
.tcpClientOverflow
++;
2425 closesocket(newsock
); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
2427 catch(const PDNSException
& e
) {
2428 g_log
<<Logger::Error
<<"Error closing TCP socket after an overflow drop: "<<e
.reason
<<endl
;
2433 setNonBlocking(newsock
);
2434 std::shared_ptr
<TCPConnection
> tc
= std::make_shared
<TCPConnection
>(newsock
, addr
);
2435 tc
->d_source
= addr
;
2436 tc
->d_destination
.reset();
2437 tc
->d_destination
.sin4
.sin_family
= addr
.sin4
.sin_family
;
2438 socklen_t len
= tc
->d_destination
.getSocklen();
2439 getsockname(tc
->getFD(), reinterpret_cast<sockaddr
*>(&tc
->d_destination
), &len
); // if this fails, we're ok with it
2441 if (fromProxyProtocolSource
) {
2442 tc
->proxyProtocolNeed
= s_proxyProtocolMinimumHeaderSize
;
2443 tc
->data
.resize(tc
->proxyProtocolNeed
);
2444 tc
->state
= TCPConnection::PROXYPROTOCOLHEADER
;
2447 tc
->state
= TCPConnection::BYTE0
;
2451 Utility::gettimeofday(&ttd
, 0);
2452 ttd
.tv_sec
+= g_tcpTimeout
;
2454 t_fdm
->addReadFD(tc
->getFD(), handleRunningTCPQuestion
, tc
, &ttd
);
2458 static string
* doProcessUDPQuestion(const std::string
& question
, const ComboAddress
& fromaddr
, const ComboAddress
& destaddr
, ComboAddress source
, ComboAddress destination
, struct timeval tv
, int fd
, std::vector
<ProxyProtocolValue
>& proxyProtocolValues
)
2460 gettimeofday(&g_now
, 0);
2462 struct timeval diff
= g_now
- tv
;
2463 double delta
=(diff
.tv_sec
*1000 + diff
.tv_usec
/1000.0);
2465 if(delta
> 1000.0) {
2466 g_stats
.tooOldDrops
++;
2472 if(fromaddr
.sin4
.sin_family
==AF_INET6
)
2473 g_stats
.ipv6qcounter
++;
2476 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
2477 unsigned int ctag
=0;
2479 bool needECS
= false;
2480 bool needXPF
= g_XPFAcl
.match(fromaddr
);
2481 std::unordered_set
<std::string
> policyTags
;
2482 LuaContext::LuaObject data
;
2487 bool logQuery
= false;
2488 bool logResponse
= false;
2489 #ifdef HAVE_PROTOBUF
2490 boost::uuids::uuid uniqueId
;
2491 auto luaconfsLocal
= g_luaconfs
.getLocal();
2492 if (checkProtobufExport(luaconfsLocal
)) {
2493 uniqueId
= getUniqueID();
2495 } else if (checkOutgoingProtobufExport(luaconfsLocal
)) {
2496 uniqueId
= getUniqueID();
2498 logQuery
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logQueries
;
2499 logResponse
= t_protobufServers
&& luaconfsLocal
->protobufExportConfig
.logResponses
;
2502 checkFrameStreamExport(luaconfsLocal
);
2504 EDNSSubnetOpts ednssubnet
;
2505 bool ecsFound
= false;
2506 bool ecsParsed
= false;
2507 uint16_t ecsBegin
= 0;
2508 uint16_t ecsEnd
= 0;
2509 std::vector
<DNSRecord
> records
;
2510 boost::optional
<int> rcode
= boost::none
;
2511 uint32_t ttlCap
= std::numeric_limits
<uint32_t>::max();
2512 bool variable
= false;
2513 bool followCNAMEs
= false;
2519 bool qnameParsed
=false;
2522 static uint64_t last=0;
2524 g_mtracer->clearAllocators();
2525 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
2526 last=g_mtracer->getAllocs();
2527 cout<<g_mtracer->topAllocatorsString()<<endl;
2528 g_mtracer->clearAllocators();
2532 if(needECS
|| needXPF
|| (t_pdl
&& (t_pdl
->d_gettag
|| t_pdl
->d_gettag_ffi
))) {
2534 EDNSOptionViewMap ednsOptions
;
2535 bool xpfFound
= false;
2539 getQNameAndSubnet(question
, &qname
, &qtype
, &qclass
,
2540 ecsFound
, &ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr,
2541 xpfFound
, needXPF
? &source
: nullptr, needXPF
? &destination
: nullptr);
2548 if (t_pdl
->d_gettag_ffi
) {
2549 ctag
= t_pdl
->gettag_ffi(source
, ednssubnet
.source
, destination
, qname
, qtype
, &policyTags
, records
, data
, ednsOptions
, false, proxyProtocolValues
, requestorId
, deviceId
, deviceName
, routingTag
, rcode
, ttlCap
, variable
, logQuery
, logResponse
, followCNAMEs
);
2551 else if (t_pdl
->d_gettag
) {
2552 ctag
= t_pdl
->gettag(source
, ednssubnet
.source
, destination
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
, deviceName
, routingTag
, proxyProtocolValues
);
2555 catch(const std::exception
& e
) {
2556 if(g_logCommonErrors
)
2557 g_log
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
2561 catch(const std::exception
& e
)
2563 if(g_logCommonErrors
)
2564 g_log
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
2568 bool cacheHit
= false;
2569 boost::optional
<RecProtoBufMessage
> pbMessage(boost::none
);
2570 #ifdef HAVE_PROTOBUF
2571 if (t_protobufServers
) {
2572 pbMessage
= RecProtoBufMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response
);
2573 pbMessage
->setServerIdentity(SyncRes::s_serverID
);
2574 if (logQuery
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& policyTags
.empty())) {
2575 protobufLogQuery(luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, uniqueId
, source
, destination
, ednssubnet
.source
, false, dh
->id
, question
.size(), qname
, qtype
, qclass
, policyTags
, requestorId
, deviceId
, deviceName
);
2578 #endif /* HAVE_PROTOBUF */
2580 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
2581 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
2582 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
2585 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, qtype
, qclass
, g_now
.tv_sec
, &response
, &age
, &valState
, &qhash
, &ecsBegin
, &ecsEnd
, pbMessage
? &(*pbMessage
) : nullptr));
2588 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, &qtype
, &qclass
, g_now
.tv_sec
, &response
, &age
, &valState
, &qhash
, &ecsBegin
, &ecsEnd
, pbMessage
? &(*pbMessage
) : nullptr));
2592 if(valState
== Bogus
) {
2594 t_bogusremotes
->push_back(source
);
2595 if(t_bogusqueryring
)
2596 t_bogusqueryring
->push_back(make_pair(qname
, qtype
));
2599 #ifdef HAVE_PROTOBUF
2600 if(t_protobufServers
&& logResponse
&& !(luaconfsLocal
->protobufExportConfig
.taggedOnly
&& pbMessage
->getAppliedPolicy().empty() && pbMessage
->getPolicyTags().empty())) {
2601 Netmask
requestorNM(source
, source
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
2602 ComboAddress requestor
= requestorNM
.getMaskedNetwork();
2603 requestor
.setPort(source
.getPort());
2604 pbMessage
->update(uniqueId
, &requestor
, &destination
, false, dh
->id
);
2605 pbMessage
->setEDNSSubnet(ednssubnet
.source
, ednssubnet
.source
.isIPv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
2606 if (g_useKernelTimestamp
&& tv
.tv_sec
) {
2607 pbMessage
->setQueryTime(tv
.tv_sec
, tv
.tv_usec
);
2610 pbMessage
->setQueryTime(g_now
.tv_sec
, g_now
.tv_usec
);
2612 pbMessage
->setRequestorId(requestorId
);
2613 pbMessage
->setDeviceId(deviceId
);
2614 pbMessage
->setDeviceName(deviceName
);
2615 protobufLogResponse(*pbMessage
);
2617 #endif /* HAVE_PROTOBUF */
2619 g_log
<<Logger::Notice
<<t_id
<< " question answered from packet cache tag="<<ctag
<<" from "<<source
.toStringWithPort()<<(source
!= fromaddr
? " (via "+fromaddr
.toStringWithPort()+")" : "")<<endl
;
2621 g_stats
.packetCacheHits
++;
2622 SyncRes::s_queries
++;
2623 ageDNSPacket(response
, age
);
2626 cmsgbuf_aligned cbuf
;
2627 fillMSGHdr(&msgh
, &iov
, &cbuf
, 0, (char*)response
.c_str(), response
.length(), const_cast<ComboAddress
*>(&fromaddr
));
2628 msgh
.msg_control
=NULL
;
2630 if(g_fromtosockets
.count(fd
)) {
2631 addCMsgSrcAddr(&msgh
, &cbuf
, &destaddr
, 0);
2633 if(sendmsg(fd
, &msgh
, 0) < 0 && g_logCommonErrors
) {
2635 g_log
<< Logger::Warning
<< "Sending UDP reply to client " << source
.toStringWithPort()
2636 << (source
!= fromaddr
? " (via " + fromaddr
.toStringWithPort() + ")" : "") << " failed with: "
2637 << strerror(err
) << endl
;
2639 if(response
.length() >= sizeof(struct dnsheader
)) {
2640 struct dnsheader tmpdh
;
2641 memcpy(&tmpdh
, response
.c_str(), sizeof(tmpdh
));
2642 updateResponseStats(tmpdh
.rcode
, source
, response
.length(), 0, 0);
2644 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ 0.0; // we assume 0 usec
2645 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ 0.0; // we assume 0 usec
2649 catch(std::exception
& e
) {
2650 if(g_logCommonErrors
)
2651 g_log
<<Logger::Error
<<"Error processing or aging answer packet: "<<e
.what()<<endl
;
2656 if(t_pdl
->ipfilter(source
, destination
, *dh
)) {
2658 g_log
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<source
.toStringWithPort()<<(source
!= fromaddr
? " (via "+fromaddr
.toStringWithPort()+")" : "")<<" based on policy"<<endl
;
2659 g_stats
.policyDrops
++;
2664 if(MT
->numProcesses() > g_maxMThreads
) {
2666 g_log
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<source
.toStringWithPort()<<(source
!= fromaddr
? " (via "+fromaddr
.toStringWithPort()+")" : "")<<", over capacity"<<endl
;
2668 g_stats
.overCapacityDrops
++;
2672 auto dc
= std::unique_ptr
<DNSComboWriter
>(new DNSComboWriter(question
, g_now
, std::move(policyTags
), std::move(data
), std::move(records
)));
2676 dc
->setRemote(fromaddr
);
2677 dc
->setSource(source
);
2678 dc
->setLocal(destaddr
);
2679 dc
->setDestination(destination
);
2681 dc
->d_ecsFound
= ecsFound
;
2682 dc
->d_ecsParsed
= ecsParsed
;
2683 dc
->d_ecsBegin
= ecsBegin
;
2684 dc
->d_ecsEnd
= ecsEnd
;
2685 dc
->d_ednssubnet
= ednssubnet
;
2686 dc
->d_ttlCap
= ttlCap
;
2687 dc
->d_variable
= variable
;
2688 dc
->d_followCNAMERecords
= followCNAMEs
;
2689 dc
->d_rcode
= rcode
;
2690 dc
->d_logResponse
= logResponse
;
2691 #ifdef HAVE_PROTOBUF
2692 if (t_protobufServers
|| t_outgoingProtobufServers
) {
2693 dc
->d_uuid
= std::move(uniqueId
);
2695 dc
->d_requestorId
= requestorId
;
2696 dc
->d_deviceId
= deviceId
;
2697 dc
->d_deviceName
= deviceName
;
2698 dc
->d_kernelTimestamp
= tv
;
2700 dc
->d_proxyProtocolValues
= std::move(proxyProtocolValues
);
2701 dc
->d_routingTag
= std::move(routingTag
);
2703 MT
->makeThread(startDoResolve
, (void*) dc
.release()); // deletes dc
2708 static void handleNewUDPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
2711 static const size_t maxIncomingQuerySize
= g_proxyProtocolACL
.empty() ? 512 : (512 + g_proxyProtocolMaximumSize
);
2712 static thread_local
std::string data
;
2713 ComboAddress fromaddr
;
2714 ComboAddress source
;
2715 ComboAddress destination
;
2718 cmsgbuf_aligned cbuf
;
2719 bool firstQuery
= true;
2720 std::vector
<ProxyProtocolValue
> proxyProtocolValues
;
2722 for(size_t queriesCounter
= 0; queriesCounter
< s_maxUDPQueriesPerRound
; queriesCounter
++) {
2723 bool proxyProto
= false;
2724 data
.resize(maxIncomingQuerySize
);
2725 fromaddr
.sin6
.sin6_family
=AF_INET6
; // this makes sure fromaddr is big enough
2726 fillMSGHdr(&msgh
, &iov
, &cbuf
, sizeof(cbuf
), &data
[0], data
.size(), &fromaddr
);
2728 if((len
=recvmsg(fd
, &msgh
, 0)) >= 0) {
2732 if (msgh
.msg_flags
& MSG_TRUNC
) {
2733 g_stats
.truncatedDrops
++;
2735 g_log
<<Logger::Error
<<"Ignoring truncated query from "<<fromaddr
.toString()<<endl
;
2740 data
.resize(static_cast<size_t>(len
));
2742 if (expectProxyProtocol(fromaddr
)) {
2744 ssize_t used
= parseProxyHeader(data
, proxyProto
, source
, destination
, tcp
, proxyProtocolValues
);
2746 ++g_stats
.proxyProtocolInvalidCount
;
2748 g_log
<<Logger::Error
<<"Ignoring invalid proxy protocol ("<<std::to_string(len
)<<", "<<std::to_string(used
)<<") query from "<<fromaddr
.toStringWithPort()<<endl
;
2752 else if (static_cast<size_t>(used
) > g_proxyProtocolMaximumSize
) {
2754 g_log
<<Logger::Error
<<"Proxy protocol header in UDP packet from "<< fromaddr
.toStringWithPort() << " is larger than proxy-protocol-maximum-size (" << used
<< "), dropping"<< endl
;
2756 ++g_stats
.proxyProtocolInvalidCount
;
2760 data
.erase(0, used
);
2762 else if (len
> 512) {
2763 /* we only allow UDP packets larger than 512 for those with a proxy protocol header */
2764 g_stats
.truncatedDrops
++;
2766 g_log
<<Logger::Error
<<"Ignoring truncated query from "<<fromaddr
.toStringWithPort()<<endl
;
2771 if (data
.size() < sizeof(dnsheader
)) {
2772 g_stats
.ignoredCount
++;
2774 g_log
<<Logger::Error
<<"Ignoring too-short ("<<std::to_string(data
.size())<<") query from "<<fromaddr
.toString()<<endl
;
2784 t_remotes
->push_back(fromaddr
);
2787 if(t_allowFrom
&& !t_allowFrom
->match(&source
)) {
2789 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<source
.toString()<<", address not matched by allow-from"<<endl
;
2792 g_stats
.unauthorizedUDP
++;
2796 BOOST_STATIC_ASSERT(offsetof(sockaddr_in
, sin_port
) == offsetof(sockaddr_in6
, sin6_port
));
2797 if(!fromaddr
.sin4
.sin_port
) { // also works for IPv6
2799 g_log
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toStringWithPort()<<", can't deal with port 0"<<endl
;
2802 g_stats
.clientParseError
++; // not quite the best place to put it, but needs to go somewhere
2807 dnsheader
* dh
=(dnsheader
*)&data
[0];
2810 g_stats
.ignoredCount
++;
2811 if(g_logCommonErrors
) {
2812 g_log
<<Logger::Error
<<"Ignoring answer from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
2815 else if(dh
->opcode
) {
2816 g_stats
.ignoredCount
++;
2817 if(g_logCommonErrors
) {
2818 g_log
<<Logger::Error
<<"Ignoring non-query opcode "<<dh
->opcode
<<" from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
2821 else if (dh
->qdcount
== 0) {
2822 g_stats
.emptyQueriesCount
++;
2823 if(g_logCommonErrors
) {
2824 g_log
<<Logger::Error
<<"Ignoring empty (qdcount == 0) query from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
2828 struct timeval tv
={0,0};
2829 HarvestTimestamp(&msgh
, &tv
);
2831 dest
.reset(); // this makes sure we ignore this address if not returned by recvmsg above
2832 auto loc
= rplookup(g_listenSocketsAddresses
, fd
);
2833 if(HarvestDestinationAddress(&msgh
, &dest
)) {
2834 // but.. need to get port too
2836 dest
.sin4
.sin_port
= loc
->sin4
.sin_port
;
2844 dest
.sin4
.sin_family
= fromaddr
.sin4
.sin_family
;
2845 socklen_t slen
= dest
.getSocklen();
2846 getsockname(fd
, (sockaddr
*)&dest
, &slen
); // if this fails, we're ok with it
2853 if(g_weDistributeQueries
) {
2854 distributeAsyncFunction(data
, boost::bind(doProcessUDPQuestion
, data
, fromaddr
, dest
, source
, destination
, tv
, fd
, proxyProtocolValues
));
2857 ++s_threadInfos
[t_id
].numberOfDistributedQueries
;
2858 doProcessUDPQuestion(data
, fromaddr
, dest
, source
, destination
, tv
, fd
, proxyProtocolValues
);
2862 catch(const MOADNSException
&mde
) {
2863 g_stats
.clientParseError
++;
2864 if(g_logCommonErrors
) {
2865 g_log
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<mde
.what()<<endl
;
2868 catch(const std::runtime_error
& e
) {
2869 g_stats
.clientParseError
++;
2870 if(g_logCommonErrors
) {
2871 g_log
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<e
.what()<<endl
;
2876 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
2877 if(firstQuery
&& errno
== EAGAIN
) {
2878 g_stats
.noPacketError
++;
2886 static void makeTCPServerSockets(deferredAdd_t
& deferredAdds
, std::set
<int>& tcpSockets
)
2889 vector
<string
>locals
;
2890 stringtok(locals
,::arg()["local-address"]," ,");
2893 throw PDNSException("No local address specified");
2895 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
2897 st
.port
=::arg().asNum("local-port");
2898 parseService(*i
, st
);
2903 sin
.sin4
.sin_family
= AF_INET
;
2904 if(!IpToU32(st
.host
, (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2905 sin
.sin6
.sin6_family
= AF_INET6
;
2906 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2907 throw PDNSException("Unable to resolve local address for TCP server on '"+ st
.host
+"'");
2910 fd
=socket(sin
.sin6
.sin6_family
, SOCK_STREAM
, 0);
2912 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
2917 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &tmp
, sizeof tmp
)<0) {
2918 g_log
<<Logger::Error
<<"Setsockopt failed for TCP listening socket"<<endl
;
2921 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &tmp
, sizeof(tmp
)) < 0) {
2923 g_log
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err
)<<endl
;
2926 #ifdef TCP_DEFER_ACCEPT
2927 if(setsockopt(fd
, IPPROTO_TCP
, TCP_DEFER_ACCEPT
, &tmp
, sizeof tmp
) >= 0) {
2928 if(i
==locals
.begin())
2929 g_log
<<Logger::Info
<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl
;
2933 if( ::arg().mustDo("non-local-bind") )
2934 Utility::setBindAny(AF_INET
, fd
);
2938 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &tmp
, sizeof(tmp
)) < 0)
2939 throw PDNSException("SO_REUSEPORT: "+stringerror());
2943 if (::arg().asNum("tcp-fast-open") > 0) {
2945 int fastOpenQueueSize
= ::arg().asNum("tcp-fast-open");
2946 if (setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, &fastOpenQueueSize
, sizeof fastOpenQueueSize
) < 0) {
2948 g_log
<<Logger::Error
<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(err
)<<endl
;
2951 g_log
<<Logger::Warning
<<"TCP Fast Open configured but not supported for listening socket"<<endl
;
2955 sin
.sin4
.sin_port
= htons(st
.port
);
2956 socklen_t socklen
=sin
.sin4
.sin_family
==AF_INET
? sizeof(sin
.sin4
) : sizeof(sin
.sin6
);
2957 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
2958 throw PDNSException("Binding TCP server socket for "+ st
.host
+": "+stringerror());
2961 setSocketSendBuffer(fd
, 65000);
2963 deferredAdds
.push_back(make_pair(fd
, handleNewTCPQuestion
));
2964 tcpSockets
.insert(fd
);
2966 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2967 // - fd is not that which we know here, but returned from accept()
2968 if(sin
.sin4
.sin_family
== AF_INET
)
2969 g_log
<<Logger::Info
<<"Listening for TCP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2971 g_log
<<Logger::Info
<<"Listening for TCP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
2975 static void makeUDPServerSockets(deferredAdd_t
& deferredAdds
)
2978 vector
<string
>locals
;
2979 stringtok(locals
,::arg()["local-address"]," ,");
2982 throw PDNSException("No local address specified");
2984 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
2986 st
.port
=::arg().asNum("local-port");
2987 parseService(*i
, st
);
2992 sin
.sin4
.sin_family
= AF_INET
;
2993 if(!IpToU32(st
.host
.c_str() , (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2994 sin
.sin6
.sin6_family
= AF_INET6
;
2995 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2996 throw PDNSException("Unable to resolve local address for UDP server on '"+ st
.host
+"'");
2999 int fd
=socket(sin
.sin4
.sin_family
, SOCK_DGRAM
, 0);
3001 throw PDNSException("Making a UDP server socket for resolver: "+stringerror());
3003 if (!setSocketTimestamps(fd
))
3004 g_log
<<Logger::Warning
<<"Unable to enable timestamp reporting for socket"<<endl
;
3006 if(IsAnyAddress(sin
)) {
3007 if(sin
.sin4
.sin_family
== AF_INET
)
3008 if(!setsockopt(fd
, IPPROTO_IP
, GEN_IP_PKTINFO
, &one
, sizeof(one
))) // linux supports this, so why not - might fail on other systems
3009 g_fromtosockets
.insert(fd
);
3010 #ifdef IPV6_RECVPKTINFO
3011 if(sin
.sin4
.sin_family
== AF_INET6
)
3012 if(!setsockopt(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &one
, sizeof(one
)))
3013 g_fromtosockets
.insert(fd
);
3015 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &one
, sizeof(one
)) < 0) {
3017 g_log
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(err
)<<endl
;
3020 if( ::arg().mustDo("non-local-bind") )
3021 Utility::setBindAny(AF_INET6
, fd
);
3025 setSocketReceiveBuffer(fd
, 250000);
3026 sin
.sin4
.sin_port
= htons(st
.port
);
3031 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &one
, sizeof(one
)) < 0)
3032 throw PDNSException("SO_REUSEPORT: "+stringerror());
3038 setSocketIgnorePMTU(fd
);
3040 catch(const std::exception
& e
) {
3041 g_log
<<Logger::Warning
<<"Failed to set IP_MTU_DISCOVER on UDP server socket: "<<e
.what()<<endl
;
3045 socklen_t socklen
=sin
.getSocklen();
3046 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
3047 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st
.port
) +" for "+ st
.host
+": "+stringerror());
3051 deferredAdds
.push_back(make_pair(fd
, handleNewUDPQuestion
));
3052 g_listenSocketsAddresses
[fd
]=sin
; // this is written to only from the startup thread, not from the workers
3053 if(sin
.sin4
.sin_family
== AF_INET
)
3054 g_log
<<Logger::Info
<<"Listening for UDP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
3056 g_log
<<Logger::Info
<<"Listening for UDP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
3060 static void daemonize(void)
3067 int i
=open("/dev/null",O_RDWR
); /* open stdin */
3069 g_log
<<Logger::Critical
<<"Unable to open /dev/null: "<<stringerror()<<endl
;
3071 dup2(i
,0); /* stdin */
3072 dup2(i
,1); /* stderr */
3073 dup2(i
,2); /* stderr */
3078 static void termIntHandler(int)
3083 static void usr1Handler(int)
3088 static void usr2Handler(int)
3091 SyncRes::setDefaultLogMode(g_quiet
? SyncRes::LogNone
: SyncRes::Log
);
3092 ::arg().set("quiet")=g_quiet
? "" : "no";
3095 static void doStats(void)
3097 static time_t lastOutputTime
;
3098 static uint64_t lastQueryCount
;
3100 uint64_t cacheHits
= s_RC
->cacheHits
;
3101 uint64_t cacheMisses
= s_RC
->cacheMisses
;
3102 uint64_t cacheSize
= s_RC
->size();
3103 auto rc_stats
= s_RC
->stats();
3104 double r
= rc_stats
.second
== 0 ? 0.0 : (100.0 * rc_stats
.first
/ rc_stats
.second
);
3106 if(g_stats
.qcounter
&& (cacheHits
+ cacheMisses
) && SyncRes::s_queries
&& SyncRes::s_outqueries
) {
3107 g_log
<<Logger::Notice
<<"stats: "<<g_stats
.qcounter
<<" questions, "<<
3108 cacheSize
<< " cache entries, "<<
3109 broadcastAccFunction
<uint64_t>(pleaseGetNegCacheSize
)<<" negative entries, "<<
3110 (int)((cacheHits
*100.0)/(cacheHits
+cacheMisses
))<<"% cache hits"<<endl
;
3111 g_log
<< Logger::Notice
<< "stats: cache contended/acquired " << rc_stats
.first
<< '/' << rc_stats
.second
<< " = " << r
<< '%' << endl
;
3113 g_log
<<Logger::Notice
<<"stats: throttle map: "
3114 << broadcastAccFunction
<uint64_t>(pleaseGetThrottleSize
) <<", ns speeds: "
3115 << broadcastAccFunction
<uint64_t>(pleaseGetNsSpeedsSize
)<<", failed ns: "
3116 << broadcastAccFunction
<uint64_t>(pleaseGetFailedServersSize
)<<", ednsmap: "
3117 <<broadcastAccFunction
<uint64_t>(pleaseGetEDNSStatusesSize
)<<endl
;
3118 g_log
<<Logger::Notice
<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries
*100.0/SyncRes::s_queries
)<<"%";
3119 g_log
<<Logger::Notice
<<", "<<(int)(SyncRes::s_throttledqueries
*100.0/(SyncRes::s_outqueries
+SyncRes::s_throttledqueries
))<<"% throttled, "
3120 <<SyncRes::s_nodelegated
<<" no-delegation drops"<<endl
;
3121 g_log
<<Logger::Notice
<<"stats: "<<SyncRes::s_tcpoutqueries
<<" outgoing tcp connections, "<<
3122 broadcastAccFunction
<uint64_t>(pleaseGetConcurrentQueries
)<<" queries running, "<<SyncRes::s_outgoingtimeouts
<<" outgoing timeouts"<<endl
;
3124 //g_log<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
3125 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
3127 g_log
<<Logger::Notice
<<"stats: " << broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheSize
) <<
3128 " packet cache entries, "<<(int)(100.0*broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheHits
)/SyncRes::s_queries
) << "% packet cache hits"<<endl
;
3131 for (const auto& threadInfo
: s_threadInfos
) {
3132 if(threadInfo
.isWorker
) {
3133 g_log
<<Logger::Notice
<<"stats: thread "<<idx
<<" has been distributed "<<threadInfo
.numberOfDistributedQueries
<<" queries"<<endl
;
3138 time_t now
= time(0);
3139 if(lastOutputTime
&& lastQueryCount
&& now
!= lastOutputTime
) {
3140 g_log
<<Logger::Notice
<<"stats: "<< (SyncRes::s_queries
- lastQueryCount
) / (now
- lastOutputTime
) <<" qps (average over "<< (now
- lastOutputTime
) << " seconds)"<<endl
;
3142 lastOutputTime
= now
;
3143 lastQueryCount
= SyncRes::s_queries
;
3145 else if(statsWanted
)
3146 g_log
<<Logger::Notice
<<"stats: no stats yet!"<<endl
;
// Periodic maintenance routine. From the statements visible here it prunes the
// packet cache, negative cache, NS speeds, failed servers, EDNS statuses and
// throttled servers, and after an isHandlerThread() check prunes s_RC, refreshes
// the root NS set, runs the security poll and reloads trust anchors from file.
// NOTE(review): several original lines (braces, try blocks, some conditions) are
// not visible in this excerpt, so the exact nesting cannot be confirmed from here.
3151 static void houseKeeping(void *)
3153 static thread_local
time_t last_rootupdate
, last_secpoll
, last_trustAnchorUpdate
{0}, last_RC_prune
;
3154 static thread_local
struct timeval last_prune
;
3156 static thread_local
int cleanCounter
=0;
3157 static thread_local
bool s_running
; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
3158 auto luaconfsLocal
= g_luaconfs
.getLocal();
// First call with a trust anchor file and a non-zero interval configured.
// NOTE(review): this stores a timestamp `interval` hours in the future, while the
// refresh check further down subtracts last_trustAnchorUpdate from g_now.tv_sec;
// confirm the resulting delay before the first refresh is intended.
3160 if (last_trustAnchorUpdate
== 0 && !luaconfsLocal
->trustAnchorFileInfo
.fname
.empty() && luaconfsLocal
->trustAnchorFileInfo
.interval
!= 0) {
3161 // Loading the Lua config file already "refreshed" the TAs
3162 last_trustAnchorUpdate
= g_now
.tv_sec
+ luaconfsLocal
->trustAnchorFileInfo
.interval
* 3600;
3171 struct timeval now
, past
;
3172 Utility::gettimeofday(&now
, nullptr);
// Pruning pass, entered when last_prune is older than `past` (the assignment of
// `past` is not visible in this excerpt).
3175 if (last_prune
< past
) {
3176 t_packetCache
->doPruneTo(g_maxPacketCacheEntries
/ g_numWorkerThreads
);
3177 SyncRes::pruneNegCache(g_maxCacheEntries
/ (g_numWorkerThreads
* 10));
3180 if(!((cleanCounter
++)%40)) { // this is a full scan!
3181 limit
=now
.tv_sec
-300;
3182 SyncRes::pruneNSSpeeds(limit
);
3184 limit
= now
.tv_sec
- SyncRes::s_serverdownthrottletime
* 10;
3185 SyncRes::pruneFailedServers(limit
);
3186 limit
= now
.tv_sec
- 2*3600;
3187 SyncRes::pruneEDNSStatuses(limit
);
3188 SyncRes::pruneThrottledServers();
// Remember when this pruning pass finished.
3189 Utility::gettimeofday(&last_prune
, nullptr);
3192 if(isHandlerThread()) {
// Prune s_RC when more than 5 seconds have passed since last_RC_prune.
3193 if (now
.tv_sec
- last_RC_prune
> 5) {
3194 s_RC
->doPrune(g_maxCacheEntries
);
3195 last_RC_prune
= now
.tv_sec
;
// Refresh the root NS set when the last update is more than 7200 seconds old.
3198 if(now
.tv_sec
- last_rootupdate
> 7200) {
3199 int res
= SyncRes::getRootNS(g_now
, nullptr);
3201 last_rootupdate
=now
.tv_sec
;
3202 primeRootNSZones(g_dnssecmode
!= DNSSECMode::Off
);
// Security poll, attempted once at least 3600 seconds have passed since last_secpoll.
// Every exception type caught below is logged at Error level.
3206 if(now
.tv_sec
- last_secpoll
>= 3600) {
3208 doSecPoll(&last_secpoll
);
3210 catch(const std::exception
& e
)
3212 g_log
<<Logger::Error
<<"Exception while performing security poll: "<<e
.what()<<endl
;
3214 catch(const PDNSException
& e
)
3216 g_log
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
3218 catch(const ImmediateServFailException
&e
)
3220 g_log
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
3222 catch(const PolicyHitException
& e
) {
3223 g_log
<<Logger::Error
<<"Policy hit while performing security poll"<<endl
;
3227 g_log
<<Logger::Error
<<"Exception while performing security poll"<<endl
;
// Reload the trust anchors from file once `interval` * 3600 seconds have elapsed
// since last_trustAnchorUpdate.
3231 if (!luaconfsLocal
->trustAnchorFileInfo
.fname
.empty() && luaconfsLocal
->trustAnchorFileInfo
.interval
!= 0 &&
3232 g_now
.tv_sec
- last_trustAnchorUpdate
>= (luaconfsLocal
->trustAnchorFileInfo
.interval
* 3600)) {
3233 g_log
<<Logger::Debug
<<"Refreshing Trust Anchors from file"<<endl
;
3235 map
<DNSName
, dsmap_t
> dsAnchors
;
3236 if (updateTrustAnchorsFromFile(luaconfsLocal
->trustAnchorFileInfo
.fname
, dsAnchors
)) {
// Publish the freshly read anchors into the shared Lua configuration.
3237 g_luaconfs
.modify([&dsAnchors
](LuaConfigItems
& lci
) {
3238 lci
.dsAnchors
= dsAnchors
;
3241 last_trustAnchorUpdate
= now
.tv_sec
;
3242 } catch (const PDNSException
&pe
) {
3243 g_log
<<Logger::Error
<<"Unable to update Trust Anchors: "<<pe
.reason
<<endl
;
3249 catch(PDNSException
& ae
)
3252 g_log
<<Logger::Error
<<"Fatal error in housekeeping thread: "<<ae
.reason
<<endl
;
// Sets up the inter-thread pipes for s_threadInfos entries 1 through
// g_numWorkerThreads + g_numDistributorThreads. For each entry three descriptor
// pairs from `fd` are stored in threadInfos.pipes (ToThread, FromThread and
// QueriesToThread). When "distribution-pipe-buffer-size" is positive the query
// pipe's buffer is resized, and the query pipe's write end is made non-blocking.
// NOTE(review): the lines that create the pipes and declare `fd`/`err` are not
// visible in this excerpt.
3257 static void makeThreadPipes()
3259 auto pipeBufferSize
= ::arg().asNum("distribution-pipe-buffer-size");
3260 if (pipeBufferSize
> 0) {
3261 g_log
<<Logger::Info
<<"Resizing the buffer of the distribution pipe to "<<pipeBufferSize
<<endl
;
3264 /* thread 0 is the handler / SNMP, we start at 1 */
3265 for(unsigned int n
= 1; n
<= (g_numWorkerThreads
+ g_numDistributorThreads
); ++n
) {
3266 auto& threadInfos
= s_threadInfos
.at(n
);
3270 unixDie("Creating pipe for inter-thread communications");
3272 threadInfos
.pipes
.readToThread
= fd
[0];
3273 threadInfos
.pipes
.writeToThread
= fd
[1];
3276 unixDie("Creating pipe for inter-thread communications");
3278 threadInfos
.pipes
.readFromThread
= fd
[0];
3279 threadInfos
.pipes
.writeFromThread
= fd
[1];
3282 unixDie("Creating pipe for inter-thread communications");
3284 threadInfos
.pipes
.readQueriesToThread
= fd
[0];
3285 threadInfos
.pipes
.writeQueriesToThread
= fd
[1];
// Optional resize of the query distribution pipe; a failure is only logged as a
// warning, followed by the current buffer size when it can be obtained.
3287 if (pipeBufferSize
> 0) {
3288 if (!setPipeBufferSize(threadInfos
.pipes
.writeQueriesToThread
, pipeBufferSize
)) {
3290 g_log
<<Logger::Warning
<<"Error resizing the buffer of the distribution pipe for thread "<<n
<<" to "<<pipeBufferSize
<<": "<<strerror(err
)<<endl
;
3291 auto existingSize
= getPipeBufferSize(threadInfos
.pipes
.writeQueriesToThread
);
3292 if (existingSize
> 0) {
3293 g_log
<<Logger::Warning
<<"The current size of the distribution pipe's buffer for thread "<<n
<<" is "<<existingSize
<<endl
;
// The write end of the query pipe is made non-blocking; failing to do so is fatal
// (unixDie).
3298 if (!setNonBlocking(threadInfos
.pipes
.writeQueriesToThread
)) {
3299 unixDie("Making pipe for inter-thread communications non-blocking");
// Runs `func` for the threads listed in s_threadInfos. For the calling thread
// `func` is invoked directly ("don't write to ourselves"); for the others a
// heap-allocated ThreadMSG with wantAnswer = true is written, as a pointer, to the
// thread's writeToThread pipe and a string pointer is read back from its
// readFromThread pipe. A short or failed write/read is fatal (unixDie).
// NOTE(review): the condition selecting the direct call, the assignment of
// tmsg->func and the handling of `resp` are not visible in this excerpt.
3310 void broadcastFunction(const pipefunc_t
& func
)
3312 /* This function might be called by the worker with t_id 0 during startup
3313 for the initialization of ACLs and domain maps. After that it should only
3314 be called by the handler. */
3316 if (s_threadInfos
.empty() && isHandlerThread()) {
3317 /* the handler and distributors will call themselves below, but
3318 during startup we get called while s_threadInfos has not been
3319 populated yet to update the ACL or domain maps, so we need to
3326 for (const auto& threadInfo
: s_threadInfos
) {
3328 func(); // don't write to ourselves!
3332 ThreadMSG
* tmsg
= new ThreadMSG();
3334 tmsg
->wantAnswer
= true;
3335 if(write(threadInfo
.pipes
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
3338 unixDie("write to thread pipe returned wrong size or error");
3341 string
* resp
= nullptr;
3342 if(read(threadInfo
.pipes
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
3343 unixDie("read from thread pipe returned wrong size or error");
// Tries to hand `tmsg` to the thread at index `target` of s_threadInfos by writing
// the pointer value to that thread's writeQueriesToThread pipe. An error is logged
// when the target is not flagged as a worker. On success the target's
// numberOfDistributedQueries counter is incremented.
// NOTE(review): the return statements and the code between the error checks are
// not visible in this excerpt. The caller's comment indicates that a false return
// without an exception means the pipe was full (the EAGAIN / EWOULDBLOCK case).
3352 static bool trySendingQueryToWorker(unsigned int target
, ThreadMSG
* tmsg
)
3354 auto& targetInfo
= s_threadInfos
[target
];
3355 if(!targetInfo
.isWorker
) {
3356 g_log
<<Logger::Error
<<"distributeAsyncFunction() tried to assign a query to a non-worker thread"<<endl
;
3360 const auto& tps
= targetInfo
.pipes
;
// Only the pointer itself travels through the pipe, not the ThreadMSG contents.
3362 ssize_t written
= write(tps
.writeQueriesToThread
, &tmsg
, sizeof(tmsg
));
3364 if (static_cast<size_t>(written
) != sizeof(tmsg
)) {
3366 unixDie("write to thread pipe returned wrong size or error");
3371 if (error
== EAGAIN
|| error
== EWOULDBLOCK
) {
3375 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error
));
3379 ++targetInfo
.numberOfDistributedQueries
;
3384 static unsigned int getWorkerLoad(size_t workerIdx
)
3386 const auto mt
= s_threadInfos
[/* skip handler */ 1 + g_numDistributorThreads
+ workerIdx
].mt
;
3387 if (mt
!= nullptr) {
3388 return mt
->numProcesses();
3393 static unsigned int selectWorker(unsigned int hash
)
3395 if (s_balancingFactor
== 0) {
3396 return /* skip handler */ 1 + g_numDistributorThreads
+ (hash
% g_numWorkerThreads
);
3399 /* we start with one, representing the query we are currently handling */
3400 double currentLoad
= 1;
3401 std::vector
<unsigned int> load(g_numWorkerThreads
);
3402 for (size_t idx
= 0; idx
< g_numWorkerThreads
; idx
++) {
3403 load
[idx
] = getWorkerLoad(idx
);
3404 currentLoad
+= load
[idx
];
3405 // cerr<<"load for worker "<<idx<<" is "<<load[idx]<<endl;
3408 double targetLoad
= (currentLoad
/ g_numWorkerThreads
) * s_balancingFactor
;
3409 // cerr<<"total load is "<<currentLoad<<", number of workers is "<<g_numWorkerThreads<<", target load is "<<targetLoad<<endl;
3411 unsigned int worker
= hash
% g_numWorkerThreads
;
3412 /* at least one server has to be at or below the average load */
3413 if (load
[worker
] > targetLoad
) {
3414 ++g_stats
.rebalancedQueries
;
3416 // cerr<<"worker "<<worker<<" is above the target load, selecting another one"<<endl;
3417 worker
= (worker
+ 1) % g_numWorkerThreads
;
3419 while(load
[worker
] > targetLoad
);
3422 return /* skip handler */ 1 + g_numDistributorThreads
+ worker
;
// Hashes `packet` with g_disthashseed, picks a worker through selectWorker() and
// tries to send it a new ThreadMSG with wantAnswer = false. When the first attempt
// reports failure, one other randomly chosen worker (different from the first) is
// tried; if that fails as well g_stats.queryPipeFullDrops is incremented. An error
// is logged when the caller is not a distributor thread.
// NOTE(review): the assignment of tmsg->func and the cleanup of `tmsg` on the drop
// path are not visible in this excerpt.
3425 // This function is only called by the distributor threads, when pdns-distributes-queries is set
3426 void distributeAsyncFunction(const string
& packet
, const pipefunc_t
& func
)
3428 if (!isDistributorThread()) {
3429 g_log
<<Logger::Error
<<"distributeAsyncFunction() has been called by a worker ("<<t_id
<<")"<<endl
;
3433 unsigned int hash
= hashQuestion(packet
.c_str(), packet
.length(), g_disthashseed
);
3434 unsigned int target
= selectWorker(hash
);
3436 ThreadMSG
* tmsg
= new ThreadMSG();
3438 tmsg
->wantAnswer
= false;
3440 if (!trySendingQueryToWorker(target
, tmsg
)) {
3441 /* if this function failed but did not raise an exception, it means that the pipe
3442 was full, let's try another one */
3443 unsigned int newTarget
= 0;
// Draw random workers until one differs from the original target.
// NOTE(review): with a single worker thread no different target exists; confirm
// how that configuration is handled.
3445 newTarget
= /* skip handler */ 1 + g_numDistributorThreads
+ dns_random(g_numWorkerThreads
);
3446 } while (newTarget
== target
);
3448 if (!trySendingQueryToWorker(newTarget
, tmsg
)) {
3449 g_stats
.queryPipeFullDrops
++;
// Multiplexer callback for a thread's request pipe: reads one ThreadMSG pointer
// from `fd`, invokes tmsg->func() and, when tmsg->wantAnswer is set, writes the
// resulting pointer `resp` to this thread's writeFromThread pipe. Exceptions thrown
// by the function are logged only when g_logCommonErrors is set. A short or failed
// read/write is fatal (unixDie).
// NOTE(review): the declaration of `resp` and the release of `tmsg` are not visible
// in this excerpt.
3455 static void handlePipeRequest(int fd
, FDMultiplexer::funcparam_t
& var
)
3457 ThreadMSG
* tmsg
= nullptr;
3459 if(read(fd
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) { // fd == readToThread || fd == readQueriesToThread
3460 unixDie("read from thread pipe returned wrong size or error");
3465 resp
= tmsg
->func();
3467 catch(std::exception
& e
) {
3468 if(g_logCommonErrors
)
3469 g_log
<<Logger::Error
<<"PIPE function we executed created exception: "<<e
.what()<<endl
; // but what if they wanted an answer.. we send 0
3471 catch(PDNSException
& e
) {
3472 if(g_logCommonErrors
)
3473 g_log
<<Logger::Error
<<"PIPE function we executed created PDNS exception: "<<e
.reason
<<endl
; // but what if they wanted an answer.. we send 0
3475 if(tmsg
->wantAnswer
) {
// The answer goes back through the pipe pair belonging to the current thread (t_id).
3476 const auto& threadInfo
= s_threadInfos
.at(t_id
);
3477 if(write(threadInfo
.pipes
.writeFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
)) {
3479 unixDie("write to thread pipe returned wrong size or error");
// Adapter taking a function that returns T* and itself returning void*.
// broadcastAccFunction binds it (boost::bind(voider<T>, func)) to fill
// ThreadMSG::func. The body is not visible in this excerpt.
3486 template<class T
> void *voider(const boost::function
<T
*()>& func
)
3491 vector
<ComboAddress
>& operator+=(vector
<ComboAddress
>&a
, const vector
<ComboAddress
>& b
)
3493 a
.insert(a
.end(), b
.begin(), b
.end());
/* Appends every (name, type) pair of b to the end of a, preserving order, and
   returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  /* reserving first guarantees that the appends below never reallocate */
  a.reserve(a.size() + b.size());
  for (const auto& entry : b) {
    a.push_back(entry);
  }
  return a;
}
3503 vector
<pair
<DNSName
, uint16_t> >& operator+=(vector
<pair
<DNSName
, uint16_t> >&a
, const vector
<pair
<DNSName
, uint16_t> >& b
)
3505 a
.insert(a
.end(), b
.begin(), b
.end());
3511 This function should only be called by the handler to gather metrics, wipe the cache,
3512 reload the Lua script (not the Lua config) or change the current trace regex,
3513 and by the SNMP thread to gather metrics. */
// Sends `func`, wrapped through voider<T>, to the threads in s_threadInfos as a
// ThreadMSG with wantAnswer = true, writing the message pointer to each thread's
// writeToThread pipe and reading a result pointer back from readFromThread. An
// error is logged when the caller is not the handler thread. A short or failed
// write/read is fatal (unixDie).
// NOTE(review): the declaration of `resp`, the accumulation of the per-thread
// results into the returned T and any per-thread skip condition are not visible in
// this excerpt.
3514 template<class T
> T
broadcastAccFunction(const boost::function
<T
*()>& func
)
3516 if (!isHandlerThread()) {
3517 g_log
<<Logger::Error
<<"broadcastAccFunction has been called by a worker ("<<t_id
<<")"<<endl
;
3523 for (const auto& threadInfo
: s_threadInfos
) {
3528 const auto& tps
= threadInfo
.pipes
;
3529 ThreadMSG
* tmsg
= new ThreadMSG();
3530 tmsg
->func
= boost::bind(voider
<T
>, func
);
3531 tmsg
->wantAnswer
= true;
3533 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
3535 unixDie("write to thread pipe returned wrong size or error");
3539 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
3540 unixDie("read from thread pipe returned wrong size or error");
// Explicit instantiations of broadcastAccFunction for the result types string,
// uint64_t, vector<ComboAddress>, vector<pair<DNSName,uint16_t> > and ThreadTimes.
3551 template string
broadcastAccFunction(const boost::function
<string
*()>& fun
); // explicit instantiation
3552 template uint64_t broadcastAccFunction(const boost::function
<uint64_t*()>& fun
); // explicit instantiation
3553 template vector
<ComboAddress
> broadcastAccFunction(const boost::function
<vector
<ComboAddress
> *()>& fun
); // explicit instantiation
3554 template vector
<pair
<DNSName
,uint16_t> > broadcastAccFunction(const boost::function
<vector
<pair
<DNSName
, uint16_t> > *()>& fun
); // explicit instantiation
3555 template ThreadTimes
broadcastAccFunction(const boost::function
<ThreadTimes
*()>& fun
);
// Multiplexer callback for the control socket: receives one message through
// s_rcc.recv(), lets RecursorControlParser::getAnswer() produce the answer (and a
// `command` pointer), and sends the answer back to `remote`. Both std::exception
// and PDNSException are caught and logged at Error level.
// NOTE(review): the declaration of `remote` and what is done with `command` are not
// visible in this excerpt.
3557 static void handleRCC(int fd
, FDMultiplexer::funcparam_t
& var
)
3561 string msg
=s_rcc
.recv(&remote
);
3562 RecursorControlParser rcp
;
3563 RecursorControlParser::func_t
* command
;
3565 string answer
=rcp
.getAnswer(msg
, &command
);
3567 // If we are inside a chroot, we need to strip
// The first length("chroot") characters of `remote` are dropped before replying.
3568 if (!arg()["chroot"].empty()) {
3569 size_t len
= arg()["chroot"].length();
3570 remote
= remote
.substr(len
);
3573 s_rcc
.send(answer
, &remote
);
3576 catch(const std::exception
& e
) {
3577 g_log
<<Logger::Error
<<"Error dealing with control socket request: "<<e
.what()<<endl
;
3579 catch(const PDNSException
& ae
) {
3580 g_log
<<Logger::Error
<<"Error dealing with control socket request: "<<ae
.reason
<<endl
;
// Multiplexer callback for a readable TCP socket whose state is the PacketID held
// in `var`. It receives up to pident->inNeeded bytes, appends them to pident->inMSG
// and decreases inNeeded. Once nothing more is needed, or inIncompleteOkay is set,
// the fd is removed from the multiplexer and the accumulated message is delivered
// via MT->sendEvent(). On the error path the fd is removed and `empty` is sent to
// convey error status.
// NOTE(review): the check on the recv() result that separates the two paths and
// the declaration of `empty` are not visible in this excerpt.
3584 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
)
3586 PacketID
* pident
=any_cast
<PacketID
>(&var
);
3587 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
3589 shared_array
<char> buffer(new char[pident
->inNeeded
]);
3591 ssize_t ret
=recv(fd
, buffer
.get(), pident
->inNeeded
,0);
3593 pident
->inMSG
.append(&buffer
[0], &buffer
[ret
]);
3594 pident
->inNeeded
-=(size_t)ret
;
3595 if(!pident
->inNeeded
|| pident
->inIncompleteOkay
) {
3596 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
// Copies are taken before removeReadFD() so the event is sent from local data.
3597 PacketID pid
=*pident
;
3598 string msg
=pident
->inMSG
;
3600 t_fdm
->removeReadFD(fd
);
3601 MT
->sendEvent(pid
, &msg
);
3604 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
3608 PacketID tmp
=*pident
;
3609 t_fdm
->removeReadFD(fd
); // pident might now be invalid (it isn't, but still)
3611 MT
->sendEvent(tmp
, &empty
); // this conveys error status
// Multiplexer callback for a writable TCP socket whose state is the PacketID held
// in `var`. It sends the part of pid->outMSG starting at pid->outPos and advances
// outPos. When the whole message has been sent the fd is removed from the
// multiplexer and the sent message is passed to MT->sendEvent() to signal success.
// On error or EOF the fd is removed and `sent` is passed instead (an empty string,
// per the comment on that line).
// NOTE(review): the check on the send() result and the declarations of `tmp` and
// `sent` are not visible in this excerpt.
3615 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
)
3617 PacketID
* pid
=any_cast
<PacketID
>(&var
);
3618 ssize_t ret
=send(fd
, pid
->outMSG
.c_str() + pid
->outPos
, pid
->outMSG
.size() - pid
->outPos
,0);
3620 pid
->outPos
+=(ssize_t
)ret
;
3621 if(pid
->outPos
==pid
->outMSG
.size()) {
3623 t_fdm
->removeWriteFD(fd
);
3624 MT
->sendEvent(tmp
, &tmp
.outMSG
); // send back what we sent to convey everything is ok
3627 else { // error or EOF
3629 t_fdm
->removeWriteFD(fd
);
3631 MT
->sendEvent(tmp
, &sent
); // we convey error status by sending empty string
// For a waiter `iter` with a non-empty key.chain, iterates over the chain and
// sends `content` through MT->sendEvent() using the by-value PacketID `resend`,
// incrementing g_stats.chainResends for every entry.
// NOTE(review): the statements that adapt `resend` for each chain entry are not
// visible in this excerpt.
3635 // resend event to everybody chained onto it
3636 static void doResends(MT_t::waiters_t::iterator
& iter
, PacketID resend
, const string
& content
)
3638 if(iter
->key
.chain
.empty())
3640 // cerr<<"doResends called!\n";
3641 for(PacketID::chain_t::iterator i
=iter
->key
.chain
.begin(); i
!= iter
->key
.chain
.end() ; ++i
) {
3644 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
3646 MT
->sendEvent(resend
, &content
);
3647 g_stats
.chainResends
++;
// Multiplexer callback for a readable outgoing UDP socket. It reads one datagram
// of at most g_outgoingEDNSBufsize bytes with recvfrom(). Datagrams shorter than a
// dnsheader are treated as errors: the socket is returned and the waiter for `pid`
// (plus its chain) receives `empty`. Otherwise the header is copied into `dh`, the
// sender and the question name/type parsed from the packet complete `pident`, and
// the packet is delivered to the matching waiter and its chain. When no waiter
// matches, all waiters are scanned to record near misses or to retry with a
// waiter's name, and truly unexpected answers are counted in
// g_stats.unexpectedCount.
// NOTE(review): several declarations (`packet`, `len`, `empty`, `dh`, `pident`),
// the retryWithName label and most braces are not visible in this excerpt.
3651 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
3653 PacketID pid
=any_cast
<PacketID
>(var
);
3656 packet
.resize(g_outgoingEDNSBufsize
);
3657 ComboAddress fromaddr
;
3658 socklen_t addrlen
=sizeof(fromaddr
);
3660 len
=recvfrom(fd
, &packet
.at(0), packet
.size(), 0, (sockaddr
*)&fromaddr
, &addrlen
);
// Anything shorter than a DNS header, including a negative recvfrom() result,
// takes the error path below.
3662 if(len
< (ssize_t
) sizeof(dnsheader
)) {
3664 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
3666 g_stats
.serverParseError
++;
3667 if(g_logCommonErrors
)
3668 g_log
<<Logger::Error
<<"Unable to parse packet from remote UDP server "<< fromaddr
.toString() <<
3669 ": packet smaller than DNS header"<<endl
;
3672 t_udpclientsocks
->returnSocket(fd
);
3675 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pid
);
3676 if(iter
!= MT
->d_waiters
.end())
3677 doResends(iter
, pid
, empty
);
3679 MT
->sendEvent(pid
, &empty
); // this denotes error (does lookup again.. at least L1 will be hot)
// Copy the header out of the receive buffer into the local `dh`.
3685 memcpy(&dh
, &packet
.at(0), sizeof(dh
));
3688 pident
.remote
=fromaddr
;
3692 if(!dh
.qr
&& g_logCommonErrors
) {
3693 g_log
<<Logger::Notice
<<"Not taking data from question on outgoing socket from "<< fromaddr
.toStringWithPort() <<endl
;
3696 if(!dh
.qdcount
|| // UPC, Nominum, very old BIND on FormErr, NSD
3697 !dh
.qr
) { // one weird server
3698 pident
.domain
.clear();
3704 pident
.domain
=DNSName(&packet
.at(0), len
, 12, false, &pident
.type
); // don't copy this from above - we need to do the actual read
3706 catch(std::exception
& e
) {
3707 g_stats
.serverParseError
++; // won't be fed to lwres.cc, so we have to increment
3708 g_log
<<Logger::Warning
<<"Error in packet from remote nameserver "<< fromaddr
.toStringWithPort() << ": "<<e
.what() << endl
;
// Deliver to waiters chained onto the matching one first, then to the waiter itself.
3713 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pident
);
3714 if(iter
!= MT
->d_waiters
.end()) {
3715 doResends(iter
, pident
, packet
);
3720 if(!MT
->sendEvent(pident
, &packet
)) {
3721 /* we did not find a match for this response, something is wrong */
3723 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
3724 for(MT_t::waiters_t::iterator mthread
=MT
->d_waiters
.begin(); mthread
!=MT
->d_waiters
.end(); ++mthread
) {
// Same fd, remote, type and domain as an outstanding query: counted as a near miss.
3725 if(pident
.fd
==mthread
->key
.fd
&& mthread
->key
.remote
==pident
.remote
&& mthread
->key
.type
== pident
.type
&&
3726 pident
.domain
== mthread
->key
.domain
) {
3727 mthread
->key
.nearMisses
++;
3730 // be a bit paranoid here since we're weakening our matching
3731 if(pident
.domain
.empty() && !mthread
->key
.domain
.empty() && !pident
.type
&& mthread
->key
.type
&&
3732 pident
.id
== mthread
->key
.id
&& mthread
->key
.remote
== pident
.remote
) {
3733 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
3734 pident
.domain
= mthread
->key
.domain
;
3735 pident
.type
= mthread
->key
.type
;
3736 goto retryWithName
; // note that this only passes on an error, lwres will still reject the packet
3739 g_stats
.unexpectedCount
++; // if we made it here, it really is an unexpected answer
3740 if(g_logCommonErrors
) {
3741 g_log
<<Logger::Warning
<<"Discarding unexpected packet from "<<fromaddr
.toStringWithPort()<<": "<< (pident
.domain
.empty() ? "<empty>" : pident
.domain
.toString())<<", "<<pident
.type
<<", "<<MT
->d_waiters
.size()<<" waiters"<<endl
;
3745 /* we either found a waiter (1) or encountered an issue (-1), it's up to us to clean the socket anyway */
3746 t_udpclientsocks
->returnSocket(fd
);
// Walks FDMultiplexer::getMultiplexerMap() looking for a usable multiplexer.
// Failures while initializing a candidate are logged as non-fatal, and an error is
// logged when no working multiplexer is found.
// NOTE(review): the instantiation/return of a candidate and what happens after the
// final error message are not visible in this excerpt.
3750 FDMultiplexer
* getMultiplexer()
3753 for(const auto& i
: FDMultiplexer::getMultiplexerMap()) {
3758 catch(FDMultiplexerException
&fe
) {
3759 g_log
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer ("<<fe
.what()<<"), falling back"<<endl
;
3762 g_log
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer"<<endl
;
3765 g_log
<<Logger::Error
<<"No working multiplexer found!"<<endl
;
// Reloads the Lua script named by the "lua-dns-script" setting into t_pdl and
// returns a heap-allocated status string: "unloaded\n" on the unload path, a
// "retaining current script..." message when loading throws std::exception, and
// "(re)loaded '<fname>'\n" on success.
// NOTE(review): the condition selecting the unload path and the unload statement
// itself are not visible in this excerpt.
3770 static string
* doReloadLuaScript()
3772 string fname
= ::arg()["lua-dns-script"];
3776 g_log
<<Logger::Info
<<t_id
<<" Unloaded current lua script"<<endl
;
3777 return new string("unloaded\n");
// A fresh RecursorLua4 replaces t_pdl before the file is loaded into it.
3780 t_pdl
= std::make_shared
<RecursorLua4
>();
3781 t_pdl
->loadFile(fname
);
3784 catch(std::exception
& e
) {
3785 g_log
<<Logger::Error
<<t_id
<<" Retaining current script, error from '"<<fname
<<"': "<< e
.what() <<endl
;
3786 return new string("retaining current script, error from '"+fname
+"': "+e
.what()+"\n");
3789 g_log
<<Logger::Warning
<<t_id
<<" (Re)loaded lua script from '"<<fname
<<"'"<<endl
;
3790 return new string("(re)loaded '"+fname
+"'\n");
3793 string
doQueueReloadLuaScript(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
3796 ::arg().set("lua-dns-script") = *begin
;
3798 return broadcastAccFunction
<string
>(doReloadLuaScript
);
3801 static string
* pleaseUseNewTraceRegex(const std::string
& newRegex
)
3804 if(newRegex
.empty()) {
3805 t_traceRegex
.reset();
3806 return new string("unset\n");
3809 t_traceRegex
= std::make_shared
<Regex
>(newRegex
);
3810 return new string("ok\n");
3813 catch(PDNSException
& ae
)
3815 return new string(ae
.reason
+"\n");
3818 string
doTraceRegex(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
3820 return broadcastAccFunction
<string
>(boost::bind(pleaseUseNewTraceRegex
, begin
!=end
? *begin
: ""));
// Reads /proc/sys/net/ipv6/route/max_size when present, parses it with std::stoi
// and logs an Error-level hint to raise the sysctl.
// NOTE(review): the comparison guarding the log line is not visible in this
// excerpt; the message text suggests the threshold is 16384 — confirm.
3823 static void checkLinuxIPv6Limits()
3827 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line
)) {
3828 int lim
=std::stoi(line
);
3830 g_log
<<Logger::Error
<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim
<<" which is < 16384"<<endl
;
3835 static void checkOrFixFDS()
3837 unsigned int availFDs
=getFilenumLimit();
3838 unsigned int wantFDs
= g_maxMThreads
* g_numWorkerThreads
+25; // even healthier margin then before
3840 if(wantFDs
> availFDs
) {
3841 unsigned int hardlimit
= getFilenumLimit(true);
3842 if(hardlimit
>= wantFDs
) {
3843 setFilenumLimit(wantFDs
);
3844 g_log
<<Logger::Warning
<<"Raised soft limit on number of filedescriptors to "<<wantFDs
<<" to match max-mthreads and threads settings"<<endl
;
3847 int newval
= (hardlimit
- 25) / g_numWorkerThreads
;
3848 g_log
<<Logger::Warning
<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit
<<" < "<<wantFDs
<<"), reducing max-mthreads to "<<newval
<<endl
;
3849 g_maxMThreads
= newval
;
3850 setFilenumLimit(hardlimit
);
3855 static void* recursorThread(unsigned int tid
, const string
& threadName
);
// Takes a shared_ptr<NetmaskGroup> by value and returns void*. The ACL loading
// code in this file passes it to broadcastFunction via
// boost::bind(pleaseSupplantACLs, allowFrom). The body is not visible in this excerpt.
3857 static void* pleaseSupplantACLs(std::shared_ptr
<NetmaskGroup
> ng
)
// Body of the ACL (re)loading routine; its signature line is not visible in this
// excerpt. On every call after the first, the allow-from related settings are
// re-read from the configuration file, its includes and the command line. The new
// NetmaskGroup is then built from "allow-from-file" when set, else from
// "allow-from"; when neither is set allowFrom becomes nullptr. The result is stored
// in g_initialAllowFrom and broadcast with pleaseSupplantACLs.
3868 static bool l_initialized
;
3870 if(l_initialized
) { // only reload configuration file on second call
3871 string configname
=::arg()["config-dir"]+"/recursor.conf";
3872 if(::arg()["config-name"]!="") {
3873 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3875 cleanSlashes(configname
);
3877 if(!::arg().preParseFile(configname
.c_str(), "allow-from-file"))
3878 throw runtime_error("Unable to re-parse configuration file '"+configname
+"'");
3879 ::arg().preParseFile(configname
.c_str(), "allow-from", LOCAL_NETS
);
3880 ::arg().preParseFile(configname
.c_str(), "include-dir");
3881 ::arg().preParse(g_argc
, g_argv
, "include-dir");
3883 // then process includes
3884 std::vector
<std::string
> extraConfigs
;
3885 ::arg().gatherIncludes(extraConfigs
);
// Each include is re-parsed with the value collected so far passed as third argument.
3887 for(const std::string
& fn
: extraConfigs
) {
3888 if(!::arg().preParseFile(fn
.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
3889 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
3890 if(!::arg().preParseFile(fn
.c_str(), "allow-from", ::arg()["allow-from"]))
3891 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
// The command line is parsed last, after the configuration file and its includes.
3894 ::arg().preParse(g_argc
, g_argv
, "allow-from-file");
3895 ::arg().preParse(g_argc
, g_argv
, "allow-from");
3898 std::shared_ptr
<NetmaskGroup
> oldAllowFrom
= t_allowFrom
;
3899 std::shared_ptr
<NetmaskGroup
> allowFrom
= std::make_shared
<NetmaskGroup
>();
// Source 1: one mask per line read from the "allow-from-file" file.
// NOTE(review): the per-line preprocessing using `pos` is not visible in this excerpt.
3901 if(!::arg()["allow-from-file"].empty()) {
3903 ifstream
ifs(::arg()["allow-from-file"].c_str());
3905 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
3908 string::size_type pos
;
3909 while(getline(ifs
,line
)) {
3911 if(pos
!=string::npos
)
3917 allowFrom
->addMask(line
);
3919 g_log
<<Logger::Warning
<<"Done parsing " << allowFrom
->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl
;
// Source 2: the "allow-from" setting, split on commas and spaces.
3921 else if(!::arg()["allow-from"].empty()) {
3923 stringtok(ips
, ::arg()["allow-from"], ", ");
3925 g_log
<<Logger::Warning
<<"Only allowing queries from: ";
3926 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
3927 allowFrom
->addMask(*i
);
3929 g_log
<<Logger::Warning
<<", ";
3930 g_log
<<Logger::Warning
<<*i
;
3932 g_log
<<Logger::Warning
<<endl
;
// No restriction configured: warn when listening on something other than
// 127.0.0.1 on port 53, and use a null group.
3935 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
3936 g_log
<<Logger::Warning
<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl
;
3937 allowFrom
= nullptr;
3940 g_initialAllowFrom
= allowFrom
;
3941 broadcastFunction(boost::bind(pleaseSupplantACLs
, allowFrom
));
3942 oldAllowFrom
= nullptr;
3944 l_initialized
= true;
3948 static void setupDelegationOnly()
3950 vector
<string
> parts
;
3951 stringtok(parts
, ::arg()["delegation-only"], ", \t");
3952 for(const auto& p
: parts
) {
3953 SyncRes::addDelegationOnly(DNSName(p
));
3957 static std::map
<unsigned int, std::set
<int> > parseCPUMap()
3959 std::map
<unsigned int, std::set
<int> > result
;
3961 const std::string value
= ::arg()["cpu-map"];
3963 if (!value
.empty() && !isSettingThreadCPUAffinitySupported()) {
3964 g_log
<<Logger::Warning
<<"CPU mapping requested but not supported, skipping"<<endl
;
3968 std::vector
<std::string
> parts
;
3970 stringtok(parts
, value
, " \t");
3972 for(const auto& part
: parts
) {
3973 if (part
.find('=') == string::npos
)
3977 auto headers
= splitField(part
, '=');
3978 trim(headers
.first
);
3979 trim(headers
.second
);
3981 unsigned int threadId
= pdns_stou(headers
.first
);
3982 std::vector
<std::string
> cpus
;
3984 stringtok(cpus
, headers
.second
, ",");
3986 for(const auto& cpu
: cpus
) {
3987 int cpuId
= std::stoi(cpu
);
3989 result
[threadId
].insert(cpuId
);
3992 catch(const std::exception
& e
) {
3993 g_log
<<Logger::Error
<<"Error parsing cpu-map entry '"<<part
<<"': "<<e
.what()<<endl
;
4000 static void setCPUMap(const std::map
<unsigned int, std::set
<int> >& cpusMap
, unsigned int n
, pthread_t tid
)
4002 const auto& cpuMapping
= cpusMap
.find(n
);
4003 if (cpuMapping
!= cpusMap
.cend()) {
4004 int rc
= mapThreadToCPUList(tid
, cpuMapping
->second
);
4006 g_log
<<Logger::Info
<<"CPU affinity for worker "<<n
<<" has been set to CPU map:";
4007 for (const auto cpu
: cpuMapping
->second
) {
4008 g_log
<<Logger::Info
<<" "<<cpu
;
4010 g_log
<<Logger::Info
<<endl
;
4013 g_log
<<Logger::Warning
<<"Error setting CPU affinity for worker "<<n
<<" to CPU map:";
4014 for (const auto cpu
: cpuMapping
->second
) {
4015 g_log
<<Logger::Info
<<" "<<cpu
;
4017 g_log
<<Logger::Info
<<strerror(rc
)<<endl
;
// Per-thread setup of the two tracking databases: t_nodDBp (nod::NODDB, sized by
// "new-domain-db-size") and t_udrDBp (nod::UniqueResponseDB, sized by
// "unique-response-db-size"). For each one the cache directory is set, init() is
// called and a std::thread running the database's startHousekeepingThread is
// created; the protobuf tag settings are copied into g_nod_pbtag / g_udr_pbtag.
// NOTE(review): the conditions enabling each half, the error exits after the log
// lines and what is done with the std::thread objects are not visible in this excerpt.
4023 static void setupNODThread()
4026 uint32_t num_cells
= ::arg().asNum("new-domain-db-size");
4027 t_nodDBp
= std::make_shared
<nod::NODDB
>(num_cells
);
4029 t_nodDBp
->setCacheDir(::arg()["new-domain-history-dir"]);
4031 catch (const PDNSException
& e
) {
4032 g_log
<<Logger::Error
<<"new-domain-history-dir (" << ::arg()["new-domain-history-dir"] << ") is not readable or does not exist"<<endl
;
4035 if (!t_nodDBp
->init()) {
4036 g_log
<<Logger::Error
<<"Could not initialize domain tracking"<<endl
;
// The housekeeping thread receives the database and the id of the current thread.
4039 std::thread
t(nod::NODDB::startHousekeepingThread
, t_nodDBp
, std::this_thread::get_id());
4041 g_nod_pbtag
= ::arg()["new-domain-pb-tag"];
4044 uint32_t num_cells
= ::arg().asNum("unique-response-db-size");
4045 t_udrDBp
= std::make_shared
<nod::UniqueResponseDB
>(num_cells
);
4047 t_udrDBp
->setCacheDir(::arg()["unique-response-history-dir"]);
4049 catch (const PDNSException
& e
) {
4050 g_log
<<Logger::Error
<<"unique-response-history-dir (" << ::arg()["unique-response-history-dir"] << ") is not readable or does not exist"<<endl
;
4053 if (!t_udrDBp
->init()) {
4054 g_log
<<Logger::Error
<<"Could not initialize unique response tracking"<<endl
;
4057 std::thread
t(nod::UniqueResponseDB::startHousekeepingThread
, t_udrDBp
, std::this_thread::get_id());
4059 g_udr_pbtag
= ::arg()["unique-response-pb-tag"];
4063 void parseNODWhitelist(const std::string
& wlist
)
4065 vector
<string
> parts
;
4066 stringtok(parts
, wlist
, ",; ");
4067 for(const auto& a
: parts
) {
4068 g_nodDomainWL
.add(DNSName(a
));
4072 static void setupNODGlobal()
4074 // Setup NOD subsystem
4075 g_nodEnabled
= ::arg().mustDo("new-domain-tracking");
4076 g_nodLookupDomain
= DNSName(::arg()["new-domain-lookup"]);
4077 g_nodLog
= ::arg().mustDo("new-domain-log");
4078 parseNODWhitelist(::arg()["new-domain-whitelist"]);
4080 // Setup Unique DNS Response subsystem
4081 g_udrEnabled
= ::arg().mustDo("unique-response-tracking");
4082 g_udrLog
= ::arg().mustDo("unique-response-log");
4084 #endif /* NOD_ENABLED */
// Validates the "socket-dir" setting: the path must be stat()-able, be a directory
// and be readable, writable and searchable by the process. A problem is reported
// at Error level together with a pointer to the upgrade notes.
// NOTE(review): the declarations of `st` and `msg`, the branch taken when all
// checks pass and whatever follows the log line are not visible in this excerpt.
4086 static void checkSocketDir(void)
4089 string
dir(::arg()["socket-dir"]);
4092 if (stat(dir
.c_str(), &st
) == -1) {
4093 msg
= "it does not exist or cannot access";
4095 else if (!S_ISDIR(st
.st_mode
)) {
4096 msg
= "it is not a directory";
4098 else if (access(dir
.c_str(), R_OK
| W_OK
| X_OK
) != 0) {
4099 msg
= "cannot read, write or search";
4103 g_log
<< Logger::Error
<< "Problem with socket directory " << dir
<< ": " << msg
<< "; see https://docs.powerdns.com/recursor/upgrade.html#x-to-4-3-0-or-master" << endl
;
4107 static int serviceMain(int argc
, char*argv
[])
4109 g_log
.setName(s_programname
);
4110 g_log
.disableSyslog(::arg().mustDo("disable-syslog"));
4111 g_log
.setTimestamps(::arg().mustDo("log-timestamp"));
4113 if(!::arg()["logging-facility"].empty()) {
4114 int val
=logFacilityToLOG(::arg().asNum("logging-facility") );
4116 g_log
.setFacility(val
);
4118 g_log
<<Logger::Error
<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl
;
4121 showProductVersion();
4123 g_disthashseed
=dns_random(0xffffffff);
4125 checkLinuxIPv6Limits();
4127 vector
<string
> addrs
;
4128 if(!::arg()["query-local-address6"].empty()) {
4129 SyncRes::s_doIPv6
=true;
4130 g_log
<<Logger::Warning
<<"Enabling IPv6 transport for outgoing queries"<<endl
;
4132 stringtok(addrs
, ::arg()["query-local-address6"], ", ;");
4133 for(const string
& addr
: addrs
) {
4134 g_localQueryAddresses6
.push_back(ComboAddress(addr
));
4138 g_log
<<Logger::Warning
<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl
;
4141 stringtok(addrs
, ::arg()["query-local-address"], ", ;");
4142 for(const string
& addr
: addrs
) {
4143 g_localQueryAddresses4
.push_back(ComboAddress(addr
));
4146 catch(std::exception
& e
) {
4147 g_log
<<Logger::Error
<<"Assigning local query addresses: "<<e
.what();
4151 // keep this ABOVE loadRecursorLuaConfig!
4152 if(::arg()["dnssec"]=="off")
4153 g_dnssecmode
=DNSSECMode::Off
;
4154 else if(::arg()["dnssec"]=="process-no-validate")
4155 g_dnssecmode
=DNSSECMode::ProcessNoValidate
;
4156 else if(::arg()["dnssec"]=="process")
4157 g_dnssecmode
=DNSSECMode::Process
;
4158 else if(::arg()["dnssec"]=="validate")
4159 g_dnssecmode
=DNSSECMode::ValidateAll
;
4160 else if(::arg()["dnssec"]=="log-fail")
4161 g_dnssecmode
=DNSSECMode::ValidateForLog
;
4163 g_log
<<Logger::Error
<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl
;
4167 g_signatureInceptionSkew
= ::arg().asNum("signature-inception-skew");
4168 if (g_signatureInceptionSkew
< 0) {
4169 g_log
<<Logger::Error
<<"A negative value for 'signature-inception-skew' is not allowed"<<endl
;
4173 g_dnssecLogBogus
= ::arg().mustDo("dnssec-log-bogus");
4174 g_maxNSEC3Iterations
= ::arg().asNum("nsec3-max-iterations");
4176 g_maxCacheEntries
= ::arg().asNum("max-cache-entries");
4177 g_maxPacketCacheEntries
= ::arg().asNum("max-packetcache-entries");
4179 luaConfigDelayedThreads delayedLuaThreads
;
4181 loadRecursorLuaConfig(::arg()["lua-config-file"], delayedLuaThreads
);
4183 catch (PDNSException
&e
) {
4184 g_log
<<Logger::Error
<<"Cannot load Lua configuration: "<<e
.reason
<<endl
;
4189 initPublicSuffixList(::arg()["public-suffix-list-file"]);
4191 if(!::arg()["dont-query"].empty()) {
4193 stringtok(ips
, ::arg()["dont-query"], ", ");
4194 ips
.push_back("0.0.0.0");
4195 ips
.push_back("::");
4197 g_log
<<Logger::Warning
<<"Will not send queries to: ";
4198 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
4199 SyncRes::addDontQuery(*i
);
4201 g_log
<<Logger::Warning
<<", ";
4202 g_log
<<Logger::Warning
<<*i
;
4204 g_log
<<Logger::Warning
<<endl
;
4207 g_quiet
=::arg().mustDo("quiet");
4209 /* this needs to be done before parseACLs(), which call broadcastFunction() */
4210 g_weDistributeQueries
= ::arg().mustDo("pdns-distributes-queries");
4211 if(g_weDistributeQueries
) {
4212 g_log
<<Logger::Warning
<<"PowerDNS Recursor itself will distribute queries over threads"<<endl
;
4215 setupDelegationOnly();
4216 g_outgoingEDNSBufsize
=::arg().asNum("edns-outgoing-bufsize");
4218 if(::arg()["trace"]=="fail") {
4219 SyncRes::setDefaultLogMode(SyncRes::Store
);
4221 else if(::arg().mustDo("trace")) {
4222 SyncRes::setDefaultLogMode(SyncRes::Log
);
4223 ::arg().set("quiet")="no";
4227 string myHostname
= getHostname();
4228 if (myHostname
== "UNKNOWN"){
4229 g_log
<<Logger::Warning
<<"Unable to get the hostname, NSID and id.server values will be empty"<<endl
;
4233 SyncRes::s_minimumTTL
= ::arg().asNum("minimum-ttl-override");
4234 SyncRes::s_minimumECSTTL
= ::arg().asNum("ecs-minimum-ttl-override");
4236 SyncRes::s_nopacketcache
= ::arg().mustDo("disable-packetcache");
4238 SyncRes::s_maxnegttl
=::arg().asNum("max-negative-ttl");
4239 SyncRes::s_maxbogusttl
=::arg().asNum("max-cache-bogus-ttl");
4240 SyncRes::s_maxcachettl
=max(::arg().asNum("max-cache-ttl"), 15);
4241 SyncRes::s_packetcachettl
=::arg().asNum("packetcache-ttl");
4242 // Cap the packetcache-servfail-ttl to the packetcache-ttl
4243 uint32_t packetCacheServFailTTL
= ::arg().asNum("packetcache-servfail-ttl");
4244 SyncRes::s_packetcacheservfailttl
=(packetCacheServFailTTL
> SyncRes::s_packetcachettl
) ? SyncRes::s_packetcachettl
: packetCacheServFailTTL
;
4245 SyncRes::s_serverdownmaxfails
=::arg().asNum("server-down-max-fails");
4246 SyncRes::s_serverdownthrottletime
=::arg().asNum("server-down-throttle-time");
4247 SyncRes::s_serverID
=::arg()["server-id"];
4248 SyncRes::s_maxqperq
=::arg().asNum("max-qperq");
4249 SyncRes::s_maxtotusec
=1000*::arg().asNum("max-total-msec");
4250 SyncRes::s_maxdepth
=::arg().asNum("max-recursion-depth");
4251 SyncRes::s_rootNXTrust
= ::arg().mustDo( "root-nx-trust");
4252 if(SyncRes::s_serverID
.empty()) {
4253 SyncRes::s_serverID
= myHostname
;
4256 SyncRes::s_ecsipv4limit
= ::arg().asNum("ecs-ipv4-bits");
4257 SyncRes::s_ecsipv6limit
= ::arg().asNum("ecs-ipv6-bits");
4258 SyncRes::clearECSStats();
4259 SyncRes::s_ecsipv4cachelimit
= ::arg().asNum("ecs-ipv4-cache-bits");
4260 SyncRes::s_ecsipv6cachelimit
= ::arg().asNum("ecs-ipv6-cache-bits");
4261 SyncRes::s_ecscachelimitttl
= ::arg().asNum("ecs-cache-limit-ttl");
4263 SyncRes::s_qnameminimization
= ::arg().mustDo("qname-minimization");
4265 if (SyncRes::s_qnameminimization
) {
4266 // With an empty cache, a rev ipv6 query with dnssec enabled takes
4267 // almost 100 queries. Default maxqperq is 60.
4268 SyncRes::s_maxqperq
= std::max(SyncRes::s_maxqperq
, static_cast<unsigned int>(100));
4271 SyncRes::s_hardenNXD
= SyncRes::HardenNXD::DNSSEC
;
4272 string value
= ::arg()["nothing-below-nxdomain"];
4273 if (value
== "yes") {
4274 SyncRes::s_hardenNXD
= SyncRes::HardenNXD::Yes
;
4275 } else if (value
== "no") {
4276 SyncRes::s_hardenNXD
= SyncRes::HardenNXD::No
;
4277 } else if (value
!= "dnssec") {
4278 g_log
<< Logger::Error
<< "Unknown nothing-below-nxdomain mode: " << value
<< endl
;
4282 if (!::arg().isEmpty("ecs-scope-zero-address")) {
4283 ComboAddress
scopeZero(::arg()["ecs-scope-zero-address"]);
4284 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero
, scopeZero
.isIPv4() ? 32 : 128));
4288 for (const auto& addr
: g_localQueryAddresses4
) {
4289 if (!IsAnyAddress(addr
)) {
4290 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 32));
4296 for (const auto& addr
: g_localQueryAddresses6
) {
4297 if (!IsAnyAddress(addr
)) {
4298 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 128));
4304 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
4309 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
4310 SyncRes::parseEDNSSubnetAddFor(::arg()["ecs-add-for"]);
4311 g_useIncomingECS
= ::arg().mustDo("use-incoming-edns-subnet");
4313 g_XPFAcl
.toMasks(::arg()["xpf-allow-from"]);
4314 g_xpfRRCode
= ::arg().asNum("xpf-rr-code");
4316 g_proxyProtocolACL
.toMasks(::arg()["proxy-protocol-from"]);
4317 g_proxyProtocolMaximumSize
= ::arg().asNum("proxy-protocol-maximum-size");
4319 if (!::arg()["dns64-prefix"].empty()) {
4321 auto dns64Prefix
= Netmask(::arg()["dns64-prefix"]);
4322 if (dns64Prefix
.getBits() != 96) {
4323 g_log
<< Logger::Error
<< "Invalid prefix for 'dns64-prefix', the current implementation only supports /96 prefixes: " << ::arg()["dns64-prefix"] << endl
;
4326 g_dns64Prefix
= dns64Prefix
.getNetwork();
4327 g_dns64PrefixReverse
= reverseNameFromIP(*g_dns64Prefix
);
4328 /* /96 is 24 nibbles + 2 for "ip6.arpa." */
4329 while (g_dns64PrefixReverse
.countLabels() > 26) {
4330 g_dns64PrefixReverse
.chopOff();
4333 catch (const NetmaskException
& ne
) {
4334 g_log
<< Logger::Error
<< "Invalid prefix '" << ::arg()["dns64-prefix"] << "' for 'dns64-prefix': " << ne
.reason
<< endl
;
4339 g_networkTimeoutMsec
= ::arg().asNum("network-timeout");
4341 g_initialDomainMap
= parseAuthAndForwards();
4343 g_latencyStatSize
=::arg().asNum("latency-statistic-size");
4345 g_logCommonErrors
=::arg().mustDo("log-common-errors");
4346 g_logRPZChanges
= ::arg().mustDo("log-rpz-changes");
4348 g_anyToTcp
= ::arg().mustDo("any-to-tcp");
4349 g_udpTruncationThreshold
= ::arg().asNum("udp-truncation-threshold");
4351 g_lowercaseOutgoing
= ::arg().mustDo("lowercase-outgoing");
4353 g_numDistributorThreads
= ::arg().asNum("distributor-threads");
4354 g_numWorkerThreads
= ::arg().asNum("threads");
4355 if (g_numWorkerThreads
< 1) {
4356 g_log
<<Logger::Warning
<<"Asked to run with 0 threads, raising to 1 instead"<<endl
;
4357 g_numWorkerThreads
= 1;
4360 g_numThreads
= g_numDistributorThreads
+ g_numWorkerThreads
;
4361 g_maxMThreads
= ::arg().asNum("max-mthreads");
4364 int64_t maxInFlight
= ::arg().asNum("max-concurrent-requests-per-tcp-connection");
4365 if (maxInFlight
< 1 || maxInFlight
> USHRT_MAX
|| maxInFlight
>= g_maxMThreads
) {
4366 g_log
<<Logger::Warning
<<"Asked to run with illegal max-concurrent-requests-per-tcp-connection, setting to default (10)"<<endl
;
4367 TCPConnection::s_maxInFlight
= 10;
4369 TCPConnection::s_maxInFlight
= maxInFlight
;
4373 g_gettagNeedsEDNSOptions
= ::arg().mustDo("gettag-needs-edns-options");
4375 g_statisticsInterval
= ::arg().asNum("statistics-interval");
4378 SuffixMatchNode dontThrottleNames
;
4379 vector
<string
> parts
;
4380 stringtok(parts
, ::arg()["dont-throttle-names"], " ,");
4381 for (const auto &p
: parts
) {
4382 dontThrottleNames
.add(DNSName(p
));
4384 g_dontThrottleNames
.setState(std::move(dontThrottleNames
));
4386 NetmaskGroup dontThrottleNetmasks
;
4387 stringtok(parts
, ::arg()["dont-throttle-netmasks"], " ,");
4388 for (const auto &p
: parts
) {
4389 dontThrottleNetmasks
.addMask(Netmask(p
));
4391 g_dontThrottleNetmasks
.setState(std::move(dontThrottleNetmasks
));
4394 s_balancingFactor
= ::arg().asDouble("distribution-load-factor");
4395 if (s_balancingFactor
!= 0.0 && s_balancingFactor
< 1.0) {
4396 s_balancingFactor
= 0.0;
4397 g_log
<<Logger::Warning
<<"Asked to run with a distribution-load-factor below 1.0, disabling it instead"<<endl
;
4401 g_reusePort
= ::arg().mustDo("reuseport");
4404 s_threadInfos
.resize(g_numDistributorThreads
+ g_numWorkerThreads
+ /* handler */ 1);
4407 if (g_weDistributeQueries
) {
4408 /* first thread is the handler, then distributors */
4409 for (unsigned int threadId
= 1; threadId
<= g_numDistributorThreads
; threadId
++) {
4410 auto& deferredAdds
= s_threadInfos
.at(threadId
).deferredAdds
;
4411 auto& tcpSockets
= s_threadInfos
.at(threadId
).tcpSockets
;
4412 makeUDPServerSockets(deferredAdds
);
4413 makeTCPServerSockets(deferredAdds
, tcpSockets
);
4417 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4418 for (unsigned int threadId
= 1; threadId
<= g_numWorkerThreads
; threadId
++) {
4419 auto& deferredAdds
= s_threadInfos
.at(threadId
).deferredAdds
;
4420 auto& tcpSockets
= s_threadInfos
.at(threadId
).tcpSockets
;
4421 makeUDPServerSockets(deferredAdds
);
4422 makeTCPServerSockets(deferredAdds
, tcpSockets
);
4427 std::set
<int> tcpSockets
;
4428 /* we don't have reuseport so we can only open one socket per
4429 listening addr:port and everyone will listen on it */
4430 makeUDPServerSockets(g_deferredAdds
);
4431 makeTCPServerSockets(g_deferredAdds
, tcpSockets
);
4433 /* every listener (so distributor if g_weDistributeQueries, workers otherwise)
4434 needs to listen to the shared sockets */
4435 if (g_weDistributeQueries
) {
4436 /* first thread is the handler, then distributors */
4437 for (unsigned int threadId
= 1; threadId
<= g_numDistributorThreads
; threadId
++) {
4438 s_threadInfos
.at(threadId
).tcpSockets
= tcpSockets
;
4442 /* first thread is the handler, there is no distributor here and workers are accepting queries */
4443 for (unsigned int threadId
= 1; threadId
<= g_numWorkerThreads
; threadId
++) {
4444 s_threadInfos
.at(threadId
).tcpSockets
= tcpSockets
;
4450 // Setup newly observed domain globals
4452 #endif /* NOD_ENABLED */
4455 for(forks
= 0; forks
< ::arg().asNum("processes") - 1; ++forks
) {
4456 if(!fork()) // we are child
4460 if(::arg().mustDo("daemon")) {
4461 g_log
<<Logger::Warning
<<"Calling daemonize, going to background"<<endl
;
4462 g_log
.toConsole(Logger::Critical
);
4465 if(Utility::getpid() == 1) {
4466 /* We are running as pid 1, register sigterm and sigint handler
4468 The Linux kernel will handle SIGTERM and SIGINT for all processes, except PID 1.
4469 It assumes that the process running as pid 1 is an "init" like system.
4470 For years, this was a safe assumption, but containers change that: in
4471 most (all?) container implementations, the application itself is running
4472 as pid 1. This means that sending signals to those applications, will not
4473 be handled by default. Results might be "your container not responding
4474 when asking it to stop", or "ctrl-c not working even when the app is
4475 running in the foreground inside a container".
4477 So TL;DR: If we're running pid 1 (container), we should handle SIGTERM and SIGINT ourselves */
4479 signal(SIGTERM
,termIntHandler
);
4480 signal(SIGINT
,termIntHandler
);
4483 signal(SIGUSR1
,usr1Handler
);
4484 signal(SIGUSR2
,usr2Handler
);
4485 signal(SIGPIPE
,SIG_IGN
);
4489 #ifdef HAVE_LIBSODIUM
4490 if (sodium_init() == -1) {
4491 g_log
<<Logger::Error
<<"Unable to initialize sodium crypto library"<<endl
;
4496 openssl_thread_setup();
4498 /* setup rng before chroot */
4501 if(::arg()["server-id"].empty()) {
4502 ::arg().set("server-id") = myHostname
;
4506 if(!::arg()["setgid"].empty())
4507 newgid
= strToGID(::arg()["setgid"]);
4509 if(!::arg()["setuid"].empty())
4510 newuid
= strToUID(::arg()["setuid"]);
4512 Utility::dropGroupPrivs(newuid
, newgid
);
4514 if (!::arg()["chroot"].empty()) {
4517 ns
= getenv("NOTIFY_SOCKET");
4518 if (ns
!= nullptr) {
4519 g_log
<<Logger::Error
<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl
;
4523 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
4525 g_log
<<Logger::Error
<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (err
)<<", exiting"<<endl
;
4529 g_log
<<Logger::Info
<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl
;
4534 s_pidfname
=::arg()["socket-dir"]+"/"+s_programname
+".pid";
4535 if(!s_pidfname
.empty())
4536 unlink(s_pidfname
.c_str()); // remove possible old pid file
4539 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks
: -1);
4541 Utility::dropUserPrivs(newuid
);
4543 /* we might still have capabilities remaining, for example if we have been started as root
4544 without --setuid (please don't do that) or as an unprivileged user with ambient capabilities
4545 like CAP_NET_BIND_SERVICE.
4549 catch(const std::exception
& e
) {
4550 g_log
<<Logger::Warning
<<e
.what()<<endl
;
4553 startLuaConfigDelayedThreads(delayedLuaThreads
, g_luaconfs
.getCopy().generation
);
4557 g_tcpTimeout
=::arg().asNum("client-tcp-timeout");
4558 g_maxTCPPerClient
=::arg().asNum("max-tcp-per-client");
4559 g_tcpMaxQueriesPerConn
=::arg().asNum("max-tcp-queries-per-connection");
4560 s_maxUDPQueriesPerRound
=::arg().asNum("max-udp-queries-per-round");
4562 g_useKernelTimestamp
= ::arg().mustDo("protobuf-use-kernel-timestamp");
4564 blacklistStats(StatComponent::API
, ::arg()["stats-api-blacklist"]);
4565 blacklistStats(StatComponent::Carbon
, ::arg()["stats-carbon-blacklist"]);
4566 blacklistStats(StatComponent::RecControl
, ::arg()["stats-rec-control-blacklist"]);
4567 blacklistStats(StatComponent::SNMP
, ::arg()["stats-snmp-blacklist"]);
4569 if (::arg().mustDo("snmp-agent")) {
4570 g_snmpAgent
= std::make_shared
<RecursorSNMPAgent
>("recursor", ::arg()["snmp-master-socket"]);
4574 int port
= ::arg().asNum("udp-source-port-min");
4575 if(port
< 1024 || port
> 65535){
4576 g_log
<<Logger::Error
<<"Unable to launch, udp-source-port-min is not a valid port number"<<endl
;
4577 exit(99); // this isn't going to fix itself either
4579 s_minUdpSourcePort
= port
;
4580 port
= ::arg().asNum("udp-source-port-max");
4581 if(port
< 1024 || port
> 65535 || port
< s_minUdpSourcePort
){
4582 g_log
<<Logger::Error
<<"Unable to launch, udp-source-port-max is not a valid port number or is smaller than udp-source-port-min"<<endl
;
4583 exit(99); // this isn't going to fix itself either
4585 s_maxUdpSourcePort
= port
;
4586 std::vector
<string
> parts
{};
4587 stringtok(parts
, ::arg()["udp-source-port-avoid"], ", ");
4588 for (const auto &part
: parts
)
4590 port
= std::stoi(part
);
4591 if(port
< 1024 || port
> 65535){
4592 g_log
<<Logger::Error
<<"Unable to launch, udp-source-port-avoid contains an invalid port number: "<<part
<<endl
;
4593 exit(99); // this isn't going to fix itself either
4595 s_avoidUdpSourcePorts
.insert(port
);
4598 unsigned int currentThreadId
= 1;
4599 const auto cpusMap
= parseCPUMap();
4601 if(g_numThreads
== 1) {
4602 g_log
<<Logger::Warning
<<"Operating unthreaded"<<endl
;
4604 sd_notify(0, "READY=1");
4607 /* This thread handles the web server, carbon, statistics and the control channel */
4608 auto& handlerInfos
= s_threadInfos
.at(0);
4609 handlerInfos
.isHandler
= true;
4610 handlerInfos
.thread
= std::thread(recursorThread
, 0, "main");
4612 setCPUMap(cpusMap
, currentThreadId
, pthread_self());
4614 auto& infos
= s_threadInfos
.at(currentThreadId
);
4615 infos
.isListener
= true;
4616 infos
.isWorker
= true;
4617 recursorThread(currentThreadId
++, "worker");
4619 handlerInfos
.thread
.join();
4624 if (g_weDistributeQueries
) {
4625 for(unsigned int n
=0; n
< g_numDistributorThreads
; ++n
) {
4626 auto& infos
= s_threadInfos
.at(currentThreadId
+ n
);
4627 infos
.isListener
= true;
4630 for(unsigned int n
=0; n
< g_numWorkerThreads
; ++n
) {
4631 auto& infos
= s_threadInfos
.at(currentThreadId
+ (g_weDistributeQueries
? g_numDistributorThreads
: 0) + n
);
4632 infos
.isListener
= !g_weDistributeQueries
;
4633 infos
.isWorker
= true;
4636 if (g_weDistributeQueries
) {
4637 g_log
<<Logger::Warning
<<"Launching "<< g_numDistributorThreads
<<" distributor threads"<<endl
;
4638 for(unsigned int n
=0; n
< g_numDistributorThreads
; ++n
) {
4639 auto& infos
= s_threadInfos
.at(currentThreadId
);
4640 infos
.thread
= std::thread(recursorThread
, currentThreadId
++, "distr");
4641 setCPUMap(cpusMap
, currentThreadId
, infos
.thread
.native_handle());
4645 g_log
<<Logger::Warning
<<"Launching "<< g_numWorkerThreads
<<" worker threads"<<endl
;
4647 for(unsigned int n
=0; n
< g_numWorkerThreads
; ++n
) {
4648 auto& infos
= s_threadInfos
.at(currentThreadId
);
4649 infos
.thread
= std::thread(recursorThread
, currentThreadId
++, "worker");
4650 setCPUMap(cpusMap
, currentThreadId
, infos
.thread
.native_handle());
4654 sd_notify(0, "READY=1");
4657 /* This thread handles the web server, carbon, statistics and the control channel */
4658 auto& infos
= s_threadInfos
.at(0);
4659 infos
.isHandler
= true;
4660 infos
.thread
= std::thread(recursorThread
, 0, "web+stat");
4662 for (auto & ti
: s_threadInfos
) {
4667 #ifdef HAVE_PROTOBUF
4668 google::protobuf::ShutdownProtobufLibrary();
4669 #endif /* HAVE_PROTOBUF */
// Entry point run by every recursor thread; the startup code launches it with the
// names "main"/"web+stat" (handler), "distr" (distributor) and "worker".
// `n` is the numeric id of the thread and `threadName` is the suffix used for the
// OS-level thread name.
// NOTE(review): several original lines (braces, `try`, `else` keywords and a few
// statements) are not visible in this excerpt; the comments below only describe the
// statements that are visible, and hedge wherever the control flow is hidden.
4673 static void* recursorThread(unsigned int n
, const string
& threadName
)
// Bookkeeping entry for this thread (role flags, pipes, sockets, MTasker pointer).
// presumably t_id was set from `n` just above -- the assignment is not visible here.
4677 auto& threadInfo
= s_threadInfos
.at(t_id
);
// Name the thread "pdns-r/<threadName>".
4679 static string threadPrefix
= "pdns-r/";
4680 setThreadName(threadPrefix
+ threadName
);
4682 SyncRes
tmp(g_now
); // make sure it allocates tsstorage before we do anything, like primeHints or so..
// Initialise this thread's state from the g_initial* globals and allocate its own
// UDP client sockets, TCP client counters and packet cache.
4683 SyncRes::setDomainMap(g_initialDomainMap
);
4684 t_allowFrom
= g_initialAllowFrom
;
4685 t_udpclientsocks
= std::unique_ptr
<UDPClientSocks
>(new UDPClientSocks());
4686 t_tcpClientCounts
= std::unique_ptr
<tcpClientCounts_t
>(new tcpClientCounts_t());
4689 t_packetCache
= std::unique_ptr
<RecursorPacketCache
>(new RecursorPacketCache());
// NOTE(review): the statement that performs the priming announced by this log line
// is not visible in this excerpt.
4691 g_log
<<Logger::Warning
<<"Done priming cache with root hints"<<endl
;
// Worker-only setup inside the NOD_ENABLED section; the guarded statements are not
// visible in this excerpt.
4694 if (threadInfo
.isWorker
)
4696 #endif /* NOD_ENABLED */
4698 /* the listener threads handle TCP queries */
4699 if(threadInfo
.isWorker
|| threadInfo
.isListener
) {
// Load the optional user Lua script named by the 'lua-dns-script' setting.
4701 if(!::arg()["lua-dns-script"].empty()) {
4702 t_pdl
= std::make_shared
<RecursorLua4
>();
4703 t_pdl
->loadFile(::arg()["lua-dns-script"]);
4704 g_log
<<Logger::Warning
<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl
;
// A std::exception raised while loading the script is logged at Error level; what
// happens after the log line is not visible here.
4707 catch(std::exception
&e
) {
4708 g_log
<<Logger::Error
<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e
.what()<<endl
;
// Per-thread ring buffer capacity: the configured 'stats-ringbuffer-entries' divided
// by the number of worker threads.
4713 unsigned int ringsize
=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads
;
4715 t_remotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
// When PowerDNS distributes queries itself, t_remotes is sized per distributor thread.
// NOTE(review): this divides by g_numDistributorThreads -- confirm it cannot be 0
// when g_weDistributeQueries is set.
4716 if(g_weDistributeQueries
)
4717 t_remotes
->set_capacity(::arg().asNum("stats-ringbuffer-entries") / g_numDistributorThreads
);
// presumably the `else` branch of the check above (the keyword is not visible here).
4719 t_remotes
->set_capacity(ringsize
);
// The remaining address and query rings all use `ringsize` as their capacity.
4720 t_servfailremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4721 t_servfailremotes
->set_capacity(ringsize
);
4722 t_bogusremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4723 t_bogusremotes
->set_capacity(ringsize
);
4724 t_largeanswerremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4725 t_largeanswerremotes
->set_capacity(ringsize
);
4726 t_timeouts
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
4727 t_timeouts
->set_capacity(ringsize
);
4729 t_queryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
4730 t_queryring
->set_capacity(ringsize
);
4731 t_servfailqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
4732 t_servfailqueryring
->set_capacity(ringsize
);
4733 t_bogusqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
4734 t_bogusqueryring
->set_capacity(ringsize
);
// Create the MTasker for this thread, passing the 'stack-size' setting, and store a
// non-owning pointer to it in threadInfo.
4737 MT
=std::unique_ptr
<MTasker
<PacketID
,string
> >(new MTasker
<PacketID
,string
>(::arg().asNum("stack-size")));
4738 threadInfo
.mt
= MT
.get();
4740 #ifdef HAVE_PROTOBUF
4741 /* start protobuf export threads if needed */
4742 auto luaconfsLocal
= g_luaconfs
.getLocal();
4743 checkProtobufExport(luaconfsLocal
);
4744 checkOutgoingProtobufExport(luaconfsLocal
);
4745 #endif /* HAVE_PROTOBUF */
// NOTE(review): luaconfsLocal is declared inside the HAVE_PROTOBUF section above; the
// preprocessor guard around this call is not visible in this excerpt.
4747 checkFrameStreamExport(luaconfsLocal
);
// Obtain the FD multiplexer on which the read callbacks below are registered.
4752 t_fdm
=getMultiplexer();
4754 RecursorWebServer
*rws
= nullptr;
// Only the handler thread creates the web server, and only when 'webserver' is enabled.
4756 if(threadInfo
.isHandler
) {
4757 if(::arg().mustDo("webserver")) {
4758 g_log
<<Logger::Warning
<< "Enabling web server" << endl
;
4760 rws
= new RecursorWebServer(t_fdm
);
4762 catch(PDNSException
&e
) {
4763 g_log
<<Logger::Error
<<"Exception: "<<e
.reason
<<endl
;
4767 g_log
<<Logger::Info
<<"Enabled '"<< t_fdm
->getName() << "' multiplexer"<<endl
;
// Register both pipe read ends of this thread with handlePipeRequest.
4771 t_fdm
->addReadFD(threadInfo
.pipes
.readToThread
, handlePipeRequest
);
4772 t_fdm
->addReadFD(threadInfo
.pipes
.readQueriesToThread
, handlePipeRequest
);
// Listener threads also register listening sockets: either their own deferredAdds or
// the shared g_deferredAdds. The condition selecting between the two loops is not
// visible in this excerpt.
4774 if (threadInfo
.isListener
) {
4776 /* then every listener has its own FDs */
4777 for(const auto& deferred
: threadInfo
.deferredAdds
) {
4778 t_fdm
->addReadFD(deferred
.first
, deferred
.second
);
4782 /* otherwise all listeners are listening on the same ones */
4783 for(const auto& deferred
: g_deferredAdds
) {
4784 t_fdm
->addReadFD(deferred
.first
, deferred
.second
);
4792 if(threadInfo
.isHandler
) {
4793 t_fdm
->addReadFD(s_rcc
.d_fd
, handleRCC
); // control channel
// Loop-local state: the TCP client ceiling plus timestamps and intervals for the
// periodic tasks below.
4796 unsigned int maxTcpClients
=::arg().asNum("max-tcp-clients");
4798 bool listenOnTCP(true);
4800 time_t last_stat
= 0;
4801 time_t last_carbon
=0, last_lua_maintenance
=0;
4802 time_t carbonInterval
=::arg().asNum("carbon-interval");
4803 time_t luaMaintenanceInterval
=::arg().asNum("lua-maintenance-interval");
4804 counter
.store(0); // used to periodically execute certain tasks
// Main loop: repeats until RecursorControlChannel::stop becomes true.
4806 while (!RecursorControlChannel::stop
) {
4807 while(MT
->schedule(&g_now
)); // MTasker letting the mthreads do their thing
// Whenever counter is a multiple of 500, start a houseKeeping mthread.
4809 if(!(counter
%500)) {
4810 MT
->makeThread(houseKeeping
, 0);
// Ask the multiplexer for timed-out FDs; each entry carries a TCPConnection. The
// timeout is optionally logged and the FD is removed from the read set.
4814 typedef vector
<pair
<int, FDMultiplexer::funcparam_t
> > expired_t
;
4815 expired_t expired
=t_fdm
->getTimeouts(g_now
);
4817 for(expired_t::iterator i
=expired
.begin() ; i
!= expired
.end(); ++i
) {
4818 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(i
->second
);
4819 if(g_logCommonErrors
)
4820 g_log
<<Logger::Warning
<<"Timeout from remote TCP client "<< conn
->d_remote
.toStringWithPort() <<endl
;
4821 t_fdm
->removeReadFD(i
->first
);
// Handler-only periodic work: statistics interval tracking and carbon dumps.
4827 if(threadInfo
.isHandler
) {
4828 if(statsWanted
|| (g_statisticsInterval
> 0 && (g_now
.tv_sec
- last_stat
) >= g_statisticsInterval
)) {
4830 last_stat
= g_now
.tv_sec
;
4833 Utility::gettimeofday(&g_now
, 0);
// Start a doCarbonDump mthread once at least carbonInterval seconds have passed.
4835 if((g_now
.tv_sec
- last_carbon
) >= carbonInterval
) {
4836 MT
->makeThread(doCarbonDump
, 0);
4837 last_carbon
= g_now
.tv_sec
;
4840 if (t_pdl
!= nullptr) {
4841 // lua-dns-script directive is present, call the maintenance callback if needed
4842 /* remember that the listener threads handle TCP queries */
4843 if (threadInfo
.isWorker
|| threadInfo
.isListener
) {
4844 // Only on threads processing queries
4845 if(g_now
.tv_sec
- last_lua_maintenance
>= luaMaintenanceInterval
) {
4846 t_pdl
->maintenance();
4847 last_lua_maintenance
= g_now
.tv_sec
;
4853 // 'run' updates g_now for us
// Listener threads remove their TCP listening sockets from the multiplexer while the
// number of TCP connections exceeds 'max-tcp-clients', and add them back with
// handleNewTCPQuestion once it is at or below the limit. The checks involving
// listenOnTCP are not visible in this excerpt.
4855 if(threadInfo
.isListener
) {
4857 if(TCPConnection::getCurrentConnections() > maxTcpClients
) { // shutdown, too many connections
4858 for(const auto fd
: threadInfo
.tcpSockets
) {
4859 t_fdm
->removeReadFD(fd
);
4865 if(TCPConnection::getCurrentConnections() <= maxTcpClients
) { // reenable
4866 for(const auto fd
: threadInfo
.tcpSockets
) {
4867 t_fdm
->addReadFD(fd
, handleNewTCPQuestion
);
// Exceptions reaching this level are logged at Error level; the statements that
// follow each log line are not visible in this excerpt.
4878 catch(PDNSException
&ae
) {
4879 g_log
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
4882 catch(std::exception
&e
) {
4883 g_log
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
4887 g_log
<<Logger::Error
<<"any other exception in main: "<<endl
;
4892 int main(int argc
, char **argv
)
4896 g_stats
.startupTime
=time(0);
4898 versionSetProduct(ProductRecursor
);
4902 int ret
= EXIT_SUCCESS
;
4905 ::arg().set("stack-size","stack size per mthread")="200000";
4906 ::arg().set("soa-minimum-ttl","Don't change")="0";
4907 ::arg().set("no-shuffle","Don't change")="off";
4908 ::arg().set("local-port","port to listen on")="53";
4909 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
4910 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
4911 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
4912 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
4913 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
4914 ::arg().set("signature-inception-skew", "Allow the signature inception to be off by this number of seconds")="60";
4915 ::arg().set("daemon","Operate as a daemon")="no";
4916 ::arg().setSwitch("write-pid","Write a PID file")="yes";
4917 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
4918 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
4919 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
4920 ::arg().set("log-common-errors","If we should log rather common errors")="no";
4921 ::arg().set("chroot","switch to chroot jail")="";
4922 ::arg().set("setgid","If set, change group id to this gid for more security"
4924 #define SYSTEMD_SETID_MSG ". When running inside systemd, use the User and Group settings in the unit-file!"
4928 ::arg().set("setuid","If set, change user id to this uid for more security"
4933 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
4934 ::arg().set("threads", "Launch this number of threads")="2";
4935 ::arg().set("distributor-threads", "Launch this number of distributor threads, distributing queries to other threads")="0";
4936 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
4937 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
4938 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
4939 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
4940 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
4941 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
4942 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
4943 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
4944 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
4945 ::arg().set("webserver-loglevel", "Amount of logging in the webserver (none, normal, detailed)") = "normal";
4946 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
4947 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
4948 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
4949 ::arg().set("carbon-namespace", "If set overwrites the first part of the carbon string")="pdns";
4950 ::arg().set("carbon-instance", "If set overwrites the the instance name default")="recursor";
4952 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
4953 ::arg().set("quiet","Suppress logging of questions and answers")="";
4954 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
4955 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR
;
4956 ::arg().set("socket-owner","Owner of socket")="";
4957 ::arg().set("socket-group","Group of socket")="";
4958 ::arg().set("socket-mode", "Permissions for socket")="";
4960 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR
+"/pdns-recursor when unset and not chrooted" )="";
4961 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
4962 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
4963 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
4964 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
4965 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
4966 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
4967 ::arg().set("max-concurrent-requests-per-tcp-connection", "Maximum number of requests handled concurrently per TCP connection") = "10";
4968 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
4969 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
4970 ::arg().set("dont-throttle-names", "Do not throttle nameservers with this name or suffix")="";
4971 ::arg().set("dont-throttle-netmasks", "Do not throttle nameservers with this IP netmask")="";
4972 ::arg().set("hint-file", "If set, load root hints from this file")="";
4973 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
4974 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
4975 ::arg().set("max-cache-bogus-ttl", "maximum number of seconds to keep a Bogus (positive or negative) cached entry in memory")="3600";
4976 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
4977 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
4978 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
4979 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
4980 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname, set custom or 'disabled'")="";
4981 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
4982 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
4983 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS
;
4984 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
4985 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
4986 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY
;
4987 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
4988 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
4989 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
4990 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
4991 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
4992 ::arg().set("lua-config-file", "More powerful configuration options")="";
4993 ::arg().setSwitch("allow-trust-anchor-query", "Allow queries for trustanchor.server CH TXT and negativetrustanchor.server CH TXT")="no";
4995 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
4996 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
4997 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
4998 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
4999 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
5000 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
5001 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
5002 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
5003 ::arg().set("lua-maintenance-interval", "Number of seconds between calls to the lua user defined maintenance() function")="1";
5004 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
5005 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
5006 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
5007 ::arg().set("ecs-ipv4-cache-bits", "Maximum number of bits of IPv4 mask to cache ECS response")="24";
5008 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
5009 ::arg().set("ecs-ipv6-cache-bits", "Maximum number of bits of IPv6 mask to cache ECS response")="56";
5010 ::arg().set("ecs-minimum-ttl-override", "Set under adverse conditions, a minimum TTL for records in ECS-specific answers")="0";
5011 ::arg().set("ecs-cache-limit-ttl", "Minimum TTL to cache ECS response")="0";
5012 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
5013 ::arg().set("ecs-add-for", "List of client netmasks for which EDNS Client Subnet will be added")="0.0.0.0/0, ::/0, " LOCAL_NETS_INVERSE
;
5014 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
5015 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
5016 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
5017 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
5018 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
5019 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
5020 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
5021 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1232";
5022 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1232";
5023 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
5024 ::arg().set("max-qperq", "Maximum outgoing queries per query")="60";
5025 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
5026 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
5027 ::arg().set("max-udp-queries-per-round", "Maximum number of UDP queries processed per recvmsg() round, before returning back to normal processing")="10000";
5028 ::arg().set("protobuf-use-kernel-timestamp", "Compute the latency of queries in protobuf messages by using the timestamp set by the kernel when the query was received (when available)")="";
5029 ::arg().set("distribution-pipe-buffer-size", "Size in bytes of the internal buffer of the pipe used by the distributor to pass incoming queries to a worker thread")="0";
5031 ::arg().set("include-dir","Include *.conf files from this directory")="";
5032 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
5034 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
5036 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
5037 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
5039 std::string defaultBlacklistedStats
= "cache-bytes, packetcache-bytes, special-memory-usage";
5040 for (size_t idx
= 0; idx
< 32; idx
++) {
5041 defaultBlacklistedStats
+= ", ecs-v4-response-bits-" + std::to_string(idx
+ 1);
5043 for (size_t idx
= 0; idx
< 128; idx
++) {
5044 defaultBlacklistedStats
+= ", ecs-v6-response-bits-" + std::to_string(idx
+ 1);
5046 ::arg().set("stats-api-blacklist", "List of statistics that are disabled when retrieving the complete list of statistics via the API")=defaultBlacklistedStats
;
5047 ::arg().set("stats-carbon-blacklist", "List of statistics that are prevented from being exported via Carbon")=defaultBlacklistedStats
;
5048 ::arg().set("stats-rec-control-blacklist", "List of statistics that are prevented from being exported via rec_control get-all")=defaultBlacklistedStats
;
5049 ::arg().set("stats-snmp-blacklist", "List of statistics that are prevented from being exported via SNMP")=defaultBlacklistedStats
;
5051 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
5052 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
5054 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
5056 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
5058 ::arg().set("xpf-allow-from","XPF information is only processed from these subnets")="";
5059 ::arg().set("xpf-rr-code","XPF option code to use")="0";
5061 ::arg().set("proxy-protocol-from", "A Proxy Protocol header is only allowed from these subnets")="";
5062 ::arg().set("proxy-protocol-maximum-size", "The maximum size of a proxy protocol payload, including the TLV values")="512";
5064 ::arg().set("dns64-prefix", "DNS64 prefix")="";
5066 ::arg().set("udp-source-port-min", "Minimum UDP port to bind on")="1024";
5067 ::arg().set("udp-source-port-max", "Maximum UDP port to bind on")="65535";
5068 ::arg().set("udp-source-port-avoid", "List of comma separated UDP port number to avoid")="11211";
5069 ::arg().set("rng", "Specify random number generator to use. Valid values are auto,sodium,openssl,getrandom,arc4random,urandom.")="auto";
5070 ::arg().set("public-suffix-list-file", "Path to the Public Suffix List file, if any")="";
5071 ::arg().set("distribution-load-factor", "The load factor used when PowerDNS is distributing queries to worker threads")="0.0";
5073 ::arg().setSwitch("qname-minimization", "Use Query Name Minimization")="yes";
5074 ::arg().setSwitch("nothing-below-nxdomain", "When an NXDOMAIN exists in cache for a name with fewer labels than the qname, send NXDOMAIN without doing a lookup (see RFC 8020)")="dnssec";
5075 ::arg().set("max-generate-steps", "Maximum number of $GENERATE steps when loading a zone from a file")="0";
5076 ::arg().set("cache-shards", "Number of shards in the record cache")="1024";
5079 ::arg().set("new-domain-tracking", "Track newly observed domains (i.e. never seen before).")="no";
5080 ::arg().set("new-domain-log", "Log newly observed domains.")="yes";
5081 ::arg().set("new-domain-lookup", "Perform a DNS lookup newly observed domains as a subdomain of the configured domain")="";
5082 ::arg().set("new-domain-history-dir", "Persist new domain tracking data here to persist between restarts")=string(NODCACHEDIR
)+"/nod";
5083 ::arg().set("new-domain-whitelist", "List of domains (and implicitly all subdomains) which will never be considered a new domain")="";
5084 ::arg().set("new-domain-db-size", "Size of the DB used to track new domains in terms of number of cells. Defaults to 67108864")="67108864";
5085 ::arg().set("new-domain-pb-tag", "If protobuf is configured, the tag to use for messages containing newly observed domains. Defaults to 'pdns-nod'")="pdns-nod";
5086 ::arg().set("unique-response-tracking", "Track unique responses (tuple of query name, type and RR).")="no";
5087 ::arg().set("unique-response-log", "Log unique responses")="yes";
5088 ::arg().set("unique-response-history-dir", "Persist unique response tracking data here to persist between restarts")=string(NODCACHEDIR
)+"/udr";
5089 ::arg().set("unique-response-db-size", "Size of the DB used to track unique responses in terms of number of cells. Defaults to 67108864")="67108864";
5090 ::arg().set("unique-response-pb-tag", "If protobuf is configured, the tag to use for messages containing unique DNS responses. Defaults to 'pdns-udr'")="pdns-udr";
5091 #endif /* NOD_ENABLED */
5092 ::arg().setCmd("help","Provide a helpful message");
5093 ::arg().setCmd("version","Print version string");
5094 ::arg().setCmd("config","Output blank configuration");
5095 ::arg().setDefaults();
5096 g_log
.toConsole(Logger::Info
);
5097 ::arg().laxParse(argc
,argv
); // do a lax parse
5099 string configname
=::arg()["config-dir"]+"/recursor.conf";
5100 if(::arg()["config-name"]!="") {
5101 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
5102 s_programname
+="-"+::arg()["config-name"];
5104 cleanSlashes(configname
);
5106 if(!::arg().getCommands().empty()) {
5107 cerr
<<"Fatal: non-option";
5108 if (::arg().getCommands().size() > 1) {
5113 for (const auto& c
: ::arg().getCommands()) {
5120 cerr
<<") on the command line, perhaps a '--setting=123' statement missed the '='?"<<endl
;
5124 if(::arg().mustDo("config")) {
5125 cout
<<::arg().configstring(false, true);
5129 if(!::arg().file(configname
.c_str()))
5130 g_log
<<Logger::Warning
<<"Unable to parse configuration file '"<<configname
<<"'"<<endl
;
5132 ::arg().parse(argc
,argv
);
5134 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() ) {
5135 g_log
<<Logger::Error
<<"Using chroot and enabling the API is not possible"<<endl
;
5139 if (::arg()["socket-dir"].empty()) {
5140 if (::arg()["chroot"].empty())
5141 ::arg().set("socket-dir") = std::string(LOCALSTATEDIR
) + "/pdns-recursor";
5143 ::arg().set("socket-dir") = "/";
5146 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
5148 if(::arg().asNum("threads")==1) {
5149 if (::arg().mustDo("pdns-distributes-queries")) {
5150 g_log
<<Logger::Warning
<<"Only one thread, no need to distribute queries ourselves"<<endl
;
5151 ::arg().set("pdns-distributes-queries")="no";
5155 if(::arg().mustDo("pdns-distributes-queries") && ::arg().asNum("distributor-threads") <= 0) {
5156 g_log
<<Logger::Warning
<<"Asked to run with pdns-distributes-queries set but no distributor threads, raising to 1"<<endl
;
5157 ::arg().set("distributor-threads")="1";
5160 if (!::arg().mustDo("pdns-distributes-queries")) {
5161 ::arg().set("distributor-threads")="0";
5164 if(::arg().mustDo("help")) {
5165 cout
<<"syntax:"<<endl
<<endl
;
5166 cout
<<::arg().helpstring(::arg()["help"])<<endl
;
5169 if(::arg().mustDo("version")) {
5170 showProductVersion();
5171 showBuildConfiguration();
5175 s_RC
= std::unique_ptr
<MemRecursorCache
>(new MemRecursorCache(::arg().asNum("cache-shards")));
5177 Logger::Urgency logUrgency
= (Logger::Urgency
)::arg().asNum("loglevel");
5179 if (logUrgency
< Logger::Error
)
5180 logUrgency
= Logger::Error
;
5181 if(!g_quiet
&& logUrgency
< Logger::Info
) { // Logger::Info=6, Logger::Debug=7
5182 logUrgency
= Logger::Info
; // if you do --quiet=no, you need Info to also see the query log
5184 g_log
.setLoglevel(logUrgency
);
5185 g_log
.toConsole(logUrgency
);
5187 serviceMain(argc
, argv
);
5189 catch(PDNSException
&ae
) {
5190 g_log
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
5193 catch(std::exception
&e
) {
5194 g_log
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
5198 g_log
<<Logger::Error
<<"any other exception in main: "<<endl
;