2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 #include "ws-recursor.hh"
32 #include "recpacketcache.hh"
34 #include "dns_random.hh"
38 #include "opensslsigners.hh"
41 #include <boost/static_assert.hpp>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
52 #include "arguments.hh"
56 #include "sortlist.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
64 #include "malloctrace.hh"
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
76 #include "lua-recursor4.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
92 #include <systemd/sd-daemon.h>
95 #include "namespaces.hh"
// Map from a client address to a uint32_t counter. Keys are ordered with
// ComboAddress::addressOnlyLessThan. The TCPConnection constructor and destructor
// in this file increment/decrement the entry for the connection's remote address.
97 typedef map
<ComboAddress
, uint32_t, ComboAddress::addressOnlyLessThan
> tcpClientCounts_t
;
// ---- Per-thread state (every object below is thread_local / __thread) ----
// Lua engine handle; startDoResolve() passes it to SyncRes::setLuaEngine() and calls its
// prerpz/preresolve/nodata/nxdomain/postresolve hooks.
99 static thread_local
std::shared_ptr
<RecursorLua4
> t_pdl
;
// Numeric id of this thread; printed as the first field of the per-question log line.
100 static thread_local
unsigned int t_id
;
// Optional regex matched against the query name; on a match startDoResolve() switches
// the SyncRes to SyncRes::Store log mode.
101 static thread_local
std::shared_ptr
<Regex
> t_traceRegex
;
// Per-remote counters maintained by TCPConnection's constructor/destructor.
102 static thread_local
std::unique_ptr
<tcpClientCounts_t
> t_tcpClientCounts
;
104 thread_local
std::unique_ptr
<MT_t
> MT
; // the big MTasker
// Record cache and packet cache instances owned by this thread.
105 thread_local
std::unique_ptr
<MemRecursorCache
> t_RC
;
106 thread_local
std::unique_ptr
<RecursorPacketCache
> t_packetCache
;
// Socket multiplexer used by the helpers below to (un)register read/write descriptors.
// Starts out as nullptr.
107 thread_local FDMultiplexer
* t_fdm
{nullptr};
// Address rings: updateResponseStats() pushes the client address into t_servfailremotes on
// ServFail and into t_largeanswerremotes for answers larger than 1000 bytes.
// NOTE(review): t_remotes is presumably filled on query receipt - confirm at the call site.
108 thread_local
std::unique_ptr
<addrringbuf_t
> t_remotes
, t_servfailremotes
, t_largeanswerremotes
;
// (qname, qtype) rings: t_queryring is appended to at the start of startDoResolve(),
// t_servfailqueryring by updateResponseStats() on ServFail.
109 thread_local
std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > > t_queryring
, t_servfailqueryring
;
// NOTE(review): presumably the set of client networks allowed to query this thread - confirm.
110 thread_local
std::shared_ptr
<NetmaskGroup
> t_allowFrom
;
// Per-thread boost UUID generator.
112 thread_local
std::unique_ptr
<boost::uuids::random_generator
> t_uuidGenerator
;
114 __thread
struct timeval g_now
; // timestamp, updated (too) frequently
116 // for communicating with our threads
123 int writeQueriesToThread
; // this one is non-blocking
124 int readQueriesToThread
;
// List of listening TCP socket descriptors.
127 typedef vector
<int> tcpListenSockets_t
;
// Map from a listening socket descriptor to its ComboAddress.
128 typedef map
<int, ComboAddress
> listenSocketsAddresses_t
; // is shared across all threads right now
// List of (int, callback) pairs; the callback has the signature void(int, any&).
// NOTE(review): the int is presumably a file descriptor to be registered later - confirm.
129 typedef vector
<pair
<int, function
< void(int, any
&) > > > deferredAdd_t
;
// ---- Process-wide state and configuration ----
// IPv4 and IPv6 wildcard addresses.
131 static const ComboAddress
g_local4("0.0.0.0"), g_local6("::");
132 static vector
<ThreadPipeSet
> g_pipes
; // effectively readonly after startup
133 static tcpListenSockets_t g_tcpListenSockets
; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
134 static listenSocketsAddresses_t g_listenSocketsAddresses
; // is shared across all threads right now
// NOTE(review): the unsigned int key is presumably a thread id - confirm where it is filled.
135 static std::unordered_map
<unsigned int, deferredAdd_t
> deferredAdds
;
136 static set
<int> g_fromtosockets
; // listen sockets that use 'sendfromto()' mechanism
// Candidate source addresses for outgoing queries; getQueryLocalAddress() picks one at random
// per address family.
137 static vector
<ComboAddress
> g_localQueryAddresses4
, g_localQueryAddresses6
;
138 static AtomicCounter counter
;
139 static std::shared_ptr
<SyncRes::domainmap_t
> g_initialDomainMap
; // new threads need this to be set up
140 static std::shared_ptr
<NetmaskGroup
> g_initialAllowFrom
; // new threads need to be set up with this
141 static size_t g_tcpMaxQueriesPerConn
;
142 static uint64_t g_latencyStatSize
;
143 static uint32_t g_disthashseed
;
144 static unsigned int g_maxTCPPerClient
;
// Timeout handed to MT->waitEvent() by the asend*/arecv* helpers in this file.
145 static unsigned int g_networkTimeoutMsec
;
146 static unsigned int g_maxMThreads
;
147 static unsigned int g_numWorkerThreads
;
148 static int g_tcpTimeout
;
// Upper bound applied to the size of UDP answers in startDoResolve().
149 static uint16_t g_udpTruncationThreshold
;
150 static std::atomic
<bool> statsWanted
;
// When false, startDoResolve() logs every incoming question.
151 static std::atomic
<bool> g_quiet
;
// Gates logging of routine failures (e.g. a failed sendmsg() to a client).
152 static bool g_logCommonErrors
;
// When set, ANY queries received over UDP are answered with TC=1 (see startDoResolve()).
153 static bool g_anyToTcp
;
154 static bool g_weDistributeQueries
; // if true, only 1 thread listens on the incoming query sockets
155 static bool g_reusePort
{false};
156 static bool g_useOneSocketPerThread
;
157 static bool g_gettagNeedsEDNSOptions
{false};
158 static time_t g_statisticsInterval
;
// When set, startDoResolve() parses the client's EDNS Client Subnet option and hands it to SyncRes.
159 static bool g_useIncomingECS
;
160 std::atomic
<uint32_t> g_maxCacheEntries
, g_maxPacketCacheEntries
;
162 RecursorControlChannel s_rcc
; // only active in thread 0
163 RecursorStats g_stats
;
164 string s_programname
="pdns_recursor";
166 bool g_lowercaseOutgoing
;
167 unsigned int g_numThreads
;
168 uint16_t g_outgoingEDNSBufsize
;
169 bool g_logRPZChanges
{false};
// Loopback, RFC 1918 private, shared-address (100.64.0.0/10), link-local and IPv6 unique-local ranges.
171 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
172 // Bad Nets taken from both:
173 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
175 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
176 // where such a network may not be considered a valid destination
177 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Both lists joined into one comma-separated string via adjacent string-literal concatenation.
178 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
180 //! used to send information to a newborn mthread
181 struct DNSComboWriter
{
// Constructs the d_mdp parser member from (true, data, len), copies `now` into d_now and
// starts with d_tcp == false and d_socket == -1.
182 DNSComboWriter(const char* data
, uint16_t len
, const struct timeval
& now
) : d_mdp(true, data
, len
), d_now(now
),
183 d_tcp(false), d_socket(-1)
// Setters for the client address, the local (receiving) address and the socket.
// NOTE(review): presumably they assign d_remote / d_local / d_socket - confirm against the bodies.
186 void setRemote(const ComboAddress
* sa
)
191 void setLocal(const ComboAddress
& sa
)
197 void setSocket(int sock
)
// Returns d_remote formatted by ComboAddress::toString().
202 string
getRemote() const
204 return d_remote
.toString();
// Timestamp supplied at construction; startDoResolve() uses it to construct the SyncRes.
207 struct timeval d_now
;
208 ComboAddress d_remote
, d_local
;
// Passed to SyncRes::setInitialRequestId() and to the protobuf messages.
210 boost::uuids::uuid d_uuid
;
211 string d_requestorId
;
// Client subnet state: d_ecsFound is set from getEDNSSubnetOptsFromString() and d_ecsParsed
// is set to true once startDoResolve() has looked for the option.
214 EDNSSubnetOpts d_ednssubnet
;
215 bool d_ecsFound
{false};
216 bool d_ecsParsed
{false};
219 unsigned int d_tag
{0};
222 shared_ptr
<TCPConnection
> d_tcpConnection
;
// Copy of the EDNS options of the query (filled from edo.d_options in startDoResolve()).
223 vector
<pair
<uint16_t, string
> > d_ednsOpts
;
// Exposed to Lua as dq.policyTags and attached to protobuf messages.
224 std::vector
<std::string
> d_policyTags
;
225 LuaContext::LuaObject d_data
;
// Upper bound for TTLs written into the answer; defaults to "no cap" (max uint32_t).
226 uint32_t d_ttlCap
{std::numeric_limits
<uint32_t>::max()};
// Initial value of startDoResolve()'s variableAnswer flag, which (with other checks) gates
// the use of t_packetCache for the answer.
227 bool d_variable
{false};
232 return MT
? MT
.get() : nullptr;
237 static ArgvMap theArg
;
241 unsigned int getRecursorThreadId()
251 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
);
// Sends `data` over the TCP socket `sock` from inside an mthread.
// The socket is registered for writability with handleTCPClientWritable as callback, then the
// function waits in MT->waitEvent() for at most g_networkTimeoutMsec.
253 // -1 is error, 0 is timeout, 1 is success
254 int asendtcp(const string
& data
, Socket
* sock
)
260 t_fdm
->addWriteFD(sock
->getHandle(), handleTCPClientWritable
, pident
);
// `packet` receives what the multiplexer callback reports back (see the comment on the size check).
263 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
);
// On timeout or waitEvent error the descriptor is removed from the multiplexer again.
265 if(!ret
|| ret
==-1) { // timeout
266 t_fdm
->removeWriteFD(sock
->getHandle());
268 else if(packet
.size() !=data
.size()) { // main loop tells us what it sent out, or empty in case of an error
274 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
);
// Receives data from the TCP socket `sock` into `data` from inside an mthread.
// `incompleteOkay` is forwarded to the callback through pident.inIncompleteOkay.
// NOTE(review): `len` is presumably the number of bytes wanted - its use is not visible here.
276 // -1 is error, 0 is timeout, 1 is success
277 int arecvtcp(string
& data
, size_t len
, Socket
* sock
, bool incompleteOkay
)
283 pident
.inIncompleteOkay
=incompleteOkay
;
// Register for readability, then wait for the callback or for g_networkTimeoutMsec to elapse.
284 t_fdm
->addReadFD(sock
->getHandle(), handleTCPClientReadable
, pident
);
286 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
// On timeout or waitEvent error the descriptor is removed from the multiplexer again.
287 if(!ret
|| ret
==-1) { // timeout
288 t_fdm
->removeReadFD(sock
->getHandle());
290 else if(data
.empty()) {// error, EOF or other
// Multiplexer callback for the socket opened by GenUDPQueryResponse().
// Reads one datagram from `fd`, unregisters the descriptor and wakes the waiting mthread
// identified by the PacketID stored in `var`: with the received bytes, or with an empty
// string when the read failed.
297 static void handleGenUDPQueryResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
// The PacketID is copied out of the boost::any parameter.
299 PacketID pident
=*any_cast
<PacketID
>(&var
);
301 ssize_t ret
=recv(fd
, resp
, sizeof(resp
), 0);
// One-shot: the descriptor is removed regardless of the recv() result.
302 t_fdm
->removeReadFD(fd
);
304 string
data(resp
, (size_t) ret
);
305 MT
->sendEvent(pident
, &data
);
// Error path: an empty payload signals failure to the waiter.
309 MT
->sendEvent(pident
, &empty
);
310 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
// Sends `query` to `dest` over a fresh UDP socket and waits for a single response, from
// inside an mthread. The wait is bounded by g_networkTimeoutMsec.
// NOTE(review): the returned string is presumably the response payload (`data`) - the
// return statement is not visible here.
313 string
GenUDPQueryResponse(const ComboAddress
& dest
, const string
& query
)
// Socket of the destination's address family.
315 Socket
s(dest
.sin4
.sin_family
, SOCK_DGRAM
);
// Random configured local address of that family; port 0 is passed to getQueryLocalAddress().
317 ComboAddress local
= getQueryLocalAddress(dest
.sin4
.sin_family
, 0);
326 t_fdm
->addReadFD(s
.getHandle(), handleGenUDPQueryResponse
, pident
);
330 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
// On timeout the callback never ran, so the descriptor is still registered and is removed here.
332 if(!ret
|| ret
==-1) { // timeout
333 t_fdm
->removeReadFD(s
.getHandle());
335 else if(data
.empty()) {// error, EOF or other
336 // we could special case this
// `family` selects between g_localQueryAddresses4 (AF_INET) and g_localQueryAddresses6
// (any other value). `port` is given in host byte order and stored in network byte order.
342 //! pick a random query local address
343 ComboAddress
getQueryLocalAddress(int family
, uint16_t port
)
346 if(family
==AF_INET
) {
// NOTE(review): the handling of an empty address list is not visible here.
347 if(g_localQueryAddresses4
.empty())
// Index chosen with dns_random() over the number of configured addresses.
350 ret
= g_localQueryAddresses4
[dns_random(g_localQueryAddresses4
.size())];
351 ret
.sin4
.sin_port
= htons(port
);
354 if(g_localQueryAddresses6
.empty())
357 ret
= g_localQueryAddresses6
[dns_random(g_localQueryAddresses6
.size())];
359 ret
.sin6
.sin6_port
= htons(port
);
364 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
&);
// Sets the SOL_SOCKET buffer option `optname` (SO_RCVBUF or SO_SNDBUF, see the two wrappers
// below) of `fd` to `size` bytes.
366 static void setSocketBuffer(int fd
, int optname
, uint32_t size
)
369 socklen_t len
=sizeof(psize
);
// If the current size can be read and is already larger than requested, this is logged.
// NOTE(review): presumably the function returns here; also note this informational message
// is logged at Error level.
371 if(!getsockopt(fd
, SOL_SOCKET
, optname
, (char*)&psize
, &len
) && psize
> size
) {
372 L
<<Logger::Error
<<"Not decreasing socket buffer size from "<<psize
<<" to "<<size
<<endl
;
// A failing setsockopt() is logged with the errno text.
376 if (setsockopt(fd
, SOL_SOCKET
, optname
, (char*)&size
, sizeof(size
)) < 0 )
377 L
<<Logger::Error
<<"Unable to raise socket buffer size to "<<size
<<": "<<strerror(errno
)<<endl
;
// Convenience wrapper: setSocketBuffer() for the receive buffer (SO_RCVBUF).
381 static void setSocketReceiveBuffer(int fd
, uint32_t size
)
383 setSocketBuffer(fd
, SO_RCVBUF
, size
);
// Convenience wrapper: setSocketBuffer() for the send buffer (SO_SNDBUF).
386 static void setSocketSendBuffer(int fd
, uint32_t size
)
388 setSocketBuffer(fd
, SO_SNDBUF
, size
);
392 // you can ask this class for a UDP socket to send a query from
393 // this socket is not yours, don't even think about deleting it
394 // but after you call 'returnSocket' on it, don't assume anything anymore
397 unsigned int d_numsocks
;
// The socket counter starts at zero.
399 UDPClientSocks() : d_numsocks(0)
// Set of file descriptors currently handed out.
403 typedef set
<int> socks_t
;
// Creates a UDP socket for the address family of `toaddr`, connect()s it to `toaddr` and
// stores the descriptor in *fd.
406 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
407 int getSocket(const ComboAddress
& toaddr
, int* fd
)
409 *fd
=makeClientSocket(toaddr
.sin4
.sin_family
);
410 if(*fd
< 0) // temporary error - receive exception otherwise
413 if(connect(*fd
, (struct sockaddr
*)(&toaddr
), toaddr
.getSocklen()) < 0) {
415 // returnSocket(*fd);
// A PDNSException raised while closing the socket after the failed connect() is only logged.
419 catch(const PDNSException
& e
) {
420 L
<<Logger::Error
<<"Error closing UDP socket after connect() failed: "<<e
.reason
<<endl
;
// ENETUNREACH is singled out here. NOTE(review): presumably it maps to the -2 return - confirm.
423 if(err
==ENETUNREACH
) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
// Hands back a socket obtained from getSocket(); throws PDNSException when `fd` is not in d_socks.
433 void returnSocket(int fd
)
435 socks_t::iterator i
=d_socks
.find(fd
);
436 if(i
==d_socks
.end()) {
437 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd
)+") not in the pool");
439 returnSocketLocked(i
);
442 // return a socket to the pool, or simply erase it
443 void returnSocketLocked(socks_t::iterator
& i
)
445 if(i
==d_socks
.end()) {
446 throw PDNSException("Trying to return a socket not in the pool");
// Unregister from the multiplexer; a FDMultiplexerException is expected and tolerated here.
449 t_fdm
->removeReadFD(*i
);
451 catch(FDMultiplexerException
& e
) {
452 // we sometimes return a socket that has not yet been assigned to t_fdm
// A PDNSException while closing the descriptor is logged, not propagated.
457 catch(const PDNSException
& e
) {
458 L
<<Logger::Error
<<"Error closing returned UDP socket: "<<e
.reason
<<endl
;
// Creates a UDP socket of the given family and binds it to a local query address.
465 // returns -1 for errors which might go away, throws for ones that won't
466 static int makeClientSocket(int family
)
468 int ret
=socket(family
, SOCK_DGRAM
, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
// Running out of file descriptors (EMFILE) is treated as a transient condition; any other
// socket() failure throws.
470 if(ret
< 0 && errno
==EMFILE
) // this is not a catastrophic error
474 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family
)+"): "+stringerror());
476 // setCloseOnExec(ret); // we're not going to exec
// Bind attempts use a random source port, except the final try which leaves the choice to the kernel.
// NOTE(review): assuming dns_random(n) yields [0, n), the random port range is 1025..65534.
483 if(tries
==1) // fall back to kernel 'random'
486 port
= 1025 + dns_random(64510);
488 sin
=getQueryLocalAddress(family
, port
); // does htons for us
490 if (::bind(ret
, (struct sockaddr
*)&sin
, sin
.getSocklen()) >= 0)
// Reached when no bind() attempt succeeded.
494 throw PDNSException("Resolver binding to local query client socket on "+sin
.toString()+": "+stringerror());
// Per-thread UDPClientSocks instance used by asendto() and arecvfrom() below.
501 static thread_local
std::unique_ptr
<UDPClientSocks
> t_udpclientsocks
;
503 /* these two functions are used by LWRes */
// Sends an outgoing query (`data`, `len`) to `toaddr`. If an equivalent query is already
// outstanding, the new query id is chained onto that waiter instead and nothing is sent;
// *fd is then set to -1.
504 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
505 int asendto(const char *data
, size_t len
, int flags
,
506 const ComboAddress
& toaddr
, uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int* fd
)
510 pident
.domain
= domain
;
511 pident
.remote
= toaddr
;
514 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
515 pair
<MT_t::waiters_t::iterator
, MT_t::waiters_t::iterator
> chain
=MT
->d_waiters
.equal_range(pident
, PacketIDBirthdayCompare());
517 for(; chain
.first
!= chain
.second
; chain
.first
++) {
// Only waiters that own a real socket (fd > -1) can be chained onto.
518 if(chain
.first
->key
.fd
> -1) { // don't chain onto existing chained waiter!
520 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
521 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
522 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
524 chain
.first
->key
.chain
.insert(id
); // we can chain
525 *fd
=-1; // gets used in waitEvent / sendEvent later on
// No chain candidate: obtain a connected socket, register it for reading and send the query.
530 int ret
=t_udpclientsocks
->getSocket(toaddr
, fd
);
537 t_fdm
->addReadFD(*fd
, handleUDPServerResponse
, pident
);
538 ret
= send(*fd
, data
, len
, 0);
// NOTE(review): presumably the send() failure path - the socket is handed back and the
// saved errno restored for the caller's log message.
543 t_udpclientsocks
->returnSocket(*fd
);
545 errno
= tmp
; // this is for logging purposes only
// Waits (inside an mthread) for the answer to a query previously sent with asendto() and
// copies it into `data`. *d_len receives the full size of the received packet.
549 // -1 is error, 0 is timeout, 1 is success
550 int arecvfrom(char *data
, size_t len
, int flags
, const ComboAddress
& fromaddr
, size_t *d_len
,
551 uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int fd
, struct timeval
* now
)
// Cached value of the "spoof-nearmiss-max" setting.
// NOTE(review): presumably initialised only on the first call - the guard is not visible here.
553 static optional
<unsigned int> nearMissLimit
;
555 nearMissLimit
=::arg().asNum("spoof-nearmiss-max");
560 pident
.domain
=domain
;
562 pident
.remote
=fromaddr
;
565 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
, now
);
568 if(packet
.empty()) // means "error"
// At most `len` bytes are copied, but *d_len is set to the complete packet size, so it can
// exceed `len`.
571 *d_len
=packet
.size();
572 memcpy(data
,packet
.c_str(),min(len
,*d_len
));
// A limit of 0 disables the near-miss check.
573 if(*nearMissLimit
&& pident
.nearMisses
> *nearMissLimit
) {
574 L
<<Logger::Error
<<"Too many ("<<pident
.nearMisses
<<" > "<<*nearMissLimit
<<") bogus answers for '"<<domain
<<"' from "<<fromaddr
.toString()<<", assuming spoof attempt."<<endl
;
575 g_stats
.spoofCount
++;
// NOTE(review): presumably on the timeout/error path, where the socket is still ours - confirm.
581 t_udpclientsocks
->returnSocket(fd
);
// Writes the process id to the file named by s_pidfname, unless the "write-pid" setting is off.
// A failure is logged with the errno text.
586 static void writePid(void)
588 if(!::arg().mustDo("write-pid"))
// The file is opened in append mode.
590 ofstream
of(s_pidfname
.c_str(), std::ios_base::app
);
592 of
<< Utility::getpid() <<endl
;
594 L
<<Logger::Error
<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname
<<" failed: "<<strerror(errno
)<<endl
;
// Wraps an accepted TCP socket `fd` from client `addr`. Increments the global connection
// counter and this thread's per-client counter (operator[] creates the entry when missing).
597 TCPConnection::TCPConnection(int fd
, const ComboAddress
& addr
) : d_remote(addr
), d_fd(fd
)
599 ++s_currentConnections
;
600 (*t_tcpClientCounts
)[d_remote
]++;
// Closes the socket and undoes the accounting done by the constructor. Errors while closing
// are logged only.
603 TCPConnection::~TCPConnection()
606 if(closesocket(d_fd
) < 0)
607 L
<<Logger::Error
<<"Error closing socket for TCPConnection"<<endl
;
609 catch(const PDNSException
& e
) {
610 L
<<Logger::Error
<<"Error closing TCPConnection socket: "<<e
.reason
<<endl
;
// NOTE(review): the post-decrement yields the value *before* decrementing, so the entry is
// erased only when the old count was already 0. With the constructor always incrementing
// first, the old count is >= 1 here, which means entries that drop to 0 are never erased
// and the map keeps one entry per distinct client address. A pre-decrement
// ("!--(*t_tcpClientCounts)[d_remote]") looks like the intent - confirm before changing.
613 if(t_tcpClientCounts
->count(d_remote
) && !(*t_tcpClientCounts
)[d_remote
]--)
614 t_tcpClientCounts
->erase(d_remote
);
615 --s_currentConnections
;
618 AtomicCounter
TCPConnection::s_currentConnections
;
620 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
);
622 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
// `res` is the response code, `packetsize` the size of the answer, `query` may be null.
623 static void updateResponseStats(int res
, const ComboAddress
& remote
, unsigned int packetsize
, const DNSName
* query
, uint16_t qtype
)
// Answers above 1000 bytes are recorded in the large-answer ring when that ring exists.
625 if(packetsize
> 1000 && t_largeanswerremotes
)
626 t_largeanswerremotes
->push_back(remote
);
// ServFail: record the client, and the (name, type) pair when a query name was supplied.
628 case RCode::ServFail
:
629 if(t_servfailremotes
) {
630 t_servfailremotes
->push_back(remote
);
631 if(query
&& t_servfailqueryring
) // packet cache
632 t_servfailqueryring
->push_back(make_pair(*query
, qtype
));
636 case RCode::NXDomain
:
// Builds the string "(<qname>/<qtype> from <remote>)" for log messages about the query in `dc`.
// NOTE(review): the fallback literal below is presumably returned from a catch block that
// guards the formatting - the try/catch lines are not visible here.
645 static string
makeLoginfo(DNSComboWriter
* dc
)
648 return "("+dc
->d_mdp
.d_qname
.toLogString()+"/"+DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)+" from "+(dc
->d_remote
.toString())+")";
652 return "Exception making error message for exception";
// Builds a protobuf "Query" message for an incoming question and queues it on `logger`.
// maskV4 / maskV6 are the prefix lengths applied to the requestor address and, via
// setEDNSSubnet(), to the EDNS subnet before they are logged.
656 static void protobufLogQuery(const std::shared_ptr
<RemoteLogger
>& logger
, uint8_t maskV4
, uint8_t maskV6
, const boost::uuids::uuid
& uniqueId
, const ComboAddress
& remote
, const ComboAddress
& local
, const Netmask
& ednssubnet
, bool tcp
, uint16_t id
, size_t len
, const DNSName
& qname
, uint16_t qtype
, uint16_t qclass
, const std::vector
<std::string
>& policyTags
, const std::string
& requestorId
, const std::string
& deviceId
)
// Mask the client address according to its address family.
658 Netmask
requestorNM(remote
, remote
.sin4
.sin_family
== AF_INET
? maskV4
: maskV6
);
659 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
660 RecProtoBufMessage
message(DNSProtoBufMessage::Query
, uniqueId
, &requestor
, &local
, qname
, qtype
, qclass
, id
, tcp
, len
);
661 message
.setEDNSSubnet(ednssubnet
, ednssubnet
.isIpv4() ? maskV4
: maskV6
);
662 message
.setRequestorId(requestorId
);
663 message
.setDeviceId(deviceId
);
// Policy tags are attached only when there are any.
665 if (!policyTags
.empty()) {
666 message
.setPolicyTags(policyTags
);
669 // cerr <<message.toDebugString()<<endl;
// Serialize into `str` and hand the bytes to the remote logger.
671 message
.serialize(str
);
672 logger
->queueData(str
);
// Serializes an already populated response message and queues the bytes on `logger`.
675 static void protobufLogResponse(const std::shared_ptr
<RemoteLogger
>& logger
, const RecProtoBufMessage
& message
)
677 // cerr <<message.toDebugString()<<endl;
679 message
.serialize(str
);
680 logger
->queueData(str
);
685 * Chases the CNAME provided by the PolicyCustom RPZ policy.
687 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
688 * @param qtype: The QType of the original query
689 * @param sr: A SyncRes
690 * @param res: An integer that will contain the RCODE of the lookup we do
691 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
693 static void handleRPZCustom(const DNSRecord
& spoofed
, const QType
& qtype
, SyncRes
& sr
, int& res
, vector
<DNSRecord
>& ret
)
// Nothing is done unless the policy record is a CNAME.
695 if (spoofed
.d_type
== QType::CNAME
) {
// RPZ processing is switched off while chasing the target and restored afterwards.
696 bool oldWantsRPZ
= sr
.getWantsRPZ();
697 sr
.setWantsRPZ(false);
698 vector
<DNSRecord
> ans
;
// Resolve the CNAME target (taken from the record's zone representation) for the original
// qtype in class 1 (IN).
699 res
= sr
.beginResolve(DNSName(spoofed
.d_content
->getZoneRepresentation()), qtype
, 1, ans
);
// NOTE(review): presumably only ANSWER-section records are appended to `ret` - the append
// itself is not visible here.
700 for (const auto& rec
: ans
) {
701 if(rec
.d_place
== DNSResourceRecord::ANSWER
) {
705 // Reset the RPZ state of the SyncRes
706 sr
.setWantsRPZ(oldWantsRPZ
);
// Appends `rec` to the packet being built in `pw`.
// minTTL is lowered to the record's TTL when that is smaller (OPT records excluded); ttlCap
// limits the TTL written into the packet; maxAnswerSize is the size limit for the whole answer.
// NOTE(review): the return statements are not visible here; startDoResolve() treats a false
// return as "record did not fit".
710 static bool addRecordToPacket(DNSPacketWriter
& pw
, const DNSRecord
& rec
, uint32_t& minTTL
, uint32_t ttlCap
, const uint16_t maxAnswerSize
)
// The TTL on the wire is clamped to ttlCap.
712 pw
.startRecord(rec
.d_name
, rec
.d_type
, (rec
.d_ttl
> ttlCap
? ttlCap
: rec
.d_ttl
), rec
.d_class
, rec
.d_place
);
// minTTL tracks the uncapped record TTL.
714 if(rec
.d_type
!= QType::OPT
) // their TTL ain't real
715 minTTL
= min(minTTL
, rec
.d_ttl
);
717 rec
.d_content
->toPacket(pw
);
// Over the size limit: TC is set unless the record belongs to the ADDITIONAL section.
718 if(pw
.size() > static_cast<size_t>(maxAnswerSize
)) {
720 if(rec
.d_place
!= DNSResourceRecord::ADDITIONAL
) {
721 pw
.getHeader()->tc
=1;
730 static void startDoResolve(void *p
)
732 DNSComboWriter
* dc
=(DNSComboWriter
*)p
;
735 t_queryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
737 uint16_t maxanswersize
= dc
->d_tcp
? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold
);
740 if(getEDNSOpts(dc
->d_mdp
, &edo
)) {
743 "Values lower than 512 MUST be treated as equal to 512."
745 maxanswersize
= min(static_cast<uint16_t>(edo
.d_packetsize
>= 512 ? edo
.d_packetsize
: 512), g_udpTruncationThreshold
);
747 dc
->d_ednsOpts
= edo
.d_options
;
750 if (g_useIncomingECS
&& !dc
->d_ecsParsed
) {
751 for (const auto& o
: edo
.d_options
) {
752 if (o
.first
== EDNSOptionCode::ECS
) {
753 dc
->d_ecsFound
= getEDNSSubnetOptsFromString(o
.second
, &dc
->d_ednssubnet
);
759 /* perhaps there was no EDNS or no ECS but by now we looked */
760 dc
->d_ecsParsed
= true;
761 vector
<DNSRecord
> ret
;
762 vector
<uint8_t> packet
;
764 auto luaconfsLocal
= g_luaconfs
.getLocal();
765 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
767 RecProtoBufMessage
pbMessage(RecProtoBufMessage::Response
);
769 if (luaconfsLocal
->protobufServer
) {
770 Netmask
requestorNM(dc
->d_remote
, dc
->d_remote
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
771 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
772 pbMessage
.update(dc
->d_uuid
, &requestor
, &dc
->d_local
, dc
->d_tcp
, dc
->d_mdp
.d_header
.id
);
773 pbMessage
.setEDNSSubnet(dc
->d_ednssubnet
.source
, dc
->d_ednssubnet
.source
.isIpv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
774 pbMessage
.setQuestion(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
776 #endif /* HAVE_PROTOBUF */
778 DNSPacketWriter
pw(packet
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
780 pw
.getHeader()->aa
=0;
781 pw
.getHeader()->ra
=1;
782 pw
.getHeader()->qr
=1;
783 pw
.getHeader()->tc
=0;
784 pw
.getHeader()->id
=dc
->d_mdp
.d_header
.id
;
785 pw
.getHeader()->rd
=dc
->d_mdp
.d_header
.rd
;
786 pw
.getHeader()->cd
=dc
->d_mdp
.d_header
.cd
;
788 /* This is the lowest TTL seen in the records of the response,
789 so we can't cache it for longer than this value.
790 If we have a TTL cap, this value can't be larger than the
791 cap no matter what. */
792 uint32_t minTTL
= dc
->d_ttlCap
;
794 SyncRes
sr(dc
->d_now
);
798 sr
.setLuaEngine(t_pdl
);
800 sr
.d_requestor
=dc
->d_remote
; // ECS needs this too
801 if(g_dnssecmode
!= DNSSECMode::Off
) {
802 sr
.setDoDNSSEC(true);
804 // Does the requestor want DNSSEC records?
805 if(edo
.d_Z
& EDNSOpts::DNSSECOK
) {
807 g_stats
.dnssecQueries
++;
810 // Ignore the client-set CD flag
811 pw
.getHeader()->cd
=0;
813 sr
.setDNSSECValidationRequested(g_dnssecmode
== DNSSECMode::ValidateAll
|| g_dnssecmode
==DNSSECMode::ValidateForLog
|| ((dc
->d_mdp
.d_header
.ad
|| DNSSECOK
) && g_dnssecmode
==DNSSECMode::Process
));
816 sr
.setInitialRequestId(dc
->d_uuid
);
819 if (g_useIncomingECS
) {
820 sr
.setIncomingECSFound(dc
->d_ecsFound
);
821 if (dc
->d_ecsFound
) {
822 sr
.setIncomingECS(dc
->d_ednssubnet
);
826 bool tracedQuery
=false; // we could consider letting Lua know about this too
827 bool variableAnswer
= dc
->d_variable
;
828 bool shouldNotValidate
= false;
830 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
831 int res
= RCode::NoError
;
832 DNSFilterEngine::Policy appliedPolicy
;
834 RecursorLua4::DNSQuestion
dq(dc
->d_remote
, dc
->d_local
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_tcp
, variableAnswer
, wantsRPZ
);
835 dq
.ednsFlags
= &edo
.d_Z
;
836 dq
.ednsOptions
= &dc
->d_ednsOpts
;
838 dq
.discardedPolicies
= &sr
.d_discardedPolicies
;
839 dq
.policyTags
= &dc
->d_policyTags
;
840 dq
.appliedPolicy
= &appliedPolicy
;
841 dq
.currentRecords
= &ret
;
842 dq
.dh
= &dc
->d_mdp
.d_header
;
843 dq
.data
= dc
->d_data
;
845 dq
.requestorId
= dc
->d_requestorId
;
846 dq
.deviceId
= dc
->d_deviceId
;
849 if(dc
->d_mdp
.d_qtype
==QType::ANY
&& !dc
->d_tcp
&& g_anyToTcp
) {
850 pw
.getHeader()->tc
= 1;
852 variableAnswer
= true;
856 if(t_traceRegex
&& t_traceRegex
->match(dc
->d_mdp
.d_qname
.toString())) {
857 sr
.setLogMode(SyncRes::Store
);
862 if(!g_quiet
|| tracedQuery
) {
863 L
<<Logger::Warning
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] " << (dc
->d_tcp
? "TCP " : "") << "question for '"<<dc
->d_mdp
.d_qname
<<"|"
864 <<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)<<"' from "<<dc
->getRemote();
865 if(!dc
->d_ednssubnet
.source
.empty()) {
866 L
<<" (ecs "<<dc
->d_ednssubnet
.source
.toString()<<")";
871 sr
.setId(MT
->getTid());
872 if(!dc
->d_mdp
.d_header
.rd
)
876 t_pdl
->prerpz(dq
, res
);
879 // Check if the query has a policy attached to it
881 appliedPolicy
= luaconfsLocal
->dfe
.getQueryPolicy(dc
->d_mdp
.d_qname
, dc
->d_remote
, sr
.d_discardedPolicies
);
884 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
885 if(!t_pdl
|| !t_pdl
->preresolve(dq
, res
)) {
887 sr
.setWantsRPZ(wantsRPZ
);
889 switch(appliedPolicy
.d_kind
) {
890 case DNSFilterEngine::PolicyKind::NoAction
:
892 case DNSFilterEngine::PolicyKind::Drop
:
893 g_stats
.policyDrops
++;
894 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
898 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
899 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
902 case DNSFilterEngine::PolicyKind::NODATA
:
903 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
906 case DNSFilterEngine::PolicyKind::Custom
:
907 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
909 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
910 ret
.push_back(spoofed
);
911 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
913 case DNSFilterEngine::PolicyKind::Truncate
:
915 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
917 pw
.getHeader()->tc
=1;
924 // Query was not handled for QNAME Policy reasons, now actually go out to find an answer
926 res
= sr
.beginResolve(dc
->d_mdp
.d_qname
, QType(dc
->d_mdp
.d_qtype
), dc
->d_mdp
.d_qclass
, ret
);
927 shouldNotValidate
= sr
.wasOutOfBand();
929 catch(ImmediateServFailException
&e
) {
930 if(g_logCommonErrors
)
931 L
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during resolve of '"<<dc
->d_mdp
.d_qname
<<"' because: "<<e
.reason
<<endl
;
932 res
= RCode::ServFail
;
935 dq
.validationState
= sr
.getValidationState();
937 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
938 if (res
== -2) { // XXX This block should be macro'd, it is repeated post-resolve.
939 appliedPolicy
= sr
.d_appliedPolicy
;
940 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
941 switch(appliedPolicy
.d_kind
) {
942 case DNSFilterEngine::PolicyKind::NoAction
: // This can never happen
943 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
944 case DNSFilterEngine::PolicyKind::Drop
:
945 g_stats
.policyDrops
++;
949 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
954 case DNSFilterEngine::PolicyKind::NODATA
:
959 case DNSFilterEngine::PolicyKind::Truncate
:
963 pw
.getHeader()->tc
=1;
968 case DNSFilterEngine::PolicyKind::Custom
:
971 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
972 ret
.push_back(spoofed
);
973 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
979 appliedPolicy
= luaconfsLocal
->dfe
.getPostPolicy(ret
, sr
.d_discardedPolicies
);
983 if(res
== RCode::NoError
) {
985 for(; i
!= ret
.cend(); ++i
)
986 if(i
->d_type
== dc
->d_mdp
.d_qtype
&& i
->d_place
== DNSResourceRecord::ANSWER
)
988 if(i
== ret
.cend() && t_pdl
->nodata(dq
, res
))
989 shouldNotValidate
= true;
992 else if(res
== RCode::NXDomain
&& t_pdl
->nxdomain(dq
, res
))
993 shouldNotValidate
= true;
995 if(t_pdl
->postresolve(dq
, res
))
996 shouldNotValidate
= true;
999 if (wantsRPZ
) { //XXX This block is repeated, see above
1000 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1001 switch(appliedPolicy
.d_kind
) {
1002 case DNSFilterEngine::PolicyKind::NoAction
:
1004 case DNSFilterEngine::PolicyKind::Drop
:
1005 g_stats
.policyDrops
++;
1009 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1011 res
=RCode::NXDomain
;
1014 case DNSFilterEngine::PolicyKind::NODATA
:
1019 case DNSFilterEngine::PolicyKind::Truncate
:
1023 pw
.getHeader()->tc
=1;
1028 case DNSFilterEngine::PolicyKind::Custom
:
1031 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
1032 ret
.push_back(spoofed
);
1033 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1039 if(res
== PolicyDecision::DROP
) {
1040 g_stats
.policyDrops
++;
1045 if(tracedQuery
|| res
== -1 || res
== RCode::ServFail
|| pw
.getHeader()->rcode
== RCode::ServFail
)
1047 string
trace(sr
.getTrace());
1048 if(!trace
.empty()) {
1049 vector
<string
> lines
;
1050 boost::split(lines
, trace
, boost::is_any_of("\n"));
1051 for(const string
& line
: lines
) {
1053 L
<<Logger::Warning
<< line
<< endl
;
1059 pw
.getHeader()->rcode
=RCode::ServFail
;
1060 // no commit here, because no record
1061 g_stats
.servFails
++;
1064 pw
.getHeader()->rcode
=res
;
1066 // Does the validation mode or query demand validation?
1067 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1070 L
<<Logger::Warning
<<"Starting validation of answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<endl
;
1073 auto state
= sr
.getValidationState();
1075 if(state
== Secure
) {
1077 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates correctly"<<endl
;
1080 // Is the query source interested in the value of the ad-bit?
1081 if (dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)
1082 pw
.getHeader()->ad
=1;
1084 else if(state
== Insecure
) {
1086 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates as Insecure"<<endl
;
1089 pw
.getHeader()->ad
=0;
1091 else if(state
== Bogus
) {
1092 if(g_dnssecLogBogus
|| sr
.doLog() || g_dnssecmode
== DNSSECMode::ValidateForLog
) {
1093 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates as Bogus"<<endl
;
1096 // Does the query or validation mode demand sending out a SERVFAIL on validation errors?
1097 if(!pw
.getHeader()->cd
&& (g_dnssecmode
== DNSSECMode::ValidateAll
|| dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)) {
1099 L
<<Logger::Warning
<<"Sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" because recursor or query demands it for Bogus results"<<endl
;
1102 pw
.getHeader()->rcode
=RCode::ServFail
;
1106 L
<<Logger::Warning
<<"Not sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" Bogus validation since neither config nor query demands this"<<endl
;
1111 catch(ImmediateServFailException
&e
) {
1112 if(g_logCommonErrors
)
1113 L
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during validation of '"<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<"' because: "<<e
.reason
<<endl
;
1114 pw
.getHeader()->rcode
=RCode::ServFail
;
1120 orderAndShuffle(ret
);
1121 if(auto sl
= luaconfsLocal
->sortlist
.getOrderCmp(dc
->d_remote
)) {
1122 stable_sort(ret
.begin(), ret
.end(), *sl
);
1123 variableAnswer
=true;
1127 bool needCommit
= false;
1128 for(auto i
=ret
.cbegin(); i
!=ret
.cend(); ++i
) {
1130 ( i
->d_type
== QType::NSEC3
||
1132 ( i
->d_type
== QType::RRSIG
|| i
->d_type
==QType::NSEC
) &&
1134 ( dc
->d_mdp
.d_qtype
!= i
->d_type
&& dc
->d_mdp
.d_qtype
!= QType::ANY
) ||
1135 i
->d_place
!= DNSResourceRecord::ANSWER
1143 if (!addRecordToPacket(pw
, *i
, minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1149 #ifdef HAVE_PROTOBUF
1150 if(luaconfsLocal
->protobufServer
&& (i
->d_type
== QType::A
|| i
->d_type
== QType::AAAA
|| i
->d_type
== QType::CNAME
)) {
1151 pbMessage
.addRR(*i
);
1161 /* we try to add the EDNS OPT RR even for truncated answers,
1163 "The minimal response MUST be the DNS header, question section, and an
1164 OPT record. This MUST also occur when a truncated response (using
1165 the DNS header's TC bit) is returned."
1167 if (addRecordToPacket(pw
, makeOpt(edo
.d_packetsize
, 0, edo
.d_Z
), minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1172 g_rs
.submitResponse(dc
->d_mdp
.d_qtype
, packet
.size(), !dc
->d_tcp
);
1173 updateResponseStats(res
, dc
->d_remote
, packet
.size(), &dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1174 #ifdef HAVE_PROTOBUF
1175 if (luaconfsLocal
->protobufServer
&& (!luaconfsLocal
->protobufTaggedOnly
|| (appliedPolicy
.d_name
&& !appliedPolicy
.d_name
->empty()) || !dc
->d_policyTags
.empty())) {
1176 pbMessage
.setBytes(packet
.size());
1177 pbMessage
.setResponseCode(pw
.getHeader()->rcode
);
1178 if (appliedPolicy
.d_name
) {
1179 pbMessage
.setAppliedPolicy(*appliedPolicy
.d_name
);
1180 pbMessage
.setAppliedPolicyType(appliedPolicy
.d_type
);
1182 pbMessage
.setPolicyTags(dc
->d_policyTags
);
1183 pbMessage
.setQueryTime(dc
->d_now
.tv_sec
, dc
->d_now
.tv_usec
);
1184 pbMessage
.setRequestorId(dq
.requestorId
);
1185 pbMessage
.setDeviceId(dq
.deviceId
);
1186 protobufLogResponse(luaconfsLocal
->protobufServer
, pbMessage
);
1193 fillMSGHdr(&msgh
, &iov
, cbuf
, 0, (char*)&*packet
.begin(), packet
.size(), &dc
->d_remote
);
1194 msgh
.msg_control
=NULL
;
1196 if(g_fromtosockets
.count(dc
->d_socket
)) {
1197 addCMsgSrcAddr(&msgh
, cbuf
, &dc
->d_local
, 0);
1199 if(sendmsg(dc
->d_socket
, &msgh
, 0) < 0 && g_logCommonErrors
)
1200 L
<<Logger::Warning
<<"Sending UDP reply to client "<<dc
->d_remote
.toStringWithPort()<<" failed with: "<<strerror(errno
)<<endl
;
1202 if(!SyncRes::s_nopacketcache
&& !variableAnswer
&& !sr
.wasVariable() ) {
1203 t_packetCache
->insertResponsePacket(dc
->d_tag
, dc
->d_qhash
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
,
1204 string((const char*)&*packet
.begin(), packet
.size()),
1206 pw
.getHeader()->rcode
== RCode::ServFail
? SyncRes::s_packetcacheservfailttl
:
1207 min(minTTL
,SyncRes::s_packetcachettl
),
1210 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1214 buf
[0]=packet
.size()/256;
1215 buf
[1]=packet
.size()%256;
1217 Utility::iovec iov
[2];
1219 iov
[0].iov_base
=(void*)buf
; iov
[0].iov_len
=2;
1220 iov
[1].iov_base
=(void*)&*packet
.begin(); iov
[1].iov_len
= packet
.size();
1222 int wret
=Utility::writev(dc
->d_socket
, iov
, 2);
1226 L
<<Logger::Error
<<"EOF writing TCP answer to "<<dc
->getRemote()<<endl
;
1228 L
<<Logger::Error
<<"Error writing TCP answer to "<<dc
->getRemote()<<": "<< strerror(errno
) <<endl
;
1229 else if((unsigned int)wret
!= 2 + packet
.size())
1230 L
<<Logger::Error
<<"Oops, partial answer sent to "<<dc
->getRemote()<<" for "<<dc
->d_mdp
.d_qname
<<" (size="<< (2 + packet
.size()) <<", sent "<<wret
<<")"<<endl
;
1234 // update tcp connection status, either by closing or moving to 'BYTE0'
1237 // no need to remove us from FDM, we weren't there
1241 dc
->d_tcpConnection
->queriesCount
++;
1242 if (g_tcpMaxQueriesPerConn
&& dc
->d_tcpConnection
->queriesCount
>= g_tcpMaxQueriesPerConn
) {
1246 dc
->d_tcpConnection
->state
=TCPConnection::BYTE0
;
1247 Utility::gettimeofday(&g_now
, 0); // needs to be updated
1248 t_fdm
->addReadFD(dc
->d_socket
, handleRunningTCPQuestion
, dc
->d_tcpConnection
);
1249 t_fdm
->setReadTTD(dc
->d_socket
, g_now
, g_tcpTimeout
);
1253 float spent
=makeFloat(sr
.getNow()-dc
->d_now
);
1255 L
<<Logger::Error
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] answer to "<<(dc
->d_mdp
.d_header
.rd
?"":"non-rd ")<<"question '"<<dc
->d_mdp
.d_qname
<<"|"<<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
);
1256 L
<<"': "<<ntohs(pw
.getHeader()->ancount
)<<" answers, "<<ntohs(pw
.getHeader()->arcount
)<<" additional, took "<<sr
.d_outqueries
<<" packets, "<<
1257 sr
.d_totUsec
/1000.0<<" netw ms, "<< spent
*1000.0<<" tot ms, "<<
1258 sr
.d_throttledqueries
<<" throttled, "<<sr
.d_timeouts
<<" timeouts, "<<sr
.d_tcpoutqueries
<<" tcp connections, rcode="<< res
;
1260 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1261 L
<< ", dnssec="<<vStates
[sr
.getValidationState()];
1268 sr
.d_outqueries
? t_RC
->cacheMisses
++ : t_RC
->cacheHits
++;
1271 g_stats
.answers0_1
++;
1272 else if(spent
< 0.010)
1273 g_stats
.answers1_10
++;
1274 else if(spent
< 0.1)
1275 g_stats
.answers10_100
++;
1276 else if(spent
< 1.0)
1277 g_stats
.answers100_1000
++;
1279 g_stats
.answersSlow
++;
1281 uint64_t newLat
=(uint64_t)(spent
*1000000);
1282 newLat
= min(newLat
,(uint64_t)(((uint64_t) g_networkTimeoutMsec
)*1000)); // outliers of several minutes exist..
1283 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ (float)newLat
/g_latencyStatSize
;
1284 // no worries, we do this for packet cache hits elsewhere
1286 auto ourtime
= 1000.0*spent
-sr
.d_totUsec
/1000.0; // in msec
1288 g_stats
.ourtime0_1
++;
1289 else if(ourtime
< 2)
1290 g_stats
.ourtime1_2
++;
1291 else if(ourtime
< 4)
1292 g_stats
.ourtime2_4
++;
1293 else if(ourtime
< 8)
1294 g_stats
.ourtime4_8
++;
1295 else if(ourtime
< 16)
1296 g_stats
.ourtime8_16
++;
1297 else if(ourtime
< 32)
1298 g_stats
.ourtime16_32
++;
1300 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1301 g_stats
.ourtimeSlow
++;
1303 if(ourtime
>= 0.0) {
1304 newLat
=ourtime
*1000; // usec
1305 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ (float)newLat
/g_latencyStatSize
;
1307 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1311 catch(PDNSException
&ae
) {
1312 L
<<Logger::Error
<<"startDoResolve problem "<<makeLoginfo(dc
)<<": "<<ae
.reason
<<endl
;
1315 catch(MOADNSException
& e
) {
1316 L
<<Logger::Error
<<"DNS parser error "<<makeLoginfo(dc
) <<": "<<dc
->d_mdp
.d_qname
<<", "<<e
.what()<<endl
;
1319 catch(std::exception
& e
) {
1320 L
<<Logger::Error
<<"STL error "<< makeLoginfo(dc
)<<": "<<e
.what();
1322 // Luawrapper nests the exception from Lua, so we unnest it here
1324 std::rethrow_if_nested(e
);
1325 } catch(const std::exception
& ne
) {
1326 L
<<". Extra info: "<<ne
.what();
1333 L
<<Logger::Error
<<"Any other exception in a resolver context "<< makeLoginfo(dc
) <<endl
;
1336 g_stats
.maxMThreadStackUsage
= max(MT
->getMaxStackUsage(), g_stats
.maxMThreadStackUsage
);
// Creates the control channel socket and applies ownership/permission settings to it.
// The socket path is built from the "socket-dir" setting, s_programname, a "." + processNum
// component and the ".controlsocket" suffix; s_rcc then listens on that path.
// NOTE(review): the condition guarding the processNum component is on a line not shown in this
// listing - presumably it is only appended when processNum is non-negative (default is -1).
// If "socket-group" and/or "socket-owner" are set the socket is chown()ed, and if "socket-mode"
// is set it is chmod()ed; a failure of either call ends in unixDie().
1339 static void makeControlChannelSocket(int processNum
=-1)
1341 string sockname
=::arg()["socket-dir"]+"/"+s_programname
;
1343 sockname
+= "."+std::to_string(processNum
);
1344 sockname
+=".controlsocket";
1345 s_rcc
.listen(sockname
);
// sockgroup / sockowner are declared on lines not shown here; the "> -1" test below treats
// -1 as "not configured".
1350 if (!::arg().isEmpty("socket-group"))
1351 sockgroup
=::arg().asGid("socket-group");
1352 if (!::arg().isEmpty("socket-owner"))
1353 sockowner
=::arg().asUid("socket-owner");
1355 if (sockgroup
> -1 || sockowner
> -1) {
1356 if(chown(sockname
.c_str(), sockowner
, sockgroup
) < 0) {
1357 unixDie("Failed to chown control socket");
1361 // do mode change if socket-mode is given
1362 if(!::arg().isEmpty("socket-mode")) {
1363 mode_t sockmode
=::arg().asMode("socket-mode");
1364 if(chmod(sockname
.c_str(), sockmode
) < 0) {
1365 unixDie("Failed to chmod control socket");
// Extracts the query name, type and class from the raw DNS query in `question` into *dnsname,
// *qtype and *qclass, and then looks for an EDNS Client Subnet (ECS) option.
// The ECS search only happens when the header's arcount is exactly 1, more than pos + 11 bytes
// are available, and the record at `pos` starts with a root label followed by type OPT.
// Two lookup paths are visible: getEDNSOption() for the ECS option alone, and getEDNSOptions()
// which fills *options and is followed by a lookup of the ECS entry in that map.
// NOTE(review): the condition selecting between the two paths, the declarations of `eso` and
// `ecsLen`, the writes to *ednssubnet and the return statements are on lines not shown in this
// listing - presumably the map path is taken when `options` is non-null; confirm in the full file.
// `pos` is sizeof(dnsheader) + the bytes consumed by the qname parse + 4 (presumably the
// qtype and qclass fields), i.e. where the additional record is expected to start.
1370 static bool getQNameAndSubnet(const std::string
& question
, DNSName
* dnsname
, uint16_t* qtype
, uint16_t* qclass
, EDNSSubnetOpts
* ednssubnet
, std::map
<uint16_t, EDNSOptionView
>* options
)
1373 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
1374 size_t questionLen
= question
.length();
1375 unsigned int consumed
=0;
1376 *dnsname
=DNSName(question
.c_str(), questionLen
, sizeof(dnsheader
), false, qtype
, qclass
, &consumed
);
1378 size_t pos
= sizeof(dnsheader
)+consumed
+4;
1379 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1381 if(ntohs(dh
->arcount
) == 1 && questionLen
> pos
+ 11) { // this code can extract one (1) EDNS Subnet option
1382 /* OPT root label (1) followed by type (2) */
1383 if(question
.at(pos
)==0 && question
.at(pos
+1)==0 && question
.at(pos
+2)==QType::OPT
) {
1385 char* ecsStart
= nullptr;
// pos+9 skips the OPT RR's root label (1), type (2), class (2) and ttl (4), so the parser
// is handed the data starting at the rdlen field.
1387 int res
= getEDNSOption((char*)question
.c_str()+pos
+9, questionLen
- pos
- 9, EDNSOptionCode::ECS
, &ecsStart
, &ecsLen
);
// The first 4 bytes at ecsStart are skipped before parsing - presumably the option code (2)
// and option length (2); confirm against getEDNSOption().
1388 if (res
== 0 && ecsLen
> 4) {
1390 if(getEDNSSubnetOptsFromString(ecsStart
+ 4, ecsLen
- 4, &eso
)) {
1397 int res
= getEDNSOptions((char*)question
.c_str()+pos
+9, questionLen
- pos
- 9, *options
);
1399 const auto& it
= options
->find(EDNSOptionCode::ECS
);
1400 if (it
!= options
->end() && it
->second
.content
!= nullptr && it
->second
.size
> 0) {
1402 if(getEDNSSubnetOptsFromString(it
->second
.content
, it
->second
.size
, &eso
)) {
// Read callback for an already accepted TCP client connection; `var` carries the
// shared_ptr<TCPConnection> that was passed to t_fdm->addReadFD().
// The function is a state machine on conn->state:
//   BYTE0       - recv() up to 2 bytes of the length prefix into conn->data
//   BYTE1       - recv() the second length byte into conn->data+1
//   GETQUESTION - recv() the query body until conn->bytesread reaches conn->qlen
// When recv() returns 0 or a negative value the fd is removed from the multiplexer.
// Once the whole query has arrived the fd is removed from t_fdm, a DNSComboWriter is created
// from the buffer, the local address is looked up with getsockname(), the optional ECS / Lua
// gettag / protobuf steps run, and the query is handed to startDoResolve via MT->makeThread().
// Queries with the QR bit set or a non-zero opcode are counted in g_stats.ignoredCount.
// NOTE(review): several conditions, `return`s and the cleanup of `dc`/the connection on the
// error paths are on lines not shown in this listing.
1414 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
1416 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(var
);
1418 if(conn
->state
==TCPConnection::BYTE0
) {
1419 ssize_t bytes
=recv(conn
->getFD(), conn
->data
, 2, 0);
// The tests on `bytes` that select between the two assignments below are not shown here -
// presumably BYTE1 after a 1-byte read and GETQUESTION after a full 2-byte read.
1421 conn
->state
=TCPConnection::BYTE1
;
// The length prefix is big-endian: data[0] is the high byte, data[1] the low byte.
1423 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
1425 conn
->state
=TCPConnection::GETQUESTION
;
1427 if(!bytes
|| bytes
< 0) {
1428 t_fdm
->removeReadFD(fd
);
1432 else if(conn
->state
==TCPConnection::BYTE1
) {
1433 ssize_t bytes
=recv(conn
->getFD(), conn
->data
+1, 1, 0);
1435 conn
->state
=TCPConnection::GETQUESTION
;
1436 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
1439 if(!bytes
|| bytes
< 0) {
1440 if(g_logCommonErrors
)
1441 L
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toString() <<" disconnected after first byte"<<endl
;
1442 t_fdm
->removeReadFD(fd
);
1446 else if(conn
->state
==TCPConnection::GETQUESTION
) {
// Only the bytes still missing from the query are requested.
1447 ssize_t bytes
=recv(conn
->getFD(), conn
->data
+ conn
->bytesread
, conn
->qlen
- conn
->bytesread
, 0);
// The upper bound keeps the cast to uint16_t below from truncating.
1448 if(!bytes
|| bytes
< 0 || bytes
> std::numeric_limits
<std::uint16_t>::max()) {
1449 L
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toString() <<" disconnected while reading question body"<<endl
;
1450 t_fdm
->removeReadFD(fd
);
1453 conn
->bytesread
+=(uint16_t)bytes
;
1454 if(conn
->bytesread
==conn
->qlen
) {
1455 t_fdm
->removeReadFD(fd
); // should no longer awake ourselves when there is data to read
1457 DNSComboWriter
* dc
=nullptr;
1459 dc
=new DNSComboWriter(conn
->data
, conn
->qlen
, g_now
);
1461 catch(MOADNSException
&mde
) {
1462 g_stats
.clientParseError
++;
1463 if(g_logCommonErrors
)
1464 L
<<Logger::Error
<<"Unable to parse packet from TCP client "<< conn
->d_remote
.toString() <<endl
;
1467 dc
->d_tcpConnection
= conn
; // carry the torch
1468 dc
->setSocket(conn
->getFD()); // this is the only time a copy is made of the actual fd
1470 dc
->setRemote(&conn
->d_remote
);
// `dest` (declared on a line not shown) is zeroed, given the client's address family and
// then filled in by getsockname() with the local end of the connection.
1472 memset(&dest
, 0, sizeof(dest
));
1473 dest
.sin4
.sin_family
= conn
->d_remote
.sin4
.sin_family
;
1474 socklen_t len
= dest
.getSocklen();
1475 getsockname(conn
->getFD(), (sockaddr
*)&dest
, &len
); // if this fails, we're ok with it
1480 bool needECS
= false;
1483 #ifdef HAVE_PROTOBUF
1484 auto luaconfsLocal
= g_luaconfs
.getLocal();
1485 if (luaconfsLocal
->protobufServer
) {
// ECS parsing is done when something needs it: needECS, or a Lua gettag / gettag_ffi hook.
1490 if(needECS
|| (t_pdl
&& (t_pdl
->d_gettag_ffi
|| t_pdl
->d_gettag
))) {
1493 std::map
<uint16_t, EDNSOptionView
> ednsOptions
;
1494 dc
->d_ecsParsed
= true;
1495 dc
->d_ecsFound
= getQNameAndSubnet(std::string(conn
->data
, conn
->qlen
), &qname
, &qtype
, &qclass
, &dc
->d_ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr);
// gettag_ffi is preferred over gettag when both hooks are defined.
1499 if (t_pdl
->d_gettag_ffi
) {
1500 dc
->d_tag
= t_pdl
->gettag_ffi(conn
->d_remote
, dc
->d_ednssubnet
.source
, dest
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
, dc
->d_ttlCap
, dc
->d_variable
);
1502 else if (t_pdl
->d_gettag
) {
1503 dc
->d_tag
= t_pdl
->gettag(conn
->d_remote
, dc
->d_ednssubnet
.source
, dest
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
);
1506 catch(const std::exception
& e
) {
1507 if(g_logCommonErrors
)
1508 L
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
1512 catch(const std::exception
& e
)
1514 if(g_logCommonErrors
)
1515 L
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
1518 #ifdef HAVE_PROTOBUF
1519 if(luaconfsLocal
->protobufServer
|| luaconfsLocal
->outgoingProtobufServer
) {
1520 dc
->d_requestorId
= requestorId
;
1521 dc
->d_deviceId
= deviceId
;
1522 dc
->d_uuid
= (*t_uuidGenerator
)();
1525 if(luaconfsLocal
->protobufServer
) {
1527 const struct dnsheader
* dh
= (const struct dnsheader
*) conn
->data
;
1529 if (!luaconfsLocal
->protobufTaggedOnly
) {
1530 protobufLogQuery(luaconfsLocal
->protobufServer
, luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, dc
->d_uuid
, conn
->d_remote
, dest
, dc
->d_ednssubnet
.source
, true, dh
->id
, conn
->qlen
, qname
, qtype
, qclass
, dc
->d_policyTags
, dc
->d_requestorId
, dc
->d_deviceId
);
1533 catch(std::exception
& e
) {
1534 if(g_logCommonErrors
)
1535 L
<<Logger::Warning
<<"Error parsing a TCP query packet for edns subnet: "<<e
.what()<<endl
;
// A message with the QR bit set is an answer, not a query: count it and log it.
1539 if(dc
->d_mdp
.d_header
.qr
) {
1541 g_stats
.ignoredCount
++;
1542 L
<<Logger::Error
<<"Ignoring answer from TCP client "<< conn
->d_remote
.toString() <<" on server socket!"<<endl
;
// Any non-zero opcode is treated as "not a standard query".
1545 if(dc
->d_mdp
.d_header
.opcode
) {
1547 g_stats
.ignoredCount
++;
1548 L
<<Logger::Error
<<"Ignoring non-query opcode from TCP client "<< conn
->d_remote
.toString() <<" on server socket!"<<endl
;
1553 ++g_stats
.tcpqcounter
;
1554 MT
->makeThread(startDoResolve
, dc
); // deletes dc, will set state to BYTE0 again
1561 //! Handle new incoming TCP connection
// Read callback for a listening TCP socket `fd`: accept()s one connection and closes it
// again when
//   - MT->numProcesses() exceeds g_maxMThreads (counted in g_stats.overCapacityDrops),
//   - the peer address is not matched by t_allowFrom (counted in g_stats.unauthorizedTCP), or
//   - g_maxTCPPerClient is set and the peer already has that many connections according to
//     t_tcpClientCounts (counted in g_stats.tcpClientOverflow).
// A PDNSException from closesocket() on these paths is logged.
// Otherwise the socket is made non-blocking, wrapped in a TCPConnection in state BYTE0 and
// registered with t_fdm using handleRunningTCPQuestion as the read callback; setReadTTD() is
// called with the current time and g_tcpTimeout.
// The peer address is pushed to t_remotes before the allow-from check.
// NOTE(review): the check of accept()'s return value, the declarations of `addr`/`now` and the
// `return`s after each drop are on lines not shown in this listing.
1562 static void handleNewTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& )
1565 socklen_t addrlen
=sizeof(addr
);
1566 int newsock
=accept(fd
, (struct sockaddr
*)&addr
, &addrlen
);
1568 if(MT
->numProcesses() > g_maxMThreads
) {
1569 g_stats
.overCapacityDrops
++;
1571 closesocket(newsock
);
1573 catch(const PDNSException
& e
) {
1574 L
<<Logger::Error
<<"Error closing TCP socket after an over capacity drop: "<<e
.reason
<<endl
;
1580 t_remotes
->push_back(addr
);
1581 if(t_allowFrom
&& !t_allowFrom
->match(&addr
)) {
1583 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping TCP query from "<<addr
.toString()<<", address not matched by allow-from"<<endl
;
1585 g_stats
.unauthorizedTCP
++;
1587 closesocket(newsock
);
1589 catch(const PDNSException
& e
) {
1590 L
<<Logger::Error
<<"Error closing TCP socket after an ACL drop: "<<e
.reason
<<endl
;
1594 if(g_maxTCPPerClient
&& t_tcpClientCounts
->count(addr
) && (*t_tcpClientCounts
)[addr
] >= g_maxTCPPerClient
) {
1595 g_stats
.tcpClientOverflow
++;
1597 closesocket(newsock
); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1599 catch(const PDNSException
& e
) {
1600 L
<<Logger::Error
<<"Error closing TCP socket after an overflow drop: "<<e
.reason
<<endl
;
1605 setNonBlocking(newsock
);
1606 std::shared_ptr
<TCPConnection
> tc
= std::make_shared
<TCPConnection
>(newsock
, addr
);
1607 tc
->state
=TCPConnection::BYTE0
;
1609 t_fdm
->addReadFD(tc
->getFD(), handleRunningTCPQuestion
, tc
);
1612 Utility::gettimeofday(&now
, 0);
1613 t_fdm
->setReadTTD(tc
->getFD(), now
, g_tcpTimeout
);
// Processes one UDP query received on `fd` from `fromaddr` (sent to `destaddr`).
// `tv` is the receive timestamp; when tv.tv_sec is non-zero and the query is more than
// 1000 ms old it is counted in g_stats.tooOldDrops. A zero tv.tv_sec disables that check.
// Visible steps, in order:
//   1. refresh g_now, age check, IPv6 counter
//   2. optional ECS extraction and Lua gettag / gettag_ffi call to compute the cache tag `ctag`
//   3. optional protobuf logging of the query
//   4. packet cache lookup; on a hit the response is aged with ageDNSPacket(), sent with
//      sendmsg() and the latency averages are updated assuming 0 usec
//   5. Lua ipfilter hook (g_stats.policyDrops) and MThread capacity check
//      (g_stats.overCapacityDrops)
//   6. a DNSComboWriter is allocated, populated and handed to startDoResolve through
//      MT->makeThread(), which deletes it
// NOTE(review): the return statements, several declarations (qname, qtype, qclass, response,
// age, qhash, requestorId, deviceId, msgh, iov, cbuf) and a number of conditions are on lines
// not shown in this listing, so the meaning of the returned string* cannot be stated from here.
1617 static string
* doProcessUDPQuestion(const std::string
& question
, const ComboAddress
& fromaddr
, const ComboAddress
& destaddr
, struct timeval tv
, int fd
)
1619 gettimeofday(&g_now
, 0);
1620 struct timeval diff
= g_now
- tv
;
// delta is the age of the query in milliseconds.
1621 double delta
=(diff
.tv_sec
*1000 + diff
.tv_usec
/1000.0);
1623 if(tv
.tv_sec
&& delta
> 1000.0) {
1624 g_stats
.tooOldDrops
++;
1629 if(fromaddr
.sin4
.sin_family
==AF_INET6
)
1630 g_stats
.ipv6qcounter
++;
1633 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
1634 unsigned int ctag
=0;
1636 bool needECS
= false;
1637 std::vector
<std::string
> policyTags
;
1638 LuaContext::LuaObject data
;
1641 #ifdef HAVE_PROTOBUF
1642 boost::uuids::uuid uniqueId
;
1643 auto luaconfsLocal
= g_luaconfs
.getLocal();
1644 if (luaconfsLocal
->protobufServer
) {
1645 uniqueId
= (*t_uuidGenerator
)();
1647 } else if (luaconfsLocal
->outgoingProtobufServer
) {
1648 uniqueId
= (*t_uuidGenerator
)();
1651 EDNSSubnetOpts ednssubnet
;
1652 bool ecsFound
= false;
1653 bool ecsParsed
= false;
// ttlCap starts at the maximum uint32_t value; it is passed to gettag_ffi below and later
// copied into dc->d_ttlCap.
1654 uint32_t ttlCap
= std::numeric_limits
<uint32_t>::max();
1655 bool variable
= false;
1661 bool qnameParsed
=false;
// Allocation-tracing debug output using g_mtracer.
// NOTE(review): presumably compiled only in a malloc-trace build - the guard is not shown here.
1664 static uint64_t last=0;
1666 g_mtracer->clearAllocators();
1667 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1668 last=g_mtracer->getAllocs();
1669 cout<<g_mtracer->topAllocatorsString()<<endl;
1670 g_mtracer->clearAllocators();
// ECS parsing is done when something needs it: needECS, or a Lua gettag / gettag_ffi hook.
1674 if(needECS
|| (t_pdl
&& (t_pdl
->d_gettag
|| t_pdl
->d_gettag_ffi
))) {
1676 std::map
<uint16_t, EDNSOptionView
> ednsOptions
;
1677 ecsFound
= getQNameAndSubnet(question
, &qname
, &qtype
, &qclass
, &ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr);
// gettag_ffi is preferred over gettag when both hooks are defined.
1683 if (t_pdl
->d_gettag_ffi
) {
1684 ctag
= t_pdl
->gettag_ffi(fromaddr
, ednssubnet
.source
, destaddr
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
, ttlCap
, variable
);
1686 else if (t_pdl
->d_gettag
) {
1687 ctag
=t_pdl
->gettag(fromaddr
, ednssubnet
.source
, destaddr
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
);
1690 catch(const std::exception
& e
) {
1691 if(g_logCommonErrors
)
1692 L
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
1696 catch(const std::exception
& e
)
1698 if(g_logCommonErrors
)
1699 L
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
1703 bool cacheHit
= false;
1704 RecProtoBufMessage
pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response
);
1705 #ifdef HAVE_PROTOBUF
1706 if(luaconfsLocal
->protobufServer
) {
1707 if (!luaconfsLocal
->protobufTaggedOnly
|| !policyTags
.empty()) {
1708 protobufLogQuery(luaconfsLocal
->protobufServer
, luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, uniqueId
, fromaddr
, destaddr
, ednssubnet
.source
, false, dh
->id
, question
.size(), qname
, qtype
, qclass
, policyTags
, requestorId
, deviceId
);
1711 #endif /* HAVE_PROTOBUF */
1713 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1714 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1715 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
// Two lookup variants: one that reuses the already parsed qname/qtype/qclass and one that
// takes only the raw question. NOTE(review): the selecting condition is not shown here -
// presumably qnameParsed.
1717 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, qtype
, qclass
, g_now
.tv_sec
, &response
, &age
, &qhash
, &pbMessage
));
1720 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, g_now
.tv_sec
, &response
, &age
, &qhash
, &pbMessage
));
1724 #ifdef HAVE_PROTOBUF
1725 if(luaconfsLocal
->protobufServer
&& (!luaconfsLocal
->protobufTaggedOnly
|| !pbMessage
.getAppliedPolicy().empty() || !pbMessage
.getPolicyTags().empty())) {
// The requestor address is masked with the configured protobuf mask for its family before
// being logged.
1726 Netmask
requestorNM(fromaddr
, fromaddr
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1727 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
1728 pbMessage
.update(uniqueId
, &requestor
, &destaddr
, false, dh
->id
);
1729 pbMessage
.setEDNSSubnet(ednssubnet
.source
, ednssubnet
.source
.isIpv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1730 pbMessage
.setQueryTime(g_now
.tv_sec
, g_now
.tv_usec
);
1731 pbMessage
.setRequestorId(requestorId
);
1732 pbMessage
.setDeviceId(deviceId
);
1733 protobufLogResponse(luaconfsLocal
->protobufServer
, pbMessage
);
1735 #endif /* HAVE_PROTOBUF */
1737 L
<<Logger::Notice
<<t_id
<< " question answered from packet cache tag="<<ctag
<<" from "<<fromaddr
.toString()<<endl
;
1739 g_stats
.packetCacheHits
++;
1740 SyncRes::s_queries
++;
1741 ageDNSPacket(response
, age
);
1745 fillMSGHdr(&msgh
, &iov
, cbuf
, 0, (char*)response
.c_str(), response
.length(), const_cast<ComboAddress
*>(&fromaddr
));
1746 msgh
.msg_control
=NULL
;
// Sockets listed in g_fromtosockets get a control message carrying the address the query
// was sent to, so it is passed along as the source address of the reply.
1748 if(g_fromtosockets
.count(fd
)) {
1749 addCMsgSrcAddr(&msgh
, cbuf
, &destaddr
, 0);
1751 if(sendmsg(fd
, &msgh
, 0) < 0 && g_logCommonErrors
)
1752 L
<<Logger::Warning
<<"Sending UDP reply to client "<<fromaddr
.toStringWithPort()<<" failed with: "<<strerror(errno
)<<endl
;
// The header is copied out with memcpy before its rcode is read.
1754 if(response
.length() >= sizeof(struct dnsheader
)) {
1755 struct dnsheader tmpdh
;
1756 memcpy(&tmpdh
, response
.c_str(), sizeof(tmpdh
));
1757 updateResponseStats(tmpdh
.rcode
, fromaddr
, response
.length(), 0, 0);
1759 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ 0.0; // we assume 0 usec
1760 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ 0.0; // we assume 0 usec
1764 catch(std::exception
& e
) {
1765 L
<<Logger::Error
<<"Error processing or aging answer packet: "<<e
.what()<<endl
;
1770 if(t_pdl
->ipfilter(fromaddr
, destaddr
, *dh
)) {
1772 L
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<fromaddr
.toStringWithPort()<<" based on policy"<<endl
;
1773 g_stats
.policyDrops
++;
1778 if(MT
->numProcesses() > g_maxMThreads
) {
1780 L
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<fromaddr
.toStringWithPort()<<", over capacity"<<endl
;
1782 g_stats
.overCapacityDrops
++;
// Cache miss: everything gathered above is copied into a DNSComboWriter for the resolver.
1786 DNSComboWriter
* dc
= new DNSComboWriter(question
.c_str(), question
.size(), g_now
);
1790 dc
->d_query
= question
;
1791 dc
->setRemote(&fromaddr
);
1792 dc
->setLocal(destaddr
);
1794 dc
->d_policyTags
= policyTags
;
1796 dc
->d_ecsFound
= ecsFound
;
1797 dc
->d_ecsParsed
= ecsParsed
;
1798 dc
->d_ednssubnet
= ednssubnet
;
1799 dc
->d_ttlCap
= ttlCap
;
1800 dc
->d_variable
= variable
;
1801 #ifdef HAVE_PROTOBUF
1802 if (luaconfsLocal
->protobufServer
|| luaconfsLocal
->outgoingProtobufServer
) {
1803 dc
->d_uuid
= uniqueId
;
1805 dc
->d_requestorId
= requestorId
;
1806 dc
->d_deviceId
= deviceId
;
1809 MT
->makeThread(startDoResolve
, (void*) dc
); // deletes dc
// Read callback for a listening UDP socket `fd`.
// Receives a datagram with recvmsg(), records the sender in t_remotes and then:
//   - drops it when the sender is not matched by t_allowFrom (g_stats.unauthorizedUDP),
//   - drops it when the source port is 0 (counted in g_stats.clientParseError),
//   - ignores answers and non-zero opcodes (g_stats.ignoredCount),
//   - otherwise harvests the receive timestamp and destination address from the control
//     messages and passes the query to doProcessUDPQuestion - through
//     distributeAsyncFunction() when g_weDistributeQueries is set, directly otherwise.
// MOADNSException and std::runtime_error raised while handling the packet are counted in
// g_stats.clientParseError. When recvmsg() fails with EAGAIN on the first attempt,
// g_stats.noPacketError is incremented.
// NOTE(review): the surrounding loop (suggested by `firstQuery`), the `else` branches and
// the declarations of data, cbuf, msgh, iov, len and dest are on lines not shown in this listing.
1814 static void handleNewUDPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
1818 ComboAddress fromaddr
;
1822 bool firstQuery
= true;
1824 fromaddr
.sin6
.sin6_family
=AF_INET6
; // this makes sure fromaddr is big enough
1825 fillMSGHdr(&msgh
, &iov
, cbuf
, sizeof(cbuf
), data
, sizeof(data
), &fromaddr
);
1828 if((len
=recvmsg(fd
, &msgh
, 0)) >= 0) {
1833 t_remotes
->push_back(fromaddr
);
1835 if(t_allowFrom
&& !t_allowFrom
->match(&fromaddr
)) {
1837 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toString()<<", address not matched by allow-from"<<endl
;
1839 g_stats
.unauthorizedUDP
++;
// The static assert guarantees the port field sits at the same offset in both address
// structs, which is what allows reading sin4.sin_port for IPv6 peers too.
1842 BOOST_STATIC_ASSERT(offsetof(sockaddr_in
, sin_port
) == offsetof(sockaddr_in6
, sin6_port
));
1843 if(!fromaddr
.sin4
.sin_port
) { // also works for IPv6
1845 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toStringWithPort()<<", can't deal with port 0"<<endl
;
1847 g_stats
.clientParseError
++; // not quite the best place to put it, but needs to go somewhere
1851 dnsheader
* dh
=(dnsheader
*)data
;
// NOTE(review): the test guarding this first branch is not shown - presumably dh->qr, given
// the "Ignoring answer" message.
1854 g_stats
.ignoredCount
++;
1855 if(g_logCommonErrors
)
1856 L
<<Logger::Error
<<"Ignoring answer from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
1858 else if(dh
->opcode
) {
1859 g_stats
.ignoredCount
++;
1860 if(g_logCommonErrors
)
1861 L
<<Logger::Error
<<"Ignoring non-query opcode "<<dh
->opcode
<<" from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
1864 string
question(data
, (size_t)len
);
// tv stays {0,0} unless HarvestTimestamp() fills it in.
1865 struct timeval tv
={0,0};
1866 HarvestTimestamp(&msgh
, &tv
);
1868 memset(&dest
, 0, sizeof(dest
)); // this makes sure we ignore this address if not returned by recvmsg above
1869 auto loc
= rplookup(g_listenSocketsAddresses
, fd
);
1870 if(HarvestDestinationAddress(&msgh
, &dest
)) {
1871 // but.. need to get port too
1873 dest
.sin4
.sin_port
= loc
->sin4
.sin_port
;
// NOTE(review): presumably the fallback when no destination address could be harvested -
// the `else` is on a line not shown here.
1880 dest
.sin4
.sin_family
= fromaddr
.sin4
.sin_family
;
1881 socklen_t slen
= dest
.getSocklen();
1882 getsockname(fd
, (sockaddr
*)&dest
, &slen
); // if this fails, we're ok with it
1885 if(g_weDistributeQueries
)
1886 distributeAsyncFunction(question
, boost::bind(doProcessUDPQuestion
, question
, fromaddr
, dest
, tv
, fd
));
1888 doProcessUDPQuestion(question
, fromaddr
, dest
, tv
, fd
);
1891 catch(MOADNSException
& mde
) {
1892 g_stats
.clientParseError
++;
1893 if(g_logCommonErrors
)
1894 L
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<mde
.what()<<endl
;
1896 catch(std::runtime_error
& e
) {
1897 g_stats
.clientParseError
++;
1898 if(g_logCommonErrors
)
1899 L
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<e
.what()<<endl
;
1903 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1904 if(firstQuery
&& errno
== EAGAIN
)
1905 g_stats
.noPacketError
++;
// Creates one listening TCP socket for every entry of the "local-address" setting
// (split on spaces and commas); the port defaults to "local-port" and may be overridden per
// entry through parseService().
// For each socket: the address is parsed as IPv4 first and as IPv6 if that fails,
// SO_REUSEADDR is set, IPV6_V6ONLY is set for IPv6 sockets, TCP_DEFER_ACCEPT is enabled
// where the platform defines it, "non-local-bind" and SO_REUSEPORT are applied, and TCP
// Fast Open is enabled when "tcp-fast-open" is greater than 0. The socket is then bound,
// its send buffer is set to 65000, and it is queued in deferredAdds[threadId] with
// handleNewTCPQuestion as callback and recorded in g_tcpListenSockets.
// Throws PDNSException when no address is configured, an address cannot be resolved, or
// socket(), the SO_REUSEPORT setsockopt() or bind() fail.
// NOTE(review): the condition guarding SO_REUSEPORT, the preprocessor guards around the Fast
// Open branches, the declarations of fd/tmp/st/sin and any listen() call are on lines not
// shown in this listing.
1911 static void makeTCPServerSockets(unsigned int threadId
)
1914 vector
<string
>locals
;
1915 stringtok(locals
,::arg()["local-address"]," ,");
1918 throw PDNSException("No local address specified");
1920 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
1922 st
.port
=::arg().asNum("local-port");
1923 parseService(*i
, st
);
1927 memset((char *)&sin
,0, sizeof(sin
));
// Try IPv4 first; if the host does not parse as IPv4, fall back to IPv6.
1928 sin
.sin4
.sin_family
= AF_INET
;
1929 if(!IpToU32(st
.host
, (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
1930 sin
.sin6
.sin6_family
= AF_INET6
;
1931 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
1932 throw PDNSException("Unable to resolve local address for TCP server on '"+ st
.host
+"'");
1935 fd
=socket(sin
.sin6
.sin6_family
, SOCK_STREAM
, 0);
1937 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1942 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &tmp
, sizeof tmp
)<0) {
1943 L
<<Logger::Error
<<"Setsockopt failed for TCP listening socket"<<endl
;
1946 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &tmp
, sizeof(tmp
)) < 0) {
1947 L
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno
)<<endl
;
1950 #ifdef TCP_DEFER_ACCEPT
1951 if(setsockopt(fd
, SOL_TCP
, TCP_DEFER_ACCEPT
, &tmp
, sizeof tmp
) >= 0) {
// Only announce this once, for the first configured address.
1952 if(i
==locals
.begin())
1953 L
<<Logger::Error
<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl
;
// NOTE(review): AF_INET is passed here regardless of the socket's family, while
// makeUDPServerSockets passes AF_INET6 - confirm what setBindAny()'s first argument means
// and whether the socket's actual family should be used.
1957 if( ::arg().mustDo("non-local-bind") )
1958 Utility::setBindAny(AF_INET
, fd
);
1962 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &tmp
, sizeof(tmp
)) < 0)
1963 throw PDNSException("SO_REUSEPORT: "+stringerror());
// The "tcp-fast-open" value is passed to the kernel as the TCP_FASTOPEN option value.
1967 if (::arg().asNum("tcp-fast-open") > 0) {
1969 int fastOpenQueueSize
= ::arg().asNum("tcp-fast-open");
1970 if (setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, &fastOpenQueueSize
, sizeof fastOpenQueueSize
) < 0) {
1971 L
<<Logger::Error
<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno
)<<endl
;
1974 L
<<Logger::Warning
<<"TCP Fast Open configured but not supported for listening socket"<<endl
;
1978 sin
.sin4
.sin_port
= htons(st
.port
);
1979 socklen_t socklen
=sin
.sin4
.sin_family
==AF_INET
? sizeof(sin
.sin4
) : sizeof(sin
.sin6
);
1980 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
1981 throw PDNSException("Binding TCP server socket for "+ st
.host
+": "+stringerror());
1984 setSocketSendBuffer(fd
, 65000);
1986 deferredAdds
[threadId
].push_back(make_pair(fd
, handleNewTCPQuestion
));
1987 g_tcpListenSockets
.push_back(fd
);
1988 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1989 // - fd is not that which we know here, but returned from accept()
1990 if(sin
.sin4
.sin_family
== AF_INET
)
1991 L
<<Logger::Error
<<"Listening for TCP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
1993 L
<<Logger::Error
<<"Listening for TCP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Creates one UDP socket for every entry of the "local-address" setting (split on spaces and
// commas); the port defaults to "local-port" and may be overridden per entry through
// parseService().
// For each socket: the address is parsed as IPv4 first and as IPv6 if that fails, receive
// timestamps are requested, and for any-address sockets packet-info delivery is enabled
// (GEN_IP_PKTINFO / IPV6_RECVPKTINFO); on success the fd is added to g_fromtosockets.
// IPV6_V6ONLY, "non-local-bind", a 250000 byte receive buffer and SO_REUSEPORT are applied,
// the socket is bound, queued in deferredAdds[threadId] with handleNewUDPQuestion as callback
// and its address stored in g_listenSocketsAddresses[fd].
// Throws PDNSException when no address is configured, an address cannot be resolved, or
// socket(), the SO_REUSEPORT setsockopt() or bind() fail.
// NOTE(review): the condition guarding SO_REUSEPORT and the declarations of st/sin/one are on
// lines not shown in this listing.
1997 static void makeUDPServerSockets(unsigned int threadId
)
2000 vector
<string
>locals
;
2001 stringtok(locals
,::arg()["local-address"]," ,");
2004 throw PDNSException("No local address specified");
2006 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
2008 st
.port
=::arg().asNum("local-port");
2009 parseService(*i
, st
);
2013 memset(&sin
, 0, sizeof(sin
));
// Try IPv4 first; if the host does not parse as IPv4, fall back to IPv6.
2014 sin
.sin4
.sin_family
= AF_INET
;
2015 if(!IpToU32(st
.host
.c_str() , (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2016 sin
.sin6
.sin6_family
= AF_INET6
;
2017 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2018 throw PDNSException("Unable to resolve local address for UDP server on '"+ st
.host
+"'");
2021 int fd
=socket(sin
.sin4
.sin_family
, SOCK_DGRAM
, 0);
2023 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2025 if (!setSocketTimestamps(fd
))
2026 L
<<Logger::Warning
<<"Unable to enable timestamp reporting for socket"<<endl
;
// Membership in g_fromtosockets is what makes the reply paths call addCMsgSrcAddr() for
// this fd.
2028 if(IsAnyAddress(sin
)) {
2029 if(sin
.sin4
.sin_family
== AF_INET
)
2030 if(!setsockopt(fd
, IPPROTO_IP
, GEN_IP_PKTINFO
, &one
, sizeof(one
))) // linux supports this, so why not - might fail on other systems
2031 g_fromtosockets
.insert(fd
);
2032 #ifdef IPV6_RECVPKTINFO
2033 if(sin
.sin4
.sin_family
== AF_INET6
)
2034 if(!setsockopt(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &one
, sizeof(one
)))
2035 g_fromtosockets
.insert(fd
);
2037 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &one
, sizeof(one
)) < 0) {
2038 L
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno
)<<endl
;
// NOTE(review): AF_INET6 is passed here regardless of the socket's family, while
// makeTCPServerSockets passes AF_INET - confirm what setBindAny()'s first argument means.
2041 if( ::arg().mustDo("non-local-bind") )
2042 Utility::setBindAny(AF_INET6
, fd
);
2046 setSocketReceiveBuffer(fd
, 250000);
2047 sin
.sin4
.sin_port
= htons(st
.port
);
2052 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &one
, sizeof(one
)) < 0)
2053 throw PDNSException("SO_REUSEPORT: "+stringerror());
2056 socklen_t socklen
=sin
.getSocklen();
2057 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
2058 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st
.port
) +" for "+ st
.host
+": "+stringerror());
2062 deferredAdds
[threadId
].push_back(make_pair(fd
, handleNewUDPQuestion
));
2063 g_listenSocketsAddresses
[fd
]=sin
; // this is written to only from the startup thread, not from the workers
2064 if(sin
.sin4
.sin_family
== AF_INET
)
2065 L
<<Logger::Error
<<"Listening for UDP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2067 L
<<Logger::Error
<<"Listening for UDP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Opens /dev/null read-write and duplicates it onto file descriptors 0, 1 and 2, so that
// standard input, output and error all point at /dev/null. A failure to open /dev/null is
// logged at Critical level.
// NOTE(review): the remaining lines of this function (including the check on open()'s result
// and anything done before it) are not shown in this listing.
2071 static void daemonize(void)
2078 int i
=open("/dev/null",O_RDWR
); /* open stdin */
2080 L
<<Logger::Critical
<<"Unable to open /dev/null: "<<stringerror()<<endl
;
2082 dup2(i
,0); /* stdin */
2083 dup2(i
,1); /* stdout */
2084 dup2(i
,2); /* stderr */
// Handler taking an (unnamed, unused) int argument, i.e. with a signal-handler signature.
// NOTE(review): presumably installed for SIGUSR1; the body and the registration are on lines
// not shown in this listing.
2089 static void usr1Handler(int)
// Handler with a signal-handler signature (unnamed, unused int argument) that propagates the
// current value of g_quiet: SyncRes' default log mode becomes LogNone when g_quiet is set and
// Log otherwise, and the "quiet" setting is set to "" when g_quiet is set and to "no" otherwise.
// NOTE(review): presumably installed for SIGUSR2 and preceded by a toggle of g_quiet - that
// line and the registration are not shown in this listing.
2094 static void usr2Handler(int)
2097 SyncRes::setDefaultLogMode(g_quiet
? SyncRes::LogNone
: SyncRes::Log
);
2098 ::arg().set("quiet")=g_quiet
? "" : "no";
// Logs a statistics summary at Notice level: question count, record/negative cache sizes and
// hit percentage, throttle map and NS speed table sizes, outgoing-packet/query ratio,
// throttled percentage, outgoing TCP connections, running queries, outgoing timeouts, packet
// cache size and hit percentage, and the average query rate since the previous output.
// The summary is only produced when the question counter, the sum of cache hits and misses,
// SyncRes::s_queries and SyncRes::s_outqueries are all non-zero - these values are used as
// divisors below. Otherwise, if statsWanted is set, "no stats yet!" is logged.
// lastOutputTime / lastQueryCount are function-local statics remembering the previous call.
2101 static void doStats(void)
2103 static time_t lastOutputTime
;
2104 static uint64_t lastQueryCount
;
// Hits and misses are collected through broadcastAccFunction().
2106 uint64_t cacheHits
= broadcastAccFunction
<uint64_t>(pleaseGetCacheHits
);
2107 uint64_t cacheMisses
= broadcastAccFunction
<uint64_t>(pleaseGetCacheMisses
);
2109 if(g_stats
.qcounter
&& (cacheHits
+ cacheMisses
) && SyncRes::s_queries
&& SyncRes::s_outqueries
) {
2110 L
<<Logger::Notice
<<"stats: "<<g_stats
.qcounter
<<" questions, "<<
2111 broadcastAccFunction
<uint64_t>(pleaseGetCacheSize
)<< " cache entries, "<<
2112 broadcastAccFunction
<uint64_t>(pleaseGetNegCacheSize
)<<" negative entries, "<<
2113 (int)((cacheHits
*100.0)/(cacheHits
+cacheMisses
))<<"% cache hits"<<endl
;
2115 L
<<Logger::Notice
<<"stats: throttle map: "
2116 << broadcastAccFunction
<uint64_t>(pleaseGetThrottleSize
) <<", ns speeds: "
2117 << broadcastAccFunction
<uint64_t>(pleaseGetNsSpeedsSize
)<<endl
;
2118 L
<<Logger::Notice
<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries
*100.0/SyncRes::s_queries
)<<"%";
2119 L
<<Logger::Notice
<<", "<<(int)(SyncRes::s_throttledqueries
*100.0/(SyncRes::s_outqueries
+SyncRes::s_throttledqueries
))<<"% throttled, "
2120 <<SyncRes::s_nodelegated
<<" no-delegation drops"<<endl
;
2121 L
<<Logger::Notice
<<"stats: "<<SyncRes::s_tcpoutqueries
<<" outgoing tcp connections, "<<
2122 broadcastAccFunction
<uint64_t>(pleaseGetConcurrentQueries
)<<" queries running, "<<SyncRes::s_outgoingtimeouts
<<" outgoing timeouts"<<endl
;
2124 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2125 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2127 L
<<Logger::Notice
<<"stats: " << broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheSize
) <<
2128 " packet cache entries, "<<(int)(100.0*broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheHits
)/SyncRes::s_queries
) << "% packet cache hits"<<endl
;
// The qps line needs a previous sample, and a different timestamp so the divisor is not zero.
2130 time_t now
= time(0);
2131 if(lastOutputTime
&& lastQueryCount
&& now
!= lastOutputTime
) {
2132 L
<<Logger::Notice
<<"stats: "<< (SyncRes::s_queries
- lastQueryCount
) / (now
- lastOutputTime
) <<" qps (average over "<< (now
- lastOutputTime
) << " seconds)"<<endl
;
2134 lastOutputTime
= now
;
2135 lastQueryCount
= SyncRes::s_queries
;
2137 else if(statsWanted
)
2138 L
<<Logger::Notice
<<"stats: no stats yet!"<<endl
;
// Periodic maintenance routine. The visible tasks and their triggers are:
//   - cache pruning when more than 5 + t_id seconds have passed since last_prune: the record
//     cache, the packet cache and the negative cache are pruned to a share of their
//     configured maximum; on every 40th pruning pass (starting with the first) NS speed
//     entries are pruned with a limit of now - 300 seconds
//   - SyncRes::getRootNS() when more than 7200 seconds have passed since last_rootupdate
//   - statistics output when g_statisticsInterval is positive and that many seconds have
//     passed since last_stat
//   - doSecPoll() when at least 3600 seconds have passed since last_secpoll; exceptions
//     raised by it are logged and not propagated
// A PDNSException escaping the routine is logged as a fatal housekeeping error.
// All bookkeeping timestamps are thread_local statics, so they are kept per thread.
// NOTE(review): the re-entrancy guard using s_running, the conditions deciding which thread
// performs which task, and the updates of last_prune / last_stat are on lines not shown in
// this listing.
2143 static void houseKeeping(void *)
2145 static thread_local
time_t last_stat
, last_rootupdate
, last_prune
, last_secpoll
;
2146 static thread_local
int cleanCounter
=0;
2147 static thread_local
bool s_running
; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2154 Utility::gettimeofday(&now
, 0);
// Adding t_id to the interval gives each thread a slightly different pruning period.
2156 if(now
.tv_sec
- last_prune
> (time_t)(5 + t_id
)) {
2159 t_RC
->doPrune(g_maxCacheEntries
/ g_numThreads
); // this function is local to a thread, so fine anyhow
2160 t_packetCache
->doPruneTo(g_maxPacketCacheEntries
/ g_numWorkerThreads
);
2162 SyncRes::pruneNegCache(g_maxCacheEntries
/ (g_numWorkerThreads
* 10));
2164 if(!((cleanCounter
++)%40)) { // this is a full scan!
2165 time_t limit
=now
.tv_sec
-300;
2166 SyncRes::pruneNSSpeeds(limit
);
2171 if(now
.tv_sec
- last_rootupdate
> 7200) {
2172 int res
= SyncRes::getRootNS(g_now
, nullptr);
2174 last_rootupdate
=now
.tv_sec
;
2178 if(g_statisticsInterval
> 0 && now
.tv_sec
- last_stat
>= g_statisticsInterval
) {
2183 if(now
.tv_sec
- last_secpoll
>= 3600) {
2185 doSecPoll(&last_secpoll
);
2187 catch(std::exception
& e
)
2189 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.what()<<endl
;
2191 catch(PDNSException
& e
)
2193 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2195 catch(ImmediateServFailException
&e
)
2197 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2201 L
<<Logger::Error
<<"Exception while performing security poll"<<endl
;
2208 catch(PDNSException
& ae
)
2211 L
<<Logger::Error
<<"Fatal error in housekeeping thread: "<<ae
.reason
<<endl
;
2216 static void makeThreadPipes()
2218 for(unsigned int n
=0; n
< g_numThreads
; ++n
) {
2219 struct ThreadPipeSet tps
;
2222 unixDie("Creating pipe for inter-thread communications");
2224 tps
.readToThread
= fd
[0];
2225 tps
.writeToThread
= fd
[1];
2228 unixDie("Creating pipe for inter-thread communications");
2229 tps
.readFromThread
= fd
[0];
2230 tps
.writeFromThread
= fd
[1];
2233 unixDie("Creating pipe for inter-thread communications");
2234 tps
.readQueriesToThread
= fd
[0];
2235 tps
.writeQueriesToThread
= fd
[1];
2237 if (!setNonBlocking(tps
.writeQueriesToThread
)) {
2238 unixDie("Making pipe for inter-thread communications non-blocking");
2241 g_pipes
.push_back(tps
);
2251 void broadcastFunction(const pipefunc_t
& func
, bool skipSelf
)
2254 for(ThreadPipeSet
& tps
: g_pipes
)
2258 func(); // don't write to ourselves!
2262 ThreadMSG
* tmsg
= new ThreadMSG();
2264 tmsg
->wantAnswer
= true;
2265 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2267 unixDie("write to thread pipe returned wrong size or error");
2271 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
2272 unixDie("read from thread pipe returned wrong size or error");
2275 // cerr <<"got response: " << *resp << endl;
2281 void distributeAsyncFunction(const string
& packet
, const pipefunc_t
& func
)
2283 unsigned int hash
= hashQuestion(packet
.c_str(), packet
.length(), g_disthashseed
);
2284 unsigned int target
= 1 + (hash
% (g_pipes
.size()-1));
2286 if(target
== t_id
) {
2290 ThreadPipeSet
& tps
= g_pipes
[target
];
2291 ThreadMSG
* tmsg
= new ThreadMSG();
2293 tmsg
->wantAnswer
= false;
2295 ssize_t written
= write(tps
.writeQueriesToThread
, &tmsg
, sizeof(tmsg
));
2297 if (static_cast<size_t>(written
) != sizeof(tmsg
)) {
2299 unixDie("write to thread pipe returned wrong size or error");
2305 if (error
== EAGAIN
|| error
== EWOULDBLOCK
) {
2306 g_stats
.queryPipeFullDrops
++;
2308 unixDie("write to thread pipe returned wrong size or error:" + error
);
2313 static void handlePipeRequest(int fd
, FDMultiplexer::funcparam_t
& var
)
2315 ThreadMSG
* tmsg
= nullptr;
2317 if(read(fd
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) { // fd == readToThread || fd == readQueriesToThread
2318 unixDie("read from thread pipe returned wrong size or error");
2323 resp
= tmsg
->func();
2325 catch(std::exception
& e
) {
2326 if(g_logCommonErrors
)
2327 L
<<Logger::Error
<<"PIPE function we executed created exception: "<<e
.what()<<endl
; // but what if they wanted an answer.. we send 0
2329 catch(PDNSException
& e
) {
2330 if(g_logCommonErrors
)
2331 L
<<Logger::Error
<<"PIPE function we executed created PDNS exception: "<<e
.reason
<<endl
; // but what if they wanted an answer.. we send 0
2333 if(tmsg
->wantAnswer
) {
2334 if(write(g_pipes
[t_id
].writeFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
)) {
2336 unixDie("write to thread pipe returned wrong size or error");
2343 template<class T
> void *voider(const boost::function
<T
*()>& func
)
2348 vector
<ComboAddress
>& operator+=(vector
<ComboAddress
>&a
, const vector
<ComboAddress
>& b
)
2350 a
.insert(a
.end(), b
.begin(), b
.end());
// Append all of b to a; returns a so results can be accumulated with `+=`.
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  a.insert(a.end(), b.begin(), b.end());
  return a;
}
2360 vector
<pair
<DNSName
, uint16_t> >& operator+=(vector
<pair
<DNSName
, uint16_t> >&a
, const vector
<pair
<DNSName
, uint16_t> >& b
)
2362 a
.insert(a
.end(), b
.begin(), b
.end());
2367 template<class T
> T
broadcastAccFunction(const boost::function
<T
*()>& func
, bool skipSelf
)
2371 for(ThreadPipeSet
& tps
: g_pipes
)
2375 T
* resp
= (T
*)func(); // don't write to ourselves!
2377 //~ cerr <<"got direct: " << *resp << endl;
2385 ThreadMSG
* tmsg
= new ThreadMSG();
2386 tmsg
->func
= boost::bind(voider
<T
>, func
);
2387 tmsg
->wantAnswer
= true;
2389 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2391 unixDie("write to thread pipe returned wrong size or error");
2395 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
2396 unixDie("read from thread pipe returned wrong size or error");
2399 //~ cerr <<"got response: " << *resp << endl;
2407 template string
broadcastAccFunction(const boost::function
<string
*()>& fun
, bool skipSelf
); // explicit instantiation
2408 template uint64_t broadcastAccFunction(const boost::function
<uint64_t*()>& fun
, bool skipSelf
); // explicit instantiation
2409 template vector
<ComboAddress
> broadcastAccFunction(const boost::function
<vector
<ComboAddress
> *()>& fun
, bool skipSelf
); // explicit instantiation
2410 template vector
<pair
<DNSName
,uint16_t> > broadcastAccFunction(const boost::function
<vector
<pair
<DNSName
, uint16_t> > *()>& fun
, bool skipSelf
); // explicit instantiation
2412 static void handleRCC(int fd
, FDMultiplexer::funcparam_t
& var
)
2415 string msg
=s_rcc
.recv(&remote
);
2416 RecursorControlParser rcp
;
2417 RecursorControlParser::func_t
* command
;
2419 string answer
=rcp
.getAnswer(msg
, &command
);
2421 // If we are inside a chroot, we need to strip
2422 if (!arg()["chroot"].empty()) {
2423 size_t len
= arg()["chroot"].length();
2424 remote
= remote
.substr(len
);
2428 s_rcc
.send(answer
, &remote
);
2431 catch(std::exception
& e
) {
2432 L
<<Logger::Error
<<"Error dealing with control socket request: "<<e
.what()<<endl
;
2434 catch(PDNSException
& ae
) {
2435 L
<<Logger::Error
<<"Error dealing with control socket request: "<<ae
.reason
<<endl
;
2439 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
)
2441 PacketID
* pident
=any_cast
<PacketID
>(&var
);
2442 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2444 shared_array
<char> buffer(new char[pident
->inNeeded
]);
2446 ssize_t ret
=recv(fd
, buffer
.get(), pident
->inNeeded
,0);
2448 pident
->inMSG
.append(&buffer
[0], &buffer
[ret
]);
2449 pident
->inNeeded
-=(size_t)ret
;
2450 if(!pident
->inNeeded
|| pident
->inIncompleteOkay
) {
2451 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2452 PacketID pid
=*pident
;
2453 string msg
=pident
->inMSG
;
2455 t_fdm
->removeReadFD(fd
);
2456 MT
->sendEvent(pid
, &msg
);
2459 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2463 PacketID tmp
=*pident
;
2464 t_fdm
->removeReadFD(fd
); // pident might now be invalid (it isn't, but still)
2466 MT
->sendEvent(tmp
, &empty
); // this conveys error status
2470 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
)
2472 PacketID
* pid
=any_cast
<PacketID
>(&var
);
2473 ssize_t ret
=send(fd
, pid
->outMSG
.c_str() + pid
->outPos
, pid
->outMSG
.size() - pid
->outPos
,0);
2475 pid
->outPos
+=(ssize_t
)ret
;
2476 if(pid
->outPos
==pid
->outMSG
.size()) {
2478 t_fdm
->removeWriteFD(fd
);
2479 MT
->sendEvent(tmp
, &tmp
.outMSG
); // send back what we sent to convey everything is ok
2482 else { // error or EOF
2484 t_fdm
->removeWriteFD(fd
);
2486 MT
->sendEvent(tmp
, &sent
); // we convey error status by sending empty string
2490 // resend event to everybody chained onto it
2491 static void doResends(MT_t::waiters_t::iterator
& iter
, PacketID resend
, const string
& content
)
2493 if(iter
->key
.chain
.empty())
2495 // cerr<<"doResends called!\n";
2496 for(PacketID::chain_t::iterator i
=iter
->key
.chain
.begin(); i
!= iter
->key
.chain
.end() ; ++i
) {
2499 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2501 MT
->sendEvent(resend
, &content
);
2502 g_stats
.chainResends
++;
2506 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
2508 PacketID pid
=any_cast
<PacketID
>(var
);
2510 char data
[g_outgoingEDNSBufsize
];
2511 ComboAddress fromaddr
;
2512 socklen_t addrlen
=sizeof(fromaddr
);
2514 len
=recvfrom(fd
, data
, sizeof(data
), 0, (sockaddr
*)&fromaddr
, &addrlen
);
2516 if(len
< (ssize_t
) sizeof(dnsheader
)) {
2518 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2520 g_stats
.serverParseError
++;
2521 if(g_logCommonErrors
)
2522 L
<<Logger::Error
<<"Unable to parse packet from remote UDP server "<< fromaddr
.toString() <<
2523 ": packet smaller than DNS header"<<endl
;
2526 t_udpclientsocks
->returnSocket(fd
);
2529 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pid
);
2530 if(iter
!= MT
->d_waiters
.end())
2531 doResends(iter
, pid
, empty
);
2533 MT
->sendEvent(pid
, &empty
); // this denotes error (does lookup again.. at least L1 will be hot)
2538 memcpy(&dh
, data
, sizeof(dh
));
2541 pident
.remote
=fromaddr
;
2545 if(!dh
.qr
&& g_logCommonErrors
) {
2546 L
<<Logger::Notice
<<"Not taking data from question on outgoing socket from "<< fromaddr
.toStringWithPort() <<endl
;
2549 if(!dh
.qdcount
|| // UPC, Nominum, very old BIND on FormErr, NSD
2550 !dh
.qr
) { // one weird server
2551 pident
.domain
.clear();
2557 pident
.domain
=DNSName(data
, len
, 12, false, &pident
.type
); // don't copy this from above - we need to do the actual read
2559 catch(std::exception
& e
) {
2560 g_stats
.serverParseError
++; // won't be fed to lwres.cc, so we have to increment
2561 L
<<Logger::Warning
<<"Error in packet from remote nameserver "<< fromaddr
.toStringWithPort() << ": "<<e
.what() << endl
;
2566 packet
.assign(data
, len
);
2568 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pident
);
2569 if(iter
!= MT
->d_waiters
.end()) {
2570 doResends(iter
, pident
, packet
);
2575 if(!MT
->sendEvent(pident
, &packet
)) {
2576 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2577 for(MT_t::waiters_t::iterator mthread
=MT
->d_waiters
.begin(); mthread
!=MT
->d_waiters
.end(); ++mthread
) {
2578 if(pident
.fd
==mthread
->key
.fd
&& mthread
->key
.remote
==pident
.remote
&& mthread
->key
.type
== pident
.type
&&
2579 pident
.domain
== mthread
->key
.domain
) {
2580 mthread
->key
.nearMisses
++;
2583 // be a bit paranoid here since we're weakening our matching
2584 if(pident
.domain
.empty() && !mthread
->key
.domain
.empty() && !pident
.type
&& mthread
->key
.type
&&
2585 pident
.id
== mthread
->key
.id
&& mthread
->key
.remote
== pident
.remote
) {
2586 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2587 pident
.domain
= mthread
->key
.domain
;
2588 pident
.type
= mthread
->key
.type
;
2589 goto retryWithName
; // note that this only passes on an error, lwres will still reject the packet
2592 g_stats
.unexpectedCount
++; // if we made it here, it really is an unexpected answer
2593 if(g_logCommonErrors
) {
2594 L
<<Logger::Warning
<<"Discarding unexpected packet from "<<fromaddr
.toStringWithPort()<<": "<< (pident
.domain
.empty() ? "<empty>" : pident
.domain
.toString())<<", "<<pident
.type
<<", "<<MT
->d_waiters
.size()<<" waiters"<<endl
;
2598 t_udpclientsocks
->returnSocket(fd
);
2602 FDMultiplexer
* getMultiplexer()
2605 for(const auto& i
: FDMultiplexer::getMultiplexerMap()) {
2610 catch(FDMultiplexerException
&fe
) {
2611 L
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer ("<<fe
.what()<<"), falling back"<<endl
;
2614 L
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer"<<endl
;
2617 L
<<Logger::Error
<<"No working multiplexer found!"<<endl
;
2622 static string
* doReloadLuaScript()
2624 string fname
= ::arg()["lua-dns-script"];
2628 L
<<Logger::Error
<<t_id
<<" Unloaded current lua script"<<endl
;
2629 return new string("unloaded\n");
2632 t_pdl
= std::make_shared
<RecursorLua4
>(fname
);
2635 catch(std::exception
& e
) {
2636 L
<<Logger::Error
<<t_id
<<" Retaining current script, error from '"<<fname
<<"': "<< e
.what() <<endl
;
2637 return new string("retaining current script, error from '"+fname
+"': "+e
.what()+"\n");
2640 L
<<Logger::Warning
<<t_id
<<" (Re)loaded lua script from '"<<fname
<<"'"<<endl
;
2641 return new string("(re)loaded '"+fname
+"'\n");
2644 string
doQueueReloadLuaScript(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
2647 ::arg().set("lua-dns-script") = *begin
;
2649 return broadcastAccFunction
<string
>(doReloadLuaScript
);
2652 static string
* pleaseUseNewTraceRegex(const std::string
& newRegex
)
2655 if(newRegex
.empty()) {
2656 t_traceRegex
.reset();
2657 return new string("unset\n");
2660 t_traceRegex
= std::make_shared
<Regex
>(newRegex
);
2661 return new string("ok\n");
2664 catch(PDNSException
& ae
)
2666 return new string(ae
.reason
+"\n");
2669 string
doTraceRegex(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
2671 return broadcastAccFunction
<string
>(boost::bind(pleaseUseNewTraceRegex
, begin
!=end
? *begin
: ""));
2674 static void checkLinuxIPv6Limits()
2678 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line
)) {
2679 int lim
=std::stoi(line
);
2681 L
<<Logger::Error
<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim
<<" which is < 16384"<<endl
;
2686 static void checkOrFixFDS()
2688 unsigned int availFDs
=getFilenumLimit();
2689 unsigned int wantFDs
= g_maxMThreads
* g_numWorkerThreads
+25; // even healthier margin then before
2691 if(wantFDs
> availFDs
) {
2692 unsigned int hardlimit
= getFilenumLimit(true);
2693 if(hardlimit
>= wantFDs
) {
2694 setFilenumLimit(wantFDs
);
2695 L
<<Logger::Warning
<<"Raised soft limit on number of filedescriptors to "<<wantFDs
<<" to match max-mthreads and threads settings"<<endl
;
2698 int newval
= (hardlimit
- 25) / g_numWorkerThreads
;
2699 L
<<Logger::Warning
<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit
<<" < "<<wantFDs
<<"), reducing max-mthreads to "<<newval
<<endl
;
2700 g_maxMThreads
= newval
;
2701 setFilenumLimit(hardlimit
);
2706 static void* recursorThread(void*);
2708 static void* pleaseSupplantACLs(std::shared_ptr
<NetmaskGroup
> ng
)
2719 static bool l_initialized
;
2721 if(l_initialized
) { // only reload configuration file on second call
2722 string configname
=::arg()["config-dir"]+"/recursor.conf";
2723 if(::arg()["config-name"]!="") {
2724 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2726 cleanSlashes(configname
);
2728 if(!::arg().preParseFile(configname
.c_str(), "allow-from-file"))
2729 throw runtime_error("Unable to re-parse configuration file '"+configname
+"'");
2730 ::arg().preParseFile(configname
.c_str(), "allow-from", LOCAL_NETS
);
2731 ::arg().preParseFile(configname
.c_str(), "include-dir");
2732 ::arg().preParse(g_argc
, g_argv
, "include-dir");
2734 // then process includes
2735 std::vector
<std::string
> extraConfigs
;
2736 ::arg().gatherIncludes(extraConfigs
);
2738 for(const std::string
& fn
: extraConfigs
) {
2739 if(!::arg().preParseFile(fn
.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2740 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
2741 if(!::arg().preParseFile(fn
.c_str(), "allow-from", ::arg()["allow-from"]))
2742 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
2745 ::arg().preParse(g_argc
, g_argv
, "allow-from-file");
2746 ::arg().preParse(g_argc
, g_argv
, "allow-from");
2749 std::shared_ptr
<NetmaskGroup
> oldAllowFrom
= t_allowFrom
;
2750 std::shared_ptr
<NetmaskGroup
> allowFrom
= std::make_shared
<NetmaskGroup
>();
2752 if(!::arg()["allow-from-file"].empty()) {
2754 ifstream
ifs(::arg()["allow-from-file"].c_str());
2756 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2759 string::size_type pos
;
2760 while(getline(ifs
,line
)) {
2762 if(pos
!=string::npos
)
2768 allowFrom
->addMask(line
);
2770 L
<<Logger::Warning
<<"Done parsing " << allowFrom
->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl
;
2772 else if(!::arg()["allow-from"].empty()) {
2774 stringtok(ips
, ::arg()["allow-from"], ", ");
2776 L
<<Logger::Warning
<<"Only allowing queries from: ";
2777 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
2778 allowFrom
->addMask(*i
);
2780 L
<<Logger::Warning
<<", ";
2781 L
<<Logger::Warning
<<*i
;
2783 L
<<Logger::Warning
<<endl
;
2786 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2787 L
<<Logger::Error
<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl
;
2788 allowFrom
= nullptr;
2791 g_initialAllowFrom
= allowFrom
;
2792 broadcastFunction(boost::bind(pleaseSupplantACLs
, allowFrom
));
2793 oldAllowFrom
= nullptr;
2795 l_initialized
= true;
2799 static void setupDelegationOnly()
2801 vector
<string
> parts
;
2802 stringtok(parts
, ::arg()["delegation-only"], ", \t");
2803 for(const auto& p
: parts
) {
2804 SyncRes::addDelegationOnly(DNSName(p
));
2808 static std::map
<unsigned int, std::set
<int> > parseCPUMap()
2810 std::map
<unsigned int, std::set
<int> > result
;
2812 const std::string value
= ::arg()["cpu-map"];
2814 if (!value
.empty() && !isSettingThreadCPUAffinitySupported()) {
2815 L
<<Logger::Warning
<<"CPU mapping requested but not supported, skipping"<<endl
;
2819 std::vector
<std::string
> parts
;
2821 stringtok(parts
, value
, " \t");
2823 for(const auto& part
: parts
) {
2824 if (part
.find('=') == string::npos
)
2828 auto headers
= splitField(part
, '=');
2829 trim(headers
.first
);
2830 trim(headers
.second
);
2832 unsigned int threadId
= pdns_stou(headers
.first
);
2833 std::vector
<std::string
> cpus
;
2835 stringtok(cpus
, headers
.second
, ",");
2837 for(const auto& cpu
: cpus
) {
2838 int cpuId
= std::stoi(cpu
);
2840 result
[threadId
].insert(cpuId
);
2843 catch(const std::exception
& e
) {
2844 L
<<Logger::Error
<<"Error parsing cpu-map entry '"<<part
<<"': "<<e
.what()<<endl
;
2851 static void setCPUMap(const std::map
<unsigned int, std::set
<int> >& cpusMap
, unsigned int n
, pthread_t tid
)
2853 const auto& cpuMapping
= cpusMap
.find(n
);
2854 if (cpuMapping
!= cpusMap
.cend()) {
2855 int rc
= mapThreadToCPUList(tid
, cpuMapping
->second
);
2857 L
<<Logger::Info
<<"CPU affinity for worker "<<n
<<" has been set to CPU map:";
2858 for (const auto cpu
: cpuMapping
->second
) {
2859 L
<<Logger::Info
<<" "<<cpu
;
2861 L
<<Logger::Info
<<endl
;
2864 L
<<Logger::Warning
<<"Error setting CPU affinity for worker "<<n
<<" to CPU map:";
2865 for (const auto cpu
: cpuMapping
->second
) {
2866 L
<<Logger::Info
<<" "<<cpu
;
2868 L
<<Logger::Info
<<strerror(rc
)<<endl
;
2873 static int serviceMain(int argc
, char*argv
[])
2875 L
.setName(s_programname
);
2876 L
.disableSyslog(::arg().mustDo("disable-syslog"));
2877 L
.setTimestamps(::arg().mustDo("log-timestamp"));
2879 if(!::arg()["logging-facility"].empty()) {
2880 int val
=logFacilityToLOG(::arg().asNum("logging-facility") );
2882 theL().setFacility(val
);
2884 L
<<Logger::Error
<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl
;
2887 showProductVersion();
2888 seedRandom(::arg()["entropy-source"]);
2890 g_disthashseed
=dns_random(0xffffffff);
2892 checkLinuxIPv6Limits();
2894 vector
<string
> addrs
;
2895 if(!::arg()["query-local-address6"].empty()) {
2896 SyncRes::s_doIPv6
=true;
2897 L
<<Logger::Warning
<<"Enabling IPv6 transport for outgoing queries"<<endl
;
2899 stringtok(addrs
, ::arg()["query-local-address6"], ", ;");
2900 for(const string
& addr
: addrs
) {
2901 g_localQueryAddresses6
.push_back(ComboAddress(addr
));
2905 L
<<Logger::Warning
<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl
;
2908 stringtok(addrs
, ::arg()["query-local-address"], ", ;");
2909 for(const string
& addr
: addrs
) {
2910 g_localQueryAddresses4
.push_back(ComboAddress(addr
));
2913 catch(std::exception
& e
) {
2914 L
<<Logger::Error
<<"Assigning local query addresses: "<<e
.what();
2918 // keep this ABOVE loadRecursorLuaConfig!
2919 if(::arg()["dnssec"]=="off")
2920 g_dnssecmode
=DNSSECMode::Off
;
2921 else if(::arg()["dnssec"]=="process-no-validate")
2922 g_dnssecmode
=DNSSECMode::ProcessNoValidate
;
2923 else if(::arg()["dnssec"]=="process")
2924 g_dnssecmode
=DNSSECMode::Process
;
2925 else if(::arg()["dnssec"]=="validate")
2926 g_dnssecmode
=DNSSECMode::ValidateAll
;
2927 else if(::arg()["dnssec"]=="log-fail")
2928 g_dnssecmode
=DNSSECMode::ValidateForLog
;
2930 L
<<Logger::Error
<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl
;
2934 g_dnssecLogBogus
= ::arg().mustDo("dnssec-log-bogus");
2935 g_maxNSEC3Iterations
= ::arg().asNum("nsec3-max-iterations");
2937 g_maxCacheEntries
= ::arg().asNum("max-cache-entries");
2938 g_maxPacketCacheEntries
= ::arg().asNum("max-packetcache-entries");
2941 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2943 catch (PDNSException
&e
) {
2944 L
<<Logger::Error
<<"Cannot load Lua configuration: "<<e
.reason
<<endl
;
2949 sortPublicSuffixList();
2951 if(!::arg()["dont-query"].empty()) {
2953 stringtok(ips
, ::arg()["dont-query"], ", ");
2954 ips
.push_back("0.0.0.0");
2955 ips
.push_back("::");
2957 L
<<Logger::Warning
<<"Will not send queries to: ";
2958 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
2959 SyncRes::addDontQuery(*i
);
2961 L
<<Logger::Warning
<<", ";
2962 L
<<Logger::Warning
<<*i
;
2964 L
<<Logger::Warning
<<endl
;
2967 g_quiet
=::arg().mustDo("quiet");
2969 g_weDistributeQueries
= ::arg().mustDo("pdns-distributes-queries");
2970 if(g_weDistributeQueries
) {
2971 L
<<Logger::Warning
<<"PowerDNS Recursor itself will distribute queries over threads"<<endl
;
2974 setupDelegationOnly();
2975 g_outgoingEDNSBufsize
=::arg().asNum("edns-outgoing-bufsize");
2977 if(::arg()["trace"]=="fail") {
2978 SyncRes::setDefaultLogMode(SyncRes::Store
);
2980 else if(::arg().mustDo("trace")) {
2981 SyncRes::setDefaultLogMode(SyncRes::Log
);
2982 ::arg().set("quiet")="no";
2987 SyncRes::s_minimumTTL
= ::arg().asNum("minimum-ttl-override");
2989 SyncRes::s_nopacketcache
= ::arg().mustDo("disable-packetcache");
2991 SyncRes::s_maxnegttl
=::arg().asNum("max-negative-ttl");
2992 SyncRes::s_maxcachettl
=max(::arg().asNum("max-cache-ttl"), 15);
2993 SyncRes::s_packetcachettl
=::arg().asNum("packetcache-ttl");
2994 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2995 uint32_t packetCacheServFailTTL
= ::arg().asNum("packetcache-servfail-ttl");
2996 SyncRes::s_packetcacheservfailttl
=(packetCacheServFailTTL
> SyncRes::s_packetcachettl
) ? SyncRes::s_packetcachettl
: packetCacheServFailTTL
;
2997 SyncRes::s_serverdownmaxfails
=::arg().asNum("server-down-max-fails");
2998 SyncRes::s_serverdownthrottletime
=::arg().asNum("server-down-throttle-time");
2999 SyncRes::s_serverID
=::arg()["server-id"];
3000 SyncRes::s_maxqperq
=::arg().asNum("max-qperq");
3001 SyncRes::s_maxtotusec
=1000*::arg().asNum("max-total-msec");
3002 SyncRes::s_maxdepth
=::arg().asNum("max-recursion-depth");
3003 SyncRes::s_rootNXTrust
= ::arg().mustDo( "root-nx-trust");
3004 if(SyncRes::s_serverID
.empty()) {
3006 gethostname(tmp
, sizeof(tmp
)-1);
3007 SyncRes::s_serverID
=tmp
;
3010 SyncRes::s_ecsipv4limit
= ::arg().asNum("ecs-ipv4-bits");
3011 SyncRes::s_ecsipv6limit
= ::arg().asNum("ecs-ipv6-bits");
3013 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3014 ComboAddress
scopeZero(::arg()["ecs-scope-zero-address"]);
3015 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero
, scopeZero
.isIPv4() ? 32 : 128));
3019 for (const auto& addr
: g_localQueryAddresses4
) {
3020 if (!IsAnyAddress(addr
)) {
3021 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 32));
3027 for (const auto& addr
: g_localQueryAddresses6
) {
3028 if (!IsAnyAddress(addr
)) {
3029 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 128));
3035 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3040 g_networkTimeoutMsec
= ::arg().asNum("network-timeout");
3042 g_initialDomainMap
= parseAuthAndForwards();
3044 g_latencyStatSize
=::arg().asNum("latency-statistic-size");
3046 g_logCommonErrors
=::arg().mustDo("log-common-errors");
3047 g_logRPZChanges
= ::arg().mustDo("log-rpz-changes");
3049 g_anyToTcp
= ::arg().mustDo("any-to-tcp");
3050 g_udpTruncationThreshold
= ::arg().asNum("udp-truncation-threshold");
3052 g_lowercaseOutgoing
= ::arg().mustDo("lowercase-outgoing");
3054 g_numWorkerThreads
= ::arg().asNum("threads");
3055 if (g_numWorkerThreads
< 1) {
3056 L
<<Logger::Warning
<<"Asked to run with 0 threads, raising to 1 instead"<<endl
;
3057 g_numWorkerThreads
= 1;
3060 g_numThreads
= g_numWorkerThreads
+ g_weDistributeQueries
;
3061 g_maxMThreads
= ::arg().asNum("max-mthreads");
3063 g_gettagNeedsEDNSOptions
= ::arg().mustDo("gettag-needs-edns-options");
3065 g_statisticsInterval
= ::arg().asNum("statistics-interval");
3068 g_reusePort
= ::arg().mustDo("reuseport");
3071 g_useOneSocketPerThread
= (!g_weDistributeQueries
&& g_reusePort
);
3073 if (g_useOneSocketPerThread
) {
3074 for (unsigned int threadId
= 0; threadId
< g_numWorkerThreads
; threadId
++) {
3075 makeUDPServerSockets(threadId
);
3076 makeTCPServerSockets(threadId
);
3080 makeUDPServerSockets(0);
3081 makeTCPServerSockets(0);
3084 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3085 g_useIncomingECS
= ::arg().mustDo("use-incoming-edns-subnet");
3088 for(forks
= 0; forks
< ::arg().asNum("processes") - 1; ++forks
) {
3089 if(!fork()) // we are child
3093 if(::arg().mustDo("daemon")) {
3094 L
<<Logger::Warning
<<"Calling daemonize, going to background"<<endl
;
3095 L
.toConsole(Logger::Critical
);
3097 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3099 signal(SIGUSR1
,usr1Handler
);
3100 signal(SIGUSR2
,usr2Handler
);
3101 signal(SIGPIPE
,SIG_IGN
);
3105 #ifdef HAVE_LIBSODIUM
3106 if (sodium_init() == -1) {
3107 L
<<Logger::Error
<<"Unable to initialize sodium crypto library"<<endl
;
3112 openssl_thread_setup();
3116 if(!::arg()["setgid"].empty())
3117 newgid
=Utility::makeGidNumeric(::arg()["setgid"]);
3119 if(!::arg()["setuid"].empty())
3120 newuid
=Utility::makeUidNumeric(::arg()["setuid"]);
3122 Utility::dropGroupPrivs(newuid
, newgid
);
3124 if (!::arg()["chroot"].empty()) {
3127 ns
= getenv("NOTIFY_SOCKET");
3128 if (ns
!= nullptr) {
3129 L
<<Logger::Error
<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl
;
3133 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3134 L
<<Logger::Error
<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno
)<<", exiting"<<endl
;
3138 L
<<Logger::Error
<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl
;
3141 s_pidfname
=::arg()["socket-dir"]+"/"+s_programname
+".pid";
3142 if(!s_pidfname
.empty())
3143 unlink(s_pidfname
.c_str()); // remove possible old pid file
3146 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks
: -1);
3148 Utility::dropUserPrivs(newuid
);
3152 g_tcpTimeout
=::arg().asNum("client-tcp-timeout");
3153 g_maxTCPPerClient
=::arg().asNum("max-tcp-per-client");
3154 g_tcpMaxQueriesPerConn
=::arg().asNum("max-tcp-queries-per-connection");
3156 if (::arg().mustDo("snmp-agent")) {
3157 g_snmpAgent
= std::make_shared
<RecursorSNMPAgent
>("recursor", ::arg()["snmp-master-socket"]);
3161 const auto cpusMap
= parseCPUMap();
3162 if(g_numThreads
== 1) {
3163 L
<<Logger::Warning
<<"Operating unthreaded"<<endl
;
3165 sd_notify(0, "READY=1");
3167 setCPUMap(cpusMap
, 0, pthread_self());
3172 L
<<Logger::Warning
<<"Launching "<< g_numThreads
<<" threads"<<endl
;
3173 for(unsigned int n
=0; n
< g_numThreads
; ++n
) {
3174 pthread_create(&tid
, 0, recursorThread
, (void*)(long)n
);
3176 setCPUMap(cpusMap
, n
, tid
);
3180 sd_notify(0, "READY=1");
3182 pthread_join(tid
, &res
);
3187 static void* recursorThread(void* ptr
)
3190 t_id
=(int) (long) ptr
;
3191 SyncRes
tmp(g_now
); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3192 SyncRes::setDomainMap(g_initialDomainMap
);
3193 t_allowFrom
= g_initialAllowFrom
;
3194 t_udpclientsocks
= std::unique_ptr
<UDPClientSocks
>(new UDPClientSocks());
3195 t_tcpClientCounts
= std::unique_ptr
<tcpClientCounts_t
>(new tcpClientCounts_t());
3198 t_packetCache
= std::unique_ptr
<RecursorPacketCache
>(new RecursorPacketCache());
3200 #ifdef HAVE_PROTOBUF
3201 t_uuidGenerator
= std::unique_ptr
<boost::uuids::random_generator
>(new boost::uuids::random_generator());
3203 L
<<Logger::Warning
<<"Done priming cache with root hints"<<endl
;
3206 if(!::arg()["lua-dns-script"].empty()) {
3207 t_pdl
= std::make_shared
<RecursorLua4
>(::arg()["lua-dns-script"]);
3208 L
<<Logger::Warning
<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl
;
3211 catch(std::exception
&e
) {
3212 L
<<Logger::Error
<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e
.what()<<endl
;
3216 unsigned int ringsize
=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads
;
3218 t_remotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3219 if(g_weDistributeQueries
) // if so, only 1 thread does recvfrom
3220 t_remotes
->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3222 t_remotes
->set_capacity(ringsize
);
3223 t_servfailremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3224 t_servfailremotes
->set_capacity(ringsize
);
3225 t_largeanswerremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3226 t_largeanswerremotes
->set_capacity(ringsize
);
3228 t_queryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
3229 t_queryring
->set_capacity(ringsize
);
3230 t_servfailqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
3231 t_servfailqueryring
->set_capacity(ringsize
);
3234 MT
=std::unique_ptr
<MTasker
<PacketID
,string
> >(new MTasker
<PacketID
,string
>(::arg().asNum("stack-size")));
3238 t_fdm
=getMultiplexer();
3240 if(::arg().mustDo("webserver")) {
3241 L
<<Logger::Warning
<< "Enabling web server" << endl
;
3243 new RecursorWebServer(t_fdm
);
3245 catch(PDNSException
&e
) {
3246 L
<<Logger::Error
<<"Exception: "<<e
.reason
<<endl
;
3250 L
<<Logger::Error
<<"Enabled '"<< t_fdm
->getName() << "' multiplexer"<<endl
;
3253 t_fdm
->addReadFD(g_pipes
[t_id
].readToThread
, handlePipeRequest
);
3254 t_fdm
->addReadFD(g_pipes
[t_id
].readQueriesToThread
, handlePipeRequest
);
3256 if(g_useOneSocketPerThread
) {
3257 for(deferredAdd_t::const_iterator i
= deferredAdds
[t_id
].cbegin(); i
!= deferredAdds
[t_id
].cend(); ++i
) {
3258 t_fdm
->addReadFD(i
->first
, i
->second
);
3262 if(!g_weDistributeQueries
|| !t_id
) { // if we distribute queries, only t_id = 0 listens
3263 for(deferredAdd_t::const_iterator i
= deferredAdds
[0].cbegin(); i
!= deferredAdds
[0].cend(); ++i
) {
3264 t_fdm
->addReadFD(i
->first
, i
->second
);
3271 t_fdm
->addReadFD(s_rcc
.d_fd
, handleRCC
); // control channel
3274 unsigned int maxTcpClients
=::arg().asNum("max-tcp-clients");
3276 bool listenOnTCP(true);
3278 time_t last_carbon
=0;
3279 time_t carbonInterval
=::arg().asNum("carbon-interval");
3280 counter
.store(0); // used to periodically execute certain tasks
3282 while(MT
->schedule(&g_now
)); // MTasker letting the mthreads do their thing
3284 if(!(counter
%500)) {
3285 MT
->makeThread(houseKeeping
, 0);
3289 typedef vector
<pair
<int, FDMultiplexer::funcparam_t
> > expired_t
;
3290 expired_t expired
=t_fdm
->getTimeouts(g_now
);
3292 for(expired_t::iterator i
=expired
.begin() ; i
!= expired
.end(); ++i
) {
3293 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(i
->second
);
3294 if(g_logCommonErrors
)
3295 L
<<Logger::Warning
<<"Timeout from remote TCP client "<< conn
->d_remote
.toString() <<endl
;
3296 t_fdm
->removeReadFD(i
->first
);
3302 if(!t_id
&& statsWanted
) {
3306 Utility::gettimeofday(&g_now
, 0);
3308 if(!t_id
&& (g_now
.tv_sec
- last_carbon
>= carbonInterval
)) {
3309 MT
->makeThread(doCarbonDump
, 0);
3310 last_carbon
= g_now
.tv_sec
;
3314 // 'run' updates g_now for us
3316 if(!g_weDistributeQueries
|| !t_id
) { // if pdns distributes queries, only tid 0 should do this
3318 if(TCPConnection::getCurrentConnections() > maxTcpClients
) { // shutdown, too many connections
3319 for(tcpListenSockets_t::iterator i
=g_tcpListenSockets
.begin(); i
!= g_tcpListenSockets
.end(); ++i
)
3320 t_fdm
->removeReadFD(*i
);
3325 if(TCPConnection::getCurrentConnections() <= maxTcpClients
) { // reenable
3326 for(tcpListenSockets_t::iterator i
=g_tcpListenSockets
.begin(); i
!= g_tcpListenSockets
.end(); ++i
)
3327 t_fdm
->addReadFD(*i
, handleNewTCPQuestion
);
3334 catch(PDNSException
&ae
) {
3335 L
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
3338 catch(std::exception
&e
) {
3339 L
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
3343 L
<<Logger::Error
<<"any other exception in main: "<<endl
;
// Program entry point of the recursor.
// Records the start-up time, declares every known setting together with its
// default value, parses the command line and the configuration file, serves
// the informational commands (config / help / version), applies the log
// level and finally calls serviceMain(argc, argv).
// PDNSException, std::exception and any other exception are reported through
// the logger at Error level.
// NOTE(review): `ret` is initialised to EXIT_SUCCESS; where it is changed and
// returned is not visible in this view -- confirm against the full file.
3348 int main(int argc
, char **argv
)
3352 g_stats
.startupTime
=time(0);
3353 versionSetProduct(ProductRecursor
);
3357 int ret
= EXIT_SUCCESS
;
// Declare all settings and switches with their help text and default value.
3360 ::arg().set("stack-size","stack size per mthread")="200000";
3361 ::arg().set("soa-minimum-ttl","Don't change")="0";
3362 ::arg().set("no-shuffle","Don't change")="off";
3363 ::arg().set("local-port","port to listen on")="53";
3364 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3365 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3366 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3367 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3368 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3369 ::arg().set("daemon","Operate as a daemon")="no";
3370 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3371 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3372 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3373 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3374 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3375 ::arg().set("chroot","switch to chroot jail")="";
3376 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3377 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3378 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3379 ::arg().set("threads", "Launch this number of threads")="2";
3380 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3381 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3382 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3383 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3384 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3385 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3386 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3387 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3388 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3389 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3390 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3391 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3392 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3393 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3394 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3395 ::arg().set("quiet","Suppress logging of questions and answers")="";
3396 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3397 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR
;
3398 ::arg().set("socket-owner","Owner of socket")="";
3399 ::arg().set("socket-group","Group of socket")="";
3400 ::arg().set("socket-mode", "Permissions for socket")="";
3402 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR
+" when unset and not chrooted" )="";
3403 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3404 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3405 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3406 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3407 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3408 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3409 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3410 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3411 ::arg().set("hint-file", "If set, load root hints from this file")="";
3412 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3413 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3414 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3415 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3416 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3417 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3418 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3419 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3420 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3421 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS
;
3422 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3423 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3424 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY
;
3425 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3426 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3427 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3428 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3429 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3430 ::arg().set("lua-config-file", "More powerful configuration options")="";
3432 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3433 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3434 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3435 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3436 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3437 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3438 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3439 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3440 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3441 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3442 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3443 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3444 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3445 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3446 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3447 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3448 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3449 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3450 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3451 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3452 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3453 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3454 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3455 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3456 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3457 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3459 ::arg().set("include-dir","Include *.conf files from this directory")="";
3460 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3462 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3464 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3465 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3467 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3468 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3470 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3472 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
// The help / version / config commands are registered through setCmd rather than set.
3474 ::arg().setCmd("help","Provide a helpful message");
3475 ::arg().setCmd("version","Print version string");
3476 ::arg().setCmd("config","Output blank configuration");
// Console logging runs at Info level until the configured level is applied further down.
3477 L
.toConsole(Logger::Info
);
// This first parse happens before the configuration file is read; the
// config-dir and config-name values are used right after it to pick the file.
3478 ::arg().laxParse(argc
,argv
); // do a lax parse
// Default file is <config-dir>/recursor.conf. A non-empty config-name selects
// <config-dir>/recursor-<config-name>.conf and is appended to s_programname.
3480 string configname
=::arg()["config-dir"]+"/recursor.conf";
3481 if(::arg()["config-name"]!="") {
3482 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3483 s_programname
+="-"+::arg()["config-name"];
3485 cleanSlashes(configname
);
// "config" command: write the configuration string to stdout.
3487 if(::arg().mustDo("config")) {
3488 cout
<<::arg().configstring()<<endl
;
// Read the configuration file; a failure to parse it is logged as a warning.
3492 if(!::arg().file(configname
.c_str()))
3493 L
<<Logger::Warning
<<"Unable to parse configuration file '"<<configname
<<"'"<<endl
;
// Second pass over the command line, this time with parse() instead of laxParse().
3495 ::arg().parse(argc
,argv
);
// chroot together with a configured api-config-dir and a non-read-only API is
// reported as an error.
3497 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3498 L
<<Logger::Error
<<"Using chroot and a writable API is not possible"<<endl
;
// No socket-dir configured: LOCALSTATEDIR is used when chroot is empty.
// NOTE(review): the "/" assignment is presumably the chroot case -- the line
// separating the two assignments is not visible here, confirm.
3502 if (::arg()["socket-dir"].empty()) {
3503 if (::arg()["chroot"].empty())
3504 ::arg().set("socket-dir") = LOCALSTATEDIR
;
3506 ::arg().set("socket-dir") = "/";
// Store delegation-only in lower case.
3509 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
// With exactly one thread, pdns-distributes-queries is forced to "no".
3511 if(::arg().asNum("threads")==1)
3512 ::arg().set("pdns-distributes-queries")="no";
// "help" command: print the syntax header and the help string.
3514 if(::arg().mustDo("help")) {
3515 cout
<<"syntax:"<<endl
<<endl
;
3516 cout
<<::arg().helpstring(::arg()["help"])<<endl
;
// "version" command: print product version and build configuration.
3519 if(::arg().mustDo("version")) {
3520 showProductVersion();
3521 showBuildConfiguration();
// The log level comes from the numeric loglevel setting and is raised to
// Logger::Error when it is lower than that.
3525 Logger::Urgency logUrgency
= (Logger::Urgency
)::arg().asNum("loglevel");
3527 if (logUrgency
< Logger::Error
)
3528 logUrgency
= Logger::Error
;
3529 if(!g_quiet
&& logUrgency
< Logger::Info
) { // Logger::Info=6, Logger::Debug=7
3530 logUrgency
= Logger::Info
; // if you do --quiet=no, you need Info to also see the query log
// Apply the resulting level to both the logger and its console output.
3532 L
.setLoglevel(logUrgency
);
3533 L
.toConsole(logUrgency
);
// Hand over to serviceMain with the unmodified command-line arguments.
3535 serviceMain(argc
, argv
);
// Exception handlers: each one logs the failure at Error level.
3537 catch(PDNSException
&ae
) {
3538 L
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
3541 catch(std::exception
&e
) {
3542 L
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
// NOTE(review): this message ends with a colon but no detail follows it.
3546 L
<<Logger::Error
<<"any other exception in main: "<<endl
;