2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 #include "ws-recursor.hh"
32 #include "recpacketcache.hh"
34 #include "dns_random.hh"
38 #include "opensslsigners.hh"
41 #include <boost/static_assert.hpp>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
52 #include "arguments.hh"
56 #include "sortlist.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
64 #include "malloctrace.hh"
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
76 #include "lua-recursor4.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
92 #include <systemd/sd-daemon.h>
95 #include "namespaces.hh"
97 typedef map
<ComboAddress
, uint32_t, ComboAddress::addressOnlyLessThan
> tcpClientCounts_t
;
99 static thread_local
std::shared_ptr
<RecursorLua4
> t_pdl
;
100 static thread_local
int t_id
;
101 static thread_local
std::shared_ptr
<Regex
> t_traceRegex
;
102 static thread_local
std::unique_ptr
<tcpClientCounts_t
> t_tcpClientCounts
;
104 thread_local
std::unique_ptr
<MT_t
> MT
; // the big MTasker
105 thread_local
std::unique_ptr
<MemRecursorCache
> t_RC
;
106 thread_local
std::unique_ptr
<RecursorPacketCache
> t_packetCache
;
107 thread_local FDMultiplexer
* t_fdm
{nullptr};
108 thread_local
std::unique_ptr
<addrringbuf_t
> t_remotes
, t_servfailremotes
, t_largeanswerremotes
;
109 thread_local
std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > > t_queryring
, t_servfailqueryring
;
110 thread_local
std::shared_ptr
<NetmaskGroup
> t_allowFrom
;
112 thread_local
std::unique_ptr
<boost::uuids::random_generator
> t_uuidGenerator
;
114 __thread
struct timeval g_now
; // timestamp, updated (too) frequently
116 // for communicating with our threads
125 static const int s_handlerThreadID
= -1;
126 static const int s_distributorThreadID
= 0;
128 typedef vector
<int> tcpListenSockets_t
;
129 typedef map
<int, ComboAddress
> listenSocketsAddresses_t
; // is shared across all threads right now
130 typedef vector
<pair
<int, function
< void(int, any
&) > > > deferredAdd_t
;
132 static const ComboAddress
g_local4("0.0.0.0"), g_local6("::");
133 static vector
<ThreadPipeSet
> g_pipes
; // effectively readonly after startup
134 static tcpListenSockets_t g_tcpListenSockets
; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
135 static listenSocketsAddresses_t g_listenSocketsAddresses
; // is shared across all threads right now
136 static std::unordered_map
<unsigned int, deferredAdd_t
> deferredAdds
;
137 static set
<int> g_fromtosockets
; // listen sockets that use 'sendfromto()' mechanism
138 static vector
<ComboAddress
> g_localQueryAddresses4
, g_localQueryAddresses6
;
139 static AtomicCounter counter
;
140 static std::shared_ptr
<SyncRes::domainmap_t
> g_initialDomainMap
; // new threads needs this to be setup
141 static std::shared_ptr
<NetmaskGroup
> g_initialAllowFrom
; // new thread needs to be setup with this
142 static size_t g_tcpMaxQueriesPerConn
;
143 static uint64_t g_latencyStatSize
;
144 static uint32_t g_disthashseed
;
145 static unsigned int g_maxTCPPerClient
;
146 static unsigned int g_networkTimeoutMsec
;
147 static unsigned int g_maxMThreads
;
148 static unsigned int g_numWorkerThreads
;
149 static int g_tcpTimeout
;
150 static uint16_t g_udpTruncationThreshold
;
151 static std::atomic
<bool> statsWanted
;
152 static std::atomic
<bool> g_quiet
;
153 static bool g_logCommonErrors
;
154 static bool g_anyToTcp
;
155 static bool g_weDistributeQueries
; // if true, only 1 thread listens on the incoming query sockets
156 static bool g_reusePort
{false};
157 static bool g_useOneSocketPerThread
;
158 static bool g_gettagNeedsEDNSOptions
{false};
159 static time_t g_statisticsInterval
;
160 static bool g_useIncomingECS
;
161 std::atomic
<uint32_t> g_maxCacheEntries
, g_maxPacketCacheEntries
;
163 RecursorControlChannel s_rcc
; // only active in thread 0
164 RecursorStats g_stats
;
165 string s_programname
="pdns_recursor";
167 bool g_lowercaseOutgoing
;
168 unsigned int g_numThreads
;
169 uint16_t g_outgoingEDNSBufsize
;
170 bool g_logRPZChanges
{false};
// Loopback, private, shared-address-space and link-local ranges
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Networks that may not be considered a valid destination, taken from both IANA special-purpose registries:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// The two lists above joined into one comma-separated list
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
181 //! used to send information to a newborn mthread
182 struct DNSComboWriter
{
183 DNSComboWriter(const char* data
, uint16_t len
, const struct timeval
& now
) : d_mdp(true, data
, len
), d_now(now
),
184 d_tcp(false), d_socket(-1)
187 void setRemote(const ComboAddress
* sa
)
192 void setLocal(const ComboAddress
& sa
)
198 void setSocket(int sock
)
203 string
getRemote() const
205 return d_remote
.toString();
208 struct timeval d_now
;
209 ComboAddress d_remote
, d_local
;
211 boost::uuids::uuid d_uuid
;
212 string d_requestorId
;
215 EDNSSubnetOpts d_ednssubnet
;
216 bool d_ecsFound
{false};
217 bool d_ecsParsed
{false};
220 unsigned int d_tag
{0};
223 shared_ptr
<TCPConnection
> d_tcpConnection
;
224 vector
<pair
<uint16_t, string
> > d_ednsOpts
;
225 std::vector
<std::string
> d_policyTags
;
226 LuaContext::LuaObject d_data
;
227 uint32_t d_ttlCap
{std::numeric_limits
<uint32_t>::max()};
228 bool d_variable
{false};
233 return MT
? MT
.get() : nullptr;
238 static ArgvMap theArg
;
242 unsigned int getRecursorThreadId()
244 return static_cast<unsigned int>(t_id
);
252 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
);
254 // -1 is error, 0 is timeout, 1 is success
255 int asendtcp(const string
& data
, Socket
* sock
)
261 t_fdm
->addWriteFD(sock
->getHandle(), handleTCPClientWritable
, pident
);
264 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
);
266 if(!ret
|| ret
==-1) { // timeout
267 t_fdm
->removeWriteFD(sock
->getHandle());
269 else if(packet
.size() !=data
.size()) { // main loop tells us what it sent out, or empty in case of an error
275 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
);
277 // -1 is error, 0 is timeout, 1 is success
278 int arecvtcp(string
& data
, size_t len
, Socket
* sock
, bool incompleteOkay
)
284 pident
.inIncompleteOkay
=incompleteOkay
;
285 t_fdm
->addReadFD(sock
->getHandle(), handleTCPClientReadable
, pident
);
287 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
288 if(!ret
|| ret
==-1) { // timeout
289 t_fdm
->removeReadFD(sock
->getHandle());
291 else if(data
.empty()) {// error, EOF or other
298 static void handleGenUDPQueryResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
300 PacketID pident
=*any_cast
<PacketID
>(&var
);
302 ssize_t ret
=recv(fd
, resp
, sizeof(resp
), 0);
303 t_fdm
->removeReadFD(fd
);
305 string
data(resp
, (size_t) ret
);
306 MT
->sendEvent(pident
, &data
);
310 MT
->sendEvent(pident
, &empty
);
311 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
314 string
GenUDPQueryResponse(const ComboAddress
& dest
, const string
& query
)
316 Socket
s(dest
.sin4
.sin_family
, SOCK_DGRAM
);
318 ComboAddress local
= getQueryLocalAddress(dest
.sin4
.sin_family
, 0);
327 t_fdm
->addReadFD(s
.getHandle(), handleGenUDPQueryResponse
, pident
);
331 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
333 if(!ret
|| ret
==-1) { // timeout
334 t_fdm
->removeReadFD(s
.getHandle());
336 else if(data
.empty()) {// error, EOF or other
337 // we could special case this
343 //! pick a random query local address
344 ComboAddress
getQueryLocalAddress(int family
, uint16_t port
)
347 if(family
==AF_INET
) {
348 if(g_localQueryAddresses4
.empty())
351 ret
= g_localQueryAddresses4
[dns_random(g_localQueryAddresses4
.size())];
352 ret
.sin4
.sin_port
= htons(port
);
355 if(g_localQueryAddresses6
.empty())
358 ret
= g_localQueryAddresses6
[dns_random(g_localQueryAddresses6
.size())];
360 ret
.sin6
.sin6_port
= htons(port
);
365 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
&);
367 static void setSocketBuffer(int fd
, int optname
, uint32_t size
)
370 socklen_t len
=sizeof(psize
);
372 if(!getsockopt(fd
, SOL_SOCKET
, optname
, (char*)&psize
, &len
) && psize
> size
) {
373 L
<<Logger::Error
<<"Not decreasing socket buffer size from "<<psize
<<" to "<<size
<<endl
;
377 if (setsockopt(fd
, SOL_SOCKET
, optname
, (char*)&size
, sizeof(size
)) < 0 )
378 L
<<Logger::Error
<<"Unable to raise socket buffer size to "<<size
<<": "<<strerror(errno
)<<endl
;
382 static void setSocketReceiveBuffer(int fd
, uint32_t size
)
384 setSocketBuffer(fd
, SO_RCVBUF
, size
);
387 static void setSocketSendBuffer(int fd
, uint32_t size
)
389 setSocketBuffer(fd
, SO_SNDBUF
, size
);
393 // you can ask this class for a UDP socket to send a query from
394 // this socket is not yours, don't even think about deleting it
395 // but after you call 'returnSocket' on it, don't assume anything anymore
398 unsigned int d_numsocks
;
400 UDPClientSocks() : d_numsocks(0)
404 typedef set
<int> socks_t
;
407 // returning -2 means: temporary OS error (i.e., out of files), -1 means error related to remote
408 int getSocket(const ComboAddress
& toaddr
, int* fd
)
410 *fd
=makeClientSocket(toaddr
.sin4
.sin_family
);
411 if(*fd
< 0) // temporary error - receive exception otherwise
414 if(connect(*fd
, (struct sockaddr
*)(&toaddr
), toaddr
.getSocklen()) < 0) {
416 // returnSocket(*fd);
420 catch(const PDNSException
& e
) {
421 L
<<Logger::Error
<<"Error closing UDP socket after connect() failed: "<<e
.reason
<<endl
;
424 if(err
==ENETUNREACH
) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
434 void returnSocket(int fd
)
436 socks_t::iterator i
=d_socks
.find(fd
);
437 if(i
==d_socks
.end()) {
438 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd
)+") not in the pool");
440 returnSocketLocked(i
);
443 // return a socket to the pool, or simply erase it
444 void returnSocketLocked(socks_t::iterator
& i
)
446 if(i
==d_socks
.end()) {
447 throw PDNSException("Trying to return a socket not in the pool");
450 t_fdm
->removeReadFD(*i
);
452 catch(FDMultiplexerException
& e
) {
453 // we sometimes return a socket that has not yet been assigned to t_fdm
458 catch(const PDNSException
& e
) {
459 L
<<Logger::Error
<<"Error closing returned UDP socket: "<<e
.reason
<<endl
;
466 // returns -1 for errors which might go away, throws for ones that won't
467 static int makeClientSocket(int family
)
469 int ret
=socket(family
, SOCK_DGRAM
, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
471 if(ret
< 0 && errno
==EMFILE
) // this is not a catastrophic error
475 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family
)+"): "+stringerror());
477 // setCloseOnExec(ret); // we're not going to exec
484 if(tries
==1) // fall back to kernel 'random'
487 port
= 1025 + dns_random(64510);
489 sin
=getQueryLocalAddress(family
, port
); // does htons for us
491 if (::bind(ret
, (struct sockaddr
*)&sin
, sin
.getSocklen()) >= 0)
495 throw PDNSException("Resolver binding to local query client socket on "+sin
.toString()+": "+stringerror());
502 static thread_local
std::unique_ptr
<UDPClientSocks
> t_udpclientsocks
;
504 /* these two functions are used by LWRes */
505 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
506 int asendto(const char *data
, size_t len
, int flags
,
507 const ComboAddress
& toaddr
, uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int* fd
)
511 pident
.domain
= domain
;
512 pident
.remote
= toaddr
;
515 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
516 pair
<MT_t::waiters_t::iterator
, MT_t::waiters_t::iterator
> chain
=MT
->d_waiters
.equal_range(pident
, PacketIDBirthdayCompare());
518 for(; chain
.first
!= chain
.second
; chain
.first
++) {
519 if(chain
.first
->key
.fd
> -1) { // don't chain onto existing chained waiter!
521 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
522 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
523 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
525 chain
.first
->key
.chain
.insert(id
); // we can chain
526 *fd
=-1; // gets used in waitEvent / sendEvent later on
531 int ret
=t_udpclientsocks
->getSocket(toaddr
, fd
);
538 t_fdm
->addReadFD(*fd
, handleUDPServerResponse
, pident
);
539 ret
= send(*fd
, data
, len
, 0);
544 t_udpclientsocks
->returnSocket(*fd
);
546 errno
= tmp
; // this is for logging purposes only
550 // -1 is error, 0 is timeout, 1 is success
551 int arecvfrom(char *data
, size_t len
, int flags
, const ComboAddress
& fromaddr
, size_t *d_len
,
552 uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int fd
, struct timeval
* now
)
554 static optional
<unsigned int> nearMissLimit
;
556 nearMissLimit
=::arg().asNum("spoof-nearmiss-max");
561 pident
.domain
=domain
;
563 pident
.remote
=fromaddr
;
566 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
, now
);
569 if(packet
.empty()) // means "error"
572 *d_len
=packet
.size();
573 memcpy(data
,packet
.c_str(),min(len
,*d_len
));
574 if(*nearMissLimit
&& pident
.nearMisses
> *nearMissLimit
) {
575 L
<<Logger::Error
<<"Too many ("<<pident
.nearMisses
<<" > "<<*nearMissLimit
<<") bogus answers for '"<<domain
<<"' from "<<fromaddr
.toString()<<", assuming spoof attempt."<<endl
;
576 g_stats
.spoofCount
++;
582 t_udpclientsocks
->returnSocket(fd
);
587 static void writePid(void)
589 if(!::arg().mustDo("write-pid"))
591 ofstream
of(s_pidfname
.c_str(), std::ios_base::app
);
593 of
<< Utility::getpid() <<endl
;
595 L
<<Logger::Error
<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname
<<" failed: "<<strerror(errno
)<<endl
;
598 TCPConnection::TCPConnection(int fd
, const ComboAddress
& addr
) : d_remote(addr
), d_fd(fd
)
600 ++s_currentConnections
;
601 (*t_tcpClientCounts
)[d_remote
]++;
604 TCPConnection::~TCPConnection()
607 if(closesocket(d_fd
) < 0)
608 L
<<Logger::Error
<<"Error closing socket for TCPConnection"<<endl
;
610 catch(const PDNSException
& e
) {
611 L
<<Logger::Error
<<"Error closing TCPConnection socket: "<<e
.reason
<<endl
;
614 if(t_tcpClientCounts
->count(d_remote
) && !(*t_tcpClientCounts
)[d_remote
]--)
615 t_tcpClientCounts
->erase(d_remote
);
616 --s_currentConnections
;
619 AtomicCounter
TCPConnection::s_currentConnections
;
621 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
);
623 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
624 static void updateResponseStats(int res
, const ComboAddress
& remote
, unsigned int packetsize
, const DNSName
* query
, uint16_t qtype
)
626 if(packetsize
> 1000 && t_largeanswerremotes
)
627 t_largeanswerremotes
->push_back(remote
);
629 case RCode::ServFail
:
630 if(t_servfailremotes
) {
631 t_servfailremotes
->push_back(remote
);
632 if(query
&& t_servfailqueryring
) // packet cache
633 t_servfailqueryring
->push_back(make_pair(*query
, qtype
));
637 case RCode::NXDomain
:
646 static string
makeLoginfo(DNSComboWriter
* dc
)
649 return "("+dc
->d_mdp
.d_qname
.toLogString()+"/"+DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)+" from "+(dc
->d_remote
.toString())+")";
653 return "Exception making error message for exception";
657 static void protobufLogQuery(const std::shared_ptr
<RemoteLogger
>& logger
, uint8_t maskV4
, uint8_t maskV6
, const boost::uuids::uuid
& uniqueId
, const ComboAddress
& remote
, const ComboAddress
& local
, const Netmask
& ednssubnet
, bool tcp
, uint16_t id
, size_t len
, const DNSName
& qname
, uint16_t qtype
, uint16_t qclass
, const std::vector
<std::string
>& policyTags
, const std::string
& requestorId
, const std::string
& deviceId
)
659 Netmask
requestorNM(remote
, remote
.sin4
.sin_family
== AF_INET
? maskV4
: maskV6
);
660 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
661 RecProtoBufMessage
message(DNSProtoBufMessage::Query
, uniqueId
, &requestor
, &local
, qname
, qtype
, qclass
, id
, tcp
, len
);
662 message
.setEDNSSubnet(ednssubnet
, ednssubnet
.isIpv4() ? maskV4
: maskV6
);
663 message
.setRequestorId(requestorId
);
664 message
.setDeviceId(deviceId
);
666 if (!policyTags
.empty()) {
667 message
.setPolicyTags(policyTags
);
670 // cerr <<message.toDebugString()<<endl;
672 message
.serialize(str
);
673 logger
->queueData(str
);
676 static void protobufLogResponse(const std::shared_ptr
<RemoteLogger
>& logger
, const RecProtoBufMessage
& message
)
678 // cerr <<message.toDebugString()<<endl;
680 message
.serialize(str
);
681 logger
->queueData(str
);
686 * Chases the CNAME provided by the PolicyCustom RPZ policy.
688 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
689 * @param qtype: The QType of the original query
690 * @param sr: A SyncRes
691 * @param res: An integer that will contain the RCODE of the lookup we do
692 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
694 static void handleRPZCustom(const DNSRecord
& spoofed
, const QType
& qtype
, SyncRes
& sr
, int& res
, vector
<DNSRecord
>& ret
)
696 if (spoofed
.d_type
== QType::CNAME
) {
697 bool oldWantsRPZ
= sr
.getWantsRPZ();
698 sr
.setWantsRPZ(false);
699 vector
<DNSRecord
> ans
;
700 res
= sr
.beginResolve(DNSName(spoofed
.d_content
->getZoneRepresentation()), qtype
, 1, ans
);
701 for (const auto& rec
: ans
) {
702 if(rec
.d_place
== DNSResourceRecord::ANSWER
) {
706 // Reset the RPZ state of the SyncRes
707 sr
.setWantsRPZ(oldWantsRPZ
);
711 static bool addRecordToPacket(DNSPacketWriter
& pw
, const DNSRecord
& rec
, uint32_t& minTTL
, uint32_t ttlCap
, const uint16_t maxAnswerSize
)
713 pw
.startRecord(rec
.d_name
, rec
.d_type
, (rec
.d_ttl
> ttlCap
? ttlCap
: rec
.d_ttl
), rec
.d_class
, rec
.d_place
);
715 if(rec
.d_type
!= QType::OPT
) // their TTL ain't real
716 minTTL
= min(minTTL
, rec
.d_ttl
);
718 rec
.d_content
->toPacket(pw
);
719 if(pw
.size() > static_cast<size_t>(maxAnswerSize
)) {
721 if(rec
.d_place
!= DNSResourceRecord::ADDITIONAL
) {
722 pw
.getHeader()->tc
=1;
731 static void startDoResolve(void *p
)
733 DNSComboWriter
* dc
=(DNSComboWriter
*)p
;
736 t_queryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
738 uint16_t maxanswersize
= dc
->d_tcp
? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold
);
741 if(getEDNSOpts(dc
->d_mdp
, &edo
)) {
744 "Values lower than 512 MUST be treated as equal to 512."
746 maxanswersize
= min(static_cast<uint16_t>(edo
.d_packetsize
>= 512 ? edo
.d_packetsize
: 512), g_udpTruncationThreshold
);
748 dc
->d_ednsOpts
= edo
.d_options
;
751 if (g_useIncomingECS
&& !dc
->d_ecsParsed
) {
752 for (const auto& o
: edo
.d_options
) {
753 if (o
.first
== EDNSOptionCode::ECS
) {
754 dc
->d_ecsFound
= getEDNSSubnetOptsFromString(o
.second
, &dc
->d_ednssubnet
);
760 /* perhaps there was no EDNS or no ECS but by now we looked */
761 dc
->d_ecsParsed
= true;
762 vector
<DNSRecord
> ret
;
763 vector
<uint8_t> packet
;
765 auto luaconfsLocal
= g_luaconfs
.getLocal();
766 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
768 RecProtoBufMessage
pbMessage(RecProtoBufMessage::Response
);
770 if (luaconfsLocal
->protobufServer
) {
771 Netmask
requestorNM(dc
->d_remote
, dc
->d_remote
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
772 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
773 pbMessage
.update(dc
->d_uuid
, &requestor
, &dc
->d_local
, dc
->d_tcp
, dc
->d_mdp
.d_header
.id
);
774 pbMessage
.setEDNSSubnet(dc
->d_ednssubnet
.source
, dc
->d_ednssubnet
.source
.isIpv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
775 pbMessage
.setQuestion(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
777 #endif /* HAVE_PROTOBUF */
779 DNSPacketWriter
pw(packet
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
781 pw
.getHeader()->aa
=0;
782 pw
.getHeader()->ra
=1;
783 pw
.getHeader()->qr
=1;
784 pw
.getHeader()->tc
=0;
785 pw
.getHeader()->id
=dc
->d_mdp
.d_header
.id
;
786 pw
.getHeader()->rd
=dc
->d_mdp
.d_header
.rd
;
787 pw
.getHeader()->cd
=dc
->d_mdp
.d_header
.cd
;
789 /* This is the lowest TTL seen in the records of the response,
790 so we can't cache it for longer than this value.
791 If we have a TTL cap, this value can't be larger than the
792 cap no matter what. */
793 uint32_t minTTL
= dc
->d_ttlCap
;
795 SyncRes
sr(dc
->d_now
);
799 sr
.setLuaEngine(t_pdl
);
801 sr
.d_requestor
=dc
->d_remote
; // ECS needs this too
802 if(g_dnssecmode
!= DNSSECMode::Off
) {
803 sr
.setDoDNSSEC(true);
805 // Does the requestor want DNSSEC records?
806 if(edo
.d_Z
& EDNSOpts::DNSSECOK
) {
808 g_stats
.dnssecQueries
++;
811 // Ignore the client-set CD flag
812 pw
.getHeader()->cd
=0;
814 sr
.setDNSSECValidationRequested(g_dnssecmode
== DNSSECMode::ValidateAll
|| g_dnssecmode
==DNSSECMode::ValidateForLog
|| ((dc
->d_mdp
.d_header
.ad
|| DNSSECOK
) && g_dnssecmode
==DNSSECMode::Process
));
817 sr
.setInitialRequestId(dc
->d_uuid
);
820 if (g_useIncomingECS
) {
821 sr
.setIncomingECSFound(dc
->d_ecsFound
);
822 if (dc
->d_ecsFound
) {
823 sr
.setIncomingECS(dc
->d_ednssubnet
);
827 bool tracedQuery
=false; // we could consider letting Lua know about this too
828 bool variableAnswer
= dc
->d_variable
;
829 bool shouldNotValidate
= false;
831 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
832 int res
= RCode::NoError
;
833 DNSFilterEngine::Policy appliedPolicy
;
835 RecursorLua4::DNSQuestion
dq(dc
->d_remote
, dc
->d_local
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_tcp
, variableAnswer
, wantsRPZ
);
836 dq
.ednsFlags
= &edo
.d_Z
;
837 dq
.ednsOptions
= &dc
->d_ednsOpts
;
839 dq
.discardedPolicies
= &sr
.d_discardedPolicies
;
840 dq
.policyTags
= &dc
->d_policyTags
;
841 dq
.appliedPolicy
= &appliedPolicy
;
842 dq
.currentRecords
= &ret
;
843 dq
.dh
= &dc
->d_mdp
.d_header
;
844 dq
.data
= dc
->d_data
;
846 dq
.requestorId
= dc
->d_requestorId
;
847 dq
.deviceId
= dc
->d_deviceId
;
850 if(dc
->d_mdp
.d_qtype
==QType::ANY
&& !dc
->d_tcp
&& g_anyToTcp
) {
851 pw
.getHeader()->tc
= 1;
853 variableAnswer
= true;
857 if(t_traceRegex
&& t_traceRegex
->match(dc
->d_mdp
.d_qname
.toString())) {
858 sr
.setLogMode(SyncRes::Store
);
863 if(!g_quiet
|| tracedQuery
) {
864 L
<<Logger::Warning
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] " << (dc
->d_tcp
? "TCP " : "") << "question for '"<<dc
->d_mdp
.d_qname
<<"|"
865 <<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)<<"' from "<<dc
->getRemote();
866 if(!dc
->d_ednssubnet
.source
.empty()) {
867 L
<<" (ecs "<<dc
->d_ednssubnet
.source
.toString()<<")";
872 sr
.setId(MT
->getTid());
873 if(!dc
->d_mdp
.d_header
.rd
)
877 t_pdl
->prerpz(dq
, res
);
880 // Check if the query has a policy attached to it
882 appliedPolicy
= luaconfsLocal
->dfe
.getQueryPolicy(dc
->d_mdp
.d_qname
, dc
->d_remote
, sr
.d_discardedPolicies
);
885 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
886 if(!t_pdl
|| !t_pdl
->preresolve(dq
, res
)) {
888 sr
.setWantsRPZ(wantsRPZ
);
890 switch(appliedPolicy
.d_kind
) {
891 case DNSFilterEngine::PolicyKind::NoAction
:
893 case DNSFilterEngine::PolicyKind::Drop
:
894 g_stats
.policyDrops
++;
895 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
899 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
900 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
903 case DNSFilterEngine::PolicyKind::NODATA
:
904 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
907 case DNSFilterEngine::PolicyKind::Custom
:
908 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
910 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
911 ret
.push_back(spoofed
);
912 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
914 case DNSFilterEngine::PolicyKind::Truncate
:
916 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
918 pw
.getHeader()->tc
=1;
925 // Query was not handled for QNAME Policy reasons, now actually go out to find an answer
927 res
= sr
.beginResolve(dc
->d_mdp
.d_qname
, QType(dc
->d_mdp
.d_qtype
), dc
->d_mdp
.d_qclass
, ret
);
928 shouldNotValidate
= sr
.wasOutOfBand();
930 catch(ImmediateServFailException
&e
) {
931 if(g_logCommonErrors
)
932 L
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during resolve of '"<<dc
->d_mdp
.d_qname
<<"' because: "<<e
.reason
<<endl
;
933 res
= RCode::ServFail
;
936 dq
.validationState
= sr
.getValidationState();
938 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
939 if (res
== -2) { // XXX This block should be macro'd, it is repeated post-resolve.
940 appliedPolicy
= sr
.d_appliedPolicy
;
941 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
942 switch(appliedPolicy
.d_kind
) {
943 case DNSFilterEngine::PolicyKind::NoAction
: // This can never happen
944 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
945 case DNSFilterEngine::PolicyKind::Drop
:
946 g_stats
.policyDrops
++;
950 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
955 case DNSFilterEngine::PolicyKind::NODATA
:
960 case DNSFilterEngine::PolicyKind::Truncate
:
964 pw
.getHeader()->tc
=1;
969 case DNSFilterEngine::PolicyKind::Custom
:
972 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
973 ret
.push_back(spoofed
);
974 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
980 appliedPolicy
= luaconfsLocal
->dfe
.getPostPolicy(ret
, sr
.d_discardedPolicies
);
984 if(res
== RCode::NoError
) {
986 for(; i
!= ret
.cend(); ++i
)
987 if(i
->d_type
== dc
->d_mdp
.d_qtype
&& i
->d_place
== DNSResourceRecord::ANSWER
)
989 if(i
== ret
.cend() && t_pdl
->nodata(dq
, res
))
990 shouldNotValidate
= true;
993 else if(res
== RCode::NXDomain
&& t_pdl
->nxdomain(dq
, res
))
994 shouldNotValidate
= true;
996 if(t_pdl
->postresolve(dq
, res
))
997 shouldNotValidate
= true;
1000 if (wantsRPZ
) { //XXX This block is repeated, see above
1001 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1002 switch(appliedPolicy
.d_kind
) {
1003 case DNSFilterEngine::PolicyKind::NoAction
:
1005 case DNSFilterEngine::PolicyKind::Drop
:
1006 g_stats
.policyDrops
++;
1010 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1012 res
=RCode::NXDomain
;
1015 case DNSFilterEngine::PolicyKind::NODATA
:
1020 case DNSFilterEngine::PolicyKind::Truncate
:
1024 pw
.getHeader()->tc
=1;
1029 case DNSFilterEngine::PolicyKind::Custom
:
1032 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
1033 ret
.push_back(spoofed
);
1034 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1040 if(res
== PolicyDecision::DROP
) {
1041 g_stats
.policyDrops
++;
1046 if(tracedQuery
|| res
== -1 || res
== RCode::ServFail
|| pw
.getHeader()->rcode
== RCode::ServFail
)
1048 string
trace(sr
.getTrace());
1049 if(!trace
.empty()) {
1050 vector
<string
> lines
;
1051 boost::split(lines
, trace
, boost::is_any_of("\n"));
1052 for(const string
& line
: lines
) {
1054 L
<<Logger::Warning
<< line
<< endl
;
1060 pw
.getHeader()->rcode
=RCode::ServFail
;
1061 // no commit here, because no record
1062 g_stats
.servFails
++;
1065 pw
.getHeader()->rcode
=res
;
1067 // Does the validation mode or query demand validation?
1068 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1071 L
<<Logger::Warning
<<"Starting validation of answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<endl
;
1074 auto state
= sr
.getValidationState();
1076 if(state
== Secure
) {
1078 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates correctly"<<endl
;
1081 // Is the query source interested in the value of the ad-bit?
1082 if (dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)
1083 pw
.getHeader()->ad
=1;
1085 else if(state
== Insecure
) {
1087 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates as Insecure"<<endl
;
1090 pw
.getHeader()->ad
=0;
1092 else if(state
== Bogus
) {
1093 if(g_dnssecLogBogus
|| sr
.doLog() || g_dnssecmode
== DNSSECMode::ValidateForLog
) {
1094 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates as Bogus"<<endl
;
1097 // Does the query or validation mode demand sending out a SERVFAIL on validation errors?
1098 if(!pw
.getHeader()->cd
&& (g_dnssecmode
== DNSSECMode::ValidateAll
|| dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)) {
1100 L
<<Logger::Warning
<<"Sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" because recursor or query demands it for Bogus results"<<endl
;
1103 pw
.getHeader()->rcode
=RCode::ServFail
;
1107 L
<<Logger::Warning
<<"Not sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" Bogus validation since neither config nor query demands this"<<endl
;
1112 catch(ImmediateServFailException
&e
) {
1113 if(g_logCommonErrors
)
1114 L
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during validation of '"<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<"' because: "<<e
.reason
<<endl
;
1115 pw
.getHeader()->rcode
=RCode::ServFail
;
1121 orderAndShuffle(ret
);
1122 if(auto sl
= luaconfsLocal
->sortlist
.getOrderCmp(dc
->d_remote
)) {
1123 stable_sort(ret
.begin(), ret
.end(), *sl
);
1124 variableAnswer
=true;
1128 bool needCommit
= false;
1129 for(auto i
=ret
.cbegin(); i
!=ret
.cend(); ++i
) {
1131 ( i
->d_type
== QType::NSEC3
||
1133 ( i
->d_type
== QType::RRSIG
|| i
->d_type
==QType::NSEC
) &&
1135 ( dc
->d_mdp
.d_qtype
!= i
->d_type
&& dc
->d_mdp
.d_qtype
!= QType::ANY
) ||
1136 i
->d_place
!= DNSResourceRecord::ANSWER
1144 if (!addRecordToPacket(pw
, *i
, minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1150 #ifdef HAVE_PROTOBUF
1151 if(luaconfsLocal
->protobufServer
&& (i
->d_type
== QType::A
|| i
->d_type
== QType::AAAA
|| i
->d_type
== QType::CNAME
)) {
1152 pbMessage
.addRR(*i
);
1162 /* we try to add the EDNS OPT RR even for truncated answers,
1164 "The minimal response MUST be the DNS header, question section, and an
1165 OPT record. This MUST also occur when a truncated response (using
1166 the DNS header's TC bit) is returned."
1168 if (addRecordToPacket(pw
, makeOpt(edo
.d_packetsize
, 0, edo
.d_Z
), minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1173 g_rs
.submitResponse(dc
->d_mdp
.d_qtype
, packet
.size(), !dc
->d_tcp
);
1174 updateResponseStats(res
, dc
->d_remote
, packet
.size(), &dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1175 #ifdef HAVE_PROTOBUF
1176 if (luaconfsLocal
->protobufServer
&& (!luaconfsLocal
->protobufTaggedOnly
|| (appliedPolicy
.d_name
&& !appliedPolicy
.d_name
->empty()) || !dc
->d_policyTags
.empty())) {
1177 pbMessage
.setBytes(packet
.size());
1178 pbMessage
.setResponseCode(pw
.getHeader()->rcode
);
1179 if (appliedPolicy
.d_name
) {
1180 pbMessage
.setAppliedPolicy(*appliedPolicy
.d_name
);
1181 pbMessage
.setAppliedPolicyType(appliedPolicy
.d_type
);
1183 pbMessage
.setPolicyTags(dc
->d_policyTags
);
1184 pbMessage
.setQueryTime(dc
->d_now
.tv_sec
, dc
->d_now
.tv_usec
);
1185 pbMessage
.setRequestorId(dq
.requestorId
);
1186 pbMessage
.setDeviceId(dq
.deviceId
);
1187 protobufLogResponse(luaconfsLocal
->protobufServer
, pbMessage
);
1194 fillMSGHdr(&msgh
, &iov
, cbuf
, 0, (char*)&*packet
.begin(), packet
.size(), &dc
->d_remote
);
1195 msgh
.msg_control
=NULL
;
1197 if(g_fromtosockets
.count(dc
->d_socket
)) {
1198 addCMsgSrcAddr(&msgh
, cbuf
, &dc
->d_local
, 0);
1200 if(sendmsg(dc
->d_socket
, &msgh
, 0) < 0 && g_logCommonErrors
)
1201 L
<<Logger::Warning
<<"Sending UDP reply to client "<<dc
->d_remote
.toStringWithPort()<<" failed with: "<<strerror(errno
)<<endl
;
1203 if(!SyncRes::s_nopacketcache
&& !variableAnswer
&& !sr
.wasVariable() ) {
1204 t_packetCache
->insertResponsePacket(dc
->d_tag
, dc
->d_qhash
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
,
1205 string((const char*)&*packet
.begin(), packet
.size()),
1207 pw
.getHeader()->rcode
== RCode::ServFail
? SyncRes::s_packetcacheservfailttl
:
1208 min(minTTL
,SyncRes::s_packetcachettl
),
1211 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1215 buf
[0]=packet
.size()/256;
1216 buf
[1]=packet
.size()%256;
1218 Utility::iovec iov
[2];
1220 iov
[0].iov_base
=(void*)buf
; iov
[0].iov_len
=2;
1221 iov
[1].iov_base
=(void*)&*packet
.begin(); iov
[1].iov_len
= packet
.size();
1223 int wret
=Utility::writev(dc
->d_socket
, iov
, 2);
1227 L
<<Logger::Error
<<"EOF writing TCP answer to "<<dc
->getRemote()<<endl
;
1229 L
<<Logger::Error
<<"Error writing TCP answer to "<<dc
->getRemote()<<": "<< strerror(errno
) <<endl
;
1230 else if((unsigned int)wret
!= 2 + packet
.size())
1231 L
<<Logger::Error
<<"Oops, partial answer sent to "<<dc
->getRemote()<<" for "<<dc
->d_mdp
.d_qname
<<" (size="<< (2 + packet
.size()) <<", sent "<<wret
<<")"<<endl
;
1235 // update tcp connection status, either by closing or moving to 'BYTE0'
1238 // no need to remove us from FDM, we weren't there
1242 dc
->d_tcpConnection
->queriesCount
++;
1243 if (g_tcpMaxQueriesPerConn
&& dc
->d_tcpConnection
->queriesCount
>= g_tcpMaxQueriesPerConn
) {
1247 dc
->d_tcpConnection
->state
=TCPConnection::BYTE0
;
1248 Utility::gettimeofday(&g_now
, 0); // needs to be updated
1249 t_fdm
->addReadFD(dc
->d_socket
, handleRunningTCPQuestion
, dc
->d_tcpConnection
);
1250 t_fdm
->setReadTTD(dc
->d_socket
, g_now
, g_tcpTimeout
);
1254 float spent
=makeFloat(sr
.getNow()-dc
->d_now
);
1256 L
<<Logger::Error
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] answer to "<<(dc
->d_mdp
.d_header
.rd
?"":"non-rd ")<<"question '"<<dc
->d_mdp
.d_qname
<<"|"<<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
);
1257 L
<<"': "<<ntohs(pw
.getHeader()->ancount
)<<" answers, "<<ntohs(pw
.getHeader()->arcount
)<<" additional, took "<<sr
.d_outqueries
<<" packets, "<<
1258 sr
.d_totUsec
/1000.0<<" netw ms, "<< spent
*1000.0<<" tot ms, "<<
1259 sr
.d_throttledqueries
<<" throttled, "<<sr
.d_timeouts
<<" timeouts, "<<sr
.d_tcpoutqueries
<<" tcp connections, rcode="<< res
;
1261 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1262 L
<< ", dnssec="<<vStates
[sr
.getValidationState()];
1269 sr
.d_outqueries
? t_RC
->cacheMisses
++ : t_RC
->cacheHits
++;
1272 g_stats
.answers0_1
++;
1273 else if(spent
< 0.010)
1274 g_stats
.answers1_10
++;
1275 else if(spent
< 0.1)
1276 g_stats
.answers10_100
++;
1277 else if(spent
< 1.0)
1278 g_stats
.answers100_1000
++;
1280 g_stats
.answersSlow
++;
1282 uint64_t newLat
=(uint64_t)(spent
*1000000);
1283 newLat
= min(newLat
,(uint64_t)(((uint64_t) g_networkTimeoutMsec
)*1000)); // outliers of several minutes exist..
1284 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ (float)newLat
/g_latencyStatSize
;
1285 // no worries, we do this for packet cache hits elsewhere
1287 auto ourtime
= 1000.0*spent
-sr
.d_totUsec
/1000.0; // in msec
1289 g_stats
.ourtime0_1
++;
1290 else if(ourtime
< 2)
1291 g_stats
.ourtime1_2
++;
1292 else if(ourtime
< 4)
1293 g_stats
.ourtime2_4
++;
1294 else if(ourtime
< 8)
1295 g_stats
.ourtime4_8
++;
1296 else if(ourtime
< 16)
1297 g_stats
.ourtime8_16
++;
1298 else if(ourtime
< 32)
1299 g_stats
.ourtime16_32
++;
1301 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1302 g_stats
.ourtimeSlow
++;
1304 if(ourtime
>= 0.0) {
1305 newLat
=ourtime
*1000; // usec
1306 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ (float)newLat
/g_latencyStatSize
;
1308 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1312 catch(PDNSException
&ae
) {
1313 L
<<Logger::Error
<<"startDoResolve problem "<<makeLoginfo(dc
)<<": "<<ae
.reason
<<endl
;
1316 catch(MOADNSException
& e
) {
1317 L
<<Logger::Error
<<"DNS parser error "<<makeLoginfo(dc
) <<": "<<dc
->d_mdp
.d_qname
<<", "<<e
.what()<<endl
;
1320 catch(std::exception
& e
) {
1321 L
<<Logger::Error
<<"STL error "<< makeLoginfo(dc
)<<": "<<e
.what();
1323 // Luawrapper nests the exception from Lua, so we unnest it here
1325 std::rethrow_if_nested(e
);
1326 } catch(const std::exception
& ne
) {
1327 L
<<". Extra info: "<<ne
.what();
1334 L
<<Logger::Error
<<"Any other exception in a resolver context "<< makeLoginfo(dc
) <<endl
;
1337 g_stats
.maxMThreadStackUsage
= max(MT
->getMaxStackUsage(), g_stats
.maxMThreadStackUsage
);
// Creates the control channel socket of this process.
// The socket path is "<socket-dir>/<program name>", optionally followed by ".<processNum>",
// and always ends in ".controlsocket"; s_rcc then listens on that path.
// Afterwards the "socket-group" / "socket-owner" settings (when non-empty) are applied with
// chown(), and the "socket-mode" setting with chmod(); a failure of either ends in unixDie().
// NOTE(review): the condition guarding the ".<processNum>" suffix and the declarations of
// sockowner/sockgroup are not visible in this excerpt; the default of -1 presumably means
// "no suffix" - confirm against the full file.
1340 static void makeControlChannelSocket(int processNum
=-1)
1342 string sockname
=::arg()["socket-dir"]+"/"+s_programname
;
1344 sockname
+= "."+std::to_string(processNum
);
1345 sockname
+=".controlsocket";
1346 s_rcc
.listen(sockname
);
// resolve the optional group and owner from the configuration
1351 if (!::arg().isEmpty("socket-group"))
1352 sockgroup
=::arg().asGid("socket-group");
1353 if (!::arg().isEmpty("socket-owner"))
1354 sockowner
=::arg().asUid("socket-owner");
// only chown when at least one of the two ids is set (> -1)
1356 if (sockgroup
> -1 || sockowner
> -1) {
1357 if(chown(sockname
.c_str(), sockowner
, sockgroup
) < 0) {
1358 unixDie("Failed to chown control socket");
1362 // do mode change if socket-mode is given
1363 if(!::arg().isEmpty("socket-mode")) {
1364 mode_t sockmode
=::arg().asMode("socket-mode");
1365 if(chmod(sockname
.c_str(), sockmode
) < 0) {
1366 unixDie("Failed to chmod control socket");
// Extracts the query name, type and class from the raw packet in `question` and looks for an
// EDNS Client Subnet (ECS) option in a single trailing OPT record.
//   question   - raw DNS packet bytes, starting with the dnsheader
//   dnsname    - receives the parsed query name
//   qtype      - receives the query type (filled in by the DNSName constructor call)
//   qclass     - receives the query class (filled in by the DNSName constructor call)
//   ednssubnet - target for the parsed ECS data; the assignment itself is not visible here
//   options    - when used, getEDNSOptions() fills it with all EDNS options and ECS is looked
//                up in it; otherwise only the ECS option is searched with getEDNSOption()
// NOTE(review): the return statements and the branch choosing between the two option parsers
// are not visible in this excerpt; callers store the result as "ECS found" - confirm.
1371 static bool getQNameAndSubnet(const std::string
& question
, DNSName
* dnsname
, uint16_t* qtype
, uint16_t* qclass
, EDNSSubnetOpts
* ednssubnet
, std::map
<uint16_t, EDNSOptionView
>* options
)
1374 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
1375 size_t questionLen
= question
.length();
1376 unsigned int consumed
=0;
// parse the name that starts right after the fixed-size header
1377 *dnsname
=DNSName(question
.c_str(), questionLen
, sizeof(dnsheader
), false, qtype
, qclass
, &consumed
);
// presumably: header + encoded qname (consumed) + qtype (2) + qclass (2) - TODO confirm
1379 size_t pos
= sizeof(dnsheader
)+consumed
+4;
1380 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1382 if(ntohs(dh
->arcount
) == 1 && questionLen
> pos
+ 11) { // this code can extract one (1) EDNS Subnet option
1383 /* OPT root label (1) followed by type (2) */
1384 if(question
.at(pos
)==0 && question
.at(pos
+1)==0 && question
.at(pos
+2)==QType::OPT
) {
1386 char* ecsStart
= nullptr;
// pos+9 skips root label (1) + type (2) + class (2) + ttl (4), i.e. starts at the OPT rdlen
1388 int res
= getEDNSOption((char*)question
.c_str()+pos
+9, questionLen
- pos
- 9, EDNSOptionCode::ECS
, &ecsStart
, &ecsLen
);
// the first 4 bytes of the located option are skipped before parsing the subnet payload
// (presumably option code (2) + option length (2) - TODO confirm)
1389 if (res
== 0 && ecsLen
> 4) {
1391 if(getEDNSSubnetOptsFromString(ecsStart
+ 4, ecsLen
- 4, &eso
)) {
// variant that collects every EDNS option into *options and then picks ECS out of the map
1398 int res
= getEDNSOptions((char*)question
.c_str()+pos
+9, questionLen
- pos
- 9, *options
);
1400 const auto& it
= options
->find(EDNSOptionCode::ECS
);
1401 if (it
!= options
->end() && it
->second
.content
!= nullptr && it
->second
.size
> 0) {
1403 if(getEDNSSubnetOptsFromString(it
->second
.content
, it
->second
.size
, &eso
)) {
// Read callback for an already accepted TCP client connection.
//   fd  - descriptor that became readable (used to deregister from t_fdm)
//   var - carries the std::shared_ptr<TCPConnection> for this client
// The connection is driven through three states:
//   BYTE0       - tries to read the 2-byte length prefix; may end up in BYTE1 or GETQUESTION
//   BYTE1       - reads the second length byte, then moves to GETQUESTION
//   GETQUESTION - reads the query body until bytesread equals qlen
// Once the query is complete the fd is removed from the multiplexer, a DNSComboWriter is built,
// the optional Lua gettag hooks and protobuf query logging are run, answers (qr set) and
// non-query opcodes are ignored, and the query is handed to startDoResolve in a new MTasker thread.
// NOTE(review): several conditions, try blocks and return statements are not visible in this
// excerpt, so the exact transitions between the steps above should be confirmed in the full file.
1415 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
1417 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(var
);
1419 if(conn
->state
==TCPConnection::BYTE0
) {
1420 ssize_t bytes
=recv(conn
->getFD(), conn
->data
, 2, 0);
1422 conn
->state
=TCPConnection::BYTE1
;
// the length prefix is big-endian: data[0] is the high byte
1424 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
1426 conn
->state
=TCPConnection::GETQUESTION
;
// zero (EOF) or negative (error) result from recv(): stop watching this fd
1428 if(!bytes
|| bytes
< 0) {
1429 t_fdm
->removeReadFD(fd
);
1433 else if(conn
->state
==TCPConnection::BYTE1
) {
// read the one outstanding byte of the length prefix into data[1]
1434 ssize_t bytes
=recv(conn
->getFD(), conn
->data
+1, 1, 0);
1436 conn
->state
=TCPConnection::GETQUESTION
;
1437 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
1440 if(!bytes
|| bytes
< 0) {
1441 if(g_logCommonErrors
)
1442 L
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toString() <<" disconnected after first byte"<<endl
;
1443 t_fdm
->removeReadFD(fd
);
1447 else if(conn
->state
==TCPConnection::GETQUESTION
) {
// read as much of the remaining query body as is available
1448 ssize_t bytes
=recv(conn
->getFD(), conn
->data
+ conn
->bytesread
, conn
->qlen
- conn
->bytesread
, 0);
1449 if(!bytes
|| bytes
< 0 || bytes
> std::numeric_limits
<std::uint16_t>::max()) {
1450 L
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toString() <<" disconnected while reading question body"<<endl
;
1451 t_fdm
->removeReadFD(fd
);
1454 conn
->bytesread
+=(uint16_t)bytes
;
1455 if(conn
->bytesread
==conn
->qlen
) {
1456 t_fdm
->removeReadFD(fd
); // should no longer awake ourselves when there is data to read
1458 DNSComboWriter
* dc
=nullptr;
// constructing the DNSComboWriter parses the packet; parse failures raise MOADNSException
1460 dc
=new DNSComboWriter(conn
->data
, conn
->qlen
, g_now
);
1462 catch(MOADNSException
&mde
) {
1463 g_stats
.clientParseError
++;
1464 if(g_logCommonErrors
)
1465 L
<<Logger::Error
<<"Unable to parse packet from TCP client "<< conn
->d_remote
.toString() <<endl
;
1468 dc
->d_tcpConnection
= conn
; // carry the torch
1469 dc
->setSocket(conn
->getFD()); // this is the only time a copy is made of the actual fd
1471 dc
->setRemote(&conn
->d_remote
);
// determine the local address of this connection; same address family as the client
1473 memset(&dest
, 0, sizeof(dest
));
1474 dest
.sin4
.sin_family
= conn
->d_remote
.sin4
.sin_family
;
1475 socklen_t len
= dest
.getSocklen();
1476 getsockname(conn
->getFD(), (sockaddr
*)&dest
, &len
); // if this fails, we're ok with it
1481 bool needECS
= false;
1484 #ifdef HAVE_PROTOBUF
1485 auto luaconfsLocal
= g_luaconfs
.getLocal();
1486 if (luaconfsLocal
->protobufServer
) {
// qname/ECS are pre-parsed only when needed: needECS set or a Lua gettag hook is installed
1491 if(needECS
|| (t_pdl
&& (t_pdl
->d_gettag_ffi
|| t_pdl
->d_gettag
))) {
1494 std::map
<uint16_t, EDNSOptionView
> ednsOptions
;
1495 dc
->d_ecsParsed
= true;
1496 dc
->d_ecsFound
= getQNameAndSubnet(std::string(conn
->data
, conn
->qlen
), &qname
, &qtype
, &qclass
, &dc
->d_ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr);
// the FFI flavour of gettag is preferred over the plain one; both receive `true` as the
// TCP flag argument here (the UDP path passes `false`)
1500 if (t_pdl
->d_gettag_ffi
) {
1501 dc
->d_tag
= t_pdl
->gettag_ffi(conn
->d_remote
, dc
->d_ednssubnet
.source
, dest
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
, dc
->d_ttlCap
, dc
->d_variable
);
1503 else if (t_pdl
->d_gettag
) {
1504 dc
->d_tag
= t_pdl
->gettag(conn
->d_remote
, dc
->d_ednssubnet
.source
, dest
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
);
1507 catch(const std::exception
& e
) {
1508 if(g_logCommonErrors
)
1509 L
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
1513 catch(const std::exception
& e
)
1515 if(g_logCommonErrors
)
1516 L
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
1519 #ifdef HAVE_PROTOBUF
// remember the identifiers and a fresh UUID on the DNSComboWriter for later protobuf logging
1520 if(luaconfsLocal
->protobufServer
|| luaconfsLocal
->outgoingProtobufServer
) {
1521 dc
->d_requestorId
= requestorId
;
1522 dc
->d_deviceId
= deviceId
;
1523 dc
->d_uuid
= (*t_uuidGenerator
)();
1526 if(luaconfsLocal
->protobufServer
) {
1528 const struct dnsheader
* dh
= (const struct dnsheader
*) conn
->data
;
1530 if (!luaconfsLocal
->protobufTaggedOnly
) {
1531 protobufLogQuery(luaconfsLocal
->protobufServer
, luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, dc
->d_uuid
, conn
->d_remote
, dest
, dc
->d_ednssubnet
.source
, true, dh
->id
, conn
->qlen
, qname
, qtype
, qclass
, dc
->d_policyTags
, dc
->d_requestorId
, dc
->d_deviceId
);
1534 catch(std::exception
& e
) {
1535 if(g_logCommonErrors
)
1536 L
<<Logger::Warning
<<"Error parsing a TCP query packet for edns subnet: "<<e
.what()<<endl
;
// a packet with the QR bit set is an answer, not a question: count and ignore it
1540 if(dc
->d_mdp
.d_header
.qr
) {
1542 g_stats
.ignoredCount
++;
1543 L
<<Logger::Error
<<"Ignoring answer from TCP client "<< conn
->d_remote
.toString() <<" on server socket!"<<endl
;
// any non-zero opcode is not a standard query: count and ignore it
1546 if(dc
->d_mdp
.d_header
.opcode
) {
1548 g_stats
.ignoredCount
++;
1549 L
<<Logger::Error
<<"Ignoring non-query opcode from TCP client "<< conn
->d_remote
.toString() <<" on server socket!"<<endl
;
// count the TCP question and resolve it in its own MTasker thread
1554 ++g_stats
.tcpqcounter
;
1555 MT
->makeThread(startDoResolve
, dc
); // deletes dc, will set state to BYTE0 again
1562 //! Handle new incoming TCP connection
// Read callback for a listening TCP socket `fd`: accepts one connection and then either
// drops it or registers it with the multiplexer. A connection is closed right away when
//   - the number of running MTasker threads exceeds g_maxMThreads (overCapacityDrops),
//   - the client address is not matched by t_allowFrom (unauthorizedTCP), or
//   - the client already has g_maxTCPPerClient connections (tcpClientOverflow).
// Otherwise the socket is made non-blocking, wrapped in a TCPConnection in state BYTE0 and
// handed to handleRunningTCPQuestion with a read timeout of g_tcpTimeout.
// The second parameter (multiplexer payload) is unused.
// NOTE(review): the check of accept()'s result and the return statements after each drop
// are not visible in this excerpt.
1563 static void handleNewTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& )
1566 socklen_t addrlen
=sizeof(addr
);
1567 int newsock
=accept(fd
, (struct sockaddr
*)&addr
, &addrlen
);
// refuse new work while too many MTasker threads are active
1569 if(MT
->numProcesses() > g_maxMThreads
) {
1570 g_stats
.overCapacityDrops
++;
1572 closesocket(newsock
);
1574 catch(const PDNSException
& e
) {
1575 L
<<Logger::Error
<<"Error closing TCP socket after an over capacity drop: "<<e
.reason
<<endl
;
// record the remote address, then apply the allow-from ACL
1581 t_remotes
->push_back(addr
);
1582 if(t_allowFrom
&& !t_allowFrom
->match(&addr
)) {
1584 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping TCP query from "<<addr
.toString()<<", address not matched by allow-from"<<endl
;
1586 g_stats
.unauthorizedTCP
++;
1588 closesocket(newsock
);
1590 catch(const PDNSException
& e
) {
1591 L
<<Logger::Error
<<"Error closing TCP socket after an ACL drop: "<<e
.reason
<<endl
;
// per-client connection limit; a limit of 0 disables the check
1595 if(g_maxTCPPerClient
&& t_tcpClientCounts
->count(addr
) && (*t_tcpClientCounts
)[addr
] >= g_maxTCPPerClient
) {
1596 g_stats
.tcpClientOverflow
++;
1598 closesocket(newsock
); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1600 catch(const PDNSException
& e
) {
1601 L
<<Logger::Error
<<"Error closing TCP socket after an overflow drop: "<<e
.reason
<<endl
;
// accepted: track the connection and wait for the first length byte
1606 setNonBlocking(newsock
);
1607 std::shared_ptr
<TCPConnection
> tc
= std::make_shared
<TCPConnection
>(newsock
, addr
);
1608 tc
->state
=TCPConnection::BYTE0
;
1610 t_fdm
->addReadFD(tc
->getFD(), handleRunningTCPQuestion
, tc
);
1613 Utility::gettimeofday(&now
, 0);
1614 t_fdm
->setReadTTD(tc
->getFD(), now
, g_tcpTimeout
);
// Processes one UDP question that has already been read from the socket.
//   question - raw query packet
//   fromaddr - client address
//   destaddr - local address the query was sent to
//   tv       - receive timestamp of the packet; a zero tv_sec disables the age check
//   fd       - UDP socket to answer on
// Steps visible in this excerpt:
//   1. refresh g_now and count the packet in tooOldDrops when it is more than 1000 ms old,
//   2. optionally pre-parse qname/ECS and run the Lua gettag / gettag_ffi hook to get a tag,
//   3. optionally log the query over protobuf,
//   4. look the question up in the packet cache; on a hit, age the cached packet, send it
//      with sendmsg() and update the response and latency statistics,
//   5. otherwise apply the Lua ipfilter hook and the g_maxMThreads capacity limit,
//   6. finally build a DNSComboWriter and resolve it via startDoResolve in an MTasker thread.
// NOTE(review): the return statements are not visible here, so what the returned string*
// means (and who owns it) must be confirmed in the full file.
1618 static string
* doProcessUDPQuestion(const std::string
& question
, const ComboAddress
& fromaddr
, const ComboAddress
& destaddr
, struct timeval tv
, int fd
)
1620 gettimeofday(&g_now
, 0);
1621 struct timeval diff
= g_now
- tv
;
// age of the packet in milliseconds
1622 double delta
=(diff
.tv_sec
*1000 + diff
.tv_usec
/1000.0);
1624 if(tv
.tv_sec
&& delta
> 1000.0) {
1625 g_stats
.tooOldDrops
++;
1630 if(fromaddr
.sin4
.sin_family
==AF_INET6
)
1631 g_stats
.ipv6qcounter
++;
1634 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
1635 unsigned int ctag
=0;
1637 bool needECS
= false;
1638 std::vector
<std::string
> policyTags
;
1639 LuaContext::LuaObject data
;
1642 #ifdef HAVE_PROTOBUF
// a unique id is generated whenever incoming or outgoing protobuf logging is configured
1643 boost::uuids::uuid uniqueId
;
1644 auto luaconfsLocal
= g_luaconfs
.getLocal();
1645 if (luaconfsLocal
->protobufServer
) {
1646 uniqueId
= (*t_uuidGenerator
)();
1648 } else if (luaconfsLocal
->outgoingProtobufServer
) {
1649 uniqueId
= (*t_uuidGenerator
)();
1652 EDNSSubnetOpts ednssubnet
;
1653 bool ecsFound
= false;
1654 bool ecsParsed
= false;
// no TTL cap unless the gettag_ffi hook lowers it
1655 uint32_t ttlCap
= std::numeric_limits
<uint32_t>::max();
1656 bool variable
= false;
1662 bool qnameParsed
=false;
1665 static uint64_t last=0;
1667 g_mtracer->clearAllocators();
1668 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1669 last=g_mtracer->getAllocs();
1670 cout<<g_mtracer->topAllocatorsString()<<endl;
1671 g_mtracer->clearAllocators();
// qname/ECS are pre-parsed only when needed: needECS set or a Lua gettag hook is installed
1675 if(needECS
|| (t_pdl
&& (t_pdl
->d_gettag
|| t_pdl
->d_gettag_ffi
))) {
1677 std::map
<uint16_t, EDNSOptionView
> ednsOptions
;
1678 ecsFound
= getQNameAndSubnet(question
, &qname
, &qtype
, &qclass
, &ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr);
// the FFI flavour of gettag is preferred; `false` is passed as the TCP flag argument
1684 if (t_pdl
->d_gettag_ffi
) {
1685 ctag
= t_pdl
->gettag_ffi(fromaddr
, ednssubnet
.source
, destaddr
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
, ttlCap
, variable
);
1687 else if (t_pdl
->d_gettag
) {
1688 ctag
=t_pdl
->gettag(fromaddr
, ednssubnet
.source
, destaddr
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
);
1691 catch(const std::exception
& e
) {
1692 if(g_logCommonErrors
)
1693 L
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
1697 catch(const std::exception
& e
)
1699 if(g_logCommonErrors
)
1700 L
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
1704 bool cacheHit
= false;
1705 RecProtoBufMessage
pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response
);
1706 #ifdef HAVE_PROTOBUF
// log the incoming query unless only tagged queries are wanted and this one has no tags
1707 if(luaconfsLocal
->protobufServer
) {
1708 if (!luaconfsLocal
->protobufTaggedOnly
|| !policyTags
.empty()) {
1709 protobufLogQuery(luaconfsLocal
->protobufServer
, luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, uniqueId
, fromaddr
, destaddr
, ednssubnet
.source
, false, dh
->id
, question
.size(), qname
, qtype
, qclass
, policyTags
, requestorId
, deviceId
);
1712 #endif /* HAVE_PROTOBUF */
1714 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1715 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1716 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
// two lookup flavours: with the already parsed qname/qtype/qclass, or from the raw packet only
// (presumably selected by qnameParsed - the selecting condition is not visible here)
1718 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, qtype
, qclass
, g_now
.tv_sec
, &response
, &age
, &qhash
, &pbMessage
));
1721 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, g_now
.tv_sec
, &response
, &age
, &qhash
, &pbMessage
));
1725 #ifdef HAVE_PROTOBUF
// log the cached response; the requestor address is masked before it is logged
1726 if(luaconfsLocal
->protobufServer
&& (!luaconfsLocal
->protobufTaggedOnly
|| !pbMessage
.getAppliedPolicy().empty() || !pbMessage
.getPolicyTags().empty())) {
1727 Netmask
requestorNM(fromaddr
, fromaddr
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1728 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
1729 pbMessage
.update(uniqueId
, &requestor
, &destaddr
, false, dh
->id
);
1730 pbMessage
.setEDNSSubnet(ednssubnet
.source
, ednssubnet
.source
.isIpv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1731 pbMessage
.setQueryTime(g_now
.tv_sec
, g_now
.tv_usec
);
1732 pbMessage
.setRequestorId(requestorId
);
1733 pbMessage
.setDeviceId(deviceId
);
1734 protobufLogResponse(luaconfsLocal
->protobufServer
, pbMessage
);
1736 #endif /* HAVE_PROTOBUF */
1738 L
<<Logger::Notice
<<t_id
<< " question answered from packet cache tag="<<ctag
<<" from "<<fromaddr
.toString()<<endl
;
1740 g_stats
.packetCacheHits
++;
1741 SyncRes::s_queries
++;
// adjust the cached packet for the time it spent in the cache before sending it
1742 ageDNSPacket(response
, age
);
1746 fillMSGHdr(&msgh
, &iov
, cbuf
, 0, (char*)response
.c_str(), response
.length(), const_cast<ComboAddress
*>(&fromaddr
));
1747 msgh
.msg_control
=NULL
;
// sockets listed in g_fromtosockets get the original destination set as source address
1749 if(g_fromtosockets
.count(fd
)) {
1750 addCMsgSrcAddr(&msgh
, cbuf
, &destaddr
, 0);
1752 if(sendmsg(fd
, &msgh
, 0) < 0 && g_logCommonErrors
)
1753 L
<<Logger::Warning
<<"Sending UDP reply to client "<<fromaddr
.toStringWithPort()<<" failed with: "<<strerror(errno
)<<endl
;
// copy the header out of the response to read its rcode for the statistics
1755 if(response
.length() >= sizeof(struct dnsheader
)) {
1756 struct dnsheader tmpdh
;
1757 memcpy(&tmpdh
, response
.c_str(), sizeof(tmpdh
));
1758 updateResponseStats(tmpdh
.rcode
, fromaddr
, response
.length(), 0, 0);
1760 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ 0.0; // we assume 0 usec
1761 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ 0.0; // we assume 0 usec
1765 catch(std::exception
& e
) {
1766 L
<<Logger::Error
<<"Error processing or aging answer packet: "<<e
.what()<<endl
;
// Lua ipfilter hook: a true result drops the question
1771 if(t_pdl
->ipfilter(fromaddr
, destaddr
, *dh
)) {
1773 L
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<fromaddr
.toStringWithPort()<<" based on policy"<<endl
;
1774 g_stats
.policyDrops
++;
// refuse new work while too many MTasker threads are active
1779 if(MT
->numProcesses() > g_maxMThreads
) {
1781 L
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<fromaddr
.toStringWithPort()<<", over capacity"<<endl
;
1783 g_stats
.overCapacityDrops
++;
// cache miss: carry everything gathered above over to the resolving thread
1787 DNSComboWriter
* dc
= new DNSComboWriter(question
.c_str(), question
.size(), g_now
);
1791 dc
->d_query
= question
;
1792 dc
->setRemote(&fromaddr
);
1793 dc
->setLocal(destaddr
);
1795 dc
->d_policyTags
= policyTags
;
1797 dc
->d_ecsFound
= ecsFound
;
1798 dc
->d_ecsParsed
= ecsParsed
;
1799 dc
->d_ednssubnet
= ednssubnet
;
1800 dc
->d_ttlCap
= ttlCap
;
1801 dc
->d_variable
= variable
;
1802 #ifdef HAVE_PROTOBUF
1803 if (luaconfsLocal
->protobufServer
|| luaconfsLocal
->outgoingProtobufServer
) {
1804 dc
->d_uuid
= uniqueId
;
1806 dc
->d_requestorId
= requestorId
;
1807 dc
->d_deviceId
= deviceId
;
1810 MT
->makeThread(startDoResolve
, (void*) dc
); // deletes dc
// Read callback for a listening UDP socket `fd`: receives a datagram with recvmsg() and
// validates it before processing. Visible checks:
//   - the sender must be matched by t_allowFrom, else unauthorizedUDP is counted,
//   - a source port of 0 is rejected (counted as clientParseError),
//   - responses and non-query opcodes are counted in ignoredCount and ignored.
// For an acceptable query the receive timestamp and destination address are harvested from
// the message header, and the question is either distributed to another thread
// (g_weDistributeQueries) or processed in place with doProcessUDPQuestion().
// Parse errors (MOADNSException, std::runtime_error) are counted as clientParseError.
// NOTE(review): the surrounding loop, several conditions and the declarations of
// msgh/iov/cbuf/data/len/dest are not visible in this excerpt.
1815 static void handleNewUDPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
1819 ComboAddress fromaddr
;
1823 bool firstQuery
= true;
1825 fromaddr
.sin6
.sin6_family
=AF_INET6
; // this makes sure fromaddr is big enough
1826 fillMSGHdr(&msgh
, &iov
, cbuf
, sizeof(cbuf
), data
, sizeof(data
), &fromaddr
);
1829 if((len
=recvmsg(fd
, &msgh
, 0)) >= 0) {
// record the remote address, then apply the allow-from ACL
1834 t_remotes
->push_back(fromaddr
);
1836 if(t_allowFrom
&& !t_allowFrom
->match(&fromaddr
)) {
1838 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toString()<<", address not matched by allow-from"<<endl
;
1840 g_stats
.unauthorizedUDP
++;
// the port field sits at the same offset for IPv4 and IPv6, so sin4.sin_port can be read for both
1843 BOOST_STATIC_ASSERT(offsetof(sockaddr_in
, sin_port
) == offsetof(sockaddr_in6
, sin6_port
));
1844 if(!fromaddr
.sin4
.sin_port
) { // also works for IPv6
1846 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toStringWithPort()<<", can't deal with port 0"<<endl
;
1848 g_stats
.clientParseError
++; // not quite the best place to put it, but needs to go somewhere
1852 dnsheader
* dh
=(dnsheader
*)data
;
1855 g_stats
.ignoredCount
++;
1856 if(g_logCommonErrors
)
1857 L
<<Logger::Error
<<"Ignoring answer from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
1859 else if(dh
->opcode
) {
1860 g_stats
.ignoredCount
++;
1861 if(g_logCommonErrors
)
1862 L
<<Logger::Error
<<"Ignoring non-query opcode "<<dh
->opcode
<<" from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
// acceptable query: copy it out of the receive buffer and collect its metadata
1865 string
question(data
, (size_t)len
);
1866 struct timeval tv
={0,0};
1867 HarvestTimestamp(&msgh
, &tv
);
1869 memset(&dest
, 0, sizeof(dest
)); // this makes sure we ignore this address if not returned by recvmsg above
1870 auto loc
= rplookup(g_listenSocketsAddresses
, fd
);
1871 if(HarvestDestinationAddress(&msgh
, &dest
)) {
1872 // but.. need to get port too
1874 dest
.sin4
.sin_port
= loc
->sin4
.sin_port
;
// fallback when no destination address was harvested: ask the socket for its local address
1881 dest
.sin4
.sin_family
= fromaddr
.sin4
.sin_family
;
1882 socklen_t slen
= dest
.getSocklen();
1883 getsockname(fd
, (sockaddr
*)&dest
, &slen
); // if this fails, we're ok with it
1886 if(g_weDistributeQueries
)
1887 distributeAsyncFunction(question
, boost::bind(doProcessUDPQuestion
, question
, fromaddr
, dest
, tv
, fd
));
1889 doProcessUDPQuestion(question
, fromaddr
, dest
, tv
, fd
);
1892 catch(MOADNSException
& mde
) {
1893 g_stats
.clientParseError
++;
1894 if(g_logCommonErrors
)
1895 L
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<mde
.what()<<endl
;
1897 catch(std::runtime_error
& e
) {
1898 g_stats
.clientParseError
++;
1899 if(g_logCommonErrors
)
1900 L
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<e
.what()<<endl
;
1904 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
// recvmsg() failed: EAGAIN on the very first attempt is counted as noPacketError
1905 if(firstQuery
&& errno
== EAGAIN
)
1906 g_stats
.noPacketError
++;
// Creates one listening TCP socket per entry of the "local-address" setting and queues it
// for registration with the multiplexer of thread `threadId`.
// For every address: the port defaults to "local-port" and may be overridden through
// parseService(); the host is tried as IPv4 first and as IPv6 second; the socket gets
// SO_REUSEADDR, IPV6_V6ONLY (IPv6 only), TCP_DEFER_ACCEPT (where available), optional
// non-local bind, SO_REUSEPORT and TCP Fast Open, and is then bound. Finally the fd is added
// to deferredAdds[threadId] with handleNewTCPQuestion as callback and to g_tcpListenSockets.
// Throws PDNSException when no address is configured, the address cannot be resolved,
// the socket cannot be created, SO_REUSEPORT fails or bind() fails.
// NOTE(review): the conditions around SO_REUSEPORT and the "not supported" Fast Open warning,
// as well as the listen() call, are not visible in this excerpt.
1912 static void makeTCPServerSockets(unsigned int threadId
)
1915 vector
<string
>locals
;
1916 stringtok(locals
,::arg()["local-address"]," ,");
1919 throw PDNSException("No local address specified");
1921 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
1923 st
.port
=::arg().asNum("local-port");
1924 parseService(*i
, st
);
// try the host as an IPv4 literal first, fall back to IPv6
1928 memset((char *)&sin
,0, sizeof(sin
));
1929 sin
.sin4
.sin_family
= AF_INET
;
1930 if(!IpToU32(st
.host
, (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
1931 sin
.sin6
.sin6_family
= AF_INET6
;
1932 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
1933 throw PDNSException("Unable to resolve local address for TCP server on '"+ st
.host
+"'");
1936 fd
=socket(sin
.sin6
.sin6_family
, SOCK_STREAM
, 0);
1938 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
// failures of the following socket options are logged but not fatal
1943 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &tmp
, sizeof tmp
)<0) {
1944 L
<<Logger::Error
<<"Setsockopt failed for TCP listening socket"<<endl
;
1947 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &tmp
, sizeof(tmp
)) < 0) {
1948 L
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno
)<<endl
;
1951 #ifdef TCP_DEFER_ACCEPT
// the success message is logged for the first configured address only
1952 if(setsockopt(fd
, SOL_TCP
, TCP_DEFER_ACCEPT
, &tmp
, sizeof tmp
) >= 0) {
1953 if(i
==locals
.begin())
1954 L
<<Logger::Error
<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl
;
// NOTE(review): AF_INET is passed here regardless of the socket's family, while
// makeUDPServerSockets passes AF_INET6 - confirm which family setBindAny expects
1958 if( ::arg().mustDo("non-local-bind") )
1959 Utility::setBindAny(AF_INET
, fd
);
1963 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &tmp
, sizeof(tmp
)) < 0)
1964 throw PDNSException("SO_REUSEPORT: "+stringerror());
// the "tcp-fast-open" setting doubles as the Fast Open queue size
1968 if (::arg().asNum("tcp-fast-open") > 0) {
1970 int fastOpenQueueSize
= ::arg().asNum("tcp-fast-open");
1971 if (setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, &fastOpenQueueSize
, sizeof fastOpenQueueSize
) < 0) {
1972 L
<<Logger::Error
<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno
)<<endl
;
1975 L
<<Logger::Warning
<<"TCP Fast Open configured but not supported for listening socket"<<endl
;
1979 sin
.sin4
.sin_port
= htons(st
.port
);
1980 socklen_t socklen
=sin
.sin4
.sin_family
==AF_INET
? sizeof(sin
.sin4
) : sizeof(sin
.sin6
);
1981 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
1982 throw PDNSException("Binding TCP server socket for "+ st
.host
+": "+stringerror());
1985 setSocketSendBuffer(fd
, 65000);
// registration with the multiplexer is deferred to the owning thread
1987 deferredAdds
[threadId
].push_back(make_pair(fd
, handleNewTCPQuestion
));
1988 g_tcpListenSockets
.push_back(fd
);
1989 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1990 // - fd is not that which we know here, but returned from accept()
1991 if(sin
.sin4
.sin_family
== AF_INET
)
1992 L
<<Logger::Error
<<"Listening for TCP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
1994 L
<<Logger::Error
<<"Listening for TCP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Creates one UDP server socket per entry of the "local-address" setting and queues it
// for registration with the multiplexer of thread `threadId`.
// For every address: the port defaults to "local-port" and may be overridden through
// parseService(); the host is tried as IPv4 first and as IPv6 second; timestamp reporting
// is requested; for wildcard addresses packet-info delivery is enabled and, on success, the
// fd is remembered in g_fromtosockets; IPv6 sockets are made IPv6-only; the receive buffer
// is set to 250000; then the socket is bound, added to deferredAdds[threadId] with
// handleNewUDPQuestion as callback and recorded in g_listenSocketsAddresses.
// Throws PDNSException when no address is configured, the address cannot be resolved,
// the socket cannot be created, SO_REUSEPORT fails or bind() fails.
// NOTE(review): the condition around SO_REUSEPORT and the setNonBlocking/close-on-exec
// handling are not visible in this excerpt.
1998 static void makeUDPServerSockets(unsigned int threadId
)
2001 vector
<string
>locals
;
2002 stringtok(locals
,::arg()["local-address"]," ,");
2005 throw PDNSException("No local address specified");
2007 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
2009 st
.port
=::arg().asNum("local-port");
2010 parseService(*i
, st
);
// try the host as an IPv4 literal first, fall back to IPv6
2014 memset(&sin
, 0, sizeof(sin
));
2015 sin
.sin4
.sin_family
= AF_INET
;
2016 if(!IpToU32(st
.host
.c_str() , (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2017 sin
.sin6
.sin6_family
= AF_INET6
;
2018 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2019 throw PDNSException("Unable to resolve local address for UDP server on '"+ st
.host
+"'");
2022 int fd
=socket(sin
.sin4
.sin_family
, SOCK_DGRAM
, 0);
2024 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2026 if (!setSocketTimestamps(fd
))
2027 L
<<Logger::Warning
<<"Unable to enable timestamp reporting for socket"<<endl
;
// on a wildcard address, ask the kernel for the destination address of each datagram;
// fds where this succeeds are remembered in g_fromtosockets
2029 if(IsAnyAddress(sin
)) {
2030 if(sin
.sin4
.sin_family
== AF_INET
)
2031 if(!setsockopt(fd
, IPPROTO_IP
, GEN_IP_PKTINFO
, &one
, sizeof(one
))) // linux supports this, so why not - might fail on other systems
2032 g_fromtosockets
.insert(fd
);
2033 #ifdef IPV6_RECVPKTINFO
2034 if(sin
.sin4
.sin_family
== AF_INET6
)
2035 if(!setsockopt(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &one
, sizeof(one
)))
2036 g_fromtosockets
.insert(fd
);
2038 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &one
, sizeof(one
)) < 0) {
2039 L
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno
)<<endl
;
// NOTE(review): AF_INET6 is passed here regardless of the socket's family, while
// makeTCPServerSockets passes AF_INET - confirm which family setBindAny expects
2042 if( ::arg().mustDo("non-local-bind") )
2043 Utility::setBindAny(AF_INET6
, fd
);
2047 setSocketReceiveBuffer(fd
, 250000);
2048 sin
.sin4
.sin_port
= htons(st
.port
);
2053 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &one
, sizeof(one
)) < 0)
2054 throw PDNSException("SO_REUSEPORT: "+stringerror());
2057 socklen_t socklen
=sin
.getSocklen();
2058 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
2059 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st
.port
) +" for "+ st
.host
+": "+stringerror());
// registration with the multiplexer is deferred to the owning thread
2063 deferredAdds
[threadId
].push_back(make_pair(fd
, handleNewUDPQuestion
));
2064 g_listenSocketsAddresses
[fd
]=sin
; // this is written to only from the startup thread, not from the workers
2065 if(sin
.sin4
.sin_family
== AF_INET
)
2066 L
<<Logger::Error
<<"Listening for UDP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2068 L
<<Logger::Error
<<"Listening for UDP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Detaches the process' standard streams from the terminal: /dev/null is opened read-write
// and duplicated onto file descriptors 0, 1 and 2. A failure to open /dev/null is logged at
// Critical level.
// NOTE(review): the fork()/setsid() part usually found in a daemonize routine, the check on
// the open() result and the final close of the temporary descriptor are not visible in this
// excerpt - confirm against the full file.
2072 static void daemonize(void)
2079 int i
=open("/dev/null",O_RDWR
); /* open stdin */
2081 L
<<Logger::Critical
<<"Unable to open /dev/null: "<<stringerror()<<endl
;
2083 dup2(i
,0); /* stdin */
2084 dup2(i
,1); /* stdout */
2085 dup2(i
,2); /* stderr */
// Handler taking a single unnamed int argument; presumably installed for SIGUSR1.
// NOTE(review): the body is not visible in this excerpt - confirm its behaviour in the full file.
2090 static void usr1Handler(int)
// Handler taking a single unnamed int argument; presumably installed for SIGUSR2.
// Applies the current value of g_quiet: the default SyncRes log mode becomes LogNone when
// quiet and Log otherwise, and the "quiet" setting is updated to match ("" or "no").
// NOTE(review): whether g_quiet is toggled before these statements is not visible here.
2095 static void usr2Handler(int)
2098 SyncRes::setDefaultLogMode(g_quiet
? SyncRes::LogNone
: SyncRes::Log
);
2099 ::arg().set("quiet")=g_quiet
? "" : "no";
// Writes a block of "stats: ..." lines to the log at Notice level.
// The figures are only printed once there is data to report: a non-zero question counter,
// at least one record cache hit or miss, and non-zero query and outgoing-query counters
// (the latter two are also used as divisors below). Otherwise, when statsWanted is set,
// a single "no stats yet" line is logged instead.
// Per-thread values are collected with broadcastAccFunction<uint64_t>().
// The function-local statics remember the time and query count of the previous report so
// that an average queries-per-second figure can be derived.
2102 static void doStats(void)
2104 static time_t lastOutputTime
;
2105 static uint64_t lastQueryCount
;
2107 uint64_t cacheHits
= broadcastAccFunction
<uint64_t>(pleaseGetCacheHits
);
2108 uint64_t cacheMisses
= broadcastAccFunction
<uint64_t>(pleaseGetCacheMisses
);
2110 if(g_stats
.qcounter
&& (cacheHits
+ cacheMisses
) && SyncRes::s_queries
&& SyncRes::s_outqueries
) {
2111 L
<<Logger::Notice
<<"stats: "<<g_stats
.qcounter
<<" questions, "<<
2112 broadcastAccFunction
<uint64_t>(pleaseGetCacheSize
)<< " cache entries, "<<
2113 broadcastAccFunction
<uint64_t>(pleaseGetNegCacheSize
)<<" negative entries, "<<
2114 (int)((cacheHits
*100.0)/(cacheHits
+cacheMisses
))<<"% cache hits"<<endl
;
2116 L
<<Logger::Notice
<<"stats: throttle map: "
2117 << broadcastAccFunction
<uint64_t>(pleaseGetThrottleSize
) <<", ns speeds: "
2118 << broadcastAccFunction
<uint64_t>(pleaseGetNsSpeedsSize
)<<endl
;
2119 L
<<Logger::Notice
<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries
*100.0/SyncRes::s_queries
)<<"%";
2120 L
<<Logger::Notice
<<", "<<(int)(SyncRes::s_throttledqueries
*100.0/(SyncRes::s_outqueries
+SyncRes::s_throttledqueries
))<<"% throttled, "
2121 <<SyncRes::s_nodelegated
<<" no-delegation drops"<<endl
;
2122 L
<<Logger::Notice
<<"stats: "<<SyncRes::s_tcpoutqueries
<<" outgoing tcp connections, "<<
2123 broadcastAccFunction
<uint64_t>(pleaseGetConcurrentQueries
)<<" queries running, "<<SyncRes::s_outgoingtimeouts
<<" outgoing timeouts"<<endl
;
2125 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2126 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2128 L
<<Logger::Notice
<<"stats: " << broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheSize
) <<
2129 " packet cache entries, "<<(int)(100.0*broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheHits
)/SyncRes::s_queries
) << "% packet cache hits"<<endl
;
// qps needs a previous sample and a non-zero elapsed time (the latter also avoids dividing by zero)
2131 time_t now
= time(0);
2132 if(lastOutputTime
&& lastQueryCount
&& now
!= lastOutputTime
) {
2133 L
<<Logger::Notice
<<"stats: "<< (SyncRes::s_queries
- lastQueryCount
) / (now
- lastOutputTime
) <<" qps (average over "<< (now
- lastOutputTime
) << " seconds)"<<endl
;
2135 lastOutputTime
= now
;
2136 lastQueryCount
= SyncRes::s_queries
;
2138 else if(statsWanted
)
2139 L
<<Logger::Notice
<<"stats: no stats yet!"<<endl
;
// Periodic maintenance, run per thread; all bookkeeping below is thread_local.
// Visible duties:
//   - when more than (5 + t_id) seconds passed since the last prune: prune the record cache,
//     the packet cache and the negative cache; on every 40th such prune also drop
//     nameserver speed entries using a limit of now - 300 seconds,
//   - when more than 7200 seconds passed since the last root update: call SyncRes::getRootNS(),
//   - on the handler thread: act on g_statisticsInterval (the action itself is not visible here),
//   - on the distributor thread: run the security poll once 3600 seconds have passed;
//     exceptions raised by it are logged and swallowed.
// A PDNSException escaping the routine is logged as "Fatal error in housekeeping thread".
// The void* argument is unused.
// NOTE(review): the s_running re-entrancy guard logic and several assignments to the
// last_* timestamps are not visible in this excerpt.
2144 static void houseKeeping(void *)
2146 static thread_local
time_t last_stat
, last_rootupdate
, last_prune
, last_secpoll
;
2147 static thread_local
int cleanCounter
=0;
2148 static thread_local
bool s_running
; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2155 Utility::gettimeofday(&now
, 0);
// the per-thread offset t_id makes the prune interval differ between threads
2157 if(now
.tv_sec
- last_prune
> (time_t)(5 + t_id
)) {
2160 t_RC
->doPrune(g_maxCacheEntries
/ g_numThreads
); // this function is local to a thread, so fine anyhow
2161 t_packetCache
->doPruneTo(g_maxPacketCacheEntries
/ g_numWorkerThreads
);
2163 SyncRes::pruneNegCache(g_maxCacheEntries
/ (g_numWorkerThreads
* 10));
2165 if(!((cleanCounter
++)%40)) { // this is a full scan!
2166 time_t limit
=now
.tv_sec
-300;
2167 SyncRes::pruneNSSpeeds(limit
);
// refresh the root nameserver records at most every 7200 seconds
2172 if(now
.tv_sec
- last_rootupdate
> 7200) {
2173 int res
= SyncRes::getRootNS(g_now
, nullptr);
2175 last_rootupdate
=now
.tv_sec
;
2178 if (t_id
== s_handlerThreadID
) {
2179 if(g_statisticsInterval
> 0 && now
.tv_sec
- last_stat
>= g_statisticsInterval
) {
2184 else if(t_id
== s_distributorThreadID
) {
2186 if(now
.tv_sec
- last_secpoll
>= 3600) {
2188 doSecPoll(&last_secpoll
);
2190 catch(std::exception
& e
)
2192 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.what()<<endl
;
2194 catch(PDNSException
& e
)
2196 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2198 catch(ImmediateServFailException
&e
)
2200 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2204 L
<<Logger::Error
<<"Exception while performing security poll"<<endl
;
2211 catch(PDNSException
& ae
)
2214 L
<<Logger::Error
<<"Fatal error in housekeeping thread: "<<ae
.reason
<<endl
;
// Builds one ThreadPipeSet per thread (g_numThreads of them) and appends each to g_pipes.
// Each set holds two descriptor pairs: readToThread/writeToThread and
// readFromThread/writeFromThread.
// NOTE(review): the calls that fill `fd` are not present in this listing; presumably
// pipe(fd) - confirm against the full source.
2219 static void makeThreadPipes()
2221 for(unsigned int n
=0; n
< g_numThreads
; ++n
) {
2222 struct ThreadPipeSet tps
;
// First descriptor pair: the channel towards the thread.
2225 unixDie("Creating pipe for inter-thread communications");
2227 tps
.readToThread
= fd
[0];
2228 tps
.writeToThread
= fd
[1];
// Second descriptor pair: the channel back from the thread.
2231 unixDie("Creating pipe for inter-thread communications");
2232 tps
.readFromThread
= fd
[0];
2233 tps
.writeFromThread
= fd
[1];
2235 g_pipes
.push_back(tps
);
// Runs `func` across the threads: for each entry in g_pipes a heap-allocated ThreadMSG
// pointer is written to that thread's pipe and a response pointer is read back.
// `skipSelf` is part of the interface; how it is used is not visible in this listing.
// Only the handler or distributor thread is expected to call this; any other caller is
// logged as an error.
2245 void broadcastFunction(const pipefunc_t
& func
, bool skipSelf
)
2247 /* This function might be called by the worker with t_id 0 during startup */
2248 if (t_id
!= s_handlerThreadID
&& t_id
!= s_distributorThreadID
) {
2249 L
<<Logger::Error
<<"broadcastFunction() has been called by a worker ("<<t_id
<<")"<<endl
;
2254 for(ThreadPipeSet
& tps
: g_pipes
)
// For the caller's own pipe set the function is invoked directly instead of being queued.
2258 func(); // don't write to ourselves!
2262 ThreadMSG
* tmsg
= new ThreadMSG();
2264 tmsg
->wantAnswer
= true;
// The pointer value itself is sent through the pipe, not the message contents.
2265 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2267 unixDie("write to thread pipe returned wrong size or error");
// Wait for the thread's response pointer before moving on to the next thread.
2271 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
2272 unixDie("read from thread pipe returned wrong size or error");
2275 // cerr <<"got response: " << *resp << endl;
// Hands `func` to one thread chosen by hashing `packet`, without waiting for a reply
// (wantAnswer is false). A caller other than the distributor thread is logged as an error.
2281 void distributeAsyncFunction(const string
& packet
, const pipefunc_t
& func
)
2283 if (t_id
!= s_distributorThreadID
) {
2284 L
<<Logger::Error
<<"distributeAsyncFunction() has been called by a worker ("<<t_id
<<")"<<endl
;
// The target index is always in [1, g_pipes.size()-1], so index 0 is never chosen.
// NOTE(review): if g_pipes holds a single entry the modulus is zero; no guard is visible
// in this listing - confirm callers guarantee at least two pipe sets.
2288 unsigned int hash
= hashQuestion(packet
.c_str(), packet
.length(), g_disthashseed
);
2289 unsigned int target
= 1 + (hash
% (g_pipes
.size()-1));
2292 L
<<Logger::Error
<<"distributeAsyncFunction() tried to assign a query to the distributor"<<endl
;
2296 ThreadPipeSet
& tps
= g_pipes
[target
];
2297 ThreadMSG
* tmsg
= new ThreadMSG();
2299 tmsg
->wantAnswer
= false;
// As in broadcastFunction, only the pointer value travels through the pipe.
2301 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2303 unixDie("write to thread pipe returned wrong size or error");
// Multiplexer callback for a thread's inbound pipe: reads one ThreadMSG pointer from `fd`,
// runs its function, and writes the result pointer back when an answer was requested.
// `var` is the multiplexer's callback parameter; no use of it is visible in this listing.
2307 static void handlePipeRequest(int fd
, FDMultiplexer::funcparam_t
& var
)
2309 ThreadMSG
* tmsg
= nullptr;
2311 if(read(fd
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) { // fd == readToThread
2312 unixDie("read from thread pipe returned wrong size or error");
2317 resp
= tmsg
->func();
// Exceptions from the executed function are logged only when g_logCommonErrors is set.
2319 catch(std::exception
& e
) {
2320 if(g_logCommonErrors
)
2321 L
<<Logger::Error
<<"PIPE function we executed created exception: "<<e
.what()<<endl
; // but what if they wanted an answer.. we send 0
2323 catch(PDNSException
& e
) {
2324 if(g_logCommonErrors
)
2325 L
<<Logger::Error
<<"PIPE function we executed created PDNS exception: "<<e
.reason
<<endl
; // but what if they wanted an answer.. we send 0
// The reply goes out on this thread's own outbound pipe, selected by t_id.
2327 if(tmsg
->wantAnswer
) {
2328 if(write(g_pipes
[t_id
].writeFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
)) {
2330 unixDie("write to thread pipe returned wrong size or error");
// Adapter template that takes a function returning T* and itself returns void*.
// broadcastAccFunction binds it so a typed function can be stored in ThreadMSG::func.
// NOTE(review): the body is not present in this listing; presumably it just calls `func`
// and returns the pointer - confirm against the full source.
2337 template<class T
> void *voider(const boost::function
<T
*()>& func
)
// Appends every element of `b` to the end of `a` (vector<ComboAddress> overload).
// NOTE(review): the return statement is not present in this listing.
2342 vector
<ComboAddress
>& operator+=(vector
<ComboAddress
>&a
, const vector
<ComboAddress
>& b
)
2344 a
.insert(a
.end(), b
.begin(), b
.end());
// Appends every element of `b` to the end of `a` (vector<pair<string, uint16_t>> overload).
// NOTE(review): the return statement is not present in this listing.
2348 vector
<pair
<string
, uint16_t> >& operator+=(vector
<pair
<string
, uint16_t> >&a
, const vector
<pair
<string
, uint16_t> >& b
)
2350 a
.insert(a
.end(), b
.begin(), b
.end());
// Appends every element of `b` to the end of `a` (vector<pair<DNSName, uint16_t>> overload).
// NOTE(review): the return statement is not present in this listing.
2354 vector
<pair
<DNSName
, uint16_t> >& operator+=(vector
<pair
<DNSName
, uint16_t> >&a
, const vector
<pair
<DNSName
, uint16_t> >& b
)
2356 a
.insert(a
.end(), b
.begin(), b
.end());
// Sends `func` to every thread in g_pipes and reads a response pointer back from each.
// Returns a T; how the per-thread results are combined is not visible in this listing
// (presumably accumulated with += - confirm against the full source).
// `skipSelf` is part of the interface; its use is not visible here.
// A caller other than the handler thread is logged as an error.
2361 template<class T
> T
broadcastAccFunction(const boost::function
<T
*()>& func
, bool skipSelf
)
2363 if (t_id
!= s_handlerThreadID
) {
2364 L
<<Logger::Error
<<"broadcastFunction has been called by a worker ("<<t_id
<<")"<<endl
;
2370 for(ThreadPipeSet
& tps
: g_pipes
)
2372 ThreadMSG
* tmsg
= new ThreadMSG();
// voider<T> adapts the T*-returning function to the void*-returning form stored in the message.
2373 tmsg
->func
= boost::bind(voider
<T
>, func
);
2374 tmsg
->wantAnswer
= true;
2376 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2378 unixDie("write to thread pipe returned wrong size or error");
// Wait for this thread's result pointer before contacting the next thread.
2382 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
2383 unixDie("read from thread pipe returned wrong size or error");
2386 //~ cerr <<"got response: " << *resp << endl;
// Explicit instantiations for the four result types: string, uint64_t,
// vector<ComboAddress> and vector<pair<DNSName, uint16_t>>.
2394 template string
broadcastAccFunction(const boost::function
<string
*()>& fun
, bool skipSelf
); // explicit instantiation
2395 template uint64_t broadcastAccFunction(const boost::function
<uint64_t*()>& fun
, bool skipSelf
); // explicit instantiation
2396 template vector
<ComboAddress
> broadcastAccFunction(const boost::function
<vector
<ComboAddress
> *()>& fun
, bool skipSelf
); // explicit instantiation
2397 template vector
<pair
<DNSName
,uint16_t> > broadcastAccFunction(const boost::function
<vector
<pair
<DNSName
, uint16_t> > *()>& fun
, bool skipSelf
); // explicit instantiation
// Multiplexer callback for the control channel: receives one message from s_rcc, lets
// RecursorControlParser produce the answer, and sends the answer back to the sender.
// Exceptions are logged at Error level.
2399 static void handleRCC(int fd
, FDMultiplexer::funcparam_t
& var
)
2402 string msg
=s_rcc
.recv(&remote
);
2403 RecursorControlParser rcp
;
2404 RecursorControlParser::func_t
* command
;
// getAnswer also fills in `command`; what is done with it is not visible in this listing.
2406 string answer
=rcp
.getAnswer(msg
, &command
);
// When a chroot is configured, its length is cut off the front of the sender's address.
2408 // If we are inside a chroot, we need to strip
2409 if (!arg()["chroot"].empty()) {
2410 size_t len
= arg()["chroot"].length();
2411 remote
= remote
.substr(len
);
2415 s_rcc
.send(answer
, &remote
);
2418 catch(std::exception
& e
) {
2419 L
<<Logger::Error
<<"Error dealing with control socket request: "<<e
.what()<<endl
;
2421 catch(PDNSException
& ae
) {
2422 L
<<Logger::Error
<<"Error dealing with control socket request: "<<ae
.reason
<<endl
;
// Multiplexer callback for a readable outgoing TCP socket. Reads up to pident->inNeeded
// bytes into pident->inMSG; once everything has arrived (or partial data is acceptable)
// the descriptor is removed from the multiplexer and the waiting mthread is sent the data.
// On the error path the waiter receives `empty` instead.
2426 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
)
2428 PacketID
* pident
=any_cast
<PacketID
>(&var
);
2429 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
// Temporary buffer sized to exactly the number of bytes still outstanding.
2431 shared_array
<char> buffer(new char[pident
->inNeeded
]);
2433 ssize_t ret
=recv(fd
, buffer
.get(), pident
->inNeeded
,0);
// NOTE(review): the check on `ret` guarding the append below is not present in this listing.
2435 pident
->inMSG
.append(&buffer
[0], &buffer
[ret
]);
2436 pident
->inNeeded
-=(size_t)ret
;
2437 if(!pident
->inNeeded
|| pident
->inIncompleteOkay
) {
2438 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
// The PacketID and message are copied before the descriptor is removed from the multiplexer.
2439 PacketID pid
=*pident
;
2440 string msg
=pident
->inMSG
;
2442 t_fdm
->removeReadFD(fd
);
2443 MT
->sendEvent(pid
, &msg
);
2446 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
// Error path: copy the PacketID, drop the descriptor and wake the waiter with `empty`.
2450 PacketID tmp
=*pident
;
2451 t_fdm
->removeReadFD(fd
); // pident might now be invalid (it isn't, but still)
2453 MT
->sendEvent(tmp
, &empty
); // this conveys error status
// Multiplexer callback for a writable outgoing TCP socket. Sends the unsent remainder of
// pid->outMSG starting at pid->outPos; when the whole message has gone out the descriptor
// is removed and the waiter receives the sent message as confirmation. On error or EOF
// the waiter receives `sent` instead (an empty string according to the original comment).
2457 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
)
2459 PacketID
* pid
=any_cast
<PacketID
>(&var
);
2460 ssize_t ret
=send(fd
, pid
->outMSG
.c_str() + pid
->outPos
, pid
->outMSG
.size() - pid
->outPos
,0);
// NOTE(review): the check on `ret` guarding this update is not present in this listing.
2462 pid
->outPos
+=(ssize_t
)ret
;
2463 if(pid
->outPos
==pid
->outMSG
.size()) {
2465 t_fdm
->removeWriteFD(fd
);
2466 MT
->sendEvent(tmp
, &tmp
.outMSG
); // send back what we sent to convey everything is ok
2469 else { // error or EOF
2471 t_fdm
->removeWriteFD(fd
);
2473 MT
->sendEvent(tmp
, &sent
); // we convey error status by sending empty string
// Walks the chain stored in the waiter's key (iter->key.chain) and sends `content` to the
// waiters reached through it, using the by-value copy `resend` as the event key.
// g_stats.chainResends is incremented for each event sent.
// NOTE(review): the lines that adjust `resend` per chain entry are not in this listing.
2477 // resend event to everybody chained onto it
2478 static void doResends(MT_t::waiters_t::iterator
& iter
, PacketID resend
, const string
& content
)
// Nothing is chained onto this waiter: this is the guard for the early exit.
2480 if(iter
->key
.chain
.empty())
2482 // cerr<<"doResends called!\n";
2483 for(PacketID::chain_t::iterator i
=iter
->key
.chain
.begin(); i
!= iter
->key
.chain
.end() ; ++i
) {
2486 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2488 MT
->sendEvent(resend
, &content
);
2489 g_stats
.chainResends
++;
// Multiplexer callback for a UDP socket used for outgoing queries. Reads one datagram,
// builds a PacketID from it (remote address, question name and type), and delivers the
// packet to the waiting mthread and to anything chained onto it. Packets shorter than a
// DNS header, and packets matching no waiter, are counted and optionally logged.
// NOTE(review): many lines of this function (declarations of len, dh, pident, empty,
// packet; the retryWithName label; several braces) are missing from this listing.
2493 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
2495 PacketID pid
=any_cast
<PacketID
>(var
);
// NOTE(review): g_outgoingEDNSBufsize is assigned from configuration at runtime in this
// file, so this is a runtime-sized stack array, which is a compiler extension in C++.
2497 char data
[g_outgoingEDNSBufsize
];
2498 ComboAddress fromaddr
;
2499 socklen_t addrlen
=sizeof(fromaddr
);
2501 len
=recvfrom(fd
, data
, sizeof(data
), 0, (sockaddr
*)&fromaddr
, &addrlen
);
// Too short to hold a DNS header (this also covers a negative return from recvfrom).
2503 if(len
< (ssize_t
) sizeof(dnsheader
)) {
2505 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2507 g_stats
.serverParseError
++;
2508 if(g_logCommonErrors
)
2509 L
<<Logger::Error
<<"Unable to parse packet from remote UDP server "<< fromaddr
.toString() <<
2510 ": packet smaller than DNS header"<<endl
;
// Give the socket back, then wake the waiter and its chain with `empty` to signal the error.
2513 t_udpclientsocks
->returnSocket(fd
);
2516 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pid
);
2517 if(iter
!= MT
->d_waiters
.end())
2518 doResends(iter
, pid
, empty
);
2520 MT
->sendEvent(pid
, &empty
); // this denotes error (does lookup again.. at least L1 will be hot)
// Normal path: copy the header out of the buffer and start filling in the lookup key.
2525 memcpy(&dh
, data
, sizeof(dh
));
2528 pident
.remote
=fromaddr
;
2532 if(!dh
.qr
&& g_logCommonErrors
) {
2533 L
<<Logger::Notice
<<"Not taking data from question on outgoing socket from "<< fromaddr
.toStringWithPort() <<endl
;
// No question section, or not a response: leave the name in the key empty.
2536 if(!dh
.qdcount
|| // UPC, Nominum, very old BIND on FormErr, NSD
2537 !dh
.qr
) { // one weird server
2538 pident
.domain
.clear();
// Otherwise parse the question name starting at offset 12; the type is stored in pident.type.
2544 pident
.domain
=DNSName(data
, len
, 12, false, &pident
.type
); // don't copy this from above - we need to do the actual read
2546 catch(std::exception
& e
) {
2547 g_stats
.serverParseError
++; // won't be fed to lwres.cc, so we have to increment
2548 L
<<Logger::Warning
<<"Error in packet from remote nameserver "<< fromaddr
.toStringWithPort() << ": "<<e
.what() << endl
;
2553 packet
.assign(data
, len
);
// Deliver to anything chained onto the matching waiter first, then to the waiter itself.
2555 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pident
);
2556 if(iter
!= MT
->d_waiters
.end()) {
2557 doResends(iter
, pident
, packet
);
2562 if(!MT
->sendEvent(pident
, &packet
)) {
2563 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2564 for(MT_t::waiters_t::iterator mthread
=MT
->d_waiters
.begin(); mthread
!=MT
->d_waiters
.end(); ++mthread
) {
// Same socket, remote, type and name as a waiter: counted as a near miss on that waiter.
2565 if(pident
.fd
==mthread
->key
.fd
&& mthread
->key
.remote
==pident
.remote
&& mthread
->key
.type
== pident
.type
&&
2566 pident
.domain
== mthread
->key
.domain
) {
2567 mthread
->key
.nearMisses
++;
2570 // be a bit paranoid here since we're weakening our matching
// Response has no name/type but its id and remote match a waiter: borrow that waiter's
// name and type and retry the delivery.
2571 if(pident
.domain
.empty() && !mthread
->key
.domain
.empty() && !pident
.type
&& mthread
->key
.type
&&
2572 pident
.id
== mthread
->key
.id
&& mthread
->key
.remote
== pident
.remote
) {
2573 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2574 pident
.domain
= mthread
->key
.domain
;
2575 pident
.type
= mthread
->key
.type
;
2576 goto retryWithName
; // note that this only passes on an error, lwres will still reject the packet
2579 g_stats
.unexpectedCount
++; // if we made it here, it really is an unexpected answer
2580 if(g_logCommonErrors
) {
2581 L
<<Logger::Warning
<<"Discarding unexpected packet from "<<fromaddr
.toStringWithPort()<<": "<< (pident
.domain
.empty() ? "<empty>" : pident
.domain
.toString())<<", "<<pident
.type
<<", "<<MT
->d_waiters
.size()<<" waiters"<<endl
;
2585 t_udpclientsocks
->returnSocket(fd
);
// Iterates over the registered multiplexers from FDMultiplexer::getMultiplexerMap() and
// returns an FDMultiplexer*. A failure to initialise one candidate is logged as non-fatal;
// if the loop finishes without success, "No working multiplexer found!" is logged.
// NOTE(review): the construction and return statements are missing from this listing.
2589 FDMultiplexer
* getMultiplexer()
2592 for(const auto& i
: FDMultiplexer::getMultiplexerMap()) {
2597 catch(FDMultiplexerException
&fe
) {
2598 L
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer ("<<fe
.what()<<"), falling back"<<endl
;
2601 L
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer"<<endl
;
2604 L
<<Logger::Error
<<"No working multiplexer found!"<<endl
;
// Per-thread worker for reloading the Lua script named by the "lua-dns-script" setting.
// Returns a heap-allocated status string describing the outcome: "unloaded", the
// "retaining current script" error text, or "(re)loaded"; the caller receives ownership
// of the pointer. Every log line is prefixed with the thread id.
2609 static string
* doReloadLuaScript()
2611 string fname
= ::arg()["lua-dns-script"];
// NOTE(review): the condition guarding this "unloaded" branch is not in this listing;
// presumably an empty file name - confirm.
2615 L
<<Logger::Error
<<t_id
<<" Unloaded current lua script"<<endl
;
2616 return new string("unloaded\n");
2619 t_pdl
= std::make_shared
<RecursorLua4
>(fname
);
// On failure the error is reported both in the log and in the returned string.
2622 catch(std::exception
& e
) {
2623 L
<<Logger::Error
<<t_id
<<" Retaining current script, error from '"<<fname
<<"': "<< e
.what() <<endl
;
2624 return new string("retaining current script, error from '"+fname
+"': "+e
.what()+"\n");
2627 L
<<Logger::Warning
<<t_id
<<" (Re)loaded lua script from '"<<fname
<<"'"<<endl
;
2628 return new string("(re)loaded '"+fname
+"'\n");
// Control-channel entry point: stores *begin as the new "lua-dns-script" setting and then
// runs doReloadLuaScript on the threads via broadcastAccFunction, returning the result.
// NOTE(review): the condition guarding the assignment (presumably begin != end) is not
// present in this listing.
2631 string
doQueueReloadLuaScript(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
2634 ::arg().set("lua-dns-script") = *begin
;
2636 return broadcastAccFunction
<string
>(doReloadLuaScript
);
// Per-thread worker that installs or clears the trace regex. An empty `newRegex` resets
// t_traceRegex and yields "unset"; otherwise a new Regex is built and "ok" is returned.
// A PDNSException yields its reason text instead. The result is a heap-allocated string
// whose ownership passes to the caller.
2639 static string
* pleaseUseNewTraceRegex(const std::string
& newRegex
)
2642 if(newRegex
.empty()) {
2643 t_traceRegex
.reset();
2644 return new string("unset\n");
2647 t_traceRegex
= std::make_shared
<Regex
>(newRegex
);
2648 return new string("ok\n");
2651 catch(PDNSException
& ae
)
2653 return new string(ae
.reason
+"\n");
// Control-channel entry point: broadcasts pleaseUseNewTraceRegex to the threads with the
// first argument as the regex, or with an empty string (which clears the regex) when no
// argument was given. Returns the result of broadcastAccFunction<string>.
2656 string
doTraceRegex(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
2658 return broadcastAccFunction
<string
>(boost::bind(pleaseUseNewTraceRegex
, begin
!=end
? *begin
: ""));
// Reads /proc/sys/net/ipv6/route/max_size when that file exists and logs an advisory
// asking for it to be raised; the message compares the value against 16384.
// NOTE(review): the comparison guarding the log line is not present in this listing.
// std::stoi throws on non-numeric content; no handler is visible here.
2661 static void checkLinuxIPv6Limits()
2665 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line
)) {
2666 int lim
=std::stoi(line
);
2668 L
<<Logger::Error
<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim
<<" which is < 16384"<<endl
;
// Makes the file descriptor limit and the mthread count agree. The wanted count is
// g_maxMThreads * g_numWorkerThreads + 25. If the soft limit is too low it is raised when
// the hard limit allows; otherwise g_maxMThreads is reduced to fit the hard limit.
2673 static void checkOrFixFDS()
2675 unsigned int availFDs
=getFilenumLimit();
2676 unsigned int wantFDs
= g_maxMThreads
* g_numWorkerThreads
+25; // even healthier margin then before
2678 if(wantFDs
> availFDs
) {
2679 unsigned int hardlimit
= getFilenumLimit(true);
// The hard limit is sufficient: just raise the soft limit to what is needed.
2680 if(hardlimit
>= wantFDs
) {
2681 setFilenumLimit(wantFDs
);
2682 L
<<Logger::Warning
<<"Raised soft limit on number of filedescriptors to "<<wantFDs
<<" to match max-mthreads and threads settings"<<endl
;
// The hard limit is too small: shrink max-mthreads and use the whole hard limit.
// NOTE(review): `hardlimit - 25` is unsigned arithmetic and wraps if hardlimit < 25 -
// confirm whether such a limit can occur in practice.
2685 int newval
= (hardlimit
- 25) / g_numWorkerThreads
;
2686 L
<<Logger::Warning
<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit
<<" < "<<wantFDs
<<"), reducing max-mthreads to "<<newval
<<endl
;
2687 g_maxMThreads
= newval
;
2688 setFilenumLimit(hardlimit
);
2693 static void* recursorThread(int tid
, bool worker
);
// Function bound and passed to broadcastFunction with the newly built NetmaskGroup when
// the ACLs are (re)loaded later in this file.
// NOTE(review): the body is not present in this listing; presumably it installs `ng` as
// the thread's t_allowFrom - confirm against the full source.
2695 static void* pleaseSupplantACLs(std::shared_ptr
<NetmaskGroup
> ng
)
// Body of the routine that (re)builds the allow-from ACL and distributes it to the threads.
// NOTE(review): the signature of the function these statements belong to is missing from
// this listing, as are several braces and declarations.
// The first call only builds the ACL; later calls first re-read the configuration.
2706 static bool l_initialized
;
2708 if(l_initialized
) { // only reload configuration file on second call
2709 string configname
=::arg()["config-dir"]+"/recursor.conf";
2710 if(::arg()["config-name"]!="") {
2711 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2713 cleanSlashes(configname
);
// Re-read only the ACL-related settings and include-dir from the main configuration file.
2715 if(!::arg().preParseFile(configname
.c_str(), "allow-from-file"))
2716 throw runtime_error("Unable to re-parse configuration file '"+configname
+"'");
2717 ::arg().preParseFile(configname
.c_str(), "allow-from", LOCAL_NETS
);
2718 ::arg().preParseFile(configname
.c_str(), "include-dir");
2719 ::arg().preParse(g_argc
, g_argv
, "include-dir");
2721 // then process includes
2722 std::vector
<std::string
> extraConfigs
;
2723 ::arg().gatherIncludes(extraConfigs
);
// For included files the current value is passed as the third argument to preParseFile.
2725 for(const std::string
& fn
: extraConfigs
) {
2726 if(!::arg().preParseFile(fn
.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2727 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
2728 if(!::arg().preParseFile(fn
.c_str(), "allow-from", ::arg()["allow-from"]))
2729 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
// The command line is parsed last for both settings.
2732 ::arg().preParse(g_argc
, g_argv
, "allow-from-file");
2733 ::arg().preParse(g_argc
, g_argv
, "allow-from");
2736 std::shared_ptr
<NetmaskGroup
> oldAllowFrom
= t_allowFrom
;
2737 std::shared_ptr
<NetmaskGroup
> allowFrom
= std::make_shared
<NetmaskGroup
>();
// Source 1: a file of netmasks; as the log message states, it overrides 'allow-from'.
2739 if(!::arg()["allow-from-file"].empty()) {
2741 ifstream
ifs(::arg()["allow-from-file"].c_str());
2743 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2746 string::size_type pos
;
2747 while(getline(ifs
,line
)) {
2749 if(pos
!=string::npos
)
2755 allowFrom
->addMask(line
);
2757 L
<<Logger::Warning
<<"Done parsing " << allowFrom
->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl
;
// Source 2: the 'allow-from' setting, a list split on commas and spaces.
2759 else if(!::arg()["allow-from"].empty()) {
2761 stringtok(ips
, ::arg()["allow-from"], ", ");
2763 L
<<Logger::Warning
<<"Only allowing queries from: ";
2764 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
2765 allowFrom
->addMask(*i
);
2767 L
<<Logger::Warning
<<", ";
2768 L
<<Logger::Warning
<<*i
;
2770 L
<<Logger::Warning
<<endl
;
// Neither source set: a null ACL is used, with a warning when listening on something
// other than 127.0.0.1 on port 53.
2773 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2774 L
<<Logger::Error
<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl
;
2775 allowFrom
= nullptr;
// Publish the new ACL: stored in g_initialAllowFrom and sent to the threads via
// pleaseSupplantACLs.
2778 g_initialAllowFrom
= allowFrom
;
2779 broadcastFunction(boost::bind(pleaseSupplantACLs
, allowFrom
));
2780 oldAllowFrom
= nullptr;
2782 l_initialized
= true;
// Splits the "delegation-only" setting on commas, spaces and tabs and registers every
// entry with SyncRes::addDelegationOnly as a DNSName.
2786 static void setupDelegationOnly()
2788 vector
<string
> parts
;
2789 stringtok(parts
, ::arg()["delegation-only"], ", \t");
2790 for(const auto& p
: parts
) {
2791 SyncRes::addDelegationOnly(DNSName(p
));
// Parses the "cpu-map" setting into a map from thread id to a set of CPU ids.
// The value is a whitespace-separated list of `thread=cpu[,cpu...]` entries.
// Entries that fail to parse are logged at Error level.
2795 static std::map
<unsigned int, std::set
<int> > parseCPUMap()
2797 std::map
<unsigned int, std::set
<int> > result
;
2799 const std::string value
= ::arg()["cpu-map"];
// A mapping was requested but the platform cannot set thread CPU affinity.
2801 if (!value
.empty() && !isSettingThreadCPUAffinitySupported()) {
2802 L
<<Logger::Warning
<<"CPU mapping requested but not supported, skipping"<<endl
;
2806 std::vector
<std::string
> parts
;
2808 stringtok(parts
, value
, " \t");
2810 for(const auto& part
: parts
) {
// Entries without '=' are tested for here; the action taken is not visible in this listing.
2811 if (part
.find('=') == string::npos
)
// Left of '=' is the thread id, right of it the comma-separated CPU list.
2815 auto headers
= splitField(part
, '=');
2816 trim(headers
.first
);
2817 trim(headers
.second
);
2819 unsigned int threadId
= pdns_stou(headers
.first
);
2820 std::vector
<std::string
> cpus
;
2822 stringtok(cpus
, headers
.second
, ",");
2824 for(const auto& cpu
: cpus
) {
2825 int cpuId
= std::stoi(cpu
);
2827 result
[threadId
].insert(cpuId
);
2830 catch(const std::exception
& e
) {
2831 L
<<Logger::Error
<<"Error parsing cpu-map entry '"<<part
<<"': "<<e
.what()<<endl
;
// Applies the CPU set configured for worker `n`, if there is one, to the thread `tid`
// through mapThreadToCPUList, and logs the CPU list on success or on failure.
// Workers without an entry in `cpusMap` are left untouched.
2838 static void setCPUMap(const std::map
<unsigned int, std::set
<int> >& cpusMap
, unsigned int n
, pthread_t tid
)
2840 const auto& cpuMapping
= cpusMap
.find(n
);
2841 if (cpuMapping
!= cpusMap
.cend()) {
2842 int rc
= mapThreadToCPUList(tid
, cpuMapping
->second
);
// Success message (the test on `rc` selecting this branch is not in this listing).
2844 L
<<Logger::Info
<<"CPU affinity for worker "<<n
<<" has been set to CPU map:";
2845 for (const auto cpu
: cpuMapping
->second
) {
2846 L
<<Logger::Info
<<" "<<cpu
;
2848 L
<<Logger::Info
<<endl
;
// Failure message; `rc` is passed to strerror.
// NOTE(review): this message starts at Warning level but continues at Info level, and
// strerror(rc) follows the last CPU id with no separator - confirm this is intended.
2851 L
<<Logger::Warning
<<"Error setting CPU affinity for worker "<<n
<<" to CPU map:";
2852 for (const auto cpu
: cpuMapping
->second
) {
2853 L
<<Logger::Info
<<" "<<cpu
;
2855 L
<<Logger::Info
<<strerror(rc
)<<endl
;
// Main service start-up: configures logging, copies the settings into the process-wide
// globals and SyncRes statics, creates the listening sockets, optionally forks and
// daemonizes, drops privileges / chroots, and finally launches the handler and worker
// threads.
// NOTE(review): this listing omits many original lines (braces, `try`, `else`, exit
// calls, local declarations); the comments cover only what is visible.
2860 static int serviceMain(int argc
, char*argv
[])
// --- Logging set-up ---
2862 L
.setName(s_programname
);
2863 L
.disableSyslog(::arg().mustDo("disable-syslog"));
2864 L
.setTimestamps(::arg().mustDo("log-timestamp"));
2866 if(!::arg()["logging-facility"].empty()) {
2867 int val
=logFacilityToLOG(::arg().asNum("logging-facility") );
2869 theL().setFacility(val
);
2871 L
<<Logger::Error
<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl
;
2874 showProductVersion();
2875 seedRandom(::arg()["entropy-source"]);
// Seed passed to hashQuestion when queries are distributed over threads.
2877 g_disthashseed
=dns_random(0xffffffff);
2879 checkLinuxIPv6Limits();
// --- Local source addresses for outgoing queries ---
2881 vector
<string
> addrs
;
2882 if(!::arg()["query-local-address6"].empty()) {
2883 SyncRes::s_doIPv6
=true;
2884 L
<<Logger::Warning
<<"Enabling IPv6 transport for outgoing queries"<<endl
;
2886 stringtok(addrs
, ::arg()["query-local-address6"], ", ;");
2887 for(const string
& addr
: addrs
) {
2888 g_localQueryAddresses6
.push_back(ComboAddress(addr
));
2892 L
<<Logger::Warning
<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl
;
2895 stringtok(addrs
, ::arg()["query-local-address"], ", ;");
2896 for(const string
& addr
: addrs
) {
2897 g_localQueryAddresses4
.push_back(ComboAddress(addr
));
// NOTE(review): unlike the other log statements, no `endl` is visible on this one.
2900 catch(std::exception
& e
) {
2901 L
<<Logger::Error
<<"Assigning local query addresses: "<<e
.what();
// --- DNSSEC mode selection from the "dnssec" setting ---
2905 // keep this ABOVE loadRecursorLuaConfig!
2906 if(::arg()["dnssec"]=="off")
2907 g_dnssecmode
=DNSSECMode::Off
;
2908 else if(::arg()["dnssec"]=="process-no-validate")
2909 g_dnssecmode
=DNSSECMode::ProcessNoValidate
;
2910 else if(::arg()["dnssec"]=="process")
2911 g_dnssecmode
=DNSSECMode::Process
;
2912 else if(::arg()["dnssec"]=="validate")
2913 g_dnssecmode
=DNSSECMode::ValidateAll
;
2914 else if(::arg()["dnssec"]=="log-fail")
2915 g_dnssecmode
=DNSSECMode::ValidateForLog
;
2917 L
<<Logger::Error
<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl
;
2921 g_dnssecLogBogus
= ::arg().mustDo("dnssec-log-bogus");
2922 g_maxNSEC3Iterations
= ::arg().asNum("nsec3-max-iterations");
2924 g_maxCacheEntries
= ::arg().asNum("max-cache-entries");
2925 g_maxPacketCacheEntries
= ::arg().asNum("max-packetcache-entries");
// --- Lua configuration file ---
2928 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2930 catch (PDNSException
&e
) {
2931 L
<<Logger::Error
<<"Cannot load Lua configuration: "<<e
.reason
<<endl
;
2936 sortPublicSuffixList();
// --- Addresses never to query; 0.0.0.0 and :: are always appended to the list ---
2938 if(!::arg()["dont-query"].empty()) {
2940 stringtok(ips
, ::arg()["dont-query"], ", ");
2941 ips
.push_back("0.0.0.0");
2942 ips
.push_back("::");
2944 L
<<Logger::Warning
<<"Will not send queries to: ";
2945 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
2946 SyncRes::addDontQuery(*i
);
2948 L
<<Logger::Warning
<<", ";
2949 L
<<Logger::Warning
<<*i
;
2951 L
<<Logger::Warning
<<endl
;
2954 g_quiet
=::arg().mustDo("quiet");
2956 g_weDistributeQueries
= ::arg().mustDo("pdns-distributes-queries");
2957 if(g_weDistributeQueries
) {
2958 L
<<Logger::Warning
<<"PowerDNS Recursor itself will distribute queries over threads"<<endl
;
2961 setupDelegationOnly();
2962 g_outgoingEDNSBufsize
=::arg().asNum("edns-outgoing-bufsize");
// --- Trace mode: "fail" selects SyncRes::Store; any other enabled value selects
// SyncRes::Log and also forces the "quiet" setting to "no" ---
2964 if(::arg()["trace"]=="fail") {
2965 SyncRes::setDefaultLogMode(SyncRes::Store
);
2967 else if(::arg().mustDo("trace")) {
2968 SyncRes::setDefaultLogMode(SyncRes::Log
);
2969 ::arg().set("quiet")="no";
// --- Resolver tunables copied into SyncRes statics ---
2974 SyncRes::s_minimumTTL
= ::arg().asNum("minimum-ttl-override");
2976 SyncRes::s_nopacketcache
= ::arg().mustDo("disable-packetcache");
2978 SyncRes::s_maxnegttl
=::arg().asNum("max-negative-ttl");
// max-cache-ttl is floored at 15.
2979 SyncRes::s_maxcachettl
=max(::arg().asNum("max-cache-ttl"), 15);
2980 SyncRes::s_packetcachettl
=::arg().asNum("packetcache-ttl");
2981 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2982 uint32_t packetCacheServFailTTL
= ::arg().asNum("packetcache-servfail-ttl");
2983 SyncRes::s_packetcacheservfailttl
=(packetCacheServFailTTL
> SyncRes::s_packetcachettl
) ? SyncRes::s_packetcachettl
: packetCacheServFailTTL
;
2984 SyncRes::s_serverdownmaxfails
=::arg().asNum("server-down-max-fails");
2985 SyncRes::s_serverdownthrottletime
=::arg().asNum("server-down-throttle-time");
2986 SyncRes::s_serverID
=::arg()["server-id"];
2987 SyncRes::s_maxqperq
=::arg().asNum("max-qperq");
// max-total-msec is multiplied by 1000 before being stored in s_maxtotusec.
2988 SyncRes::s_maxtotusec
=1000*::arg().asNum("max-total-msec");
2989 SyncRes::s_maxdepth
=::arg().asNum("max-recursion-depth");
2990 SyncRes::s_rootNXTrust
= ::arg().mustDo( "root-nx-trust");
// With no configured server-id, the host name is used instead.
// NOTE(review): the declaration of `tmp` is not in this listing; gethostname is given
// sizeof(tmp)-1, so termination depends on how tmp was initialised - confirm.
2991 if(SyncRes::s_serverID
.empty()) {
2993 gethostname(tmp
, sizeof(tmp
)-1);
2994 SyncRes::s_serverID
=tmp
;
2997 SyncRes::s_ecsipv4limit
= ::arg().asNum("ecs-ipv4-bits");
2998 SyncRes::s_ecsipv6limit
= ::arg().asNum("ecs-ipv6-bits");
// --- ECS scope-zero address: the explicit setting if given, otherwise candidates are
// the non-ANY local query addresses (IPv4 then IPv6), then 127.0.0.1/32.
// The control flow between these candidates is not visible in this listing. ---
3000 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3001 ComboAddress
scopeZero(::arg()["ecs-scope-zero-address"]);
3002 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero
, scopeZero
.isIPv4() ? 32 : 128));
3006 for (const auto& addr
: g_localQueryAddresses4
) {
3007 if (!IsAnyAddress(addr
)) {
3008 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 32));
3014 for (const auto& addr
: g_localQueryAddresses6
) {
3015 if (!IsAnyAddress(addr
)) {
3016 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 128));
3022 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3027 g_networkTimeoutMsec
= ::arg().asNum("network-timeout");
3029 g_initialDomainMap
= parseAuthAndForwards();
3031 g_latencyStatSize
=::arg().asNum("latency-statistic-size");
3033 g_logCommonErrors
=::arg().mustDo("log-common-errors");
3034 g_logRPZChanges
= ::arg().mustDo("log-rpz-changes");
3036 g_anyToTcp
= ::arg().mustDo("any-to-tcp");
3037 g_udpTruncationThreshold
= ::arg().asNum("udp-truncation-threshold");
3039 g_lowercaseOutgoing
= ::arg().mustDo("lowercase-outgoing");
// --- Thread counts: at least one worker; the total adds one when the recursor
// distributes queries itself (a bool is added to the worker count) ---
3041 g_numWorkerThreads
= ::arg().asNum("threads");
3042 if (g_numWorkerThreads
< 1) {
3043 L
<<Logger::Warning
<<"Asked to run with 0 threads, raising to 1 instead"<<endl
;
3044 g_numWorkerThreads
= 1;
3047 g_numThreads
= g_numWorkerThreads
+ g_weDistributeQueries
;
3048 g_maxMThreads
= ::arg().asNum("max-mthreads");
3050 g_gettagNeedsEDNSOptions
= ::arg().mustDo("gettag-needs-edns-options");
3052 g_statisticsInterval
= ::arg().asNum("statistics-interval");
3055 g_reusePort
= ::arg().mustDo("reuseport");
// --- Listening sockets: one set per worker thread when reuseport is on and queries are
// not distributed, otherwise a single set for thread id 0 ---
3058 g_useOneSocketPerThread
= (!g_weDistributeQueries
&& g_reusePort
);
3060 if (g_useOneSocketPerThread
) {
3061 for (unsigned int threadId
= 0; threadId
< g_numWorkerThreads
; threadId
++) {
3062 makeUDPServerSockets(threadId
);
3063 makeTCPServerSockets(threadId
);
3067 makeUDPServerSockets(0);
3068 makeTCPServerSockets(0);
3071 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3072 g_useIncomingECS
= ::arg().mustDo("use-incoming-edns-subnet");
// --- Extra processes: fork "processes" - 1 times; `forks` is later passed to
// makeControlChannelSocket when more than one process is configured ---
3075 for(forks
= 0; forks
< ::arg().asNum("processes") - 1; ++forks
) {
3076 if(!fork()) // we are child
// --- Daemonizing; the Lua configuration is loaded again afterwards, with `false` ---
3080 if(::arg().mustDo("daemon")) {
3081 L
<<Logger::Warning
<<"Calling daemonize, going to background"<<endl
;
3082 L
.toConsole(Logger::Critical
);
3084 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
// --- Signal handlers; SIGPIPE is ignored ---
3086 signal(SIGUSR1
,usr1Handler
);
3087 signal(SIGUSR2
,usr2Handler
);
3088 signal(SIGPIPE
,SIG_IGN
);
// --- Crypto library initialisation ---
3092 #ifdef HAVE_LIBSODIUM
3093 if (sodium_init() == -1) {
3094 L
<<Logger::Error
<<"Unable to initialize sodium crypto library"<<endl
;
3099 openssl_thread_setup();
// --- Privilege handling: resolve the target gid/uid, drop group privileges first,
// chroot if requested, and drop user privileges after the control socket is created ---
3103 if(!::arg()["setgid"].empty())
3104 newgid
=Utility::makeGidNumeric(::arg()["setgid"]);
3106 if(!::arg()["setuid"].empty())
3107 newuid
=Utility::makeUidNumeric(::arg()["setuid"]);
3109 Utility::dropGroupPrivs(newuid
, newgid
);
3111 if (!::arg()["chroot"].empty()) {
// A set NOTIFY_SOCKET environment variable is treated as "running from systemd", where
// chroot is refused.
3114 ns
= getenv("NOTIFY_SOCKET");
3115 if (ns
!= nullptr) {
3116 L
<<Logger::Error
<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl
;
3120 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3121 L
<<Logger::Error
<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno
)<<", exiting"<<endl
;
// NOTE(review): the success message is logged at Error level - confirm this is intended.
3125 L
<<Logger::Error
<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl
;
// --- PID file path; any stale file at that path is removed ---
3128 s_pidfname
=::arg()["socket-dir"]+"/"+s_programname
+".pid";
3129 if(!s_pidfname
.empty())
3130 unlink(s_pidfname
.c_str()); // remove possible old pid file
3133 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks
: -1);
3135 Utility::dropUserPrivs(newuid
);
// --- Incoming TCP limits ---
3139 g_tcpTimeout
=::arg().asNum("client-tcp-timeout");
3140 g_maxTCPPerClient
=::arg().asNum("max-tcp-per-client");
3141 g_tcpMaxQueriesPerConn
=::arg().asNum("max-tcp-queries-per-connection");
3143 if (::arg().mustDo("snmp-agent")) {
3144 g_snmpAgent
= std::make_shared
<RecursorSNMPAgent
>("recursor", ::arg()["snmp-master-socket"]);
// --- Thread launch ---
3148 /* This thread handles the web server, carbon, statistics and the control channel */
3149 std::thread
handlerThread(recursorThread
, s_handlerThreadID
, false);
3151 const auto cpusMap
= parseCPUMap();
3153 std::vector
<std::thread
> workers(g_numThreads
);
// Single-thread case: the worker loop runs directly in the calling thread.
3154 if(g_numThreads
== 1) {
3155 L
<<Logger::Warning
<<"Operating unthreaded"<<endl
;
3157 sd_notify(0, "READY=1");
3159 setCPUMap(cpusMap
, 0, pthread_self());
3160 recursorThread(0, true);
// Multi-thread case: start every worker, apply its CPU affinity, then join the last one.
3163 L
<<Logger::Warning
<<"Launching "<< g_numThreads
<<" threads"<<endl
;
3164 for(unsigned int n
=0; n
< g_numThreads
; ++n
) {
3165 workers
[n
] = std::thread(recursorThread
, n
, true);
3167 setCPUMap(cpusMap
, n
, workers
[n
].native_handle());
3170 sd_notify(0, "READY=1");
3172 workers
.back().join();
3177 static void* recursorThread(int n
, bool worker
)
3181 SyncRes
tmp(g_now
); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3182 SyncRes::setDomainMap(g_initialDomainMap
);
3183 t_allowFrom
= g_initialAllowFrom
;
3184 t_udpclientsocks
= std::unique_ptr
<UDPClientSocks
>(new UDPClientSocks());
3185 t_tcpClientCounts
= std::unique_ptr
<tcpClientCounts_t
>(new tcpClientCounts_t());
3188 t_packetCache
= std::unique_ptr
<RecursorPacketCache
>(new RecursorPacketCache());
3190 #ifdef HAVE_PROTOBUF
3191 t_uuidGenerator
= std::unique_ptr
<boost::uuids::random_generator
>(new boost::uuids::random_generator());
3193 L
<<Logger::Warning
<<"Done priming cache with root hints"<<endl
;
3196 if(!::arg()["lua-dns-script"].empty()) {
3197 t_pdl
= std::make_shared
<RecursorLua4
>(::arg()["lua-dns-script"]);
3198 L
<<Logger::Warning
<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl
;
3201 catch(std::exception
&e
) {
3202 L
<<Logger::Error
<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e
.what()<<endl
;
3206 unsigned int ringsize
=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads
;
3208 t_remotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3209 if(g_weDistributeQueries
) // if so, only 1 thread does recvfrom
3210 t_remotes
->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3212 t_remotes
->set_capacity(ringsize
);
3213 t_servfailremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3214 t_servfailremotes
->set_capacity(ringsize
);
3215 t_largeanswerremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3216 t_largeanswerremotes
->set_capacity(ringsize
);
3218 t_queryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
3219 t_queryring
->set_capacity(ringsize
);
3220 t_servfailqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
3221 t_servfailqueryring
->set_capacity(ringsize
);
3224 MT
=std::unique_ptr
<MTasker
<PacketID
,string
> >(new MTasker
<PacketID
,string
>(::arg().asNum("stack-size")));
3228 t_fdm
=getMultiplexer();
3231 if(::arg().mustDo("webserver")) {
3232 L
<<Logger::Warning
<< "Enabling web server" << endl
;
3234 new RecursorWebServer(t_fdm
);
3236 catch(PDNSException
&e
) {
3237 L
<<Logger::Error
<<"Exception: "<<e
.reason
<<endl
;
3241 L
<<Logger::Error
<<"Enabled '"<< t_fdm
->getName() << "' multiplexer"<<endl
;
3244 t_fdm
->addReadFD(g_pipes
[t_id
].readToThread
, handlePipeRequest
);
3246 if(g_useOneSocketPerThread
) {
3247 for(deferredAdd_t::const_iterator i
= deferredAdds
[t_id
].cbegin(); i
!= deferredAdds
[t_id
].cend(); ++i
) {
3248 t_fdm
->addReadFD(i
->first
, i
->second
);
3252 if(!g_weDistributeQueries
|| t_id
== s_distributorThreadID
) { // if we distribute queries, only t_id = 0 listens
3253 for(deferredAdd_t::const_iterator i
= deferredAdds
[0].cbegin(); i
!= deferredAdds
[0].cend(); ++i
) {
3254 t_fdm
->addReadFD(i
->first
, i
->second
);
3263 t_fdm
->addReadFD(s_rcc
.d_fd
, handleRCC
); // control channel
3266 unsigned int maxTcpClients
=::arg().asNum("max-tcp-clients");
3268 bool listenOnTCP(true);
3270 time_t last_carbon
=0;
3271 time_t carbonInterval
=::arg().asNum("carbon-interval");
3272 counter
.store(0); // used to periodically execute certain tasks
3274 while(MT
->schedule(&g_now
)); // MTasker letting the mthreads do their thing
3276 if(!(counter
%500)) {
3277 MT
->makeThread(houseKeeping
, 0);
3281 typedef vector
<pair
<int, FDMultiplexer::funcparam_t
> > expired_t
;
3282 expired_t expired
=t_fdm
->getTimeouts(g_now
);
3284 for(expired_t::iterator i
=expired
.begin() ; i
!= expired
.end(); ++i
) {
3285 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(i
->second
);
3286 if(g_logCommonErrors
)
3287 L
<<Logger::Warning
<<"Timeout from remote TCP client "<< conn
->d_remote
.toString() <<endl
;
3288 t_fdm
->removeReadFD(i
->first
);
3294 if(!worker
&& statsWanted
) {
3298 Utility::gettimeofday(&g_now
, 0);
3300 if(!worker
&& (g_now
.tv_sec
- last_carbon
>= carbonInterval
)) {
3301 MT
->makeThread(doCarbonDump
, 0);
3302 last_carbon
= g_now
.tv_sec
;
3306 // 'run' updates g_now for us
3308 if(worker
&& (!g_weDistributeQueries
|| t_id
== s_distributorThreadID
)) { // if pdns distributes queries, only tid 0 should do this
3310 if(TCPConnection::getCurrentConnections() > maxTcpClients
) { // shutdown, too many connections
3311 for(tcpListenSockets_t::iterator i
=g_tcpListenSockets
.begin(); i
!= g_tcpListenSockets
.end(); ++i
)
3312 t_fdm
->removeReadFD(*i
);
3317 if(TCPConnection::getCurrentConnections() <= maxTcpClients
) { // reenable
3318 for(tcpListenSockets_t::iterator i
=g_tcpListenSockets
.begin(); i
!= g_tcpListenSockets
.end(); ++i
)
3319 t_fdm
->addReadFD(*i
, handleNewTCPQuestion
);
3326 catch(PDNSException
&ae
) {
3327 L
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
3330 catch(std::exception
&e
) {
3331 L
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
3335 L
<<Logger::Error
<<"any other exception in main: "<<endl
;
3340 int main(int argc
, char **argv
)
3344 g_stats
.startupTime
=time(0);
3345 versionSetProduct(ProductRecursor
);
3349 int ret
= EXIT_SUCCESS
;
3352 ::arg().set("stack-size","stack size per mthread")="200000";
3353 ::arg().set("soa-minimum-ttl","Don't change")="0";
3354 ::arg().set("no-shuffle","Don't change")="off";
3355 ::arg().set("local-port","port to listen on")="53";
3356 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3357 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3358 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3359 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3360 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3361 ::arg().set("daemon","Operate as a daemon")="no";
3362 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3363 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3364 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3365 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3366 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3367 ::arg().set("chroot","switch to chroot jail")="";
3368 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3369 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3370 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3371 ::arg().set("threads", "Launch this number of threads")="2";
3372 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3373 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3374 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3375 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3376 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3377 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3378 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3379 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3380 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3381 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3382 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3383 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3384 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3385 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3386 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3387 ::arg().set("quiet","Suppress logging of questions and answers")="";
3388 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3389 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR
;
3390 ::arg().set("socket-owner","Owner of socket")="";
3391 ::arg().set("socket-group","Group of socket")="";
3392 ::arg().set("socket-mode", "Permissions for socket")="";
3394 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR
+" when unset and not chrooted" )="";
3395 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3396 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3397 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3398 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3399 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3400 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3401 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3402 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3403 ::arg().set("hint-file", "If set, load root hints from this file")="";
3404 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3405 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3406 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3407 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3408 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3409 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3410 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3411 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3412 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3413 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS
;
3414 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3415 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3416 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY
;
3417 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3418 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3419 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3420 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3421 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3422 ::arg().set("lua-config-file", "More powerful configuration options")="";
3424 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3425 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3426 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3427 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3428 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3429 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3430 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3431 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3432 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3433 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3434 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3435 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3436 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3437 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3438 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3439 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3440 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3441 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3442 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3443 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3444 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3445 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3446 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3447 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3448 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3449 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3451 ::arg().set("include-dir","Include *.conf files from this directory")="";
3452 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3454 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3456 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3457 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3459 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3460 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3462 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3464 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3466 ::arg().setCmd("help","Provide a helpful message");
3467 ::arg().setCmd("version","Print version string");
3468 ::arg().setCmd("config","Output blank configuration");
3469 L
.toConsole(Logger::Info
);
3470 ::arg().laxParse(argc
,argv
); // do a lax parse
3472 string configname
=::arg()["config-dir"]+"/recursor.conf";
3473 if(::arg()["config-name"]!="") {
3474 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3475 s_programname
+="-"+::arg()["config-name"];
3477 cleanSlashes(configname
);
3479 if(::arg().mustDo("config")) {
3480 cout
<<::arg().configstring()<<endl
;
3484 if(!::arg().file(configname
.c_str()))
3485 L
<<Logger::Warning
<<"Unable to parse configuration file '"<<configname
<<"'"<<endl
;
3487 ::arg().parse(argc
,argv
);
3489 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3490 L
<<Logger::Error
<<"Using chroot and a writable API is not possible"<<endl
;
3494 if (::arg()["socket-dir"].empty()) {
3495 if (::arg()["chroot"].empty())
3496 ::arg().set("socket-dir") = LOCALSTATEDIR
;
3498 ::arg().set("socket-dir") = "/";
3501 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3503 if(::arg().asNum("threads")==1)
3504 ::arg().set("pdns-distributes-queries")="no";
3506 if(::arg().mustDo("help")) {
3507 cout
<<"syntax:"<<endl
<<endl
;
3508 cout
<<::arg().helpstring(::arg()["help"])<<endl
;
3511 if(::arg().mustDo("version")) {
3512 showProductVersion();
3513 showBuildConfiguration();
3517 Logger::Urgency logUrgency
= (Logger::Urgency
)::arg().asNum("loglevel");
3519 if (logUrgency
< Logger::Error
)
3520 logUrgency
= Logger::Error
;
3521 if(!g_quiet
&& logUrgency
< Logger::Info
) { // Logger::Info=6, Logger::Debug=7
3522 logUrgency
= Logger::Info
; // if you do --quiet=no, you need Info to also see the query log
3524 L
.setLoglevel(logUrgency
);
3525 L
.toConsole(logUrgency
);
3527 serviceMain(argc
, argv
);
3529 catch(PDNSException
&ae
) {
3530 L
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
3533 catch(std::exception
&e
) {
3534 L
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
3538 L
<<Logger::Error
<<"any other exception in main: "<<endl
;