2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
30 #include "recpacketcache.hh"
31 #include "ws-recursor.hh"
34 #include "dns_random.hh"
38 #include "opensslsigners.hh"
41 #include <boost/static_assert.hpp>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
52 #include "arguments.hh"
56 #include "sortlist.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
64 #include "malloctrace.hh"
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
76 #include "lua-recursor4.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
92 #include <systemd/sd-daemon.h>
95 #include "namespaces.hh"
// Map from client address to a 32-bit counter, ordered with
// ComboAddress::addressOnlyLessThan. TCPConnection's constructor increments
// and its destructor decrements the entry for the peer address.
97 typedef map
<ComboAddress
, uint32_t, ComboAddress::addressOnlyLessThan
> tcpClientCounts_t
;
// ---- Per-thread state, private to this translation unit ----
// Lua engine handle; handed to SyncRes via setLuaEngine() when resolving.
99 static thread_local
std::shared_ptr
<RecursorLua4
> t_pdl
;
// Numeric id of the current thread; exposed through getRecursorThreadId().
100 static thread_local
int t_id
;
// Optional regex matched against the query name to enable tracing.
101 static thread_local
std::shared_ptr
<Regex
> t_traceRegex
;
// Per-thread count of open TCP connections per client address.
102 static thread_local
std::unique_ptr
<tcpClientCounts_t
> t_tcpClientCounts
;
// ---- Per-thread state with external linkage ----
104 thread_local
std::unique_ptr
<MT_t
> MT
; // the big MTasker
105 thread_local
std::unique_ptr
<MemRecursorCache
> t_RC
;
106 thread_local
std::unique_ptr
<RecursorPacketCache
> t_packetCache
;
// File-descriptor multiplexer of this thread; the async helpers in this file
// register and unregister their sockets on it.
107 thread_local FDMultiplexer
* t_fdm
{nullptr};
// Ring buffers of client addresses. updateResponseStats() feeds the
// servfail and large-answer rings.
108 thread_local
std::unique_ptr
<addrringbuf_t
> t_remotes
, t_servfailremotes
, t_largeanswerremotes
;
// Ring buffers of (query name, query type) pairs.
109 thread_local
std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > > t_queryring
, t_servfailqueryring
;
110 thread_local
std::shared_ptr
<NetmaskGroup
> t_allowFrom
;
112 thread_local
std::unique_ptr
<boost::uuids::random_generator
> t_uuidGenerator
;
114 __thread
struct timeval g_now
; // timestamp, updated (too) frequently
116 // for communicating with our threads
// NOTE(review): the declaration enclosing the two descriptors below is not
// visible in this listing; presumably they are the two ends of a per-thread
// query pipe (see ThreadPipeSet / g_pipes) -- confirm against the full file.
123 int writeQueriesToThread
; // this one is non-blocking
124 int readQueriesToThread
;
127 /* the TID of the thread handling the web server, carbon, statistics and the control channel */
128 static const int s_handlerThreadID
= -1;
129 /* when pdns-distributes-queries is set, the TID of the thread handling, hashing and distributing new queries
130 to the other threads */
131 static const int s_distributorThreadID
= 0;
// ---- Container aliases used by the listening / deferred-registration code ----
133 typedef vector
<int> tcpListenSockets_t
;
134 typedef map
<int, ComboAddress
> listenSocketsAddresses_t
; // is shared across all threads right now
// List of (fd, callback) pairs; the callback takes the fd and an `any` parameter.
135 typedef vector
<pair
<int, function
< void(int, any
&) > > > deferredAdd_t
;
// Wildcard IPv4 / IPv6 addresses.
137 static const ComboAddress
g_local4("0.0.0.0"), g_local6("::");
138 static vector
<ThreadPipeSet
> g_pipes
; // effectively readonly after startup
139 static tcpListenSockets_t g_tcpListenSockets
; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
140 static listenSocketsAddresses_t g_listenSocketsAddresses
; // is shared across all threads right now
// Deferred fd registrations, keyed by an unsigned int (presumably the thread id -- confirm).
141 static std::unordered_map
<unsigned int, deferredAdd_t
> deferredAdds
;
142 static set
<int> g_fromtosockets
; // listen sockets that use 'sendfromto()' mechanism
// Candidate local source addresses for outgoing queries; getQueryLocalAddress()
// picks one at random per address family.
143 static vector
<ComboAddress
> g_localQueryAddresses4
, g_localQueryAddresses6
;
144 static AtomicCounter counter
;
145 static std::shared_ptr
<SyncRes::domainmap_t
> g_initialDomainMap
; // new threads need this to be set up
146 static std::shared_ptr
<NetmaskGroup
> g_initialAllowFrom
; // each new thread needs to be set up with this
// ---- Tunables (where they are assigned is not visible in this listing) ----
147 static size_t g_tcpMaxQueriesPerConn
;
148 static uint64_t g_latencyStatSize
;
149 static uint32_t g_disthashseed
;
150 static unsigned int g_maxTCPPerClient
;
// Timeout handed to MT->waitEvent() by the asynchronous send/receive helpers.
151 static unsigned int g_networkTimeoutMsec
;
152 static unsigned int g_maxMThreads
;
153 static unsigned int g_numWorkerThreads
;
154 static int g_tcpTimeout
;
// Upper bound applied to the maximum UDP answer size in startDoResolve().
155 static uint16_t g_udpTruncationThreshold
;
156 static std::atomic
<bool> statsWanted
;
// When false, every incoming question is logged by startDoResolve().
157 static std::atomic
<bool> g_quiet
;
158 static bool g_logCommonErrors
;
// When true, ANY queries received over UDP are answered with the TC bit set.
159 static bool g_anyToTcp
;
160 static bool g_weDistributeQueries
; // if true, only 1 thread listens on the incoming query sockets
161 static bool g_reusePort
{false};
162 static bool g_useOneSocketPerThread
;
163 static bool g_gettagNeedsEDNSOptions
{false};
164 static time_t g_statisticsInterval
;
// When true, an EDNS Client Subnet option found in the incoming query is parsed and used.
165 static bool g_useIncomingECS
;
166 std::atomic
<uint32_t> g_maxCacheEntries
, g_maxPacketCacheEntries
;
// ---- Globals with external linkage ----
168 RecursorControlChannel s_rcc
; // only active in thread 0
169 RecursorStats g_stats
;
170 string s_programname
="pdns_recursor";
172 bool g_lowercaseOutgoing
;
173 unsigned int g_numThreads
;
174 uint16_t g_outgoingEDNSBufsize
;
175 bool g_logRPZChanges
{false};
// Comma-separated list of loopback, private, shared-address-space and
// link-local prefixes (IPv4 and IPv6).
177 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
178 // Bad Nets taken from both:
179 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
181 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
182 // where such a network may not be considered a valid destination
183 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of both lists (adjacent string literals are joined by the compiler).
184 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
186 //! used to send information to a newborn mthread
186 //! used to send information to a newborn mthread
// Bundles one parsed incoming query with the per-query state that
// startDoResolve() reads: addresses, EDNS data, policy tags, TTL cap.
// NOTE(review): several member declarations and all method bodies are
// missing from this listing (e.g. d_mdp, d_tcp, d_socket, d_deviceId are
// referenced but their declarations are not shown).
187 struct DNSComboWriter
{
// Parses `len` bytes at `data` into d_mdp and records `now` as the query
// timestamp; starts out as a non-TCP query with no socket (-1).
188 DNSComboWriter(const char* data
, uint16_t len
, const struct timeval
& now
) : d_mdp(true, data
, len
), d_now(now
),
189 d_tcp(false), d_socket(-1)
// Setters; bodies not visible here.
192 void setRemote(const ComboAddress
* sa
)
197 void setLocal(const ComboAddress
& sa
)
203 void setSocket(int sock
)
// Textual form of the client address.
208 string
getRemote() const
210 return d_remote
.toString();
213 struct timeval d_now
;
// Client address and the local address the query arrived on.
214 ComboAddress d_remote
, d_local
;
216 boost::uuids::uuid d_uuid
;
217 string d_requestorId
;
// EDNS Client Subnet data; d_ecsParsed records that the options were
// inspected, d_ecsFound that an ECS option was actually present.
220 EDNSSubnetOpts d_ednssubnet
;
221 bool d_ecsFound
{false};
222 bool d_ecsParsed
{false};
225 unsigned int d_tag
{0};
228 shared_ptr
<TCPConnection
> d_tcpConnection
;
// Raw EDNS options as (option code, payload) pairs.
229 vector
<pair
<uint16_t, string
> > d_ednsOpts
;
230 std::vector
<std::string
> d_policyTags
;
231 LuaContext::LuaObject d_data
;
// Upper bound applied to record TTLs in the answer; defaults to "no cap".
232 uint32_t d_ttlCap
{std::numeric_limits
<uint32_t>::max()};
// Seeds `variableAnswer` in startDoResolve().
233 bool d_variable
{false};
// NOTE(review): the signatures enclosing the two fragments below are not
// visible in this listing.
// Yields the raw pointer owned by the thread-local MT, or nullptr when MT is unset.
238 return MT
? MT
.get() : nullptr;
// Static ArgvMap instance (presumably the function-local singleton behind ::arg() -- confirm).
243 static ArgvMap theArg
;
// Returns the calling thread's id (thread-local t_id) converted to unsigned int.
247 unsigned int getRecursorThreadId()
249 return static_cast<unsigned int>(t_id
);
// Forward declaration of the write-readiness callback registered by asendtcp().
257 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
);
259 // -1 is error, 0 is timeout, 1 is success
// Sends `data` over `sock` from within an mthread: registers the socket for
// write events, then waits on the MTasker for up to g_networkTimeoutMsec.
// NOTE(review): the lines that build `pident` and declare `packet`, and the
// return statements, are missing from this listing.
260 int asendtcp(const string
& data
, Socket
* sock
)
266 t_fdm
->addWriteFD(sock
->getHandle(), handleTCPClientWritable
, pident
);
// Block this mthread until the callback reports back or the timeout expires.
269 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
);
// waitEvent returned 0 or -1: stop watching the socket ourselves.
271 if(!ret
|| ret
==-1) { // timeout
272 t_fdm
->removeWriteFD(sock
->getHandle());
// A reported size different from data.size() means the write did not complete.
274 else if(packet
.size() !=data
.size()) { // main loop tells us what it sent out, or empty in case of an error
// Forward declaration of the read-readiness callback registered by arecvtcp().
280 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
);
282 // -1 is error, 0 is timeout, 1 is success
// Receives from `sock` into `data` from within an mthread: registers the
// socket for read events, then waits on the MTasker for up to
// g_networkTimeoutMsec. `incompleteOkay` is forwarded through the wait key.
// NOTE(review): how `len` is used, the rest of the `pident` setup and the
// return statements are missing from this listing.
283 int arecvtcp(string
& data
, size_t len
, Socket
* sock
, bool incompleteOkay
)
289 pident
.inIncompleteOkay
=incompleteOkay
;
290 t_fdm
->addReadFD(sock
->getHandle(), handleTCPClientReadable
, pident
);
// Block this mthread until the callback delivers data or the timeout expires.
292 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
// waitEvent returned 0 or -1: stop watching the socket ourselves.
293 if(!ret
|| ret
==-1) { // timeout
294 t_fdm
->removeReadFD(sock
->getHandle());
// An empty payload is the callback's way of signalling failure.
296 else if(data
.empty()) {// error, EOF or other
// Multiplexer callback for the socket used by GenUDPQueryResponse(): reads
// one datagram, unregisters the fd, and wakes the waiting mthread with the
// payload, or with an empty string on the failure path.
// NOTE(review): the declaration of `resp`/`empty` and the branch on `ret`
// are missing from this listing.
303 static void handleGenUDPQueryResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
// `var` carries the PacketID the waiter is blocked on.
305 PacketID pident
=*any_cast
<PacketID
>(&var
);
307 ssize_t ret
=recv(fd
, resp
, sizeof(resp
), 0);
// One-shot: stop watching the fd regardless of the outcome.
308 t_fdm
->removeReadFD(fd
);
310 string
data(resp
, (size_t) ret
);
311 MT
->sendEvent(pident
, &data
);
// Failure path: deliver an empty payload.
315 MT
->sendEvent(pident
, &empty
);
316 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
// Sends `query` to `dest` over a fresh UDP socket and waits (inside an
// mthread) for a single response, for at most g_networkTimeoutMsec.
// NOTE(review): the bind/connect/send steps, the `pident` setup and the
// return statements are missing from this listing.
319 string
GenUDPQueryResponse(const ComboAddress
& dest
, const string
& query
)
// Datagram socket in the destination's address family.
321 Socket
s(dest
.sin4
.sin_family
, SOCK_DGRAM
);
// Randomly chosen local source address; port 0 is passed.
323 ComboAddress local
= getQueryLocalAddress(dest
.sin4
.sin_family
, 0);
332 t_fdm
->addReadFD(s
.getHandle(), handleGenUDPQueryResponse
, pident
);
336 int ret
=MT
->waitEvent(pident
,&data
, g_networkTimeoutMsec
);
// waitEvent returned 0 or -1: the callback never ran, so unregister the fd here.
338 if(!ret
|| ret
==-1) { // timeout
339 t_fdm
->removeReadFD(s
.getHandle());
341 else if(data
.empty()) {// error, EOF or other
342 // we could special case this
348 //! pick a random query local address
// For AF_INET a random entry of g_localQueryAddresses4 is used, otherwise a
// random entry of g_localQueryAddresses6; `port` (host byte order) is stored
// in network byte order.
// NOTE(review): what happens when the relevant list is empty, and the
// return statement, are missing from this listing.
349 ComboAddress
getQueryLocalAddress(int family
, uint16_t port
)
352 if(family
==AF_INET
) {
353 if(g_localQueryAddresses4
.empty())
356 ret
= g_localQueryAddresses4
[dns_random(g_localQueryAddresses4
.size())];
357 ret
.sin4
.sin_port
= htons(port
);
// IPv6 branch.
360 if(g_localQueryAddresses6
.empty())
363 ret
= g_localQueryAddresses6
[dns_random(g_localQueryAddresses6
.size())];
365 ret
.sin6
.sin6_port
= htons(port
);
// Forward declaration of the callback registered by asendto().
370 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
&);
// Sets the SOL_SOCKET buffer option `optname` of `fd` to `size`. A buffer
// that is already larger than `size` is reported rather than shrunk;
// failures are logged.
// NOTE(review): the declaration of `psize` and the early return after the
// "Not decreasing" message are missing from this listing.
372 static void setSocketBuffer(int fd
, int optname
, uint32_t size
)
375 socklen_t len
=sizeof(psize
);
// getsockopt() returns 0 on success, hence the negation.
377 if(!getsockopt(fd
, SOL_SOCKET
, optname
, (char*)&psize
, &len
) && psize
> size
) {
378 L
<<Logger::Error
<<"Not decreasing socket buffer size from "<<psize
<<" to "<<size
<<endl
;
382 if (setsockopt(fd
, SOL_SOCKET
, optname
, (char*)&size
, sizeof(size
)) < 0 )
383 L
<<Logger::Error
<<"Unable to raise socket buffer size to "<<size
<<": "<<strerror(errno
)<<endl
;
// Applies `size` to the receive buffer (SO_RCVBUF) of `fd` via setSocketBuffer().
387 static void setSocketReceiveBuffer(int fd
, uint32_t size
)
389 setSocketBuffer(fd
, SO_RCVBUF
, size
);
// Applies `size` to the send buffer (SO_SNDBUF) of `fd` via setSocketBuffer().
392 static void setSocketSendBuffer(int fd
, uint32_t size
)
394 setSocketBuffer(fd
, SO_SNDBUF
, size
);
398 // you can ask this class for a UDP socket to send a query from
399 // this socket is not yours, don't even think about deleting it
400 // but after you call 'returnSocket' on it, don't assume anything anymore
// NOTE(review): the class header and the d_socks member are not visible in
// this listing.
// Counter of sockets, initialised to 0 by the constructor.
403 unsigned int d_numsocks
;
405 UDPClientSocks() : d_numsocks(0)
// Set of file descriptors; the type of the pool searched by returnSocket().
409 typedef set
<int> socks_t
;
412 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
// Creates a client UDP socket in toaddr's address family, stores it in *fd
// and connect()s it to `toaddr`.
// NOTE(review): the return statements, the capture of errno into `err`, the
// close-on-failure call and the pool bookkeeping are missing from this listing.
413 int getSocket(const ComboAddress
& toaddr
, int* fd
)
415 *fd
=makeClientSocket(toaddr
.sin4
.sin_family
);
416 if(*fd
< 0) // temporary error - receive exception otherwise
419 if(connect(*fd
, (struct sockaddr
*)(&toaddr
), toaddr
.getSocklen()) < 0) {
421 // returnSocket(*fd);
// Closing the socket after a failed connect() may itself throw; only log it.
425 catch(const PDNSException
& e
) {
426 L
<<Logger::Error
<<"Error closing UDP socket after connect() failed: "<<e
.reason
<<endl
;
// Unreachable network gets separate treatment from other connect() errors.
429 if(err
==ENETUNREACH
) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
// Hands `fd` back: looks it up in d_socks and delegates to
// returnSocketLocked(). Throws PDNSException when `fd` is not in the pool.
439 void returnSocket(int fd
)
441 socks_t::iterator i
=d_socks
.find(fd
);
442 if(i
==d_socks
.end()) {
443 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd
)+") not in the pool");
445 returnSocketLocked(i
);
448 // return a socket to the pool, or simply erase it
// `i` must point into d_socks; the end iterator raises PDNSException. The fd
// is unregistered from the thread's multiplexer first.
// NOTE(review): the close call, the erase from d_socks and the d_numsocks
// update are missing from this listing.
449 void returnSocketLocked(socks_t::iterator
& i
)
451 if(i
==d_socks
.end()) {
452 throw PDNSException("Trying to return a socket not in the pool");
455 t_fdm
->removeReadFD(*i
);
// Tolerated: the fd may never have been registered with the multiplexer.
457 catch(FDMultiplexerException
& e
) {
458 // we sometimes return a socket that has not yet been assigned to t_fdm
// A failure while closing is logged, not propagated.
463 catch(const PDNSException
& e
) {
464 L
<<Logger::Error
<<"Error closing returned UDP socket: "<<e
.reason
<<endl
;
471 // returns -1 for errors which might go away, throws for ones that won't
// Creates a UDP socket for `family` and binds it to a local query address
// with a randomly chosen source port in [1025, 65534].
// NOTE(review): the retry loop around bind(), the declarations of
// `tries`/`port`/`sin` and the return statements are missing from this listing.
472 static int makeClientSocket(int family
)
474 int ret
=socket(family
, SOCK_DGRAM
, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
// Running out of file descriptors is treated as transient.
476 if(ret
< 0 && errno
==EMFILE
) // this is not a catastrophic error
480 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family
)+"): "+stringerror());
482 // setCloseOnExec(ret); // we're not going to exec
489 if(tries
==1) // fall back to kernel 'random'
// dns_random(64510) yields 0..64509, so port falls in 1025..65534.
492 port
= 1025 + dns_random(64510);
494 sin
=getQueryLocalAddress(family
, port
); // does htons for us
496 if (::bind(ret
, (struct sockaddr
*)&sin
, sin
.getSocklen()) >= 0)
500 throw PDNSException("Resolver binding to local query client socket on "+sin
.toString()+": "+stringerror());
// Per-thread pool of outgoing UDP client sockets, used by asendto() and arecvfrom().
507 static thread_local
std::unique_ptr
<UDPClientSocks
> t_udpclientsocks
;
509 /* these two functions are used by LWRes */
510 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
// Sends an outgoing UDP query of `len` bytes at `data` to `toaddr`. If an
// equivalent query is already waiting for an answer, this one is chained
// onto it (*fd is set to -1) instead of being sent again. Otherwise a socket
// is taken from the per-thread pool, registered for reading and used to send.
// NOTE(review): the rest of the `pident` setup, the return statements and
// the error branches are missing from this listing.
511 int asendto(const char *data
, size_t len
, int flags
,
512 const ComboAddress
& toaddr
, uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int* fd
)
516 pident
.domain
= domain
;
517 pident
.remote
= toaddr
;
520 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
521 pair
<MT_t::waiters_t::iterator
, MT_t::waiters_t::iterator
> chain
=MT
->d_waiters
.equal_range(pident
, PacketIDBirthdayCompare());
523 for(; chain
.first
!= chain
.second
; chain
.first
++) {
524 if(chain
.first
->key
.fd
> -1) { // don't chain onto existing chained waiter!
// NOTE(review): the three debug lines below look like they sit inside a
// disabled preprocessor block in the original file -- confirm.
526 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
527 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
528 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
530 chain
.first
->key
.chain
.insert(id
); // we can chain
531 *fd
=-1; // gets used in waitEvent / sendEvent later on
// No waiter to chain onto: obtain a connected socket for this destination.
536 int ret
=t_udpclientsocks
->getSocket(toaddr
, fd
);
543 t_fdm
->addReadFD(*fd
, handleUDPServerResponse
, pident
);
544 ret
= send(*fd
, data
, len
, 0);
// Failure path: hand the socket back and restore errno (saved in `tmp`).
549 t_udpclientsocks
->returnSocket(*fd
);
551 errno
= tmp
; // this is for logging purposes only
555 // -1 is error, 0 is timeout, 1 is success
// Waits (inside an mthread) for the answer to a query sent with asendto().
// At most `len` bytes of the answer are copied into `data`; *d_len receives
// the full answer size, which may exceed `len`. Too many near-miss answers
// are logged and counted as a spoofing attempt.
// NOTE(review): the rest of the `pident` setup, the branches on `ret` and
// the return statements are missing from this listing.
556 int arecvfrom(char *data
, size_t len
, int flags
, const ComboAddress
& fromaddr
, size_t *d_len
,
557 uint16_t id
, const DNSName
& domain
, uint16_t qtype
, int fd
, struct timeval
* now
)
// Cached value of the "spoof-nearmiss-max" setting.
559 static optional
<unsigned int> nearMissLimit
;
561 nearMissLimit
=::arg().asNum("spoof-nearmiss-max");
566 pident
.domain
=domain
;
568 pident
.remote
=fromaddr
;
571 int ret
=MT
->waitEvent(pident
, &packet
, g_networkTimeoutMsec
, now
);
574 if(packet
.empty()) // means "error"
577 *d_len
=packet
.size();
// Never copy more than the caller's buffer can hold.
578 memcpy(data
,packet
.c_str(),min(len
,*d_len
));
// A limit of 0 disables the near-miss check.
579 if(*nearMissLimit
&& pident
.nearMisses
> *nearMissLimit
) {
580 L
<<Logger::Error
<<"Too many ("<<pident
.nearMisses
<<" > "<<*nearMissLimit
<<") bogus answers for '"<<domain
<<"' from "<<fromaddr
.toString()<<", assuming spoof attempt."<<endl
;
581 g_stats
.spoofCount
++;
587 t_udpclientsocks
->returnSocket(fd
);
// Appends the process id to the file named by s_pidfname, unless the
// "write-pid" setting is off. A failure is logged with strerror(errno).
// NOTE(review): the early return and the stream-state check that selects
// between writing and logging are missing from this listing.
592 static void writePid(void)
594 if(!::arg().mustDo("write-pid"))
// Opened in append mode, so existing content is kept.
596 ofstream
of(s_pidfname
.c_str(), std::ios_base::app
);
598 of
<< Utility::getpid() <<endl
;
600 L
<<Logger::Error
<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname
<<" failed: "<<strerror(errno
)<<endl
;
// Wraps the accepted socket `fd` from peer `addr`, and bumps both the global
// connection counter and this thread's per-client counter for the peer.
603 TCPConnection::TCPConnection(int fd
, const ComboAddress
& addr
) : d_remote(addr
), d_fd(fd
)
605 ++s_currentConnections
;
// operator[] creates the entry at 0 for a first-time client.
606 (*t_tcpClientCounts
)[d_remote
]++;
609 TCPConnection::~TCPConnection()
612 if(closesocket(d_fd
) < 0)
613 L
<<Logger::Error
<<"Error closing socket for TCPConnection"<<endl
;
615 catch(const PDNSException
& e
) {
616 L
<<Logger::Error
<<"Error closing TCPConnection socket: "<<e
.reason
<<endl
;
619 if(t_tcpClientCounts
->count(d_remote
) && !(*t_tcpClientCounts
)[d_remote
]--)
620 t_tcpClientCounts
->erase(d_remote
);
621 --s_currentConnections
;
// Definition of the static counter of live TCPConnection objects; it is
// incremented in the constructor and decremented in the destructor.
624 AtomicCounter
TCPConnection::s_currentConnections
;
// Forward declaration; the definition is outside this part of the file.
626 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
);
628 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
// Records per-response statistics: clients receiving answers larger than
// 1000 bytes go into t_largeanswerremotes; for ServFail, the client and
// (when `query` is non-null) the query name/type go into the servfail rings.
// Each ring is skipped when it has not been allocated.
// NOTE(review): the switch header and the remaining case bodies (e.g.
// NXDomain) are missing from this listing.
629 static void updateResponseStats(int res
, const ComboAddress
& remote
, unsigned int packetsize
, const DNSName
* query
, uint16_t qtype
)
631 if(packetsize
> 1000 && t_largeanswerremotes
)
632 t_largeanswerremotes
->push_back(remote
);
634 case RCode::ServFail
:
635 if(t_servfailremotes
) {
636 t_servfailremotes
->push_back(remote
);
// `query` may be null, in which case only the client address is recorded.
637 if(query
&& t_servfailqueryring
) // packet cache
638 t_servfailqueryring
->push_back(make_pair(*query
, qtype
));
642 case RCode::NXDomain
:
// Builds the log suffix "(<qname>/<qtype> from <client address>)" for the
// query held by `dc`. A fixed fallback string is returned instead when
// building it fails.
// NOTE(review): the try/catch wrapper around the first return is missing
// from this listing.
651 static string
makeLoginfo(DNSComboWriter
* dc
)
654 return "("+dc
->d_mdp
.d_qname
.toLogString()+"/"+DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)+" from "+(dc
->d_remote
.toString())+")";
658 return "Exception making error message for exception";
// Builds a protobuf "Query" message describing an incoming question and
// queues its serialized form on `logger`.
// The client address is masked to maskV4 / maskV6 bits (chosen by address
// family) before it is stored; the same masks are passed along with the
// EDNS subnet. Policy tags are only added when the list is non-empty.
// NOTE(review): the declaration of `str` is missing from this listing.
662 static void protobufLogQuery(const std::shared_ptr
<RemoteLogger
>& logger
, uint8_t maskV4
, uint8_t maskV6
, const boost::uuids::uuid
& uniqueId
, const ComboAddress
& remote
, const ComboAddress
& local
, const Netmask
& ednssubnet
, bool tcp
, uint16_t id
, size_t len
, const DNSName
& qname
, uint16_t qtype
, uint16_t qclass
, const std::vector
<std::string
>& policyTags
, const std::string
& requestorId
, const std::string
& deviceId
)
// Mask the requestor address with the family-specific prefix length.
664 Netmask
requestorNM(remote
, remote
.sin4
.sin_family
== AF_INET
? maskV4
: maskV6
);
665 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
666 RecProtoBufMessage
message(DNSProtoBufMessage::Query
, uniqueId
, &requestor
, &local
, qname
, qtype
, qclass
, id
, tcp
, len
);
667 message
.setEDNSSubnet(ednssubnet
, ednssubnet
.isIpv4() ? maskV4
: maskV6
);
668 message
.setRequestorId(requestorId
);
669 message
.setDeviceId(deviceId
);
671 if (!policyTags
.empty()) {
672 message
.setPolicyTags(policyTags
);
675 // cerr <<message.toDebugString()<<endl;
// Serialize and hand over to the remote logger's queue.
677 message
.serialize(str
);
678 logger
->queueData(str
);
// Serializes an already populated response `message` and queues it on `logger`.
// NOTE(review): the declaration of `str` is missing from this listing.
681 static void protobufLogResponse(const std::shared_ptr
<RemoteLogger
>& logger
, const RecProtoBufMessage
& message
)
683 // cerr <<message.toDebugString()<<endl;
685 message
.serialize(str
);
686 logger
->queueData(str
);
691 * Chases the CNAME provided by the PolicyCustom RPZ policy.
693 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
694 * @param qtype: The QType of the original query
695 * @param sr: A SyncRes
696 * @param res: An integer that will contain the RCODE of the lookup we do
697 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
// Nothing happens unless `spoofed` is a CNAME. RPZ processing is switched
// off on `sr` for the duration of the chase and restored afterwards.
// NOTE(review): the statement that appends ANSWER records to `ret` is
// missing from this listing.
699 static void handleRPZCustom(const DNSRecord
& spoofed
, const QType
& qtype
, SyncRes
& sr
, int& res
, vector
<DNSRecord
>& ret
)
701 if (spoofed
.d_type
== QType::CNAME
) {
// Remember the current RPZ setting so it can be restored below.
702 bool oldWantsRPZ
= sr
.getWantsRPZ();
703 sr
.setWantsRPZ(false);
704 vector
<DNSRecord
> ans
;
// Resolve the CNAME target for the original query type.
705 res
= sr
.beginResolve(DNSName(spoofed
.d_content
->getZoneRepresentation()), qtype
, 1, ans
);
// Only records from the ANSWER section of the chase are of interest.
706 for (const auto& rec
: ans
) {
707 if(rec
.d_place
== DNSResourceRecord::ANSWER
) {
711 // Reset the RPZ state of the SyncRes
712 sr
.setWantsRPZ(oldWantsRPZ
);
// Writes `rec` into the packet under construction in `pw`.
// The TTL written is capped at `ttlCap`. `minTTL` is lowered to the record's
// uncapped TTL when that is smaller; OPT records are ignored for this. When
// the packet grows beyond `maxAnswerSize`, the TC bit is set unless the
// record belongs to the ADDITIONAL section.
// NOTE(review): the rollback/commit calls and the return statements are
// missing from this listing, so the meaning of the bool result cannot be
// confirmed here.
716 static bool addRecordToPacket(DNSPacketWriter
& pw
, const DNSRecord
& rec
, uint32_t& minTTL
, uint32_t ttlCap
, const uint16_t maxAnswerSize
)
718 pw
.startRecord(rec
.d_name
, rec
.d_type
, (rec
.d_ttl
> ttlCap
? ttlCap
: rec
.d_ttl
), rec
.d_class
, rec
.d_place
);
720 if(rec
.d_type
!= QType::OPT
) // their TTL ain't real
721 minTTL
= min(minTTL
, rec
.d_ttl
);
723 rec
.d_content
->toPacket(pw
);
// Size check after the record content has been written.
724 if(pw
.size() > static_cast<size_t>(maxAnswerSize
)) {
726 if(rec
.d_place
!= DNSResourceRecord::ADDITIONAL
) {
727 pw
.getHeader()->tc
=1;
736 static void startDoResolve(void *p
)
738 DNSComboWriter
* dc
=(DNSComboWriter
*)p
;
741 t_queryring
->push_back(make_pair(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
));
743 uint16_t maxanswersize
= dc
->d_tcp
? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold
);
746 if(getEDNSOpts(dc
->d_mdp
, &edo
)) {
749 "Values lower than 512 MUST be treated as equal to 512."
751 maxanswersize
= min(static_cast<uint16_t>(edo
.d_packetsize
>= 512 ? edo
.d_packetsize
: 512), g_udpTruncationThreshold
);
753 dc
->d_ednsOpts
= edo
.d_options
;
756 if (g_useIncomingECS
&& !dc
->d_ecsParsed
) {
757 for (const auto& o
: edo
.d_options
) {
758 if (o
.first
== EDNSOptionCode::ECS
) {
759 dc
->d_ecsFound
= getEDNSSubnetOptsFromString(o
.second
, &dc
->d_ednssubnet
);
765 /* perhaps there was no EDNS or no ECS but by now we looked */
766 dc
->d_ecsParsed
= true;
767 vector
<DNSRecord
> ret
;
768 vector
<uint8_t> packet
;
770 auto luaconfsLocal
= g_luaconfs
.getLocal();
771 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
773 RecProtoBufMessage
pbMessage(RecProtoBufMessage::Response
);
775 if (luaconfsLocal
->protobufServer
) {
776 Netmask
requestorNM(dc
->d_remote
, dc
->d_remote
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
777 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
778 pbMessage
.update(dc
->d_uuid
, &requestor
, &dc
->d_local
, dc
->d_tcp
, dc
->d_mdp
.d_header
.id
);
779 pbMessage
.setEDNSSubnet(dc
->d_ednssubnet
.source
, dc
->d_ednssubnet
.source
.isIpv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
780 pbMessage
.setQuestion(dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
782 #endif /* HAVE_PROTOBUF */
784 DNSPacketWriter
pw(packet
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
);
786 pw
.getHeader()->aa
=0;
787 pw
.getHeader()->ra
=1;
788 pw
.getHeader()->qr
=1;
789 pw
.getHeader()->tc
=0;
790 pw
.getHeader()->id
=dc
->d_mdp
.d_header
.id
;
791 pw
.getHeader()->rd
=dc
->d_mdp
.d_header
.rd
;
792 pw
.getHeader()->cd
=dc
->d_mdp
.d_header
.cd
;
794 /* This is the lowest TTL seen in the records of the response,
795 so we can't cache it for longer than this value.
796 If we have a TTL cap, this value can't be larger than the
797 cap no matter what. */
798 uint32_t minTTL
= dc
->d_ttlCap
;
800 SyncRes
sr(dc
->d_now
);
804 sr
.setLuaEngine(t_pdl
);
806 sr
.d_requestor
=dc
->d_remote
; // ECS needs this too
807 if(g_dnssecmode
!= DNSSECMode::Off
) {
808 sr
.setDoDNSSEC(true);
810 // Does the requestor want DNSSEC records?
811 if(edo
.d_Z
& EDNSOpts::DNSSECOK
) {
813 g_stats
.dnssecQueries
++;
816 // Ignore the client-set CD flag
817 pw
.getHeader()->cd
=0;
819 sr
.setDNSSECValidationRequested(g_dnssecmode
== DNSSECMode::ValidateAll
|| g_dnssecmode
==DNSSECMode::ValidateForLog
|| ((dc
->d_mdp
.d_header
.ad
|| DNSSECOK
) && g_dnssecmode
==DNSSECMode::Process
));
822 sr
.setInitialRequestId(dc
->d_uuid
);
825 if (g_useIncomingECS
) {
826 sr
.setIncomingECSFound(dc
->d_ecsFound
);
827 if (dc
->d_ecsFound
) {
828 sr
.setIncomingECS(dc
->d_ednssubnet
);
832 bool tracedQuery
=false; // we could consider letting Lua know about this too
833 bool variableAnswer
= dc
->d_variable
;
834 bool shouldNotValidate
= false;
836 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
837 int res
= RCode::NoError
;
838 DNSFilterEngine::Policy appliedPolicy
;
840 RecursorLua4::DNSQuestion
dq(dc
->d_remote
, dc
->d_local
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_tcp
, variableAnswer
, wantsRPZ
);
841 dq
.ednsFlags
= &edo
.d_Z
;
842 dq
.ednsOptions
= &dc
->d_ednsOpts
;
844 dq
.discardedPolicies
= &sr
.d_discardedPolicies
;
845 dq
.policyTags
= &dc
->d_policyTags
;
846 dq
.appliedPolicy
= &appliedPolicy
;
847 dq
.currentRecords
= &ret
;
848 dq
.dh
= &dc
->d_mdp
.d_header
;
849 dq
.data
= dc
->d_data
;
851 dq
.requestorId
= dc
->d_requestorId
;
852 dq
.deviceId
= dc
->d_deviceId
;
855 if(dc
->d_mdp
.d_qtype
==QType::ANY
&& !dc
->d_tcp
&& g_anyToTcp
) {
856 pw
.getHeader()->tc
= 1;
858 variableAnswer
= true;
862 if(t_traceRegex
&& t_traceRegex
->match(dc
->d_mdp
.d_qname
.toString())) {
863 sr
.setLogMode(SyncRes::Store
);
868 if(!g_quiet
|| tracedQuery
) {
869 L
<<Logger::Warning
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] " << (dc
->d_tcp
? "TCP " : "") << "question for '"<<dc
->d_mdp
.d_qname
<<"|"
870 <<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
)<<"' from "<<dc
->getRemote();
871 if(!dc
->d_ednssubnet
.source
.empty()) {
872 L
<<" (ecs "<<dc
->d_ednssubnet
.source
.toString()<<")";
877 sr
.setId(MT
->getTid());
878 if(!dc
->d_mdp
.d_header
.rd
)
882 t_pdl
->prerpz(dq
, res
);
885 // Check if the query has a policy attached to it
887 appliedPolicy
= luaconfsLocal
->dfe
.getQueryPolicy(dc
->d_mdp
.d_qname
, dc
->d_remote
, sr
.d_discardedPolicies
);
890 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
891 if(!t_pdl
|| !t_pdl
->preresolve(dq
, res
)) {
893 sr
.setWantsRPZ(wantsRPZ
);
895 switch(appliedPolicy
.d_kind
) {
896 case DNSFilterEngine::PolicyKind::NoAction
:
898 case DNSFilterEngine::PolicyKind::Drop
:
899 g_stats
.policyDrops
++;
900 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
904 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
905 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
908 case DNSFilterEngine::PolicyKind::NODATA
:
909 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
912 case DNSFilterEngine::PolicyKind::Custom
:
913 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
915 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
916 ret
.push_back(spoofed
);
917 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
919 case DNSFilterEngine::PolicyKind::Truncate
:
921 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
923 pw
.getHeader()->tc
=1;
930 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
932 res
= sr
.beginResolve(dc
->d_mdp
.d_qname
, QType(dc
->d_mdp
.d_qtype
), dc
->d_mdp
.d_qclass
, ret
);
933 shouldNotValidate
= sr
.wasOutOfBand();
935 catch(ImmediateServFailException
&e
) {
936 if(g_logCommonErrors
)
937 L
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during resolve of '"<<dc
->d_mdp
.d_qname
<<"' because: "<<e
.reason
<<endl
;
938 res
= RCode::ServFail
;
941 dq
.validationState
= sr
.getValidationState();
943 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
944 if (res
== -2) { // XXX This block should be macro'd, it is repeated post-resolve.
945 appliedPolicy
= sr
.d_appliedPolicy
;
946 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
947 switch(appliedPolicy
.d_kind
) {
948 case DNSFilterEngine::PolicyKind::NoAction
: // This can never happen
949 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
950 case DNSFilterEngine::PolicyKind::Drop
:
951 g_stats
.policyDrops
++;
955 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
960 case DNSFilterEngine::PolicyKind::NODATA
:
965 case DNSFilterEngine::PolicyKind::Truncate
:
969 pw
.getHeader()->tc
=1;
974 case DNSFilterEngine::PolicyKind::Custom
:
977 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
978 ret
.push_back(spoofed
);
979 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
985 appliedPolicy
= luaconfsLocal
->dfe
.getPostPolicy(ret
, sr
.d_discardedPolicies
);
989 if(res
== RCode::NoError
) {
991 for(; i
!= ret
.cend(); ++i
)
992 if(i
->d_type
== dc
->d_mdp
.d_qtype
&& i
->d_place
== DNSResourceRecord::ANSWER
)
994 if(i
== ret
.cend() && t_pdl
->nodata(dq
, res
))
995 shouldNotValidate
= true;
998 else if(res
== RCode::NXDomain
&& t_pdl
->nxdomain(dq
, res
))
999 shouldNotValidate
= true;
1001 if(t_pdl
->postresolve(dq
, res
))
1002 shouldNotValidate
= true;
1005 if (wantsRPZ
) { //XXX This block is repeated, see above
1006 g_stats
.policyResults
[appliedPolicy
.d_kind
]++;
1007 switch(appliedPolicy
.d_kind
) {
1008 case DNSFilterEngine::PolicyKind::NoAction
:
1010 case DNSFilterEngine::PolicyKind::Drop
:
1011 g_stats
.policyDrops
++;
1015 case DNSFilterEngine::PolicyKind::NXDOMAIN
:
1017 res
=RCode::NXDomain
;
1020 case DNSFilterEngine::PolicyKind::NODATA
:
1025 case DNSFilterEngine::PolicyKind::Truncate
:
1029 pw
.getHeader()->tc
=1;
1034 case DNSFilterEngine::PolicyKind::Custom
:
1037 spoofed
=appliedPolicy
.getCustomRecord(dc
->d_mdp
.d_qname
);
1038 ret
.push_back(spoofed
);
1039 handleRPZCustom(spoofed
, QType(dc
->d_mdp
.d_qtype
), sr
, res
, ret
);
1045 if(res
== PolicyDecision::DROP
) {
1046 g_stats
.policyDrops
++;
1051 if(tracedQuery
|| res
== -1 || res
== RCode::ServFail
|| pw
.getHeader()->rcode
== RCode::ServFail
)
1053 string
trace(sr
.getTrace());
1054 if(!trace
.empty()) {
1055 vector
<string
> lines
;
1056 boost::split(lines
, trace
, boost::is_any_of("\n"));
1057 for(const string
& line
: lines
) {
1059 L
<<Logger::Warning
<< line
<< endl
;
1065 pw
.getHeader()->rcode
=RCode::ServFail
;
1066 // no commit here, because no record
1067 g_stats
.servFails
++;
1070 pw
.getHeader()->rcode
=res
;
1072 // Does the validation mode or query demand validation?
1073 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1076 L
<<Logger::Warning
<<"Starting validation of answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<endl
;
1079 auto state
= sr
.getValidationState();
1081 if(state
== Secure
) {
1083 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates correctly"<<endl
;
1086 // Is the query source interested in the value of the ad-bit?
1087 if (dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)
1088 pw
.getHeader()->ad
=1;
1090 else if(state
== Insecure
) {
1092 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates as Insecure"<<endl
;
1095 pw
.getHeader()->ad
=0;
1097 else if(state
== Bogus
) {
1098 if(g_dnssecLogBogus
|| sr
.doLog() || g_dnssecmode
== DNSSECMode::ValidateForLog
) {
1099 L
<<Logger::Warning
<<"Answer to "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" for "<<dc
->d_remote
.toStringWithPort()<<" validates as Bogus"<<endl
;
1102 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1103 if(!pw
.getHeader()->cd
&& (g_dnssecmode
== DNSSECMode::ValidateAll
|| dc
->d_mdp
.d_header
.ad
|| DNSSECOK
)) {
1105 L
<<Logger::Warning
<<"Sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" because recursor or query demands it for Bogus results"<<endl
;
1108 pw
.getHeader()->rcode
=RCode::ServFail
;
1112 L
<<Logger::Warning
<<"Not sending out SERVFAIL for "<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<" Bogus validation since neither config nor query demands this"<<endl
;
1117 catch(ImmediateServFailException
&e
) {
1118 if(g_logCommonErrors
)
1119 L
<<Logger::Notice
<<"Sending SERVFAIL to "<<dc
->getRemote()<<" during validation of '"<<dc
->d_mdp
.d_qname
<<"|"<<QType(dc
->d_mdp
.d_qtype
).getName()<<"' because: "<<e
.reason
<<endl
;
1120 pw
.getHeader()->rcode
=RCode::ServFail
;
1126 orderAndShuffle(ret
);
1127 if(auto sl
= luaconfsLocal
->sortlist
.getOrderCmp(dc
->d_remote
)) {
1128 stable_sort(ret
.begin(), ret
.end(), *sl
);
1129 variableAnswer
=true;
1133 bool needCommit
= false;
1134 for(auto i
=ret
.cbegin(); i
!=ret
.cend(); ++i
) {
1136 ( i
->d_type
== QType::NSEC3
||
1138 ( i
->d_type
== QType::RRSIG
|| i
->d_type
==QType::NSEC
) &&
1140 ( dc
->d_mdp
.d_qtype
!= i
->d_type
&& dc
->d_mdp
.d_qtype
!= QType::ANY
) ||
1141 i
->d_place
!= DNSResourceRecord::ANSWER
1149 if (!addRecordToPacket(pw
, *i
, minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1155 #ifdef HAVE_PROTOBUF
1156 if(luaconfsLocal
->protobufServer
&& (i
->d_type
== QType::A
|| i
->d_type
== QType::AAAA
|| i
->d_type
== QType::CNAME
)) {
1157 pbMessage
.addRR(*i
);
1167 /* we try to add the EDNS OPT RR even for truncated answers,
1169 "The minimal response MUST be the DNS header, question section, and an
1170 OPT record. This MUST also occur when a truncated response (using
1171 the DNS header's TC bit) is returned."
1173 if (addRecordToPacket(pw
, makeOpt(edo
.d_packetsize
, 0, edo
.d_Z
), minTTL
, dc
->d_ttlCap
, maxanswersize
)) {
1178 g_rs
.submitResponse(dc
->d_mdp
.d_qtype
, packet
.size(), !dc
->d_tcp
);
1179 updateResponseStats(res
, dc
->d_remote
, packet
.size(), &dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
);
1180 #ifdef HAVE_PROTOBUF
1181 if (luaconfsLocal
->protobufServer
&& (!luaconfsLocal
->protobufTaggedOnly
|| (appliedPolicy
.d_name
&& !appliedPolicy
.d_name
->empty()) || !dc
->d_policyTags
.empty())) {
1182 pbMessage
.setBytes(packet
.size());
1183 pbMessage
.setResponseCode(pw
.getHeader()->rcode
);
1184 if (appliedPolicy
.d_name
) {
1185 pbMessage
.setAppliedPolicy(*appliedPolicy
.d_name
);
1186 pbMessage
.setAppliedPolicyType(appliedPolicy
.d_type
);
1188 pbMessage
.setPolicyTags(dc
->d_policyTags
);
1189 pbMessage
.setQueryTime(dc
->d_now
.tv_sec
, dc
->d_now
.tv_usec
);
1190 pbMessage
.setRequestorId(dq
.requestorId
);
1191 pbMessage
.setDeviceId(dq
.deviceId
);
1192 protobufLogResponse(luaconfsLocal
->protobufServer
, pbMessage
);
1199 fillMSGHdr(&msgh
, &iov
, cbuf
, 0, (char*)&*packet
.begin(), packet
.size(), &dc
->d_remote
);
1200 msgh
.msg_control
=NULL
;
1202 if(g_fromtosockets
.count(dc
->d_socket
)) {
1203 addCMsgSrcAddr(&msgh
, cbuf
, &dc
->d_local
, 0);
1205 if(sendmsg(dc
->d_socket
, &msgh
, 0) < 0 && g_logCommonErrors
)
1206 L
<<Logger::Warning
<<"Sending UDP reply to client "<<dc
->d_remote
.toStringWithPort()<<" failed with: "<<strerror(errno
)<<endl
;
1208 if(!SyncRes::s_nopacketcache
&& !variableAnswer
&& !sr
.wasVariable() ) {
1209 t_packetCache
->insertResponsePacket(dc
->d_tag
, dc
->d_qhash
, dc
->d_mdp
.d_qname
, dc
->d_mdp
.d_qtype
, dc
->d_mdp
.d_qclass
,
1210 string((const char*)&*packet
.begin(), packet
.size()),
1212 pw
.getHeader()->rcode
== RCode::ServFail
? SyncRes::s_packetcacheservfailttl
:
1213 min(minTTL
,SyncRes::s_packetcachettl
),
1216 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1220 buf
[0]=packet
.size()/256;
1221 buf
[1]=packet
.size()%256;
1223 Utility::iovec iov
[2];
1225 iov
[0].iov_base
=(void*)buf
; iov
[0].iov_len
=2;
1226 iov
[1].iov_base
=(void*)&*packet
.begin(); iov
[1].iov_len
= packet
.size();
1228 int wret
=Utility::writev(dc
->d_socket
, iov
, 2);
1232 L
<<Logger::Error
<<"EOF writing TCP answer to "<<dc
->getRemote()<<endl
;
1234 L
<<Logger::Error
<<"Error writing TCP answer to "<<dc
->getRemote()<<": "<< strerror(errno
) <<endl
;
1235 else if((unsigned int)wret
!= 2 + packet
.size())
1236 L
<<Logger::Error
<<"Oops, partial answer sent to "<<dc
->getRemote()<<" for "<<dc
->d_mdp
.d_qname
<<" (size="<< (2 + packet
.size()) <<", sent "<<wret
<<")"<<endl
;
1240 // update tcp connection status, either by closing or moving to 'BYTE0'
1243 // no need to remove us from FDM, we weren't there
1247 dc
->d_tcpConnection
->queriesCount
++;
1248 if (g_tcpMaxQueriesPerConn
&& dc
->d_tcpConnection
->queriesCount
>= g_tcpMaxQueriesPerConn
) {
1252 dc
->d_tcpConnection
->state
=TCPConnection::BYTE0
;
1253 Utility::gettimeofday(&g_now
, 0); // needs to be updated
1254 t_fdm
->addReadFD(dc
->d_socket
, handleRunningTCPQuestion
, dc
->d_tcpConnection
);
1255 t_fdm
->setReadTTD(dc
->d_socket
, g_now
, g_tcpTimeout
);
1259 float spent
=makeFloat(sr
.getNow()-dc
->d_now
);
1261 L
<<Logger::Error
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] answer to "<<(dc
->d_mdp
.d_header
.rd
?"":"non-rd ")<<"question '"<<dc
->d_mdp
.d_qname
<<"|"<<DNSRecordContent::NumberToType(dc
->d_mdp
.d_qtype
);
1262 L
<<"': "<<ntohs(pw
.getHeader()->ancount
)<<" answers, "<<ntohs(pw
.getHeader()->arcount
)<<" additional, took "<<sr
.d_outqueries
<<" packets, "<<
1263 sr
.d_totUsec
/1000.0<<" netw ms, "<< spent
*1000.0<<" tot ms, "<<
1264 sr
.d_throttledqueries
<<" throttled, "<<sr
.d_timeouts
<<" timeouts, "<<sr
.d_tcpoutqueries
<<" tcp connections, rcode="<< res
;
1266 if(!shouldNotValidate
&& sr
.isDNSSECValidationRequested()) {
1267 L
<< ", dnssec="<<vStates
[sr
.getValidationState()];
1274 if (sr
.d_outqueries
|| sr
.d_authzonequeries
) {
1275 t_RC
->cacheMisses
++;
1282 g_stats
.answers0_1
++;
1283 else if(spent
< 0.010)
1284 g_stats
.answers1_10
++;
1285 else if(spent
< 0.1)
1286 g_stats
.answers10_100
++;
1287 else if(spent
< 1.0)
1288 g_stats
.answers100_1000
++;
1290 g_stats
.answersSlow
++;
1292 uint64_t newLat
=(uint64_t)(spent
*1000000);
1293 newLat
= min(newLat
,(uint64_t)(((uint64_t) g_networkTimeoutMsec
)*1000)); // outliers of several minutes exist..
1294 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ (float)newLat
/g_latencyStatSize
;
1295 // no worries, we do this for packet cache hits elsewhere
1297 auto ourtime
= 1000.0*spent
-sr
.d_totUsec
/1000.0; // in msec
1299 g_stats
.ourtime0_1
++;
1300 else if(ourtime
< 2)
1301 g_stats
.ourtime1_2
++;
1302 else if(ourtime
< 4)
1303 g_stats
.ourtime2_4
++;
1304 else if(ourtime
< 8)
1305 g_stats
.ourtime4_8
++;
1306 else if(ourtime
< 16)
1307 g_stats
.ourtime8_16
++;
1308 else if(ourtime
< 32)
1309 g_stats
.ourtime16_32
++;
1311 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1312 g_stats
.ourtimeSlow
++;
1314 if(ourtime
>= 0.0) {
1315 newLat
=ourtime
*1000; // usec
1316 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ (float)newLat
/g_latencyStatSize
;
1318 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1322 catch(PDNSException
&ae
) {
1323 L
<<Logger::Error
<<"startDoResolve problem "<<makeLoginfo(dc
)<<": "<<ae
.reason
<<endl
;
1326 catch(MOADNSException
& e
) {
1327 L
<<Logger::Error
<<"DNS parser error "<<makeLoginfo(dc
) <<": "<<dc
->d_mdp
.d_qname
<<", "<<e
.what()<<endl
;
1330 catch(std::exception
& e
) {
1331 L
<<Logger::Error
<<"STL error "<< makeLoginfo(dc
)<<": "<<e
.what();
1333 // Luawrapper nests the exception from Lua, so we unnest it here
1335 std::rethrow_if_nested(e
);
1336 } catch(const std::exception
& ne
) {
1337 L
<<". Extra info: "<<ne
.what();
1344 L
<<Logger::Error
<<"Any other exception in a resolver context "<< makeLoginfo(dc
) <<endl
;
1347 g_stats
.maxMThreadStackUsage
= max(MT
->getMaxStackUsage(), g_stats
.maxMThreadStackUsage
);
// Opens the control socket of the recursor.
// The socket path is built from the "socket-dir" setting plus s_programname,
// may get "." + processNum appended, and always ends in ".controlsocket";
// s_rcc is then asked to listen on that path.
// Ownership ("socket-owner" / "socket-group") and permissions ("socket-mode")
// are only applied when the matching setting is non-empty. A failing chown()
// or chmod() is reported through unixDie().
// NOTE(review): the condition guarding the processNum suffix and the
// declarations of sockowner / sockgroup are not visible in this extract.
1350 static void makeControlChannelSocket(int processNum
=-1)
1352 string sockname
=::arg()["socket-dir"]+"/"+s_programname
;
1354 sockname
+= "."+std::to_string(processNum
);
1355 sockname
+=".controlsocket";
1356 s_rcc
.listen(sockname
);
// Optional owner/group override, read from the configuration.
1361 if (!::arg().isEmpty("socket-group"))
1362 sockgroup
=::arg().asGid("socket-group");
1363 if (!::arg().isEmpty("socket-owner"))
1364 sockowner
=::arg().asUid("socket-owner");
// chown() is only attempted when at least one of the two ids was configured.
1366 if (sockgroup
> -1 || sockowner
> -1) {
1367 if(chown(sockname
.c_str(), sockowner
, sockgroup
) < 0) {
1368 unixDie("Failed to chown control socket");
1372 // do mode change if socket-mode is given
1373 if(!::arg().isEmpty("socket-mode")) {
1374 mode_t sockmode
=::arg().asMode("socket-mode");
1375 if(chmod(sockname
.c_str(), sockmode
) < 0) {
1376 unixDie("Failed to chmod control socket");
// Extracts the query name, type and class from a raw DNS query and looks for
// an EDNS Client Subnet (ECS) option in it.
//   question   - the raw query packet
//   dnsname    - receives the name built by the DNSName constructor, which
//                starts parsing at offset sizeof(dnsheader)
//   qtype      - passed to the DNSName constructor as an output
//   qclass     - passed to the DNSName constructor as an output
//   ednssubnet - destination for the decoded ECS data
//                (NOTE(review): the assignment itself is not visible here)
//   options    - when used, getEDNSOptions() fills this map and the ECS entry
//                is looked up in it
// The option area is only inspected when the header announces exactly one
// additional record and the packet is longer than pos + 11 bytes.
// NOTE(review): the branch selecting between getEDNSOption() and
// getEDNSOptions(), the declarations of ecsLen / eso and all return statements
// are missing from this extract; presumably true is returned when an ECS
// option was decoded - confirm against the full source.
1381 static bool getQNameAndSubnet(const std::string
& question
, DNSName
* dnsname
, uint16_t* qtype
, uint16_t* qclass
, EDNSSubnetOpts
* ednssubnet
, std::map
<uint16_t, EDNSOptionView
>* options
)
1384 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
1385 size_t questionLen
= question
.length();
1386 unsigned int consumed
=0;
1387 *dnsname
=DNSName(question
.c_str(), questionLen
, sizeof(dnsheader
), false, qtype
, qclass
, &consumed
);
// pos = header size + bytes consumed by the name + 4.
// NOTE(review): the +4 presumably skips qtype and qclass (2 bytes each) - confirm.
1389 size_t pos
= sizeof(dnsheader
)+consumed
+4;
1390 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1392 if(ntohs(dh
->arcount
) == 1 && questionLen
> pos
+ 11) { // this code can extract one (1) EDNS Subnet option
1393 /* OPT root label (1) followed by type (2) */
1394 if(question
.at(pos
)==0 && question
.at(pos
+1)==0 && question
.at(pos
+2)==QType::OPT
) {
1396 char* ecsStart
= nullptr;
// Direct lookup of the ECS option; the search starts at pos + 9.
1398 int res
= getEDNSOption((char*)question
.c_str()+pos
+9, questionLen
- pos
- 9, EDNSOptionCode::ECS
, &ecsStart
, &ecsLen
);
1399 if (res
== 0 && ecsLen
> 4) {
// The first 4 bytes at ecsStart are skipped before decoding.
1401 if(getEDNSSubnetOptsFromString(ecsStart
+ 4, ecsLen
- 4, &eso
)) {
// Alternative path: collect the options into *options and pick ECS from the map.
1408 int res
= getEDNSOptions((char*)question
.c_str()+pos
+9, questionLen
- pos
- 9, *options
);
1410 const auto& it
= options
->find(EDNSOptionCode::ECS
);
1411 if (it
!= options
->end() && it
->second
.content
!= nullptr && it
->second
.size
> 0) {
1413 if(getEDNSSubnetOptsFromString(it
->second
.content
, it
->second
.size
, &eso
)) {
// Read callback for an already accepted TCP client connection; it is the
// function registered with t_fdm->addReadFD() for such sockets.
//   fd  - descriptor that became readable
//   var - carries the shared_ptr<TCPConnection> for this client
// The connection state selects what is read:
//   BYTE0       - recv() of up to 2 bytes of the length prefix into conn->data
//   BYTE1       - recv() of the second length byte into conn->data+1
//   GETQUESTION - recv() of the remaining (qlen - bytesread) bytes of the query
// Once bytesread equals qlen the fd is removed from the multiplexer, a
// DNSComboWriter is built from the received bytes, the gettag / gettag_ffi
// hooks and protobuf query logging run when configured, and the query is
// handed to startDoResolve through MT->makeThread().
// Packets with the qr bit set or a non-zero opcode increment
// g_stats.ignoredCount and are logged.
// NOTE(review): braces, else branches, return statements and several
// declarations (dest, qname, qtype, qclass, requestorId, deviceId) are missing
// from this extract.
1425 static void handleRunningTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
1427 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(var
);
// State BYTE0: read the 2 byte length prefix.
1429 if(conn
->state
==TCPConnection::BYTE0
) {
1430 ssize_t bytes
=recv(conn
->getFD(), conn
->data
, 2, 0);
1432 conn
->state
=TCPConnection::BYTE1
;
// The query length is assembled from the two prefix bytes, high byte first.
1434 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
1436 conn
->state
=TCPConnection::GETQUESTION
;
// recv() returned 0 or a negative value: stop watching this fd.
1438 if(!bytes
|| bytes
< 0) {
1439 t_fdm
->removeReadFD(fd
);
// State BYTE1: read the second byte of the length prefix.
1443 else if(conn
->state
==TCPConnection::BYTE1
) {
1444 ssize_t bytes
=recv(conn
->getFD(), conn
->data
+1, 1, 0);
1446 conn
->state
=TCPConnection::GETQUESTION
;
1447 conn
->qlen
=(((unsigned char)conn
->data
[0]) << 8)+ (unsigned char)conn
->data
[1];
1450 if(!bytes
|| bytes
< 0) {
1451 if(g_logCommonErrors
)
1452 L
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toString() <<" disconnected after first byte"<<endl
;
1453 t_fdm
->removeReadFD(fd
);
// State GETQUESTION: read the remaining bytes of the query body.
1457 else if(conn
->state
==TCPConnection::GETQUESTION
) {
1458 ssize_t bytes
=recv(conn
->getFD(), conn
->data
+ conn
->bytesread
, conn
->qlen
- conn
->bytesread
, 0);
// A result larger than the uint16_t maximum is rejected as well, since it is
// cast to uint16_t below.
1459 if(!bytes
|| bytes
< 0 || bytes
> std::numeric_limits
<std::uint16_t>::max()) {
1460 L
<<Logger::Error
<<"TCP client "<< conn
->d_remote
.toString() <<" disconnected while reading question body"<<endl
;
1461 t_fdm
->removeReadFD(fd
);
1464 conn
->bytesread
+=(uint16_t)bytes
;
// Complete query received.
1465 if(conn
->bytesread
==conn
->qlen
) {
1466 t_fdm
->removeReadFD(fd
); // should no longer awake ourselves when there is data to read
1468 DNSComboWriter
* dc
=nullptr;
1470 dc
=new DNSComboWriter(conn
->data
, conn
->qlen
, g_now
);
1472 catch(MOADNSException
&mde
) {
1473 g_stats
.clientParseError
++;
1474 if(g_logCommonErrors
)
1475 L
<<Logger::Error
<<"Unable to parse packet from TCP client "<< conn
->d_remote
.toString() <<endl
;
1478 dc
->d_tcpConnection
= conn
; // carry the torch
1479 dc
->setSocket(conn
->getFD()); // this is the only time a copy is made of the actual fd
1481 dc
->setRemote(&conn
->d_remote
);
// Determine the local address of this connection via getsockname().
1483 memset(&dest
, 0, sizeof(dest
));
1484 dest
.sin4
.sin_family
= conn
->d_remote
.sin4
.sin_family
;
1485 socklen_t len
= dest
.getSocklen();
1486 getsockname(conn
->getFD(), (sockaddr
*)&dest
, &len
); // if this fails, we're ok with it
1491 bool needECS
= false;
1494 #ifdef HAVE_PROTOBUF
1495 auto luaconfsLocal
= g_luaconfs
.getLocal();
1496 if (luaconfsLocal
->protobufServer
) {
// The qname and ECS data are parsed when needECS is set or a gettag hook exists.
1501 if(needECS
|| (t_pdl
&& (t_pdl
->d_gettag_ffi
|| t_pdl
->d_gettag
))) {
1504 std::map
<uint16_t, EDNSOptionView
> ednsOptions
;
1505 dc
->d_ecsParsed
= true;
1506 dc
->d_ecsFound
= getQNameAndSubnet(std::string(conn
->data
, conn
->qlen
), &qname
, &qtype
, &qclass
, &dc
->d_ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr);
// gettag_ffi is tried first, gettag otherwise.
1510 if (t_pdl
->d_gettag_ffi
) {
1511 dc
->d_tag
= t_pdl
->gettag_ffi(conn
->d_remote
, dc
->d_ednssubnet
.source
, dest
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
, dc
->d_ttlCap
, dc
->d_variable
);
1513 else if (t_pdl
->d_gettag
) {
1514 dc
->d_tag
= t_pdl
->gettag(conn
->d_remote
, dc
->d_ednssubnet
.source
, dest
, qname
, qtype
, &dc
->d_policyTags
, dc
->d_data
, ednsOptions
, true, requestorId
, deviceId
);
1517 catch(const std::exception
& e
) {
1518 if(g_logCommonErrors
)
1519 L
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
1523 catch(const std::exception
& e
)
1525 if(g_logCommonErrors
)
1526 L
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
1529 #ifdef HAVE_PROTOBUF
// Ids and a fresh uuid are stored on dc when any protobuf server is configured.
1530 if(luaconfsLocal
->protobufServer
|| luaconfsLocal
->outgoingProtobufServer
) {
1531 dc
->d_requestorId
= requestorId
;
1532 dc
->d_deviceId
= deviceId
;
1533 dc
->d_uuid
= (*t_uuidGenerator
)();
1536 if(luaconfsLocal
->protobufServer
) {
1538 const struct dnsheader
* dh
= (const struct dnsheader
*) conn
->data
;
1540 if (!luaconfsLocal
->protobufTaggedOnly
) {
1541 protobufLogQuery(luaconfsLocal
->protobufServer
, luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, dc
->d_uuid
, conn
->d_remote
, dest
, dc
->d_ednssubnet
.source
, true, dh
->id
, conn
->qlen
, qname
, qtype
, qclass
, dc
->d_policyTags
, dc
->d_requestorId
, dc
->d_deviceId
);
1544 catch(std::exception
& e
) {
1545 if(g_logCommonErrors
)
1546 L
<<Logger::Warning
<<"Error parsing a TCP query packet for edns subnet: "<<e
.what()<<endl
;
// Responses (qr set) arriving on the server socket are ignored.
1550 if(dc
->d_mdp
.d_header
.qr
) {
1552 g_stats
.ignoredCount
++;
1553 L
<<Logger::Error
<<"Ignoring answer from TCP client "<< conn
->d_remote
.toString() <<" on server socket!"<<endl
;
// Any non-zero opcode is ignored as well.
1556 if(dc
->d_mdp
.d_header
.opcode
) {
1558 g_stats
.ignoredCount
++;
1559 L
<<Logger::Error
<<"Ignoring non-query opcode from TCP client "<< conn
->d_remote
.toString() <<" on server socket!"<<endl
;
1564 ++g_stats
.tcpqcounter
;
1565 MT
->makeThread(startDoResolve
, dc
); // deletes dc, will set state to BYTE0 again
1572 //! Handle new incoming TCP connection
// Accepts a connection on the listening socket fd and decides whether to keep it.
// The new socket is closed again, with the matching counter incremented, when:
//   - MT->numProcesses() exceeds g_maxMThreads      (g_stats.overCapacityDrops)
//   - the peer does not match t_allowFrom           (g_stats.unauthorizedTCP)
//   - the peer already has g_maxTCPPerClient or more
//     connections counted in t_tcpClientCounts      (g_stats.tcpClientOverflow)
// Otherwise the socket is made non-blocking, wrapped in a TCPConnection in
// state BYTE0 and registered with handleRunningTCPQuestion plus a read timeout
// of g_tcpTimeout.
// NOTE(review): the check of the accept() result, the try/return lines around
// closesocket() and the declarations of addr / now are missing from this extract.
1573 static void handleNewTCPQuestion(int fd
, FDMultiplexer::funcparam_t
& )
1576 socklen_t addrlen
=sizeof(addr
);
1577 int newsock
=accept(fd
, (struct sockaddr
*)&addr
, &addrlen
);
// Over capacity: drop the connection.
1579 if(MT
->numProcesses() > g_maxMThreads
) {
1580 g_stats
.overCapacityDrops
++;
1582 closesocket(newsock
);
1584 catch(const PDNSException
& e
) {
1585 L
<<Logger::Error
<<"Error closing TCP socket after an over capacity drop: "<<e
.reason
<<endl
;
1591 t_remotes
->push_back(addr
);
// ACL check against allow-from.
1592 if(t_allowFrom
&& !t_allowFrom
->match(&addr
)) {
1594 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping TCP query from "<<addr
.toString()<<", address not matched by allow-from"<<endl
;
1596 g_stats
.unauthorizedTCP
++;
1598 closesocket(newsock
);
1600 catch(const PDNSException
& e
) {
1601 L
<<Logger::Error
<<"Error closing TCP socket after an ACL drop: "<<e
.reason
<<endl
;
// Per-client connection limit (only enforced when g_maxTCPPerClient is non-zero).
1605 if(g_maxTCPPerClient
&& t_tcpClientCounts
->count(addr
) && (*t_tcpClientCounts
)[addr
] >= g_maxTCPPerClient
) {
1606 g_stats
.tcpClientOverflow
++;
1608 closesocket(newsock
); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1610 catch(const PDNSException
& e
) {
1611 L
<<Logger::Error
<<"Error closing TCP socket after an overflow drop: "<<e
.reason
<<endl
;
// Connection accepted: start reading the length prefix.
1616 setNonBlocking(newsock
);
1617 std::shared_ptr
<TCPConnection
> tc
= std::make_shared
<TCPConnection
>(newsock
, addr
);
1618 tc
->state
=TCPConnection::BYTE0
;
1620 t_fdm
->addReadFD(tc
->getFD(), handleRunningTCPQuestion
, tc
);
1623 Utility::gettimeofday(&now
, 0);
1624 t_fdm
->setReadTTD(tc
->getFD(), now
, g_tcpTimeout
);
// Processes one UDP query that has already been read from the socket.
//   question - the raw query packet
//   fromaddr - address of the client
//   destaddr - local address the query was sent to
//   tv       - receive timestamp; a zero tv_sec disables the age check
//   fd       - socket used for sending the reply
// Steps visible in this extract:
//   1. refresh g_now and count the query in g_stats.tooOldDrops when it is
//      more than 1000 ms old
//   2. run the gettag / gettag_ffi hook when configured
//   3. look the query up in the packet cache; on a hit the aged response is
//      sent with sendmsg() and the statistics are updated
//   4. count a drop in g_stats.policyDrops when t_pdl->ipfilter() matches, and
//      in g_stats.overCapacityDrops when MT->numProcesses() exceeds g_maxMThreads
//   5. otherwise build a DNSComboWriter and start startDoResolve in an mthread
// NOTE(review): the return statements are not visible, so the meaning of the
// returned string pointer cannot be stated from this extract. Several
// declarations (qname, qtype, qclass, requestorId, deviceId, response, age,
// qhash, msgh, iov, cbuf) and most braces are missing as well.
1628 static string
* doProcessUDPQuestion(const std::string
& question
, const ComboAddress
& fromaddr
, const ComboAddress
& destaddr
, struct timeval tv
, int fd
)
1630 gettimeofday(&g_now
, 0);
1631 struct timeval diff
= g_now
- tv
;
// Age of the query in milliseconds.
1632 double delta
=(diff
.tv_sec
*1000 + diff
.tv_usec
/1000.0);
1634 if(tv
.tv_sec
&& delta
> 1000.0) {
1635 g_stats
.tooOldDrops
++;
1640 if(fromaddr
.sin4
.sin_family
==AF_INET6
)
1641 g_stats
.ipv6qcounter
++;
1644 const struct dnsheader
* dh
= (struct dnsheader
*)question
.c_str();
1645 unsigned int ctag
=0;
1647 bool needECS
= false;
1648 std::vector
<std::string
> policyTags
;
1649 LuaContext::LuaObject data
;
1652 #ifdef HAVE_PROTOBUF
1653 boost::uuids::uuid uniqueId
;
1654 auto luaconfsLocal
= g_luaconfs
.getLocal();
// A uuid is generated when either protobuf server is configured.
1655 if (luaconfsLocal
->protobufServer
) {
1656 uniqueId
= (*t_uuidGenerator
)();
1658 } else if (luaconfsLocal
->outgoingProtobufServer
) {
1659 uniqueId
= (*t_uuidGenerator
)();
1662 EDNSSubnetOpts ednssubnet
;
1663 bool ecsFound
= false;
1664 bool ecsParsed
= false;
1665 uint32_t ttlCap
= std::numeric_limits
<uint32_t>::max();
1666 bool variable
= false;
1672 bool qnameParsed
=false;
// NOTE(review): the following lines use g_mtracer; the preprocessor guard that
// presumably surrounds them is not visible in this extract.
1675 static uint64_t last=0;
1677 g_mtracer->clearAllocators();
1678 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1679 last=g_mtracer->getAllocs();
1680 cout<<g_mtracer->topAllocatorsString()<<endl;
1681 g_mtracer->clearAllocators();
// Tag determination through the Lua hooks, which needs the qname and ECS data.
1685 if(needECS
|| (t_pdl
&& (t_pdl
->d_gettag
|| t_pdl
->d_gettag_ffi
))) {
1687 std::map
<uint16_t, EDNSOptionView
> ednsOptions
;
1688 ecsFound
= getQNameAndSubnet(question
, &qname
, &qtype
, &qclass
, &ednssubnet
, g_gettagNeedsEDNSOptions
? &ednsOptions
: nullptr);
1694 if (t_pdl
->d_gettag_ffi
) {
1695 ctag
= t_pdl
->gettag_ffi(fromaddr
, ednssubnet
.source
, destaddr
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
, ttlCap
, variable
);
1697 else if (t_pdl
->d_gettag
) {
1698 ctag
=t_pdl
->gettag(fromaddr
, ednssubnet
.source
, destaddr
, qname
, qtype
, &policyTags
, data
, ednsOptions
, false, requestorId
, deviceId
);
1701 catch(const std::exception
& e
) {
1702 if(g_logCommonErrors
)
1703 L
<<Logger::Warning
<<"Error parsing a query packet qname='"<<qname
<<"' for tag determination, setting tag=0: "<<e
.what()<<endl
;
1707 catch(const std::exception
& e
)
1709 if(g_logCommonErrors
)
1710 L
<<Logger::Warning
<<"Error parsing a query packet for tag determination, setting tag=0: "<<e
.what()<<endl
;
1714 bool cacheHit
= false;
1715 RecProtoBufMessage
pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response
);
1716 #ifdef HAVE_PROTOBUF
// Log the incoming query, unless only tagged queries are wanted and no policy tag was set.
1717 if(luaconfsLocal
->protobufServer
) {
1718 if (!luaconfsLocal
->protobufTaggedOnly
|| !policyTags
.empty()) {
1719 protobufLogQuery(luaconfsLocal
->protobufServer
, luaconfsLocal
->protobufMaskV4
, luaconfsLocal
->protobufMaskV6
, uniqueId
, fromaddr
, destaddr
, ednssubnet
.source
, false, dh
->id
, question
.size(), qname
, qtype
, qclass
, policyTags
, requestorId
, deviceId
);
1722 #endif /* HAVE_PROTOBUF */
1724 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1725 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1726 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
// Two lookup variants: with the already parsed qname/qtype/qclass, or without.
// NOTE(review): the condition choosing between them is not visible here.
1728 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, qname
, qtype
, qclass
, g_now
.tv_sec
, &response
, &age
, &qhash
, &pbMessage
));
1731 cacheHit
= (!SyncRes::s_nopacketcache
&& t_packetCache
->getResponsePacket(ctag
, question
, g_now
.tv_sec
, &response
, &age
, &qhash
, &pbMessage
));
1735 #ifdef HAVE_PROTOBUF
// Protobuf logging of the cached response.
1736 if(luaconfsLocal
->protobufServer
&& (!luaconfsLocal
->protobufTaggedOnly
|| !pbMessage
.getAppliedPolicy().empty() || !pbMessage
.getPolicyTags().empty())) {
1737 Netmask
requestorNM(fromaddr
, fromaddr
.sin4
.sin_family
== AF_INET
? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1738 const ComboAddress
& requestor
= requestorNM
.getMaskedNetwork();
1739 pbMessage
.update(uniqueId
, &requestor
, &destaddr
, false, dh
->id
);
1740 pbMessage
.setEDNSSubnet(ednssubnet
.source
, ednssubnet
.source
.isIpv4() ? luaconfsLocal
->protobufMaskV4
: luaconfsLocal
->protobufMaskV6
);
1741 pbMessage
.setQueryTime(g_now
.tv_sec
, g_now
.tv_usec
);
1742 pbMessage
.setRequestorId(requestorId
);
1743 pbMessage
.setDeviceId(deviceId
);
1744 protobufLogResponse(luaconfsLocal
->protobufServer
, pbMessage
);
1746 #endif /* HAVE_PROTOBUF */
1748 L
<<Logger::Notice
<<t_id
<< " question answered from packet cache tag="<<ctag
<<" from "<<fromaddr
.toString()<<endl
;
1750 g_stats
.packetCacheHits
++;
1751 SyncRes::s_queries
++;
1752 ageDNSPacket(response
, age
);
// Send the cached response back to the client.
1756 fillMSGHdr(&msgh
, &iov
, cbuf
, 0, (char*)response
.c_str(), response
.length(), const_cast<ComboAddress
*>(&fromaddr
));
1757 msgh
.msg_control
=NULL
;
// Sockets listed in g_fromtosockets get the source address set explicitly.
1759 if(g_fromtosockets
.count(fd
)) {
1760 addCMsgSrcAddr(&msgh
, cbuf
, &destaddr
, 0);
1762 if(sendmsg(fd
, &msgh
, 0) < 0 && g_logCommonErrors
)
1763 L
<<Logger::Warning
<<"Sending UDP reply to client "<<fromaddr
.toStringWithPort()<<" failed with: "<<strerror(errno
)<<endl
;
// The header is copied out of the response to read its rcode for the statistics.
1765 if(response
.length() >= sizeof(struct dnsheader
)) {
1766 struct dnsheader tmpdh
;
1767 memcpy(&tmpdh
, response
.c_str(), sizeof(tmpdh
));
1768 updateResponseStats(tmpdh
.rcode
, fromaddr
, response
.length(), 0, 0);
1770 g_stats
.avgLatencyUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyUsec
+ 0.0; // we assume 0 usec
1771 g_stats
.avgLatencyOursUsec
=(1-1.0/g_latencyStatSize
)*g_stats
.avgLatencyOursUsec
+ 0.0; // we assume 0 usec
1775 catch(std::exception
& e
) {
1776 L
<<Logger::Error
<<"Error processing or aging answer packet: "<<e
.what()<<endl
;
// Lua ipfilter hook.
1781 if(t_pdl
->ipfilter(fromaddr
, destaddr
, *dh
)) {
1783 L
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<fromaddr
.toStringWithPort()<<" based on policy"<<endl
;
1784 g_stats
.policyDrops
++;
// Too many mthreads running already.
1789 if(MT
->numProcesses() > g_maxMThreads
) {
1791 L
<<Logger::Notice
<<t_id
<<" ["<<MT
->getTid()<<"/"<<MT
->numProcesses()<<"] DROPPED question from "<<fromaddr
.toStringWithPort()<<", over capacity"<<endl
;
1793 g_stats
.overCapacityDrops
++;
// Hand the query over to a resolver mthread, carrying along what was gathered above.
1797 DNSComboWriter
* dc
= new DNSComboWriter(question
.c_str(), question
.size(), g_now
);
1801 dc
->d_query
= question
;
1802 dc
->setRemote(&fromaddr
);
1803 dc
->setLocal(destaddr
);
1805 dc
->d_policyTags
= policyTags
;
1807 dc
->d_ecsFound
= ecsFound
;
1808 dc
->d_ecsParsed
= ecsParsed
;
1809 dc
->d_ednssubnet
= ednssubnet
;
1810 dc
->d_ttlCap
= ttlCap
;
1811 dc
->d_variable
= variable
;
1812 #ifdef HAVE_PROTOBUF
1813 if (luaconfsLocal
->protobufServer
|| luaconfsLocal
->outgoingProtobufServer
) {
1814 dc
->d_uuid
= uniqueId
;
1816 dc
->d_requestorId
= requestorId
;
1817 dc
->d_deviceId
= deviceId
;
1820 MT
->makeThread(startDoResolve
, (void*) dc
); // deletes dc
// Read callback for a UDP listening socket.
//   fd  - the readable UDP socket
//   var - multiplexer parameter (not used in the lines visible here)
// A datagram is read with recvmsg(). It is dropped, with a counter incremented, when:
//   - the sender does not match t_allowFrom    (g_stats.unauthorizedUDP)
//   - the sender port is 0                     (g_stats.clientParseError)
//   - the header has a non-zero opcode, or fails the check on the line missing
//     before "Ignoring answer" (presumably the qr bit)   (g_stats.ignoredCount)
// Otherwise the receive timestamp and the destination address are harvested
// from the message and the query is passed to doProcessUDPQuestion, either
// through distributeAsyncFunction (when g_weDistributeQueries is set) or directly.
// Parse errors thrown on the way are counted in g_stats.clientParseError.
// NOTE(review): the loop around recvmsg(), the declarations of len, data, cbuf,
// msgh, iov and dest, and most braces / else lines are missing from this extract.
1825 static void handleNewUDPQuestion(int fd
, FDMultiplexer::funcparam_t
& var
)
1829 ComboAddress fromaddr
;
1833 bool firstQuery
= true;
1835 fromaddr
.sin6
.sin6_family
=AF_INET6
; // this makes sure fromaddr is big enough
1836 fillMSGHdr(&msgh
, &iov
, cbuf
, sizeof(cbuf
), data
, sizeof(data
), &fromaddr
);
1839 if((len
=recvmsg(fd
, &msgh
, 0)) >= 0) {
1844 t_remotes
->push_back(fromaddr
);
// ACL check against allow-from.
1846 if(t_allowFrom
&& !t_allowFrom
->match(&fromaddr
)) {
1848 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toString()<<", address not matched by allow-from"<<endl
;
1850 g_stats
.unauthorizedUDP
++;
// The port sits at the same offset in both sockaddr layouts, so sin4.sin_port
// can be read for either family.
1853 BOOST_STATIC_ASSERT(offsetof(sockaddr_in
, sin_port
) == offsetof(sockaddr_in6
, sin6_port
));
1854 if(!fromaddr
.sin4
.sin_port
) { // also works for IPv6
1856 L
<<Logger::Error
<<"["<<MT
->getTid()<<"] dropping UDP query from "<<fromaddr
.toStringWithPort()<<", can't deal with port 0"<<endl
;
1858 g_stats
.clientParseError
++; // not quite the best place to put it, but needs to go somewhere
1862 dnsheader
* dh
=(dnsheader
*)data
;
1865 g_stats
.ignoredCount
++;
1866 if(g_logCommonErrors
)
1867 L
<<Logger::Error
<<"Ignoring answer from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
1869 else if(dh
->opcode
) {
1870 g_stats
.ignoredCount
++;
1871 if(g_logCommonErrors
)
1872 L
<<Logger::Error
<<"Ignoring non-query opcode "<<dh
->opcode
<<" from "<<fromaddr
.toString()<<" on server socket!"<<endl
;
// Acceptable query: copy it out of the receive buffer and gather metadata.
1875 string
question(data
, (size_t)len
);
1876 struct timeval tv
={0,0};
1877 HarvestTimestamp(&msgh
, &tv
);
1879 memset(&dest
, 0, sizeof(dest
)); // this makes sure we ignore this address if not returned by recvmsg above
1880 auto loc
= rplookup(g_listenSocketsAddresses
, fd
);
1881 if(HarvestDestinationAddress(&msgh
, &dest
)) {
1882 // but.. need to get port too
1884 dest
.sin4
.sin_port
= loc
->sin4
.sin_port
;
// Destination lookup through getsockname().
// NOTE(review): the condition selecting this path is not visible here.
1891 dest
.sin4
.sin_family
= fromaddr
.sin4
.sin_family
;
1892 socklen_t slen
= dest
.getSocklen();
1893 getsockname(fd
, (sockaddr
*)&dest
, &slen
); // if this fails, we're ok with it
1896 if(g_weDistributeQueries
)
1897 distributeAsyncFunction(question
, boost::bind(doProcessUDPQuestion
, question
, fromaddr
, dest
, tv
, fd
));
1899 doProcessUDPQuestion(question
, fromaddr
, dest
, tv
, fd
);
1902 catch(MOADNSException
& mde
) {
1903 g_stats
.clientParseError
++;
1904 if(g_logCommonErrors
)
1905 L
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<mde
.what()<<endl
;
1907 catch(std::runtime_error
& e
) {
1908 g_stats
.clientParseError
++;
1909 if(g_logCommonErrors
)
1910 L
<<Logger::Error
<<"Unable to parse packet from remote UDP client "<<fromaddr
.toString() <<": "<<e
.what()<<endl
;
1914 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
// EAGAIN on the first read attempt is counted separately.
1915 if(firstQuery
&& errno
== EAGAIN
)
1916 g_stats
.noPacketError
++;
// Creates the TCP listening sockets for every entry of the "local-address" setting.
//   threadId - index into deferredAdds; the new fd is queued there together
//              with handleNewTCPQuestion
// For each address: the port defaults to "local-port" and may be changed by
// parseService(); the address is parsed as IPv4 first and as IPv6 when that
// fails; the socket is created, configured with setsockopt() and bound.
// The fd is also appended to g_tcpListenSockets.
// Throws PDNSException when no address is configured, when the address cannot
// be resolved, or when socket(), SO_REUSEPORT or bind() fail.
// NOTE(review): declarations of fd, tmp, st and sin, the listen() call, the
// preprocessor guards around SO_REUSEPORT / TCP_FASTOPEN and most braces are
// missing from this extract.
1922 static void makeTCPServerSockets(unsigned int threadId
)
1925 vector
<string
>locals
;
1926 stringtok(locals
,::arg()["local-address"]," ,");
1929 throw PDNSException("No local address specified");
1931 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
1933 st
.port
=::arg().asNum("local-port");
1934 parseService(*i
, st
);
// Try IPv4 first; fall back to IPv6 when IpToU32() fails.
1938 memset((char *)&sin
,0, sizeof(sin
));
1939 sin
.sin4
.sin_family
= AF_INET
;
1940 if(!IpToU32(st
.host
, (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
1941 sin
.sin6
.sin6_family
= AF_INET6
;
1942 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
1943 throw PDNSException("Unable to resolve local address for TCP server on '"+ st
.host
+"'");
1946 fd
=socket(sin
.sin6
.sin6_family
, SOCK_STREAM
, 0);
1948 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1953 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, &tmp
, sizeof tmp
)<0) {
1954 L
<<Logger::Error
<<"Setsockopt failed for TCP listening socket"<<endl
;
// A failure to set IPV6_V6ONLY is logged but not fatal.
1957 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &tmp
, sizeof(tmp
)) < 0) {
1958 L
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno
)<<endl
;
1961 #ifdef TCP_DEFER_ACCEPT
// The success message is only logged for the first address.
1962 if(setsockopt(fd
, SOL_TCP
, TCP_DEFER_ACCEPT
, &tmp
, sizeof tmp
) >= 0) {
1963 if(i
==locals
.begin())
1964 L
<<Logger::Error
<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl
;
// NOTE(review): AF_INET is passed here regardless of the family of sin - confirm this is intended.
1968 if( ::arg().mustDo("non-local-bind") )
1969 Utility::setBindAny(AF_INET
, fd
);
1973 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &tmp
, sizeof(tmp
)) < 0)
1974 throw PDNSException("SO_REUSEPORT: "+stringerror());
// The "tcp-fast-open" value is used as the fast open queue size.
1978 if (::arg().asNum("tcp-fast-open") > 0) {
1980 int fastOpenQueueSize
= ::arg().asNum("tcp-fast-open");
1981 if (setsockopt(fd
, IPPROTO_TCP
, TCP_FASTOPEN
, &fastOpenQueueSize
, sizeof fastOpenQueueSize
) < 0) {
1982 L
<<Logger::Error
<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno
)<<endl
;
1985 L
<<Logger::Warning
<<"TCP Fast Open configured but not supported for listening socket"<<endl
;
1989 sin
.sin4
.sin_port
= htons(st
.port
);
1990 socklen_t socklen
=sin
.sin4
.sin_family
==AF_INET
? sizeof(sin
.sin4
) : sizeof(sin
.sin6
);
1991 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
1992 throw PDNSException("Binding TCP server socket for "+ st
.host
+": "+stringerror());
1995 setSocketSendBuffer(fd
, 65000);
1997 deferredAdds
[threadId
].push_back(make_pair(fd
, handleNewTCPQuestion
));
1998 g_tcpListenSockets
.push_back(fd
);
1999 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2000 // - fd is not that which we know here, but returned from accept()
2001 if(sin
.sin4
.sin_family
== AF_INET
)
2002 L
<<Logger::Error
<<"Listening for TCP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2004 L
<<Logger::Error
<<"Listening for TCP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Creates the UDP listening sockets for every entry of the "local-address" setting.
//   threadId - index into deferredAdds; the new fd is queued there together
//              with handleNewUDPQuestion
// For each address: the port defaults to "local-port" and may be changed by
// parseService(); the address is parsed as IPv4 first and as IPv6 when that
// fails; the socket is created, configured and bound. When bound to an
// any-address and the packet-info socket option could be set, the fd is added
// to g_fromtosockets. The bound address is recorded in g_listenSocketsAddresses.
// Throws PDNSException when no address is configured, when the address cannot
// be resolved, or when socket(), SO_REUSEPORT or bind() fail.
// NOTE(review): declarations of st, sin and one, the preprocessor guard around
// SO_REUSEPORT and most braces are missing from this extract.
2008 static void makeUDPServerSockets(unsigned int threadId
)
2011 vector
<string
>locals
;
2012 stringtok(locals
,::arg()["local-address"]," ,");
2015 throw PDNSException("No local address specified");
2017 for(vector
<string
>::const_iterator i
=locals
.begin();i
!=locals
.end();++i
) {
2019 st
.port
=::arg().asNum("local-port");
2020 parseService(*i
, st
);
// Try IPv4 first; fall back to IPv6 when IpToU32() fails.
2024 memset(&sin
, 0, sizeof(sin
));
2025 sin
.sin4
.sin_family
= AF_INET
;
2026 if(!IpToU32(st
.host
.c_str() , (uint32_t*)&sin
.sin4
.sin_addr
.s_addr
)) {
2027 sin
.sin6
.sin6_family
= AF_INET6
;
2028 if(makeIPv6sockaddr(st
.host
, &sin
.sin6
) < 0)
2029 throw PDNSException("Unable to resolve local address for UDP server on '"+ st
.host
+"'");
2032 int fd
=socket(sin
.sin4
.sin_family
, SOCK_DGRAM
, 0);
2034 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2036 if (!setSocketTimestamps(fd
))
2037 L
<<Logger::Warning
<<"Unable to enable timestamp reporting for socket"<<endl
;
// On an any-address, request packet info; the fd joins g_fromtosockets only
// when the option could be set.
2039 if(IsAnyAddress(sin
)) {
2040 if(sin
.sin4
.sin_family
== AF_INET
)
2041 if(!setsockopt(fd
, IPPROTO_IP
, GEN_IP_PKTINFO
, &one
, sizeof(one
))) // linux supports this, so why not - might fail on other systems
2042 g_fromtosockets
.insert(fd
);
2043 #ifdef IPV6_RECVPKTINFO
2044 if(sin
.sin4
.sin_family
== AF_INET6
)
2045 if(!setsockopt(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, &one
, sizeof(one
)))
2046 g_fromtosockets
.insert(fd
);
// A failure to set IPV6_V6ONLY is logged but not fatal.
2048 if(sin
.sin6
.sin6_family
== AF_INET6
&& setsockopt(fd
, IPPROTO_IPV6
, IPV6_V6ONLY
, &one
, sizeof(one
)) < 0) {
2049 L
<<Logger::Error
<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno
)<<endl
;
// NOTE(review): AF_INET6 is passed here regardless of the family of sin - confirm this is intended.
2052 if( ::arg().mustDo("non-local-bind") )
2053 Utility::setBindAny(AF_INET6
, fd
);
2057 setSocketReceiveBuffer(fd
, 250000);
2058 sin
.sin4
.sin_port
= htons(st
.port
);
2063 if(setsockopt(fd
, SOL_SOCKET
, SO_REUSEPORT
, &one
, sizeof(one
)) < 0)
2064 throw PDNSException("SO_REUSEPORT: "+stringerror());
2067 socklen_t socklen
=sin
.getSocklen();
2068 if (::bind(fd
, (struct sockaddr
*)&sin
, socklen
)<0)
2069 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st
.port
) +" for "+ st
.host
+": "+stringerror());
2073 deferredAdds
[threadId
].push_back(make_pair(fd
, handleNewUDPQuestion
));
2074 g_listenSocketsAddresses
[fd
]=sin
; // this is written to only from the startup thread, not from the workers
2075 if(sin
.sin4
.sin_family
== AF_INET
)
2076 L
<<Logger::Error
<<"Listening for UDP queries on "<< sin
.toString() <<":"<<st
.port
<<endl
;
2078 L
<<Logger::Error
<<"Listening for UDP queries on ["<< sin
.toString() <<"]:"<<st
.port
<<endl
;
// Redirects the three standard streams to /dev/null: the device is opened
// read-write and duplicated onto descriptors 0, 1 and 2. A failing open() is
// logged at Critical level.
// NOTE(review): the lines before the open() call and the check of its result
// are missing from this extract.
2082 static void daemonize(void)
2089 int i
=open("/dev/null",O_RDWR
); /* descriptor for /dev/null, duplicated onto the standard streams below */
2091 L
<<Logger::Critical
<<"Unable to open /dev/null: "<<stringerror()<<endl
;
2093 dup2(i
,0); /* stdin */
2094 dup2(i
,1); /* stdout */
2095 dup2(i
,2); /* stderr */
// Handler taking a single unnamed int argument.
// NOTE(review): the body is not visible in this extract; presumably installed
// for SIGUSR1 - confirm against the full source.
2100 static void usr1Handler(int)
// Handler taking a single unnamed int argument. It applies the current value
// of g_quiet: the default SyncRes log mode becomes LogNone when g_quiet is set
// and Log otherwise, and the "quiet" setting is updated to match.
// NOTE(review): at least one line before the setDefaultLogMode() call is
// missing from this extract, so whether g_quiet is changed here cannot be
// told; presumably installed for SIGUSR2 - confirm.
2105 static void usr2Handler(int)
2108 SyncRes::setDefaultLogMode(g_quiet
? SyncRes::LogNone
: SyncRes::Log
);
2109 ::arg().set("quiet")=g_quiet
? "" : "no";
// Writes a block of statistics lines to the log at Notice level.
// Cache hit and miss totals are collected from all threads through
// broadcastAccFunction. The detailed report is only produced when the query
// counter, the sum of hits and misses, SyncRes::s_queries and
// SyncRes::s_outqueries are all non-zero, which also keeps the percentage
// divisions below away from a zero divisor. Otherwise, when statsWanted is
// set, a short "no stats yet" line is logged.
// The qps line needs a previous call: it uses the static lastOutputTime and
// lastQueryCount and is skipped when no time has passed since then.
// NOTE(review): the declaration of statsWanted and some braces are missing
// from this extract.
2112 static void doStats(void)
2114 static time_t lastOutputTime
;
2115 static uint64_t lastQueryCount
;
2117 uint64_t cacheHits
= broadcastAccFunction
<uint64_t>(pleaseGetCacheHits
);
2118 uint64_t cacheMisses
= broadcastAccFunction
<uint64_t>(pleaseGetCacheMisses
);
2120 if(g_stats
.qcounter
&& (cacheHits
+ cacheMisses
) && SyncRes::s_queries
&& SyncRes::s_outqueries
) {
2121 L
<<Logger::Notice
<<"stats: "<<g_stats
.qcounter
<<" questions, "<<
2122 broadcastAccFunction
<uint64_t>(pleaseGetCacheSize
)<< " cache entries, "<<
2123 broadcastAccFunction
<uint64_t>(pleaseGetNegCacheSize
)<<" negative entries, "<<
2124 (int)((cacheHits
*100.0)/(cacheHits
+cacheMisses
))<<"% cache hits"<<endl
;
2126 L
<<Logger::Notice
<<"stats: throttle map: "
2127 << broadcastAccFunction
<uint64_t>(pleaseGetThrottleSize
) <<", ns speeds: "
2128 << broadcastAccFunction
<uint64_t>(pleaseGetNsSpeedsSize
)<<endl
;
2129 L
<<Logger::Notice
<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries
*100.0/SyncRes::s_queries
)<<"%";
2130 L
<<Logger::Notice
<<", "<<(int)(SyncRes::s_throttledqueries
*100.0/(SyncRes::s_outqueries
+SyncRes::s_throttledqueries
))<<"% throttled, "
2131 <<SyncRes::s_nodelegated
<<" no-delegation drops"<<endl
;
2132 L
<<Logger::Notice
<<"stats: "<<SyncRes::s_tcpoutqueries
<<" outgoing tcp connections, "<<
2133 broadcastAccFunction
<uint64_t>(pleaseGetConcurrentQueries
)<<" queries running, "<<SyncRes::s_outgoingtimeouts
<<" outgoing timeouts"<<endl
;
2135 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2136 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2138 L
<<Logger::Notice
<<"stats: " << broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheSize
) <<
2139 " packet cache entries, "<<(int)(100.0*broadcastAccFunction
<uint64_t>(pleaseGetPacketCacheHits
)/SyncRes::s_queries
) << "% packet cache hits"<<endl
;
// Queries per second since the previous report.
2141 time_t now
= time(0);
2142 if(lastOutputTime
&& lastQueryCount
&& now
!= lastOutputTime
) {
2143 L
<<Logger::Notice
<<"stats: "<< (SyncRes::s_queries
- lastQueryCount
) / (now
- lastOutputTime
) <<" qps (average over "<< (now
- lastOutputTime
) << " seconds)"<<endl
;
2145 lastOutputTime
= now
;
2146 lastQueryCount
= SyncRes::s_queries
;
2148 else if(statsWanted
)
2149 L
<<Logger::Notice
<<"stats: no stats yet!"<<endl
;
// Periodic maintenance routine; its bookkeeping variables are thread_local, so
// each thread keeps its own timers. Visible tasks:
//   - when more than (5 + t_id) seconds passed since last_prune: prune the
//     record cache, the packet cache and the negative cache; on every 40th
//     such pass also prune NS speed entries, with a limit of now - 300 seconds
//   - when more than 7200 seconds passed since last_rootupdate: call
//     SyncRes::getRootNS()
//   - on the thread whose t_id equals s_distributorThreadID: run doSecPoll()
//     when at least 3600 seconds passed since last_secpoll; exceptions from
//     the poll are logged
// A PDNSException escaping the routine is logged as a fatal housekeeping error.
// NOTE(review): the use of s_running, the updates of last_prune, the check of
// the getRootNS() result and the try / catch-all lines are missing from this extract.
2154 static void houseKeeping(void *)
2156 static thread_local
time_t last_rootupdate
, last_prune
, last_secpoll
;
2157 static thread_local
int cleanCounter
=0;
2158 static thread_local
bool s_running
; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2165 Utility::gettimeofday(&now
, 0);
// The prune interval grows with t_id.
2167 if(now
.tv_sec
- last_prune
> (time_t)(5 + t_id
)) {
2170 t_RC
->doPrune(g_maxCacheEntries
/ g_numThreads
); // this function is local to a thread, so fine anyhow
2171 t_packetCache
->doPruneTo(g_maxPacketCacheEntries
/ g_numWorkerThreads
);
2173 SyncRes::pruneNegCache(g_maxCacheEntries
/ (g_numWorkerThreads
* 10));
2175 if(!((cleanCounter
++)%40)) { // this is a full scan!
2176 time_t limit
=now
.tv_sec
-300;
2177 SyncRes::pruneNSSpeeds(limit
);
// Root NS refresh, at most once per 7200 seconds.
2182 if(now
.tv_sec
- last_rootupdate
> 7200) {
2183 int res
= SyncRes::getRootNS(g_now
, nullptr);
2185 last_rootupdate
=now
.tv_sec
;
// The security poll only runs on the distributor thread.
2188 if(t_id
== s_distributorThreadID
) {
2190 if(now
.tv_sec
- last_secpoll
>= 3600) {
2192 doSecPoll(&last_secpoll
);
2194 catch(std::exception
& e
)
2196 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.what()<<endl
;
2198 catch(PDNSException
& e
)
2200 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2202 catch(ImmediateServFailException
&e
)
2204 L
<<Logger::Error
<<"Exception while performing security poll: "<<e
.reason
<<endl
;
2208 L
<<Logger::Error
<<"Exception while performing security poll"<<endl
;
2215 catch(PDNSException
& ae
)
2218 L
<<Logger::Error
<<"Fatal error in housekeeping thread: "<<ae
.reason
<<endl
;
2223 static void makeThreadPipes()
2225 for(unsigned int n
=0; n
< g_numThreads
; ++n
) {
2226 struct ThreadPipeSet tps
;
2229 unixDie("Creating pipe for inter-thread communications");
2231 tps
.readToThread
= fd
[0];
2232 tps
.writeToThread
= fd
[1];
2235 unixDie("Creating pipe for inter-thread communications");
2236 tps
.readFromThread
= fd
[0];
2237 tps
.writeFromThread
= fd
[1];
2240 unixDie("Creating pipe for inter-thread communications");
2241 tps
.readQueriesToThread
= fd
[0];
2242 tps
.writeQueriesToThread
= fd
[1];
2244 if (!setNonBlocking(tps
.writeQueriesToThread
)) {
2245 unixDie("Making pipe for inter-thread communications non-blocking");
2248 g_pipes
.push_back(tps
);
// Synchronous broadcast over the thread pipes.
// Visible behaviour: a caller whose t_id is neither s_handlerThreadID nor
// s_distributorThreadID gets an error logged. For the entries of g_pipes a
// heap-allocated ThreadMSG with wantAnswer = true is handed over by writing its
// pointer to writeToThread, after which a string pointer is read back from
// readFromThread; a short write or read is fatal (unixDie). In one branch
// `func` is invoked directly instead of being written to a pipe.
2258 void broadcastFunction(const pipefunc_t
& func
)
2260 /* This function might be called by the worker with t_id 0 during startup
2261 for the initialization of ACLs and domain maps */
2262 if (t_id
!= s_handlerThreadID
&& t_id
!= s_distributorThreadID
) {
2263 L
<<Logger::Error
<<"broadcastFunction() has been called by a worker ("<<t_id
<<")"<<endl
;
2267 if (t_id
== s_handlerThreadID
) {
2268 /* the distributor will call itself below, but if we are the handler thread,
2269 call the function ourselves to update the ACL or domain maps for example */
2274 for(ThreadPipeSet
& tps
: g_pipes
)
2277 func(); // don't write to ourselves!
2281 ThreadMSG
* tmsg
= new ThreadMSG();
2283 tmsg
->wantAnswer
= true;
2284 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2286 unixDie("write to thread pipe returned wrong size or error");
2289 string
* resp
= nullptr;
2290 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
2291 unixDie("read from thread pipe returned wrong size or error");
// Hands one query to a thread chosen by hashing the packet.
// Visible behaviour: the packet is hashed with hashQuestion() seeded by
// g_disthashseed and the target index is 1 + hash % (g_pipes.size() - 1). A
// heap-allocated ThreadMSG with wantAnswer = false is passed by writing its
// pointer to the target's writeQueriesToThread descriptor. When the error is
// EAGAIN or EWOULDBLOCK, g_stats.queryPipeFullDrops is incremented; the other
// failure paths end in unixDie().
// NOTE(review): the modulo divides by g_pipes.size() - 1, which is zero when
// g_pipes holds a single entry -- confirm callers only get here with two or more.
2300 // This function is only called by the distributor thread, when pdns-distributes-queries is set
2301 void distributeAsyncFunction(const string
& packet
, const pipefunc_t
& func
)
2303 if (t_id
!= s_distributorThreadID
) {
2304 L
<<Logger::Error
<<"distributeAsyncFunction() has been called by a worker ("<<t_id
<<")"<<endl
;
2308 unsigned int hash
= hashQuestion(packet
.c_str(), packet
.length(), g_disthashseed
);
2309 unsigned int target
= 1 + (hash
% (g_pipes
.size()-1));
2311 if(target
== static_cast<unsigned int>(s_distributorThreadID
)) {
2312 L
<<Logger::Error
<<"distributeAsyncFunction() tried to assign a query to the distributor"<<endl
;
2316 ThreadPipeSet
& tps
= g_pipes
[target
];
2317 ThreadMSG
* tmsg
= new ThreadMSG();
2319 tmsg
->wantAnswer
= false;
2321 ssize_t written
= write(tps
.writeQueriesToThread
, &tmsg
, sizeof(tmsg
));
2323 if (static_cast<size_t>(written
) != sizeof(tmsg
)) {
2325 unixDie("write to thread pipe returned wrong size or error");
2331 if (error
== EAGAIN
|| error
== EWOULDBLOCK
) {
2332 g_stats
.queryPipeFullDrops
++;
2334 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error
));
// Read callback for a thread's incoming pipes.
// Visible behaviour: a ThreadMSG pointer is read from `fd` (a short read is
// fatal), tmsg->func() is called and its result kept in `resp`. std::exception
// and PDNSException raised by the call are logged when g_logCommonErrors is set.
// If the message has wantAnswer set, the `resp` pointer is written to this
// thread's writeFromThread descriptor (g_pipes[t_id]); a short write is fatal.
2339 static void handlePipeRequest(int fd
, FDMultiplexer::funcparam_t
& var
)
2341 ThreadMSG
* tmsg
= nullptr;
2343 if(read(fd
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) { // fd == readToThread || fd == readQueriesToThread
2344 unixDie("read from thread pipe returned wrong size or error");
2349 resp
= tmsg
->func();
2351 catch(std::exception
& e
) {
2352 if(g_logCommonErrors
)
2353 L
<<Logger::Error
<<"PIPE function we executed created exception: "<<e
.what()<<endl
; // but what if they wanted an answer.. we send 0
2355 catch(PDNSException
& e
) {
2356 if(g_logCommonErrors
)
2357 L
<<Logger::Error
<<"PIPE function we executed created PDNS exception: "<<e
.reason
<<endl
; // but what if they wanted an answer.. we send 0
2359 if(tmsg
->wantAnswer
) {
2360 if(write(g_pipes
[t_id
].writeFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
)) {
2362 unixDie("write to thread pipe returned wrong size or error");
// Adapter template: takes a function object returning T* and is itself declared
// to return void*. It is bound with boost::bind (voider<T>, func) further down
// in this file to fill ThreadMSG::func.
// (The body of this template is not part of the lines shown here.)
2369 template<class T
> void *voider(const boost::function
<T
*()>& func
)
2374 vector
<ComboAddress
>& operator+=(vector
<ComboAddress
>&a
, const vector
<ComboAddress
>& b
)
2376 a
.insert(a
.end(), b
.begin(), b
.end());
/* Appends all pairs of b to the end of a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  const auto tail = a.end();
  a.insert(tail, b.cbegin(), b.cend());
  return a;
}
2386 vector
<pair
<DNSName
, uint16_t> >& operator+=(vector
<pair
<DNSName
, uint16_t> >&a
, const vector
<pair
<DNSName
, uint16_t> >& b
)
2388 a
.insert(a
.end(), b
.begin(), b
.end());
2394 This function should only be called by the handler to gather metrics, wipe the cache,
2395 reload the Lua script (not the Lua config) or change the current trace regex */
2396 template<class T
> T
broadcastAccFunction(const boost::function
<T
*()>& func
)
2398 if (t_id
!= s_handlerThreadID
) {
2399 L
<<Logger::Error
<<"broadcastFunction has been called by a worker ("<<t_id
<<")"<<endl
;
2404 for(ThreadPipeSet
& tps
: g_pipes
)
2406 ThreadMSG
* tmsg
= new ThreadMSG();
2407 tmsg
->func
= boost::bind(voider
<T
>, func
);
2408 tmsg
->wantAnswer
= true;
2410 if(write(tps
.writeToThread
, &tmsg
, sizeof(tmsg
)) != sizeof(tmsg
)) {
2412 unixDie("write to thread pipe returned wrong size or error");
2416 if(read(tps
.readFromThread
, &resp
, sizeof(resp
)) != sizeof(resp
))
2417 unixDie("read from thread pipe returned wrong size or error");
// Explicit instantiations of broadcastAccFunction for the four result types
// named below: string, uint64_t, vector<ComboAddress> and
// vector<pair<DNSName, uint16_t>>.
2428 template string
broadcastAccFunction(const boost::function
<string
*()>& fun
); // explicit instantiation
2429 template uint64_t broadcastAccFunction(const boost::function
<uint64_t*()>& fun
); // explicit instantiation
2430 template vector
<ComboAddress
> broadcastAccFunction(const boost::function
<vector
<ComboAddress
> *()>& fun
); // explicit instantiation
2431 template vector
<pair
<DNSName
,uint16_t> > broadcastAccFunction(const boost::function
<vector
<pair
<DNSName
, uint16_t> > *()>& fun
); // explicit instantiation
// Read callback for the control channel socket (s_rcc).
// Visible behaviour: one message is received together with the sender's address
// (`remote`), RecursorControlParser::getAnswer() produces the answer and fills in
// `command`, and the answer is sent back to `remote`. When the "chroot" setting
// is non-empty, that many leading characters are cut off `remote` first.
// std::exception and PDNSException are caught and logged.
2433 static void handleRCC(int fd
, FDMultiplexer::funcparam_t
& var
)
2436 string msg
=s_rcc
.recv(&remote
);
2437 RecursorControlParser rcp
;
2438 RecursorControlParser::func_t
* command
;
2440 string answer
=rcp
.getAnswer(msg
, &command
);
2442 // If we are inside a chroot, we need to strip
2443 if (!arg()["chroot"].empty()) {
2444 size_t len
= arg()["chroot"].length();
2445 remote
= remote
.substr(len
);
2449 s_rcc
.send(answer
, &remote
);
2452 catch(std::exception
& e
) {
2453 L
<<Logger::Error
<<"Error dealing with control socket request: "<<e
.what()<<endl
;
2455 catch(PDNSException
& ae
) {
2456 L
<<Logger::Error
<<"Error dealing with control socket request: "<<ae
.reason
<<endl
;
// Read callback for a TCP socket whose state is carried in a PacketID (`var`).
// Visible behaviour: up to pident->inNeeded bytes are received into a temporary
// buffer, appended to pident->inMSG and subtracted from inNeeded. Once nothing
// more is needed, or inIncompleteOkay is set, the descriptor is removed from the
// multiplexer and the collected message is delivered with MT->sendEvent().
// On the failure path the descriptor is removed as well and an empty string is
// delivered to signal the error.
2460 static void handleTCPClientReadable(int fd
, FDMultiplexer::funcparam_t
& var
)
2462 PacketID
* pident
=any_cast
<PacketID
>(&var
);
2463 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2465 shared_array
<char> buffer(new char[pident
->inNeeded
]);
2467 ssize_t ret
=recv(fd
, buffer
.get(), pident
->inNeeded
,0);
2469 pident
->inMSG
.append(&buffer
[0], &buffer
[ret
]);
2470 pident
->inNeeded
-=(size_t)ret
;
2471 if(!pident
->inNeeded
|| pident
->inIncompleteOkay
) {
2472 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
// Copies are taken before the descriptor is removed from the multiplexer.
2473 PacketID pid
=*pident
;
2474 string msg
=pident
->inMSG
;
2476 t_fdm
->removeReadFD(fd
);
2477 MT
->sendEvent(pid
, &msg
);
2480 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2484 PacketID tmp
=*pident
;
2485 t_fdm
->removeReadFD(fd
); // pident might now be invalid (it isn't, but still)
2487 MT
->sendEvent(tmp
, &empty
); // this conveys error status
// Write callback for a TCP socket whose state is carried in a PacketID (`var`).
// Visible behaviour: the part of pid->outMSG that starts at pid->outPos is
// passed to send() and outPos is advanced by the number of bytes written. When
// outPos reaches the end of outMSG the descriptor is removed from the
// multiplexer and outMSG itself is delivered with MT->sendEvent() to signal
// success. On error or EOF the descriptor is removed too and the string `sent`
// is delivered (described in the code as an empty string conveying the error).
2491 static void handleTCPClientWritable(int fd
, FDMultiplexer::funcparam_t
& var
)
2493 PacketID
* pid
=any_cast
<PacketID
>(&var
);
2494 ssize_t ret
=send(fd
, pid
->outMSG
.c_str() + pid
->outPos
, pid
->outMSG
.size() - pid
->outPos
,0);
2496 pid
->outPos
+=(ssize_t
)ret
;
2497 if(pid
->outPos
==pid
->outMSG
.size()) {
2499 t_fdm
->removeWriteFD(fd
);
2500 MT
->sendEvent(tmp
, &tmp
.outMSG
); // send back what we sent to convey everything is ok
2503 else { // error or EOF
2505 t_fdm
->removeWriteFD(fd
);
2507 MT
->sendEvent(tmp
, &sent
); // we convey error status by sending empty string
// Delivers `content` once per entry of the chain stored in iter->key.chain,
// using MT->sendEvent() with the `resend` PacketID, and counts each delivery in
// g_stats.chainResends. The emptiness of the chain is checked first.
// `resend` is taken by value, so the caller's PacketID is not modified.
2511 // resend event to everybody chained onto it
2512 static void doResends(MT_t::waiters_t::iterator
& iter
, PacketID resend
, const string
& content
)
2514 if(iter
->key
.chain
.empty())
2516 // cerr<<"doResends called!\n";
2517 for(PacketID::chain_t::iterator i
=iter
->key
.chain
.begin(); i
!= iter
->key
.chain
.end() ; ++i
) {
2520 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2522 MT
->sendEvent(resend
, &content
);
2523 g_stats
.chainResends
++;
// Read callback for a UDP socket on which an answer from a remote server is
// expected; `var` carries the PacketID the socket was registered with.
// Flow, as far as it is visible here:
//  - one datagram is read with recvfrom() into a stack buffer of
//    g_outgoingEDNSBufsize bytes;
//  - for input shorter than sizeof(dnsheader), the parse-error counter may be
//    bumped and logged, the socket is handed back to t_udpclientsocks and the
//    waiters for `pid` (including chained ones) receive an empty string;
//  - otherwise the DNS header is copied out of the buffer, the sender address is
//    stored in pident.remote and the question name and type are parsed from
//    offset 12 (or the name is cleared when qdcount is zero or qr is unset);
//  - the packet is delivered with MT->sendEvent(); when that finds no waiter,
//    all waiters are scanned to count near misses and, for an answer without a
//    question, to retry under a matching waiter's own name and type;
//  - anything still undelivered is counted in g_stats.unexpectedCount, and the
//    socket is returned to t_udpclientsocks.
2527 static void handleUDPServerResponse(int fd
, FDMultiplexer::funcparam_t
& var
)
2529 PacketID pid
=any_cast
<PacketID
>(var
);
2531 char data
[g_outgoingEDNSBufsize
];
2532 ComboAddress fromaddr
;
2533 socklen_t addrlen
=sizeof(fromaddr
);
2535 len
=recvfrom(fd
, data
, sizeof(data
), 0, (sockaddr
*)&fromaddr
, &addrlen
);
// Anything shorter than a DNS header cannot be a usable answer.
2537 if(len
< (ssize_t
) sizeof(dnsheader
)) {
2539 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2541 g_stats
.serverParseError
++;
2542 if(g_logCommonErrors
)
2543 L
<<Logger::Error
<<"Unable to parse packet from remote UDP server "<< fromaddr
.toString() <<
2544 ": packet smaller than DNS header"<<endl
;
2547 t_udpclientsocks
->returnSocket(fd
);
2550 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pid
);
2551 if(iter
!= MT
->d_waiters
.end())
2552 doResends(iter
, pid
, empty
);
2554 MT
->sendEvent(pid
, &empty
); // this denotes error (does lookup again.. at least L1 will be hot)
2559 memcpy(&dh
, data
, sizeof(dh
));
2562 pident
.remote
=fromaddr
;
2566 if(!dh
.qr
&& g_logCommonErrors
) {
2567 L
<<Logger::Notice
<<"Not taking data from question on outgoing socket from "<< fromaddr
.toStringWithPort() <<endl
;
2570 if(!dh
.qdcount
|| // UPC, Nominum, very old BIND on FormErr, NSD
2571 !dh
.qr
) { // one weird server
2572 pident
.domain
.clear();
2578 pident
.domain
=DNSName(data
, len
, 12, false, &pident
.type
); // don't copy this from above - we need to do the actual read
2580 catch(std::exception
& e
) {
2581 g_stats
.serverParseError
++; // won't be fed to lwres.cc, so we have to increment
2582 L
<<Logger::Warning
<<"Error in packet from remote nameserver "<< fromaddr
.toStringWithPort() << ": "<<e
.what() << endl
;
2587 packet
.assign(data
, len
);
2589 MT_t::waiters_t::iterator iter
=MT
->d_waiters
.find(pident
);
2590 if(iter
!= MT
->d_waiters
.end()) {
2591 doResends(iter
, pident
, packet
);
2596 if(!MT
->sendEvent(pident
, &packet
)) {
2597 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2598 for(MT_t::waiters_t::iterator mthread
=MT
->d_waiters
.begin(); mthread
!=MT
->d_waiters
.end(); ++mthread
) {
2599 if(pident
.fd
==mthread
->key
.fd
&& mthread
->key
.remote
==pident
.remote
&& mthread
->key
.type
== pident
.type
&&
2600 pident
.domain
== mthread
->key
.domain
) {
2601 mthread
->key
.nearMisses
++;
2604 // be a bit paranoid here since we're weakening our matching
2605 if(pident
.domain
.empty() && !mthread
->key
.domain
.empty() && !pident
.type
&& mthread
->key
.type
&&
2606 pident
.id
== mthread
->key
.id
&& mthread
->key
.remote
== pident
.remote
) {
2607 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2608 pident
.domain
= mthread
->key
.domain
;
2609 pident
.type
= mthread
->key
.type
;
2610 goto retryWithName
; // note that this only passes on an error, lwres will still reject the packet
2613 g_stats
.unexpectedCount
++; // if we made it here, it really is an unexpected answer
2614 if(g_logCommonErrors
) {
2615 L
<<Logger::Warning
<<"Discarding unexpected packet from "<<fromaddr
.toStringWithPort()<<": "<< (pident
.domain
.empty() ? "<empty>" : pident
.domain
.toString())<<", "<<pident
.type
<<", "<<MT
->d_waiters
.size()<<" waiters"<<endl
;
2619 t_udpclientsocks
->returnSocket(fd
);
// Picks an FDMultiplexer implementation by walking the entries of
// FDMultiplexer::getMultiplexerMap(). A failure while trying a candidate is
// logged as non-fatal (with the exception text when it is an
// FDMultiplexerException) before falling back; if the loop ends without a
// result, "No working multiplexer found!" is logged.
2623 FDMultiplexer
* getMultiplexer()
2626 for(const auto& i
: FDMultiplexer::getMultiplexerMap()) {
2631 catch(FDMultiplexerException
&fe
) {
2632 L
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer ("<<fe
.what()<<"), falling back"<<endl
;
2635 L
<<Logger::Error
<<"Non-fatal error initializing possible multiplexer"<<endl
;
2638 L
<<Logger::Error
<<"No working multiplexer found!"<<endl
;
2643 static string
* doReloadLuaScript()
2645 string fname
= ::arg()["lua-dns-script"];
2649 L
<<Logger::Error
<<t_id
<<" Unloaded current lua script"<<endl
;
2650 return new string("unloaded\n");
2653 t_pdl
= std::make_shared
<RecursorLua4
>(fname
);
2656 catch(std::exception
& e
) {
2657 L
<<Logger::Error
<<t_id
<<" Retaining current script, error from '"<<fname
<<"': "<< e
.what() <<endl
;
2658 return new string("retaining current script, error from '"+fname
+"': "+e
.what()+"\n");
2661 L
<<Logger::Warning
<<t_id
<<" (Re)loaded lua script from '"<<fname
<<"'"<<endl
;
2662 return new string("(re)loaded '"+fname
+"'\n");
2665 string
doQueueReloadLuaScript(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
2668 ::arg().set("lua-dns-script") = *begin
;
2670 return broadcastAccFunction
<string
>(doReloadLuaScript
);
2673 static string
* pleaseUseNewTraceRegex(const std::string
& newRegex
)
2676 if(newRegex
.empty()) {
2677 t_traceRegex
.reset();
2678 return new string("unset\n");
2681 t_traceRegex
= std::make_shared
<Regex
>(newRegex
);
2682 return new string("ok\n");
2685 catch(PDNSException
& ae
)
2687 return new string(ae
.reason
+"\n");
2690 string
doTraceRegex(vector
<string
>::const_iterator begin
, vector
<string
>::const_iterator end
)
2692 return broadcastAccFunction
<string
>(boost::bind(pleaseUseNewTraceRegex
, begin
!=end
? *begin
: ""));
2695 static void checkLinuxIPv6Limits()
2699 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line
)) {
2700 int lim
=std::stoi(line
);
2702 L
<<Logger::Error
<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim
<<" which is < 16384"<<endl
;
2707 static void checkOrFixFDS()
2709 unsigned int availFDs
=getFilenumLimit();
2710 unsigned int wantFDs
= g_maxMThreads
* g_numWorkerThreads
+25; // even healthier margin then before
2712 if(wantFDs
> availFDs
) {
2713 unsigned int hardlimit
= getFilenumLimit(true);
2714 if(hardlimit
>= wantFDs
) {
2715 setFilenumLimit(wantFDs
);
2716 L
<<Logger::Warning
<<"Raised soft limit on number of filedescriptors to "<<wantFDs
<<" to match max-mthreads and threads settings"<<endl
;
2719 int newval
= (hardlimit
- 25) / g_numWorkerThreads
;
2720 L
<<Logger::Warning
<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit
<<" < "<<wantFDs
<<"), reducing max-mthreads to "<<newval
<<endl
;
2721 g_maxMThreads
= newval
;
2722 setFilenumLimit(hardlimit
);
// Forward declaration of the thread entry point; it is passed to std::thread
// and also called directly further down in this file.
2727 static void* recursorThread(int tid
, bool worker
);
// Takes a shared NetmaskGroup pointer and is declared to return void*. It is
// bound with boost::bind and handed to broadcastFunction() once a new allow-from
// list has been built.
// (The body of this function is not part of the lines shown here.)
2729 static void* pleaseSupplantACLs(std::shared_ptr
<NetmaskGroup
> ng
)
2740 static bool l_initialized
;
2742 if(l_initialized
) { // only reload configuration file on second call
2743 string configname
=::arg()["config-dir"]+"/recursor.conf";
2744 if(::arg()["config-name"]!="") {
2745 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2747 cleanSlashes(configname
);
2749 if(!::arg().preParseFile(configname
.c_str(), "allow-from-file"))
2750 throw runtime_error("Unable to re-parse configuration file '"+configname
+"'");
2751 ::arg().preParseFile(configname
.c_str(), "allow-from", LOCAL_NETS
);
2752 ::arg().preParseFile(configname
.c_str(), "include-dir");
2753 ::arg().preParse(g_argc
, g_argv
, "include-dir");
2755 // then process includes
2756 std::vector
<std::string
> extraConfigs
;
2757 ::arg().gatherIncludes(extraConfigs
);
2759 for(const std::string
& fn
: extraConfigs
) {
2760 if(!::arg().preParseFile(fn
.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2761 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
2762 if(!::arg().preParseFile(fn
.c_str(), "allow-from", ::arg()["allow-from"]))
2763 throw runtime_error("Unable to re-parse configuration file include '"+fn
+"'");
2766 ::arg().preParse(g_argc
, g_argv
, "allow-from-file");
2767 ::arg().preParse(g_argc
, g_argv
, "allow-from");
2770 std::shared_ptr
<NetmaskGroup
> oldAllowFrom
= t_allowFrom
;
2771 std::shared_ptr
<NetmaskGroup
> allowFrom
= std::make_shared
<NetmaskGroup
>();
2773 if(!::arg()["allow-from-file"].empty()) {
2775 ifstream
ifs(::arg()["allow-from-file"].c_str());
2777 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2780 string::size_type pos
;
2781 while(getline(ifs
,line
)) {
2783 if(pos
!=string::npos
)
2789 allowFrom
->addMask(line
);
2791 L
<<Logger::Warning
<<"Done parsing " << allowFrom
->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl
;
2793 else if(!::arg()["allow-from"].empty()) {
2795 stringtok(ips
, ::arg()["allow-from"], ", ");
2797 L
<<Logger::Warning
<<"Only allowing queries from: ";
2798 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
2799 allowFrom
->addMask(*i
);
2801 L
<<Logger::Warning
<<", ";
2802 L
<<Logger::Warning
<<*i
;
2804 L
<<Logger::Warning
<<endl
;
2807 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2808 L
<<Logger::Error
<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl
;
2809 allowFrom
= nullptr;
2812 g_initialAllowFrom
= allowFrom
;
2813 broadcastFunction(boost::bind(pleaseSupplantACLs
, allowFrom
));
2814 oldAllowFrom
= nullptr;
2816 l_initialized
= true;
2820 static void setupDelegationOnly()
2822 vector
<string
> parts
;
2823 stringtok(parts
, ::arg()["delegation-only"], ", \t");
2824 for(const auto& p
: parts
) {
2825 SyncRes::addDelegationOnly(DNSName(p
));
2829 static std::map
<unsigned int, std::set
<int> > parseCPUMap()
2831 std::map
<unsigned int, std::set
<int> > result
;
2833 const std::string value
= ::arg()["cpu-map"];
2835 if (!value
.empty() && !isSettingThreadCPUAffinitySupported()) {
2836 L
<<Logger::Warning
<<"CPU mapping requested but not supported, skipping"<<endl
;
2840 std::vector
<std::string
> parts
;
2842 stringtok(parts
, value
, " \t");
2844 for(const auto& part
: parts
) {
2845 if (part
.find('=') == string::npos
)
2849 auto headers
= splitField(part
, '=');
2850 trim(headers
.first
);
2851 trim(headers
.second
);
2853 unsigned int threadId
= pdns_stou(headers
.first
);
2854 std::vector
<std::string
> cpus
;
2856 stringtok(cpus
, headers
.second
, ",");
2858 for(const auto& cpu
: cpus
) {
2859 int cpuId
= std::stoi(cpu
);
2861 result
[threadId
].insert(cpuId
);
2864 catch(const std::exception
& e
) {
2865 L
<<Logger::Error
<<"Error parsing cpu-map entry '"<<part
<<"': "<<e
.what()<<endl
;
2872 static void setCPUMap(const std::map
<unsigned int, std::set
<int> >& cpusMap
, unsigned int n
, pthread_t tid
)
2874 const auto& cpuMapping
= cpusMap
.find(n
);
2875 if (cpuMapping
!= cpusMap
.cend()) {
2876 int rc
= mapThreadToCPUList(tid
, cpuMapping
->second
);
2878 L
<<Logger::Info
<<"CPU affinity for worker "<<n
<<" has been set to CPU map:";
2879 for (const auto cpu
: cpuMapping
->second
) {
2880 L
<<Logger::Info
<<" "<<cpu
;
2882 L
<<Logger::Info
<<endl
;
2885 L
<<Logger::Warning
<<"Error setting CPU affinity for worker "<<n
<<" to CPU map:";
2886 for (const auto cpu
: cpuMapping
->second
) {
2887 L
<<Logger::Info
<<" "<<cpu
;
2889 L
<<Logger::Info
<<strerror(rc
)<<endl
;
// Main start-up sequence of the recursor process. In the order visible here:
//  - configures the logger (name, syslog, timestamps, facility);
//  - seeds the random generator and draws g_disthashseed;
//  - collects the local addresses for outgoing IPv6/IPv4 queries;
//  - maps the "dnssec" setting onto g_dnssecmode and loads the Lua configuration;
//  - fills the dont-query list and a long series of SyncRes / global tunables
//    from the corresponding settings;
//  - works out the number of worker threads and creates the UDP/TCP listening
//    sockets, either per thread or once;
//  - forks additional processes, optionally daemonizes, installs signal handlers;
//  - resolves setgid/setuid, optionally chroots, creates the control socket and
//    drops privileges;
//  - optionally starts the SNMP agent, then starts the handler thread and the
//    worker threads (or runs the single worker inline), applying the CPU map.
2894 static int serviceMain(int argc
, char*argv
[])
2896 L
.setName(s_programname
);
2897 L
.disableSyslog(::arg().mustDo("disable-syslog"));
2898 L
.setTimestamps(::arg().mustDo("log-timestamp"));
2900 if(!::arg()["logging-facility"].empty()) {
2901 int val
=logFacilityToLOG(::arg().asNum("logging-facility") );
2903 theL().setFacility(val
);
2905 L
<<Logger::Error
<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl
;
2908 showProductVersion();
2909 seedRandom(::arg()["entropy-source"]);
2911 g_disthashseed
=dns_random(0xffffffff);
2913 checkLinuxIPv6Limits();
// Local source addresses for outgoing queries; IPv6 is only enabled when
// query-local-address6 is set.
2915 vector
<string
> addrs
;
2916 if(!::arg()["query-local-address6"].empty()) {
2917 SyncRes::s_doIPv6
=true;
2918 L
<<Logger::Warning
<<"Enabling IPv6 transport for outgoing queries"<<endl
;
2920 stringtok(addrs
, ::arg()["query-local-address6"], ", ;");
2921 for(const string
& addr
: addrs
) {
2922 g_localQueryAddresses6
.push_back(ComboAddress(addr
));
2926 L
<<Logger::Warning
<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl
;
2929 stringtok(addrs
, ::arg()["query-local-address"], ", ;");
2930 for(const string
& addr
: addrs
) {
2931 g_localQueryAddresses4
.push_back(ComboAddress(addr
));
2934 catch(std::exception
& e
) {
// NOTE(review): unlike the other log statements in this function, this one is
// not terminated with endl -- confirm the message is actually emitted.
2935 L
<<Logger::Error
<<"Assigning local query addresses: "<<e
.what();
2939 // keep this ABOVE loadRecursorLuaConfig!
2940 if(::arg()["dnssec"]=="off")
2941 g_dnssecmode
=DNSSECMode::Off
;
2942 else if(::arg()["dnssec"]=="process-no-validate")
2943 g_dnssecmode
=DNSSECMode::ProcessNoValidate
;
2944 else if(::arg()["dnssec"]=="process")
2945 g_dnssecmode
=DNSSECMode::Process
;
2946 else if(::arg()["dnssec"]=="validate")
2947 g_dnssecmode
=DNSSECMode::ValidateAll
;
2948 else if(::arg()["dnssec"]=="log-fail")
2949 g_dnssecmode
=DNSSECMode::ValidateForLog
;
2951 L
<<Logger::Error
<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl
;
2955 g_dnssecLogBogus
= ::arg().mustDo("dnssec-log-bogus");
2956 g_maxNSEC3Iterations
= ::arg().asNum("nsec3-max-iterations");
2958 g_maxCacheEntries
= ::arg().asNum("max-cache-entries");
2959 g_maxPacketCacheEntries
= ::arg().asNum("max-packetcache-entries");
2962 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2964 catch (PDNSException
&e
) {
2965 L
<<Logger::Error
<<"Cannot load Lua configuration: "<<e
.reason
<<endl
;
2970 sortPublicSuffixList();
// The dont-query list always gets 0.0.0.0 and :: appended.
2972 if(!::arg()["dont-query"].empty()) {
2974 stringtok(ips
, ::arg()["dont-query"], ", ");
2975 ips
.push_back("0.0.0.0");
2976 ips
.push_back("::");
2978 L
<<Logger::Warning
<<"Will not send queries to: ";
2979 for(vector
<string
>::const_iterator i
= ips
.begin(); i
!= ips
.end(); ++i
) {
2980 SyncRes::addDontQuery(*i
);
2982 L
<<Logger::Warning
<<", ";
2983 L
<<Logger::Warning
<<*i
;
2985 L
<<Logger::Warning
<<endl
;
2988 g_quiet
=::arg().mustDo("quiet");
2990 g_weDistributeQueries
= ::arg().mustDo("pdns-distributes-queries");
2991 if(g_weDistributeQueries
) {
2992 L
<<Logger::Warning
<<"PowerDNS Recursor itself will distribute queries over threads"<<endl
;
2995 setupDelegationOnly();
2996 g_outgoingEDNSBufsize
=::arg().asNum("edns-outgoing-bufsize");
2998 if(::arg()["trace"]=="fail") {
2999 SyncRes::setDefaultLogMode(SyncRes::Store
);
3001 else if(::arg().mustDo("trace")) {
3002 SyncRes::setDefaultLogMode(SyncRes::Log
);
3003 ::arg().set("quiet")="no";
3008 SyncRes::s_minimumTTL
= ::arg().asNum("minimum-ttl-override");
3010 SyncRes::s_nopacketcache
= ::arg().mustDo("disable-packetcache");
3012 SyncRes::s_maxnegttl
=::arg().asNum("max-negative-ttl");
3013 SyncRes::s_maxcachettl
=max(::arg().asNum("max-cache-ttl"), 15);
3014 SyncRes::s_packetcachettl
=::arg().asNum("packetcache-ttl");
3015 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3016 uint32_t packetCacheServFailTTL
= ::arg().asNum("packetcache-servfail-ttl");
3017 SyncRes::s_packetcacheservfailttl
=(packetCacheServFailTTL
> SyncRes::s_packetcachettl
) ? SyncRes::s_packetcachettl
: packetCacheServFailTTL
;
3018 SyncRes::s_serverdownmaxfails
=::arg().asNum("server-down-max-fails");
3019 SyncRes::s_serverdownthrottletime
=::arg().asNum("server-down-throttle-time");
3020 SyncRes::s_serverID
=::arg()["server-id"];
3021 SyncRes::s_maxqperq
=::arg().asNum("max-qperq");
3022 SyncRes::s_maxtotusec
=1000*::arg().asNum("max-total-msec");
3023 SyncRes::s_maxdepth
=::arg().asNum("max-recursion-depth");
3024 SyncRes::s_rootNXTrust
= ::arg().mustDo( "root-nx-trust");
// Without a configured server-id the host name is used.
3025 if(SyncRes::s_serverID
.empty()) {
3027 gethostname(tmp
, sizeof(tmp
)-1);
3028 SyncRes::s_serverID
=tmp
;
3031 SyncRes::s_ecsipv4limit
= ::arg().asNum("ecs-ipv4-bits");
3032 SyncRes::s_ecsipv6limit
= ::arg().asNum("ecs-ipv6-bits");
// ECS scope-zero address: the explicit setting if present; the remaining
// candidates visible below are non-"any" local query addresses and 127.0.0.1/32.
3034 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3035 ComboAddress
scopeZero(::arg()["ecs-scope-zero-address"]);
3036 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero
, scopeZero
.isIPv4() ? 32 : 128));
3040 for (const auto& addr
: g_localQueryAddresses4
) {
3041 if (!IsAnyAddress(addr
)) {
3042 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 32));
3048 for (const auto& addr
: g_localQueryAddresses6
) {
3049 if (!IsAnyAddress(addr
)) {
3050 SyncRes::setECSScopeZeroAddress(Netmask(addr
, 128));
3056 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3061 g_networkTimeoutMsec
= ::arg().asNum("network-timeout");
3063 g_initialDomainMap
= parseAuthAndForwards();
3065 g_latencyStatSize
=::arg().asNum("latency-statistic-size");
3067 g_logCommonErrors
=::arg().mustDo("log-common-errors");
3068 g_logRPZChanges
= ::arg().mustDo("log-rpz-changes");
3070 g_anyToTcp
= ::arg().mustDo("any-to-tcp");
3071 g_udpTruncationThreshold
= ::arg().asNum("udp-truncation-threshold");
3073 g_lowercaseOutgoing
= ::arg().mustDo("lowercase-outgoing");
// Thread accounting: at least one worker; g_numThreads adds
// g_weDistributeQueries on top of the worker count.
3075 g_numWorkerThreads
= ::arg().asNum("threads");
3076 if (g_numWorkerThreads
< 1) {
3077 L
<<Logger::Warning
<<"Asked to run with 0 threads, raising to 1 instead"<<endl
;
3078 g_numWorkerThreads
= 1;
3081 g_numThreads
= g_numWorkerThreads
+ g_weDistributeQueries
;
3082 g_maxMThreads
= ::arg().asNum("max-mthreads");
3084 g_gettagNeedsEDNSOptions
= ::arg().mustDo("gettag-needs-edns-options");
3086 g_statisticsInterval
= ::arg().asNum("statistics-interval");
3089 g_reusePort
= ::arg().mustDo("reuseport");
3092 g_useOneSocketPerThread
= (!g_weDistributeQueries
&& g_reusePort
);
3094 if (g_useOneSocketPerThread
) {
3095 for (unsigned int threadId
= 0; threadId
< g_numWorkerThreads
; threadId
++) {
3096 makeUDPServerSockets(threadId
);
3097 makeTCPServerSockets(threadId
);
3101 makeUDPServerSockets(0);
3102 makeTCPServerSockets(0);
3105 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3106 g_useIncomingECS
= ::arg().mustDo("use-incoming-edns-subnet");
// One fork per additional process requested through "processes".
3109 for(forks
= 0; forks
< ::arg().asNum("processes") - 1; ++forks
) {
3110 if(!fork()) // we are child
3114 if(::arg().mustDo("daemon")) {
3115 L
<<Logger::Warning
<<"Calling daemonize, going to background"<<endl
;
3116 L
.toConsole(Logger::Critical
);
3118 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3120 signal(SIGUSR1
,usr1Handler
);
3121 signal(SIGUSR2
,usr2Handler
);
3122 signal(SIGPIPE
,SIG_IGN
);
3126 #ifdef HAVE_LIBSODIUM
3127 if (sodium_init() == -1) {
3128 L
<<Logger::Error
<<"Unable to initialize sodium crypto library"<<endl
;
3133 openssl_thread_setup();
// Privilege handling: resolve the target gid/uid, drop group privileges,
// chroot if requested, create the control socket, then drop user privileges.
3137 if(!::arg()["setgid"].empty())
3138 newgid
=Utility::makeGidNumeric(::arg()["setgid"]);
3140 if(!::arg()["setuid"].empty())
3141 newuid
=Utility::makeUidNumeric(::arg()["setuid"]);
3143 Utility::dropGroupPrivs(newuid
, newgid
);
3145 if (!::arg()["chroot"].empty()) {
3148 ns
= getenv("NOTIFY_SOCKET");
3149 if (ns
!= nullptr) {
3150 L
<<Logger::Error
<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl
;
3154 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3155 L
<<Logger::Error
<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno
)<<", exiting"<<endl
;
3159 L
<<Logger::Error
<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl
;
3162 s_pidfname
=::arg()["socket-dir"]+"/"+s_programname
+".pid";
3163 if(!s_pidfname
.empty())
3164 unlink(s_pidfname
.c_str()); // remove possible old pid file
3167 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks
: -1);
3169 Utility::dropUserPrivs(newuid
);
3173 g_tcpTimeout
=::arg().asNum("client-tcp-timeout");
3174 g_maxTCPPerClient
=::arg().asNum("max-tcp-per-client");
3175 g_tcpMaxQueriesPerConn
=::arg().asNum("max-tcp-queries-per-connection");
3177 if (::arg().mustDo("snmp-agent")) {
3178 g_snmpAgent
= std::make_shared
<RecursorSNMPAgent
>("recursor", ::arg()["snmp-master-socket"]);
3182 /* This thread handles the web server, carbon, statistics and the control channel */
3183 std::thread
handlerThread(recursorThread
, s_handlerThreadID
, false);
3185 const auto cpusMap
= parseCPUMap();
// With a single thread the worker runs inline on the calling thread;
// otherwise one std::thread is started per entry.
3187 std::vector
<std::thread
> workers(g_numThreads
);
3188 if(g_numThreads
== 1) {
3189 L
<<Logger::Warning
<<"Operating unthreaded"<<endl
;
3191 sd_notify(0, "READY=1");
3193 setCPUMap(cpusMap
, 0, pthread_self());
3194 recursorThread(0, true);
3197 L
<<Logger::Warning
<<"Launching "<< g_numThreads
<<" threads"<<endl
;
3198 for(unsigned int n
=0; n
< g_numThreads
; ++n
) {
3199 workers
[n
] = std::thread(recursorThread
, n
, true);
3201 setCPUMap(cpusMap
, n
, workers
[n
].native_handle());
3204 sd_notify(0, "READY=1");
3206 workers
.back().join();
// Entry point executed by every recursor thread: it is called directly as
// recursorThread(0, true) when running unthreaded and handed to std::thread
// with (n, true) for each launched thread.
// It builds the per-thread state (allow-from list, client socket pool, packet
// cache, statistics ring buffers, MTasker, FD multiplexer), registers the file
// descriptors this thread has to watch, and then runs the periodic work shown
// further down (mthread scheduling, housekeeping, timeouts, statistics, carbon
// dumps, TCP listener throttling).
// NOTE(review): this listing carries the original line numbers inline and skips
// source lines (braces, try/else, loop headers), so the exact nesting of the
// statements below is not visible from here.
3211 static void* recursorThread(int n
, bool worker
)
3215 SyncRes
tmp(g_now
); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3216 SyncRes::setDomainMap(g_initialDomainMap
);
// Copy the initial allow-from list and allocate this thread's own containers.
3217 t_allowFrom
= g_initialAllowFrom
;
3218 t_udpclientsocks
= std::unique_ptr
<UDPClientSocks
>(new UDPClientSocks());
3219 t_tcpClientCounts
= std::unique_ptr
<tcpClientCounts_t
>(new tcpClientCounts_t());
3222 t_packetCache
= std::unique_ptr
<RecursorPacketCache
>(new RecursorPacketCache());
3224 #ifdef HAVE_PROTOBUF
3225 t_uuidGenerator
= std::unique_ptr
<boost::uuids::random_generator
>(new boost::uuids::random_generator());
3227 L
<<Logger::Warning
<<"Done priming cache with root hints"<<endl
;
// Load the optional Lua script named by the 'lua-dns-script' setting; a
// std::exception raised while loading is logged by the handler below.
3230 if(!::arg()["lua-dns-script"].empty()) {
3231 t_pdl
= std::make_shared
<RecursorLua4
>(::arg()["lua-dns-script"]);
3232 L
<<Logger::Warning
<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl
;
3235 catch(std::exception
&e
) {
3236 L
<<Logger::Error
<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e
.what()<<endl
;
// Ring capacity per thread: the configured total divided by the worker count.
// NOTE(review): this divides by zero if g_numWorkerThreads can be 0 here -
// confirm it is always at least 1 by the time threads start.
3240 unsigned int ringsize
=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads
;
// t_remotes gets the full configured capacity when queries are distributed
// (a single thread does recvfrom); the second set_capacity(ringsize) call is
// presumably the else branch, whose line is not shown in this listing.
3242 t_remotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3243 if(g_weDistributeQueries
) // if so, only 1 thread does recvfrom
3244 t_remotes
->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3246 t_remotes
->set_capacity(ringsize
);
3247 t_servfailremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3248 t_servfailremotes
->set_capacity(ringsize
);
3249 t_largeanswerremotes
= std::unique_ptr
<addrringbuf_t
>(new addrringbuf_t());
3250 t_largeanswerremotes
->set_capacity(ringsize
);
3252 t_queryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
3253 t_queryring
->set_capacity(ringsize
);
3254 t_servfailqueryring
= std::unique_ptr
<boost::circular_buffer
<pair
<DNSName
, uint16_t> > >(new boost::circular_buffer
<pair
<DNSName
, uint16_t> >());
3255 t_servfailqueryring
->set_capacity(ringsize
);
// The MTasker is constructed with the value of the 'stack-size' setting.
3258 MT
=std::unique_ptr
<MTasker
<PacketID
,string
> >(new MTasker
<PacketID
,string
>(::arg().asNum("stack-size")));
3262 t_fdm
=getMultiplexer();
// Start the web server on this thread's multiplexer when 'webserver' is set.
// NOTE(review): the RecursorWebServer is created with a bare new and the
// pointer is not stored in the visible code - confirm the lifetime is intended.
3265 if(::arg().mustDo("webserver")) {
3266 L
<<Logger::Warning
<< "Enabling web server" << endl
;
3268 new RecursorWebServer(t_fdm
);
3270 catch(PDNSException
&e
) {
3271 L
<<Logger::Error
<<"Exception: "<<e
.reason
<<endl
;
// NOTE(review): informational message, yet it is logged at Logger::Error.
3275 L
<<Logger::Error
<<"Enabled '"<< t_fdm
->getName() << "' multiplexer"<<endl
;
// Both pipe read ends of this thread are served by handlePipeRequest.
3278 t_fdm
->addReadFD(g_pipes
[t_id
].readToThread
, handlePipeRequest
);
3279 t_fdm
->addReadFD(g_pipes
[t_id
].readQueriesToThread
, handlePipeRequest
);
// With one socket per thread, register this thread's own deferred FDs.
// The second loop (presumably the else branch, line not shown) registers the
// set stored at index 0, and only on threads that receive queries themselves.
3281 if(g_useOneSocketPerThread
) {
3282 for(deferredAdd_t::const_iterator i
= deferredAdds
[t_id
].cbegin(); i
!= deferredAdds
[t_id
].cend(); ++i
) {
3283 t_fdm
->addReadFD(i
->first
, i
->second
);
3287 if(!g_weDistributeQueries
|| t_id
== s_distributorThreadID
) { // if we distribute queries, only t_id = 0 listens
3288 for(deferredAdd_t::const_iterator i
= deferredAdds
[0].cbegin(); i
!= deferredAdds
[0].cend(); ++i
) {
3289 t_fdm
->addReadFD(i
->first
, i
->second
);
3298 t_fdm
->addReadFD(s_rcc
.d_fd
, handleRCC
); // control channel
3301 unsigned int maxTcpClients
=::arg().asNum("max-tcp-clients");
3303 bool listenOnTCP(true);
// Timestamps (seconds) of the last statistics output and the last carbon dump.
3305 time_t last_stat
= 0;
3306 time_t last_carbon
=0;
3307 time_t carbonInterval
=::arg().asNum("carbon-interval");
3308 counter
.store(0); // used to periodically execute certain tasks
3310 while(MT
->schedule(&g_now
)); // MTasker letting the mthreads do their thing
// Whenever counter is a multiple of 500, run houseKeeping as an mthread.
3312 if(!(counter
%500)) {
3313 MT
->makeThread(houseKeeping
, 0);
// Fetch the entries whose timeout has passed; each one carries a TCPConnection
// that is optionally logged and whose FD is removed from the multiplexer.
3317 typedef vector
<pair
<int, FDMultiplexer::funcparam_t
> > expired_t
;
3318 expired_t expired
=t_fdm
->getTimeouts(g_now
);
3320 for(expired_t::iterator i
=expired
.begin() ; i
!= expired
.end(); ++i
) {
3321 shared_ptr
<TCPConnection
> conn
=any_cast
<shared_ptr
<TCPConnection
> >(i
->second
);
3322 if(g_logCommonErrors
)
3323 L
<<Logger::Warning
<<"Timeout from remote TCP client "<< conn
->d_remote
.toString() <<endl
;
3324 t_fdm
->removeReadFD(i
->first
);
// Statistics are due when statsWanted is set, or when a positive
// g_statisticsInterval has elapsed since last_stat.
3331 if(statsWanted
|| (g_statisticsInterval
> 0 && (g_now
.tv_sec
- last_stat
) >= g_statisticsInterval
)) {
3333 last_stat
= g_now
.tv_sec
;
3336 Utility::gettimeofday(&g_now
, 0);
// Run doCarbonDump as an mthread once carbonInterval seconds have passed.
3338 if((g_now
.tv_sec
- last_carbon
) >= carbonInterval
) {
3339 MT
->makeThread(doCarbonDump
, 0);
3340 last_carbon
= g_now
.tv_sec
;
3345 // 'run' updates g_now for us
// TCP listener throttling: listen sockets are removed from the multiplexer
// while there are more than maxTcpClients connections and added back once the
// count is at or below the limit. listenOnTCP presumably tracks which state we
// are in (the lines using it are not shown in this listing).
3347 if(worker
&& (!g_weDistributeQueries
|| t_id
== s_distributorThreadID
)) { // if pdns distributes queries, only tid 0 should do this
3349 if(TCPConnection::getCurrentConnections() > maxTcpClients
) { // shutdown, too many connections
3350 for(tcpListenSockets_t::iterator i
=g_tcpListenSockets
.begin(); i
!= g_tcpListenSockets
.end(); ++i
)
3351 t_fdm
->removeReadFD(*i
);
3356 if(TCPConnection::getCurrentConnections() <= maxTcpClients
) { // reenable
3357 for(tcpListenSockets_t::iterator i
=g_tcpListenSockets
.begin(); i
!= g_tcpListenSockets
.end(); ++i
)
3358 t_fdm
->addReadFD(*i
, handleNewTCPQuestion
);
// Exceptions reaching this point are logged at Error level.
3365 catch(PDNSException
&ae
) {
3366 L
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
3369 catch(std::exception
&e
) {
3370 L
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
// NOTE(review): the message says "in main" although this is recursorThread.
3374 L
<<Logger::Error
<<"any other exception in main: "<<endl
;
// Program entry point. In the visible statements it records the startup time,
// registers every setting with its help text and an initial value, parses the
// command line and the configuration file, handles the config / help / version
// commands, sets the log level and finally calls serviceMain().
// NOTE(review): this listing skips source lines and the end of the function is
// not visible here, so early exits and the return path cannot be confirmed.
3379 int main(int argc
, char **argv
)
3383 g_stats
.startupTime
=time(0);
3384 versionSetProduct(ProductRecursor
);
3388 int ret
= EXIT_SUCCESS
;
// Setting registration: set()/setSwitch() take the name and the help text, and
// the string assigned to the result is the value the setting starts out with.
3391 ::arg().set("stack-size","stack size per mthread")="200000";
3392 ::arg().set("soa-minimum-ttl","Don't change")="0";
3393 ::arg().set("no-shuffle","Don't change")="off";
3394 ::arg().set("local-port","port to listen on")="53";
3395 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3396 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3397 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3398 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3399 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3400 ::arg().set("daemon","Operate as a daemon")="no";
3401 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3402 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3403 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3404 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3405 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3406 ::arg().set("chroot","switch to chroot jail")="";
3407 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3408 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3409 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3410 ::arg().set("threads", "Launch this number of threads")="2";
3411 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3412 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3413 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3414 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3415 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3416 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3417 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3418 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3419 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3420 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3421 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3422 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3423 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3424 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3425 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3426 ::arg().set("quiet","Suppress logging of questions and answers")="";
3427 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3428 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR
;
3429 ::arg().set("socket-owner","Owner of socket")="";
3430 ::arg().set("socket-group","Group of socket")="";
3431 ::arg().set("socket-mode", "Permissions for socket")="";
3433 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR
+" when unset and not chrooted" )="";
3434 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3435 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3436 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3437 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3438 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3439 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3440 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3441 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3442 ::arg().set("hint-file", "If set, load root hints from this file")="";
3443 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3444 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3445 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3446 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3447 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3448 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3449 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3450 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3451 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3452 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS
;
3453 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3454 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3455 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY
;
3456 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3457 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3458 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3459 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3460 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3461 ::arg().set("lua-config-file", "More powerful configuration options")="";
3463 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3464 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3465 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3466 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3467 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3468 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3469 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3470 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3471 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3472 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3473 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3474 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3475 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3476 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3477 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3478 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3479 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3480 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3481 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3482 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3483 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3484 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3485 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3486 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3487 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3488 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3490 ::arg().set("include-dir","Include *.conf files from this directory")="";
3491 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3493 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3495 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3496 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3498 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3499 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3501 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3503 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
// Commands, registered with setCmd() and tested with mustDo() further down.
3505 ::arg().setCmd("help","Provide a helpful message");
3506 ::arg().setCmd("version","Print version string");
3507 ::arg().setCmd("config","Output blank configuration");
3508 L
.toConsole(Logger::Info
);
// First pass over the command line: 'config-dir' and 'config-name' must be
// known before the configuration file path can be built.
3509 ::arg().laxParse(argc
,argv
); // do a lax parse
// Configuration file is <config-dir>/recursor.conf, or
// <config-dir>/recursor-<config-name>.conf when 'config-name' is set; in that
// case the program name also gets a "-<config-name>" suffix.
3511 string configname
=::arg()["config-dir"]+"/recursor.conf";
3512 if(::arg()["config-name"]!="") {
3513 configname
=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3514 s_programname
+="-"+::arg()["config-name"];
3516 cleanSlashes(configname
);
// 'config' command: print the configuration string to stdout.
3518 if(::arg().mustDo("config")) {
3519 cout
<<::arg().configstring()<<endl
;
// A configuration file that cannot be parsed only produces a warning. The
// command line is parsed again afterwards, presumably so that command-line
// values take precedence over the file - confirm against ArgvMap.
3523 if(!::arg().file(configname
.c_str()))
3524 L
<<Logger::Warning
<<"Unable to parse configuration file '"<<configname
<<"'"<<endl
;
3526 ::arg().parse(argc
,argv
);
// chroot combined with a writable API (api-config-dir set, api-readonly off)
// is reported as an error.
3528 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3529 L
<<Logger::Error
<<"Using chroot and a writable API is not possible"<<endl
;
// Fill in 'socket-dir' when unset: LOCALSTATEDIR when not chrooted; the "/"
// assignment is presumably the else branch (chrooted), whose line is not shown.
3533 if (::arg()["socket-dir"].empty()) {
3534 if (::arg()["chroot"].empty())
3535 ::arg().set("socket-dir") = LOCALSTATEDIR
;
3537 ::arg().set("socket-dir") = "/";
3540 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
// With a single thread, query distribution is switched off.
3542 if(::arg().asNum("threads")==1)
3543 ::arg().set("pdns-distributes-queries")="no";
3545 if(::arg().mustDo("help")) {
3546 cout
<<"syntax:"<<endl
<<endl
;
3547 cout
<<::arg().helpstring(::arg()["help"])<<endl
;
3550 if(::arg().mustDo("version")) {
3551 showProductVersion();
3552 showBuildConfiguration();
// Log level: taken from 'loglevel', raised to Logger::Error if lower, and
// raised to Logger::Info when g_quiet is false so the query log stays visible.
3556 Logger::Urgency logUrgency
= (Logger::Urgency
)::arg().asNum("loglevel");
3558 if (logUrgency
< Logger::Error
)
3559 logUrgency
= Logger::Error
;
3560 if(!g_quiet
&& logUrgency
< Logger::Info
) { // Logger::Info=6, Logger::Debug=7
3561 logUrgency
= Logger::Info
; // if you do --quiet=no, you need Info to also see the query log
3563 L
.setLoglevel(logUrgency
);
3564 L
.toConsole(logUrgency
);
3566 serviceMain(argc
, argv
);
// Exceptions reaching this point are logged at Error level.
3568 catch(PDNSException
&ae
) {
3569 L
<<Logger::Error
<<"Exception: "<<ae
.reason
<<endl
;
3572 catch(std::exception
&e
) {
3573 L
<<Logger::Error
<<"STL Exception: "<<e
.what()<<endl
;
3577 L
<<Logger::Error
<<"any other exception in main: "<<endl
;