]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Fix 'adding an integer to a string does not append to it' error
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include "recpacketcache.hh"
31 #include "ws-recursor.hh"
32 #include <pthread.h>
33 #include "utility.hh"
34 #include "dns_random.hh"
35 #ifdef HAVE_LIBSODIUM
36 #include <sodium.h>
37 #endif
38 #include "opensslsigners.hh"
39 #include <iostream>
40 #include <errno.h>
41 #include <boost/static_assert.hpp>
42 #include <map>
43 #include <set>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
46 #include <stdio.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include "misc.hh"
50 #include "mtasker.hh"
51 #include <utility>
52 #include "arguments.hh"
53 #include "syncres.hh"
54 #include <fcntl.h>
55 #include <fstream>
56 #include "sortlist.hh"
57 #include "sstuff.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
63 #ifdef MALLOC_TRACE
64 #include "malloctrace.hh"
65 #endif
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
72 #include "logger.hh"
73 #include "iputils.hh"
74 #include "mplexer.hh"
75 #include "config.h"
76 #include "lua-recursor4.hh"
77 #include "version.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
80 #include "dnsname.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
86 #include "gettime.hh"
87
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
90
91 #ifdef HAVE_SYSTEMD
92 #include <systemd/sd-daemon.h>
93 #endif
94
95 #include "namespaces.hh"
96
97 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
98
99 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
100 static thread_local int t_id;
101 static thread_local std::shared_ptr<Regex> t_traceRegex;
102 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
103
104 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
105 thread_local std::unique_ptr<MemRecursorCache> t_RC;
106 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
107 thread_local FDMultiplexer* t_fdm{nullptr};
108 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
109 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
110 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
111 #ifdef HAVE_PROTOBUF
112 thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
113 #endif
114 __thread struct timeval g_now; // timestamp, updated (too) frequently
115
// File descriptors used for communicating with our threads.
// Field order is part of the layout; keep it as is.
struct ThreadPipeSet
{
  int writeToThread, readToThread;               // the two ends of the "to thread" pipe
  int writeFromThread, readFromThread;           // the two ends of the "from thread" pipe
  int writeQueriesToThread, readQueriesToThread; // query pipe; the write end is non-blocking
};
126
127 /* the TID of the thread handling the web server, carbon, statistics and the control channel */
128 static const int s_handlerThreadID = -1;
129 /* when pdns-distributes-queries is set, the TID of the thread handling, hashing and distributing new queries
130 to the other threads */
131 static const int s_distributorThreadID = 0;
132
133 typedef vector<int> tcpListenSockets_t;
134 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
135 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
136
137 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
138 static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
139 static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
140 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
141 static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
142 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
143 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
144 static AtomicCounter counter;
145 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
146 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
147 static size_t g_tcpMaxQueriesPerConn;
148 static uint64_t g_latencyStatSize;
149 static uint32_t g_disthashseed;
150 static unsigned int g_maxTCPPerClient;
151 static unsigned int g_networkTimeoutMsec;
152 static unsigned int g_maxMThreads;
153 static unsigned int g_numWorkerThreads;
154 static int g_tcpTimeout;
155 static uint16_t g_udpTruncationThreshold;
156 static std::atomic<bool> statsWanted;
157 static std::atomic<bool> g_quiet;
158 static bool g_logCommonErrors;
159 static bool g_anyToTcp;
160 static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
161 static bool g_reusePort{false};
162 static bool g_useOneSocketPerThread;
163 static bool g_gettagNeedsEDNSOptions{false};
164 static time_t g_statisticsInterval;
165 static bool g_useIncomingECS;
166 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
167
168 RecursorControlChannel s_rcc; // only active in thread 0
169 RecursorStats g_stats;
170 string s_programname="pdns_recursor";
171 string s_pidfname;
172 bool g_lowercaseOutgoing;
173 unsigned int g_numThreads;
174 uint16_t g_outgoingEDNSBufsize;
175 bool g_logRPZChanges{false};
176
177 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
178 // Bad Nets taken from both:
179 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
180 // and
181 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
182 // where such a network may not be considered a valid destination
183 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
184 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
185
186 //! used to send information to a newborn mthread
187 struct DNSComboWriter {
188 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
189 d_tcp(false), d_socket(-1)
190 {}
191 MOADNSParser d_mdp;
192 void setRemote(const ComboAddress* sa)
193 {
194 d_remote=*sa;
195 }
196
197 void setLocal(const ComboAddress& sa)
198 {
199 d_local=sa;
200 }
201
202
203 void setSocket(int sock)
204 {
205 d_socket=sock;
206 }
207
208 string getRemote() const
209 {
210 return d_remote.toString();
211 }
212
213 struct timeval d_now;
214 ComboAddress d_remote, d_local;
215 #ifdef HAVE_PROTOBUF
216 boost::uuids::uuid d_uuid;
217 string d_requestorId;
218 string d_deviceId;
219 #endif
220 EDNSSubnetOpts d_ednssubnet;
221 bool d_ecsFound{false};
222 bool d_ecsParsed{false};
223 bool d_tcp;
224 int d_socket;
225 unsigned int d_tag{0};
226 uint32_t d_qhash{0};
227 string d_query;
228 shared_ptr<TCPConnection> d_tcpConnection;
229 vector<pair<uint16_t, string> > d_ednsOpts;
230 std::vector<std::string> d_policyTags;
231 LuaContext::LuaObject d_data;
232 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
233 bool d_variable{false};
234 };
235
236 MT_t* getMT()
237 {
238 return MT ? MT.get() : nullptr;
239 }
240
241 ArgvMap &arg()
242 {
243 static ArgvMap theArg;
244 return theArg;
245 }
246
247 unsigned int getRecursorThreadId()
248 {
249 return static_cast<unsigned int>(t_id);
250 }
251
252 int getMTaskerTID()
253 {
254 return MT->getTid();
255 }
256
257 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
258
259 // -1 is error, 0 is timeout, 1 is success
260 int asendtcp(const string& data, Socket* sock)
261 {
262 PacketID pident;
263 pident.sock=sock;
264 pident.outMSG=data;
265
266 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
267 string packet;
268
269 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
270
271 if(!ret || ret==-1) { // timeout
272 t_fdm->removeWriteFD(sock->getHandle());
273 }
274 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
275 return -1;
276 }
277 return ret;
278 }
279
280 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
281
282 // -1 is error, 0 is timeout, 1 is success
283 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
284 {
285 data.clear();
286 PacketID pident;
287 pident.sock=sock;
288 pident.inNeeded=len;
289 pident.inIncompleteOkay=incompleteOkay;
290 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
291
292 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
293 if(!ret || ret==-1) { // timeout
294 t_fdm->removeReadFD(sock->getHandle());
295 }
296 else if(data.empty()) {// error, EOF or other
297 return -1;
298 }
299
300 return ret;
301 }
302
303 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
304 {
305 PacketID pident=*any_cast<PacketID>(&var);
306 char resp[512];
307 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
308 t_fdm->removeReadFD(fd);
309 if(ret >= 0) {
310 string data(resp, (size_t) ret);
311 MT->sendEvent(pident, &data);
312 }
313 else {
314 string empty;
315 MT->sendEvent(pident, &empty);
316 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
317 }
318 }
319 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
320 {
321 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
322 s.setNonBlocking();
323 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
324
325 s.bind(local);
326 s.connect(dest);
327 s.send(query);
328
329 PacketID pident;
330 pident.sock=&s;
331 pident.type=0;
332 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
333
334 string data;
335
336 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
337
338 if(!ret || ret==-1) { // timeout
339 t_fdm->removeReadFD(s.getHandle());
340 }
341 else if(data.empty()) {// error, EOF or other
342 // we could special case this
343 return data;
344 }
345 return data;
346 }
347
348 //! pick a random query local address
349 ComboAddress getQueryLocalAddress(int family, uint16_t port)
350 {
351 ComboAddress ret;
352 if(family==AF_INET) {
353 if(g_localQueryAddresses4.empty())
354 ret = g_local4;
355 else
356 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
357 ret.sin4.sin_port = htons(port);
358 }
359 else {
360 if(g_localQueryAddresses6.empty())
361 ret = g_local6;
362 else
363 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
364
365 ret.sin6.sin6_port = htons(port);
366 }
367 return ret;
368 }
369
370 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
371
372 static void setSocketBuffer(int fd, int optname, uint32_t size)
373 {
374 uint32_t psize=0;
375 socklen_t len=sizeof(psize);
376
377 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
378 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
379 return;
380 }
381
382 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
383 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
384 }
385
386
387 static void setSocketReceiveBuffer(int fd, uint32_t size)
388 {
389 setSocketBuffer(fd, SO_RCVBUF, size);
390 }
391
392 static void setSocketSendBuffer(int fd, uint32_t size)
393 {
394 setSocketBuffer(fd, SO_SNDBUF, size);
395 }
396
397
398 // you can ask this class for a UDP socket to send a query from
399 // this socket is not yours, don't even think about deleting it
400 // but after you call 'returnSocket' on it, don't assume anything anymore
401 class UDPClientSocks
402 {
403 unsigned int d_numsocks;
404 public:
405 UDPClientSocks() : d_numsocks(0)
406 {
407 }
408
409 typedef set<int> socks_t;
410 socks_t d_socks;
411
412 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
413 int getSocket(const ComboAddress& toaddr, int* fd)
414 {
415 *fd=makeClientSocket(toaddr.sin4.sin_family);
416 if(*fd < 0) // temporary error - receive exception otherwise
417 return -2;
418
419 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
420 int err = errno;
421 // returnSocket(*fd);
422 try {
423 closesocket(*fd);
424 }
425 catch(const PDNSException& e) {
426 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
427 }
428
429 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
430 return -2;
431 return -1;
432 }
433
434 d_socks.insert(*fd);
435 d_numsocks++;
436 return 0;
437 }
438
439 void returnSocket(int fd)
440 {
441 socks_t::iterator i=d_socks.find(fd);
442 if(i==d_socks.end()) {
443 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
444 }
445 returnSocketLocked(i);
446 }
447
448 // return a socket to the pool, or simply erase it
449 void returnSocketLocked(socks_t::iterator& i)
450 {
451 if(i==d_socks.end()) {
452 throw PDNSException("Trying to return a socket not in the pool");
453 }
454 try {
455 t_fdm->removeReadFD(*i);
456 }
457 catch(FDMultiplexerException& e) {
458 // we sometimes return a socket that has not yet been assigned to t_fdm
459 }
460 try {
461 closesocket(*i);
462 }
463 catch(const PDNSException& e) {
464 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
465 }
466
467 d_socks.erase(i++);
468 --d_numsocks;
469 }
470
471 // returns -1 for errors which might go away, throws for ones that won't
472 static int makeClientSocket(int family)
473 {
474 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
475
476 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
477 return ret;
478
479 if(ret<0)
480 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
481
482 // setCloseOnExec(ret); // we're not going to exec
483
484 int tries=10;
485 ComboAddress sin;
486 while(--tries) {
487 uint16_t port;
488
489 if(tries==1) // fall back to kernel 'random'
490 port = 0;
491 else
492 port = 1025 + dns_random(64510);
493
494 sin=getQueryLocalAddress(family, port); // does htons for us
495
496 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
497 break;
498 }
499 if(!tries)
500 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
501
502 setNonBlocking(ret);
503 return ret;
504 }
505 };
506
507 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
508
509 /* these two functions are used by LWRes */
510 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
511 int asendto(const char *data, size_t len, int flags,
512 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
513 {
514
515 PacketID pident;
516 pident.domain = domain;
517 pident.remote = toaddr;
518 pident.type = qtype;
519
520 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
521 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
522
523 for(; chain.first != chain.second; chain.first++) {
524 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
525 /*
526 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
527 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
528 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
529 */
530 chain.first->key.chain.insert(id); // we can chain
531 *fd=-1; // gets used in waitEvent / sendEvent later on
532 return 1;
533 }
534 }
535
536 int ret=t_udpclientsocks->getSocket(toaddr, fd);
537 if(ret < 0)
538 return ret;
539
540 pident.fd=*fd;
541 pident.id=id;
542
543 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
544 ret = send(*fd, data, len, 0);
545
546 int tmp = errno;
547
548 if(ret < 0)
549 t_udpclientsocks->returnSocket(*fd);
550
551 errno = tmp; // this is for logging purposes only
552 return ret;
553 }
554
555 // -1 is error, 0 is timeout, 1 is success
556 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
557 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
558 {
559 static optional<unsigned int> nearMissLimit;
560 if(!nearMissLimit)
561 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
562
563 PacketID pident;
564 pident.fd=fd;
565 pident.id=id;
566 pident.domain=domain;
567 pident.type = qtype;
568 pident.remote=fromaddr;
569
570 string packet;
571 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
572
573 if(ret > 0) {
574 if(packet.empty()) // means "error"
575 return -1;
576
577 *d_len=packet.size();
578 memcpy(data,packet.c_str(),min(len,*d_len));
579 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
580 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
581 g_stats.spoofCount++;
582 return -1;
583 }
584 }
585 else {
586 if(fd >= 0)
587 t_udpclientsocks->returnSocket(fd);
588 }
589 return ret;
590 }
591
592 static void writePid(void)
593 {
594 if(!::arg().mustDo("write-pid"))
595 return;
596 ofstream of(s_pidfname.c_str(), std::ios_base::app);
597 if(of)
598 of<< Utility::getpid() <<endl;
599 else
600 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
601 }
602
603 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
604 {
605 ++s_currentConnections;
606 (*t_tcpClientCounts)[d_remote]++;
607 }
608
609 TCPConnection::~TCPConnection()
610 {
611 try {
612 if(closesocket(d_fd) < 0)
613 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
614 }
615 catch(const PDNSException& e) {
616 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
617 }
618
619 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
620 t_tcpClientCounts->erase(d_remote);
621 --s_currentConnections;
622 }
623
624 AtomicCounter TCPConnection::s_currentConnections;
625
626 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
627
628 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
629 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
630 {
631 if(packetsize > 1000 && t_largeanswerremotes)
632 t_largeanswerremotes->push_back(remote);
633 switch(res) {
634 case RCode::ServFail:
635 if(t_servfailremotes) {
636 t_servfailremotes->push_back(remote);
637 if(query && t_servfailqueryring) // packet cache
638 t_servfailqueryring->push_back(make_pair(*query, qtype));
639 }
640 g_stats.servFails++;
641 break;
642 case RCode::NXDomain:
643 g_stats.nxDomains++;
644 break;
645 case RCode::NoError:
646 g_stats.noErrors++;
647 break;
648 }
649 }
650
651 static string makeLoginfo(DNSComboWriter* dc)
652 try
653 {
654 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
655 }
656 catch(...)
657 {
658 return "Exception making error message for exception";
659 }
660
661 #ifdef HAVE_PROTOBUF
662 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
663 {
664 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
665 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
666 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
667 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
668 message.setRequestorId(requestorId);
669 message.setDeviceId(deviceId);
670
671 if (!policyTags.empty()) {
672 message.setPolicyTags(policyTags);
673 }
674
675 // cerr <<message.toDebugString()<<endl;
676 std::string str;
677 message.serialize(str);
678 logger->queueData(str);
679 }
680
681 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
682 {
683 // cerr <<message.toDebugString()<<endl;
684 std::string str;
685 message.serialize(str);
686 logger->queueData(str);
687 }
688 #endif
689
690 /**
691 * Chases the CNAME provided by the PolicyCustom RPZ policy.
692 *
693 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
694 * @param qtype: The QType of the original query
695 * @param sr: A SyncRes
696 * @param res: An integer that will contain the RCODE of the lookup we do
697 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
698 */
699 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
700 {
701 if (spoofed.d_type == QType::CNAME) {
702 bool oldWantsRPZ = sr.getWantsRPZ();
703 sr.setWantsRPZ(false);
704 vector<DNSRecord> ans;
705 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
706 for (const auto& rec : ans) {
707 if(rec.d_place == DNSResourceRecord::ANSWER) {
708 ret.push_back(rec);
709 }
710 }
711 // Reset the RPZ state of the SyncRes
712 sr.setWantsRPZ(oldWantsRPZ);
713 }
714 }
715
716 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
717 {
718 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
719
720 if(rec.d_type != QType::OPT) // their TTL ain't real
721 minTTL = min(minTTL, rec.d_ttl);
722
723 rec.d_content->toPacket(pw);
724 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
725 pw.rollback();
726 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
727 pw.getHeader()->tc=1;
728 pw.truncate();
729 }
730 return false;
731 }
732
733 return true;
734 }
735
736 static void startDoResolve(void *p)
737 {
738 DNSComboWriter* dc=(DNSComboWriter *)p;
739 try {
740 if (t_queryring)
741 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
742
743 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
744 EDNSOpts edo;
745 bool haveEDNS=false;
746 if(getEDNSOpts(dc->d_mdp, &edo)) {
747 if(!dc->d_tcp) {
748 /* rfc6891 6.2.3:
749 "Values lower than 512 MUST be treated as equal to 512."
750 */
751 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
752 }
753 dc->d_ednsOpts = edo.d_options;
754 haveEDNS=true;
755
756 if (g_useIncomingECS && !dc->d_ecsParsed) {
757 for (const auto& o : edo.d_options) {
758 if (o.first == EDNSOptionCode::ECS) {
759 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
760 break;
761 }
762 }
763 }
764 }
765 /* perhaps there was no EDNS or no ECS but by now we looked */
766 dc->d_ecsParsed = true;
767 vector<DNSRecord> ret;
768 vector<uint8_t> packet;
769
770 auto luaconfsLocal = g_luaconfs.getLocal();
771 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
772 bool wantsRPZ(true);
773 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
774 #ifdef HAVE_PROTOBUF
775 if (luaconfsLocal->protobufServer) {
776 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
777 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
778 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
779 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
780 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
781 }
782 #endif /* HAVE_PROTOBUF */
783
784 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
785
786 pw.getHeader()->aa=0;
787 pw.getHeader()->ra=1;
788 pw.getHeader()->qr=1;
789 pw.getHeader()->tc=0;
790 pw.getHeader()->id=dc->d_mdp.d_header.id;
791 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
792 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
793
794 /* This is the lowest TTL seen in the records of the response,
795 so we can't cache it for longer than this value.
796 If we have a TTL cap, this value can't be larger than the
797 cap no matter what. */
798 uint32_t minTTL = dc->d_ttlCap;
799
800 SyncRes sr(dc->d_now);
801
802 bool DNSSECOK=false;
803 if(t_pdl) {
804 sr.setLuaEngine(t_pdl);
805 }
806 sr.d_requestor=dc->d_remote; // ECS needs this too
807 if(g_dnssecmode != DNSSECMode::Off) {
808 sr.setDoDNSSEC(true);
809
810 // Does the requestor want DNSSEC records?
811 if(edo.d_Z & EDNSOpts::DNSSECOK) {
812 DNSSECOK=true;
813 g_stats.dnssecQueries++;
814 }
815 } else {
816 // Ignore the client-set CD flag
817 pw.getHeader()->cd=0;
818 }
819 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
820
821 #ifdef HAVE_PROTOBUF
822 sr.setInitialRequestId(dc->d_uuid);
823 #endif
824
825 if (g_useIncomingECS) {
826 sr.setIncomingECSFound(dc->d_ecsFound);
827 if (dc->d_ecsFound) {
828 sr.setIncomingECS(dc->d_ednssubnet);
829 }
830 }
831
832 bool tracedQuery=false; // we could consider letting Lua know about this too
833 bool variableAnswer = dc->d_variable;
834 bool shouldNotValidate = false;
835
836 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
837 int res = RCode::NoError;
838 DNSFilterEngine::Policy appliedPolicy;
839 DNSRecord spoofed;
840 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
841 dq.ednsFlags = &edo.d_Z;
842 dq.ednsOptions = &dc->d_ednsOpts;
843 dq.tag = dc->d_tag;
844 dq.discardedPolicies = &sr.d_discardedPolicies;
845 dq.policyTags = &dc->d_policyTags;
846 dq.appliedPolicy = &appliedPolicy;
847 dq.currentRecords = &ret;
848 dq.dh = &dc->d_mdp.d_header;
849 dq.data = dc->d_data;
850 #ifdef HAVE_PROTOBUF
851 dq.requestorId = dc->d_requestorId;
852 dq.deviceId = dc->d_deviceId;
853 #endif
854
855 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
856 pw.getHeader()->tc = 1;
857 res = 0;
858 variableAnswer = true;
859 goto sendit;
860 }
861
862 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
863 sr.setLogMode(SyncRes::Store);
864 tracedQuery=true;
865 }
866
867
868 if(!g_quiet || tracedQuery) {
869 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
870 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
871 if(!dc->d_ednssubnet.source.empty()) {
872 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
873 }
874 L<<endl;
875 }
876
877 sr.setId(MT->getTid());
878 if(!dc->d_mdp.d_header.rd)
879 sr.setCacheOnly();
880
881 if (t_pdl) {
882 t_pdl->prerpz(dq, res);
883 }
884
885 // Check if the query has a policy attached to it
886 if (wantsRPZ) {
887 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
888 }
889
890 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
891 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
892
893 sr.setWantsRPZ(wantsRPZ);
894 if(wantsRPZ) {
895 switch(appliedPolicy.d_kind) {
896 case DNSFilterEngine::PolicyKind::NoAction:
897 break;
898 case DNSFilterEngine::PolicyKind::Drop:
899 g_stats.policyDrops++;
900 g_stats.policyResults[appliedPolicy.d_kind]++;
901 delete dc;
902 dc=0;
903 return;
904 case DNSFilterEngine::PolicyKind::NXDOMAIN:
905 g_stats.policyResults[appliedPolicy.d_kind]++;
906 res=RCode::NXDomain;
907 goto haveAnswer;
908 case DNSFilterEngine::PolicyKind::NODATA:
909 g_stats.policyResults[appliedPolicy.d_kind]++;
910 res=RCode::NoError;
911 goto haveAnswer;
912 case DNSFilterEngine::PolicyKind::Custom:
913 g_stats.policyResults[appliedPolicy.d_kind]++;
914 res=RCode::NoError;
915 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
916 ret.push_back(spoofed);
917 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
918 goto haveAnswer;
919 case DNSFilterEngine::PolicyKind::Truncate:
920 if(!dc->d_tcp) {
921 g_stats.policyResults[appliedPolicy.d_kind]++;
922 res=RCode::NoError;
923 pw.getHeader()->tc=1;
924 goto haveAnswer;
925 }
926 break;
927 }
928 }
929
930 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
931 try {
932 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
933 shouldNotValidate = sr.wasOutOfBand();
934 }
935 catch(ImmediateServFailException &e) {
936 if(g_logCommonErrors)
937 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
938 res = RCode::ServFail;
939 }
940
941 dq.validationState = sr.getValidationState();
942
943 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
944 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
945 appliedPolicy = sr.d_appliedPolicy;
946 g_stats.policyResults[appliedPolicy.d_kind]++;
947 switch(appliedPolicy.d_kind) {
948 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
949 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
950 case DNSFilterEngine::PolicyKind::Drop:
951 g_stats.policyDrops++;
952 delete dc;
953 dc=0;
954 return;
955 case DNSFilterEngine::PolicyKind::NXDOMAIN:
956 ret.clear();
957 res=RCode::NXDomain;
958 goto haveAnswer;
959
960 case DNSFilterEngine::PolicyKind::NODATA:
961 ret.clear();
962 res=RCode::NoError;
963 goto haveAnswer;
964
965 case DNSFilterEngine::PolicyKind::Truncate:
966 if(!dc->d_tcp) {
967 ret.clear();
968 res=RCode::NoError;
969 pw.getHeader()->tc=1;
970 goto haveAnswer;
971 }
972 break;
973
974 case DNSFilterEngine::PolicyKind::Custom:
975 ret.clear();
976 res=RCode::NoError;
977 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
978 ret.push_back(spoofed);
979 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
980 goto haveAnswer;
981 }
982 }
983
984 if (wantsRPZ) {
985 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
986 }
987
988 if(t_pdl) {
989 if(res == RCode::NoError) {
990 auto i=ret.cbegin();
991 for(; i!= ret.cend(); ++i)
992 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
993 break;
994 if(i == ret.cend() && t_pdl->nodata(dq, res))
995 shouldNotValidate = true;
996
997 }
998 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
999 shouldNotValidate = true;
1000
1001 if(t_pdl->postresolve(dq, res))
1002 shouldNotValidate = true;
1003 }
1004
1005 if (wantsRPZ) { //XXX This block is repeated, see above
1006 g_stats.policyResults[appliedPolicy.d_kind]++;
1007 switch(appliedPolicy.d_kind) {
1008 case DNSFilterEngine::PolicyKind::NoAction:
1009 break;
1010 case DNSFilterEngine::PolicyKind::Drop:
1011 g_stats.policyDrops++;
1012 delete dc;
1013 dc=0;
1014 return;
1015 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1016 ret.clear();
1017 res=RCode::NXDomain;
1018 goto haveAnswer;
1019
1020 case DNSFilterEngine::PolicyKind::NODATA:
1021 ret.clear();
1022 res=RCode::NoError;
1023 goto haveAnswer;
1024
1025 case DNSFilterEngine::PolicyKind::Truncate:
1026 if(!dc->d_tcp) {
1027 ret.clear();
1028 res=RCode::NoError;
1029 pw.getHeader()->tc=1;
1030 goto haveAnswer;
1031 }
1032 break;
1033
1034 case DNSFilterEngine::PolicyKind::Custom:
1035 ret.clear();
1036 res=RCode::NoError;
1037 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
1038 ret.push_back(spoofed);
1039 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
1040 goto haveAnswer;
1041 }
1042 }
1043 }
1044 haveAnswer:;
1045 if(res == PolicyDecision::DROP) {
1046 g_stats.policyDrops++;
1047 delete dc;
1048 dc=0;
1049 return;
1050 }
1051 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1052 {
1053 string trace(sr.getTrace());
1054 if(!trace.empty()) {
1055 vector<string> lines;
1056 boost::split(lines, trace, boost::is_any_of("\n"));
1057 for(const string& line : lines) {
1058 if(!line.empty())
1059 L<<Logger::Warning<< line << endl;
1060 }
1061 }
1062 }
1063
1064 if(res == -1) {
1065 pw.getHeader()->rcode=RCode::ServFail;
1066 // no commit here, because no record
1067 g_stats.servFails++;
1068 }
1069 else {
1070 pw.getHeader()->rcode=res;
1071
1072 // Does the validation mode or query demand validation?
1073 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1074 try {
1075 if(sr.doLog()) {
1076 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1077 }
1078
1079 auto state = sr.getValidationState();
1080
1081 if(state == Secure) {
1082 if(sr.doLog()) {
1083 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1084 }
1085
1086 // Is the query source interested in the value of the ad-bit?
1087 if (dc->d_mdp.d_header.ad || DNSSECOK)
1088 pw.getHeader()->ad=1;
1089 }
1090 else if(state == Insecure) {
1091 if(sr.doLog()) {
1092 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1093 }
1094
1095 pw.getHeader()->ad=0;
1096 }
1097 else if(state == Bogus) {
1098 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1099 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1100 }
1101
1102 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1103 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1104 if(sr.doLog()) {
1105 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1106 }
1107
1108 pw.getHeader()->rcode=RCode::ServFail;
1109 goto sendit;
1110 } else {
1111 if(sr.doLog()) {
1112 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1113 }
1114 }
1115 }
1116 }
1117 catch(ImmediateServFailException &e) {
1118 if(g_logCommonErrors)
1119 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1120 pw.getHeader()->rcode=RCode::ServFail;
1121 goto sendit;
1122 }
1123 }
1124
1125 if(ret.size()) {
1126 orderAndShuffle(ret);
1127 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1128 stable_sort(ret.begin(), ret.end(), *sl);
1129 variableAnswer=true;
1130 }
1131 }
1132
1133 bool needCommit = false;
1134 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1135 if( ! DNSSECOK &&
1136 ( i->d_type == QType::NSEC3 ||
1137 (
1138 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1139 (
1140 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1141 i->d_place != DNSResourceRecord::ANSWER
1142 )
1143 )
1144 )
1145 ) {
1146 continue;
1147 }
1148
1149 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1150 needCommit = false;
1151 break;
1152 }
1153 needCommit = true;
1154
1155 #ifdef HAVE_PROTOBUF
1156 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1157 pbMessage.addRR(*i);
1158 }
1159 #endif
1160 }
1161 if(needCommit)
1162 pw.commit();
1163 }
1164 sendit:;
1165
1166 if (haveEDNS) {
1167 /* we try to add the EDNS OPT RR even for truncated answers,
1168 as rfc6891 states:
1169 "The minimal response MUST be the DNS header, question section, and an
1170 OPT record. This MUST also occur when a truncated response (using
1171 the DNS header's TC bit) is returned."
1172 */
1173 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, dc->d_ttlCap, maxanswersize)) {
1174 pw.commit();
1175 }
1176 }
1177
1178 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1179 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1180 #ifdef HAVE_PROTOBUF
1181 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1182 pbMessage.setBytes(packet.size());
1183 pbMessage.setResponseCode(pw.getHeader()->rcode);
1184 if (appliedPolicy.d_name) {
1185 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1186 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1187 }
1188 pbMessage.setPolicyTags(dc->d_policyTags);
1189 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1190 pbMessage.setRequestorId(dq.requestorId);
1191 pbMessage.setDeviceId(dq.deviceId);
1192 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1193 }
1194 #endif
1195 if(!dc->d_tcp) {
1196 struct msghdr msgh;
1197 struct iovec iov;
1198 char cbuf[256];
1199 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1200 msgh.msg_control=NULL;
1201
1202 if(g_fromtosockets.count(dc->d_socket)) {
1203 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1204 }
1205 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1206 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1207
1208 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1209 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1210 string((const char*)&*packet.begin(), packet.size()),
1211 g_now.tv_sec,
1212 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1213 min(minTTL,SyncRes::s_packetcachettl),
1214 &pbMessage);
1215 }
1216 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1217 }
1218 else {
1219 char buf[2];
1220 buf[0]=packet.size()/256;
1221 buf[1]=packet.size()%256;
1222
1223 Utility::iovec iov[2];
1224
1225 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1226 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1227
1228 int wret=Utility::writev(dc->d_socket, iov, 2);
1229 bool hadError=true;
1230
1231 if(wret == 0)
1232 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1233 else if(wret < 0 )
1234 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1235 else if((unsigned int)wret != 2 + packet.size())
1236 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1237 else
1238 hadError=false;
1239
1240 // update tcp connection status, either by closing or moving to 'BYTE0'
1241
1242 if(hadError) {
1243 // no need to remove us from FDM, we weren't there
1244 dc->d_socket = -1;
1245 }
1246 else {
1247 dc->d_tcpConnection->queriesCount++;
1248 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1249 dc->d_socket = -1;
1250 }
1251 else {
1252 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1253 Utility::gettimeofday(&g_now, 0); // needs to be updated
1254 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1255 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1256 }
1257 }
1258 }
1259 float spent=makeFloat(sr.getNow()-dc->d_now);
1260 if(!g_quiet) {
1261 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1262 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1263 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1264 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1265
1266 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1267 L<< ", dnssec="<<vStates[sr.getValidationState()];
1268 }
1269
1270 L<<endl;
1271
1272 }
1273
1274 if (sr.d_outqueries || sr.d_authzonequeries) {
1275 t_RC->cacheMisses++;
1276 }
1277 else {
1278 t_RC->cacheHits++;
1279 }
1280
1281 if(spent < 0.001)
1282 g_stats.answers0_1++;
1283 else if(spent < 0.010)
1284 g_stats.answers1_10++;
1285 else if(spent < 0.1)
1286 g_stats.answers10_100++;
1287 else if(spent < 1.0)
1288 g_stats.answers100_1000++;
1289 else
1290 g_stats.answersSlow++;
1291
1292 uint64_t newLat=(uint64_t)(spent*1000000);
1293 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1294 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1295 // no worries, we do this for packet cache hits elsewhere
1296
1297 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1298 if(ourtime < 1)
1299 g_stats.ourtime0_1++;
1300 else if(ourtime < 2)
1301 g_stats.ourtime1_2++;
1302 else if(ourtime < 4)
1303 g_stats.ourtime2_4++;
1304 else if(ourtime < 8)
1305 g_stats.ourtime4_8++;
1306 else if(ourtime < 16)
1307 g_stats.ourtime8_16++;
1308 else if(ourtime < 32)
1309 g_stats.ourtime16_32++;
1310 else {
1311 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1312 g_stats.ourtimeSlow++;
1313 }
1314 if(ourtime >= 0.0) {
1315 newLat=ourtime*1000; // usec
1316 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1317 }
1318 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1319 delete dc;
1320 dc=0;
1321 }
1322 catch(PDNSException &ae) {
1323 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1324 delete dc;
1325 }
1326 catch(MOADNSException& e) {
1327 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1328 delete dc;
1329 }
1330 catch(std::exception& e) {
1331 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1332
1333 // Luawrapper nests the exception from Lua, so we unnest it here
1334 try {
1335 std::rethrow_if_nested(e);
1336 } catch(const std::exception& ne) {
1337 L<<". Extra info: "<<ne.what();
1338 } catch(...) {}
1339
1340 L<<endl;
1341 delete dc;
1342 }
1343 catch(...) {
1344 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1345 }
1346
1347 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1348 }
1349
1350 static void makeControlChannelSocket(int processNum=-1)
1351 {
1352 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1353 if(processNum >= 0)
1354 sockname += "."+std::to_string(processNum);
1355 sockname+=".controlsocket";
1356 s_rcc.listen(sockname);
1357
1358 int sockowner = -1;
1359 int sockgroup = -1;
1360
1361 if (!::arg().isEmpty("socket-group"))
1362 sockgroup=::arg().asGid("socket-group");
1363 if (!::arg().isEmpty("socket-owner"))
1364 sockowner=::arg().asUid("socket-owner");
1365
1366 if (sockgroup > -1 || sockowner > -1) {
1367 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1368 unixDie("Failed to chown control socket");
1369 }
1370 }
1371
1372 // do mode change if socket-mode is given
1373 if(!::arg().isEmpty("socket-mode")) {
1374 mode_t sockmode=::arg().asMode("socket-mode");
1375 if(chmod(sockname.c_str(), sockmode) < 0) {
1376 unixDie("Failed to chmod control socket");
1377 }
1378 }
1379 }
1380
1381 static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
1382 {
1383 bool found = false;
1384 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1385 size_t questionLen = question.length();
1386 unsigned int consumed=0;
1387 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1388
1389 size_t pos= sizeof(dnsheader)+consumed+4;
1390 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1391 = 11 */
1392 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1393 /* OPT root label (1) followed by type (2) */
1394 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1395 if (!options) {
1396 char* ecsStart = nullptr;
1397 size_t ecsLen = 0;
1398 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1399 if (res == 0 && ecsLen > 4) {
1400 EDNSSubnetOpts eso;
1401 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1402 *ednssubnet=eso;
1403 found = true;
1404 }
1405 }
1406 }
1407 else {
1408 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1409 if (res == 0) {
1410 const auto& it = options->find(EDNSOptionCode::ECS);
1411 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1412 EDNSSubnetOpts eso;
1413 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1414 *ednssubnet=eso;
1415 found = true;
1416 }
1417 }
1418 }
1419 }
1420 }
1421 }
1422 return found;
1423 }
1424
1425 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1426 {
1427 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1428
1429 if(conn->state==TCPConnection::BYTE0) {
1430 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1431 if(bytes==1)
1432 conn->state=TCPConnection::BYTE1;
1433 if(bytes==2) {
1434 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1435 conn->bytesread=0;
1436 conn->state=TCPConnection::GETQUESTION;
1437 }
1438 if(!bytes || bytes < 0) {
1439 t_fdm->removeReadFD(fd);
1440 return;
1441 }
1442 }
1443 else if(conn->state==TCPConnection::BYTE1) {
1444 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1445 if(bytes==1) {
1446 conn->state=TCPConnection::GETQUESTION;
1447 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1448 conn->bytesread=0;
1449 }
1450 if(!bytes || bytes < 0) {
1451 if(g_logCommonErrors)
1452 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1453 t_fdm->removeReadFD(fd);
1454 return;
1455 }
1456 }
1457 else if(conn->state==TCPConnection::GETQUESTION) {
1458 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1459 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1460 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1461 t_fdm->removeReadFD(fd);
1462 return;
1463 }
1464 conn->bytesread+=(uint16_t)bytes;
1465 if(conn->bytesread==conn->qlen) {
1466 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1467
1468 DNSComboWriter* dc=nullptr;
1469 try {
1470 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1471 }
1472 catch(MOADNSException &mde) {
1473 g_stats.clientParseError++;
1474 if(g_logCommonErrors)
1475 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1476 return;
1477 }
1478 dc->d_tcpConnection = conn; // carry the torch
1479 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1480 dc->d_tcp=true;
1481 dc->setRemote(&conn->d_remote);
1482 ComboAddress dest;
1483 memset(&dest, 0, sizeof(dest));
1484 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1485 socklen_t len = dest.getSocklen();
1486 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1487 dc->setLocal(dest);
1488 DNSName qname;
1489 uint16_t qtype=0;
1490 uint16_t qclass=0;
1491 bool needECS = false;
1492 string requestorId;
1493 string deviceId;
1494 #ifdef HAVE_PROTOBUF
1495 auto luaconfsLocal = g_luaconfs.getLocal();
1496 if (luaconfsLocal->protobufServer) {
1497 needECS = true;
1498 }
1499 #endif
1500
1501 if(needECS || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1502
1503 try {
1504 std::map<uint16_t, EDNSOptionView> ednsOptions;
1505 dc->d_ecsParsed = true;
1506 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1507
1508 if(t_pdl) {
1509 try {
1510 if (t_pdl->d_gettag_ffi) {
1511 dc->d_tag = t_pdl->gettag_ffi(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable);
1512 }
1513 else if (t_pdl->d_gettag) {
1514 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1515 }
1516 }
1517 catch(const std::exception& e) {
1518 if(g_logCommonErrors)
1519 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1520 }
1521 }
1522 }
1523 catch(const std::exception& e)
1524 {
1525 if(g_logCommonErrors)
1526 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1527 }
1528 }
1529 #ifdef HAVE_PROTOBUF
1530 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1531 dc->d_requestorId = requestorId;
1532 dc->d_deviceId = deviceId;
1533 dc->d_uuid = (*t_uuidGenerator)();
1534 }
1535
1536 if(luaconfsLocal->protobufServer) {
1537 try {
1538 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1539
1540 if (!luaconfsLocal->protobufTaggedOnly) {
1541 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1542 }
1543 }
1544 catch(std::exception& e) {
1545 if(g_logCommonErrors)
1546 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1547 }
1548 }
1549 #endif
1550 if(dc->d_mdp.d_header.qr) {
1551 delete dc;
1552 g_stats.ignoredCount++;
1553 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1554 return;
1555 }
1556 if(dc->d_mdp.d_header.opcode) {
1557 delete dc;
1558 g_stats.ignoredCount++;
1559 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1560 return;
1561 }
1562 else {
1563 ++g_stats.qcounter;
1564 ++g_stats.tcpqcounter;
1565 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1566 return;
1567 }
1568 }
1569 }
1570 }
1571
1572 //! Handle new incoming TCP connection
1573 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1574 {
1575 ComboAddress addr;
1576 socklen_t addrlen=sizeof(addr);
1577 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1578 if(newsock>=0) {
1579 if(MT->numProcesses() > g_maxMThreads) {
1580 g_stats.overCapacityDrops++;
1581 try {
1582 closesocket(newsock);
1583 }
1584 catch(const PDNSException& e) {
1585 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1586 }
1587 return;
1588 }
1589
1590 if(t_remotes)
1591 t_remotes->push_back(addr);
1592 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1593 if(!g_quiet)
1594 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1595
1596 g_stats.unauthorizedTCP++;
1597 try {
1598 closesocket(newsock);
1599 }
1600 catch(const PDNSException& e) {
1601 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1602 }
1603 return;
1604 }
1605 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1606 g_stats.tcpClientOverflow++;
1607 try {
1608 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1609 }
1610 catch(const PDNSException& e) {
1611 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1612 }
1613 return;
1614 }
1615
1616 setNonBlocking(newsock);
1617 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
1618 tc->state=TCPConnection::BYTE0;
1619
1620 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1621
1622 struct timeval now;
1623 Utility::gettimeofday(&now, 0);
1624 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1625 }
1626 }
1627
1628 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1629 {
1630 gettimeofday(&g_now, 0);
1631 struct timeval diff = g_now - tv;
1632 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1633
1634 if(tv.tv_sec && delta > 1000.0) {
1635 g_stats.tooOldDrops++;
1636 return 0;
1637 }
1638
1639 ++g_stats.qcounter;
1640 if(fromaddr.sin4.sin_family==AF_INET6)
1641 g_stats.ipv6qcounter++;
1642
1643 string response;
1644 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1645 unsigned int ctag=0;
1646 uint32_t qhash = 0;
1647 bool needECS = false;
1648 std::vector<std::string> policyTags;
1649 LuaContext::LuaObject data;
1650 string requestorId;
1651 string deviceId;
1652 #ifdef HAVE_PROTOBUF
1653 boost::uuids::uuid uniqueId;
1654 auto luaconfsLocal = g_luaconfs.getLocal();
1655 if (luaconfsLocal->protobufServer) {
1656 uniqueId = (*t_uuidGenerator)();
1657 needECS = true;
1658 } else if (luaconfsLocal->outgoingProtobufServer) {
1659 uniqueId = (*t_uuidGenerator)();
1660 }
1661 #endif
1662 EDNSSubnetOpts ednssubnet;
1663 bool ecsFound = false;
1664 bool ecsParsed = false;
1665 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
1666 bool variable = false;
1667 try {
1668 DNSName qname;
1669 uint16_t qtype=0;
1670 uint16_t qclass=0;
1671 uint32_t age;
1672 bool qnameParsed=false;
1673 #ifdef MALLOC_TRACE
1674 /*
1675 static uint64_t last=0;
1676 if(!last)
1677 g_mtracer->clearAllocators();
1678 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1679 last=g_mtracer->getAllocs();
1680 cout<<g_mtracer->topAllocatorsString()<<endl;
1681 g_mtracer->clearAllocators();
1682 */
1683 #endif
1684
1685 if(needECS || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
1686 try {
1687 std::map<uint16_t, EDNSOptionView> ednsOptions;
1688 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1689 qnameParsed = true;
1690 ecsParsed = true;
1691
1692 if(t_pdl) {
1693 try {
1694 if (t_pdl->d_gettag_ffi) {
1695 ctag = t_pdl->gettag_ffi(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable);
1696 }
1697 else if (t_pdl->d_gettag) {
1698 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1699 }
1700 }
1701 catch(const std::exception& e) {
1702 if(g_logCommonErrors)
1703 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1704 }
1705 }
1706 }
1707 catch(const std::exception& e)
1708 {
1709 if(g_logCommonErrors)
1710 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1711 }
1712 }
1713
1714 bool cacheHit = false;
1715 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1716 #ifdef HAVE_PROTOBUF
1717 if(luaconfsLocal->protobufServer) {
1718 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1719 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
1720 }
1721 }
1722 #endif /* HAVE_PROTOBUF */
1723
1724 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1725 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1726 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
1727 if (qnameParsed) {
1728 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1729 }
1730 else {
1731 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1732 }
1733
1734 if (cacheHit) {
1735 #ifdef HAVE_PROTOBUF
1736 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1737 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1738 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1739 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1740 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1741 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1742 pbMessage.setRequestorId(requestorId);
1743 pbMessage.setDeviceId(deviceId);
1744 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1745 }
1746 #endif /* HAVE_PROTOBUF */
1747 if(!g_quiet)
1748 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1749
1750 g_stats.packetCacheHits++;
1751 SyncRes::s_queries++;
1752 ageDNSPacket(response, age);
1753 struct msghdr msgh;
1754 struct iovec iov;
1755 char cbuf[256];
1756 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1757 msgh.msg_control=NULL;
1758
1759 if(g_fromtosockets.count(fd)) {
1760 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1761 }
1762 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1763 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1764
1765 if(response.length() >= sizeof(struct dnsheader)) {
1766 struct dnsheader tmpdh;
1767 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1768 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1769 }
1770 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1771 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1772 return 0;
1773 }
1774 }
1775 catch(std::exception& e) {
1776 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1777 return 0;
1778 }
1779
1780 if(t_pdl) {
1781 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
1782 if(!g_quiet)
1783 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1784 g_stats.policyDrops++;
1785 return 0;
1786 }
1787 }
1788
1789 if(MT->numProcesses() > g_maxMThreads) {
1790 if(!g_quiet)
1791 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1792
1793 g_stats.overCapacityDrops++;
1794 return 0;
1795 }
1796
1797 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1798 dc->setSocket(fd);
1799 dc->d_tag=ctag;
1800 dc->d_qhash=qhash;
1801 dc->d_query = question;
1802 dc->setRemote(&fromaddr);
1803 dc->setLocal(destaddr);
1804 dc->d_tcp=false;
1805 dc->d_policyTags = policyTags;
1806 dc->d_data = data;
1807 dc->d_ecsFound = ecsFound;
1808 dc->d_ecsParsed = ecsParsed;
1809 dc->d_ednssubnet = ednssubnet;
1810 dc->d_ttlCap = ttlCap;
1811 dc->d_variable = variable;
1812 #ifdef HAVE_PROTOBUF
1813 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1814 dc->d_uuid = uniqueId;
1815 }
1816 dc->d_requestorId = requestorId;
1817 dc->d_deviceId = deviceId;
1818 #endif
1819
1820 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1821 return 0;
1822 }
1823
1824
1825 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1826 {
1827 ssize_t len;
1828 char data[1500];
1829 ComboAddress fromaddr;
1830 struct msghdr msgh;
1831 struct iovec iov;
1832 char cbuf[256];
1833 bool firstQuery = true;
1834
1835 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1836 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1837
1838 for(;;)
1839 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1840
1841 firstQuery = false;
1842
1843 if(t_remotes)
1844 t_remotes->push_back(fromaddr);
1845
1846 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1847 if(!g_quiet)
1848 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1849
1850 g_stats.unauthorizedUDP++;
1851 return;
1852 }
1853 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1854 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1855 if(!g_quiet)
1856 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1857
1858 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1859 return;
1860 }
1861 try {
1862 dnsheader* dh=(dnsheader*)data;
1863
1864 if(dh->qr) {
1865 g_stats.ignoredCount++;
1866 if(g_logCommonErrors)
1867 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1868 }
1869 else if(dh->opcode) {
1870 g_stats.ignoredCount++;
1871 if(g_logCommonErrors)
1872 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1873 }
1874 else {
1875 string question(data, (size_t)len);
1876 struct timeval tv={0,0};
1877 HarvestTimestamp(&msgh, &tv);
1878 ComboAddress dest;
1879 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1880 auto loc = rplookup(g_listenSocketsAddresses, fd);
1881 if(HarvestDestinationAddress(&msgh, &dest)) {
1882 // but.. need to get port too
1883 if(loc)
1884 dest.sin4.sin_port = loc->sin4.sin_port;
1885 }
1886 else {
1887 if(loc) {
1888 dest = *loc;
1889 }
1890 else {
1891 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1892 socklen_t slen = dest.getSocklen();
1893 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1894 }
1895 }
1896 if(g_weDistributeQueries)
1897 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1898 else
1899 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1900 }
1901 }
1902 catch(MOADNSException& mde) {
1903 g_stats.clientParseError++;
1904 if(g_logCommonErrors)
1905 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1906 }
1907 catch(std::runtime_error& e) {
1908 g_stats.clientParseError++;
1909 if(g_logCommonErrors)
1910 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1911 }
1912 }
1913 else {
1914 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1915 if(firstQuery && errno == EAGAIN)
1916 g_stats.noPacketError++;
1917
1918 break;
1919 }
1920 }
1921
1922 static void makeTCPServerSockets(unsigned int threadId)
1923 {
1924 int fd;
1925 vector<string>locals;
1926 stringtok(locals,::arg()["local-address"]," ,");
1927
1928 if(locals.empty())
1929 throw PDNSException("No local address specified");
1930
1931 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1932 ServiceTuple st;
1933 st.port=::arg().asNum("local-port");
1934 parseService(*i, st);
1935
1936 ComboAddress sin;
1937
1938 memset((char *)&sin,0, sizeof(sin));
1939 sin.sin4.sin_family = AF_INET;
1940 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1941 sin.sin6.sin6_family = AF_INET6;
1942 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1943 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1944 }
1945
1946 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1947 if(fd<0)
1948 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1949
1950 setCloseOnExec(fd);
1951
1952 int tmp=1;
1953 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
1954 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1955 exit(1);
1956 }
1957 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1958 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1959 }
1960
1961 #ifdef TCP_DEFER_ACCEPT
1962 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
1963 if(i==locals.begin())
1964 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1965 }
1966 #endif
1967
1968 if( ::arg().mustDo("non-local-bind") )
1969 Utility::setBindAny(AF_INET, fd);
1970
1971 #ifdef SO_REUSEPORT
1972 if(g_reusePort) {
1973 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
1974 throw PDNSException("SO_REUSEPORT: "+stringerror());
1975 }
1976 #endif
1977
1978 if (::arg().asNum("tcp-fast-open") > 0) {
1979 #ifdef TCP_FASTOPEN
1980 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1981 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1982 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1983 }
1984 #else
1985 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1986 #endif
1987 }
1988
1989 sin.sin4.sin_port = htons(st.port);
1990 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1991 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1992 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1993
1994 setNonBlocking(fd);
1995 setSocketSendBuffer(fd, 65000);
1996 listen(fd, 128);
1997 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
1998 g_tcpListenSockets.push_back(fd);
1999 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
2000 // - fd is not that which we know here, but returned from accept()
2001 if(sin.sin4.sin_family == AF_INET)
2002 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
2003 else
2004 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2005 }
2006 }
2007
2008 static void makeUDPServerSockets(unsigned int threadId)
2009 {
2010 int one=1;
2011 vector<string>locals;
2012 stringtok(locals,::arg()["local-address"]," ,");
2013
2014 if(locals.empty())
2015 throw PDNSException("No local address specified");
2016
2017 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2018 ServiceTuple st;
2019 st.port=::arg().asNum("local-port");
2020 parseService(*i, st);
2021
2022 ComboAddress sin;
2023
2024 memset(&sin, 0, sizeof(sin));
2025 sin.sin4.sin_family = AF_INET;
2026 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2027 sin.sin6.sin6_family = AF_INET6;
2028 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2029 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2030 }
2031
2032 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2033 if(fd < 0) {
2034 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2035 }
2036 if (!setSocketTimestamps(fd))
2037 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2038
2039 if(IsAnyAddress(sin)) {
2040 if(sin.sin4.sin_family == AF_INET)
2041 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2042 g_fromtosockets.insert(fd);
2043 #ifdef IPV6_RECVPKTINFO
2044 if(sin.sin4.sin_family == AF_INET6)
2045 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2046 g_fromtosockets.insert(fd);
2047 #endif
2048 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2049 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2050 }
2051 }
2052 if( ::arg().mustDo("non-local-bind") )
2053 Utility::setBindAny(AF_INET6, fd);
2054
2055 setCloseOnExec(fd);
2056
2057 setSocketReceiveBuffer(fd, 250000);
2058 sin.sin4.sin_port = htons(st.port);
2059
2060
2061 #ifdef SO_REUSEPORT
2062 if(g_reusePort) {
2063 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2064 throw PDNSException("SO_REUSEPORT: "+stringerror());
2065 }
2066 #endif
2067 socklen_t socklen=sin.getSocklen();
2068 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2069 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2070
2071 setNonBlocking(fd);
2072
2073 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
2074 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2075 if(sin.sin4.sin_family == AF_INET)
2076 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2077 else
2078 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2079 }
2080 }
2081
2082 static void daemonize(void)
2083 {
2084 if(fork())
2085 exit(0); // bye bye
2086
2087 setsid();
2088
2089 int i=open("/dev/null",O_RDWR); /* open stdin */
2090 if(i < 0)
2091 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2092 else {
2093 dup2(i,0); /* stdin */
2094 dup2(i,1); /* stderr */
2095 dup2(i,2); /* stderr */
2096 close(i);
2097 }
2098 }
2099
2100 static void usr1Handler(int)
2101 {
2102 statsWanted=true;
2103 }
2104
2105 static void usr2Handler(int)
2106 {
2107 g_quiet= !g_quiet;
2108 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2109 ::arg().set("quiet")=g_quiet ? "" : "no";
2110 }
2111
2112 static void doStats(void)
2113 {
2114 static time_t lastOutputTime;
2115 static uint64_t lastQueryCount;
2116
2117 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2118 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2119
2120 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2121 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2122 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2123 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2124 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2125
2126 L<<Logger::Notice<<"stats: throttle map: "
2127 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2128 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2129 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2130 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2131 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2132 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2133 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2134
2135 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2136 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2137
2138 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2139 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2140
2141 time_t now = time(0);
2142 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2143 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2144 }
2145 lastOutputTime = now;
2146 lastQueryCount = SyncRes::s_queries;
2147 }
2148 else if(statsWanted)
2149 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
2150
2151 statsWanted=false;
2152 }
2153
2154 static void houseKeeping(void *)
2155 {
2156 static thread_local time_t last_rootupdate, last_prune, last_secpoll;
2157 static thread_local int cleanCounter=0;
2158 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2159 try {
2160 if(s_running)
2161 return;
2162 s_running=true;
2163
2164 struct timeval now;
2165 Utility::gettimeofday(&now, 0);
2166
2167 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2168 DTime dt;
2169 dt.setTimeval(now);
2170 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2171 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2172
2173 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2174
2175 if(!((cleanCounter++)%40)) { // this is a full scan!
2176 time_t limit=now.tv_sec-300;
2177 SyncRes::pruneNSSpeeds(limit);
2178 }
2179 last_prune=time(0);
2180 }
2181
2182 if(now.tv_sec - last_rootupdate > 7200) {
2183 int res = SyncRes::getRootNS(g_now, nullptr);
2184 if (!res)
2185 last_rootupdate=now.tv_sec;
2186 }
2187
2188 if(t_id == s_distributorThreadID) {
2189
2190 if(now.tv_sec - last_secpoll >= 3600) {
2191 try {
2192 doSecPoll(&last_secpoll);
2193 }
2194 catch(std::exception& e)
2195 {
2196 L<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2197 }
2198 catch(PDNSException& e)
2199 {
2200 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2201 }
2202 catch(ImmediateServFailException &e)
2203 {
2204 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2205 }
2206 catch(...)
2207 {
2208 L<<Logger::Error<<"Exception while performing security poll"<<endl;
2209 }
2210
2211 }
2212 }
2213 s_running=false;
2214 }
2215 catch(PDNSException& ae)
2216 {
2217 s_running=false;
2218 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2219 throw;
2220 }
2221 }
2222
2223 static void makeThreadPipes()
2224 {
2225 for(unsigned int n=0; n < g_numThreads; ++n) {
2226 struct ThreadPipeSet tps;
2227 int fd[2];
2228 if(pipe(fd) < 0)
2229 unixDie("Creating pipe for inter-thread communications");
2230
2231 tps.readToThread = fd[0];
2232 tps.writeToThread = fd[1];
2233
2234 if(pipe(fd) < 0)
2235 unixDie("Creating pipe for inter-thread communications");
2236 tps.readFromThread = fd[0];
2237 tps.writeFromThread = fd[1];
2238
2239 if(pipe(fd) < 0)
2240 unixDie("Creating pipe for inter-thread communications");
2241 tps.readQueriesToThread = fd[0];
2242 tps.writeQueriesToThread = fd[1];
2243
2244 if (!setNonBlocking(tps.writeQueriesToThread)) {
2245 unixDie("Making pipe for inter-thread communications non-blocking");
2246 }
2247
2248 g_pipes.push_back(tps);
2249 }
2250 }
2251
2252 struct ThreadMSG
2253 {
2254 pipefunc_t func;
2255 bool wantAnswer;
2256 };
2257
2258 void broadcastFunction(const pipefunc_t& func)
2259 {
2260 /* This function might be called by the worker with t_id 0 during startup
2261 for the initialization of ACLs and domain maps */
2262 if (t_id != s_handlerThreadID && t_id != s_distributorThreadID) {
2263 L<<Logger::Error<<"broadcastFunction() has been called by a worker ("<<t_id<<")"<<endl;
2264 exit(1);
2265 }
2266
2267 if (t_id == s_handlerThreadID) {
2268 /* the distributor will call itself below, but if we are the handler thread,
2269 call the function ourselves to update the ACL or domain maps for example */
2270 func();
2271 }
2272
2273 int n = 0;
2274 for(ThreadPipeSet& tps : g_pipes)
2275 {
2276 if(n++ == t_id) {
2277 func(); // don't write to ourselves!
2278 continue;
2279 }
2280
2281 ThreadMSG* tmsg = new ThreadMSG();
2282 tmsg->func = func;
2283 tmsg->wantAnswer = true;
2284 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2285 delete tmsg;
2286 unixDie("write to thread pipe returned wrong size or error");
2287 }
2288
2289 string* resp = nullptr;
2290 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2291 unixDie("read from thread pipe returned wrong size or error");
2292
2293 if(resp) {
2294 delete resp;
2295 resp = nullptr;
2296 }
2297 }
2298 }
2299
2300 // This function is only called by the distributor thread, when pdns-distributes-queries is set
2301 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2302 {
2303 if (t_id != s_distributorThreadID) {
2304 L<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2305 exit(1);
2306 }
2307
2308 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2309 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2310
2311 if(target == static_cast<unsigned int>(s_distributorThreadID)) {
2312 L<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to the distributor"<<endl;
2313 exit(1);
2314 }
2315
2316 ThreadPipeSet& tps = g_pipes[target];
2317 ThreadMSG* tmsg = new ThreadMSG();
2318 tmsg->func = func;
2319 tmsg->wantAnswer = false;
2320
2321 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2322 if (written > 0) {
2323 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2324 delete tmsg;
2325 unixDie("write to thread pipe returned wrong size or error");
2326 }
2327 }
2328 else {
2329 int error = errno;
2330 delete tmsg;
2331 if (error == EAGAIN || error == EWOULDBLOCK) {
2332 g_stats.queryPipeFullDrops++;
2333 } else {
2334 unixDie("write to thread pipe returned wrong size or error:" + std::to_string(error));
2335 }
2336 }
2337 }
2338
2339 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2340 {
2341 ThreadMSG* tmsg = nullptr;
2342
2343 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
2344 unixDie("read from thread pipe returned wrong size or error");
2345 }
2346
2347 void *resp=0;
2348 try {
2349 resp = tmsg->func();
2350 }
2351 catch(std::exception& e) {
2352 if(g_logCommonErrors)
2353 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2354 }
2355 catch(PDNSException& e) {
2356 if(g_logCommonErrors)
2357 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2358 }
2359 if(tmsg->wantAnswer) {
2360 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2361 delete tmsg;
2362 unixDie("write to thread pipe returned wrong size or error");
2363 }
2364 }
2365
2366 delete tmsg;
2367 }
2368
2369 template<class T> void *voider(const boost::function<T*()>& func)
2370 {
2371 return func();
2372 }
2373
2374 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2375 {
2376 a.insert(a.end(), b.begin(), b.end());
2377 return a;
2378 }
2379
/* Appends a copy of every (string, number) pair in b to the end of a and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  auto tail = a.end();
  a.insert(tail, b.begin(), b.end());
  return a;
}
2385
2386 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2387 {
2388 a.insert(a.end(), b.begin(), b.end());
2389 return a;
2390 }
2391
2392
2393 /*
2394 This function should only be called by the handler to gather metrics, wipe the cache,
2395 reload the Lua script (not the Lua config) or change the current trace regex */
2396 template<class T> T broadcastAccFunction(const boost::function<T*()>& func)
2397 {
2398 if (t_id != s_handlerThreadID) {
2399 L<<Logger::Error<<"broadcastFunction has been called by a worker ("<<t_id<<")"<<endl;
2400 exit(1);
2401 }
2402
2403 T ret=T();
2404 for(ThreadPipeSet& tps : g_pipes)
2405 {
2406 ThreadMSG* tmsg = new ThreadMSG();
2407 tmsg->func = boost::bind(voider<T>, func);
2408 tmsg->wantAnswer = true;
2409
2410 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2411 delete tmsg;
2412 unixDie("write to thread pipe returned wrong size or error");
2413 }
2414
2415 T* resp = nullptr;
2416 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2417 unixDie("read from thread pipe returned wrong size or error");
2418
2419 if(resp) {
2420 ret += *resp;
2421 delete resp;
2422 resp = nullptr;
2423 }
2424 }
2425 return ret;
2426 }
2427
2428 template string broadcastAccFunction(const boost::function<string*()>& fun); // explicit instantiation
2429 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun); // explicit instantiation
2430 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun); // explicit instantiation
2431 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun); // explicit instantiation
2432
2433 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2434 {
2435 string remote;
2436 string msg=s_rcc.recv(&remote);
2437 RecursorControlParser rcp;
2438 RecursorControlParser::func_t* command;
2439
2440 string answer=rcp.getAnswer(msg, &command);
2441
2442 // If we are inside a chroot, we need to strip
2443 if (!arg()["chroot"].empty()) {
2444 size_t len = arg()["chroot"].length();
2445 remote = remote.substr(len);
2446 }
2447
2448 try {
2449 s_rcc.send(answer, &remote);
2450 command();
2451 }
2452 catch(std::exception& e) {
2453 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2454 }
2455 catch(PDNSException& ae) {
2456 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2457 }
2458 }
2459
2460 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2461 {
2462 PacketID* pident=any_cast<PacketID>(&var);
2463 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2464
2465 shared_array<char> buffer(new char[pident->inNeeded]);
2466
2467 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2468 if(ret > 0) {
2469 pident->inMSG.append(&buffer[0], &buffer[ret]);
2470 pident->inNeeded-=(size_t)ret;
2471 if(!pident->inNeeded || pident->inIncompleteOkay) {
2472 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2473 PacketID pid=*pident;
2474 string msg=pident->inMSG;
2475
2476 t_fdm->removeReadFD(fd);
2477 MT->sendEvent(pid, &msg);
2478 }
2479 else {
2480 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2481 }
2482 }
2483 else {
2484 PacketID tmp=*pident;
2485 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2486 string empty;
2487 MT->sendEvent(tmp, &empty); // this conveys error status
2488 }
2489 }
2490
2491 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2492 {
2493 PacketID* pid=any_cast<PacketID>(&var);
2494 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2495 if(ret > 0) {
2496 pid->outPos+=(ssize_t)ret;
2497 if(pid->outPos==pid->outMSG.size()) {
2498 PacketID tmp=*pid;
2499 t_fdm->removeWriteFD(fd);
2500 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2501 }
2502 }
2503 else { // error or EOF
2504 PacketID tmp(*pid);
2505 t_fdm->removeWriteFD(fd);
2506 string sent;
2507 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2508 }
2509 }
2510
2511 // resend event to everybody chained onto it
2512 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2513 {
2514 if(iter->key.chain.empty())
2515 return;
2516 // cerr<<"doResends called!\n";
2517 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2518 resend.fd=-1;
2519 resend.id=*i;
2520 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2521
2522 MT->sendEvent(resend, &content);
2523 g_stats.chainResends++;
2524 }
2525 }
2526
2527 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2528 {
2529 PacketID pid=any_cast<PacketID>(var);
2530 ssize_t len;
2531 char data[g_outgoingEDNSBufsize];
2532 ComboAddress fromaddr;
2533 socklen_t addrlen=sizeof(fromaddr);
2534
2535 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2536
2537 if(len < (ssize_t) sizeof(dnsheader)) {
2538 if(len < 0)
2539 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2540 else {
2541 g_stats.serverParseError++;
2542 if(g_logCommonErrors)
2543 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2544 ": packet smaller than DNS header"<<endl;
2545 }
2546
2547 t_udpclientsocks->returnSocket(fd);
2548 string empty;
2549
2550 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2551 if(iter != MT->d_waiters.end())
2552 doResends(iter, pid, empty);
2553
2554 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2555 return;
2556 }
2557
2558 dnsheader dh;
2559 memcpy(&dh, data, sizeof(dh));
2560
2561 PacketID pident;
2562 pident.remote=fromaddr;
2563 pident.id=dh.id;
2564 pident.fd=fd;
2565
2566 if(!dh.qr && g_logCommonErrors) {
2567 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2568 }
2569
2570 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2571 !dh.qr) { // one weird server
2572 pident.domain.clear();
2573 pident.type = 0;
2574 }
2575 else {
2576 try {
2577 if(len > 12)
2578 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2579 }
2580 catch(std::exception& e) {
2581 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2582 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2583 return;
2584 }
2585 }
2586 string packet;
2587 packet.assign(data, len);
2588
2589 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2590 if(iter != MT->d_waiters.end()) {
2591 doResends(iter, pident, packet);
2592 }
2593
2594 retryWithName:
2595
2596 if(!MT->sendEvent(pident, &packet)) {
2597 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2598 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2599 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2600 pident.domain == mthread->key.domain) {
2601 mthread->key.nearMisses++;
2602 }
2603
2604 // be a bit paranoid here since we're weakening our matching
2605 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2606 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2607 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2608 pident.domain = mthread->key.domain;
2609 pident.type = mthread->key.type;
2610 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2611 }
2612 }
2613 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2614 if(g_logCommonErrors) {
2615 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2616 }
2617 }
2618 else if(fd >= 0) {
2619 t_udpclientsocks->returnSocket(fd);
2620 }
2621 }
2622
2623 FDMultiplexer* getMultiplexer()
2624 {
2625 FDMultiplexer* ret;
2626 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
2627 try {
2628 ret=i.second();
2629 return ret;
2630 }
2631 catch(FDMultiplexerException &fe) {
2632 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2633 }
2634 catch(...) {
2635 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2636 }
2637 }
2638 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2639 exit(1);
2640 }
2641
2642
2643 static string* doReloadLuaScript()
2644 {
2645 string fname= ::arg()["lua-dns-script"];
2646 try {
2647 if(fname.empty()) {
2648 t_pdl.reset();
2649 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2650 return new string("unloaded\n");
2651 }
2652 else {
2653 t_pdl = std::make_shared<RecursorLua4>(fname);
2654 }
2655 }
2656 catch(std::exception& e) {
2657 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2658 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2659 }
2660
2661 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2662 return new string("(re)loaded '"+fname+"'\n");
2663 }
2664
2665 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2666 {
2667 if(begin != end)
2668 ::arg().set("lua-dns-script") = *begin;
2669
2670 return broadcastAccFunction<string>(doReloadLuaScript);
2671 }
2672
2673 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
2674 try
2675 {
2676 if(newRegex.empty()) {
2677 t_traceRegex.reset();
2678 return new string("unset\n");
2679 }
2680 else {
2681 t_traceRegex = std::make_shared<Regex>(newRegex);
2682 return new string("ok\n");
2683 }
2684 }
2685 catch(PDNSException& ae)
2686 {
2687 return new string(ae.reason+"\n");
2688 }
2689
2690 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2691 {
2692 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2693 }
2694
2695 static void checkLinuxIPv6Limits()
2696 {
2697 #ifdef __linux__
2698 string line;
2699 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2700 int lim=std::stoi(line);
2701 if(lim < 16384) {
2702 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2703 }
2704 }
2705 #endif
2706 }
2707 static void checkOrFixFDS()
2708 {
2709 unsigned int availFDs=getFilenumLimit();
2710 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2711
2712 if(wantFDs > availFDs) {
2713 unsigned int hardlimit= getFilenumLimit(true);
2714 if(hardlimit >= wantFDs) {
2715 setFilenumLimit(wantFDs);
2716 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2717 }
2718 else {
2719 int newval = (hardlimit - 25) / g_numWorkerThreads;
2720 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2721 g_maxMThreads = newval;
2722 setFilenumLimit(hardlimit);
2723 }
2724 }
2725 }
2726
2727 static void* recursorThread(int tid, bool worker);
2728
2729 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
2730 {
2731 t_allowFrom = ng;
2732 return nullptr;
2733 }
2734
// command line as saved for later re-parsing (parseACLs() passes these to ::arg().preParse())
int g_argc = 0;
char** g_argv = nullptr;
2737
2738 void parseACLs()
2739 {
2740 static bool l_initialized;
2741
2742 if(l_initialized) { // only reload configuration file on second call
2743 string configname=::arg()["config-dir"]+"/recursor.conf";
2744 if(::arg()["config-name"]!="") {
2745 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2746 }
2747 cleanSlashes(configname);
2748
2749 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2750 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2751 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2752 ::arg().preParseFile(configname.c_str(), "include-dir");
2753 ::arg().preParse(g_argc, g_argv, "include-dir");
2754
2755 // then process includes
2756 std::vector<std::string> extraConfigs;
2757 ::arg().gatherIncludes(extraConfigs);
2758
2759 for(const std::string& fn : extraConfigs) {
2760 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2761 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2762 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2763 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2764 }
2765
2766 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2767 ::arg().preParse(g_argc, g_argv, "allow-from");
2768 }
2769
2770 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2771 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
2772
2773 if(!::arg()["allow-from-file"].empty()) {
2774 string line;
2775 ifstream ifs(::arg()["allow-from-file"].c_str());
2776 if(!ifs) {
2777 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2778 }
2779
2780 string::size_type pos;
2781 while(getline(ifs,line)) {
2782 pos=line.find('#');
2783 if(pos!=string::npos)
2784 line.resize(pos);
2785 trim(line);
2786 if(line.empty())
2787 continue;
2788
2789 allowFrom->addMask(line);
2790 }
2791 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2792 }
2793 else if(!::arg()["allow-from"].empty()) {
2794 vector<string> ips;
2795 stringtok(ips, ::arg()["allow-from"], ", ");
2796
2797 L<<Logger::Warning<<"Only allowing queries from: ";
2798 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2799 allowFrom->addMask(*i);
2800 if(i!=ips.begin())
2801 L<<Logger::Warning<<", ";
2802 L<<Logger::Warning<<*i;
2803 }
2804 L<<Logger::Warning<<endl;
2805 }
2806 else {
2807 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2808 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2809 allowFrom = nullptr;
2810 }
2811
2812 g_initialAllowFrom = allowFrom;
2813 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2814 oldAllowFrom = nullptr;
2815
2816 l_initialized = true;
2817 }
2818
2819
2820 static void setupDelegationOnly()
2821 {
2822 vector<string> parts;
2823 stringtok(parts, ::arg()["delegation-only"], ", \t");
2824 for(const auto& p : parts) {
2825 SyncRes::addDelegationOnly(DNSName(p));
2826 }
2827 }
2828
2829 static std::map<unsigned int, std::set<int> > parseCPUMap()
2830 {
2831 std::map<unsigned int, std::set<int> > result;
2832
2833 const std::string value = ::arg()["cpu-map"];
2834
2835 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2836 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2837 return result;
2838 }
2839
2840 std::vector<std::string> parts;
2841
2842 stringtok(parts, value, " \t");
2843
2844 for(const auto& part : parts) {
2845 if (part.find('=') == string::npos)
2846 continue;
2847
2848 try {
2849 auto headers = splitField(part, '=');
2850 trim(headers.first);
2851 trim(headers.second);
2852
2853 unsigned int threadId = pdns_stou(headers.first);
2854 std::vector<std::string> cpus;
2855
2856 stringtok(cpus, headers.second, ",");
2857
2858 for(const auto& cpu : cpus) {
2859 int cpuId = std::stoi(cpu);
2860
2861 result[threadId].insert(cpuId);
2862 }
2863 }
2864 catch(const std::exception& e) {
2865 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2866 }
2867 }
2868
2869 return result;
2870 }
2871
2872 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2873 {
2874 const auto& cpuMapping = cpusMap.find(n);
2875 if (cpuMapping != cpusMap.cend()) {
2876 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2877 if (rc == 0) {
2878 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2879 for (const auto cpu : cpuMapping->second) {
2880 L<<Logger::Info<<" "<<cpu;
2881 }
2882 L<<Logger::Info<<endl;
2883 }
2884 else {
2885 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2886 for (const auto cpu : cpuMapping->second) {
2887 L<<Logger::Info<<" "<<cpu;
2888 }
2889 L<<Logger::Info<<strerror(rc)<<endl;
2890 }
2891 }
2892 }
2893
2894 static int serviceMain(int argc, char*argv[])
2895 {
2896 L.setName(s_programname);
2897 L.disableSyslog(::arg().mustDo("disable-syslog"));
2898 L.setTimestamps(::arg().mustDo("log-timestamp"));
2899
2900 if(!::arg()["logging-facility"].empty()) {
2901 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2902 if(val >= 0)
2903 theL().setFacility(val);
2904 else
2905 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2906 }
2907
2908 showProductVersion();
2909 seedRandom(::arg()["entropy-source"]);
2910
2911 g_disthashseed=dns_random(0xffffffff);
2912
2913 checkLinuxIPv6Limits();
2914 try {
2915 vector<string> addrs;
2916 if(!::arg()["query-local-address6"].empty()) {
2917 SyncRes::s_doIPv6=true;
2918 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2919
2920 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2921 for(const string& addr : addrs) {
2922 g_localQueryAddresses6.push_back(ComboAddress(addr));
2923 }
2924 }
2925 else {
2926 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2927 }
2928 addrs.clear();
2929 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2930 for(const string& addr : addrs) {
2931 g_localQueryAddresses4.push_back(ComboAddress(addr));
2932 }
2933 }
2934 catch(std::exception& e) {
2935 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2936 exit(99);
2937 }
2938
2939 // keep this ABOVE loadRecursorLuaConfig!
2940 if(::arg()["dnssec"]=="off")
2941 g_dnssecmode=DNSSECMode::Off;
2942 else if(::arg()["dnssec"]=="process-no-validate")
2943 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2944 else if(::arg()["dnssec"]=="process")
2945 g_dnssecmode=DNSSECMode::Process;
2946 else if(::arg()["dnssec"]=="validate")
2947 g_dnssecmode=DNSSECMode::ValidateAll;
2948 else if(::arg()["dnssec"]=="log-fail")
2949 g_dnssecmode=DNSSECMode::ValidateForLog;
2950 else {
2951 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2952 exit(1);
2953 }
2954
2955 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2956 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
2957
2958 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2959 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2960
2961 try {
2962 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2963 }
2964 catch (PDNSException &e) {
2965 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2966 exit(1);
2967 }
2968
2969 parseACLs();
2970 sortPublicSuffixList();
2971
2972 if(!::arg()["dont-query"].empty()) {
2973 vector<string> ips;
2974 stringtok(ips, ::arg()["dont-query"], ", ");
2975 ips.push_back("0.0.0.0");
2976 ips.push_back("::");
2977
2978 L<<Logger::Warning<<"Will not send queries to: ";
2979 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2980 SyncRes::addDontQuery(*i);
2981 if(i!=ips.begin())
2982 L<<Logger::Warning<<", ";
2983 L<<Logger::Warning<<*i;
2984 }
2985 L<<Logger::Warning<<endl;
2986 }
2987
2988 g_quiet=::arg().mustDo("quiet");
2989
2990 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2991 if(g_weDistributeQueries) {
2992 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2993 }
2994
2995 setupDelegationOnly();
2996 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2997
2998 if(::arg()["trace"]=="fail") {
2999 SyncRes::setDefaultLogMode(SyncRes::Store);
3000 }
3001 else if(::arg().mustDo("trace")) {
3002 SyncRes::setDefaultLogMode(SyncRes::Log);
3003 ::arg().set("quiet")="no";
3004 g_quiet=false;
3005 g_dnssecLOG=true;
3006 }
3007
3008 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
3009
3010 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
3011
3012 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
3013 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
3014 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
3015 // Cap the packetcache-servfail-ttl to the packetcache-ttl
3016 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
3017 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
3018 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
3019 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
3020 SyncRes::s_serverID=::arg()["server-id"];
3021 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3022 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3023 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3024 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3025 if(SyncRes::s_serverID.empty()) {
3026 char tmp[128];
3027 gethostname(tmp, sizeof(tmp)-1);
3028 SyncRes::s_serverID=tmp;
3029 }
3030
3031 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3032 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3033
3034 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3035 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3036 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3037 }
3038 else {
3039 bool found = false;
3040 for (const auto& addr : g_localQueryAddresses4) {
3041 if (!IsAnyAddress(addr)) {
3042 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3043 found = true;
3044 break;
3045 }
3046 }
3047 if (!found) {
3048 for (const auto& addr : g_localQueryAddresses6) {
3049 if (!IsAnyAddress(addr)) {
3050 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3051 found = true;
3052 break;
3053 }
3054 }
3055 if (!found) {
3056 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3057 }
3058 }
3059 }
3060
3061 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3062
3063 g_initialDomainMap = parseAuthAndForwards();
3064
3065 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3066
3067 g_logCommonErrors=::arg().mustDo("log-common-errors");
3068 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3069
3070 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3071 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3072
3073 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3074
3075 g_numWorkerThreads = ::arg().asNum("threads");
3076 if (g_numWorkerThreads < 1) {
3077 L<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3078 g_numWorkerThreads = 1;
3079 }
3080
3081 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3082 g_maxMThreads = ::arg().asNum("max-mthreads");
3083
3084 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3085
3086 g_statisticsInterval = ::arg().asNum("statistics-interval");
3087
3088 #ifdef SO_REUSEPORT
3089 g_reusePort = ::arg().mustDo("reuseport");
3090 #endif
3091
3092 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3093
3094 if (g_useOneSocketPerThread) {
3095 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3096 makeUDPServerSockets(threadId);
3097 makeTCPServerSockets(threadId);
3098 }
3099 }
3100 else {
3101 makeUDPServerSockets(0);
3102 makeTCPServerSockets(0);
3103 }
3104
3105 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3106 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3107
3108 int forks;
3109 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3110 if(!fork()) // we are child
3111 break;
3112 }
3113
3114 if(::arg().mustDo("daemon")) {
3115 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3116 L.toConsole(Logger::Critical);
3117 daemonize();
3118 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3119 }
3120 signal(SIGUSR1,usr1Handler);
3121 signal(SIGUSR2,usr2Handler);
3122 signal(SIGPIPE,SIG_IGN);
3123
3124 checkOrFixFDS();
3125
3126 #ifdef HAVE_LIBSODIUM
3127 if (sodium_init() == -1) {
3128 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3129 exit(99);
3130 }
3131 #endif
3132
3133 openssl_thread_setup();
3134 openssl_seed();
3135
3136 int newgid=0;
3137 if(!::arg()["setgid"].empty())
3138 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3139 int newuid=0;
3140 if(!::arg()["setuid"].empty())
3141 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3142
3143 Utility::dropGroupPrivs(newuid, newgid);
3144
3145 if (!::arg()["chroot"].empty()) {
3146 #ifdef HAVE_SYSTEMD
3147 char *ns;
3148 ns = getenv("NOTIFY_SOCKET");
3149 if (ns != nullptr) {
3150 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3151 exit(1);
3152 }
3153 #endif
3154 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3155 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3156 exit(1);
3157 }
3158 else
3159 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3160 }
3161
3162 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3163 if(!s_pidfname.empty())
3164 unlink(s_pidfname.c_str()); // remove possible old pid file
3165 writePid();
3166
3167 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3168
3169 Utility::dropUserPrivs(newuid);
3170
3171 makeThreadPipes();
3172
3173 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3174 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3175 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3176
3177 if (::arg().mustDo("snmp-agent")) {
3178 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3179 g_snmpAgent->run();
3180 }
3181
3182 /* This thread handles the web server, carbon, statistics and the control channel */
3183 std::thread handlerThread(recursorThread, s_handlerThreadID, false);
3184
3185 const auto cpusMap = parseCPUMap();
3186
3187 std::vector<std::thread> workers(g_numThreads);
3188 if(g_numThreads == 1) {
3189 L<<Logger::Warning<<"Operating unthreaded"<<endl;
3190 #ifdef HAVE_SYSTEMD
3191 sd_notify(0, "READY=1");
3192 #endif
3193 setCPUMap(cpusMap, 0, pthread_self());
3194 recursorThread(0, true);
3195 }
3196 else {
3197 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3198 for(unsigned int n=0; n < g_numThreads; ++n) {
3199 workers[n] = std::thread(recursorThread, n, true);
3200
3201 setCPUMap(cpusMap, n, workers[n].native_handle());
3202 }
3203 #ifdef HAVE_SYSTEMD
3204 sd_notify(0, "READY=1");
3205 #endif
3206 workers.back().join();
3207 }
3208 return 0;
3209 }
3210
// Entry point of every recursor thread.
//
//   n       value stored in t_id; worker threads also use it to index
//           g_pipes and deferredAdds
//   worker  true for a thread that answers queries, false for the handler
//           thread (web server, control channel, statistics, carbon)
//
// Sets up the per-thread (t_*) state, registers the file descriptors this
// thread has to watch and then runs the event loop, which has no exit of its
// own. The function is a function-try-block: any exception that escapes is
// logged and makes the function return 0.
3211 static void* recursorThread(int n, bool worker)
3212 try
3213 {
3214 t_id=n;
3215 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3216 SyncRes::setDomainMap(g_initialDomainMap);
3217 t_allowFrom = g_initialAllowFrom;
3218 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3219 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3220 primeHints();
3221
3222 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3223
3224 #ifdef HAVE_PROTOBUF
3225 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
3226 #endif
3227 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3228
// Load the optional Lua script for this thread; a script that fails to load
// ends the whole process immediately through _exit().
3229 try {
3230 if(!::arg()["lua-dns-script"].empty()) {
3231 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
3232 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3233 }
3234 }
3235 catch(std::exception &e) {
3236 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3237 _exit(99);
3238 }
3239
// The configured number of statistics entries is divided over the worker
// threads; with a resulting size of 0 no ring buffers are allocated at all.
3240 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3241 if(ringsize) {
3242 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3243 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3244 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3245 else
3246 t_remotes->set_capacity(ringsize);
3247 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3248 t_servfailremotes->set_capacity(ringsize);
3249 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3250 t_largeanswerremotes->set_capacity(ringsize);
3251
3252 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3253 t_queryring->set_capacity(ringsize);
3254 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3255 t_servfailqueryring->set_capacity(ringsize);
3256 }
3257
// This thread's MTasker, created with the configured 'stack-size'.
3258 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3259
3260 PacketID pident;
3261
3262 t_fdm=getMultiplexer();
3263
// Handler thread: optionally start the web server.
// Worker thread: watch its two pipes and the deferred listening sockets.
3264 if(!worker) {
3265 if(::arg().mustDo("webserver")) {
3266 L<<Logger::Warning << "Enabling web server" << endl;
3267 try {
// The pointer is not kept and the object is never deleted here; presumably it
// registers itself with t_fdm and is meant to live as long as the process -
// TODO confirm.
3268 new RecursorWebServer(t_fdm);
3269 }
3270 catch(PDNSException &e) {
3271 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3272 exit(99);
3273 }
3274 }
3275 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
3276 }
3277 else {
3278 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3279 t_fdm->addReadFD(g_pipes[t_id].readQueriesToThread, handlePipeRequest);
3280
3281 if(g_useOneSocketPerThread) {
3282 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3283 t_fdm->addReadFD(i->first, i->second);
3284 }
3285 }
3286 else {
3287 if(!g_weDistributeQueries || t_id == s_distributorThreadID) { // if we distribute queries, only t_id = 0 listens
3288 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3289 t_fdm->addReadFD(i->first, i->second);
3290 }
3291 }
3292 }
3293 }
3294
3295 registerAllStats();
3296
3297 if(!worker) {
3298 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3299 }
3300
3301 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3302
// Whether the TCP listening sockets are currently registered with t_fdm;
// toggled at the bottom of the loop around the max-tcp-clients limit.
3303 bool listenOnTCP(true);
3304
3305 time_t last_stat = 0;
3306 time_t last_carbon=0;
3307 time_t carbonInterval=::arg().asNum("carbon-interval");
3308 counter.store(0); // used to periodically execute certain tasks
// Event loop: there is no break, it is only left through an exception.
3309 for(;;) {
3310 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3311
// Every 500th iteration: start housekeeping as an mthread.
3312 if(!(counter%500)) {
3313 MT->makeThread(houseKeeping, 0);
3314 }
3315
// Every 55th iteration: drop the read descriptors whose timeout has expired;
// their parameter is taken to be a TCPConnection.
3316 if(!(counter%55)) {
3317 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
3318 expired_t expired=t_fdm->getTimeouts(g_now);
3319
3320 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
3321 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
3322 if(g_logCommonErrors)
3323 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
3324 t_fdm->removeReadFD(i->first);
3325 }
3326 }
3327
3328 counter++;
3329
// Handler thread only: periodic statistics and carbon export.
3330 if(!worker) {
3331 if(statsWanted || (g_statisticsInterval > 0 && (g_now.tv_sec - last_stat) >= g_statisticsInterval)) {
3332 doStats();
3333 last_stat = g_now.tv_sec;
3334 }
3335
3336 Utility::gettimeofday(&g_now, 0);
3337
3338 if((g_now.tv_sec - last_carbon) >= carbonInterval) {
3339 MT->makeThread(doCarbonDump, 0);
3340 last_carbon = g_now.tv_sec;
3341 }
3342 }
3343
3344 t_fdm->run(&g_now);
3345 // 'run' updates g_now for us
3346
// Stop accepting new TCP connections while more than max-tcp-clients are
// open, and start accepting again once the count is back within the limit.
3347 if(worker && (!g_weDistributeQueries || t_id == s_distributorThreadID)) { // if pdns distributes queries, only tid 0 should do this
3348 if(listenOnTCP) {
3349 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3350 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3351 t_fdm->removeReadFD(*i);
3352 listenOnTCP=false;
3353 }
3354 }
3355 else {
3356 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3357 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3358 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3359 listenOnTCP=true;
3360 }
3361 }
3362 }
3363 }
3364 }
// Handlers of the function-try-block: log the exception and end the thread.
3365 catch(PDNSException &ae) {
3366 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3367 return 0;
3368 }
3369 catch(std::exception &e) {
3370 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3371 return 0;
3372 }
3373 catch(...) {
3374 L<<Logger::Error<<"any other exception in main: "<<endl;
3375 return 0;
3376 }
3377
3378
3379 int main(int argc, char **argv)
3380 {
3381 g_argc = argc;
3382 g_argv = argv;
3383 g_stats.startupTime=time(0);
3384 versionSetProduct(ProductRecursor);
3385 reportBasicTypes();
3386 reportOtherTypes();
3387
3388 int ret = EXIT_SUCCESS;
3389
3390 try {
3391 ::arg().set("stack-size","stack size per mthread")="200000";
3392 ::arg().set("soa-minimum-ttl","Don't change")="0";
3393 ::arg().set("no-shuffle","Don't change")="off";
3394 ::arg().set("local-port","port to listen on")="53";
3395 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3396 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3397 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3398 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3399 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3400 ::arg().set("daemon","Operate as a daemon")="no";
3401 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3402 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3403 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3404 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3405 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3406 ::arg().set("chroot","switch to chroot jail")="";
3407 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3408 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3409 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3410 ::arg().set("threads", "Launch this number of threads")="2";
3411 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3412 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3413 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3414 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3415 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3416 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3417 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3418 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3419 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3420 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3421 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3422 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3423 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3424 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3425 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3426 ::arg().set("quiet","Suppress logging of questions and answers")="";
3427 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3428 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3429 ::arg().set("socket-owner","Owner of socket")="";
3430 ::arg().set("socket-group","Group of socket")="";
3431 ::arg().set("socket-mode", "Permissions for socket")="";
3432
3433 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3434 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3435 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3436 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3437 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3438 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3439 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3440 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3441 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3442 ::arg().set("hint-file", "If set, load root hints from this file")="";
3443 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3444 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3445 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3446 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3447 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3448 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3449 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3450 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3451 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3452 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3453 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3454 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3455 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3456 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3457 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3458 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3459 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3460 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3461 ::arg().set("lua-config-file", "More powerful configuration options")="";
3462
3463 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3464 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3465 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3466 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3467 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3468 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3469 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3470 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3471 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3472 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3473 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3474 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3475 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3476 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3477 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3478 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3479 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3480 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3481 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3482 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3483 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3484 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3485 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3486 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3487 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3488 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3489
3490 ::arg().set("include-dir","Include *.conf files from this directory")="";
3491 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3492
3493 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3494
3495 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3496 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3497
3498 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3499 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3500
3501 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3502
3503 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3504
3505 ::arg().setCmd("help","Provide a helpful message");
3506 ::arg().setCmd("version","Print version string");
3507 ::arg().setCmd("config","Output blank configuration");
3508 L.toConsole(Logger::Info);
3509 ::arg().laxParse(argc,argv); // do a lax parse
3510
3511 string configname=::arg()["config-dir"]+"/recursor.conf";
3512 if(::arg()["config-name"]!="") {
3513 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3514 s_programname+="-"+::arg()["config-name"];
3515 }
3516 cleanSlashes(configname);
3517
3518 if(::arg().mustDo("config")) {
3519 cout<<::arg().configstring()<<endl;
3520 exit(0);
3521 }
3522
3523 if(!::arg().file(configname.c_str()))
3524 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3525
3526 ::arg().parse(argc,argv);
3527
3528 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3529 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3530 exit(EXIT_FAILURE);
3531 }
3532
3533 if (::arg()["socket-dir"].empty()) {
3534 if (::arg()["chroot"].empty())
3535 ::arg().set("socket-dir") = LOCALSTATEDIR;
3536 else
3537 ::arg().set("socket-dir") = "/";
3538 }
3539
3540 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3541
3542 if(::arg().asNum("threads")==1)
3543 ::arg().set("pdns-distributes-queries")="no";
3544
3545 if(::arg().mustDo("help")) {
3546 cout<<"syntax:"<<endl<<endl;
3547 cout<<::arg().helpstring(::arg()["help"])<<endl;
3548 exit(0);
3549 }
3550 if(::arg().mustDo("version")) {
3551 showProductVersion();
3552 showBuildConfiguration();
3553 exit(0);
3554 }
3555
3556 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3557
3558 if (logUrgency < Logger::Error)
3559 logUrgency = Logger::Error;
3560 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3561 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3562 }
3563 L.setLoglevel(logUrgency);
3564 L.toConsole(logUrgency);
3565
3566 serviceMain(argc, argv);
3567 }
3568 catch(PDNSException &ae) {
3569 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3570 ret=EXIT_FAILURE;
3571 }
3572 catch(std::exception &e) {
3573 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3574 ret=EXIT_FAILURE;
3575 }
3576 catch(...) {
3577 L<<Logger::Error<<"any other exception in main: "<<endl;
3578 ret=EXIT_FAILURE;
3579 }
3580
3581 return ret;
3582 }