]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Move carbon/webserver/control/stats handling to a separate thread
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include "ws-recursor.hh"
31 #include <pthread.h>
32 #include "recpacketcache.hh"
33 #include "utility.hh"
34 #include "dns_random.hh"
35 #ifdef HAVE_LIBSODIUM
36 #include <sodium.h>
37 #endif
38 #include "opensslsigners.hh"
39 #include <iostream>
40 #include <errno.h>
41 #include <boost/static_assert.hpp>
42 #include <map>
43 #include <set>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
46 #include <stdio.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include "misc.hh"
50 #include "mtasker.hh"
51 #include <utility>
52 #include "arguments.hh"
53 #include "syncres.hh"
54 #include <fcntl.h>
55 #include <fstream>
56 #include "sortlist.hh"
57 #include "sstuff.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
63 #ifdef MALLOC_TRACE
64 #include "malloctrace.hh"
65 #endif
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
72 #include "logger.hh"
73 #include "iputils.hh"
74 #include "mplexer.hh"
75 #include "config.h"
76 #include "lua-recursor4.hh"
77 #include "version.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
80 #include "dnsname.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
86 #include "gettime.hh"
87
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
90
91 #ifdef HAVE_SYSTEMD
92 #include <systemd/sd-daemon.h>
93 #endif
94
95 #include "namespaces.hh"
96
// Counter per client address. The comparator is ComboAddress::addressOnlyLessThan,
// so presumably the port is ignored when keying -- TODO confirm in iputils.hh.
typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;

// Thread-local state that is private to this file.
static thread_local std::shared_ptr<RecursorLua4> t_pdl; // Lua engine handed to SyncRes and consulted for the Lua hooks when set
static thread_local int t_id; // value reported by getRecursorThreadId()
static thread_local std::shared_ptr<Regex> t_traceRegex; // when set, queries whose qname matches are traced
static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts; // maintained by the TCPConnection constructor and destructor

// Thread-local state with external linkage.
thread_local std::unique_ptr<MT_t> MT; // the big MTasker
thread_local std::unique_ptr<MemRecursorCache> t_RC;
thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
thread_local FDMultiplexer* t_fdm{nullptr}; // multiplexer the a*() helpers below register their descriptors with
// Ring buffers; each is only written to when it has been allocated (see updateResponseStats()).
thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
#ifdef HAVE_PROTOBUF
thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
#endif
__thread struct timeval g_now; // timestamp, updated (too) frequently
115
// for communicating with our threads
// Four plain int members, none of which is initialised by the struct itself.
// Judging by the names these are presumably the write/read ends of one pipe
// towards a thread and of one pipe back from it -- TODO confirm against the
// code that fills g_pipes.
struct ThreadPipeSet
{
  int writeToThread;
  int readToThread;
  int writeFromThread;
  int readFromThread;
};
124
typedef vector<int> tcpListenSockets_t;
typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t; // (int, callback) pairs; the int is presumably a file descriptor -- TODO confirm

// Wildcard addresses getQueryLocalAddress() falls back to when the matching
// g_localQueryAddresses list is empty.
static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6; // getQueryLocalAddress() picks a random entry per address family
static AtomicCounter counter;
static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
static size_t g_tcpMaxQueriesPerConn;
static uint64_t g_latencyStatSize;
static uint32_t g_disthashseed;
static unsigned int g_maxTCPPerClient;
static unsigned int g_networkTimeoutMsec; // timeout handed to MT->waitEvent() by the a*() helpers
static unsigned int g_maxMThreads;
static unsigned int g_numWorkerThreads;
static int g_tcpTimeout;
static uint16_t g_udpTruncationThreshold; // upper bound applied to the UDP answer size in startDoResolve()
static std::atomic<bool> statsWanted;
static std::atomic<bool> g_quiet; // when false, startDoResolve() logs every incoming question
static bool g_logCommonErrors; // gates the "Sending SERVFAIL" notice in startDoResolve()
static bool g_anyToTcp; // when true, ANY queries over UDP are answered with TC=1
static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
static bool g_reusePort{false};
static bool g_useOneSocketPerThread;
static bool g_gettagNeedsEDNSOptions{false};
static time_t g_statisticsInterval;
static bool g_useIncomingECS; // when true, startDoResolve() looks for an ECS option in the query's EDNS options
std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;

RecursorControlChannel s_rcc; // only active in thread 0
RecursorStats g_stats;
string s_programname="pdns_recursor";
string s_pidfname; // path writePid() appends the process id to
bool g_lowercaseOutgoing;
unsigned int g_numThreads;
uint16_t g_outgoingEDNSBufsize;
bool g_logRPZChanges{false};
168
// Comma-separated list of loopback, private, shared, link-local and unique-local ranges.
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Both lists joined into one string literal by adjacent-literal concatenation.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
177
178 //! used to send information to a newborn mthread
179 struct DNSComboWriter {
180 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
181 d_tcp(false), d_socket(-1)
182 {}
183 MOADNSParser d_mdp;
184 void setRemote(const ComboAddress* sa)
185 {
186 d_remote=*sa;
187 }
188
189 void setLocal(const ComboAddress& sa)
190 {
191 d_local=sa;
192 }
193
194
195 void setSocket(int sock)
196 {
197 d_socket=sock;
198 }
199
200 string getRemote() const
201 {
202 return d_remote.toString();
203 }
204
205 struct timeval d_now;
206 ComboAddress d_remote, d_local;
207 #ifdef HAVE_PROTOBUF
208 boost::uuids::uuid d_uuid;
209 string d_requestorId;
210 string d_deviceId;
211 #endif
212 EDNSSubnetOpts d_ednssubnet;
213 bool d_ecsFound{false};
214 bool d_ecsParsed{false};
215 bool d_tcp;
216 int d_socket;
217 unsigned int d_tag{0};
218 uint32_t d_qhash{0};
219 string d_query;
220 shared_ptr<TCPConnection> d_tcpConnection;
221 vector<pair<uint16_t, string> > d_ednsOpts;
222 std::vector<std::string> d_policyTags;
223 LuaContext::LuaObject d_data;
224 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
225 bool d_variable{false};
226 };
227
228 MT_t* getMT()
229 {
230 return MT ? MT.get() : nullptr;
231 }
232
233 ArgvMap &arg()
234 {
235 static ArgvMap theArg;
236 return theArg;
237 }
238
239 unsigned int getRecursorThreadId()
240 {
241 return static_cast<unsigned int>(t_id);
242 }
243
244 int getMTaskerTID()
245 {
246 return MT->getTid();
247 }
248
249 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
250
251 // -1 is error, 0 is timeout, 1 is success
252 int asendtcp(const string& data, Socket* sock)
253 {
254 PacketID pident;
255 pident.sock=sock;
256 pident.outMSG=data;
257
258 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
259 string packet;
260
261 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
262
263 if(!ret || ret==-1) { // timeout
264 t_fdm->removeWriteFD(sock->getHandle());
265 }
266 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
267 return -1;
268 }
269 return ret;
270 }
271
272 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
273
274 // -1 is error, 0 is timeout, 1 is success
275 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
276 {
277 data.clear();
278 PacketID pident;
279 pident.sock=sock;
280 pident.inNeeded=len;
281 pident.inIncompleteOkay=incompleteOkay;
282 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
283
284 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
285 if(!ret || ret==-1) { // timeout
286 t_fdm->removeReadFD(sock->getHandle());
287 }
288 else if(data.empty()) {// error, EOF or other
289 return -1;
290 }
291
292 return ret;
293 }
294
295 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
296 {
297 PacketID pident=*any_cast<PacketID>(&var);
298 char resp[512];
299 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
300 t_fdm->removeReadFD(fd);
301 if(ret >= 0) {
302 string data(resp, (size_t) ret);
303 MT->sendEvent(pident, &data);
304 }
305 else {
306 string empty;
307 MT->sendEvent(pident, &empty);
308 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
309 }
310 }
311 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
312 {
313 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
314 s.setNonBlocking();
315 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
316
317 s.bind(local);
318 s.connect(dest);
319 s.send(query);
320
321 PacketID pident;
322 pident.sock=&s;
323 pident.type=0;
324 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
325
326 string data;
327
328 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
329
330 if(!ret || ret==-1) { // timeout
331 t_fdm->removeReadFD(s.getHandle());
332 }
333 else if(data.empty()) {// error, EOF or other
334 // we could special case this
335 return data;
336 }
337 return data;
338 }
339
340 //! pick a random query local address
341 ComboAddress getQueryLocalAddress(int family, uint16_t port)
342 {
343 ComboAddress ret;
344 if(family==AF_INET) {
345 if(g_localQueryAddresses4.empty())
346 ret = g_local4;
347 else
348 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
349 ret.sin4.sin_port = htons(port);
350 }
351 else {
352 if(g_localQueryAddresses6.empty())
353 ret = g_local6;
354 else
355 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
356
357 ret.sin6.sin6_port = htons(port);
358 }
359 return ret;
360 }
361
362 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
363
364 static void setSocketBuffer(int fd, int optname, uint32_t size)
365 {
366 uint32_t psize=0;
367 socklen_t len=sizeof(psize);
368
369 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
370 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
371 return;
372 }
373
374 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
375 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
376 }
377
378
379 static void setSocketReceiveBuffer(int fd, uint32_t size)
380 {
381 setSocketBuffer(fd, SO_RCVBUF, size);
382 }
383
384 static void setSocketSendBuffer(int fd, uint32_t size)
385 {
386 setSocketBuffer(fd, SO_SNDBUF, size);
387 }
388
389
390 // you can ask this class for a UDP socket to send a query from
391 // this socket is not yours, don't even think about deleting it
392 // but after you call 'returnSocket' on it, don't assume anything anymore
393 class UDPClientSocks
394 {
395 unsigned int d_numsocks;
396 public:
397 UDPClientSocks() : d_numsocks(0)
398 {
399 }
400
401 typedef set<int> socks_t;
402 socks_t d_socks;
403
404 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
405 int getSocket(const ComboAddress& toaddr, int* fd)
406 {
407 *fd=makeClientSocket(toaddr.sin4.sin_family);
408 if(*fd < 0) // temporary error - receive exception otherwise
409 return -2;
410
411 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
412 int err = errno;
413 // returnSocket(*fd);
414 try {
415 closesocket(*fd);
416 }
417 catch(const PDNSException& e) {
418 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
419 }
420
421 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
422 return -2;
423 return -1;
424 }
425
426 d_socks.insert(*fd);
427 d_numsocks++;
428 return 0;
429 }
430
431 void returnSocket(int fd)
432 {
433 socks_t::iterator i=d_socks.find(fd);
434 if(i==d_socks.end()) {
435 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
436 }
437 returnSocketLocked(i);
438 }
439
440 // return a socket to the pool, or simply erase it
441 void returnSocketLocked(socks_t::iterator& i)
442 {
443 if(i==d_socks.end()) {
444 throw PDNSException("Trying to return a socket not in the pool");
445 }
446 try {
447 t_fdm->removeReadFD(*i);
448 }
449 catch(FDMultiplexerException& e) {
450 // we sometimes return a socket that has not yet been assigned to t_fdm
451 }
452 try {
453 closesocket(*i);
454 }
455 catch(const PDNSException& e) {
456 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
457 }
458
459 d_socks.erase(i++);
460 --d_numsocks;
461 }
462
463 // returns -1 for errors which might go away, throws for ones that won't
464 static int makeClientSocket(int family)
465 {
466 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
467
468 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
469 return ret;
470
471 if(ret<0)
472 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
473
474 // setCloseOnExec(ret); // we're not going to exec
475
476 int tries=10;
477 ComboAddress sin;
478 while(--tries) {
479 uint16_t port;
480
481 if(tries==1) // fall back to kernel 'random'
482 port = 0;
483 else
484 port = 1025 + dns_random(64510);
485
486 sin=getQueryLocalAddress(family, port); // does htons for us
487
488 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
489 break;
490 }
491 if(!tries)
492 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
493
494 setNonBlocking(ret);
495 return ret;
496 }
497 };
498
499 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
500
501 /* these two functions are used by LWRes */
502 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
503 int asendto(const char *data, size_t len, int flags,
504 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
505 {
506
507 PacketID pident;
508 pident.domain = domain;
509 pident.remote = toaddr;
510 pident.type = qtype;
511
512 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
513 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
514
515 for(; chain.first != chain.second; chain.first++) {
516 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
517 /*
518 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
519 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
520 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
521 */
522 chain.first->key.chain.insert(id); // we can chain
523 *fd=-1; // gets used in waitEvent / sendEvent later on
524 return 1;
525 }
526 }
527
528 int ret=t_udpclientsocks->getSocket(toaddr, fd);
529 if(ret < 0)
530 return ret;
531
532 pident.fd=*fd;
533 pident.id=id;
534
535 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
536 ret = send(*fd, data, len, 0);
537
538 int tmp = errno;
539
540 if(ret < 0)
541 t_udpclientsocks->returnSocket(*fd);
542
543 errno = tmp; // this is for logging purposes only
544 return ret;
545 }
546
547 // -1 is error, 0 is timeout, 1 is success
548 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
549 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
550 {
551 static optional<unsigned int> nearMissLimit;
552 if(!nearMissLimit)
553 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
554
555 PacketID pident;
556 pident.fd=fd;
557 pident.id=id;
558 pident.domain=domain;
559 pident.type = qtype;
560 pident.remote=fromaddr;
561
562 string packet;
563 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
564
565 if(ret > 0) {
566 if(packet.empty()) // means "error"
567 return -1;
568
569 *d_len=packet.size();
570 memcpy(data,packet.c_str(),min(len,*d_len));
571 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
572 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
573 g_stats.spoofCount++;
574 return -1;
575 }
576 }
577 else {
578 if(fd >= 0)
579 t_udpclientsocks->returnSocket(fd);
580 }
581 return ret;
582 }
583
584 static void writePid(void)
585 {
586 if(!::arg().mustDo("write-pid"))
587 return;
588 ofstream of(s_pidfname.c_str(), std::ios_base::app);
589 if(of)
590 of<< Utility::getpid() <<endl;
591 else
592 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
593 }
594
595 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
596 {
597 ++s_currentConnections;
598 (*t_tcpClientCounts)[d_remote]++;
599 }
600
601 TCPConnection::~TCPConnection()
602 {
603 try {
604 if(closesocket(d_fd) < 0)
605 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
606 }
607 catch(const PDNSException& e) {
608 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
609 }
610
611 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
612 t_tcpClientCounts->erase(d_remote);
613 --s_currentConnections;
614 }
615
616 AtomicCounter TCPConnection::s_currentConnections;
617
618 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
619
620 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
621 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
622 {
623 if(packetsize > 1000 && t_largeanswerremotes)
624 t_largeanswerremotes->push_back(remote);
625 switch(res) {
626 case RCode::ServFail:
627 if(t_servfailremotes) {
628 t_servfailremotes->push_back(remote);
629 if(query && t_servfailqueryring) // packet cache
630 t_servfailqueryring->push_back(make_pair(*query, qtype));
631 }
632 g_stats.servFails++;
633 break;
634 case RCode::NXDomain:
635 g_stats.nxDomains++;
636 break;
637 case RCode::NoError:
638 g_stats.noErrors++;
639 break;
640 }
641 }
642
643 static string makeLoginfo(DNSComboWriter* dc)
644 try
645 {
646 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
647 }
648 catch(...)
649 {
650 return "Exception making error message for exception";
651 }
652
653 #ifdef HAVE_PROTOBUF
654 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
655 {
656 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
657 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
658 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
659 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
660 message.setRequestorId(requestorId);
661 message.setDeviceId(deviceId);
662
663 if (!policyTags.empty()) {
664 message.setPolicyTags(policyTags);
665 }
666
667 // cerr <<message.toDebugString()<<endl;
668 std::string str;
669 message.serialize(str);
670 logger->queueData(str);
671 }
672
673 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
674 {
675 // cerr <<message.toDebugString()<<endl;
676 std::string str;
677 message.serialize(str);
678 logger->queueData(str);
679 }
680 #endif
681
682 /**
683 * Chases the CNAME provided by the PolicyCustom RPZ policy.
684 *
685 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
686 * @param qtype: The QType of the original query
687 * @param sr: A SyncRes
688 * @param res: An integer that will contain the RCODE of the lookup we do
689 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
690 */
691 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
692 {
693 if (spoofed.d_type == QType::CNAME) {
694 bool oldWantsRPZ = sr.getWantsRPZ();
695 sr.setWantsRPZ(false);
696 vector<DNSRecord> ans;
697 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
698 for (const auto& rec : ans) {
699 if(rec.d_place == DNSResourceRecord::ANSWER) {
700 ret.push_back(rec);
701 }
702 }
703 // Reset the RPZ state of the SyncRes
704 sr.setWantsRPZ(oldWantsRPZ);
705 }
706 }
707
708 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
709 {
710 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
711
712 if(rec.d_type != QType::OPT) // their TTL ain't real
713 minTTL = min(minTTL, rec.d_ttl);
714
715 rec.d_content->toPacket(pw);
716 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
717 pw.rollback();
718 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
719 pw.getHeader()->tc=1;
720 pw.truncate();
721 }
722 return false;
723 }
724
725 return true;
726 }
727
728 static void startDoResolve(void *p)
729 {
730 DNSComboWriter* dc=(DNSComboWriter *)p;
731 try {
732 if (t_queryring)
733 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
734
735 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
736 EDNSOpts edo;
737 bool haveEDNS=false;
738 if(getEDNSOpts(dc->d_mdp, &edo)) {
739 if(!dc->d_tcp) {
740 /* rfc6891 6.2.3:
741 "Values lower than 512 MUST be treated as equal to 512."
742 */
743 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
744 }
745 dc->d_ednsOpts = edo.d_options;
746 haveEDNS=true;
747
748 if (g_useIncomingECS && !dc->d_ecsParsed) {
749 for (const auto& o : edo.d_options) {
750 if (o.first == EDNSOptionCode::ECS) {
751 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
752 break;
753 }
754 }
755 }
756 }
757 /* perhaps there was no EDNS or no ECS but by now we looked */
758 dc->d_ecsParsed = true;
759 vector<DNSRecord> ret;
760 vector<uint8_t> packet;
761
762 auto luaconfsLocal = g_luaconfs.getLocal();
763 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
764 bool wantsRPZ(true);
765 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
766 #ifdef HAVE_PROTOBUF
767 if (luaconfsLocal->protobufServer) {
768 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
769 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
770 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
771 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
772 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
773 }
774 #endif /* HAVE_PROTOBUF */
775
776 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
777
778 pw.getHeader()->aa=0;
779 pw.getHeader()->ra=1;
780 pw.getHeader()->qr=1;
781 pw.getHeader()->tc=0;
782 pw.getHeader()->id=dc->d_mdp.d_header.id;
783 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
784 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
785
786 /* This is the lowest TTL seen in the records of the response,
787 so we can't cache it for longer than this value.
788 If we have a TTL cap, this value can't be larger than the
789 cap no matter what. */
790 uint32_t minTTL = dc->d_ttlCap;
791
792 SyncRes sr(dc->d_now);
793
794 bool DNSSECOK=false;
795 if(t_pdl) {
796 sr.setLuaEngine(t_pdl);
797 }
798 sr.d_requestor=dc->d_remote; // ECS needs this too
799 if(g_dnssecmode != DNSSECMode::Off) {
800 sr.setDoDNSSEC(true);
801
802 // Does the requestor want DNSSEC records?
803 if(edo.d_Z & EDNSOpts::DNSSECOK) {
804 DNSSECOK=true;
805 g_stats.dnssecQueries++;
806 }
807 } else {
808 // Ignore the client-set CD flag
809 pw.getHeader()->cd=0;
810 }
811 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
812
813 #ifdef HAVE_PROTOBUF
814 sr.setInitialRequestId(dc->d_uuid);
815 #endif
816
817 if (g_useIncomingECS) {
818 sr.setIncomingECSFound(dc->d_ecsFound);
819 if (dc->d_ecsFound) {
820 sr.setIncomingECS(dc->d_ednssubnet);
821 }
822 }
823
824 bool tracedQuery=false; // we could consider letting Lua know about this too
825 bool variableAnswer = dc->d_variable;
826 bool shouldNotValidate = false;
827
828 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
829 int res = RCode::NoError;
830 DNSFilterEngine::Policy appliedPolicy;
831 DNSRecord spoofed;
832 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
833 dq.ednsFlags = &edo.d_Z;
834 dq.ednsOptions = &dc->d_ednsOpts;
835 dq.tag = dc->d_tag;
836 dq.discardedPolicies = &sr.d_discardedPolicies;
837 dq.policyTags = &dc->d_policyTags;
838 dq.appliedPolicy = &appliedPolicy;
839 dq.currentRecords = &ret;
840 dq.dh = &dc->d_mdp.d_header;
841 dq.data = dc->d_data;
842 #ifdef HAVE_PROTOBUF
843 dq.requestorId = dc->d_requestorId;
844 dq.deviceId = dc->d_deviceId;
845 #endif
846
847 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
848 pw.getHeader()->tc = 1;
849 res = 0;
850 variableAnswer = true;
851 goto sendit;
852 }
853
854 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
855 sr.setLogMode(SyncRes::Store);
856 tracedQuery=true;
857 }
858
859
860 if(!g_quiet || tracedQuery) {
861 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
862 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
863 if(!dc->d_ednssubnet.source.empty()) {
864 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
865 }
866 L<<endl;
867 }
868
869 sr.setId(MT->getTid());
870 if(!dc->d_mdp.d_header.rd)
871 sr.setCacheOnly();
872
873 if (t_pdl) {
874 t_pdl->prerpz(dq, res);
875 }
876
877 // Check if the query has a policy attached to it
878 if (wantsRPZ) {
879 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
880 }
881
882 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
883 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
884
885 sr.setWantsRPZ(wantsRPZ);
886 if(wantsRPZ) {
887 switch(appliedPolicy.d_kind) {
888 case DNSFilterEngine::PolicyKind::NoAction:
889 break;
890 case DNSFilterEngine::PolicyKind::Drop:
891 g_stats.policyDrops++;
892 g_stats.policyResults[appliedPolicy.d_kind]++;
893 delete dc;
894 dc=0;
895 return;
896 case DNSFilterEngine::PolicyKind::NXDOMAIN:
897 g_stats.policyResults[appliedPolicy.d_kind]++;
898 res=RCode::NXDomain;
899 goto haveAnswer;
900 case DNSFilterEngine::PolicyKind::NODATA:
901 g_stats.policyResults[appliedPolicy.d_kind]++;
902 res=RCode::NoError;
903 goto haveAnswer;
904 case DNSFilterEngine::PolicyKind::Custom:
905 g_stats.policyResults[appliedPolicy.d_kind]++;
906 res=RCode::NoError;
907 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
908 ret.push_back(spoofed);
909 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
910 goto haveAnswer;
911 case DNSFilterEngine::PolicyKind::Truncate:
912 if(!dc->d_tcp) {
913 g_stats.policyResults[appliedPolicy.d_kind]++;
914 res=RCode::NoError;
915 pw.getHeader()->tc=1;
916 goto haveAnswer;
917 }
918 break;
919 }
920 }
921
922 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
923 try {
924 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
925 shouldNotValidate = sr.wasOutOfBand();
926 }
927 catch(ImmediateServFailException &e) {
928 if(g_logCommonErrors)
929 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
930 res = RCode::ServFail;
931 }
932
933 dq.validationState = sr.getValidationState();
934
935 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
936 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
937 appliedPolicy = sr.d_appliedPolicy;
938 g_stats.policyResults[appliedPolicy.d_kind]++;
939 switch(appliedPolicy.d_kind) {
940 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
941 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
942 case DNSFilterEngine::PolicyKind::Drop:
943 g_stats.policyDrops++;
944 delete dc;
945 dc=0;
946 return;
947 case DNSFilterEngine::PolicyKind::NXDOMAIN:
948 ret.clear();
949 res=RCode::NXDomain;
950 goto haveAnswer;
951
952 case DNSFilterEngine::PolicyKind::NODATA:
953 ret.clear();
954 res=RCode::NoError;
955 goto haveAnswer;
956
957 case DNSFilterEngine::PolicyKind::Truncate:
958 if(!dc->d_tcp) {
959 ret.clear();
960 res=RCode::NoError;
961 pw.getHeader()->tc=1;
962 goto haveAnswer;
963 }
964 break;
965
966 case DNSFilterEngine::PolicyKind::Custom:
967 ret.clear();
968 res=RCode::NoError;
969 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
970 ret.push_back(spoofed);
971 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
972 goto haveAnswer;
973 }
974 }
975
976 if (wantsRPZ) {
977 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
978 }
979
980 if(t_pdl) {
981 if(res == RCode::NoError) {
982 auto i=ret.cbegin();
983 for(; i!= ret.cend(); ++i)
984 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
985 break;
986 if(i == ret.cend() && t_pdl->nodata(dq, res))
987 shouldNotValidate = true;
988
989 }
990 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
991 shouldNotValidate = true;
992
993 if(t_pdl->postresolve(dq, res))
994 shouldNotValidate = true;
995 }
996
997 if (wantsRPZ) { //XXX This block is repeated, see above
998 g_stats.policyResults[appliedPolicy.d_kind]++;
999 switch(appliedPolicy.d_kind) {
1000 case DNSFilterEngine::PolicyKind::NoAction:
1001 break;
1002 case DNSFilterEngine::PolicyKind::Drop:
1003 g_stats.policyDrops++;
1004 delete dc;
1005 dc=0;
1006 return;
1007 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1008 ret.clear();
1009 res=RCode::NXDomain;
1010 goto haveAnswer;
1011
1012 case DNSFilterEngine::PolicyKind::NODATA:
1013 ret.clear();
1014 res=RCode::NoError;
1015 goto haveAnswer;
1016
1017 case DNSFilterEngine::PolicyKind::Truncate:
1018 if(!dc->d_tcp) {
1019 ret.clear();
1020 res=RCode::NoError;
1021 pw.getHeader()->tc=1;
1022 goto haveAnswer;
1023 }
1024 break;
1025
1026 case DNSFilterEngine::PolicyKind::Custom:
1027 ret.clear();
1028 res=RCode::NoError;
1029 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
1030 ret.push_back(spoofed);
1031 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
1032 goto haveAnswer;
1033 }
1034 }
1035 }
1036 haveAnswer:;
1037 if(res == PolicyDecision::DROP) {
1038 g_stats.policyDrops++;
1039 delete dc;
1040 dc=0;
1041 return;
1042 }
1043 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1044 {
1045 string trace(sr.getTrace());
1046 if(!trace.empty()) {
1047 vector<string> lines;
1048 boost::split(lines, trace, boost::is_any_of("\n"));
1049 for(const string& line : lines) {
1050 if(!line.empty())
1051 L<<Logger::Warning<< line << endl;
1052 }
1053 }
1054 }
1055
1056 if(res == -1) {
1057 pw.getHeader()->rcode=RCode::ServFail;
1058 // no commit here, because no record
1059 g_stats.servFails++;
1060 }
1061 else {
1062 pw.getHeader()->rcode=res;
1063
1064 // Does the validation mode or query demand validation?
1065 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1066 try {
1067 if(sr.doLog()) {
1068 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1069 }
1070
1071 auto state = sr.getValidationState();
1072
1073 if(state == Secure) {
1074 if(sr.doLog()) {
1075 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1076 }
1077
1078 // Is the query source interested in the value of the ad-bit?
1079 if (dc->d_mdp.d_header.ad || DNSSECOK)
1080 pw.getHeader()->ad=1;
1081 }
1082 else if(state == Insecure) {
1083 if(sr.doLog()) {
1084 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1085 }
1086
1087 pw.getHeader()->ad=0;
1088 }
1089 else if(state == Bogus) {
1090 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1091 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1092 }
1093
1094 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1095 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1096 if(sr.doLog()) {
1097 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1098 }
1099
1100 pw.getHeader()->rcode=RCode::ServFail;
1101 goto sendit;
1102 } else {
1103 if(sr.doLog()) {
1104 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1105 }
1106 }
1107 }
1108 }
1109 catch(ImmediateServFailException &e) {
1110 if(g_logCommonErrors)
1111 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1112 pw.getHeader()->rcode=RCode::ServFail;
1113 goto sendit;
1114 }
1115 }
1116
1117 if(ret.size()) {
1118 orderAndShuffle(ret);
1119 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1120 stable_sort(ret.begin(), ret.end(), *sl);
1121 variableAnswer=true;
1122 }
1123 }
1124
1125 bool needCommit = false;
1126 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1127 if( ! DNSSECOK &&
1128 ( i->d_type == QType::NSEC3 ||
1129 (
1130 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1131 (
1132 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1133 i->d_place != DNSResourceRecord::ANSWER
1134 )
1135 )
1136 )
1137 ) {
1138 continue;
1139 }
1140
1141 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1142 needCommit = false;
1143 break;
1144 }
1145 needCommit = true;
1146
1147 #ifdef HAVE_PROTOBUF
1148 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1149 pbMessage.addRR(*i);
1150 }
1151 #endif
1152 }
1153 if(needCommit)
1154 pw.commit();
1155 }
1156 sendit:;
1157
1158 if (haveEDNS) {
1159 /* we try to add the EDNS OPT RR even for truncated answers,
1160 as rfc6891 states:
1161 "The minimal response MUST be the DNS header, question section, and an
1162 OPT record. This MUST also occur when a truncated response (using
1163 the DNS header's TC bit) is returned."
1164 */
1165 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, dc->d_ttlCap, maxanswersize)) {
1166 pw.commit();
1167 }
1168 }
1169
1170 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1171 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1172 #ifdef HAVE_PROTOBUF
1173 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1174 pbMessage.setBytes(packet.size());
1175 pbMessage.setResponseCode(pw.getHeader()->rcode);
1176 if (appliedPolicy.d_name) {
1177 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1178 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1179 }
1180 pbMessage.setPolicyTags(dc->d_policyTags);
1181 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1182 pbMessage.setRequestorId(dq.requestorId);
1183 pbMessage.setDeviceId(dq.deviceId);
1184 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1185 }
1186 #endif
1187 if(!dc->d_tcp) {
1188 struct msghdr msgh;
1189 struct iovec iov;
1190 char cbuf[256];
1191 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1192 msgh.msg_control=NULL;
1193
1194 if(g_fromtosockets.count(dc->d_socket)) {
1195 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1196 }
1197 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1198 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1199
1200 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1201 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1202 string((const char*)&*packet.begin(), packet.size()),
1203 g_now.tv_sec,
1204 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1205 min(minTTL,SyncRes::s_packetcachettl),
1206 &pbMessage);
1207 }
1208 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1209 }
1210 else {
1211 char buf[2];
1212 buf[0]=packet.size()/256;
1213 buf[1]=packet.size()%256;
1214
1215 Utility::iovec iov[2];
1216
1217 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1218 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1219
1220 int wret=Utility::writev(dc->d_socket, iov, 2);
1221 bool hadError=true;
1222
1223 if(wret == 0)
1224 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1225 else if(wret < 0 )
1226 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1227 else if((unsigned int)wret != 2 + packet.size())
1228 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1229 else
1230 hadError=false;
1231
1232 // update tcp connection status, either by closing or moving to 'BYTE0'
1233
1234 if(hadError) {
1235 // no need to remove us from FDM, we weren't there
1236 dc->d_socket = -1;
1237 }
1238 else {
1239 dc->d_tcpConnection->queriesCount++;
1240 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1241 dc->d_socket = -1;
1242 }
1243 else {
1244 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1245 Utility::gettimeofday(&g_now, 0); // needs to be updated
1246 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1247 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1248 }
1249 }
1250 }
1251 float spent=makeFloat(sr.getNow()-dc->d_now);
1252 if(!g_quiet) {
1253 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1254 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1255 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1256 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1257
1258 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1259 L<< ", dnssec="<<vStates[sr.getValidationState()];
1260 }
1261
1262 L<<endl;
1263
1264 }
1265
1266 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
1267
1268 if(spent < 0.001)
1269 g_stats.answers0_1++;
1270 else if(spent < 0.010)
1271 g_stats.answers1_10++;
1272 else if(spent < 0.1)
1273 g_stats.answers10_100++;
1274 else if(spent < 1.0)
1275 g_stats.answers100_1000++;
1276 else
1277 g_stats.answersSlow++;
1278
1279 uint64_t newLat=(uint64_t)(spent*1000000);
1280 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1281 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1282 // no worries, we do this for packet cache hits elsewhere
1283
1284 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1285 if(ourtime < 1)
1286 g_stats.ourtime0_1++;
1287 else if(ourtime < 2)
1288 g_stats.ourtime1_2++;
1289 else if(ourtime < 4)
1290 g_stats.ourtime2_4++;
1291 else if(ourtime < 8)
1292 g_stats.ourtime4_8++;
1293 else if(ourtime < 16)
1294 g_stats.ourtime8_16++;
1295 else if(ourtime < 32)
1296 g_stats.ourtime16_32++;
1297 else {
1298 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1299 g_stats.ourtimeSlow++;
1300 }
1301 if(ourtime >= 0.0) {
1302 newLat=ourtime*1000; // usec
1303 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1304 }
1305 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1306 delete dc;
1307 dc=0;
1308 }
1309 catch(PDNSException &ae) {
1310 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1311 delete dc;
1312 }
1313 catch(MOADNSException& e) {
1314 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1315 delete dc;
1316 }
1317 catch(std::exception& e) {
1318 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1319
1320 // Luawrapper nests the exception from Lua, so we unnest it here
1321 try {
1322 std::rethrow_if_nested(e);
1323 } catch(const std::exception& ne) {
1324 L<<". Extra info: "<<ne.what();
1325 } catch(...) {}
1326
1327 L<<endl;
1328 delete dc;
1329 }
1330 catch(...) {
1331 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1332 }
1333
1334 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1335 }
1336
1337 static void makeControlChannelSocket(int processNum=-1)
1338 {
1339 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1340 if(processNum >= 0)
1341 sockname += "."+std::to_string(processNum);
1342 sockname+=".controlsocket";
1343 s_rcc.listen(sockname);
1344
1345 int sockowner = -1;
1346 int sockgroup = -1;
1347
1348 if (!::arg().isEmpty("socket-group"))
1349 sockgroup=::arg().asGid("socket-group");
1350 if (!::arg().isEmpty("socket-owner"))
1351 sockowner=::arg().asUid("socket-owner");
1352
1353 if (sockgroup > -1 || sockowner > -1) {
1354 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1355 unixDie("Failed to chown control socket");
1356 }
1357 }
1358
1359 // do mode change if socket-mode is given
1360 if(!::arg().isEmpty("socket-mode")) {
1361 mode_t sockmode=::arg().asMode("socket-mode");
1362 if(chmod(sockname.c_str(), sockmode) < 0) {
1363 unixDie("Failed to chmod control socket");
1364 }
1365 }
1366 }
1367
1368 static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
1369 {
1370 bool found = false;
1371 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1372 size_t questionLen = question.length();
1373 unsigned int consumed=0;
1374 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1375
1376 size_t pos= sizeof(dnsheader)+consumed+4;
1377 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1378 = 11 */
1379 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1380 /* OPT root label (1) followed by type (2) */
1381 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1382 if (!options) {
1383 char* ecsStart = nullptr;
1384 size_t ecsLen = 0;
1385 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1386 if (res == 0 && ecsLen > 4) {
1387 EDNSSubnetOpts eso;
1388 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1389 *ednssubnet=eso;
1390 found = true;
1391 }
1392 }
1393 }
1394 else {
1395 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1396 if (res == 0) {
1397 const auto& it = options->find(EDNSOptionCode::ECS);
1398 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1399 EDNSSubnetOpts eso;
1400 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1401 *ednssubnet=eso;
1402 found = true;
1403 }
1404 }
1405 }
1406 }
1407 }
1408 }
1409 return found;
1410 }
1411
1412 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1413 {
1414 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1415
1416 if(conn->state==TCPConnection::BYTE0) {
1417 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1418 if(bytes==1)
1419 conn->state=TCPConnection::BYTE1;
1420 if(bytes==2) {
1421 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1422 conn->bytesread=0;
1423 conn->state=TCPConnection::GETQUESTION;
1424 }
1425 if(!bytes || bytes < 0) {
1426 t_fdm->removeReadFD(fd);
1427 return;
1428 }
1429 }
1430 else if(conn->state==TCPConnection::BYTE1) {
1431 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1432 if(bytes==1) {
1433 conn->state=TCPConnection::GETQUESTION;
1434 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1435 conn->bytesread=0;
1436 }
1437 if(!bytes || bytes < 0) {
1438 if(g_logCommonErrors)
1439 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1440 t_fdm->removeReadFD(fd);
1441 return;
1442 }
1443 }
1444 else if(conn->state==TCPConnection::GETQUESTION) {
1445 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1446 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1447 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1448 t_fdm->removeReadFD(fd);
1449 return;
1450 }
1451 conn->bytesread+=(uint16_t)bytes;
1452 if(conn->bytesread==conn->qlen) {
1453 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1454
1455 DNSComboWriter* dc=nullptr;
1456 try {
1457 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1458 }
1459 catch(MOADNSException &mde) {
1460 g_stats.clientParseError++;
1461 if(g_logCommonErrors)
1462 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1463 return;
1464 }
1465 dc->d_tcpConnection = conn; // carry the torch
1466 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1467 dc->d_tcp=true;
1468 dc->setRemote(&conn->d_remote);
1469 ComboAddress dest;
1470 memset(&dest, 0, sizeof(dest));
1471 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1472 socklen_t len = dest.getSocklen();
1473 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1474 dc->setLocal(dest);
1475 DNSName qname;
1476 uint16_t qtype=0;
1477 uint16_t qclass=0;
1478 bool needECS = false;
1479 string requestorId;
1480 string deviceId;
1481 #ifdef HAVE_PROTOBUF
1482 auto luaconfsLocal = g_luaconfs.getLocal();
1483 if (luaconfsLocal->protobufServer) {
1484 needECS = true;
1485 }
1486 #endif
1487
1488 if(needECS || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1489
1490 try {
1491 std::map<uint16_t, EDNSOptionView> ednsOptions;
1492 dc->d_ecsParsed = true;
1493 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1494
1495 if(t_pdl) {
1496 try {
1497 if (t_pdl->d_gettag_ffi) {
1498 dc->d_tag = t_pdl->gettag_ffi(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable);
1499 }
1500 else if (t_pdl->d_gettag) {
1501 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1502 }
1503 }
1504 catch(const std::exception& e) {
1505 if(g_logCommonErrors)
1506 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1507 }
1508 }
1509 }
1510 catch(const std::exception& e)
1511 {
1512 if(g_logCommonErrors)
1513 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1514 }
1515 }
1516 #ifdef HAVE_PROTOBUF
1517 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1518 dc->d_requestorId = requestorId;
1519 dc->d_deviceId = deviceId;
1520 dc->d_uuid = (*t_uuidGenerator)();
1521 }
1522
1523 if(luaconfsLocal->protobufServer) {
1524 try {
1525 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1526
1527 if (!luaconfsLocal->protobufTaggedOnly) {
1528 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1529 }
1530 }
1531 catch(std::exception& e) {
1532 if(g_logCommonErrors)
1533 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1534 }
1535 }
1536 #endif
1537 if(dc->d_mdp.d_header.qr) {
1538 delete dc;
1539 g_stats.ignoredCount++;
1540 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1541 return;
1542 }
1543 if(dc->d_mdp.d_header.opcode) {
1544 delete dc;
1545 g_stats.ignoredCount++;
1546 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1547 return;
1548 }
1549 else {
1550 ++g_stats.qcounter;
1551 ++g_stats.tcpqcounter;
1552 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1553 return;
1554 }
1555 }
1556 }
1557 }
1558
1559 //! Handle new incoming TCP connection
1560 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1561 {
1562 ComboAddress addr;
1563 socklen_t addrlen=sizeof(addr);
1564 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1565 if(newsock>=0) {
1566 if(MT->numProcesses() > g_maxMThreads) {
1567 g_stats.overCapacityDrops++;
1568 try {
1569 closesocket(newsock);
1570 }
1571 catch(const PDNSException& e) {
1572 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1573 }
1574 return;
1575 }
1576
1577 if(t_remotes)
1578 t_remotes->push_back(addr);
1579 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1580 if(!g_quiet)
1581 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1582
1583 g_stats.unauthorizedTCP++;
1584 try {
1585 closesocket(newsock);
1586 }
1587 catch(const PDNSException& e) {
1588 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1589 }
1590 return;
1591 }
1592 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1593 g_stats.tcpClientOverflow++;
1594 try {
1595 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1596 }
1597 catch(const PDNSException& e) {
1598 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1599 }
1600 return;
1601 }
1602
1603 setNonBlocking(newsock);
1604 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
1605 tc->state=TCPConnection::BYTE0;
1606
1607 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1608
1609 struct timeval now;
1610 Utility::gettimeofday(&now, 0);
1611 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1612 }
1613 }
1614
1615 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1616 {
1617 gettimeofday(&g_now, 0);
1618 struct timeval diff = g_now - tv;
1619 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1620
1621 if(tv.tv_sec && delta > 1000.0) {
1622 g_stats.tooOldDrops++;
1623 return 0;
1624 }
1625
1626 ++g_stats.qcounter;
1627 if(fromaddr.sin4.sin_family==AF_INET6)
1628 g_stats.ipv6qcounter++;
1629
1630 string response;
1631 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1632 unsigned int ctag=0;
1633 uint32_t qhash = 0;
1634 bool needECS = false;
1635 std::vector<std::string> policyTags;
1636 LuaContext::LuaObject data;
1637 string requestorId;
1638 string deviceId;
1639 #ifdef HAVE_PROTOBUF
1640 boost::uuids::uuid uniqueId;
1641 auto luaconfsLocal = g_luaconfs.getLocal();
1642 if (luaconfsLocal->protobufServer) {
1643 uniqueId = (*t_uuidGenerator)();
1644 needECS = true;
1645 } else if (luaconfsLocal->outgoingProtobufServer) {
1646 uniqueId = (*t_uuidGenerator)();
1647 }
1648 #endif
1649 EDNSSubnetOpts ednssubnet;
1650 bool ecsFound = false;
1651 bool ecsParsed = false;
1652 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
1653 bool variable = false;
1654 try {
1655 DNSName qname;
1656 uint16_t qtype=0;
1657 uint16_t qclass=0;
1658 uint32_t age;
1659 bool qnameParsed=false;
1660 #ifdef MALLOC_TRACE
1661 /*
1662 static uint64_t last=0;
1663 if(!last)
1664 g_mtracer->clearAllocators();
1665 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1666 last=g_mtracer->getAllocs();
1667 cout<<g_mtracer->topAllocatorsString()<<endl;
1668 g_mtracer->clearAllocators();
1669 */
1670 #endif
1671
1672 if(needECS || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
1673 try {
1674 std::map<uint16_t, EDNSOptionView> ednsOptions;
1675 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1676 qnameParsed = true;
1677 ecsParsed = true;
1678
1679 if(t_pdl) {
1680 try {
1681 if (t_pdl->d_gettag_ffi) {
1682 ctag = t_pdl->gettag_ffi(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable);
1683 }
1684 else if (t_pdl->d_gettag) {
1685 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1686 }
1687 }
1688 catch(const std::exception& e) {
1689 if(g_logCommonErrors)
1690 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1691 }
1692 }
1693 }
1694 catch(const std::exception& e)
1695 {
1696 if(g_logCommonErrors)
1697 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1698 }
1699 }
1700
1701 bool cacheHit = false;
1702 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1703 #ifdef HAVE_PROTOBUF
1704 if(luaconfsLocal->protobufServer) {
1705 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1706 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
1707 }
1708 }
1709 #endif /* HAVE_PROTOBUF */
1710
1711 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1712 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1713 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
1714 if (qnameParsed) {
1715 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1716 }
1717 else {
1718 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1719 }
1720
1721 if (cacheHit) {
1722 #ifdef HAVE_PROTOBUF
1723 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1724 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1725 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1726 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1727 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1728 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1729 pbMessage.setRequestorId(requestorId);
1730 pbMessage.setDeviceId(deviceId);
1731 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1732 }
1733 #endif /* HAVE_PROTOBUF */
1734 if(!g_quiet)
1735 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1736
1737 g_stats.packetCacheHits++;
1738 SyncRes::s_queries++;
1739 ageDNSPacket(response, age);
1740 struct msghdr msgh;
1741 struct iovec iov;
1742 char cbuf[256];
1743 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1744 msgh.msg_control=NULL;
1745
1746 if(g_fromtosockets.count(fd)) {
1747 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1748 }
1749 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1750 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1751
1752 if(response.length() >= sizeof(struct dnsheader)) {
1753 struct dnsheader tmpdh;
1754 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1755 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1756 }
1757 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1758 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1759 return 0;
1760 }
1761 }
1762 catch(std::exception& e) {
1763 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1764 return 0;
1765 }
1766
1767 if(t_pdl) {
1768 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
1769 if(!g_quiet)
1770 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1771 g_stats.policyDrops++;
1772 return 0;
1773 }
1774 }
1775
1776 if(MT->numProcesses() > g_maxMThreads) {
1777 if(!g_quiet)
1778 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1779
1780 g_stats.overCapacityDrops++;
1781 return 0;
1782 }
1783
1784 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1785 dc->setSocket(fd);
1786 dc->d_tag=ctag;
1787 dc->d_qhash=qhash;
1788 dc->d_query = question;
1789 dc->setRemote(&fromaddr);
1790 dc->setLocal(destaddr);
1791 dc->d_tcp=false;
1792 dc->d_policyTags = policyTags;
1793 dc->d_data = data;
1794 dc->d_ecsFound = ecsFound;
1795 dc->d_ecsParsed = ecsParsed;
1796 dc->d_ednssubnet = ednssubnet;
1797 dc->d_ttlCap = ttlCap;
1798 dc->d_variable = variable;
1799 #ifdef HAVE_PROTOBUF
1800 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1801 dc->d_uuid = uniqueId;
1802 }
1803 dc->d_requestorId = requestorId;
1804 dc->d_deviceId = deviceId;
1805 #endif
1806
1807 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1808 return 0;
1809 }
1810
1811
1812 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1813 {
1814 ssize_t len;
1815 char data[1500];
1816 ComboAddress fromaddr;
1817 struct msghdr msgh;
1818 struct iovec iov;
1819 char cbuf[256];
1820 bool firstQuery = true;
1821
1822 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1823 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1824
1825 for(;;)
1826 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1827
1828 firstQuery = false;
1829
1830 if(t_remotes)
1831 t_remotes->push_back(fromaddr);
1832
1833 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1834 if(!g_quiet)
1835 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1836
1837 g_stats.unauthorizedUDP++;
1838 return;
1839 }
1840 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1841 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1842 if(!g_quiet)
1843 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1844
1845 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1846 return;
1847 }
1848 try {
1849 dnsheader* dh=(dnsheader*)data;
1850
1851 if(dh->qr) {
1852 g_stats.ignoredCount++;
1853 if(g_logCommonErrors)
1854 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1855 }
1856 else if(dh->opcode) {
1857 g_stats.ignoredCount++;
1858 if(g_logCommonErrors)
1859 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1860 }
1861 else {
1862 string question(data, (size_t)len);
1863 struct timeval tv={0,0};
1864 HarvestTimestamp(&msgh, &tv);
1865 ComboAddress dest;
1866 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1867 auto loc = rplookup(g_listenSocketsAddresses, fd);
1868 if(HarvestDestinationAddress(&msgh, &dest)) {
1869 // but.. need to get port too
1870 if(loc)
1871 dest.sin4.sin_port = loc->sin4.sin_port;
1872 }
1873 else {
1874 if(loc) {
1875 dest = *loc;
1876 }
1877 else {
1878 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1879 socklen_t slen = dest.getSocklen();
1880 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1881 }
1882 }
1883 if(g_weDistributeQueries)
1884 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1885 else
1886 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1887 }
1888 }
1889 catch(MOADNSException& mde) {
1890 g_stats.clientParseError++;
1891 if(g_logCommonErrors)
1892 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1893 }
1894 catch(std::runtime_error& e) {
1895 g_stats.clientParseError++;
1896 if(g_logCommonErrors)
1897 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1898 }
1899 }
1900 else {
1901 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1902 if(firstQuery && errno == EAGAIN)
1903 g_stats.noPacketError++;
1904
1905 break;
1906 }
1907 }
1908
1909 static void makeTCPServerSockets(unsigned int threadId)
1910 {
1911 int fd;
1912 vector<string>locals;
1913 stringtok(locals,::arg()["local-address"]," ,");
1914
1915 if(locals.empty())
1916 throw PDNSException("No local address specified");
1917
1918 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1919 ServiceTuple st;
1920 st.port=::arg().asNum("local-port");
1921 parseService(*i, st);
1922
1923 ComboAddress sin;
1924
1925 memset((char *)&sin,0, sizeof(sin));
1926 sin.sin4.sin_family = AF_INET;
1927 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1928 sin.sin6.sin6_family = AF_INET6;
1929 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1930 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1931 }
1932
1933 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1934 if(fd<0)
1935 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1936
1937 setCloseOnExec(fd);
1938
1939 int tmp=1;
1940 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
1941 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1942 exit(1);
1943 }
1944 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1945 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1946 }
1947
1948 #ifdef TCP_DEFER_ACCEPT
1949 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
1950 if(i==locals.begin())
1951 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1952 }
1953 #endif
1954
1955 if( ::arg().mustDo("non-local-bind") )
1956 Utility::setBindAny(AF_INET, fd);
1957
1958 #ifdef SO_REUSEPORT
1959 if(g_reusePort) {
1960 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
1961 throw PDNSException("SO_REUSEPORT: "+stringerror());
1962 }
1963 #endif
1964
1965 if (::arg().asNum("tcp-fast-open") > 0) {
1966 #ifdef TCP_FASTOPEN
1967 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1968 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1969 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1970 }
1971 #else
1972 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1973 #endif
1974 }
1975
1976 sin.sin4.sin_port = htons(st.port);
1977 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1978 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1979 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1980
1981 setNonBlocking(fd);
1982 setSocketSendBuffer(fd, 65000);
1983 listen(fd, 128);
1984 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
1985 g_tcpListenSockets.push_back(fd);
1986 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1987 // - fd is not that which we know here, but returned from accept()
1988 if(sin.sin4.sin_family == AF_INET)
1989 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1990 else
1991 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1992 }
1993 }
1994
1995 static void makeUDPServerSockets(unsigned int threadId)
1996 {
1997 int one=1;
1998 vector<string>locals;
1999 stringtok(locals,::arg()["local-address"]," ,");
2000
2001 if(locals.empty())
2002 throw PDNSException("No local address specified");
2003
2004 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2005 ServiceTuple st;
2006 st.port=::arg().asNum("local-port");
2007 parseService(*i, st);
2008
2009 ComboAddress sin;
2010
2011 memset(&sin, 0, sizeof(sin));
2012 sin.sin4.sin_family = AF_INET;
2013 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2014 sin.sin6.sin6_family = AF_INET6;
2015 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2016 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2017 }
2018
2019 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2020 if(fd < 0) {
2021 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2022 }
2023 if (!setSocketTimestamps(fd))
2024 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2025
2026 if(IsAnyAddress(sin)) {
2027 if(sin.sin4.sin_family == AF_INET)
2028 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2029 g_fromtosockets.insert(fd);
2030 #ifdef IPV6_RECVPKTINFO
2031 if(sin.sin4.sin_family == AF_INET6)
2032 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2033 g_fromtosockets.insert(fd);
2034 #endif
2035 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2036 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2037 }
2038 }
2039 if( ::arg().mustDo("non-local-bind") )
2040 Utility::setBindAny(AF_INET6, fd);
2041
2042 setCloseOnExec(fd);
2043
2044 setSocketReceiveBuffer(fd, 250000);
2045 sin.sin4.sin_port = htons(st.port);
2046
2047
2048 #ifdef SO_REUSEPORT
2049 if(g_reusePort) {
2050 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2051 throw PDNSException("SO_REUSEPORT: "+stringerror());
2052 }
2053 #endif
2054 socklen_t socklen=sin.getSocklen();
2055 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2056 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2057
2058 setNonBlocking(fd);
2059
2060 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
2061 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2062 if(sin.sin4.sin_family == AF_INET)
2063 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2064 else
2065 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2066 }
2067 }
2068
2069 static void daemonize(void)
2070 {
2071 if(fork())
2072 exit(0); // bye bye
2073
2074 setsid();
2075
2076 int i=open("/dev/null",O_RDWR); /* open stdin */
2077 if(i < 0)
2078 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2079 else {
2080 dup2(i,0); /* stdin */
2081 dup2(i,1); /* stderr */
2082 dup2(i,2); /* stderr */
2083 close(i);
2084 }
2085 }
2086
/* Signal handler (presumably installed for SIGUSR1 - confirm at the registration
   site): only raises the statsWanted flag, which doStats() clears again. */
static void usr1Handler(int)
{
  statsWanted=true;
}
2091
2092 static void usr2Handler(int)
2093 {
2094 g_quiet= !g_quiet;
2095 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2096 ::arg().set("quiet")=g_quiet ? "" : "no";
2097 }
2098
2099 static void doStats(void)
2100 {
2101 static time_t lastOutputTime;
2102 static uint64_t lastQueryCount;
2103
2104 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2105 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2106
2107 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2108 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2109 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2110 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2111 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2112
2113 L<<Logger::Notice<<"stats: throttle map: "
2114 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2115 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2116 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2117 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2118 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2119 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2120 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2121
2122 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2123 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2124
2125 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2126 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2127
2128 time_t now = time(0);
2129 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2130 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2131 }
2132 lastOutputTime = now;
2133 lastQueryCount = SyncRes::s_queries;
2134 }
2135 else if(statsWanted)
2136 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
2137
2138 statsWanted=false;
2139 }
2140
2141 static void houseKeeping(void *)
2142 {
2143 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2144 static thread_local int cleanCounter=0;
2145 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2146 try {
2147 if(s_running)
2148 return;
2149 s_running=true;
2150
2151 struct timeval now;
2152 Utility::gettimeofday(&now, 0);
2153
2154 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2155 DTime dt;
2156 dt.setTimeval(now);
2157 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2158 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2159
2160 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2161
2162 if(!((cleanCounter++)%40)) { // this is a full scan!
2163 time_t limit=now.tv_sec-300;
2164 SyncRes::pruneNSSpeeds(limit);
2165 }
2166 last_prune=time(0);
2167 }
2168
2169 if(now.tv_sec - last_rootupdate > 7200) {
2170 int res = SyncRes::getRootNS(g_now, nullptr);
2171 if (!res)
2172 last_rootupdate=now.tv_sec;
2173 }
2174
2175 if (t_id == -1) {
2176 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
2177 doStats();
2178 last_stat=time(0);
2179 }
2180 }
2181 else if(!t_id) {
2182
2183 if(now.tv_sec - last_secpoll >= 3600) {
2184 try {
2185 doSecPoll(&last_secpoll);
2186 }
2187 catch(std::exception& e)
2188 {
2189 L<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2190 }
2191 catch(PDNSException& e)
2192 {
2193 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2194 }
2195 catch(ImmediateServFailException &e)
2196 {
2197 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2198 }
2199 catch(...)
2200 {
2201 L<<Logger::Error<<"Exception while performing security poll"<<endl;
2202 }
2203
2204 }
2205 }
2206 s_running=false;
2207 }
2208 catch(PDNSException& ae)
2209 {
2210 s_running=false;
2211 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2212 throw;
2213 }
2214 }
2215
2216 static void makeThreadPipes()
2217 {
2218 for(unsigned int n=0; n < g_numThreads; ++n) {
2219 struct ThreadPipeSet tps;
2220 int fd[2];
2221 if(pipe(fd) < 0)
2222 unixDie("Creating pipe for inter-thread communications");
2223
2224 tps.readToThread = fd[0];
2225 tps.writeToThread = fd[1];
2226
2227 if(pipe(fd) < 0)
2228 unixDie("Creating pipe for inter-thread communications");
2229 tps.readFromThread = fd[0];
2230 tps.writeFromThread = fd[1];
2231
2232 g_pipes.push_back(tps);
2233 }
2234 }
2235
/* Message passed between threads: a pointer to a heap-allocated ThreadMSG is
   written into a thread's pipe and read back by handlePipeRequest(). */
struct ThreadMSG
{
  pipefunc_t func;   // work to execute on the receiving thread
  bool wantAnswer;   // when true, the receiver writes func's result pointer back to the sender
};
2241
2242 void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2243 {
2244 /* This function might be called by the worker with t_id 0 during startup */
2245 if (t_id != -1 && t_id != 0) {
2246 L<<Logger::Error<<"broadcastFunction() has been called by a worker ("<<t_id<<")"<<endl;
2247 exit(1);
2248 }
2249
2250 int n = 0;
2251 for(ThreadPipeSet& tps : g_pipes)
2252 {
2253 if(n++ == t_id) {
2254 if(!skipSelf)
2255 func(); // don't write to ourselves!
2256 continue;
2257 }
2258
2259 ThreadMSG* tmsg = new ThreadMSG();
2260 tmsg->func = func;
2261 tmsg->wantAnswer = true;
2262 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2263 delete tmsg;
2264 unixDie("write to thread pipe returned wrong size or error");
2265 }
2266
2267 string* resp;
2268 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2269 unixDie("read from thread pipe returned wrong size or error");
2270
2271 if(resp) {
2272 // cerr <<"got response: " << *resp << endl;
2273 delete resp;
2274 }
2275 }
2276 }
2277
2278 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2279 {
2280 if (t_id != 0) {
2281 L<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2282 exit(1);
2283 }
2284
2285 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2286 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2287
2288 if(target == 0) {
2289 L<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to the distributor"<<endl;
2290 exit(1);
2291 }
2292
2293 ThreadPipeSet& tps = g_pipes[target];
2294 ThreadMSG* tmsg = new ThreadMSG();
2295 tmsg->func = func;
2296 tmsg->wantAnswer = false;
2297
2298 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2299 delete tmsg;
2300 unixDie("write to thread pipe returned wrong size or error");
2301 }
2302 }
2303
2304 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2305 {
2306 ThreadMSG* tmsg = nullptr;
2307
2308 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
2309 unixDie("read from thread pipe returned wrong size or error");
2310 }
2311
2312 void *resp=0;
2313 try {
2314 resp = tmsg->func();
2315 }
2316 catch(std::exception& e) {
2317 if(g_logCommonErrors)
2318 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2319 }
2320 catch(PDNSException& e) {
2321 if(g_logCommonErrors)
2322 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2323 }
2324 if(tmsg->wantAnswer) {
2325 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2326 delete tmsg;
2327 unixDie("write to thread pipe returned wrong size or error");
2328 }
2329 }
2330
2331 delete tmsg;
2332 }
2333
2334 template<class T> void *voider(const boost::function<T*()>& func)
2335 {
2336 return func();
2337 }
2338
2339 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2340 {
2341 a.insert(a.end(), b.begin(), b.end());
2342 return a;
2343 }
2344
/* Appends every element of b to the end of a and returns a. */
vector<pair<string, uint16_t> >& operator+=(vector<pair<string, uint16_t> >&a, const vector<pair<string, uint16_t> >& b)
{
  for(const auto& entry : b) {
    a.push_back(entry);
  }
  return a;
}
2350
2351 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2352 {
2353 a.insert(a.end(), b.begin(), b.end());
2354 return a;
2355 }
2356
2357
2358 template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
2359 {
2360 if (t_id != -1) {
2361 L<<Logger::Error<<"broadcastFunction has been called by a worker ("<<t_id<<")"<<endl;
2362 exit(1);
2363
2364 }
2365
2366 T ret=T();
2367 for(ThreadPipeSet& tps : g_pipes)
2368 {
2369 ThreadMSG* tmsg = new ThreadMSG();
2370 tmsg->func = boost::bind(voider<T>, func);
2371 tmsg->wantAnswer = true;
2372
2373 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2374 delete tmsg;
2375 unixDie("write to thread pipe returned wrong size or error");
2376 }
2377
2378 T* resp;
2379 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2380 unixDie("read from thread pipe returned wrong size or error");
2381
2382 if(resp) {
2383 //~ cerr <<"got response: " << *resp << endl;
2384 ret += *resp;
2385 delete resp;
2386 }
2387 }
2388 return ret;
2389 }
2390
2391 template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2392 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
2393 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
2394 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
2395
2396 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2397 {
2398 string remote;
2399 string msg=s_rcc.recv(&remote);
2400 RecursorControlParser rcp;
2401 RecursorControlParser::func_t* command;
2402
2403 string answer=rcp.getAnswer(msg, &command);
2404
2405 // If we are inside a chroot, we need to strip
2406 if (!arg()["chroot"].empty()) {
2407 size_t len = arg()["chroot"].length();
2408 remote = remote.substr(len);
2409 }
2410
2411 try {
2412 s_rcc.send(answer, &remote);
2413 command();
2414 }
2415 catch(std::exception& e) {
2416 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2417 }
2418 catch(PDNSException& ae) {
2419 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2420 }
2421 }
2422
2423 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2424 {
2425 PacketID* pident=any_cast<PacketID>(&var);
2426 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2427
2428 shared_array<char> buffer(new char[pident->inNeeded]);
2429
2430 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2431 if(ret > 0) {
2432 pident->inMSG.append(&buffer[0], &buffer[ret]);
2433 pident->inNeeded-=(size_t)ret;
2434 if(!pident->inNeeded || pident->inIncompleteOkay) {
2435 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2436 PacketID pid=*pident;
2437 string msg=pident->inMSG;
2438
2439 t_fdm->removeReadFD(fd);
2440 MT->sendEvent(pid, &msg);
2441 }
2442 else {
2443 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2444 }
2445 }
2446 else {
2447 PacketID tmp=*pident;
2448 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2449 string empty;
2450 MT->sendEvent(tmp, &empty); // this conveys error status
2451 }
2452 }
2453
2454 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2455 {
2456 PacketID* pid=any_cast<PacketID>(&var);
2457 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2458 if(ret > 0) {
2459 pid->outPos+=(ssize_t)ret;
2460 if(pid->outPos==pid->outMSG.size()) {
2461 PacketID tmp=*pid;
2462 t_fdm->removeWriteFD(fd);
2463 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2464 }
2465 }
2466 else { // error or EOF
2467 PacketID tmp(*pid);
2468 t_fdm->removeWriteFD(fd);
2469 string sent;
2470 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2471 }
2472 }
2473
2474 // resend event to everybody chained onto it
2475 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2476 {
2477 if(iter->key.chain.empty())
2478 return;
2479 // cerr<<"doResends called!\n";
2480 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2481 resend.fd=-1;
2482 resend.id=*i;
2483 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2484
2485 MT->sendEvent(resend, &content);
2486 g_stats.chainResends++;
2487 }
2488 }
2489
2490 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2491 {
2492 PacketID pid=any_cast<PacketID>(var);
2493 ssize_t len;
2494 char data[g_outgoingEDNSBufsize];
2495 ComboAddress fromaddr;
2496 socklen_t addrlen=sizeof(fromaddr);
2497
2498 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2499
2500 if(len < (ssize_t) sizeof(dnsheader)) {
2501 if(len < 0)
2502 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2503 else {
2504 g_stats.serverParseError++;
2505 if(g_logCommonErrors)
2506 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2507 ": packet smaller than DNS header"<<endl;
2508 }
2509
2510 t_udpclientsocks->returnSocket(fd);
2511 string empty;
2512
2513 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2514 if(iter != MT->d_waiters.end())
2515 doResends(iter, pid, empty);
2516
2517 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2518 return;
2519 }
2520
2521 dnsheader dh;
2522 memcpy(&dh, data, sizeof(dh));
2523
2524 PacketID pident;
2525 pident.remote=fromaddr;
2526 pident.id=dh.id;
2527 pident.fd=fd;
2528
2529 if(!dh.qr && g_logCommonErrors) {
2530 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2531 }
2532
2533 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2534 !dh.qr) { // one weird server
2535 pident.domain.clear();
2536 pident.type = 0;
2537 }
2538 else {
2539 try {
2540 if(len > 12)
2541 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2542 }
2543 catch(std::exception& e) {
2544 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2545 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2546 return;
2547 }
2548 }
2549 string packet;
2550 packet.assign(data, len);
2551
2552 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2553 if(iter != MT->d_waiters.end()) {
2554 doResends(iter, pident, packet);
2555 }
2556
2557 retryWithName:
2558
2559 if(!MT->sendEvent(pident, &packet)) {
2560 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2561 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2562 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2563 pident.domain == mthread->key.domain) {
2564 mthread->key.nearMisses++;
2565 }
2566
2567 // be a bit paranoid here since we're weakening our matching
2568 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2569 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2570 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2571 pident.domain = mthread->key.domain;
2572 pident.type = mthread->key.type;
2573 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2574 }
2575 }
2576 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2577 if(g_logCommonErrors) {
2578 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2579 }
2580 }
2581 else if(fd >= 0) {
2582 t_udpclientsocks->returnSocket(fd);
2583 }
2584 }
2585
2586 FDMultiplexer* getMultiplexer()
2587 {
2588 FDMultiplexer* ret;
2589 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
2590 try {
2591 ret=i.second();
2592 return ret;
2593 }
2594 catch(FDMultiplexerException &fe) {
2595 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2596 }
2597 catch(...) {
2598 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2599 }
2600 }
2601 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2602 exit(1);
2603 }
2604
2605
2606 static string* doReloadLuaScript()
2607 {
2608 string fname= ::arg()["lua-dns-script"];
2609 try {
2610 if(fname.empty()) {
2611 t_pdl.reset();
2612 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2613 return new string("unloaded\n");
2614 }
2615 else {
2616 t_pdl = std::make_shared<RecursorLua4>(fname);
2617 }
2618 }
2619 catch(std::exception& e) {
2620 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2621 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2622 }
2623
2624 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2625 return new string("(re)loaded '"+fname+"'\n");
2626 }
2627
2628 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2629 {
2630 if(begin != end)
2631 ::arg().set("lua-dns-script") = *begin;
2632
2633 return broadcastAccFunction<string>(doReloadLuaScript);
2634 }
2635
2636 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
2637 try
2638 {
2639 if(newRegex.empty()) {
2640 t_traceRegex.reset();
2641 return new string("unset\n");
2642 }
2643 else {
2644 t_traceRegex = std::make_shared<Regex>(newRegex);
2645 return new string("ok\n");
2646 }
2647 }
2648 catch(PDNSException& ae)
2649 {
2650 return new string(ae.reason+"\n");
2651 }
2652
2653 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2654 {
2655 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2656 }
2657
2658 static void checkLinuxIPv6Limits()
2659 {
2660 #ifdef __linux__
2661 string line;
2662 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2663 int lim=std::stoi(line);
2664 if(lim < 16384) {
2665 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2666 }
2667 }
2668 #endif
2669 }
2670 static void checkOrFixFDS()
2671 {
2672 unsigned int availFDs=getFilenumLimit();
2673 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2674
2675 if(wantFDs > availFDs) {
2676 unsigned int hardlimit= getFilenumLimit(true);
2677 if(hardlimit >= wantFDs) {
2678 setFilenumLimit(wantFDs);
2679 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2680 }
2681 else {
2682 int newval = (hardlimit - 25) / g_numWorkerThreads;
2683 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2684 g_maxMThreads = newval;
2685 setFilenumLimit(hardlimit);
2686 }
2687 }
2688 }
2689
2690 static void* recursorThread(int tid, bool worker);
2691
2692 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
2693 {
2694 t_allowFrom = ng;
2695 return nullptr;
2696 }
2697
2698 int g_argc;
2699 char** g_argv;
2700
2701 void parseACLs()
2702 {
2703 static bool l_initialized;
2704
2705 if(l_initialized) { // only reload configuration file on second call
2706 string configname=::arg()["config-dir"]+"/recursor.conf";
2707 if(::arg()["config-name"]!="") {
2708 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2709 }
2710 cleanSlashes(configname);
2711
2712 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2713 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2714 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2715 ::arg().preParseFile(configname.c_str(), "include-dir");
2716 ::arg().preParse(g_argc, g_argv, "include-dir");
2717
2718 // then process includes
2719 std::vector<std::string> extraConfigs;
2720 ::arg().gatherIncludes(extraConfigs);
2721
2722 for(const std::string& fn : extraConfigs) {
2723 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2724 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2725 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2726 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2727 }
2728
2729 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2730 ::arg().preParse(g_argc, g_argv, "allow-from");
2731 }
2732
2733 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2734 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
2735
2736 if(!::arg()["allow-from-file"].empty()) {
2737 string line;
2738 ifstream ifs(::arg()["allow-from-file"].c_str());
2739 if(!ifs) {
2740 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2741 }
2742
2743 string::size_type pos;
2744 while(getline(ifs,line)) {
2745 pos=line.find('#');
2746 if(pos!=string::npos)
2747 line.resize(pos);
2748 trim(line);
2749 if(line.empty())
2750 continue;
2751
2752 allowFrom->addMask(line);
2753 }
2754 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2755 }
2756 else if(!::arg()["allow-from"].empty()) {
2757 vector<string> ips;
2758 stringtok(ips, ::arg()["allow-from"], ", ");
2759
2760 L<<Logger::Warning<<"Only allowing queries from: ";
2761 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2762 allowFrom->addMask(*i);
2763 if(i!=ips.begin())
2764 L<<Logger::Warning<<", ";
2765 L<<Logger::Warning<<*i;
2766 }
2767 L<<Logger::Warning<<endl;
2768 }
2769 else {
2770 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2771 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2772 allowFrom = nullptr;
2773 }
2774
2775 g_initialAllowFrom = allowFrom;
2776 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2777 oldAllowFrom = nullptr;
2778
2779 l_initialized = true;
2780 }
2781
2782
2783 static void setupDelegationOnly()
2784 {
2785 vector<string> parts;
2786 stringtok(parts, ::arg()["delegation-only"], ", \t");
2787 for(const auto& p : parts) {
2788 SyncRes::addDelegationOnly(DNSName(p));
2789 }
2790 }
2791
2792 static std::map<unsigned int, std::set<int> > parseCPUMap()
2793 {
2794 std::map<unsigned int, std::set<int> > result;
2795
2796 const std::string value = ::arg()["cpu-map"];
2797
2798 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2799 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2800 return result;
2801 }
2802
2803 std::vector<std::string> parts;
2804
2805 stringtok(parts, value, " \t");
2806
2807 for(const auto& part : parts) {
2808 if (part.find('=') == string::npos)
2809 continue;
2810
2811 try {
2812 auto headers = splitField(part, '=');
2813 trim(headers.first);
2814 trim(headers.second);
2815
2816 unsigned int threadId = pdns_stou(headers.first);
2817 std::vector<std::string> cpus;
2818
2819 stringtok(cpus, headers.second, ",");
2820
2821 for(const auto& cpu : cpus) {
2822 int cpuId = std::stoi(cpu);
2823
2824 result[threadId].insert(cpuId);
2825 }
2826 }
2827 catch(const std::exception& e) {
2828 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2829 }
2830 }
2831
2832 return result;
2833 }
2834
2835 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2836 {
2837 const auto& cpuMapping = cpusMap.find(n);
2838 if (cpuMapping != cpusMap.cend()) {
2839 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2840 if (rc == 0) {
2841 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2842 for (const auto cpu : cpuMapping->second) {
2843 L<<Logger::Info<<" "<<cpu;
2844 }
2845 L<<Logger::Info<<endl;
2846 }
2847 else {
2848 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2849 for (const auto cpu : cpuMapping->second) {
2850 L<<Logger::Info<<" "<<cpu;
2851 }
2852 L<<Logger::Info<<strerror(rc)<<endl;
2853 }
2854 }
2855 }
2856
2857 static int serviceMain(int argc, char*argv[])
2858 {
2859 L.setName(s_programname);
2860 L.disableSyslog(::arg().mustDo("disable-syslog"));
2861 L.setTimestamps(::arg().mustDo("log-timestamp"));
2862
2863 if(!::arg()["logging-facility"].empty()) {
2864 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2865 if(val >= 0)
2866 theL().setFacility(val);
2867 else
2868 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2869 }
2870
2871 showProductVersion();
2872 seedRandom(::arg()["entropy-source"]);
2873
2874 g_disthashseed=dns_random(0xffffffff);
2875
2876 checkLinuxIPv6Limits();
2877 try {
2878 vector<string> addrs;
2879 if(!::arg()["query-local-address6"].empty()) {
2880 SyncRes::s_doIPv6=true;
2881 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2882
2883 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2884 for(const string& addr : addrs) {
2885 g_localQueryAddresses6.push_back(ComboAddress(addr));
2886 }
2887 }
2888 else {
2889 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2890 }
2891 addrs.clear();
2892 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2893 for(const string& addr : addrs) {
2894 g_localQueryAddresses4.push_back(ComboAddress(addr));
2895 }
2896 }
2897 catch(std::exception& e) {
2898 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2899 exit(99);
2900 }
2901
2902 // keep this ABOVE loadRecursorLuaConfig!
2903 if(::arg()["dnssec"]=="off")
2904 g_dnssecmode=DNSSECMode::Off;
2905 else if(::arg()["dnssec"]=="process-no-validate")
2906 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2907 else if(::arg()["dnssec"]=="process")
2908 g_dnssecmode=DNSSECMode::Process;
2909 else if(::arg()["dnssec"]=="validate")
2910 g_dnssecmode=DNSSECMode::ValidateAll;
2911 else if(::arg()["dnssec"]=="log-fail")
2912 g_dnssecmode=DNSSECMode::ValidateForLog;
2913 else {
2914 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2915 exit(1);
2916 }
2917
2918 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2919 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
2920
2921 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2922 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2923
2924 try {
2925 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2926 }
2927 catch (PDNSException &e) {
2928 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2929 exit(1);
2930 }
2931
2932 parseACLs();
2933 sortPublicSuffixList();
2934
2935 if(!::arg()["dont-query"].empty()) {
2936 vector<string> ips;
2937 stringtok(ips, ::arg()["dont-query"], ", ");
2938 ips.push_back("0.0.0.0");
2939 ips.push_back("::");
2940
2941 L<<Logger::Warning<<"Will not send queries to: ";
2942 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2943 SyncRes::addDontQuery(*i);
2944 if(i!=ips.begin())
2945 L<<Logger::Warning<<", ";
2946 L<<Logger::Warning<<*i;
2947 }
2948 L<<Logger::Warning<<endl;
2949 }
2950
2951 g_quiet=::arg().mustDo("quiet");
2952
2953 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2954 if(g_weDistributeQueries) {
2955 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2956 }
2957
2958 setupDelegationOnly();
2959 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2960
2961 if(::arg()["trace"]=="fail") {
2962 SyncRes::setDefaultLogMode(SyncRes::Store);
2963 }
2964 else if(::arg().mustDo("trace")) {
2965 SyncRes::setDefaultLogMode(SyncRes::Log);
2966 ::arg().set("quiet")="no";
2967 g_quiet=false;
2968 g_dnssecLOG=true;
2969 }
2970
2971 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2972
2973 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2974
2975 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
2976 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
2977 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2978 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2979 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2980 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
2981 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2982 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
2983 SyncRes::s_serverID=::arg()["server-id"];
2984 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
2985 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
2986 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
2987 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
2988 if(SyncRes::s_serverID.empty()) {
2989 char tmp[128];
2990 gethostname(tmp, sizeof(tmp)-1);
2991 SyncRes::s_serverID=tmp;
2992 }
2993
2994 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
2995 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
2996
2997 if (!::arg().isEmpty("ecs-scope-zero-address")) {
2998 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
2999 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3000 }
3001 else {
3002 bool found = false;
3003 for (const auto& addr : g_localQueryAddresses4) {
3004 if (!IsAnyAddress(addr)) {
3005 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3006 found = true;
3007 break;
3008 }
3009 }
3010 if (!found) {
3011 for (const auto& addr : g_localQueryAddresses6) {
3012 if (!IsAnyAddress(addr)) {
3013 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3014 found = true;
3015 break;
3016 }
3017 }
3018 if (!found) {
3019 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3020 }
3021 }
3022 }
3023
3024 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3025
3026 g_initialDomainMap = parseAuthAndForwards();
3027
3028 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3029
3030 g_logCommonErrors=::arg().mustDo("log-common-errors");
3031 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3032
3033 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3034 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3035
3036 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3037
3038 g_numWorkerThreads = ::arg().asNum("threads");
3039 if (g_numWorkerThreads < 1) {
3040 L<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3041 g_numWorkerThreads = 1;
3042 }
3043
3044 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3045 g_maxMThreads = ::arg().asNum("max-mthreads");
3046
3047 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3048
3049 g_statisticsInterval = ::arg().asNum("statistics-interval");
3050
3051 #ifdef SO_REUSEPORT
3052 g_reusePort = ::arg().mustDo("reuseport");
3053 #endif
3054
3055 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3056
3057 if (g_useOneSocketPerThread) {
3058 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3059 makeUDPServerSockets(threadId);
3060 makeTCPServerSockets(threadId);
3061 }
3062 }
3063 else {
3064 makeUDPServerSockets(0);
3065 makeTCPServerSockets(0);
3066 }
3067
3068 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3069 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3070
3071 int forks;
3072 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3073 if(!fork()) // we are child
3074 break;
3075 }
3076
3077 if(::arg().mustDo("daemon")) {
3078 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3079 L.toConsole(Logger::Critical);
3080 daemonize();
3081 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3082 }
3083 signal(SIGUSR1,usr1Handler);
3084 signal(SIGUSR2,usr2Handler);
3085 signal(SIGPIPE,SIG_IGN);
3086
3087 checkOrFixFDS();
3088
3089 #ifdef HAVE_LIBSODIUM
3090 if (sodium_init() == -1) {
3091 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3092 exit(99);
3093 }
3094 #endif
3095
3096 openssl_thread_setup();
3097 openssl_seed();
3098
3099 int newgid=0;
3100 if(!::arg()["setgid"].empty())
3101 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3102 int newuid=0;
3103 if(!::arg()["setuid"].empty())
3104 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3105
3106 Utility::dropGroupPrivs(newuid, newgid);
3107
3108 if (!::arg()["chroot"].empty()) {
3109 #ifdef HAVE_SYSTEMD
3110 char *ns;
3111 ns = getenv("NOTIFY_SOCKET");
3112 if (ns != nullptr) {
3113 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3114 exit(1);
3115 }
3116 #endif
3117 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3118 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3119 exit(1);
3120 }
3121 else
3122 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3123 }
3124
3125 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3126 if(!s_pidfname.empty())
3127 unlink(s_pidfname.c_str()); // remove possible old pid file
3128 writePid();
3129
3130 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3131
3132 Utility::dropUserPrivs(newuid);
3133
3134 makeThreadPipes();
3135
3136 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3137 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3138 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3139
3140 if (::arg().mustDo("snmp-agent")) {
3141 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3142 g_snmpAgent->run();
3143 }
3144
3145 /* This thread handles the web server, carbon, statistics and the control channel */
3146 std::thread handlerThread(recursorThread, -1, false);
3147
3148 const auto cpusMap = parseCPUMap();
3149
3150 std::vector<std::thread> workers(g_numThreads);
3151 if(g_numThreads == 1) {
3152 L<<Logger::Warning<<"Operating unthreaded"<<endl;
3153 #ifdef HAVE_SYSTEMD
3154 sd_notify(0, "READY=1");
3155 #endif
3156 setCPUMap(cpusMap, 0, pthread_self());
3157 recursorThread(0, true);
3158 }
3159 else {
3160 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3161 for(unsigned int n=0; n < g_numThreads; ++n) {
3162 workers[n] = std::thread(recursorThread, n, true);
3163
3164 setCPUMap(cpusMap, n, workers[n].native_handle());
3165 }
3166 #ifdef HAVE_SYSTEMD
3167 sd_notify(0, "READY=1");
3168 #endif
3169 workers.back().join();
3170 }
3171 return 0;
3172 }
3173
3174 static void* recursorThread(int n, bool worker)
3175 try
3176 {
3177 t_id=n;
3178 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3179 SyncRes::setDomainMap(g_initialDomainMap);
3180 t_allowFrom = g_initialAllowFrom;
3181 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3182 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3183 primeHints();
3184
3185 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3186
3187 #ifdef HAVE_PROTOBUF
3188 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
3189 #endif
3190 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3191
3192 try {
3193 if(!::arg()["lua-dns-script"].empty()) {
3194 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
3195 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3196 }
3197 }
3198 catch(std::exception &e) {
3199 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3200 _exit(99);
3201 }
3202
3203 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3204 if(ringsize) {
3205 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3206 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3207 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3208 else
3209 t_remotes->set_capacity(ringsize);
3210 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3211 t_servfailremotes->set_capacity(ringsize);
3212 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3213 t_largeanswerremotes->set_capacity(ringsize);
3214
3215 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3216 t_queryring->set_capacity(ringsize);
3217 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3218 t_servfailqueryring->set_capacity(ringsize);
3219 }
3220
3221 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3222
3223 PacketID pident;
3224
3225 t_fdm=getMultiplexer();
3226
3227 if(!worker) {
3228 if(::arg().mustDo("webserver")) {
3229 L<<Logger::Warning << "Enabling web server" << endl;
3230 try {
3231 new RecursorWebServer(t_fdm);
3232 }
3233 catch(PDNSException &e) {
3234 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3235 exit(99);
3236 }
3237 }
3238 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
3239 }
3240 else {
3241 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3242
3243 if(g_useOneSocketPerThread) {
3244 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3245 t_fdm->addReadFD(i->first, i->second);
3246 }
3247 }
3248 else {
3249 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
3250 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3251 t_fdm->addReadFD(i->first, i->second);
3252 }
3253 }
3254 }
3255 }
3256
3257 registerAllStats();
3258
3259 if(!worker) {
3260 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3261 }
3262
3263 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3264
3265 bool listenOnTCP(true);
3266
3267 time_t last_carbon=0;
3268 time_t carbonInterval=::arg().asNum("carbon-interval");
3269 counter.store(0); // used to periodically execute certain tasks
3270 for(;;) {
3271 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3272
3273 if(!(counter%500)) {
3274 MT->makeThread(houseKeeping, 0);
3275 }
3276
3277 if(!(counter%55)) {
3278 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
3279 expired_t expired=t_fdm->getTimeouts(g_now);
3280
3281 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
3282 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
3283 if(g_logCommonErrors)
3284 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
3285 t_fdm->removeReadFD(i->first);
3286 }
3287 }
3288
3289 counter++;
3290
3291 if(!worker && statsWanted) {
3292 doStats();
3293 }
3294
3295 Utility::gettimeofday(&g_now, 0);
3296
3297 if(!worker && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3298 MT->makeThread(doCarbonDump, 0);
3299 last_carbon = g_now.tv_sec;
3300 }
3301
3302 t_fdm->run(&g_now);
3303 // 'run' updates g_now for us
3304
3305 if(worker && (!g_weDistributeQueries || !t_id)) { // if pdns distributes queries, only tid 0 should do this
3306 if(listenOnTCP) {
3307 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3308 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3309 t_fdm->removeReadFD(*i);
3310 listenOnTCP=false;
3311 }
3312 }
3313 else {
3314 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3315 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3316 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3317 listenOnTCP=true;
3318 }
3319 }
3320 }
3321 }
3322 }
3323 catch(PDNSException &ae) {
3324 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3325 return 0;
3326 }
3327 catch(std::exception &e) {
3328 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3329 return 0;
3330 }
3331 catch(...) {
3332 L<<Logger::Error<<"any other exception in main: "<<endl;
3333 return 0;
3334 }
3335
3336
3337 int main(int argc, char **argv)
3338 {
3339 g_argc = argc;
3340 g_argv = argv;
3341 g_stats.startupTime=time(0);
3342 versionSetProduct(ProductRecursor);
3343 reportBasicTypes();
3344 reportOtherTypes();
3345
3346 int ret = EXIT_SUCCESS;
3347
3348 try {
3349 ::arg().set("stack-size","stack size per mthread")="200000";
3350 ::arg().set("soa-minimum-ttl","Don't change")="0";
3351 ::arg().set("no-shuffle","Don't change")="off";
3352 ::arg().set("local-port","port to listen on")="53";
3353 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3354 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3355 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3356 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3357 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3358 ::arg().set("daemon","Operate as a daemon")="no";
3359 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3360 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3361 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3362 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3363 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3364 ::arg().set("chroot","switch to chroot jail")="";
3365 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3366 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3367 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3368 ::arg().set("threads", "Launch this number of threads")="2";
3369 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3370 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3371 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3372 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3373 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3374 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3375 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3376 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3377 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3378 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3379 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3380 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3381 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3382 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3383 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3384 ::arg().set("quiet","Suppress logging of questions and answers")="";
3385 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3386 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3387 ::arg().set("socket-owner","Owner of socket")="";
3388 ::arg().set("socket-group","Group of socket")="";
3389 ::arg().set("socket-mode", "Permissions for socket")="";
3390
3391 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3392 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3393 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3394 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3395 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3396 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3397 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3398 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3399 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3400 ::arg().set("hint-file", "If set, load root hints from this file")="";
3401 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3402 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3403 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3404 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3405 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3406 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3407 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3408 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3409 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3410 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3411 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3412 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3413 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3414 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3415 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3416 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3417 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3418 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3419 ::arg().set("lua-config-file", "More powerful configuration options")="";
3420
3421 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3422 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3423 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3424 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3425 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3426 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3427 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3428 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3429 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3430 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3431 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3432 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3433 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3434 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3435 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3436 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3437 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3438 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3439 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3440 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3441 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3442 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3443 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3444 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3445 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3446 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3447
3448 ::arg().set("include-dir","Include *.conf files from this directory")="";
3449 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3450
3451 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3452
3453 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3454 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3455
3456 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3457 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3458
3459 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3460
3461 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3462
3463 ::arg().setCmd("help","Provide a helpful message");
3464 ::arg().setCmd("version","Print version string");
3465 ::arg().setCmd("config","Output blank configuration");
3466 L.toConsole(Logger::Info);
3467 ::arg().laxParse(argc,argv); // do a lax parse
3468
3469 string configname=::arg()["config-dir"]+"/recursor.conf";
3470 if(::arg()["config-name"]!="") {
3471 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3472 s_programname+="-"+::arg()["config-name"];
3473 }
3474 cleanSlashes(configname);
3475
3476 if(::arg().mustDo("config")) {
3477 cout<<::arg().configstring()<<endl;
3478 exit(0);
3479 }
3480
3481 if(!::arg().file(configname.c_str()))
3482 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3483
3484 ::arg().parse(argc,argv);
3485
3486 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3487 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3488 exit(EXIT_FAILURE);
3489 }
3490
3491 if (::arg()["socket-dir"].empty()) {
3492 if (::arg()["chroot"].empty())
3493 ::arg().set("socket-dir") = LOCALSTATEDIR;
3494 else
3495 ::arg().set("socket-dir") = "/";
3496 }
3497
3498 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3499
3500 if(::arg().asNum("threads")==1)
3501 ::arg().set("pdns-distributes-queries")="no";
3502
3503 if(::arg().mustDo("help")) {
3504 cout<<"syntax:"<<endl<<endl;
3505 cout<<::arg().helpstring(::arg()["help"])<<endl;
3506 exit(0);
3507 }
3508 if(::arg().mustDo("version")) {
3509 showProductVersion();
3510 showBuildConfiguration();
3511 exit(0);
3512 }
3513
3514 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3515
3516 if (logUrgency < Logger::Error)
3517 logUrgency = Logger::Error;
3518 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3519 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3520 }
3521 L.setLoglevel(logUrgency);
3522 L.toConsole(logUrgency);
3523
3524 serviceMain(argc, argv);
3525 }
3526 catch(PDNSException &ae) {
3527 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3528 ret=EXIT_FAILURE;
3529 }
3530 catch(std::exception &e) {
3531 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3532 ret=EXIT_FAILURE;
3533 }
3534 catch(...) {
3535 L<<Logger::Error<<"any other exception in main: "<<endl;
3536 ret=EXIT_FAILURE;
3537 }
3538
3539 return ret;
3540 }