]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
Backport #6566
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include "ws-recursor.hh"
31 #include <pthread.h>
32 #include "recpacketcache.hh"
33 #include "utility.hh"
34 #include "dns_random.hh"
35 #ifdef HAVE_LIBSODIUM
36 #include <sodium.h>
37 #endif
38 #include "opensslsigners.hh"
39 #include <iostream>
40 #include <errno.h>
41 #include <boost/static_assert.hpp>
42 #include <map>
43 #include <set>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
46 #include <stdio.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include "misc.hh"
50 #include "mtasker.hh"
51 #include <utility>
52 #include "arguments.hh"
53 #include "syncres.hh"
54 #include <fcntl.h>
55 #include <fstream>
56 #include "sortlist.hh"
57 #include "sstuff.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
63 #ifdef MALLOC_TRACE
64 #include "malloctrace.hh"
65 #endif
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
72 #include "logger.hh"
73 #include "iputils.hh"
74 #include "mplexer.hh"
75 #include "config.h"
76 #include "lua-recursor4.hh"
77 #include "version.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
80 #include "dnsname.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
86 #include "gettime.hh"
87
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
90
91 #ifdef HAVE_SYSTEMD
92 #include <systemd/sd-daemon.h>
93 #endif
94
95 #include "namespaces.hh"
96
// Number of open TCP connections per client address; TCPConnection's constructor and
// destructor maintain the counts. NOTE(review): the comparator name suggests the port is
// ignored when comparing keys -- confirm in iputils.hh.
typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;

// Per-thread state that is private to this file
static thread_local std::shared_ptr<RecursorLua4> t_pdl; // Lua engine handed to SyncRes and consulted for hooks; may be unset
static thread_local unsigned int t_id; // what getRecursorThreadId() returns
static thread_local std::shared_ptr<Regex> t_traceRegex; // when set, queries whose name matches get their resolution trace stored
static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts; // this thread's per-client TCP connection counts

// Per-thread state with external linkage
thread_local std::unique_ptr<MT_t> MT; // the big MTasker
thread_local std::unique_ptr<MemRecursorCache> t_RC;
thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
thread_local FDMultiplexer* t_fdm{nullptr}; // multiplexer on which this thread registers its read/write callbacks
// ring buffers fed by updateResponseStats() and startDoResolve(); each may be unset
thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
#ifdef HAVE_PROTOBUF
thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
#endif
__thread struct timeval g_now; // timestamp, updated (too) frequently
115
// for communicating with our threads
// Holds six file descriptors per thread (see g_pipes).
// NOTE(review): presumably each write*/read* pair is the two ends of one pipe -- confirm
// where g_pipes is filled in.
struct ThreadPipeSet
{
  int writeToThread;
  int readToThread;
  int writeFromThread;
  int readFromThread;
  int writeQueriesToThread; // this one is non-blocking
  int readQueriesToThread;
};
126
typedef vector<int> tcpListenSockets_t;
typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
// (descriptor, callback) pairs; the callback has the shape of an FDMultiplexer handler
typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;

// wildcard source addresses used by getQueryLocalAddress() when no explicit address is configured
static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
// source addresses for outgoing queries; getQueryLocalAddress() picks one at random per family
static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
static AtomicCounter counter;
static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads need this to be set up
static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new threads need to be set up with this
static size_t g_tcpMaxQueriesPerConn;
static uint64_t g_latencyStatSize;
static uint32_t g_disthashseed;
static unsigned int g_maxTCPPerClient;
static unsigned int g_networkTimeoutMsec; // timeout passed to MT->waitEvent() by the asend*/arecv* helpers
static unsigned int g_maxMThreads;
static unsigned int g_numWorkerThreads;
static int g_tcpTimeout;
static uint16_t g_udpTruncationThreshold; // upper bound applied to the size of UDP answers
static std::atomic<bool> statsWanted;
static std::atomic<bool> g_quiet; // when false, every incoming question is logged
static bool g_logCommonErrors;
static bool g_anyToTcp; // when true, ANY queries received over UDP are answered with TC=1
static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
static bool g_reusePort{false};
static bool g_useOneSocketPerThread;
static bool g_gettagNeedsEDNSOptions{false};
static time_t g_statisticsInterval;
static bool g_useIncomingECS; // when true, an EDNS Client Subnet option sent by the client is parsed and passed to SyncRes
std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;

RecursorControlChannel s_rcc; // only active in thread 0
RecursorStats g_stats;
string s_programname="pdns_recursor";
string s_pidfname; // path writePid() appends the process id to
bool g_lowercaseOutgoing;
unsigned int g_numThreads;
uint16_t g_outgoingEDNSBufsize;
bool g_logRPZChanges{false};
170
// Loopback, private-use, shared-address-space, link-local and unique-local ranges (IPv4 and IPv6)
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Both lists joined into a single comma-separated string literal
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
179
//! used to send information to a newborn mthread
struct DNSComboWriter {
  // Hands the 'len' bytes at 'data' to the MOADNSParser and remembers 'now';
  // d_tcp starts out false and d_socket starts out as -1.
  DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
                                                                              d_tcp(false), d_socket(-1)
  {}
  MOADNSParser d_mdp; // parser over the incoming packet (header, qname, qtype, qclass are read from it)
  // 'sa' is dereferenced unconditionally, so it must not be null
  void setRemote(const ComboAddress* sa)
  {
    d_remote=*sa;
  }

  void setLocal(const ComboAddress& sa)
  {
    d_local=sa;
  }


  void setSocket(int sock)
  {
    d_socket=sock;
  }

  // textual form of the client address
  string getRemote() const
  {
    return d_remote.toString();
  }

  struct timeval d_now; // timestamp given at construction; startDoResolve builds its SyncRes from it
  ComboAddress d_remote, d_local; // client address and the address the query was received on
#ifdef HAVE_PROTOBUF
  boost::uuids::uuid d_uuid; // passed to SyncRes as the initial request id
  string d_requestorId;
  string d_deviceId;
#endif
  EDNSSubnetOpts d_ednssubnet; // filled in from the query's ECS option, when one is found
  bool d_ecsFound{false}; // true when an ECS option was successfully parsed into d_ednssubnet
  bool d_ecsParsed{false}; // set once the EDNS options have been searched for ECS, found or not
  bool d_tcp; // true for queries that arrived over TCP
  int d_socket;
  unsigned int d_tag{0}; // copied into the Lua DNSQuestion as 'tag'
  uint32_t d_qhash{0};
  string d_query;
  shared_ptr<TCPConnection> d_tcpConnection;
  vector<pair<uint16_t, string> > d_ednsOpts; // (option code, raw payload) pairs copied from the query's EDNS options
  std::vector<std::string> d_policyTags;
  LuaContext::LuaObject d_data; // copied into the Lua DNSQuestion as 'data'
  uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()}; // startDoResolve seeds its minimum-TTL tracking with this value
  bool d_variable{false}; // initial value of startDoResolve's 'variableAnswer' flag
};
229
230 MT_t* getMT()
231 {
232 return MT ? MT.get() : nullptr;
233 }
234
235 ArgvMap &arg()
236 {
237 static ArgvMap theArg;
238 return theArg;
239 }
240
241 unsigned int getRecursorThreadId()
242 {
243 return t_id;
244 }
245
246 int getMTaskerTID()
247 {
248 return MT->getTid();
249 }
250
251 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
252
253 // -1 is error, 0 is timeout, 1 is success
254 int asendtcp(const string& data, Socket* sock)
255 {
256 PacketID pident;
257 pident.sock=sock;
258 pident.outMSG=data;
259
260 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
261 string packet;
262
263 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
264
265 if(!ret || ret==-1) { // timeout
266 t_fdm->removeWriteFD(sock->getHandle());
267 }
268 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
269 return -1;
270 }
271 return ret;
272 }
273
274 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
275
276 // -1 is error, 0 is timeout, 1 is success
277 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
278 {
279 data.clear();
280 PacketID pident;
281 pident.sock=sock;
282 pident.inNeeded=len;
283 pident.inIncompleteOkay=incompleteOkay;
284 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
285
286 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
287 if(!ret || ret==-1) { // timeout
288 t_fdm->removeReadFD(sock->getHandle());
289 }
290 else if(data.empty()) {// error, EOF or other
291 return -1;
292 }
293
294 return ret;
295 }
296
297 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
298 {
299 PacketID pident=*any_cast<PacketID>(&var);
300 char resp[512];
301 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
302 t_fdm->removeReadFD(fd);
303 if(ret >= 0) {
304 string data(resp, (size_t) ret);
305 MT->sendEvent(pident, &data);
306 }
307 else {
308 string empty;
309 MT->sendEvent(pident, &empty);
310 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
311 }
312 }
313 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
314 {
315 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
316 s.setNonBlocking();
317 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
318
319 s.bind(local);
320 s.connect(dest);
321 s.send(query);
322
323 PacketID pident;
324 pident.sock=&s;
325 pident.type=0;
326 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
327
328 string data;
329
330 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
331
332 if(!ret || ret==-1) { // timeout
333 t_fdm->removeReadFD(s.getHandle());
334 }
335 else if(data.empty()) {// error, EOF or other
336 // we could special case this
337 return data;
338 }
339 return data;
340 }
341
342 //! pick a random query local address
343 ComboAddress getQueryLocalAddress(int family, uint16_t port)
344 {
345 ComboAddress ret;
346 if(family==AF_INET) {
347 if(g_localQueryAddresses4.empty())
348 ret = g_local4;
349 else
350 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
351 ret.sin4.sin_port = htons(port);
352 }
353 else {
354 if(g_localQueryAddresses6.empty())
355 ret = g_local6;
356 else
357 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
358
359 ret.sin6.sin6_port = htons(port);
360 }
361 return ret;
362 }
363
364 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
365
366 static void setSocketBuffer(int fd, int optname, uint32_t size)
367 {
368 uint32_t psize=0;
369 socklen_t len=sizeof(psize);
370
371 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
372 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
373 return;
374 }
375
376 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
377 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
378 }
379
380
381 static void setSocketReceiveBuffer(int fd, uint32_t size)
382 {
383 setSocketBuffer(fd, SO_RCVBUF, size);
384 }
385
386 static void setSocketSendBuffer(int fd, uint32_t size)
387 {
388 setSocketBuffer(fd, SO_SNDBUF, size);
389 }
390
391
392 // you can ask this class for a UDP socket to send a query from
393 // this socket is not yours, don't even think about deleting it
394 // but after you call 'returnSocket' on it, don't assume anything anymore
395 class UDPClientSocks
396 {
397 unsigned int d_numsocks;
398 public:
399 UDPClientSocks() : d_numsocks(0)
400 {
401 }
402
403 typedef set<int> socks_t;
404 socks_t d_socks;
405
406 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
407 int getSocket(const ComboAddress& toaddr, int* fd)
408 {
409 *fd=makeClientSocket(toaddr.sin4.sin_family);
410 if(*fd < 0) // temporary error - receive exception otherwise
411 return -2;
412
413 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
414 int err = errno;
415 // returnSocket(*fd);
416 try {
417 closesocket(*fd);
418 }
419 catch(const PDNSException& e) {
420 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
421 }
422
423 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
424 return -2;
425 return -1;
426 }
427
428 d_socks.insert(*fd);
429 d_numsocks++;
430 return 0;
431 }
432
433 void returnSocket(int fd)
434 {
435 socks_t::iterator i=d_socks.find(fd);
436 if(i==d_socks.end()) {
437 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
438 }
439 returnSocketLocked(i);
440 }
441
442 // return a socket to the pool, or simply erase it
443 void returnSocketLocked(socks_t::iterator& i)
444 {
445 if(i==d_socks.end()) {
446 throw PDNSException("Trying to return a socket not in the pool");
447 }
448 try {
449 t_fdm->removeReadFD(*i);
450 }
451 catch(FDMultiplexerException& e) {
452 // we sometimes return a socket that has not yet been assigned to t_fdm
453 }
454 try {
455 closesocket(*i);
456 }
457 catch(const PDNSException& e) {
458 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
459 }
460
461 d_socks.erase(i++);
462 --d_numsocks;
463 }
464
465 // returns -1 for errors which might go away, throws for ones that won't
466 static int makeClientSocket(int family)
467 {
468 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
469
470 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
471 return ret;
472
473 if(ret<0)
474 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
475
476 // setCloseOnExec(ret); // we're not going to exec
477
478 int tries=10;
479 ComboAddress sin;
480 while(--tries) {
481 uint16_t port;
482
483 if(tries==1) // fall back to kernel 'random'
484 port = 0;
485 else
486 port = 1025 + dns_random(64510);
487
488 sin=getQueryLocalAddress(family, port); // does htons for us
489
490 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
491 break;
492 }
493 if(!tries)
494 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
495
496 setNonBlocking(ret);
497 return ret;
498 }
499 };
500
501 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
502
503 /* these two functions are used by LWRes */
504 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
505 int asendto(const char *data, size_t len, int flags,
506 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
507 {
508
509 PacketID pident;
510 pident.domain = domain;
511 pident.remote = toaddr;
512 pident.type = qtype;
513
514 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
515 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
516
517 for(; chain.first != chain.second; chain.first++) {
518 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
519 /*
520 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
521 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
522 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
523 */
524 chain.first->key.chain.insert(id); // we can chain
525 *fd=-1; // gets used in waitEvent / sendEvent later on
526 return 1;
527 }
528 }
529
530 int ret=t_udpclientsocks->getSocket(toaddr, fd);
531 if(ret < 0)
532 return ret;
533
534 pident.fd=*fd;
535 pident.id=id;
536
537 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
538 ret = send(*fd, data, len, 0);
539
540 int tmp = errno;
541
542 if(ret < 0)
543 t_udpclientsocks->returnSocket(*fd);
544
545 errno = tmp; // this is for logging purposes only
546 return ret;
547 }
548
549 // -1 is error, 0 is timeout, 1 is success
550 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
551 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
552 {
553 static optional<unsigned int> nearMissLimit;
554 if(!nearMissLimit)
555 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
556
557 PacketID pident;
558 pident.fd=fd;
559 pident.id=id;
560 pident.domain=domain;
561 pident.type = qtype;
562 pident.remote=fromaddr;
563
564 string packet;
565 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
566
567 if(ret > 0) {
568 if(packet.empty()) // means "error"
569 return -1;
570
571 *d_len=packet.size();
572 memcpy(data,packet.c_str(),min(len,*d_len));
573 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
574 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
575 g_stats.spoofCount++;
576 return -1;
577 }
578 }
579 else {
580 if(fd >= 0)
581 t_udpclientsocks->returnSocket(fd);
582 }
583 return ret;
584 }
585
586 static void writePid(void)
587 {
588 if(!::arg().mustDo("write-pid"))
589 return;
590 ofstream of(s_pidfname.c_str(), std::ios_base::app);
591 if(of)
592 of<< Utility::getpid() <<endl;
593 else
594 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
595 }
596
597 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
598 {
599 ++s_currentConnections;
600 (*t_tcpClientCounts)[d_remote]++;
601 }
602
603 TCPConnection::~TCPConnection()
604 {
605 try {
606 if(closesocket(d_fd) < 0)
607 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
608 }
609 catch(const PDNSException& e) {
610 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
611 }
612
613 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
614 t_tcpClientCounts->erase(d_remote);
615 --s_currentConnections;
616 }
617
618 AtomicCounter TCPConnection::s_currentConnections;
619
620 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
621
622 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
623 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
624 {
625 if(packetsize > 1000 && t_largeanswerremotes)
626 t_largeanswerremotes->push_back(remote);
627 switch(res) {
628 case RCode::ServFail:
629 if(t_servfailremotes) {
630 t_servfailremotes->push_back(remote);
631 if(query && t_servfailqueryring) // packet cache
632 t_servfailqueryring->push_back(make_pair(*query, qtype));
633 }
634 g_stats.servFails++;
635 break;
636 case RCode::NXDomain:
637 g_stats.nxDomains++;
638 break;
639 case RCode::NoError:
640 g_stats.noErrors++;
641 break;
642 }
643 }
644
645 static string makeLoginfo(DNSComboWriter* dc)
646 try
647 {
648 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
649 }
650 catch(...)
651 {
652 return "Exception making error message for exception";
653 }
654
655 #ifdef HAVE_PROTOBUF
656 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
657 {
658 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
659 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
660 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
661 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
662 message.setRequestorId(requestorId);
663 message.setDeviceId(deviceId);
664
665 if (!policyTags.empty()) {
666 message.setPolicyTags(policyTags);
667 }
668
669 // cerr <<message.toDebugString()<<endl;
670 std::string str;
671 message.serialize(str);
672 logger->queueData(str);
673 }
674
675 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
676 {
677 // cerr <<message.toDebugString()<<endl;
678 std::string str;
679 message.serialize(str);
680 logger->queueData(str);
681 }
682 #endif
683
684 /**
685 * Chases the CNAME provided by the PolicyCustom RPZ policy.
686 *
687 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
688 * @param qtype: The QType of the original query
689 * @param sr: A SyncRes
690 * @param res: An integer that will contain the RCODE of the lookup we do
691 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
692 */
693 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
694 {
695 if (spoofed.d_type == QType::CNAME) {
696 bool oldWantsRPZ = sr.getWantsRPZ();
697 sr.setWantsRPZ(false);
698 vector<DNSRecord> ans;
699 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
700 for (const auto& rec : ans) {
701 if(rec.d_place == DNSResourceRecord::ANSWER) {
702 ret.push_back(rec);
703 }
704 }
705 // Reset the RPZ state of the SyncRes
706 sr.setWantsRPZ(oldWantsRPZ);
707 }
708 }
709
710 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
711 {
712 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
713
714 if(rec.d_type != QType::OPT) // their TTL ain't real
715 minTTL = min(minTTL, rec.d_ttl);
716
717 rec.d_content->toPacket(pw);
718 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
719 pw.rollback();
720 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
721 pw.getHeader()->tc=1;
722 pw.truncate();
723 }
724 return false;
725 }
726
727 return true;
728 }
729
730 static void startDoResolve(void *p)
731 {
732 DNSComboWriter* dc=(DNSComboWriter *)p;
733 try {
734 if (t_queryring)
735 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
736
737 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
738 EDNSOpts edo;
739 bool haveEDNS=false;
740 if(getEDNSOpts(dc->d_mdp, &edo)) {
741 if(!dc->d_tcp) {
742 /* rfc6891 6.2.3:
743 "Values lower than 512 MUST be treated as equal to 512."
744 */
745 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
746 }
747 dc->d_ednsOpts = edo.d_options;
748 haveEDNS=true;
749
750 if (g_useIncomingECS && !dc->d_ecsParsed) {
751 for (const auto& o : edo.d_options) {
752 if (o.first == EDNSOptionCode::ECS) {
753 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
754 break;
755 }
756 }
757 }
758 }
759 /* perhaps there was no EDNS or no ECS but by now we looked */
760 dc->d_ecsParsed = true;
761 vector<DNSRecord> ret;
762 vector<uint8_t> packet;
763
764 auto luaconfsLocal = g_luaconfs.getLocal();
765 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
766 bool wantsRPZ(true);
767 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
768 #ifdef HAVE_PROTOBUF
769 if (luaconfsLocal->protobufServer) {
770 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
771 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
772 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
773 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
774 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
775 }
776 #endif /* HAVE_PROTOBUF */
777
778 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
779
780 pw.getHeader()->aa=0;
781 pw.getHeader()->ra=1;
782 pw.getHeader()->qr=1;
783 pw.getHeader()->tc=0;
784 pw.getHeader()->id=dc->d_mdp.d_header.id;
785 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
786 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
787
788 /* This is the lowest TTL seen in the records of the response,
789 so we can't cache it for longer than this value.
790 If we have a TTL cap, this value can't be larger than the
791 cap no matter what. */
792 uint32_t minTTL = dc->d_ttlCap;
793
794 SyncRes sr(dc->d_now);
795
796 bool DNSSECOK=false;
797 if(t_pdl) {
798 sr.setLuaEngine(t_pdl);
799 }
800 sr.d_requestor=dc->d_remote; // ECS needs this too
801 if(g_dnssecmode != DNSSECMode::Off) {
802 sr.setDoDNSSEC(true);
803
804 // Does the requestor want DNSSEC records?
805 if(edo.d_Z & EDNSOpts::DNSSECOK) {
806 DNSSECOK=true;
807 g_stats.dnssecQueries++;
808 }
809 } else {
810 // Ignore the client-set CD flag
811 pw.getHeader()->cd=0;
812 }
813 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
814
815 #ifdef HAVE_PROTOBUF
816 sr.setInitialRequestId(dc->d_uuid);
817 #endif
818
819 if (g_useIncomingECS) {
820 sr.setIncomingECSFound(dc->d_ecsFound);
821 if (dc->d_ecsFound) {
822 sr.setIncomingECS(dc->d_ednssubnet);
823 }
824 }
825
826 bool tracedQuery=false; // we could consider letting Lua know about this too
827 bool variableAnswer = dc->d_variable;
828 bool shouldNotValidate = false;
829
830 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
831 int res = RCode::NoError;
832 DNSFilterEngine::Policy appliedPolicy;
833 DNSRecord spoofed;
834 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
835 dq.ednsFlags = &edo.d_Z;
836 dq.ednsOptions = &dc->d_ednsOpts;
837 dq.tag = dc->d_tag;
838 dq.discardedPolicies = &sr.d_discardedPolicies;
839 dq.policyTags = &dc->d_policyTags;
840 dq.appliedPolicy = &appliedPolicy;
841 dq.currentRecords = &ret;
842 dq.dh = &dc->d_mdp.d_header;
843 dq.data = dc->d_data;
844 #ifdef HAVE_PROTOBUF
845 dq.requestorId = dc->d_requestorId;
846 dq.deviceId = dc->d_deviceId;
847 #endif
848
849 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
850 pw.getHeader()->tc = 1;
851 res = 0;
852 variableAnswer = true;
853 goto sendit;
854 }
855
856 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
857 sr.setLogMode(SyncRes::Store);
858 tracedQuery=true;
859 }
860
861
862 if(!g_quiet || tracedQuery) {
863 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
864 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
865 if(!dc->d_ednssubnet.source.empty()) {
866 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
867 }
868 L<<endl;
869 }
870
871 sr.setId(MT->getTid());
872 if(!dc->d_mdp.d_header.rd)
873 sr.setCacheOnly();
874
875 if (t_pdl) {
876 t_pdl->prerpz(dq, res);
877 }
878
879 // Check if the query has a policy attached to it
880 if (wantsRPZ) {
881 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
882 }
883
884 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
885 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
886
887 sr.setWantsRPZ(wantsRPZ);
888 if(wantsRPZ) {
889 switch(appliedPolicy.d_kind) {
890 case DNSFilterEngine::PolicyKind::NoAction:
891 break;
892 case DNSFilterEngine::PolicyKind::Drop:
893 g_stats.policyDrops++;
894 g_stats.policyResults[appliedPolicy.d_kind]++;
895 delete dc;
896 dc=0;
897 return;
898 case DNSFilterEngine::PolicyKind::NXDOMAIN:
899 g_stats.policyResults[appliedPolicy.d_kind]++;
900 res=RCode::NXDomain;
901 goto haveAnswer;
902 case DNSFilterEngine::PolicyKind::NODATA:
903 g_stats.policyResults[appliedPolicy.d_kind]++;
904 res=RCode::NoError;
905 goto haveAnswer;
906 case DNSFilterEngine::PolicyKind::Custom:
907 g_stats.policyResults[appliedPolicy.d_kind]++;
908 res=RCode::NoError;
909 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
910 ret.push_back(spoofed);
911 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
912 goto haveAnswer;
913 case DNSFilterEngine::PolicyKind::Truncate:
914 if(!dc->d_tcp) {
915 g_stats.policyResults[appliedPolicy.d_kind]++;
916 res=RCode::NoError;
917 pw.getHeader()->tc=1;
918 goto haveAnswer;
919 }
920 break;
921 }
922 }
923
924 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
925 try {
926 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
927 shouldNotValidate = sr.wasOutOfBand();
928 }
929 catch(ImmediateServFailException &e) {
930 if(g_logCommonErrors)
931 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
932 res = RCode::ServFail;
933 }
934
935 dq.validationState = sr.getValidationState();
936
937 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
938 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
939 appliedPolicy = sr.d_appliedPolicy;
940 g_stats.policyResults[appliedPolicy.d_kind]++;
941 switch(appliedPolicy.d_kind) {
942 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
943 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
944 case DNSFilterEngine::PolicyKind::Drop:
945 g_stats.policyDrops++;
946 delete dc;
947 dc=0;
948 return;
949 case DNSFilterEngine::PolicyKind::NXDOMAIN:
950 ret.clear();
951 res=RCode::NXDomain;
952 goto haveAnswer;
953
954 case DNSFilterEngine::PolicyKind::NODATA:
955 ret.clear();
956 res=RCode::NoError;
957 goto haveAnswer;
958
959 case DNSFilterEngine::PolicyKind::Truncate:
960 if(!dc->d_tcp) {
961 ret.clear();
962 res=RCode::NoError;
963 pw.getHeader()->tc=1;
964 goto haveAnswer;
965 }
966 break;
967
968 case DNSFilterEngine::PolicyKind::Custom:
969 ret.clear();
970 res=RCode::NoError;
971 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
972 ret.push_back(spoofed);
973 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
974 goto haveAnswer;
975 }
976 }
977
978 if (wantsRPZ) {
979 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
980 }
981
982 if(t_pdl) {
983 if(res == RCode::NoError) {
984 auto i=ret.cbegin();
985 for(; i!= ret.cend(); ++i)
986 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
987 break;
988 if(i == ret.cend() && t_pdl->nodata(dq, res))
989 shouldNotValidate = true;
990
991 }
992 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
993 shouldNotValidate = true;
994
995 if(t_pdl->postresolve(dq, res))
996 shouldNotValidate = true;
997 }
998
999 if (wantsRPZ) { //XXX This block is repeated, see above
1000 g_stats.policyResults[appliedPolicy.d_kind]++;
1001 switch(appliedPolicy.d_kind) {
1002 case DNSFilterEngine::PolicyKind::NoAction:
1003 break;
1004 case DNSFilterEngine::PolicyKind::Drop:
1005 g_stats.policyDrops++;
1006 delete dc;
1007 dc=0;
1008 return;
1009 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1010 ret.clear();
1011 res=RCode::NXDomain;
1012 goto haveAnswer;
1013
1014 case DNSFilterEngine::PolicyKind::NODATA:
1015 ret.clear();
1016 res=RCode::NoError;
1017 goto haveAnswer;
1018
1019 case DNSFilterEngine::PolicyKind::Truncate:
1020 if(!dc->d_tcp) {
1021 ret.clear();
1022 res=RCode::NoError;
1023 pw.getHeader()->tc=1;
1024 goto haveAnswer;
1025 }
1026 break;
1027
1028 case DNSFilterEngine::PolicyKind::Custom:
1029 ret.clear();
1030 res=RCode::NoError;
1031 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
1032 ret.push_back(spoofed);
1033 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
1034 goto haveAnswer;
1035 }
1036 }
1037 }
1038 haveAnswer:;
1039 if(res == PolicyDecision::DROP) {
1040 g_stats.policyDrops++;
1041 delete dc;
1042 dc=0;
1043 return;
1044 }
1045 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1046 {
1047 string trace(sr.getTrace());
1048 if(!trace.empty()) {
1049 vector<string> lines;
1050 boost::split(lines, trace, boost::is_any_of("\n"));
1051 for(const string& line : lines) {
1052 if(!line.empty())
1053 L<<Logger::Warning<< line << endl;
1054 }
1055 }
1056 }
1057
1058 if(res == -1) {
1059 pw.getHeader()->rcode=RCode::ServFail;
1060 // no commit here, because no record
1061 g_stats.servFails++;
1062 }
1063 else {
1064 pw.getHeader()->rcode=res;
1065
1066 // Does the validation mode or query demand validation?
1067 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1068 try {
1069 if(sr.doLog()) {
1070 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1071 }
1072
1073 auto state = sr.getValidationState();
1074
1075 if(state == Secure) {
1076 if(sr.doLog()) {
1077 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1078 }
1079
1080 // Is the query source interested in the value of the ad-bit?
1081 if (dc->d_mdp.d_header.ad || DNSSECOK)
1082 pw.getHeader()->ad=1;
1083 }
1084 else if(state == Insecure) {
1085 if(sr.doLog()) {
1086 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1087 }
1088
1089 pw.getHeader()->ad=0;
1090 }
1091 else if(state == Bogus) {
1092 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1093 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1094 }
1095
1096 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1097 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1098 if(sr.doLog()) {
1099 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1100 }
1101
1102 pw.getHeader()->rcode=RCode::ServFail;
1103 goto sendit;
1104 } else {
1105 if(sr.doLog()) {
1106 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1107 }
1108 }
1109 }
1110 }
1111 catch(ImmediateServFailException &e) {
1112 if(g_logCommonErrors)
1113 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1114 pw.getHeader()->rcode=RCode::ServFail;
1115 goto sendit;
1116 }
1117 }
1118
1119 if(ret.size()) {
1120 orderAndShuffle(ret);
1121 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1122 stable_sort(ret.begin(), ret.end(), *sl);
1123 variableAnswer=true;
1124 }
1125 }
1126
1127 bool needCommit = false;
1128 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1129 if( ! DNSSECOK &&
1130 ( i->d_type == QType::NSEC3 ||
1131 (
1132 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1133 (
1134 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1135 i->d_place != DNSResourceRecord::ANSWER
1136 )
1137 )
1138 )
1139 ) {
1140 continue;
1141 }
1142
1143 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1144 needCommit = false;
1145 break;
1146 }
1147 needCommit = true;
1148
1149 #ifdef HAVE_PROTOBUF
1150 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1151 pbMessage.addRR(*i);
1152 }
1153 #endif
1154 }
1155 if(needCommit)
1156 pw.commit();
1157 }
1158 sendit:;
1159
1160 if (haveEDNS) {
1161 /* we try to add the EDNS OPT RR even for truncated answers,
1162 as rfc6891 states:
1163 "The minimal response MUST be the DNS header, question section, and an
1164 OPT record. This MUST also occur when a truncated response (using
1165 the DNS header's TC bit) is returned."
1166 */
1167 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, dc->d_ttlCap, maxanswersize)) {
1168 pw.commit();
1169 }
1170 }
1171
1172 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1173 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1174 #ifdef HAVE_PROTOBUF
1175 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1176 pbMessage.setBytes(packet.size());
1177 pbMessage.setResponseCode(pw.getHeader()->rcode);
1178 if (appliedPolicy.d_name) {
1179 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1180 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1181 }
1182 pbMessage.setPolicyTags(dc->d_policyTags);
1183 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1184 pbMessage.setRequestorId(dq.requestorId);
1185 pbMessage.setDeviceId(dq.deviceId);
1186 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1187 }
1188 #endif
1189 if(!dc->d_tcp) {
1190 struct msghdr msgh;
1191 struct iovec iov;
1192 char cbuf[256];
1193 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1194 msgh.msg_control=NULL;
1195
1196 if(g_fromtosockets.count(dc->d_socket)) {
1197 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1198 }
1199 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1200 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1201
1202 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1203 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1204 string((const char*)&*packet.begin(), packet.size()),
1205 g_now.tv_sec,
1206 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1207 min(minTTL,SyncRes::s_packetcachettl),
1208 &pbMessage);
1209 }
1210 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1211 }
1212 else {
1213 char buf[2];
1214 buf[0]=packet.size()/256;
1215 buf[1]=packet.size()%256;
1216
1217 Utility::iovec iov[2];
1218
1219 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1220 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1221
1222 int wret=Utility::writev(dc->d_socket, iov, 2);
1223 bool hadError=true;
1224
1225 if(wret == 0)
1226 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1227 else if(wret < 0 )
1228 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1229 else if((unsigned int)wret != 2 + packet.size())
1230 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1231 else
1232 hadError=false;
1233
1234 // update tcp connection status, either by closing or moving to 'BYTE0'
1235
1236 if(hadError) {
1237 // no need to remove us from FDM, we weren't there
1238 dc->d_socket = -1;
1239 }
1240 else {
1241 dc->d_tcpConnection->queriesCount++;
1242 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1243 dc->d_socket = -1;
1244 }
1245 else {
1246 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1247 Utility::gettimeofday(&g_now, 0); // needs to be updated
1248 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1249 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1250 }
1251 }
1252 }
1253 float spent=makeFloat(sr.getNow()-dc->d_now);
1254 if(!g_quiet) {
1255 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1256 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1257 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1258 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1259
1260 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1261 L<< ", dnssec="<<vStates[sr.getValidationState()];
1262 }
1263
1264 L<<endl;
1265
1266 }
1267
1268 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
1269
1270 if(spent < 0.001)
1271 g_stats.answers0_1++;
1272 else if(spent < 0.010)
1273 g_stats.answers1_10++;
1274 else if(spent < 0.1)
1275 g_stats.answers10_100++;
1276 else if(spent < 1.0)
1277 g_stats.answers100_1000++;
1278 else
1279 g_stats.answersSlow++;
1280
1281 uint64_t newLat=(uint64_t)(spent*1000000);
1282 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1283 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1284 // no worries, we do this for packet cache hits elsewhere
1285
1286 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1287 if(ourtime < 1)
1288 g_stats.ourtime0_1++;
1289 else if(ourtime < 2)
1290 g_stats.ourtime1_2++;
1291 else if(ourtime < 4)
1292 g_stats.ourtime2_4++;
1293 else if(ourtime < 8)
1294 g_stats.ourtime4_8++;
1295 else if(ourtime < 16)
1296 g_stats.ourtime8_16++;
1297 else if(ourtime < 32)
1298 g_stats.ourtime16_32++;
1299 else {
1300 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1301 g_stats.ourtimeSlow++;
1302 }
1303 if(ourtime >= 0.0) {
1304 newLat=ourtime*1000; // usec
1305 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1306 }
1307 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1308 delete dc;
1309 dc=0;
1310 }
1311 catch(PDNSException &ae) {
1312 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1313 delete dc;
1314 }
1315 catch(MOADNSException& e) {
1316 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1317 delete dc;
1318 }
1319 catch(std::exception& e) {
1320 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1321
1322 // Luawrapper nests the exception from Lua, so we unnest it here
1323 try {
1324 std::rethrow_if_nested(e);
1325 } catch(const std::exception& ne) {
1326 L<<". Extra info: "<<ne.what();
1327 } catch(...) {}
1328
1329 L<<endl;
1330 delete dc;
1331 }
1332 catch(...) {
1333 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1334 }
1335
1336 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1337 }
1338
1339 static void makeControlChannelSocket(int processNum=-1)
1340 {
1341 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1342 if(processNum >= 0)
1343 sockname += "."+std::to_string(processNum);
1344 sockname+=".controlsocket";
1345 s_rcc.listen(sockname);
1346
1347 int sockowner = -1;
1348 int sockgroup = -1;
1349
1350 if (!::arg().isEmpty("socket-group"))
1351 sockgroup=::arg().asGid("socket-group");
1352 if (!::arg().isEmpty("socket-owner"))
1353 sockowner=::arg().asUid("socket-owner");
1354
1355 if (sockgroup > -1 || sockowner > -1) {
1356 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1357 unixDie("Failed to chown control socket");
1358 }
1359 }
1360
1361 // do mode change if socket-mode is given
1362 if(!::arg().isEmpty("socket-mode")) {
1363 mode_t sockmode=::arg().asMode("socket-mode");
1364 if(chmod(sockname.c_str(), sockmode) < 0) {
1365 unixDie("Failed to chmod control socket");
1366 }
1367 }
1368 }
1369
1370 static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
1371 {
1372 bool found = false;
1373 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1374 size_t questionLen = question.length();
1375 unsigned int consumed=0;
1376 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1377
1378 size_t pos= sizeof(dnsheader)+consumed+4;
1379 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1380 = 11 */
1381 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1382 /* OPT root label (1) followed by type (2) */
1383 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1384 if (!options) {
1385 char* ecsStart = nullptr;
1386 size_t ecsLen = 0;
1387 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1388 if (res == 0 && ecsLen > 4) {
1389 EDNSSubnetOpts eso;
1390 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1391 *ednssubnet=eso;
1392 found = true;
1393 }
1394 }
1395 }
1396 else {
1397 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1398 if (res == 0) {
1399 const auto& it = options->find(EDNSOptionCode::ECS);
1400 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1401 EDNSSubnetOpts eso;
1402 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1403 *ednssubnet=eso;
1404 found = true;
1405 }
1406 }
1407 }
1408 }
1409 }
1410 }
1411 return found;
1412 }
1413
1414 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1415 {
1416 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1417
1418 if(conn->state==TCPConnection::BYTE0) {
1419 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1420 if(bytes==1)
1421 conn->state=TCPConnection::BYTE1;
1422 if(bytes==2) {
1423 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1424 conn->bytesread=0;
1425 conn->state=TCPConnection::GETQUESTION;
1426 }
1427 if(!bytes || bytes < 0) {
1428 t_fdm->removeReadFD(fd);
1429 return;
1430 }
1431 }
1432 else if(conn->state==TCPConnection::BYTE1) {
1433 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1434 if(bytes==1) {
1435 conn->state=TCPConnection::GETQUESTION;
1436 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1437 conn->bytesread=0;
1438 }
1439 if(!bytes || bytes < 0) {
1440 if(g_logCommonErrors)
1441 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1442 t_fdm->removeReadFD(fd);
1443 return;
1444 }
1445 }
1446 else if(conn->state==TCPConnection::GETQUESTION) {
1447 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1448 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1449 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1450 t_fdm->removeReadFD(fd);
1451 return;
1452 }
1453 conn->bytesread+=(uint16_t)bytes;
1454 if(conn->bytesread==conn->qlen) {
1455 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1456
1457 DNSComboWriter* dc=nullptr;
1458 try {
1459 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1460 }
1461 catch(MOADNSException &mde) {
1462 g_stats.clientParseError++;
1463 if(g_logCommonErrors)
1464 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1465 return;
1466 }
1467 dc->d_tcpConnection = conn; // carry the torch
1468 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1469 dc->d_tcp=true;
1470 dc->setRemote(&conn->d_remote);
1471 ComboAddress dest;
1472 memset(&dest, 0, sizeof(dest));
1473 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1474 socklen_t len = dest.getSocklen();
1475 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1476 dc->setLocal(dest);
1477 DNSName qname;
1478 uint16_t qtype=0;
1479 uint16_t qclass=0;
1480 bool needECS = false;
1481 string requestorId;
1482 string deviceId;
1483 #ifdef HAVE_PROTOBUF
1484 auto luaconfsLocal = g_luaconfs.getLocal();
1485 if (luaconfsLocal->protobufServer) {
1486 needECS = true;
1487 }
1488 #endif
1489
1490 if(needECS || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1491
1492 try {
1493 std::map<uint16_t, EDNSOptionView> ednsOptions;
1494 dc->d_ecsParsed = true;
1495 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1496
1497 if(t_pdl) {
1498 try {
1499 if (t_pdl->d_gettag_ffi) {
1500 dc->d_tag = t_pdl->gettag_ffi(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable);
1501 }
1502 else if (t_pdl->d_gettag) {
1503 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1504 }
1505 }
1506 catch(const std::exception& e) {
1507 if(g_logCommonErrors)
1508 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1509 }
1510 }
1511 }
1512 catch(const std::exception& e)
1513 {
1514 if(g_logCommonErrors)
1515 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1516 }
1517 }
1518 #ifdef HAVE_PROTOBUF
1519 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1520 dc->d_requestorId = requestorId;
1521 dc->d_deviceId = deviceId;
1522 dc->d_uuid = (*t_uuidGenerator)();
1523 }
1524
1525 if(luaconfsLocal->protobufServer) {
1526 try {
1527 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1528
1529 if (!luaconfsLocal->protobufTaggedOnly) {
1530 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1531 }
1532 }
1533 catch(std::exception& e) {
1534 if(g_logCommonErrors)
1535 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1536 }
1537 }
1538 #endif
1539 if(dc->d_mdp.d_header.qr) {
1540 delete dc;
1541 g_stats.ignoredCount++;
1542 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1543 return;
1544 }
1545 if(dc->d_mdp.d_header.opcode) {
1546 delete dc;
1547 g_stats.ignoredCount++;
1548 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1549 return;
1550 }
1551 else {
1552 ++g_stats.qcounter;
1553 ++g_stats.tcpqcounter;
1554 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1555 return;
1556 }
1557 }
1558 }
1559 }
1560
1561 //! Handle new incoming TCP connection
1562 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1563 {
1564 ComboAddress addr;
1565 socklen_t addrlen=sizeof(addr);
1566 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1567 if(newsock>=0) {
1568 if(MT->numProcesses() > g_maxMThreads) {
1569 g_stats.overCapacityDrops++;
1570 try {
1571 closesocket(newsock);
1572 }
1573 catch(const PDNSException& e) {
1574 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1575 }
1576 return;
1577 }
1578
1579 if(t_remotes)
1580 t_remotes->push_back(addr);
1581 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1582 if(!g_quiet)
1583 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1584
1585 g_stats.unauthorizedTCP++;
1586 try {
1587 closesocket(newsock);
1588 }
1589 catch(const PDNSException& e) {
1590 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1591 }
1592 return;
1593 }
1594 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1595 g_stats.tcpClientOverflow++;
1596 try {
1597 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1598 }
1599 catch(const PDNSException& e) {
1600 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1601 }
1602 return;
1603 }
1604
1605 setNonBlocking(newsock);
1606 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
1607 tc->state=TCPConnection::BYTE0;
1608
1609 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1610
1611 struct timeval now;
1612 Utility::gettimeofday(&now, 0);
1613 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1614 }
1615 }
1616
1617 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1618 {
1619 gettimeofday(&g_now, 0);
1620 struct timeval diff = g_now - tv;
1621 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1622
1623 if(tv.tv_sec && delta > 1000.0) {
1624 g_stats.tooOldDrops++;
1625 return 0;
1626 }
1627
1628 ++g_stats.qcounter;
1629 if(fromaddr.sin4.sin_family==AF_INET6)
1630 g_stats.ipv6qcounter++;
1631
1632 string response;
1633 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1634 unsigned int ctag=0;
1635 uint32_t qhash = 0;
1636 bool needECS = false;
1637 std::vector<std::string> policyTags;
1638 LuaContext::LuaObject data;
1639 string requestorId;
1640 string deviceId;
1641 #ifdef HAVE_PROTOBUF
1642 boost::uuids::uuid uniqueId;
1643 auto luaconfsLocal = g_luaconfs.getLocal();
1644 if (luaconfsLocal->protobufServer) {
1645 uniqueId = (*t_uuidGenerator)();
1646 needECS = true;
1647 } else if (luaconfsLocal->outgoingProtobufServer) {
1648 uniqueId = (*t_uuidGenerator)();
1649 }
1650 #endif
1651 EDNSSubnetOpts ednssubnet;
1652 bool ecsFound = false;
1653 bool ecsParsed = false;
1654 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
1655 bool variable = false;
1656 try {
1657 DNSName qname;
1658 uint16_t qtype=0;
1659 uint16_t qclass=0;
1660 uint32_t age;
1661 bool qnameParsed=false;
1662 #ifdef MALLOC_TRACE
1663 /*
1664 static uint64_t last=0;
1665 if(!last)
1666 g_mtracer->clearAllocators();
1667 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1668 last=g_mtracer->getAllocs();
1669 cout<<g_mtracer->topAllocatorsString()<<endl;
1670 g_mtracer->clearAllocators();
1671 */
1672 #endif
1673
1674 if(needECS || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
1675 try {
1676 std::map<uint16_t, EDNSOptionView> ednsOptions;
1677 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1678 qnameParsed = true;
1679 ecsParsed = true;
1680
1681 if(t_pdl) {
1682 try {
1683 if (t_pdl->d_gettag_ffi) {
1684 ctag = t_pdl->gettag_ffi(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable);
1685 }
1686 else if (t_pdl->d_gettag) {
1687 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1688 }
1689 }
1690 catch(const std::exception& e) {
1691 if(g_logCommonErrors)
1692 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1693 }
1694 }
1695 }
1696 catch(const std::exception& e)
1697 {
1698 if(g_logCommonErrors)
1699 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1700 }
1701 }
1702
1703 bool cacheHit = false;
1704 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1705 #ifdef HAVE_PROTOBUF
1706 if(luaconfsLocal->protobufServer) {
1707 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1708 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
1709 }
1710 }
1711 #endif /* HAVE_PROTOBUF */
1712
1713 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1714 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1715 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
1716 if (qnameParsed) {
1717 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1718 }
1719 else {
1720 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1721 }
1722
1723 if (cacheHit) {
1724 #ifdef HAVE_PROTOBUF
1725 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1726 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1727 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1728 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1729 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1730 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1731 pbMessage.setRequestorId(requestorId);
1732 pbMessage.setDeviceId(deviceId);
1733 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1734 }
1735 #endif /* HAVE_PROTOBUF */
1736 if(!g_quiet)
1737 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1738
1739 g_stats.packetCacheHits++;
1740 SyncRes::s_queries++;
1741 ageDNSPacket(response, age);
1742 struct msghdr msgh;
1743 struct iovec iov;
1744 char cbuf[256];
1745 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1746 msgh.msg_control=NULL;
1747
1748 if(g_fromtosockets.count(fd)) {
1749 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1750 }
1751 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1752 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1753
1754 if(response.length() >= sizeof(struct dnsheader)) {
1755 struct dnsheader tmpdh;
1756 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1757 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1758 }
1759 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1760 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1761 return 0;
1762 }
1763 }
1764 catch(std::exception& e) {
1765 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1766 return 0;
1767 }
1768
1769 if(t_pdl) {
1770 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
1771 if(!g_quiet)
1772 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1773 g_stats.policyDrops++;
1774 return 0;
1775 }
1776 }
1777
1778 if(MT->numProcesses() > g_maxMThreads) {
1779 if(!g_quiet)
1780 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1781
1782 g_stats.overCapacityDrops++;
1783 return 0;
1784 }
1785
1786 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1787 dc->setSocket(fd);
1788 dc->d_tag=ctag;
1789 dc->d_qhash=qhash;
1790 dc->d_query = question;
1791 dc->setRemote(&fromaddr);
1792 dc->setLocal(destaddr);
1793 dc->d_tcp=false;
1794 dc->d_policyTags = policyTags;
1795 dc->d_data = data;
1796 dc->d_ecsFound = ecsFound;
1797 dc->d_ecsParsed = ecsParsed;
1798 dc->d_ednssubnet = ednssubnet;
1799 dc->d_ttlCap = ttlCap;
1800 dc->d_variable = variable;
1801 #ifdef HAVE_PROTOBUF
1802 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1803 dc->d_uuid = uniqueId;
1804 }
1805 dc->d_requestorId = requestorId;
1806 dc->d_deviceId = deviceId;
1807 #endif
1808
1809 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1810 return 0;
1811 }
1812
1813
1814 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1815 {
1816 ssize_t len;
1817 char data[1500];
1818 ComboAddress fromaddr;
1819 struct msghdr msgh;
1820 struct iovec iov;
1821 char cbuf[256];
1822 bool firstQuery = true;
1823
1824 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1825 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1826
1827 for(;;)
1828 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1829
1830 firstQuery = false;
1831
1832 if(t_remotes)
1833 t_remotes->push_back(fromaddr);
1834
1835 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1836 if(!g_quiet)
1837 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1838
1839 g_stats.unauthorizedUDP++;
1840 return;
1841 }
1842 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1843 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1844 if(!g_quiet)
1845 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1846
1847 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1848 return;
1849 }
1850 try {
1851 dnsheader* dh=(dnsheader*)data;
1852
1853 if(dh->qr) {
1854 g_stats.ignoredCount++;
1855 if(g_logCommonErrors)
1856 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1857 }
1858 else if(dh->opcode) {
1859 g_stats.ignoredCount++;
1860 if(g_logCommonErrors)
1861 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1862 }
1863 else {
1864 string question(data, (size_t)len);
1865 struct timeval tv={0,0};
1866 HarvestTimestamp(&msgh, &tv);
1867 ComboAddress dest;
1868 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1869 auto loc = rplookup(g_listenSocketsAddresses, fd);
1870 if(HarvestDestinationAddress(&msgh, &dest)) {
1871 // but.. need to get port too
1872 if(loc)
1873 dest.sin4.sin_port = loc->sin4.sin_port;
1874 }
1875 else {
1876 if(loc) {
1877 dest = *loc;
1878 }
1879 else {
1880 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1881 socklen_t slen = dest.getSocklen();
1882 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1883 }
1884 }
1885 if(g_weDistributeQueries)
1886 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1887 else
1888 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1889 }
1890 }
1891 catch(MOADNSException& mde) {
1892 g_stats.clientParseError++;
1893 if(g_logCommonErrors)
1894 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1895 }
1896 catch(std::runtime_error& e) {
1897 g_stats.clientParseError++;
1898 if(g_logCommonErrors)
1899 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1900 }
1901 }
1902 else {
1903 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1904 if(firstQuery && errno == EAGAIN)
1905 g_stats.noPacketError++;
1906
1907 break;
1908 }
1909 }
1910
1911 static void makeTCPServerSockets(unsigned int threadId)
1912 {
1913 int fd;
1914 vector<string>locals;
1915 stringtok(locals,::arg()["local-address"]," ,");
1916
1917 if(locals.empty())
1918 throw PDNSException("No local address specified");
1919
1920 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1921 ServiceTuple st;
1922 st.port=::arg().asNum("local-port");
1923 parseService(*i, st);
1924
1925 ComboAddress sin;
1926
1927 memset((char *)&sin,0, sizeof(sin));
1928 sin.sin4.sin_family = AF_INET;
1929 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1930 sin.sin6.sin6_family = AF_INET6;
1931 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1932 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1933 }
1934
1935 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1936 if(fd<0)
1937 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1938
1939 setCloseOnExec(fd);
1940
1941 int tmp=1;
1942 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
1943 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1944 exit(1);
1945 }
1946 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1947 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1948 }
1949
1950 #ifdef TCP_DEFER_ACCEPT
1951 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
1952 if(i==locals.begin())
1953 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1954 }
1955 #endif
1956
1957 if( ::arg().mustDo("non-local-bind") )
1958 Utility::setBindAny(AF_INET, fd);
1959
1960 #ifdef SO_REUSEPORT
1961 if(g_reusePort) {
1962 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
1963 throw PDNSException("SO_REUSEPORT: "+stringerror());
1964 }
1965 #endif
1966
1967 if (::arg().asNum("tcp-fast-open") > 0) {
1968 #ifdef TCP_FASTOPEN
1969 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1970 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1971 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1972 }
1973 #else
1974 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1975 #endif
1976 }
1977
1978 sin.sin4.sin_port = htons(st.port);
1979 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1980 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1981 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1982
1983 setNonBlocking(fd);
1984 setSocketSendBuffer(fd, 65000);
1985 listen(fd, 128);
1986 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
1987 g_tcpListenSockets.push_back(fd);
1988 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1989 // - fd is not that which we know here, but returned from accept()
1990 if(sin.sin4.sin_family == AF_INET)
1991 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1992 else
1993 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1994 }
1995 }
1996
1997 static void makeUDPServerSockets(unsigned int threadId)
1998 {
1999 int one=1;
2000 vector<string>locals;
2001 stringtok(locals,::arg()["local-address"]," ,");
2002
2003 if(locals.empty())
2004 throw PDNSException("No local address specified");
2005
2006 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2007 ServiceTuple st;
2008 st.port=::arg().asNum("local-port");
2009 parseService(*i, st);
2010
2011 ComboAddress sin;
2012
2013 memset(&sin, 0, sizeof(sin));
2014 sin.sin4.sin_family = AF_INET;
2015 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2016 sin.sin6.sin6_family = AF_INET6;
2017 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2018 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2019 }
2020
2021 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2022 if(fd < 0) {
2023 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2024 }
2025 if (!setSocketTimestamps(fd))
2026 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2027
2028 if(IsAnyAddress(sin)) {
2029 if(sin.sin4.sin_family == AF_INET)
2030 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2031 g_fromtosockets.insert(fd);
2032 #ifdef IPV6_RECVPKTINFO
2033 if(sin.sin4.sin_family == AF_INET6)
2034 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2035 g_fromtosockets.insert(fd);
2036 #endif
2037 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2038 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2039 }
2040 }
2041 if( ::arg().mustDo("non-local-bind") )
2042 Utility::setBindAny(AF_INET6, fd);
2043
2044 setCloseOnExec(fd);
2045
2046 setSocketReceiveBuffer(fd, 250000);
2047 sin.sin4.sin_port = htons(st.port);
2048
2049
2050 #ifdef SO_REUSEPORT
2051 if(g_reusePort) {
2052 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2053 throw PDNSException("SO_REUSEPORT: "+stringerror());
2054 }
2055 #endif
2056 socklen_t socklen=sin.getSocklen();
2057 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2058 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2059
2060 setNonBlocking(fd);
2061
2062 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
2063 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2064 if(sin.sin4.sin_family == AF_INET)
2065 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2066 else
2067 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2068 }
2069 }
2070
2071 static void daemonize(void)
2072 {
2073 if(fork())
2074 exit(0); // bye bye
2075
2076 setsid();
2077
2078 int i=open("/dev/null",O_RDWR); /* open stdin */
2079 if(i < 0)
2080 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2081 else {
2082 dup2(i,0); /* stdin */
2083 dup2(i,1); /* stderr */
2084 dup2(i,2); /* stderr */
2085 close(i);
2086 }
2087 }
2088
2089 static void usr1Handler(int)
2090 {
2091 statsWanted=true;
2092 }
2093
2094 static void usr2Handler(int)
2095 {
2096 g_quiet= !g_quiet;
2097 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2098 ::arg().set("quiet")=g_quiet ? "" : "no";
2099 }
2100
2101 static void doStats(void)
2102 {
2103 static time_t lastOutputTime;
2104 static uint64_t lastQueryCount;
2105
2106 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2107 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2108
2109 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2110 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2111 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2112 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2113 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2114
2115 L<<Logger::Notice<<"stats: throttle map: "
2116 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2117 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2118 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2119 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2120 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2121 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2122 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2123
2124 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2125 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2126
2127 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2128 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2129
2130 time_t now = time(0);
2131 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2132 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2133 }
2134 lastOutputTime = now;
2135 lastQueryCount = SyncRes::s_queries;
2136 }
2137 else if(statsWanted)
2138 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
2139
2140 statsWanted=false;
2141 }
2142
2143 static void houseKeeping(void *)
2144 {
2145 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2146 static thread_local int cleanCounter=0;
2147 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2148 try {
2149 if(s_running)
2150 return;
2151 s_running=true;
2152
2153 struct timeval now;
2154 Utility::gettimeofday(&now, 0);
2155
2156 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2157 DTime dt;
2158 dt.setTimeval(now);
2159 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2160 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2161
2162 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2163
2164 if(!((cleanCounter++)%40)) { // this is a full scan!
2165 time_t limit=now.tv_sec-300;
2166 SyncRes::pruneNSSpeeds(limit);
2167 }
2168 last_prune=time(0);
2169 }
2170
2171 if(now.tv_sec - last_rootupdate > 7200) {
2172 int res = SyncRes::getRootNS(g_now, nullptr);
2173 if (!res)
2174 last_rootupdate=now.tv_sec;
2175 }
2176
2177 if(!t_id) {
2178 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
2179 doStats();
2180 last_stat=time(0);
2181 }
2182
2183 if(now.tv_sec - last_secpoll >= 3600) {
2184 try {
2185 doSecPoll(&last_secpoll);
2186 }
2187 catch(std::exception& e)
2188 {
2189 L<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2190 }
2191 catch(PDNSException& e)
2192 {
2193 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2194 }
2195 catch(ImmediateServFailException &e)
2196 {
2197 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2198 }
2199 catch(...)
2200 {
2201 L<<Logger::Error<<"Exception while performing security poll"<<endl;
2202 }
2203
2204 }
2205 }
2206 s_running=false;
2207 }
2208 catch(PDNSException& ae)
2209 {
2210 s_running=false;
2211 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2212 throw;
2213 }
2214 }
2215
2216 static void makeThreadPipes()
2217 {
2218 for(unsigned int n=0; n < g_numThreads; ++n) {
2219 struct ThreadPipeSet tps;
2220 int fd[2];
2221 if(pipe(fd) < 0)
2222 unixDie("Creating pipe for inter-thread communications");
2223
2224 tps.readToThread = fd[0];
2225 tps.writeToThread = fd[1];
2226
2227 if(pipe(fd) < 0)
2228 unixDie("Creating pipe for inter-thread communications");
2229 tps.readFromThread = fd[0];
2230 tps.writeFromThread = fd[1];
2231
2232 if(pipe(fd) < 0)
2233 unixDie("Creating pipe for inter-thread communications");
2234 tps.readQueriesToThread = fd[0];
2235 tps.writeQueriesToThread = fd[1];
2236
2237 if (!setNonBlocking(tps.writeQueriesToThread)) {
2238 unixDie("Making pipe for inter-thread communications non-blocking");
2239 }
2240
2241 g_pipes.push_back(tps);
2242 }
2243 }
2244
2245 struct ThreadMSG
2246 {
2247 pipefunc_t func;
2248 bool wantAnswer;
2249 };
2250
2251 void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2252 {
2253 unsigned int n = 0;
2254 for(ThreadPipeSet& tps : g_pipes)
2255 {
2256 if(n++ == t_id) {
2257 if(!skipSelf)
2258 func(); // don't write to ourselves!
2259 continue;
2260 }
2261
2262 ThreadMSG* tmsg = new ThreadMSG();
2263 tmsg->func = func;
2264 tmsg->wantAnswer = true;
2265 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2266 delete tmsg;
2267 unixDie("write to thread pipe returned wrong size or error");
2268 }
2269
2270 string* resp;
2271 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2272 unixDie("read from thread pipe returned wrong size or error");
2273
2274 if(resp) {
2275 // cerr <<"got response: " << *resp << endl;
2276 delete resp;
2277 }
2278 }
2279 }
2280
2281 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2282 {
2283 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2284 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2285
2286 if(target == t_id) {
2287 func();
2288 return;
2289 }
2290 ThreadPipeSet& tps = g_pipes[target];
2291 ThreadMSG* tmsg = new ThreadMSG();
2292 tmsg->func = func;
2293 tmsg->wantAnswer = false;
2294
2295 ssize_t written = write(tps.writeQueriesToThread, &tmsg, sizeof(tmsg));
2296 if (written > 0) {
2297 if (static_cast<size_t>(written) != sizeof(tmsg)) {
2298 delete tmsg;
2299 unixDie("write to thread pipe returned wrong size or error");
2300 }
2301 }
2302 else {
2303 int error = errno;
2304 delete tmsg;
2305 if (error == EAGAIN || error == EWOULDBLOCK) {
2306 g_stats.queryPipeFullDrops++;
2307 } else {
2308 unixDie("write to thread pipe returned wrong size or error:" + error);
2309 }
2310 }
2311 }
2312
2313 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2314 {
2315 ThreadMSG* tmsg = nullptr;
2316
2317 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread || fd == readQueriesToThread
2318 unixDie("read from thread pipe returned wrong size or error");
2319 }
2320
2321 void *resp=0;
2322 try {
2323 resp = tmsg->func();
2324 }
2325 catch(std::exception& e) {
2326 if(g_logCommonErrors)
2327 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2328 }
2329 catch(PDNSException& e) {
2330 if(g_logCommonErrors)
2331 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2332 }
2333 if(tmsg->wantAnswer) {
2334 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2335 delete tmsg;
2336 unixDie("write to thread pipe returned wrong size or error");
2337 }
2338 }
2339
2340 delete tmsg;
2341 }
2342
2343 template<class T> void *voider(const boost::function<T*()>& func)
2344 {
2345 return func();
2346 }
2347
2348 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2349 {
2350 a.insert(a.end(), b.begin(), b.end());
2351 return a;
2352 }
2353
/** Appends every element of b to the end of a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.reserve(a.size() + b.size());
  for(const auto& entry : b)
    a.push_back(entry);
  return a;
}
2359
2360 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2361 {
2362 a.insert(a.end(), b.begin(), b.end());
2363 return a;
2364 }
2365
2366
2367 template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
2368 {
2369 unsigned int n = 0;
2370 T ret=T();
2371 for(ThreadPipeSet& tps : g_pipes)
2372 {
2373 if(n++ == t_id) {
2374 if(!skipSelf) {
2375 T* resp = (T*)func(); // don't write to ourselves!
2376 if(resp) {
2377 //~ cerr <<"got direct: " << *resp << endl;
2378 ret += *resp;
2379 delete resp;
2380 }
2381 }
2382 continue;
2383 }
2384
2385 ThreadMSG* tmsg = new ThreadMSG();
2386 tmsg->func = boost::bind(voider<T>, func);
2387 tmsg->wantAnswer = true;
2388
2389 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2390 delete tmsg;
2391 unixDie("write to thread pipe returned wrong size or error");
2392 }
2393
2394 T* resp;
2395 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2396 unixDie("read from thread pipe returned wrong size or error");
2397
2398 if(resp) {
2399 //~ cerr <<"got response: " << *resp << endl;
2400 ret += *resp;
2401 delete resp;
2402 }
2403 }
2404 return ret;
2405 }
2406
2407 template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2408 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
2409 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
2410 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
2411
2412 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2413 {
2414 string remote;
2415 string msg=s_rcc.recv(&remote);
2416 RecursorControlParser rcp;
2417 RecursorControlParser::func_t* command;
2418
2419 string answer=rcp.getAnswer(msg, &command);
2420
2421 // If we are inside a chroot, we need to strip
2422 if (!arg()["chroot"].empty()) {
2423 size_t len = arg()["chroot"].length();
2424 remote = remote.substr(len);
2425 }
2426
2427 try {
2428 s_rcc.send(answer, &remote);
2429 command();
2430 }
2431 catch(std::exception& e) {
2432 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2433 }
2434 catch(PDNSException& ae) {
2435 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2436 }
2437 }
2438
2439 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2440 {
2441 PacketID* pident=any_cast<PacketID>(&var);
2442 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2443
2444 shared_array<char> buffer(new char[pident->inNeeded]);
2445
2446 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2447 if(ret > 0) {
2448 pident->inMSG.append(&buffer[0], &buffer[ret]);
2449 pident->inNeeded-=(size_t)ret;
2450 if(!pident->inNeeded || pident->inIncompleteOkay) {
2451 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2452 PacketID pid=*pident;
2453 string msg=pident->inMSG;
2454
2455 t_fdm->removeReadFD(fd);
2456 MT->sendEvent(pid, &msg);
2457 }
2458 else {
2459 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2460 }
2461 }
2462 else {
2463 PacketID tmp=*pident;
2464 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2465 string empty;
2466 MT->sendEvent(tmp, &empty); // this conveys error status
2467 }
2468 }
2469
2470 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2471 {
2472 PacketID* pid=any_cast<PacketID>(&var);
2473 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2474 if(ret > 0) {
2475 pid->outPos+=(ssize_t)ret;
2476 if(pid->outPos==pid->outMSG.size()) {
2477 PacketID tmp=*pid;
2478 t_fdm->removeWriteFD(fd);
2479 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2480 }
2481 }
2482 else { // error or EOF
2483 PacketID tmp(*pid);
2484 t_fdm->removeWriteFD(fd);
2485 string sent;
2486 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2487 }
2488 }
2489
2490 // resend event to everybody chained onto it
2491 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2492 {
2493 if(iter->key.chain.empty())
2494 return;
2495 // cerr<<"doResends called!\n";
2496 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2497 resend.fd=-1;
2498 resend.id=*i;
2499 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2500
2501 MT->sendEvent(resend, &content);
2502 g_stats.chainResends++;
2503 }
2504 }
2505
2506 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2507 {
2508 PacketID pid=any_cast<PacketID>(var);
2509 ssize_t len;
2510 char data[g_outgoingEDNSBufsize];
2511 ComboAddress fromaddr;
2512 socklen_t addrlen=sizeof(fromaddr);
2513
2514 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2515
2516 if(len < (ssize_t) sizeof(dnsheader)) {
2517 if(len < 0)
2518 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2519 else {
2520 g_stats.serverParseError++;
2521 if(g_logCommonErrors)
2522 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2523 ": packet smaller than DNS header"<<endl;
2524 }
2525
2526 t_udpclientsocks->returnSocket(fd);
2527 string empty;
2528
2529 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2530 if(iter != MT->d_waiters.end())
2531 doResends(iter, pid, empty);
2532
2533 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2534 return;
2535 }
2536
2537 dnsheader dh;
2538 memcpy(&dh, data, sizeof(dh));
2539
2540 PacketID pident;
2541 pident.remote=fromaddr;
2542 pident.id=dh.id;
2543 pident.fd=fd;
2544
2545 if(!dh.qr && g_logCommonErrors) {
2546 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2547 }
2548
2549 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2550 !dh.qr) { // one weird server
2551 pident.domain.clear();
2552 pident.type = 0;
2553 }
2554 else {
2555 try {
2556 if(len > 12)
2557 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2558 }
2559 catch(std::exception& e) {
2560 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2561 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2562 return;
2563 }
2564 }
2565 string packet;
2566 packet.assign(data, len);
2567
2568 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2569 if(iter != MT->d_waiters.end()) {
2570 doResends(iter, pident, packet);
2571 }
2572
2573 retryWithName:
2574
2575 if(!MT->sendEvent(pident, &packet)) {
2576 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2577 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2578 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2579 pident.domain == mthread->key.domain) {
2580 mthread->key.nearMisses++;
2581 }
2582
2583 // be a bit paranoid here since we're weakening our matching
2584 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2585 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2586 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2587 pident.domain = mthread->key.domain;
2588 pident.type = mthread->key.type;
2589 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2590 }
2591 }
2592 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2593 if(g_logCommonErrors) {
2594 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2595 }
2596 }
2597 else if(fd >= 0) {
2598 t_udpclientsocks->returnSocket(fd);
2599 }
2600 }
2601
2602 FDMultiplexer* getMultiplexer()
2603 {
2604 FDMultiplexer* ret;
2605 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
2606 try {
2607 ret=i.second();
2608 return ret;
2609 }
2610 catch(FDMultiplexerException &fe) {
2611 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2612 }
2613 catch(...) {
2614 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2615 }
2616 }
2617 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2618 exit(1);
2619 }
2620
2621
2622 static string* doReloadLuaScript()
2623 {
2624 string fname= ::arg()["lua-dns-script"];
2625 try {
2626 if(fname.empty()) {
2627 t_pdl.reset();
2628 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2629 return new string("unloaded\n");
2630 }
2631 else {
2632 t_pdl = std::make_shared<RecursorLua4>(fname);
2633 }
2634 }
2635 catch(std::exception& e) {
2636 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2637 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2638 }
2639
2640 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2641 return new string("(re)loaded '"+fname+"'\n");
2642 }
2643
2644 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2645 {
2646 if(begin != end)
2647 ::arg().set("lua-dns-script") = *begin;
2648
2649 return broadcastAccFunction<string>(doReloadLuaScript);
2650 }
2651
2652 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
2653 try
2654 {
2655 if(newRegex.empty()) {
2656 t_traceRegex.reset();
2657 return new string("unset\n");
2658 }
2659 else {
2660 t_traceRegex = std::make_shared<Regex>(newRegex);
2661 return new string("ok\n");
2662 }
2663 }
2664 catch(PDNSException& ae)
2665 {
2666 return new string(ae.reason+"\n");
2667 }
2668
2669 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2670 {
2671 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2672 }
2673
2674 static void checkLinuxIPv6Limits()
2675 {
2676 #ifdef __linux__
2677 string line;
2678 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2679 int lim=std::stoi(line);
2680 if(lim < 16384) {
2681 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2682 }
2683 }
2684 #endif
2685 }
2686 static void checkOrFixFDS()
2687 {
2688 unsigned int availFDs=getFilenumLimit();
2689 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2690
2691 if(wantFDs > availFDs) {
2692 unsigned int hardlimit= getFilenumLimit(true);
2693 if(hardlimit >= wantFDs) {
2694 setFilenumLimit(wantFDs);
2695 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2696 }
2697 else {
2698 int newval = (hardlimit - 25) / g_numWorkerThreads;
2699 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2700 g_maxMThreads = newval;
2701 setFilenumLimit(hardlimit);
2702 }
2703 }
2704 }
2705
2706 static void* recursorThread(void*);
2707
2708 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
2709 {
2710 t_allowFrom = ng;
2711 return nullptr;
2712 }
2713
// command line arguments; parseACLs() passes them to ::arg().preParse() when re-reading settings
int g_argc = 0;
char** g_argv = nullptr;
2716
2717 void parseACLs()
2718 {
2719 static bool l_initialized;
2720
2721 if(l_initialized) { // only reload configuration file on second call
2722 string configname=::arg()["config-dir"]+"/recursor.conf";
2723 if(::arg()["config-name"]!="") {
2724 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2725 }
2726 cleanSlashes(configname);
2727
2728 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2729 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2730 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2731 ::arg().preParseFile(configname.c_str(), "include-dir");
2732 ::arg().preParse(g_argc, g_argv, "include-dir");
2733
2734 // then process includes
2735 std::vector<std::string> extraConfigs;
2736 ::arg().gatherIncludes(extraConfigs);
2737
2738 for(const std::string& fn : extraConfigs) {
2739 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2740 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2741 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2742 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2743 }
2744
2745 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2746 ::arg().preParse(g_argc, g_argv, "allow-from");
2747 }
2748
2749 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2750 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
2751
2752 if(!::arg()["allow-from-file"].empty()) {
2753 string line;
2754 ifstream ifs(::arg()["allow-from-file"].c_str());
2755 if(!ifs) {
2756 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2757 }
2758
2759 string::size_type pos;
2760 while(getline(ifs,line)) {
2761 pos=line.find('#');
2762 if(pos!=string::npos)
2763 line.resize(pos);
2764 trim(line);
2765 if(line.empty())
2766 continue;
2767
2768 allowFrom->addMask(line);
2769 }
2770 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2771 }
2772 else if(!::arg()["allow-from"].empty()) {
2773 vector<string> ips;
2774 stringtok(ips, ::arg()["allow-from"], ", ");
2775
2776 L<<Logger::Warning<<"Only allowing queries from: ";
2777 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2778 allowFrom->addMask(*i);
2779 if(i!=ips.begin())
2780 L<<Logger::Warning<<", ";
2781 L<<Logger::Warning<<*i;
2782 }
2783 L<<Logger::Warning<<endl;
2784 }
2785 else {
2786 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2787 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2788 allowFrom = nullptr;
2789 }
2790
2791 g_initialAllowFrom = allowFrom;
2792 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2793 oldAllowFrom = nullptr;
2794
2795 l_initialized = true;
2796 }
2797
2798
2799 static void setupDelegationOnly()
2800 {
2801 vector<string> parts;
2802 stringtok(parts, ::arg()["delegation-only"], ", \t");
2803 for(const auto& p : parts) {
2804 SyncRes::addDelegationOnly(DNSName(p));
2805 }
2806 }
2807
2808 static std::map<unsigned int, std::set<int> > parseCPUMap()
2809 {
2810 std::map<unsigned int, std::set<int> > result;
2811
2812 const std::string value = ::arg()["cpu-map"];
2813
2814 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2815 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2816 return result;
2817 }
2818
2819 std::vector<std::string> parts;
2820
2821 stringtok(parts, value, " \t");
2822
2823 for(const auto& part : parts) {
2824 if (part.find('=') == string::npos)
2825 continue;
2826
2827 try {
2828 auto headers = splitField(part, '=');
2829 trim(headers.first);
2830 trim(headers.second);
2831
2832 unsigned int threadId = pdns_stou(headers.first);
2833 std::vector<std::string> cpus;
2834
2835 stringtok(cpus, headers.second, ",");
2836
2837 for(const auto& cpu : cpus) {
2838 int cpuId = std::stoi(cpu);
2839
2840 result[threadId].insert(cpuId);
2841 }
2842 }
2843 catch(const std::exception& e) {
2844 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2845 }
2846 }
2847
2848 return result;
2849 }
2850
2851 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2852 {
2853 const auto& cpuMapping = cpusMap.find(n);
2854 if (cpuMapping != cpusMap.cend()) {
2855 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2856 if (rc == 0) {
2857 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2858 for (const auto cpu : cpuMapping->second) {
2859 L<<Logger::Info<<" "<<cpu;
2860 }
2861 L<<Logger::Info<<endl;
2862 }
2863 else {
2864 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2865 for (const auto cpu : cpuMapping->second) {
2866 L<<Logger::Info<<" "<<cpu;
2867 }
2868 L<<Logger::Info<<strerror(rc)<<endl;
2869 }
2870 }
2871 }
2872
2873 static int serviceMain(int argc, char*argv[])
2874 {
2875 L.setName(s_programname);
2876 L.disableSyslog(::arg().mustDo("disable-syslog"));
2877 L.setTimestamps(::arg().mustDo("log-timestamp"));
2878
2879 if(!::arg()["logging-facility"].empty()) {
2880 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2881 if(val >= 0)
2882 theL().setFacility(val);
2883 else
2884 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2885 }
2886
2887 showProductVersion();
2888 seedRandom(::arg()["entropy-source"]);
2889
2890 g_disthashseed=dns_random(0xffffffff);
2891
2892 checkLinuxIPv6Limits();
2893 try {
2894 vector<string> addrs;
2895 if(!::arg()["query-local-address6"].empty()) {
2896 SyncRes::s_doIPv6=true;
2897 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2898
2899 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2900 for(const string& addr : addrs) {
2901 g_localQueryAddresses6.push_back(ComboAddress(addr));
2902 }
2903 }
2904 else {
2905 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2906 }
2907 addrs.clear();
2908 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2909 for(const string& addr : addrs) {
2910 g_localQueryAddresses4.push_back(ComboAddress(addr));
2911 }
2912 }
2913 catch(std::exception& e) {
2914 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2915 exit(99);
2916 }
2917
2918 // keep this ABOVE loadRecursorLuaConfig!
2919 if(::arg()["dnssec"]=="off")
2920 g_dnssecmode=DNSSECMode::Off;
2921 else if(::arg()["dnssec"]=="process-no-validate")
2922 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2923 else if(::arg()["dnssec"]=="process")
2924 g_dnssecmode=DNSSECMode::Process;
2925 else if(::arg()["dnssec"]=="validate")
2926 g_dnssecmode=DNSSECMode::ValidateAll;
2927 else if(::arg()["dnssec"]=="log-fail")
2928 g_dnssecmode=DNSSECMode::ValidateForLog;
2929 else {
2930 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2931 exit(1);
2932 }
2933
2934 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2935 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
2936
2937 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2938 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2939
2940 try {
2941 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2942 }
2943 catch (PDNSException &e) {
2944 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2945 exit(1);
2946 }
2947
2948 parseACLs();
2949 sortPublicSuffixList();
2950
2951 if(!::arg()["dont-query"].empty()) {
2952 vector<string> ips;
2953 stringtok(ips, ::arg()["dont-query"], ", ");
2954 ips.push_back("0.0.0.0");
2955 ips.push_back("::");
2956
2957 L<<Logger::Warning<<"Will not send queries to: ";
2958 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2959 SyncRes::addDontQuery(*i);
2960 if(i!=ips.begin())
2961 L<<Logger::Warning<<", ";
2962 L<<Logger::Warning<<*i;
2963 }
2964 L<<Logger::Warning<<endl;
2965 }
2966
2967 g_quiet=::arg().mustDo("quiet");
2968
2969 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2970 if(g_weDistributeQueries) {
2971 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2972 }
2973
2974 setupDelegationOnly();
2975 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2976
2977 if(::arg()["trace"]=="fail") {
2978 SyncRes::setDefaultLogMode(SyncRes::Store);
2979 }
2980 else if(::arg().mustDo("trace")) {
2981 SyncRes::setDefaultLogMode(SyncRes::Log);
2982 ::arg().set("quiet")="no";
2983 g_quiet=false;
2984 g_dnssecLOG=true;
2985 }
2986
2987 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2988
2989 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2990
2991 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
2992 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
2993 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2994 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2995 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2996 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
2997 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2998 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
2999 SyncRes::s_serverID=::arg()["server-id"];
3000 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
3001 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
3002 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
3003 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
3004 if(SyncRes::s_serverID.empty()) {
3005 char tmp[128];
3006 gethostname(tmp, sizeof(tmp)-1);
3007 SyncRes::s_serverID=tmp;
3008 }
3009
3010 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
3011 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
3012
3013 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3014 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3015 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3016 }
3017 else {
3018 bool found = false;
3019 for (const auto& addr : g_localQueryAddresses4) {
3020 if (!IsAnyAddress(addr)) {
3021 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3022 found = true;
3023 break;
3024 }
3025 }
3026 if (!found) {
3027 for (const auto& addr : g_localQueryAddresses6) {
3028 if (!IsAnyAddress(addr)) {
3029 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3030 found = true;
3031 break;
3032 }
3033 }
3034 if (!found) {
3035 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3036 }
3037 }
3038 }
3039
3040 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3041
3042 g_initialDomainMap = parseAuthAndForwards();
3043
3044 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3045
3046 g_logCommonErrors=::arg().mustDo("log-common-errors");
3047 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3048
3049 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3050 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3051
3052 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3053
3054 g_numWorkerThreads = ::arg().asNum("threads");
3055 if (g_numWorkerThreads < 1) {
3056 L<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3057 g_numWorkerThreads = 1;
3058 }
3059
3060 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3061 g_maxMThreads = ::arg().asNum("max-mthreads");
3062
3063 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3064
3065 g_statisticsInterval = ::arg().asNum("statistics-interval");
3066
3067 #ifdef SO_REUSEPORT
3068 g_reusePort = ::arg().mustDo("reuseport");
3069 #endif
3070
3071 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3072
3073 if (g_useOneSocketPerThread) {
3074 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3075 makeUDPServerSockets(threadId);
3076 makeTCPServerSockets(threadId);
3077 }
3078 }
3079 else {
3080 makeUDPServerSockets(0);
3081 makeTCPServerSockets(0);
3082 }
3083
3084 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3085 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3086
3087 int forks;
3088 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3089 if(!fork()) // we are child
3090 break;
3091 }
3092
3093 if(::arg().mustDo("daemon")) {
3094 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3095 L.toConsole(Logger::Critical);
3096 daemonize();
3097 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3098 }
3099 signal(SIGUSR1,usr1Handler);
3100 signal(SIGUSR2,usr2Handler);
3101 signal(SIGPIPE,SIG_IGN);
3102
3103 checkOrFixFDS();
3104
3105 #ifdef HAVE_LIBSODIUM
3106 if (sodium_init() == -1) {
3107 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3108 exit(99);
3109 }
3110 #endif
3111
3112 openssl_thread_setup();
3113 openssl_seed();
3114
3115 int newgid=0;
3116 if(!::arg()["setgid"].empty())
3117 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3118 int newuid=0;
3119 if(!::arg()["setuid"].empty())
3120 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3121
3122 Utility::dropGroupPrivs(newuid, newgid);
3123
3124 if (!::arg()["chroot"].empty()) {
3125 #ifdef HAVE_SYSTEMD
3126 char *ns;
3127 ns = getenv("NOTIFY_SOCKET");
3128 if (ns != nullptr) {
3129 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3130 exit(1);
3131 }
3132 #endif
3133 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3134 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3135 exit(1);
3136 }
3137 else
3138 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3139 }
3140
3141 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3142 if(!s_pidfname.empty())
3143 unlink(s_pidfname.c_str()); // remove possible old pid file
3144 writePid();
3145
3146 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3147
3148 Utility::dropUserPrivs(newuid);
3149
3150 makeThreadPipes();
3151
3152 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3153 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3154 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3155
3156 if (::arg().mustDo("snmp-agent")) {
3157 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3158 g_snmpAgent->run();
3159 }
3160
3161 const auto cpusMap = parseCPUMap();
3162 if(g_numThreads == 1) {
3163 L<<Logger::Warning<<"Operating unthreaded"<<endl;
3164 #ifdef HAVE_SYSTEMD
3165 sd_notify(0, "READY=1");
3166 #endif
3167 setCPUMap(cpusMap, 0, pthread_self());
3168 recursorThread(0);
3169 }
3170 else {
3171 pthread_t tid;
3172 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3173 for(unsigned int n=0; n < g_numThreads; ++n) {
3174 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
3175
3176 setCPUMap(cpusMap, n, tid);
3177 }
3178 void* res;
3179 #ifdef HAVE_SYSTEMD
3180 sd_notify(0, "READY=1");
3181 #endif
3182 pthread_join(tid, &res);
3183 }
3184 return 0;
3185 }
3186
3187 static void* recursorThread(void* ptr)
3188 try
3189 {
3190 t_id=(int) (long) ptr;
3191 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3192 SyncRes::setDomainMap(g_initialDomainMap);
3193 t_allowFrom = g_initialAllowFrom;
3194 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3195 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3196 primeHints();
3197
3198 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3199
3200 #ifdef HAVE_PROTOBUF
3201 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
3202 #endif
3203 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3204
3205 try {
3206 if(!::arg()["lua-dns-script"].empty()) {
3207 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
3208 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3209 }
3210 }
3211 catch(std::exception &e) {
3212 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3213 _exit(99);
3214 }
3215
3216 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3217 if(ringsize) {
3218 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3219 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3220 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3221 else
3222 t_remotes->set_capacity(ringsize);
3223 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3224 t_servfailremotes->set_capacity(ringsize);
3225 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3226 t_largeanswerremotes->set_capacity(ringsize);
3227
3228 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3229 t_queryring->set_capacity(ringsize);
3230 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3231 t_servfailqueryring->set_capacity(ringsize);
3232 }
3233
3234 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3235
3236 PacketID pident;
3237
3238 t_fdm=getMultiplexer();
3239 if(!t_id) {
3240 if(::arg().mustDo("webserver")) {
3241 L<<Logger::Warning << "Enabling web server" << endl;
3242 try {
3243 new RecursorWebServer(t_fdm);
3244 }
3245 catch(PDNSException &e) {
3246 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3247 exit(99);
3248 }
3249 }
3250 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
3251 }
3252
3253 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3254 t_fdm->addReadFD(g_pipes[t_id].readQueriesToThread, handlePipeRequest);
3255
3256 if(g_useOneSocketPerThread) {
3257 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3258 t_fdm->addReadFD(i->first, i->second);
3259 }
3260 }
3261 else {
3262 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
3263 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3264 t_fdm->addReadFD(i->first, i->second);
3265 }
3266 }
3267 }
3268
3269 registerAllStats();
3270 if(!t_id) {
3271 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3272 }
3273
3274 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3275
3276 bool listenOnTCP(true);
3277
3278 time_t last_carbon=0;
3279 time_t carbonInterval=::arg().asNum("carbon-interval");
3280 counter.store(0); // used to periodically execute certain tasks
3281 for(;;) {
3282 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3283
3284 if(!(counter%500)) {
3285 MT->makeThread(houseKeeping, 0);
3286 }
3287
3288 if(!(counter%55)) {
3289 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
3290 expired_t expired=t_fdm->getTimeouts(g_now);
3291
3292 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
3293 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
3294 if(g_logCommonErrors)
3295 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
3296 t_fdm->removeReadFD(i->first);
3297 }
3298 }
3299
3300 counter++;
3301
3302 if(!t_id && statsWanted) {
3303 doStats();
3304 }
3305
3306 Utility::gettimeofday(&g_now, 0);
3307
3308 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3309 MT->makeThread(doCarbonDump, 0);
3310 last_carbon = g_now.tv_sec;
3311 }
3312
3313 t_fdm->run(&g_now);
3314 // 'run' updates g_now for us
3315
3316 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
3317 if(listenOnTCP) {
3318 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3319 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3320 t_fdm->removeReadFD(*i);
3321 listenOnTCP=false;
3322 }
3323 }
3324 else {
3325 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3326 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3327 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3328 listenOnTCP=true;
3329 }
3330 }
3331 }
3332 }
3333 }
3334 catch(PDNSException &ae) {
3335 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3336 return 0;
3337 }
3338 catch(std::exception &e) {
3339 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3340 return 0;
3341 }
3342 catch(...) {
3343 L<<Logger::Error<<"any other exception in main: "<<endl;
3344 return 0;
3345 }
3346
3347
3348 int main(int argc, char **argv)
3349 {
3350 g_argc = argc;
3351 g_argv = argv;
3352 g_stats.startupTime=time(0);
3353 versionSetProduct(ProductRecursor);
3354 reportBasicTypes();
3355 reportOtherTypes();
3356
3357 int ret = EXIT_SUCCESS;
3358
3359 try {
3360 ::arg().set("stack-size","stack size per mthread")="200000";
3361 ::arg().set("soa-minimum-ttl","Don't change")="0";
3362 ::arg().set("no-shuffle","Don't change")="off";
3363 ::arg().set("local-port","port to listen on")="53";
3364 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3365 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3366 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3367 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3368 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3369 ::arg().set("daemon","Operate as a daemon")="no";
3370 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3371 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3372 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3373 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3374 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3375 ::arg().set("chroot","switch to chroot jail")="";
3376 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3377 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3378 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3379 ::arg().set("threads", "Launch this number of threads")="2";
3380 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3381 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3382 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3383 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3384 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3385 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3386 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3387 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3388 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3389 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3390 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3391 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3392 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3393 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3394 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3395 ::arg().set("quiet","Suppress logging of questions and answers")="";
3396 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3397 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3398 ::arg().set("socket-owner","Owner of socket")="";
3399 ::arg().set("socket-group","Group of socket")="";
3400 ::arg().set("socket-mode", "Permissions for socket")="";
3401
3402 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3403 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3404 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3405 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3406 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3407 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3408 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3409 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3410 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3411 ::arg().set("hint-file", "If set, load root hints from this file")="";
3412 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3413 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3414 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3415 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3416 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3417 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3418 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3419 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3420 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3421 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3422 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3423 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3424 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3425 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3426 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3427 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3428 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3429 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3430 ::arg().set("lua-config-file", "More powerful configuration options")="";
3431
3432 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3433 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3434 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3435 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3436 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3437 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3438 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3439 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3440 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3441 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3442 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3443 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3444 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3445 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3446 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3447 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3448 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3449 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3450 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3451 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3452 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3453 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3454 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3455 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3456 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3457 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3458
3459 ::arg().set("include-dir","Include *.conf files from this directory")="";
3460 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3461
3462 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3463
3464 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3465 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3466
3467 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3468 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3469
3470 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3471
3472 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3473
3474 ::arg().setCmd("help","Provide a helpful message");
3475 ::arg().setCmd("version","Print version string");
3476 ::arg().setCmd("config","Output blank configuration");
3477 L.toConsole(Logger::Info);
3478 ::arg().laxParse(argc,argv); // do a lax parse
3479
3480 string configname=::arg()["config-dir"]+"/recursor.conf";
3481 if(::arg()["config-name"]!="") {
3482 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3483 s_programname+="-"+::arg()["config-name"];
3484 }
3485 cleanSlashes(configname);
3486
3487 if(::arg().mustDo("config")) {
3488 cout<<::arg().configstring()<<endl;
3489 exit(0);
3490 }
3491
3492 if(!::arg().file(configname.c_str()))
3493 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3494
3495 ::arg().parse(argc,argv);
3496
3497 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3498 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3499 exit(EXIT_FAILURE);
3500 }
3501
3502 if (::arg()["socket-dir"].empty()) {
3503 if (::arg()["chroot"].empty())
3504 ::arg().set("socket-dir") = LOCALSTATEDIR;
3505 else
3506 ::arg().set("socket-dir") = "/";
3507 }
3508
3509 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3510
3511 if(::arg().asNum("threads")==1)
3512 ::arg().set("pdns-distributes-queries")="no";
3513
3514 if(::arg().mustDo("help")) {
3515 cout<<"syntax:"<<endl<<endl;
3516 cout<<::arg().helpstring(::arg()["help"])<<endl;
3517 exit(0);
3518 }
3519 if(::arg().mustDo("version")) {
3520 showProductVersion();
3521 showBuildConfiguration();
3522 exit(0);
3523 }
3524
3525 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3526
3527 if (logUrgency < Logger::Error)
3528 logUrgency = Logger::Error;
3529 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3530 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3531 }
3532 L.setLoglevel(logUrgency);
3533 L.toConsole(logUrgency);
3534
3535 serviceMain(argc, argv);
3536 }
3537 catch(PDNSException &ae) {
3538 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3539 ret=EXIT_FAILURE;
3540 }
3541 catch(std::exception &e) {
3542 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3543 ret=EXIT_FAILURE;
3544 }
3545 catch(...) {
3546 L<<Logger::Error<<"any other exception in main: "<<endl;
3547 ret=EXIT_FAILURE;
3548 }
3549
3550 return ret;
3551 }