]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
rec: Use constants for the handler and distributor thread ids
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include "ws-recursor.hh"
31 #include <pthread.h>
32 #include "recpacketcache.hh"
33 #include "utility.hh"
34 #include "dns_random.hh"
35 #ifdef HAVE_LIBSODIUM
36 #include <sodium.h>
37 #endif
38 #include "opensslsigners.hh"
39 #include <iostream>
40 #include <errno.h>
41 #include <boost/static_assert.hpp>
42 #include <map>
43 #include <set>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
46 #include <stdio.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include "misc.hh"
50 #include "mtasker.hh"
51 #include <utility>
52 #include "arguments.hh"
53 #include "syncres.hh"
54 #include <fcntl.h>
55 #include <fstream>
56 #include "sortlist.hh"
57 #include "sstuff.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
63 #ifdef MALLOC_TRACE
64 #include "malloctrace.hh"
65 #endif
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
72 #include "logger.hh"
73 #include "iputils.hh"
74 #include "mplexer.hh"
75 #include "config.h"
76 #include "lua-recursor4.hh"
77 #include "version.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
80 #include "dnsname.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
86 #include "gettime.hh"
87
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
90
91 #ifdef HAVE_SYSTEMD
92 #include <systemd/sd-daemon.h>
93 #endif
94
95 #include "namespaces.hh"
96
97 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
98
99 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
100 static thread_local int t_id;
101 static thread_local std::shared_ptr<Regex> t_traceRegex;
102 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
103
104 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
105 thread_local std::unique_ptr<MemRecursorCache> t_RC;
106 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
107 thread_local FDMultiplexer* t_fdm{nullptr};
108 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
109 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
110 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
111 #ifdef HAVE_PROTOBUF
112 thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
113 #endif
114 __thread struct timeval g_now; // timestamp, updated (too) frequently
115
// for communicating with our threads
// One set of four integers per thread (see g_pipes).
// NOTE(review): judging by the names these are the write/read ends of two
// pipes, one carrying data to the thread and one carrying data back from it;
// confirm where g_pipes is populated.
struct ThreadPipeSet
{
  int writeToThread;
  int readToThread;
  int writeFromThread;
  int readFromThread;
};
124
125 static const int s_handlerThreadID = -1;
126 static const int s_distributorThreadID = 0;
127
128 typedef vector<int> tcpListenSockets_t;
129 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
130 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
131
132 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
133 static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
134 static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
135 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
136 static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
137 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
138 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
139 static AtomicCounter counter;
140 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
141 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
142 static size_t g_tcpMaxQueriesPerConn;
143 static uint64_t g_latencyStatSize;
144 static uint32_t g_disthashseed;
145 static unsigned int g_maxTCPPerClient;
146 static unsigned int g_networkTimeoutMsec;
147 static unsigned int g_maxMThreads;
148 static unsigned int g_numWorkerThreads;
149 static int g_tcpTimeout;
150 static uint16_t g_udpTruncationThreshold;
151 static std::atomic<bool> statsWanted;
152 static std::atomic<bool> g_quiet;
153 static bool g_logCommonErrors;
154 static bool g_anyToTcp;
155 static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
156 static bool g_reusePort{false};
157 static bool g_useOneSocketPerThread;
158 static bool g_gettagNeedsEDNSOptions{false};
159 static time_t g_statisticsInterval;
160 static bool g_useIncomingECS;
161 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
162
163 RecursorControlChannel s_rcc; // only active in thread 0
164 RecursorStats g_stats;
165 string s_programname="pdns_recursor";
166 string s_pidfname;
167 bool g_lowercaseOutgoing;
168 unsigned int g_numThreads;
169 uint16_t g_outgoingEDNSBufsize;
170 bool g_logRPZChanges{false};
171
// Comma-separated list of loopback, private, shared-address-space and
// link-local netmasks (IPv4 and IPv6).
#define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// LOCAL_NETS and BAD_NETS joined into one comma-separated list via adjacent
// string literal concatenation.
// NOTE(review): presumably the default for a "don't query" setting; the use site is not in this part of the file.
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
180
181 //! used to send information to a newborn mthread
182 struct DNSComboWriter {
183 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
184 d_tcp(false), d_socket(-1)
185 {}
186 MOADNSParser d_mdp;
187 void setRemote(const ComboAddress* sa)
188 {
189 d_remote=*sa;
190 }
191
192 void setLocal(const ComboAddress& sa)
193 {
194 d_local=sa;
195 }
196
197
198 void setSocket(int sock)
199 {
200 d_socket=sock;
201 }
202
203 string getRemote() const
204 {
205 return d_remote.toString();
206 }
207
208 struct timeval d_now;
209 ComboAddress d_remote, d_local;
210 #ifdef HAVE_PROTOBUF
211 boost::uuids::uuid d_uuid;
212 string d_requestorId;
213 string d_deviceId;
214 #endif
215 EDNSSubnetOpts d_ednssubnet;
216 bool d_ecsFound{false};
217 bool d_ecsParsed{false};
218 bool d_tcp;
219 int d_socket;
220 unsigned int d_tag{0};
221 uint32_t d_qhash{0};
222 string d_query;
223 shared_ptr<TCPConnection> d_tcpConnection;
224 vector<pair<uint16_t, string> > d_ednsOpts;
225 std::vector<std::string> d_policyTags;
226 LuaContext::LuaObject d_data;
227 uint32_t d_ttlCap{std::numeric_limits<uint32_t>::max()};
228 bool d_variable{false};
229 };
230
231 MT_t* getMT()
232 {
233 return MT ? MT.get() : nullptr;
234 }
235
236 ArgvMap &arg()
237 {
238 static ArgvMap theArg;
239 return theArg;
240 }
241
242 unsigned int getRecursorThreadId()
243 {
244 return static_cast<unsigned int>(t_id);
245 }
246
247 int getMTaskerTID()
248 {
249 return MT->getTid();
250 }
251
252 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
253
254 // -1 is error, 0 is timeout, 1 is success
255 int asendtcp(const string& data, Socket* sock)
256 {
257 PacketID pident;
258 pident.sock=sock;
259 pident.outMSG=data;
260
261 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
262 string packet;
263
264 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
265
266 if(!ret || ret==-1) { // timeout
267 t_fdm->removeWriteFD(sock->getHandle());
268 }
269 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
270 return -1;
271 }
272 return ret;
273 }
274
275 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
276
277 // -1 is error, 0 is timeout, 1 is success
278 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
279 {
280 data.clear();
281 PacketID pident;
282 pident.sock=sock;
283 pident.inNeeded=len;
284 pident.inIncompleteOkay=incompleteOkay;
285 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
286
287 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
288 if(!ret || ret==-1) { // timeout
289 t_fdm->removeReadFD(sock->getHandle());
290 }
291 else if(data.empty()) {// error, EOF or other
292 return -1;
293 }
294
295 return ret;
296 }
297
298 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
299 {
300 PacketID pident=*any_cast<PacketID>(&var);
301 char resp[512];
302 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
303 t_fdm->removeReadFD(fd);
304 if(ret >= 0) {
305 string data(resp, (size_t) ret);
306 MT->sendEvent(pident, &data);
307 }
308 else {
309 string empty;
310 MT->sendEvent(pident, &empty);
311 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
312 }
313 }
314 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
315 {
316 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
317 s.setNonBlocking();
318 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
319
320 s.bind(local);
321 s.connect(dest);
322 s.send(query);
323
324 PacketID pident;
325 pident.sock=&s;
326 pident.type=0;
327 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
328
329 string data;
330
331 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
332
333 if(!ret || ret==-1) { // timeout
334 t_fdm->removeReadFD(s.getHandle());
335 }
336 else if(data.empty()) {// error, EOF or other
337 // we could special case this
338 return data;
339 }
340 return data;
341 }
342
343 //! pick a random query local address
344 ComboAddress getQueryLocalAddress(int family, uint16_t port)
345 {
346 ComboAddress ret;
347 if(family==AF_INET) {
348 if(g_localQueryAddresses4.empty())
349 ret = g_local4;
350 else
351 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
352 ret.sin4.sin_port = htons(port);
353 }
354 else {
355 if(g_localQueryAddresses6.empty())
356 ret = g_local6;
357 else
358 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
359
360 ret.sin6.sin6_port = htons(port);
361 }
362 return ret;
363 }
364
365 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
366
367 static void setSocketBuffer(int fd, int optname, uint32_t size)
368 {
369 uint32_t psize=0;
370 socklen_t len=sizeof(psize);
371
372 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
373 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
374 return;
375 }
376
377 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
378 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
379 }
380
381
// Applies `size` to the SO_RCVBUF option of `fd` through setSocketBuffer().
static void setSocketReceiveBuffer(int fd, uint32_t size)
{
  setSocketBuffer(fd, SO_RCVBUF, size);
}
386
// Applies `size` to the SO_SNDBUF option of `fd` through setSocketBuffer().
static void setSocketSendBuffer(int fd, uint32_t size)
{
  setSocketBuffer(fd, SO_SNDBUF, size);
}
391
392
393 // you can ask this class for a UDP socket to send a query from
394 // this socket is not yours, don't even think about deleting it
395 // but after you call 'returnSocket' on it, don't assume anything anymore
396 class UDPClientSocks
397 {
398 unsigned int d_numsocks;
399 public:
400 UDPClientSocks() : d_numsocks(0)
401 {
402 }
403
404 typedef set<int> socks_t;
405 socks_t d_socks;
406
407 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
408 int getSocket(const ComboAddress& toaddr, int* fd)
409 {
410 *fd=makeClientSocket(toaddr.sin4.sin_family);
411 if(*fd < 0) // temporary error - receive exception otherwise
412 return -2;
413
414 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
415 int err = errno;
416 // returnSocket(*fd);
417 try {
418 closesocket(*fd);
419 }
420 catch(const PDNSException& e) {
421 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
422 }
423
424 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
425 return -2;
426 return -1;
427 }
428
429 d_socks.insert(*fd);
430 d_numsocks++;
431 return 0;
432 }
433
434 void returnSocket(int fd)
435 {
436 socks_t::iterator i=d_socks.find(fd);
437 if(i==d_socks.end()) {
438 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
439 }
440 returnSocketLocked(i);
441 }
442
443 // return a socket to the pool, or simply erase it
444 void returnSocketLocked(socks_t::iterator& i)
445 {
446 if(i==d_socks.end()) {
447 throw PDNSException("Trying to return a socket not in the pool");
448 }
449 try {
450 t_fdm->removeReadFD(*i);
451 }
452 catch(FDMultiplexerException& e) {
453 // we sometimes return a socket that has not yet been assigned to t_fdm
454 }
455 try {
456 closesocket(*i);
457 }
458 catch(const PDNSException& e) {
459 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
460 }
461
462 d_socks.erase(i++);
463 --d_numsocks;
464 }
465
466 // returns -1 for errors which might go away, throws for ones that won't
467 static int makeClientSocket(int family)
468 {
469 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
470
471 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
472 return ret;
473
474 if(ret<0)
475 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
476
477 // setCloseOnExec(ret); // we're not going to exec
478
479 int tries=10;
480 ComboAddress sin;
481 while(--tries) {
482 uint16_t port;
483
484 if(tries==1) // fall back to kernel 'random'
485 port = 0;
486 else
487 port = 1025 + dns_random(64510);
488
489 sin=getQueryLocalAddress(family, port); // does htons for us
490
491 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
492 break;
493 }
494 if(!tries)
495 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
496
497 setNonBlocking(ret);
498 return ret;
499 }
500 };
501
502 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
503
504 /* these two functions are used by LWRes */
505 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
506 int asendto(const char *data, size_t len, int flags,
507 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
508 {
509
510 PacketID pident;
511 pident.domain = domain;
512 pident.remote = toaddr;
513 pident.type = qtype;
514
515 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
516 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
517
518 for(; chain.first != chain.second; chain.first++) {
519 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
520 /*
521 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
522 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
523 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
524 */
525 chain.first->key.chain.insert(id); // we can chain
526 *fd=-1; // gets used in waitEvent / sendEvent later on
527 return 1;
528 }
529 }
530
531 int ret=t_udpclientsocks->getSocket(toaddr, fd);
532 if(ret < 0)
533 return ret;
534
535 pident.fd=*fd;
536 pident.id=id;
537
538 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
539 ret = send(*fd, data, len, 0);
540
541 int tmp = errno;
542
543 if(ret < 0)
544 t_udpclientsocks->returnSocket(*fd);
545
546 errno = tmp; // this is for logging purposes only
547 return ret;
548 }
549
550 // -1 is error, 0 is timeout, 1 is success
551 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
552 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
553 {
554 static optional<unsigned int> nearMissLimit;
555 if(!nearMissLimit)
556 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
557
558 PacketID pident;
559 pident.fd=fd;
560 pident.id=id;
561 pident.domain=domain;
562 pident.type = qtype;
563 pident.remote=fromaddr;
564
565 string packet;
566 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
567
568 if(ret > 0) {
569 if(packet.empty()) // means "error"
570 return -1;
571
572 *d_len=packet.size();
573 memcpy(data,packet.c_str(),min(len,*d_len));
574 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
575 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
576 g_stats.spoofCount++;
577 return -1;
578 }
579 }
580 else {
581 if(fd >= 0)
582 t_udpclientsocks->returnSocket(fd);
583 }
584 return ret;
585 }
586
587 static void writePid(void)
588 {
589 if(!::arg().mustDo("write-pid"))
590 return;
591 ofstream of(s_pidfname.c_str(), std::ios_base::app);
592 if(of)
593 of<< Utility::getpid() <<endl;
594 else
595 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
596 }
597
598 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
599 {
600 ++s_currentConnections;
601 (*t_tcpClientCounts)[d_remote]++;
602 }
603
604 TCPConnection::~TCPConnection()
605 {
606 try {
607 if(closesocket(d_fd) < 0)
608 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
609 }
610 catch(const PDNSException& e) {
611 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
612 }
613
614 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
615 t_tcpClientCounts->erase(d_remote);
616 --s_currentConnections;
617 }
618
619 AtomicCounter TCPConnection::s_currentConnections;
620
621 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
622
623 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
624 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
625 {
626 if(packetsize > 1000 && t_largeanswerremotes)
627 t_largeanswerremotes->push_back(remote);
628 switch(res) {
629 case RCode::ServFail:
630 if(t_servfailremotes) {
631 t_servfailremotes->push_back(remote);
632 if(query && t_servfailqueryring) // packet cache
633 t_servfailqueryring->push_back(make_pair(*query, qtype));
634 }
635 g_stats.servFails++;
636 break;
637 case RCode::NXDomain:
638 g_stats.nxDomains++;
639 break;
640 case RCode::NoError:
641 g_stats.noErrors++;
642 break;
643 }
644 }
645
646 static string makeLoginfo(DNSComboWriter* dc)
647 try
648 {
649 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
650 }
651 catch(...)
652 {
653 return "Exception making error message for exception";
654 }
655
656 #ifdef HAVE_PROTOBUF
657 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
658 {
659 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
660 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
661 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
662 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
663 message.setRequestorId(requestorId);
664 message.setDeviceId(deviceId);
665
666 if (!policyTags.empty()) {
667 message.setPolicyTags(policyTags);
668 }
669
670 // cerr <<message.toDebugString()<<endl;
671 std::string str;
672 message.serialize(str);
673 logger->queueData(str);
674 }
675
676 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
677 {
678 // cerr <<message.toDebugString()<<endl;
679 std::string str;
680 message.serialize(str);
681 logger->queueData(str);
682 }
683 #endif
684
685 /**
686 * Chases the CNAME provided by the PolicyCustom RPZ policy.
687 *
688 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
689 * @param qtype: The QType of the original query
690 * @param sr: A SyncRes
691 * @param res: An integer that will contain the RCODE of the lookup we do
692 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
693 */
694 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
695 {
696 if (spoofed.d_type == QType::CNAME) {
697 bool oldWantsRPZ = sr.getWantsRPZ();
698 sr.setWantsRPZ(false);
699 vector<DNSRecord> ans;
700 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
701 for (const auto& rec : ans) {
702 if(rec.d_place == DNSResourceRecord::ANSWER) {
703 ret.push_back(rec);
704 }
705 }
706 // Reset the RPZ state of the SyncRes
707 sr.setWantsRPZ(oldWantsRPZ);
708 }
709 }
710
711 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, uint32_t ttlCap, const uint16_t maxAnswerSize)
712 {
713 pw.startRecord(rec.d_name, rec.d_type, (rec.d_ttl > ttlCap ? ttlCap : rec.d_ttl), rec.d_class, rec.d_place);
714
715 if(rec.d_type != QType::OPT) // their TTL ain't real
716 minTTL = min(minTTL, rec.d_ttl);
717
718 rec.d_content->toPacket(pw);
719 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
720 pw.rollback();
721 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
722 pw.getHeader()->tc=1;
723 pw.truncate();
724 }
725 return false;
726 }
727
728 return true;
729 }
730
731 static void startDoResolve(void *p)
732 {
733 DNSComboWriter* dc=(DNSComboWriter *)p;
734 try {
735 if (t_queryring)
736 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
737
738 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
739 EDNSOpts edo;
740 bool haveEDNS=false;
741 if(getEDNSOpts(dc->d_mdp, &edo)) {
742 if(!dc->d_tcp) {
743 /* rfc6891 6.2.3:
744 "Values lower than 512 MUST be treated as equal to 512."
745 */
746 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
747 }
748 dc->d_ednsOpts = edo.d_options;
749 haveEDNS=true;
750
751 if (g_useIncomingECS && !dc->d_ecsParsed) {
752 for (const auto& o : edo.d_options) {
753 if (o.first == EDNSOptionCode::ECS) {
754 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
755 break;
756 }
757 }
758 }
759 }
760 /* perhaps there was no EDNS or no ECS but by now we looked */
761 dc->d_ecsParsed = true;
762 vector<DNSRecord> ret;
763 vector<uint8_t> packet;
764
765 auto luaconfsLocal = g_luaconfs.getLocal();
766 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
767 bool wantsRPZ(true);
768 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
769 #ifdef HAVE_PROTOBUF
770 if (luaconfsLocal->protobufServer) {
771 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
772 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
773 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
774 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
775 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
776 }
777 #endif /* HAVE_PROTOBUF */
778
779 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
780
781 pw.getHeader()->aa=0;
782 pw.getHeader()->ra=1;
783 pw.getHeader()->qr=1;
784 pw.getHeader()->tc=0;
785 pw.getHeader()->id=dc->d_mdp.d_header.id;
786 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
787 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
788
789 /* This is the lowest TTL seen in the records of the response,
790 so we can't cache it for longer than this value.
791 If we have a TTL cap, this value can't be larger than the
792 cap no matter what. */
793 uint32_t minTTL = dc->d_ttlCap;
794
795 SyncRes sr(dc->d_now);
796
797 bool DNSSECOK=false;
798 if(t_pdl) {
799 sr.setLuaEngine(t_pdl);
800 }
801 sr.d_requestor=dc->d_remote; // ECS needs this too
802 if(g_dnssecmode != DNSSECMode::Off) {
803 sr.setDoDNSSEC(true);
804
805 // Does the requestor want DNSSEC records?
806 if(edo.d_Z & EDNSOpts::DNSSECOK) {
807 DNSSECOK=true;
808 g_stats.dnssecQueries++;
809 }
810 } else {
811 // Ignore the client-set CD flag
812 pw.getHeader()->cd=0;
813 }
814 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
815
816 #ifdef HAVE_PROTOBUF
817 sr.setInitialRequestId(dc->d_uuid);
818 #endif
819
820 if (g_useIncomingECS) {
821 sr.setIncomingECSFound(dc->d_ecsFound);
822 if (dc->d_ecsFound) {
823 sr.setIncomingECS(dc->d_ednssubnet);
824 }
825 }
826
827 bool tracedQuery=false; // we could consider letting Lua know about this too
828 bool variableAnswer = dc->d_variable;
829 bool shouldNotValidate = false;
830
831 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
832 int res = RCode::NoError;
833 DNSFilterEngine::Policy appliedPolicy;
834 DNSRecord spoofed;
835 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
836 dq.ednsFlags = &edo.d_Z;
837 dq.ednsOptions = &dc->d_ednsOpts;
838 dq.tag = dc->d_tag;
839 dq.discardedPolicies = &sr.d_discardedPolicies;
840 dq.policyTags = &dc->d_policyTags;
841 dq.appliedPolicy = &appliedPolicy;
842 dq.currentRecords = &ret;
843 dq.dh = &dc->d_mdp.d_header;
844 dq.data = dc->d_data;
845 #ifdef HAVE_PROTOBUF
846 dq.requestorId = dc->d_requestorId;
847 dq.deviceId = dc->d_deviceId;
848 #endif
849
850 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
851 pw.getHeader()->tc = 1;
852 res = 0;
853 variableAnswer = true;
854 goto sendit;
855 }
856
857 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
858 sr.setLogMode(SyncRes::Store);
859 tracedQuery=true;
860 }
861
862
863 if(!g_quiet || tracedQuery) {
864 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
865 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
866 if(!dc->d_ednssubnet.source.empty()) {
867 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
868 }
869 L<<endl;
870 }
871
872 sr.setId(MT->getTid());
873 if(!dc->d_mdp.d_header.rd)
874 sr.setCacheOnly();
875
876 if (t_pdl) {
877 t_pdl->prerpz(dq, res);
878 }
879
880 // Check if the query has a policy attached to it
881 if (wantsRPZ) {
882 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
883 }
884
885 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
886 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
887
888 sr.setWantsRPZ(wantsRPZ);
889 if(wantsRPZ) {
890 switch(appliedPolicy.d_kind) {
891 case DNSFilterEngine::PolicyKind::NoAction:
892 break;
893 case DNSFilterEngine::PolicyKind::Drop:
894 g_stats.policyDrops++;
895 g_stats.policyResults[appliedPolicy.d_kind]++;
896 delete dc;
897 dc=0;
898 return;
899 case DNSFilterEngine::PolicyKind::NXDOMAIN:
900 g_stats.policyResults[appliedPolicy.d_kind]++;
901 res=RCode::NXDomain;
902 goto haveAnswer;
903 case DNSFilterEngine::PolicyKind::NODATA:
904 g_stats.policyResults[appliedPolicy.d_kind]++;
905 res=RCode::NoError;
906 goto haveAnswer;
907 case DNSFilterEngine::PolicyKind::Custom:
908 g_stats.policyResults[appliedPolicy.d_kind]++;
909 res=RCode::NoError;
910 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
911 ret.push_back(spoofed);
912 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
913 goto haveAnswer;
914 case DNSFilterEngine::PolicyKind::Truncate:
915 if(!dc->d_tcp) {
916 g_stats.policyResults[appliedPolicy.d_kind]++;
917 res=RCode::NoError;
918 pw.getHeader()->tc=1;
919 goto haveAnswer;
920 }
921 break;
922 }
923 }
924
925 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
926 try {
927 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
928 shouldNotValidate = sr.wasOutOfBand();
929 }
930 catch(ImmediateServFailException &e) {
931 if(g_logCommonErrors)
932 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
933 res = RCode::ServFail;
934 }
935
936 dq.validationState = sr.getValidationState();
937
938 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
939 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
940 appliedPolicy = sr.d_appliedPolicy;
941 g_stats.policyResults[appliedPolicy.d_kind]++;
942 switch(appliedPolicy.d_kind) {
943 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
944 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
945 case DNSFilterEngine::PolicyKind::Drop:
946 g_stats.policyDrops++;
947 delete dc;
948 dc=0;
949 return;
950 case DNSFilterEngine::PolicyKind::NXDOMAIN:
951 ret.clear();
952 res=RCode::NXDomain;
953 goto haveAnswer;
954
955 case DNSFilterEngine::PolicyKind::NODATA:
956 ret.clear();
957 res=RCode::NoError;
958 goto haveAnswer;
959
960 case DNSFilterEngine::PolicyKind::Truncate:
961 if(!dc->d_tcp) {
962 ret.clear();
963 res=RCode::NoError;
964 pw.getHeader()->tc=1;
965 goto haveAnswer;
966 }
967 break;
968
969 case DNSFilterEngine::PolicyKind::Custom:
970 ret.clear();
971 res=RCode::NoError;
972 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
973 ret.push_back(spoofed);
974 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
975 goto haveAnswer;
976 }
977 }
978
979 if (wantsRPZ) {
980 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
981 }
982
983 if(t_pdl) {
984 if(res == RCode::NoError) {
985 auto i=ret.cbegin();
986 for(; i!= ret.cend(); ++i)
987 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
988 break;
989 if(i == ret.cend() && t_pdl->nodata(dq, res))
990 shouldNotValidate = true;
991
992 }
993 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
994 shouldNotValidate = true;
995
996 if(t_pdl->postresolve(dq, res))
997 shouldNotValidate = true;
998 }
999
1000 if (wantsRPZ) { //XXX This block is repeated, see above
1001 g_stats.policyResults[appliedPolicy.d_kind]++;
1002 switch(appliedPolicy.d_kind) {
1003 case DNSFilterEngine::PolicyKind::NoAction:
1004 break;
1005 case DNSFilterEngine::PolicyKind::Drop:
1006 g_stats.policyDrops++;
1007 delete dc;
1008 dc=0;
1009 return;
1010 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1011 ret.clear();
1012 res=RCode::NXDomain;
1013 goto haveAnswer;
1014
1015 case DNSFilterEngine::PolicyKind::NODATA:
1016 ret.clear();
1017 res=RCode::NoError;
1018 goto haveAnswer;
1019
1020 case DNSFilterEngine::PolicyKind::Truncate:
1021 if(!dc->d_tcp) {
1022 ret.clear();
1023 res=RCode::NoError;
1024 pw.getHeader()->tc=1;
1025 goto haveAnswer;
1026 }
1027 break;
1028
1029 case DNSFilterEngine::PolicyKind::Custom:
1030 ret.clear();
1031 res=RCode::NoError;
1032 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
1033 ret.push_back(spoofed);
1034 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
1035 goto haveAnswer;
1036 }
1037 }
1038 }
1039 haveAnswer:;
1040 if(res == PolicyDecision::DROP) {
1041 g_stats.policyDrops++;
1042 delete dc;
1043 dc=0;
1044 return;
1045 }
1046 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1047 {
1048 string trace(sr.getTrace());
1049 if(!trace.empty()) {
1050 vector<string> lines;
1051 boost::split(lines, trace, boost::is_any_of("\n"));
1052 for(const string& line : lines) {
1053 if(!line.empty())
1054 L<<Logger::Warning<< line << endl;
1055 }
1056 }
1057 }
1058
1059 if(res == -1) {
1060 pw.getHeader()->rcode=RCode::ServFail;
1061 // no commit here, because no record
1062 g_stats.servFails++;
1063 }
1064 else {
1065 pw.getHeader()->rcode=res;
1066
1067 // Does the validation mode or query demand validation?
1068 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1069 try {
1070 if(sr.doLog()) {
1071 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1072 }
1073
1074 auto state = sr.getValidationState();
1075
1076 if(state == Secure) {
1077 if(sr.doLog()) {
1078 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1079 }
1080
1081 // Is the query source interested in the value of the ad-bit?
1082 if (dc->d_mdp.d_header.ad || DNSSECOK)
1083 pw.getHeader()->ad=1;
1084 }
1085 else if(state == Insecure) {
1086 if(sr.doLog()) {
1087 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1088 }
1089
1090 pw.getHeader()->ad=0;
1091 }
1092 else if(state == Bogus) {
1093 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1094 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1095 }
1096
1097 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1098 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1099 if(sr.doLog()) {
1100 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1101 }
1102
1103 pw.getHeader()->rcode=RCode::ServFail;
1104 goto sendit;
1105 } else {
1106 if(sr.doLog()) {
1107 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1108 }
1109 }
1110 }
1111 }
1112 catch(ImmediateServFailException &e) {
1113 if(g_logCommonErrors)
1114 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1115 pw.getHeader()->rcode=RCode::ServFail;
1116 goto sendit;
1117 }
1118 }
1119
1120 if(ret.size()) {
1121 orderAndShuffle(ret);
1122 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1123 stable_sort(ret.begin(), ret.end(), *sl);
1124 variableAnswer=true;
1125 }
1126 }
1127
1128 bool needCommit = false;
1129 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1130 if( ! DNSSECOK &&
1131 ( i->d_type == QType::NSEC3 ||
1132 (
1133 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1134 (
1135 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1136 i->d_place != DNSResourceRecord::ANSWER
1137 )
1138 )
1139 )
1140 ) {
1141 continue;
1142 }
1143
1144 if (!addRecordToPacket(pw, *i, minTTL, dc->d_ttlCap, maxanswersize)) {
1145 needCommit = false;
1146 break;
1147 }
1148 needCommit = true;
1149
1150 #ifdef HAVE_PROTOBUF
1151 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1152 pbMessage.addRR(*i);
1153 }
1154 #endif
1155 }
1156 if(needCommit)
1157 pw.commit();
1158 }
1159 sendit:;
1160
1161 if (haveEDNS) {
1162 /* we try to add the EDNS OPT RR even for truncated answers,
1163 as rfc6891 states:
1164 "The minimal response MUST be the DNS header, question section, and an
1165 OPT record. This MUST also occur when a truncated response (using
1166 the DNS header's TC bit) is returned."
1167 */
1168 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, dc->d_ttlCap, maxanswersize)) {
1169 pw.commit();
1170 }
1171 }
1172
1173 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1174 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1175 #ifdef HAVE_PROTOBUF
1176 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1177 pbMessage.setBytes(packet.size());
1178 pbMessage.setResponseCode(pw.getHeader()->rcode);
1179 if (appliedPolicy.d_name) {
1180 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1181 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1182 }
1183 pbMessage.setPolicyTags(dc->d_policyTags);
1184 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1185 pbMessage.setRequestorId(dq.requestorId);
1186 pbMessage.setDeviceId(dq.deviceId);
1187 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1188 }
1189 #endif
1190 if(!dc->d_tcp) {
1191 struct msghdr msgh;
1192 struct iovec iov;
1193 char cbuf[256];
1194 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1195 msgh.msg_control=NULL;
1196
1197 if(g_fromtosockets.count(dc->d_socket)) {
1198 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1199 }
1200 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1201 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1202
1203 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1204 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1205 string((const char*)&*packet.begin(), packet.size()),
1206 g_now.tv_sec,
1207 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1208 min(minTTL,SyncRes::s_packetcachettl),
1209 &pbMessage);
1210 }
1211 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1212 }
1213 else {
1214 char buf[2];
1215 buf[0]=packet.size()/256;
1216 buf[1]=packet.size()%256;
1217
1218 Utility::iovec iov[2];
1219
1220 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1221 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1222
1223 int wret=Utility::writev(dc->d_socket, iov, 2);
1224 bool hadError=true;
1225
1226 if(wret == 0)
1227 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1228 else if(wret < 0 )
1229 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1230 else if((unsigned int)wret != 2 + packet.size())
1231 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1232 else
1233 hadError=false;
1234
1235 // update tcp connection status, either by closing or moving to 'BYTE0'
1236
1237 if(hadError) {
1238 // no need to remove us from FDM, we weren't there
1239 dc->d_socket = -1;
1240 }
1241 else {
1242 dc->d_tcpConnection->queriesCount++;
1243 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1244 dc->d_socket = -1;
1245 }
1246 else {
1247 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1248 Utility::gettimeofday(&g_now, 0); // needs to be updated
1249 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1250 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1251 }
1252 }
1253 }
1254 float spent=makeFloat(sr.getNow()-dc->d_now);
1255 if(!g_quiet) {
1256 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1257 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1258 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1259 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1260
1261 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1262 L<< ", dnssec="<<vStates[sr.getValidationState()];
1263 }
1264
1265 L<<endl;
1266
1267 }
1268
1269 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
1270
1271 if(spent < 0.001)
1272 g_stats.answers0_1++;
1273 else if(spent < 0.010)
1274 g_stats.answers1_10++;
1275 else if(spent < 0.1)
1276 g_stats.answers10_100++;
1277 else if(spent < 1.0)
1278 g_stats.answers100_1000++;
1279 else
1280 g_stats.answersSlow++;
1281
1282 uint64_t newLat=(uint64_t)(spent*1000000);
1283 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1284 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1285 // no worries, we do this for packet cache hits elsewhere
1286
1287 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1288 if(ourtime < 1)
1289 g_stats.ourtime0_1++;
1290 else if(ourtime < 2)
1291 g_stats.ourtime1_2++;
1292 else if(ourtime < 4)
1293 g_stats.ourtime2_4++;
1294 else if(ourtime < 8)
1295 g_stats.ourtime4_8++;
1296 else if(ourtime < 16)
1297 g_stats.ourtime8_16++;
1298 else if(ourtime < 32)
1299 g_stats.ourtime16_32++;
1300 else {
1301 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1302 g_stats.ourtimeSlow++;
1303 }
1304 if(ourtime >= 0.0) {
1305 newLat=ourtime*1000; // usec
1306 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1307 }
1308 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1309 delete dc;
1310 dc=0;
1311 }
1312 catch(PDNSException &ae) {
1313 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1314 delete dc;
1315 }
1316 catch(MOADNSException& e) {
1317 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1318 delete dc;
1319 }
1320 catch(std::exception& e) {
1321 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1322
1323 // Luawrapper nests the exception from Lua, so we unnest it here
1324 try {
1325 std::rethrow_if_nested(e);
1326 } catch(const std::exception& ne) {
1327 L<<". Extra info: "<<ne.what();
1328 } catch(...) {}
1329
1330 L<<endl;
1331 delete dc;
1332 }
1333 catch(...) {
1334 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1335 }
1336
1337 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1338 }
1339
1340 static void makeControlChannelSocket(int processNum=-1)
1341 {
1342 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1343 if(processNum >= 0)
1344 sockname += "."+std::to_string(processNum);
1345 sockname+=".controlsocket";
1346 s_rcc.listen(sockname);
1347
1348 int sockowner = -1;
1349 int sockgroup = -1;
1350
1351 if (!::arg().isEmpty("socket-group"))
1352 sockgroup=::arg().asGid("socket-group");
1353 if (!::arg().isEmpty("socket-owner"))
1354 sockowner=::arg().asUid("socket-owner");
1355
1356 if (sockgroup > -1 || sockowner > -1) {
1357 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1358 unixDie("Failed to chown control socket");
1359 }
1360 }
1361
1362 // do mode change if socket-mode is given
1363 if(!::arg().isEmpty("socket-mode")) {
1364 mode_t sockmode=::arg().asMode("socket-mode");
1365 if(chmod(sockname.c_str(), sockmode) < 0) {
1366 unixDie("Failed to chmod control socket");
1367 }
1368 }
1369 }
1370
1371 static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
1372 {
1373 bool found = false;
1374 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1375 size_t questionLen = question.length();
1376 unsigned int consumed=0;
1377 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1378
1379 size_t pos= sizeof(dnsheader)+consumed+4;
1380 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1381 = 11 */
1382 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1383 /* OPT root label (1) followed by type (2) */
1384 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1385 if (!options) {
1386 char* ecsStart = nullptr;
1387 size_t ecsLen = 0;
1388 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1389 if (res == 0 && ecsLen > 4) {
1390 EDNSSubnetOpts eso;
1391 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1392 *ednssubnet=eso;
1393 found = true;
1394 }
1395 }
1396 }
1397 else {
1398 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1399 if (res == 0) {
1400 const auto& it = options->find(EDNSOptionCode::ECS);
1401 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1402 EDNSSubnetOpts eso;
1403 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1404 *ednssubnet=eso;
1405 found = true;
1406 }
1407 }
1408 }
1409 }
1410 }
1411 }
1412 return found;
1413 }
1414
1415 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1416 {
1417 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1418
1419 if(conn->state==TCPConnection::BYTE0) {
1420 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1421 if(bytes==1)
1422 conn->state=TCPConnection::BYTE1;
1423 if(bytes==2) {
1424 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1425 conn->bytesread=0;
1426 conn->state=TCPConnection::GETQUESTION;
1427 }
1428 if(!bytes || bytes < 0) {
1429 t_fdm->removeReadFD(fd);
1430 return;
1431 }
1432 }
1433 else if(conn->state==TCPConnection::BYTE1) {
1434 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1435 if(bytes==1) {
1436 conn->state=TCPConnection::GETQUESTION;
1437 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1438 conn->bytesread=0;
1439 }
1440 if(!bytes || bytes < 0) {
1441 if(g_logCommonErrors)
1442 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1443 t_fdm->removeReadFD(fd);
1444 return;
1445 }
1446 }
1447 else if(conn->state==TCPConnection::GETQUESTION) {
1448 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1449 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1450 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1451 t_fdm->removeReadFD(fd);
1452 return;
1453 }
1454 conn->bytesread+=(uint16_t)bytes;
1455 if(conn->bytesread==conn->qlen) {
1456 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1457
1458 DNSComboWriter* dc=nullptr;
1459 try {
1460 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1461 }
1462 catch(MOADNSException &mde) {
1463 g_stats.clientParseError++;
1464 if(g_logCommonErrors)
1465 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1466 return;
1467 }
1468 dc->d_tcpConnection = conn; // carry the torch
1469 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1470 dc->d_tcp=true;
1471 dc->setRemote(&conn->d_remote);
1472 ComboAddress dest;
1473 memset(&dest, 0, sizeof(dest));
1474 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1475 socklen_t len = dest.getSocklen();
1476 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1477 dc->setLocal(dest);
1478 DNSName qname;
1479 uint16_t qtype=0;
1480 uint16_t qclass=0;
1481 bool needECS = false;
1482 string requestorId;
1483 string deviceId;
1484 #ifdef HAVE_PROTOBUF
1485 auto luaconfsLocal = g_luaconfs.getLocal();
1486 if (luaconfsLocal->protobufServer) {
1487 needECS = true;
1488 }
1489 #endif
1490
1491 if(needECS || (t_pdl && (t_pdl->d_gettag_ffi || t_pdl->d_gettag))) {
1492
1493 try {
1494 std::map<uint16_t, EDNSOptionView> ednsOptions;
1495 dc->d_ecsParsed = true;
1496 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1497
1498 if(t_pdl) {
1499 try {
1500 if (t_pdl->d_gettag_ffi) {
1501 dc->d_tag = t_pdl->gettag_ffi(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId, dc->d_ttlCap, dc->d_variable);
1502 }
1503 else if (t_pdl->d_gettag) {
1504 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1505 }
1506 }
1507 catch(const std::exception& e) {
1508 if(g_logCommonErrors)
1509 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1510 }
1511 }
1512 }
1513 catch(const std::exception& e)
1514 {
1515 if(g_logCommonErrors)
1516 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1517 }
1518 }
1519 #ifdef HAVE_PROTOBUF
1520 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1521 dc->d_requestorId = requestorId;
1522 dc->d_deviceId = deviceId;
1523 dc->d_uuid = (*t_uuidGenerator)();
1524 }
1525
1526 if(luaconfsLocal->protobufServer) {
1527 try {
1528 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1529
1530 if (!luaconfsLocal->protobufTaggedOnly) {
1531 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1532 }
1533 }
1534 catch(std::exception& e) {
1535 if(g_logCommonErrors)
1536 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1537 }
1538 }
1539 #endif
1540 if(dc->d_mdp.d_header.qr) {
1541 delete dc;
1542 g_stats.ignoredCount++;
1543 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1544 return;
1545 }
1546 if(dc->d_mdp.d_header.opcode) {
1547 delete dc;
1548 g_stats.ignoredCount++;
1549 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1550 return;
1551 }
1552 else {
1553 ++g_stats.qcounter;
1554 ++g_stats.tcpqcounter;
1555 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1556 return;
1557 }
1558 }
1559 }
1560 }
1561
1562 //! Handle new incoming TCP connection
1563 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1564 {
1565 ComboAddress addr;
1566 socklen_t addrlen=sizeof(addr);
1567 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1568 if(newsock>=0) {
1569 if(MT->numProcesses() > g_maxMThreads) {
1570 g_stats.overCapacityDrops++;
1571 try {
1572 closesocket(newsock);
1573 }
1574 catch(const PDNSException& e) {
1575 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1576 }
1577 return;
1578 }
1579
1580 if(t_remotes)
1581 t_remotes->push_back(addr);
1582 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1583 if(!g_quiet)
1584 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1585
1586 g_stats.unauthorizedTCP++;
1587 try {
1588 closesocket(newsock);
1589 }
1590 catch(const PDNSException& e) {
1591 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1592 }
1593 return;
1594 }
1595 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1596 g_stats.tcpClientOverflow++;
1597 try {
1598 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1599 }
1600 catch(const PDNSException& e) {
1601 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1602 }
1603 return;
1604 }
1605
1606 setNonBlocking(newsock);
1607 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
1608 tc->state=TCPConnection::BYTE0;
1609
1610 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1611
1612 struct timeval now;
1613 Utility::gettimeofday(&now, 0);
1614 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1615 }
1616 }
1617
1618 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1619 {
1620 gettimeofday(&g_now, 0);
1621 struct timeval diff = g_now - tv;
1622 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1623
1624 if(tv.tv_sec && delta > 1000.0) {
1625 g_stats.tooOldDrops++;
1626 return 0;
1627 }
1628
1629 ++g_stats.qcounter;
1630 if(fromaddr.sin4.sin_family==AF_INET6)
1631 g_stats.ipv6qcounter++;
1632
1633 string response;
1634 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1635 unsigned int ctag=0;
1636 uint32_t qhash = 0;
1637 bool needECS = false;
1638 std::vector<std::string> policyTags;
1639 LuaContext::LuaObject data;
1640 string requestorId;
1641 string deviceId;
1642 #ifdef HAVE_PROTOBUF
1643 boost::uuids::uuid uniqueId;
1644 auto luaconfsLocal = g_luaconfs.getLocal();
1645 if (luaconfsLocal->protobufServer) {
1646 uniqueId = (*t_uuidGenerator)();
1647 needECS = true;
1648 } else if (luaconfsLocal->outgoingProtobufServer) {
1649 uniqueId = (*t_uuidGenerator)();
1650 }
1651 #endif
1652 EDNSSubnetOpts ednssubnet;
1653 bool ecsFound = false;
1654 bool ecsParsed = false;
1655 uint32_t ttlCap = std::numeric_limits<uint32_t>::max();
1656 bool variable = false;
1657 try {
1658 DNSName qname;
1659 uint16_t qtype=0;
1660 uint16_t qclass=0;
1661 uint32_t age;
1662 bool qnameParsed=false;
1663 #ifdef MALLOC_TRACE
1664 /*
1665 static uint64_t last=0;
1666 if(!last)
1667 g_mtracer->clearAllocators();
1668 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1669 last=g_mtracer->getAllocs();
1670 cout<<g_mtracer->topAllocatorsString()<<endl;
1671 g_mtracer->clearAllocators();
1672 */
1673 #endif
1674
1675 if(needECS || (t_pdl && (t_pdl->d_gettag || t_pdl->d_gettag_ffi))) {
1676 try {
1677 std::map<uint16_t, EDNSOptionView> ednsOptions;
1678 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1679 qnameParsed = true;
1680 ecsParsed = true;
1681
1682 if(t_pdl) {
1683 try {
1684 if (t_pdl->d_gettag_ffi) {
1685 ctag = t_pdl->gettag_ffi(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId, ttlCap, variable);
1686 }
1687 else if (t_pdl->d_gettag) {
1688 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1689 }
1690 }
1691 catch(const std::exception& e) {
1692 if(g_logCommonErrors)
1693 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1694 }
1695 }
1696 }
1697 catch(const std::exception& e)
1698 {
1699 if(g_logCommonErrors)
1700 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1701 }
1702 }
1703
1704 bool cacheHit = false;
1705 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1706 #ifdef HAVE_PROTOBUF
1707 if(luaconfsLocal->protobufServer) {
1708 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1709 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
1710 }
1711 }
1712 #endif /* HAVE_PROTOBUF */
1713
1714 /* It might seem like a good idea to skip the packet cache lookup if we know that the answer is not cacheable,
1715 but it means that the hash would not be computed. If some script decides at a later time to mark back the answer
1716 as cacheable we would cache it with a wrong tag, so better safe than sorry. */
1717 if (qnameParsed) {
1718 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1719 }
1720 else {
1721 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1722 }
1723
1724 if (cacheHit) {
1725 #ifdef HAVE_PROTOBUF
1726 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1727 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1728 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1729 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1730 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1731 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1732 pbMessage.setRequestorId(requestorId);
1733 pbMessage.setDeviceId(deviceId);
1734 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1735 }
1736 #endif /* HAVE_PROTOBUF */
1737 if(!g_quiet)
1738 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1739
1740 g_stats.packetCacheHits++;
1741 SyncRes::s_queries++;
1742 ageDNSPacket(response, age);
1743 struct msghdr msgh;
1744 struct iovec iov;
1745 char cbuf[256];
1746 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1747 msgh.msg_control=NULL;
1748
1749 if(g_fromtosockets.count(fd)) {
1750 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1751 }
1752 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1753 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1754
1755 if(response.length() >= sizeof(struct dnsheader)) {
1756 struct dnsheader tmpdh;
1757 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1758 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1759 }
1760 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1761 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1762 return 0;
1763 }
1764 }
1765 catch(std::exception& e) {
1766 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1767 return 0;
1768 }
1769
1770 if(t_pdl) {
1771 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
1772 if(!g_quiet)
1773 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1774 g_stats.policyDrops++;
1775 return 0;
1776 }
1777 }
1778
1779 if(MT->numProcesses() > g_maxMThreads) {
1780 if(!g_quiet)
1781 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1782
1783 g_stats.overCapacityDrops++;
1784 return 0;
1785 }
1786
1787 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1788 dc->setSocket(fd);
1789 dc->d_tag=ctag;
1790 dc->d_qhash=qhash;
1791 dc->d_query = question;
1792 dc->setRemote(&fromaddr);
1793 dc->setLocal(destaddr);
1794 dc->d_tcp=false;
1795 dc->d_policyTags = policyTags;
1796 dc->d_data = data;
1797 dc->d_ecsFound = ecsFound;
1798 dc->d_ecsParsed = ecsParsed;
1799 dc->d_ednssubnet = ednssubnet;
1800 dc->d_ttlCap = ttlCap;
1801 dc->d_variable = variable;
1802 #ifdef HAVE_PROTOBUF
1803 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1804 dc->d_uuid = uniqueId;
1805 }
1806 dc->d_requestorId = requestorId;
1807 dc->d_deviceId = deviceId;
1808 #endif
1809
1810 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1811 return 0;
1812 }
1813
1814
1815 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1816 {
1817 ssize_t len;
1818 char data[1500];
1819 ComboAddress fromaddr;
1820 struct msghdr msgh;
1821 struct iovec iov;
1822 char cbuf[256];
1823 bool firstQuery = true;
1824
1825 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1826 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1827
1828 for(;;)
1829 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1830
1831 firstQuery = false;
1832
1833 if(t_remotes)
1834 t_remotes->push_back(fromaddr);
1835
1836 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1837 if(!g_quiet)
1838 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1839
1840 g_stats.unauthorizedUDP++;
1841 return;
1842 }
1843 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1844 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1845 if(!g_quiet)
1846 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1847
1848 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1849 return;
1850 }
1851 try {
1852 dnsheader* dh=(dnsheader*)data;
1853
1854 if(dh->qr) {
1855 g_stats.ignoredCount++;
1856 if(g_logCommonErrors)
1857 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1858 }
1859 else if(dh->opcode) {
1860 g_stats.ignoredCount++;
1861 if(g_logCommonErrors)
1862 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1863 }
1864 else {
1865 string question(data, (size_t)len);
1866 struct timeval tv={0,0};
1867 HarvestTimestamp(&msgh, &tv);
1868 ComboAddress dest;
1869 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1870 auto loc = rplookup(g_listenSocketsAddresses, fd);
1871 if(HarvestDestinationAddress(&msgh, &dest)) {
1872 // but.. need to get port too
1873 if(loc)
1874 dest.sin4.sin_port = loc->sin4.sin_port;
1875 }
1876 else {
1877 if(loc) {
1878 dest = *loc;
1879 }
1880 else {
1881 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1882 socklen_t slen = dest.getSocklen();
1883 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1884 }
1885 }
1886 if(g_weDistributeQueries)
1887 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1888 else
1889 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1890 }
1891 }
1892 catch(MOADNSException& mde) {
1893 g_stats.clientParseError++;
1894 if(g_logCommonErrors)
1895 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1896 }
1897 catch(std::runtime_error& e) {
1898 g_stats.clientParseError++;
1899 if(g_logCommonErrors)
1900 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1901 }
1902 }
1903 else {
1904 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1905 if(firstQuery && errno == EAGAIN)
1906 g_stats.noPacketError++;
1907
1908 break;
1909 }
1910 }
1911
1912 static void makeTCPServerSockets(unsigned int threadId)
1913 {
1914 int fd;
1915 vector<string>locals;
1916 stringtok(locals,::arg()["local-address"]," ,");
1917
1918 if(locals.empty())
1919 throw PDNSException("No local address specified");
1920
1921 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1922 ServiceTuple st;
1923 st.port=::arg().asNum("local-port");
1924 parseService(*i, st);
1925
1926 ComboAddress sin;
1927
1928 memset((char *)&sin,0, sizeof(sin));
1929 sin.sin4.sin_family = AF_INET;
1930 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1931 sin.sin6.sin6_family = AF_INET6;
1932 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1933 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1934 }
1935
1936 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1937 if(fd<0)
1938 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1939
1940 setCloseOnExec(fd);
1941
1942 int tmp=1;
1943 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
1944 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1945 exit(1);
1946 }
1947 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1948 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1949 }
1950
1951 #ifdef TCP_DEFER_ACCEPT
1952 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
1953 if(i==locals.begin())
1954 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1955 }
1956 #endif
1957
1958 if( ::arg().mustDo("non-local-bind") )
1959 Utility::setBindAny(AF_INET, fd);
1960
1961 #ifdef SO_REUSEPORT
1962 if(g_reusePort) {
1963 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
1964 throw PDNSException("SO_REUSEPORT: "+stringerror());
1965 }
1966 #endif
1967
1968 if (::arg().asNum("tcp-fast-open") > 0) {
1969 #ifdef TCP_FASTOPEN
1970 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1971 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1972 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1973 }
1974 #else
1975 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1976 #endif
1977 }
1978
1979 sin.sin4.sin_port = htons(st.port);
1980 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1981 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1982 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1983
1984 setNonBlocking(fd);
1985 setSocketSendBuffer(fd, 65000);
1986 listen(fd, 128);
1987 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
1988 g_tcpListenSockets.push_back(fd);
1989 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1990 // - fd is not that which we know here, but returned from accept()
1991 if(sin.sin4.sin_family == AF_INET)
1992 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1993 else
1994 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1995 }
1996 }
1997
1998 static void makeUDPServerSockets(unsigned int threadId)
1999 {
2000 int one=1;
2001 vector<string>locals;
2002 stringtok(locals,::arg()["local-address"]," ,");
2003
2004 if(locals.empty())
2005 throw PDNSException("No local address specified");
2006
2007 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
2008 ServiceTuple st;
2009 st.port=::arg().asNum("local-port");
2010 parseService(*i, st);
2011
2012 ComboAddress sin;
2013
2014 memset(&sin, 0, sizeof(sin));
2015 sin.sin4.sin_family = AF_INET;
2016 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
2017 sin.sin6.sin6_family = AF_INET6;
2018 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
2019 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
2020 }
2021
2022 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
2023 if(fd < 0) {
2024 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
2025 }
2026 if (!setSocketTimestamps(fd))
2027 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2028
2029 if(IsAnyAddress(sin)) {
2030 if(sin.sin4.sin_family == AF_INET)
2031 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2032 g_fromtosockets.insert(fd);
2033 #ifdef IPV6_RECVPKTINFO
2034 if(sin.sin4.sin_family == AF_INET6)
2035 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2036 g_fromtosockets.insert(fd);
2037 #endif
2038 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2039 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2040 }
2041 }
2042 if( ::arg().mustDo("non-local-bind") )
2043 Utility::setBindAny(AF_INET6, fd);
2044
2045 setCloseOnExec(fd);
2046
2047 setSocketReceiveBuffer(fd, 250000);
2048 sin.sin4.sin_port = htons(st.port);
2049
2050
2051 #ifdef SO_REUSEPORT
2052 if(g_reusePort) {
2053 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2054 throw PDNSException("SO_REUSEPORT: "+stringerror());
2055 }
2056 #endif
2057 socklen_t socklen=sin.getSocklen();
2058 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2059 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2060
2061 setNonBlocking(fd);
2062
2063 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
2064 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2065 if(sin.sin4.sin_family == AF_INET)
2066 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2067 else
2068 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2069 }
2070 }
2071
2072 static void daemonize(void)
2073 {
2074 if(fork())
2075 exit(0); // bye bye
2076
2077 setsid();
2078
2079 int i=open("/dev/null",O_RDWR); /* open stdin */
2080 if(i < 0)
2081 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2082 else {
2083 dup2(i,0); /* stdin */
2084 dup2(i,1); /* stderr */
2085 dup2(i,2); /* stderr */
2086 close(i);
2087 }
2088 }
2089
2090 static void usr1Handler(int)
2091 {
2092 statsWanted=true;
2093 }
2094
2095 static void usr2Handler(int)
2096 {
2097 g_quiet= !g_quiet;
2098 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2099 ::arg().set("quiet")=g_quiet ? "" : "no";
2100 }
2101
2102 static void doStats(void)
2103 {
2104 static time_t lastOutputTime;
2105 static uint64_t lastQueryCount;
2106
2107 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2108 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2109
2110 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2111 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2112 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2113 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2114 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2115
2116 L<<Logger::Notice<<"stats: throttle map: "
2117 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2118 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2119 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2120 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2121 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2122 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2123 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2124
2125 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2126 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2127
2128 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2129 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2130
2131 time_t now = time(0);
2132 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2133 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2134 }
2135 lastOutputTime = now;
2136 lastQueryCount = SyncRes::s_queries;
2137 }
2138 else if(statsWanted)
2139 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
2140
2141 statsWanted=false;
2142 }
2143
2144 static void houseKeeping(void *)
2145 {
2146 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2147 static thread_local int cleanCounter=0;
2148 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2149 try {
2150 if(s_running)
2151 return;
2152 s_running=true;
2153
2154 struct timeval now;
2155 Utility::gettimeofday(&now, 0);
2156
2157 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2158 DTime dt;
2159 dt.setTimeval(now);
2160 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2161 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2162
2163 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2164
2165 if(!((cleanCounter++)%40)) { // this is a full scan!
2166 time_t limit=now.tv_sec-300;
2167 SyncRes::pruneNSSpeeds(limit);
2168 }
2169 last_prune=time(0);
2170 }
2171
2172 if(now.tv_sec - last_rootupdate > 7200) {
2173 int res = SyncRes::getRootNS(g_now, nullptr);
2174 if (!res)
2175 last_rootupdate=now.tv_sec;
2176 }
2177
2178 if (t_id == s_handlerThreadID) {
2179 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
2180 doStats();
2181 last_stat=time(0);
2182 }
2183 }
2184 else if(t_id == s_distributorThreadID) {
2185
2186 if(now.tv_sec - last_secpoll >= 3600) {
2187 try {
2188 doSecPoll(&last_secpoll);
2189 }
2190 catch(std::exception& e)
2191 {
2192 L<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2193 }
2194 catch(PDNSException& e)
2195 {
2196 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2197 }
2198 catch(ImmediateServFailException &e)
2199 {
2200 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2201 }
2202 catch(...)
2203 {
2204 L<<Logger::Error<<"Exception while performing security poll"<<endl;
2205 }
2206
2207 }
2208 }
2209 s_running=false;
2210 }
2211 catch(PDNSException& ae)
2212 {
2213 s_running=false;
2214 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2215 throw;
2216 }
2217 }
2218
2219 static void makeThreadPipes()
2220 {
2221 for(unsigned int n=0; n < g_numThreads; ++n) {
2222 struct ThreadPipeSet tps;
2223 int fd[2];
2224 if(pipe(fd) < 0)
2225 unixDie("Creating pipe for inter-thread communications");
2226
2227 tps.readToThread = fd[0];
2228 tps.writeToThread = fd[1];
2229
2230 if(pipe(fd) < 0)
2231 unixDie("Creating pipe for inter-thread communications");
2232 tps.readFromThread = fd[0];
2233 tps.writeFromThread = fd[1];
2234
2235 g_pipes.push_back(tps);
2236 }
2237 }
2238
2239 struct ThreadMSG
2240 {
2241 pipefunc_t func;
2242 bool wantAnswer;
2243 };
2244
2245 void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2246 {
2247 /* This function might be called by the worker with t_id 0 during startup */
2248 if (t_id != s_handlerThreadID && t_id != s_distributorThreadID) {
2249 L<<Logger::Error<<"broadcastFunction() has been called by a worker ("<<t_id<<")"<<endl;
2250 exit(1);
2251 }
2252
2253 int n = 0;
2254 for(ThreadPipeSet& tps : g_pipes)
2255 {
2256 if(n++ == t_id) {
2257 if(!skipSelf)
2258 func(); // don't write to ourselves!
2259 continue;
2260 }
2261
2262 ThreadMSG* tmsg = new ThreadMSG();
2263 tmsg->func = func;
2264 tmsg->wantAnswer = true;
2265 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2266 delete tmsg;
2267 unixDie("write to thread pipe returned wrong size or error");
2268 }
2269
2270 string* resp;
2271 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2272 unixDie("read from thread pipe returned wrong size or error");
2273
2274 if(resp) {
2275 // cerr <<"got response: " << *resp << endl;
2276 delete resp;
2277 }
2278 }
2279 }
2280
2281 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2282 {
2283 if (t_id != s_distributorThreadID) {
2284 L<<Logger::Error<<"distributeAsyncFunction() has been called by a worker ("<<t_id<<")"<<endl;
2285 exit(1);
2286 }
2287
2288 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2289 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2290
2291 if(target == 0) {
2292 L<<Logger::Error<<"distributeAsyncFunction() tried to assign a query to the distributor"<<endl;
2293 exit(1);
2294 }
2295
2296 ThreadPipeSet& tps = g_pipes[target];
2297 ThreadMSG* tmsg = new ThreadMSG();
2298 tmsg->func = func;
2299 tmsg->wantAnswer = false;
2300
2301 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2302 delete tmsg;
2303 unixDie("write to thread pipe returned wrong size or error");
2304 }
2305 }
2306
2307 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2308 {
2309 ThreadMSG* tmsg = nullptr;
2310
2311 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
2312 unixDie("read from thread pipe returned wrong size or error");
2313 }
2314
2315 void *resp=0;
2316 try {
2317 resp = tmsg->func();
2318 }
2319 catch(std::exception& e) {
2320 if(g_logCommonErrors)
2321 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2322 }
2323 catch(PDNSException& e) {
2324 if(g_logCommonErrors)
2325 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2326 }
2327 if(tmsg->wantAnswer) {
2328 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2329 delete tmsg;
2330 unixDie("write to thread pipe returned wrong size or error");
2331 }
2332 }
2333
2334 delete tmsg;
2335 }
2336
2337 template<class T> void *voider(const boost::function<T*()>& func)
2338 {
2339 return func();
2340 }
2341
2342 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2343 {
2344 a.insert(a.end(), b.begin(), b.end());
2345 return a;
2346 }
2347
/** Appends every (name, number) pair in `b` to the end of `a` and returns `a`. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2353
2354 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2355 {
2356 a.insert(a.end(), b.begin(), b.end());
2357 return a;
2358 }
2359
2360
2361 template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
2362 {
2363 if (t_id != s_handlerThreadID) {
2364 L<<Logger::Error<<"broadcastFunction has been called by a worker ("<<t_id<<")"<<endl;
2365 exit(1);
2366
2367 }
2368
2369 T ret=T();
2370 for(ThreadPipeSet& tps : g_pipes)
2371 {
2372 ThreadMSG* tmsg = new ThreadMSG();
2373 tmsg->func = boost::bind(voider<T>, func);
2374 tmsg->wantAnswer = true;
2375
2376 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2377 delete tmsg;
2378 unixDie("write to thread pipe returned wrong size or error");
2379 }
2380
2381 T* resp;
2382 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2383 unixDie("read from thread pipe returned wrong size or error");
2384
2385 if(resp) {
2386 //~ cerr <<"got response: " << *resp << endl;
2387 ret += *resp;
2388 delete resp;
2389 }
2390 }
2391 return ret;
2392 }
2393
2394 template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2395 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
2396 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
2397 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
2398
2399 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2400 {
2401 string remote;
2402 string msg=s_rcc.recv(&remote);
2403 RecursorControlParser rcp;
2404 RecursorControlParser::func_t* command;
2405
2406 string answer=rcp.getAnswer(msg, &command);
2407
2408 // If we are inside a chroot, we need to strip
2409 if (!arg()["chroot"].empty()) {
2410 size_t len = arg()["chroot"].length();
2411 remote = remote.substr(len);
2412 }
2413
2414 try {
2415 s_rcc.send(answer, &remote);
2416 command();
2417 }
2418 catch(std::exception& e) {
2419 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2420 }
2421 catch(PDNSException& ae) {
2422 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2423 }
2424 }
2425
2426 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2427 {
2428 PacketID* pident=any_cast<PacketID>(&var);
2429 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2430
2431 shared_array<char> buffer(new char[pident->inNeeded]);
2432
2433 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2434 if(ret > 0) {
2435 pident->inMSG.append(&buffer[0], &buffer[ret]);
2436 pident->inNeeded-=(size_t)ret;
2437 if(!pident->inNeeded || pident->inIncompleteOkay) {
2438 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2439 PacketID pid=*pident;
2440 string msg=pident->inMSG;
2441
2442 t_fdm->removeReadFD(fd);
2443 MT->sendEvent(pid, &msg);
2444 }
2445 else {
2446 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2447 }
2448 }
2449 else {
2450 PacketID tmp=*pident;
2451 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2452 string empty;
2453 MT->sendEvent(tmp, &empty); // this conveys error status
2454 }
2455 }
2456
2457 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2458 {
2459 PacketID* pid=any_cast<PacketID>(&var);
2460 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2461 if(ret > 0) {
2462 pid->outPos+=(ssize_t)ret;
2463 if(pid->outPos==pid->outMSG.size()) {
2464 PacketID tmp=*pid;
2465 t_fdm->removeWriteFD(fd);
2466 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2467 }
2468 }
2469 else { // error or EOF
2470 PacketID tmp(*pid);
2471 t_fdm->removeWriteFD(fd);
2472 string sent;
2473 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2474 }
2475 }
2476
2477 // resend event to everybody chained onto it
2478 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2479 {
2480 if(iter->key.chain.empty())
2481 return;
2482 // cerr<<"doResends called!\n";
2483 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2484 resend.fd=-1;
2485 resend.id=*i;
2486 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2487
2488 MT->sendEvent(resend, &content);
2489 g_stats.chainResends++;
2490 }
2491 }
2492
2493 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2494 {
2495 PacketID pid=any_cast<PacketID>(var);
2496 ssize_t len;
2497 char data[g_outgoingEDNSBufsize];
2498 ComboAddress fromaddr;
2499 socklen_t addrlen=sizeof(fromaddr);
2500
2501 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2502
2503 if(len < (ssize_t) sizeof(dnsheader)) {
2504 if(len < 0)
2505 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2506 else {
2507 g_stats.serverParseError++;
2508 if(g_logCommonErrors)
2509 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2510 ": packet smaller than DNS header"<<endl;
2511 }
2512
2513 t_udpclientsocks->returnSocket(fd);
2514 string empty;
2515
2516 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2517 if(iter != MT->d_waiters.end())
2518 doResends(iter, pid, empty);
2519
2520 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2521 return;
2522 }
2523
2524 dnsheader dh;
2525 memcpy(&dh, data, sizeof(dh));
2526
2527 PacketID pident;
2528 pident.remote=fromaddr;
2529 pident.id=dh.id;
2530 pident.fd=fd;
2531
2532 if(!dh.qr && g_logCommonErrors) {
2533 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2534 }
2535
2536 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2537 !dh.qr) { // one weird server
2538 pident.domain.clear();
2539 pident.type = 0;
2540 }
2541 else {
2542 try {
2543 if(len > 12)
2544 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2545 }
2546 catch(std::exception& e) {
2547 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2548 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2549 return;
2550 }
2551 }
2552 string packet;
2553 packet.assign(data, len);
2554
2555 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2556 if(iter != MT->d_waiters.end()) {
2557 doResends(iter, pident, packet);
2558 }
2559
2560 retryWithName:
2561
2562 if(!MT->sendEvent(pident, &packet)) {
2563 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2564 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2565 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2566 pident.domain == mthread->key.domain) {
2567 mthread->key.nearMisses++;
2568 }
2569
2570 // be a bit paranoid here since we're weakening our matching
2571 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2572 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2573 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2574 pident.domain = mthread->key.domain;
2575 pident.type = mthread->key.type;
2576 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2577 }
2578 }
2579 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2580 if(g_logCommonErrors) {
2581 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2582 }
2583 }
2584 else if(fd >= 0) {
2585 t_udpclientsocks->returnSocket(fd);
2586 }
2587 }
2588
2589 FDMultiplexer* getMultiplexer()
2590 {
2591 FDMultiplexer* ret;
2592 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
2593 try {
2594 ret=i.second();
2595 return ret;
2596 }
2597 catch(FDMultiplexerException &fe) {
2598 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2599 }
2600 catch(...) {
2601 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2602 }
2603 }
2604 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2605 exit(1);
2606 }
2607
2608
2609 static string* doReloadLuaScript()
2610 {
2611 string fname= ::arg()["lua-dns-script"];
2612 try {
2613 if(fname.empty()) {
2614 t_pdl.reset();
2615 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2616 return new string("unloaded\n");
2617 }
2618 else {
2619 t_pdl = std::make_shared<RecursorLua4>(fname);
2620 }
2621 }
2622 catch(std::exception& e) {
2623 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2624 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2625 }
2626
2627 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2628 return new string("(re)loaded '"+fname+"'\n");
2629 }
2630
2631 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2632 {
2633 if(begin != end)
2634 ::arg().set("lua-dns-script") = *begin;
2635
2636 return broadcastAccFunction<string>(doReloadLuaScript);
2637 }
2638
2639 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
2640 try
2641 {
2642 if(newRegex.empty()) {
2643 t_traceRegex.reset();
2644 return new string("unset\n");
2645 }
2646 else {
2647 t_traceRegex = std::make_shared<Regex>(newRegex);
2648 return new string("ok\n");
2649 }
2650 }
2651 catch(PDNSException& ae)
2652 {
2653 return new string(ae.reason+"\n");
2654 }
2655
2656 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2657 {
2658 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2659 }
2660
2661 static void checkLinuxIPv6Limits()
2662 {
2663 #ifdef __linux__
2664 string line;
2665 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2666 int lim=std::stoi(line);
2667 if(lim < 16384) {
2668 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2669 }
2670 }
2671 #endif
2672 }
2673 static void checkOrFixFDS()
2674 {
2675 unsigned int availFDs=getFilenumLimit();
2676 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2677
2678 if(wantFDs > availFDs) {
2679 unsigned int hardlimit= getFilenumLimit(true);
2680 if(hardlimit >= wantFDs) {
2681 setFilenumLimit(wantFDs);
2682 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2683 }
2684 else {
2685 int newval = (hardlimit - 25) / g_numWorkerThreads;
2686 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2687 g_maxMThreads = newval;
2688 setFilenumLimit(hardlimit);
2689 }
2690 }
2691 }
2692
// Forward declaration only; the definition is not part of this section of the file.
static void* recursorThread(int tid, bool worker);
2694
2695 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
2696 {
2697 t_allowFrom = ng;
2698 return nullptr;
2699 }
2700
// Command line arguments; parseACLs() hands them to ::arg().preParse() when the configuration is re-read.
int g_argc = 0;
char** g_argv = nullptr;
2703
2704 void parseACLs()
2705 {
2706 static bool l_initialized;
2707
2708 if(l_initialized) { // only reload configuration file on second call
2709 string configname=::arg()["config-dir"]+"/recursor.conf";
2710 if(::arg()["config-name"]!="") {
2711 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2712 }
2713 cleanSlashes(configname);
2714
2715 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2716 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2717 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2718 ::arg().preParseFile(configname.c_str(), "include-dir");
2719 ::arg().preParse(g_argc, g_argv, "include-dir");
2720
2721 // then process includes
2722 std::vector<std::string> extraConfigs;
2723 ::arg().gatherIncludes(extraConfigs);
2724
2725 for(const std::string& fn : extraConfigs) {
2726 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2727 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2728 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2729 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2730 }
2731
2732 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2733 ::arg().preParse(g_argc, g_argv, "allow-from");
2734 }
2735
2736 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2737 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
2738
2739 if(!::arg()["allow-from-file"].empty()) {
2740 string line;
2741 ifstream ifs(::arg()["allow-from-file"].c_str());
2742 if(!ifs) {
2743 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2744 }
2745
2746 string::size_type pos;
2747 while(getline(ifs,line)) {
2748 pos=line.find('#');
2749 if(pos!=string::npos)
2750 line.resize(pos);
2751 trim(line);
2752 if(line.empty())
2753 continue;
2754
2755 allowFrom->addMask(line);
2756 }
2757 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2758 }
2759 else if(!::arg()["allow-from"].empty()) {
2760 vector<string> ips;
2761 stringtok(ips, ::arg()["allow-from"], ", ");
2762
2763 L<<Logger::Warning<<"Only allowing queries from: ";
2764 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2765 allowFrom->addMask(*i);
2766 if(i!=ips.begin())
2767 L<<Logger::Warning<<", ";
2768 L<<Logger::Warning<<*i;
2769 }
2770 L<<Logger::Warning<<endl;
2771 }
2772 else {
2773 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2774 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2775 allowFrom = nullptr;
2776 }
2777
2778 g_initialAllowFrom = allowFrom;
2779 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2780 oldAllowFrom = nullptr;
2781
2782 l_initialized = true;
2783 }
2784
2785
2786 static void setupDelegationOnly()
2787 {
2788 vector<string> parts;
2789 stringtok(parts, ::arg()["delegation-only"], ", \t");
2790 for(const auto& p : parts) {
2791 SyncRes::addDelegationOnly(DNSName(p));
2792 }
2793 }
2794
2795 static std::map<unsigned int, std::set<int> > parseCPUMap()
2796 {
2797 std::map<unsigned int, std::set<int> > result;
2798
2799 const std::string value = ::arg()["cpu-map"];
2800
2801 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2802 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2803 return result;
2804 }
2805
2806 std::vector<std::string> parts;
2807
2808 stringtok(parts, value, " \t");
2809
2810 for(const auto& part : parts) {
2811 if (part.find('=') == string::npos)
2812 continue;
2813
2814 try {
2815 auto headers = splitField(part, '=');
2816 trim(headers.first);
2817 trim(headers.second);
2818
2819 unsigned int threadId = pdns_stou(headers.first);
2820 std::vector<std::string> cpus;
2821
2822 stringtok(cpus, headers.second, ",");
2823
2824 for(const auto& cpu : cpus) {
2825 int cpuId = std::stoi(cpu);
2826
2827 result[threadId].insert(cpuId);
2828 }
2829 }
2830 catch(const std::exception& e) {
2831 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2832 }
2833 }
2834
2835 return result;
2836 }
2837
2838 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2839 {
2840 const auto& cpuMapping = cpusMap.find(n);
2841 if (cpuMapping != cpusMap.cend()) {
2842 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2843 if (rc == 0) {
2844 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2845 for (const auto cpu : cpuMapping->second) {
2846 L<<Logger::Info<<" "<<cpu;
2847 }
2848 L<<Logger::Info<<endl;
2849 }
2850 else {
2851 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2852 for (const auto cpu : cpuMapping->second) {
2853 L<<Logger::Info<<" "<<cpu;
2854 }
2855 L<<Logger::Info<<strerror(rc)<<endl;
2856 }
2857 }
2858 }
2859
2860 static int serviceMain(int argc, char*argv[])
2861 {
2862 L.setName(s_programname);
2863 L.disableSyslog(::arg().mustDo("disable-syslog"));
2864 L.setTimestamps(::arg().mustDo("log-timestamp"));
2865
2866 if(!::arg()["logging-facility"].empty()) {
2867 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2868 if(val >= 0)
2869 theL().setFacility(val);
2870 else
2871 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2872 }
2873
2874 showProductVersion();
2875 seedRandom(::arg()["entropy-source"]);
2876
2877 g_disthashseed=dns_random(0xffffffff);
2878
2879 checkLinuxIPv6Limits();
2880 try {
2881 vector<string> addrs;
2882 if(!::arg()["query-local-address6"].empty()) {
2883 SyncRes::s_doIPv6=true;
2884 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2885
2886 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2887 for(const string& addr : addrs) {
2888 g_localQueryAddresses6.push_back(ComboAddress(addr));
2889 }
2890 }
2891 else {
2892 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2893 }
2894 addrs.clear();
2895 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2896 for(const string& addr : addrs) {
2897 g_localQueryAddresses4.push_back(ComboAddress(addr));
2898 }
2899 }
2900 catch(std::exception& e) {
2901 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2902 exit(99);
2903 }
2904
2905 // keep this ABOVE loadRecursorLuaConfig!
2906 if(::arg()["dnssec"]=="off")
2907 g_dnssecmode=DNSSECMode::Off;
2908 else if(::arg()["dnssec"]=="process-no-validate")
2909 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2910 else if(::arg()["dnssec"]=="process")
2911 g_dnssecmode=DNSSECMode::Process;
2912 else if(::arg()["dnssec"]=="validate")
2913 g_dnssecmode=DNSSECMode::ValidateAll;
2914 else if(::arg()["dnssec"]=="log-fail")
2915 g_dnssecmode=DNSSECMode::ValidateForLog;
2916 else {
2917 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2918 exit(1);
2919 }
2920
2921 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2922 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
2923
2924 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2925 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2926
2927 try {
2928 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2929 }
2930 catch (PDNSException &e) {
2931 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2932 exit(1);
2933 }
2934
2935 parseACLs();
2936 sortPublicSuffixList();
2937
2938 if(!::arg()["dont-query"].empty()) {
2939 vector<string> ips;
2940 stringtok(ips, ::arg()["dont-query"], ", ");
2941 ips.push_back("0.0.0.0");
2942 ips.push_back("::");
2943
2944 L<<Logger::Warning<<"Will not send queries to: ";
2945 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2946 SyncRes::addDontQuery(*i);
2947 if(i!=ips.begin())
2948 L<<Logger::Warning<<", ";
2949 L<<Logger::Warning<<*i;
2950 }
2951 L<<Logger::Warning<<endl;
2952 }
2953
2954 g_quiet=::arg().mustDo("quiet");
2955
2956 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2957 if(g_weDistributeQueries) {
2958 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2959 }
2960
2961 setupDelegationOnly();
2962 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2963
2964 if(::arg()["trace"]=="fail") {
2965 SyncRes::setDefaultLogMode(SyncRes::Store);
2966 }
2967 else if(::arg().mustDo("trace")) {
2968 SyncRes::setDefaultLogMode(SyncRes::Log);
2969 ::arg().set("quiet")="no";
2970 g_quiet=false;
2971 g_dnssecLOG=true;
2972 }
2973
2974 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2975
2976 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2977
2978 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
2979 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
2980 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2981 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2982 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2983 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
2984 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2985 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
2986 SyncRes::s_serverID=::arg()["server-id"];
2987 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
2988 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
2989 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
2990 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
2991 if(SyncRes::s_serverID.empty()) {
2992 char tmp[128];
2993 gethostname(tmp, sizeof(tmp)-1);
2994 SyncRes::s_serverID=tmp;
2995 }
2996
2997 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
2998 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
2999
3000 if (!::arg().isEmpty("ecs-scope-zero-address")) {
3001 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
3002 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
3003 }
3004 else {
3005 bool found = false;
3006 for (const auto& addr : g_localQueryAddresses4) {
3007 if (!IsAnyAddress(addr)) {
3008 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
3009 found = true;
3010 break;
3011 }
3012 }
3013 if (!found) {
3014 for (const auto& addr : g_localQueryAddresses6) {
3015 if (!IsAnyAddress(addr)) {
3016 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
3017 found = true;
3018 break;
3019 }
3020 }
3021 if (!found) {
3022 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
3023 }
3024 }
3025 }
3026
3027 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
3028
3029 g_initialDomainMap = parseAuthAndForwards();
3030
3031 g_latencyStatSize=::arg().asNum("latency-statistic-size");
3032
3033 g_logCommonErrors=::arg().mustDo("log-common-errors");
3034 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
3035
3036 g_anyToTcp = ::arg().mustDo("any-to-tcp");
3037 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3038
3039 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3040
3041 g_numWorkerThreads = ::arg().asNum("threads");
3042 if (g_numWorkerThreads < 1) {
3043 L<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3044 g_numWorkerThreads = 1;
3045 }
3046
3047 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3048 g_maxMThreads = ::arg().asNum("max-mthreads");
3049
3050 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3051
3052 g_statisticsInterval = ::arg().asNum("statistics-interval");
3053
3054 #ifdef SO_REUSEPORT
3055 g_reusePort = ::arg().mustDo("reuseport");
3056 #endif
3057
3058 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3059
3060 if (g_useOneSocketPerThread) {
3061 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3062 makeUDPServerSockets(threadId);
3063 makeTCPServerSockets(threadId);
3064 }
3065 }
3066 else {
3067 makeUDPServerSockets(0);
3068 makeTCPServerSockets(0);
3069 }
3070
3071 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3072 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3073
3074 int forks;
3075 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3076 if(!fork()) // we are child
3077 break;
3078 }
3079
3080 if(::arg().mustDo("daemon")) {
3081 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3082 L.toConsole(Logger::Critical);
3083 daemonize();
3084 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3085 }
3086 signal(SIGUSR1,usr1Handler);
3087 signal(SIGUSR2,usr2Handler);
3088 signal(SIGPIPE,SIG_IGN);
3089
3090 checkOrFixFDS();
3091
3092 #ifdef HAVE_LIBSODIUM
3093 if (sodium_init() == -1) {
3094 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3095 exit(99);
3096 }
3097 #endif
3098
3099 openssl_thread_setup();
3100 openssl_seed();
3101
3102 int newgid=0;
3103 if(!::arg()["setgid"].empty())
3104 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3105 int newuid=0;
3106 if(!::arg()["setuid"].empty())
3107 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3108
3109 Utility::dropGroupPrivs(newuid, newgid);
3110
3111 if (!::arg()["chroot"].empty()) {
3112 #ifdef HAVE_SYSTEMD
3113 char *ns;
3114 ns = getenv("NOTIFY_SOCKET");
3115 if (ns != nullptr) {
3116 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3117 exit(1);
3118 }
3119 #endif
3120 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3121 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3122 exit(1);
3123 }
3124 else
3125 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3126 }
3127
3128 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3129 if(!s_pidfname.empty())
3130 unlink(s_pidfname.c_str()); // remove possible old pid file
3131 writePid();
3132
3133 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3134
3135 Utility::dropUserPrivs(newuid);
3136
3137 makeThreadPipes();
3138
3139 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3140 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3141 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3142
3143 if (::arg().mustDo("snmp-agent")) {
3144 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3145 g_snmpAgent->run();
3146 }
3147
3148 /* This thread handles the web server, carbon, statistics and the control channel */
3149 std::thread handlerThread(recursorThread, s_handlerThreadID, false);
3150
3151 const auto cpusMap = parseCPUMap();
3152
3153 std::vector<std::thread> workers(g_numThreads);
3154 if(g_numThreads == 1) {
3155 L<<Logger::Warning<<"Operating unthreaded"<<endl;
3156 #ifdef HAVE_SYSTEMD
3157 sd_notify(0, "READY=1");
3158 #endif
3159 setCPUMap(cpusMap, 0, pthread_self());
3160 recursorThread(0, true);
3161 }
3162 else {
3163 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3164 for(unsigned int n=0; n < g_numThreads; ++n) {
3165 workers[n] = std::thread(recursorThread, n, true);
3166
3167 setCPUMap(cpusMap, n, workers[n].native_handle());
3168 }
3169 #ifdef HAVE_SYSTEMD
3170 sd_notify(0, "READY=1");
3171 #endif
3172 workers.back().join();
3173 }
3174 return 0;
3175 }
3176
3177 static void* recursorThread(int n, bool worker)
3178 try
3179 {
3180 t_id=n;
3181 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3182 SyncRes::setDomainMap(g_initialDomainMap);
3183 t_allowFrom = g_initialAllowFrom;
3184 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3185 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3186 primeHints();
3187
3188 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3189
3190 #ifdef HAVE_PROTOBUF
3191 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
3192 #endif
3193 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3194
3195 try {
3196 if(!::arg()["lua-dns-script"].empty()) {
3197 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
3198 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3199 }
3200 }
3201 catch(std::exception &e) {
3202 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3203 _exit(99);
3204 }
3205
3206 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3207 if(ringsize) {
3208 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3209 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3210 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3211 else
3212 t_remotes->set_capacity(ringsize);
3213 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3214 t_servfailremotes->set_capacity(ringsize);
3215 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3216 t_largeanswerremotes->set_capacity(ringsize);
3217
3218 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3219 t_queryring->set_capacity(ringsize);
3220 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3221 t_servfailqueryring->set_capacity(ringsize);
3222 }
3223
3224 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3225
3226 PacketID pident;
3227
3228 t_fdm=getMultiplexer();
3229
3230 if(!worker) {
3231 if(::arg().mustDo("webserver")) {
3232 L<<Logger::Warning << "Enabling web server" << endl;
3233 try {
3234 new RecursorWebServer(t_fdm);
3235 }
3236 catch(PDNSException &e) {
3237 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3238 exit(99);
3239 }
3240 }
3241 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
3242 }
3243 else {
3244 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3245
3246 if(g_useOneSocketPerThread) {
3247 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3248 t_fdm->addReadFD(i->first, i->second);
3249 }
3250 }
3251 else {
3252 if(!g_weDistributeQueries || t_id == s_distributorThreadID) { // if we distribute queries, only t_id = 0 listens
3253 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3254 t_fdm->addReadFD(i->first, i->second);
3255 }
3256 }
3257 }
3258 }
3259
3260 registerAllStats();
3261
3262 if(!worker) {
3263 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3264 }
3265
3266 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3267
3268 bool listenOnTCP(true);
3269
3270 time_t last_carbon=0;
3271 time_t carbonInterval=::arg().asNum("carbon-interval");
3272 counter.store(0); // used to periodically execute certain tasks
3273 for(;;) {
3274 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3275
3276 if(!(counter%500)) {
3277 MT->makeThread(houseKeeping, 0);
3278 }
3279
3280 if(!(counter%55)) {
3281 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
3282 expired_t expired=t_fdm->getTimeouts(g_now);
3283
3284 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
3285 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
3286 if(g_logCommonErrors)
3287 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
3288 t_fdm->removeReadFD(i->first);
3289 }
3290 }
3291
3292 counter++;
3293
3294 if(!worker && statsWanted) {
3295 doStats();
3296 }
3297
3298 Utility::gettimeofday(&g_now, 0);
3299
3300 if(!worker && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3301 MT->makeThread(doCarbonDump, 0);
3302 last_carbon = g_now.tv_sec;
3303 }
3304
3305 t_fdm->run(&g_now);
3306 // 'run' updates g_now for us
3307
3308 if(worker && (!g_weDistributeQueries || t_id == s_distributorThreadID)) { // if pdns distributes queries, only tid 0 should do this
3309 if(listenOnTCP) {
3310 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3311 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3312 t_fdm->removeReadFD(*i);
3313 listenOnTCP=false;
3314 }
3315 }
3316 else {
3317 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3318 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3319 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3320 listenOnTCP=true;
3321 }
3322 }
3323 }
3324 }
3325 }
3326 catch(PDNSException &ae) {
3327 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3328 return 0;
3329 }
3330 catch(std::exception &e) {
3331 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3332 return 0;
3333 }
3334 catch(...) {
3335 L<<Logger::Error<<"any other exception in main: "<<endl;
3336 return 0;
3337 }
3338
3339
3340 int main(int argc, char **argv)
3341 {
3342 g_argc = argc;
3343 g_argv = argv;
3344 g_stats.startupTime=time(0);
3345 versionSetProduct(ProductRecursor);
3346 reportBasicTypes();
3347 reportOtherTypes();
3348
3349 int ret = EXIT_SUCCESS;
3350
3351 try {
3352 ::arg().set("stack-size","stack size per mthread")="200000";
3353 ::arg().set("soa-minimum-ttl","Don't change")="0";
3354 ::arg().set("no-shuffle","Don't change")="off";
3355 ::arg().set("local-port","port to listen on")="53";
3356 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3357 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3358 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3359 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3360 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3361 ::arg().set("daemon","Operate as a daemon")="no";
3362 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3363 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3364 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3365 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3366 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3367 ::arg().set("chroot","switch to chroot jail")="";
3368 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3369 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3370 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3371 ::arg().set("threads", "Launch this number of threads")="2";
3372 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3373 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3374 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3375 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3376 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3377 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3378 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3379 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3380 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3381 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3382 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3383 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3384 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3385 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3386 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3387 ::arg().set("quiet","Suppress logging of questions and answers")="";
3388 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3389 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3390 ::arg().set("socket-owner","Owner of socket")="";
3391 ::arg().set("socket-group","Group of socket")="";
3392 ::arg().set("socket-mode", "Permissions for socket")="";
3393
3394 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3395 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3396 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3397 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3398 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3399 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3400 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3401 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3402 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3403 ::arg().set("hint-file", "If set, load root hints from this file")="";
3404 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3405 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3406 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3407 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3408 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3409 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3410 ::arg().set("server-id", "Returned when queried for 'id.server' TXT or NSID, defaults to hostname")="";
3411 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3412 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3413 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3414 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3415 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3416 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3417 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3418 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3419 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3420 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3421 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3422 ::arg().set("lua-config-file", "More powerful configuration options")="";
3423
3424 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3425 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3426 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3427 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3428 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3429 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3430 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3431 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3432 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3433 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3434 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3435 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3436 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3437 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3438 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3439 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3440 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3441 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3442 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3443 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3444 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3445 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3446 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3447 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3448 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3449 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3450
3451 ::arg().set("include-dir","Include *.conf files from this directory")="";
3452 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3453
3454 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3455
3456 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3457 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3458
3459 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3460 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3461
3462 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3463
3464 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3465
3466 ::arg().setCmd("help","Provide a helpful message");
3467 ::arg().setCmd("version","Print version string");
3468 ::arg().setCmd("config","Output blank configuration");
3469 L.toConsole(Logger::Info);
3470 ::arg().laxParse(argc,argv); // do a lax parse
3471
3472 string configname=::arg()["config-dir"]+"/recursor.conf";
3473 if(::arg()["config-name"]!="") {
3474 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3475 s_programname+="-"+::arg()["config-name"];
3476 }
3477 cleanSlashes(configname);
3478
3479 if(::arg().mustDo("config")) {
3480 cout<<::arg().configstring()<<endl;
3481 exit(0);
3482 }
3483
3484 if(!::arg().file(configname.c_str()))
3485 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3486
3487 ::arg().parse(argc,argv);
3488
3489 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3490 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3491 exit(EXIT_FAILURE);
3492 }
3493
3494 if (::arg()["socket-dir"].empty()) {
3495 if (::arg()["chroot"].empty())
3496 ::arg().set("socket-dir") = LOCALSTATEDIR;
3497 else
3498 ::arg().set("socket-dir") = "/";
3499 }
3500
3501 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3502
3503 if(::arg().asNum("threads")==1)
3504 ::arg().set("pdns-distributes-queries")="no";
3505
3506 if(::arg().mustDo("help")) {
3507 cout<<"syntax:"<<endl<<endl;
3508 cout<<::arg().helpstring(::arg()["help"])<<endl;
3509 exit(0);
3510 }
3511 if(::arg().mustDo("version")) {
3512 showProductVersion();
3513 showBuildConfiguration();
3514 exit(0);
3515 }
3516
3517 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3518
3519 if (logUrgency < Logger::Error)
3520 logUrgency = Logger::Error;
3521 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3522 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3523 }
3524 L.setLoglevel(logUrgency);
3525 L.toConsole(logUrgency);
3526
3527 serviceMain(argc, argv);
3528 }
3529 catch(PDNSException &ae) {
3530 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3531 ret=EXIT_FAILURE;
3532 }
3533 catch(std::exception &e) {
3534 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3535 ret=EXIT_FAILURE;
3536 }
3537 catch(...) {
3538 L<<Logger::Error<<"any other exception in main: "<<endl;
3539 ret=EXIT_FAILURE;
3540 }
3541
3542 return ret;
3543 }