]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
spelling: ignore
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 PowerDNS Versatile Database Driven Nameserver
3 Copyright (C) 2003 - 2016 PowerDNS.COM BV
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License version 2
7 as published by the Free Software Foundation
8
9 Additionally, the license of this program contains a special
10 exception which allows to distribute the program in binary form when
11 it is linked against OpenSSL.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26
27 #include <netdb.h>
28 #include <sys/stat.h>
29 #include <unistd.h>
30
31 #include "ws-recursor.hh"
32 #include <pthread.h>
33 #include "recpacketcache.hh"
34 #include "utility.hh"
35 #include "dns_random.hh"
36 #include "opensslsigners.hh"
37 #include <iostream>
38 #include <errno.h>
39 #include <boost/static_assert.hpp>
40 #include <map>
41 #include <set>
42 #include "recursor_cache.hh"
43 #include "cachecleaner.hh"
44 #include <stdio.h>
45 #include <signal.h>
46 #include <stdlib.h>
47 #include "misc.hh"
48 #include "mtasker.hh"
49 #include <utility>
50 #include "arguments.hh"
51 #include "syncres.hh"
52 #include <fcntl.h>
53 #include <fstream>
54 #include "sortlist.hh"
55 extern SortList g_sortlist;
56 #include "sstuff.hh"
57 #include <boost/tuple/tuple.hpp>
58 #include <boost/tuple/tuple_comparison.hpp>
59 #include <boost/shared_array.hpp>
60 #include <boost/function.hpp>
61 #include <boost/algorithm/string.hpp>
62 #ifdef MALLOC_TRACE
63 #include "malloctrace.hh"
64 #endif
65 #include <netinet/tcp.h>
66 #include "dnsparser.hh"
67 #include "dnswriter.hh"
68 #include "dnsrecords.hh"
69 #include "zoneparser-tng.hh"
70 #include "rec_channel.hh"
71 #include "logger.hh"
72 #include "iputils.hh"
73 #include "mplexer.hh"
74 #include "config.h"
75 #include "lua-recursor4.hh"
76 #include "version.hh"
77 #include "responsestats.hh"
78 #include "secpoll-recursor.hh"
79 #include "dnsname.hh"
80 #include "filterpo.hh"
81 #include "rpzloader.hh"
82 #include "validate-recursor.hh"
83 #include "rec-lua-conf.hh"
84 #include "ednsoptions.hh"
85 #include "gettime.hh"
86
87 #include "rec-protobuf.hh"
88
89 #ifdef HAVE_SYSTEMD
90 #include <systemd/sd-daemon.h>
91 #endif
92
93 __thread FDMultiplexer* t_fdm;
94 static __thread unsigned int t_id;
95 unsigned int g_maxTCPPerClient;
96 size_t g_tcpMaxQueriesPerConn;
97 unsigned int g_networkTimeoutMsec;
98 uint64_t g_latencyStatSize;
99 bool g_logCommonErrors;
100 bool g_anyToTcp;
101 uint16_t g_udpTruncationThreshold, g_outgoingEDNSBufsize;
102 __thread shared_ptr<RecursorLua4>* t_pdl;
103 bool g_lowercaseOutgoing;
104
105 __thread addrringbuf_t* t_remotes, *t_servfailremotes, *t_largeanswerremotes;
106
107 __thread boost::circular_buffer<pair<DNSName, uint16_t> >* t_queryring, *t_servfailqueryring;
108 __thread shared_ptr<Regex>* t_traceRegex;
109
110 #ifdef HAVE_PROTOBUF
111 __thread boost::uuids::random_generator* t_uuidGenerator;
112 #endif
113
114
115 RecursorControlChannel s_rcc; // only active in thread 0
116
117 // for communicating with our threads
// Bundle of four file descriptors kept per thread (see g_pipes).
// NOTE(review): judging by the field names these are the write/read ends of
// one pipe towards the thread and one pipe back from it - confirm at the
// place where g_pipes is filled in.
118 struct ThreadPipeSet
119 {
120 int writeToThread;
121 int readToThread;
122 int writeFromThread;
123 int readFromThread;
124 };
125
126 vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
127
128 SyncRes::domainmap_t* g_initialDomainMap; // new threads needs this to be setup
129
130 #include "namespaces.hh"
131
132 __thread MemRecursorCache* t_RC;
133 __thread RecursorPacketCache* t_packetCache;
134 RecursorStats g_stats;
135 bool g_quiet;
136
137 bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
138
139 __thread NetmaskGroup* t_allowFrom;
140 static NetmaskGroup* g_initialAllowFrom; // new thread needs to be setup with this
141
142 NetmaskGroup* g_dontQuery;
143 string s_programname="pdns_recursor";
144
145 typedef vector<int> tcpListenSockets_t;
146 tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
147 int g_tcpTimeout;
148 unsigned int g_maxMThreads;
149 __thread struct timeval g_now; // timestamp, updated (too) frequently
150 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
151 listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
152 set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
153
154 __thread MT_t* MT; // the big MTasker
155
156 unsigned int g_numThreads, g_numWorkerThreads;
157
158 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
159 // Bad Nets taken from both:
160 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
161 // and
162 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
163 // where such a network may not be considered a valid destination
164 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
165 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
166
167 //! used to send information to a newborn mthread
168 struct DNSComboWriter {
169 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
170 d_tcp(false), d_socket(-1)
171 {}
172 MOADNSParser d_mdp;
173 void setRemote(const ComboAddress* sa)
174 {
175 d_remote=*sa;
176 }
177
178 void setLocal(const ComboAddress& sa)
179 {
180 d_local=sa;
181 }
182
183
184 void setSocket(int sock)
185 {
186 d_socket=sock;
187 }
188
189 string getRemote() const
190 {
191 return d_remote.toString();
192 }
193
194 struct timeval d_now;
195 ComboAddress d_remote, d_local;
196 #ifdef HAVE_PROTOBUF
197 boost::uuids::uuid d_uuid;
198 Netmask d_ednssubnet;
199 #endif
200 bool d_tcp;
201 int d_socket;
202 int d_tag{0};
203 string d_query;
204 shared_ptr<TCPConnection> d_tcpConnection;
205 vector<pair<uint16_t, string> > d_ednsOpts;
206 std::vector<std::string> d_policyTags;
207 };
208
209
210 ArgvMap &arg()
211 {
212 static ArgvMap theArg;
213 return theArg;
214 }
215
// Returns t_id, the thread-local id variable of the calling thread.
216 unsigned int getRecursorThreadId()
217 {
218 return t_id;
219 }
220
221 void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
222
223 // -1 is error, 0 is timeout, 1 is success
224 int asendtcp(const string& data, Socket* sock)
225 {
226 PacketID pident;
227 pident.sock=sock;
228 pident.outMSG=data;
229
230 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
231 string packet;
232
233 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
234
235 if(!ret || ret==-1) { // timeout
236 t_fdm->removeWriteFD(sock->getHandle());
237 }
238 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
239 return -1;
240 }
241 return ret;
242 }
243
244 void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
245
246 // -1 is error, 0 is timeout, 1 is success
247 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
248 {
249 data.clear();
250 PacketID pident;
251 pident.sock=sock;
252 pident.inNeeded=len;
253 pident.inIncompleteOkay=incompleteOkay;
254 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
255
256 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
257 if(!ret || ret==-1) { // timeout
258 t_fdm->removeReadFD(sock->getHandle());
259 }
260 else if(data.empty()) {// error, EOF or other
261 return -1;
262 }
263
264 return ret;
265 }
266
267 void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
268 {
269 PacketID pident=*any_cast<PacketID>(&var);
270 char resp[512];
271 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
272 t_fdm->removeReadFD(fd);
273 if(ret >= 0) {
274 string data(resp, (size_t) ret);
275 MT->sendEvent(pident, &data);
276 }
277 else {
278 string empty;
279 MT->sendEvent(pident, &empty);
280 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
281 }
282 }
283 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
284 {
285 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
286 s.setNonBlocking();
287 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
288
289 s.bind(local);
290 s.connect(dest);
291 s.send(query);
292
293 PacketID pident;
294 pident.sock=&s;
295 pident.type=0;
296 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
297
298 string data;
299
300 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
301
302 if(!ret || ret==-1) { // timeout
303 t_fdm->removeReadFD(s.getHandle());
304 }
305 else if(data.empty()) {// error, EOF or other
306 // we could special case this
307 return data;
308 }
309 return data;
310 }
311
312
313 vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
314 const ComboAddress g_local4("0.0.0.0"), g_local6("::");
315
316 //! pick a random query local address
317 ComboAddress getQueryLocalAddress(int family, uint16_t port)
318 {
319 ComboAddress ret;
320 if(family==AF_INET) {
321 if(g_localQueryAddresses4.empty())
322 ret = g_local4;
323 else
324 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
325 ret.sin4.sin_port = htons(port);
326 }
327 else {
328 if(g_localQueryAddresses6.empty())
329 ret = g_local6;
330 else
331 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
332
333 ret.sin6.sin6_port = htons(port);
334 }
335 return ret;
336 }
337
338 void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
339
340 void setSocketBuffer(int fd, int optname, uint32_t size)
341 {
342 uint32_t psize=0;
343 socklen_t len=sizeof(psize);
344
345 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
346 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
347 return;
348 }
349
350 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
351 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
352 }
353
354
// Applies setSocketBuffer() to the receive buffer option (SO_RCVBUF) of 'fd'.
355 static void setSocketReceiveBuffer(int fd, uint32_t size)
356 {
357 setSocketBuffer(fd, SO_RCVBUF, size);
358 }
359
// Applies setSocketBuffer() to the send buffer option (SO_SNDBUF) of 'fd'.
360 static void setSocketSendBuffer(int fd, uint32_t size)
361 {
362 setSocketBuffer(fd, SO_SNDBUF, size);
363 }
364
365
366 // you can ask this class for a UDP socket to send a query from
367 // this socket is not yours, don't even think about deleting it
368 // but after you call 'returnSocket' on it, don't assume anything anymore
369 class UDPClientSocks
370 {
371 unsigned int d_numsocks;
372 public:
373 UDPClientSocks() : d_numsocks(0)
374 {
375 }
376
377 typedef set<int> socks_t;
378 socks_t d_socks;
379
380 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
381 int getSocket(const ComboAddress& toaddr, int* fd)
382 {
383 *fd=makeClientSocket(toaddr.sin4.sin_family);
384 if(*fd < 0) // temporary error - receive exception otherwise
385 return -2;
386
387 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
388 int err = errno;
389 // returnSocket(*fd);
390 try {
391 closesocket(*fd);
392 }
393 catch(const PDNSException& e) {
394 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
395 }
396
397 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
398 return -2;
399 return -1;
400 }
401
402 d_socks.insert(*fd);
403 d_numsocks++;
404 return 0;
405 }
406
407 void returnSocket(int fd)
408 {
409 socks_t::iterator i=d_socks.find(fd);
410 if(i==d_socks.end()) {
411 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
412 }
413 returnSocketLocked(i);
414 }
415
416 // return a socket to the pool, or simply erase it
417 void returnSocketLocked(socks_t::iterator& i)
418 {
419 if(i==d_socks.end()) {
420 throw PDNSException("Trying to return a socket not in the pool");
421 }
422 try {
423 t_fdm->removeReadFD(*i);
424 }
425 catch(FDMultiplexerException& e) {
426 // we sometimes return a socket that has not yet been assigned to t_fdm
427 }
428 try {
429 closesocket(*i);
430 }
431 catch(const PDNSException& e) {
432 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
433 }
434
435 d_socks.erase(i++);
436 --d_numsocks;
437 }
438
439 // returns -1 for errors which might go away, throws for ones that won't
440 static int makeClientSocket(int family)
441 {
442 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
443
444 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
445 return ret;
446
447 if(ret<0)
448 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
449
450 // setCloseOnExec(ret); // we're not going to exec
451
452 int tries=10;
453 ComboAddress sin;
454 while(--tries) {
455 uint16_t port;
456
457 if(tries==1) // fall back to kernel 'random'
458 port = 0;
459 else
460 port = 1025 + dns_random(64510);
461
462 sin=getQueryLocalAddress(family, port); // does htons for us
463
464 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
465 break;
466 }
467 if(!tries)
468 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
469
470 setNonBlocking(ret);
471 return ret;
472 }
473 };
474
475 static __thread UDPClientSocks* t_udpclientsocks;
476
477 /* these two functions are used by LWRes */
478 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
479 int asendto(const char *data, size_t len, int flags,
480 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
481 {
482
483 PacketID pident;
484 pident.domain = domain;
485 pident.remote = toaddr;
486 pident.type = qtype;
487
488 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
489 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
490
491 for(; chain.first != chain.second; chain.first++) {
492 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
493 /*
494 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
495 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
496 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
497 */
498 chain.first->key.chain.insert(id); // we can chain
499 *fd=-1; // gets used in waitEvent / sendEvent later on
500 return 1;
501 }
502 }
503
504 int ret=t_udpclientsocks->getSocket(toaddr, fd);
505 if(ret < 0)
506 return ret;
507
508 pident.fd=*fd;
509 pident.id=id;
510
511 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
512 ret = send(*fd, data, len, 0);
513
514 int tmp = errno;
515
516 if(ret < 0)
517 t_udpclientsocks->returnSocket(*fd);
518
519 errno = tmp; // this is for logging purposes only
520 return ret;
521 }
522
523 // -1 is error, 0 is timeout, 1 is success
524 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
525 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
526 {
527 static optional<unsigned int> nearMissLimit;
528 if(!nearMissLimit)
529 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
530
531 PacketID pident;
532 pident.fd=fd;
533 pident.id=id;
534 pident.domain=domain;
535 pident.type = qtype;
536 pident.remote=fromaddr;
537
538 string packet;
539 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
540
541 if(ret > 0) {
542 if(packet.empty()) // means "error"
543 return -1;
544
545 *d_len=packet.size();
546 memcpy(data,packet.c_str(),min(len,*d_len));
547 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
548 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
549 g_stats.spoofCount++;
550 return -1;
551 }
552 }
553 else {
554 if(fd >= 0)
555 t_udpclientsocks->returnSocket(fd);
556 }
557 return ret;
558 }
559
560
561 string s_pidfname;
562 static void writePid(void)
563 {
564 if(!::arg().mustDo("write-pid"))
565 return;
566 ofstream of(s_pidfname.c_str(), std::ios_base::app);
567 if(of)
568 of<< Utility::getpid() <<endl;
569 else
570 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
571 }
572
573 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
574 tcpClientCounts_t __thread* t_tcpClientCounts;
575
576 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
577 {
578 ++s_currentConnections;
579 (*t_tcpClientCounts)[d_remote]++;
580 }
581
582 TCPConnection::~TCPConnection()
583 {
584 try {
585 if(closesocket(d_fd) < 0)
586 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
587 }
588 catch(const PDNSException& e) {
589 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
590 }
591
592 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
593 t_tcpClientCounts->erase(d_remote);
594 --s_currentConnections;
595 }
596
597 AtomicCounter TCPConnection::s_currentConnections;
598 void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
599
600 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
601 void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
602 {
603 if(packetsize > 1000 && t_largeanswerremotes)
604 t_largeanswerremotes->push_back(remote);
605 switch(res) {
606 case RCode::ServFail:
607 if(t_servfailremotes) {
608 t_servfailremotes->push_back(remote);
609 if(query && t_servfailqueryring) // packet cache
610 t_servfailqueryring->push_back(make_pair(*query, qtype));
611 }
612 g_stats.servFails++;
613 break;
614 case RCode::NXDomain:
615 g_stats.nxDomains++;
616 break;
617 case RCode::NoError:
618 g_stats.noErrors++;
619 break;
620 }
621 }
622
623 static string makeLoginfo(DNSComboWriter* dc)
624 try
625 {
626 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
627 }
628 catch(...)
629 {
630 return "Exception making error message for exception";
631 }
632
633 #ifdef HAVE_PROTOBUF
634 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags)
635 {
636 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
637 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
638 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
639 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
640
641 if (!policyTags.empty()) {
642 message.setPolicyTags(policyTags);
643 }
644
645 // cerr <<message.toDebugString()<<endl;
646 std::string str;
647 message.serialize(str);
648 logger->queueData(str);
649 }
650
651 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
652 {
653 // cerr <<message.toDebugString()<<endl;
654 std::string str;
655 message.serialize(str);
656 logger->queueData(str);
657 }
658 #endif
659
660 /**
661 * Chases the CNAME provided by the PolicyCustom RPZ policy.
662 *
663 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
664 * @param qtype: The QType of the original query
665 * @param sr: A SyncRes
666 * @param res: An integer that will contain the RCODE of the lookup we do
667 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
668 */
669 void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
670 {
671 if (spoofed.d_type == QType::CNAME) {
672 bool oldWantsRPZ = sr.d_wantsRPZ;
673 sr.d_wantsRPZ = false;
674 vector<DNSRecord> ans;
675 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
676 for (const auto& rec : ans) {
677 if(rec.d_place == DNSResourceRecord::ANSWER) {
678 ret.push_back(rec);
679 }
680 }
681 // Reset the RPZ state of the SyncRes
682 sr.d_wantsRPZ = oldWantsRPZ;
683 }
684 }
685
686 void startDoResolve(void *p)
687 {
688 DNSComboWriter* dc=(DNSComboWriter *)p;
689 try {
690 if (t_queryring)
691 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
692
693 uint32_t maxanswersize= dc->d_tcp ? 65535 : min((uint16_t) 512, g_udpTruncationThreshold);
694 EDNSOpts edo;
695 bool haveEDNS=false;
696 if(getEDNSOpts(dc->d_mdp, &edo)) {
697 if(!dc->d_tcp)
698 maxanswersize = min(edo.d_packetsize, g_udpTruncationThreshold);
699 dc->d_ednsOpts = edo.d_options;
700 haveEDNS=true;
701 }
702 vector<DNSRecord> ret;
703 vector<uint8_t> packet;
704
705 auto luaconfsLocal = g_luaconfs.getLocal();
706 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
707 bool wantsRPZ(true);
708 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
709 #ifdef HAVE_PROTOBUF
710 if (luaconfsLocal->protobufServer) {
711 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
712 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
713 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
714 pbMessage.setEDNSSubnet(dc->d_ednssubnet, dc->d_ednssubnet.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
715 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
716 }
717 #endif /* HAVE_PROTOBUF */
718
719 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
720
721 pw.getHeader()->aa=0;
722 pw.getHeader()->ra=1;
723 pw.getHeader()->qr=1;
724 pw.getHeader()->tc=0;
725 pw.getHeader()->id=dc->d_mdp.d_header.id;
726 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
727 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
728
729 // DO NOT MOVE THIS CODE UP - DNSPacketWriter needs to get the original-cased version
730 if (g_lowercaseOutgoing)
731 dc->d_mdp.d_qname = DNSName(toLower(dc->d_mdp.d_qname.toString()));
732
733 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
734
735 SyncRes sr(dc->d_now);
736 bool DNSSECOK=false;
737 if(t_pdl) {
738 sr.setLuaEngine(*t_pdl);
739 sr.d_requestor=dc->d_remote;
740 }
741
742 if(g_dnssecmode != DNSSECMode::Off) {
743 sr.d_doDNSSEC=true;
744
745 // Does the requestor want DNSSEC records?
746 if(edo.d_Z & EDNSOpts::DNSSECOK) {
747 DNSSECOK=true;
748 g_stats.dnssecQueries++;
749 }
750 } else {
751 // Ignore the client-set CD flag
752 pw.getHeader()->cd=0;
753 }
754 #ifdef HAVE_PROTOBUF
755 sr.d_initialRequestId = dc->d_uuid;
756 #endif
757
758 bool tracedQuery=false; // we could consider letting Lua know about this too
759 bool variableAnswer = false;
760 bool shouldNotValidate = false;
761
762 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
763 int res = RCode::NoError;
764 DNSFilterEngine::Policy appliedPolicy;
765 DNSRecord spoofed;
766 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
767 dq.ednsFlags = &edo.d_Z;
768 dq.ednsOptions = &dc->d_ednsOpts;
769 dq.tag = dc->d_tag;
770 dq.discardedPolicies = &sr.d_discardedPolicies;
771 dq.policyTags = &dc->d_policyTags;
772 dq.appliedPolicy = &appliedPolicy;
773 dq.currentRecords = &ret;
774 dq.dh = &dc->d_mdp.d_header;
775
776 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
777 pw.getHeader()->tc = 1;
778 res = 0;
779 variableAnswer = true;
780 goto sendit;
781 }
782
783 if(t_traceRegex->get() && (*t_traceRegex)->match(dc->d_mdp.d_qname.toString())) {
784 sr.setLogMode(SyncRes::Store);
785 tracedQuery=true;
786 }
787
788
789 if(!g_quiet || tracedQuery) {
790 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
791 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
792 #ifdef HAVE_PROTOBUF
793 if(!dc->d_ednssubnet.empty()) {
794 L<<" (ecs "<<dc->d_ednssubnet.toString()<<")";
795 }
796 #endif
797 L<<endl;
798 }
799
800 sr.setId(MT->getTid());
801 if(!dc->d_mdp.d_header.rd)
802 sr.setCacheOnly();
803
804 if (t_pdl->get()) {
805 (*t_pdl)->prerpz(dq, res);
806 }
807
808 // Check if the query has a policy attached to it
809 if (wantsRPZ) {
810 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
811 }
812
813 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
814 if(!t_pdl->get() || !(*t_pdl)->preresolve(dq, res)) {
815
816 sr.d_wantsRPZ = wantsRPZ;
817 if(wantsRPZ) {
818 switch(appliedPolicy.d_kind) {
819 case DNSFilterEngine::PolicyKind::NoAction:
820 break;
821 case DNSFilterEngine::PolicyKind::Drop:
822 g_stats.policyDrops++;
823 g_stats.policyResults[appliedPolicy.d_kind]++;
824 delete dc;
825 dc=0;
826 return;
827 case DNSFilterEngine::PolicyKind::NXDOMAIN:
828 g_stats.policyResults[appliedPolicy.d_kind]++;
829 res=RCode::NXDomain;
830 goto haveAnswer;
831 case DNSFilterEngine::PolicyKind::NODATA:
832 g_stats.policyResults[appliedPolicy.d_kind]++;
833 res=RCode::NoError;
834 goto haveAnswer;
835 case DNSFilterEngine::PolicyKind::Custom:
836 g_stats.policyResults[appliedPolicy.d_kind]++;
837 res=RCode::NoError;
838 spoofed.d_name=dc->d_mdp.d_qname;
839 spoofed.d_type=appliedPolicy.d_custom->getType();
840 spoofed.d_ttl = appliedPolicy.d_ttl;
841 spoofed.d_class = 1;
842 spoofed.d_content = appliedPolicy.d_custom;
843 spoofed.d_place = DNSResourceRecord::ANSWER;
844 ret.push_back(spoofed);
845 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
846 goto haveAnswer;
847 case DNSFilterEngine::PolicyKind::Truncate:
848 if(!dc->d_tcp) {
849 g_stats.policyResults[appliedPolicy.d_kind]++;
850 res=RCode::NoError;
851 pw.getHeader()->tc=1;
852 goto haveAnswer;
853 }
854 break;
855 }
856 }
857
858 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
859 try {
860 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
861 shouldNotValidate = sr.wasOutOfBand();
862 }
863 catch(ImmediateServFailException &e) {
864 if(g_logCommonErrors)
865 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
866 res = RCode::ServFail;
867 }
868
869 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
870 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
871 appliedPolicy = sr.d_appliedPolicy;
872 g_stats.policyResults[appliedPolicy.d_kind]++;
873 switch(appliedPolicy.d_kind) {
874 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
875 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
876 case DNSFilterEngine::PolicyKind::Drop:
877 g_stats.policyDrops++;
878 delete dc;
879 dc=0;
880 return;
881 case DNSFilterEngine::PolicyKind::NXDOMAIN:
882 ret.clear();
883 res=RCode::NXDomain;
884 goto haveAnswer;
885
886 case DNSFilterEngine::PolicyKind::NODATA:
887 ret.clear();
888 res=RCode::NoError;
889 goto haveAnswer;
890
891 case DNSFilterEngine::PolicyKind::Truncate:
892 if(!dc->d_tcp) {
893 ret.clear();
894 res=RCode::NoError;
895 pw.getHeader()->tc=1;
896 goto haveAnswer;
897 }
898 break;
899
900 case DNSFilterEngine::PolicyKind::Custom:
901 ret.clear();
902 res=RCode::NoError;
903 spoofed.d_name=dc->d_mdp.d_qname;
904 spoofed.d_type=appliedPolicy.d_custom->getType();
905 spoofed.d_ttl = appliedPolicy.d_ttl;
906 spoofed.d_class = 1;
907 spoofed.d_content = appliedPolicy.d_custom;
908 spoofed.d_place = DNSResourceRecord::ANSWER;
909 ret.push_back(spoofed);
910 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
911 goto haveAnswer;
912 }
913 }
914
915 if (wantsRPZ) {
916 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
917 }
918
919 if(t_pdl->get()) {
920 if(res == RCode::NoError) {
921 auto i=ret.cbegin();
922 for(; i!= ret.cend(); ++i)
923 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
924 break;
925 if(i == ret.cend() && (*t_pdl)->nodata(dq, res))
926 shouldNotValidate = true;
927
928 }
929 else if(res == RCode::NXDomain && (*t_pdl)->nxdomain(dq, res))
930 shouldNotValidate = true;
931
932 if((*t_pdl)->postresolve(dq, res))
933 shouldNotValidate = true;
934 }
935
936 if (wantsRPZ) { //XXX This block is repeated, see above
937 g_stats.policyResults[appliedPolicy.d_kind]++;
938 switch(appliedPolicy.d_kind) {
939 case DNSFilterEngine::PolicyKind::NoAction:
940 break;
941 case DNSFilterEngine::PolicyKind::Drop:
942 g_stats.policyDrops++;
943 delete dc;
944 dc=0;
945 return;
946 case DNSFilterEngine::PolicyKind::NXDOMAIN:
947 ret.clear();
948 res=RCode::NXDomain;
949 goto haveAnswer;
950
951 case DNSFilterEngine::PolicyKind::NODATA:
952 ret.clear();
953 res=RCode::NoError;
954 goto haveAnswer;
955
956 case DNSFilterEngine::PolicyKind::Truncate:
957 if(!dc->d_tcp) {
958 ret.clear();
959 res=RCode::NoError;
960 pw.getHeader()->tc=1;
961 goto haveAnswer;
962 }
963 break;
964
965 case DNSFilterEngine::PolicyKind::Custom:
966 ret.clear();
967 res=RCode::NoError;
968 spoofed.d_name=dc->d_mdp.d_qname;
969 spoofed.d_type=appliedPolicy.d_custom->getType();
970 spoofed.d_ttl = appliedPolicy.d_ttl;
971 spoofed.d_class = 1;
972 spoofed.d_content = appliedPolicy.d_custom;
973 spoofed.d_place = DNSResourceRecord::ANSWER;
974 ret.push_back(spoofed);
975 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
976 goto haveAnswer;
977 }
978 }
979 }
980 haveAnswer:;
981 if(res == PolicyDecision::DROP) {
982 g_stats.policyDrops++;
983 delete dc;
984 dc=0;
985 return;
986 }
987 if(tracedQuery || res == PolicyDecision::PASS || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
988 {
989 string trace(sr.getTrace());
990 if(!trace.empty()) {
991 vector<string> lines;
992 boost::split(lines, trace, boost::is_any_of("\n"));
993 for(const string& line : lines) {
994 if(!line.empty())
995 L<<Logger::Warning<< line << endl;
996 }
997 }
998 }
999
1000 if(res == PolicyDecision::PASS) { // XXX what does this MEAN? Why servfail on PASS?
1001 pw.getHeader()->rcode=RCode::ServFail;
1002 // no commit here, because no record
1003 g_stats.servFails++;
1004 }
1005 else {
1006 pw.getHeader()->rcode=res;
1007
1008 // Does the validation mode or query demand validation?
1009 if(!shouldNotValidate && (g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process))) {
1010 try {
1011 if(sr.doLog()) {
1012 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1013 }
1014
1015 ResolveContext ctx;
1016 #ifdef HAVE_PROTOBUF
1017 ctx.d_initialRequestId = dc->d_uuid;
1018 #endif
1019 auto state=validateRecords(ctx, ret);
1020 if(state == Secure) {
1021 if(sr.doLog()) {
1022 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1023 }
1024
1025 // Is the query source interested in the value of the ad-bit?
1026 if (dc->d_mdp.d_header.ad || DNSSECOK)
1027 pw.getHeader()->ad=1;
1028 }
1029 else if(state == Insecure) {
1030 if(sr.doLog()) {
1031 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1032 }
1033
1034 pw.getHeader()->ad=0;
1035 }
1036 else if(state == Bogus) {
1037 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1038 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1039 }
1040
1041 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1042 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1043 if(sr.doLog()) {
1044 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1045 }
1046
1047 pw.getHeader()->rcode=RCode::ServFail;
1048 goto sendit;
1049 } else {
1050 if(sr.doLog()) {
1051 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1052 }
1053 }
1054 }
1055 }
1056 catch(ImmediateServFailException &e) {
1057 if(g_logCommonErrors)
1058 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1059 pw.getHeader()->rcode=RCode::ServFail;
1060 goto sendit;
1061 }
1062 }
1063
1064 if(ret.size()) {
1065 orderAndShuffle(ret);
1066 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1067 sort(ret.begin(), ret.end(), *sl);
1068 variableAnswer=true;
1069 }
1070 }
1071 if(haveEDNS) {
1072 ret.push_back(makeOpt(edo.d_packetsize, 0, edo.d_Z));
1073 }
1074
1075 bool needCommit = false;
1076 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1077 if( ! DNSSECOK &&
1078 ( i->d_type == QType::NSEC3 ||
1079 (
1080 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1081 (
1082 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1083 i->d_place != DNSResourceRecord::ANSWER
1084 )
1085 )
1086 )
1087 ) {
1088 continue;
1089 }
1090
1091 pw.startRecord(i->d_name, i->d_type, i->d_ttl, i->d_class, i->d_place);
1092 if(i->d_type != QType::OPT) // their TTL ain't real
1093 minTTL = min(minTTL, i->d_ttl);
1094 i->d_content->toPacket(pw);
1095 if(pw.size() > maxanswersize) {
1096 pw.rollback();
1097 if(i->d_place==DNSResourceRecord::ANSWER) // only truncate if we actually omitted parts of the answer
1098 {
1099 pw.getHeader()->tc=1;
1100 pw.truncate();
1101 }
1102 goto sendit; // need to jump over pw.commit
1103 }
1104 needCommit = true;
1105 #ifdef HAVE_PROTOBUF
1106 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1107 pbMessage.addRR(*i);
1108 }
1109 #endif
1110 }
1111 if(needCommit)
1112 pw.commit();
1113 }
1114 sendit:;
1115
1116 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1117 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1118 #ifdef HAVE_PROTOBUF
1119 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1120 pbMessage.setBytes(packet.size());
1121 pbMessage.setResponseCode(pw.getHeader()->rcode);
1122 if (appliedPolicy.d_name) {
1123 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1124 }
1125 pbMessage.setPolicyTags(dc->d_policyTags);
1126 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1127 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1128 }
1129 #endif
1130 if(!dc->d_tcp) {
1131 struct msghdr msgh;
1132 struct iovec iov;
1133 char cbuf[256];
1134 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1135 msgh.msg_control=NULL;
1136
1137 if(g_fromtosockets.count(dc->d_socket)) {
1138 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1139 }
1140 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1141 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1142 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1143 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_query,
1144 string((const char*)&*packet.begin(), packet.size()),
1145 g_now.tv_sec,
1146 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1147 min(minTTL,SyncRes::s_packetcachettl),
1148 &pbMessage);
1149 }
1150 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1151 }
1152 else {
1153 char buf[2];
1154 buf[0]=packet.size()/256;
1155 buf[1]=packet.size()%256;
1156
1157 Utility::iovec iov[2];
1158
1159 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1160 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1161
1162 int wret=Utility::writev(dc->d_socket, iov, 2);
1163 bool hadError=true;
1164
1165 if(wret == 0)
1166 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1167 else if(wret < 0 )
1168 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1169 else if((unsigned int)wret != 2 + packet.size())
1170 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1171 else
1172 hadError=false;
1173
1174 // update tcp connection status, either by closing or moving to 'BYTE0'
1175
1176 if(hadError) {
1177 // no need to remove us from FDM, we weren't there
1178 dc->d_socket = -1;
1179 }
1180 else {
1181 dc->d_tcpConnection->queriesCount++;
1182 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1183 dc->d_socket = -1;
1184 }
1185 else {
1186 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1187 Utility::gettimeofday(&g_now, 0); // needs to be updated
1188 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1189 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1190 }
1191 }
1192 }
1193
1194 if(!g_quiet) {
1195 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1196 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1197 sr.d_totUsec/1000.0<<" ms, "<<
1198 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<<res<<endl;
1199 }
1200
1201 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
1202 float spent=makeFloat(sr.d_now-dc->d_now);
1203 if(spent < 0.001)
1204 g_stats.answers0_1++;
1205 else if(spent < 0.010)
1206 g_stats.answers1_10++;
1207 else if(spent < 0.1)
1208 g_stats.answers10_100++;
1209 else if(spent < 1.0)
1210 g_stats.answers100_1000++;
1211 else
1212 g_stats.answersSlow++;
1213
1214 uint64_t newLat=(uint64_t)(spent*1000000);
1215 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1216 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1217 // no worries, we do this for packet cache hits elsewhere
1218 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1219 delete dc;
1220 dc=0;
1221 }
1222 catch(PDNSException &ae) {
1223 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1224 delete dc;
1225 }
1226 catch(MOADNSException& e) {
1227 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1228 delete dc;
1229 }
1230 catch(std::exception& e) {
1231 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1232
1233 // Luawrapper nests the exception from Lua, so we unnest it here
1234 try {
1235 std::rethrow_if_nested(e);
1236 } catch(const std::exception& e) {
1237 L<<". Extra info: "<<e.what();
1238 } catch(...) {}
1239
1240 L<<endl;
1241 delete dc;
1242 }
1243 catch(...) {
1244 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1245 }
1246
1247 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1248 }
1249
1250 void makeControlChannelSocket(int processNum=-1)
1251 {
1252 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1253 if(processNum >= 0)
1254 sockname += "."+std::to_string(processNum);
1255 sockname+=".controlsocket";
1256 s_rcc.listen(sockname);
1257
1258 int sockowner = -1;
1259 int sockgroup = -1;
1260
1261 if (!::arg().isEmpty("socket-group"))
1262 sockgroup=::arg().asGid("socket-group");
1263 if (!::arg().isEmpty("socket-owner"))
1264 sockowner=::arg().asUid("socket-owner");
1265
1266 if (sockgroup > -1 || sockowner > -1) {
1267 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1268 unixDie("Failed to chown control socket");
1269 }
1270 }
1271
1272 // do mode change if socket-mode is given
1273 if(!::arg().isEmpty("socket-mode")) {
1274 mode_t sockmode=::arg().asMode("socket-mode");
1275 if(chmod(sockname.c_str(), sockmode) < 0) {
1276 unixDie("Failed to chmod control socket");
1277 }
1278 }
1279 }
1280
1281 static void getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, Netmask* ednssubnet)
1282 {
1283 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1284 size_t questionLen = question.length();
1285 unsigned int consumed=0;
1286 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1287
1288 size_t pos= sizeof(dnsheader)+consumed+4;
1289 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1290 = 11 */
1291 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1292 /* OPT root label (1) followed by type (2) */
1293 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1294 char* ecsStart = nullptr;
1295 size_t ecsLen = 0;
1296 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1297 if (res == 0 && ecsLen > 4) {
1298 EDNSSubnetOpts eso;
1299 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1300 *ednssubnet=eso.source;
1301 }
1302 }
1303 }
1304 }
1305 }
1306
1307 void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1308 {
1309 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1310
1311 if(conn->state==TCPConnection::BYTE0) {
1312 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1313 if(bytes==1)
1314 conn->state=TCPConnection::BYTE1;
1315 if(bytes==2) {
1316 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1317 conn->bytesread=0;
1318 conn->state=TCPConnection::GETQUESTION;
1319 }
1320 if(!bytes || bytes < 0) {
1321 t_fdm->removeReadFD(fd);
1322 return;
1323 }
1324 }
1325 else if(conn->state==TCPConnection::BYTE1) {
1326 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1327 if(bytes==1) {
1328 conn->state=TCPConnection::GETQUESTION;
1329 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1330 conn->bytesread=0;
1331 }
1332 if(!bytes || bytes < 0) {
1333 if(g_logCommonErrors)
1334 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1335 t_fdm->removeReadFD(fd);
1336 return;
1337 }
1338 }
1339 else if(conn->state==TCPConnection::GETQUESTION) {
1340 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1341 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1342 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1343 t_fdm->removeReadFD(fd);
1344 return;
1345 }
1346 conn->bytesread+=(uint16_t)bytes;
1347 if(conn->bytesread==conn->qlen) {
1348 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1349
1350 DNSComboWriter* dc=0;
1351 try {
1352 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1353 }
1354 catch(MOADNSException &mde) {
1355 g_stats.clientParseError++;
1356 if(g_logCommonErrors)
1357 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1358 return;
1359 }
1360 dc->d_tcpConnection = conn; // carry the torch
1361 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1362 dc->d_tcp=true;
1363 dc->setRemote(&conn->d_remote);
1364 ComboAddress dest;
1365 memset(&dest, 0, sizeof(dest));
1366 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1367 socklen_t len = dest.getSocklen();
1368 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1369 dc->setLocal(dest);
1370 Netmask ednssubnet;
1371 DNSName qname;
1372 uint16_t qtype=0;
1373 uint16_t qclass=0;
1374 bool needECS = false;
1375 #ifdef HAVE_PROTOBUF
1376 auto luaconfsLocal = g_luaconfs.getLocal();
1377 if (luaconfsLocal->protobufServer) {
1378 needECS = true;
1379 }
1380 #endif
1381
1382 if(needECS || (t_pdl->get() && (*t_pdl)->d_gettag)) {
1383
1384 try {
1385 getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &ednssubnet);
1386
1387 if(t_pdl->get() && (*t_pdl)->d_gettag) {
1388 try {
1389 dc->d_tag = (*t_pdl)->gettag(conn->d_remote, ednssubnet, dest, qname, qtype, &dc->d_policyTags);
1390 }
1391 catch(std::exception& e) {
1392 if(g_logCommonErrors)
1393 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1394 }
1395 }
1396 }
1397 catch(std::exception& e)
1398 {
1399 if(g_logCommonErrors)
1400 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1401 }
1402 }
1403 #ifdef HAVE_PROTOBUF
1404 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1405 dc->d_uuid = (*t_uuidGenerator)();
1406 }
1407
1408 if(luaconfsLocal->protobufServer) {
1409 try {
1410 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1411 dc->d_ednssubnet = ednssubnet;
1412
1413 if (!luaconfsLocal->protobufTaggedOnly) {
1414 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, ednssubnet, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags);
1415 }
1416 }
1417 catch(std::exception& e) {
1418 if(g_logCommonErrors)
1419 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1420 }
1421 }
1422 #endif
1423 if(dc->d_mdp.d_header.qr) {
1424 delete dc;
1425 g_stats.ignoredCount++;
1426 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1427 return;
1428 }
1429 if(dc->d_mdp.d_header.opcode) {
1430 delete dc;
1431 g_stats.ignoredCount++;
1432 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1433 return;
1434 }
1435 else {
1436 ++g_stats.qcounter;
1437 ++g_stats.tcpqcounter;
1438 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1439 return;
1440 }
1441 }
1442 }
1443 }
1444
1445 //! Handle new incoming TCP connection
1446 void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1447 {
1448 ComboAddress addr;
1449 socklen_t addrlen=sizeof(addr);
1450 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1451 if(newsock>=0) {
1452 if(MT->numProcesses() > g_maxMThreads) {
1453 g_stats.overCapacityDrops++;
1454 try {
1455 closesocket(newsock);
1456 }
1457 catch(const PDNSException& e) {
1458 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1459 }
1460 return;
1461 }
1462
1463 if(t_remotes)
1464 t_remotes->push_back(addr);
1465 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1466 if(!g_quiet)
1467 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1468
1469 g_stats.unauthorizedTCP++;
1470 try {
1471 closesocket(newsock);
1472 }
1473 catch(const PDNSException& e) {
1474 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1475 }
1476 return;
1477 }
1478 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1479 g_stats.tcpClientOverflow++;
1480 try {
1481 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1482 }
1483 catch(const PDNSException& e) {
1484 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1485 }
1486 return;
1487 }
1488
1489 setNonBlocking(newsock);
1490 shared_ptr<TCPConnection> tc(new TCPConnection(newsock, addr));
1491 tc->state=TCPConnection::BYTE0;
1492
1493 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1494
1495 struct timeval now;
1496 Utility::gettimeofday(&now, 0);
1497 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1498 }
1499 }
1500
1501 string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1502 {
1503 gettimeofday(&g_now, 0);
1504 struct timeval diff = g_now - tv;
1505 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1506
1507 if(tv.tv_sec && delta > 1000.0) {
1508 g_stats.tooOldDrops++;
1509 return 0;
1510 }
1511
1512 ++g_stats.qcounter;
1513 if(fromaddr.sin4.sin_family==AF_INET6)
1514 g_stats.ipv6qcounter++;
1515
1516 string response;
1517 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1518 unsigned int ctag=0;
1519 bool needECS = false;
1520 std::vector<std::string> policyTags;
1521 #ifdef HAVE_PROTOBUF
1522 boost::uuids::uuid uniqueId;
1523 auto luaconfsLocal = g_luaconfs.getLocal();
1524 if (luaconfsLocal->protobufServer) {
1525 uniqueId = (*t_uuidGenerator)();
1526 needECS = true;
1527 } else if (luaconfsLocal->outgoingProtobufServer) {
1528 uniqueId = (*t_uuidGenerator)();
1529 }
1530 #endif
1531 Netmask ednssubnet;
1532 try {
1533 DNSName qname;
1534 uint16_t qtype=0;
1535 uint16_t qclass=0;
1536 uint32_t age;
1537 #ifdef MALLOC_TRACE
1538 /*
1539 static uint64_t last=0;
1540 if(!last)
1541 g_mtracer->clearAllocators();
1542 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1543 last=g_mtracer->getAllocs();
1544 cout<<g_mtracer->topAllocatorsString()<<endl;
1545 g_mtracer->clearAllocators();
1546 */
1547 #endif
1548
1549 if(needECS || (t_pdl->get() && (*t_pdl)->d_gettag)) {
1550 try {
1551 getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet);
1552
1553 if(t_pdl->get() && (*t_pdl)->d_gettag) {
1554 try {
1555 ctag=(*t_pdl)->gettag(fromaddr, ednssubnet, destaddr, qname, qtype, &policyTags);
1556 }
1557 catch(std::exception& e) {
1558 if(g_logCommonErrors)
1559 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1560 }
1561 }
1562 }
1563 catch(std::exception& e)
1564 {
1565 if(g_logCommonErrors)
1566 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1567 }
1568 }
1569
1570 bool cacheHit = false;
1571 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1572 #ifdef HAVE_PROTOBUF
1573 if(luaconfsLocal->protobufServer) {
1574 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1575 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet, false, dh->id, question.size(), qname, qtype, qclass, policyTags);
1576 }
1577 }
1578 #endif /* HAVE_PROTOBUF */
1579
1580 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &pbMessage));
1581 if (cacheHit) {
1582 #ifdef HAVE_PROTOBUF
1583 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1584 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1585 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1586 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1587 pbMessage.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1588 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1589 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1590 }
1591 #endif /* HAVE_PROTOBUF */
1592 if(!g_quiet)
1593 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1594
1595 g_stats.packetCacheHits++;
1596 SyncRes::s_queries++;
1597 ageDNSPacket(response, age);
1598 struct msghdr msgh;
1599 struct iovec iov;
1600 char cbuf[256];
1601 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1602 msgh.msg_control=NULL;
1603
1604 if(g_fromtosockets.count(fd)) {
1605 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1606 }
1607 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1608 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1609
1610 if(response.length() >= sizeof(struct dnsheader)) {
1611 struct dnsheader tmpdh;
1612 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1613 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1614 }
1615 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1616 return 0;
1617 }
1618 }
1619 catch(std::exception& e) {
1620 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1621 return 0;
1622 }
1623
1624 if(t_pdl->get()) {
1625 if((*t_pdl)->ipfilter(fromaddr, destaddr, *dh)) {
1626 if(!g_quiet)
1627 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1628 g_stats.policyDrops++;
1629 return 0;
1630 }
1631 }
1632
1633 if(MT->numProcesses() > g_maxMThreads) {
1634 if(!g_quiet)
1635 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1636
1637 g_stats.overCapacityDrops++;
1638 return 0;
1639 }
1640
1641 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1642 dc->setSocket(fd);
1643 dc->d_tag=ctag;
1644 dc->d_query = question;
1645 dc->setRemote(&fromaddr);
1646 dc->setLocal(destaddr);
1647 dc->d_tcp=false;
1648 dc->d_policyTags = policyTags;
1649 #ifdef HAVE_PROTOBUF
1650 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1651 dc->d_uuid = uniqueId;
1652 }
1653 dc->d_ednssubnet = ednssubnet;
1654 #endif
1655
1656 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1657 return 0;
1658 }
1659
1660
1661 void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1662 {
1663 ssize_t len;
1664 char data[1500];
1665 ComboAddress fromaddr;
1666 struct msghdr msgh;
1667 struct iovec iov;
1668 char cbuf[256];
1669
1670 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1671 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1672
1673 for(;;)
1674 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1675 if(t_remotes)
1676 t_remotes->push_back(fromaddr);
1677
1678 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1679 if(!g_quiet)
1680 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1681
1682 g_stats.unauthorizedUDP++;
1683 return;
1684 }
1685 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1686 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1687 if(!g_quiet)
1688 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1689
1690 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1691 return;
1692 }
1693 try {
1694 dnsheader* dh=(dnsheader*)data;
1695
1696 if(dh->qr) {
1697 g_stats.ignoredCount++;
1698 if(g_logCommonErrors)
1699 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1700 }
1701 else if(dh->opcode) {
1702 g_stats.ignoredCount++;
1703 if(g_logCommonErrors)
1704 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1705 }
1706 else {
1707 string question(data, (size_t)len);
1708 struct timeval tv={0,0};
1709 HarvestTimestamp(&msgh, &tv);
1710 ComboAddress dest;
1711 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1712 auto loc = rplookup(g_listenSocketsAddresses, fd);
1713 if(HarvestDestinationAddress(&msgh, &dest)) {
1714 // but.. need to get port too
1715 if(loc)
1716 dest.sin4.sin_port = loc->sin4.sin_port;
1717 }
1718 else {
1719 if(loc) {
1720 dest = *loc;
1721 }
1722 else {
1723 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1724 socklen_t slen = dest.getSocklen();
1725 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1726 }
1727 }
1728 if(g_weDistributeQueries)
1729 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1730 else
1731 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1732 }
1733 }
1734 catch(MOADNSException& mde) {
1735 g_stats.clientParseError++;
1736 if(g_logCommonErrors)
1737 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1738 }
1739 catch(std::runtime_error& e) {
1740 g_stats.clientParseError++;
1741 if(g_logCommonErrors)
1742 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1743 }
1744 }
1745 else {
1746 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1747 if(errno == EAGAIN)
1748 g_stats.noPacketError++;
1749 break;
1750 }
1751 }
1752
1753
1754 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
1755 deferredAdd_t deferredAdd;
1756
1757 void makeTCPServerSockets()
1758 {
1759 int fd;
1760 vector<string>locals;
1761 stringtok(locals,::arg()["local-address"]," ,");
1762
1763 if(locals.empty())
1764 throw PDNSException("No local address specified");
1765
1766 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1767 ServiceTuple st;
1768 st.port=::arg().asNum("local-port");
1769 parseService(*i, st);
1770
1771 ComboAddress sin;
1772
1773 memset((char *)&sin,0, sizeof(sin));
1774 sin.sin4.sin_family = AF_INET;
1775 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1776 sin.sin6.sin6_family = AF_INET6;
1777 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1778 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1779 }
1780
1781 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1782 if(fd<0)
1783 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1784
1785 setCloseOnExec(fd);
1786
1787 int tmp=1;
1788 if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) {
1789 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1790 exit(1);
1791 }
1792 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1793 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1794 }
1795
1796 #ifdef TCP_DEFER_ACCEPT
1797 if(setsockopt(fd, SOL_TCP,TCP_DEFER_ACCEPT,(char*)&tmp,sizeof tmp) >= 0) {
1798 if(i==locals.begin())
1799 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1800 }
1801 #endif
1802
1803 if( ::arg().mustDo("non-local-bind") )
1804 Utility::setBindAny(AF_INET, fd);
1805
1806 #ifdef SO_REUSEPORT
1807 if(::arg().mustDo("reuseport")) {
1808 int one=1;
1809 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1810 throw PDNSException("SO_REUSEPORT: "+stringerror());
1811 }
1812 #endif
1813
1814 sin.sin4.sin_port = htons(st.port);
1815 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1816 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1817 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1818
1819 setNonBlocking(fd);
1820 setSocketSendBuffer(fd, 65000);
1821 listen(fd, 128);
1822 deferredAdd.push_back(make_pair(fd, handleNewTCPQuestion));
1823 g_tcpListenSockets.push_back(fd);
1824 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1825 // - fd is not that which we know here, but returned from accept()
1826 if(sin.sin4.sin_family == AF_INET)
1827 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1828 else
1829 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1830 }
1831 }
1832
1833 void makeUDPServerSockets()
1834 {
1835 int one=1;
1836 vector<string>locals;
1837 stringtok(locals,::arg()["local-address"]," ,");
1838
1839 if(locals.empty())
1840 throw PDNSException("No local address specified");
1841
1842 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1843 ServiceTuple st;
1844 st.port=::arg().asNum("local-port");
1845 parseService(*i, st);
1846
1847 ComboAddress sin;
1848
1849 memset(&sin, 0, sizeof(sin));
1850 sin.sin4.sin_family = AF_INET;
1851 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1852 sin.sin6.sin6_family = AF_INET6;
1853 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1854 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
1855 }
1856
1857 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
1858 if(fd < 0) {
1859 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
1860 }
1861 if (!setSocketTimestamps(fd))
1862 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
1863
1864 if(IsAnyAddress(sin)) {
1865 if(sin.sin4.sin_family == AF_INET)
1866 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
1867 g_fromtosockets.insert(fd);
1868 #ifdef IPV6_RECVPKTINFO
1869 if(sin.sin4.sin_family == AF_INET6)
1870 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
1871 g_fromtosockets.insert(fd);
1872 #endif
1873 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
1874 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1875 }
1876 }
1877 if( ::arg().mustDo("non-local-bind") )
1878 Utility::setBindAny(AF_INET6, fd);
1879
1880 setCloseOnExec(fd);
1881
1882 setSocketReceiveBuffer(fd, 250000);
1883 sin.sin4.sin_port = htons(st.port);
1884
1885
1886 #ifdef SO_REUSEPORT
1887 if(::arg().mustDo("reuseport")) {
1888 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
1889 throw PDNSException("SO_REUSEPORT: "+stringerror());
1890 }
1891 #endif
1892 socklen_t socklen=sin.getSocklen();
1893 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
1894 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
1895
1896 setNonBlocking(fd);
1897
1898 deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion));
1899 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
1900 if(sin.sin4.sin_family == AF_INET)
1901 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
1902 else
1903 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1904 }
1905 }
1906
1907
1908 void daemonize(void)
1909 {
1910 if(fork())
1911 exit(0); // bye bye
1912
1913 setsid();
1914
1915 int i=open("/dev/null",O_RDWR); /* open stdin */
1916 if(i < 0)
1917 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
1918 else {
1919 dup2(i,0); /* stdin */
1920 dup2(i,1); /* stderr */
1921 dup2(i,2); /* stderr */
1922 close(i);
1923 }
1924 }
1925
1926 AtomicCounter counter;
1927 bool statsWanted;
1928
1929 void usr1Handler(int)
1930 {
1931 statsWanted=true;
1932 }
1933
1934 void usr2Handler(int)
1935 {
1936 g_quiet= !g_quiet;
1937 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
1938 ::arg().set("quiet")=g_quiet ? "" : "no";
1939 }
1940
1941 void doStats(void)
1942 {
1943 static time_t lastOutputTime;
1944 static uint64_t lastQueryCount;
1945
1946 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
1947 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
1948
1949 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
1950 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
1951 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
1952 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
1953 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
1954
1955 L<<Logger::Notice<<"stats: throttle map: "
1956 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
1957 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
1958 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
1959 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
1960 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
1961 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
1962 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
1963
1964 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
1965 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
1966
1967 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
1968 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
1969
1970 time_t now = time(0);
1971 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
1972 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
1973 }
1974 lastOutputTime = now;
1975 lastQueryCount = SyncRes::s_queries;
1976 }
1977 else if(statsWanted)
1978 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
1979
1980 statsWanted=false;
1981 }
1982
1983 static void houseKeeping(void *)
1984 {
1985 static __thread time_t last_stat, last_rootupdate, last_prune, last_secpoll;
1986 static __thread int cleanCounter=0;
1987 static __thread bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
1988 try {
1989 if(s_running)
1990 return;
1991 s_running=true;
1992
1993 struct timeval now;
1994 Utility::gettimeofday(&now, 0);
1995
1996 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
1997 DTime dt;
1998 dt.setTimeval(now);
1999 t_RC->doPrune(); // this function is local to a thread, so fine anyhow
2000 t_packetCache->doPruneTo(::arg().asNum("max-packetcache-entries") / g_numWorkerThreads);
2001
2002 pruneCollection(t_sstorage->negcache, ::arg().asNum("max-cache-entries") / (g_numWorkerThreads * 10), 200);
2003
2004 if(!((cleanCounter++)%40)) { // this is a full scan!
2005 time_t limit=now.tv_sec-300;
2006 for(SyncRes::nsspeeds_t::iterator i = t_sstorage->nsSpeeds.begin() ; i!= t_sstorage->nsSpeeds.end(); )
2007 if(i->second.stale(limit))
2008 t_sstorage->nsSpeeds.erase(i++);
2009 else
2010 ++i;
2011 }
2012 last_prune=time(0);
2013 }
2014
2015 if(now.tv_sec - last_rootupdate > 7200) {
2016 int res = getRootNS();
2017 if (!res)
2018 last_rootupdate=now.tv_sec;
2019 }
2020
2021 if(!t_id) {
2022 if(now.tv_sec - last_stat >= 1800) {
2023 doStats();
2024 last_stat=time(0);
2025 }
2026
2027 if(now.tv_sec - last_secpoll >= 3600) {
2028 try {
2029 doSecPoll(&last_secpoll);
2030 }
2031 catch(...) {}
2032 }
2033 }
2034 s_running=false;
2035 }
2036 catch(PDNSException& ae)
2037 {
2038 s_running=false;
2039 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2040 throw;
2041 }
2042 }
2043
2044 void makeThreadPipes()
2045 {
2046 for(unsigned int n=0; n < g_numThreads; ++n) {
2047 struct ThreadPipeSet tps;
2048 int fd[2];
2049 if(pipe(fd) < 0)
2050 unixDie("Creating pipe for inter-thread communications");
2051
2052 tps.readToThread = fd[0];
2053 tps.writeToThread = fd[1];
2054
2055 if(pipe(fd) < 0)
2056 unixDie("Creating pipe for inter-thread communications");
2057 tps.readFromThread = fd[0];
2058 tps.writeFromThread = fd[1];
2059
2060 g_pipes.push_back(tps);
2061 }
2062 }
2063
2064 struct ThreadMSG
2065 {
2066 pipefunc_t func;
2067 bool wantAnswer;
2068 };
2069
2070 void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2071 {
2072 unsigned int n = 0;
2073 for(ThreadPipeSet& tps : g_pipes)
2074 {
2075 if(n++ == t_id) {
2076 if(!skipSelf)
2077 func(); // don't write to ourselves!
2078 continue;
2079 }
2080
2081 ThreadMSG* tmsg = new ThreadMSG();
2082 tmsg->func = func;
2083 tmsg->wantAnswer = true;
2084 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2085 delete tmsg;
2086 unixDie("write to thread pipe returned wrong size or error");
2087 }
2088
2089 string* resp;
2090 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2091 unixDie("read from thread pipe returned wrong size or error");
2092
2093 if(resp) {
2094 // cerr <<"got response: " << *resp << endl;
2095 delete resp;
2096 }
2097 }
2098 }
2099
2100 static uint32_t g_disthashseed;
2101 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2102 {
2103 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2104 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2105
2106 if(target == t_id) {
2107 func();
2108 return;
2109 }
2110 ThreadPipeSet& tps = g_pipes[target];
2111 ThreadMSG* tmsg = new ThreadMSG();
2112 tmsg->func = func;
2113 tmsg->wantAnswer = false;
2114
2115 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2116 delete tmsg;
2117 unixDie("write to thread pipe returned wrong size or error");
2118 }
2119 }
2120
2121 void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2122 {
2123 ThreadMSG* tmsg;
2124
2125 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
2126 unixDie("read from thread pipe returned wrong size or error");
2127 }
2128
2129 void *resp=0;
2130 try {
2131 resp = tmsg->func();
2132 }
2133 catch(std::exception& e) {
2134 if(g_logCommonErrors)
2135 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2136 }
2137 catch(PDNSException& e) {
2138 if(g_logCommonErrors)
2139 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2140 }
2141 if(tmsg->wantAnswer)
2142 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp))
2143 unixDie("write to thread pipe returned wrong size or error");
2144
2145 delete tmsg;
2146 }
2147
2148 template<class T> void *voider(const boost::function<T*()>& func)
2149 {
2150 return func();
2151 }
2152
2153 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2154 {
2155 a.insert(a.end(), b.begin(), b.end());
2156 return a;
2157 }
2158
/** Append every (name, count) pair of b to a; returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  for(const auto& entry : b)
    a.push_back(entry);
  return a;
}
2164
2165 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2166 {
2167 a.insert(a.end(), b.begin(), b.end());
2168 return a;
2169 }
2170
2171
2172 template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
2173 {
2174 unsigned int n = 0;
2175 T ret=T();
2176 for(ThreadPipeSet& tps : g_pipes)
2177 {
2178 if(n++ == t_id) {
2179 if(!skipSelf) {
2180 T* resp = (T*)func(); // don't write to ourselves!
2181 if(resp) {
2182 //~ cerr <<"got direct: " << *resp << endl;
2183 ret += *resp;
2184 delete resp;
2185 }
2186 }
2187 continue;
2188 }
2189
2190 ThreadMSG* tmsg = new ThreadMSG();
2191 tmsg->func = boost::bind(voider<T>, func);
2192 tmsg->wantAnswer = true;
2193
2194 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2195 delete tmsg;
2196 unixDie("write to thread pipe returned wrong size or error");
2197 }
2198
2199 T* resp;
2200 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2201 unixDie("read from thread pipe returned wrong size or error");
2202
2203 if(resp) {
2204 //~ cerr <<"got response: " << *resp << endl;
2205 ret += *resp;
2206 delete resp;
2207 }
2208 }
2209 return ret;
2210 }
2211
2212 template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2213 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
2214 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
2215 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
2216
2217 void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2218 {
2219 string remote;
2220 string msg=s_rcc.recv(&remote);
2221 RecursorControlParser rcp;
2222 RecursorControlParser::func_t* command;
2223
2224 string answer=rcp.getAnswer(msg, &command);
2225
2226 // If we are inside a chroot, we need to strip
2227 if (!arg()["chroot"].empty()) {
2228 size_t len = arg()["chroot"].length();
2229 remote = remote.substr(len);
2230 }
2231
2232 try {
2233 s_rcc.send(answer, &remote);
2234 command();
2235 }
2236 catch(std::exception& e) {
2237 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2238 }
2239 catch(PDNSException& ae) {
2240 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2241 }
2242 }
2243
2244 void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2245 {
2246 PacketID* pident=any_cast<PacketID>(&var);
2247 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2248
2249 shared_array<char> buffer(new char[pident->inNeeded]);
2250
2251 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2252 if(ret > 0) {
2253 pident->inMSG.append(&buffer[0], &buffer[ret]);
2254 pident->inNeeded-=(size_t)ret;
2255 if(!pident->inNeeded || pident->inIncompleteOkay) {
2256 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2257 PacketID pid=*pident;
2258 string msg=pident->inMSG;
2259
2260 t_fdm->removeReadFD(fd);
2261 MT->sendEvent(pid, &msg);
2262 }
2263 else {
2264 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2265 }
2266 }
2267 else {
2268 PacketID tmp=*pident;
2269 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2270 string empty;
2271 MT->sendEvent(tmp, &empty); // this conveys error status
2272 }
2273 }
2274
2275 void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2276 {
2277 PacketID* pid=any_cast<PacketID>(&var);
2278 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2279 if(ret > 0) {
2280 pid->outPos+=(ssize_t)ret;
2281 if(pid->outPos==pid->outMSG.size()) {
2282 PacketID tmp=*pid;
2283 t_fdm->removeWriteFD(fd);
2284 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2285 }
2286 }
2287 else { // error or EOF
2288 PacketID tmp(*pid);
2289 t_fdm->removeWriteFD(fd);
2290 string sent;
2291 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2292 }
2293 }
2294
2295 // resend event to everybody chained onto it
2296 void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2297 {
2298 if(iter->key.chain.empty())
2299 return;
2300 // cerr<<"doResends called!\n";
2301 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2302 resend.fd=-1;
2303 resend.id=*i;
2304 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2305
2306 MT->sendEvent(resend, &content);
2307 g_stats.chainResends++;
2308 }
2309 }
2310
// Multiplexer callback for a readable outgoing UDP socket.
// Reads one datagram from fd, builds a PacketID from the sender address, the
// DNS id, the fd and (when a question section is present) the question name
// and type, and delivers the raw packet to the matching waiter through
// MT->sendEvent. Waiters chained onto the matching one get the same content
// via doResends.
2311 void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2312 {
2313 PacketID pid=any_cast<PacketID>(var);
2314 ssize_t len;
// receive buffer sized at run time from g_outgoingEDNSBufsize
2315 char data[g_outgoingEDNSBufsize];
2316 ComboAddress fromaddr;
2317 socklen_t addrlen=sizeof(fromaddr);
2318
2319 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2320
// Receive error or a packet too short to hold a DNS header: hand the socket
// back and signal failure (an empty string) to the waiter registered under
// pid and to anything chained onto it.
2321 if(len < (ssize_t) sizeof(dnsheader)) {
2322 if(len < 0)
2323 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2324 else {
2325 g_stats.serverParseError++;
2326 if(g_logCommonErrors)
2327 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2328 ": packet smaller than DNS header"<<endl;
2329 }
2330
2331 t_udpclientsocks->returnSocket(fd);
2332 string empty;
2333
2334 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2335 if(iter != MT->d_waiters.end())
2336 doResends(iter, pid, empty);
2337
2338 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2339 return;
2340 }
2341
2342 dnsheader dh;
2343 memcpy(&dh, data, sizeof(dh));
2344
// key used to look up the waiter: sender address, DNS id and socket
2345 PacketID pident;
2346 pident.remote=fromaddr;
2347 pident.id=dh.id;
2348 pident.fd=fd;
2349
2350 if(!dh.qr && g_logCommonErrors) {
2351 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2352 }
2353
// Without a question section, or when the packet is not a response, the key
// carries an empty name and type 0; otherwise name and type are parsed from
// offset 12 of the packet.
2354 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2355 !dh.qr) { // one weird server
2356 pident.domain.clear();
2357 pident.type = 0;
2358 }
2359 else {
2360 try {
2361 if(len > 12)
2362 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2363 }
2364 catch(std::exception& e) {
// NOTE(review): this early return neither returns the socket nor notifies a
// waiter - confirm that is intended
2365 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2366 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2367 return;
2368 }
2369 }
2370 string packet;
2371 packet.assign(data, len);
2372
2373 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2374 if(iter != MT->d_waiters.end()) {
2375 doResends(iter, pident, packet);
2376 }
2377
2378 retryWithName:
2379
// sendEvent returning false means no waiter matched pident
2380 if(!MT->sendEvent(pident, &packet)) {
2381 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2382 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
// same socket, sender, type and name but (given the failed lookup) not an
// exact key match: count it as a near miss on that waiter
2383 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2384 pident.domain == mthread->key.domain) {
2385 mthread->key.nearMisses++;
2386 }
2387
2388 // be a bit paranoid here since we're weakening our matching
// The retry can happen at most once per packet: after the jump pident.domain
// is non-empty, so this condition cannot hold again.
2389 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2390 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2391 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2392 pident.domain = mthread->key.domain;
2393 pident.type = mthread->key.type;
2394 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2395 }
2396 }
2397 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2398 if(g_logCommonErrors) {
2399 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2400 }
2401 }
// the packet was delivered to a waiter: the socket can be handed back
2402 else if(fd >= 0) {
2403 t_udpclientsocks->returnSocket(fd);
2404 }
2405 }
2406
2407 FDMultiplexer* getMultiplexer()
2408 {
2409 FDMultiplexer* ret;
2410 for(FDMultiplexer::FDMultiplexermap_t::const_iterator i = FDMultiplexer::getMultiplexerMap().begin();
2411 i != FDMultiplexer::getMultiplexerMap().end(); ++i) {
2412 try {
2413 ret=i->second();
2414 return ret;
2415 }
2416 catch(FDMultiplexerException &fe) {
2417 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2418 }
2419 catch(...) {
2420 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2421 }
2422 }
2423 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2424 exit(1);
2425 }
2426
2427
2428 string* doReloadLuaScript()
2429 {
2430 string fname= ::arg()["lua-dns-script"];
2431 try {
2432 if(fname.empty()) {
2433 t_pdl->reset();
2434 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2435 return new string("unloaded\n");
2436 }
2437 else {
2438 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(fname));
2439 }
2440 }
2441 catch(std::exception& e) {
2442 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2443 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2444 }
2445
2446 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2447 return new string("(re)loaded '"+fname+"'\n");
2448 }
2449
2450 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2451 {
2452 if(begin != end)
2453 ::arg().set("lua-dns-script") = *begin;
2454
2455 return broadcastAccFunction<string>(doReloadLuaScript);
2456 }
2457
2458 string* pleaseUseNewTraceRegex(const std::string& newRegex)
2459 try
2460 {
2461 if(newRegex.empty()) {
2462 t_traceRegex->reset();
2463 return new string("unset\n");
2464 }
2465 else {
2466 (*t_traceRegex) = shared_ptr<Regex>(new Regex(newRegex));
2467 return new string("ok\n");
2468 }
2469 }
2470 catch(PDNSException& ae)
2471 {
2472 return new string(ae.reason+"\n");
2473 }
2474
2475 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2476 {
2477 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2478 }
2479
2480 static void checkLinuxIPv6Limits()
2481 {
2482 #ifdef __linux__
2483 string line;
2484 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2485 int lim=std::stoi(line);
2486 if(lim < 16384) {
2487 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2488 }
2489 }
2490 #endif
2491 }
2492 static void checkOrFixFDS()
2493 {
2494 unsigned int availFDs=getFilenumLimit();
2495 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2496
2497 if(wantFDs > availFDs) {
2498 unsigned int hardlimit= getFilenumLimit(true);
2499 if(hardlimit >= wantFDs) {
2500 setFilenumLimit(wantFDs);
2501 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2502 }
2503 else {
2504 int newval = (hardlimit - 25) / g_numWorkerThreads;
2505 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2506 g_maxMThreads = newval;
2507 setFilenumLimit(hardlimit);
2508 }
2509 }
2510 }
2511
2512 void* recursorThread(void*);
2513
2514 void* pleaseSupplantACLs(NetmaskGroup *ng)
2515 {
2516 t_allowFrom = ng;
2517 return 0;
2518 }
2519
2520 int g_argc;
2521 char** g_argv;
2522
2523 void parseACLs()
2524 {
2525 static bool l_initialized;
2526
2527 if(l_initialized) { // only reload configuration file on second call
2528 string configname=::arg()["config-dir"]+"/recursor.conf";
2529 cleanSlashes(configname);
2530
2531 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2532 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2533 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2534 ::arg().preParseFile(configname.c_str(), "include-dir");
2535 ::arg().preParse(g_argc, g_argv, "include-dir");
2536
2537 // then process includes
2538 std::vector<std::string> extraConfigs;
2539 ::arg().gatherIncludes(extraConfigs);
2540
2541 for(const std::string& fn : extraConfigs) {
2542 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2543 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2544 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2545 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2546 }
2547
2548 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2549 ::arg().preParse(g_argc, g_argv, "allow-from");
2550 }
2551
2552 NetmaskGroup* oldAllowFrom = t_allowFrom, *allowFrom=new NetmaskGroup;
2553
2554 if(!::arg()["allow-from-file"].empty()) {
2555 string line;
2556 ifstream ifs(::arg()["allow-from-file"].c_str());
2557 if(!ifs) {
2558 delete allowFrom;
2559 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2560 }
2561
2562 string::size_type pos;
2563 while(getline(ifs,line)) {
2564 pos=line.find('#');
2565 if(pos!=string::npos)
2566 line.resize(pos);
2567 trim(line);
2568 if(line.empty())
2569 continue;
2570
2571 allowFrom->addMask(line);
2572 }
2573 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2574 }
2575 else if(!::arg()["allow-from"].empty()) {
2576 vector<string> ips;
2577 stringtok(ips, ::arg()["allow-from"], ", ");
2578
2579 L<<Logger::Warning<<"Only allowing queries from: ";
2580 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2581 allowFrom->addMask(*i);
2582 if(i!=ips.begin())
2583 L<<Logger::Warning<<", ";
2584 L<<Logger::Warning<<*i;
2585 }
2586 L<<Logger::Warning<<endl;
2587 }
2588 else {
2589 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2590 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2591 delete allowFrom;
2592 allowFrom = 0;
2593 }
2594
2595 g_initialAllowFrom = allowFrom;
2596 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2597 delete oldAllowFrom;
2598
2599 l_initialized = true;
2600 }
2601
2602
2603 std::unordered_set<DNSName> g_delegationOnly;
2604 static void setupDelegationOnly()
2605 {
2606 vector<string> parts;
2607 stringtok(parts, ::arg()["delegation-only"], ", \t");
2608 for(const auto& p : parts) {
2609 g_delegationOnly.insert(DNSName(p));
2610 }
2611 }
2612
2613 int serviceMain(int argc, char*argv[])
2614 {
2615 L.setName(s_programname);
2616 L.setLoglevel((Logger::Urgency)(6)); // info and up
2617 L.disableSyslog(::arg().mustDo("disable-syslog"));
2618
2619 if(!::arg()["logging-facility"].empty()) {
2620 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2621 if(val >= 0)
2622 theL().setFacility(val);
2623 else
2624 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2625 }
2626
2627 showProductVersion();
2628 seedRandom(::arg()["entropy-source"]);
2629
2630 g_disthashseed=dns_random(0xffffffff);
2631
2632 checkLinuxIPv6Limits();
2633 try {
2634 vector<string> addrs;
2635 if(!::arg()["query-local-address6"].empty()) {
2636 SyncRes::s_doIPv6=true;
2637 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2638
2639 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2640 for(const string& addr : addrs) {
2641 g_localQueryAddresses6.push_back(ComboAddress(addr));
2642 }
2643 }
2644 else {
2645 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2646 }
2647 addrs.clear();
2648 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2649 for(const string& addr : addrs) {
2650 g_localQueryAddresses4.push_back(ComboAddress(addr));
2651 }
2652 }
2653 catch(std::exception& e) {
2654 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2655 exit(99);
2656 }
2657
2658 // keep this ABOVE loadRecursorLuaConfig!
2659 if(::arg()["dnssec"]=="off")
2660 g_dnssecmode=DNSSECMode::Off;
2661 else if(::arg()["dnssec"]=="process-no-validate")
2662 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2663 else if(::arg()["dnssec"]=="process")
2664 g_dnssecmode=DNSSECMode::Process;
2665 else if(::arg()["dnssec"]=="validate")
2666 g_dnssecmode=DNSSECMode::ValidateAll;
2667 else if(::arg()["dnssec"]=="log-fail")
2668 g_dnssecmode=DNSSECMode::ValidateForLog;
2669 else {
2670 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2671 exit(1);
2672 }
2673
2674 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2675
2676 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2677
2678 parseACLs();
2679 sortPublicSuffixList();
2680
2681 if(!::arg()["dont-query"].empty()) {
2682 g_dontQuery=new NetmaskGroup;
2683 vector<string> ips;
2684 stringtok(ips, ::arg()["dont-query"], ", ");
2685 ips.push_back("0.0.0.0");
2686 ips.push_back("::");
2687
2688 L<<Logger::Warning<<"Will not send queries to: ";
2689 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2690 g_dontQuery->addMask(*i);
2691 if(i!=ips.begin())
2692 L<<Logger::Warning<<", ";
2693 L<<Logger::Warning<<*i;
2694 }
2695 L<<Logger::Warning<<endl;
2696 }
2697
2698 g_quiet=::arg().mustDo("quiet");
2699
2700 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2701 if(g_weDistributeQueries) {
2702 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2703 }
2704
2705 setupDelegationOnly();
2706 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2707
2708 if(::arg()["trace"]=="fail") {
2709 SyncRes::setDefaultLogMode(SyncRes::Store);
2710 }
2711 else if(::arg().mustDo("trace")) {
2712 SyncRes::setDefaultLogMode(SyncRes::Log);
2713 ::arg().set("quiet")="no";
2714 g_quiet=false;
2715 g_dnssecLOG=true;
2716 }
2717
2718 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2719
2720 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2721
2722 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
2723 SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
2724 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2725 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2726 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2727 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
2728 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2729 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
2730 SyncRes::s_serverID=::arg()["server-id"];
2731 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
2732 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
2733 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
2734 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
2735 if(SyncRes::s_serverID.empty()) {
2736 char tmp[128];
2737 gethostname(tmp, sizeof(tmp)-1);
2738 SyncRes::s_serverID=tmp;
2739 }
2740
2741 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
2742
2743 g_initialDomainMap = parseAuthAndForwards();
2744
2745 g_latencyStatSize=::arg().asNum("latency-statistic-size");
2746
2747 g_logCommonErrors=::arg().mustDo("log-common-errors");
2748
2749 g_anyToTcp = ::arg().mustDo("any-to-tcp");
2750 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2751
2752 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
2753
2754 makeUDPServerSockets();
2755 makeTCPServerSockets();
2756
2757 parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
2758
2759 int forks;
2760 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
2761 if(!fork()) // we are child
2762 break;
2763 }
2764
2765 if(::arg().mustDo("daemon")) {
2766 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
2767 L.toConsole(Logger::Critical);
2768 daemonize();
2769 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
2770 }
2771 signal(SIGUSR1,usr1Handler);
2772 signal(SIGUSR2,usr2Handler);
2773 signal(SIGPIPE,SIG_IGN);
2774 g_numThreads = ::arg().asNum("threads") + ::arg().mustDo("pdns-distributes-queries");
2775 g_numWorkerThreads = ::arg().asNum("threads");
2776 g_maxMThreads = ::arg().asNum("max-mthreads");
2777 checkOrFixFDS();
2778
2779 openssl_thread_setup();
2780 openssl_seed();
2781
2782 int newgid=0;
2783 if(!::arg()["setgid"].empty())
2784 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
2785 int newuid=0;
2786 if(!::arg()["setuid"].empty())
2787 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
2788
2789 Utility::dropGroupPrivs(newuid, newgid);
2790
2791 if (!::arg()["chroot"].empty()) {
2792 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
2793 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
2794 exit(1);
2795 }
2796 else
2797 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
2798 }
2799
2800 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
2801 if(!s_pidfname.empty())
2802 unlink(s_pidfname.c_str()); // remove possible old pid file
2803 writePid();
2804
2805 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
2806
2807 Utility::dropUserPrivs(newuid);
2808
2809 makeThreadPipes();
2810
2811 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
2812 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
2813 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
2814
2815 if(g_numThreads == 1) {
2816 L<<Logger::Warning<<"Operating unthreaded"<<endl;
2817 #ifdef HAVE_SYSTEMD
2818 sd_notify(0, "READY=1");
2819 #endif
2820 recursorThread(0);
2821 }
2822 else {
2823 pthread_t tid;
2824 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
2825 for(unsigned int n=0; n < g_numThreads; ++n) {
2826 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
2827 }
2828 void* res;
2829 #ifdef HAVE_SYSTEMD
2830 sd_notify(0, "READY=1");
2831 #endif
2832 pthread_join(tid, &res);
2833 }
2834 return 0;
2835 }
2836
2837 void* recursorThread(void* ptr)
2838 try
2839 {
2840 t_id=(int) (long) ptr;
2841 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
2842 t_sstorage->domainmap = g_initialDomainMap;
2843 t_allowFrom = g_initialAllowFrom;
2844 t_udpclientsocks = new UDPClientSocks();
2845 t_tcpClientCounts = new tcpClientCounts_t();
2846 primeHints();
2847
2848 t_packetCache = new RecursorPacketCache();
2849
2850 #ifdef HAVE_PROTOBUF
2851 t_uuidGenerator = new boost::uuids::random_generator();
2852 #endif
2853 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
2854
2855 t_pdl = new shared_ptr<RecursorLua4>();
2856
2857 try {
2858 if(!::arg()["lua-dns-script"].empty()) {
2859 *t_pdl = shared_ptr<RecursorLua4>(new RecursorLua4(::arg()["lua-dns-script"]));
2860 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
2861 }
2862 }
2863 catch(std::exception &e) {
2864 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
2865 _exit(99);
2866 }
2867
2868 t_traceRegex = new shared_ptr<Regex>();
2869 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
2870 if(ringsize) {
2871 t_remotes = new addrringbuf_t();
2872 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
2873 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
2874 else
2875 t_remotes->set_capacity(ringsize);
2876 t_servfailremotes = new addrringbuf_t();
2877 t_servfailremotes->set_capacity(ringsize);
2878 t_largeanswerremotes = new addrringbuf_t();
2879 t_largeanswerremotes->set_capacity(ringsize);
2880
2881 t_queryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
2882 t_queryring->set_capacity(ringsize);
2883 t_servfailqueryring = new boost::circular_buffer<pair<DNSName, uint16_t> >();
2884 t_servfailqueryring->set_capacity(ringsize);
2885 }
2886
2887 MT=new MTasker<PacketID,string>(::arg().asNum("stack-size"));
2888
2889 PacketID pident;
2890
2891 t_fdm=getMultiplexer();
2892 if(!t_id) {
2893 if(::arg().mustDo("webserver")) {
2894 L<<Logger::Warning << "Enabling web server" << endl;
2895 try {
2896 new RecursorWebServer(t_fdm);
2897 }
2898 catch(PDNSException &e) {
2899 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
2900 exit(99);
2901 }
2902 }
2903 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
2904 }
2905
2906 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
2907
2908 if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens
2909 for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i)
2910 t_fdm->addReadFD(i->first, i->second);
2911
2912 if(!t_id) {
2913 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
2914 }
2915
2916 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
2917
2918 bool listenOnTCP(true);
2919
2920 time_t last_carbon=0;
2921 time_t carbonInterval=::arg().asNum("carbon-interval");
2922 counter.store(0); // used to periodically execute certain tasks
2923 for(;;) {
2924 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
2925
2926 if(!(counter%500)) {
2927 MT->makeThread(houseKeeping, 0);
2928 }
2929
2930 if(!(counter%55)) {
2931 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
2932 expired_t expired=t_fdm->getTimeouts(g_now);
2933
2934 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
2935 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
2936 if(g_logCommonErrors)
2937 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
2938 t_fdm->removeReadFD(i->first);
2939 }
2940 }
2941
2942 counter++;
2943
2944 if(!t_id && statsWanted) {
2945 doStats();
2946 }
2947
2948 Utility::gettimeofday(&g_now, 0);
2949
2950 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
2951 MT->makeThread(doCarbonDump, 0);
2952 last_carbon = g_now.tv_sec;
2953 }
2954
2955 t_fdm->run(&g_now);
2956 // 'run' updates g_now for us
2957
2958 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
2959 if(listenOnTCP) {
2960 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
2961 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2962 t_fdm->removeReadFD(*i);
2963 listenOnTCP=false;
2964 }
2965 }
2966 else {
2967 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
2968 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
2969 t_fdm->addReadFD(*i, handleNewTCPQuestion);
2970 listenOnTCP=true;
2971 }
2972 }
2973 }
2974 }
2975 }
2976 catch(PDNSException &ae) {
2977 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
2978 return 0;
2979 }
2980 catch(std::exception &e) {
2981 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
2982 return 0;
2983 }
2984 catch(...) {
2985 L<<Logger::Error<<"any other exception in main: "<<endl;
2986 return 0;
2987 }
2988
2989
2990 int main(int argc, char **argv)
2991 {
2992 g_argc = argc;
2993 g_argv = argv;
2994 g_stats.startupTime=time(0);
2995 versionSetProduct(ProductRecursor);
2996 reportBasicTypes();
2997 reportOtherTypes();
2998
2999 int ret = EXIT_SUCCESS;
3000
3001 try {
3002 ::arg().set("stack-size","stack size per mthread")="200000";
3003 ::arg().set("soa-minimum-ttl","Don't change")="0";
3004 ::arg().set("no-shuffle","Don't change")="off";
3005 ::arg().set("local-port","port to listen on")="53";
3006 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3007 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3008 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3009 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3010 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3011 ::arg().set("daemon","Operate as a daemon")="no";
3012 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3013 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="4";
3014 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3015 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3016 ::arg().set("chroot","switch to chroot jail")="";
3017 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3018 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3019 ::arg().set("network-timeout", "Wait this nummer of milliseconds for network i/o")="1500";
3020 ::arg().set("threads", "Launch this number of threads")="2";
3021 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3022 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3023 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3024 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3025 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3026 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3027 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3028 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3029 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3030 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3031 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="0.0.0.0/0,::/0";
3032 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3033 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3034 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3035 ::arg().set("quiet","Suppress logging of questions and answers")="";
3036 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3037 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3038 ::arg().set("socket-owner","Owner of socket")="";
3039 ::arg().set("socket-group","Group of socket")="";
3040 ::arg().set("socket-mode", "Permissions for socket")="";
3041
3042 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3043 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3044 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3045 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3046 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3047 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3048 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3049 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3050 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3051 ::arg().set("hint-file", "If set, load root hints from this file")="";
3052 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3053 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3054 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3055 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3056 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3057 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3058 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
3059 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3060 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3061 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3062 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3063 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3064 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3065 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3066 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3067 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3068 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3069 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3070 ::arg().set("lua-config-file", "More powerful configuration options")="";
3071
3072 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3073 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3074 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3075 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3076 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3077 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3078 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="";
3079 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3080 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3081 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3082 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3083 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3084 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3085 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="";
3086 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3087 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3088 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3089 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3090 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3091 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3092 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3093 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3094 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3095
3096 ::arg().set("include-dir","Include *.conf files from this directory")="";
3097 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3098
3099 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3100
3101 ::arg().setCmd("help","Provide a helpful message");
3102 ::arg().setCmd("version","Print version string");
3103 ::arg().setCmd("config","Output blank configuration");
3104 L.toConsole(Logger::Info);
3105 ::arg().laxParse(argc,argv); // do a lax parse
3106
3107 string configname=::arg()["config-dir"]+"/recursor.conf";
3108 if(::arg()["config-name"]!="") {
3109 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3110 s_programname+="-"+::arg()["config-name"];
3111 }
3112 cleanSlashes(configname);
3113
3114 if(::arg().mustDo("config")) {
3115 cout<<::arg().configstring()<<endl;
3116 exit(0);
3117 }
3118
3119 if(!::arg().file(configname.c_str()))
3120 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3121
3122 ::arg().parse(argc,argv);
3123
3124 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3125 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3126 exit(EXIT_FAILURE);
3127 }
3128
3129 if (::arg()["socket-dir"].empty()) {
3130 if (::arg()["chroot"].empty())
3131 ::arg().set("socket-dir") = LOCALSTATEDIR;
3132 else
3133 ::arg().set("socket-dir") = "/";
3134 }
3135
3136 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3137
3138 if(::arg().asNum("threads")==1)
3139 ::arg().set("pdns-distributes-queries")="no";
3140
3141 if(::arg().mustDo("help")) {
3142 cout<<"syntax:"<<endl<<endl;
3143 cout<<::arg().helpstring(::arg()["help"])<<endl;
3144 exit(0);
3145 }
3146 if(::arg().mustDo("version")) {
3147 showProductVersion();
3148 showBuildConfiguration();
3149 exit(0);
3150 }
3151
3152 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3153
3154 if (logUrgency < Logger::Error)
3155 logUrgency = Logger::Error;
3156 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3157 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3158 }
3159 L.setLoglevel(logUrgency);
3160 L.toConsole(logUrgency);
3161
3162 serviceMain(argc, argv);
3163 }
3164 catch(PDNSException &ae) {
3165 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3166 ret=EXIT_FAILURE;
3167 }
3168 catch(std::exception &e) {
3169 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3170 ret=EXIT_FAILURE;
3171 }
3172 catch(...) {
3173 L<<Logger::Error<<"any other exception in main: "<<endl;
3174 ret=EXIT_FAILURE;
3175 }
3176
3177 return ret;
3178 }
3179
3180 int getRootNS(void) {
3181 SyncRes sr(g_now);
3182 sr.setDoEDNS0(true);
3183 sr.setNoCache();
3184 sr.d_doDNSSEC = (g_dnssecmode != DNSSECMode::Off);
3185
3186 vector<DNSRecord> ret;
3187 int res=-1;
3188 try {
3189 res=sr.beginResolve(g_rootdnsname, QType(QType::NS), 1, ret);
3190 if (g_dnssecmode != DNSSECMode::Off && g_dnssecmode != DNSSECMode::ProcessNoValidate) {
3191 ResolveContext ctx;
3192 auto state = validateRecords(ctx, ret);
3193 if (state == Bogus)
3194 throw PDNSException("Got Bogus validation result for .|NS");
3195 }
3196 return res;
3197 }
3198 catch(PDNSException& e)
3199 {
3200 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.reason<<endl;
3201 }
3202
3203 catch(std::exception& e)
3204 {
3205 L<<Logger::Error<<"Failed to update . records, got an exception: "<<e.what()<<endl;
3206 }
3207
3208 catch(...)
3209 {
3210 L<<Logger::Error<<"Failed to update . records, got an exception"<<endl;
3211 }
3212 if(!res) {
3213 L<<Logger::Notice<<"Refreshed . records"<<endl;
3214 }
3215 else
3216 L<<Logger::Error<<"Failed to update . records, RCODE="<<res<<endl;
3217 return res;
3218 }