git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
Edit configname to include the 'config-name' argument
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include "ws-recursor.hh"
31 #include <pthread.h>
32 #include "recpacketcache.hh"
33 #include "utility.hh"
34 #include "dns_random.hh"
35 #ifdef HAVE_LIBSODIUM
36 #include <sodium.h>
37 #endif
38 #include "opensslsigners.hh"
39 #include <iostream>
40 #include <errno.h>
41 #include <boost/static_assert.hpp>
42 #include <map>
43 #include <set>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
46 #include <stdio.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include "misc.hh"
50 #include "mtasker.hh"
51 #include <utility>
52 #include "arguments.hh"
53 #include "syncres.hh"
54 #include <fcntl.h>
55 #include <fstream>
56 #include "sortlist.hh"
57 #include "sstuff.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
63 #ifdef MALLOC_TRACE
64 #include "malloctrace.hh"
65 #endif
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
72 #include "logger.hh"
73 #include "iputils.hh"
74 #include "mplexer.hh"
75 #include "config.h"
76 #include "lua-recursor4.hh"
77 #include "version.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
80 #include "dnsname.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
86 #include "gettime.hh"
87
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
90
91 #ifdef HAVE_SYSTEMD
92 #include <systemd/sd-daemon.h>
93 #endif
94
95 #include "namespaces.hh"
96
97 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
98
99 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
100 static thread_local unsigned int t_id;
101 static thread_local std::shared_ptr<Regex> t_traceRegex;
102 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
103
104 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
105 thread_local std::unique_ptr<MemRecursorCache> t_RC;
106 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
107 thread_local FDMultiplexer* t_fdm{nullptr};
108 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
109 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
110 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
111 #ifdef HAVE_PROTOBUF
112 thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
113 #endif
114 __thread struct timeval g_now; // timestamp, updated (too) frequently
115
// for communicating with our threads
// NOTE(review): the members look like the four file descriptors of a pair of pipes
// (one towards the thread, one back from it) -- confirm against the code that fills g_pipes.
struct ThreadPipeSet {
  int writeToThread;   // presumably the write end of the pipe leading to the thread
  int readToThread;    // presumably the read end of the pipe leading to the thread
  int writeFromThread; // presumably the write end of the pipe coming back from the thread
  int readFromThread;  // presumably the read end of the pipe coming back from the thread
};
124
125 typedef vector<int> tcpListenSockets_t;
126 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
127 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
128
129 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
130 static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
131 static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
132 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
133 static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
134 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
135 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
136 static AtomicCounter counter;
137 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
138 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
139 static size_t g_tcpMaxQueriesPerConn;
140 static uint64_t g_latencyStatSize;
141 static uint32_t g_disthashseed;
142 static unsigned int g_maxTCPPerClient;
143 static unsigned int g_networkTimeoutMsec;
144 static unsigned int g_maxMThreads;
145 static unsigned int g_numWorkerThreads;
146 static int g_tcpTimeout;
147 static uint16_t g_udpTruncationThreshold;
148 static std::atomic<bool> statsWanted;
149 static std::atomic<bool> g_quiet;
150 static bool g_logCommonErrors;
151 static bool g_anyToTcp;
152 static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
153 static bool g_reusePort{false};
154 static bool g_useOneSocketPerThread;
155 static bool g_gettagNeedsEDNSOptions{false};
156 static time_t g_statisticsInterval;
157 static bool g_useIncomingECS;
158 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
159
160 RecursorControlChannel s_rcc; // only active in thread 0
161 RecursorStats g_stats;
162 string s_programname="pdns_recursor";
163 string s_pidfname;
164 bool g_lowercaseOutgoing;
165 unsigned int g_numThreads;
166 uint16_t g_outgoingEDNSBufsize;
167 bool g_logRPZChanges{false};
168
169 #define LOCAL_NETS "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, ::1/128, fc00::/7, fe80::/10"
170 // Bad Nets taken from both:
171 // http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
172 // and
173 // http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
174 // where such a network may not be considered a valid destination
175 #define BAD_NETS "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, ::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
176 #define DONT_QUERY LOCAL_NETS ", " BAD_NETS
177
178 //! used to send information to a newborn mthread
179 struct DNSComboWriter {
180 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
181 d_tcp(false), d_socket(-1)
182 {}
183 MOADNSParser d_mdp;
184 void setRemote(const ComboAddress* sa)
185 {
186 d_remote=*sa;
187 }
188
189 void setLocal(const ComboAddress& sa)
190 {
191 d_local=sa;
192 }
193
194
195 void setSocket(int sock)
196 {
197 d_socket=sock;
198 }
199
200 string getRemote() const
201 {
202 return d_remote.toString();
203 }
204
205 struct timeval d_now;
206 ComboAddress d_remote, d_local;
207 #ifdef HAVE_PROTOBUF
208 boost::uuids::uuid d_uuid;
209 string d_requestorId;
210 string d_deviceId;
211 #endif
212 EDNSSubnetOpts d_ednssubnet;
213 bool d_ecsFound{false};
214 bool d_ecsParsed{false};
215 bool d_tcp;
216 int d_socket;
217 unsigned int d_tag{0};
218 uint32_t d_qhash{0};
219 string d_query;
220 shared_ptr<TCPConnection> d_tcpConnection;
221 vector<pair<uint16_t, string> > d_ednsOpts;
222 std::vector<std::string> d_policyTags;
223 LuaContext::LuaObject d_data;
224 };
225
226 MT_t* getMT()
227 {
228 return MT ? MT.get() : nullptr;
229 }
230
231 ArgvMap &arg()
232 {
233 static ArgvMap theArg;
234 return theArg;
235 }
236
237 unsigned int getRecursorThreadId()
238 {
239 return t_id;
240 }
241
242 int getMTaskerTID()
243 {
244 return MT->getTid();
245 }
246
247 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
248
249 // -1 is error, 0 is timeout, 1 is success
250 int asendtcp(const string& data, Socket* sock)
251 {
252 PacketID pident;
253 pident.sock=sock;
254 pident.outMSG=data;
255
256 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
257 string packet;
258
259 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
260
261 if(!ret || ret==-1) { // timeout
262 t_fdm->removeWriteFD(sock->getHandle());
263 }
264 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
265 return -1;
266 }
267 return ret;
268 }
269
270 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
271
272 // -1 is error, 0 is timeout, 1 is success
273 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
274 {
275 data.clear();
276 PacketID pident;
277 pident.sock=sock;
278 pident.inNeeded=len;
279 pident.inIncompleteOkay=incompleteOkay;
280 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
281
282 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
283 if(!ret || ret==-1) { // timeout
284 t_fdm->removeReadFD(sock->getHandle());
285 }
286 else if(data.empty()) {// error, EOF or other
287 return -1;
288 }
289
290 return ret;
291 }
292
293 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
294 {
295 PacketID pident=*any_cast<PacketID>(&var);
296 char resp[512];
297 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
298 t_fdm->removeReadFD(fd);
299 if(ret >= 0) {
300 string data(resp, (size_t) ret);
301 MT->sendEvent(pident, &data);
302 }
303 else {
304 string empty;
305 MT->sendEvent(pident, &empty);
306 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
307 }
308 }
309 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
310 {
311 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
312 s.setNonBlocking();
313 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
314
315 s.bind(local);
316 s.connect(dest);
317 s.send(query);
318
319 PacketID pident;
320 pident.sock=&s;
321 pident.type=0;
322 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
323
324 string data;
325
326 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
327
328 if(!ret || ret==-1) { // timeout
329 t_fdm->removeReadFD(s.getHandle());
330 }
331 else if(data.empty()) {// error, EOF or other
332 // we could special case this
333 return data;
334 }
335 return data;
336 }
337
338 //! pick a random query local address
339 ComboAddress getQueryLocalAddress(int family, uint16_t port)
340 {
341 ComboAddress ret;
342 if(family==AF_INET) {
343 if(g_localQueryAddresses4.empty())
344 ret = g_local4;
345 else
346 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
347 ret.sin4.sin_port = htons(port);
348 }
349 else {
350 if(g_localQueryAddresses6.empty())
351 ret = g_local6;
352 else
353 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
354
355 ret.sin6.sin6_port = htons(port);
356 }
357 return ret;
358 }
359
360 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
361
362 static void setSocketBuffer(int fd, int optname, uint32_t size)
363 {
364 uint32_t psize=0;
365 socklen_t len=sizeof(psize);
366
367 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
368 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
369 return;
370 }
371
372 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
373 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
374 }
375
376
377 static void setSocketReceiveBuffer(int fd, uint32_t size)
378 {
379 setSocketBuffer(fd, SO_RCVBUF, size);
380 }
381
382 static void setSocketSendBuffer(int fd, uint32_t size)
383 {
384 setSocketBuffer(fd, SO_SNDBUF, size);
385 }
386
387
388 // you can ask this class for a UDP socket to send a query from
389 // this socket is not yours, don't even think about deleting it
390 // but after you call 'returnSocket' on it, don't assume anything anymore
391 class UDPClientSocks
392 {
393 unsigned int d_numsocks;
394 public:
395 UDPClientSocks() : d_numsocks(0)
396 {
397 }
398
399 typedef set<int> socks_t;
400 socks_t d_socks;
401
402 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
403 int getSocket(const ComboAddress& toaddr, int* fd)
404 {
405 *fd=makeClientSocket(toaddr.sin4.sin_family);
406 if(*fd < 0) // temporary error - receive exception otherwise
407 return -2;
408
409 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
410 int err = errno;
411 // returnSocket(*fd);
412 try {
413 closesocket(*fd);
414 }
415 catch(const PDNSException& e) {
416 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
417 }
418
419 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
420 return -2;
421 return -1;
422 }
423
424 d_socks.insert(*fd);
425 d_numsocks++;
426 return 0;
427 }
428
429 void returnSocket(int fd)
430 {
431 socks_t::iterator i=d_socks.find(fd);
432 if(i==d_socks.end()) {
433 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
434 }
435 returnSocketLocked(i);
436 }
437
438 // return a socket to the pool, or simply erase it
439 void returnSocketLocked(socks_t::iterator& i)
440 {
441 if(i==d_socks.end()) {
442 throw PDNSException("Trying to return a socket not in the pool");
443 }
444 try {
445 t_fdm->removeReadFD(*i);
446 }
447 catch(FDMultiplexerException& e) {
448 // we sometimes return a socket that has not yet been assigned to t_fdm
449 }
450 try {
451 closesocket(*i);
452 }
453 catch(const PDNSException& e) {
454 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
455 }
456
457 d_socks.erase(i++);
458 --d_numsocks;
459 }
460
461 // returns -1 for errors which might go away, throws for ones that won't
462 static int makeClientSocket(int family)
463 {
464 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
465
466 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
467 return ret;
468
469 if(ret<0)
470 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
471
472 // setCloseOnExec(ret); // we're not going to exec
473
474 int tries=10;
475 ComboAddress sin;
476 while(--tries) {
477 uint16_t port;
478
479 if(tries==1) // fall back to kernel 'random'
480 port = 0;
481 else
482 port = 1025 + dns_random(64510);
483
484 sin=getQueryLocalAddress(family, port); // does htons for us
485
486 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
487 break;
488 }
489 if(!tries)
490 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
491
492 setNonBlocking(ret);
493 return ret;
494 }
495 };
496
497 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
498
499 /* these two functions are used by LWRes */
500 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
501 int asendto(const char *data, size_t len, int flags,
502 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
503 {
504
505 PacketID pident;
506 pident.domain = domain;
507 pident.remote = toaddr;
508 pident.type = qtype;
509
510 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
511 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
512
513 for(; chain.first != chain.second; chain.first++) {
514 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
515 /*
516 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
517 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
518 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
519 */
520 chain.first->key.chain.insert(id); // we can chain
521 *fd=-1; // gets used in waitEvent / sendEvent later on
522 return 1;
523 }
524 }
525
526 int ret=t_udpclientsocks->getSocket(toaddr, fd);
527 if(ret < 0)
528 return ret;
529
530 pident.fd=*fd;
531 pident.id=id;
532
533 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
534 ret = send(*fd, data, len, 0);
535
536 int tmp = errno;
537
538 if(ret < 0)
539 t_udpclientsocks->returnSocket(*fd);
540
541 errno = tmp; // this is for logging purposes only
542 return ret;
543 }
544
545 // -1 is error, 0 is timeout, 1 is success
546 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
547 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
548 {
549 static optional<unsigned int> nearMissLimit;
550 if(!nearMissLimit)
551 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
552
553 PacketID pident;
554 pident.fd=fd;
555 pident.id=id;
556 pident.domain=domain;
557 pident.type = qtype;
558 pident.remote=fromaddr;
559
560 string packet;
561 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
562
563 if(ret > 0) {
564 if(packet.empty()) // means "error"
565 return -1;
566
567 *d_len=packet.size();
568 memcpy(data,packet.c_str(),min(len,*d_len));
569 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
570 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
571 g_stats.spoofCount++;
572 return -1;
573 }
574 }
575 else {
576 if(fd >= 0)
577 t_udpclientsocks->returnSocket(fd);
578 }
579 return ret;
580 }
581
582 static void writePid(void)
583 {
584 if(!::arg().mustDo("write-pid"))
585 return;
586 ofstream of(s_pidfname.c_str(), std::ios_base::app);
587 if(of)
588 of<< Utility::getpid() <<endl;
589 else
590 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
591 }
592
593 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
594 {
595 ++s_currentConnections;
596 (*t_tcpClientCounts)[d_remote]++;
597 }
598
599 TCPConnection::~TCPConnection()
600 {
601 try {
602 if(closesocket(d_fd) < 0)
603 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
604 }
605 catch(const PDNSException& e) {
606 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
607 }
608
609 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
610 t_tcpClientCounts->erase(d_remote);
611 --s_currentConnections;
612 }
613
614 AtomicCounter TCPConnection::s_currentConnections;
615
616 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
617
618 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
619 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
620 {
621 if(packetsize > 1000 && t_largeanswerremotes)
622 t_largeanswerremotes->push_back(remote);
623 switch(res) {
624 case RCode::ServFail:
625 if(t_servfailremotes) {
626 t_servfailremotes->push_back(remote);
627 if(query && t_servfailqueryring) // packet cache
628 t_servfailqueryring->push_back(make_pair(*query, qtype));
629 }
630 g_stats.servFails++;
631 break;
632 case RCode::NXDomain:
633 g_stats.nxDomains++;
634 break;
635 case RCode::NoError:
636 g_stats.noErrors++;
637 break;
638 }
639 }
640
641 static string makeLoginfo(DNSComboWriter* dc)
642 try
643 {
644 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
645 }
646 catch(...)
647 {
648 return "Exception making error message for exception";
649 }
650
651 #ifdef HAVE_PROTOBUF
652 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
653 {
654 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
655 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
656 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
657 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
658 message.setRequestorId(requestorId);
659 message.setDeviceId(deviceId);
660
661 if (!policyTags.empty()) {
662 message.setPolicyTags(policyTags);
663 }
664
665 // cerr <<message.toDebugString()<<endl;
666 std::string str;
667 message.serialize(str);
668 logger->queueData(str);
669 }
670
671 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
672 {
673 // cerr <<message.toDebugString()<<endl;
674 std::string str;
675 message.serialize(str);
676 logger->queueData(str);
677 }
678 #endif
679
680 /**
681 * Chases the CNAME provided by the PolicyCustom RPZ policy.
682 *
683 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
684 * @param qtype: The QType of the original query
685 * @param sr: A SyncRes
686 * @param res: An integer that will contain the RCODE of the lookup we do
687 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
688 */
689 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
690 {
691 if (spoofed.d_type == QType::CNAME) {
692 bool oldWantsRPZ = sr.getWantsRPZ();
693 sr.setWantsRPZ(false);
694 vector<DNSRecord> ans;
695 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
696 for (const auto& rec : ans) {
697 if(rec.d_place == DNSResourceRecord::ANSWER) {
698 ret.push_back(rec);
699 }
700 }
701 // Reset the RPZ state of the SyncRes
702 sr.setWantsRPZ(oldWantsRPZ);
703 }
704 }
705
706 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, const uint16_t maxAnswerSize)
707 {
708 pw.startRecord(rec.d_name, rec.d_type, rec.d_ttl, rec.d_class, rec.d_place);
709
710 if(rec.d_type != QType::OPT) // their TTL ain't real
711 minTTL = min(minTTL, rec.d_ttl);
712
713 rec.d_content->toPacket(pw);
714 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
715 pw.rollback();
716 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
717 pw.getHeader()->tc=1;
718 pw.truncate();
719 }
720 return false;
721 }
722
723 return true;
724 }
725
726 static void startDoResolve(void *p)
727 {
728 DNSComboWriter* dc=(DNSComboWriter *)p;
729 try {
730 if (t_queryring)
731 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
732
733 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
734 EDNSOpts edo;
735 bool haveEDNS=false;
736 if(getEDNSOpts(dc->d_mdp, &edo)) {
737 if(!dc->d_tcp) {
738 /* rfc6891 6.2.3:
739 "Values lower than 512 MUST be treated as equal to 512."
740 */
741 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
742 }
743 dc->d_ednsOpts = edo.d_options;
744 haveEDNS=true;
745
746 if (g_useIncomingECS && !dc->d_ecsParsed) {
747 for (const auto& o : edo.d_options) {
748 if (o.first == EDNSOptionCode::ECS) {
749 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
750 break;
751 }
752 }
753 }
754 }
755 /* perhaps there was no EDNS or no ECS but by now we looked */
756 dc->d_ecsParsed = true;
757 vector<DNSRecord> ret;
758 vector<uint8_t> packet;
759
760 auto luaconfsLocal = g_luaconfs.getLocal();
761 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
762 bool wantsRPZ(true);
763 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
764 #ifdef HAVE_PROTOBUF
765 if (luaconfsLocal->protobufServer) {
766 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
767 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
768 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
769 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
770 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
771 }
772 #endif /* HAVE_PROTOBUF */
773
774 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
775
776 pw.getHeader()->aa=0;
777 pw.getHeader()->ra=1;
778 pw.getHeader()->qr=1;
779 pw.getHeader()->tc=0;
780 pw.getHeader()->id=dc->d_mdp.d_header.id;
781 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
782 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
783
784 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
785
786 SyncRes sr(dc->d_now);
787
788 bool DNSSECOK=false;
789 if(t_pdl) {
790 sr.setLuaEngine(t_pdl);
791 }
792 sr.d_requestor=dc->d_remote; // ECS needs this too
793 if(g_dnssecmode != DNSSECMode::Off) {
794 sr.setDoDNSSEC(true);
795
796 // Does the requestor want DNSSEC records?
797 if(edo.d_Z & EDNSOpts::DNSSECOK) {
798 DNSSECOK=true;
799 g_stats.dnssecQueries++;
800 }
801 } else {
802 // Ignore the client-set CD flag
803 pw.getHeader()->cd=0;
804 }
805 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
806
807 #ifdef HAVE_PROTOBUF
808 sr.setInitialRequestId(dc->d_uuid);
809 #endif
810
811 if (g_useIncomingECS) {
812 sr.setIncomingECSFound(dc->d_ecsFound);
813 if (dc->d_ecsFound) {
814 sr.setIncomingECS(dc->d_ednssubnet);
815 }
816 }
817
818 bool tracedQuery=false; // we could consider letting Lua know about this too
819 bool variableAnswer = false;
820 bool shouldNotValidate = false;
821
822 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
823 int res = RCode::NoError;
824 DNSFilterEngine::Policy appliedPolicy;
825 DNSRecord spoofed;
826 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
827 dq.ednsFlags = &edo.d_Z;
828 dq.ednsOptions = &dc->d_ednsOpts;
829 dq.tag = dc->d_tag;
830 dq.discardedPolicies = &sr.d_discardedPolicies;
831 dq.policyTags = &dc->d_policyTags;
832 dq.appliedPolicy = &appliedPolicy;
833 dq.currentRecords = &ret;
834 dq.dh = &dc->d_mdp.d_header;
835 dq.data = dc->d_data;
836 #ifdef HAVE_PROTOBUF
837 dq.requestorId = dc->d_requestorId;
838 dq.deviceId = dc->d_deviceId;
839 #endif
840
841 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
842 pw.getHeader()->tc = 1;
843 res = 0;
844 variableAnswer = true;
845 goto sendit;
846 }
847
848 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
849 sr.setLogMode(SyncRes::Store);
850 tracedQuery=true;
851 }
852
853
854 if(!g_quiet || tracedQuery) {
855 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
856 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
857 if(!dc->d_ednssubnet.source.empty()) {
858 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
859 }
860 L<<endl;
861 }
862
863 sr.setId(MT->getTid());
864 if(!dc->d_mdp.d_header.rd)
865 sr.setCacheOnly();
866
867 if (t_pdl) {
868 t_pdl->prerpz(dq, res);
869 }
870
871 // Check if the query has a policy attached to it
872 if (wantsRPZ) {
873 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
874 }
875
876 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
877 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
878
879 sr.setWantsRPZ(wantsRPZ);
880 if(wantsRPZ) {
881 switch(appliedPolicy.d_kind) {
882 case DNSFilterEngine::PolicyKind::NoAction:
883 break;
884 case DNSFilterEngine::PolicyKind::Drop:
885 g_stats.policyDrops++;
886 g_stats.policyResults[appliedPolicy.d_kind]++;
887 delete dc;
888 dc=0;
889 return;
890 case DNSFilterEngine::PolicyKind::NXDOMAIN:
891 g_stats.policyResults[appliedPolicy.d_kind]++;
892 res=RCode::NXDomain;
893 goto haveAnswer;
894 case DNSFilterEngine::PolicyKind::NODATA:
895 g_stats.policyResults[appliedPolicy.d_kind]++;
896 res=RCode::NoError;
897 goto haveAnswer;
898 case DNSFilterEngine::PolicyKind::Custom:
899 g_stats.policyResults[appliedPolicy.d_kind]++;
900 res=RCode::NoError;
901 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
902 ret.push_back(spoofed);
903 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
904 goto haveAnswer;
905 case DNSFilterEngine::PolicyKind::Truncate:
906 if(!dc->d_tcp) {
907 g_stats.policyResults[appliedPolicy.d_kind]++;
908 res=RCode::NoError;
909 pw.getHeader()->tc=1;
910 goto haveAnswer;
911 }
912 break;
913 }
914 }
915
916 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
917 try {
918 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
919 shouldNotValidate = sr.wasOutOfBand();
920 }
921 catch(ImmediateServFailException &e) {
922 if(g_logCommonErrors)
923 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
924 res = RCode::ServFail;
925 }
926
927 dq.validationState = sr.getValidationState();
928
929 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
930 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
931 appliedPolicy = sr.d_appliedPolicy;
932 g_stats.policyResults[appliedPolicy.d_kind]++;
933 switch(appliedPolicy.d_kind) {
934 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
935 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
936 case DNSFilterEngine::PolicyKind::Drop:
937 g_stats.policyDrops++;
938 delete dc;
939 dc=0;
940 return;
941 case DNSFilterEngine::PolicyKind::NXDOMAIN:
942 ret.clear();
943 res=RCode::NXDomain;
944 goto haveAnswer;
945
946 case DNSFilterEngine::PolicyKind::NODATA:
947 ret.clear();
948 res=RCode::NoError;
949 goto haveAnswer;
950
951 case DNSFilterEngine::PolicyKind::Truncate:
952 if(!dc->d_tcp) {
953 ret.clear();
954 res=RCode::NoError;
955 pw.getHeader()->tc=1;
956 goto haveAnswer;
957 }
958 break;
959
960 case DNSFilterEngine::PolicyKind::Custom:
961 ret.clear();
962 res=RCode::NoError;
963 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
964 ret.push_back(spoofed);
965 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
966 goto haveAnswer;
967 }
968 }
969
970 if (wantsRPZ) {
971 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
972 }
973
974 if(t_pdl) {
975 if(res == RCode::NoError) {
976 auto i=ret.cbegin();
977 for(; i!= ret.cend(); ++i)
978 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
979 break;
980 if(i == ret.cend() && t_pdl->nodata(dq, res))
981 shouldNotValidate = true;
982
983 }
984 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
985 shouldNotValidate = true;
986
987 if(t_pdl->postresolve(dq, res))
988 shouldNotValidate = true;
989 }
990
991 if (wantsRPZ) { //XXX This block is repeated, see above
992 g_stats.policyResults[appliedPolicy.d_kind]++;
993 switch(appliedPolicy.d_kind) {
994 case DNSFilterEngine::PolicyKind::NoAction:
995 break;
996 case DNSFilterEngine::PolicyKind::Drop:
997 g_stats.policyDrops++;
998 delete dc;
999 dc=0;
1000 return;
1001 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1002 ret.clear();
1003 res=RCode::NXDomain;
1004 goto haveAnswer;
1005
1006 case DNSFilterEngine::PolicyKind::NODATA:
1007 ret.clear();
1008 res=RCode::NoError;
1009 goto haveAnswer;
1010
1011 case DNSFilterEngine::PolicyKind::Truncate:
1012 if(!dc->d_tcp) {
1013 ret.clear();
1014 res=RCode::NoError;
1015 pw.getHeader()->tc=1;
1016 goto haveAnswer;
1017 }
1018 break;
1019
1020 case DNSFilterEngine::PolicyKind::Custom:
1021 ret.clear();
1022 res=RCode::NoError;
1023 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
1024 ret.push_back(spoofed);
1025 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
1026 goto haveAnswer;
1027 }
1028 }
1029 }
1030 haveAnswer:;
1031 if(res == PolicyDecision::DROP) {
1032 g_stats.policyDrops++;
1033 delete dc;
1034 dc=0;
1035 return;
1036 }
1037 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1038 {
1039 string trace(sr.getTrace());
1040 if(!trace.empty()) {
1041 vector<string> lines;
1042 boost::split(lines, trace, boost::is_any_of("\n"));
1043 for(const string& line : lines) {
1044 if(!line.empty())
1045 L<<Logger::Warning<< line << endl;
1046 }
1047 }
1048 }
1049
1050 if(res == -1) {
1051 pw.getHeader()->rcode=RCode::ServFail;
1052 // no commit here, because no record
1053 g_stats.servFails++;
1054 }
1055 else {
1056 pw.getHeader()->rcode=res;
1057
1058 // Does the validation mode or query demand validation?
1059 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1060 try {
1061 if(sr.doLog()) {
1062 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1063 }
1064
1065 auto state = sr.getValidationState();
1066
1067 if(state == Secure) {
1068 if(sr.doLog()) {
1069 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1070 }
1071
1072 // Is the query source interested in the value of the ad-bit?
1073 if (dc->d_mdp.d_header.ad || DNSSECOK)
1074 pw.getHeader()->ad=1;
1075 }
1076 else if(state == Insecure) {
1077 if(sr.doLog()) {
1078 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1079 }
1080
1081 pw.getHeader()->ad=0;
1082 }
1083 else if(state == Bogus) {
1084 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1085 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1086 }
1087
1088 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1089 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1090 if(sr.doLog()) {
1091 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1092 }
1093
1094 pw.getHeader()->rcode=RCode::ServFail;
1095 goto sendit;
1096 } else {
1097 if(sr.doLog()) {
1098 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1099 }
1100 }
1101 }
1102 }
1103 catch(ImmediateServFailException &e) {
1104 if(g_logCommonErrors)
1105 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1106 pw.getHeader()->rcode=RCode::ServFail;
1107 goto sendit;
1108 }
1109 }
1110
1111 if(ret.size()) {
1112 orderAndShuffle(ret);
1113 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1114 stable_sort(ret.begin(), ret.end(), *sl);
1115 variableAnswer=true;
1116 }
1117 }
1118
1119 bool needCommit = false;
1120 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1121 if( ! DNSSECOK &&
1122 ( i->d_type == QType::NSEC3 ||
1123 (
1124 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1125 (
1126 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1127 i->d_place != DNSResourceRecord::ANSWER
1128 )
1129 )
1130 )
1131 ) {
1132 continue;
1133 }
1134
1135 if (!addRecordToPacket(pw, *i, minTTL, maxanswersize)) {
1136 needCommit = false;
1137 break;
1138 }
1139 needCommit = true;
1140
1141 #ifdef HAVE_PROTOBUF
1142 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1143 pbMessage.addRR(*i);
1144 }
1145 #endif
1146 }
1147 if(needCommit)
1148 pw.commit();
1149 }
1150 sendit:;
1151
1152 if (haveEDNS) {
1153 /* we try to add the EDNS OPT RR even for truncated answers,
1154 as rfc6891 states:
1155 "The minimal response MUST be the DNS header, question section, and an
1156 OPT record. This MUST also occur when a truncated response (using
1157 the DNS header's TC bit) is returned."
1158 */
1159 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, maxanswersize)) {
1160 pw.commit();
1161 }
1162 }
1163
1164 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1165 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1166 #ifdef HAVE_PROTOBUF
1167 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1168 pbMessage.setBytes(packet.size());
1169 pbMessage.setResponseCode(pw.getHeader()->rcode);
1170 if (appliedPolicy.d_name) {
1171 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1172 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1173 }
1174 pbMessage.setPolicyTags(dc->d_policyTags);
1175 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1176 pbMessage.setRequestorId(dq.requestorId);
1177 pbMessage.setDeviceId(dq.deviceId);
1178 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1179 }
1180 #endif
1181 if(!dc->d_tcp) {
1182 struct msghdr msgh;
1183 struct iovec iov;
1184 char cbuf[256];
1185 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1186 msgh.msg_control=NULL;
1187
1188 if(g_fromtosockets.count(dc->d_socket)) {
1189 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1190 }
1191 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1192 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1193 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1194 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1195 string((const char*)&*packet.begin(), packet.size()),
1196 g_now.tv_sec,
1197 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1198 min(minTTL,SyncRes::s_packetcachettl),
1199 &pbMessage);
1200 }
1201 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1202 }
1203 else {
1204 char buf[2];
1205 buf[0]=packet.size()/256;
1206 buf[1]=packet.size()%256;
1207
1208 Utility::iovec iov[2];
1209
1210 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1211 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1212
1213 int wret=Utility::writev(dc->d_socket, iov, 2);
1214 bool hadError=true;
1215
1216 if(wret == 0)
1217 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1218 else if(wret < 0 )
1219 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1220 else if((unsigned int)wret != 2 + packet.size())
1221 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1222 else
1223 hadError=false;
1224
1225 // update tcp connection status, either by closing or moving to 'BYTE0'
1226
1227 if(hadError) {
1228 // no need to remove us from FDM, we weren't there
1229 dc->d_socket = -1;
1230 }
1231 else {
1232 dc->d_tcpConnection->queriesCount++;
1233 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1234 dc->d_socket = -1;
1235 }
1236 else {
1237 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1238 Utility::gettimeofday(&g_now, 0); // needs to be updated
1239 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1240 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1241 }
1242 }
1243 }
1244 float spent=makeFloat(sr.getNow()-dc->d_now);
1245 if(!g_quiet) {
1246 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1247 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1248 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1249 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1250
1251 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1252 L<< ", dnssec="<<vStates[sr.getValidationState()];
1253 }
1254
1255 L<<endl;
1256
1257 }
1258
1259 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
1260
1261 if(spent < 0.001)
1262 g_stats.answers0_1++;
1263 else if(spent < 0.010)
1264 g_stats.answers1_10++;
1265 else if(spent < 0.1)
1266 g_stats.answers10_100++;
1267 else if(spent < 1.0)
1268 g_stats.answers100_1000++;
1269 else
1270 g_stats.answersSlow++;
1271
1272 uint64_t newLat=(uint64_t)(spent*1000000);
1273 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1274 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1275 // no worries, we do this for packet cache hits elsewhere
1276
1277 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1278 if(ourtime < 1)
1279 g_stats.ourtime0_1++;
1280 else if(ourtime < 2)
1281 g_stats.ourtime1_2++;
1282 else if(ourtime < 4)
1283 g_stats.ourtime2_4++;
1284 else if(ourtime < 8)
1285 g_stats.ourtime4_8++;
1286 else if(ourtime < 16)
1287 g_stats.ourtime8_16++;
1288 else if(ourtime < 32)
1289 g_stats.ourtime16_32++;
1290 else {
1291 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1292 g_stats.ourtimeSlow++;
1293 }
1294 if(ourtime >= 0.0) {
1295 newLat=ourtime*1000; // usec
1296 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1297 }
1298 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1299 delete dc;
1300 dc=0;
1301 }
1302 catch(PDNSException &ae) {
1303 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1304 delete dc;
1305 }
1306 catch(MOADNSException& e) {
1307 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1308 delete dc;
1309 }
1310 catch(std::exception& e) {
1311 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1312
1313 // Luawrapper nests the exception from Lua, so we unnest it here
1314 try {
1315 std::rethrow_if_nested(e);
1316 } catch(const std::exception& ne) {
1317 L<<". Extra info: "<<ne.what();
1318 } catch(...) {}
1319
1320 L<<endl;
1321 delete dc;
1322 }
1323 catch(...) {
1324 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1325 }
1326
1327 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1328 }
1329
1330 static void makeControlChannelSocket(int processNum=-1)
1331 {
1332 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1333 if(processNum >= 0)
1334 sockname += "."+std::to_string(processNum);
1335 sockname+=".controlsocket";
1336 s_rcc.listen(sockname);
1337
1338 int sockowner = -1;
1339 int sockgroup = -1;
1340
1341 if (!::arg().isEmpty("socket-group"))
1342 sockgroup=::arg().asGid("socket-group");
1343 if (!::arg().isEmpty("socket-owner"))
1344 sockowner=::arg().asUid("socket-owner");
1345
1346 if (sockgroup > -1 || sockowner > -1) {
1347 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1348 unixDie("Failed to chown control socket");
1349 }
1350 }
1351
1352 // do mode change if socket-mode is given
1353 if(!::arg().isEmpty("socket-mode")) {
1354 mode_t sockmode=::arg().asMode("socket-mode");
1355 if(chmod(sockname.c_str(), sockmode) < 0) {
1356 unixDie("Failed to chmod control socket");
1357 }
1358 }
1359 }
1360
1361 static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
1362 {
1363 bool found = false;
1364 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1365 size_t questionLen = question.length();
1366 unsigned int consumed=0;
1367 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1368
1369 size_t pos= sizeof(dnsheader)+consumed+4;
1370 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1371 = 11 */
1372 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1373 /* OPT root label (1) followed by type (2) */
1374 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1375 if (!options) {
1376 char* ecsStart = nullptr;
1377 size_t ecsLen = 0;
1378 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1379 if (res == 0 && ecsLen > 4) {
1380 EDNSSubnetOpts eso;
1381 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1382 *ednssubnet=eso;
1383 found = true;
1384 }
1385 }
1386 }
1387 else {
1388 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1389 if (res == 0) {
1390 const auto& it = options->find(EDNSOptionCode::ECS);
1391 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1392 EDNSSubnetOpts eso;
1393 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1394 *ednssubnet=eso;
1395 found = true;
1396 }
1397 }
1398 }
1399 }
1400 }
1401 }
1402 return found;
1403 }
1404
1405 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1406 {
1407 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1408
1409 if(conn->state==TCPConnection::BYTE0) {
1410 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1411 if(bytes==1)
1412 conn->state=TCPConnection::BYTE1;
1413 if(bytes==2) {
1414 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1415 conn->bytesread=0;
1416 conn->state=TCPConnection::GETQUESTION;
1417 }
1418 if(!bytes || bytes < 0) {
1419 t_fdm->removeReadFD(fd);
1420 return;
1421 }
1422 }
1423 else if(conn->state==TCPConnection::BYTE1) {
1424 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1425 if(bytes==1) {
1426 conn->state=TCPConnection::GETQUESTION;
1427 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1428 conn->bytesread=0;
1429 }
1430 if(!bytes || bytes < 0) {
1431 if(g_logCommonErrors)
1432 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1433 t_fdm->removeReadFD(fd);
1434 return;
1435 }
1436 }
1437 else if(conn->state==TCPConnection::GETQUESTION) {
1438 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1439 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1440 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1441 t_fdm->removeReadFD(fd);
1442 return;
1443 }
1444 conn->bytesread+=(uint16_t)bytes;
1445 if(conn->bytesread==conn->qlen) {
1446 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1447
1448 DNSComboWriter* dc=nullptr;
1449 try {
1450 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1451 }
1452 catch(MOADNSException &mde) {
1453 g_stats.clientParseError++;
1454 if(g_logCommonErrors)
1455 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1456 return;
1457 }
1458 dc->d_tcpConnection = conn; // carry the torch
1459 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1460 dc->d_tcp=true;
1461 dc->setRemote(&conn->d_remote);
1462 ComboAddress dest;
1463 memset(&dest, 0, sizeof(dest));
1464 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1465 socklen_t len = dest.getSocklen();
1466 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1467 dc->setLocal(dest);
1468 DNSName qname;
1469 uint16_t qtype=0;
1470 uint16_t qclass=0;
1471 bool needECS = false;
1472 string requestorId;
1473 string deviceId;
1474 #ifdef HAVE_PROTOBUF
1475 auto luaconfsLocal = g_luaconfs.getLocal();
1476 if (luaconfsLocal->protobufServer) {
1477 needECS = true;
1478 }
1479 #endif
1480
1481 if(needECS || (t_pdl && t_pdl->d_gettag)) {
1482
1483 try {
1484 std::map<uint16_t, EDNSOptionView> ednsOptions;
1485 dc->d_ecsParsed = true;
1486 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1487
1488 if(t_pdl && t_pdl->d_gettag) {
1489 try {
1490 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1491 }
1492 catch(std::exception& e) {
1493 if(g_logCommonErrors)
1494 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1495 }
1496 }
1497 }
1498 catch(std::exception& e)
1499 {
1500 if(g_logCommonErrors)
1501 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1502 }
1503 }
1504 #ifdef HAVE_PROTOBUF
1505 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1506 dc->d_requestorId = requestorId;
1507 dc->d_deviceId = deviceId;
1508 dc->d_uuid = (*t_uuidGenerator)();
1509 }
1510
1511 if(luaconfsLocal->protobufServer) {
1512 try {
1513 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1514
1515 if (!luaconfsLocal->protobufTaggedOnly) {
1516 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1517 }
1518 }
1519 catch(std::exception& e) {
1520 if(g_logCommonErrors)
1521 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1522 }
1523 }
1524 #endif
1525 if(dc->d_mdp.d_header.qr) {
1526 delete dc;
1527 g_stats.ignoredCount++;
1528 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1529 return;
1530 }
1531 if(dc->d_mdp.d_header.opcode) {
1532 delete dc;
1533 g_stats.ignoredCount++;
1534 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1535 return;
1536 }
1537 else {
1538 ++g_stats.qcounter;
1539 ++g_stats.tcpqcounter;
1540 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1541 return;
1542 }
1543 }
1544 }
1545 }
1546
1547 //! Handle new incoming TCP connection
1548 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1549 {
1550 ComboAddress addr;
1551 socklen_t addrlen=sizeof(addr);
1552 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1553 if(newsock>=0) {
1554 if(MT->numProcesses() > g_maxMThreads) {
1555 g_stats.overCapacityDrops++;
1556 try {
1557 closesocket(newsock);
1558 }
1559 catch(const PDNSException& e) {
1560 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1561 }
1562 return;
1563 }
1564
1565 if(t_remotes)
1566 t_remotes->push_back(addr);
1567 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1568 if(!g_quiet)
1569 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1570
1571 g_stats.unauthorizedTCP++;
1572 try {
1573 closesocket(newsock);
1574 }
1575 catch(const PDNSException& e) {
1576 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1577 }
1578 return;
1579 }
1580 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1581 g_stats.tcpClientOverflow++;
1582 try {
1583 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1584 }
1585 catch(const PDNSException& e) {
1586 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1587 }
1588 return;
1589 }
1590
1591 setNonBlocking(newsock);
1592 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
1593 tc->state=TCPConnection::BYTE0;
1594
1595 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1596
1597 struct timeval now;
1598 Utility::gettimeofday(&now, 0);
1599 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1600 }
1601 }
1602
1603 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1604 {
1605 gettimeofday(&g_now, 0);
1606 struct timeval diff = g_now - tv;
1607 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1608
1609 if(tv.tv_sec && delta > 1000.0) {
1610 g_stats.tooOldDrops++;
1611 return 0;
1612 }
1613
1614 ++g_stats.qcounter;
1615 if(fromaddr.sin4.sin_family==AF_INET6)
1616 g_stats.ipv6qcounter++;
1617
1618 string response;
1619 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1620 unsigned int ctag=0;
1621 uint32_t qhash = 0;
1622 bool needECS = false;
1623 std::vector<std::string> policyTags;
1624 LuaContext::LuaObject data;
1625 string requestorId;
1626 string deviceId;
1627 #ifdef HAVE_PROTOBUF
1628 boost::uuids::uuid uniqueId;
1629 auto luaconfsLocal = g_luaconfs.getLocal();
1630 if (luaconfsLocal->protobufServer) {
1631 uniqueId = (*t_uuidGenerator)();
1632 needECS = true;
1633 } else if (luaconfsLocal->outgoingProtobufServer) {
1634 uniqueId = (*t_uuidGenerator)();
1635 }
1636 #endif
1637 EDNSSubnetOpts ednssubnet;
1638 bool ecsFound = false;
1639 bool ecsParsed = false;
1640 try {
1641 DNSName qname;
1642 uint16_t qtype=0;
1643 uint16_t qclass=0;
1644 uint32_t age;
1645 bool qnameParsed=false;
1646 #ifdef MALLOC_TRACE
1647 /*
1648 static uint64_t last=0;
1649 if(!last)
1650 g_mtracer->clearAllocators();
1651 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1652 last=g_mtracer->getAllocs();
1653 cout<<g_mtracer->topAllocatorsString()<<endl;
1654 g_mtracer->clearAllocators();
1655 */
1656 #endif
1657
1658 if(needECS || (t_pdl && t_pdl->d_gettag)) {
1659 try {
1660 std::map<uint16_t, EDNSOptionView> ednsOptions;
1661 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1662 qnameParsed = true;
1663 ecsParsed = true;
1664
1665 if(t_pdl && t_pdl->d_gettag) {
1666 try {
1667 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1668 }
1669 catch(std::exception& e) {
1670 if(g_logCommonErrors)
1671 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1672 }
1673 }
1674 }
1675 catch(std::exception& e)
1676 {
1677 if(g_logCommonErrors)
1678 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1679 }
1680 }
1681
1682 bool cacheHit = false;
1683 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1684 #ifdef HAVE_PROTOBUF
1685 if(luaconfsLocal->protobufServer) {
1686 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1687 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
1688 }
1689 }
1690 #endif /* HAVE_PROTOBUF */
1691
1692 if (qnameParsed) {
1693 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1694 }
1695 else {
1696 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1697 }
1698
1699 if (cacheHit) {
1700 #ifdef HAVE_PROTOBUF
1701 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1702 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1703 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1704 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1705 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1706 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1707 pbMessage.setRequestorId(requestorId);
1708 pbMessage.setDeviceId(deviceId);
1709 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1710 }
1711 #endif /* HAVE_PROTOBUF */
1712 if(!g_quiet)
1713 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1714
1715 g_stats.packetCacheHits++;
1716 SyncRes::s_queries++;
1717 ageDNSPacket(response, age);
1718 struct msghdr msgh;
1719 struct iovec iov;
1720 char cbuf[256];
1721 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1722 msgh.msg_control=NULL;
1723
1724 if(g_fromtosockets.count(fd)) {
1725 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1726 }
1727 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1728 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1729
1730 if(response.length() >= sizeof(struct dnsheader)) {
1731 struct dnsheader tmpdh;
1732 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1733 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1734 }
1735 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1736 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1737 return 0;
1738 }
1739 }
1740 catch(std::exception& e) {
1741 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1742 return 0;
1743 }
1744
1745 if(t_pdl) {
1746 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
1747 if(!g_quiet)
1748 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1749 g_stats.policyDrops++;
1750 return 0;
1751 }
1752 }
1753
1754 if(MT->numProcesses() > g_maxMThreads) {
1755 if(!g_quiet)
1756 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1757
1758 g_stats.overCapacityDrops++;
1759 return 0;
1760 }
1761
1762 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1763 dc->setSocket(fd);
1764 dc->d_tag=ctag;
1765 dc->d_qhash=qhash;
1766 dc->d_query = question;
1767 dc->setRemote(&fromaddr);
1768 dc->setLocal(destaddr);
1769 dc->d_tcp=false;
1770 dc->d_policyTags = policyTags;
1771 dc->d_data = data;
1772 dc->d_ecsFound = ecsFound;
1773 dc->d_ecsParsed = ecsParsed;
1774 dc->d_ednssubnet = ednssubnet;
1775 #ifdef HAVE_PROTOBUF
1776 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1777 dc->d_uuid = uniqueId;
1778 }
1779 dc->d_requestorId = requestorId;
1780 dc->d_deviceId = deviceId;
1781 #endif
1782
1783 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1784 return 0;
1785 }
1786
1787
1788 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1789 {
1790 ssize_t len;
1791 char data[1500];
1792 ComboAddress fromaddr;
1793 struct msghdr msgh;
1794 struct iovec iov;
1795 char cbuf[256];
1796 bool firstQuery = true;
1797
1798 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1799 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1800
1801 for(;;)
1802 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1803
1804 firstQuery = false;
1805
1806 if(t_remotes)
1807 t_remotes->push_back(fromaddr);
1808
1809 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1810 if(!g_quiet)
1811 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1812
1813 g_stats.unauthorizedUDP++;
1814 return;
1815 }
1816 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1817 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1818 if(!g_quiet)
1819 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1820
1821 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1822 return;
1823 }
1824 try {
1825 dnsheader* dh=(dnsheader*)data;
1826
1827 if(dh->qr) {
1828 g_stats.ignoredCount++;
1829 if(g_logCommonErrors)
1830 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1831 }
1832 else if(dh->opcode) {
1833 g_stats.ignoredCount++;
1834 if(g_logCommonErrors)
1835 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1836 }
1837 else {
1838 string question(data, (size_t)len);
1839 struct timeval tv={0,0};
1840 HarvestTimestamp(&msgh, &tv);
1841 ComboAddress dest;
1842 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1843 auto loc = rplookup(g_listenSocketsAddresses, fd);
1844 if(HarvestDestinationAddress(&msgh, &dest)) {
1845 // but.. need to get port too
1846 if(loc)
1847 dest.sin4.sin_port = loc->sin4.sin_port;
1848 }
1849 else {
1850 if(loc) {
1851 dest = *loc;
1852 }
1853 else {
1854 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1855 socklen_t slen = dest.getSocklen();
1856 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1857 }
1858 }
1859 if(g_weDistributeQueries)
1860 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1861 else
1862 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1863 }
1864 }
1865 catch(MOADNSException& mde) {
1866 g_stats.clientParseError++;
1867 if(g_logCommonErrors)
1868 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1869 }
1870 catch(std::runtime_error& e) {
1871 g_stats.clientParseError++;
1872 if(g_logCommonErrors)
1873 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1874 }
1875 }
1876 else {
1877 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1878 if(firstQuery && errno == EAGAIN)
1879 g_stats.noPacketError++;
1880
1881 break;
1882 }
1883 }
1884
1885 static void makeTCPServerSockets(unsigned int threadId)
1886 {
1887 int fd;
1888 vector<string>locals;
1889 stringtok(locals,::arg()["local-address"]," ,");
1890
1891 if(locals.empty())
1892 throw PDNSException("No local address specified");
1893
1894 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1895 ServiceTuple st;
1896 st.port=::arg().asNum("local-port");
1897 parseService(*i, st);
1898
1899 ComboAddress sin;
1900
1901 memset((char *)&sin,0, sizeof(sin));
1902 sin.sin4.sin_family = AF_INET;
1903 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1904 sin.sin6.sin6_family = AF_INET6;
1905 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1906 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1907 }
1908
1909 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1910 if(fd<0)
1911 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1912
1913 setCloseOnExec(fd);
1914
1915 int tmp=1;
1916 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
1917 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1918 exit(1);
1919 }
1920 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1921 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1922 }
1923
1924 #ifdef TCP_DEFER_ACCEPT
1925 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
1926 if(i==locals.begin())
1927 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1928 }
1929 #endif
1930
1931 if( ::arg().mustDo("non-local-bind") )
1932 Utility::setBindAny(AF_INET, fd);
1933
1934 #ifdef SO_REUSEPORT
1935 if(g_reusePort) {
1936 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
1937 throw PDNSException("SO_REUSEPORT: "+stringerror());
1938 }
1939 #endif
1940
1941 if (::arg().asNum("tcp-fast-open") > 0) {
1942 #ifdef TCP_FASTOPEN
1943 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1944 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1945 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1946 }
1947 #else
1948 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1949 #endif
1950 }
1951
1952 sin.sin4.sin_port = htons(st.port);
1953 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1954 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1955 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1956
1957 setNonBlocking(fd);
1958 setSocketSendBuffer(fd, 65000);
1959 listen(fd, 128);
1960 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
1961 g_tcpListenSockets.push_back(fd);
1962 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1963 // - fd is not that which we know here, but returned from accept()
1964 if(sin.sin4.sin_family == AF_INET)
1965 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1966 else
1967 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1968 }
1969 }
1970
1971 static void makeUDPServerSockets(unsigned int threadId)
1972 {
1973 int one=1;
1974 vector<string>locals;
1975 stringtok(locals,::arg()["local-address"]," ,");
1976
1977 if(locals.empty())
1978 throw PDNSException("No local address specified");
1979
1980 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1981 ServiceTuple st;
1982 st.port=::arg().asNum("local-port");
1983 parseService(*i, st);
1984
1985 ComboAddress sin;
1986
1987 memset(&sin, 0, sizeof(sin));
1988 sin.sin4.sin_family = AF_INET;
1989 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1990 sin.sin6.sin6_family = AF_INET6;
1991 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1992 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
1993 }
1994
1995 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
1996 if(fd < 0) {
1997 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
1998 }
1999 if (!setSocketTimestamps(fd))
2000 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
2001
2002 if(IsAnyAddress(sin)) {
2003 if(sin.sin4.sin_family == AF_INET)
2004 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2005 g_fromtosockets.insert(fd);
2006 #ifdef IPV6_RECVPKTINFO
2007 if(sin.sin4.sin_family == AF_INET6)
2008 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2009 g_fromtosockets.insert(fd);
2010 #endif
2011 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2012 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2013 }
2014 }
2015 if( ::arg().mustDo("non-local-bind") )
2016 Utility::setBindAny(AF_INET6, fd);
2017
2018 setCloseOnExec(fd);
2019
2020 setSocketReceiveBuffer(fd, 250000);
2021 sin.sin4.sin_port = htons(st.port);
2022
2023
2024 #ifdef SO_REUSEPORT
2025 if(g_reusePort) {
2026 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2027 throw PDNSException("SO_REUSEPORT: "+stringerror());
2028 }
2029 #endif
2030 socklen_t socklen=sin.getSocklen();
2031 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2032 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2033
2034 setNonBlocking(fd);
2035
2036 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
2037 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2038 if(sin.sin4.sin_family == AF_INET)
2039 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2040 else
2041 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2042 }
2043 }
2044
2045 static void daemonize(void)
2046 {
2047 if(fork())
2048 exit(0); // bye bye
2049
2050 setsid();
2051
2052 int i=open("/dev/null",O_RDWR); /* open stdin */
2053 if(i < 0)
2054 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2055 else {
2056 dup2(i,0); /* stdin */
2057 dup2(i,1); /* stderr */
2058 dup2(i,2); /* stderr */
2059 close(i);
2060 }
2061 }
2062
2063 static void usr1Handler(int)
2064 {
2065 statsWanted=true;
2066 }
2067
2068 static void usr2Handler(int)
2069 {
2070 g_quiet= !g_quiet;
2071 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2072 ::arg().set("quiet")=g_quiet ? "" : "no";
2073 }
2074
2075 static void doStats(void)
2076 {
2077 static time_t lastOutputTime;
2078 static uint64_t lastQueryCount;
2079
2080 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2081 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2082
2083 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2084 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2085 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2086 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2087 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2088
2089 L<<Logger::Notice<<"stats: throttle map: "
2090 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2091 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2092 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2093 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2094 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2095 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2096 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2097
2098 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2099 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2100
2101 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2102 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2103
2104 time_t now = time(0);
2105 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2106 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2107 }
2108 lastOutputTime = now;
2109 lastQueryCount = SyncRes::s_queries;
2110 }
2111 else if(statsWanted)
2112 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
2113
2114 statsWanted=false;
2115 }
2116
2117 static void houseKeeping(void *)
2118 {
2119 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2120 static thread_local int cleanCounter=0;
2121 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2122 try {
2123 if(s_running)
2124 return;
2125 s_running=true;
2126
2127 struct timeval now;
2128 Utility::gettimeofday(&now, 0);
2129
2130 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2131 DTime dt;
2132 dt.setTimeval(now);
2133 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2134 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2135
2136 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2137
2138 if(!((cleanCounter++)%40)) { // this is a full scan!
2139 time_t limit=now.tv_sec-300;
2140 SyncRes::pruneNSSpeeds(limit);
2141 }
2142 last_prune=time(0);
2143 }
2144
2145 if(now.tv_sec - last_rootupdate > 7200) {
2146 int res = SyncRes::getRootNS(g_now, nullptr);
2147 if (!res)
2148 last_rootupdate=now.tv_sec;
2149 }
2150
2151 if(!t_id) {
2152 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
2153 doStats();
2154 last_stat=time(0);
2155 }
2156
2157 if(now.tv_sec - last_secpoll >= 3600) {
2158 try {
2159 doSecPoll(&last_secpoll);
2160 }
2161 catch(std::exception& e)
2162 {
2163 L<<Logger::Error<<"Exception while performing security poll: "<<e.what()<<endl;
2164 }
2165 catch(PDNSException& e)
2166 {
2167 L<<Logger::Error<<"Exception while performing security poll: "<<e.reason<<endl;
2168 }
2169 catch(...)
2170 {
2171 L<<Logger::Error<<"Exception while performing security poll"<<endl;
2172 }
2173
2174 }
2175 }
2176 s_running=false;
2177 }
2178 catch(PDNSException& ae)
2179 {
2180 s_running=false;
2181 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2182 throw;
2183 }
2184 }
2185
2186 static void makeThreadPipes()
2187 {
2188 for(unsigned int n=0; n < g_numThreads; ++n) {
2189 struct ThreadPipeSet tps;
2190 int fd[2];
2191 if(pipe(fd) < 0)
2192 unixDie("Creating pipe for inter-thread communications");
2193
2194 tps.readToThread = fd[0];
2195 tps.writeToThread = fd[1];
2196
2197 if(pipe(fd) < 0)
2198 unixDie("Creating pipe for inter-thread communications");
2199 tps.readFromThread = fd[0];
2200 tps.writeFromThread = fd[1];
2201
2202 g_pipes.push_back(tps);
2203 }
2204 }
2205
2206 struct ThreadMSG
2207 {
2208 pipefunc_t func;
2209 bool wantAnswer;
2210 };
2211
2212 void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2213 {
2214 unsigned int n = 0;
2215 for(ThreadPipeSet& tps : g_pipes)
2216 {
2217 if(n++ == t_id) {
2218 if(!skipSelf)
2219 func(); // don't write to ourselves!
2220 continue;
2221 }
2222
2223 ThreadMSG* tmsg = new ThreadMSG();
2224 tmsg->func = func;
2225 tmsg->wantAnswer = true;
2226 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2227 delete tmsg;
2228 unixDie("write to thread pipe returned wrong size or error");
2229 }
2230
2231 string* resp;
2232 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2233 unixDie("read from thread pipe returned wrong size or error");
2234
2235 if(resp) {
2236 // cerr <<"got response: " << *resp << endl;
2237 delete resp;
2238 }
2239 }
2240 }
2241
2242 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2243 {
2244 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2245 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2246
2247 if(target == t_id) {
2248 func();
2249 return;
2250 }
2251 ThreadPipeSet& tps = g_pipes[target];
2252 ThreadMSG* tmsg = new ThreadMSG();
2253 tmsg->func = func;
2254 tmsg->wantAnswer = false;
2255
2256 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2257 delete tmsg;
2258 unixDie("write to thread pipe returned wrong size or error");
2259 }
2260 }
2261
2262 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2263 {
2264 ThreadMSG* tmsg = nullptr;
2265
2266 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
2267 unixDie("read from thread pipe returned wrong size or error");
2268 }
2269
2270 void *resp=0;
2271 try {
2272 resp = tmsg->func();
2273 }
2274 catch(std::exception& e) {
2275 if(g_logCommonErrors)
2276 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2277 }
2278 catch(PDNSException& e) {
2279 if(g_logCommonErrors)
2280 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2281 }
2282 if(tmsg->wantAnswer) {
2283 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2284 delete tmsg;
2285 unixDie("write to thread pipe returned wrong size or error");
2286 }
2287 }
2288
2289 delete tmsg;
2290 }
2291
2292 template<class T> void *voider(const boost::function<T*()>& func)
2293 {
2294 return func();
2295 }
2296
2297 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2298 {
2299 a.insert(a.end(), b.begin(), b.end());
2300 return a;
2301 }
2302
// Appends every (name, type) pair of b to the end of a and returns a.
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >& a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  a.insert(a.end(), b.cbegin(), b.cend());
  return a;
}
2308
2309 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2310 {
2311 a.insert(a.end(), b.begin(), b.end());
2312 return a;
2313 }
2314
2315
2316 template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
2317 {
2318 unsigned int n = 0;
2319 T ret=T();
2320 for(ThreadPipeSet& tps : g_pipes)
2321 {
2322 if(n++ == t_id) {
2323 if(!skipSelf) {
2324 T* resp = (T*)func(); // don't write to ourselves!
2325 if(resp) {
2326 //~ cerr <<"got direct: " << *resp << endl;
2327 ret += *resp;
2328 delete resp;
2329 }
2330 }
2331 continue;
2332 }
2333
2334 ThreadMSG* tmsg = new ThreadMSG();
2335 tmsg->func = boost::bind(voider<T>, func);
2336 tmsg->wantAnswer = true;
2337
2338 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2339 delete tmsg;
2340 unixDie("write to thread pipe returned wrong size or error");
2341 }
2342
2343 T* resp;
2344 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2345 unixDie("read from thread pipe returned wrong size or error");
2346
2347 if(resp) {
2348 //~ cerr <<"got response: " << *resp << endl;
2349 ret += *resp;
2350 delete resp;
2351 }
2352 }
2353 return ret;
2354 }
2355
2356 template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2357 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
2358 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
2359 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
2360
2361 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2362 {
2363 string remote;
2364 string msg=s_rcc.recv(&remote);
2365 RecursorControlParser rcp;
2366 RecursorControlParser::func_t* command;
2367
2368 string answer=rcp.getAnswer(msg, &command);
2369
2370 // If we are inside a chroot, we need to strip
2371 if (!arg()["chroot"].empty()) {
2372 size_t len = arg()["chroot"].length();
2373 remote = remote.substr(len);
2374 }
2375
2376 try {
2377 s_rcc.send(answer, &remote);
2378 command();
2379 }
2380 catch(std::exception& e) {
2381 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2382 }
2383 catch(PDNSException& ae) {
2384 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2385 }
2386 }
2387
2388 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2389 {
2390 PacketID* pident=any_cast<PacketID>(&var);
2391 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2392
2393 shared_array<char> buffer(new char[pident->inNeeded]);
2394
2395 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2396 if(ret > 0) {
2397 pident->inMSG.append(&buffer[0], &buffer[ret]);
2398 pident->inNeeded-=(size_t)ret;
2399 if(!pident->inNeeded || pident->inIncompleteOkay) {
2400 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2401 PacketID pid=*pident;
2402 string msg=pident->inMSG;
2403
2404 t_fdm->removeReadFD(fd);
2405 MT->sendEvent(pid, &msg);
2406 }
2407 else {
2408 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2409 }
2410 }
2411 else {
2412 PacketID tmp=*pident;
2413 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2414 string empty;
2415 MT->sendEvent(tmp, &empty); // this conveys error status
2416 }
2417 }
2418
2419 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2420 {
2421 PacketID* pid=any_cast<PacketID>(&var);
2422 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2423 if(ret > 0) {
2424 pid->outPos+=(ssize_t)ret;
2425 if(pid->outPos==pid->outMSG.size()) {
2426 PacketID tmp=*pid;
2427 t_fdm->removeWriteFD(fd);
2428 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2429 }
2430 }
2431 else { // error or EOF
2432 PacketID tmp(*pid);
2433 t_fdm->removeWriteFD(fd);
2434 string sent;
2435 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2436 }
2437 }
2438
2439 // resend event to everybody chained onto it
2440 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2441 {
2442 if(iter->key.chain.empty())
2443 return;
2444 // cerr<<"doResends called!\n";
2445 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2446 resend.fd=-1;
2447 resend.id=*i;
2448 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2449
2450 MT->sendEvent(resend, &content);
2451 g_stats.chainResends++;
2452 }
2453 }
2454
2455 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2456 {
2457 PacketID pid=any_cast<PacketID>(var);
2458 ssize_t len;
2459 char data[g_outgoingEDNSBufsize];
2460 ComboAddress fromaddr;
2461 socklen_t addrlen=sizeof(fromaddr);
2462
2463 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2464
2465 if(len < (ssize_t) sizeof(dnsheader)) {
2466 if(len < 0)
2467 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2468 else {
2469 g_stats.serverParseError++;
2470 if(g_logCommonErrors)
2471 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2472 ": packet smaller than DNS header"<<endl;
2473 }
2474
2475 t_udpclientsocks->returnSocket(fd);
2476 string empty;
2477
2478 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2479 if(iter != MT->d_waiters.end())
2480 doResends(iter, pid, empty);
2481
2482 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2483 return;
2484 }
2485
2486 dnsheader dh;
2487 memcpy(&dh, data, sizeof(dh));
2488
2489 PacketID pident;
2490 pident.remote=fromaddr;
2491 pident.id=dh.id;
2492 pident.fd=fd;
2493
2494 if(!dh.qr && g_logCommonErrors) {
2495 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2496 }
2497
2498 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2499 !dh.qr) { // one weird server
2500 pident.domain.clear();
2501 pident.type = 0;
2502 }
2503 else {
2504 try {
2505 if(len > 12)
2506 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2507 }
2508 catch(std::exception& e) {
2509 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2510 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2511 return;
2512 }
2513 }
2514 string packet;
2515 packet.assign(data, len);
2516
2517 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2518 if(iter != MT->d_waiters.end()) {
2519 doResends(iter, pident, packet);
2520 }
2521
2522 retryWithName:
2523
2524 if(!MT->sendEvent(pident, &packet)) {
2525 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2526 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2527 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2528 pident.domain == mthread->key.domain) {
2529 mthread->key.nearMisses++;
2530 }
2531
2532 // be a bit paranoid here since we're weakening our matching
2533 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2534 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2535 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2536 pident.domain = mthread->key.domain;
2537 pident.type = mthread->key.type;
2538 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2539 }
2540 }
2541 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2542 if(g_logCommonErrors) {
2543 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2544 }
2545 }
2546 else if(fd >= 0) {
2547 t_udpclientsocks->returnSocket(fd);
2548 }
2549 }
2550
2551 FDMultiplexer* getMultiplexer()
2552 {
2553 FDMultiplexer* ret;
2554 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
2555 try {
2556 ret=i.second();
2557 return ret;
2558 }
2559 catch(FDMultiplexerException &fe) {
2560 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2561 }
2562 catch(...) {
2563 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2564 }
2565 }
2566 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2567 exit(1);
2568 }
2569
2570
2571 static string* doReloadLuaScript()
2572 {
2573 string fname= ::arg()["lua-dns-script"];
2574 try {
2575 if(fname.empty()) {
2576 t_pdl.reset();
2577 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2578 return new string("unloaded\n");
2579 }
2580 else {
2581 t_pdl = std::make_shared<RecursorLua4>(fname);
2582 }
2583 }
2584 catch(std::exception& e) {
2585 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2586 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2587 }
2588
2589 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2590 return new string("(re)loaded '"+fname+"'\n");
2591 }
2592
2593 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2594 {
2595 if(begin != end)
2596 ::arg().set("lua-dns-script") = *begin;
2597
2598 return broadcastAccFunction<string>(doReloadLuaScript);
2599 }
2600
2601 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
2602 try
2603 {
2604 if(newRegex.empty()) {
2605 t_traceRegex.reset();
2606 return new string("unset\n");
2607 }
2608 else {
2609 t_traceRegex = std::make_shared<Regex>(newRegex);
2610 return new string("ok\n");
2611 }
2612 }
2613 catch(PDNSException& ae)
2614 {
2615 return new string(ae.reason+"\n");
2616 }
2617
2618 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2619 {
2620 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2621 }
2622
2623 static void checkLinuxIPv6Limits()
2624 {
2625 #ifdef __linux__
2626 string line;
2627 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2628 int lim=std::stoi(line);
2629 if(lim < 16384) {
2630 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2631 }
2632 }
2633 #endif
2634 }
2635 static void checkOrFixFDS()
2636 {
2637 unsigned int availFDs=getFilenumLimit();
2638 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2639
2640 if(wantFDs > availFDs) {
2641 unsigned int hardlimit= getFilenumLimit(true);
2642 if(hardlimit >= wantFDs) {
2643 setFilenumLimit(wantFDs);
2644 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2645 }
2646 else {
2647 int newval = (hardlimit - 25) / g_numWorkerThreads;
2648 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2649 g_maxMThreads = newval;
2650 setFilenumLimit(hardlimit);
2651 }
2652 }
2653 }
2654
// Forward declaration only; being static, the definition lives elsewhere in this file.
static void* recursorThread(void*);
2656
2657 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
2658 {
2659 t_allowFrom = ng;
2660 return nullptr;
2661 }
2662
// Command-line argument count and vector; parseACLs() re-parses them on reload.
int g_argc = 0;
char** g_argv = nullptr;
2665
2666 void parseACLs()
2667 {
2668 static bool l_initialized;
2669
2670 if(l_initialized) { // only reload configuration file on second call
2671 string configname=::arg()["config-dir"]+"/recursor.conf";
2672 if(::arg()["config-name"]!="") {
2673 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
2674 }
2675 cleanSlashes(configname);
2676
2677 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2678 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2679 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2680 ::arg().preParseFile(configname.c_str(), "include-dir");
2681 ::arg().preParse(g_argc, g_argv, "include-dir");
2682
2683 // then process includes
2684 std::vector<std::string> extraConfigs;
2685 ::arg().gatherIncludes(extraConfigs);
2686
2687 for(const std::string& fn : extraConfigs) {
2688 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2689 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2690 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2691 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2692 }
2693
2694 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2695 ::arg().preParse(g_argc, g_argv, "allow-from");
2696 }
2697
2698 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2699 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
2700
2701 if(!::arg()["allow-from-file"].empty()) {
2702 string line;
2703 ifstream ifs(::arg()["allow-from-file"].c_str());
2704 if(!ifs) {
2705 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2706 }
2707
2708 string::size_type pos;
2709 while(getline(ifs,line)) {
2710 pos=line.find('#');
2711 if(pos!=string::npos)
2712 line.resize(pos);
2713 trim(line);
2714 if(line.empty())
2715 continue;
2716
2717 allowFrom->addMask(line);
2718 }
2719 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2720 }
2721 else if(!::arg()["allow-from"].empty()) {
2722 vector<string> ips;
2723 stringtok(ips, ::arg()["allow-from"], ", ");
2724
2725 L<<Logger::Warning<<"Only allowing queries from: ";
2726 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2727 allowFrom->addMask(*i);
2728 if(i!=ips.begin())
2729 L<<Logger::Warning<<", ";
2730 L<<Logger::Warning<<*i;
2731 }
2732 L<<Logger::Warning<<endl;
2733 }
2734 else {
2735 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2736 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2737 allowFrom = nullptr;
2738 }
2739
2740 g_initialAllowFrom = allowFrom;
2741 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2742 oldAllowFrom = nullptr;
2743
2744 l_initialized = true;
2745 }
2746
2747
2748 static void setupDelegationOnly()
2749 {
2750 vector<string> parts;
2751 stringtok(parts, ::arg()["delegation-only"], ", \t");
2752 for(const auto& p : parts) {
2753 SyncRes::addDelegationOnly(DNSName(p));
2754 }
2755 }
2756
2757 static std::map<unsigned int, std::set<int> > parseCPUMap()
2758 {
2759 std::map<unsigned int, std::set<int> > result;
2760
2761 const std::string value = ::arg()["cpu-map"];
2762
2763 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2764 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2765 return result;
2766 }
2767
2768 std::vector<std::string> parts;
2769
2770 stringtok(parts, value, " \t");
2771
2772 for(const auto& part : parts) {
2773 if (part.find('=') == string::npos)
2774 continue;
2775
2776 try {
2777 auto headers = splitField(part, '=');
2778 trim(headers.first);
2779 trim(headers.second);
2780
2781 unsigned int threadId = pdns_stou(headers.first);
2782 std::vector<std::string> cpus;
2783
2784 stringtok(cpus, headers.second, ",");
2785
2786 for(const auto& cpu : cpus) {
2787 int cpuId = std::stoi(cpu);
2788
2789 result[threadId].insert(cpuId);
2790 }
2791 }
2792 catch(const std::exception& e) {
2793 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2794 }
2795 }
2796
2797 return result;
2798 }
2799
2800 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2801 {
2802 const auto& cpuMapping = cpusMap.find(n);
2803 if (cpuMapping != cpusMap.cend()) {
2804 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2805 if (rc == 0) {
2806 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2807 for (const auto cpu : cpuMapping->second) {
2808 L<<Logger::Info<<" "<<cpu;
2809 }
2810 L<<Logger::Info<<endl;
2811 }
2812 else {
2813 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2814 for (const auto cpu : cpuMapping->second) {
2815 L<<Logger::Info<<" "<<cpu;
2816 }
2817 L<<Logger::Info<<strerror(rc)<<endl;
2818 }
2819 }
2820 }
2821
2822 static int serviceMain(int argc, char*argv[])
2823 {
2824 L.setName(s_programname);
2825 L.disableSyslog(::arg().mustDo("disable-syslog"));
2826 L.setTimestamps(::arg().mustDo("log-timestamp"));
2827
2828 if(!::arg()["logging-facility"].empty()) {
2829 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2830 if(val >= 0)
2831 theL().setFacility(val);
2832 else
2833 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2834 }
2835
2836 showProductVersion();
2837 seedRandom(::arg()["entropy-source"]);
2838
2839 g_disthashseed=dns_random(0xffffffff);
2840
2841 checkLinuxIPv6Limits();
2842 try {
2843 vector<string> addrs;
2844 if(!::arg()["query-local-address6"].empty()) {
2845 SyncRes::s_doIPv6=true;
2846 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2847
2848 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2849 for(const string& addr : addrs) {
2850 g_localQueryAddresses6.push_back(ComboAddress(addr));
2851 }
2852 }
2853 else {
2854 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2855 }
2856 addrs.clear();
2857 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2858 for(const string& addr : addrs) {
2859 g_localQueryAddresses4.push_back(ComboAddress(addr));
2860 }
2861 }
2862 catch(std::exception& e) {
2863 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2864 exit(99);
2865 }
2866
2867 // keep this ABOVE loadRecursorLuaConfig!
2868 if(::arg()["dnssec"]=="off")
2869 g_dnssecmode=DNSSECMode::Off;
2870 else if(::arg()["dnssec"]=="process-no-validate")
2871 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2872 else if(::arg()["dnssec"]=="process")
2873 g_dnssecmode=DNSSECMode::Process;
2874 else if(::arg()["dnssec"]=="validate")
2875 g_dnssecmode=DNSSECMode::ValidateAll;
2876 else if(::arg()["dnssec"]=="log-fail")
2877 g_dnssecmode=DNSSECMode::ValidateForLog;
2878 else {
2879 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2880 exit(1);
2881 }
2882
2883 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2884 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
2885
2886 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2887 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2888
2889 try {
2890 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2891 }
2892 catch (PDNSException &e) {
2893 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2894 exit(1);
2895 }
2896
2897 parseACLs();
2898 sortPublicSuffixList();
2899
2900 if(!::arg()["dont-query"].empty()) {
2901 vector<string> ips;
2902 stringtok(ips, ::arg()["dont-query"], ", ");
2903 ips.push_back("0.0.0.0");
2904 ips.push_back("::");
2905
2906 L<<Logger::Warning<<"Will not send queries to: ";
2907 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2908 SyncRes::addDontQuery(*i);
2909 if(i!=ips.begin())
2910 L<<Logger::Warning<<", ";
2911 L<<Logger::Warning<<*i;
2912 }
2913 L<<Logger::Warning<<endl;
2914 }
2915
2916 g_quiet=::arg().mustDo("quiet");
2917
2918 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2919 if(g_weDistributeQueries) {
2920 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2921 }
2922
2923 setupDelegationOnly();
2924 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2925
2926 if(::arg()["trace"]=="fail") {
2927 SyncRes::setDefaultLogMode(SyncRes::Store);
2928 }
2929 else if(::arg().mustDo("trace")) {
2930 SyncRes::setDefaultLogMode(SyncRes::Log);
2931 ::arg().set("quiet")="no";
2932 g_quiet=false;
2933 g_dnssecLOG=true;
2934 }
2935
2936 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2937
2938 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2939
2940 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
2941 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
2942 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2943 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2944 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2945 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
2946 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2947 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
2948 SyncRes::s_serverID=::arg()["server-id"];
2949 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
2950 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
2951 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
2952 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
2953 if(SyncRes::s_serverID.empty()) {
2954 char tmp[128];
2955 gethostname(tmp, sizeof(tmp)-1);
2956 SyncRes::s_serverID=tmp;
2957 }
2958
2959 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
2960 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
2961
2962 if (!::arg().isEmpty("ecs-scope-zero-address")) {
2963 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
2964 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
2965 }
2966 else {
2967 bool found = false;
2968 for (const auto& addr : g_localQueryAddresses4) {
2969 if (!IsAnyAddress(addr)) {
2970 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
2971 found = true;
2972 break;
2973 }
2974 }
2975 if (!found) {
2976 for (const auto& addr : g_localQueryAddresses6) {
2977 if (!IsAnyAddress(addr)) {
2978 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
2979 found = true;
2980 break;
2981 }
2982 }
2983 if (!found) {
2984 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
2985 }
2986 }
2987 }
2988
2989 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
2990
2991 g_initialDomainMap = parseAuthAndForwards();
2992
2993 g_latencyStatSize=::arg().asNum("latency-statistic-size");
2994
2995 g_logCommonErrors=::arg().mustDo("log-common-errors");
2996 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
2997
2998 g_anyToTcp = ::arg().mustDo("any-to-tcp");
2999 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
3000
3001 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
3002
3003 g_numWorkerThreads = ::arg().asNum("threads");
3004 if (g_numWorkerThreads < 1) {
3005 L<<Logger::Warning<<"Asked to run with 0 threads, raising to 1 instead"<<endl;
3006 g_numWorkerThreads = 1;
3007 }
3008
3009 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
3010 g_maxMThreads = ::arg().asNum("max-mthreads");
3011
3012 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
3013
3014 g_statisticsInterval = ::arg().asNum("statistics-interval");
3015
3016 #ifdef SO_REUSEPORT
3017 g_reusePort = ::arg().mustDo("reuseport");
3018 #endif
3019
3020 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
3021
3022 if (g_useOneSocketPerThread) {
3023 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3024 makeUDPServerSockets(threadId);
3025 makeTCPServerSockets(threadId);
3026 }
3027 }
3028 else {
3029 makeUDPServerSockets(0);
3030 makeTCPServerSockets(0);
3031 }
3032
3033 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3034 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3035
3036 int forks;
3037 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3038 if(!fork()) // we are child
3039 break;
3040 }
3041
3042 if(::arg().mustDo("daemon")) {
3043 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3044 L.toConsole(Logger::Critical);
3045 daemonize();
3046 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3047 }
3048 signal(SIGUSR1,usr1Handler);
3049 signal(SIGUSR2,usr2Handler);
3050 signal(SIGPIPE,SIG_IGN);
3051
3052 checkOrFixFDS();
3053
3054 #ifdef HAVE_LIBSODIUM
3055 if (sodium_init() == -1) {
3056 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3057 exit(99);
3058 }
3059 #endif
3060
3061 openssl_thread_setup();
3062 openssl_seed();
3063
3064 int newgid=0;
3065 if(!::arg()["setgid"].empty())
3066 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3067 int newuid=0;
3068 if(!::arg()["setuid"].empty())
3069 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3070
3071 Utility::dropGroupPrivs(newuid, newgid);
3072
3073 if (!::arg()["chroot"].empty()) {
3074 #ifdef HAVE_SYSTEMD
3075 char *ns;
3076 ns = getenv("NOTIFY_SOCKET");
3077 if (ns != nullptr) {
3078 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3079 exit(1);
3080 }
3081 #endif
3082 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3083 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3084 exit(1);
3085 }
3086 else
3087 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3088 }
3089
3090 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3091 if(!s_pidfname.empty())
3092 unlink(s_pidfname.c_str()); // remove possible old pid file
3093 writePid();
3094
3095 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3096
3097 Utility::dropUserPrivs(newuid);
3098
3099 makeThreadPipes();
3100
3101 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3102 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3103 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3104
3105 if (::arg().mustDo("snmp-agent")) {
3106 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3107 g_snmpAgent->run();
3108 }
3109
3110 const auto cpusMap = parseCPUMap();
3111 if(g_numThreads == 1) {
3112 L<<Logger::Warning<<"Operating unthreaded"<<endl;
3113 #ifdef HAVE_SYSTEMD
3114 sd_notify(0, "READY=1");
3115 #endif
3116 setCPUMap(cpusMap, 0, pthread_self());
3117 recursorThread(0);
3118 }
3119 else {
3120 pthread_t tid;
3121 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3122 for(unsigned int n=0; n < g_numThreads; ++n) {
3123 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
3124
3125 setCPUMap(cpusMap, n, tid);
3126 }
3127 void* res;
3128 #ifdef HAVE_SYSTEMD
3129 sd_notify(0, "READY=1");
3130 #endif
3131 pthread_join(tid, &res);
3132 }
3133 return 0;
3134 }
3135
3136 static void* recursorThread(void* ptr)
3137 try
3138 {
3139 t_id=(int) (long) ptr;
3140 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3141 SyncRes::setDomainMap(g_initialDomainMap);
3142 t_allowFrom = g_initialAllowFrom;
3143 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3144 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3145 primeHints();
3146
3147 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3148
3149 #ifdef HAVE_PROTOBUF
3150 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
3151 #endif
3152 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3153
3154 try {
3155 if(!::arg()["lua-dns-script"].empty()) {
3156 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
3157 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3158 }
3159 }
3160 catch(std::exception &e) {
3161 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3162 _exit(99);
3163 }
3164
3165 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3166 if(ringsize) {
3167 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3168 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3169 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3170 else
3171 t_remotes->set_capacity(ringsize);
3172 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3173 t_servfailremotes->set_capacity(ringsize);
3174 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3175 t_largeanswerremotes->set_capacity(ringsize);
3176
3177 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3178 t_queryring->set_capacity(ringsize);
3179 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3180 t_servfailqueryring->set_capacity(ringsize);
3181 }
3182
3183 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3184
3185 PacketID pident;
3186
3187 t_fdm=getMultiplexer();
3188 if(!t_id) {
3189 if(::arg().mustDo("webserver")) {
3190 L<<Logger::Warning << "Enabling web server" << endl;
3191 try {
3192 new RecursorWebServer(t_fdm);
3193 }
3194 catch(PDNSException &e) {
3195 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3196 exit(99);
3197 }
3198 }
3199 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
3200 }
3201
3202 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3203
3204 if(g_useOneSocketPerThread) {
3205 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3206 t_fdm->addReadFD(i->first, i->second);
3207 }
3208 }
3209 else {
3210 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
3211 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3212 t_fdm->addReadFD(i->first, i->second);
3213 }
3214 }
3215 }
3216
3217 registerAllStats();
3218 if(!t_id) {
3219 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3220 }
3221
3222 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3223
3224 bool listenOnTCP(true);
3225
3226 time_t last_carbon=0;
3227 time_t carbonInterval=::arg().asNum("carbon-interval");
3228 counter.store(0); // used to periodically execute certain tasks
3229 for(;;) {
3230 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3231
3232 if(!(counter%500)) {
3233 MT->makeThread(houseKeeping, 0);
3234 }
3235
3236 if(!(counter%55)) {
3237 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
3238 expired_t expired=t_fdm->getTimeouts(g_now);
3239
3240 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
3241 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
3242 if(g_logCommonErrors)
3243 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
3244 t_fdm->removeReadFD(i->first);
3245 }
3246 }
3247
3248 counter++;
3249
3250 if(!t_id && statsWanted) {
3251 doStats();
3252 }
3253
3254 Utility::gettimeofday(&g_now, 0);
3255
3256 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3257 MT->makeThread(doCarbonDump, 0);
3258 last_carbon = g_now.tv_sec;
3259 }
3260
3261 t_fdm->run(&g_now);
3262 // 'run' updates g_now for us
3263
3264 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
3265 if(listenOnTCP) {
3266 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3267 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3268 t_fdm->removeReadFD(*i);
3269 listenOnTCP=false;
3270 }
3271 }
3272 else {
3273 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3274 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3275 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3276 listenOnTCP=true;
3277 }
3278 }
3279 }
3280 }
3281 }
3282 catch(PDNSException &ae) {
3283 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3284 return 0;
3285 }
3286 catch(std::exception &e) {
3287 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3288 return 0;
3289 }
3290 catch(...) {
3291 L<<Logger::Error<<"any other exception in main: "<<endl;
3292 return 0;
3293 }
3294
3295
3296 int main(int argc, char **argv)
3297 {
3298 g_argc = argc;
3299 g_argv = argv;
3300 g_stats.startupTime=time(0);
3301 versionSetProduct(ProductRecursor);
3302 reportBasicTypes();
3303 reportOtherTypes();
3304
3305 int ret = EXIT_SUCCESS;
3306
3307 try {
3308 ::arg().set("stack-size","stack size per mthread")="200000";
3309 ::arg().set("soa-minimum-ttl","Don't change")="0";
3310 ::arg().set("no-shuffle","Don't change")="off";
3311 ::arg().set("local-port","port to listen on")="53";
3312 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3313 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3314 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3315 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3316 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3317 ::arg().set("daemon","Operate as a daemon")="no";
3318 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3319 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3320 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3321 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3322 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3323 ::arg().set("chroot","switch to chroot jail")="";
3324 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3325 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3326 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3327 ::arg().set("threads", "Launch this number of threads")="2";
3328 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3329 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3330 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3331 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3332 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3333 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3334 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3335 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3336 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3337 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3338 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3339 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3340 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3341 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3342 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3343 ::arg().set("quiet","Suppress logging of questions and answers")="";
3344 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3345 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3346 ::arg().set("socket-owner","Owner of socket")="";
3347 ::arg().set("socket-group","Group of socket")="";
3348 ::arg().set("socket-mode", "Permissions for socket")="";
3349
3350 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3351 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3352 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3353 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3354 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3355 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3356 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3357 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3358 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3359 ::arg().set("hint-file", "If set, load root hints from this file")="";
3360 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3361 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3362 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3363 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3364 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3365 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3366 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
3367 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3368 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3369 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3370 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3371 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3372 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3373 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3374 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3375 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3376 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3377 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3378 ::arg().set("lua-config-file", "More powerful configuration options")="";
3379
3380 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3381 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3382 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3383 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3384 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3385 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3386 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3387 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3388 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3389 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3390 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3391 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3392 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3393 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3394 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3395 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3396 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3397 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3398 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3399 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3400 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3401 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3402 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3403 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3404 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3405 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3406
3407 ::arg().set("include-dir","Include *.conf files from this directory")="";
3408 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3409
3410 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3411
3412 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3413 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3414
3415 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3416 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3417
3418 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3419
3420 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3421
3422 ::arg().setCmd("help","Provide a helpful message");
3423 ::arg().setCmd("version","Print version string");
3424 ::arg().setCmd("config","Output blank configuration");
3425 L.toConsole(Logger::Info);
3426 ::arg().laxParse(argc,argv); // do a lax parse
3427
3428 string configname=::arg()["config-dir"]+"/recursor.conf";
3429 if(::arg()["config-name"]!="") {
3430 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3431 s_programname+="-"+::arg()["config-name"];
3432 }
3433 cleanSlashes(configname);
3434
3435 if(::arg().mustDo("config")) {
3436 cout<<::arg().configstring()<<endl;
3437 exit(0);
3438 }
3439
3440 if(!::arg().file(configname.c_str()))
3441 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3442
3443 ::arg().parse(argc,argv);
3444
3445 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3446 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3447 exit(EXIT_FAILURE);
3448 }
3449
3450 if (::arg()["socket-dir"].empty()) {
3451 if (::arg()["chroot"].empty())
3452 ::arg().set("socket-dir") = LOCALSTATEDIR;
3453 else
3454 ::arg().set("socket-dir") = "/";
3455 }
3456
3457 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3458
3459 if(::arg().asNum("threads")==1)
3460 ::arg().set("pdns-distributes-queries")="no";
3461
3462 if(::arg().mustDo("help")) {
3463 cout<<"syntax:"<<endl<<endl;
3464 cout<<::arg().helpstring(::arg()["help"])<<endl;
3465 exit(0);
3466 }
3467 if(::arg().mustDo("version")) {
3468 showProductVersion();
3469 showBuildConfiguration();
3470 exit(0);
3471 }
3472
3473 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3474
3475 if (logUrgency < Logger::Error)
3476 logUrgency = Logger::Error;
3477 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3478 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3479 }
3480 L.setLoglevel(logUrgency);
3481 L.toConsole(logUrgency);
3482
3483 serviceMain(argc, argv);
3484 }
3485 catch(PDNSException &ae) {
3486 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3487 ret=EXIT_FAILURE;
3488 }
3489 catch(std::exception &e) {
3490 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3491 ret=EXIT_FAILURE;
3492 }
3493 catch(...) {
3494 L<<Logger::Error<<"any other exception in main: "<<endl;
3495 ret=EXIT_FAILURE;
3496 }
3497
3498 return ret;
3499 }