]> git.ipfire.org Git - thirdparty/pdns.git/blob - pdns/pdns_recursor.cc
Merge pull request #5911 from job/improve_error_readability
[thirdparty/pdns.git] / pdns / pdns_recursor.cc
1 /*
2 * This file is part of PowerDNS or dnsdist.
3 * Copyright -- PowerDNS.COM B.V. and its contributors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of version 2 of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * In addition, for the avoidance of any doubt, permission is granted to
10 * link this program with OpenSSL and to (re)distribute the binaries
11 * produced as the result of such linking.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22 #ifdef HAVE_CONFIG_H
23 #include "config.h"
24 #endif
25
26 #include <netdb.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include "ws-recursor.hh"
31 #include <pthread.h>
32 #include "recpacketcache.hh"
33 #include "utility.hh"
34 #include "dns_random.hh"
35 #ifdef HAVE_LIBSODIUM
36 #include <sodium.h>
37 #endif
38 #include "opensslsigners.hh"
39 #include <iostream>
40 #include <errno.h>
41 #include <boost/static_assert.hpp>
42 #include <map>
43 #include <set>
44 #include "recursor_cache.hh"
45 #include "cachecleaner.hh"
46 #include <stdio.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include "misc.hh"
50 #include "mtasker.hh"
51 #include <utility>
52 #include "arguments.hh"
53 #include "syncres.hh"
54 #include <fcntl.h>
55 #include <fstream>
56 #include "sortlist.hh"
57 #include "sstuff.hh"
58 #include <boost/tuple/tuple.hpp>
59 #include <boost/tuple/tuple_comparison.hpp>
60 #include <boost/shared_array.hpp>
61 #include <boost/function.hpp>
62 #include <boost/algorithm/string.hpp>
63 #ifdef MALLOC_TRACE
64 #include "malloctrace.hh"
65 #endif
66 #include <netinet/tcp.h>
67 #include "dnsparser.hh"
68 #include "dnswriter.hh"
69 #include "dnsrecords.hh"
70 #include "zoneparser-tng.hh"
71 #include "rec_channel.hh"
72 #include "logger.hh"
73 #include "iputils.hh"
74 #include "mplexer.hh"
75 #include "config.h"
76 #include "lua-recursor4.hh"
77 #include "version.hh"
78 #include "responsestats.hh"
79 #include "secpoll-recursor.hh"
80 #include "dnsname.hh"
81 #include "filterpo.hh"
82 #include "rpzloader.hh"
83 #include "validate-recursor.hh"
84 #include "rec-lua-conf.hh"
85 #include "ednsoptions.hh"
86 #include "gettime.hh"
87
88 #include "rec-protobuf.hh"
89 #include "rec-snmp.hh"
90
91 #ifdef HAVE_SYSTEMD
92 #include <systemd/sd-daemon.h>
93 #endif
94
95 #include "namespaces.hh"
96
97 typedef map<ComboAddress, uint32_t, ComboAddress::addressOnlyLessThan> tcpClientCounts_t;
98
99 static thread_local std::shared_ptr<RecursorLua4> t_pdl;
100 static thread_local unsigned int t_id;
101 static thread_local std::shared_ptr<Regex> t_traceRegex;
102 static thread_local std::unique_ptr<tcpClientCounts_t> t_tcpClientCounts;
103
104 thread_local std::unique_ptr<MT_t> MT; // the big MTasker
105 thread_local std::unique_ptr<MemRecursorCache> t_RC;
106 thread_local std::unique_ptr<RecursorPacketCache> t_packetCache;
107 thread_local FDMultiplexer* t_fdm{nullptr};
108 thread_local std::unique_ptr<addrringbuf_t> t_remotes, t_servfailremotes, t_largeanswerremotes;
109 thread_local std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > > t_queryring, t_servfailqueryring;
110 thread_local std::shared_ptr<NetmaskGroup> t_allowFrom;
111 #ifdef HAVE_PROTOBUF
112 thread_local std::unique_ptr<boost::uuids::random_generator> t_uuidGenerator;
113 #endif
114 __thread struct timeval g_now; // timestamp, updated (too) frequently
115
// File descriptors used for communicating with our threads.
// NOTE(review): judging by the names these are the two ends of a pipe towards
// a thread and the two ends of a pipe back from it - confirm where g_pipes is filled.
struct ThreadPipeSet
{
  int writeToThread, readToThread;
  int writeFromThread, readFromThread;
};
124
125 typedef vector<int> tcpListenSockets_t;
126 typedef map<int, ComboAddress> listenSocketsAddresses_t; // is shared across all threads right now
127 typedef vector<pair<int, function< void(int, any&) > > > deferredAdd_t;
128
129 static const ComboAddress g_local4("0.0.0.0"), g_local6("::");
130 static vector<ThreadPipeSet> g_pipes; // effectively readonly after startup
131 static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets
132 static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now
133 static std::unordered_map<unsigned int, deferredAdd_t> deferredAdds;
134 static set<int> g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism
135 static vector<ComboAddress> g_localQueryAddresses4, g_localQueryAddresses6;
136 static AtomicCounter counter;
137 static std::shared_ptr<SyncRes::domainmap_t> g_initialDomainMap; // new threads needs this to be setup
138 static std::shared_ptr<NetmaskGroup> g_initialAllowFrom; // new thread needs to be setup with this
139 static size_t g_tcpMaxQueriesPerConn;
140 static uint64_t g_latencyStatSize;
141 static uint32_t g_disthashseed;
142 static unsigned int g_maxTCPPerClient;
143 static unsigned int g_networkTimeoutMsec;
144 static unsigned int g_maxMThreads;
145 static unsigned int g_numWorkerThreads;
146 static int g_tcpTimeout;
147 static uint16_t g_udpTruncationThreshold;
148 static std::atomic<bool> statsWanted;
149 static std::atomic<bool> g_quiet;
150 static bool g_logCommonErrors;
151 static bool g_anyToTcp;
152 static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets
153 static bool g_reusePort{false};
154 static bool g_useOneSocketPerThread;
155 static bool g_gettagNeedsEDNSOptions{false};
156 static time_t g_statisticsInterval;
157 static bool g_useIncomingECS;
158 std::atomic<uint32_t> g_maxCacheEntries, g_maxPacketCacheEntries;
159
160 RecursorControlChannel s_rcc; // only active in thread 0
161 RecursorStats g_stats;
162 string s_programname="pdns_recursor";
163 string s_pidfname;
164 bool g_lowercaseOutgoing;
165 unsigned int g_numThreads;
166 uint16_t g_outgoingEDNSBufsize;
167 bool g_logRPZChanges{false};
168
// Loopback, private, link-local and shared-address-space networks (IPv4 first, then IPv6).
#define LOCAL_NETS \
  "127.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16, 192.168.0.0/16, 172.16.0.0/12, " \
  "::1/128, fc00::/7, fe80::/10"
// Bad Nets taken from both:
// http://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
// and
// http://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
// where such a network may not be considered a valid destination
#define BAD_NETS \
  "0.0.0.0/8, 192.0.0.0/24, 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24, 240.0.0.0/4, " \
  "::/96, ::ffff:0:0/96, 100::/64, 2001:db8::/32"
// Concatenation of both lists, separated by ", ".
#define DONT_QUERY LOCAL_NETS ", " BAD_NETS
177
178 //! used to send information to a newborn mthread
179 struct DNSComboWriter {
180 DNSComboWriter(const char* data, uint16_t len, const struct timeval& now) : d_mdp(true, data, len), d_now(now),
181 d_tcp(false), d_socket(-1)
182 {}
183 MOADNSParser d_mdp;
184 void setRemote(const ComboAddress* sa)
185 {
186 d_remote=*sa;
187 }
188
189 void setLocal(const ComboAddress& sa)
190 {
191 d_local=sa;
192 }
193
194
195 void setSocket(int sock)
196 {
197 d_socket=sock;
198 }
199
200 string getRemote() const
201 {
202 return d_remote.toString();
203 }
204
205 struct timeval d_now;
206 ComboAddress d_remote, d_local;
207 #ifdef HAVE_PROTOBUF
208 boost::uuids::uuid d_uuid;
209 string d_requestorId;
210 string d_deviceId;
211 #endif
212 EDNSSubnetOpts d_ednssubnet;
213 bool d_ecsFound{false};
214 bool d_ecsParsed{false};
215 bool d_tcp;
216 int d_socket;
217 unsigned int d_tag{0};
218 uint32_t d_qhash{0};
219 string d_query;
220 shared_ptr<TCPConnection> d_tcpConnection;
221 vector<pair<uint16_t, string> > d_ednsOpts;
222 std::vector<std::string> d_policyTags;
223 LuaContext::LuaObject d_data;
224 };
225
226 MT_t* getMT()
227 {
228 return MT ? MT.get() : nullptr;
229 }
230
231 ArgvMap &arg()
232 {
233 static ArgvMap theArg;
234 return theArg;
235 }
236
237 unsigned int getRecursorThreadId()
238 {
239 return t_id;
240 }
241
242 int getMTaskerTID()
243 {
244 return MT->getTid();
245 }
246
247 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var);
248
249 // -1 is error, 0 is timeout, 1 is success
250 int asendtcp(const string& data, Socket* sock)
251 {
252 PacketID pident;
253 pident.sock=sock;
254 pident.outMSG=data;
255
256 t_fdm->addWriteFD(sock->getHandle(), handleTCPClientWritable, pident);
257 string packet;
258
259 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec);
260
261 if(!ret || ret==-1) { // timeout
262 t_fdm->removeWriteFD(sock->getHandle());
263 }
264 else if(packet.size() !=data.size()) { // main loop tells us what it sent out, or empty in case of an error
265 return -1;
266 }
267 return ret;
268 }
269
270 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var);
271
272 // -1 is error, 0 is timeout, 1 is success
273 int arecvtcp(string& data, size_t len, Socket* sock, bool incompleteOkay)
274 {
275 data.clear();
276 PacketID pident;
277 pident.sock=sock;
278 pident.inNeeded=len;
279 pident.inIncompleteOkay=incompleteOkay;
280 t_fdm->addReadFD(sock->getHandle(), handleTCPClientReadable, pident);
281
282 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
283 if(!ret || ret==-1) { // timeout
284 t_fdm->removeReadFD(sock->getHandle());
285 }
286 else if(data.empty()) {// error, EOF or other
287 return -1;
288 }
289
290 return ret;
291 }
292
293 static void handleGenUDPQueryResponse(int fd, FDMultiplexer::funcparam_t& var)
294 {
295 PacketID pident=*any_cast<PacketID>(&var);
296 char resp[512];
297 ssize_t ret=recv(fd, resp, sizeof(resp), 0);
298 t_fdm->removeReadFD(fd);
299 if(ret >= 0) {
300 string data(resp, (size_t) ret);
301 MT->sendEvent(pident, &data);
302 }
303 else {
304 string empty;
305 MT->sendEvent(pident, &empty);
306 // cerr<<"Had some kind of error: "<<ret<<", "<<strerror(errno)<<endl;
307 }
308 }
309 string GenUDPQueryResponse(const ComboAddress& dest, const string& query)
310 {
311 Socket s(dest.sin4.sin_family, SOCK_DGRAM);
312 s.setNonBlocking();
313 ComboAddress local = getQueryLocalAddress(dest.sin4.sin_family, 0);
314
315 s.bind(local);
316 s.connect(dest);
317 s.send(query);
318
319 PacketID pident;
320 pident.sock=&s;
321 pident.type=0;
322 t_fdm->addReadFD(s.getHandle(), handleGenUDPQueryResponse, pident);
323
324 string data;
325
326 int ret=MT->waitEvent(pident,&data, g_networkTimeoutMsec);
327
328 if(!ret || ret==-1) { // timeout
329 t_fdm->removeReadFD(s.getHandle());
330 }
331 else if(data.empty()) {// error, EOF or other
332 // we could special case this
333 return data;
334 }
335 return data;
336 }
337
338 //! pick a random query local address
339 ComboAddress getQueryLocalAddress(int family, uint16_t port)
340 {
341 ComboAddress ret;
342 if(family==AF_INET) {
343 if(g_localQueryAddresses4.empty())
344 ret = g_local4;
345 else
346 ret = g_localQueryAddresses4[dns_random(g_localQueryAddresses4.size())];
347 ret.sin4.sin_port = htons(port);
348 }
349 else {
350 if(g_localQueryAddresses6.empty())
351 ret = g_local6;
352 else
353 ret = g_localQueryAddresses6[dns_random(g_localQueryAddresses6.size())];
354
355 ret.sin6.sin6_port = htons(port);
356 }
357 return ret;
358 }
359
360 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t&);
361
362 static void setSocketBuffer(int fd, int optname, uint32_t size)
363 {
364 uint32_t psize=0;
365 socklen_t len=sizeof(psize);
366
367 if(!getsockopt(fd, SOL_SOCKET, optname, (char*)&psize, &len) && psize > size) {
368 L<<Logger::Error<<"Not decreasing socket buffer size from "<<psize<<" to "<<size<<endl;
369 return;
370 }
371
372 if (setsockopt(fd, SOL_SOCKET, optname, (char*)&size, sizeof(size)) < 0 )
373 L<<Logger::Error<<"Unable to raise socket buffer size to "<<size<<": "<<strerror(errno)<<endl;
374 }
375
376
377 static void setSocketReceiveBuffer(int fd, uint32_t size)
378 {
379 setSocketBuffer(fd, SO_RCVBUF, size);
380 }
381
382 static void setSocketSendBuffer(int fd, uint32_t size)
383 {
384 setSocketBuffer(fd, SO_SNDBUF, size);
385 }
386
387
388 // you can ask this class for a UDP socket to send a query from
389 // this socket is not yours, don't even think about deleting it
390 // but after you call 'returnSocket' on it, don't assume anything anymore
391 class UDPClientSocks
392 {
393 unsigned int d_numsocks;
394 public:
395 UDPClientSocks() : d_numsocks(0)
396 {
397 }
398
399 typedef set<int> socks_t;
400 socks_t d_socks;
401
402 // returning -2 means: temporary OS error (ie, out of files), -1 means error related to remote
403 int getSocket(const ComboAddress& toaddr, int* fd)
404 {
405 *fd=makeClientSocket(toaddr.sin4.sin_family);
406 if(*fd < 0) // temporary error - receive exception otherwise
407 return -2;
408
409 if(connect(*fd, (struct sockaddr*)(&toaddr), toaddr.getSocklen()) < 0) {
410 int err = errno;
411 // returnSocket(*fd);
412 try {
413 closesocket(*fd);
414 }
415 catch(const PDNSException& e) {
416 L<<Logger::Error<<"Error closing UDP socket after connect() failed: "<<e.reason<<endl;
417 }
418
419 if(err==ENETUNREACH) // Seth "My Interfaces Are Like A Yo Yo" Arnold special
420 return -2;
421 return -1;
422 }
423
424 d_socks.insert(*fd);
425 d_numsocks++;
426 return 0;
427 }
428
429 void returnSocket(int fd)
430 {
431 socks_t::iterator i=d_socks.find(fd);
432 if(i==d_socks.end()) {
433 throw PDNSException("Trying to return a socket (fd="+std::to_string(fd)+") not in the pool");
434 }
435 returnSocketLocked(i);
436 }
437
438 // return a socket to the pool, or simply erase it
439 void returnSocketLocked(socks_t::iterator& i)
440 {
441 if(i==d_socks.end()) {
442 throw PDNSException("Trying to return a socket not in the pool");
443 }
444 try {
445 t_fdm->removeReadFD(*i);
446 }
447 catch(FDMultiplexerException& e) {
448 // we sometimes return a socket that has not yet been assigned to t_fdm
449 }
450 try {
451 closesocket(*i);
452 }
453 catch(const PDNSException& e) {
454 L<<Logger::Error<<"Error closing returned UDP socket: "<<e.reason<<endl;
455 }
456
457 d_socks.erase(i++);
458 --d_numsocks;
459 }
460
461 // returns -1 for errors which might go away, throws for ones that won't
462 static int makeClientSocket(int family)
463 {
464 int ret=socket(family, SOCK_DGRAM, 0 ); // turns out that setting CLO_EXEC and NONBLOCK from here is not a performance win on Linux (oddly enough)
465
466 if(ret < 0 && errno==EMFILE) // this is not a catastrophic error
467 return ret;
468
469 if(ret<0)
470 throw PDNSException("Making a socket for resolver (family = "+std::to_string(family)+"): "+stringerror());
471
472 // setCloseOnExec(ret); // we're not going to exec
473
474 int tries=10;
475 ComboAddress sin;
476 while(--tries) {
477 uint16_t port;
478
479 if(tries==1) // fall back to kernel 'random'
480 port = 0;
481 else
482 port = 1025 + dns_random(64510);
483
484 sin=getQueryLocalAddress(family, port); // does htons for us
485
486 if (::bind(ret, (struct sockaddr *)&sin, sin.getSocklen()) >= 0)
487 break;
488 }
489 if(!tries)
490 throw PDNSException("Resolver binding to local query client socket on "+sin.toString()+": "+stringerror());
491
492 setNonBlocking(ret);
493 return ret;
494 }
495 };
496
497 static thread_local std::unique_ptr<UDPClientSocks> t_udpclientsocks;
498
499 /* these two functions are used by LWRes */
500 // -2 is OS error, -1 is error that depends on the remote, > 0 is success
501 int asendto(const char *data, size_t len, int flags,
502 const ComboAddress& toaddr, uint16_t id, const DNSName& domain, uint16_t qtype, int* fd)
503 {
504
505 PacketID pident;
506 pident.domain = domain;
507 pident.remote = toaddr;
508 pident.type = qtype;
509
510 // see if there is an existing outstanding request we can chain on to, using partial equivalence function
511 pair<MT_t::waiters_t::iterator, MT_t::waiters_t::iterator> chain=MT->d_waiters.equal_range(pident, PacketIDBirthdayCompare());
512
513 for(; chain.first != chain.second; chain.first++) {
514 if(chain.first->key.fd > -1) { // don't chain onto existing chained waiter!
515 /*
516 cerr<<"Orig: "<<pident.domain<<", "<<pident.remote.toString()<<", id="<<id<<endl;
517 cerr<<"Had hit: "<< chain.first->key.domain<<", "<<chain.first->key.remote.toString()<<", id="<<chain.first->key.id
518 <<", count="<<chain.first->key.chain.size()<<", origfd: "<<chain.first->key.fd<<endl;
519 */
520 chain.first->key.chain.insert(id); // we can chain
521 *fd=-1; // gets used in waitEvent / sendEvent later on
522 return 1;
523 }
524 }
525
526 int ret=t_udpclientsocks->getSocket(toaddr, fd);
527 if(ret < 0)
528 return ret;
529
530 pident.fd=*fd;
531 pident.id=id;
532
533 t_fdm->addReadFD(*fd, handleUDPServerResponse, pident);
534 ret = send(*fd, data, len, 0);
535
536 int tmp = errno;
537
538 if(ret < 0)
539 t_udpclientsocks->returnSocket(*fd);
540
541 errno = tmp; // this is for logging purposes only
542 return ret;
543 }
544
545 // -1 is error, 0 is timeout, 1 is success
546 int arecvfrom(char *data, size_t len, int flags, const ComboAddress& fromaddr, size_t *d_len,
547 uint16_t id, const DNSName& domain, uint16_t qtype, int fd, struct timeval* now)
548 {
549 static optional<unsigned int> nearMissLimit;
550 if(!nearMissLimit)
551 nearMissLimit=::arg().asNum("spoof-nearmiss-max");
552
553 PacketID pident;
554 pident.fd=fd;
555 pident.id=id;
556 pident.domain=domain;
557 pident.type = qtype;
558 pident.remote=fromaddr;
559
560 string packet;
561 int ret=MT->waitEvent(pident, &packet, g_networkTimeoutMsec, now);
562
563 if(ret > 0) {
564 if(packet.empty()) // means "error"
565 return -1;
566
567 *d_len=packet.size();
568 memcpy(data,packet.c_str(),min(len,*d_len));
569 if(*nearMissLimit && pident.nearMisses > *nearMissLimit) {
570 L<<Logger::Error<<"Too many ("<<pident.nearMisses<<" > "<<*nearMissLimit<<") bogus answers for '"<<domain<<"' from "<<fromaddr.toString()<<", assuming spoof attempt."<<endl;
571 g_stats.spoofCount++;
572 return -1;
573 }
574 }
575 else {
576 if(fd >= 0)
577 t_udpclientsocks->returnSocket(fd);
578 }
579 return ret;
580 }
581
582 static void writePid(void)
583 {
584 if(!::arg().mustDo("write-pid"))
585 return;
586 ofstream of(s_pidfname.c_str(), std::ios_base::app);
587 if(of)
588 of<< Utility::getpid() <<endl;
589 else
590 L<<Logger::Error<<"Writing pid for "<<Utility::getpid()<<" to "<<s_pidfname<<" failed: "<<strerror(errno)<<endl;
591 }
592
593 TCPConnection::TCPConnection(int fd, const ComboAddress& addr) : d_remote(addr), d_fd(fd)
594 {
595 ++s_currentConnections;
596 (*t_tcpClientCounts)[d_remote]++;
597 }
598
599 TCPConnection::~TCPConnection()
600 {
601 try {
602 if(closesocket(d_fd) < 0)
603 L<<Logger::Error<<"Error closing socket for TCPConnection"<<endl;
604 }
605 catch(const PDNSException& e) {
606 L<<Logger::Error<<"Error closing TCPConnection socket: "<<e.reason<<endl;
607 }
608
609 if(t_tcpClientCounts->count(d_remote) && !(*t_tcpClientCounts)[d_remote]--)
610 t_tcpClientCounts->erase(d_remote);
611 --s_currentConnections;
612 }
613
614 AtomicCounter TCPConnection::s_currentConnections;
615
616 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var);
617
618 // the idea is, only do things that depend on the *response* here. Incoming accounting is on incoming.
619 static void updateResponseStats(int res, const ComboAddress& remote, unsigned int packetsize, const DNSName* query, uint16_t qtype)
620 {
621 if(packetsize > 1000 && t_largeanswerremotes)
622 t_largeanswerremotes->push_back(remote);
623 switch(res) {
624 case RCode::ServFail:
625 if(t_servfailremotes) {
626 t_servfailremotes->push_back(remote);
627 if(query && t_servfailqueryring) // packet cache
628 t_servfailqueryring->push_back(make_pair(*query, qtype));
629 }
630 g_stats.servFails++;
631 break;
632 case RCode::NXDomain:
633 g_stats.nxDomains++;
634 break;
635 case RCode::NoError:
636 g_stats.noErrors++;
637 break;
638 }
639 }
640
641 static string makeLoginfo(DNSComboWriter* dc)
642 try
643 {
644 return "("+dc->d_mdp.d_qname.toLogString()+"/"+DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)+" from "+(dc->d_remote.toString())+")";
645 }
646 catch(...)
647 {
648 return "Exception making error message for exception";
649 }
650
651 #ifdef HAVE_PROTOBUF
652 static void protobufLogQuery(const std::shared_ptr<RemoteLogger>& logger, uint8_t maskV4, uint8_t maskV6, const boost::uuids::uuid& uniqueId, const ComboAddress& remote, const ComboAddress& local, const Netmask& ednssubnet, bool tcp, uint16_t id, size_t len, const DNSName& qname, uint16_t qtype, uint16_t qclass, const std::vector<std::string>& policyTags, const std::string& requestorId, const std::string& deviceId)
653 {
654 Netmask requestorNM(remote, remote.sin4.sin_family == AF_INET ? maskV4 : maskV6);
655 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
656 RecProtoBufMessage message(DNSProtoBufMessage::Query, uniqueId, &requestor, &local, qname, qtype, qclass, id, tcp, len);
657 message.setEDNSSubnet(ednssubnet, ednssubnet.isIpv4() ? maskV4 : maskV6);
658 message.setRequestorId(requestorId);
659 message.setDeviceId(deviceId);
660
661 if (!policyTags.empty()) {
662 message.setPolicyTags(policyTags);
663 }
664
665 // cerr <<message.toDebugString()<<endl;
666 std::string str;
667 message.serialize(str);
668 logger->queueData(str);
669 }
670
671 static void protobufLogResponse(const std::shared_ptr<RemoteLogger>& logger, const RecProtoBufMessage& message)
672 {
673 // cerr <<message.toDebugString()<<endl;
674 std::string str;
675 message.serialize(str);
676 logger->queueData(str);
677 }
678 #endif
679
680 /**
681 * Chases the CNAME provided by the PolicyCustom RPZ policy.
682 *
683 * @param spoofed: The DNSRecord that was created by the policy, should already be added to ret
684 * @param qtype: The QType of the original query
685 * @param sr: A SyncRes
686 * @param res: An integer that will contain the RCODE of the lookup we do
687 * @param ret: A vector of DNSRecords where the result of the CNAME chase should be appended to
688 */
689 static void handleRPZCustom(const DNSRecord& spoofed, const QType& qtype, SyncRes& sr, int& res, vector<DNSRecord>& ret)
690 {
691 if (spoofed.d_type == QType::CNAME) {
692 bool oldWantsRPZ = sr.getWantsRPZ();
693 sr.setWantsRPZ(false);
694 vector<DNSRecord> ans;
695 res = sr.beginResolve(DNSName(spoofed.d_content->getZoneRepresentation()), qtype, 1, ans);
696 for (const auto& rec : ans) {
697 if(rec.d_place == DNSResourceRecord::ANSWER) {
698 ret.push_back(rec);
699 }
700 }
701 // Reset the RPZ state of the SyncRes
702 sr.setWantsRPZ(oldWantsRPZ);
703 }
704 }
705
706 static bool addRecordToPacket(DNSPacketWriter& pw, const DNSRecord& rec, uint32_t& minTTL, const uint16_t maxAnswerSize)
707 {
708 pw.startRecord(rec.d_name, rec.d_type, rec.d_ttl, rec.d_class, rec.d_place);
709
710 if(rec.d_type != QType::OPT) // their TTL ain't real
711 minTTL = min(minTTL, rec.d_ttl);
712
713 rec.d_content->toPacket(pw);
714 if(pw.size() > static_cast<size_t>(maxAnswerSize)) {
715 pw.rollback();
716 if(rec.d_place != DNSResourceRecord::ADDITIONAL) {
717 pw.getHeader()->tc=1;
718 pw.truncate();
719 }
720 return false;
721 }
722
723 return true;
724 }
725
726 static void startDoResolve(void *p)
727 {
728 DNSComboWriter* dc=(DNSComboWriter *)p;
729 try {
730 if (t_queryring)
731 t_queryring->push_back(make_pair(dc->d_mdp.d_qname, dc->d_mdp.d_qtype));
732
733 uint16_t maxanswersize = dc->d_tcp ? 65535 : min(static_cast<uint16_t>(512), g_udpTruncationThreshold);
734 EDNSOpts edo;
735 bool haveEDNS=false;
736 if(getEDNSOpts(dc->d_mdp, &edo)) {
737 if(!dc->d_tcp) {
738 /* rfc6891 6.2.3:
739 "Values lower than 512 MUST be treated as equal to 512."
740 */
741 maxanswersize = min(static_cast<uint16_t>(edo.d_packetsize >= 512 ? edo.d_packetsize : 512), g_udpTruncationThreshold);
742 }
743 dc->d_ednsOpts = edo.d_options;
744 haveEDNS=true;
745
746 if (g_useIncomingECS && !dc->d_ecsParsed) {
747 for (const auto& o : edo.d_options) {
748 if (o.first == EDNSOptionCode::ECS) {
749 dc->d_ecsFound = getEDNSSubnetOptsFromString(o.second, &dc->d_ednssubnet);
750 break;
751 }
752 }
753 }
754 }
755 /* perhaps there was no EDNS or no ECS but by now we looked */
756 dc->d_ecsParsed = true;
757 vector<DNSRecord> ret;
758 vector<uint8_t> packet;
759
760 auto luaconfsLocal = g_luaconfs.getLocal();
761 // Used to tell syncres later on if we should apply NSDNAME and NSIP RPZ triggers for this query
762 bool wantsRPZ(true);
763 RecProtoBufMessage pbMessage(RecProtoBufMessage::Response);
764 #ifdef HAVE_PROTOBUF
765 if (luaconfsLocal->protobufServer) {
766 Netmask requestorNM(dc->d_remote, dc->d_remote.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
767 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
768 pbMessage.update(dc->d_uuid, &requestor, &dc->d_local, dc->d_tcp, dc->d_mdp.d_header.id);
769 pbMessage.setEDNSSubnet(dc->d_ednssubnet.source, dc->d_ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
770 pbMessage.setQuestion(dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
771 }
772 #endif /* HAVE_PROTOBUF */
773
774 DNSPacketWriter pw(packet, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass);
775
776 pw.getHeader()->aa=0;
777 pw.getHeader()->ra=1;
778 pw.getHeader()->qr=1;
779 pw.getHeader()->tc=0;
780 pw.getHeader()->id=dc->d_mdp.d_header.id;
781 pw.getHeader()->rd=dc->d_mdp.d_header.rd;
782 pw.getHeader()->cd=dc->d_mdp.d_header.cd;
783
784 uint32_t minTTL=std::numeric_limits<uint32_t>::max();
785
786 SyncRes sr(dc->d_now);
787
788 bool DNSSECOK=false;
789 if(t_pdl) {
790 sr.setLuaEngine(t_pdl);
791 }
792 sr.d_requestor=dc->d_remote; // ECS needs this too
793 if(g_dnssecmode != DNSSECMode::Off) {
794 sr.setDoDNSSEC(true);
795
796 // Does the requestor want DNSSEC records?
797 if(edo.d_Z & EDNSOpts::DNSSECOK) {
798 DNSSECOK=true;
799 g_stats.dnssecQueries++;
800 }
801 } else {
802 // Ignore the client-set CD flag
803 pw.getHeader()->cd=0;
804 }
805 sr.setDNSSECValidationRequested(g_dnssecmode == DNSSECMode::ValidateAll || g_dnssecmode==DNSSECMode::ValidateForLog || ((dc->d_mdp.d_header.ad || DNSSECOK) && g_dnssecmode==DNSSECMode::Process));
806
807 #ifdef HAVE_PROTOBUF
808 sr.setInitialRequestId(dc->d_uuid);
809 #endif
810
811 if (g_useIncomingECS) {
812 sr.setIncomingECSFound(dc->d_ecsFound);
813 if (dc->d_ecsFound) {
814 sr.setIncomingECS(dc->d_ednssubnet);
815 }
816 }
817
818 bool tracedQuery=false; // we could consider letting Lua know about this too
819 bool variableAnswer = false;
820 bool shouldNotValidate = false;
821
822 /* preresolve expects res (dq.rcode) to be set to RCode::NoError by default */
823 int res = RCode::NoError;
824 DNSFilterEngine::Policy appliedPolicy;
825 DNSRecord spoofed;
826 RecursorLua4::DNSQuestion dq(dc->d_remote, dc->d_local, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_tcp, variableAnswer, wantsRPZ);
827 dq.ednsFlags = &edo.d_Z;
828 dq.ednsOptions = &dc->d_ednsOpts;
829 dq.tag = dc->d_tag;
830 dq.discardedPolicies = &sr.d_discardedPolicies;
831 dq.policyTags = &dc->d_policyTags;
832 dq.appliedPolicy = &appliedPolicy;
833 dq.currentRecords = &ret;
834 dq.dh = &dc->d_mdp.d_header;
835 dq.data = dc->d_data;
836 #ifdef HAVE_PROTOBUF
837 dq.requestorId = dc->d_requestorId;
838 dq.deviceId = dc->d_deviceId;
839 #endif
840
841 if(dc->d_mdp.d_qtype==QType::ANY && !dc->d_tcp && g_anyToTcp) {
842 pw.getHeader()->tc = 1;
843 res = 0;
844 variableAnswer = true;
845 goto sendit;
846 }
847
848 if(t_traceRegex && t_traceRegex->match(dc->d_mdp.d_qname.toString())) {
849 sr.setLogMode(SyncRes::Store);
850 tracedQuery=true;
851 }
852
853
854 if(!g_quiet || tracedQuery) {
855 L<<Logger::Warning<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] " << (dc->d_tcp ? "TCP " : "") << "question for '"<<dc->d_mdp.d_qname<<"|"
856 <<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<"' from "<<dc->getRemote();
857 if(!dc->d_ednssubnet.source.empty()) {
858 L<<" (ecs "<<dc->d_ednssubnet.source.toString()<<")";
859 }
860 L<<endl;
861 }
862
863 sr.setId(MT->getTid());
864 if(!dc->d_mdp.d_header.rd)
865 sr.setCacheOnly();
866
867 if (t_pdl) {
868 t_pdl->prerpz(dq, res);
869 }
870
871 // Check if the query has a policy attached to it
872 if (wantsRPZ) {
873 appliedPolicy = luaconfsLocal->dfe.getQueryPolicy(dc->d_mdp.d_qname, dc->d_remote, sr.d_discardedPolicies);
874 }
875
876 // if there is a RecursorLua active, and it 'took' the query in preResolve, we don't launch beginResolve
877 if(!t_pdl || !t_pdl->preresolve(dq, res)) {
878
879 sr.setWantsRPZ(wantsRPZ);
880 if(wantsRPZ) {
881 switch(appliedPolicy.d_kind) {
882 case DNSFilterEngine::PolicyKind::NoAction:
883 break;
884 case DNSFilterEngine::PolicyKind::Drop:
885 g_stats.policyDrops++;
886 g_stats.policyResults[appliedPolicy.d_kind]++;
887 delete dc;
888 dc=0;
889 return;
890 case DNSFilterEngine::PolicyKind::NXDOMAIN:
891 g_stats.policyResults[appliedPolicy.d_kind]++;
892 res=RCode::NXDomain;
893 goto haveAnswer;
894 case DNSFilterEngine::PolicyKind::NODATA:
895 g_stats.policyResults[appliedPolicy.d_kind]++;
896 res=RCode::NoError;
897 goto haveAnswer;
898 case DNSFilterEngine::PolicyKind::Custom:
899 g_stats.policyResults[appliedPolicy.d_kind]++;
900 res=RCode::NoError;
901 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
902 ret.push_back(spoofed);
903 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
904 goto haveAnswer;
905 case DNSFilterEngine::PolicyKind::Truncate:
906 if(!dc->d_tcp) {
907 g_stats.policyResults[appliedPolicy.d_kind]++;
908 res=RCode::NoError;
909 pw.getHeader()->tc=1;
910 goto haveAnswer;
911 }
912 break;
913 }
914 }
915
916 // Query got not handled for QNAME Policy reasons, now actually go out to find an answer
917 try {
918 res = sr.beginResolve(dc->d_mdp.d_qname, QType(dc->d_mdp.d_qtype), dc->d_mdp.d_qclass, ret);
919 shouldNotValidate = sr.wasOutOfBand();
920 }
921 catch(ImmediateServFailException &e) {
922 if(g_logCommonErrors)
923 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during resolve of '"<<dc->d_mdp.d_qname<<"' because: "<<e.reason<<endl;
924 res = RCode::ServFail;
925 }
926
927 // During lookup, an NSDNAME or NSIP trigger was hit in RPZ
928 if (res == -2) { // XXX This block should be macro'd, it is repeated post-resolve.
929 appliedPolicy = sr.d_appliedPolicy;
930 g_stats.policyResults[appliedPolicy.d_kind]++;
931 switch(appliedPolicy.d_kind) {
932 case DNSFilterEngine::PolicyKind::NoAction: // This can never happen
933 throw PDNSException("NoAction policy returned while a NSDNAME or NSIP trigger was hit");
934 case DNSFilterEngine::PolicyKind::Drop:
935 g_stats.policyDrops++;
936 delete dc;
937 dc=0;
938 return;
939 case DNSFilterEngine::PolicyKind::NXDOMAIN:
940 ret.clear();
941 res=RCode::NXDomain;
942 goto haveAnswer;
943
944 case DNSFilterEngine::PolicyKind::NODATA:
945 ret.clear();
946 res=RCode::NoError;
947 goto haveAnswer;
948
949 case DNSFilterEngine::PolicyKind::Truncate:
950 if(!dc->d_tcp) {
951 ret.clear();
952 res=RCode::NoError;
953 pw.getHeader()->tc=1;
954 goto haveAnswer;
955 }
956 break;
957
958 case DNSFilterEngine::PolicyKind::Custom:
959 ret.clear();
960 res=RCode::NoError;
961 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
962 ret.push_back(spoofed);
963 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
964 goto haveAnswer;
965 }
966 }
967
968 if (wantsRPZ) {
969 appliedPolicy = luaconfsLocal->dfe.getPostPolicy(ret, sr.d_discardedPolicies);
970 }
971
972 if(t_pdl) {
973 if(res == RCode::NoError) {
974 auto i=ret.cbegin();
975 for(; i!= ret.cend(); ++i)
976 if(i->d_type == dc->d_mdp.d_qtype && i->d_place == DNSResourceRecord::ANSWER)
977 break;
978 if(i == ret.cend() && t_pdl->nodata(dq, res))
979 shouldNotValidate = true;
980
981 }
982 else if(res == RCode::NXDomain && t_pdl->nxdomain(dq, res))
983 shouldNotValidate = true;
984
985 if(t_pdl->postresolve(dq, res))
986 shouldNotValidate = true;
987 }
988
989 if (wantsRPZ) { //XXX This block is repeated, see above
990 g_stats.policyResults[appliedPolicy.d_kind]++;
991 switch(appliedPolicy.d_kind) {
992 case DNSFilterEngine::PolicyKind::NoAction:
993 break;
994 case DNSFilterEngine::PolicyKind::Drop:
995 g_stats.policyDrops++;
996 delete dc;
997 dc=0;
998 return;
999 case DNSFilterEngine::PolicyKind::NXDOMAIN:
1000 ret.clear();
1001 res=RCode::NXDomain;
1002 goto haveAnswer;
1003
1004 case DNSFilterEngine::PolicyKind::NODATA:
1005 ret.clear();
1006 res=RCode::NoError;
1007 goto haveAnswer;
1008
1009 case DNSFilterEngine::PolicyKind::Truncate:
1010 if(!dc->d_tcp) {
1011 ret.clear();
1012 res=RCode::NoError;
1013 pw.getHeader()->tc=1;
1014 goto haveAnswer;
1015 }
1016 break;
1017
1018 case DNSFilterEngine::PolicyKind::Custom:
1019 ret.clear();
1020 res=RCode::NoError;
1021 spoofed=appliedPolicy.getCustomRecord(dc->d_mdp.d_qname);
1022 ret.push_back(spoofed);
1023 handleRPZCustom(spoofed, QType(dc->d_mdp.d_qtype), sr, res, ret);
1024 goto haveAnswer;
1025 }
1026 }
1027 }
1028 haveAnswer:;
1029 if(res == PolicyDecision::DROP) {
1030 g_stats.policyDrops++;
1031 delete dc;
1032 dc=0;
1033 return;
1034 }
1035 if(tracedQuery || res == -1 || res == RCode::ServFail || pw.getHeader()->rcode == RCode::ServFail)
1036 {
1037 string trace(sr.getTrace());
1038 if(!trace.empty()) {
1039 vector<string> lines;
1040 boost::split(lines, trace, boost::is_any_of("\n"));
1041 for(const string& line : lines) {
1042 if(!line.empty())
1043 L<<Logger::Warning<< line << endl;
1044 }
1045 }
1046 }
1047
1048 if(res == -1) {
1049 pw.getHeader()->rcode=RCode::ServFail;
1050 // no commit here, because no record
1051 g_stats.servFails++;
1052 }
1053 else {
1054 pw.getHeader()->rcode=res;
1055
1056 // Does the validation mode or query demand validation?
1057 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1058 try {
1059 if(sr.doLog()) {
1060 L<<Logger::Warning<<"Starting validation of answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<endl;
1061 }
1062
1063 auto state = sr.getValidationState();
1064
1065 if(state == Secure) {
1066 if(sr.doLog()) {
1067 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates correctly"<<endl;
1068 }
1069
1070 // Is the query source interested in the value of the ad-bit?
1071 if (dc->d_mdp.d_header.ad || DNSSECOK)
1072 pw.getHeader()->ad=1;
1073 }
1074 else if(state == Insecure) {
1075 if(sr.doLog()) {
1076 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Insecure"<<endl;
1077 }
1078
1079 pw.getHeader()->ad=0;
1080 }
1081 else if(state == Bogus) {
1082 if(g_dnssecLogBogus || sr.doLog() || g_dnssecmode == DNSSECMode::ValidateForLog) {
1083 L<<Logger::Warning<<"Answer to "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" for "<<dc->d_remote.toStringWithPort()<<" validates as Bogus"<<endl;
1084 }
1085
1086 // Does the query or validation mode sending out a SERVFAIL on validation errors?
1087 if(!pw.getHeader()->cd && (g_dnssecmode == DNSSECMode::ValidateAll || dc->d_mdp.d_header.ad || DNSSECOK)) {
1088 if(sr.doLog()) {
1089 L<<Logger::Warning<<"Sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" because recursor or query demands it for Bogus results"<<endl;
1090 }
1091
1092 pw.getHeader()->rcode=RCode::ServFail;
1093 goto sendit;
1094 } else {
1095 if(sr.doLog()) {
1096 L<<Logger::Warning<<"Not sending out SERVFAIL for "<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<" Bogus validation since neither config nor query demands this"<<endl;
1097 }
1098 }
1099 }
1100 }
1101 catch(ImmediateServFailException &e) {
1102 if(g_logCommonErrors)
1103 L<<Logger::Notice<<"Sending SERVFAIL to "<<dc->getRemote()<<" during validation of '"<<dc->d_mdp.d_qname<<"|"<<QType(dc->d_mdp.d_qtype).getName()<<"' because: "<<e.reason<<endl;
1104 pw.getHeader()->rcode=RCode::ServFail;
1105 goto sendit;
1106 }
1107 }
1108
1109 if(ret.size()) {
1110 orderAndShuffle(ret);
1111 if(auto sl = luaconfsLocal->sortlist.getOrderCmp(dc->d_remote)) {
1112 stable_sort(ret.begin(), ret.end(), *sl);
1113 variableAnswer=true;
1114 }
1115 }
1116
1117 bool needCommit = false;
1118 for(auto i=ret.cbegin(); i!=ret.cend(); ++i) {
1119 if( ! DNSSECOK &&
1120 ( i->d_type == QType::NSEC3 ||
1121 (
1122 ( i->d_type == QType::RRSIG || i->d_type==QType::NSEC ) &&
1123 (
1124 ( dc->d_mdp.d_qtype != i->d_type && dc->d_mdp.d_qtype != QType::ANY ) ||
1125 i->d_place != DNSResourceRecord::ANSWER
1126 )
1127 )
1128 )
1129 ) {
1130 continue;
1131 }
1132
1133 if (!addRecordToPacket(pw, *i, minTTL, maxanswersize)) {
1134 needCommit = false;
1135 break;
1136 }
1137 needCommit = true;
1138
1139 #ifdef HAVE_PROTOBUF
1140 if(luaconfsLocal->protobufServer && (i->d_type == QType::A || i->d_type == QType::AAAA || i->d_type == QType::CNAME)) {
1141 pbMessage.addRR(*i);
1142 }
1143 #endif
1144 }
1145 if(needCommit)
1146 pw.commit();
1147 }
1148 sendit:;
1149
1150 if (haveEDNS) {
1151 /* we try to add the EDNS OPT RR even for truncated answers,
1152 as rfc6891 states:
1153 "The minimal response MUST be the DNS header, question section, and an
1154 OPT record. This MUST also occur when a truncated response (using
1155 the DNS header's TC bit) is returned."
1156 */
1157 if (addRecordToPacket(pw, makeOpt(edo.d_packetsize, 0, edo.d_Z), minTTL, maxanswersize)) {
1158 pw.commit();
1159 }
1160 }
1161
1162 g_rs.submitResponse(dc->d_mdp.d_qtype, packet.size(), !dc->d_tcp);
1163 updateResponseStats(res, dc->d_remote, packet.size(), &dc->d_mdp.d_qname, dc->d_mdp.d_qtype);
1164 #ifdef HAVE_PROTOBUF
1165 if (luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || (appliedPolicy.d_name && !appliedPolicy.d_name->empty()) || !dc->d_policyTags.empty())) {
1166 pbMessage.setBytes(packet.size());
1167 pbMessage.setResponseCode(pw.getHeader()->rcode);
1168 if (appliedPolicy.d_name) {
1169 pbMessage.setAppliedPolicy(*appliedPolicy.d_name);
1170 pbMessage.setAppliedPolicyType(appliedPolicy.d_type);
1171 }
1172 pbMessage.setPolicyTags(dc->d_policyTags);
1173 pbMessage.setQueryTime(dc->d_now.tv_sec, dc->d_now.tv_usec);
1174 pbMessage.setRequestorId(dq.requestorId);
1175 pbMessage.setDeviceId(dq.deviceId);
1176 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1177 }
1178 #endif
1179 if(!dc->d_tcp) {
1180 struct msghdr msgh;
1181 struct iovec iov;
1182 char cbuf[256];
1183 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)&*packet.begin(), packet.size(), &dc->d_remote);
1184 msgh.msg_control=NULL;
1185
1186 if(g_fromtosockets.count(dc->d_socket)) {
1187 addCMsgSrcAddr(&msgh, cbuf, &dc->d_local, 0);
1188 }
1189 if(sendmsg(dc->d_socket, &msgh, 0) < 0 && g_logCommonErrors)
1190 L<<Logger::Warning<<"Sending UDP reply to client "<<dc->d_remote.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1191 if(!SyncRes::s_nopacketcache && !variableAnswer && !sr.wasVariable() ) {
1192 t_packetCache->insertResponsePacket(dc->d_tag, dc->d_qhash, dc->d_mdp.d_qname, dc->d_mdp.d_qtype, dc->d_mdp.d_qclass,
1193 string((const char*)&*packet.begin(), packet.size()),
1194 g_now.tv_sec,
1195 pw.getHeader()->rcode == RCode::ServFail ? SyncRes::s_packetcacheservfailttl :
1196 min(minTTL,SyncRes::s_packetcachettl),
1197 &pbMessage);
1198 }
1199 // else cerr<<"Not putting in packet cache: "<<sr.wasVariable()<<endl;
1200 }
1201 else {
1202 char buf[2];
1203 buf[0]=packet.size()/256;
1204 buf[1]=packet.size()%256;
1205
1206 Utility::iovec iov[2];
1207
1208 iov[0].iov_base=(void*)buf; iov[0].iov_len=2;
1209 iov[1].iov_base=(void*)&*packet.begin(); iov[1].iov_len = packet.size();
1210
1211 int wret=Utility::writev(dc->d_socket, iov, 2);
1212 bool hadError=true;
1213
1214 if(wret == 0)
1215 L<<Logger::Error<<"EOF writing TCP answer to "<<dc->getRemote()<<endl;
1216 else if(wret < 0 )
1217 L<<Logger::Error<<"Error writing TCP answer to "<<dc->getRemote()<<": "<< strerror(errno) <<endl;
1218 else if((unsigned int)wret != 2 + packet.size())
1219 L<<Logger::Error<<"Oops, partial answer sent to "<<dc->getRemote()<<" for "<<dc->d_mdp.d_qname<<" (size="<< (2 + packet.size()) <<", sent "<<wret<<")"<<endl;
1220 else
1221 hadError=false;
1222
1223 // update tcp connection status, either by closing or moving to 'BYTE0'
1224
1225 if(hadError) {
1226 // no need to remove us from FDM, we weren't there
1227 dc->d_socket = -1;
1228 }
1229 else {
1230 dc->d_tcpConnection->queriesCount++;
1231 if (g_tcpMaxQueriesPerConn && dc->d_tcpConnection->queriesCount >= g_tcpMaxQueriesPerConn) {
1232 dc->d_socket = -1;
1233 }
1234 else {
1235 dc->d_tcpConnection->state=TCPConnection::BYTE0;
1236 Utility::gettimeofday(&g_now, 0); // needs to be updated
1237 t_fdm->addReadFD(dc->d_socket, handleRunningTCPQuestion, dc->d_tcpConnection);
1238 t_fdm->setReadTTD(dc->d_socket, g_now, g_tcpTimeout);
1239 }
1240 }
1241 }
1242 float spent=makeFloat(sr.getNow()-dc->d_now);
1243 if(!g_quiet) {
1244 L<<Logger::Error<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] answer to "<<(dc->d_mdp.d_header.rd?"":"non-rd ")<<"question '"<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype);
1245 L<<"': "<<ntohs(pw.getHeader()->ancount)<<" answers, "<<ntohs(pw.getHeader()->arcount)<<" additional, took "<<sr.d_outqueries<<" packets, "<<
1246 sr.d_totUsec/1000.0<<" netw ms, "<< spent*1000.0<<" tot ms, "<<
1247 sr.d_throttledqueries<<" throttled, "<<sr.d_timeouts<<" timeouts, "<<sr.d_tcpoutqueries<<" tcp connections, rcode="<< res;
1248
1249 if(!shouldNotValidate && sr.isDNSSECValidationRequested()) {
1250 L<< ", dnssec="<<vStates[sr.getValidationState()];
1251 }
1252
1253 L<<endl;
1254
1255 }
1256
1257 sr.d_outqueries ? t_RC->cacheMisses++ : t_RC->cacheHits++;
1258
1259 if(spent < 0.001)
1260 g_stats.answers0_1++;
1261 else if(spent < 0.010)
1262 g_stats.answers1_10++;
1263 else if(spent < 0.1)
1264 g_stats.answers10_100++;
1265 else if(spent < 1.0)
1266 g_stats.answers100_1000++;
1267 else
1268 g_stats.answersSlow++;
1269
1270 uint64_t newLat=(uint64_t)(spent*1000000);
1271 newLat = min(newLat,(uint64_t)(((uint64_t) g_networkTimeoutMsec)*1000)); // outliers of several minutes exist..
1272 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + (float)newLat/g_latencyStatSize;
1273 // no worries, we do this for packet cache hits elsewhere
1274
1275 auto ourtime = 1000.0*spent-sr.d_totUsec/1000.0; // in msec
1276 if(ourtime < 1)
1277 g_stats.ourtime0_1++;
1278 else if(ourtime < 2)
1279 g_stats.ourtime1_2++;
1280 else if(ourtime < 4)
1281 g_stats.ourtime2_4++;
1282 else if(ourtime < 8)
1283 g_stats.ourtime4_8++;
1284 else if(ourtime < 16)
1285 g_stats.ourtime8_16++;
1286 else if(ourtime < 32)
1287 g_stats.ourtime16_32++;
1288 else {
1289 // cerr<<"SLOW: "<<ourtime<<"ms -> "<<dc->d_mdp.d_qname<<"|"<<DNSRecordContent::NumberToType(dc->d_mdp.d_qtype)<<endl;
1290 g_stats.ourtimeSlow++;
1291 }
1292 if(ourtime >= 0.0) {
1293 newLat=ourtime*1000; // usec
1294 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + (float)newLat/g_latencyStatSize;
1295 }
1296 // cout<<dc->d_mdp.d_qname<<"\t"<<MT->getUsec()<<"\t"<<sr.d_outqueries<<endl;
1297 delete dc;
1298 dc=0;
1299 }
1300 catch(PDNSException &ae) {
1301 L<<Logger::Error<<"startDoResolve problem "<<makeLoginfo(dc)<<": "<<ae.reason<<endl;
1302 delete dc;
1303 }
1304 catch(MOADNSException& e) {
1305 L<<Logger::Error<<"DNS parser error "<<makeLoginfo(dc) <<": "<<dc->d_mdp.d_qname<<", "<<e.what()<<endl;
1306 delete dc;
1307 }
1308 catch(std::exception& e) {
1309 L<<Logger::Error<<"STL error "<< makeLoginfo(dc)<<": "<<e.what();
1310
1311 // Luawrapper nests the exception from Lua, so we unnest it here
1312 try {
1313 std::rethrow_if_nested(e);
1314 } catch(const std::exception& ne) {
1315 L<<". Extra info: "<<ne.what();
1316 } catch(...) {}
1317
1318 L<<endl;
1319 delete dc;
1320 }
1321 catch(...) {
1322 L<<Logger::Error<<"Any other exception in a resolver context "<< makeLoginfo(dc) <<endl;
1323 }
1324
1325 g_stats.maxMThreadStackUsage = max(MT->getMaxStackUsage(), g_stats.maxMThreadStackUsage);
1326 }
1327
1328 static void makeControlChannelSocket(int processNum=-1)
1329 {
1330 string sockname=::arg()["socket-dir"]+"/"+s_programname;
1331 if(processNum >= 0)
1332 sockname += "."+std::to_string(processNum);
1333 sockname+=".controlsocket";
1334 s_rcc.listen(sockname);
1335
1336 int sockowner = -1;
1337 int sockgroup = -1;
1338
1339 if (!::arg().isEmpty("socket-group"))
1340 sockgroup=::arg().asGid("socket-group");
1341 if (!::arg().isEmpty("socket-owner"))
1342 sockowner=::arg().asUid("socket-owner");
1343
1344 if (sockgroup > -1 || sockowner > -1) {
1345 if(chown(sockname.c_str(), sockowner, sockgroup) < 0) {
1346 unixDie("Failed to chown control socket");
1347 }
1348 }
1349
1350 // do mode change if socket-mode is given
1351 if(!::arg().isEmpty("socket-mode")) {
1352 mode_t sockmode=::arg().asMode("socket-mode");
1353 if(chmod(sockname.c_str(), sockmode) < 0) {
1354 unixDie("Failed to chmod control socket");
1355 }
1356 }
1357 }
1358
1359 static bool getQNameAndSubnet(const std::string& question, DNSName* dnsname, uint16_t* qtype, uint16_t* qclass, EDNSSubnetOpts* ednssubnet, std::map<uint16_t, EDNSOptionView>* options)
1360 {
1361 bool found = false;
1362 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1363 size_t questionLen = question.length();
1364 unsigned int consumed=0;
1365 *dnsname=DNSName(question.c_str(), questionLen, sizeof(dnsheader), false, qtype, qclass, &consumed);
1366
1367 size_t pos= sizeof(dnsheader)+consumed+4;
1368 /* at least OPT root label (1), type (2), class (2) and ttl (4) + OPT RR rdlen (2)
1369 = 11 */
1370 if(ntohs(dh->arcount) == 1 && questionLen > pos + 11) { // this code can extract one (1) EDNS Subnet option
1371 /* OPT root label (1) followed by type (2) */
1372 if(question.at(pos)==0 && question.at(pos+1)==0 && question.at(pos+2)==QType::OPT) {
1373 if (!options) {
1374 char* ecsStart = nullptr;
1375 size_t ecsLen = 0;
1376 int res = getEDNSOption((char*)question.c_str()+pos+9, questionLen - pos - 9, EDNSOptionCode::ECS, &ecsStart, &ecsLen);
1377 if (res == 0 && ecsLen > 4) {
1378 EDNSSubnetOpts eso;
1379 if(getEDNSSubnetOptsFromString(ecsStart + 4, ecsLen - 4, &eso)) {
1380 *ednssubnet=eso;
1381 found = true;
1382 }
1383 }
1384 }
1385 else {
1386 int res = getEDNSOptions((char*)question.c_str()+pos+9, questionLen - pos - 9, *options);
1387 if (res == 0) {
1388 const auto& it = options->find(EDNSOptionCode::ECS);
1389 if (it != options->end() && it->second.content != nullptr && it->second.size > 0) {
1390 EDNSSubnetOpts eso;
1391 if(getEDNSSubnetOptsFromString(it->second.content, it->second.size, &eso)) {
1392 *ednssubnet=eso;
1393 found = true;
1394 }
1395 }
1396 }
1397 }
1398 }
1399 }
1400 return found;
1401 }
1402
1403 static void handleRunningTCPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1404 {
1405 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(var);
1406
1407 if(conn->state==TCPConnection::BYTE0) {
1408 ssize_t bytes=recv(conn->getFD(), conn->data, 2, 0);
1409 if(bytes==1)
1410 conn->state=TCPConnection::BYTE1;
1411 if(bytes==2) {
1412 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1413 conn->bytesread=0;
1414 conn->state=TCPConnection::GETQUESTION;
1415 }
1416 if(!bytes || bytes < 0) {
1417 t_fdm->removeReadFD(fd);
1418 return;
1419 }
1420 }
1421 else if(conn->state==TCPConnection::BYTE1) {
1422 ssize_t bytes=recv(conn->getFD(), conn->data+1, 1, 0);
1423 if(bytes==1) {
1424 conn->state=TCPConnection::GETQUESTION;
1425 conn->qlen=(((unsigned char)conn->data[0]) << 8)+ (unsigned char)conn->data[1];
1426 conn->bytesread=0;
1427 }
1428 if(!bytes || bytes < 0) {
1429 if(g_logCommonErrors)
1430 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected after first byte"<<endl;
1431 t_fdm->removeReadFD(fd);
1432 return;
1433 }
1434 }
1435 else if(conn->state==TCPConnection::GETQUESTION) {
1436 ssize_t bytes=recv(conn->getFD(), conn->data + conn->bytesread, conn->qlen - conn->bytesread, 0);
1437 if(!bytes || bytes < 0 || bytes > std::numeric_limits<std::uint16_t>::max()) {
1438 L<<Logger::Error<<"TCP client "<< conn->d_remote.toString() <<" disconnected while reading question body"<<endl;
1439 t_fdm->removeReadFD(fd);
1440 return;
1441 }
1442 conn->bytesread+=(uint16_t)bytes;
1443 if(conn->bytesread==conn->qlen) {
1444 t_fdm->removeReadFD(fd); // should no longer awake ourselves when there is data to read
1445
1446 DNSComboWriter* dc=nullptr;
1447 try {
1448 dc=new DNSComboWriter(conn->data, conn->qlen, g_now);
1449 }
1450 catch(MOADNSException &mde) {
1451 g_stats.clientParseError++;
1452 if(g_logCommonErrors)
1453 L<<Logger::Error<<"Unable to parse packet from TCP client "<< conn->d_remote.toString() <<endl;
1454 return;
1455 }
1456 dc->d_tcpConnection = conn; // carry the torch
1457 dc->setSocket(conn->getFD()); // this is the only time a copy is made of the actual fd
1458 dc->d_tcp=true;
1459 dc->setRemote(&conn->d_remote);
1460 ComboAddress dest;
1461 memset(&dest, 0, sizeof(dest));
1462 dest.sin4.sin_family = conn->d_remote.sin4.sin_family;
1463 socklen_t len = dest.getSocklen();
1464 getsockname(conn->getFD(), (sockaddr*)&dest, &len); // if this fails, we're ok with it
1465 dc->setLocal(dest);
1466 DNSName qname;
1467 uint16_t qtype=0;
1468 uint16_t qclass=0;
1469 bool needECS = false;
1470 string requestorId;
1471 string deviceId;
1472 #ifdef HAVE_PROTOBUF
1473 auto luaconfsLocal = g_luaconfs.getLocal();
1474 if (luaconfsLocal->protobufServer) {
1475 needECS = true;
1476 }
1477 #endif
1478
1479 if(needECS || (t_pdl && t_pdl->d_gettag)) {
1480
1481 try {
1482 std::map<uint16_t, EDNSOptionView> ednsOptions;
1483 dc->d_ecsParsed = true;
1484 dc->d_ecsFound = getQNameAndSubnet(std::string(conn->data, conn->qlen), &qname, &qtype, &qclass, &dc->d_ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1485
1486 if(t_pdl && t_pdl->d_gettag) {
1487 try {
1488 dc->d_tag = t_pdl->gettag(conn->d_remote, dc->d_ednssubnet.source, dest, qname, qtype, &dc->d_policyTags, dc->d_data, ednsOptions, true, requestorId, deviceId);
1489 }
1490 catch(std::exception& e) {
1491 if(g_logCommonErrors)
1492 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1493 }
1494 }
1495 }
1496 catch(std::exception& e)
1497 {
1498 if(g_logCommonErrors)
1499 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1500 }
1501 }
1502 #ifdef HAVE_PROTOBUF
1503 if(luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1504 dc->d_requestorId = requestorId;
1505 dc->d_deviceId = deviceId;
1506 dc->d_uuid = (*t_uuidGenerator)();
1507 }
1508
1509 if(luaconfsLocal->protobufServer) {
1510 try {
1511 const struct dnsheader* dh = (const struct dnsheader*) conn->data;
1512
1513 if (!luaconfsLocal->protobufTaggedOnly) {
1514 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, dc->d_uuid, conn->d_remote, dest, dc->d_ednssubnet.source, true, dh->id, conn->qlen, qname, qtype, qclass, dc->d_policyTags, dc->d_requestorId, dc->d_deviceId);
1515 }
1516 }
1517 catch(std::exception& e) {
1518 if(g_logCommonErrors)
1519 L<<Logger::Warning<<"Error parsing a TCP query packet for edns subnet: "<<e.what()<<endl;
1520 }
1521 }
1522 #endif
1523 if(dc->d_mdp.d_header.qr) {
1524 delete dc;
1525 g_stats.ignoredCount++;
1526 L<<Logger::Error<<"Ignoring answer from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1527 return;
1528 }
1529 if(dc->d_mdp.d_header.opcode) {
1530 delete dc;
1531 g_stats.ignoredCount++;
1532 L<<Logger::Error<<"Ignoring non-query opcode from TCP client "<< conn->d_remote.toString() <<" on server socket!"<<endl;
1533 return;
1534 }
1535 else {
1536 ++g_stats.qcounter;
1537 ++g_stats.tcpqcounter;
1538 MT->makeThread(startDoResolve, dc); // deletes dc, will set state to BYTE0 again
1539 return;
1540 }
1541 }
1542 }
1543 }
1544
1545 //! Handle new incoming TCP connection
1546 static void handleNewTCPQuestion(int fd, FDMultiplexer::funcparam_t& )
1547 {
1548 ComboAddress addr;
1549 socklen_t addrlen=sizeof(addr);
1550 int newsock=accept(fd, (struct sockaddr*)&addr, &addrlen);
1551 if(newsock>=0) {
1552 if(MT->numProcesses() > g_maxMThreads) {
1553 g_stats.overCapacityDrops++;
1554 try {
1555 closesocket(newsock);
1556 }
1557 catch(const PDNSException& e) {
1558 L<<Logger::Error<<"Error closing TCP socket after an over capacity drop: "<<e.reason<<endl;
1559 }
1560 return;
1561 }
1562
1563 if(t_remotes)
1564 t_remotes->push_back(addr);
1565 if(t_allowFrom && !t_allowFrom->match(&addr)) {
1566 if(!g_quiet)
1567 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping TCP query from "<<addr.toString()<<", address not matched by allow-from"<<endl;
1568
1569 g_stats.unauthorizedTCP++;
1570 try {
1571 closesocket(newsock);
1572 }
1573 catch(const PDNSException& e) {
1574 L<<Logger::Error<<"Error closing TCP socket after an ACL drop: "<<e.reason<<endl;
1575 }
1576 return;
1577 }
1578 if(g_maxTCPPerClient && t_tcpClientCounts->count(addr) && (*t_tcpClientCounts)[addr] >= g_maxTCPPerClient) {
1579 g_stats.tcpClientOverflow++;
1580 try {
1581 closesocket(newsock); // don't call TCPConnection::closeAndCleanup here - did not enter it in the counts yet!
1582 }
1583 catch(const PDNSException& e) {
1584 L<<Logger::Error<<"Error closing TCP socket after an overflow drop: "<<e.reason<<endl;
1585 }
1586 return;
1587 }
1588
1589 setNonBlocking(newsock);
1590 std::shared_ptr<TCPConnection> tc = std::make_shared<TCPConnection>(newsock, addr);
1591 tc->state=TCPConnection::BYTE0;
1592
1593 t_fdm->addReadFD(tc->getFD(), handleRunningTCPQuestion, tc);
1594
1595 struct timeval now;
1596 Utility::gettimeofday(&now, 0);
1597 t_fdm->setReadTTD(tc->getFD(), now, g_tcpTimeout);
1598 }
1599 }
1600
// Process a single UDP query that already passed the checks in handleNewUDPQuestion.
//
// question - raw query packet
// fromaddr - address of the client
// destaddr - local address the query was sent to
// tv       - receive timestamp harvested by the caller; {0,0} disables the age check
// fd       - socket the query arrived on, also used to send a packet cache hit
//
// Steps, in order:
//  - drop the query (tooOldDrops) when tv is set and it is more than 1000 ms old
//  - when protobuf logging or a gettag hook is active, parse qname/ECS and run gettag
//  - log the query over protobuf when configured
//  - look the query up in the packet cache; on a hit the cached answer is sent right here
//  - apply the ipfilter hook and the MTasker over-capacity limit
//  - otherwise build a DNSComboWriter and start startDoResolve in a new MTasker thread
//
// Every path returns 0 (a null string pointer).
1601 static string* doProcessUDPQuestion(const std::string& question, const ComboAddress& fromaddr, const ComboAddress& destaddr, struct timeval tv, int fd)
1602 {
1603 gettimeofday(&g_now, 0);
1604 struct timeval diff = g_now - tv;
// age of the query in milliseconds
1605 double delta=(diff.tv_sec*1000 + diff.tv_usec/1000.0);
1606
1607 if(tv.tv_sec && delta > 1000.0) {
1608 g_stats.tooOldDrops++;
1609 return 0;
1610 }
1611
1612 ++g_stats.qcounter;
1613 if(fromaddr.sin4.sin_family==AF_INET6)
1614 g_stats.ipv6qcounter++;
1615
1616 string response;
1617 const struct dnsheader* dh = (struct dnsheader*)question.c_str();
1618 unsigned int ctag=0;
1619 uint32_t qhash = 0;
1620 bool needECS = false;
1621 std::vector<std::string> policyTags;
1622 LuaContext::LuaObject data;
1623 string requestorId;
1624 string deviceId;
1625 #ifdef HAVE_PROTOBUF
1626 boost::uuids::uuid uniqueId;
1627 auto luaconfsLocal = g_luaconfs.getLocal();
// a unique id is generated when either protobuf server is configured;
// ECS parsing is only forced for the (incoming) protobufServer
1628 if (luaconfsLocal->protobufServer) {
1629 uniqueId = (*t_uuidGenerator)();
1630 needECS = true;
1631 } else if (luaconfsLocal->outgoingProtobufServer) {
1632 uniqueId = (*t_uuidGenerator)();
1633 }
1634 #endif
1635 EDNSSubnetOpts ednssubnet;
1636 bool ecsFound = false;
1637 bool ecsParsed = false;
1638 try {
1639 DNSName qname;
1640 uint16_t qtype=0;
1641 uint16_t qclass=0;
1642 uint32_t age;
1643 bool qnameParsed=false;
1644 #ifdef MALLOC_TRACE
1645 /*
1646 static uint64_t last=0;
1647 if(!last)
1648 g_mtracer->clearAllocators();
1649 cout<<g_mtracer->getAllocs()-last<<" "<<g_mtracer->getNumOut()<<" -- BEGIN TRACE"<<endl;
1650 last=g_mtracer->getAllocs();
1651 cout<<g_mtracer->topAllocatorsString()<<endl;
1652 g_mtracer->clearAllocators();
1653 */
1654 #endif
1655
// qname and ECS are only parsed here when protobuf logging or a gettag hook needs them;
// a parse or gettag failure is logged and processing continues with ctag=0
1656 if(needECS || (t_pdl && t_pdl->d_gettag)) {
1657 try {
1658 std::map<uint16_t, EDNSOptionView> ednsOptions;
1659 ecsFound = getQNameAndSubnet(question, &qname, &qtype, &qclass, &ednssubnet, g_gettagNeedsEDNSOptions ? &ednsOptions : nullptr);
1660 qnameParsed = true;
1661 ecsParsed = true;
1662
1663 if(t_pdl && t_pdl->d_gettag) {
1664 try {
1665 ctag=t_pdl->gettag(fromaddr, ednssubnet.source, destaddr, qname, qtype, &policyTags, data, ednsOptions, false, requestorId, deviceId);
1666 }
1667 catch(std::exception& e) {
1668 if(g_logCommonErrors)
1669 L<<Logger::Warning<<"Error parsing a query packet qname='"<<qname<<"' for tag determination, setting tag=0: "<<e.what()<<endl;
1670 }
1671 }
1672 }
1673 catch(std::exception& e)
1674 {
1675 if(g_logCommonErrors)
1676 L<<Logger::Warning<<"Error parsing a query packet for tag determination, setting tag=0: "<<e.what()<<endl;
1677 }
1678 }
1679
1680 bool cacheHit = false;
1681 RecProtoBufMessage pbMessage(DNSProtoBufMessage::DNSProtoBufMessageType::Response);
1682 #ifdef HAVE_PROTOBUF
// with protobufTaggedOnly set, the query is only logged when gettag produced policy tags
1683 if(luaconfsLocal->protobufServer) {
1684 if (!luaconfsLocal->protobufTaggedOnly || !policyTags.empty()) {
1685 protobufLogQuery(luaconfsLocal->protobufServer, luaconfsLocal->protobufMaskV4, luaconfsLocal->protobufMaskV6, uniqueId, fromaddr, destaddr, ednssubnet.source, false, dh->id, question.size(), qname, qtype, qclass, policyTags, requestorId, deviceId);
1686 }
1687 }
1688 #endif /* HAVE_PROTOBUF */
1689
// packet cache lookup: the overload taking qname/qtype/qclass is used when they were already parsed above
1690 if (qnameParsed) {
1691 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, qname, qtype, qclass, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1692 }
1693 else {
1694 cacheHit = (!SyncRes::s_nopacketcache && t_packetCache->getResponsePacket(ctag, question, g_now.tv_sec, &response, &age, &qhash, &pbMessage));
1695 }
1696
1697 if (cacheHit) {
1698 #ifdef HAVE_PROTOBUF
1699 if(luaconfsLocal->protobufServer && (!luaconfsLocal->protobufTaggedOnly || !pbMessage.getAppliedPolicy().empty() || !pbMessage.getPolicyTags().empty())) {
1700 Netmask requestorNM(fromaddr, fromaddr.sin4.sin_family == AF_INET ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1701 const ComboAddress& requestor = requestorNM.getMaskedNetwork();
1702 pbMessage.update(uniqueId, &requestor, &destaddr, false, dh->id);
1703 pbMessage.setEDNSSubnet(ednssubnet.source, ednssubnet.source.isIpv4() ? luaconfsLocal->protobufMaskV4 : luaconfsLocal->protobufMaskV6);
1704 pbMessage.setQueryTime(g_now.tv_sec, g_now.tv_usec);
1705 pbMessage.setRequestorId(requestorId);
1706 pbMessage.setDeviceId(deviceId);
1707 protobufLogResponse(luaconfsLocal->protobufServer, pbMessage);
1708 }
1709 #endif /* HAVE_PROTOBUF */
1710 if(!g_quiet)
1711 L<<Logger::Notice<<t_id<< " question answered from packet cache tag="<<ctag<<" from "<<fromaddr.toString()<<endl;
1712
1713 g_stats.packetCacheHits++;
1714 SyncRes::s_queries++;
1715 ageDNSPacket(response, age);
1716 struct msghdr msgh;
1717 struct iovec iov;
1718 char cbuf[256];
1719 fillMSGHdr(&msgh, &iov, cbuf, 0, (char*)response.c_str(), response.length(), const_cast<ComboAddress*>(&fromaddr));
1720 msgh.msg_control=NULL;
1721
// for sockets listed in g_fromtosockets the reply carries destaddr as its source address
1722 if(g_fromtosockets.count(fd)) {
1723 addCMsgSrcAddr(&msgh, cbuf, &destaddr, 0);
1724 }
1725 if(sendmsg(fd, &msgh, 0) < 0 && g_logCommonErrors)
1726 L<<Logger::Warning<<"Sending UDP reply to client "<<fromaddr.toStringWithPort()<<" failed with: "<<strerror(errno)<<endl;
1727
1728 if(response.length() >= sizeof(struct dnsheader)) {
1729 struct dnsheader tmpdh;
1730 memcpy(&tmpdh, response.c_str(), sizeof(tmpdh));
1731 updateResponseStats(tmpdh.rcode, fromaddr, response.length(), 0, 0);
1732 }
1733 g_stats.avgLatencyUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyUsec + 0.0; // we assume 0 usec
1734 g_stats.avgLatencyOursUsec=(1-1.0/g_latencyStatSize)*g_stats.avgLatencyOursUsec + 0.0; // we assume 0 usec
1735 return 0;
1736 }
1737 }
1738 catch(std::exception& e) {
1739 L<<Logger::Error<<"Error processing or aging answer packet: "<<e.what()<<endl;
1740 return 0;
1741 }
1742
// not answered from the packet cache: the ipfilter hook may still drop the query
1743 if(t_pdl) {
1744 if(t_pdl->ipfilter(fromaddr, destaddr, *dh)) {
1745 if(!g_quiet)
1746 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<" based on policy"<<endl;
1747 g_stats.policyDrops++;
1748 return 0;
1749 }
1750 }
1751
1752 if(MT->numProcesses() > g_maxMThreads) {
1753 if(!g_quiet)
1754 L<<Logger::Notice<<t_id<<" ["<<MT->getTid()<<"/"<<MT->numProcesses()<<"] DROPPED question from "<<fromaddr.toStringWithPort()<<", over capacity"<<endl;
1755
1756 g_stats.overCapacityDrops++;
1757 return 0;
1758 }
1759
// hand everything gathered so far to the resolver thread through the DNSComboWriter
1760 DNSComboWriter* dc = new DNSComboWriter(question.c_str(), question.size(), g_now);
1761 dc->setSocket(fd);
1762 dc->d_tag=ctag;
1763 dc->d_qhash=qhash;
1764 dc->d_query = question;
1765 dc->setRemote(&fromaddr);
1766 dc->setLocal(destaddr);
1767 dc->d_tcp=false;
1768 dc->d_policyTags = policyTags;
1769 dc->d_data = data;
1770 dc->d_ecsFound = ecsFound;
1771 dc->d_ecsParsed = ecsParsed;
1772 dc->d_ednssubnet = ednssubnet;
1773 #ifdef HAVE_PROTOBUF
1774 if (luaconfsLocal->protobufServer || luaconfsLocal->outgoingProtobufServer) {
1775 dc->d_uuid = uniqueId;
1776 }
1777 dc->d_requestorId = requestorId;
1778 dc->d_deviceId = deviceId;
1779 #endif
1780
1781 MT->makeThread(startDoResolve, (void*) dc); // deletes dc
1782 return 0;
1783 }
1784
1785
1786 static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var)
1787 {
1788 ssize_t len;
1789 char data[1500];
1790 ComboAddress fromaddr;
1791 struct msghdr msgh;
1792 struct iovec iov;
1793 char cbuf[256];
1794 bool firstQuery = true;
1795
1796 fromaddr.sin6.sin6_family=AF_INET6; // this makes sure fromaddr is big enough
1797 fillMSGHdr(&msgh, &iov, cbuf, sizeof(cbuf), data, sizeof(data), &fromaddr);
1798
1799 for(;;)
1800 if((len=recvmsg(fd, &msgh, 0)) >= 0) {
1801
1802 firstQuery = false;
1803
1804 if(t_remotes)
1805 t_remotes->push_back(fromaddr);
1806
1807 if(t_allowFrom && !t_allowFrom->match(&fromaddr)) {
1808 if(!g_quiet)
1809 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toString()<<", address not matched by allow-from"<<endl;
1810
1811 g_stats.unauthorizedUDP++;
1812 return;
1813 }
1814 BOOST_STATIC_ASSERT(offsetof(sockaddr_in, sin_port) == offsetof(sockaddr_in6, sin6_port));
1815 if(!fromaddr.sin4.sin_port) { // also works for IPv6
1816 if(!g_quiet)
1817 L<<Logger::Error<<"["<<MT->getTid()<<"] dropping UDP query from "<<fromaddr.toStringWithPort()<<", can't deal with port 0"<<endl;
1818
1819 g_stats.clientParseError++; // not quite the best place to put it, but needs to go somewhere
1820 return;
1821 }
1822 try {
1823 dnsheader* dh=(dnsheader*)data;
1824
1825 if(dh->qr) {
1826 g_stats.ignoredCount++;
1827 if(g_logCommonErrors)
1828 L<<Logger::Error<<"Ignoring answer from "<<fromaddr.toString()<<" on server socket!"<<endl;
1829 }
1830 else if(dh->opcode) {
1831 g_stats.ignoredCount++;
1832 if(g_logCommonErrors)
1833 L<<Logger::Error<<"Ignoring non-query opcode "<<dh->opcode<<" from "<<fromaddr.toString()<<" on server socket!"<<endl;
1834 }
1835 else {
1836 string question(data, (size_t)len);
1837 struct timeval tv={0,0};
1838 HarvestTimestamp(&msgh, &tv);
1839 ComboAddress dest;
1840 memset(&dest, 0, sizeof(dest)); // this makes sure we ignore this address if not returned by recvmsg above
1841 auto loc = rplookup(g_listenSocketsAddresses, fd);
1842 if(HarvestDestinationAddress(&msgh, &dest)) {
1843 // but.. need to get port too
1844 if(loc)
1845 dest.sin4.sin_port = loc->sin4.sin_port;
1846 }
1847 else {
1848 if(loc) {
1849 dest = *loc;
1850 }
1851 else {
1852 dest.sin4.sin_family = fromaddr.sin4.sin_family;
1853 socklen_t slen = dest.getSocklen();
1854 getsockname(fd, (sockaddr*)&dest, &slen); // if this fails, we're ok with it
1855 }
1856 }
1857 if(g_weDistributeQueries)
1858 distributeAsyncFunction(question, boost::bind(doProcessUDPQuestion, question, fromaddr, dest, tv, fd));
1859 else
1860 doProcessUDPQuestion(question, fromaddr, dest, tv, fd);
1861 }
1862 }
1863 catch(MOADNSException& mde) {
1864 g_stats.clientParseError++;
1865 if(g_logCommonErrors)
1866 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<mde.what()<<endl;
1867 }
1868 catch(std::runtime_error& e) {
1869 g_stats.clientParseError++;
1870 if(g_logCommonErrors)
1871 L<<Logger::Error<<"Unable to parse packet from remote UDP client "<<fromaddr.toString() <<": "<<e.what()<<endl;
1872 }
1873 }
1874 else {
1875 // cerr<<t_id<<" had error: "<<stringerror()<<endl;
1876 if(firstQuery && errno == EAGAIN)
1877 g_stats.noPacketError++;
1878
1879 break;
1880 }
1881 }
1882
1883 static void makeTCPServerSockets(unsigned int threadId)
1884 {
1885 int fd;
1886 vector<string>locals;
1887 stringtok(locals,::arg()["local-address"]," ,");
1888
1889 if(locals.empty())
1890 throw PDNSException("No local address specified");
1891
1892 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1893 ServiceTuple st;
1894 st.port=::arg().asNum("local-port");
1895 parseService(*i, st);
1896
1897 ComboAddress sin;
1898
1899 memset((char *)&sin,0, sizeof(sin));
1900 sin.sin4.sin_family = AF_INET;
1901 if(!IpToU32(st.host, (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1902 sin.sin6.sin6_family = AF_INET6;
1903 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1904 throw PDNSException("Unable to resolve local address for TCP server on '"+ st.host +"'");
1905 }
1906
1907 fd=socket(sin.sin6.sin6_family, SOCK_STREAM, 0);
1908 if(fd<0)
1909 throw PDNSException("Making a TCP server socket for resolver: "+stringerror());
1910
1911 setCloseOnExec(fd);
1912
1913 int tmp=1;
1914 if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) {
1915 L<<Logger::Error<<"Setsockopt failed for TCP listening socket"<<endl;
1916 exit(1);
1917 }
1918 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &tmp, sizeof(tmp)) < 0) {
1919 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
1920 }
1921
1922 #ifdef TCP_DEFER_ACCEPT
1923 if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) {
1924 if(i==locals.begin())
1925 L<<Logger::Error<<"Enabled TCP data-ready filter for (slight) DoS protection"<<endl;
1926 }
1927 #endif
1928
1929 if( ::arg().mustDo("non-local-bind") )
1930 Utility::setBindAny(AF_INET, fd);
1931
1932 #ifdef SO_REUSEPORT
1933 if(g_reusePort) {
1934 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &tmp, sizeof(tmp)) < 0)
1935 throw PDNSException("SO_REUSEPORT: "+stringerror());
1936 }
1937 #endif
1938
1939 if (::arg().asNum("tcp-fast-open") > 0) {
1940 #ifdef TCP_FASTOPEN
1941 int fastOpenQueueSize = ::arg().asNum("tcp-fast-open");
1942 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &fastOpenQueueSize, sizeof fastOpenQueueSize) < 0) {
1943 L<<Logger::Error<<"Failed to enable TCP Fast Open for listening socket: "<<strerror(errno)<<endl;
1944 }
1945 #else
1946 L<<Logger::Warning<<"TCP Fast Open configured but not supported for listening socket"<<endl;
1947 #endif
1948 }
1949
1950 sin.sin4.sin_port = htons(st.port);
1951 socklen_t socklen=sin.sin4.sin_family==AF_INET ? sizeof(sin.sin4) : sizeof(sin.sin6);
1952 if (::bind(fd, (struct sockaddr *)&sin, socklen )<0)
1953 throw PDNSException("Binding TCP server socket for "+ st.host +": "+stringerror());
1954
1955 setNonBlocking(fd);
1956 setSocketSendBuffer(fd, 65000);
1957 listen(fd, 128);
1958 deferredAdds[threadId].push_back(make_pair(fd, handleNewTCPQuestion));
1959 g_tcpListenSockets.push_back(fd);
1960 // we don't need to update g_listenSocketsAddresses since it doesn't work for TCP/IP:
1961 // - fd is not that which we know here, but returned from accept()
1962 if(sin.sin4.sin_family == AF_INET)
1963 L<<Logger::Error<<"Listening for TCP queries on "<< sin.toString() <<":"<<st.port<<endl;
1964 else
1965 L<<Logger::Error<<"Listening for TCP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
1966 }
1967 }
1968
1969 static void makeUDPServerSockets(unsigned int threadId)
1970 {
1971 int one=1;
1972 vector<string>locals;
1973 stringtok(locals,::arg()["local-address"]," ,");
1974
1975 if(locals.empty())
1976 throw PDNSException("No local address specified");
1977
1978 for(vector<string>::const_iterator i=locals.begin();i!=locals.end();++i) {
1979 ServiceTuple st;
1980 st.port=::arg().asNum("local-port");
1981 parseService(*i, st);
1982
1983 ComboAddress sin;
1984
1985 memset(&sin, 0, sizeof(sin));
1986 sin.sin4.sin_family = AF_INET;
1987 if(!IpToU32(st.host.c_str() , (uint32_t*)&sin.sin4.sin_addr.s_addr)) {
1988 sin.sin6.sin6_family = AF_INET6;
1989 if(makeIPv6sockaddr(st.host, &sin.sin6) < 0)
1990 throw PDNSException("Unable to resolve local address for UDP server on '"+ st.host +"'");
1991 }
1992
1993 int fd=socket(sin.sin4.sin_family, SOCK_DGRAM, 0);
1994 if(fd < 0) {
1995 throw PDNSException("Making a UDP server socket for resolver: "+netstringerror());
1996 }
1997 if (!setSocketTimestamps(fd))
1998 L<<Logger::Warning<<"Unable to enable timestamp reporting for socket"<<endl;
1999
2000 if(IsAnyAddress(sin)) {
2001 if(sin.sin4.sin_family == AF_INET)
2002 if(!setsockopt(fd, IPPROTO_IP, GEN_IP_PKTINFO, &one, sizeof(one))) // linux supports this, so why not - might fail on other systems
2003 g_fromtosockets.insert(fd);
2004 #ifdef IPV6_RECVPKTINFO
2005 if(sin.sin4.sin_family == AF_INET6)
2006 if(!setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)))
2007 g_fromtosockets.insert(fd);
2008 #endif
2009 if(sin.sin6.sin6_family == AF_INET6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) {
2010 L<<Logger::Error<<"Failed to set IPv6 socket to IPv6 only, continuing anyhow: "<<strerror(errno)<<endl;
2011 }
2012 }
2013 if( ::arg().mustDo("non-local-bind") )
2014 Utility::setBindAny(AF_INET6, fd);
2015
2016 setCloseOnExec(fd);
2017
2018 setSocketReceiveBuffer(fd, 250000);
2019 sin.sin4.sin_port = htons(st.port);
2020
2021
2022 #ifdef SO_REUSEPORT
2023 if(g_reusePort) {
2024 if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
2025 throw PDNSException("SO_REUSEPORT: "+stringerror());
2026 }
2027 #endif
2028 socklen_t socklen=sin.getSocklen();
2029 if (::bind(fd, (struct sockaddr *)&sin, socklen)<0)
2030 throw PDNSException("Resolver binding to server socket on port "+ std::to_string(st.port) +" for "+ st.host+": "+stringerror());
2031
2032 setNonBlocking(fd);
2033
2034 deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion));
2035 g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers
2036 if(sin.sin4.sin_family == AF_INET)
2037 L<<Logger::Error<<"Listening for UDP queries on "<< sin.toString() <<":"<<st.port<<endl;
2038 else
2039 L<<Logger::Error<<"Listening for UDP queries on ["<< sin.toString() <<"]:"<<st.port<<endl;
2040 }
2041 }
2042
2043 static void daemonize(void)
2044 {
2045 if(fork())
2046 exit(0); // bye bye
2047
2048 setsid();
2049
2050 int i=open("/dev/null",O_RDWR); /* open stdin */
2051 if(i < 0)
2052 L<<Logger::Critical<<"Unable to open /dev/null: "<<stringerror()<<endl;
2053 else {
2054 dup2(i,0); /* stdin */
2055 dup2(i,1); /* stderr */
2056 dup2(i,2); /* stderr */
2057 close(i);
2058 }
2059 }
2060
2061 static void usr1Handler(int)
2062 {
2063 statsWanted=true;
2064 }
2065
2066 static void usr2Handler(int)
2067 {
2068 g_quiet= !g_quiet;
2069 SyncRes::setDefaultLogMode(g_quiet ? SyncRes::LogNone : SyncRes::Log);
2070 ::arg().set("quiet")=g_quiet ? "" : "no";
2071 }
2072
2073 static void doStats(void)
2074 {
2075 static time_t lastOutputTime;
2076 static uint64_t lastQueryCount;
2077
2078 uint64_t cacheHits = broadcastAccFunction<uint64_t>(pleaseGetCacheHits);
2079 uint64_t cacheMisses = broadcastAccFunction<uint64_t>(pleaseGetCacheMisses);
2080
2081 if(g_stats.qcounter && (cacheHits + cacheMisses) && SyncRes::s_queries && SyncRes::s_outqueries) {
2082 L<<Logger::Notice<<"stats: "<<g_stats.qcounter<<" questions, "<<
2083 broadcastAccFunction<uint64_t>(pleaseGetCacheSize)<< " cache entries, "<<
2084 broadcastAccFunction<uint64_t>(pleaseGetNegCacheSize)<<" negative entries, "<<
2085 (int)((cacheHits*100.0)/(cacheHits+cacheMisses))<<"% cache hits"<<endl;
2086
2087 L<<Logger::Notice<<"stats: throttle map: "
2088 << broadcastAccFunction<uint64_t>(pleaseGetThrottleSize) <<", ns speeds: "
2089 << broadcastAccFunction<uint64_t>(pleaseGetNsSpeedsSize)<<endl;
2090 L<<Logger::Notice<<"stats: outpacket/query ratio "<<(int)(SyncRes::s_outqueries*100.0/SyncRes::s_queries)<<"%";
2091 L<<Logger::Notice<<", "<<(int)(SyncRes::s_throttledqueries*100.0/(SyncRes::s_outqueries+SyncRes::s_throttledqueries))<<"% throttled, "
2092 <<SyncRes::s_nodelegated<<" no-delegation drops"<<endl;
2093 L<<Logger::Notice<<"stats: "<<SyncRes::s_tcpoutqueries<<" outgoing tcp connections, "<<
2094 broadcastAccFunction<uint64_t>(pleaseGetConcurrentQueries)<<" queries running, "<<SyncRes::s_outgoingtimeouts<<" outgoing timeouts"<<endl;
2095
2096 //L<<Logger::Notice<<"stats: "<<g_stats.ednsPingMatches<<" ping matches, "<<g_stats.ednsPingMismatches<<" mismatches, "<<
2097 //g_stats.noPingOutQueries<<" outqueries w/o ping, "<< g_stats.noEdnsOutQueries<<" w/o EDNS"<<endl;
2098
2099 L<<Logger::Notice<<"stats: " << broadcastAccFunction<uint64_t>(pleaseGetPacketCacheSize) <<
2100 " packet cache entries, "<<(int)(100.0*broadcastAccFunction<uint64_t>(pleaseGetPacketCacheHits)/SyncRes::s_queries) << "% packet cache hits"<<endl;
2101
2102 time_t now = time(0);
2103 if(lastOutputTime && lastQueryCount && now != lastOutputTime) {
2104 L<<Logger::Notice<<"stats: "<< (SyncRes::s_queries - lastQueryCount) / (now - lastOutputTime) <<" qps (average over "<< (now - lastOutputTime) << " seconds)"<<endl;
2105 }
2106 lastOutputTime = now;
2107 lastQueryCount = SyncRes::s_queries;
2108 }
2109 else if(statsWanted)
2110 L<<Logger::Notice<<"stats: no stats yet!"<<endl;
2111
2112 statsWanted=false;
2113 }
2114
2115 static void houseKeeping(void *)
2116 {
2117 static thread_local time_t last_stat, last_rootupdate, last_prune, last_secpoll;
2118 static thread_local int cleanCounter=0;
2119 static thread_local bool s_running; // houseKeeping can get suspended in secpoll, and be restarted, which makes us do duplicate work
2120 try {
2121 if(s_running)
2122 return;
2123 s_running=true;
2124
2125 struct timeval now;
2126 Utility::gettimeofday(&now, 0);
2127
2128 if(now.tv_sec - last_prune > (time_t)(5 + t_id)) {
2129 DTime dt;
2130 dt.setTimeval(now);
2131 t_RC->doPrune(g_maxCacheEntries / g_numThreads); // this function is local to a thread, so fine anyhow
2132 t_packetCache->doPruneTo(g_maxPacketCacheEntries / g_numWorkerThreads);
2133
2134 SyncRes::pruneNegCache(g_maxCacheEntries / (g_numWorkerThreads * 10));
2135
2136 if(!((cleanCounter++)%40)) { // this is a full scan!
2137 time_t limit=now.tv_sec-300;
2138 SyncRes::pruneNSSpeeds(limit);
2139 }
2140 last_prune=time(0);
2141 }
2142
2143 if(now.tv_sec - last_rootupdate > 7200) {
2144 int res = SyncRes::getRootNS(g_now, nullptr);
2145 if (!res)
2146 last_rootupdate=now.tv_sec;
2147 }
2148
2149 if(!t_id) {
2150 if(g_statisticsInterval > 0 && now.tv_sec - last_stat >= g_statisticsInterval) {
2151 doStats();
2152 last_stat=time(0);
2153 }
2154
2155 if(now.tv_sec - last_secpoll >= 3600) {
2156 try {
2157 doSecPoll(&last_secpoll);
2158 }
2159 catch(...) {}
2160 }
2161 }
2162 s_running=false;
2163 }
2164 catch(PDNSException& ae)
2165 {
2166 s_running=false;
2167 L<<Logger::Error<<"Fatal error in housekeeping thread: "<<ae.reason<<endl;
2168 throw;
2169 }
2170 }
2171
2172 static void makeThreadPipes()
2173 {
2174 for(unsigned int n=0; n < g_numThreads; ++n) {
2175 struct ThreadPipeSet tps;
2176 int fd[2];
2177 if(pipe(fd) < 0)
2178 unixDie("Creating pipe for inter-thread communications");
2179
2180 tps.readToThread = fd[0];
2181 tps.writeToThread = fd[1];
2182
2183 if(pipe(fd) < 0)
2184 unixDie("Creating pipe for inter-thread communications");
2185 tps.readFromThread = fd[0];
2186 tps.writeFromThread = fd[1];
2187
2188 g_pipes.push_back(tps);
2189 }
2190 }
2191
2192 struct ThreadMSG
2193 {
2194 pipefunc_t func;
2195 bool wantAnswer;
2196 };
2197
2198 void broadcastFunction(const pipefunc_t& func, bool skipSelf)
2199 {
2200 unsigned int n = 0;
2201 for(ThreadPipeSet& tps : g_pipes)
2202 {
2203 if(n++ == t_id) {
2204 if(!skipSelf)
2205 func(); // don't write to ourselves!
2206 continue;
2207 }
2208
2209 ThreadMSG* tmsg = new ThreadMSG();
2210 tmsg->func = func;
2211 tmsg->wantAnswer = true;
2212 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2213 delete tmsg;
2214 unixDie("write to thread pipe returned wrong size or error");
2215 }
2216
2217 string* resp;
2218 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2219 unixDie("read from thread pipe returned wrong size or error");
2220
2221 if(resp) {
2222 // cerr <<"got response: " << *resp << endl;
2223 delete resp;
2224 }
2225 }
2226 }
2227
2228 void distributeAsyncFunction(const string& packet, const pipefunc_t& func)
2229 {
2230 unsigned int hash = hashQuestion(packet.c_str(), packet.length(), g_disthashseed);
2231 unsigned int target = 1 + (hash % (g_pipes.size()-1));
2232
2233 if(target == t_id) {
2234 func();
2235 return;
2236 }
2237 ThreadPipeSet& tps = g_pipes[target];
2238 ThreadMSG* tmsg = new ThreadMSG();
2239 tmsg->func = func;
2240 tmsg->wantAnswer = false;
2241
2242 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2243 delete tmsg;
2244 unixDie("write to thread pipe returned wrong size or error");
2245 }
2246 }
2247
2248 static void handlePipeRequest(int fd, FDMultiplexer::funcparam_t& var)
2249 {
2250 ThreadMSG* tmsg = nullptr;
2251
2252 if(read(fd, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) { // fd == readToThread
2253 unixDie("read from thread pipe returned wrong size or error");
2254 }
2255
2256 void *resp=0;
2257 try {
2258 resp = tmsg->func();
2259 }
2260 catch(std::exception& e) {
2261 if(g_logCommonErrors)
2262 L<<Logger::Error<<"PIPE function we executed created exception: "<<e.what()<<endl; // but what if they wanted an answer.. we send 0
2263 }
2264 catch(PDNSException& e) {
2265 if(g_logCommonErrors)
2266 L<<Logger::Error<<"PIPE function we executed created PDNS exception: "<<e.reason<<endl; // but what if they wanted an answer.. we send 0
2267 }
2268 if(tmsg->wantAnswer) {
2269 if(write(g_pipes[t_id].writeFromThread, &resp, sizeof(resp)) != sizeof(resp)) {
2270 delete tmsg;
2271 unixDie("write to thread pipe returned wrong size or error");
2272 }
2273 }
2274
2275 delete tmsg;
2276 }
2277
2278 template<class T> void *voider(const boost::function<T*()>& func)
2279 {
2280 return func();
2281 }
2282
2283 vector<ComboAddress>& operator+=(vector<ComboAddress>&a, const vector<ComboAddress>& b)
2284 {
2285 a.insert(a.end(), b.begin(), b.end());
2286 return a;
2287 }
2288
/* Appends every element of b to a and returns a. */
std::vector<std::pair<std::string, uint16_t> >& operator+=(std::vector<std::pair<std::string, uint16_t> >&a, const std::vector<std::pair<std::string, uint16_t> >& b)
{
  for(const std::pair<std::string, uint16_t>& entry : b) {
    a.push_back(entry);
  }
  return a;
}
2294
2295 vector<pair<DNSName, uint16_t> >& operator+=(vector<pair<DNSName, uint16_t> >&a, const vector<pair<DNSName, uint16_t> >& b)
2296 {
2297 a.insert(a.end(), b.begin(), b.end());
2298 return a;
2299 }
2300
2301
2302 template<class T> T broadcastAccFunction(const boost::function<T*()>& func, bool skipSelf)
2303 {
2304 unsigned int n = 0;
2305 T ret=T();
2306 for(ThreadPipeSet& tps : g_pipes)
2307 {
2308 if(n++ == t_id) {
2309 if(!skipSelf) {
2310 T* resp = (T*)func(); // don't write to ourselves!
2311 if(resp) {
2312 //~ cerr <<"got direct: " << *resp << endl;
2313 ret += *resp;
2314 delete resp;
2315 }
2316 }
2317 continue;
2318 }
2319
2320 ThreadMSG* tmsg = new ThreadMSG();
2321 tmsg->func = boost::bind(voider<T>, func);
2322 tmsg->wantAnswer = true;
2323
2324 if(write(tps.writeToThread, &tmsg, sizeof(tmsg)) != sizeof(tmsg)) {
2325 delete tmsg;
2326 unixDie("write to thread pipe returned wrong size or error");
2327 }
2328
2329 T* resp;
2330 if(read(tps.readFromThread, &resp, sizeof(resp)) != sizeof(resp))
2331 unixDie("read from thread pipe returned wrong size or error");
2332
2333 if(resp) {
2334 //~ cerr <<"got response: " << *resp << endl;
2335 ret += *resp;
2336 delete resp;
2337 }
2338 }
2339 return ret;
2340 }
2341
2342 template string broadcastAccFunction(const boost::function<string*()>& fun, bool skipSelf); // explicit instantiation
2343 template uint64_t broadcastAccFunction(const boost::function<uint64_t*()>& fun, bool skipSelf); // explicit instantiation
2344 template vector<ComboAddress> broadcastAccFunction(const boost::function<vector<ComboAddress> *()>& fun, bool skipSelf); // explicit instantiation
2345 template vector<pair<DNSName,uint16_t> > broadcastAccFunction(const boost::function<vector<pair<DNSName, uint16_t> > *()>& fun, bool skipSelf); // explicit instantiation
2346
2347 static void handleRCC(int fd, FDMultiplexer::funcparam_t& var)
2348 {
2349 string remote;
2350 string msg=s_rcc.recv(&remote);
2351 RecursorControlParser rcp;
2352 RecursorControlParser::func_t* command;
2353
2354 string answer=rcp.getAnswer(msg, &command);
2355
2356 // If we are inside a chroot, we need to strip
2357 if (!arg()["chroot"].empty()) {
2358 size_t len = arg()["chroot"].length();
2359 remote = remote.substr(len);
2360 }
2361
2362 try {
2363 s_rcc.send(answer, &remote);
2364 command();
2365 }
2366 catch(std::exception& e) {
2367 L<<Logger::Error<<"Error dealing with control socket request: "<<e.what()<<endl;
2368 }
2369 catch(PDNSException& ae) {
2370 L<<Logger::Error<<"Error dealing with control socket request: "<<ae.reason<<endl;
2371 }
2372 }
2373
2374 static void handleTCPClientReadable(int fd, FDMultiplexer::funcparam_t& var)
2375 {
2376 PacketID* pident=any_cast<PacketID>(&var);
2377 // cerr<<"handleTCPClientReadable called for fd "<<fd<<", pident->inNeeded: "<<pident->inNeeded<<", "<<pident->sock->getHandle()<<endl;
2378
2379 shared_array<char> buffer(new char[pident->inNeeded]);
2380
2381 ssize_t ret=recv(fd, buffer.get(), pident->inNeeded,0);
2382 if(ret > 0) {
2383 pident->inMSG.append(&buffer[0], &buffer[ret]);
2384 pident->inNeeded-=(size_t)ret;
2385 if(!pident->inNeeded || pident->inIncompleteOkay) {
2386 // cerr<<"Got entire load of "<<pident->inMSG.size()<<" bytes"<<endl;
2387 PacketID pid=*pident;
2388 string msg=pident->inMSG;
2389
2390 t_fdm->removeReadFD(fd);
2391 MT->sendEvent(pid, &msg);
2392 }
2393 else {
2394 // cerr<<"Still have "<<pident->inNeeded<<" left to go"<<endl;
2395 }
2396 }
2397 else {
2398 PacketID tmp=*pident;
2399 t_fdm->removeReadFD(fd); // pident might now be invalid (it isn't, but still)
2400 string empty;
2401 MT->sendEvent(tmp, &empty); // this conveys error status
2402 }
2403 }
2404
2405 static void handleTCPClientWritable(int fd, FDMultiplexer::funcparam_t& var)
2406 {
2407 PacketID* pid=any_cast<PacketID>(&var);
2408 ssize_t ret=send(fd, pid->outMSG.c_str() + pid->outPos, pid->outMSG.size() - pid->outPos,0);
2409 if(ret > 0) {
2410 pid->outPos+=(ssize_t)ret;
2411 if(pid->outPos==pid->outMSG.size()) {
2412 PacketID tmp=*pid;
2413 t_fdm->removeWriteFD(fd);
2414 MT->sendEvent(tmp, &tmp.outMSG); // send back what we sent to convey everything is ok
2415 }
2416 }
2417 else { // error or EOF
2418 PacketID tmp(*pid);
2419 t_fdm->removeWriteFD(fd);
2420 string sent;
2421 MT->sendEvent(tmp, &sent); // we convey error status by sending empty string
2422 }
2423 }
2424
2425 // resend event to everybody chained onto it
2426 static void doResends(MT_t::waiters_t::iterator& iter, PacketID resend, const string& content)
2427 {
2428 if(iter->key.chain.empty())
2429 return;
2430 // cerr<<"doResends called!\n";
2431 for(PacketID::chain_t::iterator i=iter->key.chain.begin(); i != iter->key.chain.end() ; ++i) {
2432 resend.fd=-1;
2433 resend.id=*i;
2434 // cerr<<"\tResending "<<content.size()<<" bytes for fd="<<resend.fd<<" and id="<<resend.id<<endl;
2435
2436 MT->sendEvent(resend, &content);
2437 g_stats.chainResends++;
2438 }
2439 }
2440
2441 static void handleUDPServerResponse(int fd, FDMultiplexer::funcparam_t& var)
2442 {
2443 PacketID pid=any_cast<PacketID>(var);
2444 ssize_t len;
2445 char data[g_outgoingEDNSBufsize];
2446 ComboAddress fromaddr;
2447 socklen_t addrlen=sizeof(fromaddr);
2448
2449 len=recvfrom(fd, data, sizeof(data), 0, (sockaddr *)&fromaddr, &addrlen);
2450
2451 if(len < (ssize_t) sizeof(dnsheader)) {
2452 if(len < 0)
2453 ; // cerr<<"Error on fd "<<fd<<": "<<stringerror()<<"\n";
2454 else {
2455 g_stats.serverParseError++;
2456 if(g_logCommonErrors)
2457 L<<Logger::Error<<"Unable to parse packet from remote UDP server "<< fromaddr.toString() <<
2458 ": packet smaller than DNS header"<<endl;
2459 }
2460
2461 t_udpclientsocks->returnSocket(fd);
2462 string empty;
2463
2464 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pid);
2465 if(iter != MT->d_waiters.end())
2466 doResends(iter, pid, empty);
2467
2468 MT->sendEvent(pid, &empty); // this denotes error (does lookup again.. at least L1 will be hot)
2469 return;
2470 }
2471
2472 dnsheader dh;
2473 memcpy(&dh, data, sizeof(dh));
2474
2475 PacketID pident;
2476 pident.remote=fromaddr;
2477 pident.id=dh.id;
2478 pident.fd=fd;
2479
2480 if(!dh.qr && g_logCommonErrors) {
2481 L<<Logger::Notice<<"Not taking data from question on outgoing socket from "<< fromaddr.toStringWithPort() <<endl;
2482 }
2483
2484 if(!dh.qdcount || // UPC, Nominum, very old BIND on FormErr, NSD
2485 !dh.qr) { // one weird server
2486 pident.domain.clear();
2487 pident.type = 0;
2488 }
2489 else {
2490 try {
2491 if(len > 12)
2492 pident.domain=DNSName(data, len, 12, false, &pident.type); // don't copy this from above - we need to do the actual read
2493 }
2494 catch(std::exception& e) {
2495 g_stats.serverParseError++; // won't be fed to lwres.cc, so we have to increment
2496 L<<Logger::Warning<<"Error in packet from remote nameserver "<< fromaddr.toStringWithPort() << ": "<<e.what() << endl;
2497 return;
2498 }
2499 }
2500 string packet;
2501 packet.assign(data, len);
2502
2503 MT_t::waiters_t::iterator iter=MT->d_waiters.find(pident);
2504 if(iter != MT->d_waiters.end()) {
2505 doResends(iter, pident, packet);
2506 }
2507
2508 retryWithName:
2509
2510 if(!MT->sendEvent(pident, &packet)) {
2511 // we do a full scan for outstanding queries on unexpected answers. not too bad since we only accept them on the right port number, which is hard enough to guess
2512 for(MT_t::waiters_t::iterator mthread=MT->d_waiters.begin(); mthread!=MT->d_waiters.end(); ++mthread) {
2513 if(pident.fd==mthread->key.fd && mthread->key.remote==pident.remote && mthread->key.type == pident.type &&
2514 pident.domain == mthread->key.domain) {
2515 mthread->key.nearMisses++;
2516 }
2517
2518 // be a bit paranoid here since we're weakening our matching
2519 if(pident.domain.empty() && !mthread->key.domain.empty() && !pident.type && mthread->key.type &&
2520 pident.id == mthread->key.id && mthread->key.remote == pident.remote) {
2521 // cerr<<"Empty response, rest matches though, sending to a waiter"<<endl;
2522 pident.domain = mthread->key.domain;
2523 pident.type = mthread->key.type;
2524 goto retryWithName; // note that this only passes on an error, lwres will still reject the packet
2525 }
2526 }
2527 g_stats.unexpectedCount++; // if we made it here, it really is an unexpected answer
2528 if(g_logCommonErrors) {
2529 L<<Logger::Warning<<"Discarding unexpected packet from "<<fromaddr.toStringWithPort()<<": "<< (pident.domain.empty() ? "<empty>" : pident.domain.toString())<<", "<<pident.type<<", "<<MT->d_waiters.size()<<" waiters"<<endl;
2530 }
2531 }
2532 else if(fd >= 0) {
2533 t_udpclientsocks->returnSocket(fd);
2534 }
2535 }
2536
2537 FDMultiplexer* getMultiplexer()
2538 {
2539 FDMultiplexer* ret;
2540 for(const auto& i : FDMultiplexer::getMultiplexerMap()) {
2541 try {
2542 ret=i.second();
2543 return ret;
2544 }
2545 catch(FDMultiplexerException &fe) {
2546 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer ("<<fe.what()<<"), falling back"<<endl;
2547 }
2548 catch(...) {
2549 L<<Logger::Error<<"Non-fatal error initializing possible multiplexer"<<endl;
2550 }
2551 }
2552 L<<Logger::Error<<"No working multiplexer found!"<<endl;
2553 exit(1);
2554 }
2555
2556
2557 static string* doReloadLuaScript()
2558 {
2559 string fname= ::arg()["lua-dns-script"];
2560 try {
2561 if(fname.empty()) {
2562 t_pdl.reset();
2563 L<<Logger::Error<<t_id<<" Unloaded current lua script"<<endl;
2564 return new string("unloaded\n");
2565 }
2566 else {
2567 t_pdl = std::make_shared<RecursorLua4>(fname);
2568 }
2569 }
2570 catch(std::exception& e) {
2571 L<<Logger::Error<<t_id<<" Retaining current script, error from '"<<fname<<"': "<< e.what() <<endl;
2572 return new string("retaining current script, error from '"+fname+"': "+e.what()+"\n");
2573 }
2574
2575 L<<Logger::Warning<<t_id<<" (Re)loaded lua script from '"<<fname<<"'"<<endl;
2576 return new string("(re)loaded '"+fname+"'\n");
2577 }
2578
2579 string doQueueReloadLuaScript(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2580 {
2581 if(begin != end)
2582 ::arg().set("lua-dns-script") = *begin;
2583
2584 return broadcastAccFunction<string>(doReloadLuaScript);
2585 }
2586
2587 static string* pleaseUseNewTraceRegex(const std::string& newRegex)
2588 try
2589 {
2590 if(newRegex.empty()) {
2591 t_traceRegex.reset();
2592 return new string("unset\n");
2593 }
2594 else {
2595 t_traceRegex = std::make_shared<Regex>(newRegex);
2596 return new string("ok\n");
2597 }
2598 }
2599 catch(PDNSException& ae)
2600 {
2601 return new string(ae.reason+"\n");
2602 }
2603
2604 string doTraceRegex(vector<string>::const_iterator begin, vector<string>::const_iterator end)
2605 {
2606 return broadcastAccFunction<string>(boost::bind(pleaseUseNewTraceRegex, begin!=end ? *begin : ""));
2607 }
2608
2609 static void checkLinuxIPv6Limits()
2610 {
2611 #ifdef __linux__
2612 string line;
2613 if(readFileIfThere("/proc/sys/net/ipv6/route/max_size", &line)) {
2614 int lim=std::stoi(line);
2615 if(lim < 16384) {
2616 L<<Logger::Error<<"If using IPv6, please raise sysctl net.ipv6.route.max_size, currently set to "<<lim<<" which is < 16384"<<endl;
2617 }
2618 }
2619 #endif
2620 }
2621 static void checkOrFixFDS()
2622 {
2623 unsigned int availFDs=getFilenumLimit();
2624 unsigned int wantFDs = g_maxMThreads * g_numWorkerThreads +25; // even healthier margin then before
2625
2626 if(wantFDs > availFDs) {
2627 unsigned int hardlimit= getFilenumLimit(true);
2628 if(hardlimit >= wantFDs) {
2629 setFilenumLimit(wantFDs);
2630 L<<Logger::Warning<<"Raised soft limit on number of filedescriptors to "<<wantFDs<<" to match max-mthreads and threads settings"<<endl;
2631 }
2632 else {
2633 int newval = (hardlimit - 25) / g_numWorkerThreads;
2634 L<<Logger::Warning<<"Insufficient number of filedescriptors available for max-mthreads*threads setting! ("<<hardlimit<<" < "<<wantFDs<<"), reducing max-mthreads to "<<newval<<endl;
2635 g_maxMThreads = newval;
2636 setFilenumLimit(hardlimit);
2637 }
2638 }
2639 }
2640
// Forward declaration; the definition is not in this part of the file.
static void* recursorThread(void* arg);
2642
2643 static void* pleaseSupplantACLs(std::shared_ptr<NetmaskGroup> ng)
2644 {
2645 t_allowFrom = ng;
2646 return nullptr;
2647 }
2648
// Command line as handed to ::arg().preParse() when parseACLs() re-reads the configuration.
int g_argc;
char** g_argv;
2651
2652 void parseACLs()
2653 {
2654 static bool l_initialized;
2655
2656 if(l_initialized) { // only reload configuration file on second call
2657 string configname=::arg()["config-dir"]+"/recursor.conf";
2658 cleanSlashes(configname);
2659
2660 if(!::arg().preParseFile(configname.c_str(), "allow-from-file"))
2661 throw runtime_error("Unable to re-parse configuration file '"+configname+"'");
2662 ::arg().preParseFile(configname.c_str(), "allow-from", LOCAL_NETS);
2663 ::arg().preParseFile(configname.c_str(), "include-dir");
2664 ::arg().preParse(g_argc, g_argv, "include-dir");
2665
2666 // then process includes
2667 std::vector<std::string> extraConfigs;
2668 ::arg().gatherIncludes(extraConfigs);
2669
2670 for(const std::string& fn : extraConfigs) {
2671 if(!::arg().preParseFile(fn.c_str(), "allow-from-file", ::arg()["allow-from-file"]))
2672 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2673 if(!::arg().preParseFile(fn.c_str(), "allow-from", ::arg()["allow-from"]))
2674 throw runtime_error("Unable to re-parse configuration file include '"+fn+"'");
2675 }
2676
2677 ::arg().preParse(g_argc, g_argv, "allow-from-file");
2678 ::arg().preParse(g_argc, g_argv, "allow-from");
2679 }
2680
2681 std::shared_ptr<NetmaskGroup> oldAllowFrom = t_allowFrom;
2682 std::shared_ptr<NetmaskGroup> allowFrom = std::make_shared<NetmaskGroup>();
2683
2684 if(!::arg()["allow-from-file"].empty()) {
2685 string line;
2686 ifstream ifs(::arg()["allow-from-file"].c_str());
2687 if(!ifs) {
2688 throw runtime_error("Could not open '"+::arg()["allow-from-file"]+"': "+stringerror());
2689 }
2690
2691 string::size_type pos;
2692 while(getline(ifs,line)) {
2693 pos=line.find('#');
2694 if(pos!=string::npos)
2695 line.resize(pos);
2696 trim(line);
2697 if(line.empty())
2698 continue;
2699
2700 allowFrom->addMask(line);
2701 }
2702 L<<Logger::Warning<<"Done parsing " << allowFrom->size() <<" allow-from ranges from file '"<<::arg()["allow-from-file"]<<"' - overriding 'allow-from' setting"<<endl;
2703 }
2704 else if(!::arg()["allow-from"].empty()) {
2705 vector<string> ips;
2706 stringtok(ips, ::arg()["allow-from"], ", ");
2707
2708 L<<Logger::Warning<<"Only allowing queries from: ";
2709 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2710 allowFrom->addMask(*i);
2711 if(i!=ips.begin())
2712 L<<Logger::Warning<<", ";
2713 L<<Logger::Warning<<*i;
2714 }
2715 L<<Logger::Warning<<endl;
2716 }
2717 else {
2718 if(::arg()["local-address"]!="127.0.0.1" && ::arg().asNum("local-port")==53)
2719 L<<Logger::Error<<"WARNING: Allowing queries from all IP addresses - this can be a security risk!"<<endl;
2720 allowFrom = nullptr;
2721 }
2722
2723 g_initialAllowFrom = allowFrom;
2724 broadcastFunction(boost::bind(pleaseSupplantACLs, allowFrom));
2725 oldAllowFrom = nullptr;
2726
2727 l_initialized = true;
2728 }
2729
2730
2731 static void setupDelegationOnly()
2732 {
2733 vector<string> parts;
2734 stringtok(parts, ::arg()["delegation-only"], ", \t");
2735 for(const auto& p : parts) {
2736 SyncRes::addDelegationOnly(DNSName(p));
2737 }
2738 }
2739
2740 static std::map<unsigned int, std::set<int> > parseCPUMap()
2741 {
2742 std::map<unsigned int, std::set<int> > result;
2743
2744 const std::string value = ::arg()["cpu-map"];
2745
2746 if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
2747 L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
2748 return result;
2749 }
2750
2751 std::vector<std::string> parts;
2752
2753 stringtok(parts, value, " \t");
2754
2755 for(const auto& part : parts) {
2756 if (part.find('=') == string::npos)
2757 continue;
2758
2759 try {
2760 auto headers = splitField(part, '=');
2761 trim(headers.first);
2762 trim(headers.second);
2763
2764 unsigned int threadId = pdns_stou(headers.first);
2765 std::vector<std::string> cpus;
2766
2767 stringtok(cpus, headers.second, ",");
2768
2769 for(const auto& cpu : cpus) {
2770 int cpuId = std::stoi(cpu);
2771
2772 result[threadId].insert(cpuId);
2773 }
2774 }
2775 catch(const std::exception& e) {
2776 L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
2777 }
2778 }
2779
2780 return result;
2781 }
2782
2783 static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
2784 {
2785 const auto& cpuMapping = cpusMap.find(n);
2786 if (cpuMapping != cpusMap.cend()) {
2787 int rc = mapThreadToCPUList(tid, cpuMapping->second);
2788 if (rc == 0) {
2789 L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
2790 for (const auto cpu : cpuMapping->second) {
2791 L<<Logger::Info<<" "<<cpu;
2792 }
2793 L<<Logger::Info<<endl;
2794 }
2795 else {
2796 L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
2797 for (const auto cpu : cpuMapping->second) {
2798 L<<Logger::Info<<" "<<cpu;
2799 }
2800 L<<Logger::Info<<strerror(rc)<<endl;
2801 }
2802 }
2803 }
2804
2805 static int serviceMain(int argc, char*argv[])
2806 {
2807 L.setName(s_programname);
2808 L.disableSyslog(::arg().mustDo("disable-syslog"));
2809 L.setTimestamps(::arg().mustDo("log-timestamp"));
2810
2811 if(!::arg()["logging-facility"].empty()) {
2812 int val=logFacilityToLOG(::arg().asNum("logging-facility") );
2813 if(val >= 0)
2814 theL().setFacility(val);
2815 else
2816 L<<Logger::Error<<"Unknown logging facility "<<::arg().asNum("logging-facility") <<endl;
2817 }
2818
2819 showProductVersion();
2820 seedRandom(::arg()["entropy-source"]);
2821
2822 g_disthashseed=dns_random(0xffffffff);
2823
2824 checkLinuxIPv6Limits();
2825 try {
2826 vector<string> addrs;
2827 if(!::arg()["query-local-address6"].empty()) {
2828 SyncRes::s_doIPv6=true;
2829 L<<Logger::Warning<<"Enabling IPv6 transport for outgoing queries"<<endl;
2830
2831 stringtok(addrs, ::arg()["query-local-address6"], ", ;");
2832 for(const string& addr : addrs) {
2833 g_localQueryAddresses6.push_back(ComboAddress(addr));
2834 }
2835 }
2836 else {
2837 L<<Logger::Warning<<"NOT using IPv6 for outgoing queries - set 'query-local-address6=::' to enable"<<endl;
2838 }
2839 addrs.clear();
2840 stringtok(addrs, ::arg()["query-local-address"], ", ;");
2841 for(const string& addr : addrs) {
2842 g_localQueryAddresses4.push_back(ComboAddress(addr));
2843 }
2844 }
2845 catch(std::exception& e) {
2846 L<<Logger::Error<<"Assigning local query addresses: "<<e.what();
2847 exit(99);
2848 }
2849
2850 // keep this ABOVE loadRecursorLuaConfig!
2851 if(::arg()["dnssec"]=="off")
2852 g_dnssecmode=DNSSECMode::Off;
2853 else if(::arg()["dnssec"]=="process-no-validate")
2854 g_dnssecmode=DNSSECMode::ProcessNoValidate;
2855 else if(::arg()["dnssec"]=="process")
2856 g_dnssecmode=DNSSECMode::Process;
2857 else if(::arg()["dnssec"]=="validate")
2858 g_dnssecmode=DNSSECMode::ValidateAll;
2859 else if(::arg()["dnssec"]=="log-fail")
2860 g_dnssecmode=DNSSECMode::ValidateForLog;
2861 else {
2862 L<<Logger::Error<<"Unknown DNSSEC mode "<<::arg()["dnssec"]<<endl;
2863 exit(1);
2864 }
2865
2866 g_dnssecLogBogus = ::arg().mustDo("dnssec-log-bogus");
2867 g_maxNSEC3Iterations = ::arg().asNum("nsec3-max-iterations");
2868
2869 g_maxCacheEntries = ::arg().asNum("max-cache-entries");
2870 g_maxPacketCacheEntries = ::arg().asNum("max-packetcache-entries");
2871
2872 try {
2873 loadRecursorLuaConfig(::arg()["lua-config-file"], ::arg().mustDo("daemon"));
2874 }
2875 catch (PDNSException &e) {
2876 L<<Logger::Error<<"Cannot load Lua configuration: "<<e.reason<<endl;
2877 exit(1);
2878 }
2879
2880 parseACLs();
2881 sortPublicSuffixList();
2882
2883 if(!::arg()["dont-query"].empty()) {
2884 vector<string> ips;
2885 stringtok(ips, ::arg()["dont-query"], ", ");
2886 ips.push_back("0.0.0.0");
2887 ips.push_back("::");
2888
2889 L<<Logger::Warning<<"Will not send queries to: ";
2890 for(vector<string>::const_iterator i = ips.begin(); i!= ips.end(); ++i) {
2891 SyncRes::addDontQuery(*i);
2892 if(i!=ips.begin())
2893 L<<Logger::Warning<<", ";
2894 L<<Logger::Warning<<*i;
2895 }
2896 L<<Logger::Warning<<endl;
2897 }
2898
2899 g_quiet=::arg().mustDo("quiet");
2900
2901 g_weDistributeQueries = ::arg().mustDo("pdns-distributes-queries");
2902 if(g_weDistributeQueries) {
2903 L<<Logger::Warning<<"PowerDNS Recursor itself will distribute queries over threads"<<endl;
2904 }
2905
2906 setupDelegationOnly();
2907 g_outgoingEDNSBufsize=::arg().asNum("edns-outgoing-bufsize");
2908
2909 if(::arg()["trace"]=="fail") {
2910 SyncRes::setDefaultLogMode(SyncRes::Store);
2911 }
2912 else if(::arg().mustDo("trace")) {
2913 SyncRes::setDefaultLogMode(SyncRes::Log);
2914 ::arg().set("quiet")="no";
2915 g_quiet=false;
2916 g_dnssecLOG=true;
2917 }
2918
2919 SyncRes::s_minimumTTL = ::arg().asNum("minimum-ttl-override");
2920
2921 SyncRes::s_nopacketcache = ::arg().mustDo("disable-packetcache");
2922
2923 SyncRes::s_maxnegttl=::arg().asNum("max-negative-ttl");
2924 SyncRes::s_maxcachettl=max(::arg().asNum("max-cache-ttl"), 15);
2925 SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
2926 // Cap the packetcache-servfail-ttl to the packetcache-ttl
2927 uint32_t packetCacheServFailTTL = ::arg().asNum("packetcache-servfail-ttl");
2928 SyncRes::s_packetcacheservfailttl=(packetCacheServFailTTL > SyncRes::s_packetcachettl) ? SyncRes::s_packetcachettl : packetCacheServFailTTL;
2929 SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
2930 SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
2931 SyncRes::s_serverID=::arg()["server-id"];
2932 SyncRes::s_maxqperq=::arg().asNum("max-qperq");
2933 SyncRes::s_maxtotusec=1000*::arg().asNum("max-total-msec");
2934 SyncRes::s_maxdepth=::arg().asNum("max-recursion-depth");
2935 SyncRes::s_rootNXTrust = ::arg().mustDo( "root-nx-trust");
2936 if(SyncRes::s_serverID.empty()) {
2937 char tmp[128];
2938 gethostname(tmp, sizeof(tmp)-1);
2939 SyncRes::s_serverID=tmp;
2940 }
2941
2942 SyncRes::s_ecsipv4limit = ::arg().asNum("ecs-ipv4-bits");
2943 SyncRes::s_ecsipv6limit = ::arg().asNum("ecs-ipv6-bits");
2944
2945 if (!::arg().isEmpty("ecs-scope-zero-address")) {
2946 ComboAddress scopeZero(::arg()["ecs-scope-zero-address"]);
2947 SyncRes::setECSScopeZeroAddress(Netmask(scopeZero, scopeZero.isIPv4() ? 32 : 128));
2948 }
2949 else {
2950 bool found = false;
2951 for (const auto& addr : g_localQueryAddresses4) {
2952 if (!IsAnyAddress(addr)) {
2953 SyncRes::setECSScopeZeroAddress(Netmask(addr, 32));
2954 found = true;
2955 break;
2956 }
2957 }
2958 if (!found) {
2959 for (const auto& addr : g_localQueryAddresses6) {
2960 if (!IsAnyAddress(addr)) {
2961 SyncRes::setECSScopeZeroAddress(Netmask(addr, 128));
2962 found = true;
2963 break;
2964 }
2965 }
2966 if (!found) {
2967 SyncRes::setECSScopeZeroAddress(Netmask("127.0.0.1/32"));
2968 }
2969 }
2970 }
2971
2972 g_networkTimeoutMsec = ::arg().asNum("network-timeout");
2973
2974 g_initialDomainMap = parseAuthAndForwards();
2975
2976 g_latencyStatSize=::arg().asNum("latency-statistic-size");
2977
2978 g_logCommonErrors=::arg().mustDo("log-common-errors");
2979 g_logRPZChanges = ::arg().mustDo("log-rpz-changes");
2980
2981 g_anyToTcp = ::arg().mustDo("any-to-tcp");
2982 g_udpTruncationThreshold = ::arg().asNum("udp-truncation-threshold");
2983
2984 g_lowercaseOutgoing = ::arg().mustDo("lowercase-outgoing");
2985
2986 g_numWorkerThreads = ::arg().asNum("threads");
2987 g_numThreads = g_numWorkerThreads + g_weDistributeQueries;
2988 g_maxMThreads = ::arg().asNum("max-mthreads");
2989
2990 g_gettagNeedsEDNSOptions = ::arg().mustDo("gettag-needs-edns-options");
2991
2992 g_statisticsInterval = ::arg().asNum("statistics-interval");
2993
2994 #ifdef SO_REUSEPORT
2995 g_reusePort = ::arg().mustDo("reuseport");
2996 #endif
2997
2998 g_useOneSocketPerThread = (!g_weDistributeQueries && g_reusePort);
2999
3000 if (g_useOneSocketPerThread) {
3001 for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) {
3002 makeUDPServerSockets(threadId);
3003 makeTCPServerSockets(threadId);
3004 }
3005 }
3006 else {
3007 makeUDPServerSockets(0);
3008 makeTCPServerSockets(0);
3009 }
3010
3011 SyncRes::parseEDNSSubnetWhitelist(::arg()["edns-subnet-whitelist"]);
3012 g_useIncomingECS = ::arg().mustDo("use-incoming-edns-subnet");
3013
3014 int forks;
3015 for(forks = 0; forks < ::arg().asNum("processes") - 1; ++forks) {
3016 if(!fork()) // we are child
3017 break;
3018 }
3019
3020 if(::arg().mustDo("daemon")) {
3021 L<<Logger::Warning<<"Calling daemonize, going to background"<<endl;
3022 L.toConsole(Logger::Critical);
3023 daemonize();
3024 loadRecursorLuaConfig(::arg()["lua-config-file"], false);
3025 }
3026 signal(SIGUSR1,usr1Handler);
3027 signal(SIGUSR2,usr2Handler);
3028 signal(SIGPIPE,SIG_IGN);
3029
3030 checkOrFixFDS();
3031
3032 #ifdef HAVE_LIBSODIUM
3033 if (sodium_init() == -1) {
3034 L<<Logger::Error<<"Unable to initialize sodium crypto library"<<endl;
3035 exit(99);
3036 }
3037 #endif
3038
3039 openssl_thread_setup();
3040 openssl_seed();
3041
3042 int newgid=0;
3043 if(!::arg()["setgid"].empty())
3044 newgid=Utility::makeGidNumeric(::arg()["setgid"]);
3045 int newuid=0;
3046 if(!::arg()["setuid"].empty())
3047 newuid=Utility::makeUidNumeric(::arg()["setuid"]);
3048
3049 Utility::dropGroupPrivs(newuid, newgid);
3050
3051 if (!::arg()["chroot"].empty()) {
3052 #ifdef HAVE_SYSTEMD
3053 char *ns;
3054 ns = getenv("NOTIFY_SOCKET");
3055 if (ns != nullptr) {
3056 L<<Logger::Error<<"Unable to chroot when running from systemd. Please disable chroot= or set the 'Type' for this service to 'simple'"<<endl;
3057 exit(1);
3058 }
3059 #endif
3060 if (chroot(::arg()["chroot"].c_str())<0 || chdir("/") < 0) {
3061 L<<Logger::Error<<"Unable to chroot to '"+::arg()["chroot"]+"': "<<strerror (errno)<<", exiting"<<endl;
3062 exit(1);
3063 }
3064 else
3065 L<<Logger::Error<<"Chrooted to '"<<::arg()["chroot"]<<"'"<<endl;
3066 }
3067
3068 s_pidfname=::arg()["socket-dir"]+"/"+s_programname+".pid";
3069 if(!s_pidfname.empty())
3070 unlink(s_pidfname.c_str()); // remove possible old pid file
3071 writePid();
3072
3073 makeControlChannelSocket( ::arg().asNum("processes") > 1 ? forks : -1);
3074
3075 Utility::dropUserPrivs(newuid);
3076
3077 makeThreadPipes();
3078
3079 g_tcpTimeout=::arg().asNum("client-tcp-timeout");
3080 g_maxTCPPerClient=::arg().asNum("max-tcp-per-client");
3081 g_tcpMaxQueriesPerConn=::arg().asNum("max-tcp-queries-per-connection");
3082
3083 if (::arg().mustDo("snmp-agent")) {
3084 g_snmpAgent = std::make_shared<RecursorSNMPAgent>("recursor", ::arg()["snmp-master-socket"]);
3085 g_snmpAgent->run();
3086 }
3087
3088 const auto cpusMap = parseCPUMap();
3089 if(g_numThreads == 1) {
3090 L<<Logger::Warning<<"Operating unthreaded"<<endl;
3091 #ifdef HAVE_SYSTEMD
3092 sd_notify(0, "READY=1");
3093 #endif
3094 setCPUMap(cpusMap, 0, pthread_self());
3095 recursorThread(0);
3096 }
3097 else {
3098 pthread_t tid;
3099 L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
3100 for(unsigned int n=0; n < g_numThreads; ++n) {
3101 pthread_create(&tid, 0, recursorThread, (void*)(long)n);
3102
3103 setCPUMap(cpusMap, n, tid);
3104 }
3105 void* res;
3106 #ifdef HAVE_SYSTEMD
3107 sd_notify(0, "READY=1");
3108 #endif
3109 pthread_join(tid, &res);
3110 }
3111 return 0;
3112 }
3113
3114 static void* recursorThread(void* ptr)
3115 try
3116 {
3117 t_id=(int) (long) ptr;
3118 SyncRes tmp(g_now); // make sure it allocates tsstorage before we do anything, like primeHints or so..
3119 SyncRes::setDomainMap(g_initialDomainMap);
3120 t_allowFrom = g_initialAllowFrom;
3121 t_udpclientsocks = std::unique_ptr<UDPClientSocks>(new UDPClientSocks());
3122 t_tcpClientCounts = std::unique_ptr<tcpClientCounts_t>(new tcpClientCounts_t());
3123 primeHints();
3124
3125 t_packetCache = std::unique_ptr<RecursorPacketCache>(new RecursorPacketCache());
3126
3127 #ifdef HAVE_PROTOBUF
3128 t_uuidGenerator = std::unique_ptr<boost::uuids::random_generator>(new boost::uuids::random_generator());
3129 #endif
3130 L<<Logger::Warning<<"Done priming cache with root hints"<<endl;
3131
3132 try {
3133 if(!::arg()["lua-dns-script"].empty()) {
3134 t_pdl = std::make_shared<RecursorLua4>(::arg()["lua-dns-script"]);
3135 L<<Logger::Warning<<"Loaded 'lua' script from '"<<::arg()["lua-dns-script"]<<"'"<<endl;
3136 }
3137 }
3138 catch(std::exception &e) {
3139 L<<Logger::Error<<"Failed to load 'lua' script from '"<<::arg()["lua-dns-script"]<<"': "<<e.what()<<endl;
3140 _exit(99);
3141 }
3142
3143 unsigned int ringsize=::arg().asNum("stats-ringbuffer-entries") / g_numWorkerThreads;
3144 if(ringsize) {
3145 t_remotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3146 if(g_weDistributeQueries) // if so, only 1 thread does recvfrom
3147 t_remotes->set_capacity(::arg().asNum("stats-ringbuffer-entries"));
3148 else
3149 t_remotes->set_capacity(ringsize);
3150 t_servfailremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3151 t_servfailremotes->set_capacity(ringsize);
3152 t_largeanswerremotes = std::unique_ptr<addrringbuf_t>(new addrringbuf_t());
3153 t_largeanswerremotes->set_capacity(ringsize);
3154
3155 t_queryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3156 t_queryring->set_capacity(ringsize);
3157 t_servfailqueryring = std::unique_ptr<boost::circular_buffer<pair<DNSName, uint16_t> > >(new boost::circular_buffer<pair<DNSName, uint16_t> >());
3158 t_servfailqueryring->set_capacity(ringsize);
3159 }
3160
3161 MT=std::unique_ptr<MTasker<PacketID,string> >(new MTasker<PacketID,string>(::arg().asNum("stack-size")));
3162
3163 PacketID pident;
3164
3165 t_fdm=getMultiplexer();
3166 if(!t_id) {
3167 if(::arg().mustDo("webserver")) {
3168 L<<Logger::Warning << "Enabling web server" << endl;
3169 try {
3170 new RecursorWebServer(t_fdm);
3171 }
3172 catch(PDNSException &e) {
3173 L<<Logger::Error<<"Exception: "<<e.reason<<endl;
3174 exit(99);
3175 }
3176 }
3177 L<<Logger::Error<<"Enabled '"<< t_fdm->getName() << "' multiplexer"<<endl;
3178 }
3179
3180 t_fdm->addReadFD(g_pipes[t_id].readToThread, handlePipeRequest);
3181
3182 if(g_useOneSocketPerThread) {
3183 for(deferredAdd_t::const_iterator i = deferredAdds[t_id].cbegin(); i != deferredAdds[t_id].cend(); ++i) {
3184 t_fdm->addReadFD(i->first, i->second);
3185 }
3186 }
3187 else {
3188 if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens
3189 for(deferredAdd_t::const_iterator i = deferredAdds[0].cbegin(); i != deferredAdds[0].cend(); ++i) {
3190 t_fdm->addReadFD(i->first, i->second);
3191 }
3192 }
3193 }
3194
3195 registerAllStats();
3196 if(!t_id) {
3197 t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel
3198 }
3199
3200 unsigned int maxTcpClients=::arg().asNum("max-tcp-clients");
3201
3202 bool listenOnTCP(true);
3203
3204 time_t last_carbon=0;
3205 time_t carbonInterval=::arg().asNum("carbon-interval");
3206 counter.store(0); // used to periodically execute certain tasks
3207 for(;;) {
3208 while(MT->schedule(&g_now)); // MTasker letting the mthreads do their thing
3209
3210 if(!(counter%500)) {
3211 MT->makeThread(houseKeeping, 0);
3212 }
3213
3214 if(!(counter%55)) {
3215 typedef vector<pair<int, FDMultiplexer::funcparam_t> > expired_t;
3216 expired_t expired=t_fdm->getTimeouts(g_now);
3217
3218 for(expired_t::iterator i=expired.begin() ; i != expired.end(); ++i) {
3219 shared_ptr<TCPConnection> conn=any_cast<shared_ptr<TCPConnection> >(i->second);
3220 if(g_logCommonErrors)
3221 L<<Logger::Warning<<"Timeout from remote TCP client "<< conn->d_remote.toString() <<endl;
3222 t_fdm->removeReadFD(i->first);
3223 }
3224 }
3225
3226 counter++;
3227
3228 if(!t_id && statsWanted) {
3229 doStats();
3230 }
3231
3232 Utility::gettimeofday(&g_now, 0);
3233
3234 if(!t_id && (g_now.tv_sec - last_carbon >= carbonInterval)) {
3235 MT->makeThread(doCarbonDump, 0);
3236 last_carbon = g_now.tv_sec;
3237 }
3238
3239 t_fdm->run(&g_now);
3240 // 'run' updates g_now for us
3241
3242 if(!g_weDistributeQueries || !t_id) { // if pdns distributes queries, only tid 0 should do this
3243 if(listenOnTCP) {
3244 if(TCPConnection::getCurrentConnections() > maxTcpClients) { // shutdown, too many connections
3245 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3246 t_fdm->removeReadFD(*i);
3247 listenOnTCP=false;
3248 }
3249 }
3250 else {
3251 if(TCPConnection::getCurrentConnections() <= maxTcpClients) { // reenable
3252 for(tcpListenSockets_t::iterator i=g_tcpListenSockets.begin(); i != g_tcpListenSockets.end(); ++i)
3253 t_fdm->addReadFD(*i, handleNewTCPQuestion);
3254 listenOnTCP=true;
3255 }
3256 }
3257 }
3258 }
3259 }
3260 catch(PDNSException &ae) {
3261 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3262 return 0;
3263 }
3264 catch(std::exception &e) {
3265 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3266 return 0;
3267 }
3268 catch(...) {
3269 L<<Logger::Error<<"any other exception in main: "<<endl;
3270 return 0;
3271 }
3272
3273
3274 int main(int argc, char **argv)
3275 {
3276 g_argc = argc;
3277 g_argv = argv;
3278 g_stats.startupTime=time(0);
3279 versionSetProduct(ProductRecursor);
3280 reportBasicTypes();
3281 reportOtherTypes();
3282
3283 int ret = EXIT_SUCCESS;
3284
3285 try {
3286 ::arg().set("stack-size","stack size per mthread")="200000";
3287 ::arg().set("soa-minimum-ttl","Don't change")="0";
3288 ::arg().set("no-shuffle","Don't change")="off";
3289 ::arg().set("local-port","port to listen on")="53";
3290 ::arg().set("local-address","IP addresses to listen on, separated by spaces or commas. Also accepts ports.")="127.0.0.1";
3291 ::arg().setSwitch("non-local-bind", "Enable binding to non-local addresses by using FREEBIND / BINDANY socket options")="no";
3292 ::arg().set("trace","if we should output heaps of logging. set to 'fail' to only log failing domains")="off";
3293 ::arg().set("dnssec", "DNSSEC mode: off/process-no-validate (default)/process/log-fail/validate")="process-no-validate";
3294 ::arg().set("dnssec-log-bogus", "Log DNSSEC bogus validations")="no";
3295 ::arg().set("daemon","Operate as a daemon")="no";
3296 ::arg().setSwitch("write-pid","Write a PID file")="yes";
3297 ::arg().set("loglevel","Amount of logging. Higher is more. Do not set below 3")="6";
3298 ::arg().set("disable-syslog","Disable logging to syslog, useful when running inside a supervisor that logs stdout")="no";
3299 ::arg().set("log-timestamp","Print timestamps in log lines, useful to disable when running with a tool that timestamps stdout already")="yes";
3300 ::arg().set("log-common-errors","If we should log rather common errors")="no";
3301 ::arg().set("chroot","switch to chroot jail")="";
3302 ::arg().set("setgid","If set, change group id to this gid for more security")="";
3303 ::arg().set("setuid","If set, change user id to this uid for more security")="";
3304 ::arg().set("network-timeout", "Wait this number of milliseconds for network i/o")="1500";
3305 ::arg().set("threads", "Launch this number of threads")="2";
3306 ::arg().set("processes", "Launch this number of processes (EXPERIMENTAL, DO NOT CHANGE)")="1"; // if we un-experimental this, need to fix openssl rand seeding for multiple PIDs!
3307 ::arg().set("config-name","Name of this virtual configuration - will rename the binary image")="";
3308 ::arg().set("api-config-dir", "Directory where REST API stores config and zones") = "";
3309 ::arg().set("api-key", "Static pre-shared authentication key for access to the REST API") = "";
3310 ::arg().set("api-logfile", "Location of the server logfile (used by the REST API)") = "/var/log/pdns.log";
3311 ::arg().set("api-readonly", "Disallow data modification through the REST API when set") = "no";
3312 ::arg().setSwitch("webserver", "Start a webserver (for REST API)") = "no";
3313 ::arg().set("webserver-address", "IP Address of webserver to listen on") = "127.0.0.1";
3314 ::arg().set("webserver-port", "Port of webserver to listen on") = "8082";
3315 ::arg().set("webserver-password", "Password required for accessing the webserver") = "";
3316 ::arg().set("webserver-allow-from","Webserver access is only allowed from these subnets")="127.0.0.1,::1";
3317 ::arg().set("carbon-ourname", "If set, overrides our reported hostname for carbon stats")="";
3318 ::arg().set("carbon-server", "If set, send metrics in carbon (graphite) format to this server IP address")="";
3319 ::arg().set("carbon-interval", "Number of seconds between carbon (graphite) updates")="30";
3320 ::arg().set("statistics-interval", "Number of seconds between printing of recursor statistics, 0 to disable")="1800";
3321 ::arg().set("quiet","Suppress logging of questions and answers")="";
3322 ::arg().set("logging-facility","Facility to log messages as. 0 corresponds to local0")="";
3323 ::arg().set("config-dir","Location of configuration directory (recursor.conf)")=SYSCONFDIR;
3324 ::arg().set("socket-owner","Owner of socket")="";
3325 ::arg().set("socket-group","Group of socket")="";
3326 ::arg().set("socket-mode", "Permissions for socket")="";
3327
3328 ::arg().set("socket-dir",string("Where the controlsocket will live, ")+LOCALSTATEDIR+" when unset and not chrooted" )="";
3329 ::arg().set("delegation-only","Which domains we only accept delegations from")="";
3330 ::arg().set("query-local-address","Source IP address for sending queries")="0.0.0.0";
3331 ::arg().set("query-local-address6","Source IPv6 address for sending queries. IF UNSET, IPv6 WILL NOT BE USED FOR OUTGOING QUERIES")="";
3332 ::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
3333 ::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
3334 ::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
3335 ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="64";
3336 ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
3337 ::arg().set("hint-file", "If set, load root hints from this file")="";
3338 ::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
3339 ::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
3340 ::arg().set("max-cache-ttl", "maximum number of seconds to keep a cached entry in memory")="86400";
3341 ::arg().set("packetcache-ttl", "maximum number of seconds to keep a cached entry in packetcache")="3600";
3342 ::arg().set("max-packetcache-entries", "maximum number of entries to keep in the packetcache")="500000";
3343 ::arg().set("packetcache-servfail-ttl", "maximum number of seconds to keep a cached servfail entry in packetcache")="60";
3344 ::arg().set("server-id", "Returned when queried for 'server.id' TXT or NSID, defaults to hostname")="";
3345 ::arg().set("stats-ringbuffer-entries", "maximum number of packets to store statistics for")="10000";
3346 ::arg().set("version-string", "string reported on version.pdns or version.bind")=fullVersionString();
3347 ::arg().set("allow-from", "If set, only allow these comma separated netmasks to recurse")=LOCAL_NETS;
3348 ::arg().set("allow-from-file", "If set, load allowed netmasks from this file")="";
3349 ::arg().set("entropy-source", "If set, read entropy from this file")="/dev/urandom";
3350 ::arg().set("dont-query", "If set, do not query these netmasks for DNS data")=DONT_QUERY;
3351 ::arg().set("max-tcp-per-client", "If set, maximum number of TCP sessions per client (IP address)")="0";
3352 ::arg().set("max-tcp-queries-per-connection", "If set, maximum number of TCP queries in a TCP connection")="0";
3353 ::arg().set("spoof-nearmiss-max", "If non-zero, assume spoofing after this many near misses")="20";
3354 ::arg().set("single-socket", "If set, only use a single socket for outgoing queries")="off";
3355 ::arg().set("auth-zones", "Zones for which we have authoritative data, comma separated domain=file pairs ")="";
3356 ::arg().set("lua-config-file", "More powerful configuration options")="";
3357
3358 ::arg().set("forward-zones", "Zones for which we forward queries, comma separated domain=ip pairs")="";
3359 ::arg().set("forward-zones-recurse", "Zones for which we forward queries with recursion bit, comma separated domain=ip pairs")="";
3360 ::arg().set("forward-zones-file", "File with (+)domain=ip pairs for forwarding")="";
3361 ::arg().set("export-etc-hosts", "If we should serve up contents from /etc/hosts")="off";
3362 ::arg().set("export-etc-hosts-search-suffix", "Also serve up the contents of /etc/hosts with this suffix")="";
3363 ::arg().set("etc-hosts-file", "Path to 'hosts' file")="/etc/hosts";
3364 ::arg().set("serve-rfc1918", "If we should be authoritative for RFC 1918 private IP space")="yes";
3365 ::arg().set("lua-dns-script", "Filename containing an optional 'lua' script that will be used to modify dns answers")="";
3366 ::arg().set("latency-statistic-size","Number of latency values to calculate the qa-latency average")="10000";
3367 ::arg().setSwitch( "disable-packetcache", "Disable packetcache" )= "no";
3368 ::arg().set("ecs-ipv4-bits", "Number of bits of IPv4 address to pass for EDNS Client Subnet")="24";
3369 ::arg().set("ecs-ipv6-bits", "Number of bits of IPv6 address to pass for EDNS Client Subnet")="56";
3370 ::arg().set("edns-subnet-whitelist", "List of netmasks and domains that we should enable EDNS subnet for")="";
3371 ::arg().set("ecs-scope-zero-address", "Address to send to whitelisted authoritative servers for incoming queries with ECS prefix-length source of 0")="";
3372 ::arg().setSwitch( "use-incoming-edns-subnet", "Pass along received EDNS Client Subnet information")="no";
3373 ::arg().setSwitch( "pdns-distributes-queries", "If PowerDNS itself should distribute queries over threads")="yes";
3374 ::arg().setSwitch( "root-nx-trust", "If set, believe that an NXDOMAIN from the root means the TLD does not exist")="yes";
3375 ::arg().setSwitch( "any-to-tcp","Answer ANY queries with tc=1, shunting to TCP" )="no";
3376 ::arg().setSwitch( "lowercase-outgoing","Force outgoing questions to lowercase")="no";
3377 ::arg().setSwitch("gettag-needs-edns-options", "If EDNS Options should be extracted before calling the gettag() hook")="no";
3378 ::arg().set("udp-truncation-threshold", "Maximum UDP response size before we truncate")="1680";
3379 ::arg().set("edns-outgoing-bufsize", "Outgoing EDNS buffer size")="1680";
3380 ::arg().set("minimum-ttl-override", "Set under adverse conditions, a minimum TTL")="0";
3381 ::arg().set("max-qperq", "Maximum outgoing queries per query")="50";
3382 ::arg().set("max-total-msec", "Maximum total wall-clock time per query in milliseconds, 0 for unlimited")="7000";
3383 ::arg().set("max-recursion-depth", "Maximum number of internal recursion calls per query, 0 for unlimited")="40";
3384
3385 ::arg().set("include-dir","Include *.conf files from this directory")="";
3386 ::arg().set("security-poll-suffix","Domain name from which to query security update notifications")="secpoll.powerdns.com.";
3387
3388 ::arg().setSwitch("reuseport","Enable SO_REUSEPORT allowing multiple recursors processes to listen to 1 address")="no";
3389
3390 ::arg().setSwitch("snmp-agent", "If set, register as an SNMP agent")="no";
3391 ::arg().set("snmp-master-socket", "If set and snmp-agent is set, the socket to use to register to the SNMP master")="";
3392
3393 ::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
3394 ::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
3395
3396 ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
3397
3398 ::arg().setSwitch("log-rpz-changes", "Log additions and removals to RPZ zones at Info level")="no";
3399
3400 ::arg().setCmd("help","Provide a helpful message");
3401 ::arg().setCmd("version","Print version string");
3402 ::arg().setCmd("config","Output blank configuration");
3403 L.toConsole(Logger::Info);
3404 ::arg().laxParse(argc,argv); // do a lax parse
3405
3406 string configname=::arg()["config-dir"]+"/recursor.conf";
3407 if(::arg()["config-name"]!="") {
3408 configname=::arg()["config-dir"]+"/recursor-"+::arg()["config-name"]+".conf";
3409 s_programname+="-"+::arg()["config-name"];
3410 }
3411 cleanSlashes(configname);
3412
3413 if(::arg().mustDo("config")) {
3414 cout<<::arg().configstring()<<endl;
3415 exit(0);
3416 }
3417
3418 if(!::arg().file(configname.c_str()))
3419 L<<Logger::Warning<<"Unable to parse configuration file '"<<configname<<"'"<<endl;
3420
3421 ::arg().parse(argc,argv);
3422
3423 if( !::arg()["chroot"].empty() && !::arg()["api-config-dir"].empty() && !::arg().mustDo("api-readonly") ) {
3424 L<<Logger::Error<<"Using chroot and a writable API is not possible"<<endl;
3425 exit(EXIT_FAILURE);
3426 }
3427
3428 if (::arg()["socket-dir"].empty()) {
3429 if (::arg()["chroot"].empty())
3430 ::arg().set("socket-dir") = LOCALSTATEDIR;
3431 else
3432 ::arg().set("socket-dir") = "/";
3433 }
3434
3435 ::arg().set("delegation-only")=toLower(::arg()["delegation-only"]);
3436
3437 if(::arg().asNum("threads")==1)
3438 ::arg().set("pdns-distributes-queries")="no";
3439
3440 if(::arg().mustDo("help")) {
3441 cout<<"syntax:"<<endl<<endl;
3442 cout<<::arg().helpstring(::arg()["help"])<<endl;
3443 exit(0);
3444 }
3445 if(::arg().mustDo("version")) {
3446 showProductVersion();
3447 showBuildConfiguration();
3448 exit(0);
3449 }
3450
3451 Logger::Urgency logUrgency = (Logger::Urgency)::arg().asNum("loglevel");
3452
3453 if (logUrgency < Logger::Error)
3454 logUrgency = Logger::Error;
3455 if(!g_quiet && logUrgency < Logger::Info) { // Logger::Info=6, Logger::Debug=7
3456 logUrgency = Logger::Info; // if you do --quiet=no, you need Info to also see the query log
3457 }
3458 L.setLoglevel(logUrgency);
3459 L.toConsole(logUrgency);
3460
3461 serviceMain(argc, argv);
3462 }
3463 catch(PDNSException &ae) {
3464 L<<Logger::Error<<"Exception: "<<ae.reason<<endl;
3465 ret=EXIT_FAILURE;
3466 }
3467 catch(std::exception &e) {
3468 L<<Logger::Error<<"STL Exception: "<<e.what()<<endl;
3469 ret=EXIT_FAILURE;
3470 }
3471 catch(...) {
3472 L<<Logger::Error<<"any other exception in main: "<<endl;
3473 ret=EXIT_FAILURE;
3474 }
3475
3476 return ret;
3477 }